Compare commits

..

6 Commits

Author SHA1 Message Date
openhands fa0044e821 Fix login flow to always start OIDC before TOS check 2025-04-24 04:30:15 +00:00
openhands 536588dd19 Fix loading state in TOS acceptance button 2025-04-24 04:03:47 +00:00
openhands a10dfffe0f Fix axios import in accept-tos.tsx 2025-04-24 03:58:22 +00:00
openhands aaaad02042 Add react-toastify dependency 2025-04-24 03:56:33 +00:00
openhands 17d55eab72 Update TOS acceptance to use database storage instead of localStorage 2025-04-24 03:34:57 +00:00
openhands bf9f953bea Move Terms of Service acceptance to dedicated page 2025-04-23 19:44:04 +00:00
399 changed files with 22402 additions and 26908 deletions
+3 -3
View File
@@ -1,12 +1,12 @@
- [ ] This change is worth documenting at https://docs.all-hands.dev/
- [ ] Include this change in the Release Notes. If checked, you **must** provide an **end-user friendly** description for your change below
**End-user friendly description of the problem this fixes or functionality this introduces.**
**End-user friendly description of the problem this fixes or functionality that this introduces.**
---
**Summarize what the PR does, explaining any non-trivial design decisions.**
**Give a summary of what the PR does, explaining any non-trivial design decisions.**
---
**Link of any specific issues this addresses:**
**Link of any specific issues this addresses.**
+1 -1
View File
@@ -24,7 +24,7 @@ on:
LLM_MODEL:
required: false
type: string
default: "anthropic/claude-3-7-sonnet-20250219"
default: "anthropic/claude-3-5-sonnet-20241022"
LLM_API_VERSION:
required: false
type: string
-73
View File
@@ -1,73 +0,0 @@
#!/bin/bash
echo "Running OpenHands pre-commit hook..."
# Store the exit code to return at the end
# This allows us to be additive to existing pre-commit hooks
EXIT_CODE=0
# Check if frontend directory has changed
frontend_changes=$(git diff --cached --name-only | grep "^frontend/")
if [ -n "$frontend_changes" ]; then
echo "Frontend changes detected. Running frontend checks..."
# Check if frontend directory exists
if [ -d "frontend" ]; then
# Change to frontend directory
cd frontend || exit 1
# Run lint:fix
echo "Running npm lint:fix..."
npm run lint:fix
if [ $? -ne 0 ]; then
echo "Frontend linting failed. Please fix the issues before committing."
EXIT_CODE=1
fi
# Run build
echo "Running npm build..."
npm run build
if [ $? -ne 0 ]; then
echo "Frontend build failed. Please fix the issues before committing."
EXIT_CODE=1
fi
# Run tests
echo "Running npm test..."
npm test
if [ $? -ne 0 ]; then
echo "Frontend tests failed. Please fix the failing tests before committing."
EXIT_CODE=1
fi
# Return to the original directory
cd ..
if [ $EXIT_CODE -eq 0 ]; then
echo "Frontend checks passed!"
fi
else
echo "Frontend directory not found. Skipping frontend checks."
fi
else
echo "No frontend changes detected. Skipping frontend checks."
fi
# Run any existing pre-commit hooks that might have been installed by the user
# This makes our hook additive rather than replacing existing hooks
if [ -f ".git/hooks/pre-commit.local" ]; then
echo "Running existing pre-commit hooks..."
bash .git/hooks/pre-commit.local
if [ $? -ne 0 ]; then
echo "Existing pre-commit hooks failed."
EXIT_CODE=1
fi
fi
if [ $EXIT_CODE -eq 0 ]; then
echo "All pre-commit checks passed!"
else
echo "Some pre-commit checks failed. Please fix the issues before committing."
fi
exit $EXIT_CODE
-7
View File
@@ -2,11 +2,4 @@
echo "Setting up the environment..."
# Install pre-commit package
python -m pip install pre-commit
# Install pre-commit hooks if .git directory exists
if [ -d ".git" ]; then
echo "Installing pre-commit hooks..."
pre-commit install
fi
+1 -1
View File
@@ -121,7 +121,7 @@ These Slack and Discord etiquette guidelines are designed to foster an inclusive
- Use threads for specific discussions to keep channels organized and easier to follow.
- Tag others only when their input is critical or urgent, and use @here, @channel or @everyone sparingly to minimize disruptions.
- Be patient, as open-source contributors and maintainers often have other commitments and may need time to respond.
- Post questions or discussions in the most relevant channel (e.g., for [slack - #general](https://openhands-ai.slack.com/archives/C06P5NCGSFP) for general topics, [slack - #questions](https://openhands-ai.slack.com/archives/C06U8UTKSAD) for queries/questions, [discord - #general](https://discord.com/channels/1222935860639563850/1222935861386018885)).
- Post questions or discussions in the most relevant channel (e.g., for [slack - #general](https://app.slack.com/client/T06P212QSEA/C06P5NCGSFP) for general topics, [slack - #questions](https://openhands-ai.slack.com/archives/C06U8UTKSAD) for queries/questions, [discord - #general](https://discord.com/channels/1222935860639563850/1222935861386018885)).
- When asking for help or raising issues, include necessary details like links, screenshots, or clear explanations to provide context.
- Keep discussions in public channels whenever possible to allow others to benefit from the conversation, unless the matter is sensitive or private.
- Always adhere to [our standards](https://github.com/All-Hands-AI/OpenHands/blob/main/CODE_OF_CONDUCT.md#our-standards) to ensure a welcoming and collaborative environment.
+1 -1
View File
@@ -118,7 +118,7 @@ poetry run pytest ./tests/unit/test_*.py
To reduce build time (e.g., if no changes were made to the client-runtime component), you can use an existing Docker container image by
setting the SANDBOX_RUNTIME_CONTAINER_IMAGE environment variable to the desired Docker image.
Example: `export SANDBOX_RUNTIME_CONTAINER_IMAGE=ghcr.io/all-hands-ai/runtime:0.37-nikolaik`
Example: `export SANDBOX_RUNTIME_CONTAINER_IMAGE=ghcr.io/all-hands-ai/runtime:0.34-nikolaik`
## Develop inside Docker container
-13
View File
@@ -39,7 +39,6 @@ ifeq ($(INSTALL_DOCKER),)
@$(MAKE) -s check-docker
endif
@$(MAKE) -s check-poetry
@$(MAKE) -s check-tmux
@echo "$(GREEN)Dependencies checked successfully.$(RESET)"
check-system:
@@ -102,18 +101,6 @@ check-docker:
exit 1; \
fi
check-tmux:
@echo "$(YELLOW)Checking tmux installation...$(RESET)"
@if command -v tmux > /dev/null; then \
echo "$(BLUE)$(shell tmux -V) is already installed.$(RESET)"; \
else \
echo "$(YELLOW)╔════════════════════════════════════════════════════════════════════════════╗$(RESET)"; \
echo "$(YELLOW)║ OPTIONAL: tmux is not installed. ║$(RESET)"; \
echo "$(YELLOW)║ Some advanced terminal features may not work without tmux. ║$(RESET)"; \
echo "$(YELLOW)║ You can install it if needed, but it's not required for development. ║$(RESET)"; \
echo "$(YELLOW)╚════════════════════════════════════════════════════════════════════════════╝$(RESET)"; \
fi
check-poetry:
@echo "$(YELLOW)Checking Poetry installation...$(RESET)"
@if command -v poetry > /dev/null; then \
+7 -13
View File
@@ -9,9 +9,10 @@
<div align="center">
<a href="https://github.com/All-Hands-AI/OpenHands/graphs/contributors"><img src="https://img.shields.io/github/contributors/All-Hands-AI/OpenHands?style=for-the-badge&color=blue" alt="Contributors"></a>
<a href="https://github.com/All-Hands-AI/OpenHands/stargazers"><img src="https://img.shields.io/github/stars/All-Hands-AI/OpenHands?style=for-the-badge&color=blue" alt="Stargazers"></a>
<a href="https://codecov.io/github/All-Hands-AI/OpenHands?branch=main"><img alt="CodeCov" src="https://img.shields.io/codecov/c/github/All-Hands-AI/OpenHands?style=for-the-badge&color=blue"></a>
<a href="https://github.com/All-Hands-AI/OpenHands/blob/main/LICENSE"><img src="https://img.shields.io/github/license/All-Hands-AI/OpenHands?style=for-the-badge&color=blue" alt="MIT License"></a>
<br/>
<a href="https://join.slack.com/t/openhands-ai/shared_invite/zt-34zm4j0gj-Qz5kRHoca8DFCbqXPS~f_A"><img src="https://img.shields.io/badge/Slack-Join%20Us-red?logo=slack&logoColor=white&style=for-the-badge" alt="Join our Slack community"></a>
<a href="https://join.slack.com/t/openhands-ai/shared_invite/zt-2ngejmfw6-9gW4APWOC9XUp1n~SiQ6iw"><img src="https://img.shields.io/badge/Slack-Join%20Us-red?logo=slack&logoColor=white&style=for-the-badge" alt="Join our Slack community"></a>
<a href="https://discord.gg/ESHStjSjD4"><img src="https://img.shields.io/badge/Discord-Join%20Us-purple?logo=discord&logoColor=white&style=for-the-badge" alt="Join our Discord community"></a>
<a href="https://github.com/All-Hands-AI/OpenHands/blob/main/CREDITS.md"><img src="https://img.shields.io/badge/Project-Credits-blue?style=for-the-badge&color=FFE165&logo=github&logoColor=white" alt="Credits"></a>
<br/>
@@ -51,23 +52,23 @@ system requirements and more information.
```bash
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.37-nikolaik
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik
docker run -it --rm --pull=always \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.37-nikolaik \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik \
-e LOG_ALL_EVENTS=true \
-v /var/run/docker.sock:/var/run/docker.sock \
-v ~/.openhands-state:/.openhands-state \
-p 3000:3000 \
--add-host host.docker.internal:host-gateway \
--name openhands-app \
docker.all-hands.dev/all-hands-ai/openhands:0.37
docker.all-hands.dev/all-hands-ai/openhands:0.34
```
You'll find OpenHands running at [http://localhost:3000](http://localhost:3000)!
When you open the application, you'll be asked to choose an LLM provider and add an API key.
[Anthropic's Claude 3.7 Sonnet](https://www.anthropic.com/api) (`anthropic/claude-3-7-sonnet-20250219`)
[Anthropic's Claude 3.5 Sonnet](https://www.anthropic.com/api) (`anthropic/claude-3-5-sonnet-20241022`)
works best, but you have [many options](https://docs.all-hands.dev/modules/usage/llms).
## 💡 Other ways to run OpenHands
@@ -99,19 +100,12 @@ check out our [documentation](https://docs.all-hands.dev/modules/usage/getting-s
There you'll find resources on how to use different LLM providers,
troubleshooting resources, and advanced configuration options.
### Custom Scripts
OpenHands supports custom scripts that run at different points in the runtime lifecycle:
- **setup.sh**: Place this script in the `.openhands` directory of your repository to run custom setup commands when the runtime initializes.
- **pre-commit.sh**: Place this script in the `.openhands` directory to add a custom git pre-commit hook that runs before each commit. This can be used to enforce code quality standards, run tests, or perform other checks before allowing commits.
## 🤝 How to Join the Community
OpenHands is a community-driven project, and we welcome contributions from everyone. We do most of our communication
through Slack, so this is the best place to start, but we also are happy to have you contact us on Discord or Github:
- [Join our Slack workspace](https://join.slack.com/t/openhands-ai/shared_invite/zt-34zm4j0gj-Qz5kRHoca8DFCbqXPS~f_A) - Here we talk about research, architecture, and future development.
- [Join our Slack workspace](https://join.slack.com/t/openhands-ai/shared_invite/zt-2ngejmfw6-9gW4APWOC9XUp1n~SiQ6iw) - Here we talk about research, architecture, and future development.
- [Join our Discord server](https://discord.gg/ESHStjSjD4) - This is a community-run server for general discussion, questions, and feedback.
- [Read or post Github Issues](https://github.com/All-Hands-AI/OpenHands/issues) - Check out the issues we're working on, or add your own ideas.
+1 -5
View File
@@ -316,10 +316,6 @@ llm_config = 'gpt3'
# Additional Docker runtime kwargs
#docker_runtime_kwargs = {}
# Specific port to use for VSCode. If not set, a random port will be chosen.
# Useful when deploying OpenHands in a remote machine where you need to expose a specific port.
#vscode_port = 41234
#################################### Security ###################################
# Configuration for security features
##############################################################################
@@ -395,7 +391,7 @@ type = "noop"
#[llm.condenser]
#model = "gpt-4o"
#temperature = 0.1
#max_input_tokens = 1024
#max_tokens = 1024
#################################### Eval ####################################
# Configuration for the evaluation, please refer to the specific evaluation
+3 -3
View File
@@ -61,8 +61,8 @@ RUN add-apt-repository ppa:deadsnakes/ppa \
&& apt-get install -y python3.12 python3.12-venv python3.12-dev python3-pip \
&& ln -s /usr/bin/python3.12 /usr/bin/python
# NodeJS >= 22.x
RUN curl -fsSL https://deb.nodesource.com/setup_22.x | bash - \
# NodeJS >= 18.17.1
RUN curl -fsSL https://deb.nodesource.com/setup_18.x | bash - \
&& apt-get install -y nodejs
# Poetry >= 1.8
@@ -108,7 +108,7 @@ WORKDIR /app
# cache build dependencies
RUN \
--mount=type=bind,source=./,target=/app/,rw \
--mount=type=bind,source=./,target=/app/ \
<<EOF
#!/bin/bash
make -s clean
+1 -1
View File
@@ -11,7 +11,7 @@ services:
- BACKEND_HOST=${BACKEND_HOST:-"0.0.0.0"}
- SANDBOX_API_HOSTNAME=host.docker.internal
#
- SANDBOX_RUNTIME_CONTAINER_IMAGE=${SANDBOX_RUNTIME_CONTAINER_IMAGE:-ghcr.io/all-hands-ai/runtime:0.37-nikolaik}
- SANDBOX_RUNTIME_CONTAINER_IMAGE=${SANDBOX_RUNTIME_CONTAINER_IMAGE:-ghcr.io/all-hands-ai/runtime:0.34-nikolaik}
- SANDBOX_USER_ID=${SANDBOX_USER_ID:-1234}
- WORKSPACE_MOUNT_PATH=${WORKSPACE_BASE:-$PWD/workspace}
ports:
+1 -1
View File
@@ -26,7 +26,7 @@ repos:
- id: ruff
entry: ruff check --config dev_config/python/ruff.toml
types_or: [python, pyi, jupyter]
args: [--fix, --unsafe-fixes]
args: [--fix]
# Run the formatter.
- id: ruff-format
entry: ruff format --config dev_config/python/ruff.toml
-3
View File
@@ -7,9 +7,6 @@ select = [
"Q",
"B",
"ASYNC",
"UP006", # Use `list` instead of `List` for annotations
"UP007", # Use `X | Y` instead of `Union[X, Y]`
"UP008", # Use `X | None` instead of `Optional[X]`
]
ignore = [
+1 -1
View File
@@ -7,7 +7,7 @@ services:
image: openhands:latest
container_name: openhands-app-${DATE:-}
environment:
- SANDBOX_RUNTIME_CONTAINER_IMAGE=${SANDBOX_RUNTIME_CONTAINER_IMAGE:-docker.all-hands.dev/all-hands-ai/runtime:0.37-nikolaik}
- SANDBOX_RUNTIME_CONTAINER_IMAGE=${SANDBOX_RUNTIME_CONTAINER_IMAGE:-docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik}
#- SANDBOX_USER_ID=${SANDBOX_USER_ID:-1234} # enable this only if you want a specific non-root sandbox user but you will have to manually adjust permissions of openhands-state for this user
- WORKSPACE_MOUNT_PATH=${WORKSPACE_BASE:-$PWD/workspace}
ports:
-1
View File
@@ -3,7 +3,6 @@
# Production
/build
/static/swagger-ui
# Generated files
.docusaurus
-17
View File
@@ -36,14 +36,6 @@ const config: Config = {
mermaid: true,
},
themes: ['@docusaurus/theme-mermaid'],
plugins: [
[
require.resolve('docusaurus-lunr-search'),
{
languages: ['en', 'zh', 'fr', 'ja', 'pt']
}
]
],
presets: [
[
'classic',
@@ -83,19 +75,10 @@ const config: Config = {
position: 'left',
label: 'User Guides',
},
{
href: 'https://docs.all-hands.dev/swagger-ui/', // FIXME: this should be a relative path, but docusarus steals the click
label: 'API',
position: 'left',
},
{
type: 'localeDropdown',
position: 'left',
},
{
type: 'search',
position: 'left',
},
{
href: 'https://all-hands.dev',
label: 'Company',
-102
View File
@@ -1,102 +0,0 @@
const fs = require('fs');
const path = require('path');
const swaggerUiDist = require('swagger-ui-dist');
/**
* This script manually sets up Swagger UI for the Docusaurus documentation.
*
* Why we need this approach:
* 1. Docusaurus doesn't have a built-in way to integrate Swagger UI
* 2. We need to copy the necessary files from swagger-ui-dist to our static directory
* 3. We need to create a custom index.html file that points to our OpenAPI spec
* 4. This approach allows us to customize the Swagger UI to match our documentation style
*/
// Get the absolute path to the swagger-ui-dist package
const swaggerUiDistPath = swaggerUiDist.getAbsoluteFSPath();
// Create the target directory if it doesn't exist
const targetDir = path.join(__dirname, 'static', 'swagger-ui');
if (!fs.existsSync(targetDir)) {
fs.mkdirSync(targetDir, { recursive: true });
}
// Copy all files from swagger-ui-dist to our target directory
const files = fs.readdirSync(swaggerUiDistPath);
files.forEach(file => {
const sourcePath = path.join(swaggerUiDistPath, file);
const targetPath = path.join(targetDir, file);
// Skip directories and non-essential files
if (fs.statSync(sourcePath).isDirectory() ||
file === 'package.json' ||
file === 'README.md' ||
file.endsWith('.map')) {
return;
}
fs.copyFileSync(sourcePath, targetPath);
});
// Create a custom index.html file that points to our OpenAPI spec
const indexHtml = `
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>OpenHands API Documentation</title>
<link rel="stylesheet" type="text/css" href="./swagger-ui.css" />
<link rel="icon" type="image/png" href="./favicon-32x32.png" sizes="32x32" />
<link rel="icon" type="image/png" href="./favicon-16x16.png" sizes="16x16" />
<style>
html {
box-sizing: border-box;
overflow: -moz-scrollbars-vertical;
overflow-y: scroll;
}
*,
*:before,
*:after {
box-sizing: inherit;
}
body {
margin: 0;
background: #fafafa;
}
</style>
</head>
<body>
<div id="swagger-ui"></div>
<script src="./swagger-ui-bundle.js" charset="UTF-8"> </script>
<script src="./swagger-ui-standalone-preset.js" charset="UTF-8"> </script>
<script>
window.onload = function() {
// Begin Swagger UI call region
const ui = SwaggerUIBundle({
url: "/openapi.json",
dom_id: '#swagger-ui',
deepLinking: true,
presets: [
SwaggerUIBundle.presets.apis,
SwaggerUIStandalonePreset
],
plugins: [
SwaggerUIBundle.plugins.DownloadUrl
],
layout: "StandaloneLayout"
});
// End Swagger UI call region
window.ui = ui;
};
</script>
</body>
</html>
`;
fs.writeFileSync(path.join(targetDir, 'index.html'), indexHtml);
console.log('Swagger UI files generated successfully in static/swagger-ui/');
@@ -52,7 +52,7 @@ LLM_API_KEY="sk_test_12345"
```bash
docker run -it \
--pull=always \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.37-nikolaik \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik \
-e SANDBOX_USER_ID=$(id -u) \
-e WORKSPACE_MOUNT_PATH=$WORKSPACE_BASE \
-e LLM_API_KEY=$LLM_API_KEY \
@@ -61,7 +61,7 @@ docker run -it \
-v /var/run/docker.sock:/var/run/docker.sock \
--add-host host.docker.internal:host-gateway \
--name openhands-app-$(date +%Y%m%d%H%M%S) \
docker.all-hands.dev/all-hands-ai/openhands:0.37 \
docker.all-hands.dev/all-hands-ai/openhands:0.34 \
python -m openhands.core.cli
```
@@ -46,7 +46,7 @@ LLM_API_KEY="sk_test_12345"
```bash
docker run -it \
--pull=always \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.37-nikolaik \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik \
-e SANDBOX_USER_ID=$(id -u) \
-e WORKSPACE_MOUNT_PATH=$WORKSPACE_BASE \
-e LLM_API_KEY=$LLM_API_KEY \
@@ -56,6 +56,6 @@ docker run -it \
-v /var/run/docker.sock:/var/run/docker.sock \
--add-host host.docker.internal:host-gateway \
--name openhands-app-$(date +%Y%m%d%H%M%S) \
docker.all-hands.dev/all-hands-ai/openhands:0.37 \
docker.all-hands.dev/all-hands-ai/openhands:0.34 \
python -m openhands.core.main -t "write a bash script that prints hi" --no-auto-continue
```
@@ -13,16 +13,16 @@
La façon la plus simple d'exécuter OpenHands est avec Docker.
```bash
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.37-nikolaik
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik
docker run -it --rm --pull=always \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.37-nikolaik \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik \
-e LOG_ALL_EVENTS=true \
-v /var/run/docker.sock:/var/run/docker.sock \
-p 3000:3000 \
--add-host host.docker.internal:host-gateway \
--name openhands-app \
docker.all-hands.dev/all-hands-ai/openhands:0.37
docker.all-hands.dev/all-hands-ai/openhands:0.34
```
Vous pouvez également exécuter OpenHands en mode [headless scriptable](https://docs.all-hands.dev/modules/usage/how-to/headless-mode), en tant que [CLI interactive](https://docs.all-hands.dev/modules/usage/how-to/cli-mode), ou en utilisant l'[Action GitHub OpenHands](https://docs.all-hands.dev/modules/usage/how-to/github-action).
@@ -42,7 +42,7 @@ Explorez le code source d'OpenHands sur [GitHub](https://github.com/All-Hands-AI
/>
</a>
<br></br>
<a href="https://join.slack.com/t/openhands-ai/shared_invite/zt-34zm4j0gj-Qz5kRHoca8DFCbqXPS~f_A">
<a href="https://join.slack.com/t/openhands-ai/shared_invite/zt-2ngejmfw6-9gW4APWOC9XUp1n~SiQ6iw">
<img
src="https://img.shields.io/badge/Slack-Join%20Us-red?logo=slack&logoColor=white&style=for-the-badge"
alt="Join our Slack community"
@@ -13,7 +13,7 @@ C'est le Runtime par défaut qui est utilisé lorsque vous démarrez OpenHands.
```
docker run # ...
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.37-nikolaik \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik \
-v /var/run/docker.sock:/var/run/docker.sock \
# ...
```
@@ -34,7 +34,7 @@ Docker で OpenHands を CLI モードで実行するには:
```bash
docker run -it \
--pull=always \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.37-nikolaik \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik \
-e SANDBOX_USER_ID=$(id -u) \
-e WORKSPACE_MOUNT_PATH=$WORKSPACE_BASE \
-e LLM_API_KEY=$LLM_API_KEY \
@@ -44,7 +44,7 @@ docker run -it \
-v ~/.openhands-state:/.openhands-state \
--add-host host.docker.internal:host-gateway \
--name openhands-app-$(date +%Y%m%d%H%M%S) \
docker.all-hands.dev/all-hands-ai/openhands:0.37 \
docker.all-hands.dev/all-hands-ai/openhands:0.34 \
python -m openhands.core.cli
```
@@ -31,7 +31,7 @@ DockerでOpenHandsをヘッドレスモードで実行するには:
```bash
docker run -it \
--pull=always \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.37-nikolaik \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik \
-e SANDBOX_USER_ID=$(id -u) \
-e WORKSPACE_MOUNT_PATH=$WORKSPACE_BASE \
-e LLM_API_KEY=$LLM_API_KEY \
@@ -42,7 +42,7 @@ docker run -it \
-v ~/.openhands-state:/.openhands-state \
--add-host host.docker.internal:host-gateway \
--name openhands-app-$(date +%Y%m%d%H%M%S) \
docker.all-hands.dev/all-hands-ai/openhands:0.37 \
docker.all-hands.dev/all-hands-ai/openhands:0.34 \
python -m openhands.core.main -t "write a bash script that prints hi"
```
@@ -42,7 +42,7 @@ OpenHandsのソースコードを[GitHub](https://github.com/All-Hands-AI/OpenHa
/>
</a>
<br></br>
<a href="https://join.slack.com/t/openhands-ai/shared_invite/zt-34zm4j0gj-Qz5kRHoca8DFCbqXPS~f_A">
<a href="https://join.slack.com/t/openhands-ai/shared_invite/zt-2ngejmfw6-9gW4APWOC9XUp1n~SiQ6iw">
<img
src="https://img.shields.io/badge/Slack-Join%20Us-red?logo=slack&logoColor=white&style=for-the-badge"
alt="Slackコミュニティに参加"
@@ -13,7 +13,7 @@ OpenHandsがリポジトリで動作する際:
1. リポジトリに`.openhands/microagents/`が存在する場合、そこからリポジトリ固有の指示を読み込みます。
2. 会話のキーワードによってトリガーされる一般的なガイドラインを読み込みます。
現在の[パブリックMicroagents](https://github.com/All-Hands-AI/OpenHands/tree/main/microagents)を参照してください。
現在の[パブリックMicroagents](https://github.com/All-Hands-AI/OpenHands/tree/main/microagents/knowledge)を参照してください。
## Microagentのフォーマット
@@ -88,4 +88,4 @@ triggers:
- ビルド時間とイメージサイズを最適化
```
より多くの例については、[現在のパブリックマイクロエージェント](https://github.com/All-Hands-AI/OpenHands/tree/main/microagents)をご覧ください。
より多くの例については、[現在のパブリックマイクロエージェント](https://github.com/All-Hands-AI/OpenHands/tree/main/microagents/knowledge)をご覧ください。
@@ -25,7 +25,7 @@ nikolaik の `SANDBOX_RUNTIME_CONTAINER_IMAGE` は、ランタイムサーバー
```bash
docker run # ...
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.37-nikolaik \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik \
-e SANDBOX_USER_ID=$(id -u) \
-e WORKSPACE_MOUNT_PATH=$WORKSPACE_BASE \
-v $WORKSPACE_BASE:/opt/workspace_base \
@@ -82,5 +82,5 @@ docker network create openhands-network
# 分離されたネットワークで OpenHands を実行
docker run # ... \
--network openhands-network \
docker.all-hands.dev/all-hands-ai/openhands:0.37
docker.all-hands.dev/all-hands-ai/openhands:0.34
```
@@ -35,7 +35,7 @@ Para executar o OpenHands no modo CLI com Docker:
```bash
docker run -it \
--pull=always \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.37-nikolaik \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik \
-e SANDBOX_USER_ID=$(id -u) \
-e WORKSPACE_MOUNT_PATH=$WORKSPACE_BASE \
-e LLM_API_KEY=$LLM_API_KEY \
@@ -45,7 +45,7 @@ docker run -it \
-v ~/.openhands-state:/.openhands-state \
--add-host host.docker.internal:host-gateway \
--name openhands-app-$(date +%Y%m%d%H%M%S) \
docker.all-hands.dev/all-hands-ai/openhands:0.37 \
docker.all-hands.dev/all-hands-ai/openhands:0.34 \
python -m openhands.core.cli
```
@@ -32,7 +32,7 @@ Para executar o OpenHands no modo Headless com Docker:
```bash
docker run -it \
--pull=always \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.37-nikolaik \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik \
-e SANDBOX_USER_ID=$(id -u) \
-e WORKSPACE_MOUNT_PATH=$WORKSPACE_BASE \
-e LLM_API_KEY=$LLM_API_KEY \
@@ -43,7 +43,7 @@ docker run -it \
-v ~/.openhands-state:/.openhands-state \
--add-host host.docker.internal:host-gateway \
--name openhands-app-$(date +%Y%m%d%H%M%S) \
docker.all-hands.dev/all-hands-ai/openhands:0.37 \
docker.all-hands.dev/all-hands-ai/openhands:0.34 \
python -m openhands.core.main -t "escreva um script bash que imprima oi"
```
@@ -58,17 +58,17 @@
A maneira mais fácil de executar o OpenHands é no Docker.
```bash
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.37-nikolaik
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik
docker run -it --rm --pull=always \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.37-nikolaik \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik \
-e LOG_ALL_EVENTS=true \
-v /var/run/docker.sock:/var/run/docker.sock \
-v ~/.openhands-state:/.openhands-state \
-p 3000:3000 \
--add-host host.docker.internal:host-gateway \
--name openhands-app \
docker.all-hands.dev/all-hands-ai/openhands:0.37
docker.all-hands.dev/all-hands-ai/openhands:0.34
```
Você encontrará o OpenHands em execução em http://localhost:3000!
@@ -13,7 +13,7 @@ Quando o OpenHands trabalha com um repositório, ele:
1. Carrega instruções específicas do repositório de `.openhands/microagents/`, se presentes no repositório.
2. Carrega diretrizes gerais acionadas por palavras-chave nas conversas.
Veja os [Microagentes Públicos](https://github.com/All-Hands-AI/OpenHands/tree/main/microagents) atuais.
Veja os [Microagentes Públicos](https://github.com/All-Hands-AI/OpenHands/tree/main/microagents/knowledge) atuais.
## Formato do Microagente
@@ -4,7 +4,7 @@
Microagentes públicos são diretrizes especializadas acionadas por palavras-chave para todos os usuários do OpenHands.
Eles são definidos em arquivos markdown no diretório
[`microagents/`](https://github.com/All-Hands-AI/OpenHands/tree/main/microagents).
[`microagents/knowledge/`](https://github.com/All-Hands-AI/OpenHands/tree/main/microagents/knowledge).
Microagentes públicos:
- Monitoram comandos recebidos em busca de suas palavras-chave de acionamento.
@@ -15,7 +15,7 @@ Microagentes públicos:
## Microagentes Públicos Atuais
Para mais informações sobre microagentes específicos, consulte seus arquivos de documentação individuais no
diretório [`microagents/`](https://github.com/All-Hands-AI/OpenHands/tree/main/microagents/).
diretório [`microagents/knowledge/`](https://github.com/All-Hands-AI/OpenHands/tree/main/microagents/knowledge/).
### Agente GitHub
**Arquivo**: `github.md`
@@ -59,7 +59,7 @@ yes | npm install package-name
## Contribuindo com um Microagente Público
Você pode criar seus próprios microagentes públicos adicionando novos arquivos markdown ao
diretório [`microagents/`](https://github.com/All-Hands-AI/OpenHands/tree/main/microagents/).
diretório [`microagents/knowledge/`](https://github.com/All-Hands-AI/OpenHands/tree/main/microagents/knowledge/).
### Melhores Práticas para Microagentes Públicos
@@ -81,7 +81,7 @@ Antes de criar um microagente público, considere:
#### 2. Crie o Arquivo
Crie um novo arquivo markdown em [`microagents/`](https://github.com/All-Hands-AI/OpenHands/tree/main/microagents/)
Crie um novo arquivo markdown em [`microagents/knowledge/`](https://github.com/All-Hands-AI/OpenHands/tree/main/microagents/knowledge/)
com um nome descritivo (por exemplo, `docker.md` para um agente focado em Docker).
Atualize o arquivo com o frontmatter necessário [de acordo com o formato exigido](./microagents-overview#microagent-format)
@@ -149,5 +149,5 @@ Lembre-se de:
- Otimizar para tempo de build e tamanho da imagem
```
Veja os [microagentes públicos atuais](https://github.com/All-Hands-AI/OpenHands/tree/main/microagents) para
Veja os [microagentes públicos atuais](https://github.com/All-Hands-AI/OpenHands/tree/main/microagents/knowledge) para
mais exemplos.
@@ -13,7 +13,7 @@ Este é o Runtime padrão que é usado quando você inicia o OpenHands. Você po
```
docker run # ...
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.37-nikolaik \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik \
-v /var/run/docker.sock:/var/run/docker.sock \
# ...
```
@@ -50,7 +50,7 @@ LLM_API_KEY="sk_test_12345"
```bash
docker run -it \
--pull=always \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.37-nikolaik \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik \
-e SANDBOX_USER_ID=$(id -u) \
-e WORKSPACE_MOUNT_PATH=$WORKSPACE_BASE \
-e LLM_API_KEY=$LLM_API_KEY \
@@ -59,7 +59,7 @@ docker run -it \
-v /var/run/docker.sock:/var/run/docker.sock \
--add-host host.docker.internal:host-gateway \
--name openhands-app-$(date +%Y%m%d%H%M%S) \
docker.all-hands.dev/all-hands-ai/openhands:0.37 \
docker.all-hands.dev/all-hands-ai/openhands:0.34 \
python -m openhands.core.cli
```
@@ -47,7 +47,7 @@ LLM_API_KEY="sk_test_12345"
```bash
docker run -it \
--pull=always \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.37-nikolaik \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik \
-e SANDBOX_USER_ID=$(id -u) \
-e WORKSPACE_MOUNT_PATH=$WORKSPACE_BASE \
-e LLM_API_KEY=$LLM_API_KEY \
@@ -57,6 +57,6 @@ docker run -it \
-v /var/run/docker.sock:/var/run/docker.sock \
--add-host host.docker.internal:host-gateway \
--name openhands-app-$(date +%Y%m%d%H%M%S) \
docker.all-hands.dev/all-hands-ai/openhands:0.37 \
docker.all-hands.dev/all-hands-ai/openhands:0.34 \
python -m openhands.core.main -t "write a bash script that prints hi" --no-auto-continue
```
@@ -11,16 +11,16 @@
在 Docker 中运行 OpenHands 是最简单的方式。
```bash
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.37-nikolaik
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik
docker run -it --rm --pull=always \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.37-nikolaik \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik \
-e LOG_ALL_EVENTS=true \
-v /var/run/docker.sock:/var/run/docker.sock \
-p 3000:3000 \
--add-host host.docker.internal:host-gateway \
--name openhands-app \
docker.all-hands.dev/all-hands-ai/openhands:0.37
docker.all-hands.dev/all-hands-ai/openhands:0.34
```
你也可以在可脚本化的[无头模式](https://docs.all-hands.dev/modules/usage/how-to/headless-mode)下运行 OpenHands,作为[交互式 CLI](https://docs.all-hands.dev/modules/usage/how-to/cli-mode),或使用 [OpenHands GitHub Action](https://docs.all-hands.dev/modules/usage/how-to/github-action)。
@@ -42,7 +42,7 @@ OpenHands 是一个**自主 AI 软件工程师**,能够执行复杂的工程
/>
</a>
<br></br>
<a href="https://join.slack.com/t/openhands-ai/shared_invite/zt-34zm4j0gj-Qz5kRHoca8DFCbqXPS~f_A">
<a href="https://join.slack.com/t/openhands-ai/shared_invite/zt-2ngejmfw6-9gW4APWOC9XUp1n~SiQ6iw">
<img
src="https://img.shields.io/badge/Slack-Join%20Us-red?logo=slack&logoColor=white&style=for-the-badge"
alt="Join our Slack community"
@@ -11,7 +11,7 @@
```
docker run # ...
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.37-nikolaik \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik \
-v /var/run/docker.sock:/var/run/docker.sock \
# ...
```
-8
View File
@@ -1,8 +0,0 @@
{
"label": "OpenHands Cloud",
"position": 9,
"link": {
"type": "generated-index",
"description": "Documentation for OpenHands Cloud features and services."
}
}
-177
View File
@@ -1,177 +0,0 @@
# OpenHands Cloud API
OpenHands Cloud provides a REST API that allows you to programmatically interact with the service. This is useful if you easily want to kick off your own jobs from your programs in a flexible way.
This guide explains how to obtain an API key and use the API to start conversations.
For more detailed information about the API, refer to the [OpenHands API Reference](https://docs.all-hands.dev/swagger-ui/).
## Obtaining an API Key
To use the OpenHands Cloud API, you'll need to generate an API key:
1. Log in to your [OpenHands Cloud](https://app.all-hands.dev) account
2. Navigate to the [Settings page](https://app.all-hands.dev/settings)
3. Locate the "API Keys" section
4. Click "Generate New Key"
5. Give your key a descriptive name (e.g., "Development", "Production")
6. Copy the generated API key and store it securely - it will only be shown once
![API Key Generation](/img/docs/api-key-generation.png)
## API Usage
### Starting a New Conversation
To start a new conversation with OpenHands performing a task, you'll need to make a POST request to the conversation endpoint.
#### Request Parameters
| Parameter | Type | Required | Description |
|-----------|------|----------|-------------|
| `initial_user_msg` | string | Yes | The initial message to start the conversation |
| `repository` | string | No | Git repository name to provide context in the format `owner/repo`. You must have access to the repo. |
#### Examples
<details>
<summary>cURL</summary>
```bash
curl -X POST "https://app.all-hands.dev/api/conversations" \
-H "Authorization: Bearer YOUR_API_KEY" \
-H "Content-Type: application/json" \
-d '{
"initial_user_msg": "Check whether there is any incorrect information in the README.md file and send a PR to fix it if so.",
"repository": "yourusername/your-repo"
}'
```
</details>
<details>
<summary>Python (with requests)</summary>
```python
import requests
api_key = "YOUR_API_KEY"
url = "https://app.all-hands.dev/api/conversations"
headers = {
"Authorization": f"Bearer {api_key}",
"Content-Type": "application/json"
}
data = {
"initial_user_msg": "Check whether there is any incorrect information in the README.md file and send a PR to fix it if so.",
"repository": "yourusername/your-repo"
}
response = requests.post(url, headers=headers, json=data)
conversation = response.json()
print(f"Conversation Link: https://app.all-hands.dev/conversations/{conversation['id']}")
print(f"Status: {conversation['status']}")
```
</details>
<details>
<summary>TypeScript/JavaScript (with fetch)</summary>
```typescript
const apiKey = "YOUR_API_KEY";
const url = "https://app.all-hands.dev/api/conversations";
const headers = {
"Authorization": `Bearer ${apiKey}`,
"Content-Type": "application/json"
};
const data = {
initial_user_msg: "Check whether there is any incorrect information in the README.md file and send a PR to fix it if so.",
repository: "yourusername/your-repo"
};
async function startConversation() {
try {
const response = await fetch(url, {
method: "POST",
headers: headers,
body: JSON.stringify(data)
});
const conversation = await response.json();
console.log(`Conversation Link: https://app.all-hands.dev/conversations/${conversation.id}`);
console.log(`Status: ${conversation.status}`);
return conversation;
} catch (error) {
console.error("Error starting conversation:", error);
}
}
startConversation();
```
</details>
#### Response
The API will return a JSON object with details about the created conversation:
```json
{
"status": "ok",
"conversation_id": "abc1234",
}
```
You may also receive an `AuthenticationError` if:
1. You provided an invalid API key
2. You provided the wrong repo name
3. You don't have access to the repo
### Retrieving Conversation Status
You can check the status of a conversation by making a GET request to the conversation endpoint.
#### Endpoint
```
GET https://app.all-hands.dev/api/conversations/{conversation_id}
```
#### Example
<details>
<summary>cURL</summary>
```bash
curl -X GET "https://app.all-hands.dev/api/conversations/{conversation_id}" \
-H "Authorization: Bearer YOUR_API_KEY"
```
</details>
#### Response
The response is formatted as follows:
```json
{
"conversation_id":"abc1234",
"title":"Update README.md",
"created_at":"2025-04-29T15:13:51.370706Z",
"last_updated_at":"2025-04-29T15:13:57.199210Z",
"status":"RUNNING",
"selected_repository":"yourusername/your-repo",
"trigger":"gui"
}
```
## Rate Limits
The API has a limit of 10 simultaneous conversations per account. If you need a higher limit for your use case, please contact us at [contact@all-hands.dev](mailto:contact@all-hands.dev).
If you exceed this limit, the API will return a 429 Too Many Requests response.
+10 -20
View File
@@ -6,26 +6,20 @@ OpenHands Cloud is the cloud hosted version of OpenHands by All Hands AI.
OpenHands Cloud can be accessed at https://app.all-hands.dev/.
You can also interact with OpenHands Cloud programmatically using the [API](./cloud-api).
## Getting Started
After visiting OpenHands Cloud, you will be asked to connect with your GitHub or GitLab account:
1. After reading and accepting the terms of service, click `Log in with GitHub` or `Log in with GitLab`.
After visiting OpenHands Cloud, you will be asked to connect with your GitHub account:
1. After reading and accepting the terms of service, click `Connect to GitHub`.
2. Review the permissions requested by OpenHands and then click `Authorize OpenHands AI`.
- OpenHands will require some permissions from your GitHub or GitLab account. To read more about these permissions:
- GitHub: You can click the `Learn more` link on the GitHub authorize page.
- GitLab: You can expand each permission request on the GitLab authorize page.
- OpenHands will require some permissions from your GitHub account. To read more about these permissions,
you can click the `Learn more` link on the GitHub authorize page.
## Repository Access
### GitHub
#### Adding Repository Access
### Adding Repository Access
You can grant OpenHands specific repository access:
1. Click `Add GitHub repos` on the Home page.
1. Click the `Select a Git project` dropdown, select `Add more repositories...`.
2. Select the organization, then choose the specific repositories to grant OpenHands access to.
<details>
<summary>Permission Details for Repository Access</summary>
@@ -48,15 +42,11 @@ You can grant OpenHands specific repository access:
3. Click on `Install & Authorize`.
#### Modifying Repository Access
### Modifying Repository Access
You can modify GitHub repository access at any time by:
* Using the same `Add GitHub repos` workflow, or
* Visiting the Settings page and selecting `Configure GitHub Repositories` under the `Git Settings` section.
### GitLab
When using your GitLab account, OpenHands will automatically have access to your repositories.
You can modify repository access at any time by:
* Using the same `Select a Git project > Add more repositories` workflow, or
* Visiting the Settings page and selecting `Configure GitHub Repositories` under the `GitHub Settings` section.
## Conversation Persistence
@@ -1,17 +1,18 @@
# Repository Customization
You can customize how OpenHands interacts with your repository by creating a
You can customize how OpenHands works with your repository by creating a
`.openhands` directory at the root level.
## Microagents
Microagents allow you to extend OpenHands prompts with information specific to your project and define how OpenHands
should function. See [Microagents Overview](../prompting/microagents-overview) for more information.
You can use microagents to extend the OpenHands prompts with information
about your project and how you want OpenHands to work. See
[Repository Microagents](../prompting/microagents-repo) for more information.
## Setup Script
You can add a `.openhands/setup.sh` file, which will run every time OpenHands begins working with your repository.
This is an ideal location for installing dependencies, setting environment variables, and performing other setup tasks.
You can add `.openhands/setup.sh`, which will be run every time OpenHands begins
working with your repository. This is a good place to install dependencies, set
environment variables, etc.
For example:
```bash
+4 -4
View File
@@ -12,7 +12,7 @@ To start an interactive OpenHands session via the command line:
2. Run the following command:
```bash
poetry run python -m openhands.cli.main
poetry run python -m openhands.core.cli
```
This command will start an interactive session where you can input tasks and receive responses from OpenHands.
@@ -35,7 +35,7 @@ To run OpenHands in CLI mode with Docker:
```bash
docker run -it \
--pull=always \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.37-nikolaik \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik \
-e SANDBOX_USER_ID=$(id -u) \
-e WORKSPACE_MOUNT_PATH=$WORKSPACE_BASE \
-e LLM_API_KEY=$LLM_API_KEY \
@@ -45,8 +45,8 @@ docker run -it \
-v ~/.openhands-state:/.openhands-state \
--add-host host.docker.internal:host-gateway \
--name openhands-app-$(date +%Y%m%d%H%M%S) \
docker.all-hands.dev/all-hands-ai/openhands:0.37 \
python -m openhands.cli.main
docker.all-hands.dev/all-hands-ai/openhands:0.34 \
python -m openhands.core.cli
```
This command will start an interactive session in Docker where you can input tasks and receive responses from OpenHands.
+1
View File
@@ -136,6 +136,7 @@ OpenHands automatically exports a `GITLAB_TOKEN` to the shell environment if pro
## Tips for Effective Use
- Be specific in your requests to get the most accurate and helpful responses, as described in the [prompting best practices](../prompting/prompting-best-practices).
- Use the workspace panel to explore your project structure.
- Use one of the recommended models, as described in the [LLMs section](usage/llms/llms.md).
Remember, the GUI mode of OpenHands is designed to make your interaction with the AI assistant as smooth and intuitive
+2 -2
View File
@@ -32,7 +32,7 @@ To run OpenHands in Headless mode with Docker:
```bash
docker run -it \
--pull=always \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.37-nikolaik \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik \
-e SANDBOX_USER_ID=$(id -u) \
-e WORKSPACE_MOUNT_PATH=$WORKSPACE_BASE \
-e LLM_API_KEY=$LLM_API_KEY \
@@ -43,7 +43,7 @@ docker run -it \
-v ~/.openhands-state:/.openhands-state \
--add-host host.docker.internal:host-gateway \
--name openhands-app-$(date +%Y%m%d%H%M%S) \
docker.all-hands.dev/all-hands-ai/openhands:0.37 \
docker.all-hands.dev/all-hands-ai/openhands:0.34 \
python -m openhands.core.main -t "write a bash script that prints hi"
```
+3 -3
View File
@@ -58,17 +58,17 @@ A system with a modern processor and a minimum of **4GB RAM** is recommended to
The easiest way to run OpenHands is in Docker.
```bash
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.37-nikolaik
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik
docker run -it --rm --pull=always \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.37-nikolaik \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik \
-e LOG_ALL_EVENTS=true \
-v /var/run/docker.sock:/var/run/docker.sock \
-v ~/.openhands-state:/.openhands-state \
-p 3000:3000 \
--add-host host.docker.internal:host-gateway \
--name openhands-app \
docker.all-hands.dev/all-hands-ai/openhands:0.37
docker.all-hands.dev/all-hands-ai/openhands:0.34
```
You'll find OpenHands running at http://localhost:3000!
+6 -7
View File
@@ -6,12 +6,11 @@
- Displays the conversation between the user and OpenHands.
- OpenHands explains its actions in this panel.
### Changes
- Shows the file changes performed by OpenHands.
### VS Code
- Embedded VS Code for browsing and modifying files.
- Can also be used to upload and download files.
### Workspace
- Browse project files and directories.
- Use the `Open in VS Code` option to:
* Modify files
* Upload and download files
### Terminal
- A space for OpenHands and users to run terminal commands.
@@ -21,7 +20,7 @@
- Particularly handy when using OpenHands to perform data visualization tasks.
### App
- Displays the web server when OpenHands runs an application.
- Shows the web server when OpenHands runs an application.
- Users can interact with the running application.
### Browser
-96
View File
@@ -1,96 +0,0 @@
# Model Context Protocol (MCP)
:::note
This page outlines how to configure and use the Model Context Protocol (MCP) in OpenHands, allowing you to extend the agent's capabilities with custom tools.
:::
## Overview
Model Context Protocol (MCP) is a mechanism that allows OpenHands to communicate with external tool servers. These servers can provide additional functionality to the agent, such as specialized data processing, external API access, or custom tools. MCP is based on the open standard defined at [modelcontextprotocol.io](https://modelcontextprotocol.io).
## Configuration
MCP configuration is defined in the `[mcp]` section of your `config.toml` file.
### Configuration Example
```toml
[mcp]
# SSE Servers - External servers that communicate via Server-Sent Events
sse_servers = [
# Basic SSE server with just a URL
"http://example.com:8080/mcp",
# SSE server with API key authentication
{url="https://secure-example.com/mcp", api_key="your-api-key"}
]
# Stdio Servers - Local processes that communicate via standard input/output
stdio_servers = [
# Basic stdio server
{name="fetch", command="uvx", args=["mcp-server-fetch"]},
# Stdio server with environment variables
{
name="data-processor",
command="python",
args=["-m", "my_mcp_server"],
env={
"DEBUG": "true",
"PORT": "8080"
}
}
]
```
## Configuration Options
### SSE Servers
SSE servers are configured using either a string URL or an object with the following properties:
- `url` (required)
- Type: `str`
- Description: The URL of the SSE server
- `api_key` (optional)
- Type: `str`
- Default: `None`
- Description: API key for authentication with the SSE server
### Stdio Servers
Stdio servers are configured using an object with the following properties:
- `name` (required)
- Type: `str`
- Description: A unique name for the server
- `command` (required)
- Type: `str`
- Description: The command to run the server
- `args` (optional)
- Type: `list of str`
- Default: `[]`
- Description: Command-line arguments to pass to the server
- `env` (optional)
- Type: `dict of str to str`
- Default: `{}`
- Description: Environment variables to set for the server process
## How MCP Works
When OpenHands starts, it:
1. Reads the MCP configuration from `config.toml`
2. Connects to any configured SSE servers
3. Starts any configured stdio servers
4. Registers the tools provided by these servers with the agent
The agent can then use these tools just like any built-in tool. When the agent calls an MCP tool:
1. OpenHands routes the call to the appropriate MCP server
2. The server processes the request and returns a response
3. OpenHands converts the response to an observation and presents it to the agent
@@ -1,38 +0,0 @@
# Keyword-Triggered Microagents
## Purpose
Keyword-triggered microagents provide OpenHands with specific instructions that are activated when certain keywords
appear in the prompt. This is useful for tailoring behavior based on particular tools, languages, or frameworks.
## Usage
These microagents are only loaded when a prompt includes one of the trigger words.
## Frontmatter Syntax
Frontmatter is required for keyword-triggered microagents. It must be placed at the top of the file,
above the guidelines.
Enclose the frontmatter in triple dashes (---) and include the following fields:
| Field | Description | Required | Default |
|------------|--------------------------------------------------|----------|------------------|
| `triggers` | A list of keywords that activate the microagent. | Yes | None |
| `agent` | The agent this microagent applies to. | No | 'CodeActAgent' |
## Example
Keyword-triggered microagent file example located at `.openhands/microagents/yummy.md`:
```
---
triggers:
- yummyhappy
- happyyummy
---
The user has said the magic word. Respond with "That was delicious!"
```
[See examples of microagents triggered by keywords in the official OpenHands repository](https://github.com/All-Hands-AI/OpenHands/tree/main/microagents)
@@ -1,40 +1,31 @@
# Microagents Overview
Microagents are specialized prompts that enhance OpenHands with domain-specific knowledge.
They provide expert guidance, automate common tasks, and ensure consistent practices across projects.
Microagents are specialized prompts that enhance OpenHands with domain-specific knowledge, repository-specific context
and task-specific workflows. They help by providing expert guidance, automating common tasks, and ensuring
consistent practices across projects.
## Microagent Types
## Microagent Categories
Currently OpenHands supports the following types of microagents:
Currently OpenHands supports two categories of microagents:
- [General Repository Microagents](./microagents-repo): General guidelines for OpenHands about the repository.
- [Keyword-Triggered Microagents](./microagents-keyword): Guidelines activated by specific keywords in prompts.
- [Repository-specific Microagents](./microagents-repo): Repository-specific context and guidelines for OpenHands.
- [Public Microagents](./microagents-public): General guidelines triggered by keywords for all OpenHands users.
To customize OpenHands' behavior, create a .openhands/microagents/ directory in the root of your repository and
add `<microagent_name>.md` files inside.
A microagent is classified as repository-specific or public depending on its location:
:::note
Loaded microagents take up space in the context window.
These microagents, alongside user messages, inform OpenHands about the task and the environment.
:::
- Repository-specific microagents are located in a repository's `.openhands/microagents/` directory
- Public microagents are located in the official OpenHands repository inside the `/microagents` folder
Example repository structure:
When OpenHands works with a repository, it:
```
some-repository/
└── .openhands/
└── microagents/
└── repo.md # General repository guidelines
└── trigger_this.md # Microagent triggered by specific keywords
└── trigger_that.md # Microagent triggered by specific keywords
```
1. Loads **repository-specific** microagents from `.openhands/microagents/` if present in the repository.
2. Loads **public knowledge** microagents triggered by keywords in conversations
3. Loads **public tasks** microagents when explicitly requested by the user
## Microagents Frontmatter Requirements
You can check out the existing public microagents at the [official OpenHands repository](https://github.com/All-Hands-AI/OpenHands/tree/main/microagents/).
Each microagent file may include frontmatter that provides additional information. In some cases, this frontmatter
is required:
## Microagent Format
| Microagent Type | Required |
|----------------------------------|----------|
| `General Repository Microagents` | No |
| `Keyword-Triggered Microagents` | Yes |
All microagents use markdown files with YAML frontmatter that have special instructions to help OpenHands activate them.
Check out the [syntax documentation](./microagents-syntax) for a comprehensive guide on how to configure your microagents.
@@ -1,17 +1,35 @@
# Global Microagents
# Public Microagents
## Overview
Global microagents are [keyword-triggered microagents](./microagents-keyword) that apply to all OpenHands users. A list of the current
global microagents can be found [in the OpenHands repository](https://github.com/All-Hands-AI/OpenHands/tree/main/microagents).
Public microagents provide specialized context and capabilities for all OpenHands users, regardless of their repository configuration. Unlike repository-specific microagents, public microagents are globally available across all repositories.
## Contributing a Global Microagent
Public microagents come in two types:
You can create global microagents and share with the community by opening a pull request to the official repository.
- **Knowledge microagents**: Automatically activated when keywords in conversations match their triggers
- **Task microagents**: Explicitly invoked by users to guide through specific workflows
Both types follow the same syntax and structure as repository-specific microagents, using markdown files with YAML frontmatter that define their behavior and capabilities. They are located in the official OpenHands repository under:
- [`microagents/knowledge/`](https://github.com/All-Hands-AI/OpenHands/tree/main/microagents/knowledge) for knowledge microagents
- [`microagents/tasks/`](https://github.com/All-Hands-AI/OpenHands/tree/main/microagents/tasks) for task microagents
Public microagents:
- Monitor incoming commands for their trigger words.
- Activate when relevant triggers are detected.
- Apply their specialized knowledge and capabilities.
- Follow their specific guidelines and restrictions.
When loading public microagents, OpenHands scans the official repository's microagents directories recursively, processing all markdown files except README.md. The system categorizes each microagent based on its `type` field in the YAML frontmatter, regardless of its exact file location within the knowledge or tasks directories.
## Contributing a Public Microagent
You can create public microagents and share with the community by opening a pull request to the official repository.
See the [CONTRIBUTING.md](https://github.com/All-Hands-AI/OpenHands/blob/main/CONTRIBUTING.md) for specific instructions on how to contribute to OpenHands.
### Global Microagents Best Practices
### Public Microagents Best Practices
- **Clear Scope**: Keep the microagent focused on a specific domain or task.
- **Explicit Instructions**: Provide clear, unambiguous guidelines.
@@ -19,11 +37,11 @@ See the [CONTRIBUTING.md](https://github.com/All-Hands-AI/OpenHands/blob/main/CO
- **Safety First**: Include necessary warnings and constraints.
- **Integration Awareness**: Consider how the microagent interacts with other components.
### Steps to Contribute a Global Microagent
### Steps to Contribute a Public Microagent
#### 1. Plan the Global Microagent
#### 1. Plan the Public Microagent
Before creating a global microagent, consider:
Before creating a public microagent, consider:
- What specific problem or use case will it address?
- What unique capabilities or knowledge should it have?
@@ -33,19 +51,23 @@ Before creating a global microagent, consider:
#### 2. Create File
Create a new Markdown file with a descriptive name in the appropriate directory:
[`microagents/`](https://github.com/All-Hands-AI/OpenHands/tree/main/microagents)
#### 3. Testing the Global Microagent
- [`microagents/knowledge/`](https://github.com/All-Hands-AI/OpenHands/tree/main/microagents/knowledge) for knowledge microagents
- [`microagents/tasks/`](https://github.com/All-Hands-AI/OpenHands/tree/main/microagents/tasks) for task microagents
- Test the agent with various prompts.
- Verify trigger words activate the agent correctly.
- Ensure instructions are clear and comprehensive.
- Check for potential conflicts and overlaps with existing agents.
Ensure it follows the correct [syntax](./microagents-syntax.md) and [best practices](./microagents-syntax.md#markdown-content-best-practices).
#### 3. Testing the Public Microagent
- Test the agent with various prompts
- Verify trigger words activate the agent correctly
- Ensure instructions are clear and comprehensive
- Check for potential conflicts and overlaps with existing agents
#### 4. Submission Process
Submit a pull request with:
- The new microagent file.
- Updated documentation if needed.
- Description of the agent's purpose and capabilities.
- The new microagent file
- Updated documentation if needed
- Description of the agent's purpose and capabilities
+104 -18
View File
@@ -1,31 +1,117 @@
# General Repository Microagents
# Repository-specific Microagents
## Purpose
## Overview
General guidelines for OpenHands to work more effectively with the repository.
OpenHands can be customized to work more effectively with specific repositories by providing repository-specific context and guidelines.
## Usage
This section explains how to optimize OpenHands for your project.
These microagents are always loaded as part of the context.
## Creating Repository Microagents
## Frontmatter Syntax
You can customize OpenHands' behavior for your repository by creating a `.openhands/microagents/` directory in your repository's root.
The frontmatter for this type of microagent is optional.
You can enhance OpenHands' performance by adding custom microagents to your repository:
Frontmatter should be enclosed in triple dashes (---) and may include the following fields:
1. For overall repository-specific instructions, create a `.openhands/microagents/repo.md` file
2. For reusable domain knowledge triggered by keywords, add multiple `.md` files to `.openhands/microagents/knowledge/`
3. For common workflows and tasks, create multiple `.md` files to `.openhands/microagents/tasks/`
| Field | Description | Required | Default |
|-----------|-----------------------------------------|----------|----------------|
| `agent` | The agent this microagent applies to | No | 'CodeActAgent' |
Check out the [best practices](./microagents-syntax.md#markdown-content-best-practices) for formatting the content of your custom microagent.
## Example
Keep in mind that loaded microagents take up space in the context window. It's crucial to strike a balance between the additional context provided by microagents and the instructions provided in the user's inputs.
Note that you can use OpenHands to create new microagents. The public microagent [`add_agent`](https://github.com/All-Hands-AI/OpenHands/blob/main/microagents/knowledge/add_agent.md) is loaded to all OpenHands instance and can support you on this.
## Types of Microagents
OpenHands supports three primary types of microagents, each with specific purposes and features to enhance agent performance:
- [repository](#repository-microagents)
- [knowledge](#knowledge-microagents)
- [tasks](#tasks-microagents)
The standard directory structure within a repository is:
- One main `repo.md` file containing repository-specific instructions
- Additional `Knowledge` agents in `.openhands/microagents/knowledge/` directory
- Additional `Task` agents in `.openhands/microagents/tasks/` directory
When processing the `.openhands/microagents/` directory, OpenHands will recursively scan all subfolders and process any `.md` files (except `README.md`) it finds. The system determines the microagent type based on the `type` field in the YAML frontmatter, not by the file's location. However, for organizational clarity, it's recommended to follow the standard directory structure.
### Repository Microagents
The `Repository` microagent is loaded specifically from `.openhands/microagents/repo.md` and serves as the main
repository-specific instruction file. This single file is automatically loaded whenever OpenHands works with that repository
without requiring any keyword matching or explicit call from the user.
OpenHands does not support multiple `repo.md` files in different locations or multiple microagents with type `repo`.
If you need to organize different types of repository information, the recommended approach is to use a single `repo.md` file with well-structured sections rather than trying to create multiple microagents with the type `repo`.
The best practice is to include project-specific instructions, team practices, coding standards, and architectural guidelines that are relevant for **all** prompts in that repository.
Example structure:
General repository microagent file example located at `.openhands/microagents/repo.md`:
```
This project is a TODO application that allows users to track TODO items.
To set it up, you can run `npm run build`.
Always make sure the tests are passing before committing changes. You can run the tests by running `npm run test`.
your-repository/
└── .openhands/
└── microagents/
└── repo.md # Repository-specific instructions
```
[See more examples of general repository microagents here.](https://github.com/All-Hands-AI/OpenHands/tree/main/.openhands/microagents)
[See the example in the official OpenHands repository](https://github.com/All-Hands-AI/OpenHands/blob/main/.openhands/microagents/repo.md?plain=1)
### Knowledge Microagents
Knowledge microagents provide specialized domain expertise:
- Recommended to be located in `.openhands/microagents/knowledge/`
- Triggered by specific keywords in conversations
- Contain expertise on tools, languages, frameworks, and common practices
Use knowledge microagents to trigger additional context relevant to specific technologies, tools, or workflows. For example, mentioning "git" in your conversation will automatically trigger git-related expertise to help with Git operations.
Examples structure:
```
your-repository/
└── .openhands/
└── microagents/
└── knowledge/
└── git.md
└── docker.md
└── python.md
└── ...
└── repo.md
```
You can find several real examples of `Knowledge` microagents in the [offical OpenHands repository](https://github.com/All-Hands-AI/OpenHands/tree/main/microagents/knowledge)
### Tasks Microagents
Task microagents guide users through interactive workflows:
- Recommended to be located in `.openhands/microagents/tasks/`
- Provide step-by-step processes for common development tasks
- Accept inputs and adapt to different scenarios
- Ensure consistent outcomes for complex operations
Task microagents are a convenient way to store multi-step processes you perform regularly. For instance, you can create a `update_pr_description.md` microagent to automatically generate better pull request descriptions based on code changes.
Examples structure:
```
your-repository/
└── .openhands/
└── microagents/
└── tasks/
└── update_pr_description.md
└── address_pr_comments.md
└── get_test_to_pass.md
└── ...
└── knowledge/
└── ...
└── repo.md
```
You can find several real examples of `Tasks` microagents in the [offical OpenHands repository](https://github.com/All-Hands-AI/OpenHands/tree/main/microagents/tasks)
@@ -0,0 +1,128 @@
# Microagents Syntax
Microagents are defined using markdown files with YAML frontmatter that specify their behavior, triggers, and capabilities.
Find below a comprehensive description of the frontmatter syntax and other details about how to use each type of microagent available at OpenHands.
## Frontmatter Schema
Every microagent requires a YAML frontmatter section at the beginning of the file, enclosed by triple dashes (`---`). The fields are:
| Field | Description | Required | Used By |
| ---------- | -------------------------------------------------- | ------------------------ | ---------------- |
| `name` | Unique identifier for the microagent | Yes | All types |
| `type` | Type of microagent: `repo`, `knowledge`, or `task` | Yes | All types |
| `version` | Version number (Semantic versioning recommended) | Yes | All types |
| `agent` | The agent type (typically `CodeActAgent`) | Yes | All types |
| `author` | Creator of the microagent | No | All types |
| `triggers` | List of keywords that activate the microagent | Yes for knowledge agents | Knowledge agents |
| `inputs` | Defines required user inputs for task execution | Yes for task agents | Task agents |
## Core Fields
### `agent`
**Purpose**: Specifies which agent implementation processes the microagent (typically `CodeActAgent`).
- Defines a single agent responsible for processing the microagent
- Must be available in the OpenHands system (see the [agent hub](https://github.com/All-Hands-AI/OpenHands/tree/main/openhands/agenthub))
- If the specified agent is not active, the microagent will not be used
### `triggers`
**Purpose**: Defines keywords that activate the `knowledge` microagent.
**Example**:
```yaml
triggers:
- kubernetes
- k8s
- docker
- security
- containers cluster
```
**Key points**:
- Can include both single words and multi-word phrases
- Case-insensitive matching is typically used
- More specific triggers (like "docker compose") prevent false activations
- Multiple triggers increase the chance of activation in relevant contexts
- Unique triggers like "flarglebargle" can be used for testing or special functionality
- Triggers should be carefully chosen to avoid unwanted activations or conflicts with other microagents
- Common terms used in many conversations may cause the microagent to be activated too frequently
When using multiple triggers, the microagent will be activated if any of the trigger words or phrases appear in the
conversation.
### `inputs`
**Purpose**: Defines parameters required from the user when a `task` microagent is activated.
**Schema**:
```yaml
inputs:
- name: INPUT_NAME # Used with {{ INPUT_NAME }}
description: 'Description of what this input is for'
required: true # Optional, defaults to true
```
**Key points**:
- The `name` and `description` properties are required for each input
- The `required` property is optional and defaults to `true`
- Input values are referenced in the microagent body using double curly braces (e.g., `{{ INPUT_NAME }}`)
- All inputs defined will be collected from the user before the task microagent executes
**Variable Usage**: Reference input values using double curly braces `{{ INPUT_NAME }}`.
## Example Formats
### Repository Microagent
Repository microagents provide context and guidelines for a specific repository.
- Located at: `.openhands/microagents/repo.md`
- Automatically loaded when working with the repository
- Only one per repository
The `Repository` microagent is loaded specifically from `.openhands/microagents/repo.md` and serves as the main
repository-specific instruction file. This single file is automatically loaded whenever OpenHands works with that repository
without requiring any keyword matching or explicit call from the user.
[See the example in the official OpenHands repository](https://github.com/All-Hands-AI/OpenHands/blob/main/.openhands/microagents/repo.md?plain=1)
### Knowledge Microagent
Provides specialized domain expertise triggered by keywords.
You can find several real examples of `Knowledge` microagents in the [offical OpenHands repository](https://github.com/All-Hands-AI/OpenHands/tree/main/microagents/knowledge)
### Task Microagent
When explicitly asked by the user, will guide through interactive workflows with specific inputs.
You can find several real examples of `Tasks` microagents in the [offical OpenHands repository](https://github.com/All-Hands-AI/OpenHands/tree/main/microagents/tasks)
## Markdown Content Best Practices
After the frontmatter, compose the microagent body using Markdown syntax. Examples of elements you can include are:
- Clear, concise instructions outlining the microagent's purpose and responsibilities
- Specific guidelines and constraints the microagent should adhere to
- Relevant code snippets and practical examples to illustrate key points
- Step-by-step procedures for task agents, guiding users through workflows
**Design Tips**:
- Keep microagents focused with a clear purpose
- Provide specific guidelines rather than general advice
- Use distinctive triggers for knowledge agents
- Keep content concise to minimize context window usage
- Break large microagents into smaller, focused ones
Aim for clarity, brevity, and practicality in your writing. Use formatting like bullet points, code blocks, and emphasis to enhance readability and comprehension.
Remember that balancing microagents details with user input space is important for maintaining effective interactions.
@@ -4,38 +4,6 @@
OpenHands only supports Windows via WSL. Please be sure to run all commands inside your WSL terminal.
:::
### Unable to access VS Code tab via local IP
**Description**
When accessing OpenHands through a non-localhost URL (such as a LAN IP address), the VS Code tab shows a "Forbidden" error, while other parts of the UI work fine.
**Resolution**
This happens because VS Code runs on a random high port that may not be exposed or accessible from other machines. To fix this:
1. Set a specific port for VS Code using the `SANDBOX_VSCODE_PORT` environment variable:
```bash
docker run -it --rm \
-e SANDBOX_VSCODE_PORT=41234 \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:latest \
-v /var/run/docker.sock:/var/run/docker.sock \
-v ~/.openhands-state:/.openhands-state \
-p 3000:3000 \
-p 41234:41234 \
--add-host host.docker.internal:host-gateway \
--name openhands-app \
docker.all-hands.dev/all-hands-ai/openhands:latest
```
2. Make sure to expose the same port with `-p 41234:41234` in your Docker command.
3. Alternatively, you can set this in your `config.toml` file:
```toml
[sandbox]
vscode_port = 41234
```
### Launch docker client failed
**Description**
+19 -1534
View File
File diff suppressed because it is too large Load Diff
+4 -11
View File
@@ -4,27 +4,23 @@
"private": true,
"scripts": {
"docusaurus": "docusaurus",
"start": "node generate-swagger-ui.js && docusaurus start",
"build": "node generate-swagger-ui.js && docusaurus build",
"start": "docusaurus start",
"build": "docusaurus build",
"swizzle": "docusaurus swizzle",
"deploy": "docusaurus deploy",
"clear": "docusaurus clear",
"serve": "docusaurus serve",
"write-translations": "docusaurus write-translations",
"write-heading-ids": "docusaurus write-heading-ids",
"typecheck": "tsc",
"generate-swagger-ui": "node generate-swagger-ui.js"
"typecheck": "tsc"
},
"// Note": "The OpenAPI spec is stored in docs/static/openapi.json so it's accessible at /openapi.json in the deployed site",
"dependencies": {
"@docusaurus/core": "^3.7.0",
"@docusaurus/plugin-content-pages": "^3.7.0",
"@docusaurus/preset-classic": "^3.7.0",
"@docusaurus/theme-mermaid": "^3.7.0",
"@mdx-js/react": "^3.1.0",
"@node-rs/jieba": "^2.0.1",
"clsx": "^2.0.0",
"docusaurus-lunr-search": "^3.6.0",
"prism-react-renderer": "^2.4.1",
"react": "^19.1.0",
"react-dom": "^19.1.0",
@@ -35,8 +31,6 @@
"@docusaurus/module-type-aliases": "^3.5.1",
"@docusaurus/tsconfig": "^3.7.0",
"@docusaurus/types": "^3.5.1",
"swagger-cli": "^4.0.4",
"swagger-ui-dist": "^5.21.0",
"typescript": "~5.8.3"
},
"browserslist": {
@@ -53,6 +47,5 @@
},
"engines": {
"node": ">=18.0"
},
"packageManager": "npm@10.5.0"
}
}
+6 -15
View File
@@ -27,11 +27,7 @@ const sidebars: SidebarsConfig = {
label: 'Openhands Cloud',
id: 'usage/cloud/openhands-cloud',
},
{
type: 'doc',
label: 'Cloud API',
id: 'usage/cloud/cloud-api',
},
{
type: 'doc',
label: 'Cloud GitHub Resolver',
@@ -70,18 +66,18 @@ const sidebars: SidebarsConfig = {
},
{
type: 'doc',
label: 'General Repository Microagents',
label: 'Repository-specific',
id: 'usage/prompting/microagents-repo',
},
{
type: 'doc',
label: 'Keyword-Triggered Microagents',
id: 'usage/prompting/microagents-keyword',
label: 'Public',
id: 'usage/prompting/microagents-public',
},
{
type: 'doc',
label: 'Global Microagents',
id: 'usage/prompting/microagents-public',
label: 'Syntax',
id: 'usage/prompting/microagents-syntax',
},
],
},
@@ -215,11 +211,6 @@ const sidebars: SidebarsConfig = {
label: 'Custom Sandbox',
id: 'usage/how-to/custom-sandbox-guide',
},
{
type: 'doc',
label: 'MCP',
id: 'usage/mcp',
}
],
},
{
+1 -1
View File
@@ -8,7 +8,7 @@ function CustomFooter() {
<footer className="custom-footer">
<div className="footer-content">
<div className="footer-icons">
<a href="https://join.slack.com/t/openhands-ai/shared_invite/zt-34zm4j0gj-Qz5kRHoca8DFCbqXPS~f_A" target="_blank" rel="noopener noreferrer">
<a href="https://join.slack.com/t/openhands-ai/shared_invite/zt-2ngejmfw6-9gW4APWOC9XUp1n~SiQ6iw" target="_blank" rel="noopener noreferrer">
<FaSlack />
</a>
<a href="https://discord.gg/ESHStjSjD4" target="_blank" rel="noopener noreferrer">
@@ -45,14 +45,15 @@ export function HomepageHeader() {
<div align="center" className="header-links">
<a href="https://github.com/All-Hands-AI/OpenHands/graphs/contributors"><img src="https://img.shields.io/github/contributors/All-Hands-AI/OpenHands?style=for-the-badge&color=blue" alt="Contributors" /></a>
<a href="https://github.com/All-Hands-AI/OpenHands/stargazers"><img src="https://img.shields.io/github/stars/All-Hands-AI/OpenHands?style=for-the-badge&color=blue" alt="Stargazers" /></a>
<a href="https://codecov.io/github/All-Hands-AI/OpenHands?branch=main"><img alt="CodeCov" src="https://img.shields.io/codecov/c/github/All-Hands-AI/OpenHands?style=for-the-badge&color=blue" /></a>
<a href="https://github.com/All-Hands-AI/OpenHands/blob/main/LICENSE"><img src="https://img.shields.io/github/license/All-Hands-AI/OpenHands?style=for-the-badge&color=blue" alt="MIT License" /></a>
<br/>
<a href="https://join.slack.com/t/openhands-ai/shared_invite/zt-34zm4j0gj-Qz5kRHoca8DFCbqXPS~f_A"><img src="https://img.shields.io/badge/Slack-Join%20Us-red?logo=slack&logoColor=white&style=for-the-badge" alt="Join our Slack community" /></a>
<a href="https://join.slack.com/t/openhands-ai/shared_invite/zt-2ngejmfw6-9gW4APWOC9XUp1n~SiQ6iw"><img src="https://img.shields.io/badge/Slack-Join%20Us-red?logo=slack&logoColor=white&style=for-the-badge" alt="Join our Slack community" /></a>
<a href="https://discord.gg/ESHStjSjD4"><img src="https://img.shields.io/badge/Discord-Join%20Us-purple?logo=discord&logoColor=white&style=for-the-badge" alt="Join our Discord community" /></a>
<a href="https://github.com/All-Hands-AI/OpenHands/blob/main/CREDITS.md"><img src="https://img.shields.io/badge/Project-Credits-blue?style=for-the-badge&color=FFE165&logo=github&logoColor=white" alt="Credits" /></a>
<br/>
<a href="https://arxiv.org/abs/2407.16741"><img src="https://img.shields.io/badge/Paper%20on%20Arxiv-000?logoColor=FFE165&logo=arxiv&style=for-the-badge" alt="Paper on Arxiv" /></a>
<a href="https://docs.google.com/spreadsheets/d/1wOUdFCMyY6Nt0AIqF705KN4JKOWgeI4wUGUP60krXXs/edit?gid=0#gid=0"><img src="https://img.shields.io/badge/Benchmark%20score-000?logoColor=FFE165&logo=huggingface&style=for-the-badge" alt="Evaluation Benchmark Score" /></a>
<a href="https://huggingface.co/spaces/OpenHands/evaluation"><img src="https://img.shields.io/badge/Benchmark%20score-000?logoColor=FFE165&logo=huggingface&style=for-the-badge" alt="Evaluation Benchmark Score" /></a>
</div>
</div>
</div>
-15
View File
@@ -1,15 +0,0 @@
# Static Files for OpenHands Documentation
This directory contains static files that are copied directly to the build output of the Docusaurus documentation.
## OpenAPI Specification
The `openapi.json` file in this directory is the OpenAPI specification for the OpenHands API. It is copied to the build output and is accessible at `/openapi.json` in the deployed site.
This file is used by the Swagger UI interface, which is accessible at `/swagger-ui/` in the deployed site.
## Why is the OpenAPI spec in the static directory?
The OpenAPI specification is placed in the static directory so that it's accessible at a predictable URL in the deployed site. This allows the Swagger UI to reference it directly.
We only need one copy of the OpenAPI spec file, which is this one in the static directory.
Binary file not shown.

Before

Width:  |  Height:  |  Size: 26 KiB

BIN
View File
Binary file not shown.

Before

Width:  |  Height:  |  Size: 144 KiB

After

Width:  |  Height:  |  Size: 121 KiB

-2086
View File
File diff suppressed because it is too large Load Diff
+9831
View File
File diff suppressed because it is too large Load Diff
@@ -1,65 +0,0 @@
# Multi-swe-bench Evaluation with OpenHands
## LLM Setup
Please follow [here](../../README.md#setup).
## Dataset Preparing
Please download the [**Multi-SWE-Bench** dataset](https://huggingface.co/datasets/bytedance-research/Multi-SWE-Bench).
And change the dataset following [script](scripts/data/data_change.py).
```bash
python evaluation/benchmarks/multi_swe_bench/scripts/data/data_change.py
```
## Docker image download
Please download the multi-swe-bench dokcer images from [here](https://github.com/multi-swe-bench/multi-swe-bench?tab=readme-ov-file#run-evaluation).
## Generate patch
Please edit the [script](infer.sh) and run it.
```bash
bash evaluation/benchmarks/multi_swe_bench/infer.sh
```
Script variable explanation:
- `models`, e.g. `llm.eval_gpt4_1106_preview`, is the config group name for your
LLM settings, as defined in your `config.toml`.
- `git-version`, e.g. `HEAD`, is the git commit hash of the OpenHands version you would
like to evaluate. It could also be a release tag like `0.6.2`.
- `agent`, e.g. `CodeActAgent`, is the name of the agent for benchmarks, defaulting to `CodeActAgent`.
- `eval_limit`, e.g. `10`, limits the evaluation to the first `eval_limit` instances. By
default, the script evaluates the (500 issues), which will no exceed the maximum of the dataset number.
- `max_iter`, e.g. `20`, is the maximum number of iterations for the agent to run. By
default, it is set to 50.
- `num_workers`, e.g. `3`, is the number of parallel workers to run the evaluation. By
default, it is set to 1.
- `language`, the language of your evaluating dataset.
- `dataset`, the absolute position of the dataset jsonl.
The results will be generated in evaluation/evaluation_outputs/outputs/XXX/CodeActAgent/YYY/output.jsonl, you can refer to the [example](examples/output.jsonl).
## Runing evaluation
First, install [multi-swe-bench](https://github.com/multi-swe-bench/multi-swe-bench).
```bash
pip install multi-swe-bench
```
Second, convert the output.jsonl to patch.jsonl with [script](scripts/eval/convert.py), you can refer to the [example](examples/patch.jsonl).
```bash
python evaluation/benchmarks/multi_swe_bench/scripts/eval/convert.py
```
Finally, evaluate with multi-swe-bench.
The config file config.json can be refer to the [example](examples/config.json) or [github](https://github.com/multi-swe-bench/multi-swe-bench/tree/main?tab=readme-ov-file#configuration-file-example).
```bash
python -m multi_swe_bench.harness.run_evaluation --config config.json
```
@@ -1,456 +0,0 @@
import json
import os
import subprocess
import tempfile
import time
from functools import partial
import pandas as pd
from swebench.harness.grading import get_eval_report
from swebench.harness.run_evaluation import (
APPLY_PATCH_FAIL,
APPLY_PATCH_PASS,
)
from swebench.harness.test_spec import SWEbenchInstance, TestSpec, make_test_spec
from swebench.harness.utils import load_swebench_dataset
from tqdm import tqdm
from evaluation.benchmarks.swe_bench.resource.mapping import (
get_instance_resource_factor,
)
from evaluation.benchmarks.swe_bench.run_infer import get_instance_docker_image
from evaluation.utils.shared import (
EvalMetadata,
EvalOutput,
get_default_sandbox_config_for_eval,
prepare_dataset,
reset_logger_for_multiprocessing,
run_evaluation,
)
from openhands.core.config import (
AppConfig,
LLMConfig,
get_parser,
)
from openhands.core.logger import openhands_logger as logger
from openhands.core.main import create_runtime
from openhands.events.action import CmdRunAction
from openhands.events.observation import CmdOutputObservation
from openhands.utils.async_utils import call_async_from_sync
# TODO: migrate all swe-bench docker to ghcr.io/openhands
DOCKER_IMAGE_PREFIX = os.environ.get('EVAL_DOCKER_IMAGE_PREFIX', 'docker.io/xingyaoww/')
logger.info(f'Using docker image prefix: {DOCKER_IMAGE_PREFIX}')
def process_git_patch(patch):
if not isinstance(patch, str):
return ''
if not patch.strip():
# skip empty patches
return ''
patch = patch.replace('\r\n', '\n')
# There might be some weird characters at the beginning of the patch
# due to some OpenHands inference command outputs
# FOR EXAMPLE:
# git diff --no-color --cached 895f28f9cbed817c00ab68770433170d83132d90
# 0
# diff --git a/django/db/models/sql/.backup.query.py b/django/db/models/sql/.backup.query.py
# new file mode 100644
# index 0000000000..fc13db5948
# We "find" the first line that starts with "diff" and then we remove lines before it
lines = patch.split('\n')
for i, line in enumerate(lines):
if line.startswith('diff --git'):
patch = '\n'.join(lines[i:])
break
patch = patch.rstrip() + '\n' # Make sure the last line ends with a newline
return patch
def get_config(metadata: EvalMetadata, instance: pd.Series) -> AppConfig:
# We use a different instance image for the each instance of swe-bench eval
base_container_image = get_instance_docker_image(instance['instance_id'])
logger.info(
f'Using instance container image: {base_container_image}. '
f'Please make sure this image exists. '
f'Submit an issue on https://github.com/All-Hands-AI/OpenHands if you run into any issues.'
)
sandbox_config = get_default_sandbox_config_for_eval()
sandbox_config.base_container_image = base_container_image
sandbox_config.remote_runtime_resource_factor = get_instance_resource_factor(
dataset_name=metadata.dataset,
instance_id=instance['instance_id'],
)
config = AppConfig(
run_as_openhands=False,
runtime=os.environ.get('RUNTIME', 'docker'),
sandbox=sandbox_config,
# do not mount workspace
workspace_base=None,
workspace_mount_path=None,
)
return config
def process_instance(
instance: pd.Series,
metadata: EvalMetadata,
reset_logger: bool = True,
log_dir: str | None = None,
runtime_failure_count: int = 0,
) -> EvalOutput:
"""
Evaluate agent performance on a SWE-bench problem instance.
Note that this signature differs from the expected input to `run_evaluation`. Use
`functools.partial` to provide optional arguments before passing to the evaluation harness.
Args:
log_dir (str | None, default=None): Path to directory where log files will be written. Must
be provided if `reset_logger` is set.
Raises:
AssertionError: if the `reset_logger` flag is set without a provided log directory.
"""
# Setup the logger properly, so you can run multi-processing to parallelize the evaluation
if reset_logger:
assert (
log_dir is not None
), "Can't reset logger without a provided log directory."
os.makedirs(log_dir, exist_ok=True)
reset_logger_for_multiprocessing(logger, instance.instance_id, log_dir)
else:
logger.info(f'Starting evaluation for instance {instance.instance_id}.')
config = get_config(metadata, instance)
instance_id = instance.instance_id
model_patch = instance['model_patch']
test_spec: TestSpec = instance['test_spec']
logger.info(f'Starting evaluation for instance {instance_id}.')
if 'test_result' not in instance.keys():
instance['test_result'] = {}
instance['test_result']['report'] = {
'empty_generation': False,
'resolved': False,
'failed_apply_patch': False,
'error_eval': False,
'test_timeout': False,
}
if model_patch == '':
instance['test_result']['report']['empty_generation'] = True
return EvalOutput(
instance_id=instance_id,
test_result=instance['test_result'],
metadata=metadata,
)
# Increase resource_factor with increasing attempt_id
if runtime_failure_count > 0:
config.sandbox.remote_runtime_resource_factor = min(
config.sandbox.remote_runtime_resource_factor * (2**runtime_failure_count),
8,
)
logger.warning(
f'This is the {runtime_failure_count + 1}th attempt for instance {instance.instance_id}, setting resource factor to {config.sandbox.remote_runtime_resource_factor}'
)
try:
runtime = create_runtime(config)
call_async_from_sync(runtime.connect)
# Get patch and save it to /tmp/patch.diff
with tempfile.TemporaryDirectory() as temp_dir:
# Patch file
patch_file_path = os.path.join(temp_dir, 'patch.diff')
with open(patch_file_path, 'w') as f:
f.write(model_patch)
runtime.copy_to(patch_file_path, '/tmp')
# Eval script
eval_script_path = os.path.join(temp_dir, 'eval.sh')
with open(eval_script_path, 'w') as f:
f.write(test_spec.eval_script)
runtime.copy_to(eval_script_path, '/tmp')
# Set +x
action = CmdRunAction(command='chmod +x /tmp/eval.sh')
action.set_hard_timeout(600)
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert obs.exit_code == 0
# Apply patch
exec_command = (
'cd /testbed && '
"(git apply -v /tmp/patch.diff && echo 'APPLY_PATCH_PASS' || "
"(echo 'Failed to apply patch with git apply, trying with patch command...' && "
"(patch --batch --fuzz=5 -p1 -i /tmp/patch.diff && echo 'APPLY_PATCH_PASS' || "
"echo 'APPLY_PATCH_FAIL')))"
)
action = CmdRunAction(command=exec_command)
action.set_hard_timeout(600)
obs = runtime.run_action(action)
assert isinstance(obs, CmdOutputObservation)
apply_patch_output = obs.content
assert isinstance(apply_patch_output, str)
instance['test_result']['apply_patch_output'] = apply_patch_output
if 'APPLY_PATCH_FAIL' in apply_patch_output:
logger.info(f'[{instance_id}] {APPLY_PATCH_FAIL}:\n{apply_patch_output}')
instance['test_result']['report']['failed_apply_patch'] = True
return EvalOutput(
instance_id=instance_id,
test_result=instance['test_result'],
metadata=metadata,
)
elif 'APPLY_PATCH_PASS' in apply_patch_output:
logger.info(f'[{instance_id}] {APPLY_PATCH_PASS}:\n{apply_patch_output}')
# Run eval script in background and save output to log file
log_file = '/tmp/eval_output.log'
action = CmdRunAction(command=f'/tmp/eval.sh > {log_file} 2>&1 & echo $!')
action.set_hard_timeout(300) # Short timeout just to get the process ID
obs = runtime.run_action(action)
if isinstance(obs, CmdOutputObservation) and obs.exit_code == 0:
pid = obs.content.split()[-1].strip()
logger.info(
f'[{instance_id}] Evaluation process started with PID: {pid}'
)
# Poll for completion
start_time = time.time()
timeout = 1800 # 30 minutes
while True:
seconds_elapsed = time.time() - start_time
if seconds_elapsed > timeout:
logger.info(
f'[{instance_id}] Evaluation timed out after {timeout} seconds'
)
instance['test_result']['report']['test_timeout'] = True
break
check_action = CmdRunAction(
command=f'ps -p {pid} > /dev/null; echo $?'
)
check_action.set_hard_timeout(300)
check_obs = runtime.run_action(check_action)
if (
isinstance(check_obs, CmdOutputObservation)
and check_obs.content.split()[-1].strip() == '1'
):
logger.info(
f'[{instance_id}] Evaluation process completed after {seconds_elapsed} seconds'
)
break
logger.info(
f'[{instance_id}] [{seconds_elapsed:.0f}s] Evaluation still running, waiting...'
)
time.sleep(30) # Wait for 30 seconds before checking again
# Read the log file
cat_action = CmdRunAction(command=f'cat {log_file}')
cat_action.set_hard_timeout(300)
cat_obs = runtime.run_action(cat_action)
# Grade answer
if isinstance(cat_obs, CmdOutputObservation) and cat_obs.exit_code == 0:
test_output = cat_obs.content
assert isinstance(test_output, str)
instance['test_result']['test_output'] = test_output
# Get report from test output
logger.info(f'[{instance_id}] Grading answer...')
with tempfile.TemporaryDirectory() as temp_dir:
# Create a directory structure that matches the expected format
# NOTE: this is a hack to make the eval report format consistent
# with the original SWE-Bench eval script
log_dir = os.path.join(temp_dir, 'logs', instance_id.lower())
os.makedirs(log_dir, exist_ok=True)
test_output_path = os.path.join(log_dir, 'test_output.txt')
with open(test_output_path, 'w') as f:
f.write(test_output)
try:
_report = get_eval_report(
test_spec=test_spec,
prediction={
'model_patch': model_patch,
'instance_id': instance_id,
},
log_path=test_output_path,
include_tests_status=True,
)
report = _report[instance_id]
logger.info(
f"[{instance_id}] report: {report}\nResult for {instance_id}: resolved: {report['resolved']}"
)
instance['test_result']['report']['resolved'] = report[
'resolved'
]
except Exception as e:
logger.error(
f'[{instance_id}] Error when getting eval report: {e}'
)
instance['test_result']['report']['resolved'] = False
instance['test_result']['report']['error_eval'] = True
else:
logger.info(f'[{instance_id}] Error when starting eval:\n{obs.content}')
instance['test_result']['report']['error_eval'] = True
return EvalOutput(
instance_id=instance_id,
test_result=instance['test_result'],
metadata=metadata,
)
else:
logger.info(
f'[{instance_id}] Unexpected output when applying patch:\n{apply_patch_output}'
)
raise RuntimeError(
instance_id,
f'Unexpected output when applying patch:\n{apply_patch_output}',
logger,
)
finally:
runtime.close()
if __name__ == '__main__':
parser = get_parser()
parser.add_argument(
'--input-file',
type=str,
help='Path to input predictions file',
required=True,
)
parser.add_argument(
'--dataset',
type=str,
default='princeton-nlp/SWE-bench',
help='data set to evaluate on, either full-test or lite-test',
)
parser.add_argument(
'--split',
type=str,
default='test',
help='split to evaluate on',
)
args, _ = parser.parse_known_args()
# Load SWE-Bench dataset
full_dataset: list[SWEbenchInstance] = load_swebench_dataset(
args.dataset, args.split
)
instance_id_to_instance = {
instance['instance_id']: instance for instance in full_dataset
}
logger.info(
f'Loaded dataset {args.dataset} with split {args.split} to run inference on.'
)
# Load predictions
assert args.input_file.endswith('.jsonl'), 'Input file must be a jsonl file.'
required_fields = ['instance_id', 'model_patch', 'test_result']
with open(args.input_file) as f:
predictions = pd.DataFrame.from_records(
[
{k: v for k, v in json.loads(line).items() if k in required_fields}
for line in tqdm(f, desc='Loading predictions')
]
)
assert (
'instance_id' in predictions.columns
), 'Input file must contain instance_id column.'
if 'model_patch' not in predictions.columns and (
'test_result' in predictions.columns
and 'model_patch' in predictions['test_result'].iloc[0]
):
raise ValueError(
'Input file must contain model_patch column OR test_result column with model_patch field.'
)
assert len(predictions['instance_id'].unique()) == len(
predictions
), 'instance_id column must be unique.'
if 'model_patch' not in predictions.columns:
predictions['model_patch'] = predictions['test_result'].apply(
lambda x: x.get('git_patch', '')
)
assert {'instance_id', 'model_patch'}.issubset(
set(predictions.columns)
), 'Input file must contain instance_id and model_patch columns.'
# Process model_patch
predictions['model_patch'] = predictions['model_patch'].apply(process_git_patch)
# Merge predictions with dataset
predictions['instance'] = predictions['instance_id'].apply(
lambda x: instance_id_to_instance[x]
)
predictions['test_spec'] = predictions['instance'].apply(make_test_spec)
# Prepare dataset
output_file = args.input_file.replace('.jsonl', '.swebench_eval.jsonl')
instances = prepare_dataset(predictions, output_file, args.eval_n_limit)
# If possible, load the relevant metadata to avoid issues with `run_evaluation`.
metadata: EvalMetadata | None = None
metadata_filepath = os.path.join(os.path.dirname(args.input_file), 'metadata.json')
if os.path.exists(metadata_filepath):
with open(metadata_filepath, 'r') as metadata_file:
data = metadata_file.read()
metadata = EvalMetadata.model_validate_json(data)
else:
# Initialize with a dummy metadata when file doesn't exist
metadata = EvalMetadata(
agent_class='dummy_agent', # Placeholder agent class
llm_config=LLMConfig(model='dummy_model'), # Minimal LLM config
max_iterations=1, # Minimal iterations
eval_output_dir=os.path.dirname(
args.input_file
), # Use input file dir as output dir
start_time=time.strftime('%Y-%m-%d %H:%M:%S'), # Current time
git_commit=subprocess.check_output(['git', 'rev-parse', 'HEAD'])
.decode('utf-8')
.strip(), # Current commit
dataset=args.dataset, # Dataset name from args
)
# The evaluation harness constrains the signature of `process_instance_func` but we need to
# pass extra information. Build a new function object to avoid issues with multiprocessing.
process_instance_func = partial(
process_instance, log_dir=output_file.replace('.jsonl', '.logs')
)
run_evaluation(
instances,
metadata=metadata,
output_file=output_file,
num_workers=args.eval_num_workers,
process_instance_func=process_instance_func,
)
# Load evaluated predictions & print number of resolved predictions
evaluated_predictions = pd.read_json(output_file, lines=True)
fields = ['resolved', 'failed_apply_patch', 'error_eval', 'empty_generation']
def count_report_field(row, field):
return row['test_result']['report'][field]
report = {}
for field in fields:
count = evaluated_predictions.apply(
count_report_field, args=(field,), axis=1
).sum()
report[field] = count
logger.info(
f'# {field}: {count} / {len(evaluated_predictions)}. ({count / len(evaluated_predictions):.2%})'
)
@@ -1,24 +0,0 @@
{
"mode": "evaluation",
"workdir": "./data/workdir",
"patch_files": [
"./data/patches/<your_patch_file>.jsonl"
],
"dataset_files": [
"./data/patches/<to_evaluate_dataset_file>.jsonl"
],
"force_build": false,
"output_dir": "./data/dataset",
"specifics": [],
"skips": [],
"repo_dir": "./data/repos",
"need_clone": false,
"global_env": [],
"clear_env": true,
"stop_on_error": true,
"max_workers": 8,
"max_workers_build_image": 8,
"max_workers_run_instance": 8,
"log_dir": "./data/logs",
"log_level": "DEBUG"
}
File diff suppressed because one or more lines are too long
@@ -1,3 +0,0 @@
{"org": "ponylang", "repo": "ponyc", "number": "4595", "fix_patch": "diff --git a/src/libponyc/ast/parser.c b/src/libponyc/ast/parser.c\nindex 9852922f..2c37d6b8 100644\n--- a/src/libponyc/ast/parser.c\n+++ b/src/libponyc/ast/parser.c\n@@ -693,6 +693,7 @@ DEF(idseqsingle);\n AST_NODE(TK_LET);\n TOKEN(\"variable name\", TK_ID);\n AST_NODE(TK_NONE); // Type\n+ SET_FLAG(AST_FLAG_IN_PARENS);\n DONE();\n \n // idseq"}
{"org": "ponylang", "repo": "ponyc", "number": "4593", "fix_patch": "diff --git a/packages/cli/command_parser.pony b/packages/cli/command_parser.pony\nindex a5acce8e..fa97808b 100644\n--- a/packages/cli/command_parser.pony\n+++ b/packages/cli/command_parser.pony\n@@ -100,6 +100,7 @@ class CommandParser\n | let cs: CommandSpec box =>\n return CommandParser._sub(cs, this).\n _parse_command(tokens, options, args, envsmap, opt_stop)\n+// Correctly handle parent default options\n end\n else\n return SyntaxError(token, \"unknown command\")"}
{"org": "ponylang", "repo": "ponyc", "number": "4588", "fix_patch": "diff --git a/src/libponyc/expr/match.c b/src/libponyc/expr/match.c\nindex 7d16066f..c2ec7056 100644\n--- a/src/libponyc/expr/match.c\n+++ b/src/libponyc/expr/match.c\n@@ -314,8 +314,10 @@ static ast_t* make_pattern_type(pass_opt_t* opt, ast_t* pattern)\n case TK_DONTCAREREF:\n case TK_MATCH_CAPTURE:\n case TK_MATCH_DONTCARE:\n+ if (ast_id(pattern_type) == TK_ISO) pattern_type = set_cap_and_ephemeral(pattern_type, TK_TRN, TK_EPHEMERAL);\n return pattern_type;\n \n+\n case TK_TUPLE:\n {\n ast_t* pattern_child = ast_child(pattern);"}
@@ -1,32 +0,0 @@
#!/bin/bash
BASE_SCRIPT="./evaluation/benchmarks/multi_swe_bench/scripts/run_infer.sh"
MODELS=("aaa" "bbb" "ccc" "ddd" "fff")
GIT_VERSION="HEAD"
AGENT_NAME="CodeActAgent"
EVAL_LIMIT="500"
MAX_ITER="50"
NUM_WORKERS="1"
LANGUAGE="XXX"
DATASET="XXX"
for MODEL in "${MODELS[@]}"; do
echo "=============================="
echo "Running benchmark for MODEL: $MODEL"
echo "=============================="
$BASE_SCRIPT \
"$MODEL" \
"$GIT_VERSION" \
"$AGENT_NAME" \
"$EVAL_LIMIT" \
"$MAX_ITER" \
"$NUM_WORKERS" \
"$DATASET" \
"$LANGUAGE"
echo "Completed $MODEL"
done
@@ -1,39 +0,0 @@
"""Mapping instance_id to resource_factor.
Different instances may have different resource requirements.
e.g., some instances may require more memory/CPU to run inference.
This file tracks the resource requirements of different instances.
"""
import json
import os
from openhands.core.logger import openhands_logger as logger
CUR_DIR = os.path.dirname(os.path.abspath(__file__))
DEFAULT_RUNTIME_RESOURCE_FACTOR = int(
os.environ.get('DEFAULT_RUNTIME_RESOURCE_FACTOR', 1)
)
# dataset to resource mapping
_global_resource_mapping: dict[str, dict[str, float]] = {}
def get_resource_mapping(dataset_name: str) -> dict[str, float]:
if dataset_name not in _global_resource_mapping:
file_path = os.path.join(CUR_DIR, f'{dataset_name}.json')
if not os.path.exists(file_path):
logger.warning(f'Resource mapping for {dataset_name} not found.')
return None
with open(file_path, 'r') as f:
_global_resource_mapping[dataset_name] = json.load(f)
logger.info(f'Loaded resource mapping for {dataset_name}')
return _global_resource_mapping[dataset_name]
def get_instance_resource_factor(dataset_name: str, instance_id: str) -> int:
resource_mapping = get_resource_mapping(dataset_name)
if resource_mapping is None:
return DEFAULT_RUNTIME_RESOURCE_FACTOR
return int(resource_mapping.get(instance_id, DEFAULT_RUNTIME_RESOURCE_FACTOR))
@@ -1,847 +0,0 @@
import asyncio
import json
import os
import tempfile
from typing import Any
import pandas as pd
import toml
from datasets import load_dataset
import openhands.agenthub
from evaluation.benchmarks.swe_bench.resource.mapping import (
get_instance_resource_factor,
)
from evaluation.utils.shared import (
EvalException,
EvalMetadata,
EvalOutput,
assert_and_raise,
codeact_user_response,
get_default_sandbox_config_for_eval,
get_metrics,
is_fatal_evaluation_error,
make_metadata,
prepare_dataset,
reset_logger_for_multiprocessing,
run_evaluation,
update_llm_config_for_completions_logging,
)
from openhands.controller.state.state import State
from openhands.core.config import (
AgentConfig,
AppConfig,
get_llm_config_arg,
get_parser,
)
from openhands.core.logger import openhands_logger as logger
from openhands.core.main import create_runtime, run_controller
from openhands.events.action import CmdRunAction, FileReadAction, MessageAction
from openhands.events.observation import CmdOutputObservation, ErrorObservation
from openhands.events.serialization.event import event_to_dict
from openhands.runtime.base import Runtime
from openhands.utils.async_utils import call_async_from_sync
from openhands.utils.shutdown_listener import sleep_if_should_continue
USE_HINT_TEXT = os.environ.get('USE_HINT_TEXT', 'false').lower() == 'true'
USE_INSTANCE_IMAGE = os.environ.get('USE_INSTANCE_IMAGE', 'true').lower() == 'true'
RUN_WITH_BROWSING = os.environ.get('RUN_WITH_BROWSING', 'false').lower() == 'true'
# TODO: migrate all swe-bench docker to ghcr.io/openhands
# TODO: 适应所有的语言
DOCKER_IMAGE_PREFIX = os.environ.get('EVAL_DOCKER_IMAGE_PREFIX', '')
LANGUAGE = os.environ.get('LANGUAGE', 'python')
logger.info(f'Using docker image prefix: {DOCKER_IMAGE_PREFIX}')
AGENT_CLS_TO_FAKE_USER_RESPONSE_FN = {
'CodeActAgent': codeact_user_response,
}
def _get_swebench_workspace_dir_name(instance: pd.Series) -> str:
return f'{instance.repo}__{instance.version}'.replace('/', '__')
def get_instruction(instance: pd.Series, metadata: EvalMetadata):
workspace_dir_name = _get_swebench_workspace_dir_name(instance)
# Prepare instruction
# Instruction based on Anthropic's official trajectory
# https://github.com/eschluntz/swe-bench-experiments/tree/main/evaluation/verified/20241022_tools_claude-3-5-sonnet-updated/trajs
instructions = {
'python': (
'<uploaded_files>\n'
f'/workspace/{workspace_dir_name}\n'
'</uploaded_files>\n'
f"I've uploaded a python code repository in the directory {workspace_dir_name}. Consider the following issue description:\n\n"
f'<issue_description>\n'
f'{instance.problem_statement}\n'
'</issue_description>\n\n'
'Can you help me implement the necessary changes to the repository so that the requirements specified in the <issue_description> are met?\n'
"I've already taken care of all changes to any of the test files described in the <issue_description>. This means you DON'T have to modify the testing logic or any of the tests in any way!\n"
"Also the development Python environment is already set up for you (i.e., all dependencies already installed), so you don't need to install other packages.\n"
'Your task is to make the minimal changes to non-test files in the /workspace directory to ensure the <issue_description> is satisfied.\n'
'Follow these steps to resolve the issue:\n'
'1. As a first step, it might be a good idea to explore the repo to familiarize yourself with its structure.\n'
'2. Create a script to reproduce the error and execute it with `python <filename.py>` using the BashTool, to confirm the error.\n'
'3. Edit the sourcecode of the repo to resolve the issue.\n'
'4. Rerun your reproduce script and confirm that the error is fixed!\n'
'5. Think about edgecases, add comprehensive tests for them in your reproduce script, and run them to make sure your fix handles them as well.\n'
f'6. Once you are done with the initial implementation, please carefully re-read the problem description and check the difference between the current code and the base commit {instance["base_commit"]}. Do you think that the issue has been completely and comprehensively solved? Write tests to check the correctness of the solution, specifically focusing on tests that may point out any remaining problems that are not yet solved. Run all of the tests in the repo and check if any of them fail, and if they do fix the code. Repeat this process of carefully reading the problem description and current implementation, testing, and fixing any problems until you are confident that the current implementation is correct. Find and run any tests in the repo that are related to:\n'
' - The issue you are fixing\n'
' - The files you modified\n'
' - The functions you changed\n'
' Make sure all these tests pass with your changes.\n'
"Your thinking should be thorough and so it's fine if it's very long.\n"
),
'java': (
'<uploaded_files>\n'
f'/workspace/{workspace_dir_name}\n'
'</uploaded_files>\n'
f"I've uploaded a Java code repository in the directory {workspace_dir_name}. Consider the following issue description:\n\n"
f'<issue_description>\n'
f'{instance.problem_statement}\n'
'</issue_description>\n\n'
"Can you help me implement the necessary changes to the repository so that the requirements specified in the <issue_description> are met?\n"
"I've already taken care of all changes to any of the test files described in the <issue_description>. This means you DON'T have to modify the testing logic or any of the tests in any way!\n"
"Also the development Java environment is already set up for you (i.e., all dependencies already installed), so you don't need to install other packages.\n"
"Your task is to make the minimal changes to non-test files in the /workspace directory to ensure the <issue_description> is satisfied.\n"
"Follow these steps to resolve the issue:\n"
"1. As a first step, it might be a good idea to explore the repo to familiarize yourself with its structure.\n"
'2. Create a Java class to reproduce the error and execute it by first compiling with `javac <classname>.java` and then running with `java <classname>` using the BashTool, to confirm the error\n'
"3. Edit the sourcecode of the repo to resolve the issue.\n"
"4. Rerun your reproduce script or class and confirm that the error is fixed!\n"
"5. Think about edgecases, add comprehensive tests for them in your reproduce class or script, and run them to make sure your fix handles these cases as well.\n"
f"6. Once you are done with the initial implementation, please carefully re-read the problem description and check the difference between the current code and the base commit {instance['base_commit']}. Do you think that the issue has been completely and comprehensively solved? Write tests to check the correctness of the solution, specifically focusing on tests that may point out any remaining problems that are not yet solved. Run all of the tests in the repo and check if any of them fail, and if they do fix the code. Repeat this process of carefully reading the problem description and current implementation, testing, and fixing any problems until you are confident that the current implementation is correct. Find and run any tests in the repo that are related to:\n"
" - The issue you are fixing\n"
" - The files you modified\n"
" - The functions or classes you changed\n"
" Make sure all these tests pass with your changes.\n"
"Your thinking should be thorough and so it's fine if it's very long.\n"
),
'go': (
'<uploaded_files>\n'
f'/workspace/{workspace_dir_name}\n'
'</uploaded_files>\n'
f"I've uploaded a Go code repository in the directory {workspace_dir_name}. Consider the following issue description:\n\n"
f'<issue_description>\n'
f'{instance.problem_statement}\n'
'</issue_description>\n\n'
'Can you help me implement the necessary changes to the repository so that the requirements specified in the <issue_description> are met?\n'
"I've already taken care of all changes to any of the test files described in the <issue_description>. This means you DON'T have to modify the testing logic or any of the tests in any way!\n"
"Also the development Go environment is already set up for you (i.e., all dependencies already installed), so you don't need to install other packages.\n"
'Your task is to make the minimal changes to non-test files in the /workspace directory to ensure the <issue_description> is satisfied.\n'
'Follow these steps to resolve the issue:\n'
'1. As a first step, it might be a good idea to explore the repo to familiarize yourself with its structure.\n'
'2. Create a script or a function to reproduce the error and execute it with `go run <filename.go>` using the BashTool, to confirm the error.\n'
'3. Edit the sourcecode of the repo to resolve the issue.\n'
'4. Rerun your reproduce script and confirm that the error is fixed!\n'
'5. Think about edgecases, add comprehensive tests for them in your reproduce script, and run them to make sure your fix handles them as well.\n'
f'6. Once you are done with the initial implementation, please carefully re-read the problem description and check the difference between the current code and the base commit {instance["base_commit"]}. Do you think that the issue has been completely and comprehensively solved? Write tests to check the correctness of the solution, specifically focusing on tests that may point out any remaining problems that are not yet solved. Run all of the tests in the repo and check if any of them fail, and if they do fix the code. Repeat this process of carefully reading the problem description and current implementation, testing, and fixing any problems until you are confident that the current implementation is correct. Find and run any tests in the repo that are related to:\n'
' - The issue you are fixing\n'
' - The files you modified\n'
' - The functions you changed\n'
' Make sure all these tests pass with your changes.\n'
"Your thinking should be thorough and so it's fine if it's very long.\n"
),
'c': (
'<uploaded_files>\n'
f'/workspace/{workspace_dir_name}\n'
'</uploaded_files>\n'
f"I've uploaded a C code repository in the directory {workspace_dir_name}. Consider the following issue description:\n\n"
f'<issue_description>\n'
f'{instance.problem_statement}\n'
'</issue_description>\n\n'
'Can you help me implement the necessary changes to the repository so that the requirements specified in the <issue_description> are met?\n'
"I've already taken care of all changes to any of the test files described in the <issue_description>. This means you DON'T have to modify the testing logic or any of the tests in any way!\n"
"Also the development C environment is already set up for you (i.e., all dependencies already installed), so you don't need to install other packages.\n"
'Your task is to make the minimal changes to non-test files in the /workspace directory to ensure the <issue_description> is satisfied.\n'
'Follow these steps to resolve the issue:\n'
'1. As a first step, it might be a good idea to explore the repo to familiarize yourself with its structure.\n'
'2. Create a script to reproduce the error by compiling your C code (for example, using `gcc <filename.c> -o <executable>`) and then running the executable using the BashTool, to confirm the error.\n'
'3. Edit the sourcecode of the repo to resolve the issue.\n'
'4. Rerun your reproduce script and confirm that the error is fixed!\n'
'5. Think about edgecases, add comprehensive tests for them in your reproduce script, and run them to make sure your fix handles them as well.\n'
f'6. Once you are done with the initial implementation, please carefully re-read the problem description and check the difference between the current code and the base commit {instance["base_commit"]}. Do you think that the issue has been completely and comprehensively solved? Write tests to check the correctness of the solution, specifically focusing on tests that may point out any remaining problems that are not yet solved. Run all of the tests in the repo and check if any of them fail, and if they do fix the code. Repeat this process of carefully reading the problem description and current implementation, testing, and fixing any problems until you are confident that the current implementation is correct. Find and run any tests in the repo that are related to:\n'
' - The issue you are fixing\n'
' - The files you modified\n'
' - The functions you changed\n'
' Make sure all these tests pass with your changes.\n'
"Your thinking should be thorough and so it's fine if it's very long.\n"
),
'cpp': (
'<uploaded_files>\n'
f'/workspace/{workspace_dir_name}\n'
'</uploaded_files>\n'
f"I've uploaded a C++ code repository in the directory {workspace_dir_name}. Consider the following issue description:\n\n"
f'<issue_description>\n'
f'{instance.problem_statement}\n'
'</issue_description>\n\n'
'Can you help me implement the necessary changes to the repository so that the requirements specified in the <issue_description> are met?\n'
"I've already taken care of all changes to any of the test files described in the <issue_description>. This means you DON'T have to modify the testing logic or any of the tests in any way!\n"
"Also the development C++ environment is already set up for you (i.e., all dependencies already installed), so you don't need to install other packages.\n"
'Your task is to make the minimal changes to non-test files in the /workspace directory to ensure the <issue_description> is satisfied.\n'
'Follow these steps to resolve the issue:\n'
'1. As a first step, it might be a good idea to explore the repo to familiarize yourself with its structure.\n'
'2. Create or adapt a small executable (e.g., a main file or a test driver) to reproduce the issue. Build and run it (for example, by using `g++ -o reproduce reproduce.cpp && ./reproduce` via the BashTool) to confirm the error.\n'
'3. Edit the sourcecode of the repo to resolve the issue.\n'
'4. Rerun your reproduce script and confirm that the error is fixed!\n'
'5. Think about edgecases, add comprehensive tests for them in your reproduce script, and run them to make sure your fix handles them as well.\n'
f'6. Once you are done with the initial implementation, please carefully re-read the problem description and check the difference between the current code and the base commit {instance["base_commit"]}. Do you think that the issue has been completely and comprehensively solved? Write tests to check the correctness of the solution, specifically focusing on tests that may point out any remaining problems that are not yet solved. Run all of the tests in the repo and check if any of them fail, and if they do fix the code. Repeat this process of carefully reading the problem description and current implementation, testing, and fixing any problems until you are confident that the current implementation is correct. Find and run any tests in the repo that are related to:\n'
' - The issue you are fixing\n'
' - The files you modified\n'
' - The functions you changed\n'
' Make sure all these tests pass with your changes.\n'
"Your thinking should be thorough and so it's fine if it's very long.\n"
),
'javascript': (
'<uploaded_files>\n'
f'/workspace/{workspace_dir_name}\n'
'</uploaded_files>\n'
f"I've uploaded a Javascript code repository in the directory {workspace_dir_name}. Consider the following issue description:\n\n"
f'<issue_description>\n'
f'{instance.problem_statement}\n'
'</issue_description>\n\n'
'Can you help me implement the necessary changes to the repository so that the requirements specified in the <issue_description> are met?\n'
"I've already taken care of all changes to any of the test files described in the <issue_description>. This means you DON'T have to modify the testing logic or any of the tests in any way!\n"
"Also the development Javascript environment is already set up for you (i.e., all dependencies already installed), so you don't need to install other packages.\n"
'Your task is to make the minimal changes to non-test files in the /workspace directory to ensure the <issue_description> is satisfied.\n'
'Follow these steps to resolve the issue:\n'
'1. As a first step, it might be a good idea to explore the repo to familiarize yourself with its structure.\n'
'2. Create a script to reproduce the error and execute it with `node <filename.js>` using the BashTool, to confirm the error.\n'
'3. Edit the sourcecode of the repo to resolve the issue.\n'
'4. Rerun your reproduce script and confirm that the error is fixed!\n'
'5. Think about edgecases, add comprehensive tests for them in your reproduce script, and run them to make sure your fix handles them as well.\n'
f'6. Once you are done with the initial implementation, please carefully re-read the problem description and check the difference between the current code and the base commit {instance["base_commit"]}. Do you think that the issue has been completely and comprehensively solved? Write tests to check the correctness of the solution, specifically focusing on tests that may point out any remaining problems that are not yet solved. Run all of the tests in the repo and check if any of them fail, and if they do fix the code. Repeat this process of carefully reading the problem description and current implementation, testing, and fixing any problems until you are confident that the current implementation is correct. Find and run any tests in the repo that are related to:\n'
' - The issue you are fixing\n'
' - The files you modified\n'
' - The functions you changed\n'
' Make sure all these tests pass with your changes.\n'
"Your thinking should be thorough and so it's fine if it's very long.\n"
),
'typescript': (
'<uploaded_files>\n'
f'/workspace/{workspace_dir_name}\n'
'</uploaded_files>\n'
f"I've uploaded a Typescript code repository in the directory {workspace_dir_name}. Consider the following issue description:\n\n"
f'<issue_description>\n'
f'{instance.problem_statement}\n'
'</issue_description>\n\n'
'Can you help me implement the necessary changes to the repository so that the requirements specified in the <issue_description> are met?\n'
"I've already taken care of all changes to any of the test files described in the <issue_description>. This means you DON'T have to modify the testing logic or any of the tests in any way!\n"
"Also the development Typescript environment is already set up for you (i.e., all dependencies already installed), so you don't need to install other packages.\n"
'Your task is to make the minimal changes to non-test files in the /workspace directory to ensure the <issue_description> is satisfied.\n'
'Follow these steps to resolve the issue:\n'
'1. As a first step, it might be a good idea to explore the repo to familiarize yourself with its structure.\n'
'2. Create a script to reproduce the error and execute it with `ts-node <filename.ts>` using the BashTool, to confirm the error.\n'
'3. Edit the sourcecode of the repo to resolve the issue.\n'
'4. Rerun your reproduce script and confirm that the error is fixed!\n'
'5. Think about edgecases, add comprehensive tests for them in your reproduce script, and run them to make sure your fix handles them as well.\n'
f'6. Once you are done with the initial implementation, please carefully re-read the problem description and check the difference between the current code and the base commit {instance["base_commit"]}. Do you think that the issue has been completely and comprehensively solved? Write tests to check the correctness of the solution, specifically focusing on tests that may point out any remaining problems that are not yet solved. Run all of the tests in the repo and check if any of them fail, and if they do fix the code. Repeat this process of carefully reading the problem description and current implementation, testing, and fixing any problems until you are confident that the current implementation is correct. Find and run any tests in the repo that are related to:\n'
' - The issue you are fixing\n'
' - The files you modified\n'
' - The functions you changed\n'
' Make sure all these tests pass with your changes.\n'
"Your thinking should be thorough and so it's fine if it's very long.\n"
),
'rust': (
'<uploaded_files>\n'
f'/workspace/{workspace_dir_name}\n'
'</uploaded_files>\n'
f"I've uploaded a Rust code repository in the directory {workspace_dir_name}. Consider the following issue description:\n\n"
f'<issue_description>\n'
f'{instance.problem_statement}\n'
'</issue_description>\n\n'
'Can you help me implement the necessary changes to the repository so that the requirements specified in the <issue_description> are met?\n'
"I've already taken care of all changes to any of the test files described in the <issue_description>. This means you DON'T have to modify the testing logic or any of the tests in any way!\n"
"Also the development Rust environment is already set up for you (i.e., all dependencies already installed), so you don't need to install other packages.\n"
'Your task is to make the minimal changes to non-test files in the /workspace directory to ensure the <issue_description> is satisfied.\n'
'Follow these steps to resolve the issue:\n'
'1. As a first step, it might be a good idea to explore the repo to familiarize yourself with its structure.\n'
'2. Create a reproduction script (or binary) that triggers the error and execute it with `cargo run --bin <filename>` using the BashTool, to confirm the error.\n'
'3. Edit the sourcecode of the repo to resolve the issue.\n'
'4. Rerun your reproduce script and confirm that the error is fixed!\n'
'5. Think about edgecases, add comprehensive tests for them in your reproduce script, and run them to make sure your fix handles them as well.\n'
f'6. Once you are done with the initial implementation, please carefully re-read the problem description and check the difference between the current code and the base commit {instance["base_commit"]}. Do you think that the issue has been completely and comprehensively solved? Write tests to check the correctness of the solution, specifically focusing on tests that may point out any remaining problems that are not yet solved. Run all of the tests in the repo and check if any of them fail, and if they do fix the code. Repeat this process of carefully reading the problem description and current implementation, testing, and fixing any problems until you are confident that the current implementation is correct. Find and run any tests in the repo that are related to:\n'
' - The issue you are fixing\n'
' - The files you modified\n'
' - The functions you changed\n'
' Make sure all these tests pass with your changes.\n'
"Your thinking should be thorough and so it's fine if it's very long.\n"
),
}
instruction = instructions.get(LANGUAGE.lower())
if instruction and RUN_WITH_BROWSING:
instruction += (
'<IMPORTANT!>\n'
'You SHOULD NEVER attempt to browse the web. '
'</IMPORTANT!>\n'
)
return instruction
# TODO: 适应所有的语言
# def get_instance_docker_image(instance_id: str) -> str:
# image_name = 'sweb.eval.x86_64.' + instance_id
# if LANGUAGE == 'python':
# image_name = image_name.replace(
# '__', '_s_'
# ) # to comply with docker image naming convention
# return (DOCKER_IMAGE_PREFIX.rstrip('/') + '/' + image_name).lower()
# else:
# return image_name.lower() ##加载本地的
def get_instance_docker_image(instance: pd.Series):
if LANGUAGE == 'python':
image_name = 'sweb.eval.x86_64.' + instance['instance_id']
image_name = image_name.replace(
'__', '_s_'
) # to comply with docker image naming convention
return (DOCKER_IMAGE_PREFIX.rstrip('/') + '/' + image_name).lower()
else:
container_name = instance.get('repo', '').lower()
container_name = container_name.replace('/', '_m_')
instance_id = instance.get('instance_id', '')
tag_suffix = instance_id.split('-')[-1] if instance_id else ''
container_tag = f'pr-{tag_suffix}'
# pdb.set_trace()
return f'mswebench/{container_name}:{container_tag}'
# return "kong/insomnia:pr-8284"
# return "'sweb.eval.x86_64.local_insomnia"
# return "local_insomnia_why"
# return "local/kong-insomnia:pr-8117"
def get_config(
instance: pd.Series,
metadata: EvalMetadata,
) -> AppConfig:
SWE_BENCH_CONTAINER_IMAGE = 'ghcr.io/opendevin/eval-swe-bench:full-v1.2.1'
if USE_INSTANCE_IMAGE:
# We use a different instance image for the each instance of swe-bench eval
# base_container_image = get_instance_docker_image(instance['instance_id'])
base_container_image = get_instance_docker_image(instance)
logger.info(
f'Using instance container image: {base_container_image}. '
f'Please make sure this image exists. '
f'Submit an issue on https://github.com/All-Hands-AI/OpenHands if you run into any issues.'
)
else:
base_container_image = SWE_BENCH_CONTAINER_IMAGE
logger.info(f'Using swe-bench container image: {base_container_image}')
sandbox_config = get_default_sandbox_config_for_eval()
sandbox_config.base_container_image = base_container_image
sandbox_config.enable_auto_lint = True
sandbox_config.use_host_network = False
# Add platform to the sandbox config to solve issue 4401
sandbox_config.platform = 'linux/amd64'
sandbox_config.remote_runtime_resource_factor = get_instance_resource_factor(
dataset_name=metadata.dataset,
instance_id=instance['instance_id'],
)
config = AppConfig(
default_agent=metadata.agent_class,
run_as_openhands=False,
max_iterations=metadata.max_iterations,
runtime=os.environ.get('RUNTIME', 'docker'),
sandbox=sandbox_config,
# do not mount workspace
workspace_base=None,
workspace_mount_path=None,
)
config.set_llm_config(
update_llm_config_for_completions_logging(
metadata.llm_config, metadata.eval_output_dir, instance['instance_id']
)
)
agent_config = AgentConfig(
enable_jupyter=False,
enable_browsing=RUN_WITH_BROWSING,
enable_llm_editor=False,
condenser=metadata.condenser_config,
enable_prompt_extensions=False,
)
config.set_agent_config(agent_config)
return config
def initialize_runtime(
runtime: Runtime,
instance: pd.Series, # this argument is not required
):
"""Initialize the runtime for the agent.
This function is called before the runtime is used to run the agent.
"""
logger.info('-' * 30)
logger.info('BEGIN Runtime Initialization Fn')
logger.info('-' * 30)
workspace_dir_name = _get_swebench_workspace_dir_name(instance)
obs: CmdOutputObservation
REPO_NAME = instance['repo'].split('/')[-1]
# Set instance id
action = CmdRunAction(
command=f"""echo 'export SWE_INSTANCE_ID={instance['instance_id']}' >> ~/.bashrc && echo 'export PIP_CACHE_DIR=~/.cache/pip' >> ~/.bashrc && echo "alias git='git --no-pager'" >> ~/.bashrc && echo 'export REPO_NAME={REPO_NAME}' >> ~/.bashrc"""
)
action.set_hard_timeout(600)
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert_and_raise(
obs.exit_code == 0, f'Failed to export SWE_INSTANCE_ID: {str(obs)}'
)
# pdb.set_trace()
action = CmdRunAction(command="""export USER=$(whoami); echo USER=${USER} """)
action.set_hard_timeout(600)
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert_and_raise(obs.exit_code == 0, f'Failed to export USER: {str(obs)}')
if USE_INSTANCE_IMAGE:
# inject the init script
script_dir = os.path.dirname(__file__)
# inject the instance info
action = CmdRunAction(command='mkdir -p /swe_util/eval_data/instances')
action.set_hard_timeout(600)
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert_and_raise(
obs.exit_code == 0,
f'Failed to create /swe_util/eval_data/instances: {str(obs)}',
)
swe_instance_json_name = 'swe-bench-instance.json'
with tempfile.TemporaryDirectory() as temp_dir:
# Construct the full path for the desired file name within the temporary directory
temp_file_path = os.path.join(temp_dir, swe_instance_json_name)
# Write to the file with the desired name within the temporary directory
with open(temp_file_path, 'w') as f:
if not isinstance(instance, dict):
json.dump([instance.to_dict()], f)
else:
json.dump([instance], f)
# Copy the file to the desired location
runtime.copy_to(temp_file_path, '/swe_util/eval_data/instances/')
# inject the instance swe entry
runtime.copy_to(
str(os.path.join(script_dir, 'scripts/setup/instance_swe_entry.sh')),
'/swe_util/',
)
action = CmdRunAction(command='cat ~/.bashrc')
action.set_hard_timeout(600)
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert_and_raise(obs.exit_code == 0, f'Failed to cat ~/.bashrc: {str(obs)}')
action = CmdRunAction(command='source ~/.bashrc')
action.set_hard_timeout(600)
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
if isinstance(obs, ErrorObservation):
logger.error(f'Failed to source ~/.bashrc: {str(obs)}')
assert_and_raise(obs.exit_code == 0, f'Failed to source ~/.bashrc: {str(obs)}')
action = CmdRunAction(command='source /swe_util/instance_swe_entry.sh')
action.set_hard_timeout(600)
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert_and_raise(
obs.exit_code == 0,
f'Failed to source /swe_util/instance_swe_entry.sh: {str(obs)}',
)
else:
action = CmdRunAction(command='source /swe_util/swe_entry.sh')
action.set_hard_timeout(1800)
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert_and_raise(
obs.exit_code == 0,
f'Failed to source /swe_util/swe_entry.sh: {str(obs)}',
)
action = CmdRunAction(command=f'cd /workspace/{workspace_dir_name}')
action.set_hard_timeout(600)
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert_and_raise(
obs.exit_code == 0,
f'Failed to cd to /workspace/{workspace_dir_name}: {str(obs)}',
)
action = CmdRunAction(command='git reset --hard')
action.set_hard_timeout(600)
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert_and_raise(obs.exit_code == 0, f'Failed to git reset --hard: {str(obs)}')
action = CmdRunAction(
command='for remote_name in $(git remote); do git remote remove "${remote_name}"; done'
)
action.set_hard_timeout(600)
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert_and_raise(obs.exit_code == 0, f'Failed to remove git remotes: {str(obs)}')
##TODO:这里看看需不需要判断其他语言的环境
# action = CmdRunAction(command='which python')
# action.set_hard_timeout(600)
# logger.info(action, extra={'msg_type': 'ACTION'})
# obs = runtime.run_action(action)
# logger.info(obs, extra={'msg_type': 'OBSERVATION'})
# assert_and_raise(
# obs.exit_code == 0 and 'testbed' in obs.content,
# f'Expected to find python interpreter from testbed, but got: {str(obs)}',
# )
logger.info('-' * 30)
logger.info('END Runtime Initialization Fn')
logger.info('-' * 30)
def complete_runtime(
runtime: Runtime,
instance: pd.Series, # this argument is not required, but it is used to get the workspace_dir_name
) -> dict[str, Any]:
"""Complete the runtime for the agent.
This function is called before the runtime is used to run the agent.
If you need to do something in the sandbox to get the correctness metric after
the agent has run, modify this function.
"""
logger.info('-' * 30)
logger.info('BEGIN Runtime Completion Fn')
logger.info('-' * 30)
obs: CmdOutputObservation
workspace_dir_name = _get_swebench_workspace_dir_name(instance)
action = CmdRunAction(command=f'cd /workspace/{workspace_dir_name}')
action.set_hard_timeout(600)
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
if obs.exit_code == -1:
# The previous command is still running
# We need to kill previous command
logger.info('The previous command is still running, trying to kill it...')
action = CmdRunAction(command='C-c')
obs = runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
# Then run the command again
action = CmdRunAction(command=f'cd /workspace/{workspace_dir_name}')
action.set_hard_timeout(600)
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert_and_raise(
isinstance(obs, CmdOutputObservation) and obs.exit_code == 0,
f'Failed to cd to /workspace/{workspace_dir_name}: {str(obs)}',
)
action = CmdRunAction(command='git config --global core.pager ""')
action.set_hard_timeout(600)
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert_and_raise(
isinstance(obs, CmdOutputObservation) and obs.exit_code == 0,
f'Failed to git config --global core.pager "": {str(obs)}',
)
action = CmdRunAction(command='git add -A')
action.set_hard_timeout(600)
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert_and_raise(
isinstance(obs, CmdOutputObservation) and obs.exit_code == 0,
f'Failed to git add -A: {str(obs)}',
)
##删除二进制文件
action = CmdRunAction(
command="""
for file in $(git status --porcelain | grep -E "^(M| M|\\?\\?|A| A)" | cut -c4-); do
if [ -f "$file" ] && (file "$file" | grep -q "executable" || git check-attr binary "$file" | grep -q "binary: set"); then
git rm -f "$file" 2>/dev/null || rm -f "$file"
echo "Removed: $file"
fi
done
"""
)
action.set_hard_timeout(600)
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert_and_raise(
isinstance(obs, CmdOutputObservation) and obs.exit_code == 0,
f'Failed to remove binary files: {str(obs)}',
)
# pdb.set_trace()
n_retries = 0
git_patch = None
while n_retries < 5:
action = CmdRunAction(
command=f'git diff --no-color --cached {instance["base_commit"]} > patch.diff'
)
action.set_hard_timeout(max(300 + 100 * n_retries, 600))
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
n_retries += 1
if isinstance(obs, CmdOutputObservation):
if obs.exit_code == 0:
# git_patch = obs.content.strip()
break
else:
logger.info('Failed to get git diff, retrying...')
sleep_if_should_continue(10)
elif isinstance(obs, ErrorObservation):
logger.error(f'Error occurred: {obs.content}. Retrying...')
sleep_if_should_continue(10)
else:
assert_and_raise(False, f'Unexpected observation type: {str(obs)}')
action = FileReadAction(path='patch.diff')
action.set_hard_timeout(max(300 + 100 * n_retries, 600))
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
git_patch = obs.content
# pdb.set_trace()
assert_and_raise(git_patch is not None, 'Failed to get git diff (None)')
logger.info('-' * 30)
logger.info('END Runtime Completion Fn')
logger.info('-' * 30)
return {'git_patch': git_patch}
def process_instance(
instance: pd.Series,
metadata: EvalMetadata,
reset_logger: bool = True,
runtime_failure_count: int = 0,
) -> EvalOutput:
config = get_config(instance, metadata)
# Setup the logger properly, so you can run multi-processing to parallelize the evaluation
if reset_logger:
log_dir = os.path.join(metadata.eval_output_dir, 'infer_logs')
reset_logger_for_multiprocessing(logger, instance.instance_id, log_dir)
else:
logger.info(f'Starting evaluation for instance {instance.instance_id}.')
# Increase resource_factor with increasing attempt_id
if runtime_failure_count > 0:
config.sandbox.remote_runtime_resource_factor = min(
config.sandbox.remote_runtime_resource_factor * (2**runtime_failure_count),
8,
)
logger.warning(
f'This is the {runtime_failure_count + 1}th attempt for instance {instance.instance_id}, setting resource factor to {config.sandbox.remote_runtime_resource_factor}'
)
# pdb.set_trace()
runtime = create_runtime(config)
call_async_from_sync(runtime.connect)
try:
initialize_runtime(runtime, instance)
instruction = get_instruction(instance, metadata)
# Here's how you can run the agent (similar to the `main` function) and get the final task state
state: State | None = asyncio.run(
run_controller(
config=config,
initial_user_action=MessageAction(content=instruction),
runtime=runtime,
fake_user_response_fn=AGENT_CLS_TO_FAKE_USER_RESPONSE_FN[
metadata.agent_class
],
)
)
# if fatal error, throw EvalError to trigger re-run
if is_fatal_evaluation_error(state.last_error):
raise EvalException('Fatal error detected: ' + state.last_error)
# ======= THIS IS SWE-Bench specific =======
# Get git patch
return_val = complete_runtime(runtime, instance)
git_patch = return_val['git_patch']
logger.info(
f'Got git diff for instance {instance.instance_id}:\n--------\n{git_patch}\n--------'
)
finally:
runtime.close()
# ==========================================
# ======= Attempt to evaluate the agent's edits =======
# we use eval_infer.sh to evaluate the agent's edits, not here
# because the agent may alter the environment / testcases
###remove binary diffs
def remove_binary_diffs(patch_text):
lines = patch_text.splitlines()
cleaned_lines = []
block = []
is_binary_block = False
for line in lines:
if line.startswith('diff --git '):
if block and not is_binary_block:
cleaned_lines.extend(block)
block = [line]
is_binary_block = False
elif 'Binary files' in line:
is_binary_block = True
block.append(line)
else:
block.append(line)
if block and not is_binary_block:
cleaned_lines.extend(block)
return '\n'.join(cleaned_lines)
git_patch = remove_binary_diffs(git_patch)
test_result = {
'git_patch': git_patch,
}
# If you are working on some simpler benchmark that only evaluates the final model output (e.g., in a MessageAction)
# You can simply get the LAST `MessageAction` from the returned `state.history` and parse it for evaluation.
if state is None:
raise ValueError('State should not be None.')
# NOTE: this is NO LONGER the event stream, but an agent history that includes delegate agent's events
histories = [event_to_dict(event) for event in state.history]
metrics = get_metrics(state)
# Save the output
output = EvalOutput(
instance_id=instance.instance_id,
instruction=instruction,
instance=instance.to_dict(), # SWE Bench specific
test_result=test_result,
metadata=metadata,
history=histories,
metrics=metrics,
error=state.last_error if state and state.last_error else None,
)
return output
def filter_dataset(dataset: pd.DataFrame, filter_column: str) -> pd.DataFrame:
file_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'config.toml')
if os.path.exists(file_path):
with open(file_path, 'r') as file:
data = toml.load(file)
if 'selected_ids' in data:
selected_ids = data['selected_ids']
logger.info(
f'Filtering {len(selected_ids)} tasks from "selected_ids"...'
)
subset = dataset[dataset[filter_column].isin(selected_ids)]
logger.info(f'Retained {subset.shape[0]} tasks after filtering')
return subset
skip_ids = os.environ.get('SKIP_IDS', '').split(',')
if len(skip_ids) > 0:
logger.info(f'Filtering {len(skip_ids)} tasks from "SKIP_IDS"...')
return dataset[~dataset[filter_column].isin(skip_ids)]
return dataset
if __name__ == '__main__':
# pdb.set_trace()
parser = get_parser()
parser.add_argument(
'--dataset',
type=str,
default='princeton-nlp/SWE-bench',
help='data set to evaluate on, either full-test or lite-test',
)
parser.add_argument(
'--split',
type=str,
default='test',
help='split to evaluate on',
)
args, _ = parser.parse_known_args()
# NOTE: It is preferable to load datasets from huggingface datasets and perform post-processing
# so we don't need to manage file uploading to OpenHands's repo
# dataset = load_dataset(args.dataset, split=args.split)
# dataset = load_dataset(args.dataset)
dataset = load_dataset('json', data_files=args.dataset)
dataset = dataset[args.split]
swe_bench_tests = filter_dataset(dataset.to_pandas(), 'instance_id')
logger.info(
f'Loaded dataset {args.dataset} with split {args.split}: {len(swe_bench_tests)} tasks'
)
llm_config = None
if args.llm_config:
llm_config = get_llm_config_arg(args.llm_config)
llm_config.log_completions = True
# modify_params must be False for evaluation purpose, for reproducibility and accurancy of results
llm_config.modify_params = False
if llm_config is None:
raise ValueError(f'Could not find LLM config: --llm_config {args.llm_config}')
details = {}
_agent_cls = openhands.agenthub.Agent.get_cls(args.agent_cls)
dataset_descrption = (
args.dataset.replace('/', '__') + '-' + args.split.replace('/', '__')
)
metadata = make_metadata(
llm_config,
dataset_descrption,
args.agent_cls,
args.max_iterations,
args.eval_note,
args.eval_output_dir,
details=details,
)
output_file = os.path.join(metadata.eval_output_dir, 'output.jsonl')
print(f'### OUTPUT FILE: {output_file} ###')
instances = prepare_dataset(swe_bench_tests, output_file, args.eval_n_limit)
if len(instances) > 0 and not isinstance(
instances['FAIL_TO_PASS'][instances['FAIL_TO_PASS'].index[0]], str
):
for col in ['PASS_TO_PASS', 'FAIL_TO_PASS']:
instances[col] = instances[col].apply(lambda x: str(x))
# if LANGUAGE == "java": ##TODO:适配多语言的版本
# for col in ['issue_numbers', 'created_at']:
# instances[col] = instances[col].apply(lambda x: str(x))
run_evaluation(
instances,
metadata,
output_file,
args.eval_num_workers,
process_instance,
timeout_seconds=120 * 60, # 2 hour PER instance should be more than enough
max_retries=5,
)
@@ -1,36 +0,0 @@
import json
input_file = 'XXX.jsonl'
output_file = 'YYY.jsonl'
with open(input_file, 'r', encoding='utf-8') as fin, open(
output_file, 'w', encoding='utf-8'
) as fout:
for line in fin:
line = line.strip()
if not line:
continue
data = json.loads(line)
item = data
# 提取原始数据
org = item.get('org', '')
repo = item.get('repo', '')
number = str(item.get('number', ''))
new_item = {}
new_item['repo'] = f'{org}/{repo}'
new_item['instance_id'] = f'{org}__{repo}-{number}'
new_item['problem_statement'] = (
item['resolved_issues'][0].get('title', '')
+ '\n'
+ item['resolved_issues'][0].get('body', '')
)
new_item['FAIL_TO_PASS'] = []
new_item['PASS_TO_PASS'] = []
new_item['base_commit'] = item['base'].get('sha', '')
new_item['version'] = '0.1' # depends
output_data = new_item
fout.write(json.dumps(output_data, ensure_ascii=False) + '\n')
@@ -1,24 +0,0 @@
import json
import re
IN_FILE = 'output.jsonl'
OUT_FILE = 'patch.jsonl'
def main():
with open(IN_FILE, 'r') as fin:
with open(OUT_FILE, 'w') as fout:
for line in fin:
data = json.loads(line)
groups = re.match(r'(.*)__(.*)-(.*)', data['instance_id'])
patch = {
'org': groups.group(1),
'repo': groups.group(2),
'number': groups.group(3),
'fix_patch': data['test_result']['git_patch'],
}
fout.write(json.dumps(patch) + '\n')
if __name__ == '__main__':
main()
@@ -1,155 +0,0 @@
#!/bin/bash
set -eo pipefail
source "evaluation/utils/version_control.sh"
MODEL_CONFIG=$1
COMMIT_HASH=$2
AGENT=$3
EVAL_LIMIT=$4
MAX_ITER=$5
NUM_WORKERS=$6
DATASET=$7
# SPLIT=$8
LANGUAGE=$8
# N_RUNS=$10
if [ -z "$NUM_WORKERS" ]; then
NUM_WORKERS=1
echo "Number of workers not specified, use default $NUM_WORKERS"
fi
checkout_eval_branch
if [ -z "$AGENT" ]; then
echo "Agent not specified, use default CodeActAgent"
AGENT="CodeActAgent"
fi
if [ -z "$MAX_ITER" ]; then
echo "MAX_ITER not specified, use default 100"
MAX_ITER=100
fi
if [ -z "$USE_INSTANCE_IMAGE" ]; then
echo "USE_INSTANCE_IMAGE not specified, use default true"
USE_INSTANCE_IMAGE=true
fi
if [ -z "$RUN_WITH_BROWSING" ]; then
echo "RUN_WITH_BROWSING not specified, use default false"
RUN_WITH_BROWSING=false
fi
if [ -z "$DATASET" ]; then
echo "DATASET not specified, use default princeton-nlp/SWE-bench_Lite"
DATASET="princeton-nlp/SWE-bench_Lite"
fi
if [ -z "$LANGUAGE" ]; then
echo "LANUGUAGE not specified, use default python"
LANGUAGE="python"
fi
if [ -z "$SPLIT" ]; then
echo "LANUGUAGE not specified, use default python"
SPLIT="train"
fi
##TODO:适配多语言的版本
# if [ -z "$SPLIT" ]; then
# if [ "$LANGUAGE" = "python" ]; then
# echo "SPLIT is test as LANUGUAGE is python"
# SPLIT="test"
# elif [ "$LANGUAGE" = "java" ]; then
# echo "SPLIT is java_verified as LANUGUAGE is java"
# SPLIT="java_verified"
# fi
# fi
if [ -z "$EVAL_DOCKER_IMAGE_PREFIX" ]; then
if [ "$LANGUAGE" = "python" ]; then
echo "EVAL_DOCKER_IMAGE_PREFIX is docker.io/xingyaoww/ as default as LANUGUAGE is python"
EVAL_DOCKER_IMAGE_PREFIX="docker.io/xingyaoww/"
elif [ "$LANGUAGE" = "java" ]; then
echo "EVAL_DOCKER_IMAGE_PREFIX is java_verified as LANUGUAGE is java"
EVAL_DOCKER_IMAGE_PREFIX=""
fi
fi
export EVAL_DOCKER_IMAGE_PREFIX=$EVAL_DOCKER_IMAGE_PREFIX
echo "EVAL_DOCKER_IMAGE_PREFIX: $EVAL_DOCKER_IMAGE_PREFIX"
export USE_INSTANCE_IMAGE=$USE_INSTANCE_IMAGE
echo "USE_INSTANCE_IMAGE: $USE_INSTANCE_IMAGE"
export RUN_WITH_BROWSING=$RUN_WITH_BROWSING
echo "RUN_WITH_BROWSING: $RUN_WITH_BROWSING"
export LANGUAGE=$LANGUAGE
echo "LANGUAGE: $LANGUAGE"
get_openhands_version
echo "AGENT: $AGENT"
echo "OPENHANDS_VERSION: $OPENHANDS_VERSION"
echo "MODEL_CONFIG: $MODEL_CONFIG"
echo "DATASET: $DATASET"
echo "SPLIT: $SPLIT"
# Default to NOT use Hint
if [ -z "$USE_HINT_TEXT" ]; then
export USE_HINT_TEXT=false
fi
echo "USE_HINT_TEXT: $USE_HINT_TEXT"
EVAL_NOTE="$OPENHANDS_VERSION"
# if not using Hint, add -no-hint to the eval note
if [ "$USE_HINT_TEXT" = false ]; then
EVAL_NOTE="$EVAL_NOTE-no-hint"
fi
if [ "$RUN_WITH_BROWSING" = true ]; then
EVAL_NOTE="$EVAL_NOTE-with-browsing"
fi
if [ -n "$EXP_NAME" ]; then
EVAL_NOTE="$EVAL_NOTE-$EXP_NAME"
fi
function run_eval() {
local eval_note=$1
COMMAND="poetry run python evaluation/benchmarks/multi_swe_bench/run_infer.py \
--agent-cls $AGENT \
--llm-config $MODEL_CONFIG \
--max-iterations $MAX_ITER \
--eval-num-workers $NUM_WORKERS \
--eval-note $eval_note \
--dataset $DATASET \
--split $SPLIT"
if [ -n "$EVAL_LIMIT" ]; then
echo "EVAL_LIMIT: $EVAL_LIMIT"
COMMAND="$COMMAND --eval-n-limit $EVAL_LIMIT"
fi
# Run the command
eval $COMMAND
}
unset SANDBOX_ENV_GITHUB_TOKEN # prevent the agent from using the github token to push
if [ -z "$N_RUNS" ]; then
N_RUNS=1
echo "N_RUNS not specified, use default $N_RUNS"
fi
# Skip runs if the run number is in the SKIP_RUNS list
# read from env variable SKIP_RUNS as a comma separated list of run numbers
SKIP_RUNS=(${SKIP_RUNS//,/ })
for i in $(seq 1 $N_RUNS); do
if [[ " ${SKIP_RUNS[@]} " =~ " $i " ]]; then
echo "Skipping run $i"
continue
fi
current_eval_note="$EVAL_NOTE-run_$i"
echo "EVAL_NOTE: $current_eval_note"
run_eval $current_eval_note
done
checkout_original_branch
@@ -1,54 +0,0 @@
"""This script compares gold patches with OpenHands-generated patches and check whether
OpenHands found the right (set of) files to modify.
"""
import argparse
import json
import re
def extract_modified_files(patch):
modified_files = set()
file_pattern = re.compile(r'^diff --git a/(.*?) b/')
for line in patch.split('\n'):
match = file_pattern.match(line)
if match:
modified_files.add(match.group(1))
return modified_files
def process_report(oh_output_file):
succ = 0
fail = 0
for line in open(oh_output_file):
line = json.loads(line)
instance_id = line['instance_id']
gold_patch = line['swe_instance']['patch']
generated_patch = line['git_patch']
gold_modified_files = extract_modified_files(gold_patch)
# swe-bench lite only: a gold patch always contains exactly one file
assert len(gold_modified_files) == 1
generated_modified_files = extract_modified_files(generated_patch)
# Check if all files in gold_patch are also in generated_patch
all_files_in_generated = gold_modified_files.issubset(generated_modified_files)
if all_files_in_generated:
succ += 1
else:
fail += 1
print(
f'{instance_id}: file mismatch, gold = {gold_modified_files}, generated = {generated_modified_files}'
)
print(
f'\nSUMMARY: {succ} out of {succ + fail} instances found correct files to edit, success rate = {succ / float(succ + fail)}'
)
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument('--oh_output_file', help='Path to the OH output file')
args = parser.parse_args()
process_report(args.oh_output_file)
@@ -1,45 +0,0 @@
#!/bin/bash
source ~/.bashrc
SWEUTIL_DIR=/swe_util
# FIXME: Cannot read SWE_INSTANCE_ID from the environment variable
# SWE_INSTANCE_ID=django__django-11099
if [ -z "$SWE_INSTANCE_ID" ]; then
echo "Error: SWE_INSTANCE_ID is not set." >&2
exit 1
fi
if [ -z "$REPO_NAME" ]; then
echo "Error: REPO_NAME is not set." >&2
exit 1
fi
# Read the swe-bench-test-lite.json file and extract the required item based on instance_id
item=$(jq --arg INSTANCE_ID "$SWE_INSTANCE_ID" '.[] | select(.instance_id == $INSTANCE_ID)' $SWEUTIL_DIR/eval_data/instances/swe-bench-instance.json)
if [[ -z "$item" ]]; then
echo "No item found for the provided instance ID."
exit 1
fi
WORKSPACE_NAME=$(echo "$item" | jq -r '(.repo | tostring) + "__" + (.version | tostring) | gsub("/"; "__")')
echo "WORKSPACE_NAME: $WORKSPACE_NAME"
# Clear the workspace
if [ -d /workspace ]; then
rm -rf /workspace/*
else
mkdir /workspace
fi
# Copy repo to workspace
if [ -d /workspace/$WORKSPACE_NAME ]; then
rm -rf /workspace/$WORKSPACE_NAME
fi
mkdir -p /workspace
cp -r /home/$REPO_NAME /workspace/$WORKSPACE_NAME
# Activate instance-specific environment
# . /opt/miniconda3/etc/profile.d/conda.sh
# conda activate testbed
@@ -1,27 +0,0 @@
#!/bin/bash
set -e
EVAL_WORKSPACE="evaluation/benchmarks/swe_bench/eval_workspace"
mkdir -p $EVAL_WORKSPACE
# 1. Prepare REPO
echo "==== Prepare SWE-bench repo ===="
OH_SWE_BENCH_REPO_PATH="https://github.com/All-Hands-AI/SWE-bench.git"
OH_SWE_BENCH_REPO_BRANCH="eval"
git clone -b $OH_SWE_BENCH_REPO_BRANCH $OH_SWE_BENCH_REPO_PATH $EVAL_WORKSPACE/OH-SWE-bench
# 2. Prepare DATA
echo "==== Prepare SWE-bench data ===="
EVAL_IMAGE=ghcr.io/all-hands-ai/eval-swe-bench:builder_with_conda
EVAL_WORKSPACE=$(realpath $EVAL_WORKSPACE)
chmod +x $EVAL_WORKSPACE/OH-SWE-bench/swebench/harness/prepare_data.sh
if [ -d $EVAL_WORKSPACE/eval_data ]; then
rm -r $EVAL_WORKSPACE/eval_data
fi
docker run \
-v $EVAL_WORKSPACE:/workspace \
-w /workspace \
-u $(id -u):$(id -g) \
-e HF_DATASETS_CACHE="/tmp" \
--rm -it $EVAL_IMAGE \
bash -c "cd OH-SWE-bench/swebench/harness && /swe_util/miniforge3/bin/conda run -n swe-bench-eval ./prepare_data.sh && mv eval_data /workspace/"
@@ -1,96 +0,0 @@
#!/bin/bash
set -e
# assert user name is `root`
if [ "$USER" != "root" ]; then
echo "Error: This script is intended to be run by the 'root' user only." >&2
exit 1
fi
source ~/.bashrc
SWEUTIL_DIR=/swe_util
# Create logs directory
LOG_DIR=/openhands/logs
mkdir -p $LOG_DIR && chmod 777 $LOG_DIR
# FIXME: Cannot read SWE_INSTANCE_ID from the environment variable
# SWE_INSTANCE_ID=django__django-11099
if [ -z "$SWE_INSTANCE_ID" ]; then
echo "Error: SWE_INSTANCE_ID is not set." >&2
exit 1
fi
# Read the swe-bench-test-lite.json file and extract the required item based on instance_id
item=$(jq --arg INSTANCE_ID "$SWE_INSTANCE_ID" '.[] | select(.instance_id == $INSTANCE_ID)' $SWEUTIL_DIR/eval_data/instances/swe-bench-test-lite.json)
if [[ -z "$item" ]]; then
echo "No item found for the provided instance ID."
exit 1
fi
CONDA_ENV_NAME=$(echo "$item" | jq -r '.repo + "__" + .version | gsub("/"; "__")')
echo "CONDA_ENV_NAME: $CONDA_ENV_NAME"
SWE_TASK_DIR=/openhands/swe_tasks
mkdir -p $SWE_TASK_DIR
# Dump test_patch to /workspace/test.patch
echo "$item" | jq -r '.test_patch' > $SWE_TASK_DIR/test.patch
# Dump patch to /workspace/gold.patch
echo "$item" | jq -r '.patch' > $SWE_TASK_DIR/gold.patch
# Dump the item to /workspace/instance.json except for the "test_patch" and "patch" fields
echo "$item" | jq 'del(.test_patch, .patch)' > $SWE_TASK_DIR/instance.json
# Clear the workspace
rm -rf /workspace/*
# Copy repo to workspace
if [ -d /workspace/$CONDA_ENV_NAME ]; then
rm -rf /workspace/$CONDA_ENV_NAME
fi
cp -r $SWEUTIL_DIR/eval_data/testbeds/$CONDA_ENV_NAME /workspace
# Reset swe-bench testbed and install the repo
. $SWEUTIL_DIR/miniforge3/etc/profile.d/conda.sh
conda config --set changeps1 False
conda config --append channels conda-forge
conda activate swe-bench-eval
mkdir -p $SWE_TASK_DIR/reset_testbed_temp
mkdir -p $SWE_TASK_DIR/reset_testbed_log_dir
SWE_BENCH_DIR=/swe_util/OH-SWE-bench
output=$(
export PYTHONPATH=$SWE_BENCH_DIR && \
cd $SWE_BENCH_DIR && \
python swebench/harness/reset_swe_env.py \
--swe_bench_tasks $SWEUTIL_DIR/eval_data/instances/swe-bench-test.json \
--temp_dir $SWE_TASK_DIR/reset_testbed_temp \
--testbed /workspace \
--conda_path $SWEUTIL_DIR/miniforge3 \
--instance_id $SWE_INSTANCE_ID \
--log_dir $SWE_TASK_DIR/reset_testbed_log_dir \
--timeout 900 \
--verbose
)
REPO_PATH=$(echo "$output" | awk -F': ' '/repo_path:/ {print $2}')
TEST_CMD=$(echo "$output" | awk -F': ' '/test_cmd:/ {print $2}')
echo "Repo Path: $REPO_PATH"
echo "Test Command: $TEST_CMD"
echo "export SWE_BENCH_DIR=\"$SWE_BENCH_DIR\"" >> ~/.bashrc
echo "export REPO_PATH=\"$REPO_PATH\"" >> ~/.bashrc
echo "export TEST_CMD=\"$TEST_CMD\"" >> ~/.bashrc
if [[ "$REPO_PATH" == "None" ]]; then
echo "Error: Failed to retrieve repository path. Tests may not have passed or output was not as expected." >&2
exit 1
fi
# Activate instance-specific environment
. $SWEUTIL_DIR/miniforge3/etc/profile.d/conda.sh
conda activate $CONDA_ENV_NAME
set +e
-7
View File
@@ -1,10 +1,3 @@
# Run frontend checks
echo "Running frontend checks..."
cd frontend
npm run check-unlocalized-strings
npx lint-staged
# Run backend pre-commit
echo "Running backend pre-commit..."
cd ..
pre-commit run --files openhands/**/* evaluation/**/* tests/**/* --show-diff-on-failure --config ./dev_config/python/.pre-commit-config.yaml
+21 -45
View File
@@ -1,8 +1,6 @@
import { describe, it, expect, afterEach, vi } from "vitest";
import { screen, render } from "@testing-library/react";
import React from "react";
// Mock modules before importing the component
// Mock useParams before importing components
vi.mock("react-router", async () => {
const actual = await vi.importActual("react-router");
return {
@@ -11,11 +9,7 @@ vi.mock("react-router", async () => {
};
});
vi.mock("#/context/conversation-context", () => ({
useConversation: () => ({ conversationId: "test-conversation-id" }),
ConversationProvider: ({ children }: { children: React.ReactNode }) => children,
}));
// Mock i18next
vi.mock("react-i18next", async () => {
const actual = await vi.importActual("react-i18next");
return {
@@ -29,56 +23,38 @@ vi.mock("react-i18next", async () => {
};
});
// Mock redux
const mockDispatch = vi.fn();
let mockBrowserState = {
url: "https://example.com",
screenshotSrc: "",
};
vi.mock("react-redux", async () => {
const actual = await vi.importActual("react-redux");
return {
...actual,
useDispatch: () => mockDispatch,
useSelector: () => mockBrowserState,
};
});
// Import the component after all mocks are set up
import { screen } from "@testing-library/react";
import { renderWithProviders } from "../../test-utils";
import { BrowserPanel } from "#/components/features/browser/browser";
describe("Browser", () => {
afterEach(() => {
vi.clearAllMocks();
// Reset the mock state
mockBrowserState = {
url: "https://example.com",
screenshotSrc: "",
};
});
it("renders a message if no screenshotSrc is provided", () => {
// Set the mock state for this test
mockBrowserState = {
url: "https://example.com",
screenshotSrc: "",
};
render(<BrowserPanel />);
renderWithProviders(<BrowserPanel />, {
preloadedState: {
browser: {
url: "https://example.com",
screenshotSrc: "",
},
},
});
// i18n empty message key
expect(screen.getByText("BROWSER$NO_PAGE_LOADED")).toBeInTheDocument();
});
it("renders the url and a screenshot", () => {
// Set the mock state for this test
mockBrowserState = {
url: "https://example.com",
screenshotSrc: "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mN0uGvyHwAFCAJS091fQwAAAABJRU5ErkJggg==",
};
render(<BrowserPanel />);
renderWithProviders(<BrowserPanel />, {
preloadedState: {
browser: {
url: "https://example.com",
screenshotSrc:
"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mN0uGvyHwAFCAJS091fQwAAAABJRU5ErkJggg==",
},
},
});
expect(screen.getByText("https://example.com")).toBeInTheDocument();
expect(screen.getByAltText("BROWSER$SCREENSHOT_ALT")).toBeInTheDocument();
@@ -1,16 +1,12 @@
import { render, screen } from "@testing-library/react";
import { it, describe, expect, vi, beforeEach, afterEach } from "vitest";
import { it, describe, expect, vi, beforeAll, afterAll } from "vitest";
import userEvent from "@testing-library/user-event";
import { AuthModal } from "#/components/features/waitlist/auth-modal";
import * as CaptureConsent from "#/utils/handle-capture-consent";
import * as AuthHook from "#/context/auth-context";
// Mock the useAuthUrl hook
vi.mock("#/hooks/use-auth-url", () => ({
useAuthUrl: () => "https://gitlab.com/oauth/authorize"
}));
describe("AuthModal", () => {
beforeEach(() => {
beforeAll(() => {
vi.stubGlobal("location", { href: "" });
vi.spyOn(AuthHook, "useAuth").mockReturnValue({
providersAreSet: false,
@@ -20,29 +16,50 @@ describe("AuthModal", () => {
});
});
afterEach(() => {
afterAll(() => {
vi.unstubAllGlobals();
vi.resetAllMocks();
vi.restoreAllMocks();
});
it("should render the GitHub and GitLab buttons", () => {
render(<AuthModal githubAuthUrl="mock-url" appMode="saas" />);
it("should render a tos checkbox that is unchecked by default", () => {
render(<AuthModal githubAuthUrl={null} appMode="saas" />);
const checkbox = screen.getByRole("checkbox");
expect(checkbox).not.toBeChecked();
});
it("should only enable the identity provider buttons if the tos checkbox is checked", async () => {
const user = userEvent.setup();
render(<AuthModal githubAuthUrl={null} appMode="saas" />);
const checkbox = screen.getByRole("checkbox");
const githubButton = screen.getByRole("button", { name: "GITHUB$CONNECT_TO_GITHUB" });
const gitlabButton = screen.getByRole("button", { name: "GITLAB$CONNECT_TO_GITLAB" });
expect(githubButton).toBeInTheDocument();
expect(gitlabButton).toBeInTheDocument();
expect(githubButton).toBeDisabled();
expect(gitlabButton).toBeDisabled();
await user.click(checkbox);
expect(githubButton).not.toBeDisabled();
expect(gitlabButton).not.toBeDisabled();
});
it("should redirect to GitHub auth URL when GitHub button is clicked", async () => {
it("should set user analytics consent to true when the user checks the tos checkbox", async () => {
const handleCaptureConsentSpy = vi.spyOn(
CaptureConsent,
"handleCaptureConsent",
);
const user = userEvent.setup();
const mockUrl = "https://github.com/login/oauth/authorize";
render(<AuthModal githubAuthUrl={mockUrl} appMode="saas" />);
render(<AuthModal githubAuthUrl="mock-url" appMode="saas" />);
const githubButton = screen.getByRole("button", { name: "GITHUB$CONNECT_TO_GITHUB" });
await user.click(githubButton);
const checkbox = screen.getByRole("checkbox");
await user.click(checkbox);
expect(window.location.href).toBe(mockUrl);
const button = screen.getByRole("button", { name: "GITHUB$CONNECT_TO_GITHUB" });
await user.click(button);
expect(handleCaptureConsentSpy).toHaveBeenCalledWith(true);
});
});
@@ -1,34 +0,0 @@
import { describe, expect, it } from "vitest";
import { isLikelyDirectory } from "#/components/features/chat/path-component";
describe("isLikelyDirectory", () => {
it("should return false for empty path", () => {
expect(isLikelyDirectory("")).toBe(false);
});
it("should return true for paths ending with forward slash", () => {
expect(isLikelyDirectory("/path/to/dir/")).toBe(true);
expect(isLikelyDirectory("dir/")).toBe(true);
});
it("should return true for paths ending with backslash", () => {
expect(isLikelyDirectory("C:\\path\\to\\dir\\")).toBe(true);
expect(isLikelyDirectory("dir\\")).toBe(true);
});
it("should return true for paths without extension", () => {
expect(isLikelyDirectory("/path/to/dir")).toBe(true);
expect(isLikelyDirectory("dir")).toBe(true);
});
it("should return false for paths ending with dot", () => {
expect(isLikelyDirectory("/path/to/dir.")).toBe(false);
expect(isLikelyDirectory("dir.")).toBe(false);
});
it("should return false for paths with file extensions", () => {
expect(isLikelyDirectory("/path/to/file.txt")).toBe(false);
expect(isLikelyDirectory("file.js")).toBe(false);
expect(isLikelyDirectory("script.test.ts")).toBe(false);
});
});
@@ -34,6 +34,10 @@ describe("ConversationPanel", () => {
}
});
const { endSessionMock } = vi.hoisted(() => ({
endSessionMock: vi.fn(),
}));
beforeAll(() => {
vi.mock("react-router", async (importOriginal) => ({
...(await importOriginal<typeof import("react-router")>()),
@@ -42,6 +46,11 @@ describe("ConversationPanel", () => {
useLocation: vi.fn(() => ({ pathname: "/conversation" })),
useParams: vi.fn(() => ({ conversationId: "2" })),
}));
vi.mock("#/hooks/use-end-session", async (importOriginal) => ({
...(await importOriginal<typeof import("#/hooks/use-end-session")>()),
useEndSession: vi.fn(() => endSessionMock),
}));
});
const mockConversations = [
@@ -136,6 +145,47 @@ describe("ConversationPanel", () => {
expect(cards).toHaveLength(3);
});
it("should call endSession after deleting a conversation that is the current session", async () => {
const user = userEvent.setup();
const mockData = [...mockConversations];
const getUserConversationsSpy = vi.spyOn(OpenHands, "getUserConversations");
getUserConversationsSpy.mockImplementation(async () => mockData);
const deleteUserConversationSpy = vi.spyOn(OpenHands, "deleteUserConversation");
deleteUserConversationSpy.mockImplementation(async (id: string) => {
const index = mockData.findIndex(conv => conv.conversation_id === id);
if (index !== -1) {
mockData.splice(index, 1);
}
// Wait for React Query to update its cache
await new Promise(resolve => setTimeout(resolve, 0));
});
renderConversationPanel();
let cards = await screen.findAllByTestId("conversation-card");
const ellipsisButton = within(cards[1]).getByTestId("ellipsis-button");
await user.click(ellipsisButton);
const deleteButton = screen.getByTestId("delete-button");
// Click the second delete button
await user.click(deleteButton);
// Confirm the deletion
const confirmButton = screen.getByRole("button", { name: /confirm/i });
await user.click(confirmButton);
expect(screen.queryByRole("button", { name: /confirm/i })).not.toBeInTheDocument();
// Wait for the cards to update with a longer timeout
await waitFor(() => {
const updatedCards = screen.getAllByTestId("conversation-card");
expect(updatedCards).toHaveLength(2);
}, { timeout: 2000 });
expect(endSessionMock).toHaveBeenCalledOnce();
});
it("should delete a conversation", async () => {
const user = userEvent.setup();
const mockData = [
@@ -46,14 +46,10 @@ describe("HomeHeader", () => {
await userEvent.click(launchButton);
expect(createConversationSpy).toHaveBeenCalledExactlyOnceWith(
"gui",
undefined,
undefined,
undefined,
[],
undefined,
undefined,
undefined,
);
// expect to be redirected to /conversations/:conversationId
@@ -4,10 +4,11 @@ import userEvent from "@testing-library/user-event";
import { QueryClientProvider, QueryClient } from "@tanstack/react-query";
import { setupStore } from "test-utils";
import { Provider } from "react-redux";
import { createRoutesStub, Outlet } from "react-router";
import { createRoutesStub } from "react-router";
import OpenHands from "#/api/open-hands";
import { AuthProvider } from "#/context/auth-context";
import { GitRepository } from "#/types/git";
import * as GitService from "#/api/git";
import { RepoConnector } from "#/components/features/home/repo-connector";
const renderRepoConnector = (initialProvidersAreSet = true) => {
@@ -22,18 +23,8 @@ const renderRepoConnector = (initialProvidersAreSet = true) => {
path: "/conversations/:conversationId",
},
{
Component: Outlet,
Component: () => <div data-testid="settings-screen" />,
path: "/settings",
children: [
{
Component: () => <div data-testid="settings-screen" />,
path: "/settings",
},
{
Component: () => <div data-testid="git-settings-screen" />,
path: "/settings/git",
},
],
},
]);
@@ -73,10 +64,13 @@ describe("RepoConnector", () => {
it("should render the available repositories in the dropdown", async () => {
const retrieveUserGitRepositoriesSpy = vi.spyOn(
OpenHands,
GitService,
"retrieveUserGitRepositories",
);
retrieveUserGitRepositoriesSpy.mockResolvedValue(MOCK_RESPOSITORIES);
retrieveUserGitRepositoriesSpy.mockResolvedValue({
data: MOCK_RESPOSITORIES,
nextPage: null,
});
renderRepoConnector();
@@ -92,10 +86,13 @@ describe("RepoConnector", () => {
it("should only enable the launch button if a repo is selected", async () => {
const retrieveUserGitRepositoriesSpy = vi.spyOn(
OpenHands,
GitService,
"retrieveUserGitRepositories",
);
retrieveUserGitRepositoriesSpy.mockResolvedValue(MOCK_RESPOSITORIES);
retrieveUserGitRepositoriesSpy.mockResolvedValue({
data: MOCK_RESPOSITORIES,
nextPage: null,
});
renderRepoConnector();
@@ -138,10 +135,13 @@ describe("RepoConnector", () => {
it("should create a conversation and redirect with the selected repo when pressing the launch button", async () => {
const createConversationSpy = vi.spyOn(OpenHands, "createConversation");
const retrieveUserGitRepositoriesSpy = vi.spyOn(
OpenHands,
GitService,
"retrieveUserGitRepositories",
);
retrieveUserGitRepositoriesSpy.mockResolvedValue(MOCK_RESPOSITORIES);
retrieveUserGitRepositoriesSpy.mockResolvedValue({
data: MOCK_RESPOSITORIES,
nextPage: null,
});
renderRepoConnector();
@@ -154,8 +154,8 @@ describe("RepoConnector", () => {
expect(createConversationSpy).not.toHaveBeenCalled();
// select a repository from the dropdown
const dropdown = await waitFor(() =>
within(repoConnector).getByTestId("repo-dropdown"),
const dropdown = await waitFor(() =>
within(repoConnector).getByTestId("repo-dropdown")
);
await userEvent.click(dropdown);
@@ -164,23 +164,27 @@ describe("RepoConnector", () => {
await userEvent.click(launchButton);
expect(createConversationSpy).toHaveBeenCalledExactlyOnceWith(
"gui",
"rbren/polaris",
"github",
{
full_name: "rbren/polaris",
git_provider: "github",
id: 1,
is_public: true,
},
undefined,
[],
undefined,
undefined,
undefined,
);
});
it("should change the launch button text to 'Loading...' when creating a conversation", async () => {
const retrieveUserGitRepositoriesSpy = vi.spyOn(
OpenHands,
GitService,
"retrieveUserGitRepositories",
);
retrieveUserGitRepositoriesSpy.mockResolvedValue(MOCK_RESPOSITORIES);
retrieveUserGitRepositoriesSpy.mockResolvedValue({
data: MOCK_RESPOSITORIES,
nextPage: null,
});
renderRepoConnector();
@@ -220,6 +224,6 @@ describe("RepoConnector", () => {
expect(goToSettingsButton).toBeInTheDocument();
await userEvent.click(goToSettingsButton);
await screen.findByTestId("git-settings-screen");
await screen.findByTestId("settings-screen");
});
});
@@ -9,14 +9,41 @@ import { SuggestedTask } from "#/components/features/home/tasks/task.types";
import OpenHands from "#/api/open-hands";
import { AuthProvider } from "#/context/auth-context";
import { TaskCard } from "#/components/features/home/tasks/task-card";
import * as GitService from "#/api/git";
import { GitRepository } from "#/types/git";
import {
getFailingChecksPrompt,
getMergeConflictPrompt,
getOpenIssuePrompt,
getUnresolvedCommentsPrompt,
} from "#/components/features/home/tasks/get-prompt-for-query";
const MOCK_TASK_1: SuggestedTask = {
issue_number: 123,
repo: "repo1",
title: "Task 1",
task_type: "MERGE_CONFLICTS",
git_provider: "github",
};
const MOCK_TASK_2: SuggestedTask = {
issue_number: 456,
repo: "repo2",
title: "Task 2",
task_type: "FAILING_CHECKS",
};
const MOCK_TASK_3: SuggestedTask = {
issue_number: 789,
repo: "repo3",
title: "Task 3",
task_type: "UNRESOLVED_COMMENTS",
};
const MOCK_TASK_4: SuggestedTask = {
issue_number: 101112,
repo: "repo4",
title: "Task 4",
task_type: "OPEN_ISSUE",
};
const MOCK_RESPOSITORIES: GitRepository[] = [
@@ -70,16 +97,19 @@ describe("TaskCard", () => {
expect(createConversationSpy).toHaveBeenCalled();
});
describe("creating suggested task conversation", () => {
describe("creating conversation prompts", () => {
beforeEach(() => {
const retrieveUserGitRepositoriesSpy = vi.spyOn(
OpenHands,
GitService,
"retrieveUserGitRepositories",
);
retrieveUserGitRepositoriesSpy.mockResolvedValue(MOCK_RESPOSITORIES);
retrieveUserGitRepositoriesSpy.mockResolvedValue({
data: MOCK_RESPOSITORIES,
nextPage: null,
});
});
it("should call create conversation with suggest task trigger and selected suggested task", async () => {
it("should call create conversation with the merge conflict prompt", async () => {
const createConversationSpy = vi.spyOn(OpenHands, "createConversation");
renderTaskCard(MOCK_TASK_1);
@@ -88,13 +118,57 @@ describe("TaskCard", () => {
await userEvent.click(launchButton);
expect(createConversationSpy).toHaveBeenCalledWith(
"suggested_task",
MOCK_RESPOSITORIES[0].full_name,
MOCK_RESPOSITORIES[0].git_provider,
undefined,
MOCK_RESPOSITORIES[0],
getMergeConflictPrompt(MOCK_TASK_1.issue_number, MOCK_TASK_1.repo),
[],
undefined,
MOCK_TASK_1,
);
});
it("should call create conversation with the failing checks prompt", async () => {
const createConversationSpy = vi.spyOn(OpenHands, "createConversation");
renderTaskCard(MOCK_TASK_2);
const launchButton = screen.getByTestId("task-launch-button");
await userEvent.click(launchButton);
expect(createConversationSpy).toHaveBeenCalledWith(
MOCK_RESPOSITORIES[1],
getFailingChecksPrompt(MOCK_TASK_2.issue_number, MOCK_TASK_2.repo),
[],
undefined,
);
});
it("should call create conversation with the unresolved comments prompt", async () => {
const createConversationSpy = vi.spyOn(OpenHands, "createConversation");
renderTaskCard(MOCK_TASK_3);
const launchButton = screen.getByTestId("task-launch-button");
await userEvent.click(launchButton);
expect(createConversationSpy).toHaveBeenCalledWith(
MOCK_RESPOSITORIES[2],
getUnresolvedCommentsPrompt(MOCK_TASK_3.issue_number, MOCK_TASK_3.repo),
[],
undefined,
);
});
it("should call create conversation with the open issue prompt", async () => {
const createConversationSpy = vi.spyOn(OpenHands, "createConversation");
renderTaskCard(MOCK_TASK_4);
const launchButton = screen.getByTestId("task-launch-button");
await userEvent.click(launchButton);
expect(createConversationSpy).toHaveBeenCalledWith(
MOCK_RESPOSITORIES[3],
getOpenIssuePrompt(MOCK_TASK_4.issue_number, MOCK_TASK_4.repo),
[],
undefined,
);
});
@@ -4,6 +4,7 @@ import { QueryClient, QueryClientProvider } from "@tanstack/react-query";
import { Provider } from "react-redux";
import { createRoutesStub } from "react-router";
import { setupStore } from "test-utils";
import userEvent from "@testing-library/user-event";
import { TaskSuggestions } from "#/components/features/home/tasks/task-suggestions";
import { SuggestionsService } from "#/api/suggestions-service/suggestions-service.api";
import { MOCK_TASKS } from "#/mocks/task-suggestions-handlers";
@@ -96,4 +97,17 @@ describe("TaskSuggestions", () => {
expect(screen.queryByTestId("task-group-skeleton")).not.toBeInTheDocument();
});
it("should display a button to settings if the user needs to sign in with their git provider", async () => {
renderTaskSuggestions(false);
expect(getSuggestedTasksSpy).not.toHaveBeenCalled();
const goToSettingsButton = await screen.findByTestId(
"navigate-to-settings-button",
);
expect(goToSettingsButton).toBeInTheDocument();
await userEvent.click(goToSettingsButton);
await screen.findByTestId("settings-screen");
});
});
@@ -0,0 +1,28 @@
import { screen } from "@testing-library/react";
import { renderWithProviders } from "test-utils";
import { describe, afterEach, vi, it, expect } from "vitest";
import { ExplorerTree } from "#/components/features/file-explorer/explorer-tree";
const FILES = ["file-1-1.ts", "folder-1-2"];
describe.skip("ExplorerTree", () => {
afterEach(() => {
vi.resetAllMocks();
});
it("should render the explorer", () => {
renderWithProviders(<ExplorerTree files={FILES} defaultOpen />);
expect(screen.getByText("file-1-1.ts")).toBeInTheDocument();
expect(screen.getByText("folder-1-2")).toBeInTheDocument();
// TODO: make sure children render
});
it("should render the explorer given the defaultExpanded prop", () => {
renderWithProviders(<ExplorerTree files={FILES} />);
expect(screen.queryByText("file-1-1.ts")).toBeInTheDocument();
expect(screen.queryByText("folder-1-2")).toBeInTheDocument();
// TODO: make sure children don't render
});
});
@@ -0,0 +1,64 @@
import { screen } from "@testing-library/react";
import userEvent from "@testing-library/user-event";
import { renderWithProviders } from "test-utils";
import { describe, it, expect, vi, afterEach } from "vitest";
import { AgentState } from "#/types/agent-state";
import { FileExplorer } from "#/components/features/file-explorer/file-explorer";
import { FileService } from "#/api/file-service/file-service.api";
const getFilesSpy = vi.spyOn(FileService, "getFiles");
vi.mock("../../services/fileService", async () => ({
uploadFiles: vi.fn(),
}));
const renderFileExplorerWithRunningAgentState = () =>
renderWithProviders(<FileExplorer isOpen onToggle={() => {}} />, {
preloadedState: {
agent: {
curAgentState: AgentState.RUNNING,
},
},
});
describe.skip("FileExplorer", () => {
afterEach(() => {
vi.clearAllMocks();
});
it("should get the workspace directory", async () => {
renderFileExplorerWithRunningAgentState();
expect(await screen.findByText("folder1")).toBeInTheDocument();
expect(await screen.findByText("file1.ts")).toBeInTheDocument();
expect(getFilesSpy).toHaveBeenCalledTimes(1); // once for root
});
it("should refetch the workspace when clicking the refresh button", async () => {
const user = userEvent.setup();
renderFileExplorerWithRunningAgentState();
expect(await screen.findByText("folder1")).toBeInTheDocument();
expect(await screen.findByText("file1.ts")).toBeInTheDocument();
expect(getFilesSpy).toHaveBeenCalledTimes(1); // once for root
const refreshButton = screen.getByTestId("refresh");
await user.click(refreshButton);
expect(getFilesSpy).toHaveBeenCalledTimes(2); // once for root, once for refresh button
});
it("should toggle the explorer visibility when clicking the toggle button", async () => {
const user = userEvent.setup();
renderFileExplorerWithRunningAgentState();
const folder1 = await screen.findByText("folder1");
expect(folder1).toBeInTheDocument();
const toggleButton = screen.getByTestId("toggle");
await user.click(toggleButton);
expect(folder1).toBeInTheDocument();
expect(folder1).not.toBeVisible();
});
});
@@ -0,0 +1,110 @@
import { screen } from "@testing-library/react";
import userEvent from "@testing-library/user-event";
import { renderWithProviders } from "test-utils";
import { vi, describe, afterEach, it, expect } from "vitest";
import TreeNode from "#/components/features/file-explorer/tree-node";
import { FileService } from "#/api/file-service/file-service.api";
const getFileSpy = vi.spyOn(FileService, "getFile");
const getFilesSpy = vi.spyOn(FileService, "getFiles");
vi.mock("../../services/fileService", async () => ({
uploadFile: vi.fn(),
}));
describe.skip("TreeNode", () => {
afterEach(() => {
vi.clearAllMocks();
});
it("should render a file if property has no children", () => {
renderWithProviders(<TreeNode path="/file.ts" defaultOpen />);
expect(screen.getByText("file.ts")).toBeInTheDocument();
});
it("should render a folder if it's in a subdir", async () => {
renderWithProviders(<TreeNode path="/folder1/" defaultOpen />);
expect(getFilesSpy).toHaveBeenCalledWith("/folder1/");
expect(await screen.findByText("folder1")).toBeInTheDocument();
expect(await screen.findByText("file2.ts")).toBeInTheDocument();
});
it("should close a folder when clicking on it", async () => {
const user = userEvent.setup();
renderWithProviders(<TreeNode path="/folder1/" defaultOpen />);
const folder1 = await screen.findByText("folder1");
const file2 = await screen.findByText("file2.ts");
expect(folder1).toBeInTheDocument();
expect(file2).toBeInTheDocument();
await user.click(folder1);
expect(folder1).toBeInTheDocument();
expect(screen.queryByText("file2.ts")).not.toBeInTheDocument();
});
it("should open a folder when clicking on it", async () => {
const user = userEvent.setup();
renderWithProviders(<TreeNode path="/folder1/" />);
const folder1 = await screen.findByText("folder1");
expect(folder1).toBeInTheDocument();
expect(screen.queryByText("file2.ts")).not.toBeInTheDocument();
await user.click(folder1);
expect(getFilesSpy).toHaveBeenCalledWith("/folder1/");
expect(folder1).toBeInTheDocument();
expect(await screen.findByText("file2.ts")).toBeInTheDocument();
});
it("should call `OpenHands.getFile` and return the full path of a file when clicking on a file", async () => {
const user = userEvent.setup();
renderWithProviders(<TreeNode path="/folder1/file2.ts" defaultOpen />);
const file2 = screen.getByText("file2.ts");
await user.click(file2);
expect(getFileSpy).toHaveBeenCalledWith("/folder1/file2.ts");
});
it("should render the full explorer given the defaultOpen prop", async () => {
const user = userEvent.setup();
renderWithProviders(<TreeNode path="/" defaultOpen />);
expect(getFilesSpy).toHaveBeenCalledWith("/");
const file1 = await screen.findByText("file1.ts");
const folder1 = await screen.findByText("folder1");
expect(file1).toBeInTheDocument();
expect(folder1).toBeInTheDocument();
expect(screen.queryByText("file2.ts")).not.toBeInTheDocument();
await user.click(folder1);
expect(getFilesSpy).toHaveBeenCalledWith("folder1/");
expect(file1).toBeInTheDocument();
expect(folder1).toBeInTheDocument();
expect(await screen.findByText("file2.ts")).toBeInTheDocument();
});
it("should render all children as collapsed when defaultOpen is false", async () => {
renderWithProviders(<TreeNode path="/folder1/" defaultOpen={false} />);
const folder1 = await screen.findByText("folder1");
expect(folder1).toBeInTheDocument();
expect(screen.queryByText("file2.ts")).not.toBeInTheDocument();
await userEvent.click(folder1);
expect(getFilesSpy).toHaveBeenCalledWith("/folder1/");
expect(folder1).toBeInTheDocument();
expect(await screen.findByText("file2.ts")).toBeInTheDocument();
});
});
@@ -13,7 +13,15 @@ describe("App", () => {
{ Component: App, path: "/conversation/:conversationId" },
]);
const { endSessionMock } = vi.hoisted(() => ({
endSessionMock: vi.fn(),
}));
beforeAll(() => {
vi.mock("#/hooks/use-end-session", () => ({
useEndSession: vi.fn(() => endSessionMock),
}));
vi.mock("#/hooks/use-terminal", () => ({
useTerminal: vi.fn(),
}));
@@ -27,4 +35,44 @@ describe("App", () => {
renderWithProviders(<RouteStub initialEntries={["/conversation/123"]} />);
await screen.findByTestId("app-route");
});
it("should call endSession if the user does not have permission to view conversation", async () => {
const getConversationSpy = vi.spyOn(OpenHands, "getConversation");
getConversationSpy.mockResolvedValue(null);
renderWithProviders(<RouteStub initialEntries={["/conversation/9999"]} />);
await waitFor(() => {
expect(endSessionMock).toHaveBeenCalledOnce();
expect(errorToastSpy).toHaveBeenCalledOnce();
});
});
it("should not call endSession if the user has permission", async () => {
const getConversationSpy = vi.spyOn(OpenHands, "getConversation");
getConversationSpy.mockResolvedValue({
conversation_id: "9999",
last_updated_at: "",
created_at: "",
title: "",
selected_repository: "",
status: "STOPPED",
});
const { rerender } = renderWithProviders(
<RouteStub initialEntries={["/conversation/9999"]} />,
);
await waitFor(() => {
expect(endSessionMock).not.toHaveBeenCalled();
expect(errorToastSpy).not.toHaveBeenCalled();
});
rerender(<RouteStub initialEntries={["/conversation"]} />);
await waitFor(() => {
expect(endSessionMock).not.toHaveBeenCalled();
expect(errorToastSpy).not.toHaveBeenCalled();
});
});
});
+1 -33
View File
@@ -1,16 +1,12 @@
import { afterEach, beforeAll, describe, expect, it, vi } from "vitest";
import { createRoutesStub } from "react-router";
import { screen, waitFor, within } from "@testing-library/react";
import {
createAxiosNotFoundErrorObject,
renderWithProviders,
} from "test-utils";
import { renderWithProviders } from "test-utils";
import userEvent from "@testing-library/user-event";
import MainApp from "#/routes/root-layout";
import i18n from "#/i18n";
import * as CaptureConsent from "#/utils/handle-capture-consent";
import OpenHands from "#/api/open-hands";
import * as ToastHandlers from "#/utils/custom-toast-handlers";
describe("frontend/routes/_oh", () => {
const RouteStub = createRoutesStub([{ Component: MainApp, path: "/" }]);
@@ -176,32 +172,4 @@ describe("frontend/routes/_oh", () => {
// expect(logoutCleanupSpy).toHaveBeenCalled();
expect(localStorage.getItem("ghToken")).toBeNull();
});
it("should render a you're in toast if it is a new user and in saas mode", async () => {
const getConfigSpy = vi.spyOn(OpenHands, "getConfig");
const getSettingsSpy = vi.spyOn(OpenHands, "getSettings");
const displaySuccessToastSpy = vi.spyOn(
ToastHandlers,
"displaySuccessToast",
);
getConfigSpy.mockResolvedValue({
APP_MODE: "saas",
GITHUB_CLIENT_ID: "test-id",
POSTHOG_CLIENT_KEY: "test-key",
FEATURE_FLAGS: {
ENABLE_BILLING: false,
HIDE_LLM_SETTINGS: false,
},
});
getSettingsSpy.mockRejectedValue(createAxiosNotFoundErrorObject());
renderWithProviders(<RouteStub />);
await waitFor(() => {
expect(displaySuccessToastSpy).toHaveBeenCalledWith("BILLING$YOURE_IN");
expect(displaySuccessToastSpy).toHaveBeenCalledOnce();
});
});
});
@@ -1,136 +0,0 @@
import { render, screen } from "@testing-library/react";
import { it, describe, expect, vi, beforeEach, afterEach } from "vitest";
import userEvent from "@testing-library/user-event";
import AcceptTOS from "#/routes/accept-tos";
import * as CaptureConsent from "#/utils/handle-capture-consent";
import * as ToastHandlers from "#/utils/custom-toast-handlers";
import { QueryClient, QueryClientProvider } from "@tanstack/react-query";
import { openHands } from "#/api/open-hands-axios";
// Mock the react-router hooks
vi.mock("react-router", () => ({
useNavigate: () => vi.fn(),
useSearchParams: () => [
{
get: (param: string) => {
if (param === "redirect_url") {
return "/dashboard";
}
return null;
},
},
],
}));
// Mock the axios instance
vi.mock("#/api/open-hands-axios", () => ({
openHands: {
post: vi.fn(),
},
}));
// Mock the toast handlers
vi.mock("#/utils/custom-toast-handlers", () => ({
displayErrorToast: vi.fn(),
}));
// Create a wrapper with QueryClientProvider
const createWrapper = () => {
const queryClient = new QueryClient({
defaultOptions: {
queries: {
retry: false,
},
},
});
return ({ children }: { children: React.ReactNode }) => (
<QueryClientProvider client={queryClient}>{children}</QueryClientProvider>
);
};
describe("AcceptTOS", () => {
beforeEach(() => {
vi.stubGlobal("location", { href: "" });
});
afterEach(() => {
vi.unstubAllGlobals();
vi.resetAllMocks();
});
it("should render a TOS checkbox that is unchecked by default", () => {
render(<AcceptTOS />, { wrapper: createWrapper() });
const checkbox = screen.getByRole("checkbox");
const continueButton = screen.getByRole("button", { name: "TOS$CONTINUE" });
expect(checkbox).not.toBeChecked();
expect(continueButton).toBeDisabled();
});
it("should enable the continue button when the TOS checkbox is checked", async () => {
const user = userEvent.setup();
render(<AcceptTOS />, { wrapper: createWrapper() });
const checkbox = screen.getByRole("checkbox");
const continueButton = screen.getByRole("button", { name: "TOS$CONTINUE" });
expect(continueButton).toBeDisabled();
await user.click(checkbox);
expect(continueButton).not.toBeDisabled();
});
it("should set user analytics consent to true when the user accepts TOS", async () => {
const handleCaptureConsentSpy = vi.spyOn(
CaptureConsent,
"handleCaptureConsent",
);
// Mock the API response
vi.mocked(openHands.post).mockResolvedValue({
data: { redirect_url: "/dashboard" },
});
const user = userEvent.setup();
render(<AcceptTOS />, { wrapper: createWrapper() });
const checkbox = screen.getByRole("checkbox");
await user.click(checkbox);
const continueButton = screen.getByRole("button", { name: "TOS$CONTINUE" });
await user.click(continueButton);
// Wait for the mutation to complete
await new Promise(process.nextTick);
expect(handleCaptureConsentSpy).toHaveBeenCalledWith(true);
expect(openHands.post).toHaveBeenCalledWith("/api/accept_tos", {
redirect_url: "/dashboard",
});
});
it("should handle external redirect URLs", async () => {
// Mock the API response with an external URL
const externalUrl = "https://example.com/callback";
vi.mocked(openHands.post).mockResolvedValue({
data: { redirect_url: externalUrl },
});
const user = userEvent.setup();
render(<AcceptTOS />, { wrapper: createWrapper() });
const checkbox = screen.getByRole("checkbox");
await user.click(checkbox);
const continueButton = screen.getByRole("button", { name: "TOS$CONTINUE" });
await user.click(continueButton);
// Wait for the mutation to complete
await new Promise(process.nextTick);
expect(window.location.href).toBe(externalUrl);
});
});
@@ -1,291 +0,0 @@
import { render, screen, waitFor } from "@testing-library/react";
import { afterEach, describe, expect, it, vi } from "vitest";
import { QueryClient, QueryClientProvider } from "@tanstack/react-query";
import userEvent from "@testing-library/user-event";
import AppSettingsScreen from "#/routes/app-settings";
import OpenHands from "#/api/open-hands";
import { MOCK_DEFAULT_USER_SETTINGS } from "#/mocks/handlers";
import { AuthProvider } from "#/context/auth-context";
import { AvailableLanguages } from "#/i18n";
import * as CaptureConsent from "#/utils/handle-capture-consent";
import * as ToastHandlers from "#/utils/custom-toast-handlers";
const renderAppSettingsScreen = () =>
render(<AppSettingsScreen />, {
wrapper: ({ children }) => (
<QueryClientProvider client={new QueryClient()}>
<AuthProvider>{children}</AuthProvider>
</QueryClientProvider>
),
});
describe("Content", () => {
it("should render the screen", () => {
renderAppSettingsScreen();
screen.getByTestId("app-settings-screen");
});
it("should render the correct default values", async () => {
const getSettingsSpy = vi.spyOn(OpenHands, "getSettings");
getSettingsSpy.mockResolvedValue({
...MOCK_DEFAULT_USER_SETTINGS,
language: "no",
user_consents_to_analytics: true,
enable_sound_notifications: true,
});
renderAppSettingsScreen();
await waitFor(() => {
const language = screen.getByTestId("language-input");
const analytics = screen.getByTestId("enable-analytics-switch");
const sound = screen.getByTestId("enable-sound-notifications-switch");
expect(language).toHaveValue("Norsk");
expect(analytics).toBeChecked();
expect(sound).toBeChecked();
});
});
it("should render the language options", async () => {
renderAppSettingsScreen();
const language = await screen.findByTestId("language-input");
await userEvent.click(language);
AvailableLanguages.forEach((lang) => {
const option = screen.getByText(lang.label);
expect(option).toBeInTheDocument();
});
});
});
describe("Form submission", () => {
afterEach(() => {
vi.clearAllMocks();
});
it("should submit the form with the correct values", async () => {
const saveSettingsSpy = vi.spyOn(OpenHands, "saveSettings");
const getSettingsSpy = vi.spyOn(OpenHands, "getSettings");
getSettingsSpy.mockResolvedValue(MOCK_DEFAULT_USER_SETTINGS);
renderAppSettingsScreen();
const language = await screen.findByTestId("language-input");
const analytics = await screen.findByTestId("enable-analytics-switch");
const sound = await screen.findByTestId(
"enable-sound-notifications-switch",
);
expect(language).toHaveValue("English");
expect(analytics).not.toBeChecked();
expect(sound).not.toBeChecked();
// change language
await userEvent.click(language);
const norsk = screen.getByText("Norsk");
await userEvent.click(norsk);
expect(language).toHaveValue("Norsk");
// toggle options
await userEvent.click(analytics);
expect(analytics).toBeChecked();
await userEvent.click(sound);
expect(sound).toBeChecked();
// submit the form
const submit = await screen.findByTestId("submit-button");
await userEvent.click(submit);
expect(saveSettingsSpy).toHaveBeenCalledWith(
expect.objectContaining({
language: "no",
user_consents_to_analytics: true,
enable_sound_notifications: true,
}),
);
});
it("should only enable the submit button when there are changes", async () => {
const getSettingsSpy = vi.spyOn(OpenHands, "getSettings");
getSettingsSpy.mockResolvedValue(MOCK_DEFAULT_USER_SETTINGS);
renderAppSettingsScreen();
const submit = await screen.findByTestId("submit-button");
expect(submit).toBeDisabled();
// Language check
const language = await screen.findByTestId("language-input");
await userEvent.click(language);
const norsk = screen.getByText("Norsk");
await userEvent.click(norsk);
expect(submit).not.toBeDisabled();
await userEvent.click(language);
const english = screen.getByText("English");
await userEvent.click(english);
expect(submit).toBeDisabled();
// Analytics check
const analytics = await screen.findByTestId("enable-analytics-switch");
await userEvent.click(analytics);
expect(submit).not.toBeDisabled();
await userEvent.click(analytics);
expect(submit).toBeDisabled();
// Sound check
const sound = await screen.findByTestId(
"enable-sound-notifications-switch",
);
await userEvent.click(sound);
expect(submit).not.toBeDisabled();
await userEvent.click(sound);
expect(submit).toBeDisabled();
});
it("should call handleCaptureConsents with true when the analytics switch is toggled", async () => {
const getSettingsSpy = vi.spyOn(OpenHands, "getSettings");
getSettingsSpy.mockResolvedValue(MOCK_DEFAULT_USER_SETTINGS);
const handleCaptureConsentsSpy = vi.spyOn(
CaptureConsent,
"handleCaptureConsent",
);
renderAppSettingsScreen();
const analytics = await screen.findByTestId("enable-analytics-switch");
const submit = await screen.findByTestId("submit-button");
await userEvent.click(analytics);
await userEvent.click(submit);
await waitFor(() =>
expect(handleCaptureConsentsSpy).toHaveBeenCalledWith(true),
);
});
it("should call handleCaptureConsents with false when the analytics switch is toggled", async () => {
const getSettingsSpy = vi.spyOn(OpenHands, "getSettings");
getSettingsSpy.mockResolvedValue({
...MOCK_DEFAULT_USER_SETTINGS,
user_consents_to_analytics: true,
});
const handleCaptureConsentsSpy = vi.spyOn(
CaptureConsent,
"handleCaptureConsent",
);
renderAppSettingsScreen();
const analytics = await screen.findByTestId("enable-analytics-switch");
const submit = await screen.findByTestId("submit-button");
await userEvent.click(analytics);
await userEvent.click(submit);
await waitFor(() =>
expect(handleCaptureConsentsSpy).toHaveBeenCalledWith(false),
);
});
// flaky test
it.skip("should disable the button when submitting changes", async () => {
renderAppSettingsScreen();
const submit = await screen.findByTestId("submit-button");
expect(submit).toBeDisabled();
const sound = await screen.findByTestId(
"enable-sound-notifications-switch",
);
await userEvent.click(sound);
expect(submit).not.toBeDisabled();
// submit the form
await userEvent.click(submit);
expect(submit).toHaveTextContent("Saving...");
expect(submit).toBeDisabled();
await waitFor(() => expect(submit).toHaveTextContent("Save"));
});
it("should disable the button after submitting changes", async () => {
const saveSettingsSpy = vi.spyOn(OpenHands, "saveSettings");
const getSettingsSpy = vi.spyOn(OpenHands, "getSettings");
getSettingsSpy.mockResolvedValue(MOCK_DEFAULT_USER_SETTINGS);
renderAppSettingsScreen();
const submit = await screen.findByTestId("submit-button");
expect(submit).toBeDisabled();
const sound = await screen.findByTestId(
"enable-sound-notifications-switch",
);
await userEvent.click(sound);
expect(submit).not.toBeDisabled();
// submit the form
await userEvent.click(submit);
expect(saveSettingsSpy).toHaveBeenCalled();
await waitFor(() => expect(submit).toBeDisabled());
});
});
describe("Status toasts", () => {
it("should call displaySuccessToast when the settings are saved", async () => {
const saveSettingsSpy = vi.spyOn(OpenHands, "saveSettings");
const getSettingsSpy = vi.spyOn(OpenHands, "getSettings");
getSettingsSpy.mockResolvedValue(MOCK_DEFAULT_USER_SETTINGS);
const displaySuccessToastSpy = vi.spyOn(
ToastHandlers,
"displaySuccessToast",
);
renderAppSettingsScreen();
// Toggle setting to change
const sound = await screen.findByTestId(
"enable-sound-notifications-switch",
);
await userEvent.click(sound);
const submit = await screen.findByTestId("submit-button");
await userEvent.click(submit);
expect(saveSettingsSpy).toHaveBeenCalled();
await waitFor(() => expect(displaySuccessToastSpy).toHaveBeenCalled());
});
it("should call displayErrorToast when the settings fail to save", async () => {
const saveSettingsSpy = vi.spyOn(OpenHands, "saveSettings");
const getSettingsSpy = vi.spyOn(OpenHands, "getSettings");
getSettingsSpy.mockResolvedValue(MOCK_DEFAULT_USER_SETTINGS);
const displayErrorToastSpy = vi.spyOn(ToastHandlers, "displayErrorToast");
saveSettingsSpy.mockRejectedValue(new Error("Failed to save settings"));
renderAppSettingsScreen();
// Toggle setting to change
const sound = await screen.findByTestId(
"enable-sound-notifications-switch",
);
await userEvent.click(sound);
const submit = await screen.findByTestId("submit-button");
await userEvent.click(submit);
expect(saveSettingsSpy).toHaveBeenCalled();
expect(displayErrorToastSpy).toHaveBeenCalled();
});
});
@@ -1,443 +0,0 @@
import { render, screen, waitFor } from "@testing-library/react";
import { createRoutesStub } from "react-router";
import { beforeEach, describe, expect, it, vi } from "vitest";
import { QueryClient, QueryClientProvider } from "@tanstack/react-query";
import userEvent from "@testing-library/user-event";
import GitSettingsScreen from "#/routes/git-settings";
import OpenHands from "#/api/open-hands";
import { MOCK_DEFAULT_USER_SETTINGS } from "#/mocks/handlers";
import { AuthProvider } from "#/context/auth-context";
import { GetConfigResponse } from "#/api/open-hands.types";
import * as ToastHandlers from "#/utils/custom-toast-handlers";
import { SecretsService } from "#/api/secrets-service";
const VALID_OSS_CONFIG: GetConfigResponse = {
APP_MODE: "oss",
GITHUB_CLIENT_ID: "123",
POSTHOG_CLIENT_KEY: "456",
FEATURE_FLAGS: {
ENABLE_BILLING: false,
HIDE_LLM_SETTINGS: false,
},
};
const VALID_SAAS_CONFIG: GetConfigResponse = {
APP_MODE: "saas",
GITHUB_CLIENT_ID: "123",
POSTHOG_CLIENT_KEY: "456",
FEATURE_FLAGS: {
ENABLE_BILLING: false,
HIDE_LLM_SETTINGS: false,
},
};
const queryClient = new QueryClient();
const GitSettingsRouterStub = createRoutesStub([
{
Component: GitSettingsScreen,
path: "/settings/github",
},
]);
const renderGitSettingsScreen = () => {
const { rerender, ...rest } = render(
<GitSettingsRouterStub initialEntries={["/settings/github"]} />,
{
wrapper: ({ children }) => (
<QueryClientProvider client={queryClient}>
<AuthProvider>{children}</AuthProvider>
</QueryClientProvider>
),
},
);
const rerenderGitSettingsScreen = () =>
rerender(
<QueryClientProvider client={queryClient}>
<AuthProvider>
<GitSettingsRouterStub initialEntries={["/settings/github"]} />
</AuthProvider>
</QueryClientProvider>,
);
return {
...rest,
rerender: rerenderGitSettingsScreen,
};
};
beforeEach(() => {
// Since we don't recreate the query client on every test, we need to
// reset the query client before each test to avoid state leaks
// between tests.
queryClient.invalidateQueries();
});
describe("Content", () => {
it("should render", async () => {
renderGitSettingsScreen();
await screen.findByTestId("git-settings-screen");
});
it("should render the inputs if OSS mode", async () => {
const getConfigSpy = vi.spyOn(OpenHands, "getConfig");
getConfigSpy.mockResolvedValue(VALID_OSS_CONFIG);
const { rerender } = renderGitSettingsScreen();
await screen.findByTestId("github-token-input");
await screen.findByTestId("github-token-help-anchor");
await screen.findByTestId("gitlab-token-input");
await screen.findByTestId("gitlab-token-help-anchor");
getConfigSpy.mockResolvedValue(VALID_SAAS_CONFIG);
queryClient.invalidateQueries();
rerender();
await waitFor(() => {
expect(
screen.queryByTestId("github-token-input"),
).not.toBeInTheDocument();
expect(
screen.queryByTestId("github-token-help-anchor"),
).not.toBeInTheDocument();
expect(
screen.queryByTestId("gitlab-token-input"),
).not.toBeInTheDocument();
expect(
screen.queryByTestId("gitlab-token-help-anchor"),
).not.toBeInTheDocument();
});
});
it("should set '<hidden>' placeholder and indicator if the GitHub token is set", async () => {
const getConfigSpy = vi.spyOn(OpenHands, "getConfig");
const getSettingsSpy = vi.spyOn(OpenHands, "getSettings");
getConfigSpy.mockResolvedValue(VALID_OSS_CONFIG);
getSettingsSpy.mockResolvedValue({
...MOCK_DEFAULT_USER_SETTINGS,
});
const { rerender } = renderGitSettingsScreen();
await waitFor(() => {
const githubInput = screen.getByTestId("github-token-input");
expect(githubInput).toHaveProperty("placeholder", "");
expect(
screen.queryByTestId("gh-set-token-indicator"),
).not.toBeInTheDocument();
const gitlabInput = screen.getByTestId("gitlab-token-input");
expect(gitlabInput).toHaveProperty("placeholder", "");
expect(
screen.queryByTestId("gl-set-token-indicator"),
).not.toBeInTheDocument();
});
getSettingsSpy.mockResolvedValue({
...MOCK_DEFAULT_USER_SETTINGS,
provider_tokens_set: {
github: null,
gitlab: null,
},
});
queryClient.invalidateQueries();
rerender();
await waitFor(() => {
const githubInput = screen.getByTestId("github-token-input");
expect(githubInput).toHaveProperty("placeholder", "<hidden>");
expect(
screen.queryByTestId("gh-set-token-indicator"),
).toBeInTheDocument();
const gitlabInput = screen.getByTestId("gitlab-token-input");
expect(gitlabInput).toHaveProperty("placeholder", "<hidden>");
expect(
screen.queryByTestId("gl-set-token-indicator"),
).toBeInTheDocument();
});
getSettingsSpy.mockResolvedValue({
...MOCK_DEFAULT_USER_SETTINGS,
provider_tokens_set: {
gitlab: null,
},
});
queryClient.invalidateQueries();
rerender();
await waitFor(() => {
const githubInput = screen.getByTestId("github-token-input");
expect(githubInput).toHaveProperty("placeholder", "");
expect(
screen.queryByTestId("gh-set-token-indicator"),
).not.toBeInTheDocument();
const gitlabInput = screen.getByTestId("gitlab-token-input");
expect(gitlabInput).toHaveProperty("placeholder", "<hidden>");
expect(
screen.queryByTestId("gl-set-token-indicator"),
).toBeInTheDocument();
});
});
it("should render the 'Configure GitHub Repositories' button if SaaS mode and app slug exists", async () => {
const getConfigSpy = vi.spyOn(OpenHands, "getConfig");
getConfigSpy.mockResolvedValue(VALID_OSS_CONFIG);
const { rerender } = renderGitSettingsScreen();
let button = screen.queryByTestId("configure-github-repositories-button");
expect(button).not.toBeInTheDocument();
expect(screen.getByTestId("submit-button")).toBeInTheDocument();
expect(screen.getByTestId("disconnect-tokens-button")).toBeInTheDocument();
getConfigSpy.mockResolvedValue(VALID_SAAS_CONFIG);
queryClient.invalidateQueries();
rerender();
await waitFor(() => {
// wait until queries are resolved
expect(queryClient.isFetching()).toBe(0);
button = screen.queryByTestId("configure-github-repositories-button");
expect(button).not.toBeInTheDocument();
});
getConfigSpy.mockResolvedValue({
...VALID_SAAS_CONFIG,
APP_SLUG: "test-slug",
});
queryClient.invalidateQueries();
rerender();
await waitFor(() => {
button = screen.getByTestId("configure-github-repositories-button");
expect(button).toBeInTheDocument();
expect(screen.queryByTestId("submit-button")).not.toBeInTheDocument();
expect(
screen.queryByTestId("disconnect-tokens-button"),
).not.toBeInTheDocument();
});
});
});
describe("Form submission", () => {
it("should save the GitHub token", async () => {
const saveProvidersSpy = vi.spyOn(SecretsService, "addGitProvider");
const getConfigSpy = vi.spyOn(OpenHands, "getConfig");
getConfigSpy.mockResolvedValue(VALID_OSS_CONFIG);
renderGitSettingsScreen();
const githubInput = await screen.findByTestId("github-token-input");
const submit = await screen.findByTestId("submit-button");
await userEvent.type(githubInput, "test-token");
await userEvent.click(submit);
expect(saveProvidersSpy).toHaveBeenCalledWith({
github: { token: "test-token" },
gitlab: { token: "" },
});
const gitlabInput = await screen.findByTestId("gitlab-token-input");
await userEvent.type(gitlabInput, "test-token");
await userEvent.click(submit);
expect(saveProvidersSpy).toHaveBeenCalledWith({
github: { token: "test-token" },
gitlab: { token: "" },
});
});
it("should disable the button if there is no input", async () => {
const getConfigSpy = vi.spyOn(OpenHands, "getConfig");
getConfigSpy.mockResolvedValue(VALID_OSS_CONFIG);
renderGitSettingsScreen();
const submit = await screen.findByTestId("submit-button");
expect(submit).toBeDisabled();
const githubInput = await screen.findByTestId("github-token-input");
await userEvent.type(githubInput, "test-token");
expect(submit).not.toBeDisabled();
await userEvent.clear(githubInput);
expect(submit).toBeDisabled();
const gitlabInput = await screen.findByTestId("gitlab-token-input");
await userEvent.type(gitlabInput, "test-token");
expect(submit).not.toBeDisabled();
await userEvent.clear(gitlabInput);
expect(submit).toBeDisabled();
});
it("should enable a disconnect tokens button if there is at least one token set", async () => {
const getConfigSpy = vi.spyOn(OpenHands, "getConfig");
const getSettingsSpy = vi.spyOn(OpenHands, "getSettings");
getConfigSpy.mockResolvedValue(VALID_OSS_CONFIG);
getSettingsSpy.mockResolvedValue({
...MOCK_DEFAULT_USER_SETTINGS,
provider_tokens_set: {
github: null,
},
});
renderGitSettingsScreen();
await screen.findByTestId("git-settings-screen");
let disconnectButton = await screen.findByTestId(
"disconnect-tokens-button",
);
await waitFor(() => expect(disconnectButton).not.toBeDisabled());
getSettingsSpy.mockResolvedValue({
...MOCK_DEFAULT_USER_SETTINGS,
});
queryClient.invalidateQueries();
disconnectButton = await screen.findByTestId("disconnect-tokens-button");
await waitFor(() => expect(disconnectButton).toBeDisabled());
});
it("should call logout when pressing the disconnect tokens button", async () => {
const getConfigSpy = vi.spyOn(OpenHands, "getConfig");
const logoutSpy = vi.spyOn(OpenHands, "logout");
const getSettingsSpy = vi.spyOn(OpenHands, "getSettings");
getConfigSpy.mockResolvedValue(VALID_OSS_CONFIG);
getSettingsSpy.mockResolvedValue({
...MOCK_DEFAULT_USER_SETTINGS,
provider_tokens_set: {
github: null,
},
});
renderGitSettingsScreen();
const disconnectButton = await screen.findByTestId(
"disconnect-tokens-button",
);
await waitFor(() => expect(disconnectButton).not.toBeDisabled());
await userEvent.click(disconnectButton);
expect(logoutSpy).toHaveBeenCalled();
});
// flaky test
it.skip("should disable the button when submitting changes", async () => {
const saveSettingsSpy = vi.spyOn(SecretsService, "addGitProvider");
const getConfigSpy = vi.spyOn(OpenHands, "getConfig");
getConfigSpy.mockResolvedValue(VALID_OSS_CONFIG);
renderGitSettingsScreen();
const submit = await screen.findByTestId("submit-button");
expect(submit).toBeDisabled();
const githubInput = await screen.findByTestId("github-token-input");
await userEvent.type(githubInput, "test-token");
expect(submit).not.toBeDisabled();
// submit the form
await userEvent.click(submit);
expect(saveSettingsSpy).toHaveBeenCalled();
expect(submit).toHaveTextContent("Saving...");
expect(submit).toBeDisabled();
await waitFor(() => expect(submit).toHaveTextContent("Save"));
});
it("should disable the button after submitting changes", async () => {
const saveProvidersSpy = vi.spyOn(SecretsService, "addGitProvider");
const getConfigSpy = vi.spyOn(OpenHands, "getConfig");
getConfigSpy.mockResolvedValue(VALID_OSS_CONFIG);
renderGitSettingsScreen();
await screen.findByTestId("git-settings-screen");
const submit = await screen.findByTestId("submit-button");
expect(submit).toBeDisabled();
const githubInput = await screen.findByTestId("github-token-input");
await userEvent.type(githubInput, "test-token");
expect(submit).not.toBeDisabled();
// submit the form
await userEvent.click(submit);
expect(saveProvidersSpy).toHaveBeenCalled();
expect(submit).toBeDisabled();
const gitlabInput = await screen.findByTestId("gitlab-token-input");
await userEvent.type(gitlabInput, "test-token");
expect(gitlabInput).toHaveValue("test-token");
expect(submit).not.toBeDisabled();
// submit the form
await userEvent.click(submit);
expect(saveProvidersSpy).toHaveBeenCalled();
await waitFor(() => expect(submit).toBeDisabled());
});
});
describe("Status toasts", () => {
it("should call displaySuccessToast when the settings are saved", async () => {
const saveProvidersSpy = vi.spyOn(SecretsService, "addGitProvider");
const getSettingsSpy = vi.spyOn(OpenHands, "getSettings");
getSettingsSpy.mockResolvedValue(MOCK_DEFAULT_USER_SETTINGS);
const displaySuccessToastSpy = vi.spyOn(
ToastHandlers,
"displaySuccessToast",
);
renderGitSettingsScreen();
// Toggle setting to change
const githubInput = await screen.findByTestId("github-token-input");
await userEvent.type(githubInput, "test-token");
const submit = await screen.findByTestId("submit-button");
await userEvent.click(submit);
expect(saveProvidersSpy).toHaveBeenCalled();
await waitFor(() => expect(displaySuccessToastSpy).toHaveBeenCalled());
});
it("should call displayErrorToast when the settings fail to save", async () => {
const saveProvidersSpy = vi.spyOn(SecretsService, "addGitProvider");
const getSettingsSpy = vi.spyOn(OpenHands, "getSettings");
getSettingsSpy.mockResolvedValue(MOCK_DEFAULT_USER_SETTINGS);
const displayErrorToastSpy = vi.spyOn(ToastHandlers, "displayErrorToast");
saveProvidersSpy.mockRejectedValue(new Error("Failed to save settings"));
renderGitSettingsScreen();
// Toggle setting to change
const gitlabInput = await screen.findByTestId("gitlab-token-input");
await userEvent.type(gitlabInput, "test-token");
const submit = await screen.findByTestId("submit-button");
await userEvent.click(submit);
expect(saveProvidersSpy).toHaveBeenCalled();
expect(displayErrorToastSpy).toHaveBeenCalled();
});
});
+34 -16
View File
@@ -4,13 +4,31 @@ import { QueryClientProvider, QueryClient } from "@tanstack/react-query";
import userEvent from "@testing-library/user-event";
import { createRoutesStub } from "react-router";
import { Provider } from "react-redux";
import { createAxiosNotFoundErrorObject, setupStore } from "test-utils";
import { setupStore } from "test-utils";
import { AxiosError } from "axios";
import HomeScreen from "#/routes/home";
import { AuthProvider } from "#/context/auth-context";
import * as GitService from "#/api/git";
import { GitRepository } from "#/types/git";
import OpenHands from "#/api/open-hands";
import MainApp from "#/routes/root-layout";
const createAxiosNotFoundErrorObject = () =>
new AxiosError(
"Request failed with status code 404",
"ERR_BAD_REQUEST",
undefined,
undefined,
{
status: 404,
statusText: "Not Found",
data: { message: "Settings not found" },
headers: {},
// @ts-expect-error - we only need the response object for this test
config: {},
},
);
const RouterStub = createRoutesStub([
{
Component: MainApp,
@@ -73,21 +91,15 @@ describe("HomeScreen", () => {
screen.getByTestId("task-suggestions");
});
it("should have responsive layout for mobile and desktop screens", async () => {
renderHomeScreen();
const mainContainer = screen
.getByTestId("home-screen")
.querySelector("main");
expect(mainContainer).toHaveClass("flex", "flex-col", "md:flex-row");
});
it("should filter the suggested tasks based on the selected repository", async () => {
const retrieveUserGitRepositoriesSpy = vi.spyOn(
OpenHands,
GitService,
"retrieveUserGitRepositories",
);
retrieveUserGitRepositoriesSpy.mockResolvedValue(MOCK_RESPOSITORIES);
retrieveUserGitRepositoriesSpy.mockResolvedValue({
data: MOCK_RESPOSITORIES,
nextPage: null,
});
renderHomeScreen();
@@ -119,10 +131,13 @@ describe("HomeScreen", () => {
it("should reset the filtered tasks when the selected repository is cleared", async () => {
const retrieveUserGitRepositoriesSpy = vi.spyOn(
OpenHands,
GitService,
"retrieveUserGitRepositories",
);
retrieveUserGitRepositoriesSpy.mockResolvedValue(MOCK_RESPOSITORIES);
retrieveUserGitRepositoriesSpy.mockResolvedValue({
data: MOCK_RESPOSITORIES,
nextPage: null,
});
renderHomeScreen();
@@ -194,10 +209,13 @@ describe("HomeScreen", () => {
beforeEach(() => {
const retrieveUserGitRepositoriesSpy = vi.spyOn(
OpenHands,
GitService,
"retrieveUserGitRepositories",
);
retrieveUserGitRepositoriesSpy.mockResolvedValue(MOCK_RESPOSITORIES);
retrieveUserGitRepositoriesSpy.mockResolvedValue({
data: MOCK_RESPOSITORIES,
nextPage: null,
});
});
it("should disable the other launch buttons when the header launch button is clicked", async () => {
@@ -1,715 +0,0 @@
import { render, screen, waitFor, within } from "@testing-library/react";
import userEvent from "@testing-library/user-event";
import { beforeEach, describe, expect, it, vi } from "vitest";
import { QueryClientProvider, QueryClient } from "@tanstack/react-query";
import LlmSettingsScreen from "#/routes/llm-settings";
import OpenHands from "#/api/open-hands";
import {
MOCK_DEFAULT_USER_SETTINGS,
resetTestHandlersMockSettings,
} from "#/mocks/handlers";
import { AuthProvider } from "#/context/auth-context";
import * as AdvancedSettingsUtlls from "#/utils/has-advanced-settings-set";
import * as ToastHandlers from "#/utils/custom-toast-handlers";
const renderLlmSettingsScreen = () =>
render(<LlmSettingsScreen />, {
wrapper: ({ children }) => (
<QueryClientProvider client={new QueryClient()}>
<AuthProvider>{children}</AuthProvider>
</QueryClientProvider>
),
});
beforeEach(() => {
vi.resetAllMocks();
resetTestHandlersMockSettings();
});
describe("Content", () => {
describe("Basic form", () => {
it("should render the basic form by default", async () => {
renderLlmSettingsScreen();
await screen.findByTestId("llm-settings-screen");
const basicFom = screen.getByTestId("llm-settings-form-basic");
within(basicFom).getByTestId("llm-provider-input");
within(basicFom).getByTestId("llm-model-input");
within(basicFom).getByTestId("llm-api-key-input");
within(basicFom).getByTestId("llm-api-key-help-anchor");
});
it("should render the default values if non exist", async () => {
renderLlmSettingsScreen();
await screen.findByTestId("llm-settings-screen");
const provider = screen.getByTestId("llm-provider-input");
const model = screen.getByTestId("llm-model-input");
const apiKey = screen.getByTestId("llm-api-key-input");
await waitFor(() => {
expect(provider).toHaveValue("Anthropic");
expect(model).toHaveValue("claude-3-5-sonnet-20241022");
expect(apiKey).toHaveValue("");
expect(apiKey).toHaveProperty("placeholder", "");
});
});
it("should render the existing settings values", async () => {
const getSettingsSpy = vi.spyOn(OpenHands, "getSettings");
getSettingsSpy.mockResolvedValue({
...MOCK_DEFAULT_USER_SETTINGS,
llm_model: "openai/gpt-4o",
llm_api_key_set: true,
});
renderLlmSettingsScreen();
await screen.findByTestId("llm-settings-screen");
const provider = screen.getByTestId("llm-provider-input");
const model = screen.getByTestId("llm-model-input");
const apiKey = screen.getByTestId("llm-api-key-input");
await waitFor(() => {
expect(provider).toHaveValue("OpenAI");
expect(model).toHaveValue("gpt-4o");
expect(apiKey).toHaveValue("");
expect(apiKey).toHaveProperty("placeholder", "<hidden>");
expect(screen.getByTestId("set-indicator")).toBeInTheDocument();
});
});
});
describe("Advanced form", () => {
it("should render the advanced form if the switch is toggled", async () => {
renderLlmSettingsScreen();
await screen.findByTestId("llm-settings-screen");
const advancedSwitch = screen.getByTestId("advanced-settings-switch");
const basicForm = screen.getByTestId("llm-settings-form-basic");
expect(
screen.queryByTestId("llm-settings-form-advanced"),
).not.toBeInTheDocument();
expect(basicForm).toBeInTheDocument();
await userEvent.click(advancedSwitch);
expect(
screen.queryByTestId("llm-settings-form-advanced"),
).toBeInTheDocument();
expect(basicForm).not.toBeInTheDocument();
const advancedForm = screen.getByTestId("llm-settings-form-advanced");
within(advancedForm).getByTestId("llm-custom-model-input");
within(advancedForm).getByTestId("base-url-input");
within(advancedForm).getByTestId("llm-api-key-input");
within(advancedForm).getByTestId("llm-api-key-help-anchor");
within(advancedForm).getByTestId("agent-input");
within(advancedForm).getByTestId("enable-confirmation-mode-switch");
within(advancedForm).getByTestId("enable-memory-condenser-switch");
await userEvent.click(advancedSwitch);
expect(
screen.queryByTestId("llm-settings-form-advanced"),
).not.toBeInTheDocument();
expect(screen.getByTestId("llm-settings-form-basic")).toBeInTheDocument();
});
it("should render the default advanced settings", async () => {
renderLlmSettingsScreen();
await screen.findByTestId("llm-settings-screen");
const advancedSwitch = screen.getByTestId("advanced-settings-switch");
expect(advancedSwitch).not.toBeChecked();
await userEvent.click(advancedSwitch);
const model = screen.getByTestId("llm-custom-model-input");
const baseUrl = screen.getByTestId("base-url-input");
const apiKey = screen.getByTestId("llm-api-key-input");
const agent = screen.getByTestId("agent-input");
const confirmation = screen.getByTestId(
"enable-confirmation-mode-switch",
);
const condensor = screen.getByTestId("enable-memory-condenser-switch");
expect(model).toHaveValue("anthropic/claude-3-5-sonnet-20241022");
expect(baseUrl).toHaveValue("");
expect(apiKey).toHaveValue("");
expect(apiKey).toHaveProperty("placeholder", "");
expect(agent).toHaveValue("CodeActAgent");
expect(confirmation).not.toBeChecked();
expect(condensor).toBeChecked();
// check that security analyzer is present
expect(
screen.queryByTestId("security-analyzer-input"),
).not.toBeInTheDocument();
await userEvent.click(confirmation);
screen.getByTestId("security-analyzer-input");
});
it("should render the advanced form if existings settings are advanced", async () => {
const hasAdvancedSettingsSetSpy = vi.spyOn(
AdvancedSettingsUtlls,
"hasAdvancedSettingsSet",
);
hasAdvancedSettingsSetSpy.mockReturnValue(true);
renderLlmSettingsScreen();
await waitFor(() => {
const advancedSwitch = screen.getByTestId("advanced-settings-switch");
expect(advancedSwitch).toBeChecked();
screen.getByTestId("llm-settings-form-advanced");
});
});
it("should render existing advanced settings correctly", async () => {
const getSettingsSpy = vi.spyOn(OpenHands, "getSettings");
getSettingsSpy.mockResolvedValue({
...MOCK_DEFAULT_USER_SETTINGS,
llm_model: "openai/gpt-4o",
llm_base_url: "https://api.openai.com/v1/chat/completions",
llm_api_key_set: true,
agent: "CoActAgent",
confirmation_mode: true,
enable_default_condenser: false,
security_analyzer: "mock-invariant",
});
renderLlmSettingsScreen();
await screen.findByTestId("llm-settings-screen");
const model = screen.getByTestId("llm-custom-model-input");
const baseUrl = screen.getByTestId("base-url-input");
const apiKey = screen.getByTestId("llm-api-key-input");
const agent = screen.getByTestId("agent-input");
const confirmation = screen.getByTestId(
"enable-confirmation-mode-switch",
);
const condensor = screen.getByTestId("enable-memory-condenser-switch");
const securityAnalyzer = screen.getByTestId("security-analyzer-input");
await waitFor(() => {
expect(model).toHaveValue("openai/gpt-4o");
expect(baseUrl).toHaveValue(
"https://api.openai.com/v1/chat/completions",
);
expect(apiKey).toHaveValue("");
expect(apiKey).toHaveProperty("placeholder", "<hidden>");
expect(agent).toHaveValue("CoActAgent");
expect(confirmation).toBeChecked();
expect(condensor).not.toBeChecked();
expect(securityAnalyzer).toHaveValue("mock-invariant");
});
});
});
it.todo("should render an indicator if the llm api key is set");
});
describe("Form submission", () => {
it("should submit the basic form with the correct values", async () => {
const saveSettingsSpy = vi.spyOn(OpenHands, "saveSettings");
renderLlmSettingsScreen();
await screen.findByTestId("llm-settings-screen");
const provider = screen.getByTestId("llm-provider-input");
const model = screen.getByTestId("llm-model-input");
const apiKey = screen.getByTestId("llm-api-key-input");
// select provider
await userEvent.click(provider);
const providerOption = screen.getByText("OpenAI");
await userEvent.click(providerOption);
expect(provider).toHaveValue("OpenAI");
// enter api key
await userEvent.type(apiKey, "test-api-key");
// select model
await userEvent.click(model);
const modelOption = screen.getByText("gpt-4o");
await userEvent.click(modelOption);
expect(model).toHaveValue("gpt-4o");
const submitButton = screen.getByTestId("submit-button");
await userEvent.click(submitButton);
expect(saveSettingsSpy).toHaveBeenCalledWith(
expect.objectContaining({
llm_model: "openai/gpt-4o",
llm_api_key: "test-api-key",
}),
);
});
it("should submit the advanced form with the correct values", async () => {
const saveSettingsSpy = vi.spyOn(OpenHands, "saveSettings");
renderLlmSettingsScreen();
await screen.findByTestId("llm-settings-screen");
const advancedSwitch = screen.getByTestId("advanced-settings-switch");
await userEvent.click(advancedSwitch);
const model = screen.getByTestId("llm-custom-model-input");
const baseUrl = screen.getByTestId("base-url-input");
const apiKey = screen.getByTestId("llm-api-key-input");
const agent = screen.getByTestId("agent-input");
const confirmation = screen.getByTestId("enable-confirmation-mode-switch");
const condensor = screen.getByTestId("enable-memory-condenser-switch");
// enter custom model
await userEvent.clear(model);
await userEvent.type(model, "openai/gpt-4o");
expect(model).toHaveValue("openai/gpt-4o");
// enter base url
await userEvent.type(baseUrl, "https://api.openai.com/v1/chat/completions");
expect(baseUrl).toHaveValue("https://api.openai.com/v1/chat/completions");
// enter api key
await userEvent.type(apiKey, "test-api-key");
// toggle confirmation mode
await userEvent.click(confirmation);
expect(confirmation).toBeChecked();
// toggle memory condensor
await userEvent.click(condensor);
expect(condensor).not.toBeChecked();
// select agent
await userEvent.click(agent);
const agentOption = screen.getByText("CoActAgent");
await userEvent.click(agentOption);
expect(agent).toHaveValue("CoActAgent");
// select security analyzer
const securityAnalyzer = screen.getByTestId("security-analyzer-input");
await userEvent.click(securityAnalyzer);
const securityAnalyzerOption = screen.getByText("mock-invariant");
await userEvent.click(securityAnalyzerOption);
const submitButton = screen.getByTestId("submit-button");
await userEvent.click(submitButton);
expect(saveSettingsSpy).toHaveBeenCalledWith(
expect.objectContaining({
llm_model: "openai/gpt-4o",
llm_base_url: "https://api.openai.com/v1/chat/completions",
agent: "CoActAgent",
confirmation_mode: true,
enable_default_condenser: false,
security_analyzer: "mock-invariant",
}),
);
});
it("should disable the button if there are no changes in the basic form", async () => {
const getSettingsSpy = vi.spyOn(OpenHands, "getSettings");
getSettingsSpy.mockResolvedValue({
...MOCK_DEFAULT_USER_SETTINGS,
llm_model: "openai/gpt-4o",
llm_api_key_set: true,
});
renderLlmSettingsScreen();
await screen.findByTestId("llm-settings-screen");
screen.getByTestId("llm-settings-form-basic");
const submitButton = screen.getByTestId("submit-button");
expect(submitButton).toBeDisabled();
const model = screen.getByTestId("llm-model-input");
const apiKey = screen.getByTestId("llm-api-key-input");
// select model
await userEvent.click(model);
const modelOption = screen.getByText("gpt-4o-mini");
await userEvent.click(modelOption);
expect(model).toHaveValue("gpt-4o-mini");
expect(submitButton).not.toBeDisabled();
// reset model
await userEvent.click(model);
const modelOption2 = screen.getByText("gpt-4o");
await userEvent.click(modelOption2);
expect(model).toHaveValue("gpt-4o");
expect(submitButton).toBeDisabled();
// set api key
await userEvent.type(apiKey, "test-api-key");
expect(apiKey).toHaveValue("test-api-key");
expect(submitButton).not.toBeDisabled();
// reset api key
await userEvent.clear(apiKey);
expect(apiKey).toHaveValue("");
expect(submitButton).toBeDisabled();
});
it("should disable the button if there are no changes in the advanced form", async () => {
const getSettingsSpy = vi.spyOn(OpenHands, "getSettings");
getSettingsSpy.mockResolvedValue({
...MOCK_DEFAULT_USER_SETTINGS,
llm_model: "openai/gpt-4o",
llm_base_url: "https://api.openai.com/v1/chat/completions",
llm_api_key_set: true,
confirmation_mode: true,
});
renderLlmSettingsScreen();
await screen.findByTestId("llm-settings-screen");
screen.getByTestId("llm-settings-form-advanced");
const submitButton = screen.getByTestId("submit-button");
expect(submitButton).toBeDisabled();
const model = screen.getByTestId("llm-custom-model-input");
const baseUrl = screen.getByTestId("base-url-input");
const apiKey = screen.getByTestId("llm-api-key-input");
const agent = screen.getByTestId("agent-input");
const confirmation = screen.getByTestId("enable-confirmation-mode-switch");
const condensor = screen.getByTestId("enable-memory-condenser-switch");
// enter custom model
await userEvent.type(model, "-mini");
expect(model).toHaveValue("openai/gpt-4o-mini");
expect(submitButton).not.toBeDisabled();
// reset model
await userEvent.clear(model);
expect(model).toHaveValue("");
expect(submitButton).toBeDisabled();
await userEvent.type(model, "openai/gpt-4o");
expect(model).toHaveValue("openai/gpt-4o");
expect(submitButton).toBeDisabled();
// enter base url
await userEvent.type(baseUrl, "/extra");
expect(baseUrl).toHaveValue(
"https://api.openai.com/v1/chat/completions/extra",
);
expect(submitButton).not.toBeDisabled();
await userEvent.clear(baseUrl);
expect(baseUrl).toHaveValue("");
expect(submitButton).not.toBeDisabled();
await userEvent.type(baseUrl, "https://api.openai.com/v1/chat/completions");
expect(baseUrl).toHaveValue("https://api.openai.com/v1/chat/completions");
expect(submitButton).toBeDisabled();
// set api key
await userEvent.type(apiKey, "test-api-key");
expect(apiKey).toHaveValue("test-api-key");
expect(submitButton).not.toBeDisabled();
// reset api key
await userEvent.clear(apiKey);
expect(apiKey).toHaveValue("");
expect(submitButton).toBeDisabled();
// set agent
await userEvent.clear(agent);
await userEvent.type(agent, "test-agent");
expect(agent).toHaveValue("test-agent");
expect(submitButton).not.toBeDisabled();
// reset agent
await userEvent.clear(agent);
expect(agent).toHaveValue("");
expect(submitButton).toBeDisabled();
await userEvent.type(agent, "CodeActAgent");
expect(agent).toHaveValue("CodeActAgent");
expect(submitButton).toBeDisabled();
// toggle confirmation mode
await userEvent.click(confirmation);
expect(confirmation).not.toBeChecked();
expect(submitButton).not.toBeDisabled();
await userEvent.click(confirmation);
expect(confirmation).toBeChecked();
expect(submitButton).toBeDisabled();
// toggle memory condensor
await userEvent.click(condensor);
expect(condensor).not.toBeChecked();
expect(submitButton).not.toBeDisabled();
await userEvent.click(condensor);
expect(condensor).toBeChecked();
expect(submitButton).toBeDisabled();
// select security analyzer
const securityAnalyzer = screen.getByTestId("security-analyzer-input");
await userEvent.click(securityAnalyzer);
const securityAnalyzerOption = screen.getByText("mock-invariant");
await userEvent.click(securityAnalyzerOption);
expect(securityAnalyzer).toHaveValue("mock-invariant");
expect(submitButton).not.toBeDisabled();
await userEvent.clear(securityAnalyzer);
expect(securityAnalyzer).toHaveValue("");
expect(submitButton).toBeDisabled();
});
it("should reset button state when switching between forms", async () => {
renderLlmSettingsScreen();
await screen.findByTestId("llm-settings-screen");
const advancedSwitch = screen.getByTestId("advanced-settings-switch");
const submitButton = screen.getByTestId("submit-button");
expect(submitButton).toBeDisabled();
// dirty the basic form
const apiKey = screen.getByTestId("llm-api-key-input");
await userEvent.type(apiKey, "test-api-key");
expect(submitButton).not.toBeDisabled();
await userEvent.click(advancedSwitch);
expect(submitButton).toBeDisabled();
// dirty the advanced form
const model = screen.getByTestId("llm-custom-model-input");
await userEvent.type(model, "openai/gpt-4o");
expect(submitButton).not.toBeDisabled();
await userEvent.click(advancedSwitch);
expect(submitButton).toBeDisabled();
});
// flaky test
it.skip("should disable the button when submitting changes", async () => {
const saveSettingsSpy = vi.spyOn(OpenHands, "saveSettings");
renderLlmSettingsScreen();
await screen.findByTestId("llm-settings-screen");
const apiKey = screen.getByTestId("llm-api-key-input");
await userEvent.type(apiKey, "test-api-key");
const submitButton = screen.getByTestId("submit-button");
await userEvent.click(submitButton);
expect(saveSettingsSpy).toHaveBeenCalledWith(
expect.objectContaining({
llm_api_key: "test-api-key",
}),
);
expect(submitButton).toHaveTextContent("Saving...");
expect(submitButton).toBeDisabled();
await waitFor(() => {
expect(submitButton).toHaveTextContent("Save");
expect(submitButton).toBeDisabled();
});
});
it("should clear advanced settings when saving basic settings", async () => {
const getSettingsSpy = vi.spyOn(OpenHands, "getSettings");
getSettingsSpy.mockResolvedValue({
...MOCK_DEFAULT_USER_SETTINGS,
llm_model: "openai/gpt-4o",
llm_base_url: "https://api.openai.com/v1/chat/completions",
llm_api_key_set: true,
confirmation_mode: true,
});
const saveSettingsSpy = vi.spyOn(OpenHands, "saveSettings");
renderLlmSettingsScreen();
await screen.findByTestId("llm-settings-screen");
const advancedSwitch = screen.getByTestId("advanced-settings-switch");
await userEvent.click(advancedSwitch);
const provider = screen.getByTestId("llm-provider-input");
const model = screen.getByTestId("llm-model-input");
// select provider
await userEvent.click(provider);
const providerOption = screen.getByText("Anthropic");
await userEvent.click(providerOption);
// select model
await userEvent.click(model);
const modelOption = screen.getByText("claude-3-5-sonnet-20241022");
await userEvent.click(modelOption);
const submitButton = screen.getByTestId("submit-button");
await userEvent.click(submitButton);
expect(saveSettingsSpy).toHaveBeenCalledWith(
expect.objectContaining({
llm_model: "anthropic/claude-3-5-sonnet-20241022",
llm_base_url: "",
confirmation_mode: false,
}),
);
});
});
describe("Status toasts", () => {
describe("Basic form", () => {
it("should call displaySuccessToast when the settings are saved", async () => {
const saveSettingsSpy = vi.spyOn(OpenHands, "saveSettings");
const displaySuccessToastSpy = vi.spyOn(
ToastHandlers,
"displaySuccessToast",
);
renderLlmSettingsScreen();
// Toggle setting to change
const apiKeyInput = await screen.findByTestId("llm-api-key-input");
await userEvent.type(apiKeyInput, "test-api-key");
const submit = await screen.findByTestId("submit-button");
await userEvent.click(submit);
expect(saveSettingsSpy).toHaveBeenCalled();
await waitFor(() => expect(displaySuccessToastSpy).toHaveBeenCalled());
});
it("should call displayErrorToast when the settings fail to save", async () => {
const saveSettingsSpy = vi.spyOn(OpenHands, "saveSettings");
const displayErrorToastSpy = vi.spyOn(ToastHandlers, "displayErrorToast");
saveSettingsSpy.mockRejectedValue(new Error("Failed to save settings"));
renderLlmSettingsScreen();
// Toggle setting to change
const apiKeyInput = await screen.findByTestId("llm-api-key-input");
await userEvent.type(apiKeyInput, "test-api-key");
const submit = await screen.findByTestId("submit-button");
await userEvent.click(submit);
expect(saveSettingsSpy).toHaveBeenCalled();
expect(displayErrorToastSpy).toHaveBeenCalled();
});
});
describe("Advanced form", () => {
it("should call displaySuccessToast when the settings are saved", async () => {
const saveSettingsSpy = vi.spyOn(OpenHands, "saveSettings");
const displaySuccessToastSpy = vi.spyOn(
ToastHandlers,
"displaySuccessToast",
);
renderLlmSettingsScreen();
await screen.findByTestId("llm-settings-screen");
const advancedSwitch = screen.getByTestId("advanced-settings-switch");
await userEvent.click(advancedSwitch);
await screen.findByTestId("llm-settings-form-advanced");
// Toggle setting to change
const apiKeyInput = await screen.findByTestId("llm-api-key-input");
await userEvent.type(apiKeyInput, "test-api-key");
const submit = await screen.findByTestId("submit-button");
await userEvent.click(submit);
expect(saveSettingsSpy).toHaveBeenCalled();
await waitFor(() => expect(displaySuccessToastSpy).toHaveBeenCalled());
});
it("should call displayErrorToast when the settings fail to save", async () => {
const saveSettingsSpy = vi.spyOn(OpenHands, "saveSettings");
const displayErrorToastSpy = vi.spyOn(ToastHandlers, "displayErrorToast");
saveSettingsSpy.mockRejectedValue(new Error("Failed to save settings"));
renderLlmSettingsScreen();
await screen.findByTestId("llm-settings-screen");
const advancedSwitch = screen.getByTestId("advanced-settings-switch");
await userEvent.click(advancedSwitch);
await screen.findByTestId("llm-settings-form-advanced");
// Toggle setting to change
const apiKeyInput = await screen.findByTestId("llm-api-key-input");
await userEvent.type(apiKeyInput, "test-api-key");
const submit = await screen.findByTestId("submit-button");
await userEvent.click(submit);
expect(saveSettingsSpy).toHaveBeenCalled();
expect(displayErrorToastSpy).toHaveBeenCalled();
});
});
});
describe("SaaS mode", () => {
it("should not render the runtime settings input in oss mode", async () => {
const getConfigSpy = vi.spyOn(OpenHands, "getConfig");
// @ts-expect-error - only return mode
getConfigSpy.mockResolvedValue({
APP_MODE: "oss",
});
renderLlmSettingsScreen();
await screen.findByTestId("llm-settings-screen");
const advancedSwitch = screen.getByTestId("advanced-settings-switch");
await userEvent.click(advancedSwitch);
await screen.findByTestId("llm-settings-form-advanced");
const runtimeSettingsInput = screen.queryByTestId("runtime-settings-input");
expect(runtimeSettingsInput).not.toBeInTheDocument();
});
it("should render the runtime settings input in saas mode", async () => {
const getConfigSpy = vi.spyOn(OpenHands, "getConfig");
// @ts-expect-error - only return mode
getConfigSpy.mockResolvedValue({
APP_MODE: "saas",
});
renderLlmSettingsScreen();
await screen.findByTestId("llm-settings-screen");
const advancedSwitch = screen.getByTestId("advanced-settings-switch");
await userEvent.click(advancedSwitch);
await screen.findByTestId("llm-settings-form-advanced");
const runtimeSettingsInput = screen.queryByTestId("runtime-settings-input");
expect(runtimeSettingsInput).toBeInTheDocument();
});
it("should always render the runtime settings input as disabled", async () => {
const getConfigSpy = vi.spyOn(OpenHands, "getConfig");
// @ts-expect-error - only return mode
getConfigSpy.mockResolvedValue({
APP_MODE: "saas",
});
renderLlmSettingsScreen();
await screen.findByTestId("llm-settings-screen");
const advancedSwitch = screen.getByTestId("advanced-settings-switch");
await userEvent.click(advancedSwitch);
await screen.findByTestId("llm-settings-form-advanced");
const runtimeSettingsInput = screen.queryByTestId("runtime-settings-input");
expect(runtimeSettingsInput).toBeInTheDocument();
expect(runtimeSettingsInput).toBeDisabled();
});
});

Some files were not shown because too many files have changed in this diff Show More