Compare commits
8 Commits
0.37.0
...
gitlab-doc
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
bc1305fc0e | ||
|
|
f985a46cfd | ||
|
|
9ca96afe29 | ||
|
|
7f3f44432e | ||
|
|
35b6b8ae2f | ||
|
|
52110305b3 | ||
|
|
877644be8c | ||
|
|
3bc85eb7ac |
6
.github/pull_request_template.md
vendored
@@ -1,12 +1,12 @@
|
||||
- [ ] This change is worth documenting at https://docs.all-hands.dev/
|
||||
- [ ] Include this change in the Release Notes. If checked, you **must** provide an **end-user friendly** description for your change below
|
||||
|
||||
**End-user friendly description of the problem this fixes or functionality this introduces.**
|
||||
**End-user friendly description of the problem this fixes or functionality that this introduces.**
|
||||
|
||||
|
||||
---
|
||||
**Summarize what the PR does, explaining any non-trivial design decisions.**
|
||||
**Give a summary of what the PR does, explaining any non-trivial design decisions.**
|
||||
|
||||
|
||||
---
|
||||
**Link of any specific issues this addresses:**
|
||||
**Link of any specific issues this addresses.**
|
||||
|
||||
@@ -1,73 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
echo "Running OpenHands pre-commit hook..."
|
||||
|
||||
# Store the exit code to return at the end
|
||||
# This allows us to be additive to existing pre-commit hooks
|
||||
EXIT_CODE=0
|
||||
|
||||
# Check if frontend directory has changed
|
||||
frontend_changes=$(git diff --cached --name-only | grep "^frontend/")
|
||||
if [ -n "$frontend_changes" ]; then
|
||||
echo "Frontend changes detected. Running frontend checks..."
|
||||
|
||||
# Check if frontend directory exists
|
||||
if [ -d "frontend" ]; then
|
||||
# Change to frontend directory
|
||||
cd frontend || exit 1
|
||||
|
||||
# Run lint:fix
|
||||
echo "Running npm lint:fix..."
|
||||
npm run lint:fix
|
||||
if [ $? -ne 0 ]; then
|
||||
echo "Frontend linting failed. Please fix the issues before committing."
|
||||
EXIT_CODE=1
|
||||
fi
|
||||
|
||||
# Run build
|
||||
echo "Running npm build..."
|
||||
npm run build
|
||||
if [ $? -ne 0 ]; then
|
||||
echo "Frontend build failed. Please fix the issues before committing."
|
||||
EXIT_CODE=1
|
||||
fi
|
||||
|
||||
# Run tests
|
||||
echo "Running npm test..."
|
||||
npm test
|
||||
if [ $? -ne 0 ]; then
|
||||
echo "Frontend tests failed. Please fix the failing tests before committing."
|
||||
EXIT_CODE=1
|
||||
fi
|
||||
|
||||
# Return to the original directory
|
||||
cd ..
|
||||
|
||||
if [ $EXIT_CODE -eq 0 ]; then
|
||||
echo "Frontend checks passed!"
|
||||
fi
|
||||
else
|
||||
echo "Frontend directory not found. Skipping frontend checks."
|
||||
fi
|
||||
else
|
||||
echo "No frontend changes detected. Skipping frontend checks."
|
||||
fi
|
||||
|
||||
# Run any existing pre-commit hooks that might have been installed by the user
|
||||
# This makes our hook additive rather than replacing existing hooks
|
||||
if [ -f ".git/hooks/pre-commit.local" ]; then
|
||||
echo "Running existing pre-commit hooks..."
|
||||
bash .git/hooks/pre-commit.local
|
||||
if [ $? -ne 0 ]; then
|
||||
echo "Existing pre-commit hooks failed."
|
||||
EXIT_CODE=1
|
||||
fi
|
||||
fi
|
||||
|
||||
if [ $EXIT_CODE -eq 0 ]; then
|
||||
echo "All pre-commit checks passed!"
|
||||
else
|
||||
echo "Some pre-commit checks failed. Please fix the issues before committing."
|
||||
fi
|
||||
|
||||
exit $EXIT_CODE
|
||||
@@ -2,11 +2,4 @@
|
||||
|
||||
echo "Setting up the environment..."
|
||||
|
||||
# Install pre-commit package
|
||||
python -m pip install pre-commit
|
||||
|
||||
# Install pre-commit hooks if .git directory exists
|
||||
if [ -d ".git" ]; then
|
||||
echo "Installing pre-commit hooks..."
|
||||
pre-commit install
|
||||
fi
|
||||
|
||||
@@ -121,7 +121,7 @@ These Slack and Discord etiquette guidelines are designed to foster an inclusive
|
||||
- Use threads for specific discussions to keep channels organized and easier to follow.
|
||||
- Tag others only when their input is critical or urgent, and use @here, @channel or @everyone sparingly to minimize disruptions.
|
||||
- Be patient, as open-source contributors and maintainers often have other commitments and may need time to respond.
|
||||
- Post questions or discussions in the most relevant channel (e.g., for [slack - #general](https://openhands-ai.slack.com/archives/C06P5NCGSFP) for general topics, [slack - #questions](https://openhands-ai.slack.com/archives/C06U8UTKSAD) for queries/questions, [discord - #general](https://discord.com/channels/1222935860639563850/1222935861386018885)).
|
||||
- Post questions or discussions in the most relevant channel (e.g., for [slack - #general](https://app.slack.com/client/T06P212QSEA/C06P5NCGSFP) for general topics, [slack - #questions](https://openhands-ai.slack.com/archives/C06U8UTKSAD) for queries/questions, [discord - #general](https://discord.com/channels/1222935860639563850/1222935861386018885)).
|
||||
- When asking for help or raising issues, include necessary details like links, screenshots, or clear explanations to provide context.
|
||||
- Keep discussions in public channels whenever possible to allow others to benefit from the conversation, unless the matter is sensitive or private.
|
||||
- Always adhere to [our standards](https://github.com/All-Hands-AI/OpenHands/blob/main/CODE_OF_CONDUCT.md#our-standards) to ensure a welcoming and collaborative environment.
|
||||
|
||||
@@ -118,7 +118,7 @@ poetry run pytest ./tests/unit/test_*.py
|
||||
To reduce build time (e.g., if no changes were made to the client-runtime component), you can use an existing Docker container image by
|
||||
setting the SANDBOX_RUNTIME_CONTAINER_IMAGE environment variable to the desired Docker image.
|
||||
|
||||
Example: `export SANDBOX_RUNTIME_CONTAINER_IMAGE=ghcr.io/all-hands-ai/runtime:0.37-nikolaik`
|
||||
Example: `export SANDBOX_RUNTIME_CONTAINER_IMAGE=ghcr.io/all-hands-ai/runtime:0.34-nikolaik`
|
||||
|
||||
## Develop inside Docker container
|
||||
|
||||
|
||||
20
README.md
@@ -9,9 +9,10 @@
|
||||
<div align="center">
|
||||
<a href="https://github.com/All-Hands-AI/OpenHands/graphs/contributors"><img src="https://img.shields.io/github/contributors/All-Hands-AI/OpenHands?style=for-the-badge&color=blue" alt="Contributors"></a>
|
||||
<a href="https://github.com/All-Hands-AI/OpenHands/stargazers"><img src="https://img.shields.io/github/stars/All-Hands-AI/OpenHands?style=for-the-badge&color=blue" alt="Stargazers"></a>
|
||||
<a href="https://codecov.io/github/All-Hands-AI/OpenHands?branch=main"><img alt="CodeCov" src="https://img.shields.io/codecov/c/github/All-Hands-AI/OpenHands?style=for-the-badge&color=blue"></a>
|
||||
<a href="https://github.com/All-Hands-AI/OpenHands/blob/main/LICENSE"><img src="https://img.shields.io/github/license/All-Hands-AI/OpenHands?style=for-the-badge&color=blue" alt="MIT License"></a>
|
||||
<br/>
|
||||
<a href="https://join.slack.com/t/openhands-ai/shared_invite/zt-34zm4j0gj-Qz5kRHoca8DFCbqXPS~f_A"><img src="https://img.shields.io/badge/Slack-Join%20Us-red?logo=slack&logoColor=white&style=for-the-badge" alt="Join our Slack community"></a>
|
||||
<a href="https://join.slack.com/t/openhands-ai/shared_invite/zt-2ngejmfw6-9gW4APWOC9XUp1n~SiQ6iw"><img src="https://img.shields.io/badge/Slack-Join%20Us-red?logo=slack&logoColor=white&style=for-the-badge" alt="Join our Slack community"></a>
|
||||
<a href="https://discord.gg/ESHStjSjD4"><img src="https://img.shields.io/badge/Discord-Join%20Us-purple?logo=discord&logoColor=white&style=for-the-badge" alt="Join our Discord community"></a>
|
||||
<a href="https://github.com/All-Hands-AI/OpenHands/blob/main/CREDITS.md"><img src="https://img.shields.io/badge/Project-Credits-blue?style=for-the-badge&color=FFE165&logo=github&logoColor=white" alt="Credits"></a>
|
||||
<br/>
|
||||
@@ -51,23 +52,23 @@ system requirements and more information.
|
||||
|
||||
|
||||
```bash
|
||||
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.37-nikolaik
|
||||
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik
|
||||
|
||||
docker run -it --rm --pull=always \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.37-nikolaik \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik \
|
||||
-e LOG_ALL_EVENTS=true \
|
||||
-v /var/run/docker.sock:/var/run/docker.sock \
|
||||
-v ~/.openhands-state:/.openhands-state \
|
||||
-p 3000:3000 \
|
||||
--add-host host.docker.internal:host-gateway \
|
||||
--name openhands-app \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.37
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.34
|
||||
```
|
||||
|
||||
You'll find OpenHands running at [http://localhost:3000](http://localhost:3000)!
|
||||
|
||||
When you open the application, you'll be asked to choose an LLM provider and add an API key.
|
||||
[Anthropic's Claude 3.7 Sonnet](https://www.anthropic.com/api) (`anthropic/claude-3-7-sonnet-20250219`)
|
||||
[Anthropic's Claude 3.5 Sonnet](https://www.anthropic.com/api) (`anthropic/claude-3-5-sonnet-20241022`)
|
||||
works best, but you have [many options](https://docs.all-hands.dev/modules/usage/llms).
|
||||
|
||||
## 💡 Other ways to run OpenHands
|
||||
@@ -99,19 +100,12 @@ check out our [documentation](https://docs.all-hands.dev/modules/usage/getting-s
|
||||
There you'll find resources on how to use different LLM providers,
|
||||
troubleshooting resources, and advanced configuration options.
|
||||
|
||||
### Custom Scripts
|
||||
|
||||
OpenHands supports custom scripts that run at different points in the runtime lifecycle:
|
||||
|
||||
- **setup.sh**: Place this script in the `.openhands` directory of your repository to run custom setup commands when the runtime initializes.
|
||||
- **pre-commit.sh**: Place this script in the `.openhands` directory to add a custom git pre-commit hook that runs before each commit. This can be used to enforce code quality standards, run tests, or perform other checks before allowing commits.
|
||||
|
||||
## 🤝 How to Join the Community
|
||||
|
||||
OpenHands is a community-driven project, and we welcome contributions from everyone. We do most of our communication
|
||||
through Slack, so this is the best place to start, but we also are happy to have you contact us on Discord or Github:
|
||||
|
||||
- [Join our Slack workspace](https://join.slack.com/t/openhands-ai/shared_invite/zt-34zm4j0gj-Qz5kRHoca8DFCbqXPS~f_A) - Here we talk about research, architecture, and future development.
|
||||
- [Join our Slack workspace](https://join.slack.com/t/openhands-ai/shared_invite/zt-2ngejmfw6-9gW4APWOC9XUp1n~SiQ6iw) - Here we talk about research, architecture, and future development.
|
||||
- [Join our Discord server](https://discord.gg/ESHStjSjD4) - This is a community-run server for general discussion, questions, and feedback.
|
||||
- [Read or post Github Issues](https://github.com/All-Hands-AI/OpenHands/issues) - Check out the issues we're working on, or add your own ideas.
|
||||
|
||||
|
||||
@@ -316,10 +316,6 @@ llm_config = 'gpt3'
|
||||
# Additional Docker runtime kwargs
|
||||
#docker_runtime_kwargs = {}
|
||||
|
||||
# Specific port to use for VSCode. If not set, a random port will be chosen.
|
||||
# Useful when deploying OpenHands in a remote machine where you need to expose a specific port.
|
||||
#vscode_port = 41234
|
||||
|
||||
#################################### Security ###################################
|
||||
# Configuration for security features
|
||||
##############################################################################
|
||||
@@ -395,7 +391,7 @@ type = "noop"
|
||||
#[llm.condenser]
|
||||
#model = "gpt-4o"
|
||||
#temperature = 0.1
|
||||
#max_input_tokens = 1024
|
||||
#max_tokens = 1024
|
||||
|
||||
#################################### Eval ####################################
|
||||
# Configuration for the evaluation, please refer to the specific evaluation
|
||||
|
||||
@@ -11,7 +11,7 @@ services:
|
||||
- BACKEND_HOST=${BACKEND_HOST:-"0.0.0.0"}
|
||||
- SANDBOX_API_HOSTNAME=host.docker.internal
|
||||
#
|
||||
- SANDBOX_RUNTIME_CONTAINER_IMAGE=${SANDBOX_RUNTIME_CONTAINER_IMAGE:-ghcr.io/all-hands-ai/runtime:0.37-nikolaik}
|
||||
- SANDBOX_RUNTIME_CONTAINER_IMAGE=${SANDBOX_RUNTIME_CONTAINER_IMAGE:-ghcr.io/all-hands-ai/runtime:0.34-nikolaik}
|
||||
- SANDBOX_USER_ID=${SANDBOX_USER_ID:-1234}
|
||||
- WORKSPACE_MOUNT_PATH=${WORKSPACE_BASE:-$PWD/workspace}
|
||||
ports:
|
||||
|
||||
@@ -26,7 +26,7 @@ repos:
|
||||
- id: ruff
|
||||
entry: ruff check --config dev_config/python/ruff.toml
|
||||
types_or: [python, pyi, jupyter]
|
||||
args: [--fix, --unsafe-fixes]
|
||||
args: [--fix]
|
||||
# Run the formatter.
|
||||
- id: ruff-format
|
||||
entry: ruff format --config dev_config/python/ruff.toml
|
||||
|
||||
@@ -7,9 +7,6 @@ select = [
|
||||
"Q",
|
||||
"B",
|
||||
"ASYNC",
|
||||
"UP006", # Use `list` instead of `List` for annotations
|
||||
"UP007", # Use `X | Y` instead of `Union[X, Y]`
|
||||
"UP008", # Use `X | None` instead of `Optional[X]`
|
||||
]
|
||||
|
||||
ignore = [
|
||||
|
||||
@@ -7,7 +7,7 @@ services:
|
||||
image: openhands:latest
|
||||
container_name: openhands-app-${DATE:-}
|
||||
environment:
|
||||
- SANDBOX_RUNTIME_CONTAINER_IMAGE=${SANDBOX_RUNTIME_CONTAINER_IMAGE:-docker.all-hands.dev/all-hands-ai/runtime:0.37-nikolaik}
|
||||
- SANDBOX_RUNTIME_CONTAINER_IMAGE=${SANDBOX_RUNTIME_CONTAINER_IMAGE:-docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik}
|
||||
#- SANDBOX_USER_ID=${SANDBOX_USER_ID:-1234} # enable this only if you want a specific non-root sandbox user but you will have to manually adjust permissions of openhands-state for this user
|
||||
- WORKSPACE_MOUNT_PATH=${WORKSPACE_BASE:-$PWD/workspace}
|
||||
ports:
|
||||
|
||||
@@ -36,14 +36,7 @@ const config: Config = {
|
||||
mermaid: true,
|
||||
},
|
||||
themes: ['@docusaurus/theme-mermaid'],
|
||||
plugins: [
|
||||
[
|
||||
require.resolve('docusaurus-lunr-search'),
|
||||
{
|
||||
languages: ['en', 'zh', 'fr', 'ja', 'pt']
|
||||
}
|
||||
]
|
||||
],
|
||||
plugins: [],
|
||||
presets: [
|
||||
[
|
||||
'classic',
|
||||
@@ -92,10 +85,6 @@ const config: Config = {
|
||||
type: 'localeDropdown',
|
||||
position: 'left',
|
||||
},
|
||||
{
|
||||
type: 'search',
|
||||
position: 'left',
|
||||
},
|
||||
{
|
||||
href: 'https://all-hands.dev',
|
||||
label: 'Company',
|
||||
|
||||
@@ -52,7 +52,7 @@ LLM_API_KEY="sk_test_12345"
|
||||
```bash
|
||||
docker run -it \
|
||||
--pull=always \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.37-nikolaik \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik \
|
||||
-e SANDBOX_USER_ID=$(id -u) \
|
||||
-e WORKSPACE_MOUNT_PATH=$WORKSPACE_BASE \
|
||||
-e LLM_API_KEY=$LLM_API_KEY \
|
||||
@@ -61,7 +61,7 @@ docker run -it \
|
||||
-v /var/run/docker.sock:/var/run/docker.sock \
|
||||
--add-host host.docker.internal:host-gateway \
|
||||
--name openhands-app-$(date +%Y%m%d%H%M%S) \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.37 \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.34 \
|
||||
python -m openhands.core.cli
|
||||
```
|
||||
|
||||
|
||||
@@ -46,7 +46,7 @@ LLM_API_KEY="sk_test_12345"
|
||||
```bash
|
||||
docker run -it \
|
||||
--pull=always \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.37-nikolaik \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik \
|
||||
-e SANDBOX_USER_ID=$(id -u) \
|
||||
-e WORKSPACE_MOUNT_PATH=$WORKSPACE_BASE \
|
||||
-e LLM_API_KEY=$LLM_API_KEY \
|
||||
@@ -56,6 +56,6 @@ docker run -it \
|
||||
-v /var/run/docker.sock:/var/run/docker.sock \
|
||||
--add-host host.docker.internal:host-gateway \
|
||||
--name openhands-app-$(date +%Y%m%d%H%M%S) \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.37 \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.34 \
|
||||
python -m openhands.core.main -t "write a bash script that prints hi" --no-auto-continue
|
||||
```
|
||||
|
||||
@@ -13,16 +13,16 @@
|
||||
La façon la plus simple d'exécuter OpenHands est avec Docker.
|
||||
|
||||
```bash
|
||||
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.37-nikolaik
|
||||
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik
|
||||
|
||||
docker run -it --rm --pull=always \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.37-nikolaik \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik \
|
||||
-e LOG_ALL_EVENTS=true \
|
||||
-v /var/run/docker.sock:/var/run/docker.sock \
|
||||
-p 3000:3000 \
|
||||
--add-host host.docker.internal:host-gateway \
|
||||
--name openhands-app \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.37
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.34
|
||||
```
|
||||
|
||||
Vous pouvez également exécuter OpenHands en mode [headless scriptable](https://docs.all-hands.dev/modules/usage/how-to/headless-mode), en tant que [CLI interactive](https://docs.all-hands.dev/modules/usage/how-to/cli-mode), ou en utilisant l'[Action GitHub OpenHands](https://docs.all-hands.dev/modules/usage/how-to/github-action).
|
||||
|
||||
@@ -42,7 +42,7 @@ Explorez le code source d'OpenHands sur [GitHub](https://github.com/All-Hands-AI
|
||||
/>
|
||||
</a>
|
||||
<br></br>
|
||||
<a href="https://join.slack.com/t/openhands-ai/shared_invite/zt-34zm4j0gj-Qz5kRHoca8DFCbqXPS~f_A">
|
||||
<a href="https://join.slack.com/t/openhands-ai/shared_invite/zt-2ngejmfw6-9gW4APWOC9XUp1n~SiQ6iw">
|
||||
<img
|
||||
src="https://img.shields.io/badge/Slack-Join%20Us-red?logo=slack&logoColor=white&style=for-the-badge"
|
||||
alt="Join our Slack community"
|
||||
|
||||
@@ -13,7 +13,7 @@ C'est le Runtime par défaut qui est utilisé lorsque vous démarrez OpenHands.
|
||||
|
||||
```
|
||||
docker run # ...
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.37-nikolaik \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik \
|
||||
-v /var/run/docker.sock:/var/run/docker.sock \
|
||||
# ...
|
||||
```
|
||||
|
||||
@@ -34,7 +34,7 @@ Docker で OpenHands を CLI モードで実行するには:
|
||||
```bash
|
||||
docker run -it \
|
||||
--pull=always \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.37-nikolaik \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik \
|
||||
-e SANDBOX_USER_ID=$(id -u) \
|
||||
-e WORKSPACE_MOUNT_PATH=$WORKSPACE_BASE \
|
||||
-e LLM_API_KEY=$LLM_API_KEY \
|
||||
@@ -44,7 +44,7 @@ docker run -it \
|
||||
-v ~/.openhands-state:/.openhands-state \
|
||||
--add-host host.docker.internal:host-gateway \
|
||||
--name openhands-app-$(date +%Y%m%d%H%M%S) \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.37 \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.34 \
|
||||
python -m openhands.core.cli
|
||||
```
|
||||
|
||||
|
||||
@@ -31,7 +31,7 @@ DockerでOpenHandsをヘッドレスモードで実行するには:
|
||||
```bash
|
||||
docker run -it \
|
||||
--pull=always \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.37-nikolaik \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik \
|
||||
-e SANDBOX_USER_ID=$(id -u) \
|
||||
-e WORKSPACE_MOUNT_PATH=$WORKSPACE_BASE \
|
||||
-e LLM_API_KEY=$LLM_API_KEY \
|
||||
@@ -42,7 +42,7 @@ docker run -it \
|
||||
-v ~/.openhands-state:/.openhands-state \
|
||||
--add-host host.docker.internal:host-gateway \
|
||||
--name openhands-app-$(date +%Y%m%d%H%M%S) \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.37 \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.34 \
|
||||
python -m openhands.core.main -t "write a bash script that prints hi"
|
||||
```
|
||||
|
||||
|
||||
@@ -42,7 +42,7 @@ OpenHandsのソースコードを[GitHub](https://github.com/All-Hands-AI/OpenHa
|
||||
/>
|
||||
</a>
|
||||
<br></br>
|
||||
<a href="https://join.slack.com/t/openhands-ai/shared_invite/zt-34zm4j0gj-Qz5kRHoca8DFCbqXPS~f_A">
|
||||
<a href="https://join.slack.com/t/openhands-ai/shared_invite/zt-2ngejmfw6-9gW4APWOC9XUp1n~SiQ6iw">
|
||||
<img
|
||||
src="https://img.shields.io/badge/Slack-Join%20Us-red?logo=slack&logoColor=white&style=for-the-badge"
|
||||
alt="Slackコミュニティに参加"
|
||||
|
||||
@@ -13,7 +13,7 @@ OpenHandsがリポジトリで動作する際:
|
||||
|
||||
1. リポジトリに`.openhands/microagents/`が存在する場合、そこからリポジトリ固有の指示を読み込みます。
|
||||
2. 会話のキーワードによってトリガーされる一般的なガイドラインを読み込みます。
|
||||
現在の[パブリックMicroagents](https://github.com/All-Hands-AI/OpenHands/tree/main/microagents)を参照してください。
|
||||
現在の[パブリックMicroagents](https://github.com/All-Hands-AI/OpenHands/tree/main/microagents/knowledge)を参照してください。
|
||||
|
||||
## Microagentのフォーマット
|
||||
|
||||
|
||||
@@ -88,4 +88,4 @@ triggers:
|
||||
- ビルド時間とイメージサイズを最適化
|
||||
```
|
||||
|
||||
より多くの例については、[現在のパブリックマイクロエージェント](https://github.com/All-Hands-AI/OpenHands/tree/main/microagents)をご覧ください。
|
||||
より多くの例については、[現在のパブリックマイクロエージェント](https://github.com/All-Hands-AI/OpenHands/tree/main/microagents/knowledge)をご覧ください。
|
||||
|
||||
@@ -25,7 +25,7 @@ nikolaik の `SANDBOX_RUNTIME_CONTAINER_IMAGE` は、ランタイムサーバー
|
||||
|
||||
```bash
|
||||
docker run # ...
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.37-nikolaik \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik \
|
||||
-e SANDBOX_USER_ID=$(id -u) \
|
||||
-e WORKSPACE_MOUNT_PATH=$WORKSPACE_BASE \
|
||||
-v $WORKSPACE_BASE:/opt/workspace_base \
|
||||
@@ -82,5 +82,5 @@ docker network create openhands-network
|
||||
# 分離されたネットワークで OpenHands を実行
|
||||
docker run # ... \
|
||||
--network openhands-network \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.37
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.34
|
||||
```
|
||||
|
||||
@@ -35,7 +35,7 @@ Para executar o OpenHands no modo CLI com Docker:
|
||||
```bash
|
||||
docker run -it \
|
||||
--pull=always \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.37-nikolaik \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik \
|
||||
-e SANDBOX_USER_ID=$(id -u) \
|
||||
-e WORKSPACE_MOUNT_PATH=$WORKSPACE_BASE \
|
||||
-e LLM_API_KEY=$LLM_API_KEY \
|
||||
@@ -45,7 +45,7 @@ docker run -it \
|
||||
-v ~/.openhands-state:/.openhands-state \
|
||||
--add-host host.docker.internal:host-gateway \
|
||||
--name openhands-app-$(date +%Y%m%d%H%M%S) \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.37 \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.34 \
|
||||
python -m openhands.core.cli
|
||||
```
|
||||
|
||||
|
||||
@@ -32,7 +32,7 @@ Para executar o OpenHands no modo Headless com Docker:
|
||||
```bash
|
||||
docker run -it \
|
||||
--pull=always \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.37-nikolaik \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik \
|
||||
-e SANDBOX_USER_ID=$(id -u) \
|
||||
-e WORKSPACE_MOUNT_PATH=$WORKSPACE_BASE \
|
||||
-e LLM_API_KEY=$LLM_API_KEY \
|
||||
@@ -43,7 +43,7 @@ docker run -it \
|
||||
-v ~/.openhands-state:/.openhands-state \
|
||||
--add-host host.docker.internal:host-gateway \
|
||||
--name openhands-app-$(date +%Y%m%d%H%M%S) \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.37 \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.34 \
|
||||
python -m openhands.core.main -t "escreva um script bash que imprima oi"
|
||||
```
|
||||
|
||||
|
||||
@@ -58,17 +58,17 @@
|
||||
A maneira mais fácil de executar o OpenHands é no Docker.
|
||||
|
||||
```bash
|
||||
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.37-nikolaik
|
||||
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik
|
||||
|
||||
docker run -it --rm --pull=always \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.37-nikolaik \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik \
|
||||
-e LOG_ALL_EVENTS=true \
|
||||
-v /var/run/docker.sock:/var/run/docker.sock \
|
||||
-v ~/.openhands-state:/.openhands-state \
|
||||
-p 3000:3000 \
|
||||
--add-host host.docker.internal:host-gateway \
|
||||
--name openhands-app \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.37
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.34
|
||||
```
|
||||
|
||||
Você encontrará o OpenHands em execução em http://localhost:3000!
|
||||
|
||||
@@ -13,7 +13,7 @@ Quando o OpenHands trabalha com um repositório, ele:
|
||||
|
||||
1. Carrega instruções específicas do repositório de `.openhands/microagents/`, se presentes no repositório.
|
||||
2. Carrega diretrizes gerais acionadas por palavras-chave nas conversas.
|
||||
Veja os [Microagentes Públicos](https://github.com/All-Hands-AI/OpenHands/tree/main/microagents) atuais.
|
||||
Veja os [Microagentes Públicos](https://github.com/All-Hands-AI/OpenHands/tree/main/microagents/knowledge) atuais.
|
||||
|
||||
## Formato do Microagente
|
||||
|
||||
|
||||
@@ -4,7 +4,7 @@
|
||||
|
||||
Microagentes públicos são diretrizes especializadas acionadas por palavras-chave para todos os usuários do OpenHands.
|
||||
Eles são definidos em arquivos markdown no diretório
|
||||
[`microagents/`](https://github.com/All-Hands-AI/OpenHands/tree/main/microagents).
|
||||
[`microagents/`](https://github.com/All-Hands-AI/OpenHands/tree/main/microagents/knowledge).
|
||||
|
||||
Microagentes públicos:
|
||||
- Monitoram comandos recebidos em busca de suas palavras-chave de acionamento.
|
||||
@@ -149,5 +149,5 @@ Lembre-se de:
|
||||
- Otimizar para tempo de build e tamanho da imagem
|
||||
```
|
||||
|
||||
Veja os [microagentes públicos atuais](https://github.com/All-Hands-AI/OpenHands/tree/main/microagents) para
|
||||
Veja os [microagentes públicos atuais](https://github.com/All-Hands-AI/OpenHands/tree/main/microagents/knowledge) para
|
||||
mais exemplos.
|
||||
|
||||
@@ -13,7 +13,7 @@ Este é o Runtime padrão que é usado quando você inicia o OpenHands. Você po
|
||||
|
||||
```
|
||||
docker run # ...
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.37-nikolaik \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik \
|
||||
-v /var/run/docker.sock:/var/run/docker.sock \
|
||||
# ...
|
||||
```
|
||||
|
||||
@@ -50,7 +50,7 @@ LLM_API_KEY="sk_test_12345"
|
||||
```bash
|
||||
docker run -it \
|
||||
--pull=always \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.37-nikolaik \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik \
|
||||
-e SANDBOX_USER_ID=$(id -u) \
|
||||
-e WORKSPACE_MOUNT_PATH=$WORKSPACE_BASE \
|
||||
-e LLM_API_KEY=$LLM_API_KEY \
|
||||
@@ -59,7 +59,7 @@ docker run -it \
|
||||
-v /var/run/docker.sock:/var/run/docker.sock \
|
||||
--add-host host.docker.internal:host-gateway \
|
||||
--name openhands-app-$(date +%Y%m%d%H%M%S) \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.37 \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.34 \
|
||||
python -m openhands.core.cli
|
||||
```
|
||||
|
||||
|
||||
@@ -47,7 +47,7 @@ LLM_API_KEY="sk_test_12345"
|
||||
```bash
|
||||
docker run -it \
|
||||
--pull=always \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.37-nikolaik \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik \
|
||||
-e SANDBOX_USER_ID=$(id -u) \
|
||||
-e WORKSPACE_MOUNT_PATH=$WORKSPACE_BASE \
|
||||
-e LLM_API_KEY=$LLM_API_KEY \
|
||||
@@ -57,6 +57,6 @@ docker run -it \
|
||||
-v /var/run/docker.sock:/var/run/docker.sock \
|
||||
--add-host host.docker.internal:host-gateway \
|
||||
--name openhands-app-$(date +%Y%m%d%H%M%S) \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.37 \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.34 \
|
||||
python -m openhands.core.main -t "write a bash script that prints hi" --no-auto-continue
|
||||
```
|
||||
|
||||
@@ -11,16 +11,16 @@
|
||||
在 Docker 中运行 OpenHands 是最简单的方式。
|
||||
|
||||
```bash
|
||||
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.37-nikolaik
|
||||
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik
|
||||
|
||||
docker run -it --rm --pull=always \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.37-nikolaik \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik \
|
||||
-e LOG_ALL_EVENTS=true \
|
||||
-v /var/run/docker.sock:/var/run/docker.sock \
|
||||
-p 3000:3000 \
|
||||
--add-host host.docker.internal:host-gateway \
|
||||
--name openhands-app \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.37
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.34
|
||||
```
|
||||
|
||||
你也可以在可脚本化的[无头模式](https://docs.all-hands.dev/modules/usage/how-to/headless-mode)下运行 OpenHands,作为[交互式 CLI](https://docs.all-hands.dev/modules/usage/how-to/cli-mode),或使用 [OpenHands GitHub Action](https://docs.all-hands.dev/modules/usage/how-to/github-action)。
|
||||
|
||||
@@ -42,7 +42,7 @@ OpenHands 是一个**自主 AI 软件工程师**,能够执行复杂的工程
|
||||
/>
|
||||
</a>
|
||||
<br></br>
|
||||
<a href="https://join.slack.com/t/openhands-ai/shared_invite/zt-34zm4j0gj-Qz5kRHoca8DFCbqXPS~f_A">
|
||||
<a href="https://join.slack.com/t/openhands-ai/shared_invite/zt-2ngejmfw6-9gW4APWOC9XUp1n~SiQ6iw">
|
||||
<img
|
||||
src="https://img.shields.io/badge/Slack-Join%20Us-red?logo=slack&logoColor=white&style=for-the-badge"
|
||||
alt="Join our Slack community"
|
||||
|
||||
@@ -11,7 +11,7 @@
|
||||
|
||||
```
|
||||
docker run # ...
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.37-nikolaik \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik \
|
||||
-v /var/run/docker.sock:/var/run/docker.sock \
|
||||
# ...
|
||||
```
|
||||
|
||||
@@ -1,8 +0,0 @@
|
||||
{
|
||||
"label": "OpenHands Cloud",
|
||||
"position": 9,
|
||||
"link": {
|
||||
"type": "generated-index",
|
||||
"description": "Documentation for OpenHands Cloud features and services."
|
||||
}
|
||||
}
|
||||
@@ -1,177 +0,0 @@
|
||||
# OpenHands Cloud API
|
||||
|
||||
OpenHands Cloud provides a REST API that allows you to programmatically interact with the service. This is useful if you easily want to kick off your own jobs from your programs in a flexible way.
|
||||
|
||||
This guide explains how to obtain an API key and use the API to start conversations.
|
||||
For more detailed information about the API, refer to the [OpenHands API Reference](https://docs.all-hands.dev/swagger-ui/).
|
||||
|
||||
## Obtaining an API Key
|
||||
|
||||
To use the OpenHands Cloud API, you'll need to generate an API key:
|
||||
|
||||
1. Log in to your [OpenHands Cloud](https://app.all-hands.dev) account
|
||||
2. Navigate to the [Settings page](https://app.all-hands.dev/settings)
|
||||
3. Locate the "API Keys" section
|
||||
4. Click "Generate New Key"
|
||||
5. Give your key a descriptive name (e.g., "Development", "Production")
|
||||
6. Copy the generated API key and store it securely - it will only be shown once
|
||||
|
||||

|
||||
|
||||
## API Usage
|
||||
|
||||
### Starting a New Conversation
|
||||
|
||||
To start a new conversation with OpenHands performing a task, you'll need to make a POST request to the conversation endpoint.
|
||||
|
||||
#### Request Parameters
|
||||
|
||||
| Parameter | Type | Required | Description |
|
||||
|-----------|------|----------|-------------|
|
||||
| `initial_user_msg` | string | Yes | The initial message to start the conversation |
|
||||
| `repository` | string | No | Git repository name to provide context in the format `owner/repo`. You must have access to the repo. |
|
||||
|
||||
#### Examples
|
||||
|
||||
<details>
|
||||
<summary>cURL</summary>
|
||||
|
||||
```bash
|
||||
curl -X POST "https://app.all-hands.dev/api/conversations" \
|
||||
-H "Authorization: Bearer YOUR_API_KEY" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"initial_user_msg": "Check whether there is any incorrect information in the README.md file and send a PR to fix it if so.",
|
||||
"repository": "yourusername/your-repo"
|
||||
}'
|
||||
```
|
||||
</details>
|
||||
|
||||
<details>
|
||||
<summary>Python (with requests)</summary>
|
||||
|
||||
```python
|
||||
import requests
|
||||
|
||||
api_key = "YOUR_API_KEY"
|
||||
url = "https://app.all-hands.dev/api/conversations"
|
||||
|
||||
headers = {
|
||||
"Authorization": f"Bearer {api_key}",
|
||||
"Content-Type": "application/json"
|
||||
}
|
||||
|
||||
data = {
|
||||
"initial_user_msg": "Check whether there is any incorrect information in the README.md file and send a PR to fix it if so.",
|
||||
"repository": "yourusername/your-repo"
|
||||
}
|
||||
|
||||
response = requests.post(url, headers=headers, json=data)
|
||||
conversation = response.json()
|
||||
|
||||
print(f"Conversation Link: https://app.all-hands.dev/conversations/{conversation['id']}")
|
||||
print(f"Status: {conversation['status']}")
|
||||
```
|
||||
</details>
|
||||
|
||||
<details>
|
||||
<summary>TypeScript/JavaScript (with fetch)</summary>
|
||||
|
||||
```typescript
|
||||
const apiKey = "YOUR_API_KEY";
|
||||
const url = "https://app.all-hands.dev/api/conversations";
|
||||
|
||||
const headers = {
|
||||
"Authorization": `Bearer ${apiKey}`,
|
||||
"Content-Type": "application/json"
|
||||
};
|
||||
|
||||
const data = {
|
||||
initial_user_msg: "Check whether there is any incorrect information in the README.md file and send a PR to fix it if so.",
|
||||
repository: "yourusername/your-repo"
|
||||
};
|
||||
|
||||
async function startConversation() {
|
||||
try {
|
||||
const response = await fetch(url, {
|
||||
method: "POST",
|
||||
headers: headers,
|
||||
body: JSON.stringify(data)
|
||||
});
|
||||
|
||||
const conversation = await response.json();
|
||||
|
||||
console.log(`Conversation Link: https://app.all-hands.dev/conversations/${conversation.id}`);
|
||||
console.log(`Status: ${conversation.status}`);
|
||||
|
||||
return conversation;
|
||||
} catch (error) {
|
||||
console.error("Error starting conversation:", error);
|
||||
}
|
||||
}
|
||||
|
||||
startConversation();
|
||||
```
|
||||
|
||||
</details>
|
||||
|
||||
#### Response
|
||||
|
||||
The API will return a JSON object with details about the created conversation:
|
||||
|
||||
```json
|
||||
{
|
||||
"status": "ok",
|
||||
"conversation_id": "abc1234",
|
||||
}
|
||||
```
|
||||
|
||||
You may also receive an `AuthenticationError` if:
|
||||
|
||||
1. You provided an invalid API key
|
||||
2. You provided the wrong repo name
|
||||
3. You don't have access to the repo
|
||||
|
||||
|
||||
### Retrieving Conversation Status
|
||||
|
||||
You can check the status of a conversation by making a GET request to the conversation endpoint.
|
||||
|
||||
#### Endpoint
|
||||
|
||||
```
|
||||
GET https://app.all-hands.dev/api/conversations/{conversation_id}
|
||||
```
|
||||
|
||||
#### Example
|
||||
|
||||
<details>
|
||||
<summary>cURL</summary>
|
||||
|
||||
```bash
|
||||
curl -X GET "https://app.all-hands.dev/api/conversations/{conversation_id}" \
|
||||
-H "Authorization: Bearer YOUR_API_KEY"
|
||||
```
|
||||
</details>
|
||||
|
||||
#### Response
|
||||
|
||||
The response is formatted as follows:
|
||||
|
||||
```json
|
||||
{
|
||||
"conversation_id":"abc1234",
|
||||
"title":"Update README.md",
|
||||
"created_at":"2025-04-29T15:13:51.370706Z",
|
||||
"last_updated_at":"2025-04-29T15:13:57.199210Z",
|
||||
"status":"RUNNING",
|
||||
"selected_repository":"yourusername/your-repo",
|
||||
"trigger":"gui"
|
||||
}
|
||||
```
|
||||
|
||||
## Rate Limits
|
||||
|
||||
The API has a limit of 10 simultaneous conversations per account. If you need a higher limit for your use case, please contact us at [contact@all-hands.dev](mailto:contact@all-hands.dev).
|
||||
|
||||
If you exceed this limit, the API will return a 429 Too Many Requests response.
|
||||
@@ -6,8 +6,6 @@ OpenHands Cloud is the cloud hosted version of OpenHands by All Hands AI.
|
||||
|
||||
OpenHands Cloud can be accessed at https://app.all-hands.dev/.
|
||||
|
||||
You can also interact with OpenHands Cloud programmatically using the [API](./cloud-api).
|
||||
|
||||
## Getting Started
|
||||
|
||||
After visiting OpenHands Cloud, you will be asked to connect with your GitHub or GitLab account:
|
||||
|
||||
@@ -12,7 +12,7 @@ To start an interactive OpenHands session via the command line:
|
||||
2. Run the following command:
|
||||
|
||||
```bash
|
||||
poetry run python -m openhands.cli.main
|
||||
poetry run python -m openhands.core.cli
|
||||
```
|
||||
|
||||
This command will start an interactive session where you can input tasks and receive responses from OpenHands.
|
||||
@@ -35,7 +35,7 @@ To run OpenHands in CLI mode with Docker:
|
||||
```bash
|
||||
docker run -it \
|
||||
--pull=always \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.37-nikolaik \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik \
|
||||
-e SANDBOX_USER_ID=$(id -u) \
|
||||
-e WORKSPACE_MOUNT_PATH=$WORKSPACE_BASE \
|
||||
-e LLM_API_KEY=$LLM_API_KEY \
|
||||
@@ -45,8 +45,8 @@ docker run -it \
|
||||
-v ~/.openhands-state:/.openhands-state \
|
||||
--add-host host.docker.internal:host-gateway \
|
||||
--name openhands-app-$(date +%Y%m%d%H%M%S) \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.37 \
|
||||
python -m openhands.cli.main
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.34 \
|
||||
python -m openhands.core.cli
|
||||
```
|
||||
|
||||
This command will start an interactive session in Docker where you can input tasks and receive responses from OpenHands.
|
||||
|
||||
1
docs/modules/usage/how-to/gitlab-runner.md
Normal file
@@ -0,0 +1 @@
|
||||
# Using GitLab CI Runners
|
||||
@@ -136,6 +136,7 @@ OpenHands automatically exports a `GITLAB_TOKEN` to the shell environment if pro
|
||||
## Tips for Effective Use
|
||||
|
||||
- Be specific in your requests to get the most accurate and helpful responses, as described in the [prompting best practices](../prompting/prompting-best-practices).
|
||||
- Use the workspace panel to explore your project structure.
|
||||
- Use one of the recommended models, as described in the [LLMs section](usage/llms/llms.md).
|
||||
|
||||
Remember, the GUI mode of OpenHands is designed to make your interaction with the AI assistant as smooth and intuitive
|
||||
|
||||
@@ -32,7 +32,7 @@ To run OpenHands in Headless mode with Docker:
|
||||
```bash
|
||||
docker run -it \
|
||||
--pull=always \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.37-nikolaik \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik \
|
||||
-e SANDBOX_USER_ID=$(id -u) \
|
||||
-e WORKSPACE_MOUNT_PATH=$WORKSPACE_BASE \
|
||||
-e LLM_API_KEY=$LLM_API_KEY \
|
||||
@@ -43,7 +43,7 @@ docker run -it \
|
||||
-v ~/.openhands-state:/.openhands-state \
|
||||
--add-host host.docker.internal:host-gateway \
|
||||
--name openhands-app-$(date +%Y%m%d%H%M%S) \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.37 \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.34 \
|
||||
python -m openhands.core.main -t "write a bash script that prints hi"
|
||||
```
|
||||
|
||||
|
||||
@@ -58,17 +58,17 @@ A system with a modern processor and a minimum of **4GB RAM** is recommended to
|
||||
The easiest way to run OpenHands is in Docker.
|
||||
|
||||
```bash
|
||||
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.37-nikolaik
|
||||
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik
|
||||
|
||||
docker run -it --rm --pull=always \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.37-nikolaik \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.34-nikolaik \
|
||||
-e LOG_ALL_EVENTS=true \
|
||||
-v /var/run/docker.sock:/var/run/docker.sock \
|
||||
-v ~/.openhands-state:/.openhands-state \
|
||||
-p 3000:3000 \
|
||||
--add-host host.docker.internal:host-gateway \
|
||||
--name openhands-app \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.37
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.34
|
||||
```
|
||||
|
||||
You'll find OpenHands running at http://localhost:3000!
|
||||
|
||||
@@ -9,9 +9,11 @@
|
||||
### Changes
|
||||
- Shows the file changes performed by OpenHands.
|
||||
|
||||
### VS Code
|
||||
- Embedded VS Code for browsing and modifying files.
|
||||
- Can also be used to upload and download files.
|
||||
### Workspace
|
||||
- Browse project files and directories.
|
||||
- Use the `Open in VS Code` option to:
|
||||
* Modify files
|
||||
* Upload and download files
|
||||
|
||||
### Terminal
|
||||
- A space for OpenHands and users to run terminal commands.
|
||||
|
||||
@@ -1,96 +0,0 @@
|
||||
# Model Context Protocol (MCP)
|
||||
|
||||
:::note
|
||||
This page outlines how to configure and use the Model Context Protocol (MCP) in OpenHands, allowing you to extend the agent's capabilities with custom tools.
|
||||
:::
|
||||
|
||||
## Overview
|
||||
|
||||
Model Context Protocol (MCP) is a mechanism that allows OpenHands to communicate with external tool servers. These servers can provide additional functionality to the agent, such as specialized data processing, external API access, or custom tools. MCP is based on the open standard defined at [modelcontextprotocol.io](https://modelcontextprotocol.io).
|
||||
|
||||
## Configuration
|
||||
|
||||
MCP configuration is defined in the `[mcp]` section of your `config.toml` file.
|
||||
|
||||
### Configuration Example
|
||||
|
||||
```toml
|
||||
[mcp]
|
||||
# SSE Servers - External servers that communicate via Server-Sent Events
|
||||
sse_servers = [
|
||||
# Basic SSE server with just a URL
|
||||
"http://example.com:8080/mcp",
|
||||
|
||||
# SSE server with API key authentication
|
||||
{url="https://secure-example.com/mcp", api_key="your-api-key"}
|
||||
]
|
||||
|
||||
# Stdio Servers - Local processes that communicate via standard input/output
|
||||
stdio_servers = [
|
||||
# Basic stdio server
|
||||
{name="fetch", command="uvx", args=["mcp-server-fetch"]},
|
||||
|
||||
# Stdio server with environment variables
|
||||
{
|
||||
name="data-processor",
|
||||
command="python",
|
||||
args=["-m", "my_mcp_server"],
|
||||
env={
|
||||
"DEBUG": "true",
|
||||
"PORT": "8080"
|
||||
}
|
||||
}
|
||||
]
|
||||
```
|
||||
|
||||
## Configuration Options
|
||||
|
||||
### SSE Servers
|
||||
|
||||
SSE servers are configured using either a string URL or an object with the following properties:
|
||||
|
||||
- `url` (required)
|
||||
- Type: `str`
|
||||
- Description: The URL of the SSE server
|
||||
|
||||
- `api_key` (optional)
|
||||
- Type: `str`
|
||||
- Default: `None`
|
||||
- Description: API key for authentication with the SSE server
|
||||
|
||||
### Stdio Servers
|
||||
|
||||
Stdio servers are configured using an object with the following properties:
|
||||
|
||||
- `name` (required)
|
||||
- Type: `str`
|
||||
- Description: A unique name for the server
|
||||
|
||||
- `command` (required)
|
||||
- Type: `str`
|
||||
- Description: The command to run the server
|
||||
|
||||
- `args` (optional)
|
||||
- Type: `list of str`
|
||||
- Default: `[]`
|
||||
- Description: Command-line arguments to pass to the server
|
||||
|
||||
- `env` (optional)
|
||||
- Type: `dict of str to str`
|
||||
- Default: `{}`
|
||||
- Description: Environment variables to set for the server process
|
||||
|
||||
## How MCP Works
|
||||
|
||||
When OpenHands starts, it:
|
||||
|
||||
1. Reads the MCP configuration from `config.toml`
|
||||
2. Connects to any configured SSE servers
|
||||
3. Starts any configured stdio servers
|
||||
4. Registers the tools provided by these servers with the agent
|
||||
|
||||
The agent can then use these tools just like any built-in tool. When the agent calls an MCP tool:
|
||||
|
||||
1. OpenHands routes the call to the appropriate MCP server
|
||||
2. The server processes the request and returns a response
|
||||
3. OpenHands converts the response to an observation and presents it to the agent
|
||||
@@ -5,9 +5,10 @@
|
||||
Keyword-triggered microagents provide OpenHands with specific instructions that are activated when certain keywords
|
||||
appear in the prompt. This is useful for tailoring behavior based on particular tools, languages, or frameworks.
|
||||
|
||||
## Usage
|
||||
## Microagent File
|
||||
|
||||
These microagents are only loaded when a prompt includes one of the trigger words.
|
||||
Create a keyword-triggered microagent (example: `.openhands/microagents/trigger-keyword.md`) to include instructions
|
||||
that activate only for prompts with specific keywords.
|
||||
|
||||
## Frontmatter Syntax
|
||||
|
||||
@@ -18,21 +19,31 @@ Enclose the frontmatter in triple dashes (---) and include the following fields:
|
||||
|
||||
| Field | Description | Required | Default |
|
||||
|------------|--------------------------------------------------|----------|------------------|
|
||||
| `name` | A unique identifier for the microagent. | Yes | 'default' |
|
||||
| `type` | Type of microagent. Must be set to `knowledge`. | Yes | 'repo' |
|
||||
| `triggers` | A list of keywords that activate the microagent. | Yes | None |
|
||||
| `agent` | The agent this microagent applies to. | No | 'CodeActAgent' |
|
||||
|
||||
|
||||
## Example
|
||||
|
||||
Keyword-triggered microagent file example located at `.openhands/microagents/yummy.md`:
|
||||
```
|
||||
---
|
||||
name: magic_word
|
||||
type: knowledge
|
||||
triggers:
|
||||
- yummyhappy
|
||||
- happyyummy
|
||||
agent: CodeActAgent
|
||||
---
|
||||
|
||||
The user has said the magic word. Respond with "That was delicious!"
|
||||
```
|
||||
|
||||
[See examples of microagents triggered by keywords in the official OpenHands repository](https://github.com/All-Hands-AI/OpenHands/tree/main/microagents)
|
||||
Keyword-triggered microagents:
|
||||
- Monitor incoming prompts for specified trigger words.
|
||||
- Activate when relevant triggers are detected.
|
||||
- Apply their specialized knowledge and capabilities.
|
||||
- Follow defined guidelines and restrictions.
|
||||
|
||||
[See examples of microagents triggered by keywords in the official OpenHands repository](https://github.com/All-Hands-AI/OpenHands/tree/main/microagents/knowledge)
|
||||
|
||||
@@ -34,7 +34,7 @@ some-repository/
|
||||
Each microagent file may include frontmatter that provides additional information. In some cases, this frontmatter
|
||||
is required:
|
||||
|
||||
| Microagent Type | Required |
|
||||
|----------------------------------|----------|
|
||||
| `General Repository Microagents` | No |
|
||||
| `Keyword-Triggered Microagents` | Yes |
|
||||
| Microagent Type | Frontmatter Requirement |
|
||||
|----------------------------------|-------------------------------------------------------|
|
||||
| `General Repository Microagents` | Required only if more than one of this type exists. |
|
||||
| `Keyword-Triggered Microagents` | Required. |
|
||||
|
||||
@@ -2,8 +2,7 @@
|
||||
|
||||
## Overview
|
||||
|
||||
Global microagents are [keyword-triggered microagents](./microagents-keyword) that apply to all OpenHands users. A list of the current
|
||||
global microagents can be found [in the OpenHands repository](https://github.com/All-Hands-AI/OpenHands/tree/main/microagents).
|
||||
Global microagents are [keyword-triggered microagents](./microagents-keyword) that apply to all OpenHands users.
|
||||
|
||||
## Contributing a Global Microagent
|
||||
|
||||
|
||||
@@ -4,24 +4,31 @@
|
||||
|
||||
General guidelines for OpenHands to work more effectively with the repository.
|
||||
|
||||
## Usage
|
||||
## Microagent File
|
||||
|
||||
These microagents are always loaded as part of the context.
|
||||
Create a general repository microagent (example: `.openhands/microagents/repo.md`) to include
|
||||
project-specific instructions, team practices, coding standards, and architectural guidelines that are relevant for
|
||||
**all** prompts in that repository.
|
||||
|
||||
## Frontmatter Syntax
|
||||
|
||||
The frontmatter for this type of microagent is optional.
|
||||
The frontmatter for this type of microagent is optional, unless you plan to include more than one general
|
||||
repository microagent.
|
||||
|
||||
Frontmatter should be enclosed in triple dashes (---) and may include the following fields:
|
||||
|
||||
| Field | Description | Required | Default |
|
||||
|-----------|-----------------------------------------|----------|----------------|
|
||||
| `agent` | The agent this microagent applies to | No | 'CodeActAgent' |
|
||||
| Field | Description | Required | Default |
|
||||
|-----------|-----------------------------------------|--------------------------------------------------------------------|----------------|
|
||||
| `name` | A unique identifier for the microagent | Required only if using more than one general repository microagent | 'default' |
|
||||
| `agent` | The agent this microagent applies to | No | 'CodeActAgent' |
|
||||
|
||||
## Example
|
||||
|
||||
General repository microagent file example located at `.openhands/microagents/repo.md`:
|
||||
```
|
||||
---
|
||||
name: repo
|
||||
---
|
||||
|
||||
This project is a TODO application that allows users to track TODO items.
|
||||
|
||||
To set it up, you can run `npm run build`.
|
||||
|
||||
@@ -4,38 +4,6 @@
|
||||
OpenHands only supports Windows via WSL. Please be sure to run all commands inside your WSL terminal.
|
||||
:::
|
||||
|
||||
### Unable to access VS Code tab via local IP
|
||||
|
||||
**Description**
|
||||
|
||||
When accessing OpenHands through a non-localhost URL (such as a LAN IP address), the VS Code tab shows a "Forbidden" error, while other parts of the UI work fine.
|
||||
|
||||
**Resolution**
|
||||
|
||||
This happens because VS Code runs on a random high port that may not be exposed or accessible from other machines. To fix this:
|
||||
|
||||
1. Set a specific port for VS Code using the `SANDBOX_VSCODE_PORT` environment variable:
|
||||
```bash
|
||||
docker run -it --rm \
|
||||
-e SANDBOX_VSCODE_PORT=41234 \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:latest \
|
||||
-v /var/run/docker.sock:/var/run/docker.sock \
|
||||
-v ~/.openhands-state:/.openhands-state \
|
||||
-p 3000:3000 \
|
||||
-p 41234:41234 \
|
||||
--add-host host.docker.internal:host-gateway \
|
||||
--name openhands-app \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:latest
|
||||
```
|
||||
|
||||
2. Make sure to expose the same port with `-p 41234:41234` in your Docker command.
|
||||
|
||||
3. Alternatively, you can set this in your `config.toml` file:
|
||||
```toml
|
||||
[sandbox]
|
||||
vscode_port = 41234
|
||||
```
|
||||
|
||||
### Launch docker client failed
|
||||
|
||||
**Description**
|
||||
|
||||
1120
docs/package-lock.json
generated
@@ -22,9 +22,7 @@
|
||||
"@docusaurus/preset-classic": "^3.7.0",
|
||||
"@docusaurus/theme-mermaid": "^3.7.0",
|
||||
"@mdx-js/react": "^3.1.0",
|
||||
"@node-rs/jieba": "^2.0.1",
|
||||
"clsx": "^2.0.0",
|
||||
"docusaurus-lunr-search": "^3.6.0",
|
||||
"prism-react-renderer": "^2.4.1",
|
||||
"react": "^19.1.0",
|
||||
"react-dom": "^19.1.0",
|
||||
@@ -54,5 +52,5 @@
|
||||
"engines": {
|
||||
"node": ">=18.0"
|
||||
},
|
||||
"packageManager": "npm@10.5.0"
|
||||
"packageManager": "yarn@1.22.22+sha512.a6b2f7906b721bba3d67d4aff083df04dad64c399707841b7acf00f6b133b7ac24255f2652fa22ae3534329dc6180534e98d17432037ff6fd140556e2bb3137e"
|
||||
}
|
||||
|
||||
@@ -27,11 +27,7 @@ const sidebars: SidebarsConfig = {
|
||||
label: 'Openhands Cloud',
|
||||
id: 'usage/cloud/openhands-cloud',
|
||||
},
|
||||
{
|
||||
type: 'doc',
|
||||
label: 'Cloud API',
|
||||
id: 'usage/cloud/cloud-api',
|
||||
},
|
||||
|
||||
{
|
||||
type: 'doc',
|
||||
label: 'Cloud GitHub Resolver',
|
||||
@@ -215,11 +211,6 @@ const sidebars: SidebarsConfig = {
|
||||
label: 'Custom Sandbox',
|
||||
id: 'usage/how-to/custom-sandbox-guide',
|
||||
},
|
||||
{
|
||||
type: 'doc',
|
||||
label: 'MCP',
|
||||
id: 'usage/mcp',
|
||||
}
|
||||
],
|
||||
},
|
||||
{
|
||||
|
||||
@@ -8,7 +8,7 @@ function CustomFooter() {
|
||||
<footer className="custom-footer">
|
||||
<div className="footer-content">
|
||||
<div className="footer-icons">
|
||||
<a href="https://join.slack.com/t/openhands-ai/shared_invite/zt-34zm4j0gj-Qz5kRHoca8DFCbqXPS~f_A" target="_blank" rel="noopener noreferrer">
|
||||
<a href="https://join.slack.com/t/openhands-ai/shared_invite/zt-2ngejmfw6-9gW4APWOC9XUp1n~SiQ6iw" target="_blank" rel="noopener noreferrer">
|
||||
<FaSlack />
|
||||
</a>
|
||||
<a href="https://discord.gg/ESHStjSjD4" target="_blank" rel="noopener noreferrer">
|
||||
|
||||
@@ -45,14 +45,15 @@ export function HomepageHeader() {
|
||||
<div align="center" className="header-links">
|
||||
<a href="https://github.com/All-Hands-AI/OpenHands/graphs/contributors"><img src="https://img.shields.io/github/contributors/All-Hands-AI/OpenHands?style=for-the-badge&color=blue" alt="Contributors" /></a>
|
||||
<a href="https://github.com/All-Hands-AI/OpenHands/stargazers"><img src="https://img.shields.io/github/stars/All-Hands-AI/OpenHands?style=for-the-badge&color=blue" alt="Stargazers" /></a>
|
||||
<a href="https://codecov.io/github/All-Hands-AI/OpenHands?branch=main"><img alt="CodeCov" src="https://img.shields.io/codecov/c/github/All-Hands-AI/OpenHands?style=for-the-badge&color=blue" /></a>
|
||||
<a href="https://github.com/All-Hands-AI/OpenHands/blob/main/LICENSE"><img src="https://img.shields.io/github/license/All-Hands-AI/OpenHands?style=for-the-badge&color=blue" alt="MIT License" /></a>
|
||||
<br/>
|
||||
<a href="https://join.slack.com/t/openhands-ai/shared_invite/zt-34zm4j0gj-Qz5kRHoca8DFCbqXPS~f_A"><img src="https://img.shields.io/badge/Slack-Join%20Us-red?logo=slack&logoColor=white&style=for-the-badge" alt="Join our Slack community" /></a>
|
||||
<a href="https://join.slack.com/t/openhands-ai/shared_invite/zt-2ngejmfw6-9gW4APWOC9XUp1n~SiQ6iw"><img src="https://img.shields.io/badge/Slack-Join%20Us-red?logo=slack&logoColor=white&style=for-the-badge" alt="Join our Slack community" /></a>
|
||||
<a href="https://discord.gg/ESHStjSjD4"><img src="https://img.shields.io/badge/Discord-Join%20Us-purple?logo=discord&logoColor=white&style=for-the-badge" alt="Join our Discord community" /></a>
|
||||
<a href="https://github.com/All-Hands-AI/OpenHands/blob/main/CREDITS.md"><img src="https://img.shields.io/badge/Project-Credits-blue?style=for-the-badge&color=FFE165&logo=github&logoColor=white" alt="Credits" /></a>
|
||||
<br/>
|
||||
<a href="https://arxiv.org/abs/2407.16741"><img src="https://img.shields.io/badge/Paper%20on%20Arxiv-000?logoColor=FFE165&logo=arxiv&style=for-the-badge" alt="Paper on Arxiv" /></a>
|
||||
<a href="https://docs.google.com/spreadsheets/d/1wOUdFCMyY6Nt0AIqF705KN4JKOWgeI4wUGUP60krXXs/edit?gid=0#gid=0"><img src="https://img.shields.io/badge/Benchmark%20score-000?logoColor=FFE165&logo=huggingface&style=for-the-badge" alt="Evaluation Benchmark Score" /></a>
|
||||
<a href="https://huggingface.co/spaces/OpenHands/evaluation"><img src="https://img.shields.io/badge/Benchmark%20score-000?logoColor=FFE165&logo=huggingface&style=for-the-badge" alt="Evaluation Benchmark Score" /></a>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
BIN
docs/static/img/docs/api-key-generation.png
vendored
|
Before Width: | Height: | Size: 26 KiB |
BIN
docs/static/img/oh-features.png
vendored
|
Before Width: | Height: | Size: 144 KiB After Width: | Height: | Size: 120 KiB |
15
docs/static/openapi.json
vendored
@@ -858,15 +858,14 @@
|
||||
"schema": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"repository": {
|
||||
"type": "string",
|
||||
"selected_repository": {
|
||||
"type": "object",
|
||||
"nullable": true,
|
||||
"description": "Full name of the repository (e.g., owner/repo)"
|
||||
},
|
||||
"git_provider": {
|
||||
"type": "string",
|
||||
"nullable": true,
|
||||
"description": "The Git provider (e.g., github or gitlab). If omitted, all configured providers are checked for the repository."
|
||||
"properties": {
|
||||
"full_name": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
},
|
||||
"selected_branch": {
|
||||
"type": "string",
|
||||
|
||||
10103
docs/yarn.lock
Normal file
@@ -1,65 +0,0 @@
|
||||
# Multi-swe-bench Evaluation with OpenHands
|
||||
|
||||
## LLM Setup
|
||||
|
||||
Please follow [here](../../README.md#setup).
|
||||
|
||||
## Dataset Preparing
|
||||
|
||||
Please download the [**Multi-SWE-Bench** dataset](https://huggingface.co/datasets/bytedance-research/Multi-SWE-Bench).
|
||||
And change the dataset following [script](scripts/data/data_change.py).
|
||||
|
||||
```bash
|
||||
python evaluation/benchmarks/multi_swe_bench/scripts/data/data_change.py
|
||||
```
|
||||
|
||||
## Docker image download
|
||||
|
||||
Please download the multi-swe-bench dokcer images from [here](https://github.com/multi-swe-bench/multi-swe-bench?tab=readme-ov-file#run-evaluation).
|
||||
|
||||
## Generate patch
|
||||
|
||||
Please edit the [script](infer.sh) and run it.
|
||||
|
||||
```bash
|
||||
bash evaluation/benchmarks/multi_swe_bench/infer.sh
|
||||
```
|
||||
|
||||
Script variable explanation:
|
||||
|
||||
- `models`, e.g. `llm.eval_gpt4_1106_preview`, is the config group name for your
|
||||
LLM settings, as defined in your `config.toml`.
|
||||
- `git-version`, e.g. `HEAD`, is the git commit hash of the OpenHands version you would
|
||||
like to evaluate. It could also be a release tag like `0.6.2`.
|
||||
- `agent`, e.g. `CodeActAgent`, is the name of the agent for benchmarks, defaulting to `CodeActAgent`.
|
||||
- `eval_limit`, e.g. `10`, limits the evaluation to the first `eval_limit` instances. By
|
||||
default, the script evaluates the (500 issues), which will no exceed the maximum of the dataset number.
|
||||
- `max_iter`, e.g. `20`, is the maximum number of iterations for the agent to run. By
|
||||
default, it is set to 50.
|
||||
- `num_workers`, e.g. `3`, is the number of parallel workers to run the evaluation. By
|
||||
default, it is set to 1.
|
||||
- `language`, the language of your evaluating dataset.
|
||||
- `dataset`, the absolute position of the dataset jsonl.
|
||||
|
||||
The results will be generated in evaluation/evaluation_outputs/outputs/XXX/CodeActAgent/YYY/output.jsonl, you can refer to the [example](examples/output.jsonl).
|
||||
|
||||
## Runing evaluation
|
||||
|
||||
First, install [multi-swe-bench](https://github.com/multi-swe-bench/multi-swe-bench).
|
||||
|
||||
```bash
|
||||
pip install multi-swe-bench
|
||||
```
|
||||
|
||||
Second, convert the output.jsonl to patch.jsonl with [script](scripts/eval/convert.py), you can refer to the [example](examples/patch.jsonl).
|
||||
|
||||
```bash
|
||||
python evaluation/benchmarks/multi_swe_bench/scripts/eval/convert.py
|
||||
```
|
||||
|
||||
Finally, evaluate with multi-swe-bench.
|
||||
The config file config.json can be refer to the [example](examples/config.json) or [github](https://github.com/multi-swe-bench/multi-swe-bench/tree/main?tab=readme-ov-file#configuration-file-example).
|
||||
|
||||
```bash
|
||||
python -m multi_swe_bench.harness.run_evaluation --config config.json
|
||||
```
|
||||
@@ -1,456 +0,0 @@
|
||||
import json
|
||||
import os
|
||||
import subprocess
|
||||
import tempfile
|
||||
import time
|
||||
from functools import partial
|
||||
|
||||
import pandas as pd
|
||||
from swebench.harness.grading import get_eval_report
|
||||
from swebench.harness.run_evaluation import (
|
||||
APPLY_PATCH_FAIL,
|
||||
APPLY_PATCH_PASS,
|
||||
)
|
||||
from swebench.harness.test_spec import SWEbenchInstance, TestSpec, make_test_spec
|
||||
from swebench.harness.utils import load_swebench_dataset
|
||||
from tqdm import tqdm
|
||||
|
||||
from evaluation.benchmarks.swe_bench.resource.mapping import (
|
||||
get_instance_resource_factor,
|
||||
)
|
||||
from evaluation.benchmarks.swe_bench.run_infer import get_instance_docker_image
|
||||
from evaluation.utils.shared import (
|
||||
EvalMetadata,
|
||||
EvalOutput,
|
||||
get_default_sandbox_config_for_eval,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
run_evaluation,
|
||||
)
|
||||
from openhands.core.config import (
|
||||
AppConfig,
|
||||
LLMConfig,
|
||||
get_parser,
|
||||
)
|
||||
from openhands.core.logger import openhands_logger as logger
|
||||
from openhands.core.main import create_runtime
|
||||
from openhands.events.action import CmdRunAction
|
||||
from openhands.events.observation import CmdOutputObservation
|
||||
from openhands.utils.async_utils import call_async_from_sync
|
||||
|
||||
# TODO: migrate all swe-bench docker to ghcr.io/openhands
|
||||
DOCKER_IMAGE_PREFIX = os.environ.get('EVAL_DOCKER_IMAGE_PREFIX', 'docker.io/xingyaoww/')
|
||||
logger.info(f'Using docker image prefix: {DOCKER_IMAGE_PREFIX}')
|
||||
|
||||
|
||||
def process_git_patch(patch):
|
||||
if not isinstance(patch, str):
|
||||
return ''
|
||||
|
||||
if not patch.strip():
|
||||
# skip empty patches
|
||||
return ''
|
||||
|
||||
patch = patch.replace('\r\n', '\n')
|
||||
# There might be some weird characters at the beginning of the patch
|
||||
# due to some OpenHands inference command outputs
|
||||
|
||||
# FOR EXAMPLE:
|
||||
# git diff --no-color --cached 895f28f9cbed817c00ab68770433170d83132d90
|
||||
# [A[C[C[C[C[C[C[C[C[C[C[C[C[C[C[C[C[C[C[C[C[C[C[C[C[C[C[C[C[C[C[C[C[C[C[C[C[C[C[C[C[C[C[C[C[C[C[C[C[C[C[C[C[C[C[C[C[C[C[C[C[C[C[C[C[C[C[C[C[C[C[C[C[C[C[C[C[C[C[C[K0
|
||||
# diff --git a/django/db/models/sql/.backup.query.py b/django/db/models/sql/.backup.query.py
|
||||
# new file mode 100644
|
||||
# index 0000000000..fc13db5948
|
||||
|
||||
# We "find" the first line that starts with "diff" and then we remove lines before it
|
||||
lines = patch.split('\n')
|
||||
for i, line in enumerate(lines):
|
||||
if line.startswith('diff --git'):
|
||||
patch = '\n'.join(lines[i:])
|
||||
break
|
||||
|
||||
patch = patch.rstrip() + '\n' # Make sure the last line ends with a newline
|
||||
return patch
|
||||
|
||||
|
||||
def get_config(metadata: EvalMetadata, instance: pd.Series) -> AppConfig:
|
||||
# We use a different instance image for the each instance of swe-bench eval
|
||||
base_container_image = get_instance_docker_image(instance['instance_id'])
|
||||
logger.info(
|
||||
f'Using instance container image: {base_container_image}. '
|
||||
f'Please make sure this image exists. '
|
||||
f'Submit an issue on https://github.com/All-Hands-AI/OpenHands if you run into any issues.'
|
||||
)
|
||||
sandbox_config = get_default_sandbox_config_for_eval()
|
||||
sandbox_config.base_container_image = base_container_image
|
||||
sandbox_config.remote_runtime_resource_factor = get_instance_resource_factor(
|
||||
dataset_name=metadata.dataset,
|
||||
instance_id=instance['instance_id'],
|
||||
)
|
||||
config = AppConfig(
|
||||
run_as_openhands=False,
|
||||
runtime=os.environ.get('RUNTIME', 'docker'),
|
||||
sandbox=sandbox_config,
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
)
|
||||
return config
|
||||
|
||||
|
||||
def process_instance(
|
||||
instance: pd.Series,
|
||||
metadata: EvalMetadata,
|
||||
reset_logger: bool = True,
|
||||
log_dir: str | None = None,
|
||||
runtime_failure_count: int = 0,
|
||||
) -> EvalOutput:
|
||||
"""
|
||||
Evaluate agent performance on a SWE-bench problem instance.
|
||||
|
||||
Note that this signature differs from the expected input to `run_evaluation`. Use
|
||||
`functools.partial` to provide optional arguments before passing to the evaluation harness.
|
||||
|
||||
Args:
|
||||
log_dir (str | None, default=None): Path to directory where log files will be written. Must
|
||||
be provided if `reset_logger` is set.
|
||||
|
||||
Raises:
|
||||
AssertionError: if the `reset_logger` flag is set without a provided log directory.
|
||||
"""
|
||||
# Setup the logger properly, so you can run multi-processing to parallelize the evaluation
|
||||
if reset_logger:
|
||||
assert (
|
||||
log_dir is not None
|
||||
), "Can't reset logger without a provided log directory."
|
||||
os.makedirs(log_dir, exist_ok=True)
|
||||
reset_logger_for_multiprocessing(logger, instance.instance_id, log_dir)
|
||||
else:
|
||||
logger.info(f'Starting evaluation for instance {instance.instance_id}.')
|
||||
|
||||
config = get_config(metadata, instance)
|
||||
instance_id = instance.instance_id
|
||||
model_patch = instance['model_patch']
|
||||
test_spec: TestSpec = instance['test_spec']
|
||||
logger.info(f'Starting evaluation for instance {instance_id}.')
|
||||
|
||||
if 'test_result' not in instance.keys():
|
||||
instance['test_result'] = {}
|
||||
instance['test_result']['report'] = {
|
||||
'empty_generation': False,
|
||||
'resolved': False,
|
||||
'failed_apply_patch': False,
|
||||
'error_eval': False,
|
||||
'test_timeout': False,
|
||||
}
|
||||
|
||||
if model_patch == '':
|
||||
instance['test_result']['report']['empty_generation'] = True
|
||||
return EvalOutput(
|
||||
instance_id=instance_id,
|
||||
test_result=instance['test_result'],
|
||||
metadata=metadata,
|
||||
)
|
||||
|
||||
# Increase resource_factor with increasing attempt_id
|
||||
if runtime_failure_count > 0:
|
||||
config.sandbox.remote_runtime_resource_factor = min(
|
||||
config.sandbox.remote_runtime_resource_factor * (2**runtime_failure_count),
|
||||
8,
|
||||
)
|
||||
logger.warning(
|
||||
f'This is the {runtime_failure_count + 1}th attempt for instance {instance.instance_id}, setting resource factor to {config.sandbox.remote_runtime_resource_factor}'
|
||||
)
|
||||
|
||||
try:
|
||||
runtime = create_runtime(config)
|
||||
call_async_from_sync(runtime.connect)
|
||||
# Get patch and save it to /tmp/patch.diff
|
||||
with tempfile.TemporaryDirectory() as temp_dir:
|
||||
# Patch file
|
||||
patch_file_path = os.path.join(temp_dir, 'patch.diff')
|
||||
with open(patch_file_path, 'w') as f:
|
||||
f.write(model_patch)
|
||||
runtime.copy_to(patch_file_path, '/tmp')
|
||||
# Eval script
|
||||
eval_script_path = os.path.join(temp_dir, 'eval.sh')
|
||||
with open(eval_script_path, 'w') as f:
|
||||
f.write(test_spec.eval_script)
|
||||
runtime.copy_to(eval_script_path, '/tmp')
|
||||
|
||||
# Set +x
|
||||
action = CmdRunAction(command='chmod +x /tmp/eval.sh')
|
||||
action.set_hard_timeout(600)
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert obs.exit_code == 0
|
||||
|
||||
# Apply patch
|
||||
exec_command = (
|
||||
'cd /testbed && '
|
||||
"(git apply -v /tmp/patch.diff && echo 'APPLY_PATCH_PASS' || "
|
||||
"(echo 'Failed to apply patch with git apply, trying with patch command...' && "
|
||||
"(patch --batch --fuzz=5 -p1 -i /tmp/patch.diff && echo 'APPLY_PATCH_PASS' || "
|
||||
"echo 'APPLY_PATCH_FAIL')))"
|
||||
)
|
||||
action = CmdRunAction(command=exec_command)
|
||||
action.set_hard_timeout(600)
|
||||
obs = runtime.run_action(action)
|
||||
assert isinstance(obs, CmdOutputObservation)
|
||||
apply_patch_output = obs.content
|
||||
assert isinstance(apply_patch_output, str)
|
||||
instance['test_result']['apply_patch_output'] = apply_patch_output
|
||||
|
||||
if 'APPLY_PATCH_FAIL' in apply_patch_output:
|
||||
logger.info(f'[{instance_id}] {APPLY_PATCH_FAIL}:\n{apply_patch_output}')
|
||||
instance['test_result']['report']['failed_apply_patch'] = True
|
||||
|
||||
return EvalOutput(
|
||||
instance_id=instance_id,
|
||||
test_result=instance['test_result'],
|
||||
metadata=metadata,
|
||||
)
|
||||
elif 'APPLY_PATCH_PASS' in apply_patch_output:
|
||||
logger.info(f'[{instance_id}] {APPLY_PATCH_PASS}:\n{apply_patch_output}')
|
||||
|
||||
# Run eval script in background and save output to log file
|
||||
log_file = '/tmp/eval_output.log'
|
||||
action = CmdRunAction(command=f'/tmp/eval.sh > {log_file} 2>&1 & echo $!')
|
||||
action.set_hard_timeout(300) # Short timeout just to get the process ID
|
||||
obs = runtime.run_action(action)
|
||||
|
||||
if isinstance(obs, CmdOutputObservation) and obs.exit_code == 0:
|
||||
pid = obs.content.split()[-1].strip()
|
||||
logger.info(
|
||||
f'[{instance_id}] Evaluation process started with PID: {pid}'
|
||||
)
|
||||
|
||||
# Poll for completion
|
||||
start_time = time.time()
|
||||
timeout = 1800 # 30 minutes
|
||||
while True:
|
||||
seconds_elapsed = time.time() - start_time
|
||||
if seconds_elapsed > timeout:
|
||||
logger.info(
|
||||
f'[{instance_id}] Evaluation timed out after {timeout} seconds'
|
||||
)
|
||||
instance['test_result']['report']['test_timeout'] = True
|
||||
break
|
||||
check_action = CmdRunAction(
|
||||
command=f'ps -p {pid} > /dev/null; echo $?'
|
||||
)
|
||||
check_action.set_hard_timeout(300)
|
||||
check_obs = runtime.run_action(check_action)
|
||||
if (
|
||||
isinstance(check_obs, CmdOutputObservation)
|
||||
and check_obs.content.split()[-1].strip() == '1'
|
||||
):
|
||||
logger.info(
|
||||
f'[{instance_id}] Evaluation process completed after {seconds_elapsed} seconds'
|
||||
)
|
||||
break
|
||||
logger.info(
|
||||
f'[{instance_id}] [{seconds_elapsed:.0f}s] Evaluation still running, waiting...'
|
||||
)
|
||||
time.sleep(30) # Wait for 30 seconds before checking again
|
||||
|
||||
# Read the log file
|
||||
cat_action = CmdRunAction(command=f'cat {log_file}')
|
||||
cat_action.set_hard_timeout(300)
|
||||
cat_obs = runtime.run_action(cat_action)
|
||||
|
||||
# Grade answer
|
||||
if isinstance(cat_obs, CmdOutputObservation) and cat_obs.exit_code == 0:
|
||||
test_output = cat_obs.content
|
||||
assert isinstance(test_output, str)
|
||||
instance['test_result']['test_output'] = test_output
|
||||
|
||||
# Get report from test output
|
||||
logger.info(f'[{instance_id}] Grading answer...')
|
||||
with tempfile.TemporaryDirectory() as temp_dir:
|
||||
# Create a directory structure that matches the expected format
|
||||
# NOTE: this is a hack to make the eval report format consistent
|
||||
# with the original SWE-Bench eval script
|
||||
log_dir = os.path.join(temp_dir, 'logs', instance_id.lower())
|
||||
os.makedirs(log_dir, exist_ok=True)
|
||||
test_output_path = os.path.join(log_dir, 'test_output.txt')
|
||||
with open(test_output_path, 'w') as f:
|
||||
f.write(test_output)
|
||||
try:
|
||||
_report = get_eval_report(
|
||||
test_spec=test_spec,
|
||||
prediction={
|
||||
'model_patch': model_patch,
|
||||
'instance_id': instance_id,
|
||||
},
|
||||
log_path=test_output_path,
|
||||
include_tests_status=True,
|
||||
)
|
||||
report = _report[instance_id]
|
||||
logger.info(
|
||||
f"[{instance_id}] report: {report}\nResult for {instance_id}: resolved: {report['resolved']}"
|
||||
)
|
||||
instance['test_result']['report']['resolved'] = report[
|
||||
'resolved'
|
||||
]
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
f'[{instance_id}] Error when getting eval report: {e}'
|
||||
)
|
||||
instance['test_result']['report']['resolved'] = False
|
||||
instance['test_result']['report']['error_eval'] = True
|
||||
else:
|
||||
logger.info(f'[{instance_id}] Error when starting eval:\n{obs.content}')
|
||||
instance['test_result']['report']['error_eval'] = True
|
||||
|
||||
return EvalOutput(
|
||||
instance_id=instance_id,
|
||||
test_result=instance['test_result'],
|
||||
metadata=metadata,
|
||||
)
|
||||
else:
|
||||
logger.info(
|
||||
f'[{instance_id}] Unexpected output when applying patch:\n{apply_patch_output}'
|
||||
)
|
||||
raise RuntimeError(
|
||||
instance_id,
|
||||
f'Unexpected output when applying patch:\n{apply_patch_output}',
|
||||
logger,
|
||||
)
|
||||
finally:
|
||||
runtime.close()
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
parser = get_parser()
|
||||
parser.add_argument(
|
||||
'--input-file',
|
||||
type=str,
|
||||
help='Path to input predictions file',
|
||||
required=True,
|
||||
)
|
||||
parser.add_argument(
|
||||
'--dataset',
|
||||
type=str,
|
||||
default='princeton-nlp/SWE-bench',
|
||||
help='data set to evaluate on, either full-test or lite-test',
|
||||
)
|
||||
parser.add_argument(
|
||||
'--split',
|
||||
type=str,
|
||||
default='test',
|
||||
help='split to evaluate on',
|
||||
)
|
||||
args, _ = parser.parse_known_args()
|
||||
|
||||
# Load SWE-Bench dataset
|
||||
full_dataset: list[SWEbenchInstance] = load_swebench_dataset(
|
||||
args.dataset, args.split
|
||||
)
|
||||
instance_id_to_instance = {
|
||||
instance['instance_id']: instance for instance in full_dataset
|
||||
}
|
||||
logger.info(
|
||||
f'Loaded dataset {args.dataset} with split {args.split} to run inference on.'
|
||||
)
|
||||
|
||||
# Load predictions
|
||||
assert args.input_file.endswith('.jsonl'), 'Input file must be a jsonl file.'
|
||||
required_fields = ['instance_id', 'model_patch', 'test_result']
|
||||
with open(args.input_file) as f:
|
||||
predictions = pd.DataFrame.from_records(
|
||||
[
|
||||
{k: v for k, v in json.loads(line).items() if k in required_fields}
|
||||
for line in tqdm(f, desc='Loading predictions')
|
||||
]
|
||||
)
|
||||
assert (
|
||||
'instance_id' in predictions.columns
|
||||
), 'Input file must contain instance_id column.'
|
||||
|
||||
if 'model_patch' not in predictions.columns and (
|
||||
'test_result' in predictions.columns
|
||||
and 'model_patch' in predictions['test_result'].iloc[0]
|
||||
):
|
||||
raise ValueError(
|
||||
'Input file must contain model_patch column OR test_result column with model_patch field.'
|
||||
)
|
||||
assert len(predictions['instance_id'].unique()) == len(
|
||||
predictions
|
||||
), 'instance_id column must be unique.'
|
||||
|
||||
if 'model_patch' not in predictions.columns:
|
||||
predictions['model_patch'] = predictions['test_result'].apply(
|
||||
lambda x: x.get('git_patch', '')
|
||||
)
|
||||
assert {'instance_id', 'model_patch'}.issubset(
|
||||
set(predictions.columns)
|
||||
), 'Input file must contain instance_id and model_patch columns.'
|
||||
|
||||
# Process model_patch
|
||||
predictions['model_patch'] = predictions['model_patch'].apply(process_git_patch)
|
||||
|
||||
# Merge predictions with dataset
|
||||
predictions['instance'] = predictions['instance_id'].apply(
|
||||
lambda x: instance_id_to_instance[x]
|
||||
)
|
||||
predictions['test_spec'] = predictions['instance'].apply(make_test_spec)
|
||||
|
||||
# Prepare dataset
|
||||
output_file = args.input_file.replace('.jsonl', '.swebench_eval.jsonl')
|
||||
instances = prepare_dataset(predictions, output_file, args.eval_n_limit)
|
||||
|
||||
# If possible, load the relevant metadata to avoid issues with `run_evaluation`.
|
||||
metadata: EvalMetadata | None = None
|
||||
metadata_filepath = os.path.join(os.path.dirname(args.input_file), 'metadata.json')
|
||||
if os.path.exists(metadata_filepath):
|
||||
with open(metadata_filepath, 'r') as metadata_file:
|
||||
data = metadata_file.read()
|
||||
metadata = EvalMetadata.model_validate_json(data)
|
||||
else:
|
||||
# Initialize with a dummy metadata when file doesn't exist
|
||||
metadata = EvalMetadata(
|
||||
agent_class='dummy_agent', # Placeholder agent class
|
||||
llm_config=LLMConfig(model='dummy_model'), # Minimal LLM config
|
||||
max_iterations=1, # Minimal iterations
|
||||
eval_output_dir=os.path.dirname(
|
||||
args.input_file
|
||||
), # Use input file dir as output dir
|
||||
start_time=time.strftime('%Y-%m-%d %H:%M:%S'), # Current time
|
||||
git_commit=subprocess.check_output(['git', 'rev-parse', 'HEAD'])
|
||||
.decode('utf-8')
|
||||
.strip(), # Current commit
|
||||
dataset=args.dataset, # Dataset name from args
|
||||
)
|
||||
|
||||
# The evaluation harness constrains the signature of `process_instance_func` but we need to
|
||||
# pass extra information. Build a new function object to avoid issues with multiprocessing.
|
||||
process_instance_func = partial(
|
||||
process_instance, log_dir=output_file.replace('.jsonl', '.logs')
|
||||
)
|
||||
|
||||
run_evaluation(
|
||||
instances,
|
||||
metadata=metadata,
|
||||
output_file=output_file,
|
||||
num_workers=args.eval_num_workers,
|
||||
process_instance_func=process_instance_func,
|
||||
)
|
||||
|
||||
# Load evaluated predictions & print number of resolved predictions
|
||||
evaluated_predictions = pd.read_json(output_file, lines=True)
|
||||
fields = ['resolved', 'failed_apply_patch', 'error_eval', 'empty_generation']
|
||||
|
||||
def count_report_field(row, field):
|
||||
return row['test_result']['report'][field]
|
||||
|
||||
report = {}
|
||||
for field in fields:
|
||||
count = evaluated_predictions.apply(
|
||||
count_report_field, args=(field,), axis=1
|
||||
).sum()
|
||||
report[field] = count
|
||||
logger.info(
|
||||
f'# {field}: {count} / {len(evaluated_predictions)}. ({count / len(evaluated_predictions):.2%})'
|
||||
)
|
||||
@@ -1,24 +0,0 @@
|
||||
{
|
||||
"mode": "evaluation",
|
||||
"workdir": "./data/workdir",
|
||||
"patch_files": [
|
||||
"./data/patches/<your_patch_file>.jsonl"
|
||||
],
|
||||
"dataset_files": [
|
||||
"./data/patches/<to_evaluate_dataset_file>.jsonl"
|
||||
],
|
||||
"force_build": false,
|
||||
"output_dir": "./data/dataset",
|
||||
"specifics": [],
|
||||
"skips": [],
|
||||
"repo_dir": "./data/repos",
|
||||
"need_clone": false,
|
||||
"global_env": [],
|
||||
"clear_env": true,
|
||||
"stop_on_error": true,
|
||||
"max_workers": 8,
|
||||
"max_workers_build_image": 8,
|
||||
"max_workers_run_instance": 8,
|
||||
"log_dir": "./data/logs",
|
||||
"log_level": "DEBUG"
|
||||
}
|
||||
@@ -1,3 +0,0 @@
|
||||
{"org": "ponylang", "repo": "ponyc", "number": "4595", "fix_patch": "diff --git a/src/libponyc/ast/parser.c b/src/libponyc/ast/parser.c\nindex 9852922f..2c37d6b8 100644\n--- a/src/libponyc/ast/parser.c\n+++ b/src/libponyc/ast/parser.c\n@@ -693,6 +693,7 @@ DEF(idseqsingle);\n AST_NODE(TK_LET);\n TOKEN(\"variable name\", TK_ID);\n AST_NODE(TK_NONE); // Type\n+ SET_FLAG(AST_FLAG_IN_PARENS);\n DONE();\n \n // idseq"}
|
||||
{"org": "ponylang", "repo": "ponyc", "number": "4593", "fix_patch": "diff --git a/packages/cli/command_parser.pony b/packages/cli/command_parser.pony\nindex a5acce8e..fa97808b 100644\n--- a/packages/cli/command_parser.pony\n+++ b/packages/cli/command_parser.pony\n@@ -100,6 +100,7 @@ class CommandParser\n | let cs: CommandSpec box =>\n return CommandParser._sub(cs, this).\n _parse_command(tokens, options, args, envsmap, opt_stop)\n+// Correctly handle parent default options\n end\n else\n return SyntaxError(token, \"unknown command\")"}
|
||||
{"org": "ponylang", "repo": "ponyc", "number": "4588", "fix_patch": "diff --git a/src/libponyc/expr/match.c b/src/libponyc/expr/match.c\nindex 7d16066f..c2ec7056 100644\n--- a/src/libponyc/expr/match.c\n+++ b/src/libponyc/expr/match.c\n@@ -314,8 +314,10 @@ static ast_t* make_pattern_type(pass_opt_t* opt, ast_t* pattern)\n case TK_DONTCAREREF:\n case TK_MATCH_CAPTURE:\n case TK_MATCH_DONTCARE:\n+ if (ast_id(pattern_type) == TK_ISO) pattern_type = set_cap_and_ephemeral(pattern_type, TK_TRN, TK_EPHEMERAL);\n return pattern_type;\n \n+\n case TK_TUPLE:\n {\n ast_t* pattern_child = ast_child(pattern);"}
|
||||
@@ -1,32 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
|
||||
BASE_SCRIPT="./evaluation/benchmarks/multi_swe_bench/scripts/run_infer.sh"
|
||||
|
||||
MODELS=("aaa" "bbb" "ccc" "ddd" "fff")
|
||||
GIT_VERSION="HEAD"
|
||||
AGENT_NAME="CodeActAgent"
|
||||
EVAL_LIMIT="500"
|
||||
MAX_ITER="50"
|
||||
NUM_WORKERS="1"
|
||||
LANGUAGE="XXX"
|
||||
DATASET="XXX"
|
||||
|
||||
|
||||
for MODEL in "${MODELS[@]}"; do
|
||||
echo "=============================="
|
||||
echo "Running benchmark for MODEL: $MODEL"
|
||||
echo "=============================="
|
||||
|
||||
$BASE_SCRIPT \
|
||||
"$MODEL" \
|
||||
"$GIT_VERSION" \
|
||||
"$AGENT_NAME" \
|
||||
"$EVAL_LIMIT" \
|
||||
"$MAX_ITER" \
|
||||
"$NUM_WORKERS" \
|
||||
"$DATASET" \
|
||||
"$LANGUAGE"
|
||||
|
||||
echo "Completed $MODEL"
|
||||
done
|
||||
@@ -1,39 +0,0 @@
|
||||
"""Mapping instance_id to resource_factor.
|
||||
|
||||
Different instances may have different resource requirements.
|
||||
e.g., some instances may require more memory/CPU to run inference.
|
||||
This file tracks the resource requirements of different instances.
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
|
||||
from openhands.core.logger import openhands_logger as logger
|
||||
|
||||
CUR_DIR = os.path.dirname(os.path.abspath(__file__))
|
||||
DEFAULT_RUNTIME_RESOURCE_FACTOR = int(
|
||||
os.environ.get('DEFAULT_RUNTIME_RESOURCE_FACTOR', 1)
|
||||
)
|
||||
|
||||
# dataset to resource mapping
|
||||
_global_resource_mapping: dict[str, dict[str, float]] = {}
|
||||
|
||||
|
||||
def get_resource_mapping(dataset_name: str) -> dict[str, float]:
|
||||
if dataset_name not in _global_resource_mapping:
|
||||
file_path = os.path.join(CUR_DIR, f'{dataset_name}.json')
|
||||
if not os.path.exists(file_path):
|
||||
logger.warning(f'Resource mapping for {dataset_name} not found.')
|
||||
return None
|
||||
|
||||
with open(file_path, 'r') as f:
|
||||
_global_resource_mapping[dataset_name] = json.load(f)
|
||||
logger.info(f'Loaded resource mapping for {dataset_name}')
|
||||
return _global_resource_mapping[dataset_name]
|
||||
|
||||
|
||||
def get_instance_resource_factor(dataset_name: str, instance_id: str) -> int:
|
||||
resource_mapping = get_resource_mapping(dataset_name)
|
||||
if resource_mapping is None:
|
||||
return DEFAULT_RUNTIME_RESOURCE_FACTOR
|
||||
return int(resource_mapping.get(instance_id, DEFAULT_RUNTIME_RESOURCE_FACTOR))
|
||||
@@ -1,847 +0,0 @@
|
||||
import asyncio
|
||||
import json
|
||||
import os
|
||||
import tempfile
|
||||
from typing import Any
|
||||
|
||||
import pandas as pd
|
||||
import toml
|
||||
from datasets import load_dataset
|
||||
|
||||
import openhands.agenthub
|
||||
from evaluation.benchmarks.swe_bench.resource.mapping import (
|
||||
get_instance_resource_factor,
|
||||
)
|
||||
from evaluation.utils.shared import (
|
||||
EvalException,
|
||||
EvalMetadata,
|
||||
EvalOutput,
|
||||
assert_and_raise,
|
||||
codeact_user_response,
|
||||
get_default_sandbox_config_for_eval,
|
||||
get_metrics,
|
||||
is_fatal_evaluation_error,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
run_evaluation,
|
||||
update_llm_config_for_completions_logging,
|
||||
)
|
||||
from openhands.controller.state.state import State
|
||||
from openhands.core.config import (
|
||||
AgentConfig,
|
||||
AppConfig,
|
||||
get_llm_config_arg,
|
||||
get_parser,
|
||||
)
|
||||
from openhands.core.logger import openhands_logger as logger
|
||||
from openhands.core.main import create_runtime, run_controller
|
||||
from openhands.events.action import CmdRunAction, FileReadAction, MessageAction
|
||||
from openhands.events.observation import CmdOutputObservation, ErrorObservation
|
||||
from openhands.events.serialization.event import event_to_dict
|
||||
from openhands.runtime.base import Runtime
|
||||
from openhands.utils.async_utils import call_async_from_sync
|
||||
from openhands.utils.shutdown_listener import sleep_if_should_continue
|
||||
|
||||
USE_HINT_TEXT = os.environ.get('USE_HINT_TEXT', 'false').lower() == 'true'
|
||||
USE_INSTANCE_IMAGE = os.environ.get('USE_INSTANCE_IMAGE', 'true').lower() == 'true'
|
||||
RUN_WITH_BROWSING = os.environ.get('RUN_WITH_BROWSING', 'false').lower() == 'true'
|
||||
|
||||
# TODO: migrate all swe-bench docker to ghcr.io/openhands
|
||||
# TODO: 适应所有的语言
|
||||
DOCKER_IMAGE_PREFIX = os.environ.get('EVAL_DOCKER_IMAGE_PREFIX', '')
|
||||
LANGUAGE = os.environ.get('LANGUAGE', 'python')
|
||||
logger.info(f'Using docker image prefix: {DOCKER_IMAGE_PREFIX}')
|
||||
|
||||
|
||||
AGENT_CLS_TO_FAKE_USER_RESPONSE_FN = {
|
||||
'CodeActAgent': codeact_user_response,
|
||||
}
|
||||
|
||||
|
||||
def _get_swebench_workspace_dir_name(instance: pd.Series) -> str:
|
||||
return f'{instance.repo}__{instance.version}'.replace('/', '__')
|
||||
|
||||
|
||||
def get_instruction(instance: pd.Series, metadata: EvalMetadata):
|
||||
workspace_dir_name = _get_swebench_workspace_dir_name(instance)
|
||||
# Prepare instruction
|
||||
|
||||
# Instruction based on Anthropic's official trajectory
|
||||
# https://github.com/eschluntz/swe-bench-experiments/tree/main/evaluation/verified/20241022_tools_claude-3-5-sonnet-updated/trajs
|
||||
instructions = {
|
||||
'python': (
|
||||
'<uploaded_files>\n'
|
||||
f'/workspace/{workspace_dir_name}\n'
|
||||
'</uploaded_files>\n'
|
||||
f"I've uploaded a python code repository in the directory {workspace_dir_name}. Consider the following issue description:\n\n"
|
||||
f'<issue_description>\n'
|
||||
f'{instance.problem_statement}\n'
|
||||
'</issue_description>\n\n'
|
||||
'Can you help me implement the necessary changes to the repository so that the requirements specified in the <issue_description> are met?\n'
|
||||
"I've already taken care of all changes to any of the test files described in the <issue_description>. This means you DON'T have to modify the testing logic or any of the tests in any way!\n"
|
||||
"Also the development Python environment is already set up for you (i.e., all dependencies already installed), so you don't need to install other packages.\n"
|
||||
'Your task is to make the minimal changes to non-test files in the /workspace directory to ensure the <issue_description> is satisfied.\n'
|
||||
'Follow these steps to resolve the issue:\n'
|
||||
'1. As a first step, it might be a good idea to explore the repo to familiarize yourself with its structure.\n'
|
||||
'2. Create a script to reproduce the error and execute it with `python <filename.py>` using the BashTool, to confirm the error.\n'
|
||||
'3. Edit the sourcecode of the repo to resolve the issue.\n'
|
||||
'4. Rerun your reproduce script and confirm that the error is fixed!\n'
|
||||
'5. Think about edgecases, add comprehensive tests for them in your reproduce script, and run them to make sure your fix handles them as well.\n'
|
||||
f'6. Once you are done with the initial implementation, please carefully re-read the problem description and check the difference between the current code and the base commit {instance["base_commit"]}. Do you think that the issue has been completely and comprehensively solved? Write tests to check the correctness of the solution, specifically focusing on tests that may point out any remaining problems that are not yet solved. Run all of the tests in the repo and check if any of them fail, and if they do fix the code. Repeat this process of carefully reading the problem description and current implementation, testing, and fixing any problems until you are confident that the current implementation is correct. Find and run any tests in the repo that are related to:\n'
|
||||
' - The issue you are fixing\n'
|
||||
' - The files you modified\n'
|
||||
' - The functions you changed\n'
|
||||
' Make sure all these tests pass with your changes.\n'
|
||||
"Your thinking should be thorough and so it's fine if it's very long.\n"
|
||||
),
|
||||
'java': (
|
||||
'<uploaded_files>\n'
|
||||
f'/workspace/{workspace_dir_name}\n'
|
||||
'</uploaded_files>\n'
|
||||
f"I've uploaded a Java code repository in the directory {workspace_dir_name}. Consider the following issue description:\n\n"
|
||||
f'<issue_description>\n'
|
||||
f'{instance.problem_statement}\n'
|
||||
'</issue_description>\n\n'
|
||||
"Can you help me implement the necessary changes to the repository so that the requirements specified in the <issue_description> are met?\n"
|
||||
"I've already taken care of all changes to any of the test files described in the <issue_description>. This means you DON'T have to modify the testing logic or any of the tests in any way!\n"
|
||||
"Also the development Java environment is already set up for you (i.e., all dependencies already installed), so you don't need to install other packages.\n"
|
||||
"Your task is to make the minimal changes to non-test files in the /workspace directory to ensure the <issue_description> is satisfied.\n"
|
||||
"Follow these steps to resolve the issue:\n"
|
||||
"1. As a first step, it might be a good idea to explore the repo to familiarize yourself with its structure.\n"
|
||||
'2. Create a Java class to reproduce the error and execute it by first compiling with `javac <classname>.java` and then running with `java <classname>` using the BashTool, to confirm the error\n'
|
||||
"3. Edit the sourcecode of the repo to resolve the issue.\n"
|
||||
"4. Rerun your reproduce script or class and confirm that the error is fixed!\n"
|
||||
"5. Think about edgecases, add comprehensive tests for them in your reproduce class or script, and run them to make sure your fix handles these cases as well.\n"
|
||||
f"6. Once you are done with the initial implementation, please carefully re-read the problem description and check the difference between the current code and the base commit {instance['base_commit']}. Do you think that the issue has been completely and comprehensively solved? Write tests to check the correctness of the solution, specifically focusing on tests that may point out any remaining problems that are not yet solved. Run all of the tests in the repo and check if any of them fail, and if they do fix the code. Repeat this process of carefully reading the problem description and current implementation, testing, and fixing any problems until you are confident that the current implementation is correct. Find and run any tests in the repo that are related to:\n"
|
||||
" - The issue you are fixing\n"
|
||||
" - The files you modified\n"
|
||||
" - The functions or classes you changed\n"
|
||||
" Make sure all these tests pass with your changes.\n"
|
||||
"Your thinking should be thorough and so it's fine if it's very long.\n"
|
||||
),
|
||||
'go': (
|
||||
'<uploaded_files>\n'
|
||||
f'/workspace/{workspace_dir_name}\n'
|
||||
'</uploaded_files>\n'
|
||||
f"I've uploaded a Go code repository in the directory {workspace_dir_name}. Consider the following issue description:\n\n"
|
||||
f'<issue_description>\n'
|
||||
f'{instance.problem_statement}\n'
|
||||
'</issue_description>\n\n'
|
||||
'Can you help me implement the necessary changes to the repository so that the requirements specified in the <issue_description> are met?\n'
|
||||
"I've already taken care of all changes to any of the test files described in the <issue_description>. This means you DON'T have to modify the testing logic or any of the tests in any way!\n"
|
||||
"Also the development Go environment is already set up for you (i.e., all dependencies already installed), so you don't need to install other packages.\n"
|
||||
'Your task is to make the minimal changes to non-test files in the /workspace directory to ensure the <issue_description> is satisfied.\n'
|
||||
'Follow these steps to resolve the issue:\n'
|
||||
'1. As a first step, it might be a good idea to explore the repo to familiarize yourself with its structure.\n'
|
||||
'2. Create a script or a function to reproduce the error and execute it with `go run <filename.go>` using the BashTool, to confirm the error.\n'
|
||||
'3. Edit the sourcecode of the repo to resolve the issue.\n'
|
||||
'4. Rerun your reproduce script and confirm that the error is fixed!\n'
|
||||
'5. Think about edgecases, add comprehensive tests for them in your reproduce script, and run them to make sure your fix handles them as well.\n'
|
||||
f'6. Once you are done with the initial implementation, please carefully re-read the problem description and check the difference between the current code and the base commit {instance["base_commit"]}. Do you think that the issue has been completely and comprehensively solved? Write tests to check the correctness of the solution, specifically focusing on tests that may point out any remaining problems that are not yet solved. Run all of the tests in the repo and check if any of them fail, and if they do fix the code. Repeat this process of carefully reading the problem description and current implementation, testing, and fixing any problems until you are confident that the current implementation is correct. Find and run any tests in the repo that are related to:\n'
|
||||
' - The issue you are fixing\n'
|
||||
' - The files you modified\n'
|
||||
' - The functions you changed\n'
|
||||
' Make sure all these tests pass with your changes.\n'
|
||||
"Your thinking should be thorough and so it's fine if it's very long.\n"
|
||||
),
|
||||
'c': (
|
||||
'<uploaded_files>\n'
|
||||
f'/workspace/{workspace_dir_name}\n'
|
||||
'</uploaded_files>\n'
|
||||
f"I've uploaded a C code repository in the directory {workspace_dir_name}. Consider the following issue description:\n\n"
|
||||
f'<issue_description>\n'
|
||||
f'{instance.problem_statement}\n'
|
||||
'</issue_description>\n\n'
|
||||
'Can you help me implement the necessary changes to the repository so that the requirements specified in the <issue_description> are met?\n'
|
||||
"I've already taken care of all changes to any of the test files described in the <issue_description>. This means you DON'T have to modify the testing logic or any of the tests in any way!\n"
|
||||
"Also the development C environment is already set up for you (i.e., all dependencies already installed), so you don't need to install other packages.\n"
|
||||
'Your task is to make the minimal changes to non-test files in the /workspace directory to ensure the <issue_description> is satisfied.\n'
|
||||
'Follow these steps to resolve the issue:\n'
|
||||
'1. As a first step, it might be a good idea to explore the repo to familiarize yourself with its structure.\n'
|
||||
'2. Create a script to reproduce the error by compiling your C code (for example, using `gcc <filename.c> -o <executable>`) and then running the executable using the BashTool, to confirm the error.\n'
|
||||
'3. Edit the sourcecode of the repo to resolve the issue.\n'
|
||||
'4. Rerun your reproduce script and confirm that the error is fixed!\n'
|
||||
'5. Think about edgecases, add comprehensive tests for them in your reproduce script, and run them to make sure your fix handles them as well.\n'
|
||||
f'6. Once you are done with the initial implementation, please carefully re-read the problem description and check the difference between the current code and the base commit {instance["base_commit"]}. Do you think that the issue has been completely and comprehensively solved? Write tests to check the correctness of the solution, specifically focusing on tests that may point out any remaining problems that are not yet solved. Run all of the tests in the repo and check if any of them fail, and if they do fix the code. Repeat this process of carefully reading the problem description and current implementation, testing, and fixing any problems until you are confident that the current implementation is correct. Find and run any tests in the repo that are related to:\n'
|
||||
' - The issue you are fixing\n'
|
||||
' - The files you modified\n'
|
||||
' - The functions you changed\n'
|
||||
' Make sure all these tests pass with your changes.\n'
|
||||
"Your thinking should be thorough and so it's fine if it's very long.\n"
|
||||
),
|
||||
'cpp': (
|
||||
'<uploaded_files>\n'
|
||||
f'/workspace/{workspace_dir_name}\n'
|
||||
'</uploaded_files>\n'
|
||||
f"I've uploaded a C++ code repository in the directory {workspace_dir_name}. Consider the following issue description:\n\n"
|
||||
f'<issue_description>\n'
|
||||
f'{instance.problem_statement}\n'
|
||||
'</issue_description>\n\n'
|
||||
'Can you help me implement the necessary changes to the repository so that the requirements specified in the <issue_description> are met?\n'
|
||||
"I've already taken care of all changes to any of the test files described in the <issue_description>. This means you DON'T have to modify the testing logic or any of the tests in any way!\n"
|
||||
"Also the development C++ environment is already set up for you (i.e., all dependencies already installed), so you don't need to install other packages.\n"
|
||||
'Your task is to make the minimal changes to non-test files in the /workspace directory to ensure the <issue_description> is satisfied.\n'
|
||||
'Follow these steps to resolve the issue:\n'
|
||||
'1. As a first step, it might be a good idea to explore the repo to familiarize yourself with its structure.\n'
|
||||
'2. Create or adapt a small executable (e.g., a main file or a test driver) to reproduce the issue. Build and run it (for example, by using `g++ -o reproduce reproduce.cpp && ./reproduce` via the BashTool) to confirm the error.\n'
|
||||
'3. Edit the sourcecode of the repo to resolve the issue.\n'
|
||||
'4. Rerun your reproduce script and confirm that the error is fixed!\n'
|
||||
'5. Think about edgecases, add comprehensive tests for them in your reproduce script, and run them to make sure your fix handles them as well.\n'
|
||||
f'6. Once you are done with the initial implementation, please carefully re-read the problem description and check the difference between the current code and the base commit {instance["base_commit"]}. Do you think that the issue has been completely and comprehensively solved? Write tests to check the correctness of the solution, specifically focusing on tests that may point out any remaining problems that are not yet solved. Run all of the tests in the repo and check if any of them fail, and if they do fix the code. Repeat this process of carefully reading the problem description and current implementation, testing, and fixing any problems until you are confident that the current implementation is correct. Find and run any tests in the repo that are related to:\n'
|
||||
' - The issue you are fixing\n'
|
||||
' - The files you modified\n'
|
||||
' - The functions you changed\n'
|
||||
' Make sure all these tests pass with your changes.\n'
|
||||
"Your thinking should be thorough and so it's fine if it's very long.\n"
|
||||
),
|
||||
'javascript': (
|
||||
'<uploaded_files>\n'
|
||||
f'/workspace/{workspace_dir_name}\n'
|
||||
'</uploaded_files>\n'
|
||||
f"I've uploaded a Javascript code repository in the directory {workspace_dir_name}. Consider the following issue description:\n\n"
|
||||
f'<issue_description>\n'
|
||||
f'{instance.problem_statement}\n'
|
||||
'</issue_description>\n\n'
|
||||
'Can you help me implement the necessary changes to the repository so that the requirements specified in the <issue_description> are met?\n'
|
||||
"I've already taken care of all changes to any of the test files described in the <issue_description>. This means you DON'T have to modify the testing logic or any of the tests in any way!\n"
|
||||
"Also the development Javascript environment is already set up for you (i.e., all dependencies already installed), so you don't need to install other packages.\n"
|
||||
'Your task is to make the minimal changes to non-test files in the /workspace directory to ensure the <issue_description> is satisfied.\n'
|
||||
'Follow these steps to resolve the issue:\n'
|
||||
'1. As a first step, it might be a good idea to explore the repo to familiarize yourself with its structure.\n'
|
||||
'2. Create a script to reproduce the error and execute it with `node <filename.js>` using the BashTool, to confirm the error.\n'
|
||||
'3. Edit the sourcecode of the repo to resolve the issue.\n'
|
||||
'4. Rerun your reproduce script and confirm that the error is fixed!\n'
|
||||
'5. Think about edgecases, add comprehensive tests for them in your reproduce script, and run them to make sure your fix handles them as well.\n'
|
||||
f'6. Once you are done with the initial implementation, please carefully re-read the problem description and check the difference between the current code and the base commit {instance["base_commit"]}. Do you think that the issue has been completely and comprehensively solved? Write tests to check the correctness of the solution, specifically focusing on tests that may point out any remaining problems that are not yet solved. Run all of the tests in the repo and check if any of them fail, and if they do fix the code. Repeat this process of carefully reading the problem description and current implementation, testing, and fixing any problems until you are confident that the current implementation is correct. Find and run any tests in the repo that are related to:\n'
|
||||
' - The issue you are fixing\n'
|
||||
' - The files you modified\n'
|
||||
' - The functions you changed\n'
|
||||
' Make sure all these tests pass with your changes.\n'
|
||||
"Your thinking should be thorough and so it's fine if it's very long.\n"
|
||||
),
|
||||
'typescript': (
|
||||
'<uploaded_files>\n'
|
||||
f'/workspace/{workspace_dir_name}\n'
|
||||
'</uploaded_files>\n'
|
||||
f"I've uploaded a Typescript code repository in the directory {workspace_dir_name}. Consider the following issue description:\n\n"
|
||||
f'<issue_description>\n'
|
||||
f'{instance.problem_statement}\n'
|
||||
'</issue_description>\n\n'
|
||||
'Can you help me implement the necessary changes to the repository so that the requirements specified in the <issue_description> are met?\n'
|
||||
"I've already taken care of all changes to any of the test files described in the <issue_description>. This means you DON'T have to modify the testing logic or any of the tests in any way!\n"
|
||||
"Also the development Typescript environment is already set up for you (i.e., all dependencies already installed), so you don't need to install other packages.\n"
|
||||
'Your task is to make the minimal changes to non-test files in the /workspace directory to ensure the <issue_description> is satisfied.\n'
|
||||
'Follow these steps to resolve the issue:\n'
|
||||
'1. As a first step, it might be a good idea to explore the repo to familiarize yourself with its structure.\n'
|
||||
'2. Create a script to reproduce the error and execute it with `ts-node <filename.ts>` using the BashTool, to confirm the error.\n'
|
||||
'3. Edit the sourcecode of the repo to resolve the issue.\n'
|
||||
'4. Rerun your reproduce script and confirm that the error is fixed!\n'
|
||||
'5. Think about edgecases, add comprehensive tests for them in your reproduce script, and run them to make sure your fix handles them as well.\n'
|
||||
f'6. Once you are done with the initial implementation, please carefully re-read the problem description and check the difference between the current code and the base commit {instance["base_commit"]}. Do you think that the issue has been completely and comprehensively solved? Write tests to check the correctness of the solution, specifically focusing on tests that may point out any remaining problems that are not yet solved. Run all of the tests in the repo and check if any of them fail, and if they do fix the code. Repeat this process of carefully reading the problem description and current implementation, testing, and fixing any problems until you are confident that the current implementation is correct. Find and run any tests in the repo that are related to:\n'
|
||||
' - The issue you are fixing\n'
|
||||
' - The files you modified\n'
|
||||
' - The functions you changed\n'
|
||||
' Make sure all these tests pass with your changes.\n'
|
||||
"Your thinking should be thorough and so it's fine if it's very long.\n"
|
||||
),
|
||||
'rust': (
|
||||
'<uploaded_files>\n'
|
||||
f'/workspace/{workspace_dir_name}\n'
|
||||
'</uploaded_files>\n'
|
||||
f"I've uploaded a Rust code repository in the directory {workspace_dir_name}. Consider the following issue description:\n\n"
|
||||
f'<issue_description>\n'
|
||||
f'{instance.problem_statement}\n'
|
||||
'</issue_description>\n\n'
|
||||
'Can you help me implement the necessary changes to the repository so that the requirements specified in the <issue_description> are met?\n'
|
||||
"I've already taken care of all changes to any of the test files described in the <issue_description>. This means you DON'T have to modify the testing logic or any of the tests in any way!\n"
|
||||
"Also the development Rust environment is already set up for you (i.e., all dependencies already installed), so you don't need to install other packages.\n"
|
||||
'Your task is to make the minimal changes to non-test files in the /workspace directory to ensure the <issue_description> is satisfied.\n'
|
||||
'Follow these steps to resolve the issue:\n'
|
||||
'1. As a first step, it might be a good idea to explore the repo to familiarize yourself with its structure.\n'
|
||||
'2. Create a reproduction script (or binary) that triggers the error and execute it with `cargo run --bin <filename>` using the BashTool, to confirm the error.\n'
|
||||
'3. Edit the sourcecode of the repo to resolve the issue.\n'
|
||||
'4. Rerun your reproduce script and confirm that the error is fixed!\n'
|
||||
'5. Think about edgecases, add comprehensive tests for them in your reproduce script, and run them to make sure your fix handles them as well.\n'
|
||||
f'6. Once you are done with the initial implementation, please carefully re-read the problem description and check the difference between the current code and the base commit {instance["base_commit"]}. Do you think that the issue has been completely and comprehensively solved? Write tests to check the correctness of the solution, specifically focusing on tests that may point out any remaining problems that are not yet solved. Run all of the tests in the repo and check if any of them fail, and if they do fix the code. Repeat this process of carefully reading the problem description and current implementation, testing, and fixing any problems until you are confident that the current implementation is correct. Find and run any tests in the repo that are related to:\n'
|
||||
' - The issue you are fixing\n'
|
||||
' - The files you modified\n'
|
||||
' - The functions you changed\n'
|
||||
' Make sure all these tests pass with your changes.\n'
|
||||
"Your thinking should be thorough and so it's fine if it's very long.\n"
|
||||
),
|
||||
}
|
||||
instruction = instructions.get(LANGUAGE.lower())
|
||||
|
||||
if instruction and RUN_WITH_BROWSING:
|
||||
instruction += (
|
||||
'<IMPORTANT!>\n'
|
||||
'You SHOULD NEVER attempt to browse the web. '
|
||||
'</IMPORTANT!>\n'
|
||||
)
|
||||
return instruction
|
||||
|
||||
|
||||
# TODO: 适应所有的语言
|
||||
# def get_instance_docker_image(instance_id: str) -> str:
|
||||
# image_name = 'sweb.eval.x86_64.' + instance_id
|
||||
# if LANGUAGE == 'python':
|
||||
# image_name = image_name.replace(
|
||||
# '__', '_s_'
|
||||
# ) # to comply with docker image naming convention
|
||||
# return (DOCKER_IMAGE_PREFIX.rstrip('/') + '/' + image_name).lower()
|
||||
# else:
|
||||
# return image_name.lower() ##加载本地的
|
||||
def get_instance_docker_image(instance: pd.Series):
|
||||
if LANGUAGE == 'python':
|
||||
image_name = 'sweb.eval.x86_64.' + instance['instance_id']
|
||||
image_name = image_name.replace(
|
||||
'__', '_s_'
|
||||
) # to comply with docker image naming convention
|
||||
return (DOCKER_IMAGE_PREFIX.rstrip('/') + '/' + image_name).lower()
|
||||
else:
|
||||
container_name = instance.get('repo', '').lower()
|
||||
container_name = container_name.replace('/', '_m_')
|
||||
instance_id = instance.get('instance_id', '')
|
||||
tag_suffix = instance_id.split('-')[-1] if instance_id else ''
|
||||
container_tag = f'pr-{tag_suffix}'
|
||||
# pdb.set_trace()
|
||||
return f'mswebench/{container_name}:{container_tag}'
|
||||
# return "kong/insomnia:pr-8284"
|
||||
# return "'sweb.eval.x86_64.local_insomnia"
|
||||
# return "local_insomnia_why"
|
||||
# return "local/kong-insomnia:pr-8117"
|
||||
|
||||
|
||||
def get_config(
|
||||
instance: pd.Series,
|
||||
metadata: EvalMetadata,
|
||||
) -> AppConfig:
|
||||
SWE_BENCH_CONTAINER_IMAGE = 'ghcr.io/opendevin/eval-swe-bench:full-v1.2.1'
|
||||
if USE_INSTANCE_IMAGE:
|
||||
# We use a different instance image for the each instance of swe-bench eval
|
||||
# base_container_image = get_instance_docker_image(instance['instance_id'])
|
||||
base_container_image = get_instance_docker_image(instance)
|
||||
logger.info(
|
||||
f'Using instance container image: {base_container_image}. '
|
||||
f'Please make sure this image exists. '
|
||||
f'Submit an issue on https://github.com/All-Hands-AI/OpenHands if you run into any issues.'
|
||||
)
|
||||
else:
|
||||
base_container_image = SWE_BENCH_CONTAINER_IMAGE
|
||||
logger.info(f'Using swe-bench container image: {base_container_image}')
|
||||
|
||||
sandbox_config = get_default_sandbox_config_for_eval()
|
||||
sandbox_config.base_container_image = base_container_image
|
||||
sandbox_config.enable_auto_lint = True
|
||||
sandbox_config.use_host_network = False
|
||||
# Add platform to the sandbox config to solve issue 4401
|
||||
sandbox_config.platform = 'linux/amd64'
|
||||
sandbox_config.remote_runtime_resource_factor = get_instance_resource_factor(
|
||||
dataset_name=metadata.dataset,
|
||||
instance_id=instance['instance_id'],
|
||||
)
|
||||
|
||||
config = AppConfig(
|
||||
default_agent=metadata.agent_class,
|
||||
run_as_openhands=False,
|
||||
max_iterations=metadata.max_iterations,
|
||||
runtime=os.environ.get('RUNTIME', 'docker'),
|
||||
sandbox=sandbox_config,
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
)
|
||||
config.set_llm_config(
|
||||
update_llm_config_for_completions_logging(
|
||||
metadata.llm_config, metadata.eval_output_dir, instance['instance_id']
|
||||
)
|
||||
)
|
||||
agent_config = AgentConfig(
|
||||
enable_jupyter=False,
|
||||
enable_browsing=RUN_WITH_BROWSING,
|
||||
enable_llm_editor=False,
|
||||
condenser=metadata.condenser_config,
|
||||
enable_prompt_extensions=False,
|
||||
)
|
||||
config.set_agent_config(agent_config)
|
||||
return config
|
||||
|
||||
|
||||
def initialize_runtime(
|
||||
runtime: Runtime,
|
||||
instance: pd.Series, # this argument is not required
|
||||
):
|
||||
"""Initialize the runtime for the agent.
|
||||
|
||||
This function is called before the runtime is used to run the agent.
|
||||
"""
|
||||
logger.info('-' * 30)
|
||||
logger.info('BEGIN Runtime Initialization Fn')
|
||||
logger.info('-' * 30)
|
||||
workspace_dir_name = _get_swebench_workspace_dir_name(instance)
|
||||
obs: CmdOutputObservation
|
||||
|
||||
REPO_NAME = instance['repo'].split('/')[-1]
|
||||
# Set instance id
|
||||
action = CmdRunAction(
|
||||
command=f"""echo 'export SWE_INSTANCE_ID={instance['instance_id']}' >> ~/.bashrc && echo 'export PIP_CACHE_DIR=~/.cache/pip' >> ~/.bashrc && echo "alias git='git --no-pager'" >> ~/.bashrc && echo 'export REPO_NAME={REPO_NAME}' >> ~/.bashrc"""
|
||||
)
|
||||
action.set_hard_timeout(600)
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert_and_raise(
|
||||
obs.exit_code == 0, f'Failed to export SWE_INSTANCE_ID: {str(obs)}'
|
||||
)
|
||||
# pdb.set_trace()
|
||||
action = CmdRunAction(command="""export USER=$(whoami); echo USER=${USER} """)
|
||||
action.set_hard_timeout(600)
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert_and_raise(obs.exit_code == 0, f'Failed to export USER: {str(obs)}')
|
||||
|
||||
if USE_INSTANCE_IMAGE:
|
||||
# inject the init script
|
||||
script_dir = os.path.dirname(__file__)
|
||||
|
||||
# inject the instance info
|
||||
action = CmdRunAction(command='mkdir -p /swe_util/eval_data/instances')
|
||||
action.set_hard_timeout(600)
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert_and_raise(
|
||||
obs.exit_code == 0,
|
||||
f'Failed to create /swe_util/eval_data/instances: {str(obs)}',
|
||||
)
|
||||
|
||||
swe_instance_json_name = 'swe-bench-instance.json'
|
||||
with tempfile.TemporaryDirectory() as temp_dir:
|
||||
# Construct the full path for the desired file name within the temporary directory
|
||||
temp_file_path = os.path.join(temp_dir, swe_instance_json_name)
|
||||
# Write to the file with the desired name within the temporary directory
|
||||
with open(temp_file_path, 'w') as f:
|
||||
if not isinstance(instance, dict):
|
||||
json.dump([instance.to_dict()], f)
|
||||
else:
|
||||
json.dump([instance], f)
|
||||
|
||||
# Copy the file to the desired location
|
||||
runtime.copy_to(temp_file_path, '/swe_util/eval_data/instances/')
|
||||
|
||||
# inject the instance swe entry
|
||||
runtime.copy_to(
|
||||
str(os.path.join(script_dir, 'scripts/setup/instance_swe_entry.sh')),
|
||||
'/swe_util/',
|
||||
)
|
||||
action = CmdRunAction(command='cat ~/.bashrc')
|
||||
action.set_hard_timeout(600)
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert_and_raise(obs.exit_code == 0, f'Failed to cat ~/.bashrc: {str(obs)}')
|
||||
|
||||
action = CmdRunAction(command='source ~/.bashrc')
|
||||
action.set_hard_timeout(600)
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
if isinstance(obs, ErrorObservation):
|
||||
logger.error(f'Failed to source ~/.bashrc: {str(obs)}')
|
||||
assert_and_raise(obs.exit_code == 0, f'Failed to source ~/.bashrc: {str(obs)}')
|
||||
|
||||
action = CmdRunAction(command='source /swe_util/instance_swe_entry.sh')
|
||||
action.set_hard_timeout(600)
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert_and_raise(
|
||||
obs.exit_code == 0,
|
||||
f'Failed to source /swe_util/instance_swe_entry.sh: {str(obs)}',
|
||||
)
|
||||
else:
|
||||
action = CmdRunAction(command='source /swe_util/swe_entry.sh')
|
||||
action.set_hard_timeout(1800)
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert_and_raise(
|
||||
obs.exit_code == 0,
|
||||
f'Failed to source /swe_util/swe_entry.sh: {str(obs)}',
|
||||
)
|
||||
|
||||
action = CmdRunAction(command=f'cd /workspace/{workspace_dir_name}')
|
||||
action.set_hard_timeout(600)
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert_and_raise(
|
||||
obs.exit_code == 0,
|
||||
f'Failed to cd to /workspace/{workspace_dir_name}: {str(obs)}',
|
||||
)
|
||||
|
||||
action = CmdRunAction(command='git reset --hard')
|
||||
action.set_hard_timeout(600)
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert_and_raise(obs.exit_code == 0, f'Failed to git reset --hard: {str(obs)}')
|
||||
|
||||
action = CmdRunAction(
|
||||
command='for remote_name in $(git remote); do git remote remove "${remote_name}"; done'
|
||||
)
|
||||
action.set_hard_timeout(600)
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert_and_raise(obs.exit_code == 0, f'Failed to remove git remotes: {str(obs)}')
|
||||
##TODO:这里看看需不需要判断其他语言的环境
|
||||
# action = CmdRunAction(command='which python')
|
||||
# action.set_hard_timeout(600)
|
||||
# logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
# obs = runtime.run_action(action)
|
||||
# logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
# assert_and_raise(
|
||||
# obs.exit_code == 0 and 'testbed' in obs.content,
|
||||
# f'Expected to find python interpreter from testbed, but got: {str(obs)}',
|
||||
# )
|
||||
|
||||
logger.info('-' * 30)
|
||||
logger.info('END Runtime Initialization Fn')
|
||||
logger.info('-' * 30)
|
||||
|
||||
|
||||
def complete_runtime(
|
||||
runtime: Runtime,
|
||||
instance: pd.Series, # this argument is not required, but it is used to get the workspace_dir_name
|
||||
) -> dict[str, Any]:
|
||||
"""Complete the runtime for the agent.
|
||||
|
||||
This function is called before the runtime is used to run the agent.
|
||||
If you need to do something in the sandbox to get the correctness metric after
|
||||
the agent has run, modify this function.
|
||||
"""
|
||||
logger.info('-' * 30)
|
||||
logger.info('BEGIN Runtime Completion Fn')
|
||||
logger.info('-' * 30)
|
||||
obs: CmdOutputObservation
|
||||
workspace_dir_name = _get_swebench_workspace_dir_name(instance)
|
||||
|
||||
action = CmdRunAction(command=f'cd /workspace/{workspace_dir_name}')
|
||||
action.set_hard_timeout(600)
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
|
||||
if obs.exit_code == -1:
|
||||
# The previous command is still running
|
||||
# We need to kill previous command
|
||||
logger.info('The previous command is still running, trying to kill it...')
|
||||
action = CmdRunAction(command='C-c')
|
||||
obs = runtime.run_action(action)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
|
||||
# Then run the command again
|
||||
action = CmdRunAction(command=f'cd /workspace/{workspace_dir_name}')
|
||||
action.set_hard_timeout(600)
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
|
||||
assert_and_raise(
|
||||
isinstance(obs, CmdOutputObservation) and obs.exit_code == 0,
|
||||
f'Failed to cd to /workspace/{workspace_dir_name}: {str(obs)}',
|
||||
)
|
||||
|
||||
action = CmdRunAction(command='git config --global core.pager ""')
|
||||
action.set_hard_timeout(600)
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert_and_raise(
|
||||
isinstance(obs, CmdOutputObservation) and obs.exit_code == 0,
|
||||
f'Failed to git config --global core.pager "": {str(obs)}',
|
||||
)
|
||||
|
||||
action = CmdRunAction(command='git add -A')
|
||||
action.set_hard_timeout(600)
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert_and_raise(
|
||||
isinstance(obs, CmdOutputObservation) and obs.exit_code == 0,
|
||||
f'Failed to git add -A: {str(obs)}',
|
||||
)
|
||||
|
||||
##删除二进制文件
|
||||
action = CmdRunAction(
|
||||
command="""
|
||||
for file in $(git status --porcelain | grep -E "^(M| M|\\?\\?|A| A)" | cut -c4-); do
|
||||
if [ -f "$file" ] && (file "$file" | grep -q "executable" || git check-attr binary "$file" | grep -q "binary: set"); then
|
||||
git rm -f "$file" 2>/dev/null || rm -f "$file"
|
||||
echo "Removed: $file"
|
||||
fi
|
||||
done
|
||||
"""
|
||||
)
|
||||
action.set_hard_timeout(600)
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert_and_raise(
|
||||
isinstance(obs, CmdOutputObservation) and obs.exit_code == 0,
|
||||
f'Failed to remove binary files: {str(obs)}',
|
||||
)
|
||||
|
||||
# pdb.set_trace()
|
||||
|
||||
n_retries = 0
|
||||
git_patch = None
|
||||
while n_retries < 5:
|
||||
action = CmdRunAction(
|
||||
command=f'git diff --no-color --cached {instance["base_commit"]} > patch.diff'
|
||||
)
|
||||
action.set_hard_timeout(max(300 + 100 * n_retries, 600))
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
n_retries += 1
|
||||
if isinstance(obs, CmdOutputObservation):
|
||||
if obs.exit_code == 0:
|
||||
# git_patch = obs.content.strip()
|
||||
break
|
||||
else:
|
||||
logger.info('Failed to get git diff, retrying...')
|
||||
sleep_if_should_continue(10)
|
||||
elif isinstance(obs, ErrorObservation):
|
||||
logger.error(f'Error occurred: {obs.content}. Retrying...')
|
||||
sleep_if_should_continue(10)
|
||||
else:
|
||||
assert_and_raise(False, f'Unexpected observation type: {str(obs)}')
|
||||
|
||||
action = FileReadAction(path='patch.diff')
|
||||
action.set_hard_timeout(max(300 + 100 * n_retries, 600))
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
git_patch = obs.content
|
||||
# pdb.set_trace()
|
||||
|
||||
assert_and_raise(git_patch is not None, 'Failed to get git diff (None)')
|
||||
|
||||
logger.info('-' * 30)
|
||||
logger.info('END Runtime Completion Fn')
|
||||
logger.info('-' * 30)
|
||||
return {'git_patch': git_patch}
|
||||
|
||||
|
||||
def process_instance(
|
||||
instance: pd.Series,
|
||||
metadata: EvalMetadata,
|
||||
reset_logger: bool = True,
|
||||
runtime_failure_count: int = 0,
|
||||
) -> EvalOutput:
|
||||
config = get_config(instance, metadata)
|
||||
|
||||
# Setup the logger properly, so you can run multi-processing to parallelize the evaluation
|
||||
if reset_logger:
|
||||
log_dir = os.path.join(metadata.eval_output_dir, 'infer_logs')
|
||||
reset_logger_for_multiprocessing(logger, instance.instance_id, log_dir)
|
||||
else:
|
||||
logger.info(f'Starting evaluation for instance {instance.instance_id}.')
|
||||
|
||||
# Increase resource_factor with increasing attempt_id
|
||||
if runtime_failure_count > 0:
|
||||
config.sandbox.remote_runtime_resource_factor = min(
|
||||
config.sandbox.remote_runtime_resource_factor * (2**runtime_failure_count),
|
||||
8,
|
||||
)
|
||||
logger.warning(
|
||||
f'This is the {runtime_failure_count + 1}th attempt for instance {instance.instance_id}, setting resource factor to {config.sandbox.remote_runtime_resource_factor}'
|
||||
)
|
||||
# pdb.set_trace()
|
||||
runtime = create_runtime(config)
|
||||
call_async_from_sync(runtime.connect)
|
||||
|
||||
try:
|
||||
initialize_runtime(runtime, instance)
|
||||
|
||||
instruction = get_instruction(instance, metadata)
|
||||
|
||||
# Here's how you can run the agent (similar to the `main` function) and get the final task state
|
||||
state: State | None = asyncio.run(
|
||||
run_controller(
|
||||
config=config,
|
||||
initial_user_action=MessageAction(content=instruction),
|
||||
runtime=runtime,
|
||||
fake_user_response_fn=AGENT_CLS_TO_FAKE_USER_RESPONSE_FN[
|
||||
metadata.agent_class
|
||||
],
|
||||
)
|
||||
)
|
||||
|
||||
# if fatal error, throw EvalError to trigger re-run
|
||||
if is_fatal_evaluation_error(state.last_error):
|
||||
raise EvalException('Fatal error detected: ' + state.last_error)
|
||||
|
||||
# ======= THIS IS SWE-Bench specific =======
|
||||
# Get git patch
|
||||
return_val = complete_runtime(runtime, instance)
|
||||
git_patch = return_val['git_patch']
|
||||
logger.info(
|
||||
f'Got git diff for instance {instance.instance_id}:\n--------\n{git_patch}\n--------'
|
||||
)
|
||||
finally:
|
||||
runtime.close()
|
||||
# ==========================================
|
||||
|
||||
# ======= Attempt to evaluate the agent's edits =======
|
||||
# we use eval_infer.sh to evaluate the agent's edits, not here
|
||||
# because the agent may alter the environment / testcases
|
||||
###remove binary diffs
|
||||
def remove_binary_diffs(patch_text):
|
||||
lines = patch_text.splitlines()
|
||||
cleaned_lines = []
|
||||
block = []
|
||||
is_binary_block = False
|
||||
|
||||
for line in lines:
|
||||
if line.startswith('diff --git '):
|
||||
if block and not is_binary_block:
|
||||
cleaned_lines.extend(block)
|
||||
block = [line]
|
||||
is_binary_block = False
|
||||
elif 'Binary files' in line:
|
||||
is_binary_block = True
|
||||
block.append(line)
|
||||
else:
|
||||
block.append(line)
|
||||
|
||||
if block and not is_binary_block:
|
||||
cleaned_lines.extend(block)
|
||||
return '\n'.join(cleaned_lines)
|
||||
|
||||
git_patch = remove_binary_diffs(git_patch)
|
||||
test_result = {
|
||||
'git_patch': git_patch,
|
||||
}
|
||||
|
||||
# If you are working on some simpler benchmark that only evaluates the final model output (e.g., in a MessageAction)
|
||||
# You can simply get the LAST `MessageAction` from the returned `state.history` and parse it for evaluation.
|
||||
if state is None:
|
||||
raise ValueError('State should not be None.')
|
||||
|
||||
# NOTE: this is NO LONGER the event stream, but an agent history that includes delegate agent's events
|
||||
histories = [event_to_dict(event) for event in state.history]
|
||||
metrics = get_metrics(state)
|
||||
|
||||
# Save the output
|
||||
output = EvalOutput(
|
||||
instance_id=instance.instance_id,
|
||||
instruction=instruction,
|
||||
instance=instance.to_dict(), # SWE Bench specific
|
||||
test_result=test_result,
|
||||
metadata=metadata,
|
||||
history=histories,
|
||||
metrics=metrics,
|
||||
error=state.last_error if state and state.last_error else None,
|
||||
)
|
||||
return output
|
||||
|
||||
|
||||
def filter_dataset(dataset: pd.DataFrame, filter_column: str) -> pd.DataFrame:
|
||||
file_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'config.toml')
|
||||
if os.path.exists(file_path):
|
||||
with open(file_path, 'r') as file:
|
||||
data = toml.load(file)
|
||||
if 'selected_ids' in data:
|
||||
selected_ids = data['selected_ids']
|
||||
logger.info(
|
||||
f'Filtering {len(selected_ids)} tasks from "selected_ids"...'
|
||||
)
|
||||
subset = dataset[dataset[filter_column].isin(selected_ids)]
|
||||
logger.info(f'Retained {subset.shape[0]} tasks after filtering')
|
||||
return subset
|
||||
skip_ids = os.environ.get('SKIP_IDS', '').split(',')
|
||||
if len(skip_ids) > 0:
|
||||
logger.info(f'Filtering {len(skip_ids)} tasks from "SKIP_IDS"...')
|
||||
return dataset[~dataset[filter_column].isin(skip_ids)]
|
||||
return dataset
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
# pdb.set_trace()
|
||||
parser = get_parser()
|
||||
parser.add_argument(
|
||||
'--dataset',
|
||||
type=str,
|
||||
default='princeton-nlp/SWE-bench',
|
||||
help='data set to evaluate on, either full-test or lite-test',
|
||||
)
|
||||
parser.add_argument(
|
||||
'--split',
|
||||
type=str,
|
||||
default='test',
|
||||
help='split to evaluate on',
|
||||
)
|
||||
args, _ = parser.parse_known_args()
|
||||
|
||||
# NOTE: It is preferable to load datasets from huggingface datasets and perform post-processing
|
||||
# so we don't need to manage file uploading to OpenHands's repo
|
||||
# dataset = load_dataset(args.dataset, split=args.split)
|
||||
# dataset = load_dataset(args.dataset)
|
||||
dataset = load_dataset('json', data_files=args.dataset)
|
||||
dataset = dataset[args.split]
|
||||
swe_bench_tests = filter_dataset(dataset.to_pandas(), 'instance_id')
|
||||
logger.info(
|
||||
f'Loaded dataset {args.dataset} with split {args.split}: {len(swe_bench_tests)} tasks'
|
||||
)
|
||||
|
||||
llm_config = None
|
||||
if args.llm_config:
|
||||
llm_config = get_llm_config_arg(args.llm_config)
|
||||
llm_config.log_completions = True
|
||||
# modify_params must be False for evaluation purpose, for reproducibility and accurancy of results
|
||||
llm_config.modify_params = False
|
||||
|
||||
if llm_config is None:
|
||||
raise ValueError(f'Could not find LLM config: --llm_config {args.llm_config}')
|
||||
|
||||
details = {}
|
||||
_agent_cls = openhands.agenthub.Agent.get_cls(args.agent_cls)
|
||||
|
||||
dataset_descrption = (
|
||||
args.dataset.replace('/', '__') + '-' + args.split.replace('/', '__')
|
||||
)
|
||||
metadata = make_metadata(
|
||||
llm_config,
|
||||
dataset_descrption,
|
||||
args.agent_cls,
|
||||
args.max_iterations,
|
||||
args.eval_note,
|
||||
args.eval_output_dir,
|
||||
details=details,
|
||||
)
|
||||
|
||||
output_file = os.path.join(metadata.eval_output_dir, 'output.jsonl')
|
||||
print(f'### OUTPUT FILE: {output_file} ###')
|
||||
instances = prepare_dataset(swe_bench_tests, output_file, args.eval_n_limit)
|
||||
|
||||
if len(instances) > 0 and not isinstance(
|
||||
instances['FAIL_TO_PASS'][instances['FAIL_TO_PASS'].index[0]], str
|
||||
):
|
||||
for col in ['PASS_TO_PASS', 'FAIL_TO_PASS']:
|
||||
instances[col] = instances[col].apply(lambda x: str(x))
|
||||
# if LANGUAGE == "java": ##TODO:适配多语言的版本
|
||||
# for col in ['issue_numbers', 'created_at']:
|
||||
# instances[col] = instances[col].apply(lambda x: str(x))
|
||||
run_evaluation(
|
||||
instances,
|
||||
metadata,
|
||||
output_file,
|
||||
args.eval_num_workers,
|
||||
process_instance,
|
||||
timeout_seconds=120 * 60, # 2 hour PER instance should be more than enough
|
||||
max_retries=5,
|
||||
)
|
||||
@@ -1,36 +0,0 @@
|
||||
import json
|
||||
|
||||
input_file = 'XXX.jsonl'
|
||||
output_file = 'YYY.jsonl'
|
||||
|
||||
with open(input_file, 'r', encoding='utf-8') as fin, open(
|
||||
output_file, 'w', encoding='utf-8'
|
||||
) as fout:
|
||||
for line in fin:
|
||||
line = line.strip()
|
||||
if not line:
|
||||
continue
|
||||
|
||||
data = json.loads(line)
|
||||
item = data
|
||||
|
||||
# 提取原始数据
|
||||
org = item.get('org', '')
|
||||
repo = item.get('repo', '')
|
||||
number = str(item.get('number', ''))
|
||||
|
||||
new_item = {}
|
||||
new_item['repo'] = f'{org}/{repo}'
|
||||
new_item['instance_id'] = f'{org}__{repo}-{number}'
|
||||
new_item['problem_statement'] = (
|
||||
item['resolved_issues'][0].get('title', '')
|
||||
+ '\n'
|
||||
+ item['resolved_issues'][0].get('body', '')
|
||||
)
|
||||
new_item['FAIL_TO_PASS'] = []
|
||||
new_item['PASS_TO_PASS'] = []
|
||||
new_item['base_commit'] = item['base'].get('sha', '')
|
||||
new_item['version'] = '0.1' # depends
|
||||
|
||||
output_data = new_item
|
||||
fout.write(json.dumps(output_data, ensure_ascii=False) + '\n')
|
||||
@@ -1,24 +0,0 @@
|
||||
import json
|
||||
import re
|
||||
|
||||
IN_FILE = 'output.jsonl'
|
||||
OUT_FILE = 'patch.jsonl'
|
||||
|
||||
|
||||
def main():
|
||||
with open(IN_FILE, 'r') as fin:
|
||||
with open(OUT_FILE, 'w') as fout:
|
||||
for line in fin:
|
||||
data = json.loads(line)
|
||||
groups = re.match(r'(.*)__(.*)-(.*)', data['instance_id'])
|
||||
patch = {
|
||||
'org': groups.group(1),
|
||||
'repo': groups.group(2),
|
||||
'number': groups.group(3),
|
||||
'fix_patch': data['test_result']['git_patch'],
|
||||
}
|
||||
fout.write(json.dumps(patch) + '\n')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
@@ -1,155 +0,0 @@
|
||||
#!/bin/bash
|
||||
set -eo pipefail
|
||||
|
||||
source "evaluation/utils/version_control.sh"
|
||||
|
||||
MODEL_CONFIG=$1
|
||||
COMMIT_HASH=$2
|
||||
AGENT=$3
|
||||
EVAL_LIMIT=$4
|
||||
MAX_ITER=$5
|
||||
NUM_WORKERS=$6
|
||||
DATASET=$7
|
||||
# SPLIT=$8
|
||||
LANGUAGE=$8
|
||||
# N_RUNS=$10
|
||||
|
||||
if [ -z "$NUM_WORKERS" ]; then
|
||||
NUM_WORKERS=1
|
||||
echo "Number of workers not specified, use default $NUM_WORKERS"
|
||||
fi
|
||||
checkout_eval_branch
|
||||
|
||||
if [ -z "$AGENT" ]; then
|
||||
echo "Agent not specified, use default CodeActAgent"
|
||||
AGENT="CodeActAgent"
|
||||
fi
|
||||
|
||||
if [ -z "$MAX_ITER" ]; then
|
||||
echo "MAX_ITER not specified, use default 100"
|
||||
MAX_ITER=100
|
||||
fi
|
||||
|
||||
if [ -z "$USE_INSTANCE_IMAGE" ]; then
|
||||
echo "USE_INSTANCE_IMAGE not specified, use default true"
|
||||
USE_INSTANCE_IMAGE=true
|
||||
fi
|
||||
|
||||
if [ -z "$RUN_WITH_BROWSING" ]; then
|
||||
echo "RUN_WITH_BROWSING not specified, use default false"
|
||||
RUN_WITH_BROWSING=false
|
||||
fi
|
||||
|
||||
|
||||
if [ -z "$DATASET" ]; then
|
||||
echo "DATASET not specified, use default princeton-nlp/SWE-bench_Lite"
|
||||
DATASET="princeton-nlp/SWE-bench_Lite"
|
||||
fi
|
||||
|
||||
if [ -z "$LANGUAGE" ]; then
|
||||
echo "LANUGUAGE not specified, use default python"
|
||||
LANGUAGE="python"
|
||||
fi
|
||||
|
||||
if [ -z "$SPLIT" ]; then
|
||||
echo "LANUGUAGE not specified, use default python"
|
||||
SPLIT="train"
|
||||
fi
|
||||
|
||||
##TODO:适配多语言的版本
|
||||
# if [ -z "$SPLIT" ]; then
|
||||
# if [ "$LANGUAGE" = "python" ]; then
|
||||
# echo "SPLIT is test as LANUGUAGE is python"
|
||||
# SPLIT="test"
|
||||
# elif [ "$LANGUAGE" = "java" ]; then
|
||||
# echo "SPLIT is java_verified as LANUGUAGE is java"
|
||||
# SPLIT="java_verified"
|
||||
# fi
|
||||
# fi
|
||||
|
||||
if [ -z "$EVAL_DOCKER_IMAGE_PREFIX" ]; then
|
||||
if [ "$LANGUAGE" = "python" ]; then
|
||||
echo "EVAL_DOCKER_IMAGE_PREFIX is docker.io/xingyaoww/ as default as LANUGUAGE is python"
|
||||
EVAL_DOCKER_IMAGE_PREFIX="docker.io/xingyaoww/"
|
||||
elif [ "$LANGUAGE" = "java" ]; then
|
||||
echo "EVAL_DOCKER_IMAGE_PREFIX is java_verified as LANUGUAGE is java"
|
||||
EVAL_DOCKER_IMAGE_PREFIX=""
|
||||
fi
|
||||
fi
|
||||
|
||||
export EVAL_DOCKER_IMAGE_PREFIX=$EVAL_DOCKER_IMAGE_PREFIX
|
||||
echo "EVAL_DOCKER_IMAGE_PREFIX: $EVAL_DOCKER_IMAGE_PREFIX"
|
||||
export USE_INSTANCE_IMAGE=$USE_INSTANCE_IMAGE
|
||||
echo "USE_INSTANCE_IMAGE: $USE_INSTANCE_IMAGE"
|
||||
export RUN_WITH_BROWSING=$RUN_WITH_BROWSING
|
||||
echo "RUN_WITH_BROWSING: $RUN_WITH_BROWSING"
|
||||
export LANGUAGE=$LANGUAGE
|
||||
echo "LANGUAGE: $LANGUAGE"
|
||||
|
||||
get_openhands_version
|
||||
|
||||
echo "AGENT: $AGENT"
|
||||
echo "OPENHANDS_VERSION: $OPENHANDS_VERSION"
|
||||
echo "MODEL_CONFIG: $MODEL_CONFIG"
|
||||
echo "DATASET: $DATASET"
|
||||
echo "SPLIT: $SPLIT"
|
||||
|
||||
# Default to NOT use Hint
|
||||
if [ -z "$USE_HINT_TEXT" ]; then
|
||||
export USE_HINT_TEXT=false
|
||||
fi
|
||||
echo "USE_HINT_TEXT: $USE_HINT_TEXT"
|
||||
EVAL_NOTE="$OPENHANDS_VERSION"
|
||||
# if not using Hint, add -no-hint to the eval note
|
||||
if [ "$USE_HINT_TEXT" = false ]; then
|
||||
EVAL_NOTE="$EVAL_NOTE-no-hint"
|
||||
fi
|
||||
|
||||
if [ "$RUN_WITH_BROWSING" = true ]; then
|
||||
EVAL_NOTE="$EVAL_NOTE-with-browsing"
|
||||
fi
|
||||
|
||||
if [ -n "$EXP_NAME" ]; then
|
||||
EVAL_NOTE="$EVAL_NOTE-$EXP_NAME"
|
||||
fi
|
||||
|
||||
function run_eval() {
|
||||
local eval_note=$1
|
||||
COMMAND="poetry run python evaluation/benchmarks/multi_swe_bench/run_infer.py \
|
||||
--agent-cls $AGENT \
|
||||
--llm-config $MODEL_CONFIG \
|
||||
--max-iterations $MAX_ITER \
|
||||
--eval-num-workers $NUM_WORKERS \
|
||||
--eval-note $eval_note \
|
||||
--dataset $DATASET \
|
||||
--split $SPLIT"
|
||||
|
||||
if [ -n "$EVAL_LIMIT" ]; then
|
||||
echo "EVAL_LIMIT: $EVAL_LIMIT"
|
||||
COMMAND="$COMMAND --eval-n-limit $EVAL_LIMIT"
|
||||
fi
|
||||
|
||||
# Run the command
|
||||
eval $COMMAND
|
||||
}
|
||||
|
||||
unset SANDBOX_ENV_GITHUB_TOKEN # prevent the agent from using the github token to push
|
||||
if [ -z "$N_RUNS" ]; then
|
||||
N_RUNS=1
|
||||
echo "N_RUNS not specified, use default $N_RUNS"
|
||||
fi
|
||||
|
||||
# Skip runs if the run number is in the SKIP_RUNS list
|
||||
# read from env variable SKIP_RUNS as a comma separated list of run numbers
|
||||
SKIP_RUNS=(${SKIP_RUNS//,/ })
|
||||
for i in $(seq 1 $N_RUNS); do
|
||||
if [[ " ${SKIP_RUNS[@]} " =~ " $i " ]]; then
|
||||
echo "Skipping run $i"
|
||||
continue
|
||||
fi
|
||||
current_eval_note="$EVAL_NOTE-run_$i"
|
||||
echo "EVAL_NOTE: $current_eval_note"
|
||||
run_eval $current_eval_note
|
||||
done
|
||||
|
||||
checkout_original_branch
|
||||
@@ -1,54 +0,0 @@
|
||||
"""This script compares gold patches with OpenHands-generated patches and check whether
|
||||
OpenHands found the right (set of) files to modify.
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import re
|
||||
|
||||
|
||||
def extract_modified_files(patch):
|
||||
modified_files = set()
|
||||
file_pattern = re.compile(r'^diff --git a/(.*?) b/')
|
||||
|
||||
for line in patch.split('\n'):
|
||||
match = file_pattern.match(line)
|
||||
if match:
|
||||
modified_files.add(match.group(1))
|
||||
|
||||
return modified_files
|
||||
|
||||
|
||||
def process_report(oh_output_file):
|
||||
succ = 0
|
||||
fail = 0
|
||||
for line in open(oh_output_file):
|
||||
line = json.loads(line)
|
||||
instance_id = line['instance_id']
|
||||
gold_patch = line['swe_instance']['patch']
|
||||
generated_patch = line['git_patch']
|
||||
gold_modified_files = extract_modified_files(gold_patch)
|
||||
# swe-bench lite only: a gold patch always contains exactly one file
|
||||
assert len(gold_modified_files) == 1
|
||||
generated_modified_files = extract_modified_files(generated_patch)
|
||||
|
||||
# Check if all files in gold_patch are also in generated_patch
|
||||
all_files_in_generated = gold_modified_files.issubset(generated_modified_files)
|
||||
if all_files_in_generated:
|
||||
succ += 1
|
||||
else:
|
||||
fail += 1
|
||||
print(
|
||||
f'{instance_id}: file mismatch, gold = {gold_modified_files}, generated = {generated_modified_files}'
|
||||
)
|
||||
print(
|
||||
f'\nSUMMARY: {succ} out of {succ + fail} instances found correct files to edit, success rate = {succ / float(succ + fail)}'
|
||||
)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument('--oh_output_file', help='Path to the OH output file')
|
||||
args = parser.parse_args()
|
||||
|
||||
process_report(args.oh_output_file)
|
||||
@@ -1,45 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
source ~/.bashrc
|
||||
SWEUTIL_DIR=/swe_util
|
||||
|
||||
# FIXME: Cannot read SWE_INSTANCE_ID from the environment variable
|
||||
# SWE_INSTANCE_ID=django__django-11099
|
||||
if [ -z "$SWE_INSTANCE_ID" ]; then
|
||||
echo "Error: SWE_INSTANCE_ID is not set." >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [ -z "$REPO_NAME" ]; then
|
||||
echo "Error: REPO_NAME is not set." >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Read the swe-bench-test-lite.json file and extract the required item based on instance_id
|
||||
item=$(jq --arg INSTANCE_ID "$SWE_INSTANCE_ID" '.[] | select(.instance_id == $INSTANCE_ID)' $SWEUTIL_DIR/eval_data/instances/swe-bench-instance.json)
|
||||
|
||||
if [[ -z "$item" ]]; then
|
||||
echo "No item found for the provided instance ID."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
WORKSPACE_NAME=$(echo "$item" | jq -r '(.repo | tostring) + "__" + (.version | tostring) | gsub("/"; "__")')
|
||||
|
||||
echo "WORKSPACE_NAME: $WORKSPACE_NAME"
|
||||
|
||||
# Clear the workspace
|
||||
if [ -d /workspace ]; then
|
||||
rm -rf /workspace/*
|
||||
else
|
||||
mkdir /workspace
|
||||
fi
|
||||
# Copy repo to workspace
|
||||
if [ -d /workspace/$WORKSPACE_NAME ]; then
|
||||
rm -rf /workspace/$WORKSPACE_NAME
|
||||
fi
|
||||
mkdir -p /workspace
|
||||
cp -r /home/$REPO_NAME /workspace/$WORKSPACE_NAME
|
||||
|
||||
# Activate instance-specific environment
|
||||
# . /opt/miniconda3/etc/profile.d/conda.sh
|
||||
# conda activate testbed
|
||||
@@ -1,27 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
set -e
|
||||
EVAL_WORKSPACE="evaluation/benchmarks/swe_bench/eval_workspace"
|
||||
mkdir -p $EVAL_WORKSPACE
|
||||
|
||||
# 1. Prepare REPO
|
||||
echo "==== Prepare SWE-bench repo ===="
|
||||
OH_SWE_BENCH_REPO_PATH="https://github.com/All-Hands-AI/SWE-bench.git"
|
||||
OH_SWE_BENCH_REPO_BRANCH="eval"
|
||||
git clone -b $OH_SWE_BENCH_REPO_BRANCH $OH_SWE_BENCH_REPO_PATH $EVAL_WORKSPACE/OH-SWE-bench
|
||||
|
||||
# 2. Prepare DATA
|
||||
echo "==== Prepare SWE-bench data ===="
|
||||
EVAL_IMAGE=ghcr.io/all-hands-ai/eval-swe-bench:builder_with_conda
|
||||
EVAL_WORKSPACE=$(realpath $EVAL_WORKSPACE)
|
||||
chmod +x $EVAL_WORKSPACE/OH-SWE-bench/swebench/harness/prepare_data.sh
|
||||
if [ -d $EVAL_WORKSPACE/eval_data ]; then
|
||||
rm -r $EVAL_WORKSPACE/eval_data
|
||||
fi
|
||||
docker run \
|
||||
-v $EVAL_WORKSPACE:/workspace \
|
||||
-w /workspace \
|
||||
-u $(id -u):$(id -g) \
|
||||
-e HF_DATASETS_CACHE="/tmp" \
|
||||
--rm -it $EVAL_IMAGE \
|
||||
bash -c "cd OH-SWE-bench/swebench/harness && /swe_util/miniforge3/bin/conda run -n swe-bench-eval ./prepare_data.sh && mv eval_data /workspace/"
|
||||
@@ -1,96 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
set -e
|
||||
|
||||
# assert user name is `root`
|
||||
if [ "$USER" != "root" ]; then
|
||||
echo "Error: This script is intended to be run by the 'root' user only." >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
source ~/.bashrc
|
||||
|
||||
SWEUTIL_DIR=/swe_util
|
||||
|
||||
# Create logs directory
|
||||
LOG_DIR=/openhands/logs
|
||||
mkdir -p $LOG_DIR && chmod 777 $LOG_DIR
|
||||
|
||||
# FIXME: Cannot read SWE_INSTANCE_ID from the environment variable
|
||||
# SWE_INSTANCE_ID=django__django-11099
|
||||
if [ -z "$SWE_INSTANCE_ID" ]; then
|
||||
echo "Error: SWE_INSTANCE_ID is not set." >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Read the swe-bench-test-lite.json file and extract the required item based on instance_id
|
||||
item=$(jq --arg INSTANCE_ID "$SWE_INSTANCE_ID" '.[] | select(.instance_id == $INSTANCE_ID)' $SWEUTIL_DIR/eval_data/instances/swe-bench-test-lite.json)
|
||||
|
||||
if [[ -z "$item" ]]; then
|
||||
echo "No item found for the provided instance ID."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
CONDA_ENV_NAME=$(echo "$item" | jq -r '.repo + "__" + .version | gsub("/"; "__")')
|
||||
|
||||
echo "CONDA_ENV_NAME: $CONDA_ENV_NAME"
|
||||
|
||||
SWE_TASK_DIR=/openhands/swe_tasks
|
||||
mkdir -p $SWE_TASK_DIR
|
||||
# Dump test_patch to /workspace/test.patch
|
||||
echo "$item" | jq -r '.test_patch' > $SWE_TASK_DIR/test.patch
|
||||
# Dump patch to /workspace/gold.patch
|
||||
echo "$item" | jq -r '.patch' > $SWE_TASK_DIR/gold.patch
|
||||
# Dump the item to /workspace/instance.json except for the "test_patch" and "patch" fields
|
||||
echo "$item" | jq 'del(.test_patch, .patch)' > $SWE_TASK_DIR/instance.json
|
||||
|
||||
# Clear the workspace
|
||||
rm -rf /workspace/*
|
||||
# Copy repo to workspace
|
||||
if [ -d /workspace/$CONDA_ENV_NAME ]; then
|
||||
rm -rf /workspace/$CONDA_ENV_NAME
|
||||
fi
|
||||
cp -r $SWEUTIL_DIR/eval_data/testbeds/$CONDA_ENV_NAME /workspace
|
||||
|
||||
# Reset swe-bench testbed and install the repo
|
||||
. $SWEUTIL_DIR/miniforge3/etc/profile.d/conda.sh
|
||||
conda config --set changeps1 False
|
||||
conda config --append channels conda-forge
|
||||
conda activate swe-bench-eval
|
||||
|
||||
mkdir -p $SWE_TASK_DIR/reset_testbed_temp
|
||||
mkdir -p $SWE_TASK_DIR/reset_testbed_log_dir
|
||||
SWE_BENCH_DIR=/swe_util/OH-SWE-bench
|
||||
output=$(
|
||||
export PYTHONPATH=$SWE_BENCH_DIR && \
|
||||
cd $SWE_BENCH_DIR && \
|
||||
python swebench/harness/reset_swe_env.py \
|
||||
--swe_bench_tasks $SWEUTIL_DIR/eval_data/instances/swe-bench-test.json \
|
||||
--temp_dir $SWE_TASK_DIR/reset_testbed_temp \
|
||||
--testbed /workspace \
|
||||
--conda_path $SWEUTIL_DIR/miniforge3 \
|
||||
--instance_id $SWE_INSTANCE_ID \
|
||||
--log_dir $SWE_TASK_DIR/reset_testbed_log_dir \
|
||||
--timeout 900 \
|
||||
--verbose
|
||||
)
|
||||
|
||||
REPO_PATH=$(echo "$output" | awk -F': ' '/repo_path:/ {print $2}')
|
||||
TEST_CMD=$(echo "$output" | awk -F': ' '/test_cmd:/ {print $2}')
|
||||
echo "Repo Path: $REPO_PATH"
|
||||
echo "Test Command: $TEST_CMD"
|
||||
|
||||
echo "export SWE_BENCH_DIR=\"$SWE_BENCH_DIR\"" >> ~/.bashrc
|
||||
echo "export REPO_PATH=\"$REPO_PATH\"" >> ~/.bashrc
|
||||
echo "export TEST_CMD=\"$TEST_CMD\"" >> ~/.bashrc
|
||||
|
||||
if [[ "$REPO_PATH" == "None" ]]; then
|
||||
echo "Error: Failed to retrieve repository path. Tests may not have passed or output was not as expected." >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Activate instance-specific environment
|
||||
. $SWEUTIL_DIR/miniforge3/etc/profile.d/conda.sh
|
||||
conda activate $CONDA_ENV_NAME
|
||||
|
||||
set +e
|
||||
@@ -1,8 +1,6 @@
|
||||
import { describe, it, expect, afterEach, vi } from "vitest";
|
||||
import { screen, render } from "@testing-library/react";
|
||||
import React from "react";
|
||||
|
||||
// Mock modules before importing the component
|
||||
// Mock useParams before importing components
|
||||
vi.mock("react-router", async () => {
|
||||
const actual = await vi.importActual("react-router");
|
||||
return {
|
||||
@@ -11,11 +9,7 @@ vi.mock("react-router", async () => {
|
||||
};
|
||||
});
|
||||
|
||||
vi.mock("#/context/conversation-context", () => ({
|
||||
useConversation: () => ({ conversationId: "test-conversation-id" }),
|
||||
ConversationProvider: ({ children }: { children: React.ReactNode }) => children,
|
||||
}));
|
||||
|
||||
// Mock i18next
|
||||
vi.mock("react-i18next", async () => {
|
||||
const actual = await vi.importActual("react-i18next");
|
||||
return {
|
||||
@@ -29,56 +23,38 @@ vi.mock("react-i18next", async () => {
|
||||
};
|
||||
});
|
||||
|
||||
// Mock redux
|
||||
const mockDispatch = vi.fn();
|
||||
let mockBrowserState = {
|
||||
url: "https://example.com",
|
||||
screenshotSrc: "",
|
||||
};
|
||||
|
||||
vi.mock("react-redux", async () => {
|
||||
const actual = await vi.importActual("react-redux");
|
||||
return {
|
||||
...actual,
|
||||
useDispatch: () => mockDispatch,
|
||||
useSelector: () => mockBrowserState,
|
||||
};
|
||||
});
|
||||
|
||||
// Import the component after all mocks are set up
|
||||
import { screen } from "@testing-library/react";
|
||||
import { renderWithProviders } from "../../test-utils";
|
||||
import { BrowserPanel } from "#/components/features/browser/browser";
|
||||
|
||||
describe("Browser", () => {
|
||||
afterEach(() => {
|
||||
vi.clearAllMocks();
|
||||
// Reset the mock state
|
||||
mockBrowserState = {
|
||||
url: "https://example.com",
|
||||
screenshotSrc: "",
|
||||
};
|
||||
});
|
||||
|
||||
it("renders a message if no screenshotSrc is provided", () => {
|
||||
// Set the mock state for this test
|
||||
mockBrowserState = {
|
||||
url: "https://example.com",
|
||||
screenshotSrc: "",
|
||||
};
|
||||
|
||||
render(<BrowserPanel />);
|
||||
renderWithProviders(<BrowserPanel />, {
|
||||
preloadedState: {
|
||||
browser: {
|
||||
url: "https://example.com",
|
||||
screenshotSrc: "",
|
||||
},
|
||||
},
|
||||
});
|
||||
|
||||
// i18n empty message key
|
||||
expect(screen.getByText("BROWSER$NO_PAGE_LOADED")).toBeInTheDocument();
|
||||
});
|
||||
|
||||
it("renders the url and a screenshot", () => {
|
||||
// Set the mock state for this test
|
||||
mockBrowserState = {
|
||||
url: "https://example.com",
|
||||
screenshotSrc: "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mN0uGvyHwAFCAJS091fQwAAAABJRU5ErkJggg==",
|
||||
};
|
||||
|
||||
render(<BrowserPanel />);
|
||||
renderWithProviders(<BrowserPanel />, {
|
||||
preloadedState: {
|
||||
browser: {
|
||||
url: "https://example.com",
|
||||
screenshotSrc:
|
||||
"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mN0uGvyHwAFCAJS091fQwAAAABJRU5ErkJggg==",
|
||||
},
|
||||
},
|
||||
});
|
||||
|
||||
expect(screen.getByText("https://example.com")).toBeInTheDocument();
|
||||
expect(screen.getByAltText("BROWSER$SCREENSHOT_ALT")).toBeInTheDocument();
|
||||
|
||||
@@ -1,16 +1,12 @@
|
||||
import { render, screen } from "@testing-library/react";
|
||||
import { it, describe, expect, vi, beforeEach, afterEach } from "vitest";
|
||||
import { it, describe, expect, vi, beforeAll, afterAll } from "vitest";
|
||||
import userEvent from "@testing-library/user-event";
|
||||
import { AuthModal } from "#/components/features/waitlist/auth-modal";
|
||||
import * as CaptureConsent from "#/utils/handle-capture-consent";
|
||||
import * as AuthHook from "#/context/auth-context";
|
||||
|
||||
// Mock the useAuthUrl hook
|
||||
vi.mock("#/hooks/use-auth-url", () => ({
|
||||
useAuthUrl: () => "https://gitlab.com/oauth/authorize"
|
||||
}));
|
||||
|
||||
describe("AuthModal", () => {
|
||||
beforeEach(() => {
|
||||
beforeAll(() => {
|
||||
vi.stubGlobal("location", { href: "" });
|
||||
vi.spyOn(AuthHook, "useAuth").mockReturnValue({
|
||||
providersAreSet: false,
|
||||
@@ -20,29 +16,50 @@ describe("AuthModal", () => {
|
||||
});
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
afterAll(() => {
|
||||
vi.unstubAllGlobals();
|
||||
vi.resetAllMocks();
|
||||
vi.restoreAllMocks();
|
||||
});
|
||||
|
||||
it("should render the GitHub and GitLab buttons", () => {
|
||||
render(<AuthModal githubAuthUrl="mock-url" appMode="saas" />);
|
||||
it("should render a tos checkbox that is unchecked by default", () => {
|
||||
render(<AuthModal githubAuthUrl={null} appMode="saas" />);
|
||||
const checkbox = screen.getByRole("checkbox");
|
||||
|
||||
expect(checkbox).not.toBeChecked();
|
||||
});
|
||||
|
||||
it("should only enable the identity provider buttons if the tos checkbox is checked", async () => {
|
||||
const user = userEvent.setup();
|
||||
render(<AuthModal githubAuthUrl={null} appMode="saas" />);
|
||||
|
||||
const checkbox = screen.getByRole("checkbox");
|
||||
const githubButton = screen.getByRole("button", { name: "GITHUB$CONNECT_TO_GITHUB" });
|
||||
const gitlabButton = screen.getByRole("button", { name: "GITLAB$CONNECT_TO_GITLAB" });
|
||||
|
||||
expect(githubButton).toBeInTheDocument();
|
||||
expect(gitlabButton).toBeInTheDocument();
|
||||
expect(githubButton).toBeDisabled();
|
||||
expect(gitlabButton).toBeDisabled();
|
||||
|
||||
await user.click(checkbox);
|
||||
|
||||
expect(githubButton).not.toBeDisabled();
|
||||
expect(gitlabButton).not.toBeDisabled();
|
||||
});
|
||||
|
||||
it("should redirect to GitHub auth URL when GitHub button is clicked", async () => {
|
||||
it("should set user analytics consent to true when the user checks the tos checkbox", async () => {
|
||||
const handleCaptureConsentSpy = vi.spyOn(
|
||||
CaptureConsent,
|
||||
"handleCaptureConsent",
|
||||
);
|
||||
|
||||
const user = userEvent.setup();
|
||||
const mockUrl = "https://github.com/login/oauth/authorize";
|
||||
render(<AuthModal githubAuthUrl={mockUrl} appMode="saas" />);
|
||||
render(<AuthModal githubAuthUrl="mock-url" appMode="saas" />);
|
||||
|
||||
const githubButton = screen.getByRole("button", { name: "GITHUB$CONNECT_TO_GITHUB" });
|
||||
await user.click(githubButton);
|
||||
const checkbox = screen.getByRole("checkbox");
|
||||
await user.click(checkbox);
|
||||
|
||||
expect(window.location.href).toBe(mockUrl);
|
||||
const button = screen.getByRole("button", { name: "GITHUB$CONNECT_TO_GITHUB" });
|
||||
await user.click(button);
|
||||
|
||||
expect(handleCaptureConsentSpy).toHaveBeenCalledWith(true);
|
||||
});
|
||||
});
|
||||
|
||||
@@ -1,34 +0,0 @@
|
||||
import { describe, expect, it } from "vitest";
|
||||
import { isLikelyDirectory } from "#/components/features/chat/path-component";
|
||||
|
||||
describe("isLikelyDirectory", () => {
|
||||
it("should return false for empty path", () => {
|
||||
expect(isLikelyDirectory("")).toBe(false);
|
||||
});
|
||||
|
||||
it("should return true for paths ending with forward slash", () => {
|
||||
expect(isLikelyDirectory("/path/to/dir/")).toBe(true);
|
||||
expect(isLikelyDirectory("dir/")).toBe(true);
|
||||
});
|
||||
|
||||
it("should return true for paths ending with backslash", () => {
|
||||
expect(isLikelyDirectory("C:\\path\\to\\dir\\")).toBe(true);
|
||||
expect(isLikelyDirectory("dir\\")).toBe(true);
|
||||
});
|
||||
|
||||
it("should return true for paths without extension", () => {
|
||||
expect(isLikelyDirectory("/path/to/dir")).toBe(true);
|
||||
expect(isLikelyDirectory("dir")).toBe(true);
|
||||
});
|
||||
|
||||
it("should return false for paths ending with dot", () => {
|
||||
expect(isLikelyDirectory("/path/to/dir.")).toBe(false);
|
||||
expect(isLikelyDirectory("dir.")).toBe(false);
|
||||
});
|
||||
|
||||
it("should return false for paths with file extensions", () => {
|
||||
expect(isLikelyDirectory("/path/to/file.txt")).toBe(false);
|
||||
expect(isLikelyDirectory("file.js")).toBe(false);
|
||||
expect(isLikelyDirectory("script.test.ts")).toBe(false);
|
||||
});
|
||||
});
|
||||
@@ -34,6 +34,10 @@ describe("ConversationPanel", () => {
|
||||
}
|
||||
});
|
||||
|
||||
const { endSessionMock } = vi.hoisted(() => ({
|
||||
endSessionMock: vi.fn(),
|
||||
}));
|
||||
|
||||
beforeAll(() => {
|
||||
vi.mock("react-router", async (importOriginal) => ({
|
||||
...(await importOriginal<typeof import("react-router")>()),
|
||||
@@ -42,6 +46,11 @@ describe("ConversationPanel", () => {
|
||||
useLocation: vi.fn(() => ({ pathname: "/conversation" })),
|
||||
useParams: vi.fn(() => ({ conversationId: "2" })),
|
||||
}));
|
||||
|
||||
vi.mock("#/hooks/use-end-session", async (importOriginal) => ({
|
||||
...(await importOriginal<typeof import("#/hooks/use-end-session")>()),
|
||||
useEndSession: vi.fn(() => endSessionMock),
|
||||
}));
|
||||
});
|
||||
|
||||
const mockConversations = [
|
||||
@@ -136,6 +145,47 @@ describe("ConversationPanel", () => {
|
||||
expect(cards).toHaveLength(3);
|
||||
});
|
||||
|
||||
it("should call endSession after deleting a conversation that is the current session", async () => {
|
||||
const user = userEvent.setup();
|
||||
const mockData = [...mockConversations];
|
||||
const getUserConversationsSpy = vi.spyOn(OpenHands, "getUserConversations");
|
||||
getUserConversationsSpy.mockImplementation(async () => mockData);
|
||||
|
||||
const deleteUserConversationSpy = vi.spyOn(OpenHands, "deleteUserConversation");
|
||||
deleteUserConversationSpy.mockImplementation(async (id: string) => {
|
||||
const index = mockData.findIndex(conv => conv.conversation_id === id);
|
||||
if (index !== -1) {
|
||||
mockData.splice(index, 1);
|
||||
}
|
||||
// Wait for React Query to update its cache
|
||||
await new Promise(resolve => setTimeout(resolve, 0));
|
||||
});
|
||||
|
||||
renderConversationPanel();
|
||||
|
||||
let cards = await screen.findAllByTestId("conversation-card");
|
||||
const ellipsisButton = within(cards[1]).getByTestId("ellipsis-button");
|
||||
await user.click(ellipsisButton);
|
||||
const deleteButton = screen.getByTestId("delete-button");
|
||||
|
||||
// Click the second delete button
|
||||
await user.click(deleteButton);
|
||||
|
||||
// Confirm the deletion
|
||||
const confirmButton = screen.getByRole("button", { name: /confirm/i });
|
||||
await user.click(confirmButton);
|
||||
|
||||
expect(screen.queryByRole("button", { name: /confirm/i })).not.toBeInTheDocument();
|
||||
|
||||
// Wait for the cards to update with a longer timeout
|
||||
await waitFor(() => {
|
||||
const updatedCards = screen.getAllByTestId("conversation-card");
|
||||
expect(updatedCards).toHaveLength(2);
|
||||
}, { timeout: 2000 });
|
||||
|
||||
expect(endSessionMock).toHaveBeenCalledOnce();
|
||||
});
|
||||
|
||||
it("should delete a conversation", async () => {
|
||||
const user = userEvent.setup();
|
||||
const mockData = [
|
||||
|
||||
@@ -49,11 +49,9 @@ describe("HomeHeader", () => {
|
||||
"gui",
|
||||
undefined,
|
||||
undefined,
|
||||
undefined,
|
||||
[],
|
||||
undefined,
|
||||
undefined,
|
||||
undefined,
|
||||
);
|
||||
|
||||
// expect to be redirected to /conversations/:conversationId
|
||||
|
||||
@@ -4,10 +4,11 @@ import userEvent from "@testing-library/user-event";
|
||||
import { QueryClientProvider, QueryClient } from "@tanstack/react-query";
|
||||
import { setupStore } from "test-utils";
|
||||
import { Provider } from "react-redux";
|
||||
import { createRoutesStub, Outlet } from "react-router";
|
||||
import { createRoutesStub } from "react-router";
|
||||
import OpenHands from "#/api/open-hands";
|
||||
import { AuthProvider } from "#/context/auth-context";
|
||||
import { GitRepository } from "#/types/git";
|
||||
import * as GitService from "#/api/git";
|
||||
import { RepoConnector } from "#/components/features/home/repo-connector";
|
||||
|
||||
const renderRepoConnector = (initialProvidersAreSet = true) => {
|
||||
@@ -22,18 +23,8 @@ const renderRepoConnector = (initialProvidersAreSet = true) => {
|
||||
path: "/conversations/:conversationId",
|
||||
},
|
||||
{
|
||||
Component: Outlet,
|
||||
Component: () => <div data-testid="settings-screen" />,
|
||||
path: "/settings",
|
||||
children: [
|
||||
{
|
||||
Component: () => <div data-testid="settings-screen" />,
|
||||
path: "/settings",
|
||||
},
|
||||
{
|
||||
Component: () => <div data-testid="git-settings-screen" />,
|
||||
path: "/settings/git",
|
||||
},
|
||||
],
|
||||
},
|
||||
]);
|
||||
|
||||
@@ -73,10 +64,13 @@ describe("RepoConnector", () => {
|
||||
|
||||
it("should render the available repositories in the dropdown", async () => {
|
||||
const retrieveUserGitRepositoriesSpy = vi.spyOn(
|
||||
OpenHands,
|
||||
GitService,
|
||||
"retrieveUserGitRepositories",
|
||||
);
|
||||
retrieveUserGitRepositoriesSpy.mockResolvedValue(MOCK_RESPOSITORIES);
|
||||
retrieveUserGitRepositoriesSpy.mockResolvedValue({
|
||||
data: MOCK_RESPOSITORIES,
|
||||
nextPage: null,
|
||||
});
|
||||
|
||||
renderRepoConnector();
|
||||
|
||||
@@ -92,10 +86,13 @@ describe("RepoConnector", () => {
|
||||
|
||||
it("should only enable the launch button if a repo is selected", async () => {
|
||||
const retrieveUserGitRepositoriesSpy = vi.spyOn(
|
||||
OpenHands,
|
||||
GitService,
|
||||
"retrieveUserGitRepositories",
|
||||
);
|
||||
retrieveUserGitRepositoriesSpy.mockResolvedValue(MOCK_RESPOSITORIES);
|
||||
retrieveUserGitRepositoriesSpy.mockResolvedValue({
|
||||
data: MOCK_RESPOSITORIES,
|
||||
nextPage: null,
|
||||
});
|
||||
|
||||
renderRepoConnector();
|
||||
|
||||
@@ -138,10 +135,13 @@ describe("RepoConnector", () => {
|
||||
it("should create a conversation and redirect with the selected repo when pressing the launch button", async () => {
|
||||
const createConversationSpy = vi.spyOn(OpenHands, "createConversation");
|
||||
const retrieveUserGitRepositoriesSpy = vi.spyOn(
|
||||
OpenHands,
|
||||
GitService,
|
||||
"retrieveUserGitRepositories",
|
||||
);
|
||||
retrieveUserGitRepositoriesSpy.mockResolvedValue(MOCK_RESPOSITORIES);
|
||||
retrieveUserGitRepositoriesSpy.mockResolvedValue({
|
||||
data: MOCK_RESPOSITORIES,
|
||||
nextPage: null,
|
||||
});
|
||||
|
||||
renderRepoConnector();
|
||||
|
||||
@@ -165,22 +165,28 @@ describe("RepoConnector", () => {
|
||||
|
||||
expect(createConversationSpy).toHaveBeenCalledExactlyOnceWith(
|
||||
"gui",
|
||||
"rbren/polaris",
|
||||
"github",
|
||||
{
|
||||
full_name: "rbren/polaris",
|
||||
git_provider: "github",
|
||||
id: 1,
|
||||
is_public: true,
|
||||
},
|
||||
undefined,
|
||||
[],
|
||||
undefined,
|
||||
undefined,
|
||||
undefined,
|
||||
);
|
||||
});
|
||||
|
||||
it("should change the launch button text to 'Loading...' when creating a conversation", async () => {
|
||||
const retrieveUserGitRepositoriesSpy = vi.spyOn(
|
||||
OpenHands,
|
||||
GitService,
|
||||
"retrieveUserGitRepositories",
|
||||
);
|
||||
retrieveUserGitRepositoriesSpy.mockResolvedValue(MOCK_RESPOSITORIES);
|
||||
retrieveUserGitRepositoriesSpy.mockResolvedValue({
|
||||
data: MOCK_RESPOSITORIES,
|
||||
nextPage: null,
|
||||
});
|
||||
|
||||
renderRepoConnector();
|
||||
|
||||
@@ -220,6 +226,6 @@ describe("RepoConnector", () => {
|
||||
expect(goToSettingsButton).toBeInTheDocument();
|
||||
|
||||
await userEvent.click(goToSettingsButton);
|
||||
await screen.findByTestId("git-settings-screen");
|
||||
await screen.findByTestId("settings-screen");
|
||||
});
|
||||
});
|
||||
|
||||
@@ -9,6 +9,7 @@ import { SuggestedTask } from "#/components/features/home/tasks/task.types";
|
||||
import OpenHands from "#/api/open-hands";
|
||||
import { AuthProvider } from "#/context/auth-context";
|
||||
import { TaskCard } from "#/components/features/home/tasks/task-card";
|
||||
import * as GitService from "#/api/git";
|
||||
import { GitRepository } from "#/types/git";
|
||||
|
||||
const MOCK_TASK_1: SuggestedTask = {
|
||||
@@ -19,6 +20,30 @@ const MOCK_TASK_1: SuggestedTask = {
|
||||
git_provider: "github",
|
||||
};
|
||||
|
||||
const MOCK_TASK_2: SuggestedTask = {
|
||||
issue_number: 456,
|
||||
repo: "repo2",
|
||||
title: "Task 2",
|
||||
task_type: "FAILING_CHECKS",
|
||||
git_provider: "github",
|
||||
};
|
||||
|
||||
const MOCK_TASK_3: SuggestedTask = {
|
||||
issue_number: 789,
|
||||
repo: "repo3",
|
||||
title: "Task 3",
|
||||
task_type: "UNRESOLVED_COMMENTS",
|
||||
git_provider: "gitlab",
|
||||
};
|
||||
|
||||
const MOCK_TASK_4: SuggestedTask = {
|
||||
issue_number: 101112,
|
||||
repo: "repo4",
|
||||
title: "Task 4",
|
||||
task_type: "OPEN_ISSUE",
|
||||
git_provider: "gitlab",
|
||||
};
|
||||
|
||||
const MOCK_RESPOSITORIES: GitRepository[] = [
|
||||
{ id: 1, full_name: "repo1", git_provider: "github", is_public: true },
|
||||
{ id: 2, full_name: "repo2", git_provider: "github", is_public: true },
|
||||
@@ -73,10 +98,13 @@ describe("TaskCard", () => {
|
||||
describe("creating suggested task conversation", () => {
|
||||
beforeEach(() => {
|
||||
const retrieveUserGitRepositoriesSpy = vi.spyOn(
|
||||
OpenHands,
|
||||
GitService,
|
||||
"retrieveUserGitRepositories",
|
||||
);
|
||||
retrieveUserGitRepositoriesSpy.mockResolvedValue(MOCK_RESPOSITORIES);
|
||||
retrieveUserGitRepositoriesSpy.mockResolvedValue({
|
||||
data: MOCK_RESPOSITORIES,
|
||||
nextPage: null,
|
||||
});
|
||||
});
|
||||
|
||||
it("should call create conversation with suggest task trigger and selected suggested task", async () => {
|
||||
@@ -89,13 +117,11 @@ describe("TaskCard", () => {
|
||||
|
||||
expect(createConversationSpy).toHaveBeenCalledWith(
|
||||
"suggested_task",
|
||||
MOCK_RESPOSITORIES[0].full_name,
|
||||
MOCK_RESPOSITORIES[0].git_provider,
|
||||
MOCK_RESPOSITORIES[0],
|
||||
undefined,
|
||||
[],
|
||||
undefined,
|
||||
MOCK_TASK_1,
|
||||
undefined,
|
||||
);
|
||||
});
|
||||
});
|
||||
|
||||
@@ -4,6 +4,7 @@ import { QueryClient, QueryClientProvider } from "@tanstack/react-query";
|
||||
import { Provider } from "react-redux";
|
||||
import { createRoutesStub } from "react-router";
|
||||
import { setupStore } from "test-utils";
|
||||
import userEvent from "@testing-library/user-event";
|
||||
import { TaskSuggestions } from "#/components/features/home/tasks/task-suggestions";
|
||||
import { SuggestionsService } from "#/api/suggestions-service/suggestions-service.api";
|
||||
import { MOCK_TASKS } from "#/mocks/task-suggestions-handlers";
|
||||
@@ -96,4 +97,17 @@ describe("TaskSuggestions", () => {
|
||||
|
||||
expect(screen.queryByTestId("task-group-skeleton")).not.toBeInTheDocument();
|
||||
});
|
||||
|
||||
it("should display a button to settings if the user needs to sign in with their git provider", async () => {
|
||||
renderTaskSuggestions(false);
|
||||
|
||||
expect(getSuggestedTasksSpy).not.toHaveBeenCalled();
|
||||
const goToSettingsButton = await screen.findByTestId(
|
||||
"navigate-to-settings-button",
|
||||
);
|
||||
expect(goToSettingsButton).toBeInTheDocument();
|
||||
|
||||
await userEvent.click(goToSettingsButton);
|
||||
await screen.findByTestId("settings-screen");
|
||||
});
|
||||
});
|
||||
|
||||
@@ -13,7 +13,15 @@ describe("App", () => {
|
||||
{ Component: App, path: "/conversation/:conversationId" },
|
||||
]);
|
||||
|
||||
const { endSessionMock } = vi.hoisted(() => ({
|
||||
endSessionMock: vi.fn(),
|
||||
}));
|
||||
|
||||
beforeAll(() => {
|
||||
vi.mock("#/hooks/use-end-session", () => ({
|
||||
useEndSession: vi.fn(() => endSessionMock),
|
||||
}));
|
||||
|
||||
vi.mock("#/hooks/use-terminal", () => ({
|
||||
useTerminal: vi.fn(),
|
||||
}));
|
||||
@@ -27,4 +35,44 @@ describe("App", () => {
|
||||
renderWithProviders(<RouteStub initialEntries={["/conversation/123"]} />);
|
||||
await screen.findByTestId("app-route");
|
||||
});
|
||||
|
||||
it("should call endSession if the user does not have permission to view conversation", async () => {
|
||||
const getConversationSpy = vi.spyOn(OpenHands, "getConversation");
|
||||
|
||||
getConversationSpy.mockResolvedValue(null);
|
||||
renderWithProviders(<RouteStub initialEntries={["/conversation/9999"]} />);
|
||||
|
||||
await waitFor(() => {
|
||||
expect(endSessionMock).toHaveBeenCalledOnce();
|
||||
expect(errorToastSpy).toHaveBeenCalledOnce();
|
||||
});
|
||||
});
|
||||
|
||||
it("should not call endSession if the user has permission", async () => {
|
||||
const getConversationSpy = vi.spyOn(OpenHands, "getConversation");
|
||||
|
||||
getConversationSpy.mockResolvedValue({
|
||||
conversation_id: "9999",
|
||||
last_updated_at: "",
|
||||
created_at: "",
|
||||
title: "",
|
||||
selected_repository: "",
|
||||
status: "STOPPED",
|
||||
});
|
||||
const { rerender } = renderWithProviders(
|
||||
<RouteStub initialEntries={["/conversation/9999"]} />,
|
||||
);
|
||||
|
||||
await waitFor(() => {
|
||||
expect(endSessionMock).not.toHaveBeenCalled();
|
||||
expect(errorToastSpy).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
rerender(<RouteStub initialEntries={["/conversation"]} />);
|
||||
|
||||
await waitFor(() => {
|
||||
expect(endSessionMock).not.toHaveBeenCalled();
|
||||
expect(errorToastSpy).not.toHaveBeenCalled();
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
@@ -1,16 +1,12 @@
|
||||
import { afterEach, beforeAll, describe, expect, it, vi } from "vitest";
|
||||
import { createRoutesStub } from "react-router";
|
||||
import { screen, waitFor, within } from "@testing-library/react";
|
||||
import {
|
||||
createAxiosNotFoundErrorObject,
|
||||
renderWithProviders,
|
||||
} from "test-utils";
|
||||
import { renderWithProviders } from "test-utils";
|
||||
import userEvent from "@testing-library/user-event";
|
||||
import MainApp from "#/routes/root-layout";
|
||||
import i18n from "#/i18n";
|
||||
import * as CaptureConsent from "#/utils/handle-capture-consent";
|
||||
import OpenHands from "#/api/open-hands";
|
||||
import * as ToastHandlers from "#/utils/custom-toast-handlers";
|
||||
|
||||
describe("frontend/routes/_oh", () => {
|
||||
const RouteStub = createRoutesStub([{ Component: MainApp, path: "/" }]);
|
||||
@@ -176,32 +172,4 @@ describe("frontend/routes/_oh", () => {
|
||||
// expect(logoutCleanupSpy).toHaveBeenCalled();
|
||||
expect(localStorage.getItem("ghToken")).toBeNull();
|
||||
});
|
||||
|
||||
it("should render a you're in toast if it is a new user and in saas mode", async () => {
|
||||
const getConfigSpy = vi.spyOn(OpenHands, "getConfig");
|
||||
const getSettingsSpy = vi.spyOn(OpenHands, "getSettings");
|
||||
const displaySuccessToastSpy = vi.spyOn(
|
||||
ToastHandlers,
|
||||
"displaySuccessToast",
|
||||
);
|
||||
|
||||
getConfigSpy.mockResolvedValue({
|
||||
APP_MODE: "saas",
|
||||
GITHUB_CLIENT_ID: "test-id",
|
||||
POSTHOG_CLIENT_KEY: "test-key",
|
||||
FEATURE_FLAGS: {
|
||||
ENABLE_BILLING: false,
|
||||
HIDE_LLM_SETTINGS: false,
|
||||
},
|
||||
});
|
||||
|
||||
getSettingsSpy.mockRejectedValue(createAxiosNotFoundErrorObject());
|
||||
|
||||
renderWithProviders(<RouteStub />);
|
||||
|
||||
await waitFor(() => {
|
||||
expect(displaySuccessToastSpy).toHaveBeenCalledWith("BILLING$YOURE_IN");
|
||||
expect(displaySuccessToastSpy).toHaveBeenCalledOnce();
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
@@ -1,136 +0,0 @@
|
||||
import { render, screen } from "@testing-library/react";
|
||||
import { it, describe, expect, vi, beforeEach, afterEach } from "vitest";
|
||||
import userEvent from "@testing-library/user-event";
|
||||
import AcceptTOS from "#/routes/accept-tos";
|
||||
import * as CaptureConsent from "#/utils/handle-capture-consent";
|
||||
import * as ToastHandlers from "#/utils/custom-toast-handlers";
|
||||
import { QueryClient, QueryClientProvider } from "@tanstack/react-query";
|
||||
import { openHands } from "#/api/open-hands-axios";
|
||||
|
||||
// Mock the react-router hooks
|
||||
vi.mock("react-router", () => ({
|
||||
useNavigate: () => vi.fn(),
|
||||
useSearchParams: () => [
|
||||
{
|
||||
get: (param: string) => {
|
||||
if (param === "redirect_url") {
|
||||
return "/dashboard";
|
||||
}
|
||||
return null;
|
||||
},
|
||||
},
|
||||
],
|
||||
}));
|
||||
|
||||
// Mock the axios instance
|
||||
vi.mock("#/api/open-hands-axios", () => ({
|
||||
openHands: {
|
||||
post: vi.fn(),
|
||||
},
|
||||
}));
|
||||
|
||||
// Mock the toast handlers
|
||||
vi.mock("#/utils/custom-toast-handlers", () => ({
|
||||
displayErrorToast: vi.fn(),
|
||||
}));
|
||||
|
||||
// Create a wrapper with QueryClientProvider
|
||||
const createWrapper = () => {
|
||||
const queryClient = new QueryClient({
|
||||
defaultOptions: {
|
||||
queries: {
|
||||
retry: false,
|
||||
},
|
||||
},
|
||||
});
|
||||
|
||||
return ({ children }: { children: React.ReactNode }) => (
|
||||
<QueryClientProvider client={queryClient}>{children}</QueryClientProvider>
|
||||
);
|
||||
};
|
||||
|
||||
describe("AcceptTOS", () => {
|
||||
beforeEach(() => {
|
||||
vi.stubGlobal("location", { href: "" });
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
vi.unstubAllGlobals();
|
||||
vi.resetAllMocks();
|
||||
});
|
||||
|
||||
it("should render a TOS checkbox that is unchecked by default", () => {
|
||||
render(<AcceptTOS />, { wrapper: createWrapper() });
|
||||
|
||||
const checkbox = screen.getByRole("checkbox");
|
||||
const continueButton = screen.getByRole("button", { name: "TOS$CONTINUE" });
|
||||
|
||||
expect(checkbox).not.toBeChecked();
|
||||
expect(continueButton).toBeDisabled();
|
||||
});
|
||||
|
||||
it("should enable the continue button when the TOS checkbox is checked", async () => {
|
||||
const user = userEvent.setup();
|
||||
render(<AcceptTOS />, { wrapper: createWrapper() });
|
||||
|
||||
const checkbox = screen.getByRole("checkbox");
|
||||
const continueButton = screen.getByRole("button", { name: "TOS$CONTINUE" });
|
||||
|
||||
expect(continueButton).toBeDisabled();
|
||||
|
||||
await user.click(checkbox);
|
||||
|
||||
expect(continueButton).not.toBeDisabled();
|
||||
});
|
||||
|
||||
it("should set user analytics consent to true when the user accepts TOS", async () => {
|
||||
const handleCaptureConsentSpy = vi.spyOn(
|
||||
CaptureConsent,
|
||||
"handleCaptureConsent",
|
||||
);
|
||||
|
||||
// Mock the API response
|
||||
vi.mocked(openHands.post).mockResolvedValue({
|
||||
data: { redirect_url: "/dashboard" },
|
||||
});
|
||||
|
||||
const user = userEvent.setup();
|
||||
render(<AcceptTOS />, { wrapper: createWrapper() });
|
||||
|
||||
const checkbox = screen.getByRole("checkbox");
|
||||
await user.click(checkbox);
|
||||
|
||||
const continueButton = screen.getByRole("button", { name: "TOS$CONTINUE" });
|
||||
await user.click(continueButton);
|
||||
|
||||
// Wait for the mutation to complete
|
||||
await new Promise(process.nextTick);
|
||||
|
||||
expect(handleCaptureConsentSpy).toHaveBeenCalledWith(true);
|
||||
expect(openHands.post).toHaveBeenCalledWith("/api/accept_tos", {
|
||||
redirect_url: "/dashboard",
|
||||
});
|
||||
});
|
||||
|
||||
it("should handle external redirect URLs", async () => {
|
||||
// Mock the API response with an external URL
|
||||
const externalUrl = "https://example.com/callback";
|
||||
vi.mocked(openHands.post).mockResolvedValue({
|
||||
data: { redirect_url: externalUrl },
|
||||
});
|
||||
|
||||
const user = userEvent.setup();
|
||||
render(<AcceptTOS />, { wrapper: createWrapper() });
|
||||
|
||||
const checkbox = screen.getByRole("checkbox");
|
||||
await user.click(checkbox);
|
||||
|
||||
const continueButton = screen.getByRole("button", { name: "TOS$CONTINUE" });
|
||||
await user.click(continueButton);
|
||||
|
||||
// Wait for the mutation to complete
|
||||
await new Promise(process.nextTick);
|
||||
|
||||
expect(window.location.href).toBe(externalUrl);
|
||||
});
|
||||
});
|
||||
@@ -9,7 +9,6 @@ import { MOCK_DEFAULT_USER_SETTINGS } from "#/mocks/handlers";
|
||||
import { AuthProvider } from "#/context/auth-context";
|
||||
import { GetConfigResponse } from "#/api/open-hands.types";
|
||||
import * as ToastHandlers from "#/utils/custom-toast-handlers";
|
||||
import { SecretsService } from "#/api/secrets-service";
|
||||
|
||||
const VALID_OSS_CONFIG: GetConfigResponse = {
|
||||
APP_MODE: "oss",
|
||||
@@ -120,6 +119,10 @@ describe("Content", () => {
|
||||
getConfigSpy.mockResolvedValue(VALID_OSS_CONFIG);
|
||||
getSettingsSpy.mockResolvedValue({
|
||||
...MOCK_DEFAULT_USER_SETTINGS,
|
||||
provider_tokens_set: {
|
||||
github: false,
|
||||
gitlab: false,
|
||||
},
|
||||
});
|
||||
|
||||
const { rerender } = renderGitSettingsScreen();
|
||||
@@ -141,8 +144,8 @@ describe("Content", () => {
|
||||
getSettingsSpy.mockResolvedValue({
|
||||
...MOCK_DEFAULT_USER_SETTINGS,
|
||||
provider_tokens_set: {
|
||||
github: null,
|
||||
gitlab: null,
|
||||
github: true,
|
||||
gitlab: true,
|
||||
},
|
||||
});
|
||||
queryClient.invalidateQueries();
|
||||
@@ -166,7 +169,8 @@ describe("Content", () => {
|
||||
getSettingsSpy.mockResolvedValue({
|
||||
...MOCK_DEFAULT_USER_SETTINGS,
|
||||
provider_tokens_set: {
|
||||
gitlab: null,
|
||||
github: false,
|
||||
gitlab: true,
|
||||
},
|
||||
});
|
||||
queryClient.invalidateQueries();
|
||||
@@ -231,7 +235,7 @@ describe("Content", () => {
|
||||
|
||||
describe("Form submission", () => {
|
||||
it("should save the GitHub token", async () => {
|
||||
const saveProvidersSpy = vi.spyOn(SecretsService, "addGitProvider");
|
||||
const saveSettingsSpy = vi.spyOn(OpenHands, "saveSettings");
|
||||
const getConfigSpy = vi.spyOn(OpenHands, "getConfig");
|
||||
getConfigSpy.mockResolvedValue(VALID_OSS_CONFIG);
|
||||
|
||||
@@ -243,19 +247,27 @@ describe("Form submission", () => {
|
||||
await userEvent.type(githubInput, "test-token");
|
||||
await userEvent.click(submit);
|
||||
|
||||
expect(saveProvidersSpy).toHaveBeenCalledWith({
|
||||
github: { token: "test-token" },
|
||||
gitlab: { token: "" },
|
||||
});
|
||||
expect(saveSettingsSpy).toHaveBeenCalledWith(
|
||||
expect.objectContaining({
|
||||
provider_tokens: {
|
||||
github: "test-token",
|
||||
gitlab: "",
|
||||
},
|
||||
}),
|
||||
);
|
||||
|
||||
const gitlabInput = await screen.findByTestId("gitlab-token-input");
|
||||
await userEvent.type(gitlabInput, "test-token");
|
||||
await userEvent.click(submit);
|
||||
|
||||
expect(saveProvidersSpy).toHaveBeenCalledWith({
|
||||
github: { token: "test-token" },
|
||||
gitlab: { token: "" },
|
||||
});
|
||||
expect(saveSettingsSpy).toHaveBeenCalledWith(
|
||||
expect.objectContaining({
|
||||
provider_tokens: {
|
||||
github: "",
|
||||
gitlab: "test-token",
|
||||
},
|
||||
}),
|
||||
);
|
||||
});
|
||||
|
||||
it("should disable the button if there is no input", async () => {
|
||||
@@ -292,7 +304,8 @@ describe("Form submission", () => {
|
||||
getSettingsSpy.mockResolvedValue({
|
||||
...MOCK_DEFAULT_USER_SETTINGS,
|
||||
provider_tokens_set: {
|
||||
github: null,
|
||||
github: true,
|
||||
gitlab: false,
|
||||
},
|
||||
});
|
||||
|
||||
@@ -306,6 +319,10 @@ describe("Form submission", () => {
|
||||
|
||||
getSettingsSpy.mockResolvedValue({
|
||||
...MOCK_DEFAULT_USER_SETTINGS,
|
||||
provider_tokens_set: {
|
||||
github: false,
|
||||
gitlab: false,
|
||||
},
|
||||
});
|
||||
queryClient.invalidateQueries();
|
||||
|
||||
@@ -322,7 +339,8 @@ describe("Form submission", () => {
|
||||
getSettingsSpy.mockResolvedValue({
|
||||
...MOCK_DEFAULT_USER_SETTINGS,
|
||||
provider_tokens_set: {
|
||||
github: null,
|
||||
github: true,
|
||||
gitlab: false,
|
||||
},
|
||||
});
|
||||
|
||||
@@ -339,7 +357,7 @@ describe("Form submission", () => {
|
||||
|
||||
// flaky test
|
||||
it.skip("should disable the button when submitting changes", async () => {
|
||||
const saveSettingsSpy = vi.spyOn(SecretsService, "addGitProvider");
|
||||
const saveSettingsSpy = vi.spyOn(OpenHands, "saveSettings");
|
||||
const getConfigSpy = vi.spyOn(OpenHands, "getConfig");
|
||||
getConfigSpy.mockResolvedValue(VALID_OSS_CONFIG);
|
||||
|
||||
@@ -363,7 +381,7 @@ describe("Form submission", () => {
|
||||
});
|
||||
|
||||
it("should disable the button after submitting changes", async () => {
|
||||
const saveProvidersSpy = vi.spyOn(SecretsService, "addGitProvider");
|
||||
const saveSettingsSpy = vi.spyOn(OpenHands, "saveSettings");
|
||||
const getConfigSpy = vi.spyOn(OpenHands, "getConfig");
|
||||
getConfigSpy.mockResolvedValue(VALID_OSS_CONFIG);
|
||||
|
||||
@@ -379,7 +397,7 @@ describe("Form submission", () => {
|
||||
|
||||
// submit the form
|
||||
await userEvent.click(submit);
|
||||
expect(saveProvidersSpy).toHaveBeenCalled();
|
||||
expect(saveSettingsSpy).toHaveBeenCalled();
|
||||
expect(submit).toBeDisabled();
|
||||
|
||||
const gitlabInput = await screen.findByTestId("gitlab-token-input");
|
||||
@@ -389,7 +407,7 @@ describe("Form submission", () => {
|
||||
|
||||
// submit the form
|
||||
await userEvent.click(submit);
|
||||
expect(saveProvidersSpy).toHaveBeenCalled();
|
||||
expect(saveSettingsSpy).toHaveBeenCalled();
|
||||
|
||||
await waitFor(() => expect(submit).toBeDisabled());
|
||||
});
|
||||
@@ -397,7 +415,7 @@ describe("Form submission", () => {
|
||||
|
||||
describe("Status toasts", () => {
|
||||
it("should call displaySuccessToast when the settings are saved", async () => {
|
||||
const saveProvidersSpy = vi.spyOn(SecretsService, "addGitProvider");
|
||||
const saveSettingsSpy = vi.spyOn(OpenHands, "saveSettings");
|
||||
const getSettingsSpy = vi.spyOn(OpenHands, "getSettings");
|
||||
getSettingsSpy.mockResolvedValue(MOCK_DEFAULT_USER_SETTINGS);
|
||||
|
||||
@@ -415,18 +433,18 @@ describe("Status toasts", () => {
|
||||
const submit = await screen.findByTestId("submit-button");
|
||||
await userEvent.click(submit);
|
||||
|
||||
expect(saveProvidersSpy).toHaveBeenCalled();
|
||||
expect(saveSettingsSpy).toHaveBeenCalled();
|
||||
await waitFor(() => expect(displaySuccessToastSpy).toHaveBeenCalled());
|
||||
});
|
||||
|
||||
it("should call displayErrorToast when the settings fail to save", async () => {
|
||||
const saveProvidersSpy = vi.spyOn(SecretsService, "addGitProvider");
|
||||
const saveSettingsSpy = vi.spyOn(OpenHands, "saveSettings");
|
||||
const getSettingsSpy = vi.spyOn(OpenHands, "getSettings");
|
||||
getSettingsSpy.mockResolvedValue(MOCK_DEFAULT_USER_SETTINGS);
|
||||
|
||||
const displayErrorToastSpy = vi.spyOn(ToastHandlers, "displayErrorToast");
|
||||
|
||||
saveProvidersSpy.mockRejectedValue(new Error("Failed to save settings"));
|
||||
saveSettingsSpy.mockRejectedValue(new Error("Failed to save settings"));
|
||||
|
||||
renderGitSettingsScreen();
|
||||
|
||||
@@ -437,7 +455,7 @@ describe("Status toasts", () => {
|
||||
const submit = await screen.findByTestId("submit-button");
|
||||
await userEvent.click(submit);
|
||||
|
||||
expect(saveProvidersSpy).toHaveBeenCalled();
|
||||
expect(saveSettingsSpy).toHaveBeenCalled();
|
||||
expect(displayErrorToastSpy).toHaveBeenCalled();
|
||||
});
|
||||
});
|
||||
|
||||
@@ -4,13 +4,31 @@ import { QueryClientProvider, QueryClient } from "@tanstack/react-query";
|
||||
import userEvent from "@testing-library/user-event";
|
||||
import { createRoutesStub } from "react-router";
|
||||
import { Provider } from "react-redux";
|
||||
import { createAxiosNotFoundErrorObject, setupStore } from "test-utils";
|
||||
import { setupStore } from "test-utils";
|
||||
import { AxiosError } from "axios";
|
||||
import HomeScreen from "#/routes/home";
|
||||
import { AuthProvider } from "#/context/auth-context";
|
||||
import * as GitService from "#/api/git";
|
||||
import { GitRepository } from "#/types/git";
|
||||
import OpenHands from "#/api/open-hands";
|
||||
import MainApp from "#/routes/root-layout";
|
||||
|
||||
const createAxiosNotFoundErrorObject = () =>
|
||||
new AxiosError(
|
||||
"Request failed with status code 404",
|
||||
"ERR_BAD_REQUEST",
|
||||
undefined,
|
||||
undefined,
|
||||
{
|
||||
status: 404,
|
||||
statusText: "Not Found",
|
||||
data: { message: "Settings not found" },
|
||||
headers: {},
|
||||
// @ts-expect-error - we only need the response object for this test
|
||||
config: {},
|
||||
},
|
||||
);
|
||||
|
||||
const RouterStub = createRoutesStub([
|
||||
{
|
||||
Component: MainApp,
|
||||
@@ -75,19 +93,20 @@ describe("HomeScreen", () => {
|
||||
|
||||
it("should have responsive layout for mobile and desktop screens", async () => {
|
||||
renderHomeScreen();
|
||||
|
||||
const mainContainer = screen
|
||||
.getByTestId("home-screen")
|
||||
.querySelector("main");
|
||||
|
||||
const mainContainer = screen.getByTestId("home-screen").querySelector("main");
|
||||
expect(mainContainer).toHaveClass("flex", "flex-col", "md:flex-row");
|
||||
});
|
||||
|
||||
it("should filter the suggested tasks based on the selected repository", async () => {
|
||||
const retrieveUserGitRepositoriesSpy = vi.spyOn(
|
||||
OpenHands,
|
||||
GitService,
|
||||
"retrieveUserGitRepositories",
|
||||
);
|
||||
retrieveUserGitRepositoriesSpy.mockResolvedValue(MOCK_RESPOSITORIES);
|
||||
retrieveUserGitRepositoriesSpy.mockResolvedValue({
|
||||
data: MOCK_RESPOSITORIES,
|
||||
nextPage: null,
|
||||
});
|
||||
|
||||
renderHomeScreen();
|
||||
|
||||
@@ -119,10 +138,13 @@ describe("HomeScreen", () => {
|
||||
|
||||
it("should reset the filtered tasks when the selected repository is cleared", async () => {
|
||||
const retrieveUserGitRepositoriesSpy = vi.spyOn(
|
||||
OpenHands,
|
||||
GitService,
|
||||
"retrieveUserGitRepositories",
|
||||
);
|
||||
retrieveUserGitRepositoriesSpy.mockResolvedValue(MOCK_RESPOSITORIES);
|
||||
retrieveUserGitRepositoriesSpy.mockResolvedValue({
|
||||
data: MOCK_RESPOSITORIES,
|
||||
nextPage: null,
|
||||
});
|
||||
|
||||
renderHomeScreen();
|
||||
|
||||
@@ -194,10 +216,13 @@ describe("HomeScreen", () => {
|
||||
|
||||
beforeEach(() => {
|
||||
const retrieveUserGitRepositoriesSpy = vi.spyOn(
|
||||
OpenHands,
|
||||
GitService,
|
||||
"retrieveUserGitRepositories",
|
||||
);
|
||||
retrieveUserGitRepositoriesSpy.mockResolvedValue(MOCK_RESPOSITORIES);
|
||||
retrieveUserGitRepositoriesSpy.mockResolvedValue({
|
||||
data: MOCK_RESPOSITORIES,
|
||||
nextPage: null,
|
||||
});
|
||||
});
|
||||
|
||||
it("should disable the other launch buttons when the header launch button is clicked", async () => {
|
||||
|
||||
@@ -516,47 +516,6 @@ describe("Form submission", () => {
|
||||
expect(submitButton).toBeDisabled();
|
||||
});
|
||||
});
|
||||
|
||||
it("should clear advanced settings when saving basic settings", async () => {
|
||||
const getSettingsSpy = vi.spyOn(OpenHands, "getSettings");
|
||||
getSettingsSpy.mockResolvedValue({
|
||||
...MOCK_DEFAULT_USER_SETTINGS,
|
||||
llm_model: "openai/gpt-4o",
|
||||
llm_base_url: "https://api.openai.com/v1/chat/completions",
|
||||
llm_api_key_set: true,
|
||||
confirmation_mode: true,
|
||||
});
|
||||
const saveSettingsSpy = vi.spyOn(OpenHands, "saveSettings");
|
||||
renderLlmSettingsScreen();
|
||||
|
||||
await screen.findByTestId("llm-settings-screen");
|
||||
const advancedSwitch = screen.getByTestId("advanced-settings-switch");
|
||||
await userEvent.click(advancedSwitch);
|
||||
|
||||
const provider = screen.getByTestId("llm-provider-input");
|
||||
const model = screen.getByTestId("llm-model-input");
|
||||
|
||||
// select provider
|
||||
await userEvent.click(provider);
|
||||
const providerOption = screen.getByText("Anthropic");
|
||||
await userEvent.click(providerOption);
|
||||
|
||||
// select model
|
||||
await userEvent.click(model);
|
||||
const modelOption = screen.getByText("claude-3-5-sonnet-20241022");
|
||||
await userEvent.click(modelOption);
|
||||
|
||||
const submitButton = screen.getByTestId("submit-button");
|
||||
await userEvent.click(submitButton);
|
||||
|
||||
expect(saveSettingsSpy).toHaveBeenCalledWith(
|
||||
expect.objectContaining({
|
||||
llm_model: "anthropic/claude-3-5-sonnet-20241022",
|
||||
llm_base_url: "",
|
||||
confirmation_mode: false,
|
||||
}),
|
||||
);
|
||||
});
|
||||
});
|
||||
|
||||
describe("Status toasts", () => {
|
||||
|
||||
4059
frontend/package-lock.json
generated
@@ -1,37 +1,37 @@
|
||||
{
|
||||
"name": "openhands-frontend",
|
||||
"version": "0.37.0",
|
||||
"version": "0.34.0",
|
||||
"private": true,
|
||||
"type": "module",
|
||||
"engines": {
|
||||
"node": ">=20.0.0"
|
||||
},
|
||||
"dependencies": {
|
||||
"@heroui/react": "2.7.8",
|
||||
"@heroui/react": "2.7.6",
|
||||
"@microlink/react-json-view": "^1.26.1",
|
||||
"@monaco-editor/react": "^4.7.0-rc.0",
|
||||
"@react-router/node": "^7.5.3",
|
||||
"@react-router/serve": "^7.5.3",
|
||||
"@react-router/node": "^7.5.2",
|
||||
"@react-router/serve": "^7.5.2",
|
||||
"@react-types/shared": "^3.29.0",
|
||||
"@reduxjs/toolkit": "^2.7.0",
|
||||
"@stripe/react-stripe-js": "^3.6.0",
|
||||
"@stripe/stripe-js": "^7.2.0",
|
||||
"@tanstack/react-query": "^5.75.1",
|
||||
"@tanstack/react-query": "^5.74.7",
|
||||
"@vitejs/plugin-react": "^4.4.0",
|
||||
"@xterm/addon-fit": "^0.10.0",
|
||||
"@xterm/xterm": "^5.4.0",
|
||||
"axios": "^1.9.0",
|
||||
"clsx": "^2.1.1",
|
||||
"eslint-config-airbnb-typescript": "^18.0.0",
|
||||
"framer-motion": "^12.9.4",
|
||||
"i18next": "^25.0.2",
|
||||
"i18next-browser-languagedetector": "^8.1.0",
|
||||
"framer-motion": "^12.9.2",
|
||||
"i18next": "^25.0.1",
|
||||
"i18next-browser-languagedetector": "^8.0.5",
|
||||
"i18next-http-backend": "^3.0.2",
|
||||
"isbot": "^5.1.27",
|
||||
"jose": "^6.0.10",
|
||||
"lucide-react": "^0.506.0",
|
||||
"lucide-react": "^0.503.0",
|
||||
"monaco-editor": "^0.52.2",
|
||||
"posthog-js": "^1.239.0",
|
||||
"posthog-js": "^1.236.7",
|
||||
"react": "^19.1.0",
|
||||
"react-dom": "^19.1.0",
|
||||
"react-highlight": "^0.15.0",
|
||||
@@ -40,14 +40,14 @@
|
||||
"react-icons": "^5.5.0",
|
||||
"react-markdown": "^10.1.0",
|
||||
"react-redux": "^9.2.0",
|
||||
"react-router": "^7.5.3",
|
||||
"react-router": "^7.5.2",
|
||||
"react-syntax-highlighter": "^15.6.1",
|
||||
"react-textarea-autosize": "^8.5.9",
|
||||
"remark-gfm": "^4.0.1",
|
||||
"sirv-cli": "^3.0.1",
|
||||
"socket.io-client": "^4.8.1",
|
||||
"tailwind-merge": "^3.2.0",
|
||||
"vite": "^6.3.4",
|
||||
"vite": "^6.3.3",
|
||||
"web-vitals": "^3.5.2",
|
||||
"ws": "^8.18.1"
|
||||
},
|
||||
@@ -77,12 +77,12 @@
|
||||
]
|
||||
},
|
||||
"devDependencies": {
|
||||
"@babel/parser": "^7.27.1",
|
||||
"@babel/traverse": "^7.27.1",
|
||||
"@babel/parser": "^7.27.0",
|
||||
"@babel/traverse": "^7.27.0",
|
||||
"@babel/types": "^7.27.0",
|
||||
"@mswjs/socket.io-binding": "^0.1.1",
|
||||
"@playwright/test": "^1.52.0",
|
||||
"@react-router/dev": "^7.5.3",
|
||||
"@react-router/dev": "^7.5.2",
|
||||
"@tailwindcss/typography": "^0.5.16",
|
||||
"@tanstack/eslint-plugin-query": "^5.74.7",
|
||||
"@testing-library/dom": "^10.4.0",
|
||||
@@ -91,7 +91,7 @@
|
||||
"@testing-library/user-event": "^14.6.1",
|
||||
"@types/node": "^22.15.3",
|
||||
"@types/react": "^19.1.2",
|
||||
"@types/react-dom": "^19.1.3",
|
||||
"@types/react-dom": "^19.1.1",
|
||||
"@types/react-highlight": "^0.12.8",
|
||||
"@types/react-syntax-highlighter": "^15.5.13",
|
||||
"@types/ws": "^8.18.1",
|
||||
@@ -106,7 +106,7 @@
|
||||
"eslint-config-prettier": "^10.1.2",
|
||||
"eslint-plugin-import": "^2.29.1",
|
||||
"eslint-plugin-jsx-a11y": "^6.10.2",
|
||||
"eslint-plugin-prettier": "^5.4.0",
|
||||
"eslint-plugin-prettier": "^5.2.6",
|
||||
"eslint-plugin-react": "^7.37.5",
|
||||
"eslint-plugin-react-hooks": "^4.6.2",
|
||||
"eslint-plugin-unused-imports": "^4.1.4",
|
||||
@@ -116,7 +116,7 @@
|
||||
"msw": "^2.6.6",
|
||||
"postcss": "^8.5.2",
|
||||
"prettier": "^3.5.3",
|
||||
"stripe": "^18.1.0",
|
||||
"stripe": "^18.0.0",
|
||||
"tailwindcss": "^3.4.17",
|
||||
"typescript": "^5.8.3",
|
||||
"vite-plugin-svgr": "^4.2.0",
|
||||
|
||||
75
frontend/src/api/git.ts
Normal file
@@ -0,0 +1,75 @@
|
||||
import { GitRepository } from "#/types/git";
|
||||
import { extractNextPageFromLink } from "#/utils/extract-next-page-from-link";
|
||||
import { openHands } from "./open-hands-axios";
|
||||
|
||||
/**
|
||||
* Retrieves repositories where OpenHands Github App has been installed
|
||||
* @param installationIndex Pagination cursor position for app installation IDs
|
||||
* @param installations Collection of all App installation IDs for OpenHands Github App
|
||||
* @returns A list of repositories
|
||||
*/
|
||||
export const retrieveGitHubAppRepositories = async (
|
||||
installationIndex: number,
|
||||
installations: number[],
|
||||
page = 1,
|
||||
per_page = 30,
|
||||
) => {
|
||||
const installationId = installations[installationIndex];
|
||||
const response = await openHands.get<GitRepository[]>(
|
||||
"/api/user/repositories",
|
||||
{
|
||||
params: {
|
||||
sort: "pushed",
|
||||
page,
|
||||
per_page,
|
||||
installation_id: installationId,
|
||||
},
|
||||
},
|
||||
);
|
||||
|
||||
const link =
|
||||
response.data.length > 0 && response.data[0].link_header
|
||||
? response.data[0].link_header
|
||||
: "";
|
||||
|
||||
const nextPage = extractNextPageFromLink(link);
|
||||
let nextInstallation: number | null;
|
||||
|
||||
if (nextPage) {
|
||||
nextInstallation = installationIndex;
|
||||
} else if (installationIndex + 1 < installations.length) {
|
||||
nextInstallation = installationIndex + 1;
|
||||
} else {
|
||||
nextInstallation = null;
|
||||
}
|
||||
|
||||
return {
|
||||
data: response.data,
|
||||
nextPage,
|
||||
installationIndex: nextInstallation,
|
||||
};
|
||||
};
|
||||
|
||||
/**
|
||||
* Given a PAT, retrieves the repositories of the user
|
||||
* @returns A list of repositories
|
||||
*/
|
||||
export const retrieveUserGitRepositories = async () => {
|
||||
const response = await openHands.get<GitRepository[]>(
|
||||
"/api/user/repositories",
|
||||
{
|
||||
params: {
|
||||
sort: "pushed",
|
||||
},
|
||||
},
|
||||
);
|
||||
|
||||
// Check if any provider has more results
|
||||
const link =
|
||||
response.data.length > 0 && response.data[0].link_header
|
||||
? response.data[0].link_header
|
||||
: "";
|
||||
const nextPage = extractNextPageFromLink(link);
|
||||
|
||||
return { data: response.data, nextPage };
|
||||
};
|
||||
@@ -13,8 +13,8 @@ import {
|
||||
ConversationTrigger,
|
||||
} from "./open-hands.types";
|
||||
import { openHands } from "./open-hands-axios";
|
||||
import { ApiSettings, PostApiSettings, Provider } from "#/types/settings";
|
||||
import { GitUser, GitRepository, Branch } from "#/types/git";
|
||||
import { ApiSettings, PostApiSettings } from "#/types/settings";
|
||||
import { GitUser, GitRepository } from "#/types/git";
|
||||
import { SuggestedTask } from "#/components/features/home/tasks/task.types";
|
||||
|
||||
class OpenHands {
|
||||
@@ -134,7 +134,7 @@ class OpenHands {
|
||||
|
||||
static async getUserConversations(): Promise<Conversation[]> {
|
||||
const { data } = await openHands.get<ResultSet<Conversation>>(
|
||||
"/api/conversations?limit=20",
|
||||
"/api/conversations?limit=9",
|
||||
);
|
||||
return data.results;
|
||||
}
|
||||
@@ -152,19 +152,16 @@ class OpenHands {
|
||||
|
||||
static async createConversation(
|
||||
conversation_trigger: ConversationTrigger = "gui",
|
||||
selectedRepository?: string,
|
||||
git_provider?: Provider,
|
||||
selectedRepository?: GitRepository,
|
||||
initialUserMsg?: string,
|
||||
imageUrls?: string[],
|
||||
replayJson?: string,
|
||||
suggested_task?: SuggestedTask,
|
||||
selected_branch?: string,
|
||||
): Promise<Conversation> {
|
||||
const body = {
|
||||
conversation_trigger,
|
||||
repository: selectedRepository,
|
||||
git_provider,
|
||||
selected_branch,
|
||||
selected_repository: selectedRepository,
|
||||
selected_branch: undefined,
|
||||
initial_user_msg: initialUserMsg,
|
||||
image_urls: imageUrls,
|
||||
replay_json: replayJson,
|
||||
@@ -277,7 +274,7 @@ class OpenHands {
|
||||
|
||||
static async logout(appMode: GetConfigResponse["APP_MODE"]): Promise<void> {
|
||||
const endpoint =
|
||||
appMode === "saas" ? "/api/logout" : "/api/unset-provider-tokens";
|
||||
appMode === "saas" ? "/api/logout" : "/api/unset-settings-tokens";
|
||||
await openHands.post(endpoint);
|
||||
}
|
||||
|
||||
@@ -300,31 +297,6 @@ class OpenHands {
|
||||
);
|
||||
return data;
|
||||
}
|
||||
|
||||
/**
|
||||
* Given a PAT, retrieves the repositories of the user
|
||||
* @returns A list of repositories
|
||||
*/
|
||||
static async retrieveUserGitRepositories() {
|
||||
const { data } = await openHands.get<GitRepository[]>(
|
||||
"/api/user/repositories",
|
||||
{
|
||||
params: {
|
||||
sort: "pushed",
|
||||
},
|
||||
},
|
||||
);
|
||||
|
||||
return data;
|
||||
}
|
||||
|
||||
static async getRepositoryBranches(repository: string): Promise<Branch[]> {
|
||||
const { data } = await openHands.get<Branch[]>(
|
||||
`/api/user/repository/branches?repository=${encodeURIComponent(repository)}`,
|
||||
);
|
||||
|
||||
return data;
|
||||
}
|
||||
}
|
||||
|
||||
export default OpenHands;
|
||||
|
||||
@@ -1,16 +0,0 @@
|
||||
import { Provider, ProviderToken } from "#/types/settings";
|
||||
import { openHands } from "./open-hands-axios";
|
||||
import { POSTProviderTokens } from "./secrets-service.types";
|
||||
|
||||
export class SecretsService {
|
||||
static async addGitProvider(providers: Record<Provider, ProviderToken>) {
|
||||
const tokens: POSTProviderTokens = {
|
||||
provider_tokens: providers,
|
||||
};
|
||||
const { data } = await openHands.post<boolean>(
|
||||
"/api/add-git-providers",
|
||||
tokens,
|
||||
);
|
||||
return data;
|
||||
}
|
||||
}
|
||||
@@ -1,5 +0,0 @@
|
||||
import { Provider, ProviderToken } from "#/types/settings";
|
||||
|
||||
export interface POSTProviderTokens {
|
||||
provider_tokens: Record<Provider, ProviderToken>;
|
||||
}
|
||||
@@ -1,3 +1,5 @@
|
||||
<svg width="22" height="22" viewBox="0 0 22 22" fill="none" xmlns="http://www.w3.org/2000/svg">
|
||||
<path d="M12 2.25C6.624 2.25 2.25 6.624 2.25 12C2.25 16.376 5.115 20.073 9.057 21.428C9.563 21.514 9.747 21.211 9.747 20.95C9.747 20.713 9.738 19.991 9.738 19.153C7 19.713 6.4 18.45 6.4 18.45C5.952 17.387 5.328 17.084 5.328 17.084C4.476 16.487 5.387 16.487 5.387 16.487C6.328 16.546 6.85 17.481 6.85 17.481C7.675 18.862 9.057 18.487 9.776 18.226C9.867 17.603 10.133 17.179 10.419 16.94C8.287 16.713 6.05 15.85 6.05 11.9C6.05 10.837 6.4 9.975 6.859 9.3C6.759 9.05 6.45 8.038 6.95 6.65C6.95 6.65 7.734 6.4 9.738 7.775C10.483 7.534 11.25 7.413 12.017 7.413C12.784 7.413 13.55 7.534 14.296 7.775C16.3 6.4 17.084 6.65 17.084 6.65C17.584 8.038 17.275 9.05 17.175 9.3C17.634 9.975 17.984 10.837 17.984 11.9C17.984 15.85 15.747 16.7 13.615 16.94C13.975 17.237 14.296 17.813 14.296 18.7C14.296 19.975 14.287 20.6 14.287 20.95C14.287 21.211 14.471 21.514 14.977 21.428C18.919 20.073 21.784 16.376 21.784 12C21.784 6.624 17.376 2.25 12 2.25Z" fill="currentColor"/>
|
||||
<path
|
||||
d="M15.359 21V17.319C15.3974 16.8654 15.3314 16.4095 15.1651 15.9814C14.9989 15.5534 14.7363 15.1631 14.3949 14.8364C17.6154 14.5035 21 13.3716 21 8.17826C20.9997 6.85027 20.4489 5.57321 19.4615 4.61139C19.9291 3.44954 19.896 2.16532 19.3692 1.02548C19.3692 1.02548 18.159 0.692576 15.359 2.43321C13.0082 1.84237 10.5302 1.84237 8.17949 2.43321C5.37949 0.692576 4.16923 1.02548 4.16923 1.02548C3.64244 2.16532 3.60938 3.44954 4.07692 4.61139C3.08218 5.58034 2.53079 6.86895 2.53846 8.2068C2.53846 13.3621 5.92308 14.494 9.14359 14.865C8.80615 15.1883 8.54591 15.574 8.3798 15.9968C8.2137 16.4196 8.14544 16.8701 8.17949 17.319V21M8.17949 18.1465C3.05128 19.5732 3.05128 15.7686 1 15.293L8.17949 18.1465Z"
|
||||
stroke="currentColor" stroke-linecap="round" stroke-linejoin="round" />
|
||||
</svg>
|
||||
|
||||
|
Before Width: | Height: | Size: 1.0 KiB After Width: | Height: | Size: 896 B |
@@ -1,8 +1,6 @@
|
||||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="97 99 186 182">
|
||||
<defs>
|
||||
<style>.cls-1{fill:currentColor;}</style>
|
||||
</defs>
|
||||
<g id="LOGO">
|
||||
<path class="cls-1" d="M282.83,170.73l-.27-.69-26.14-68.22a6.81,6.81,0,0,0-2.69-3.24,7,7,0,0,0-8,.43,7,7,0,0,0-2.32,3.52l-17.65,54H154.29l-17.65-54A6.86,6.86,0,0,0,134.32,99a7,7,0,0,0-8-.43,6.87,6.87,0,0,0-2.69,3.24L97.44,170l-.26.69a48.54,48.54,0,0,0,16.1,56.1l.09.07.24.17,39.82,29.82,19.7,14.91,12,9.06a8.07,8.07,0,0,0,9.76,0l12-9.06,19.7-14.91,40.06-30,.1-.08A48.56,48.56,0,0,0,282.83,170.73Z"/>
|
||||
</g>
|
||||
<svg width="22" height="22" viewBox="0 0 22 22" fill="none" xmlns="http://www.w3.org/2000/svg">
|
||||
<path d="M11 21L16.5 8H5.5L11 21Z" stroke="currentColor" stroke-linecap="round" stroke-linejoin="round"/>
|
||||
<path d="M1 8L3.5 15.5L11 21L18.5 15.5L21 8" stroke="currentColor" stroke-linecap="round" stroke-linejoin="round"/>
|
||||
<path d="M1 8L5.5 8L8.25 1" stroke="currentColor" stroke-linecap="round" stroke-linejoin="round"/>
|
||||
<path d="M21 8L16.5 8L13.75 1" stroke="currentColor" stroke-linecap="round" stroke-linejoin="round"/>
|
||||
</svg>
|
||||
|
||||
|
Before Width: | Height: | Size: 564 B After Width: | Height: | Size: 534 B |
@@ -1,26 +1,12 @@
|
||||
import { useEffect } from "react";
|
||||
import { useSelector, useDispatch } from "react-redux";
|
||||
import { useSelector } from "react-redux";
|
||||
import { RootState } from "#/store";
|
||||
import { BrowserSnapshot } from "./browser-snapshot";
|
||||
import { EmptyBrowserMessage } from "./empty-browser-message";
|
||||
import { useConversation } from "#/context/conversation-context";
|
||||
import {
|
||||
initialState as browserInitialState,
|
||||
setUrl,
|
||||
setScreenshotSrc,
|
||||
} from "#/state/browser-slice";
|
||||
|
||||
export function BrowserPanel() {
|
||||
const { url, screenshotSrc } = useSelector(
|
||||
(state: RootState) => state.browser,
|
||||
);
|
||||
const { conversationId } = useConversation();
|
||||
const dispatch = useDispatch();
|
||||
|
||||
useEffect(() => {
|
||||
dispatch(setUrl(browserInitialState.url));
|
||||
dispatch(setScreenshotSrc(browserInitialState.screenshotSrc));
|
||||
}, [conversationId]);
|
||||
|
||||
const imgSrc =
|
||||
screenshotSrc && screenshotSrc.startsWith("data:image/png;base64,")
|
||||
|
||||
@@ -12,21 +12,6 @@ const decodeHtmlEntities = (text: string): string => {
|
||||
return textarea.value;
|
||||
};
|
||||
|
||||
/**
|
||||
* Checks if a path is likely a directory
|
||||
* @param path The full path
|
||||
* @returns True if the path is likely a directory
|
||||
*/
|
||||
const isLikelyDirectory = (path: string): boolean => {
|
||||
if (!path) return false;
|
||||
// Check if path already ends with a slash
|
||||
if (path.endsWith("/") || path.endsWith("\\")) return true;
|
||||
// Check if path has no extension (simple heuristic)
|
||||
const lastPart = path.split(/[/\\]/).pop() || "";
|
||||
// If the last part has no dots, it's likely a directory
|
||||
return !lastPart.includes(".");
|
||||
};
|
||||
|
||||
/**
|
||||
* Extracts the filename from a path
|
||||
* @param path The full path
|
||||
@@ -36,14 +21,7 @@ const extractFilename = (path: string): string => {
|
||||
if (!path) return "";
|
||||
// Handle both Unix and Windows paths
|
||||
const parts = path.split(/[/\\]/);
|
||||
const filename = parts[parts.length - 1];
|
||||
|
||||
// Add trailing slash for directories
|
||||
if (isLikelyDirectory(path) && !filename.endsWith("/")) {
|
||||
return `${filename}/`;
|
||||
}
|
||||
|
||||
return filename;
|
||||
return parts[parts.length - 1];
|
||||
};
|
||||
|
||||
/**
|
||||
@@ -86,4 +64,4 @@ function PathComponent(props: { children?: ReactNode }) {
|
||||
return <strong className="font-mono">{children}</strong>;
|
||||
}
|
||||
|
||||
export { PathComponent, isLikelyDirectory };
|
||||
export { PathComponent };
|
||||
|
||||
@@ -20,7 +20,7 @@ export function Controls({ setSecurityOpen, showSecurityLock }: ControlsProps) {
|
||||
useAutoTitle();
|
||||
|
||||
return (
|
||||
<div className="flex flex-col gap-2 md:items-center md:justify-between md:flex-row">
|
||||
<div className="flex items-center justify-between">
|
||||
<div className="flex items-center gap-2">
|
||||
<AgentControlBar />
|
||||
<AgentStatusBar />
|
||||
|
||||
@@ -16,7 +16,6 @@ import { BaseModal } from "../../shared/modals/base-modal/base-modal";
|
||||
import { RootState } from "#/store";
|
||||
import { I18nKey } from "#/i18n/declaration";
|
||||
import { selectSystemMessage } from "#/state/chat-slice";
|
||||
import { transformVSCodeUrl } from "#/utils/vscode-url-helper";
|
||||
|
||||
interface ConversationCardProps {
|
||||
onClick?: () => void;
|
||||
@@ -118,10 +117,7 @@ export function ConversationCard({
|
||||
const data = await response.json();
|
||||
|
||||
if (data.vscode_url) {
|
||||
const transformedUrl = transformVSCodeUrl(data.vscode_url);
|
||||
if (transformedUrl) {
|
||||
window.open(transformedUrl, "_blank");
|
||||
}
|
||||
window.open(data.vscode_url, "_blank");
|
||||
}
|
||||
// VS Code URL not available
|
||||
} catch (error) {
|
||||
@@ -164,7 +160,7 @@ export function ConversationCard({
|
||||
className={cn(
|
||||
"h-[100px] w-full px-[18px] py-4 border-b border-neutral-600 cursor-pointer",
|
||||
variant === "compact" &&
|
||||
"md:w-fit h-auto rounded-xl border border-[#525252]",
|
||||
"h-auto w-fit rounded-xl border border-[#525252]",
|
||||
)}
|
||||
>
|
||||
<div className="flex items-center justify-between w-full">
|
||||
|
||||