Compare commits

..

2 Commits

Author SHA1 Message Date
openhands 31341cc7f1 Add debugging and data-testid for tasks tab visibility troubleshooting
- Add console.log statements to help debug tab rendering
- Add data-testid='tasks-tab' to make tasks tab easier to identify
- Update ConversationTabNav to accept data-testid prop
2025-09-19 20:26:01 +00:00
openhands aaa2dbe45e Fix Unicode regex pattern in task parsing
Add 'u' flag to regex to properly handle Unicode emoji characters in task status parsing.

Co-authored-by: openhands <openhands@all-hands.dev>
2025-09-19 20:03:32 +00:00
153 changed files with 2978 additions and 6619 deletions
+6 -77
View File
@@ -59,7 +59,6 @@ jobs:
permissions:
contents: read
packages: write
security-events: write
steps:
- name: Checkout
uses: actions/checkout@v4
@@ -86,35 +85,14 @@ jobs:
run: |
./containers/build.sh -i openhands -o ${{ env.REPO_OWNER }} --push
- name: Run Trivy vulnerability scanner
if: github.event_name != 'pull_request'
uses: aquasecurity/trivy-action@master
# This only reports, does not fail the build on CVE.
with:
image-ref: ghcr.io/${{ env.REPO_OWNER }}/openhands:${{ env.RELEVANT_SHA }}
format: 'sarif'
output: 'trivy-results.sarif'
timeout: '10m'
scanners: 'vuln' # Only scan vulnerabilities, not secrets/config
severity: 'CRITICAL,HIGH,MEDIUM' # Skip LOW severity
- name: Upload Trivy results to GitHub Security tab
if: github.event_name != 'pull_request'
uses: github/codeql-action/upload-sarif@v3
with:
sarif_file: 'trivy-results.sarif'
category: 'security/container-openhands-${{ matrix.base_image.tag }}'
sha: "${{ env.RELEVANT_SHA }}"
ref: refs/heads/${{ github.event.pull_request.head.ref || github.ref_name }}
# Builds the runtime Docker images
ghcr_build_runtime:
name: Build Runtime Image
name: Build Image
runs-on: blacksmith-8vcpu-ubuntu-2204
if: "!(github.event_name == 'push' && startsWith(github.ref, 'refs/tags/ext-v'))"
permissions:
contents: read
packages: write
security-events: write
needs: define-matrix
strategy:
matrix:
@@ -188,41 +166,12 @@ jobs:
name: runtime-src-${{ matrix.base_image.tag }}
path: containers/runtime
- name: Run Trivy vulnerability scanner
if: github.event_name != 'pull_request'
uses: aquasecurity/trivy-action@master
# This only reports, does not fail the build on CVE.
with:
image-ref: ghcr.io/${{ env.REPO_OWNER }}/runtime:${{ env.RELEVANT_SHA }}-${{ matrix.base_image.tag }}
format: 'sarif'
output: 'trivy-results-raw.sarif'
timeout: '10m'
scanners: 'vuln' # Only scan vulnerabilities, not secrets/config
severity: 'CRITICAL,HIGH,MEDIUM' # Skip LOW severity
- name: Customize SARIF with image flavor
if: github.event_name != 'pull_request'
run: |
# Modify the tool name to include the image flavor
jq --arg flavor "${{ matrix.base_image.tag }}" \
'.runs[0].tool.driver.name = "Trivy (" + $flavor + ")"' \
trivy-results-raw.sarif > trivy-results.sarif
echo "Modified tool name to: $(jq -r '.runs[0].tool.driver.name' trivy-results.sarif)"
- name: Upload Trivy results to GitHub Security tab
if: github.event_name != 'pull_request'
uses: github/codeql-action/upload-sarif@v3
with:
sarif_file: 'trivy-results.sarif'
category: 'security/container-runtime-${{ matrix.base_image.tag }}'
ghcr_build_enterprise:
name: Build Enterprise Image
name: Push Enterprise Image
runs-on: blacksmith-8vcpu-ubuntu-2204
permissions:
contents: read
packages: write
security-events: write
needs: [define-matrix, ghcr_build_app]
# Do not build enterprise in forks
if: github.event.pull_request.head.repo.fork != true
@@ -262,16 +211,14 @@ jobs:
latest=auto
prefix=
suffix=
env:
DOCKER_METADATA_PR_HEAD_SHA: true
- name: Determine app image tag
shell: bash
run: |
# Duplicated with build.sh
sanitized_ref_name=$(echo "${{github.event.pull_request.head.sha}}" | sed 's/[^a-zA-Z0-9.-]\+/-/g')
sanitized_ref_name=$(echo "$GITHUB_REF_NAME" | sed 's/[^a-zA-Z0-9.-]\+/-/g')
OPENHANDS_BUILD_VERSION=$sanitized_ref_name
sanitized_ref_name=$(echo "$sanitized_ref_name" | tr '[:upper:]' '[:lower:]') # lower case is required in tagging
echo "OPENHANDS_BUILD_VERSION=${sanitized_ref_name}" >> $GITHUB_ENV
echo REPO_OWNER=$(echo ${{ github.repository_owner }} | tr '[:upper:]' '[:lower:]') >> $GITHUB_ENV
echo "OPENHANDS_DOCKER_TAG=${sanitized_ref_name}" >> $GITHUB_ENV
- name: Build and push Docker image
uses: useblacksmith/build-push-action@v1
with:
@@ -281,30 +228,12 @@ jobs:
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}
build-args: |
OPENHANDS_VERSION=${{ env.OPENHANDS_BUILD_VERSION }}
OPENHANDS_VERSION=${{ env.OPENHANDS_DOCKER_TAG }}
platforms: linux/amd64
# Add build provenance
provenance: true
# Add build attestations for better security
sbom: true
- name: Run Trivy vulnerability scanner
uses: aquasecurity/trivy-action@master
if: github.event_name != 'pull_request'
# This only reports, does not fail the build on CVE.
with:
image-ref: "ghcr.io/${{ env.REPO_OWNER }}/enterprise-server:sha-${{ env.RELEVANT_SHA }}"
format: 'sarif'
output: 'trivy-results.sarif'
timeout: '10m'
scanners: 'vuln' # Only scan vulnerabilities, not secrets/config
severity: 'CRITICAL,HIGH,MEDIUM' # Skip LOW severity
- name: Upload Trivy results to GitHub Security tab
if: github.event_name != 'pull_request'
uses: github/codeql-action/upload-sarif@v3
with:
sarif_file: 'trivy-results.sarif'
category: 'security/container-enterprise-server'
enterprise-preview:
name: Enterprise preview
+7 -54
View File
@@ -19,16 +19,12 @@ jobs:
# Run python tests on Linux
test-on-linux:
name: Python Tests on Linux
runs-on: blacksmith-4vcpu-ubuntu-2404
runs-on: blacksmith-4vcpu-ubuntu-2204
env:
INSTALL_DOCKER: "0" # Set to '0' to skip Docker installation
strategy:
matrix:
python-version: ["3.12"]
permissions:
# For coverage comment and python-coverage-comment-action branch
pull-requests: write
contents: write
steps:
- uses: actions/checkout@v4
- name: Set up Docker Buildx
@@ -52,21 +48,10 @@ jobs:
- name: Build Environment
run: make build
- name: Run Unit Tests
run: PYTHONPATH=".:$PYTHONPATH" poetry run pytest --forked -n auto -s ./tests/unit --cov=openhands --cov-branch
env:
COVERAGE_FILE: ".coverage.${{ matrix.python_version }}"
run: PYTHONPATH=".:$PYTHONPATH" poetry run pytest --forked -n auto -svv ./tests/unit
- name: Run Runtime Tests with CLIRuntime
run: PYTHONPATH=".:$PYTHONPATH" TEST_RUNTIME=cli poetry run pytest -s tests/runtime/test_bash.py --cov=openhands --cov-branch
env:
COVERAGE_FILE: ".coverage.runtime.${{ matrix.python_version }}"
- name: Store coverage file
uses: actions/upload-artifact@v4
with:
name: coverage-openhands
path: |
.coverage.${{ matrix.python_version }}
.coverage.runtime.${{ matrix.python_version }}
include-hidden-files: true
run: PYTHONPATH=".:$PYTHONPATH" TEST_RUNTIME=cli poetry run pytest -svv tests/runtime/test_bash.py
# Run specific Windows python tests
test-on-windows:
name: Python Tests on Windows
@@ -100,7 +85,7 @@ jobs:
DEBUG: "1"
test-enterprise:
name: Enterprise Python Unit Tests
runs-on: blacksmith-4vcpu-ubuntu-2404
runs-on: blacksmith-4vcpu-ubuntu-2204
strategy:
matrix:
python-version: ["3.12"]
@@ -117,37 +102,5 @@ jobs:
working-directory: ./enterprise
run: poetry install --with dev,test
- name: Run Unit Tests
# Use base working directory for coverage paths to line up.
run: PYTHONPATH=".:$PYTHONPATH" poetry run --project=enterprise pytest --forked -n auto -s -p no:ddtrace -p no:ddtrace.pytest_bdd -p no:ddtrace.pytest_benchmark ./enterprise/tests/unit --cov=enterprise --cov-branch
env:
COVERAGE_FILE: ".coverage.enterprise.${{ matrix.python_version }}"
- name: Store coverage file
uses: actions/upload-artifact@v4
with:
name: coverage-enterprise
path: ".coverage.enterprise.${{ matrix.python_version }}"
include-hidden-files: true
coverage-comment:
name: Coverage Comment
if: github.event_name == 'pull_request'
runs-on: ubuntu-latest
needs: [test-on-linux, test-enterprise]
permissions:
pull-requests: write
contents: write
steps:
- uses: actions/checkout@v4
- uses: actions/download-artifact@v5
id: download
with:
pattern: coverage-*
merge-multiple: true
- name: Coverage comment
id: coverage_comment
uses: py-cov-action/python-coverage-comment-action@v3
with:
GITHUB_TOKEN: ${{ github.token }}
MERGE_COVERAGE_FILES: true
working-directory: ./enterprise
run: PYTHONPATH=".:$PYTHONPATH" poetry run pytest --forked -n auto -svv -p no:ddtrace -p no:ddtrace.pytest_bdd -p no:ddtrace.pytest_benchmark ./tests/unit
+1 -1
View File
@@ -15,7 +15,7 @@ jobs:
stale-issue-message: 'This issue is stale because it has been open for 40 days with no activity. Remove the stale label or leave a comment, otherwise it will be closed in 10 days.'
stale-pr-message: 'This PR is stale because it has been open for 40 days with no activity. Remove the stale label or leave a comment, otherwise it will be closed in 10 days.'
days-before-stale: 40
exempt-issue-labels: roadmap,backlog,app-team
exempt-issue-labels: roadmap,backlog
close-issue-message: 'This issue was automatically closed due to 50 days of inactivity. We do this to help keep the issues somewhat manageable and focus on active issues.'
close-pr-message: 'This PR was closed because it had no activity for 50 days. If you feel this was closed in error, and you would like to continue the PR, please resubmit or let us know.'
days-before-close: 10
-41
View File
@@ -489,47 +489,6 @@ type = "noop"
# Run the runtime sandbox container in privileged mode for use with docker-in-docker
#privileged = false
#################################### MCP #####################################
# Configuration for Model Context Protocol (MCP) servers
# MCP allows OpenHands to communicate with external tool servers
##############################################################################
[mcp]
# SSE servers - Server-Sent Events transport (legacy)
#sse_servers = [
# # Basic SSE server with just a URL
# "http://localhost:8080/mcp/sse",
#
# # SSE server with authentication
# {url = "https://api.example.com/mcp/sse", api_key = "your-api-key"}
#]
# SHTTP servers - Streamable HTTP transport (recommended)
#shttp_servers = [
# # Basic SHTTP server with default 60s timeout
# "https://api.example.com/mcp/shttp",
#
# # SHTTP server with custom timeout for long-running tools
# {
# url = "https://api.example.com/mcp/shttp",
# api_key = "your-api-key",
# timeout = 180 # 3 minutes for processing-heavy tools (1-3600 seconds)
# }
#]
# Stdio servers - Direct process communication (development only)
#stdio_servers = [
# # Basic stdio server
# {name = "filesystem", command = "npx", args = ["@modelcontextprotocol/server-filesystem", "/"]},
#
# # Stdio server with environment variables
# {
# name = "fetch",
# command = "uvx",
# args = ["mcp-server-fetch"],
# env = {DEBUG = "true"}
# }
#]
#################################### Model Routing ############################
# Configuration for experimental model routing feature
# Enables intelligent switching between different LLM models for specific purposes
Binary file not shown.

After

Width:  |  Height:  |  Size: 18 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 15 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 15 KiB

After

Width:  |  Height:  |  Size: 18 KiB

BIN
View File
Binary file not shown.

Before

Width:  |  Height:  |  Size: 212 KiB

After

Width:  |  Height:  |  Size: 144 KiB

BIN
View File
Binary file not shown.

After

Width:  |  Height:  |  Size: 663 KiB

+12 -13
View File
@@ -8,21 +8,9 @@ description: This guide walks you through the process of installing OpenHands Cl
- Signed in to [OpenHands Cloud](https://app.all-hands.dev) with [a Bitbucket account](/usage/cloud/openhands-cloud).
## Adding Bitbucket Repository Access
Upon signing into OpenHands Cloud with a Bitbucket account, OpenHands will have access to your repositories.
## Working With Bitbucket Repos in Openhands Cloud
After signing in with a Bitbucket account, use the `Open Repository` section to select the appropriate repository and
branch you'd like OpenHands to work on. Then click on `Launch` to start the conversation!
![Connect Repo](/static/img/connect-repo.png)
## IP Whitelisting
If your Bitbucket Cloud instance has IP restrictions, you'll need to whitelist the following IP addresses to allow
OpenHands to access your repositories:
If your Bitbucket Cloud instance has IP restrictions, you'll need to whitelist the following IP addresses to allow OpenHands to access your repositories:
### Core App IP
```
@@ -43,6 +31,17 @@ OpenHands to access your repositories:
34.60.55.59
```
## Adding Bitbucket Repository Access
Upon signing into OpenHands Cloud with a Bitbucket account, OpenHands will have access to your repositories.
## Working With Bitbucket Repos in Openhands Cloud
After signing in with a Bitbucket account, use the `select a repo` and `select a branch` dropdowns to select the
appropriate repository and branch you'd like OpenHands to work on. Then click on `Launch` to start the conversation!
![Connect Repo](/static/img/connect-repo-no-github.png)
## Next Steps
- [Learn about the Cloud UI](/usage/cloud/cloud-ui).
+7 -4
View File
@@ -12,10 +12,13 @@ For the available API endpoints, refer to the
To use the OpenHands Cloud API, you'll need to generate an API key:
1. Log in to your [OpenHands Cloud](https://app.all-hands.dev) account.
2. Navigate to the [Settings > API Keys](https://app.all-hands.dev/settings/api-keys) page.
3. Click `Create API Key`.
4. Give your key a descriptive name (Example: "Development" or "Production") and select `Create`.
5. Copy the generated API key and store it securely. It will only be shown once.
2. Navigate to the [Settings page](https://app.all-hands.dev/settings).
3. Select the `API Keys` tab.
4. Click `Create API Key`.
5. Give your key a descriptive name (Example: "Development" or "Production") and select `Create`.
6. Copy the generated API key and store it securely. It will only be shown once.
![API Key Generation](/static/img/api-key-generation.png)
## API Usage
+10 -25
View File
@@ -8,39 +8,24 @@ description: The Cloud UI provides a web interface for interacting with OpenHand
The landing page is where you can:
- [Add GitHub repository access](/usage/cloud/github-installation#adding-github-repository-access) to OpenHands.
- [Select a GitHub repo](/usage/cloud/github-installation#working-with-github-repos-in-openhands-cloud),
[a GitLab repo](/usage/cloud/gitlab-installation#working-with-gitlab-repos-in-openhands-cloud) or
[a Bitbucket repo](/usage/cloud/bitbucket-installation#working-with-bitbucket-repos-in-openhands-cloud) to start working on.
- Launch an empty conversation using `New Conversation`.
- See `Suggested Tasks` for repositories that OpenHands has access to.
- See your `Recent Conversations`.
- Launch an empty conversation using `Launch from Scratch`.
## Settings
Settings are divided across tabs, with each tab focusing on a specific area of configuration.
The Settings page allows you to:
- `User`
- Change your email address.
- `Integrations`
- [Configure GitHub repository access](/usage/cloud/github-installation#modifying-repository-access) for OpenHands.
- [Install the OpenHands Slack app](/usage/cloud/slack-installation).
- `Application`
- Set your preferred language, notifications and other preferences.
- Toggle task suggestions on GitHub.
- Toggle Solvability Analysis.
- Set a maximum budget per conversation.
- Configure the username and email that OpenHands uses for commits.
- `LLM` (Available for `Pro Users`)
- Choose to use another LLM or use different models from the OpenHands provider.
- `Billing`
- Add credits for using the OpenHands provider.
- Cancel your `Pro` subscription.
- `Secrets`
- [Generate custom secrets](/usage/common-settings#secrets-management).
- `API Keys`
- [Create API keys to work with OpenHands programmatically](/usage/cloud/cloud-api).
- `MCP`
- [Setup an MCP server](/usage/mcp)
- [Configure GitHub repository access](/usage/cloud/github-installation#modifying-repository-access) for OpenHands.
- [Install the OpenHands Slack app](/usage/cloud/slack-installation).
- Set application settings like your preferred language, notifications and other preferences.
- Add credits to your account.
- [Generate custom secrets](/usage/common-settings#secrets-management).
- [Create API keys to work with OpenHands programmatically](/usage/cloud/cloud-api).
- Change your email address.
## Key Features
+9 -16
View File
@@ -12,7 +12,7 @@ description: This guide walks you through the process of installing OpenHands Cl
You can grant OpenHands access to specific GitHub repositories:
1. Click on `+ Add GitHub Repos` in the repository selection dropdown.
1. Click on `Add GitHub repos` on the landing page.
2. Select your organization and choose the specific repositories to grant OpenHands access to.
<Accordion title="OpenHands permissions">
- OpenHands requests short-lived tokens (8-hour expiration) with these permissions:
@@ -34,22 +34,20 @@ You can grant OpenHands access to specific GitHub repositories:
## Modifying Repository Access
You can modify GitHub repository access at any time by:
- Selecting `+ Add GitHub Repos` in the repository selection dropdown or
- Visiting the `Settings > Integrations` page and selecting `Configure GitHub Repositories`
- Selecting `Add GitHub repos` on the landing page or
- Visiting the Settings page and selecting `Configure GitHub Repositories` under the `Integrations` tab
## Working With GitHub Repos in Openhands Cloud
Once you've granted GitHub repository access, you can start working with your GitHub repository. Use the
`Open Repository` section to select the appropriate repository and branch you'd like OpenHands to work on. Then click
on `Launch` to start the conversation!
Once you've granted GitHub repository access, you can start working with your GitHub repository. Use the `select a repo`
and `select a branch` dropdowns to select the appropriate repository and branch you'd like OpenHands to work on. Then
click on `Launch` to start the conversation!
![Connect Repo](/static/img/connect-repo.png)
## Working on GitHub Issues and Pull Requests Using Openhands
## Working on Github Issues and Pull Requests Using Openhands
To allow OpenHands to work directly from GitHub directly, you must
[give OpenHands access to your repository](/usage/cloud/github-installation#modifying-repository-access). Once access is
given, you can use OpenHands by labeling the issue or by tagging `@openhands`.
Giving GitHub repository access to OpenHands also allows you to work on GitHub issues and pull requests directly.
### Working with Issues
@@ -66,12 +64,7 @@ To get OpenHands to work on pull requests, mention `@openhands` in the comments
- Request updates
- Get code explanations
<Note>
The `@openhands` mention functionality in pull requests only works if the pull request is both
*to* and *from* a repository that you have added through the interface. This is because OpenHands needs appropriate
permissions to access both repositories.
</Note>
**Important Note**: The `@openhands` mention functionality in pull requests only works if the pull request is both *to* and *from* a repository that you have added through the interface. This is because OpenHands needs appropriate permissions to access both repositories.
## Next Steps
+6 -8
View File
@@ -14,17 +14,16 @@ Upon signing into OpenHands Cloud with a GitLab account, OpenHands will have acc
## Working With GitLab Repos in Openhands Cloud
After signing in with a Gitlab account, use the `Open Repository` section to select the appropriate repository and
branch you'd like OpenHands to work on. Then click on `Launch` to start the conversation!
After signing in with a Gitlab account, use the `select a repo` and `select a branch` dropdowns to select the
appropriate repository and branch you'd like OpenHands to work on. Then click on `Launch` to start the conversation!
![Connect Repo](/static/img/connect-repo.png)
![Connect Repo](/static/img/connect-repo-no-github.png)
## Using Tokens with Reduced Scopes
OpenHands requests an API-scoped token during OAuth authentication. By default, this token is provided to the agent.
To restrict the agent's permissions, you can define a custom secret `GITLAB_TOKEN`, which will override the default
token assigned to the agent. While the high-permission API token is still requested and used for other components of
the application (e.g. opening merge requests), the agent will not have access to it.
To restrict the agent's permissions, you can define a custom secret `GITLAB_TOKEN`, which will override the default token assigned to the agent.
While the high-permission API token is still requested and used for other components of the application (e.g. opening merge requests), the agent will not have access to it.
## Working on GitLab Issues and Merge Requests Using Openhands
@@ -33,8 +32,7 @@ This feature works for personal projects and is available for group projects wit
[Premium or Ultimate tier subscription](https://docs.gitlab.com/user/project/integrations/webhooks/#group-webhooks).
A webhook is automatically installed within a few minutes after the owner/maintainer of the project or group logs into
OpenHands Cloud. If you decide to delete the webhook, then re-installing will require the support of All Hands AI but
we are planning to improve this in a future release.
OpenHands Cloud. If you decide to delete the webhook, then re-installing will require the support of All Hands AI but we are planning to improve this in a future release.
</Note>
Giving GitLab repository access to OpenHands also allows you to work on GitLab issues and merge requests directly.
+1 -3
View File
@@ -13,9 +13,7 @@ description: This guide walks you through installing the OpenHands Slack app.
</iframe>
<Info>
OpenHands utilizes a large language model (LLM), which may generate responses that are inaccurate or incomplete.
While we strive for accuracy, OpenHands' outputs are not guaranteed to be correct, and we encourage users to
validate critical information independently.
OpenHands utilizes a large language model (LLM), which may generate responses that are inaccurate or incomplete. While we strive for accuracy, OpenHands' outputs are not guaranteed to be correct, and we encourage users to validate critical information independently.
</Info>
## Prerequisites
-46
View File
@@ -67,19 +67,6 @@ sse_servers = [
# External MCP service with authentication
{url="https://api.example.com/mcp/sse", api_key="your-api-key"}
]
# SHTTP Servers - Modern streamable HTTP transport (recommended)
shttp_servers = [
# Basic SHTTP server with default 60s timeout
"https://api.example.com/mcp/shttp",
# Server with custom timeout for heavy operations
{
url = "https://files.example.com/mcp/shttp",
api_key = "your-api-key",
timeout = 1800 # 30 minutes for large file processing
}
]
```
@@ -131,17 +118,6 @@ SHTTP (Streamable HTTP) servers are configured using either a string URL or an o
- Type: `str`
- Description: API key for authentication
- `timeout` (optional)
- Type: `int`
- Default: `60`
- Range: `1-3600` seconds (1 hour maximum)
- Description: Timeout in seconds for tool execution. This prevents tool calls from hanging indefinitely.
- **Use Cases:**
- **Short timeout (1-30s)**: For lightweight operations like status checks or simple queries
- **Medium timeout (30-300s)**: For standard processing tasks like data analysis or API calls
- **Long timeout (300-3600s)**: For heavy operations like file processing, complex calculations, or batch operations
- **Note**: This timeout only applies to individual tool calls, not server connection establishment.
### Stdio Servers
**Note**: While stdio servers are supported, we recommend using MCP proxies (see above) for better reliability and performance.
@@ -216,27 +192,5 @@ SHTTP is the modern HTTP-based transport protocol that provides enhanced feature
SHTTP is the recommended transport for HTTP-based MCP servers as it provides better reliability and features compared to the legacy SSE transport.
#### SHTTP Timeout Best Practices
When configuring SHTTP timeouts, consider these guidelines:
**Timeout Selection:**
- **Database queries**: 30-60 seconds
- **File operations**: 60-300 seconds (depending on file size)
- **Web scraping**: 60-120 seconds
- **Complex calculations**: 300-1800 seconds
- **Batch processing**: 1800-3600 seconds (maximum)
**Error Handling:**
When a tool call exceeds the configured timeout:
- The operation is cancelled with an `asyncio.TimeoutError`
- The agent receives a timeout error message
- The server connection remains active for subsequent requests
**Monitoring:**
- Set timeouts based on your tool's actual performance characteristics
- Monitor timeout occurrences to optimize timeout values
- Consider implementing server-side timeout handling for graceful degradation
### Standard Input/Output (stdio)
Stdio transport enables communication through standard input and output streams, making it ideal for local integrations and command-line tools. This transport is used for locally executed MCP servers that run as separate processes.
+23 -117
View File
@@ -766,7 +766,7 @@ version = "1.17.1"
description = "Foreign Function Interface for Python calling C code."
optional = false
python-versions = ">=3.8"
groups = ["main", "test"]
groups = ["main"]
files = [
{file = "cffi-1.17.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:df8b1c11f177bc2313ec4b2d46baec87a5f3e71fc8b45dab2ee7cae86d9aba14"},
{file = "cffi-1.17.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8f2cdc858323644ab277e9bb925ad72ae0e67f69e804f4898c070998d50b1a67"},
@@ -836,7 +836,6 @@ files = [
{file = "cffi-1.17.1-cp39-cp39-win_amd64.whl", hash = "sha256:d016c76bdd850f3c626af19b0542c9677ba156e4ee4fccfdd7848803533ef662"},
{file = "cffi-1.17.1.tar.gz", hash = "sha256:1c39c6016c32bc48dd54561950ebd6836e1670f2ae46128f67cf49e789c52824"},
]
markers = {test = "platform_python_implementation == \"CPython\" and sys_platform == \"win32\""}
[package.dependencies]
pycparser = "*"
@@ -1902,25 +1901,25 @@ files = [
[[package]]
name = "fastapi"
version = "0.117.1"
version = "0.116.1"
description = "FastAPI framework, high performance, easy to learn, fast to code, ready for production"
optional = false
python-versions = ">=3.8"
groups = ["main"]
files = [
{file = "fastapi-0.117.1-py3-none-any.whl", hash = "sha256:33c51a0d21cab2b9722d4e56dbb9316f3687155be6b276191790d8da03507552"},
{file = "fastapi-0.117.1.tar.gz", hash = "sha256:fb2d42082d22b185f904ca0ecad2e195b851030bd6c5e4c032d1c981240c631a"},
{file = "fastapi-0.116.1-py3-none-any.whl", hash = "sha256:c46ac7c312df840f0c9e220f7964bada936781bc4e2e6eb71f1c4d7553786565"},
{file = "fastapi-0.116.1.tar.gz", hash = "sha256:ed52cbf946abfd70c5a0dccb24673f0670deeb517a88b3544d03c2a6bf283143"},
]
[package.dependencies]
pydantic = ">=1.7.4,<1.8 || >1.8,<1.8.1 || >1.8.1,<2.0.0 || >2.0.0,<2.0.1 || >2.0.1,<2.1.0 || >2.1.0,<3.0.0"
starlette = ">=0.40.0,<0.49.0"
starlette = ">=0.40.0,<0.48.0"
typing-extensions = ">=4.8.0"
[package.extras]
all = ["email-validator (>=2.0.0)", "fastapi-cli[standard] (>=0.0.8)", "httpx (>=0.23.0,<1.0.0)", "itsdangerous (>=1.1.0)", "jinja2 (>=3.1.5)", "orjson (>=3.2.1)", "pydantic-extra-types (>=2.0.0)", "pydantic-settings (>=2.0.0)", "python-multipart (>=0.0.18)", "pyyaml (>=5.3.1)", "ujson (>=4.0.1,!=4.0.2,!=4.1.0,!=4.2.0,!=4.3.0,!=5.0.0,!=5.1.0)", "uvicorn[standard] (>=0.12.0)"]
standard = ["email-validator (>=2.0.0)", "fastapi-cli[standard] (>=0.0.8)", "httpx (>=0.23.0,<1.0.0)", "jinja2 (>=3.1.5)", "python-multipart (>=0.0.18)", "uvicorn[standard] (>=0.12.0)"]
standard-no-fastapi-cloud-cli = ["email-validator (>=2.0.0)", "fastapi-cli[standard-no-fastapi-cloud-cli] (>=0.0.8)", "httpx (>=0.23.0,<1.0.0)", "jinja2 (>=3.1.5)", "python-multipart (>=0.0.18)", "uvicorn[standard] (>=0.12.0)"]
all = ["email-validator (>=2.0.0)", "fastapi-cli[standard] (>=0.0.8)", "httpx (>=0.23.0)", "itsdangerous (>=1.1.0)", "jinja2 (>=3.1.5)", "orjson (>=3.2.1)", "pydantic-extra-types (>=2.0.0)", "pydantic-settings (>=2.0.0)", "python-multipart (>=0.0.18)", "pyyaml (>=5.3.1)", "ujson (>=4.0.1,!=4.0.2,!=4.1.0,!=4.2.0,!=4.3.0,!=5.0.0,!=5.1.0)", "uvicorn[standard] (>=0.12.0)"]
standard = ["email-validator (>=2.0.0)", "fastapi-cli[standard] (>=0.0.8)", "httpx (>=0.23.0)", "jinja2 (>=3.1.5)", "python-multipart (>=0.0.18)", "uvicorn[standard] (>=0.12.0)"]
standard-no-fastapi-cloud-cli = ["email-validator (>=2.0.0)", "fastapi-cli[standard-no-fastapi-cloud-cli] (>=0.0.8)", "httpx (>=0.23.0)", "jinja2 (>=3.1.5)", "python-multipart (>=0.0.18)", "uvicorn[standard] (>=0.12.0)"]
[[package]]
name = "fastjsonschema"
@@ -2292,72 +2291,6 @@ test-downstream = ["aiobotocore (>=2.5.4,<3.0.0)", "dask[dataframe,test]", "moto
test-full = ["adlfs", "aiohttp (!=4.0.0a0,!=4.0.0a1)", "cloudpickle", "dask", "distributed", "dropbox", "dropboxdrivefs", "fastparquet", "fusepy", "gcsfs", "jinja2", "kerchunk", "libarchive-c", "lz4", "notebook", "numpy", "ocifs", "pandas", "panel", "paramiko", "pyarrow", "pyarrow (>=1)", "pyftpdlib", "pygit2", "pytest", "pytest-asyncio (!=0.22.0)", "pytest-benchmark", "pytest-cov", "pytest-mock", "pytest-recording", "pytest-rerunfailures", "python-snappy", "requests", "smbprotocol", "tqdm", "urllib3", "zarr", "zstandard ; python_version < \"3.14\""]
tqdm = ["tqdm"]
[[package]]
name = "gevent"
version = "25.9.1"
description = "Coroutine-based network library"
optional = false
python-versions = ">=3.9"
groups = ["test"]
files = [
{file = "gevent-25.9.1-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:856b990be5590e44c3a3dc6c8d48a40eaccbb42e99d2b791d11d1e7711a4297e"},
{file = "gevent-25.9.1-cp310-cp310-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:fe1599d0b30e6093eb3213551751b24feeb43db79f07e89d98dd2f3330c9063e"},
{file = "gevent-25.9.1-cp310-cp310-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:f0d8b64057b4bf1529b9ef9bd2259495747fba93d1f836c77bfeaacfec373fd0"},
{file = "gevent-25.9.1-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:b56cbc820e3136ba52cd690bdf77e47a4c239964d5f80dc657c1068e0fe9521c"},
{file = "gevent-25.9.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:c5fa9ce5122c085983e33e0dc058f81f5264cebe746de5c401654ab96dddfca8"},
{file = "gevent-25.9.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:03c74fec58eda4b4edc043311fca8ba4f8744ad1632eb0a41d5ec25413581975"},
{file = "gevent-25.9.1-cp310-cp310-win_amd64.whl", hash = "sha256:a8ae9f895e8651d10b0a8328a61c9c53da11ea51b666388aa99b0ce90f9fdc27"},
{file = "gevent-25.9.1-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:18e5aff9e8342dc954adb9c9c524db56c2f3557999463445ba3d9cbe3dada7b7"},
{file = "gevent-25.9.1-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:1cdf6db28f050ee103441caa8b0448ace545364f775059d5e2de089da975c457"},
{file = "gevent-25.9.1-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:812debe235a8295be3b2a63b136c2474241fa5c58af55e6a0f8cfc29d4936235"},
{file = "gevent-25.9.1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:b28b61ff9216a3d73fe8f35669eefcafa957f143ac534faf77e8a19eb9e6883a"},
{file = "gevent-25.9.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:5e4b6278b37373306fc6b1e5f0f1cf56339a1377f67c35972775143d8d7776ff"},
{file = "gevent-25.9.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:d99f0cb2ce43c2e8305bf75bee61a8bde06619d21b9d0316ea190fc7a0620a56"},
{file = "gevent-25.9.1-cp311-cp311-win_amd64.whl", hash = "sha256:72152517ecf548e2f838c61b4be76637d99279dbaa7e01b3924df040aa996586"},
{file = "gevent-25.9.1-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:46b188248c84ffdec18a686fcac5dbb32365d76912e14fda350db5dc0bfd4f86"},
{file = "gevent-25.9.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:f2b54ea3ca6f0c763281cd3f96010ac7e98c2e267feb1221b5a26e2ca0b9a692"},
{file = "gevent-25.9.1-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:7a834804ac00ed8a92a69d3826342c677be651b1c3cd66cc35df8bc711057aa2"},
{file = "gevent-25.9.1-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:323a27192ec4da6b22a9e51c3d9d896ff20bc53fdc9e45e56eaab76d1c39dd74"},
{file = "gevent-25.9.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:6ea78b39a2c51d47ff0f130f4c755a9a4bbb2dd9721149420ad4712743911a51"},
{file = "gevent-25.9.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:dc45cd3e1cc07514a419960af932a62eb8515552ed004e56755e4bf20bad30c5"},
{file = "gevent-25.9.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:34e01e50c71eaf67e92c186ee0196a039d6e4f4b35670396baed4a2d8f1b347f"},
{file = "gevent-25.9.1-cp312-cp312-win_amd64.whl", hash = "sha256:4acd6bcd5feabf22c7c5174bd3b9535ee9f088d2bbce789f740ad8d6554b18f3"},
{file = "gevent-25.9.1-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:4f84591d13845ee31c13f44bdf6bd6c3dbf385b5af98b2f25ec328213775f2ed"},
{file = "gevent-25.9.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:9cdbb24c276a2d0110ad5c978e49daf620b153719ac8a548ce1250a7eb1b9245"},
{file = "gevent-25.9.1-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:88b6c07169468af631dcf0fdd3658f9246d6822cc51461d43f7c44f28b0abb82"},
{file = "gevent-25.9.1-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:b7bb0e29a7b3e6ca9bed2394aa820244069982c36dc30b70eb1004dd67851a48"},
{file = "gevent-25.9.1-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:2951bb070c0ee37b632ac9134e4fdaad70d2e660c931bb792983a0837fe5b7d7"},
{file = "gevent-25.9.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:e4e17c2d57e9a42e25f2a73d297b22b60b2470a74be5a515b36c984e1a246d47"},
{file = "gevent-25.9.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:8d94936f8f8b23d9de2251798fcb603b84f083fdf0d7f427183c1828fb64f117"},
{file = "gevent-25.9.1-cp313-cp313-win_amd64.whl", hash = "sha256:eb51c5f9537b07da673258b4832f6635014fee31690c3f0944d34741b69f92fa"},
{file = "gevent-25.9.1-cp314-cp314-macosx_11_0_universal2.whl", hash = "sha256:1a3fe4ea1c312dbf6b375b416925036fe79a40054e6bf6248ee46526ea628be1"},
{file = "gevent-25.9.1-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:0adb937f13e5fb90cca2edf66d8d7e99d62a299687400ce2edee3f3504009356"},
{file = "gevent-25.9.1-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:427f869a2050a4202d93cf7fd6ab5cffb06d3e9113c10c967b6e2a0d45237cb8"},
{file = "gevent-25.9.1-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:c049880175e8c93124188f9d926af0a62826a3b81aa6d3074928345f8238279e"},
{file = "gevent-25.9.1-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:b5a67a0974ad9f24721034d1e008856111e0535f1541499f72a733a73d658d1c"},
{file = "gevent-25.9.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:1d0f5d8d73f97e24ea8d24d8be0f51e0cf7c54b8021c1fddb580bf239474690f"},
{file = "gevent-25.9.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:ddd3ff26e5c4240d3fbf5516c2d9d5f2a998ef87cfb73e1429cfaeaaec860fa6"},
{file = "gevent-25.9.1-cp314-cp314-win_amd64.whl", hash = "sha256:bb63c0d6cb9950cc94036a4995b9cc4667b8915366613449236970f4394f94d7"},
{file = "gevent-25.9.1-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:f18f80aef6b1f6907219affe15b36677904f7cfeed1f6a6bc198616e507ae2d7"},
{file = "gevent-25.9.1-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:b274a53e818124a281540ebb4e7a2c524778f745b7a99b01bdecf0ca3ac0ddb0"},
{file = "gevent-25.9.1-cp39-cp39-win32.whl", hash = "sha256:c6c91f7e33c7f01237755884316110ee7ea076f5bdb9aa0982b6dc63243c0a38"},
{file = "gevent-25.9.1-cp39-cp39-win_amd64.whl", hash = "sha256:012a44b0121f3d7c800740ff80351c897e85e76a7e4764690f35c5ad9ec17de5"},
{file = "gevent-25.9.1.tar.gz", hash = "sha256:adf9cd552de44a4e6754c51ff2e78d9193b7fa6eab123db9578a210e657235dd"},
]
[package.dependencies]
cffi = {version = ">=1.17.1", markers = "platform_python_implementation == \"CPython\" and sys_platform == \"win32\""}
greenlet = {version = ">=3.2.2", markers = "platform_python_implementation == \"CPython\""}
"zope.event" = "*"
"zope.interface" = "*"
[package.extras]
dnspython = ["dnspython (>=1.16.0,<2.0) ; python_version < \"3.10\"", "idna ; python_version < \"3.10\""]
docs = ["furo", "repoze.sphinx.autointerface", "sphinx", "sphinxcontrib-programoutput", "zope.schema"]
monitor = ["psutil (>=5.7.0) ; sys_platform != \"win32\" or platform_python_implementation == \"CPython\""]
recommended = ["cffi (>=1.17.1) ; platform_python_implementation == \"CPython\"", "dnspython (>=1.16.0,<2.0) ; python_version < \"3.10\"", "idna ; python_version < \"3.10\"", "psutil (>=5.7.0) ; sys_platform != \"win32\" or platform_python_implementation == \"CPython\""]
test = ["cffi (>=1.17.1) ; platform_python_implementation == \"CPython\"", "coverage (>=5.0) ; sys_platform != \"win32\"", "dnspython (>=1.16.0,<2.0) ; python_version < \"3.10\"", "idna ; python_version < \"3.10\"", "objgraph", "psutil (>=5.7.0) ; sys_platform != \"win32\" or platform_python_implementation == \"CPython\"", "requests"]
[[package]]
name = "gitdb"
version = "4.0.12"
@@ -2774,7 +2707,7 @@ version = "3.2.4"
description = "Lightweight in-process concurrent programming"
optional = false
python-versions = ">=3.9"
groups = ["main", "test"]
groups = ["main"]
files = [
{file = "greenlet-3.2.4-cp310-cp310-macosx_11_0_universal2.whl", hash = "sha256:8c68325b0d0acf8d91dde4e6f930967dd52a5302cd4062932a6b2e7c2969f47c"},
{file = "greenlet-3.2.4-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:94385f101946790ae13da500603491f04a76b6e4c059dab271b3ce2e283b2590"},
@@ -2831,7 +2764,6 @@ files = [
{file = "greenlet-3.2.4-cp39-cp39-win_amd64.whl", hash = "sha256:d2e685ade4dafd447ede19c31277a224a239a0a1a4eca4e6390efedf20260cfb"},
{file = "greenlet-3.2.4.tar.gz", hash = "sha256:0dca0d95ff849f9a364385f36ab49f50065d76964944638be9691e1832e9f86d"},
]
markers = {test = "platform_python_implementation == \"CPython\""}
[package.extras]
docs = ["Sphinx", "furo"]
@@ -5431,7 +5363,7 @@ llama = ["llama-index (>=0.12.29,<0.13.0)", "llama-index-core (>=0.12.29,<0.13.0
[[package]]
name = "openhands-ai"
version = "0.57.0"
version = "0.55.0"
description = "OpenHands: Code Less, Make More"
optional = false
python-versions = "^3.12,<3.14"
@@ -5465,7 +5397,7 @@ json-repair = "*"
jupyter_kernel_gateway = "*"
kubernetes = "^33.1.0"
libtmux = ">=0.37,<0.40"
litellm = ">=1.74.3, <1.77.2, !=1.64.4, !=1.67.*"
litellm = "^1.74.3, !=1.64.4, !=1.67.*"
memory-profiler = "^0.61.0"
numpy = "*"
openai = "1.99.9"
@@ -5474,7 +5406,6 @@ opentelemetry-api = "^1.33.1"
opentelemetry-exporter-otlp-proto-grpc = "^1.33.1"
pathspec = "^0.12.1"
pexpect = "*"
pillow = "^11.3.0"
poetry = "^2.1.2"
prompt-toolkit = "^3.0.50"
protobuf = "^5.0.0,<6.0.0"
@@ -5482,7 +5413,6 @@ psutil = "*"
pygithub = "^2.5.0"
pyjwt = "^2.9.0"
pylatexenc = "*"
pypdf = "^6.0.0"
PyPDF2 = "*"
python-docx = "*"
python-dotenv = "*"
@@ -5496,17 +5426,13 @@ pyyaml = "^6.0.2"
qtconsole = "^5.6.1"
rapidfuzz = "^3.9.0"
redis = ">=5.2,<7.0"
requests = "^2.32.5"
setuptools = ">=78.1.1"
shellingham = "^1.5.4"
sse-starlette = "^3.0.2"
starlette = "^0.48.0"
sse-starlette = "^2.1.3"
tenacity = ">=8.5,<10.0"
termcolor = "*"
toml = "*"
tornado = "*"
types-toml = "*"
urllib3 = "^2.5.0"
uvicorn = "*"
whatthepatch = "^1.0.6"
zope-interface = "7.2"
@@ -6545,12 +6471,11 @@ version = "2.22"
description = "C parser in Python"
optional = false
python-versions = ">=3.8"
groups = ["main", "test"]
groups = ["main"]
files = [
{file = "pycparser-2.22-py3-none-any.whl", hash = "sha256:c3702b6d3dd8c7abc1afa565d7e63d53a1d0bd86cdc24edd75470f4de499cfcc"},
{file = "pycparser-2.22.tar.gz", hash = "sha256:491c8be9c040f5390f5bf44a5b07752bd07f56edf992381b05c701439eec10f6"},
]
markers = {test = "platform_python_implementation == \"CPython\" and sys_platform == \"win32\""}
[[package]]
name = "pydantic"
@@ -8340,7 +8265,7 @@ version = "80.9.0"
description = "Easily download, build, install, upgrade, and uninstall Python packages"
optional = false
python-versions = ">=3.9"
groups = ["main", "test"]
groups = ["main"]
files = [
{file = "setuptools-80.9.0-py3-none-any.whl", hash = "sha256:062d34222ad13e0cc312a4c02d73f059e86a4acbfbdea8f8f76b28c99f306922"},
{file = "setuptools-80.9.0.tar.gz", hash = "sha256:f36b47402ecde768dbfafc46e8e4207b4360c654f1f3bb84475f0a28628fb19c"},
@@ -8706,14 +8631,14 @@ sqlcipher = ["sqlcipher3_binary"]
[[package]]
name = "sse-starlette"
version = "3.0.2"
version = "2.4.1"
description = "SSE plugin for Starlette"
optional = false
python-versions = ">=3.9"
groups = ["main"]
files = [
{file = "sse_starlette-3.0.2-py3-none-any.whl", hash = "sha256:16b7cbfddbcd4eaca11f7b586f3b8a080f1afe952c15813455b162edea619e5a"},
{file = "sse_starlette-3.0.2.tar.gz", hash = "sha256:ccd60b5765ebb3584d0de2d7a6e4f745672581de4f5005ab31c3a25d10b52b3a"},
{file = "sse_starlette-2.4.1-py3-none-any.whl", hash = "sha256:08b77ea898ab1a13a428b2b6f73cfe6d0e607a7b4e15b9bb23e4a37b087fd39a"},
{file = "sse_starlette-2.4.1.tar.gz", hash = "sha256:7c8a800a1ca343e9165fc06bbda45c78e4c6166320707ae30b416c42da070926"},
]
[package.dependencies]
@@ -8721,7 +8646,7 @@ anyio = ">=4.7.0"
[package.extras]
daphne = ["daphne (>=4.2.0)"]
examples = ["aiosqlite (>=0.21.0)", "fastapi (>=0.115.12)", "sqlalchemy[asyncio] (>=2.0.41)", "starlette (>=0.41.3)", "uvicorn (>=0.34.0)"]
examples = ["aiosqlite (>=0.21.0)", "fastapi (>=0.115.12)", "sqlalchemy[asyncio,examples] (>=2.0.41)", "starlette (>=0.41.3)", "uvicorn (>=0.34.0)"]
granian = ["granian (>=2.3.1)"]
uvicorn = ["uvicorn (>=0.34.0)"]
@@ -8777,14 +8702,14 @@ files = [
[[package]]
name = "starlette"
version = "0.48.0"
version = "0.47.3"
description = "The little ASGI library that shines."
optional = false
python-versions = ">=3.9"
groups = ["main"]
files = [
{file = "starlette-0.48.0-py3-none-any.whl", hash = "sha256:0764ca97b097582558ecb498132ed0c7d942f233f365b86ba37770e026510659"},
{file = "starlette-0.48.0.tar.gz", hash = "sha256:7e8cee469a8ab2352911528110ce9088fdc6a37d9876926e73da7ce4aa4c7a46"},
{file = "starlette-0.47.3-py3-none-any.whl", hash = "sha256:89c0778ca62a76b826101e7c709e70680a1699ca7da6b44d38eb0a7e61fe4b51"},
{file = "starlette-0.47.3.tar.gz", hash = "sha256:6bc94f839cc176c4858894f1f8908f0ab79dfec1a6b8402f6da9be26ebea52e9"},
]
[package.dependencies]
@@ -9913,32 +9838,13 @@ enabler = ["pytest-enabler (>=2.2)"]
test = ["big-O", "jaraco.functools", "jaraco.itertools", "jaraco.test", "more_itertools", "pytest (>=6,!=8.1.*)", "pytest-ignore-flaky"]
type = ["pytest-mypy"]
[[package]]
name = "zope-event"
version = "6.0"
description = "Very basic event publishing system"
optional = false
python-versions = ">=3.9"
groups = ["test"]
files = [
{file = "zope_event-6.0-py3-none-any.whl", hash = "sha256:6f0922593407cc673e7d8766b492c519f91bdc99f3080fe43dcec0a800d682a3"},
{file = "zope_event-6.0.tar.gz", hash = "sha256:0ebac894fa7c5f8b7a89141c272133d8c1de6ddc75ea4b1f327f00d1f890df92"},
]
[package.dependencies]
setuptools = ">=75.8.2"
[package.extras]
docs = ["Sphinx"]
test = ["zope.testrunner (>=6.4)"]
[[package]]
name = "zope-interface"
version = "7.2"
description = "Interfaces for Python"
optional = false
python-versions = ">=3.8"
groups = ["main", "test"]
groups = ["main"]
files = [
{file = "zope.interface-7.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:ce290e62229964715f1011c3dbeab7a4a1e4971fd6f31324c4519464473ef9f2"},
{file = "zope.interface-7.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:05b910a5afe03256b58ab2ba6288960a2892dfeef01336dc4be6f1b9ed02ab0a"},
@@ -10102,4 +10008,4 @@ cffi = ["cffi (>=1.17) ; python_version >= \"3.13\" and platform_python_implemen
[metadata]
lock-version = "2.1"
python-versions = "^3.12,<3.14"
content-hash = "8c460070dce6bdec5ee0ee7bc0c2246fcf2602d1e64a0867b4f5e3a0e334fe93"
content-hash = "5771671ef2acc36e7b0931c73fa035ca1d329e8dac6827f7a349e1a569c3fd23"
-5
View File
@@ -63,7 +63,6 @@ openai = "*"
opencv-python = "*"
pandas = "*"
reportlab = "*"
gevent = ">=24.2.1,<26.0.0"
[tool.poetry-dynamic-versioning]
enable = true
@@ -86,7 +85,3 @@ lint.pydocstyle.convention = "google"
[tool.pytest.ini_options]
asyncio_mode = "auto"
asyncio_default_fixture_loop_scope = "function"
[tool.coverage.run]
relative_files = true
omit = [ "tests/*" ]
@@ -1,152 +0,0 @@
<h1 align="center"> Training Software Engineering Agents and Verifiers with SWE-Gym </h1>
A Multi-SWE-bench implementation of SWE-Gym.
<p align="center">
<a href="https://www.jiayipan.com/" style="text-decoration: none;">Jiayi Pan<sup>*,1</sup></a>,
<a href="https://xwang.dev/" style="text-decoration: none;">Xingyao Wang<sup>*,2</sup></a>,
<a href="https://www.phontron.com/" style="text-decoration: none;">Graham Neubig<sup>3</sup></a>,
<a href="https://www.cs.toronto.edu/~ndjaitly/" style="text-decoration: none;">Navdeep Jaitly<sup>4</sup></a>,
<a href="https://blender.cs.illinois.edu/hengji.html" style="text-decoration: none;">Heng Ji<sup>2</sup></a>,
<a href="https://www.alanesuhr.com/" style="text-decoration: none;">Alane Suhr<sup>^,1</sup></a>,
<a href="https://dreasysnail.github.io/" style="text-decoration: none;">Yizhe Zhang<sup>^,4</sup></a>
</p>
<p align="center">
<sup>1</sup>UC Berkeley, <sup>2</sup>UIUC, <sup>3</sup>CMU, <sup>4</sup>Apple </br>
<sub><sup>*</sup>Equal contribution, <sup>^</sup>Equal supervision</sub>
</p>
<p align="center">
<a href="https://arxiv.org/abs/2412.21139">📃 Paper</a>
<a href="https://huggingface.co/SWE-Gym" >🤗 Data & Models</a>
</p>
We present **SWE-Gym**, the first environment for training real-world software engineering agents.
We use it to train strong LM agents that achieve state-of-the-art open results on SWE-Bench, with early, promising scaling characteristics as we increase training and inference-time compute.
<p align="center">
<img src="https://github.com/SWE-Gym/SWE-Gym/blob/main/assets/images/teaser.jpg?raw=true" width="100%" alt="teaser">
</p>
---
# Run SWE-Gym with OpenHands
The process of running SWE-Gym is very similar to how you'd run SWE-Bench evaluation.
1. First, clone OpenHands repo `git clone https://github.com/All-Hands-AI/OpenHands.git`
2. Then setup the repo following [Development.md](https://github.com/All-Hands-AI/OpenHands/blob/main/Development.md)
3. Then you can simply serve your own model as an OpenAI compatible endpoint, put those info in config.toml. You can do this by following instruction [here](../../README.md#setup).
4. And then simply do the following to sample for 16x parallelism:
```bash
export ALLHANDS_API_KEY=ah-yourkey # You don't need to set this when running these in local docker container
./evaluation/benchmarks/multi_swe_bench/scripts/rollout_swegym.sh llm.mymodel-temp05 'train-t05' 16
```
NOTE: SWE-Gym sampling with parallelism is currently only tested with AllHands RemoteRuntime (limited beta). Fill [this form](https://docs.google.com/forms/d/e/1FAIpQLSckVz_JFwg2_mOxNZjCtr7aoBFI2Mwdan3f75J_TrdMS1JV2g/viewform) to apply for access.
5. When `rollout_swegym.sh` finishes, you will get a file called `output.with_completions.jsonl.gz`. Then you can use [`./scripts/swegym/convert_data.ipynb`](./scripts/swegym/convert_data.ipynb) to convert them into SFT data format.
## Running the Jupyter Notebook
To run the data conversion notebook, follow these steps:
1. Navigate to the OpenHands repository root:
```bash
cd openhands_repo
```
2. Set the PYTHONPATH and start Jupyter notebook:
```bash
PYTHONPATH=$(pwd) jupyter notebook
```
3. In the Jupyter interface, navigate to `evaluation/benchmarks/swe_bench/scripts/swegym/convert_data.ipynb`
4. Update the file paths in the notebook:
- Set `FILE_PATHS` to point to your `output.with_completions.jsonl.gz` files
- Set `YOUR_OUTPUT_FOLDER` to your desired output directory
5. Run the notebook cells sequentially to process your data and generate the SFT training format.
---
# More info about SWE-Gym
Progress in agents for software engineering has been limited by the lack of training environments that both include rigorous verification for reinforcement learning and cover the expansive tasks encountered in real-world repository-level engineering.
We introduce SWE-Gym: An Open Environment for Training Software Engineering Agents & Verifiers.
Our baselines achieve new open SOTA - 32%/26% on SWE-Bench Verified/Lite, with promising scaling trends.
![SWE-Gym Scaling](https://github.com/SWE-Gym/SWE-Gym/blob/main/assets/images/scaling.jpg?raw=true)
*SWE-Gym enables scalable improvements for software engineering agents at both training and inference time. Our current results is primarily bottlenecked by training and inference compute, rather than the size of our environment.*
## SWE-Gym Environment
We create SWE-Gym, the first environment for training SWE agents, with **2.4K real tasks from 11 Python repos** & a Lite split of 234 instances. SWE-Gym combines real-world Python tasks, repository context, executable environments, and test verification to train agents for solving software engineering problems.
![SWE-Gym Repo Distribution](https://github.com/SWE-Gym/SWE-Gym/blob/main/assets/images/swe-gym.jpg?raw=true)
## SWE-Gym trains LMs as agents
When fine-tuned on less than 500 agent-environment interaction trajectories sampled from it from GPT-4o and Claude 3.5 Sonnet, we achieve **+14%** absolute gains on SWE-Bench Verified with an 32B LM-powered OpenHands agent.
![OpenHands Performance diff before and after training](https://github.com/SWE-Gym/SWE-Gym/blob/main/assets/images/oh-agent.jpg?raw=true)
## SWE-Gym enables self-improvement
SWE-Gym is also effective across agent scaffolds. With rejection sampling fine-tuning and MoatlessTools scaffold, our 32B and 7B models achieve 20% and 10% respectively on SWE-Bench Lite through self-improvement.
<p align="center">
<img src="https://github.com/SWE-Gym/SWE-Gym/blob/main/assets/images/ml-agent.jpg?raw=true" width="80%" alt="Moatless self-improvement">
</p>
## SWE-Gym enables inference-time scaling
SWE-Gym enables inference-time scaling through verifiers trained on agent trajectories.
These verifiers identify most promising solutions via best-of-n selection, together with our learned agents, they achieve 32%/26% on SWE-Bench Verified/Lite, a new open SoTA.
![Inference Time Scaling for Moatless Agent](https://github.com/SWE-Gym/SWE-Gym/blob/main/assets/images/inference-ml.jpg?raw=true)
*Inference Time Scaling for Moatless Agent*
![Inference Time Scaling for OpenHands Agent](https://github.com/SWE-Gym/SWE-Gym/blob/main/assets/images/inference-oh.jpg?raw=true)
*Inference Time Scaling for OpenHands Agent*
## Our baselines on SWE-Gym shows strong scaling trends
Lastly, our ablations reveal strong scaling trends - performance is now bottlenecked by train and inference compute, rather than the size of our dataset. Pushing and improving these scaling trends further is an exciting direction for future work.
![](https://github.com/SWE-Gym/SWE-Gym/blob/main/assets/images/scaling.jpg?raw=true)
## Reproducing Results
**The Dataset**
To access SWE-Gym dataset, checkout our huggingface hub page [SWE-Gym](https://huggingface.co/SWE-Gym)
The environment constants are currently saved at [SWE-Bench-Fork](https://github.com/SWE-Gym/SWE-Bench-Fork)
We also have pre-built docker images for each instance under [xingyaoww/sweb.eval.x86_64](https://hub.docker.com/search?q=xingyaoww%2Fsweb.eval.x86_64.) prefix at docker hub.
## 📚 Citation
```bibtex
@misc{pan2024trainingsoftwareengineeringagents,
title={Training Software Engineering Agents and Verifiers with SWE-Gym},
author={Jiayi Pan and Xingyao Wang and Graham Neubig and Navdeep Jaitly and Heng Ji and Alane Suhr and Yizhe Zhang},
year={2024},
eprint={2412.21139},
archivePrefix={arXiv},
primaryClass={cs.SE},
url={https://arxiv.org/abs/2412.21139},
}
```
@@ -51,8 +51,8 @@ RUN_WITH_BROWSING = os.environ.get('RUN_WITH_BROWSING', 'false').lower() == 'tru
# TODO: migrate all swe-bench docker to ghcr.io/openhands
# TODO: 适应所有的语言
DOCKER_IMAGE_PREFIX = os.environ.get('EVAL_DOCKER_IMAGE_PREFIX', 'mswebench')
LANGUAGE = os.environ.get('LANGUAGE', 'java')
DOCKER_IMAGE_PREFIX = os.environ.get('EVAL_DOCKER_IMAGE_PREFIX', '')
LANGUAGE = os.environ.get('LANGUAGE', 'python')
logger.info(f'Using docker image prefix: {DOCKER_IMAGE_PREFIX}')
@@ -305,19 +305,31 @@ def get_instance_docker_image(instance: pd.Series):
instance_id = instance.get('instance_id', '')
tag_suffix = instance_id.split('-')[-1] if instance_id else ''
container_tag = f'pr-{tag_suffix}'
return f'{DOCKER_IMAGE_PREFIX}/{container_name}:{container_tag}'
# pdb.set_trace()
return f'mswebench/{container_name}:{container_tag}'
# return "kong/insomnia:pr-8284"
# return "'sweb.eval.x86_64.local_insomnia"
# return "local_insomnia_why"
# return "local/kong-insomnia:pr-8117"
def get_config(
instance: pd.Series,
metadata: EvalMetadata,
) -> OpenHandsConfig:
base_container_image = get_instance_docker_image(instance)
logger.info(
f'Using instance container image: {base_container_image}. '
f'Please make sure this image exists. '
f'Submit an issue on https://github.com/All-Hands-AI/OpenHands if you run into any issues.'
)
SWE_BENCH_CONTAINER_IMAGE = 'ghcr.io/opendevin/eval-swe-bench:full-v1.2.1'
if USE_INSTANCE_IMAGE:
# We use a different instance image for the each instance of swe-bench eval
# base_container_image = get_instance_docker_image(instance['instance_id'])
base_container_image = get_instance_docker_image(instance)
logger.info(
f'Using instance container image: {base_container_image}. '
f'Please make sure this image exists. '
f'Submit an issue on https://github.com/All-Hands-AI/OpenHands if you run into any issues.'
)
else:
base_container_image = SWE_BENCH_CONTAINER_IMAGE
logger.info(f'Using swe-bench container image: {base_container_image}')
sandbox_config = get_default_sandbox_config_for_eval()
sandbox_config.base_container_image = base_container_image
@@ -760,6 +772,7 @@ if __name__ == '__main__':
parser.add_argument(
'--dataset',
type=str,
default='princeton-nlp/SWE-bench',
help='data set to evaluate on, either full-test or lite-test',
)
parser.add_argument(
@@ -774,7 +787,6 @@ if __name__ == '__main__':
# so we don't need to manage file uploading to OpenHands's repo
# dataset = load_dataset(args.dataset, split=args.split)
# dataset = load_dataset(args.dataset)
logger.info(f'Loading dataset {args.dataset} with split {args.split} ')
dataset = load_dataset('json', data_files=args.dataset)
dataset = dataset[args.split]
swe_bench_tests = filter_dataset(dataset.to_pandas(), 'instance_id')
@@ -827,7 +839,7 @@ if __name__ == '__main__':
args.eval_num_workers,
process_instance,
timeout_seconds=120 * 60, # 2 hour PER instance should be more than enough
max_retries=3,
max_retries=5,
)
# Check if any instances reached maximum retries
check_maximum_retries_exceeded(metadata.eval_output_dir)
@@ -1,54 +1,37 @@
import argparse
import json
input_file = 'XXX.jsonl'
output_file = 'YYY.jsonl'
def main(input_file, output_file):
with (
open(input_file, 'r', encoding='utf-8') as fin,
open(output_file, 'w', encoding='utf-8') as fout,
):
for line in fin:
line = line.strip()
if not line:
continue
with (
open(input_file, 'r', encoding='utf-8') as fin,
open(output_file, 'w', encoding='utf-8') as fout,
):
for line in fin:
line = line.strip()
if not line:
continue
data = json.loads(line)
item = data
data = json.loads(line)
item = data
# Skip instances that don't have resolved_issues or have empty resolved_issues
if not item.get('resolved_issues') or len(item['resolved_issues']) == 0:
print(
f'Skipping instance {item.get("org", "")}/{item.get("repo", "")}-{item.get("number", "")} - no resolved_issues'
)
continue
# 提取原始数据
org = item.get('org', '')
repo = item.get('repo', '')
number = str(item.get('number', ''))
# 提取原始数据
org = item.get('org', '')
repo = item.get('repo', '')
number = str(item.get('number', ''))
new_item = {}
new_item['repo'] = f'{org}/{repo}'
new_item['instance_id'] = f'{org}__{repo}-{number}'
new_item['problem_statement'] = (
item['resolved_issues'][0].get('title', '')
+ '\n'
+ item['resolved_issues'][0].get('body', '')
)
new_item['FAIL_TO_PASS'] = []
new_item['PASS_TO_PASS'] = []
new_item['base_commit'] = item['base'].get('sha', '')
new_item['version'] = '0.1' # depends
new_item = {}
new_item['repo'] = f'{org}/{repo}'
new_item['instance_id'] = f'{org}__{repo}-{number}'
# Get the first resolved issue
resolved_issue = item['resolved_issues'][0]
title = resolved_issue.get('title') or ''
body = resolved_issue.get('body') or ''
new_item['problem_statement'] = title + '\n' + body
new_item['FAIL_TO_PASS'] = []
new_item['PASS_TO_PASS'] = []
new_item['base_commit'] = item['base'].get('sha', '')
new_item['version'] = '0.1' # depends
output_data = new_item
fout.write(json.dumps(output_data, ensure_ascii=False) + '\n')
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument('--input', required=True, help='Input .jsonl file path')
parser.add_argument('--output', required=True, help='Output .jsonl file path')
args = parser.parse_args()
main(args.input, args.output)
output_data = new_item
fout.write(json.dumps(output_data, ensure_ascii=False) + '\n')
@@ -1,69 +0,0 @@
import argparse
import gzip
import json
import os
from glob import glob
from tqdm import tqdm
tqdm.pandas()
# Load trajectories for resolved instances
def load_completions(output_dir: str, instance_id: str):
glob_path = os.path.join(output_dir, 'llm_completions', instance_id, '*.json')
files = sorted(glob(glob_path)) # this is ascending order
# pick the last file (last turn)
try:
file_path = files[-1]
except IndexError:
# print(f'No files found for instance {instance_id}: files={files}')
return None
with open(file_path, 'r') as f:
result = json.load(f)
# create messages
messages = result['messages']
messages.append(result['response']['choices'][0]['message'])
tools = result['kwargs'].get('tools', [])
return {
'messages': messages,
'tools': tools,
}
parser = argparse.ArgumentParser()
parser.add_argument('jsonl_path', type=str)
args = parser.parse_args()
output_dir = os.path.dirname(args.jsonl_path)
output_path = os.path.join(output_dir, 'output.with_completions.jsonl.gz')
# Check if output would be different from input
needs_update = False
with open(args.jsonl_path, 'r') as f_in:
for line in tqdm(f_in, desc='Checking for changes'):
data = json.loads(line)
new_completions = load_completions(output_dir, data['instance_id'])
current_completions = data.get('raw_completions')
if current_completions != new_completions:
needs_update = True
break
if not needs_update:
print('No updates required. Skipping file update.')
exit(0)
if os.path.exists(output_path):
print(f'Output file already exists at {output_path}, overwriting? (y/n)')
if input() != 'y':
print('Exiting...')
exit(0)
# Process line by line
with open(args.jsonl_path, 'r') as f_in, gzip.open(output_path, 'wt') as f_out:
for line in tqdm(f_in):
data = json.loads(line)
data['raw_completions'] = load_completions(output_dir, data['instance_id'])
f_out.write(json.dumps(data) + '\n')
print(f'Saved compressed output to {output_path}')
@@ -1,11 +1,13 @@
import argparse
import json
import re
IN_FILE = 'output.jsonl'
OUT_FILE = 'patch.jsonl'
def main(input_file, output_file):
with open(input_file, 'r') as fin:
with open(output_file, 'w') as fout:
def main():
with open(IN_FILE, 'r') as fin:
with open(OUT_FILE, 'w') as fout:
for line in fin:
data = json.loads(line)
groups = re.match(r'(.*)__(.*)-(.*)', data['instance_id'])
@@ -13,14 +15,10 @@ def main(input_file, output_file):
'org': groups.group(1),
'repo': groups.group(2),
'number': groups.group(3),
'fix_patch': data.get('test_result', {}).get('git_patch', '') or '',
'fix_patch': data['test_result']['git_patch'],
}
fout.write(json.dumps(patch) + '\n')
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument('--input', required=True, help='Input .jsonl file path')
parser.add_argument('--output', required=True, help='Output .jsonl file path')
args = parser.parse_args()
main(args.input, args.output)
main()
@@ -1,70 +0,0 @@
import argparse
import json
import os
import subprocess
def update_multi_swe_config(output_jsonl_path, config_path, dataset):
path_to_parent = os.path.dirname(os.path.abspath(output_jsonl_path))
converted_path = os.path.join(path_to_parent, 'output_converted.jsonl')
# Run the conversion script
subprocess.run(
[
'python3',
'./evaluation/benchmarks/multi_swe_bench/scripts/eval/convert.py',
'--input',
output_jsonl_path,
'--output',
converted_path,
],
check=True,
)
# Create required directories
os.makedirs(os.path.join(path_to_parent, 'eval_files', 'dataset'), exist_ok=True)
os.makedirs(os.path.join(path_to_parent, 'eval_files', 'workdir'), exist_ok=True)
os.makedirs(os.path.join(path_to_parent, 'eval_files', 'repos'), exist_ok=True)
os.makedirs(os.path.join(path_to_parent, 'eval_files', 'logs'), exist_ok=True)
# Prepare config dict
config = {
'mode': 'evaluation',
'workdir': os.path.join(path_to_parent, 'eval_files', 'workdir'),
'patch_files': [converted_path],
'dataset_files': [dataset],
'force_build': True,
'output_dir': os.path.join(path_to_parent, 'eval_files', 'dataset'),
'specifics': [],
'skips': [],
'repo_dir': os.path.join(path_to_parent, 'eval_files', 'repos'),
'need_clone': True,
'global_env': [],
'clear_env': True,
'stop_on_error': False,
'max_workers': 5,
'max_workers_build_image': 5,
'max_workers_run_instance': 5,
'log_dir': os.path.join(path_to_parent, 'eval_files', 'logs'),
'log_level': 'DEBUG',
'fix_patch_run_cmd': (
'bash -c "apt update ; apt install -y patch ; '
"sed -i 's@git apply.*@patch --batch --fuzz=5 -p1 -i /home/test.patch;"
'patch --batch --fuzz=5 -p1 -i /home/fix.patch@g\' /home/fix-run.sh ; chmod +x /home/*.sh ; /home/fix-run.sh"'
),
}
# Save to multibench.config
os.makedirs(os.path.dirname(config_path), exist_ok=True)
with open(config_path, 'w') as f:
json.dump(config, f, indent=4)
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument('--input', required=True, help='Path to input file')
parser.add_argument('--output', required=True, help='Path to create config')
parser.add_argument('--dataset', required=True, help='Path to dataset')
args = parser.parse_args()
update_multi_swe_config(args.input, args.output, args.dataset)
@@ -1,176 +0,0 @@
import argparse
import json
import os
from collections import defaultdict
from tqdm import tqdm
parser = argparse.ArgumentParser()
parser.add_argument('input_file', type=str)
parser.add_argument(
'--force',
action='store_true',
help='Force update all reports even if no changes are detected',
)
parser.add_argument(
'--overwrite-backup',
action='store_true',
help='Automatically overwrite existing backup files without prompting',
)
args = parser.parse_args()
dirname = os.path.dirname(args.input_file)
# Initialize counters and data structures
instance_id_to_status = defaultdict(
lambda: {
'empty_generation': False,
'resolved': False,
'failed_apply_patch': False,
'error_eval': False,
'test_timeout': False,
}
)
# Process official report if it exists
swebench_official_report_json = os.path.join(
dirname, 'eval_files/dataset/final_report.json'
)
openhands_remote_report_jsonl = args.input_file.replace(
'.jsonl', '.swebench_eval.jsonl'
)
if os.path.exists(swebench_official_report_json):
output_md_filepath = os.path.join(dirname, 'README.md')
with open(swebench_official_report_json, 'r') as f:
report = json.load(f)
# Convert instance IDs from "repo/name:pr-123" format to "repo__name-123" format
def convert_instance_id(instance_id):
"""Convert instance ID from slash/colon-pr format to double underscore/dash format."""
if '/' in instance_id and ':pr-' in instance_id:
# Split on '/' and ':pr-'
parts = instance_id.split('/')
if len(parts) == 2:
repo_part = parts[0]
name_and_pr = parts[1]
if ':pr-' in name_and_pr:
name, pr_number = name_and_pr.split(':pr-')
return f'{repo_part}__{name}-{pr_number}'
return instance_id
# Convert all instance ID lists in the report
for key in [
'resolved_ids',
'unresolved_ids',
'error_ids',
'empty_patch_ids',
'incomplete_ids',
]:
if key in report:
report[key] = [
convert_instance_id(instance_id) for instance_id in report[key]
]
output_md = (
'# Multi-SWE-bench Report\n'
'This folder contains the evaluation results of the SWE-bench using the [official evaluation docker containerization](https://github.com/princeton-nlp/SWE-bench/blob/main/docs/20240627_docker/README.md#choosing-the-right-cache_level).\n\n'
'## Summary\n'
f'- total instances: {report["total_instances"]}\n'
f'- submitted instances: {report["submitted_instances"]}\n'
f'- completed instances: {report["completed_instances"]}\n'
f'- empty patch instances: {report["empty_patch_instances"]}\n'
f'- resolved instances: {report["resolved_instances"]}\n'
f'- unresolved instances: {report["unresolved_instances"]}\n'
f'- error instances: {report["error_instances"]}\n'
)
output_md += '\n## Resolved Instances\n'
# instance_id to status
for instance_id in report['resolved_ids']:
instance_id_to_status[instance_id]['resolved'] = True
output_md += (
f'- [{instance_id}](./eval_outputs/{instance_id}/run_instance.log)\n'
)
output_md += '\n## Unresolved Instances\n'
for instance_id in report['unresolved_ids']:
output_md += (
f'- [{instance_id}](./eval_outputs/{instance_id}/run_instance.log)\n'
)
output_md += '\n## Error Instances\n'
for instance_id in report['error_ids']:
instance_id_to_status[instance_id]['error_eval'] = True
output_md += (
f'- [{instance_id}](./eval_outputs/{instance_id}/run_instance.log)\n'
)
output_md += '\n## Empty Patch Instances\n'
for instance_id in report['empty_patch_ids']:
instance_id_to_status[instance_id]['empty_generation'] = True
output_md += (
f'- [{instance_id}](./eval_outputs/{instance_id}/run_instance.log)\n'
)
output_md += '\n## Incomplete Instances\n'
for instance_id in report['incomplete_ids']:
output_md += (
f'- [{instance_id}](./eval_outputs/{instance_id}/run_instance.log)\n'
)
with open(output_md_filepath, 'w') as f:
f.write(output_md)
else:
print(
f'No report file found: Both {swebench_official_report_json} and {openhands_remote_report_jsonl} do not exist.'
)
exit()
# Before backup and update, check if any changes would be made (unless --force is used)
if not args.force:
needs_update = False
with open(args.input_file, 'r') as infile:
for line in tqdm(infile, desc='Checking for changes'):
data = json.loads(line)
instance_id = data['instance_id']
current_report = data.get('report', {})
new_report = instance_id_to_status[
instance_id
] # if no report, it's not resolved
if current_report != new_report:
needs_update = True
break
if not needs_update:
print('No updates detected. Skipping file update.')
exit()
else:
print('Force flag enabled. Updating all reports regardless of changes.')
# Backup and update the original file row by row
if os.path.exists(args.input_file + '.bak'):
if args.overwrite_backup:
print(
'Existing backup file found. Overwriting automatically due to --overwrite-backup flag.'
)
os.remove(args.input_file + '.bak')
else:
conf = input('Existing backup file found. Do you want to overwrite it? (y/n)')
if conf != 'y':
exit()
os.remove(args.input_file + '.bak')
os.rename(args.input_file, args.input_file + '.bak')
# Process and write file row by row
with (
open(args.input_file + '.bak', 'r') as infile,
open(args.input_file, 'w') as outfile,
):
for line in tqdm(infile, desc='Updating output file'):
data = json.loads(line)
instance_id = data['instance_id']
data['report'] = instance_id_to_status[instance_id]
outfile.write(json.dumps(data) + '\n')
@@ -1,146 +0,0 @@
#!/bin/bash
# NOTE: this script is for rolling out the Multi-SWE-Gym dataset for **TRAINING**
# For more information, please refer to
# 1. the Github Repo: https://github.com/SWE-Gym/SWE-Gym
# 2. the paper: https://arxiv.org/abs/2412.21139
MODEL=$1 # eg your llm config name in config.toml (eg: "llm.claude-3-5-sonnet-20241022-t05")
EXP_NAME=$2 # "train-t05"
EVAL_DATASET=$3 # path to original dataset (jsonl file)
N_WORKERS=${4:-64}
N_RUNS=${5:-1}
export EXP_NAME=$EXP_NAME
# use 2x resources for rollout since some codebases are pretty resource-intensive
export DEFAULT_RUNTIME_RESOURCE_FACTOR=2
echo "MODEL: $MODEL"
echo "EXP_NAME: $EXP_NAME"
echo "EVAL_DATASET: $EVAL_DATASET"
# Generate DATASET path by adding _with_runtime_ before .jsonl extension
DATASET="${EVAL_DATASET%.jsonl}_with_runtime_.jsonl" # path to converted dataset
# Create the converted dataset file
echo "Creating converted dataset at: $DATASET"
poetry run python ./evaluation/benchmarks/multi_swe_bench/scripts/data/data_change.py --input "$EVAL_DATASET" --output "$DATASET"
SPLIT="train"
export LANGUAGE=java
if [ -z "$ALLHANDS_API_KEY" ] || [ "$RUNTIME" != "remote" ]; then
echo "ALLHANDS_API_KEY is not set or RUNTIME is not set to remote. Will rollout and evaluate locally using Docker. WARNING: A large value of N_WORKERS will result in a large number of Docker containers being spun up and may crash your machine."
export RUNTIME=docker
else
echo "ALLHANDS_API_KEY is set and RUNTIME is set to remote. Continuing rollout and evaluation with remote runtime..."
export SANDBOX_REMOTE_RUNTIME_API_URL="https://runtime.eval.all-hands.dev"
fi
#EVAL_LIMIT=3000
MAX_ITER=100
# ===== Run inference =====
source "evaluation/utils/version_control.sh"
get_openhands_version
echo "OPENHANDS_VERSION: $OPENHANDS_VERSION"
echo "MODEL_CONFIG: $MODEL_CONFIG"
echo "DATASET: $DATASET"
echo "EVAL_DOCKER_IMAGE_PREFIX: $EVAL_DOCKER_IMAGE_PREFIX"
# Default to NOT use Hint
export USE_INSTANCE_IMAGE=true
export USE_HINT_TEXT=false
export RUN_WITH_BROWSING=false
echo "USE_HINT_TEXT: $USE_HINT_TEXT"
EVAL_NOTE="$OPENHANDS_VERSION-no-hint-$EXP_NAME"
function run_eval() {
local eval_note=$1
export LANGUAGE=java
echo "About to run command"
COMMAND="EVAL_DOCKER_IMAGE_PREFIX=$EVAL_DOCKER_IMAGE_PREFIX; LANGUAGE=java;
poetry run python evaluation/benchmarks/multi_swe_bench/run_infer.py \
--agent-cls CodeActAgent \
--llm-config $MODEL \
--max-iterations $MAX_ITER \
--eval-num-workers $N_WORKERS \
--eval-note $eval_note \
--dataset $DATASET \
--split $SPLIT"
echo "Running command: $COMMAND"
if [ -n "$EVAL_LIMIT" ]; then
echo "EVAL_LIMIT: $EVAL_LIMIT"
COMMAND="$COMMAND --eval-n-limit $EVAL_LIMIT"
fi
# Run the command
eval $COMMAND
}
for run_idx in $(seq 1 $N_RUNS); do
while true; do
echo "### Running inference... ###"
unset SANDBOX_ENV_GITHUB_TOKEN # prevent the agent from using the github token to push
current_eval_note="$EVAL_NOTE-run_$run_idx"
echo "EVAL_NOTE: $current_eval_note"
echo "DATASET command: $DATASET"
#INFER_OUTPUT=$(run_eval $current_eval_note)
INFER_OUTPUT=$(run_eval $current_eval_note | tee /dev/stderr)
INFER_STATUS=$? # Capture the exit status of run_infer.sh
echo "INFER_STATUS: $INFER_STATUS"
echo "### Cleaning up remote runtime... ###"
./evaluation/utils/scripts/cleanup_remote_runtime.sh
if [ $INFER_STATUS -eq 0 ]; then
echo "### Inference completed successfully. ###"
break
else
echo "### Inference failed with exit code $INFER_STATUS. Retrying... ###"
fi
done
# Extract the output directory using the special delimiters
OUTPUT_FILE=$(echo "$INFER_OUTPUT" | grep -o '### OUTPUT FILE:.* ###' | sed 's/### OUTPUT FILE: \(.*\) ###/\1/')
echo "Got OUTPUT_FILE: $OUTPUT_FILE"
while true; do
echo "### Evaluating on $OUTPUT_FILE ... ###"
OUTPUT_CONFIG_FILE="${OUTPUT_FILE%.jsonl}_config.json"
export EVAL_SKIP_BUILD_ERRORS=true
pip install multi-swe-bench --quiet --disable-pip-version-check > /dev/null 2>&1
COMMAND="poetry run python ./evaluation/benchmarks/multi_swe_bench/scripts/eval/update_multi_swe_bench_config.py --input $OUTPUT_FILE --output $OUTPUT_CONFIG_FILE --dataset $EVAL_DATASET;
python -m multi_swe_bench.harness.run_evaluation --config $OUTPUT_CONFIG_FILE
"
if [ -n "$EVAL_LIMIT" ]; then
echo "EVAL_LIMIT: $EVAL_LIMIT"
COMMAND="$COMMAND --eval-n-limit $EVAL_LIMIT"
fi
echo "Running command: $COMMAND"
# Run the command
eval $COMMAND
EVAL_STATUS=$?
if [ $EVAL_STATUS -eq 0 ]; then
echo "### Evaluation completed successfully. ###"
break
else
echo "### Evaluation failed with exit code $EVAL_STATUS. Retrying... ###"
fi
./evaluation/utils/scripts/cleanup_remote_runtime.sh
done
# update the output with evaluation results
echo "### Updating the output with evaluation results... ###"
poetry run python evaluation/benchmarks/multi_swe_bench/scripts/eval/update_output_with_eval.py $OUTPUT_FILE
echo "### Combining the final completions... ###"
poetry run python evaluation/benchmarks/multi_swe_bench/scripts/eval/combine_final_completions.py $OUTPUT_FILE
echo "### DONE for run $run_idx! ###"
echo "You can find the final output at $(dirname $OUTPUT_FILE)/$FINAL_OUTPUT_FILE"
done
@@ -47,8 +47,8 @@ if [ -z "$DATASET" ]; then
fi
if [ -z "$LANGUAGE" ]; then
echo "LANGUAGE not specified, use default python"
LANGUAGE="java"
echo "LANUGUAGE not specified, use default python"
LANGUAGE="python"
fi
if [ -z "$SPLIT" ]; then
@@ -69,10 +69,10 @@ fi
if [ -z "$EVAL_DOCKER_IMAGE_PREFIX" ]; then
if [ "$LANGUAGE" = "python" ]; then
echo "EVAL_DOCKER_IMAGE_PREFIX is docker.io/xingyaoww/ as default as LANGUAGE is python"
echo "EVAL_DOCKER_IMAGE_PREFIX is docker.io/xingyaoww/ as default as LANUGUAGE is python"
EVAL_DOCKER_IMAGE_PREFIX="docker.io/xingyaoww/"
elif [ "$LANGUAGE" = "java" ]; then
echo "EVAL_DOCKER_IMAGE_PREFIX is empty as LANGUAGE is java"
echo "EVAL_DOCKER_IMAGE_PREFIX is java_verified as LANUGUAGE is java"
EVAL_DOCKER_IMAGE_PREFIX=""
fi
fi
@@ -1,344 +0,0 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"\n",
"import pandas as pd\n",
"from tqdm import tqdm\n",
"\n",
"tqdm.pandas()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 1. Load raw data and convert to training data"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import gzip\n",
"import json\n",
"\n",
"from tqdm import tqdm\n",
"\n",
"FILE_PATHS = [\n",
" 'YOURPATH-no-hint-train-t05-run_1/output.with_completions.jsonl.gz',\n",
" 'YOURPATH-no-hint-train-t05-run_2/output.with_completions.jsonl.gz',\n",
"]\n",
"\n",
"# More memory efficient for large files\n",
"# Initialize lists to store the data\n",
"data = []\n",
"\n",
"\n",
"# Read file line by line\n",
"for FILE_PATH in FILE_PATHS:\n",
" with gzip.open(FILE_PATH, 'rb') as f: # Use 'rb' for gzipped files\n",
" for i, line in tqdm(\n",
" enumerate(f), desc=f'Processing {FILE_PATH.split(\"/\")[-1]}'\n",
" ):\n",
" # Parse only the fields we need\n",
" raw_data = json.loads(line)\n",
" data.append(\n",
" {\n",
" 'resolved': raw_data['report']['resolved'],\n",
" 'messages': raw_data['raw_completions']['messages']\n",
" if raw_data['raw_completions'] is not None\n",
" else None,\n",
" 'git_patch': raw_data['test_result'].get('git_patch', ''),\n",
" 'tools': raw_data['raw_completions']['tools']\n",
" if raw_data['raw_completions'] is not None\n",
" and 'tools' in raw_data['raw_completions']\n",
" else None,\n",
" }\n",
" )\n",
"\n",
"# Convert to DataFrame after collecting all data\n",
"df = pd.DataFrame(data)\n",
"print(f'#total amount of data={len(df)}')\n",
"df = df[~df['messages'].isna()]\n",
"print(f'#total amount of data after removing nan={len(df)}')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Filter"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def _contains_multiple_tool_calls(messages: list[dict]) -> bool:\n",
" return any(\n",
" message.get('tool_calls') and len(message['tool_calls']) > 1\n",
" for message in messages\n",
" )\n",
"\n",
"\n",
"df['contains_multiple_tool_calls'] = df['messages'].apply(_contains_multiple_tool_calls)\n",
"display(df.groupby(['contains_multiple_tool_calls'])['resolved'].sum())"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"import copy\n",
"\n",
"# Convert function calling messages to non-function calling messages\n",
"from openhands.llm.fn_call_converter import (\n",
" FunctionCallConversionError,\n",
" convert_fncall_messages_to_non_fncall_messages,\n",
" convert_from_multiple_tool_calls_to_single_tool_call_messages,\n",
")\n",
"\n",
"total_failed = 0\n",
"\n",
"\n",
"def _convert_messages(messages: list[dict], tools: list[dict]) -> list[dict]:\n",
" global total_failed\n",
" message_copy = copy.deepcopy(messages)\n",
" for message in message_copy:\n",
" if message['content'] is None:\n",
" message['content'] = ''\n",
" try:\n",
" return convert_fncall_messages_to_non_fncall_messages(\n",
" message_copy, tools, add_in_context_learning_example=False\n",
" )\n",
" except FunctionCallConversionError:\n",
" total_failed += 1\n",
" # print(f'Failed to convert messages: {messages}\\nTools: {tools}')\n",
" # traceback.print_exc()\n",
" return None\n",
"\n",
"\n",
"df['converted_messages'] = df.apply(\n",
" lambda row: convert_from_multiple_tool_calls_to_single_tool_call_messages(\n",
" row['messages'], ignore_final_tool_result=True\n",
" ),\n",
" axis=1,\n",
")\n",
"df['nonfncall_messages'] = df.apply(\n",
" lambda row: _convert_messages(row['converted_messages'], row['tools']), axis=1\n",
")\n",
"print('total nan', df['nonfncall_messages'].isna().sum())\n",
"df = df[~df['nonfncall_messages'].isna()]\n",
"print(df['nonfncall_messages'].iloc[0])\n",
"\n",
"print(f'Total failed: {total_failed}')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Tokenization"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from pandarallel import pandarallel\n",
"from transformers import AutoTokenizer\n",
"\n",
"os.environ['TOKENIZERS_PARALLELISM'] = 'false'\n",
"pandarallel.initialize(progress_bar=True, verbose=1, nb_workers=16)\n",
"tokenizer = AutoTokenizer.from_pretrained('Qwen/Qwen2.5-7B-Instruct')\n",
"\n",
"\n",
"def clean_messages(messages):\n",
" clean = []\n",
" for msg in messages:\n",
" if not isinstance(msg, dict):\n",
" continue\n",
" role = msg.get('role')\n",
" content = msg.get('content')\n",
" if isinstance(content, str):\n",
" text = content\n",
" elif isinstance(content, dict):\n",
" text = content.get('text')\n",
" elif (\n",
" isinstance(content, list)\n",
" and len(content) == 1\n",
" and isinstance(content[0], dict)\n",
" ):\n",
" text = content[0].get('text')\n",
" else:\n",
" print(f'Format not accepted {content}')\n",
" clean.append({'role': role, 'content': text})\n",
" return clean\n",
"\n",
"\n",
"# Step 1: Clean the messages\n",
"df['nonfncall_messages'] = df['nonfncall_messages'].apply(clean_messages)\n",
"\n",
"# Step 2: Compute token count\n",
"df['n_tokens'] = df['nonfncall_messages'].parallel_apply(\n",
" lambda x: len(tokenizer.apply_chat_template(x))\n",
")\n",
"\n",
"# print(df['nonfncall_messages'])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print(f'BEFORE: #total={len(df)}')\n",
"df_selected = df[df['n_tokens'] < 131072]\n",
"print(f'AFTER(truncated to 128k): #total={len(df_selected)}')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df_selected['n_tokens'].describe()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# ecdf of n_tokens\n",
"import matplotlib.pyplot as plt\n",
"import seaborn as sns\n",
"\n",
"display(df.groupby(['resolved'])['n_tokens'].describe())\n",
"sns.ecdfplot(x='n_tokens', data=df, hue='resolved')\n",
"plt.show()\n",
"\n",
"print(f'#total={len(df)}')\n",
"df_selected = df[df['n_tokens'] < 131072]\n",
"print(f'#selected={len(df_selected)}')\n",
"display(df_selected.groupby(['resolved'])['n_tokens'].describe())\n",
"sns.ecdfplot(x='n_tokens', data=df_selected, hue='resolved')\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df_selected[~df_selected['resolved']]['n_tokens'].describe()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df_selected['resolved'].value_counts()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df_selected.groupby(['resolved'])['n_tokens'].describe()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Save Resolved Messages for SFT"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Flatten messages and change format to {\"content\": \"\", \"role\": \"\"}\n",
"df_selected[df_selected['resolved']][['nonfncall_messages']].rename(\n",
" columns={'nonfncall_messages': 'messages'}\n",
").to_json(\n",
" os.path.join(\n",
" 'PATH_TO_FILE',\n",
" f'policy_traj_128k_swegym_{df_selected[\"resolved\"].value_counts()[True]}i.jsonl',\n",
" ),\n",
" lines=True,\n",
" orient='records',\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.11"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
-81
View File
@@ -1,81 +0,0 @@
# SWE-Perf Evaluation
This folder contains the OpenHands inference generation of the [SWE-Perf benchmark](https://swe-perf.github.io/) ([paper](https://arxiv.org/pdf/2507.12415v1)).
The evaluation consists of three steps:
1. Environment setup: [install python environment](../../README.md#development-environment) and [configure LLM config](../../README.md#configure-openhands-and-your-llm).
2. [Run inference](#running-inference-locally-with-docker): Generate a edit patch for each Github issue
3. [Evaluate patches](#evaluate-generated-patches)
## Setup Environment and LLM Configuration
Please follow instruction [here](../../README.md#setup) to setup your local development environment and LLM.
## Running inference Locally with Docker
Make sure your Docker daemon is running, and you have ample disk space (at least 200-500GB, depends on the SWE-PErf set you are running on) for the instance-level docker image.
When the `run_infer.sh` script is started, it will automatically pull the relevant SWE-Perf images.
For example, for instance ID `scikit-learn_scikit-learn-11674`, it will try to pull our pre-build docker image `betty1202/sweb.eval.x86_64.scikit-learn_s_scikit-learn-11674` from DockerHub.
This image will be used create an OpenHands runtime image where the agent will operate on.
```bash
./evaluation/benchmarks/swe_perf/scripts/run_infer.sh [model_config] [git-version] [agent] [eval_limit] [max_iter] [num_workers] [dataset] [dataset_split] [n_runs] [mode]
# Example
./evaluation/benchmarks/swe_bench/scripts/run_infer.sh llm.eval_gpt4_1106_preview HEAD CodeActAgent 500 100 1 SWE-Perf/SWE-Perf test
```
where `model_config` is mandatory, and the rest are optional.
- `model_config`, e.g. `eval_gpt4_1106_preview`, is the config group name for your
LLM settings, as defined in your `config.toml`.
- `git-version`, e.g. `HEAD`, is the git commit hash of the OpenHands version you would
like to evaluate. It could also be a release tag like `0.6.2`.
- `agent`, e.g. `CodeActAgent`, is the name of the agent for benchmarks, defaulting
to `CodeActAgent`.
- `eval_limit`, e.g. `10`, limits the evaluation to the first `eval_limit` instances. By
default, the script evaluates the entire SWE-Perf test set (140 issues). Note:
in order to use `eval_limit`, you must also set `agent`.
- `max_iter`, e.g. `20`, is the maximum number of iterations for the agent to run. By
default, it is set to 100.
- `num_workers`, e.g. `3`, is the number of parallel workers to run the evaluation. By
default, it is set to 1.
- `dataset`, a huggingface dataset name. e.g. `SWE-Perf/SWE-Perf`, specifies which dataset to evaluate on.
- `dataset_split`, split for the huggingface dataset. e.g., `test`, `dev`. Default to `test`.
- `n_runs`, e.g. `3`, is the number of times to run the evaluation. Default is 1.
- `mode`, e.g. `swt`, `swt-ci`, or `swe`, specifies the evaluation mode. Default is `swe`.
> [!CAUTION]
> Setting `num_workers` larger than 1 is not officially tested, YMMV.
Let's say you'd like to run 10 instances using `llm.eval_gpt4_1106_preview` and CodeActAgent,
then your command would be:
```bash
./evaluation/benchmarks/swe_bench/scripts/run_infer.sh llm.eval_gpt4_1106_preview HEAD CodeActAgent 10
```
## Evaluate Generated Patches
To evaluate the generated patch, follow these steps:
### 1. Convert output to the evaluation standard format
Run the following command:
```bash
python -m evaluation.benchmarks.swe_perf.format_conversion \
--input_path [input_path] \
--output_path [output_path]
```
* `input_path`: Path to the raw generated patch file.
* `output_path`: Path where the converted file will be saved.
### 2. Run the SWE-Perf benchmark official evaluation
Once the output is converted, use the [official SWE-Perf benchmark evaluation](https://github.com/SWE-Perf/SWE-Perf/tree/main/evaluation) to evaluate it.
@@ -1,52 +0,0 @@
"""
Utilities for handling binary files and patch generation in SWE-Perf evaluation.
"""
def remove_binary_diffs(patch_text):
"""
Remove binary file diffs from a git patch.
Args:
patch_text (str): The git patch text
Returns:
str: The cleaned patch text with binary diffs removed
"""
lines = patch_text.splitlines()
cleaned_lines = []
block = []
is_binary_block = False
for line in lines:
if line.startswith('diff --git '):
if block and not is_binary_block:
cleaned_lines.extend(block)
block = [line]
is_binary_block = False
elif 'Binary files' in line:
is_binary_block = True
block.append(line)
else:
block.append(line)
if block and not is_binary_block:
cleaned_lines.extend(block)
return '\n'.join(cleaned_lines)
def remove_binary_files_from_git():
"""
Generate a bash command to remove binary files from git staging.
Returns:
str: A bash command that removes binary files from git staging
"""
return """
for file in $(git status --porcelain | grep -E "^(M| M|\\?\\?|A| A)" | cut -c4-); do
if [ -f "$file" ] && (file "$file" | grep -q "executable" || git check-attr binary "$file" | grep -q "binary: set"); then
git rm -f "$file" 2>/dev/null || rm -f "$file"
echo "Removed: $file"
fi
done
""".strip()
@@ -1,45 +0,0 @@
import json
import os
from argparse import ArgumentParser
parser = ArgumentParser()
parser.add_argument('--input_path', type=str, help='Name of input path to JSON file.')
parser.add_argument('--output_path', type=str, help='Name of output path to JSON file.')
args = parser.parse_args()
input_path = args.input_path
output_path = args.output_path
os.makedirs(output_path, exist_ok=True)
def load_jsonl(file_path):
"""Load JSONL file into a list of dictionaries."""
data = []
with open(file_path, 'r') as f:
for line in f:
data.append(json.loads(line))
return data
dataset = load_jsonl(input_path)
ooutput_dataset = []
for data in dataset:
instance_id = data['instance_id']
model_name_or_path = 'openhands'
model_patch = (
data['test_result']['git_patch']
if 'test_result' in data and 'git_patch' in data['test_result']
else None
)
ooutput_dataset.append(
{
'instance_id': instance_id,
'model_name_or_path': model_name_or_path,
'model_patch': model_patch,
}
)
with open(os.path.join(output_path, 'output.jsonl'), 'w') as f:
for item in ooutput_dataset:
json_line = json.dumps(item, ensure_ascii=False)
f.write(json_line + '\n')
@@ -1,39 +0,0 @@
"""Mapping instance_id to resource_factor.
Different instances may have different resource requirements.
e.g., some instances may require more memory/CPU to run inference.
This file tracks the resource requirements of different instances.
"""
import json
import os
from openhands.core.logger import openhands_logger as logger
CUR_DIR = os.path.dirname(os.path.abspath(__file__))
DEFAULT_RUNTIME_RESOURCE_FACTOR = int(
os.environ.get('DEFAULT_RUNTIME_RESOURCE_FACTOR', 1)
)
# dataset to resource mapping
_global_resource_mapping: dict[str, dict[str, float]] = {}
def get_resource_mapping(dataset_name: str) -> dict[str, float]:
if dataset_name not in _global_resource_mapping:
file_path = os.path.join(CUR_DIR, f'{dataset_name}.json')
if not os.path.exists(file_path):
logger.info(f'Resource mapping for {dataset_name} not found.')
return None
with open(file_path, 'r') as f:
_global_resource_mapping[dataset_name] = json.load(f)
logger.debug(f'Loaded resource mapping for {dataset_name}')
return _global_resource_mapping[dataset_name]
def get_instance_resource_factor(dataset_name: str, instance_id: str) -> int:
resource_mapping = get_resource_mapping(dataset_name)
if resource_mapping is None:
return DEFAULT_RUNTIME_RESOURCE_FACTOR
return int(resource_mapping.get(instance_id, DEFAULT_RUNTIME_RESOURCE_FACTOR))
@@ -1,842 +0,0 @@
# Based on https://github.com/logic-star-ai/swt-bench/blob/master/src/constants.py
# Constants - Installation Specifications
MAP_VERSION_TO_INSTALL_SKLEARN = {
k: {
'python': '3.6',
'packages': 'numpy scipy cython pytest pandas matplotlib',
'install': 'python -m pip install -v --no-use-pep517 --no-build-isolation -e .',
'pip_packages': [
'cython',
'numpy==1.19.2',
'setuptools',
'scipy==1.5.2',
],
}
for k in ['0.20', '0.21', '0.22']
}
MAP_VERSION_TO_INSTALL_SKLEARN.update(
{
k: {
'python': '3.9',
'packages': "'numpy==1.19.2' 'scipy==1.5.2' 'cython==3.0.10' pytest 'pandas<2.0.0' 'matplotlib<3.9.0' setuptools pytest joblib threadpoolctl",
'install': 'python -m pip install -v --no-use-pep517 --no-build-isolation -e .',
'pip_packages': ['cython', 'setuptools', 'numpy', 'scipy'],
}
for k in ['1.3', '1.4']
}
)
MAP_VERSION_TO_INSTALL_FLASK = {
'2.0': {
'python': '3.9',
'packages': 'requirements.txt',
'install': 'python -m pip install -e .',
'pip_packages': [
'setuptools==70.0.0',
'Werkzeug==2.3.7',
'Jinja2==3.0.1',
'itsdangerous==2.1.2',
'click==8.0.1',
'MarkupSafe==2.1.3',
],
},
'2.1': {
'python': '3.10',
'packages': 'requirements.txt',
'install': 'python -m pip install -e .',
'pip_packages': [
'click==8.1.3',
'itsdangerous==2.1.2',
'Jinja2==3.1.2',
'MarkupSafe==2.1.1',
'Werkzeug==2.3.7',
],
},
}
MAP_VERSION_TO_INSTALL_FLASK.update(
{
k: {
'python': '3.11',
'packages': 'requirements.txt',
'install': 'python -m pip install -e .',
'pip_packages': [
'click==8.1.3',
'itsdangerous==2.1.2',
'Jinja2==3.1.2',
'MarkupSafe==2.1.1',
'Werkzeug==2.3.7',
],
}
for k in ['2.2', '2.3']
}
)
MAP_VERSION_TO_INSTALL_DJANGO = {
k: {
'python': '3.5',
'packages': 'requirements.txt',
'pre_install': [
'apt-get update && apt-get install -y locales',
"echo 'en_US UTF-8' > /etc/locale.gen",
'locale-gen en_US.UTF-8',
],
'install': 'python setup.py install',
'pip_packages': ['setuptools'],
'eval_commands': [
'export LANG=en_US.UTF-8',
'export LC_ALL=en_US.UTF-8',
'export PYTHONIOENCODING=utf8',
'export LANGUAGE=en_US:en',
],
}
for k in ['1.7', '1.8', '1.9', '1.10', '1.11', '2.0', '2.1', '2.2']
}
MAP_VERSION_TO_INSTALL_DJANGO.update(
{
k: {'python': '3.5', 'install': 'python setup.py install'}
for k in ['1.4', '1.5', '1.6']
}
)
MAP_VERSION_TO_INSTALL_DJANGO.update(
{
k: {
'python': '3.6',
'packages': 'requirements.txt',
'install': 'python -m pip install -e .',
'eval_commands': [
"sed -i '/en_US.UTF-8/s/^# //g' /etc/locale.gen && locale-gen",
'export LANG=en_US.UTF-8',
'export LANGUAGE=en_US:en',
'export LC_ALL=en_US.UTF-8',
],
}
for k in ['3.0', '3.1', '3.2']
}
)
MAP_VERSION_TO_INSTALL_DJANGO.update(
{
k: {
'python': '3.8',
'packages': 'requirements.txt',
'install': 'python -m pip install -e .',
}
for k in ['4.0']
}
)
MAP_VERSION_TO_INSTALL_DJANGO.update(
{
k: {
'python': '3.9',
'packages': 'requirements.txt',
'install': 'python -m pip install -e .',
}
for k in ['4.1', '4.2']
}
)
MAP_VERSION_TO_INSTALL_DJANGO.update(
{
k: {
'python': '3.11',
'packages': 'requirements.txt',
'install': 'python -m pip install -e .',
}
for k in ['5.0']
}
)
MAP_VERSION_TO_INSTALL_REQUESTS = {
k: {'python': '3.9', 'packages': 'pytest', 'install': 'python -m pip install .'}
for k in ['0.7', '0.8', '0.9', '0.11', '0.13', '0.14', '1.1', '1.2', '2.0', '2.2']
+ ['2.3', '2.4', '2.5', '2.7', '2.8', '2.9', '2.10', '2.11', '2.12', '2.17']
+ ['2.18', '2.19', '2.22', '2.26', '2.25', '2.27', '3.0']
}
MAP_VERSION_TO_INSTALL_SEABORN = {
k: {
'python': '3.9',
'install': 'python -m pip install -e .',
'pip_packages': [
'contourpy==1.1.0',
'cycler==0.11.0',
'fonttools==4.42.1',
'importlib-resources==6.0.1',
'kiwisolver==1.4.5',
'matplotlib==3.7.2',
'numpy==1.25.2',
'packaging==23.1',
'pandas==1.3.5', # 2.0.3
'pillow==10.0.0',
'pyparsing==3.0.9',
'pytest',
'python-dateutil==2.8.2',
'pytz==2023.3.post1',
'scipy==1.11.2',
'six==1.16.0',
'tzdata==2023.1',
'zipp==3.16.2',
],
}
for k in ['0.11']
}
MAP_VERSION_TO_INSTALL_SEABORN.update(
{
k: {
'python': '3.9',
'install': 'python -m pip install -e .[dev]',
'pip_packages': [
'contourpy==1.1.0',
'cycler==0.11.0',
'fonttools==4.42.1',
'importlib-resources==6.0.1',
'kiwisolver==1.4.5',
'matplotlib==3.7.2',
'numpy==1.25.2',
'packaging==23.1',
'pandas==2.0.0',
'pillow==10.0.0',
'pyparsing==3.0.9',
'pytest',
'python-dateutil==2.8.2',
'pytz==2023.3.post1',
'scipy==1.11.2',
'six==1.16.0',
'tzdata==2023.1',
'zipp==3.16.2',
],
}
for k in ['0.12', '0.13']
}
)
MAP_VERSION_TO_INSTALL_PYTEST = {
k: {'python': '3.9', 'install': 'python -m pip install -e .'}
for k in [
'4.4',
'4.5',
'4.6',
'5.0',
'5.1',
'5.2',
'5.3',
'5.4',
'6.0',
'6.2',
'6.3',
'7.0',
'7.1',
'7.2',
'7.4',
'8.0',
]
}
MAP_VERSION_TO_INSTALL_PYTEST['4.4']['pip_packages'] = [
'atomicwrites==1.4.1',
'attrs==23.1.0',
'more-itertools==10.1.0',
'pluggy==0.13.1',
'py==1.11.0',
'setuptools==68.0.0',
'six==1.16.0',
]
MAP_VERSION_TO_INSTALL_PYTEST['4.5']['pip_packages'] = [
'atomicwrites==1.4.1',
'attrs==23.1.0',
'more-itertools==10.1.0',
'pluggy==0.11.0',
'py==1.11.0',
'setuptools==68.0.0',
'six==1.16.0',
'wcwidth==0.2.6',
]
MAP_VERSION_TO_INSTALL_PYTEST['4.6']['pip_packages'] = [
'atomicwrites==1.4.1',
'attrs==23.1.0',
'more-itertools==10.1.0',
'packaging==23.1',
'pluggy==0.13.1',
'py==1.11.0',
'six==1.16.0',
'wcwidth==0.2.6',
]
for k in ['5.0', '5.1', '5.2']:
MAP_VERSION_TO_INSTALL_PYTEST[k]['pip_packages'] = [
'atomicwrites==1.4.1',
'attrs==23.1.0',
'more-itertools==10.1.0',
'packaging==23.1',
'pluggy==0.13.1',
'py==1.11.0',
'wcwidth==0.2.6',
]
MAP_VERSION_TO_INSTALL_PYTEST['5.3']['pip_packages'] = [
'attrs==23.1.0',
'more-itertools==10.1.0',
'packaging==23.1',
'pluggy==0.13.1',
'py==1.11.0',
'wcwidth==0.2.6',
]
MAP_VERSION_TO_INSTALL_PYTEST['5.4']['pip_packages'] = [
'py==1.11.0',
'packaging==23.1',
'attrs==23.1.0',
'more-itertools==10.1.0',
'pluggy==0.13.1',
]
MAP_VERSION_TO_INSTALL_PYTEST['6.0']['pip_packages'] = [
'attrs==23.1.0',
'iniconfig==2.0.0',
'more-itertools==10.1.0',
'packaging==23.1',
'pluggy==0.13.1',
'py==1.11.0',
'toml==0.10.2',
]
for k in ['6.2', '6.3']:
MAP_VERSION_TO_INSTALL_PYTEST[k]['pip_packages'] = [
'attrs==23.1.0',
'iniconfig==2.0.0',
'packaging==23.1',
'pluggy==0.13.1',
'py==1.11.0',
'toml==0.10.2',
]
MAP_VERSION_TO_INSTALL_PYTEST['7.0']['pip_packages'] = [
'attrs==23.1.0',
'iniconfig==2.0.0',
'packaging==23.1',
'pluggy==0.13.1',
'py==1.11.0',
]
for k in ['7.1', '7.2']:
MAP_VERSION_TO_INSTALL_PYTEST[k]['pip_packages'] = [
'attrs==23.1.0',
'iniconfig==2.0.0',
'packaging==23.1',
'pluggy==0.13.1',
'py==1.11.0',
'tomli==2.0.1',
]
MAP_VERSION_TO_INSTALL_PYTEST['7.4']['pip_packages'] = [
'iniconfig==2.0.0',
'packaging==23.1',
'pluggy==1.3.0',
'exceptiongroup==1.1.3',
'tomli==2.0.1',
]
MAP_VERSION_TO_INSTALL_PYTEST['8.0']['pip_packages'] = [
'iniconfig==2.0.0',
'packaging==23.1',
'pluggy==1.3.0',
'exceptiongroup==1.1.3',
'tomli==2.0.1',
]
MAP_VERSION_TO_INSTALL_MATPLOTLIB = {
k: {
'python': '3.11',
'packages': 'environment.yml',
'install': 'python -m pip install -e .',
'pre_install': [
'apt-get -y update && apt-get -y upgrade && apt-get install -y imagemagick ffmpeg texlive texlive-latex-extra texlive-fonts-recommended texlive-xetex texlive-luatex cm-super dvipng'
],
'pip_packages': [
'contourpy==1.1.0',
'cycler==0.11.0',
'fonttools==4.42.1',
'ghostscript',
'kiwisolver==1.4.5',
'numpy==1.25.2',
'packaging==23.1',
'pillow==10.0.0',
'pikepdf',
'pyparsing==3.0.9',
'python-dateutil==2.8.2',
'six==1.16.0',
'setuptools==68.1.2',
'setuptools-scm==7.1.0',
'typing-extensions==4.7.1',
],
}
for k in ['3.5', '3.6', '3.7']
}
MAP_VERSION_TO_INSTALL_MATPLOTLIB.update(
{
k: {
'python': '3.8',
'packages': 'requirements.txt',
'install': 'python -m pip install -e .',
'pre_install': [
'apt-get -y update && apt-get -y upgrade && apt-get install -y imagemagick ffmpeg libfreetype6-dev pkg-config texlive texlive-latex-extra texlive-fonts-recommended texlive-xetex texlive-luatex cm-super'
],
'pip_packages': ['pytest', 'ipython'],
}
for k in ['3.1', '3.2', '3.3', '3.4']
}
)
MAP_VERSION_TO_INSTALL_MATPLOTLIB.update(
{
k: {
'python': '3.7',
'packages': 'requirements.txt',
'install': 'python -m pip install -e .',
'pre_install': [
'apt-get -y update && apt-get -y upgrade && apt-get install -y imagemagick ffmpeg libfreetype6-dev pkg-config'
],
'pip_packages': ['pytest'],
}
for k in ['3.0']
}
)
MAP_VERSION_TO_INSTALL_MATPLOTLIB.update(
{
k: {
'python': '3.5',
'install': 'python setup.py build; python setup.py install',
'pre_install': [
'apt-get -y update && apt-get -y upgrade && && apt-get install -y imagemagick ffmpeg'
],
'pip_packages': ['pytest'],
'execute_test_as_nonroot': True,
}
for k in ['2.0', '2.1', '2.2', '1.0', '1.1', '1.2', '1.3', '1.4', '1.5']
}
)
MAP_VERSION_TO_INSTALL_SPHINX = {
k: {
'python': '3.9',
'pip_packages': ['tox==4.16.0', 'tox-current-env==0.0.11'],
'install': 'python -m pip install -e .[test]',
'pre_install': ["sed -i 's/pytest/pytest -rA/' tox.ini"],
}
for k in ['1.5', '1.6', '1.7', '1.8', '2.0', '2.1', '2.2', '2.3', '2.4', '3.0']
+ ['3.1', '3.2', '3.3', '3.4', '3.5', '4.0', '4.1', '4.2', '4.3', '4.4']
+ ['4.5', '5.0', '5.1', '5.2', '5.3', '6.0', '6.2', '7.0', '7.1', '7.2']
}
for k in ['3.0', '3.1', '3.2', '3.3', '3.4', '3.5', '4.0', '4.1', '4.2', '4.3', '4.4']:
MAP_VERSION_TO_INSTALL_SPHINX[k]['pre_install'].extend(
[
"sed -i 's/Jinja2>=2.3/Jinja2<3.0/' setup.py",
"sed -i 's/sphinxcontrib-applehelp/sphinxcontrib-applehelp<=1.0.7/' setup.py",
"sed -i 's/sphinxcontrib-devhelp/sphinxcontrib-devhelp<=1.0.5/' setup.py",
"sed -i 's/sphinxcontrib-qthelp/sphinxcontrib-qthelp<=1.0.6/' setup.py",
"sed -i 's/alabaster>=0.7,<0.8/alabaster>=0.7,<0.7.12/' setup.py",
"sed -i \"s/'packaging',/'packaging', 'markupsafe<=2.0.1',/\" setup.py",
]
)
if k in ['4.2', '4.3', '4.4']:
MAP_VERSION_TO_INSTALL_SPHINX[k]['pre_install'].extend(
[
"sed -i 's/sphinxcontrib-htmlhelp>=2.0.0/sphinxcontrib-htmlhelp>=2.0.0,<=2.0.4/' setup.py",
"sed -i 's/sphinxcontrib-serializinghtml>=1.1.5/sphinxcontrib-serializinghtml>=1.1.5,<=1.1.9/' setup.py",
]
)
elif k == '4.1':
MAP_VERSION_TO_INSTALL_SPHINX[k]['pre_install'].extend(
[
(
"grep -q 'sphinxcontrib-htmlhelp>=2.0.0' setup.py && "
"sed -i 's/sphinxcontrib-htmlhelp>=2.0.0/sphinxcontrib-htmlhelp>=2.0.0,<=2.0.4/' setup.py || "
"sed -i 's/sphinxcontrib-htmlhelp/sphinxcontrib-htmlhelp<=2.0.4/' setup.py"
),
(
"grep -q 'sphinxcontrib-serializinghtml>=1.1.5' setup.py && "
"sed -i 's/sphinxcontrib-serializinghtml>=1.1.5/sphinxcontrib-serializinghtml>=1.1.5,<=1.1.9/' setup.py || "
"sed -i 's/sphinxcontrib-serializinghtml/sphinxcontrib-serializinghtml<=1.1.9/' setup.py"
),
]
)
else:
MAP_VERSION_TO_INSTALL_SPHINX[k]['pre_install'].extend(
[
"sed -i 's/sphinxcontrib-htmlhelp/sphinxcontrib-htmlhelp<=2.0.4/' setup.py",
"sed -i 's/sphinxcontrib-serializinghtml/sphinxcontrib-serializinghtml<=1.1.9/' setup.py",
]
)
MAP_VERSION_TO_INSTALL_SPHINX['7.2']['pre_install'] += [
'apt-get update && apt-get install -y graphviz'
]
MAP_VERSION_TO_INSTALL_ASTROPY = {
k: {
'python': '3.9',
'install': 'python -m pip install -e .[test] --verbose',
'pip_packages': [
'attrs==23.1.0',
'exceptiongroup==1.1.3',
'execnet==2.0.2',
'hypothesis==6.82.6',
'iniconfig==2.0.0',
'numpy==1.25.2',
'packaging==23.1',
'pluggy==1.3.0',
'psutil==5.9.5',
'pyerfa==2.0.0.3',
'pytest-arraydiff==0.5.0',
'pytest-astropy-header==0.2.2',
'pytest-astropy==0.10.0',
'pytest-cov==4.1.0',
'pytest-doctestplus==1.0.0',
'pytest-filter-subpackage==0.1.2',
'pytest-mock==3.11.1',
'pytest-openfiles==0.5.0',
'pytest-remotedata==0.4.0',
'pytest-xdist==3.3.1',
'pytest==7.4.0',
'PyYAML==6.0.1',
'setuptools==68.0.0',
'sortedcontainers==2.4.0',
'tomli==2.0.1',
],
}
for k in ['0.1', '0.2', '0.3', '0.4', '1.1', '1.2', '1.3', '3.0', '3.1', '3.2']
+ ['4.1', '4.2', '4.3', '5.0', '5.1', '5.2']
}
for k in ['4.1', '4.2', '4.3', '5.0', '5.1', '5.2']:
MAP_VERSION_TO_INSTALL_ASTROPY[k]['pre_install'] = [
'sed -i \'s/requires = \\["setuptools",/requires = \\["setuptools==68.0.0",/\' pyproject.toml'
]
MAP_VERSION_TO_INSTALL_SYMPY = {
k: {
'python': '3.9',
'packages': 'mpmath flake8',
'pip_packages': ['mpmath==1.3.0', 'flake8-comprehensions'],
'install': 'python -m pip install -e .',
}
for k in ['0.7', '1.0', '1.1', '1.10', '1.11', '1.12', '1.2', '1.4', '1.5', '1.6']
+ ['1.7', '1.8', '1.9']
}
MAP_VERSION_TO_INSTALL_SYMPY.update(
{
k: {
'python': '3.9',
'packages': 'requirements.txt',
'install': 'python -m pip install -e .',
'pip_packages': ['mpmath==1.3.0'],
}
for k in ['1.13']
}
)
MAP_VERSION_TO_INSTALL_PYLINT = {
k: {
'python': '3.9',
'packages': 'requirements.txt',
'install': 'python -m pip install -e .',
}
for k in [
'2.10',
'2.11',
'2.13',
'2.14',
'2.15',
'2.16',
'2.17',
'2.8',
'2.9',
'3.0',
]
}
MAP_VERSION_TO_INSTALL_PYLINT['2.8']['pip_packages'] = ['pyenchant==3.2']
MAP_VERSION_TO_INSTALL_PYLINT['2.8']['pre_install'] = [
'apt-get update && apt-get install -y libenchant-2-dev hunspell-en-us'
]
MAP_VERSION_TO_INSTALL_PYLINT.update(
{
k: {
**MAP_VERSION_TO_INSTALL_PYLINT[k],
'pip_packages': ['astroid==3.0.0a6', 'setuptools'],
}
for k in ['3.0']
}
)
MAP_VERSION_TO_INSTALL_XARRAY = {
k: {
'python': '3.10',
'packages': 'environment.yml',
'install': 'python -m pip install -e .',
'pip_packages': [
'numpy==1.23.0',
'packaging==23.1',
'pandas==1.5.3',
'pytest==7.4.0',
'python-dateutil==2.8.2',
'pytz==2023.3',
'six==1.16.0',
'scipy==1.11.1',
'setuptools==68.0.0',
],
'no_use_env': True,
}
for k in ['0.12', '0.18', '0.19', '0.20', '2022.03', '2022.06', '2022.09']
}
MAP_VERSION_TO_INSTALL_SQLFLUFF = {
k: {
'python': '3.9',
'packages': 'requirements.txt',
'install': 'python -m pip install -e .',
}
for k in [
'0.10',
'0.11',
'0.12',
'0.13',
'0.4',
'0.5',
'0.6',
'0.8',
'0.9',
'1.0',
'1.1',
'1.2',
'1.3',
'1.4',
'2.0',
'2.1',
'2.2',
]
}
MAP_VERSION_TO_INSTALL_DBT_CORE = {
k: {
'python': '3.9',
'packages': 'requirements.txt',
'install': 'python -m pip install -e .',
}
for k in [
'0.13',
'0.14',
'0.15',
'0.16',
'0.17',
'0.18',
'0.19',
'0.20',
'0.21',
'1.0',
'1.1',
'1.2',
'1.3',
'1.4',
'1.5',
'1.6',
'1.7',
]
}
MAP_VERSION_TO_INSTALL_PYVISTA = {
k: {
'python': '3.9',
'install': 'python -m pip install -e .',
'pip_packages': ['pytest'],
}
for k in ['0.20', '0.21', '0.22', '0.23']
}
MAP_VERSION_TO_INSTALL_PYVISTA.update(
{
k: {
'python': '3.9',
'packages': 'requirements.txt',
'install': 'python -m pip install -e .',
'pip_packages': ['pytest'],
}
for k in [
'0.24',
'0.25',
'0.26',
'0.27',
'0.28',
'0.29',
'0.30',
'0.31',
'0.32',
'0.33',
'0.34',
'0.35',
'0.36',
'0.37',
'0.38',
'0.39',
'0.40',
'0.41',
'0.42',
'0.43',
]
}
)
MAP_VERSION_TO_INSTALL_ASTROID = {
k: {
'python': '3.9',
'install': 'python -m pip install -e .',
'pip_packages': ['pytest'],
}
for k in [
'2.10',
'2.12',
'2.13',
'2.14',
'2.15',
'2.16',
'2.5',
'2.6',
'2.7',
'2.8',
'2.9',
'3.0',
]
}
MAP_VERSION_TO_INSTALL_MARSHMALLOW = {
k: {
'python': '3.9',
'install': "python -m pip install -e '.[dev]'",
}
for k in [
'2.18',
'2.19',
'2.20',
'3.0',
'3.1',
'3.10',
'3.11',
'3.12',
'3.13',
'3.15',
'3.16',
'3.19',
'3.2',
'3.4',
'3.8',
'3.9',
]
}
MAP_VERSION_TO_INSTALL_PVLIB = {
k: {
'python': '3.9',
'install': 'python -m pip install -e .[all]',
'packages': 'pandas scipy',
'pip_packages': ['jupyter', 'ipython', 'matplotlib', 'pytest', 'flake8'],
}
for k in ['0.1', '0.2', '0.3', '0.4', '0.5', '0.6', '0.7', '0.8', '0.9']
}
MAP_VERSION_TO_INSTALL_PYDICOM = {
k: {'python': '3.6', 'install': 'python -m pip install -e .', 'packages': 'numpy'}
for k in [
'1.0',
'1.1',
'1.2',
'1.3',
'1.4',
'2.0',
'2.1',
'2.2',
'2.3',
'2.4',
'3.0',
]
}
MAP_VERSION_TO_INSTALL_PYDICOM.update(
{k: {**MAP_VERSION_TO_INSTALL_PYDICOM[k], 'python': '3.8'} for k in ['1.4', '2.0']}
)
MAP_VERSION_TO_INSTALL_PYDICOM.update(
{k: {**MAP_VERSION_TO_INSTALL_PYDICOM[k], 'python': '3.9'} for k in ['2.1', '2.2']}
)
MAP_VERSION_TO_INSTALL_PYDICOM.update(
{k: {**MAP_VERSION_TO_INSTALL_PYDICOM[k], 'python': '3.10'} for k in ['2.3']}
)
MAP_VERSION_TO_INSTALL_PYDICOM.update(
{k: {**MAP_VERSION_TO_INSTALL_PYDICOM[k], 'python': '3.11'} for k in ['2.4', '3.0']}
)
MAP_VERSION_TO_INSTALL_HUMANEVAL = {k: {'python': '3.9'} for k in ['1.0']}
MAP_VERSION_TO_INSTALL_HUMANEVAL_FIX = {
k: {'python': '3.10', 'packages': 'pytest'} for k in ['0.0.1']
}
# Constants - Task Instance Instllation Environment
MAP_VERSION_TO_INSTALL = {
'astropy/astropy': MAP_VERSION_TO_INSTALL_ASTROPY,
'dbt-labs/dbt-core': MAP_VERSION_TO_INSTALL_DBT_CORE,
'django/django': MAP_VERSION_TO_INSTALL_DJANGO,
'matplotlib/matplotlib': MAP_VERSION_TO_INSTALL_MATPLOTLIB,
'marshmallow-code/marshmallow': MAP_VERSION_TO_INSTALL_MARSHMALLOW,
'mwaskom/seaborn': MAP_VERSION_TO_INSTALL_SEABORN,
'pallets/flask': MAP_VERSION_TO_INSTALL_FLASK,
'psf/requests': MAP_VERSION_TO_INSTALL_REQUESTS,
'pvlib/pvlib-python': MAP_VERSION_TO_INSTALL_PVLIB,
'pydata/xarray': MAP_VERSION_TO_INSTALL_XARRAY,
'pydicom/pydicom': MAP_VERSION_TO_INSTALL_PYDICOM,
'pylint-dev/astroid': MAP_VERSION_TO_INSTALL_ASTROID,
'pylint-dev/pylint': MAP_VERSION_TO_INSTALL_PYLINT,
'pytest-dev/pytest': MAP_VERSION_TO_INSTALL_PYTEST,
'pyvista/pyvista': MAP_VERSION_TO_INSTALL_PYVISTA,
'scikit-learn/scikit-learn': MAP_VERSION_TO_INSTALL_SKLEARN,
'sphinx-doc/sphinx': MAP_VERSION_TO_INSTALL_SPHINX,
'sqlfluff/sqlfluff': MAP_VERSION_TO_INSTALL_SQLFLUFF,
'swe-bench/humaneval': MAP_VERSION_TO_INSTALL_HUMANEVAL,
'nielstron/humaneval_fix': MAP_VERSION_TO_INSTALL_HUMANEVAL_FIX,
'sympy/sympy': MAP_VERSION_TO_INSTALL_SYMPY,
}
# Constants - Repository Specific Installation Instructions
MAP_REPO_TO_INSTALL = {}
# Constants - Task Instance Test Frameworks
TEST_PYTEST_VERBOSE = 'pytest -rA --tb=long -p no:cacheprovider'
MAP_REPO_TO_TEST_FRAMEWORK_VERBOSE = {
'astropy/astropy': {
k: TEST_PYTEST_VERBOSE for k in MAP_VERSION_TO_INSTALL_ASTROPY.keys()
},
'django/django': {
k: './tests/runtests.py --verbosity 2 --settings=test_sqlite --parallel 1'
for k in MAP_VERSION_TO_INSTALL_DJANGO.keys()
},
'marshmallow-code/marshmallow': {
k: TEST_PYTEST_VERBOSE for k in MAP_VERSION_TO_INSTALL_MARSHMALLOW.keys()
},
'matplotlib/matplotlib': {
k: TEST_PYTEST_VERBOSE for k in MAP_VERSION_TO_INSTALL_MATPLOTLIB.keys()
},
'mwaskom/seaborn': {
k: 'pytest -rA --tb=long' for k in MAP_VERSION_TO_INSTALL_SEABORN.keys()
},
'pallets/flask': {
k: TEST_PYTEST_VERBOSE for k in MAP_VERSION_TO_INSTALL_FLASK.keys()
},
'psf/requests': {
k: TEST_PYTEST_VERBOSE for k in MAP_VERSION_TO_INSTALL_REQUESTS.keys()
},
'pvlib/pvlib-python': {
k: TEST_PYTEST_VERBOSE for k in MAP_VERSION_TO_INSTALL_PVLIB.keys()
},
'pydata/xarray': {
k: TEST_PYTEST_VERBOSE for k in MAP_VERSION_TO_INSTALL_XARRAY.keys()
},
'pydicom/pydicom': {
k: TEST_PYTEST_VERBOSE for k in MAP_VERSION_TO_INSTALL_PYDICOM.keys()
},
'pylint-dev/astroid': {
k: TEST_PYTEST_VERBOSE for k in MAP_VERSION_TO_INSTALL_ASTROID.keys()
},
'pylint-dev/pylint': {
k: TEST_PYTEST_VERBOSE for k in MAP_VERSION_TO_INSTALL_PYLINT.keys()
},
'pytest-dev/pytest': {
k: 'pytest -rA --tb=long' for k in MAP_VERSION_TO_INSTALL_PYTEST.keys()
},
'pyvista/pyvista': {
k: TEST_PYTEST_VERBOSE for k in MAP_VERSION_TO_INSTALL_PYVISTA.keys()
},
'scikit-learn/scikit-learn': {
k: TEST_PYTEST_VERBOSE for k in MAP_VERSION_TO_INSTALL_SKLEARN.keys()
},
'sphinx-doc/sphinx': {
k: 'tox -epy39 -v --' for k in MAP_VERSION_TO_INSTALL_SPHINX.keys()
},
'sqlfluff/sqlfluff': {
k: TEST_PYTEST_VERBOSE for k in MAP_VERSION_TO_INSTALL_SQLFLUFF.keys()
},
'swe-bench/humaneval': {
k: 'python' for k in MAP_VERSION_TO_INSTALL_HUMANEVAL.keys()
},
'nielstron/humaneval_fix': {
k: TEST_PYTEST_VERBOSE for k in MAP_VERSION_TO_INSTALL_HUMANEVAL.keys()
},
'sympy/sympy': {
k: 'bin/test -C --verbose' for k in MAP_VERSION_TO_INSTALL_SYMPY.keys()
},
}
MAP_REPO_TO_TEST_FRAMEWORK_VERBOSE['django/django']['1.9'] = (
'./tests/runtests.py --verbosity 2'
)
-978
View File
@@ -1,978 +0,0 @@
import asyncio
import copy
import json
import os
import tempfile
from typing import Any, Literal
import pandas as pd
import toml
from datasets import load_dataset
import openhands.agenthub
from evaluation.benchmarks.swe_perf.binary_patch_utils import (
remove_binary_diffs,
remove_binary_files_from_git,
)
from evaluation.benchmarks.swe_perf.resource.mapping import (
get_instance_resource_factor,
)
from evaluation.benchmarks.swe_perf.resource.swt_bench_constants import (
MAP_REPO_TO_INSTALL,
MAP_VERSION_TO_INSTALL,
)
from evaluation.utils.shared import (
EvalException,
EvalMetadata,
EvalOutput,
assert_and_raise,
check_maximum_retries_exceeded,
codeact_user_response,
get_default_sandbox_config_for_eval,
get_metrics,
is_fatal_evaluation_error,
make_metadata,
prepare_dataset,
reset_logger_for_multiprocessing,
run_evaluation,
update_llm_config_for_completions_logging,
)
from openhands.controller.state.state import State
from openhands.core.config import (
AgentConfig,
OpenHandsConfig,
get_evaluation_parser,
get_llm_config_arg,
)
from openhands.core.config.condenser_config import NoOpCondenserConfig
from openhands.core.config.utils import get_condenser_config_arg
from openhands.core.logger import openhands_logger as logger
from openhands.core.main import create_runtime, run_controller
from openhands.critic import AgentFinishedCritic
from openhands.events.action import CmdRunAction, FileReadAction, MessageAction
from openhands.events.observation import (
CmdOutputObservation,
ErrorObservation,
FileReadObservation,
)
from openhands.events.serialization.event import event_from_dict, event_to_dict
from openhands.runtime.base import Runtime
from openhands.utils.async_utils import call_async_from_sync
from openhands.utils.shutdown_listener import sleep_if_should_continue
USE_HINT_TEXT = os.environ.get('USE_HINT_TEXT', 'false').lower() == 'true'
RUN_WITH_BROWSING = os.environ.get('RUN_WITH_BROWSING', 'false').lower() == 'true'
ENABLE_LLM_EDITOR = os.environ.get('ENABLE_LLM_EDITOR', 'false').lower() == 'true'
BenchMode = Literal['swe', 'swt', 'swt-ci']
# Global variable to track dataset type
DATASET_TYPE = 'SWE-Perf'
AGENT_CLS_TO_FAKE_USER_RESPONSE_FN = {
'CodeActAgent': codeact_user_response,
}
def _get_sweperf_workspace_dir_name(instance: pd.Series) -> str:
return f'{instance.repo}__{instance.version}'.replace('/', '__')
def get_instruction(instance: pd.Series, metadata: EvalMetadata) -> MessageAction:
workspace_dir_name = _get_sweperf_workspace_dir_name(instance)
# The instruction
instruction = f"""
<uploaded_files>
/workspace/{workspace_dir_name}
</uploaded_files>
I've uploaded a python code repository in the directory {workspace_dir_name}. Consider the following issue description:
<issue_description>
{instance.problem_statement_realistic}
</issue_description>
Can you help me implement the necessary changes to the repository so that the requirements specified in the <issue_description> are met?
I've already taken care of all changes to any of the test files described in the <issue_description>. This means you DON'T have to modify the testing logic or any of the tests in any way!
Also the development Python environment is already set up for you (i.e., all dependencies already installed), so you don't need to install other packages.
Your task is to make the minimal changes to non-test files in the /workspace/{workspace_dir_name} directory to ensure the <issue_description> is satisfied.
Follow these phases to resolve the issue:
## ⚙️ Phase 1: Understand the Problem & Test Reuse
**1.1. Install the package locally:**
```bash
python -m pip install pyinstrument
python -m pip install -e .
```
> Only proceed to README-based install if the above fails.
**1.2. Identify relevant modules and logic:**
* Use test cases mentioned in `<issue_description>` to locate the functions and files involved.
* Focus on potential performance bottlenecks: loops, I/O, locks, cache access, data structures, etc.
**1.3. Run initial benchmark:**
```bash
pytest -rA --durations=0 --disable-warnings -p no:warnings --tb=no <test_case>
```
## 📊 Phase 2: Localization (Hierarchical Bottleneck Detection)
**2.1. Global profiling using `pyinstrument`:**
```bash
pyinstrument -m pytest -rA --durations=0 --disable-warnings --tb=no --continue-on-collection-errors -p no:warnings <test_case>
```
**2.2. Analyze performance stack if necessary:**
* 🔍 **Module level**: Identify hot files and methods.
* 🔬 **Function level**: Focus on top-consuming classes/functions.
* 🧬 **Line level**: Add fine-grained sampling/logging if needed.
**2.3. Output a layered summary** showing where time is spent and why.
## 🧠 Phase 3: Repair (Design Candidate Fixes)
**3.1. Propose multiple optimization ideas:**
* Algorithm refinement
* Data structure improvement
* Parallelism / async
* Caching / batching
**3.2. For each candidate:**
* Describe the idea using pseudocode or `diff`
* Evaluate expected gain vs implementation complexity
---
## 🔬 Phase 4: Patch Validation (Quantitative Benchmarking)
**4.1. Apply each patch separately**
**4.2. Re-run benchmark using the same test case**
**4.3. Record improvements in pytest `call` phase**
**4.4. Build a comparison table:**
| Patch | Call Time (s) | % Improvement | Notes |
| ----- | ------------- | ------------- | ----- |
**4.5. Select the best-performing, low-risk patch**
---
## 📌 Phase 5: Fix Analysis (Explain the Fix)
**5.1.** Clearly describe the **problem**
**5.2.** Identify the **exact location** in the code
**5.3.** Describe **how the test reveals** the problem
**5.4.** State **best practices** relevant to the fix
**5.5.** Explain **how the fix resolves** the issue
---
## 🧩 Phase 6: Fix Implementation
**6.1.** Implement the selected fix in the non-test source files
**6.2.** Ensure the changes are minimal and performance-focused
---
## ✅ Phase 7: Final Verification & Review
**7.1.** Run **full regression tests** to ensure correctness
**7.2.** Re-benchmark performance and compare with base commit `{instance['base_commit']}`
**7.3.** Ensure `call` time is significantly improved in `pytest` output
**7.4.** If gain is limited, consider deeper optimization
**7.5.** Write a concise final report including:
* Bottleneck path
* Fix alternatives considered
* Final solution and metrics
* Code diff summary
## Important ##
1. Please do not optimize specifically for certain test cases, as this may be considered cheating.
2. Delete all files unrelated to optimization, including temporary files, auxiliary test code, and so on.
3. Avoid excessive optimization and unnecessary divergence; if the improvement is not significant, stop promptly to maintain efficiency and focus.
Be thorough in your exploration, testing, and reasoning. It's fine if your thinking process is lengthy - quality and completeness are more important than brevity.
"""
if RUN_WITH_BROWSING:
instruction += (
'<IMPORTANT!>\nYou SHOULD NEVER attempt to browse the web. </IMPORTANT!>\n'
)
if 'image_assets' in instance:
assets = json.loads(instance['image_assets'])
assert 'problem_statement' in assets, (
'problem_statement is required in image_assets'
)
image_urls = assets['problem_statement']
return MessageAction(content=instruction, image_urls=image_urls)
return MessageAction(content=instruction)
def get_instance_docker_image(
instance_id: str,
) -> str:
docker_image_prefix = 'docker.io/betty1202/'
image_name = 'sweb.eval.x86_64.' + instance_id
image_name = image_name.replace(
'__', '_s_'
) # to comply with docker image naming convention
return (docker_image_prefix.rstrip('/') + '/' + image_name).lower()
def get_config(
instance: pd.Series,
metadata: EvalMetadata,
) -> OpenHandsConfig:
base_container_image = get_instance_docker_image(
instance['instance_id'],
)
logger.info(
f'Using instance container image: {base_container_image}. '
f'Please make sure this image exists. '
f'Submit an issue on https://github.com/All-Hands-AI/OpenHands if you run into any issues.'
)
sandbox_config = get_default_sandbox_config_for_eval()
sandbox_config.base_container_image = base_container_image
sandbox_config.enable_auto_lint = True
sandbox_config.use_host_network = False
# Add platform to the sandbox config to solve issue 4401
sandbox_config.platform = 'linux/amd64'
sandbox_config.remote_runtime_resource_factor = get_instance_resource_factor(
dataset_name=metadata.dataset,
instance_id=instance['instance_id'],
)
config = OpenHandsConfig(
default_agent=metadata.agent_class,
run_as_openhands=False,
max_iterations=metadata.max_iterations,
enable_browser=RUN_WITH_BROWSING,
runtime=os.environ.get('RUNTIME', 'docker'),
sandbox=sandbox_config,
# do not mount workspace
workspace_base=None,
workspace_mount_path=None,
)
config.set_llm_config(
update_llm_config_for_completions_logging(
metadata.llm_config, metadata.eval_output_dir, instance['instance_id']
)
)
# get 'draft_editor' config if exists
config.set_llm_config(get_llm_config_arg('draft_editor'), 'draft_editor')
agent_config = AgentConfig(
enable_jupyter=False,
enable_browsing=RUN_WITH_BROWSING,
enable_llm_editor=ENABLE_LLM_EDITOR,
enable_mcp=False,
condenser=metadata.condenser_config,
enable_prompt_extensions=False,
)
config.set_agent_config(agent_config)
return config
def initialize_runtime(
runtime: Runtime,
instance: pd.Series, # this argument is not required
metadata: EvalMetadata,
):
"""Initialize the runtime for the agent.
This function is called before the runtime is used to run the agent.
"""
logger.info('-' * 30)
logger.info('BEGIN Runtime Initialization Fn')
logger.info('-' * 30)
workspace_dir_name = _get_sweperf_workspace_dir_name(instance)
obs: CmdOutputObservation
# Set instance id and git configuration
action = CmdRunAction(
command=f"""echo 'export SWE_INSTANCE_ID={instance['instance_id']}' >> ~/.bashrc && echo 'export PIP_CACHE_DIR=~/.cache/pip' >> ~/.bashrc && echo "alias git='git --no-pager'" >> ~/.bashrc && git config --global core.pager "" && git config --global diff.binary false"""
)
action.set_hard_timeout(600)
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert_and_raise(
obs.exit_code == 0,
f'Failed to export SWE_INSTANCE_ID and configure git: {str(obs)}',
)
action = CmdRunAction(command="""export USER=$(whoami); echo USER=${USER} """)
action.set_hard_timeout(600)
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert_and_raise(obs.exit_code == 0, f'Failed to export USER: {str(obs)}')
# inject the init script
script_dir = os.path.dirname(__file__)
# inject the instance info
action = CmdRunAction(command='mkdir -p /swe_util/eval_data/instances')
action.set_hard_timeout(600)
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert_and_raise(
obs.exit_code == 0,
f'Failed to create /swe_util/eval_data/instances: {str(obs)}',
)
swe_instance_json_name = 'swe-perf-instance.json'
with tempfile.TemporaryDirectory() as temp_dir:
# Construct the full path for the desired file name within the temporary directory
temp_file_path = os.path.join(temp_dir, swe_instance_json_name)
# Write to the file with the desired name within the temporary directory
with open(temp_file_path, 'w') as f:
if not isinstance(instance, dict):
json.dump([instance.to_dict()], f)
else:
json.dump([instance], f)
# Copy the file to the desired location
runtime.copy_to(temp_file_path, '/swe_util/eval_data/instances/')
# inject the instance swe entry
entry_script_path = 'instance_swe_entry.sh'
runtime.copy_to(
str(os.path.join(script_dir, f'scripts/setup/{entry_script_path}')),
'/swe_util/',
)
action = CmdRunAction(command='cat ~/.bashrc')
action.set_hard_timeout(600)
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert_and_raise(obs.exit_code == 0, f'Failed to cat ~/.bashrc: {str(obs)}')
action = CmdRunAction(command='source ~/.bashrc')
action.set_hard_timeout(600)
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
if isinstance(obs, ErrorObservation):
logger.error(f'Failed to source ~/.bashrc: {str(obs)}')
assert_and_raise(obs.exit_code == 0, f'Failed to source ~/.bashrc: {str(obs)}')
action = CmdRunAction(command=f'source /swe_util/{entry_script_path}')
action.set_hard_timeout(600)
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert_and_raise(
obs.exit_code == 0,
f'Failed to source /swe_util/{entry_script_path}: {str(obs)}',
)
action = CmdRunAction(command=f'cd /workspace/{workspace_dir_name}')
action.set_hard_timeout(600)
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert_and_raise(
obs.exit_code == 0,
f'Failed to cd to /workspace/{workspace_dir_name}: {str(obs)}',
)
action = CmdRunAction(command='git reset --hard')
action.set_hard_timeout(600)
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert_and_raise(obs.exit_code == 0, f'Failed to git reset --hard: {str(obs)}')
action = CmdRunAction(
command='for remote_name in $(git remote); do git remote remove "${remote_name}"; done'
)
action.set_hard_timeout(600)
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert_and_raise(obs.exit_code == 0, f'Failed to remove git remotes: {str(obs)}')
if metadata.details['mode'] == 'swt-ci':
# set up repo
setup_commands = []
if instance['repo'] in MAP_REPO_TO_INSTALL:
setup_commands.append(MAP_REPO_TO_INSTALL[instance['repo']])
# Run pre-install set up if provided
install = MAP_VERSION_TO_INSTALL.get(instance['repo'], {}).get(
instance['version'], []
)
if 'pre_install' in install:
for pre_install in install['pre_install']:
setup_commands.append(pre_install)
if 'install' in install:
setup_commands.append(install['install'])
for command in setup_commands:
action = CmdRunAction(command=command)
action.set_hard_timeout(600)
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
action = CmdRunAction(command='which python')
action.set_hard_timeout(600)
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert_and_raise(
obs.exit_code == 0 and 'testbed' in obs.content,
f'Expected to find python interpreter from testbed, but got: {str(obs)}',
)
logger.info('-' * 30)
logger.info('END Runtime Initialization Fn')
logger.info('-' * 30)
def complete_runtime(
runtime: Runtime,
instance: pd.Series, # this argument is not required, but it is used to get the workspace_dir_name
) -> dict[str, Any]:
"""Complete the runtime for the agent.
This function is called before the runtime is used to run the agent.
If you need to do something in the sandbox to get the correctness metric after
the agent has run, modify this function.
"""
logger.info('-' * 30)
logger.info('BEGIN Runtime Completion Fn')
logger.info('-' * 30)
obs: CmdOutputObservation
workspace_dir_name = _get_sweperf_workspace_dir_name(instance)
action = CmdRunAction(command=f'cd /workspace/{workspace_dir_name}')
action.set_hard_timeout(600)
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
if obs.exit_code == -1:
# The previous command is still running
# We need to kill previous command
logger.info('The previous command is still running, trying to kill it...')
action = CmdRunAction(command='C-c')
obs = runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
# Then run the command again
action = CmdRunAction(command=f'cd /workspace/{workspace_dir_name}')
action.set_hard_timeout(600)
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
if obs.exit_code == -1:
# The previous command is still running
# We need to kill previous command
logger.info('The previous command is still running, trying to ctrl+z it...')
action = CmdRunAction(command='C-z')
obs = runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
# Then run the command again
action = CmdRunAction(command=f'cd /workspace/{workspace_dir_name}')
action.set_hard_timeout(600)
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert_and_raise(
isinstance(obs, CmdOutputObservation) and obs.exit_code == 0,
f'Failed to cd to /workspace/{workspace_dir_name}: {str(obs)}',
)
action = CmdRunAction(command='git config --global core.pager ""')
action.set_hard_timeout(600)
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert_and_raise(
isinstance(obs, CmdOutputObservation) and obs.exit_code == 0,
f'Failed to git config --global core.pager "": {str(obs)}',
)
# First check for any git repositories in subdirectories
action = CmdRunAction(command='find . -type d -name .git -not -path "./.git"')
action.set_hard_timeout(600)
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert_and_raise(
isinstance(obs, CmdOutputObservation) and obs.exit_code == 0,
f'Failed to find git repositories: {str(obs)}',
)
git_dirs = [p for p in obs.content.strip().split('\n') if p]
if git_dirs:
# Remove all .git directories in subdirectories
for git_dir in git_dirs:
action = CmdRunAction(command=f'rm -rf "{git_dir}"')
action.set_hard_timeout(600)
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert_and_raise(
isinstance(obs, CmdOutputObservation) and obs.exit_code == 0,
f'Failed to remove git directory {git_dir}: {str(obs)}',
)
# add all files
action = CmdRunAction(command='git add -A')
action.set_hard_timeout(600)
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert_and_raise(
isinstance(obs, CmdOutputObservation) and obs.exit_code == 0,
f'Failed to git add -A: {str(obs)}',
)
# Remove binary files from git staging
action = CmdRunAction(command=remove_binary_files_from_git())
action.set_hard_timeout(600)
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert_and_raise(
isinstance(obs, CmdOutputObservation) and obs.exit_code == 0,
f'Failed to remove binary files: {str(obs)}',
)
n_retries = 0
git_patch = None
while n_retries < 5:
action = CmdRunAction(
command=f'git diff --no-color --cached {instance["base_commit"]} > patch.diff'
)
action.set_hard_timeout(max(300 + 100 * n_retries, 600))
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
n_retries += 1
if isinstance(obs, CmdOutputObservation):
if obs.exit_code == 0:
# Read the patch file
action = FileReadAction(path='patch.diff')
action.set_hard_timeout(max(300 + 100 * n_retries, 600))
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
if isinstance(obs, FileReadObservation):
git_patch = obs.content
break
elif isinstance(obs, ErrorObservation):
# Fall back to cat "patch.diff" to get the patch
assert 'File could not be decoded as utf-8' in obs.content
action = CmdRunAction(command='cat patch.diff')
action.set_hard_timeout(max(300 + 100 * n_retries, 600))
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
assert isinstance(obs, CmdOutputObservation) and obs.exit_code == 0
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
git_patch = obs.content
break
else:
assert_and_raise(False, f'Unexpected observation type: {str(obs)}')
else:
logger.info('Failed to get git diff, retrying...')
sleep_if_should_continue(10)
elif isinstance(obs, ErrorObservation):
logger.error(f'Error occurred: {obs.content}. Retrying...')
sleep_if_should_continue(10)
else:
assert_and_raise(False, f'Unexpected observation type: {str(obs)}')
assert_and_raise(git_patch is not None, 'Failed to get git diff (None)')
# Remove binary diffs from the patch
git_patch = remove_binary_diffs(git_patch)
logger.info('-' * 30)
logger.info('END Runtime Completion Fn')
logger.info('-' * 30)
return {'git_patch': git_patch}
def process_instance(
instance: pd.Series,
metadata: EvalMetadata,
reset_logger: bool = True,
runtime_failure_count: int = 0,
) -> EvalOutput:
config = get_config(instance, metadata)
# Setup the logger properly, so you can run multi-processing to parallelize the evaluation
if reset_logger:
log_dir = os.path.join(metadata.eval_output_dir, 'infer_logs')
reset_logger_for_multiprocessing(logger, instance.instance_id, log_dir)
else:
logger.info(f'Starting evaluation for instance {instance.instance_id}.')
# Increase resource_factor with increasing attempt_id
if runtime_failure_count > 0:
config.sandbox.remote_runtime_resource_factor = min(
config.sandbox.remote_runtime_resource_factor * (2**runtime_failure_count),
8,
)
logger.warning(
f'This is the {runtime_failure_count + 1}th attempt for instance {instance.instance_id}, setting resource factor to {config.sandbox.remote_runtime_resource_factor}'
)
metadata = copy.deepcopy(metadata)
metadata.details['runtime_failure_count'] = runtime_failure_count
metadata.details['remote_runtime_resource_factor'] = (
config.sandbox.remote_runtime_resource_factor
)
runtime = create_runtime(config)
call_async_from_sync(runtime.connect)
try:
initialize_runtime(runtime, instance, metadata)
message_action = get_instruction(instance, metadata)
# Here's how you can run the agent (similar to the `main` function) and get the final task state
state: State | None = asyncio.run(
run_controller(
config=config,
initial_user_action=message_action,
runtime=runtime,
fake_user_response_fn=AGENT_CLS_TO_FAKE_USER_RESPONSE_FN[
metadata.agent_class
],
)
)
# if fatal error, throw EvalError to trigger re-run
if is_fatal_evaluation_error(state.last_error):
raise EvalException('Fatal error detected: ' + state.last_error)
# Get git patch
complete_runtime_fn = complete_runtime
return_val = complete_runtime_fn(runtime, instance)
git_patch = return_val['git_patch']
logger.info(
f'Got git diff for instance {instance.instance_id}:\n--------\n{git_patch}\n--------'
)
finally:
runtime.close()
# ==========================================
# ======= Attempt to evaluate the agent's edits =======
# we use eval_infer.sh to evaluate the agent's edits, not here
# because the agent may alter the environment / testcases
test_result = {
'git_patch': git_patch,
}
# If you are working on some simpler benchmark that only evaluates the final model output (e.g., in a MessageAction)
# You can simply get the LAST `MessageAction` from the returned `state.history` and parse it for evaluation.
if state is None:
raise ValueError('State should not be None.')
# NOTE: this is NO LONGER the event stream, but an agent history that includes delegate agent's events
histories = [event_to_dict(event) for event in state.history]
metrics = get_metrics(state)
# Save the output
instruction = message_action.content
if message_action.image_urls:
instruction += (
'\n\n<image_urls>' + '\n'.join(message_action.image_urls) + '</image_urls>'
)
output = EvalOutput(
instance_id=instance.instance_id,
instruction=instruction,
instance=instance.to_dict(), # SWE Bench specific
test_result=test_result,
metadata=metadata,
history=histories,
metrics=metrics,
error=state.last_error if state and state.last_error else None,
)
return output
def filter_dataset(dataset: pd.DataFrame, filter_column: str) -> pd.DataFrame:
file_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'config.toml')
if os.path.exists(file_path):
with open(file_path, 'r') as file:
data = toml.load(file)
if 'selected_ids' in data:
selected_ids = data['selected_ids']
logger.info(
f'Filtering {len(selected_ids)} tasks from "selected_ids"...'
)
subset = dataset[dataset[filter_column].isin(selected_ids)]
logger.info(f'Retained {subset.shape[0]} tasks after filtering')
return subset
if 'selected_repos' in data:
selected_repos = data['selected_repos']
if isinstance(selected_repos, str):
selected_repos = [selected_repos]
assert isinstance(selected_repos, list)
logger.info(
f'Filtering {selected_repos} tasks from "selected_repos"...'
)
subset = dataset[dataset['repo'].isin(selected_repos)]
logger.info(f'Retained {subset.shape[0]} tasks after filtering')
return subset
skip_ids = os.environ.get('SKIP_IDS', '').split(',')
if len(skip_ids) > 0:
logger.info(f'Filtering {len(skip_ids)} tasks from "SKIP_IDS"...')
return dataset[~dataset[filter_column].isin(skip_ids)]
return dataset
if __name__ == '__main__':
parser = get_evaluation_parser()
parser.add_argument(
'--dataset',
type=str,
default='SWE-Perf/SWE-Perf',
help='data set to evaluate on, either full-test or lite-test',
)
parser.add_argument(
'--split',
type=str,
default='test',
help='split to evaluate on',
)
parser.add_argument(
'--mode',
type=str,
default='swe',
choices=['swe', 'swt', 'swt-ci'],
help="mode to run the evaluation, either 'swe', 'swt', or 'swt-ci'",
)
args, _ = parser.parse_known_args()
# NOTE: It is preferable to load datasets from huggingface datasets and perform post-processing
# so we don't need to manage file uploading to OpenHands's repo
dataset = load_dataset(args.dataset, split=args.split)
swe_perf_tests = filter_dataset(dataset.to_pandas(), 'instance_id')
logger.info(
f'Loaded dataset {args.dataset} with split {args.split}: {len(swe_perf_tests)} tasks'
)
llm_config = None
if args.llm_config:
llm_config = get_llm_config_arg(args.llm_config)
llm_config.log_completions = True
# modify_params must be False for evaluation purpose, for reproducibility and accurancy of results
llm_config.modify_params = False
if llm_config is None:
raise ValueError(f'Could not find LLM config: --llm_config {args.llm_config}')
# Get condenser config from environment variable
condenser_name = os.environ.get('EVAL_CONDENSER')
if condenser_name:
condenser_config = get_condenser_config_arg(condenser_name)
if condenser_config is None:
raise ValueError(
f'Could not find Condenser config: EVAL_CONDENSER={condenser_name}'
)
else:
# If no specific condenser config is provided via env var, default to NoOpCondenser
condenser_config = NoOpCondenserConfig()
logger.debug(
'No Condenser config provided via EVAL_CONDENSER, using NoOpCondenser.'
)
details = {'mode': args.mode}
_agent_cls = openhands.agenthub.Agent.get_cls(args.agent_cls)
dataset_descrption = (
args.dataset.replace('/', '__') + '-' + args.split.replace('/', '__')
)
metadata = make_metadata(
llm_config,
dataset_descrption,
args.agent_cls,
args.max_iterations,
args.eval_note,
args.eval_output_dir,
details=details,
condenser_config=condenser_config,
)
output_file = os.path.join(metadata.eval_output_dir, 'output.jsonl')
print(f'### OUTPUT FILE: {output_file} ###')
# Run evaluation in iterative mode:
# If a rollout fails to output AgentFinishAction, we will try again until it succeeds OR total 3 attempts have been made.
ITERATIVE_EVAL_MODE = (
os.environ.get('ITERATIVE_EVAL_MODE', 'false').lower() == 'true'
)
ITERATIVE_EVAL_MODE_MAX_ATTEMPTS = int(
os.environ.get('ITERATIVE_EVAL_MODE_MAX_ATTEMPTS', '3')
)
if not ITERATIVE_EVAL_MODE:
# load the dataset
instances = prepare_dataset(swe_perf_tests, output_file, args.eval_n_limit)
run_evaluation(
instances,
metadata,
output_file,
args.eval_num_workers,
process_instance,
timeout_seconds=8
* 60
* 60, # 8 hour PER instance should be more than enough
max_retries=5,
)
else:
critic = AgentFinishedCritic()
def get_cur_output_file_path(attempt: int) -> str:
return (
f'{output_file.removesuffix(".jsonl")}.critic_attempt_{attempt}.jsonl'
)
eval_ids = None
for attempt in range(1, ITERATIVE_EVAL_MODE_MAX_ATTEMPTS + 1):
cur_output_file = get_cur_output_file_path(attempt)
logger.info(
f'Running evaluation with critic {critic.__class__.__name__} for attempt {attempt} of {ITERATIVE_EVAL_MODE_MAX_ATTEMPTS}.'
)
# For deterministic eval, we set temperature to 0.1 for (>1) attempt
# so hopefully we get slightly different results
if attempt > 1 and metadata.llm_config.temperature == 0:
logger.info(
f'Detected temperature is 0 for (>1) attempt {attempt}. Setting temperature to 0.1...'
)
metadata.llm_config.temperature = 0.1
# Load instances - at first attempt, we evaluate all instances
# On subsequent attempts, we only evaluate the instances that failed the previous attempt determined by critic
instances = prepare_dataset(
swe_perf_tests, cur_output_file, args.eval_n_limit, eval_ids=eval_ids
)
# Run evaluation - but save them to cur_output_file
logger.info(
f'Evaluating {len(instances)} instances for attempt {attempt}...'
)
run_evaluation(
instances,
metadata,
cur_output_file,
args.eval_num_workers,
process_instance,
timeout_seconds=8
* 60
* 60, # 8 hour PER instance should be more than enough
max_retries=5,
)
# When eval is done, we update eval_ids to the instances that failed the current attempt
instances_failed = []
logger.info(
f'Use critic {critic.__class__.__name__} to check {len(instances)} instances for attempt {attempt}...'
)
with open(cur_output_file, 'r') as f:
for line in f:
instance = json.loads(line)
try:
history = [
event_from_dict(event) for event in instance['history']
]
critic_result = critic.evaluate(
history, instance['test_result'].get('git_patch', '')
)
if not critic_result.success:
instances_failed.append(instance['instance_id'])
except Exception as e:
logger.error(
f'Error loading history for instance {instance["instance_id"]}: {e}'
)
instances_failed.append(instance['instance_id'])
logger.info(
f'{len(instances_failed)} instances failed the current attempt {attempt}: {instances_failed}'
)
eval_ids = instances_failed
# If no instances failed, we break
if len(instances_failed) == 0:
break
# Then we should aggregate the results from all attempts into the original output file
# and remove the intermediate files
logger.info(
'Aggregating results from all attempts into the original output file...'
)
fout = open(output_file, 'w')
added_instance_ids = set()
for attempt in reversed(range(1, ITERATIVE_EVAL_MODE_MAX_ATTEMPTS + 1)):
cur_output_file = get_cur_output_file_path(attempt)
if not os.path.exists(cur_output_file):
logger.warning(
f'Intermediate output file {cur_output_file} does not exist. Skipping...'
)
continue
with open(cur_output_file, 'r') as f:
for line in f:
instance = json.loads(line)
# Also make sure git_patch is not empty - otherwise we fall back to previous attempt (empty patch is worse than anything else)
if (
instance['instance_id'] not in added_instance_ids
and instance['test_result'].get('git_patch', '').strip()
):
fout.write(line)
added_instance_ids.add(instance['instance_id'])
logger.info(
f'Aggregated instances from {cur_output_file}. Total instances added so far: {len(added_instance_ids)}'
)
fout.close()
logger.info(
f'Done! Total {len(added_instance_ids)} instances added to {output_file}'
)
# Check if any instances reached maximum retries
check_maximum_retries_exceeded(metadata.eval_output_dir)
@@ -1,146 +0,0 @@
#!/usr/bin/env bash
set -eo pipefail
source "evaluation/utils/version_control.sh"
MODEL_CONFIG=$1
COMMIT_HASH=$2
AGENT=$3
EVAL_LIMIT=$4
MAX_ITER=$5
NUM_WORKERS=$6
DATASET=$7
SPLIT=$8
N_RUNS=$9
MODE=${10}
if [ -z "$NUM_WORKERS" ]; then
NUM_WORKERS=1
echo "Number of workers not specified, use default $NUM_WORKERS"
fi
checkout_eval_branch
if [ -z "$AGENT" ]; then
echo "Agent not specified, use default CodeActAgent"
AGENT="CodeActAgent"
fi
if [ -z "$MAX_ITER" ]; then
echo "MAX_ITER not specified, use default 100"
MAX_ITER=100
fi
if [ -z "$RUN_WITH_BROWSING" ]; then
echo "RUN_WITH_BROWSING not specified, use default false"
RUN_WITH_BROWSING=false
fi
if [ -z "$DATASET" ]; then
echo "DATASET not specified, use default SWE-Perf/SWE-Perf"
DATASET="SWE-Perf/SWE-Perf"
fi
if [ -z "$SPLIT" ]; then
echo "SPLIT not specified, use default test"
SPLIT="test"
fi
if [ -z "$MODE" ]; then
MODE="swe"
echo "MODE not specified, use default $MODE"
fi
if [ -n "$EVAL_CONDENSER" ]; then
echo "Using Condenser Config: $EVAL_CONDENSER"
else
echo "No Condenser Config provided via EVAL_CONDENSER, use default (NoOpCondenser)."
fi
export RUN_WITH_BROWSING=$RUN_WITH_BROWSING
echo "RUN_WITH_BROWSING: $RUN_WITH_BROWSING"
get_openhands_version
echo "AGENT: $AGENT"
echo "OPENHANDS_VERSION: $OPENHANDS_VERSION"
echo "MODEL_CONFIG: $MODEL_CONFIG"
echo "DATASET: $DATASET"
echo "SPLIT: $SPLIT"
echo "MAX_ITER: $MAX_ITER"
echo "NUM_WORKERS: $NUM_WORKERS"
echo "COMMIT_HASH: $COMMIT_HASH"
echo "MODE: $MODE"
echo "EVAL_CONDENSER: $EVAL_CONDENSER"
# Default to NOT use Hint
if [ -z "$USE_HINT_TEXT" ]; then
export USE_HINT_TEXT=false
fi
echo "USE_HINT_TEXT: $USE_HINT_TEXT"
EVAL_NOTE="$OPENHANDS_VERSION"
# if not using Hint, add -no-hint to the eval note
if [ "$USE_HINT_TEXT" = false ]; then
EVAL_NOTE="$EVAL_NOTE-no-hint"
fi
if [ "$RUN_WITH_BROWSING" = true ]; then
EVAL_NOTE="$EVAL_NOTE-with-browsing"
fi
if [ -n "$EXP_NAME" ]; then
EVAL_NOTE="$EVAL_NOTE-$EXP_NAME"
fi
# if mode != swe, add mode to the eval note
if [ "$MODE" != "swe" ]; then
EVAL_NOTE="${EVAL_NOTE}-${MODE}"
fi
# Add condenser config to eval note if provided
if [ -n "$EVAL_CONDENSER" ]; then
EVAL_NOTE="${EVAL_NOTE}-${EVAL_CONDENSER}"
fi
function run_eval() {
local eval_note="${1}"
COMMAND="poetry run python evaluation/benchmarks/swe_perf/run_infer.py \
--agent-cls $AGENT \
--llm-config $MODEL_CONFIG \
--max-iterations $MAX_ITER \
--eval-num-workers $NUM_WORKERS \
--eval-note $eval_note \
--dataset $DATASET \
--split $SPLIT \
--mode $MODE"
if [ -n "$EVAL_LIMIT" ]; then
echo "EVAL_LIMIT: $EVAL_LIMIT"
COMMAND="$COMMAND --eval-n-limit $EVAL_LIMIT"
fi
# Run the command
eval $COMMAND
}
unset SANDBOX_ENV_GITHUB_TOKEN # prevent the agent from using the github token to push
if [ -z "$N_RUNS" ]; then
N_RUNS=1
echo "N_RUNS not specified, use default $N_RUNS"
fi
# Skip runs if the run number is in the SKIP_RUNS list
# read from env variable SKIP_RUNS as a comma separated list of run numbers
SKIP_RUNS=(${SKIP_RUNS//,/ })
for i in $(seq 1 $N_RUNS); do
if [[ " ${SKIP_RUNS[@]} " =~ " $i " ]]; then
echo "Skipping run $i"
continue
fi
current_eval_note="$EVAL_NOTE-run_$i"
echo "EVAL_NOTE: $current_eval_note"
run_eval $current_eval_note
done
checkout_original_branch
@@ -1,54 +0,0 @@
"""This script compares gold patches with OpenHands-generated patches and check whether
OpenHands found the right (set of) files to modify.
"""
import argparse
import json
import re
def extract_modified_files(patch):
modified_files = set()
file_pattern = re.compile(r'^diff --git a/(.*?) b/')
for line in patch.split('\n'):
match = file_pattern.match(line)
if match:
modified_files.add(match.group(1))
return modified_files
def process_report(oh_output_file):
succ = 0
fail = 0
for line in open(oh_output_file):
line = json.loads(line)
instance_id = line['instance_id']
gold_patch = line['swe_instance']['patch']
generated_patch = line['git_patch']
gold_modified_files = extract_modified_files(gold_patch)
# swe-bench lite only: a gold patch always contains exactly one file
assert len(gold_modified_files) == 1
generated_modified_files = extract_modified_files(generated_patch)
# Check if all files in gold_patch are also in generated_patch
all_files_in_generated = gold_modified_files.issubset(generated_modified_files)
if all_files_in_generated:
succ += 1
else:
fail += 1
print(
f'{instance_id}: file mismatch, gold = {gold_modified_files}, generated = {generated_modified_files}'
)
print(
f'\nSUMMARY: {succ} out of {succ + fail} instances found correct files to edit, success rate = {succ / float(succ + fail)}'
)
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument('--oh_output_file', help='Path to the OH output file')
args = parser.parse_args()
process_report(args.oh_output_file)
@@ -1,43 +0,0 @@
#!/usr/bin/env bash
source ~/.bashrc
SWEUTIL_DIR=/swe_util
# FIXME: Cannot read SWE_INSTANCE_ID from the environment variable
# SWE_INSTANCE_ID=django__django-11099
if [ -z "$SWE_INSTANCE_ID" ]; then
echo "Error: SWE_INSTANCE_ID is not set." >&2
exit 1
fi
# Read the swe-bench-test-lite.json file and extract the required item based on instance_id
item=$(jq --arg INSTANCE_ID "$SWE_INSTANCE_ID" '.[] | select(.instance_id == $INSTANCE_ID)' $SWEUTIL_DIR/eval_data/instances/swe-bench-instance.json)
if [[ -z "$item" ]]; then
echo "No item found for the provided instance ID."
exit 1
fi
WORKSPACE_NAME=$(echo "$item" | jq -r '(.repo | tostring) + "__" + (.version | tostring) | gsub("/"; "__")')
echo "WORKSPACE_NAME: $WORKSPACE_NAME"
# Clear the workspace
if [ -d /workspace ]; then
rm -rf /workspace/*
else
mkdir /workspace
fi
# Copy repo to workspace
if [ -d /workspace/$WORKSPACE_NAME ]; then
rm -rf /workspace/$WORKSPACE_NAME
fi
mkdir -p /workspace
cp -r /testbed /workspace/$WORKSPACE_NAME
# Activate instance-specific environment
if [ -d /opt/miniconda3 ]; then
. /opt/miniconda3/etc/profile.d/conda.sh
conda activate testbed
fi
+12 -20
View File
@@ -13,8 +13,7 @@ vi.mock("react-router", async () => {
vi.mock("#/context/conversation-context", () => ({
useConversation: () => ({ conversationId: "test-conversation-id" }),
ConversationProvider: ({ children }: { children: React.ReactNode }) =>
children,
ConversationProvider: ({ children }: { children: React.ReactNode }) => children,
}));
vi.mock("react-i18next", async () => {
@@ -30,18 +29,21 @@ vi.mock("react-i18next", async () => {
};
});
// Mock Zustand browser store
// Mock redux
const mockDispatch = vi.fn();
let mockBrowserState = {
url: "https://example.com",
screenshotSrc: "",
setUrl: vi.fn(),
setScreenshotSrc: vi.fn(),
reset: vi.fn(),
};
vi.mock("#/stores/browser-store", () => ({
useBrowserStore: () => mockBrowserState,
}));
vi.mock("react-redux", async () => {
const actual = await vi.importActual("react-redux");
return {
...actual,
useDispatch: () => mockDispatch,
useSelector: () => mockBrowserState,
};
});
// Import the component after all mocks are set up
import { BrowserPanel } from "#/components/features/browser/browser";
@@ -53,9 +55,6 @@ describe("Browser", () => {
mockBrowserState = {
url: "https://example.com",
screenshotSrc: "",
setUrl: vi.fn(),
setScreenshotSrc: vi.fn(),
reset: vi.fn(),
};
});
@@ -64,9 +63,6 @@ describe("Browser", () => {
mockBrowserState = {
url: "https://example.com",
screenshotSrc: "",
setUrl: vi.fn(),
setScreenshotSrc: vi.fn(),
reset: vi.fn(),
};
render(<BrowserPanel />);
@@ -79,11 +75,7 @@ describe("Browser", () => {
// Set the mock state for this test
mockBrowserState = {
url: "https://example.com",
screenshotSrc:
"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mN0uGvyHwAFCAJS091fQwAAAABJRU5ErkJggg==",
setUrl: vi.fn(),
setScreenshotSrc: vi.fn(),
reset: vi.fn(),
screenshotSrc: "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mN0uGvyHwAFCAJS091fQwAAAABJRU5ErkJggg==",
};
render(<BrowserPanel />);
@@ -12,7 +12,6 @@ import GitService from "#/api/git-service/git-service.api";
import { GitRepository } from "#/types/git";
import { RepositoryMicroagent } from "#/types/microagent-management";
import { Conversation } from "#/api/open-hands.types";
import { useMicroagentManagementStore } from "#/state/microagent-management-store";
// Mock hooks
const mockUseUserProviders = vi.fn();
@@ -56,47 +55,20 @@ describe("MicroagentManagement", () => {
]);
const renderMicroagentManagement = (config?: QueryClientConfig) =>
renderWithProviders(<RouterStub />);
// Common test data
const testRepository = {
id: "1",
full_name: "user/test-repo",
git_provider: "github" as const,
is_public: true,
owner_type: "user" as const,
pushed_at: "2021-10-01T12:00:00Z",
};
// Helper function to render with custom Zustand store state
const renderWithCustomStore = (storeOverrides: Partial<any>) => {
useMicroagentManagementStore.setState(storeOverrides);
return renderWithProviders(<RouterStub />);
};
// Helper function to render with update modal visible
const renderWithUpdateModal = (additionalState: Partial<any> = {}) => {
return renderWithCustomStore({
updateMicroagentModalVisible: true,
selectedRepository: testRepository,
...additionalState,
});
};
// Helper function to render with selected microagent
const renderWithSelectedMicroagent = (
microagent: any,
additionalState: Partial<any> = {},
) => {
return renderWithCustomStore({
selectedRepository: testRepository,
selectedMicroagentItem: {
microagent,
conversation: null,
renderWithProviders(<RouterStub />, {
preloadedState: {
microagentManagement: {
addMicroagentModalVisible: false,
updateMicroagentModalVisible: false,
selectedRepository: null,
personalRepositories: [],
organizationRepositories: [],
repositories: [],
selectedMicroagentItem: null,
learnThisRepoModalVisible: false,
},
},
...additionalState,
});
};
beforeAll(() => {
vi.mock("react-router", async (importOriginal) => ({
@@ -209,23 +181,6 @@ describe("MicroagentManagement", () => {
vi.clearAllMocks();
vi.restoreAllMocks();
// Reset Zustand store to default state
useMicroagentManagementStore.setState({
// Modal visibility states
addMicroagentModalVisible: false,
updateMicroagentModalVisible: false,
learnThisRepoModalVisible: false,
// Repository states
selectedRepository: null,
personalRepositories: [],
organizationRepositories: [],
repositories: [],
// Microagent states
selectedMicroagentItem: null,
});
// Setup default hook mocks
mockUseUserProviders.mockReturnValue({
providers: ["github"],
@@ -1387,10 +1342,28 @@ describe("MicroagentManagement", () => {
});
});
it("should render modal when Zustand state is set to visible", async () => {
// Render with modal already visible in Zustand state
renderWithCustomStore({
addMicroagentModalVisible: true,
it("should render modal when Redux state is set to visible", async () => {
// Render with modal already visible in Redux state
renderWithProviders(<RouterStub />, {
preloadedState: {
microagentManagement: {
selectedMicroagentItem: null,
addMicroagentModalVisible: true, // Start with modal visible
selectedRepository: {
id: "1",
full_name: "user/test-repo",
git_provider: "github",
is_public: true,
owner_type: "user",
pushed_at: "2021-10-01T12:00:00Z",
},
personalRepositories: [],
organizationRepositories: [],
repositories: [],
updateMicroagentModalVisible: false,
learnThisRepoModalVisible: false,
},
},
});
// Check that modal is rendered
@@ -1660,16 +1633,29 @@ describe("MicroagentManagement", () => {
pr_number: null,
};
const renderMicroagentManagementMain = (selectedMicroagentItem: any) => {
// Set the store with the selected microagent item and a repository
useMicroagentManagementStore.setState({
selectedMicroagentItem,
selectedRepository: testRepository,
const renderMicroagentManagementMain = (selectedMicroagentItem: any) =>
renderWithProviders(<MicroagentManagementMain />, {
preloadedState: {
microagentManagement: {
addMicroagentModalVisible: false,
selectedRepository: {
id: "1",
full_name: "user/test-repo",
git_provider: "github",
is_public: true,
owner_type: "user",
pushed_at: "2021-10-01T12:00:00Z",
},
personalRepositories: [],
organizationRepositories: [],
repositories: [],
selectedMicroagentItem,
updateMicroagentModalVisible: false,
learnThisRepoModalVisible: false,
},
},
});
return renderWithProviders(<MicroagentManagementMain />);
};
it("should render MicroagentManagementDefault when no microagent or conversation is selected", async () => {
renderMicroagentManagementMain(null);
@@ -1994,8 +1980,31 @@ describe("MicroagentManagement", () => {
});
it("should render update microagent modal when updateMicroagentModalVisible is true", async () => {
// Render with update modal visible in Zustand state
renderWithUpdateModal();
// Render with update modal visible in Redux state
renderWithProviders(<RouterStub />, {
preloadedState: {
microagentManagement: {
selectedMicroagentItem: {
microagent: mockMicroagentForUpdate,
conversation: undefined,
},
addMicroagentModalVisible: false,
updateMicroagentModalVisible: true, // Start with update modal visible
selectedRepository: {
id: "1",
full_name: "user/test-repo",
git_provider: "github",
is_public: true,
owner_type: "user",
pushed_at: "2021-10-01T12:00:00Z",
},
personalRepositories: [],
organizationRepositories: [],
repositories: [],
learnThisRepoModalVisible: false,
},
},
});
// Check that update modal is rendered
expect(screen.getByTestId("add-microagent-modal")).toBeInTheDocument();
@@ -2006,7 +2015,30 @@ describe("MicroagentManagement", () => {
it("should display update microagent title when isUpdate is true", async () => {
// Render with update modal visible and selected microagent
renderWithUpdateModal();
renderWithProviders(<RouterStub />, {
preloadedState: {
microagentManagement: {
selectedMicroagentItem: {
microagent: mockMicroagentForUpdate,
conversation: undefined,
},
addMicroagentModalVisible: false,
updateMicroagentModalVisible: true,
selectedRepository: {
id: "1",
full_name: "user/test-repo",
git_provider: "github",
is_public: true,
owner_type: "user",
pushed_at: "2021-10-01T12:00:00Z",
},
personalRepositories: [],
organizationRepositories: [],
repositories: [],
learnThisRepoModalVisible: false,
},
},
});
// Check that the update title is displayed
expect(
@@ -2016,10 +2048,28 @@ describe("MicroagentManagement", () => {
it("should populate form fields with existing microagent data when updating", async () => {
// Render with update modal visible and selected microagent
renderWithUpdateModal({
selectedMicroagentItem: {
microagent: mockMicroagentForUpdate,
conversation: null,
renderWithProviders(<RouterStub />, {
preloadedState: {
microagentManagement: {
selectedMicroagentItem: {
microagent: mockMicroagentForUpdate,
conversation: undefined,
},
addMicroagentModalVisible: false,
updateMicroagentModalVisible: true,
selectedRepository: {
id: "1",
full_name: "user/test-repo",
git_provider: "github",
is_public: true,
owner_type: "user",
pushed_at: "2021-10-01T12:00:00Z",
},
personalRepositories: [],
organizationRepositories: [],
repositories: [],
learnThisRepoModalVisible: false,
},
},
});
@@ -2036,7 +2086,30 @@ describe("MicroagentManagement", () => {
const user = userEvent.setup();
// Render with update modal visible and selected microagent
renderWithUpdateModal();
renderWithProviders(<RouterStub />, {
preloadedState: {
microagentManagement: {
selectedMicroagentItem: {
microagent: mockMicroagentForUpdate,
conversation: undefined,
},
addMicroagentModalVisible: false,
updateMicroagentModalVisible: true,
selectedRepository: {
id: "1",
full_name: "user/test-repo",
git_provider: "github",
is_public: true,
owner_type: "user",
pushed_at: "2021-10-01T12:00:00Z",
},
personalRepositories: [],
organizationRepositories: [],
repositories: [],
learnThisRepoModalVisible: false,
},
},
});
// Wait for modal to be rendered
await waitFor(() => {
@@ -2064,7 +2137,30 @@ describe("MicroagentManagement", () => {
const user = userEvent.setup();
// Render with update modal visible
renderWithUpdateModal();
renderWithProviders(<RouterStub />, {
preloadedState: {
microagentManagement: {
selectedMicroagentItem: {
microagent: mockMicroagentForUpdate,
conversation: undefined,
},
addMicroagentModalVisible: false,
updateMicroagentModalVisible: true,
selectedRepository: {
id: "1",
full_name: "user/test-repo",
git_provider: "github",
is_public: true,
owner_type: "user",
pushed_at: "2021-10-01T12:00:00Z",
},
personalRepositories: [],
organizationRepositories: [],
repositories: [],
learnThisRepoModalVisible: false,
},
},
});
// Wait for modal to be rendered
await waitFor(() => {
@@ -2087,7 +2183,30 @@ describe("MicroagentManagement", () => {
const user = userEvent.setup();
// Render with update modal visible
renderWithUpdateModal();
renderWithProviders(<RouterStub />, {
preloadedState: {
microagentManagement: {
selectedMicroagentItem: {
microagent: mockMicroagentForUpdate,
conversation: undefined,
},
addMicroagentModalVisible: false,
updateMicroagentModalVisible: true,
selectedRepository: {
id: "1",
full_name: "user/test-repo",
git_provider: "github",
is_public: true,
owner_type: "user",
pushed_at: "2021-10-01T12:00:00Z",
},
personalRepositories: [],
organizationRepositories: [],
repositories: [],
learnThisRepoModalVisible: false,
},
},
});
// Wait for modal to be rendered
await waitFor(() => {
@@ -2113,7 +2232,27 @@ describe("MicroagentManagement", () => {
it("should handle update modal with empty microagent data", async () => {
// Render with update modal visible but no microagent data
renderWithUpdateModal();
renderWithProviders(<RouterStub />, {
preloadedState: {
microagentManagement: {
selectedMicroagentItem: null,
addMicroagentModalVisible: false,
updateMicroagentModalVisible: true,
selectedRepository: {
id: "1",
full_name: "user/test-repo",
git_provider: "github",
is_public: true,
owner_type: "user",
pushed_at: "2021-10-01T12:00:00Z",
},
personalRepositories: [],
organizationRepositories: [],
repositories: [],
learnThisRepoModalVisible: false,
},
},
});
// Check that update modal is still rendered
expect(screen.getByTestId("add-microagent-modal")).toBeInTheDocument();
@@ -2134,7 +2273,30 @@ describe("MicroagentManagement", () => {
});
// Render with update modal visible and microagent
renderWithUpdateModal();
renderWithProviders(<RouterStub />, {
preloadedState: {
microagentManagement: {
selectedMicroagentItem: {
microagent: mockMicroagentForUpdate,
conversation: undefined,
},
addMicroagentModalVisible: false,
updateMicroagentModalVisible: true,
selectedRepository: {
id: "1",
full_name: "user/test-repo",
git_provider: "github",
is_public: true,
owner_type: "user",
pushed_at: "2021-10-01T12:00:00Z",
},
personalRepositories: [],
organizationRepositories: [],
repositories: [],
learnThisRepoModalVisible: false,
},
},
});
// Wait for the content to be loaded and check that the form field is empty
await waitFor(() => {
@@ -2155,7 +2317,30 @@ describe("MicroagentManagement", () => {
});
// Render with update modal visible and microagent
renderWithUpdateModal();
renderWithProviders(<RouterStub />, {
preloadedState: {
microagentManagement: {
selectedMicroagentItem: {
microagent: mockMicroagentForUpdate,
conversation: undefined,
},
addMicroagentModalVisible: false,
updateMicroagentModalVisible: true,
selectedRepository: {
id: "1",
full_name: "user/test-repo",
git_provider: "github",
is_public: true,
owner_type: "user",
pushed_at: "2021-10-01T12:00:00Z",
},
personalRepositories: [],
organizationRepositories: [],
repositories: [],
learnThisRepoModalVisible: false,
},
},
});
// Check that the modal is rendered correctly
expect(screen.getByTestId("add-microagent-modal")).toBeInTheDocument();
@@ -2314,7 +2499,30 @@ describe("MicroagentManagement", () => {
it("should render learn something new button in microagent view", async () => {
// Render with selected microagent
renderWithSelectedMicroagent(mockMicroagentForLearn);
renderWithProviders(<RouterStub />, {
preloadedState: {
microagentManagement: {
selectedMicroagentItem: {
microagent: mockMicroagentForLearn,
conversation: undefined,
},
addMicroagentModalVisible: false,
updateMicroagentModalVisible: false,
selectedRepository: {
id: "1",
full_name: "user/test-repo",
git_provider: "github",
is_public: true,
owner_type: "user",
pushed_at: "2021-10-01T12:00:00Z",
},
personalRepositories: [],
organizationRepositories: [],
repositories: [],
learnThisRepoModalVisible: false,
},
},
});
// Check that the learn something new button is displayed
expect(
@@ -2326,7 +2534,30 @@ describe("MicroagentManagement", () => {
const user = userEvent.setup();
// Render with selected microagent
renderWithSelectedMicroagent(mockMicroagentForLearn);
renderWithProviders(<RouterStub />, {
preloadedState: {
microagentManagement: {
selectedMicroagentItem: {
microagent: mockMicroagentForLearn,
conversation: undefined,
},
addMicroagentModalVisible: false,
updateMicroagentModalVisible: false,
selectedRepository: {
id: "1",
full_name: "user/test-repo",
git_provider: "github",
is_public: true,
owner_type: "user",
pushed_at: "2021-10-01T12:00:00Z",
},
personalRepositories: [],
organizationRepositories: [],
repositories: [],
learnThisRepoModalVisible: false,
},
},
});
// Find and click the learn something new button
const learnButton = screen.getByText("COMMON$LEARN_SOMETHING_NEW");
@@ -2355,7 +2586,30 @@ describe("MicroagentManagement", () => {
});
// Render with selected microagent
renderWithSelectedMicroagent(mockMicroagentForLearn);
renderWithProviders(<RouterStub />, {
preloadedState: {
microagentManagement: {
selectedMicroagentItem: {
microagent: mockMicroagentForLearn,
conversation: undefined,
},
addMicroagentModalVisible: false,
updateMicroagentModalVisible: false,
selectedRepository: {
id: "1",
full_name: "user/test-repo",
git_provider: "github",
is_public: true,
owner_type: "user",
pushed_at: "2021-10-01T12:00:00Z",
},
personalRepositories: [],
organizationRepositories: [],
repositories: [],
learnThisRepoModalVisible: false,
},
},
});
// Find and click the learn something new button
const learnButton = screen.getByText("COMMON$LEARN_SOMETHING_NEW");
@@ -2387,7 +2641,30 @@ describe("MicroagentManagement", () => {
});
// Render with selected microagent
renderWithSelectedMicroagent(mockMicroagentForLearn);
renderWithProviders(<RouterStub />, {
preloadedState: {
microagentManagement: {
selectedMicroagentItem: {
microagent: mockMicroagentForLearn,
conversation: undefined,
},
addMicroagentModalVisible: false,
updateMicroagentModalVisible: false,
selectedRepository: {
id: "1",
full_name: "user/test-repo",
git_provider: "github",
is_public: true,
owner_type: "user",
pushed_at: "2021-10-01T12:00:00Z",
},
personalRepositories: [],
organizationRepositories: [],
repositories: [],
learnThisRepoModalVisible: false,
},
},
});
// Find and click the learn something new button
const learnButton = screen.getByText("COMMON$LEARN_SOMETHING_NEW");
@@ -2417,7 +2694,30 @@ describe("MicroagentManagement", () => {
});
// Render with selected microagent
renderWithSelectedMicroagent(mockMicroagentForLearn);
renderWithProviders(<RouterStub />, {
preloadedState: {
microagentManagement: {
selectedMicroagentItem: {
microagent: mockMicroagentForLearn,
conversation: undefined,
},
addMicroagentModalVisible: false,
updateMicroagentModalVisible: false,
selectedRepository: {
id: "1",
full_name: "user/test-repo",
git_provider: "github",
is_public: true,
owner_type: "user",
pushed_at: "2021-10-01T12:00:00Z",
},
personalRepositories: [],
organizationRepositories: [],
repositories: [],
learnThisRepoModalVisible: false,
},
},
});
// Find and click the learn something new button
const learnButton = screen.getByText("COMMON$LEARN_SOMETHING_NEW");
@@ -342,7 +342,13 @@ describe("InteractiveChatBox", () => {
// Simulate parent component updating the value prop
rerender(
<MemoryRouter>
<InteractiveChatBox onSubmit={onSubmit} onStop={onStop} />
<InteractiveChatBox
onSubmit={onSubmit}
onStop={onStop}
isWaitingForUserInput={true}
hasSubstantiveAgentActions={true}
optimisticUserMessage={false}
/>
</MemoryRouter>,
);
@@ -2,8 +2,8 @@ import { render, screen } from "@testing-library/react";
import { Provider } from "react-redux";
import { configureStore } from "@reduxjs/toolkit";
import { JupyterEditor } from "#/components/features/jupyter/jupyter";
import { useJupyterStore } from "#/state/jupyter-store";
import { vi, describe, it, expect, beforeEach } from "vitest";
import { jupyterReducer } from "#/state/jupyter-slice";
import { vi, describe, it, expect } from "vitest";
describe("JupyterEditor", () => {
const mockStore = configureStore({
@@ -15,20 +15,19 @@ describe("JupyterEditor", () => {
code: () => ({}),
cmd: () => ({}),
agent: () => ({}),
jupyter: jupyterReducer,
securityAnalyzer: () => ({}),
status: () => ({}),
},
});
beforeEach(() => {
// Reset the Zustand store before each test
useJupyterStore.setState({
cells: Array(20).fill({
content: "Test cell content",
type: "input",
imageUrls: undefined,
}),
});
preloadedState: {
jupyter: {
cells: Array(20).fill({
content: "Test cell content",
type: "input",
output: "Test output",
}),
},
},
});
it("should have a scrollable container", () => {
@@ -37,7 +36,7 @@ describe("JupyterEditor", () => {
<div style={{ height: "100vh" }}>
<JupyterEditor maxWidth={800} />
</div>
</Provider>,
</Provider>
);
const container = screen.getByTestId("jupyter-container");
+4 -8
View File
@@ -21,12 +21,8 @@ vi.mock("#/state/command-store", () => ({
},
}));
vi.mock("#/state/jupyter-store", () => ({
useJupyterStore: {
getState: () => ({
appendJupyterInput: mockAppendJupyterInput,
}),
},
vi.mock("#/state/jupyter-slice", () => ({
appendJupyterInput: mockAppendJupyterInput,
}));
vi.mock("#/state/metrics-slice", () => ({
@@ -85,8 +81,8 @@ describe("handleActionMessage", () => {
handleActionMessage(ipythonAction);
// Check that appendJupyterInput was called with the code
expect(mockAppendJupyterInput).toHaveBeenCalledWith(
"print('Hello from Jupyter!')",
expect(mockDispatch).toHaveBeenCalledWith(
mockAppendJupyterInput("print('Hello from Jupyter!')"),
);
expect(mockAppendInput).not.toHaveBeenCalled();
});
@@ -60,7 +60,13 @@ describe("Check for hardcoded English strings", () => {
test("InteractiveChatBox should not have hardcoded English strings", () => {
const { container } = renderWithProviders(
<MemoryRouter>
<InteractiveChatBox onSubmit={() => {}} onStop={() => {}} />
<InteractiveChatBox
onSubmit={() => {}}
onStop={() => {}}
isWaitingForUserInput={false}
hasSubstantiveAgentActions={false}
optimisticUserMessage={false}
/>
</MemoryRouter>,
);
+1025 -709
View File
File diff suppressed because it is too large Load Diff
+22 -21
View File
@@ -7,38 +7,38 @@
"node": ">=22.0.0"
},
"dependencies": {
"@heroui/react": "^2.8.4",
"@heroui/react": "^2.8.3",
"@heroui/use-infinite-scroll": "^2.2.11",
"@microlink/react-json-view": "^1.26.2",
"@monaco-editor/react": "^4.7.0-rc.0",
"@react-router/node": "^7.9.1",
"@react-router/serve": "^7.9.1",
"@react-router/node": "^7.8.2",
"@react-router/serve": "^7.8.2",
"@react-types/shared": "^3.32.0",
"@reduxjs/toolkit": "^2.9.0",
"@stripe/react-stripe-js": "^4.0.2",
"@stripe/react-stripe-js": "^4.0.0",
"@stripe/stripe-js": "^7.9.0",
"@tailwindcss/postcss": "^4.1.13",
"@tailwindcss/vite": "^4.1.13",
"@tanstack/react-query": "^5.90.2",
"@tanstack/react-query": "^5.87.0",
"@uidotdev/usehooks": "^2.4.1",
"@vitejs/plugin-react": "^5.0.3",
"@vitejs/plugin-react": "^5.0.2",
"@xterm/addon-fit": "^0.10.0",
"@xterm/xterm": "^5.4.0",
"axios": "^1.12.2",
"axios": "^1.11.0",
"class-variance-authority": "^0.7.1",
"clsx": "^2.1.1",
"date-fns": "^4.1.0",
"downshift": "^9.0.10",
"eslint-config-airbnb-typescript": "^18.0.0",
"framer-motion": "^12.23.19",
"framer-motion": "^12.23.12",
"i18next": "^25.5.2",
"i18next-browser-languagedetector": "^8.2.0",
"i18next-http-backend": "^3.0.2",
"isbot": "^5.1.30",
"jose": "^6.1.0",
"lucide-react": "^0.544.0",
"monaco-editor": "^0.53.0",
"posthog-js": "^1.268.1",
"lucide-react": "^0.542.0",
"monaco-editor": "^0.52.2",
"posthog-js": "^1.261.7",
"react": "^19.1.1",
"react-dom": "^19.1.1",
"react-highlight": "^0.15.0",
@@ -47,7 +47,8 @@
"react-icons": "^5.5.0",
"react-markdown": "^10.1.0",
"react-redux": "^9.2.0",
"react-router": "^7.9.1",
"react-resizable-panels": "^3.0.5",
"react-router": "^7.8.2",
"react-syntax-highlighter": "^15.6.6",
"remark-breaks": "^4.0.0",
"remark-gfm": "^4.0.1",
@@ -55,7 +56,7 @@
"socket.io-client": "^4.8.1",
"tailwind-merge": "^3.3.1",
"tailwind-scrollbar": "^4.0.2",
"vite": "^7.1.7",
"vite": "^7.1.4",
"web-vitals": "^5.1.0",
"ws": "^8.18.2",
"zustand": "^5.0.8"
@@ -97,16 +98,16 @@
"@babel/traverse": "^7.28.3",
"@babel/types": "^7.28.2",
"@mswjs/socket.io-binding": "^0.2.0",
"@playwright/test": "^1.55.1",
"@react-router/dev": "^7.9.1",
"@tailwindcss/typography": "^0.5.18",
"@tanstack/eslint-plugin-query": "^5.90.1",
"@playwright/test": "^1.55.0",
"@react-router/dev": "^7.8.2",
"@tailwindcss/typography": "^0.5.16",
"@tanstack/eslint-plugin-query": "^5.86.0",
"@testing-library/dom": "^10.4.1",
"@testing-library/jest-dom": "^6.8.0",
"@testing-library/react": "^16.3.0",
"@testing-library/user-event": "^14.6.1",
"@types/node": "^24.5.2",
"@types/react": "^19.1.13",
"@types/node": "^24.3.1",
"@types/react": "^19.1.12",
"@types/react-dom": "^19.1.9",
"@types/react-highlight": "^0.12.8",
"@types/react-syntax-highlighter": "^15.5.13",
@@ -128,8 +129,8 @@
"eslint-plugin-react-hooks": "^4.6.2",
"eslint-plugin-unused-imports": "^4.2.0",
"husky": "^9.1.7",
"jsdom": "^27.0.0",
"lint-staged": "^16.2.0",
"jsdom": "^26.1.0",
"lint-staged": "^16.1.6",
"msw": "^2.6.6",
"prettier": "^3.6.2",
"stripe": "^18.5.0",
+1 -1
View File
@@ -7,7 +7,7 @@
* - Please do NOT modify this file.
*/
const PACKAGE_VERSION = '2.11.1'
const PACKAGE_VERSION = '2.10.5'
const INTEGRITY_CHECKSUM = 'f5825c521429caf22a4dd13b66e243af'
const IS_MOCKED_RESPONSE = Symbol('isMockedResponse')
const activeClientIds = new Set()
@@ -1,16 +1,26 @@
import { useEffect } from "react";
import { useSelector, useDispatch } from "react-redux";
import { RootState } from "#/store";
import { BrowserSnapshot } from "./browser-snapshot";
import { EmptyBrowserMessage } from "./empty-browser-message";
import { useConversationId } from "#/hooks/use-conversation-id";
import { useBrowserStore } from "#/stores/browser-store";
import {
initialState as browserInitialState,
setUrl,
setScreenshotSrc,
} from "#/state/browser-slice";
export function BrowserPanel() {
const { url, screenshotSrc, reset } = useBrowserStore();
const { url, screenshotSrc } = useSelector(
(state: RootState) => state.browser,
);
const { conversationId } = useConversationId();
const dispatch = useDispatch();
useEffect(() => {
reset();
}, [conversationId, reset]);
dispatch(setUrl(browserInitialState.url));
dispatch(setScreenshotSrc(browserInitialState.screenshotSrc));
}, [conversationId]);
const imgSrc =
screenshotSrc && screenshotSrc.startsWith("data:image/png;base64,")
@@ -1,4 +1,4 @@
import { useSelector } from "react-redux";
import { useSelector, useDispatch } from "react-redux";
import React from "react";
import posthog from "posthog-js";
import { useParams } from "react-router";
@@ -33,8 +33,7 @@ import {
import { useUploadFiles } from "#/hooks/mutation/use-upload-files";
import { useConfig } from "#/hooks/query/use-config";
import { validateFiles } from "#/utils/file-validation";
import { useConversationStore } from "#/state/conversation-store";
import ConfirmationModeEnabled from "./confirmation-mode-enabled";
import { setMessageToSend } from "#/state/conversation-slice";
function getEntryPoint(
hasRepository: boolean | null,
@@ -46,7 +45,7 @@ function getEntryPoint(
}
export function ChatInterface() {
const { setMessageToSend } = useConversationStore();
const dispatch = useDispatch();
const { getErrorMessage } = useWSErrorMessage();
const { send, isLoadingMessages, parsedEvents } = useWsClient();
const { setOptimisticUserMessage, getOptimisticUserMessage } =
@@ -141,7 +140,7 @@ export function ChatInterface() {
send(createChatMessage(prompt, imageUrls, uploadedFiles, timestamp));
setOptimisticUserMessage(content);
setMessageToSend("");
dispatch(setMessageToSend(null));
};
const handleStop = () => {
@@ -156,6 +155,10 @@ export function ChatInterface() {
setFeedbackPolarity(polarity);
};
const isWaitingForUserInput =
curAgentState === AgentState.AWAITING_USER_INPUT ||
curAgentState === AgentState.FINISHED;
// Create a ScrollProvider with the scroll hook values
const scrollProviderValue = {
scrollRef,
@@ -176,7 +179,9 @@ export function ChatInterface() {
!optimisticUserMessage &&
!userEventsExist && (
<ChatSuggestions
onSuggestionsClick={(message) => setMessageToSend(message)}
onSuggestionsClick={(message) =>
dispatch(setMessageToSend(message))
}
/>
)}
{/* Note: We only hide chat suggestions when there's a user message */}
@@ -204,20 +209,17 @@ export function ChatInterface() {
<div className="flex flex-col gap-[6px]">
<div className="flex justify-between relative">
<div className="flex items-center gap-1">
<ConfirmationModeEnabled />
{events.length > 0 && (
<TrajectoryActions
onPositiveFeedback={() =>
onClickShareFeedbackActionButton("positive")
}
onNegativeFeedback={() =>
onClickShareFeedbackActionButton("negative")
}
isSaasMode={config?.APP_MODE === "saas"}
/>
)}
</div>
{events.length > 0 && (
<TrajectoryActions
onPositiveFeedback={() =>
onClickShareFeedbackActionButton("positive")
}
onNegativeFeedback={() =>
onClickShareFeedbackActionButton("negative")
}
isSaasMode={config?.APP_MODE === "saas"}
/>
)}
<div className="absolute left-1/2 transform -translate-x-1/2 bottom-0">
{curAgentState === AgentState.RUNNING && <TypingIndicator />}
@@ -231,6 +233,9 @@ export function ChatInterface() {
<InteractiveChatBox
onSubmit={handleSendMessage}
onStop={handleStop}
isWaitingForUserInput={isWaitingForUserInput}
hasSubstantiveAgentActions={hasSubstantiveAgentActions}
optimisticUserMessage={!!optimisticUserMessage}
/>
</div>
@@ -55,7 +55,7 @@ export function ChatMessage({
onMouseEnter={() => setIsHovering(true)}
onMouseLeave={() => setIsHovering(false)}
className={cn(
"rounded-xl relative w-fit max-w-full last:mb-4",
"rounded-xl relative w-fit max-w-full",
"flex flex-col gap-2",
type === "user" && " p-4 bg-tertiary self-end",
type === "agent" && "mt-6 max-w-full bg-transparent",
@@ -1,10 +1,11 @@
import { useTranslation } from "react-i18next";
import { useSelector } from "react-redux";
import { motion, AnimatePresence } from "framer-motion";
import { Suggestions } from "#/components/features/suggestions/suggestions";
import { I18nKey } from "#/i18n/declaration";
import BuildIt from "#/icons/build-it.svg?react";
import { SUGGESTIONS } from "#/utils/suggestions";
import { useConversationStore } from "#/state/conversation-store";
import { RootState } from "#/store";
interface ChatSuggestionsProps {
onSuggestionsClick: (value: string) => void;
@@ -12,7 +13,9 @@ interface ChatSuggestionsProps {
export function ChatSuggestions({ onSuggestionsClick }: ChatSuggestionsProps) {
const { t } = useTranslation();
const { shouldHideSuggestions } = useConversationStore();
const shouldHideSuggestions = useSelector(
(state: RootState) => state.conversation.shouldHideSuggestions,
);
return (
<AnimatePresence>
@@ -1,29 +0,0 @@
import { useTranslation } from "react-i18next";
import { Tooltip } from "@heroui/react";
import { I18nKey } from "#/i18n/declaration";
import LockIcon from "#/icons/lock.svg?react";
import { useSettings } from "#/hooks/query/use-settings";
function ConfirmationModeEnabled() {
const { t } = useTranslation();
const { data: settings } = useSettings();
if (!settings?.CONFIRMATION_MODE) {
return null;
}
return (
<Tooltip
content={t(I18nKey.COMMON$CONFIRMATION_MODE_ENABLED)}
closeDelay={100}
className="bg-white text-black hover:bg-transparent"
>
<div className="flex items-center justify-center w-[26px] h-[26px] rounded-lg bg-[#25272D]">
<LockIcon width={15} height={15} />
</div>
</Tooltip>
);
}
export default ConfirmationModeEnabled;
@@ -1,5 +1,12 @@
import React, { useEffect } from "react";
import { useDispatch, useSelector } from "react-redux";
import { ConversationStatus } from "#/types/conversation-status";
import {
clearAllFiles,
setShouldHideSuggestions,
setSubmittedMessage,
} from "#/state/conversation-slice";
import { RootState } from "#/store";
import { useChatInputLogic } from "#/hooks/chat/use-chat-input-logic";
import { useFileHandling } from "#/hooks/chat/use-file-handling";
import { useGripResize } from "#/hooks/chat/use-grip-resize";
@@ -8,7 +15,6 @@ import { useChatSubmission } from "#/hooks/chat/use-chat-submission";
import { ChatInputGrip } from "./components/chat-input-grip";
import { ChatInputContainer } from "./components/chat-input-container";
import { HiddenFileInput } from "./components/hidden-file-input";
import { useConversationStore } from "#/state/conversation-store";
export interface CustomChatInputProps {
disabled?: boolean;
@@ -35,12 +41,10 @@ export function CustomChatInput({
className = "",
buttonClassName = "",
}: CustomChatInputProps) {
const {
submittedMessage,
clearAllFiles,
setShouldHideSuggestions,
setSubmittedMessage,
} = useConversationStore();
const { submittedMessage } = useSelector(
(state: RootState) => state.conversation,
);
const dispatch = useDispatch();
// Disable input when conversation is stopped
const isConversationStopped = conversationStatus === "STOPPED";
@@ -52,8 +56,8 @@ export function CustomChatInput({
return;
}
onSubmit(submittedMessage);
setSubmittedMessage(null);
}, [submittedMessage, disabled, onSubmit, setSubmittedMessage]);
dispatch(setSubmittedMessage(null));
}, [submittedMessage, disabled, onSubmit, dispatch]);
// Custom hooks
const {
@@ -108,10 +112,10 @@ export function CustomChatInput({
// Cleanup: reset suggestions visibility when component unmounts
useEffect(
() => () => {
setShouldHideSuggestions(false);
clearAllFiles();
dispatch(setShouldHideSuggestions(false));
dispatch(clearAllFiles());
},
[setShouldHideSuggestions, clearAllFiles],
[dispatch],
);
return (
@@ -1,4 +1,4 @@
import { ActionSecurityRisk } from "#/stores/security-analyzer-store";
import { ActionSecurityRisk } from "#/state/security-analyzer-slice";
import {
FileWriteAction,
CommandAction,
@@ -1,68 +0,0 @@
import React from "react";
import { OpenHandsObservation } from "#/types/core/observations";
import { isErrorObservation } from "#/types/core/guards";
import { ErrorMessage } from "../error-message";
import { MicroagentStatusWrapper } from "./microagent-status-wrapper";
import { LikertScaleWrapper } from "./likert-scale-wrapper";
import { MicroagentStatus } from "#/types/microagent-status";
interface ErrorEventMessageProps {
event: OpenHandsObservation;
microagentStatus?: MicroagentStatus | null;
microagentConversationId?: string;
microagentPRUrl?: string;
actions?: Array<{
icon: React.ReactNode;
onClick: () => void;
tooltip?: string;
}>;
isLastMessage: boolean;
isInLast10Actions: boolean;
config?: { APP_MODE?: string } | null;
isCheckingFeedback: boolean;
feedbackData: {
exists: boolean;
rating?: number;
reason?: string;
};
}
export function ErrorEventMessage({
event,
microagentStatus,
microagentConversationId,
microagentPRUrl,
actions,
isLastMessage,
isInLast10Actions,
config,
isCheckingFeedback,
feedbackData,
}: ErrorEventMessageProps) {
if (!isErrorObservation(event)) {
return null;
}
return (
<div>
<ErrorMessage
errorId={event.extras.error_id}
defaultMessage={event.message}
/>
<MicroagentStatusWrapper
microagentStatus={microagentStatus}
microagentConversationId={microagentConversationId}
microagentPRUrl={microagentPRUrl}
actions={actions}
/>
<LikertScaleWrapper
event={event}
isLastMessage={isLastMessage}
isInLast10Actions={isInLast10Actions}
config={config}
isCheckingFeedback={isCheckingFeedback}
feedbackData={feedbackData}
/>
</div>
);
}
@@ -1,70 +0,0 @@
import React from "react";
import { OpenHandsAction } from "#/types/core/actions";
import { isFinishAction } from "#/types/core/guards";
import { ChatMessage } from "../chat-message";
import { MicroagentStatusWrapper } from "./microagent-status-wrapper";
import { LikertScaleWrapper } from "./likert-scale-wrapper";
import { getEventContent } from "../event-content-helpers/get-event-content";
import { MicroagentStatus } from "#/types/microagent-status";
interface FinishEventMessageProps {
event: OpenHandsAction;
microagentStatus?: MicroagentStatus | null;
microagentConversationId?: string;
microagentPRUrl?: string;
actions?: Array<{
icon: React.ReactNode;
onClick: () => void;
tooltip?: string;
}>;
isLastMessage: boolean;
isInLast10Actions: boolean;
config?: { APP_MODE?: string } | null;
isCheckingFeedback: boolean;
feedbackData: {
exists: boolean;
rating?: number;
reason?: string;
};
}
export function FinishEventMessage({
event,
microagentStatus,
microagentConversationId,
microagentPRUrl,
actions,
isLastMessage,
isInLast10Actions,
config,
isCheckingFeedback,
feedbackData,
}: FinishEventMessageProps) {
if (!isFinishAction(event)) {
return null;
}
return (
<>
<ChatMessage
type="agent"
message={getEventContent(event).details}
actions={actions}
/>
<MicroagentStatusWrapper
microagentStatus={microagentStatus}
microagentConversationId={microagentConversationId}
microagentPRUrl={microagentPRUrl}
actions={actions}
/>
<LikertScaleWrapper
event={event}
isLastMessage={isLastMessage}
isInLast10Actions={isInLast10Actions}
config={config}
isCheckingFeedback={isCheckingFeedback}
feedbackData={feedbackData}
/>
</>
);
}
@@ -1,45 +0,0 @@
import React from "react";
import { OpenHandsAction } from "#/types/core/actions";
import { OpenHandsObservation } from "#/types/core/observations";
import { isOpenHandsAction, isOpenHandsObservation } from "#/types/core/guards";
import { ChatMessage } from "../chat-message";
import { GenericEventMessage } from "../generic-event-message";
import { ConfirmationButtons } from "#/components/shared/buttons/confirmation-buttons";
import { getEventContent } from "../event-content-helpers/get-event-content";
import { getObservationResult } from "../event-content-helpers/get-observation-result";
const hasThoughtProperty = (
obj: Record<string, unknown>,
): obj is { thought: string } => "thought" in obj && !!obj.thought;
interface GenericEventMessageWrapperProps {
event: OpenHandsAction | OpenHandsObservation;
shouldShowConfirmationButtons: boolean;
}
export function GenericEventMessageWrapper({
event,
shouldShowConfirmationButtons,
}: GenericEventMessageWrapperProps) {
return (
<div>
{isOpenHandsAction(event) &&
hasThoughtProperty(event.args) &&
event.action !== "think" && (
<ChatMessage type="agent" message={event.args.thought} />
)}
<GenericEventMessage
title={getEventContent(event).title}
details={getEventContent(event).details}
success={
isOpenHandsObservation(event)
? getObservationResult(event)
: undefined
}
/>
{shouldShowConfirmationButtons && <ConfirmationButtons />}
</div>
);
}
@@ -1,10 +0,0 @@
export { ErrorEventMessage } from "./error-event-message";
export { UserAssistantEventMessage } from "./user-assistant-event-message";
export { FinishEventMessage } from "./finish-event-message";
export { RejectEventMessage } from "./reject-event-message";
export { McpEventMessage } from "./mcp-event-message";
export { TaskTrackingEventMessage } from "./task-tracking-event-message";
export { ObservationPairEventMessage } from "./observation-pair-event-message";
export { GenericEventMessageWrapper } from "./generic-event-message-wrapper";
export { MicroagentStatusWrapper } from "./microagent-status-wrapper";
export { LikertScaleWrapper } from "./likert-scale-wrapper";
@@ -1,50 +0,0 @@
import React from "react";
import { OpenHandsAction } from "#/types/core/actions";
import { OpenHandsObservation } from "#/types/core/observations";
import { isErrorObservation } from "#/types/core/guards";
import { LikertScale } from "../../feedback/likert-scale";
interface LikertScaleWrapperProps {
event: OpenHandsAction | OpenHandsObservation;
isLastMessage: boolean;
isInLast10Actions: boolean;
config?: { APP_MODE?: string } | null;
isCheckingFeedback: boolean;
feedbackData: {
exists: boolean;
rating?: number;
reason?: string;
};
}
export function LikertScaleWrapper({
event,
isLastMessage,
isInLast10Actions,
config,
isCheckingFeedback,
feedbackData,
}: LikertScaleWrapperProps) {
if (config?.APP_MODE !== "saas" || isCheckingFeedback) {
return null;
}
// For error observations, show if in last 10 actions
// For other events, show only if it's the last message
const shouldShow = isErrorObservation(event)
? isInLast10Actions
: isLastMessage;
if (!shouldShow) {
return null;
}
return (
<LikertScale
eventId={event.id}
initiallySubmitted={feedbackData.exists}
initialRating={feedbackData.rating}
initialReason={feedbackData.reason}
/>
);
}
@@ -1,33 +0,0 @@
import React from "react";
import { OpenHandsObservation } from "#/types/core/observations";
import { isMcpObservation } from "#/types/core/guards";
import { GenericEventMessage } from "../generic-event-message";
import { MCPObservationContent } from "../mcp-observation-content";
import { ConfirmationButtons } from "#/components/shared/buttons/confirmation-buttons";
import { getEventContent } from "../event-content-helpers/get-event-content";
import { getObservationResult } from "../event-content-helpers/get-observation-result";
interface McpEventMessageProps {
event: OpenHandsObservation;
shouldShowConfirmationButtons: boolean;
}
export function McpEventMessage({
event,
shouldShowConfirmationButtons,
}: McpEventMessageProps) {
if (!isMcpObservation(event)) {
return null;
}
return (
<div>
<GenericEventMessage
title={getEventContent(event).title}
details={<MCPObservationContent event={event} />}
success={getObservationResult(event)}
/>
{shouldShowConfirmationButtons && <ConfirmationButtons />}
</div>
);
}
@@ -1,33 +0,0 @@
import React from "react";
import { MicroagentStatus } from "#/types/microagent-status";
import { MicroagentStatusIndicator } from "../microagent/microagent-status-indicator";
interface MicroagentStatusWrapperProps {
microagentStatus?: MicroagentStatus | null;
microagentConversationId?: string;
microagentPRUrl?: string;
actions?: Array<{
icon: React.ReactNode;
onClick: () => void;
tooltip?: string;
}>;
}
export function MicroagentStatusWrapper({
microagentStatus,
microagentConversationId,
microagentPRUrl,
actions,
}: MicroagentStatusWrapperProps) {
if (!microagentStatus || !actions) {
return null;
}
return (
<MicroagentStatusIndicator
status={microagentStatus}
conversationId={microagentConversationId}
prUrl={microagentPRUrl}
/>
);
}
@@ -1,61 +0,0 @@
import React from "react";
import { OpenHandsAction } from "#/types/core/actions";
import { isOpenHandsAction } from "#/types/core/guards";
import { ChatMessage } from "../chat-message";
import { MicroagentStatusWrapper } from "./microagent-status-wrapper";
import { MicroagentStatus } from "#/types/microagent-status";
const hasThoughtProperty = (
obj: Record<string, unknown>,
): obj is { thought: string } => "thought" in obj && !!obj.thought;
interface ObservationPairEventMessageProps {
event: OpenHandsAction;
microagentStatus?: MicroagentStatus | null;
microagentConversationId?: string;
microagentPRUrl?: string;
actions?: Array<{
icon: React.ReactNode;
onClick: () => void;
tooltip?: string;
}>;
}
export function ObservationPairEventMessage({
event,
microagentStatus,
microagentConversationId,
microagentPRUrl,
actions,
}: ObservationPairEventMessageProps) {
if (!isOpenHandsAction(event)) {
return null;
}
if (hasThoughtProperty(event.args) && event.action !== "think") {
return (
<div>
<ChatMessage
type="agent"
message={event.args.thought}
actions={actions}
/>
<MicroagentStatusWrapper
microagentStatus={microagentStatus}
microagentConversationId={microagentConversationId}
microagentPRUrl={microagentPRUrl}
actions={actions}
/>
</div>
);
}
return (
<MicroagentStatusWrapper
microagentStatus={microagentStatus}
microagentConversationId={microagentConversationId}
microagentPRUrl={microagentPRUrl}
actions={actions}
/>
);
}
@@ -1,20 +0,0 @@
import React from "react";
import { OpenHandsObservation } from "#/types/core/observations";
import { isRejectObservation } from "#/types/core/guards";
import { ChatMessage } from "../chat-message";
interface RejectEventMessageProps {
event: OpenHandsObservation;
}
export function RejectEventMessage({ event }: RejectEventMessageProps) {
if (!isRejectObservation(event)) {
return null;
}
return (
<div>
<ChatMessage type="agent" message={event.content} />
</div>
);
}
@@ -1,50 +0,0 @@
import React from "react";
import { useTranslation } from "react-i18next";
import { OpenHandsObservation } from "#/types/core/observations";
import { isTaskTrackingObservation } from "#/types/core/guards";
import { GenericEventMessage } from "../generic-event-message";
import { TaskTrackingObservationContent } from "../task-tracking-observation-content";
import { ConfirmationButtons } from "#/components/shared/buttons/confirmation-buttons";
import { getObservationResult } from "../event-content-helpers/get-observation-result";
interface TaskTrackingEventMessageProps {
event: OpenHandsObservation;
shouldShowConfirmationButtons: boolean;
}
export function TaskTrackingEventMessage({
event,
shouldShowConfirmationButtons,
}: TaskTrackingEventMessageProps) {
const { t } = useTranslation();
if (!isTaskTrackingObservation(event)) {
return null;
}
const { command } = event.extras;
let title: React.ReactNode;
let initiallyExpanded = false;
// Determine title and expansion state based on command
if (command === "plan") {
title = t("OBSERVATION_MESSAGE$TASK_TRACKING_PLAN");
initiallyExpanded = true;
} else {
// command === "view"
title = t("OBSERVATION_MESSAGE$TASK_TRACKING_VIEW");
initiallyExpanded = false;
}
return (
<div>
<GenericEventMessage
title={title}
details={<TaskTrackingObservationContent event={event} />}
success={getObservationResult(event)}
initiallyExpanded={initiallyExpanded}
/>
{shouldShowConfirmationButtons && <ConfirmationButtons />}
</div>
);
}
@@ -1,83 +0,0 @@
import React from "react";
import { OpenHandsAction } from "#/types/core/actions";
import { isUserMessage, isAssistantMessage } from "#/types/core/guards";
import { ChatMessage } from "../chat-message";
import { ImageCarousel } from "../../images/image-carousel";
import { FileList } from "../../files/file-list";
import { ConfirmationButtons } from "#/components/shared/buttons/confirmation-buttons";
import { MicroagentStatusWrapper } from "./microagent-status-wrapper";
import { LikertScaleWrapper } from "./likert-scale-wrapper";
import { parseMessageFromEvent } from "../event-content-helpers/parse-message-from-event";
import { MicroagentStatus } from "#/types/microagent-status";
interface UserAssistantEventMessageProps {
event: OpenHandsAction;
shouldShowConfirmationButtons: boolean;
microagentStatus?: MicroagentStatus | null;
microagentConversationId?: string;
microagentPRUrl?: string;
actions?: Array<{
icon: React.ReactNode;
onClick: () => void;
tooltip?: string;
}>;
isLastMessage: boolean;
isInLast10Actions: boolean;
config?: { APP_MODE?: string } | null;
isCheckingFeedback: boolean;
feedbackData: {
exists: boolean;
rating?: number;
reason?: string;
};
}
export function UserAssistantEventMessage({
event,
shouldShowConfirmationButtons,
microagentStatus,
microagentConversationId,
microagentPRUrl,
actions,
isLastMessage,
isInLast10Actions,
config,
isCheckingFeedback,
feedbackData,
}: UserAssistantEventMessageProps) {
if (!isUserMessage(event) && !isAssistantMessage(event)) {
return null;
}
const message = parseMessageFromEvent(event);
return (
<>
<ChatMessage type={event.source} message={message} actions={actions}>
{event.args.image_urls && event.args.image_urls.length > 0 && (
<ImageCarousel size="small" images={event.args.image_urls} />
)}
{event.args.file_urls && event.args.file_urls.length > 0 && (
<FileList files={event.args.file_urls} />
)}
{shouldShowConfirmationButtons && <ConfirmationButtons />}
</ChatMessage>
<MicroagentStatusWrapper
microagentStatus={microagentStatus}
microagentConversationId={microagentConversationId}
microagentPRUrl={microagentPRUrl}
actions={actions}
/>
{isAssistantMessage(event) && event.action === "message" && (
<LikertScaleWrapper
event={event}
isLastMessage={isLastMessage}
isInLast10Actions={isInLast10Actions}
config={config}
isCheckingFeedback={isCheckingFeedback}
feedbackData={feedbackData}
/>
)}
</>
);
}
@@ -1,29 +1,39 @@
import React from "react";
import { useTranslation } from "react-i18next";
import { ConfirmationButtons } from "#/components/shared/buttons/confirmation-buttons";
import { OpenHandsAction } from "#/types/core/actions";
import {
isUserMessage,
isErrorObservation,
isAssistantMessage,
isOpenHandsAction,
isOpenHandsObservation,
isFinishAction,
isRejectObservation,
isMcpObservation,
isTaskTrackingObservation,
} from "#/types/core/guards";
import { OpenHandsObservation } from "#/types/core/observations";
import { ImageCarousel } from "../images/image-carousel";
import { ChatMessage } from "./chat-message";
import { ErrorMessage } from "./error-message";
import { MCPObservationContent } from "./mcp-observation-content";
import { TaskTrackingObservationContent } from "./task-tracking-observation-content";
import { getObservationResult } from "./event-content-helpers/get-observation-result";
import { getEventContent } from "./event-content-helpers/get-event-content";
import { GenericEventMessage } from "./generic-event-message";
import { MicroagentStatus } from "#/types/microagent-status";
import { MicroagentStatusIndicator } from "./microagent/microagent-status-indicator";
import { FileList } from "../files/file-list";
import { parseMessageFromEvent } from "./event-content-helpers/parse-message-from-event";
import { LikertScale } from "../feedback/likert-scale";
import { useConfig } from "#/hooks/query/use-config";
import { useFeedbackExists } from "#/hooks/query/use-feedback-exists";
import {
ErrorEventMessage,
UserAssistantEventMessage,
FinishEventMessage,
RejectEventMessage,
McpEventMessage,
TaskTrackingEventMessage,
ObservationPairEventMessage,
GenericEventMessageWrapper,
} from "./event-message-components";
const hasThoughtProperty = (
obj: Record<string, unknown>,
): obj is { thought: string } => "thought" in obj && !!obj.thought;
interface EventMessageProps {
event: OpenHandsAction | OpenHandsObservation;
@@ -41,7 +51,6 @@ interface EventMessageProps {
isInLast10Actions: boolean;
}
/* eslint-disable react/jsx-props-no-spreading */
export function EventMessage({
event,
hasObservationPair,
@@ -53,6 +62,7 @@ export function EventMessage({
actions,
isInLast10Actions,
}: EventMessageProps) {
const { t } = useTranslation();
const shouldShowConfirmationButtons =
isLastMessage && event.source === "agent" && isAwaitingUserConfirmation;
@@ -63,83 +73,194 @@ export function EventMessage({
isLoading: isCheckingFeedback,
} = useFeedbackExists(event.id);
// Common props for components that need them
const commonProps = {
microagentStatus,
microagentConversationId,
microagentPRUrl,
actions,
isLastMessage,
isInLast10Actions,
config,
isCheckingFeedback,
feedbackData,
const renderLikertScale = () => {
if (config?.APP_MODE !== "saas" || isCheckingFeedback) {
return null;
}
// For error observations, show if in last 10 actions
// For other events, show only if it's the last message
const shouldShow = isErrorObservation(event)
? isInLast10Actions
: isLastMessage;
if (!shouldShow) {
return null;
}
return (
<LikertScale
eventId={event.id}
initiallySubmitted={feedbackData.exists}
initialRating={feedbackData.rating}
initialReason={feedbackData.reason}
/>
);
};
// Error observations
if (isErrorObservation(event)) {
return <ErrorEventMessage event={event} {...commonProps} />;
return (
<div>
<ErrorMessage
errorId={event.extras.error_id}
defaultMessage={event.message}
/>
{microagentStatus && actions && (
<MicroagentStatusIndicator
status={microagentStatus}
conversationId={microagentConversationId}
prUrl={microagentPRUrl}
/>
)}
{renderLikertScale()}
</div>
);
}
// Observation pairs with OpenHands actions
if (hasObservationPair && isOpenHandsAction(event)) {
return (
<ObservationPairEventMessage
event={event}
microagentStatus={microagentStatus}
microagentConversationId={microagentConversationId}
microagentPRUrl={microagentPRUrl}
actions={actions}
if (hasThoughtProperty(event.args) && event.action !== "think") {
return (
<div>
<ChatMessage
type="agent"
message={event.args.thought}
actions={actions}
/>
{microagentStatus && actions && (
<MicroagentStatusIndicator
status={microagentStatus}
conversationId={microagentConversationId}
prUrl={microagentPRUrl}
/>
)}
</div>
);
}
return microagentStatus && actions ? (
<MicroagentStatusIndicator
status={microagentStatus}
conversationId={microagentConversationId}
prUrl={microagentPRUrl}
/>
);
) : null;
}
// Finish actions
if (isFinishAction(event)) {
return <FinishEventMessage event={event} {...commonProps} />;
}
// User and assistant messages
if (isUserMessage(event) || isAssistantMessage(event)) {
return (
<UserAssistantEventMessage
event={event}
shouldShowConfirmationButtons={shouldShowConfirmationButtons}
{...commonProps}
/>
<>
<ChatMessage
type="agent"
message={getEventContent(event).details}
actions={actions}
/>
{microagentStatus && actions && (
<MicroagentStatusIndicator
status={microagentStatus}
conversationId={microagentConversationId}
prUrl={microagentPRUrl}
/>
)}
{renderLikertScale()}
</>
);
}
// Reject observations
if (isRejectObservation(event)) {
return <RejectEventMessage event={event} />;
if (isUserMessage(event) || isAssistantMessage(event)) {
const message = parseMessageFromEvent(event);
return (
<>
<ChatMessage type={event.source} message={message} actions={actions}>
{event.args.image_urls && event.args.image_urls.length > 0 && (
<ImageCarousel size="small" images={event.args.image_urls} />
)}
{event.args.file_urls && event.args.file_urls.length > 0 && (
<FileList files={event.args.file_urls} />
)}
{shouldShowConfirmationButtons && <ConfirmationButtons />}
</ChatMessage>
{microagentStatus && actions && (
<MicroagentStatusIndicator
status={microagentStatus}
conversationId={microagentConversationId}
prUrl={microagentPRUrl}
/>
)}
{isAssistantMessage(event) &&
event.action === "message" &&
renderLikertScale()}
</>
);
}
if (isRejectObservation(event)) {
return (
<div>
<ChatMessage type="agent" message={event.content} />
</div>
);
}
// MCP observations
if (isMcpObservation(event)) {
return (
<McpEventMessage
event={event}
shouldShowConfirmationButtons={shouldShowConfirmationButtons}
/>
<div>
<GenericEventMessage
title={getEventContent(event).title}
details={<MCPObservationContent event={event} />}
success={getObservationResult(event)}
/>
{shouldShowConfirmationButtons && <ConfirmationButtons />}
</div>
);
}
// Task tracking observations
if (isTaskTrackingObservation(event)) {
const { command } = event.extras;
let title: React.ReactNode;
let initiallyExpanded = false;
// Determine title and expansion state based on command
if (command === "plan") {
title = t("OBSERVATION_MESSAGE$TASK_TRACKING_PLAN");
initiallyExpanded = true;
} else {
// command === "view"
title = t("OBSERVATION_MESSAGE$TASK_TRACKING_VIEW");
initiallyExpanded = false;
}
return (
<TaskTrackingEventMessage
event={event}
shouldShowConfirmationButtons={shouldShowConfirmationButtons}
/>
<div>
<GenericEventMessage
title={title}
details={<TaskTrackingObservationContent event={event} />}
success={getObservationResult(event)}
initiallyExpanded={initiallyExpanded}
/>
{shouldShowConfirmationButtons && <ConfirmationButtons />}
</div>
);
}
// Generic fallback
return (
<GenericEventMessageWrapper
event={event}
shouldShowConfirmationButtons={shouldShowConfirmationButtons}
/>
<div>
{isOpenHandsAction(event) &&
hasThoughtProperty(event.args) &&
event.action !== "think" && (
<ChatMessage type="agent" message={event.args.thought} />
)}
<GenericEventMessage
title={getEventContent(event).title}
details={getEventContent(event).details}
success={
isOpenHandsObservation(event)
? getObservationResult(event)
: undefined
}
/>
{shouldShowConfirmationButtons && <ConfirmationButtons />}
</div>
);
}
@@ -8,12 +8,14 @@ import { Provider } from "#/types/settings";
interface GitControlBarPrButtonProps {
onSuggestionsClick: (value: string) => void;
isEnabled: boolean;
hasRepository: boolean;
currentGitProvider: Provider;
}
export function GitControlBarPrButton({
onSuggestionsClick,
isEnabled,
hasRepository,
currentGitProvider,
}: GitControlBarPrButtonProps) {
@@ -22,7 +24,7 @@ export function GitControlBarPrButton({
const { providers } = useUserProviders();
const providersAreSet = providers.length > 0;
const isButtonEnabled = providersAreSet && hasRepository;
const isButtonEnabled = isEnabled && providersAreSet && hasRepository;
const handlePrClick = () => {
posthog.capture("create_pr_button_clicked");
@@ -8,10 +8,12 @@ import { I18nKey } from "#/i18n/declaration";
interface GitControlBarPullButtonProps {
onSuggestionsClick: (value: string) => void;
isEnabled: boolean;
}
export function GitControlBarPullButton({
onSuggestionsClick,
isEnabled,
}: GitControlBarPullButtonProps) {
const { t } = useTranslation();
@@ -20,7 +22,7 @@ export function GitControlBarPullButton({
const providersAreSet = providers.length > 0;
const hasRepository = conversation?.selected_repository;
const isButtonEnabled = providersAreSet && hasRepository;
const isButtonEnabled = isEnabled && providersAreSet && hasRepository;
const handlePullClick = () => {
posthog.capture("pull_button_clicked");
@@ -8,12 +8,14 @@ import { Provider } from "#/types/settings";
interface GitControlBarPushButtonProps {
onSuggestionsClick: (value: string) => void;
isEnabled: boolean;
hasRepository: boolean;
currentGitProvider: Provider;
}
export function GitControlBarPushButton({
onSuggestionsClick,
isEnabled,
hasRepository,
currentGitProvider,
}: GitControlBarPushButtonProps) {
@@ -22,7 +24,7 @@ export function GitControlBarPushButton({
const { providers } = useUserProviders();
const providersAreSet = providers.length > 0;
const isButtonEnabled = providersAreSet && hasRepository;
const isButtonEnabled = isEnabled && providersAreSet && hasRepository;
const handlePushClick = () => {
posthog.capture("push_button_clicked");
@@ -11,9 +11,17 @@ import { GitControlBarTooltipWrapper } from "./git-control-bar-tooltip-wrapper";
interface GitControlBarProps {
onSuggestionsClick: (value: string) => void;
isWaitingForUserInput: boolean;
hasSubstantiveAgentActions: boolean;
optimisticUserMessage: boolean;
}
export function GitControlBar({ onSuggestionsClick }: GitControlBarProps) {
export function GitControlBar({
onSuggestionsClick,
isWaitingForUserInput,
hasSubstantiveAgentActions,
optimisticUserMessage,
}: GitControlBarProps) {
const { t } = useTranslation();
const { data: conversation } = useActiveConversation();
@@ -22,6 +30,12 @@ export function GitControlBar({ onSuggestionsClick }: GitControlBarProps) {
const gitProvider = conversation?.git_provider as Provider;
const selectedBranch = conversation?.selected_branch;
// Button is enabled when the agent is waiting for user input, has substantive actions, and no optimistic message
const isButtonEnabled =
isWaitingForUserInput &&
hasSubstantiveAgentActions &&
!optimisticUserMessage;
const hasRepository = !!selectedRepository;
return (
@@ -59,6 +73,7 @@ export function GitControlBar({ onSuggestionsClick }: GitControlBarProps) {
>
<GitControlBarPullButton
onSuggestionsClick={onSuggestionsClick}
isEnabled={isButtonEnabled}
/>
</GitControlBarTooltipWrapper>
@@ -69,6 +84,7 @@ export function GitControlBar({ onSuggestionsClick }: GitControlBarProps) {
>
<GitControlBarPushButton
onSuggestionsClick={onSuggestionsClick}
isEnabled={isButtonEnabled}
hasRepository={hasRepository}
currentGitProvider={gitProvider}
/>
@@ -81,6 +97,7 @@ export function GitControlBar({ onSuggestionsClick }: GitControlBarProps) {
>
<GitControlBarPrButton
onSuggestionsClick={onSuggestionsClick}
isEnabled={isButtonEnabled}
hasRepository={hasRepository}
currentGitProvider={gitProvider}
/>
@@ -1,38 +1,44 @@
import { useSelector } from "react-redux";
import { useSelector, useDispatch } from "react-redux";
import { isFileImage } from "#/utils/is-file-image";
import { displayErrorToast } from "#/utils/custom-toast-handlers";
import { validateFiles } from "#/utils/file-validation";
import { CustomChatInput } from "./custom-chat-input";
import { RootState } from "#/store";
import { AgentState } from "#/types/agent-state";
import { useActiveConversation } from "#/hooks/query/use-active-conversation";
import { GitControlBar } from "./git-control-bar";
import { useConversationStore } from "#/state/conversation-store";
import {
addImages,
addFiles,
clearAllFiles,
addFileLoading,
removeFileLoading,
addImageLoading,
removeImageLoading,
} from "#/state/conversation-slice";
import { processFiles, processImages } from "#/utils/file-processing";
import { RootState } from "#/store";
interface InteractiveChatBoxProps {
onSubmit: (message: string, images: File[], files: File[]) => void;
onStop: () => void;
isWaitingForUserInput: boolean;
hasSubstantiveAgentActions: boolean;
optimisticUserMessage: boolean;
}
export function InteractiveChatBox({
onSubmit,
onStop,
isWaitingForUserInput,
hasSubstantiveAgentActions,
optimisticUserMessage,
}: InteractiveChatBoxProps) {
const {
images,
files,
addImages,
addFiles,
clearAllFiles,
addFileLoading,
removeFileLoading,
addImageLoading,
removeImageLoading,
} = useConversationStore();
const dispatch = useDispatch();
const curAgentState = useSelector(
(state: RootState) => state.agent.curAgentState,
);
const images = useSelector((state: RootState) => state.conversation.images);
const files = useSelector((state: RootState) => state.conversation.files);
const { data: conversation } = useActiveConversation();
// Helper function to validate and filter files
@@ -52,24 +58,26 @@ export function InteractiveChatBox({
// Helper function to show loading indicators for files
const showLoadingIndicators = (validFiles: File[], validImages: File[]) => {
validFiles.forEach((file) => addFileLoading(file.name));
validImages.forEach((image) => addImageLoading(image.name));
validFiles.forEach((file) => dispatch(addFileLoading(file.name)));
validImages.forEach((image) => dispatch(addImageLoading(image.name)));
};
// Helper function to handle successful file processing results
const handleSuccessfulFiles = (fileResults: { successful: File[] }) => {
if (fileResults.successful.length > 0) {
addFiles(fileResults.successful);
fileResults.successful.forEach((file) => removeFileLoading(file.name));
dispatch(addFiles(fileResults.successful));
fileResults.successful.forEach((file) =>
dispatch(removeFileLoading(file.name)),
);
}
};
// Helper function to handle successful image processing results
const handleSuccessfulImages = (imageResults: { successful: File[] }) => {
if (imageResults.successful.length > 0) {
addImages(imageResults.successful);
dispatch(addImages(imageResults.successful));
imageResults.successful.forEach((image) =>
removeImageLoading(image.name),
dispatch(removeImageLoading(image.name)),
);
}
};
@@ -80,14 +88,14 @@ export function InteractiveChatBox({
imageResults: { failed: { file: File; error: Error }[] },
) => {
fileResults.failed.forEach(({ file, error }) => {
removeFileLoading(file.name);
dispatch(removeFileLoading(file.name));
displayErrorToast(
`Failed to process file ${file.name}: ${error.message}`,
);
});
imageResults.failed.forEach(({ file, error }) => {
removeImageLoading(file.name);
dispatch(removeImageLoading(file.name));
displayErrorToast(
`Failed to process image ${file.name}: ${error.message}`,
);
@@ -96,8 +104,8 @@ export function InteractiveChatBox({
// Helper function to clear loading states on error
const clearLoadingStates = (validFiles: File[], validImages: File[]) => {
validFiles.forEach((file) => removeFileLoading(file.name));
validImages.forEach((image) => removeImageLoading(image.name));
validFiles.forEach((file) => dispatch(removeFileLoading(file.name)));
validImages.forEach((image) => dispatch(removeImageLoading(image.name)));
};
const handleUpload = async (selectedFiles: File[]) => {
@@ -132,7 +140,7 @@ export function InteractiveChatBox({
const handleSubmit = (message: string) => {
onSubmit(message, images, files);
clearAllFiles();
dispatch(clearAllFiles());
};
const handleSuggestionsClick = (suggestion: string) => {
@@ -153,7 +161,12 @@ export function InteractiveChatBox({
conversationStatus={conversation?.status || null}
/>
<div className="mt-4">
<GitControlBar onSuggestionsClick={handleSuggestionsClick} />
<GitControlBar
onSuggestionsClick={handleSuggestionsClick}
isWaitingForUserInput={isWaitingForUserInput}
hasSubstantiveAgentActions={hasSubstantiveAgentActions}
optimisticUserMessage={optimisticUserMessage}
/>
</div>
</div>
);
@@ -13,7 +13,6 @@ import { useHandleRuntimeActive } from "#/hooks/use-handle-runtime-active";
import { LoadingMicroagentBody } from "./loading-microagent-body";
import { LoadingMicroagentTextarea } from "./loading-microagent-textarea";
import { useGetMicroagents } from "#/hooks/query/use-get-microagents";
import { Typography } from "#/ui/typography";
interface LaunchMicroagentModalProps {
onClose: () => void;
@@ -77,9 +76,9 @@ export function LaunchMicroagentModal({
</button>
</div>
<Typography.Text className="text-sm text-[#A3A3A3] font-normal leading-5">
<span className="text-sm text-[#A3A3A3] font-normal leading-5">
{t("MICROAGENT$DEFINITION")}
</Typography.Text>
</span>
<form
data-testid="launch-microagent-modal"
@@ -1,7 +1,6 @@
import { Spinner } from "@heroui/react";
import { useTranslation } from "react-i18next";
import { ModalBody } from "#/components/shared/modals/modal-body";
import { Typography } from "#/ui/typography";
export function LoadingMicroagentBody() {
const { t } = useTranslation();
@@ -11,7 +10,7 @@ export function LoadingMicroagentBody() {
{t("MICROAGENT$ADD_TO_MICROAGENT")}
</h2>
<Spinner size="lg" />
<Typography.Text>{t("MICROAGENT$WAIT_FOR_RUNTIME")}</Typography.Text>
<p>{t("MICROAGENT$WAIT_FOR_RUNTIME")}</p>
</ModalBody>
);
}
@@ -3,7 +3,6 @@ import { useTranslation } from "react-i18next";
import { Spinner } from "@heroui/react";
import { MicroagentStatus } from "#/types/microagent-status";
import { SuccessIndicator } from "../success-indicator";
import { Typography } from "#/ui/typography";
interface MicroagentStatusIndicatorProps {
status: MicroagentStatus;
@@ -82,9 +81,7 @@ export function MicroagentStatusIndicator({
);
}
return (
<Typography.Text className="underline">{statusText}</Typography.Text>
);
return <span className="underline">{statusText}</span>;
};
return (
@@ -1,23 +1,26 @@
import { useSelector, useDispatch } from "react-redux";
import { RootState } from "#/store";
import { UploadedFile } from "./uploaded-file";
import { UploadedImage } from "./uploaded-image";
import { useConversationStore } from "#/state/conversation-store";
import { removeFile, removeImage } from "#/state/conversation-slice";
export function UploadedFiles() {
const {
images,
files,
loadingFiles,
loadingImages,
removeFile,
removeImage,
} = useConversationStore();
const dispatch = useDispatch();
const images = useSelector((state: RootState) => state.conversation.images);
const files = useSelector((state: RootState) => state.conversation.files);
const loadingFiles = useSelector(
(state: RootState) => state.conversation.loadingFiles,
);
const loadingImages = useSelector(
(state: RootState) => state.conversation.loadingImages,
);
const handleRemoveFile = (index: number) => {
removeFile(index);
dispatch(removeFile(index));
};
const handleRemoveImage = (index: number) => {
removeImage(index);
dispatch(removeImage(index));
};
// Don't render anything if there are no files, images, or loading items
@@ -1,5 +1,5 @@
import { useTranslation } from "react-i18next";
import { useSelector } from "react-redux";
import { useSelector, useDispatch } from "react-redux";
import { useEffect } from "react";
import { RootState } from "#/store";
import { useStatusStore } from "#/state/status-store";
@@ -12,7 +12,7 @@ import ClockIcon from "#/icons/u-clock-three.svg?react";
import { ChatResumeAgentButton } from "../chat/chat-play-button";
import { cn } from "#/utils/utils";
import { AgentLoading } from "./agent-loading";
import { useConversationStore } from "#/state/conversation-store";
import { setShouldShownAgentLoading } from "#/state/conversation-slice";
import CircleErrorIcon from "#/icons/circle-error.svg?react";
export interface AgentStatusProps {
@@ -29,7 +29,7 @@ export function AgentStatus({
disabled = false,
}: AgentStatusProps) {
const { t } = useTranslation();
const { setShouldShownAgentLoading } = useConversationStore();
const dispatch = useDispatch();
const { curAgentState } = useSelector((state: RootState) => state.agent);
const { curStatusMessage } = useStatusStore();
const { webSocketStatus } = useWsClient();
@@ -58,8 +58,8 @@ export function AgentStatus({
// Update global state when agent loading condition changes
useEffect(() => {
setShouldShownAgentLoading(shouldShownAgentLoading);
}, [shouldShownAgentLoading, setShouldShownAgentLoading]);
dispatch(setShouldShownAgentLoading(shouldShownAgentLoading));
}, [shouldShownAgentLoading, dispatch]);
return (
<div className={`flex items-center gap-1 ${className}`}>
@@ -1,4 +1,5 @@
import { useTranslation } from "react-i18next";
import { useDispatch } from "react-redux";
import { ContextMenu } from "#/ui/context-menu";
import { ContextMenuListItem } from "../context-menu/context-menu-list-item";
import { ToolsContextMenuIconText } from "./tools-context-menu-icon-text";
@@ -10,7 +11,7 @@ import {
getCreatePRPrompt,
getCreateNewBranchPrompt,
} from "#/utils/utils";
import { useConversationStore } from "#/state/conversation-store";
import { setMessageToSend } from "#/state/conversation-slice";
import ArrowUpIcon from "#/icons/u-arrow-up.svg?react";
import ArrowDownIcon from "#/icons/u-arrow-down.svg?react";
@@ -27,28 +28,28 @@ interface GitToolsSubmenuProps {
export function GitToolsSubmenu({ onClose }: GitToolsSubmenuProps) {
const { t } = useTranslation();
const { setMessageToSend } = useConversationStore();
const dispatch = useDispatch();
const { data: conversation } = useActiveConversation();
const currentGitProvider = conversation?.git_provider as Provider;
const onGitPull = () => {
setMessageToSend(getGitPullPrompt());
dispatch(setMessageToSend(getGitPullPrompt()));
onClose();
};
const onGitPush = () => {
setMessageToSend(getGitPushPrompt(currentGitProvider));
dispatch(setMessageToSend(getGitPushPrompt(currentGitProvider)));
onClose();
};
const onCreatePR = () => {
setMessageToSend(getCreatePRPrompt(currentGitProvider));
dispatch(setMessageToSend(getCreatePRPrompt(currentGitProvider)));
onClose();
};
const onCreateNewBranch = () => {
setMessageToSend(getCreateNewBranchPrompt());
dispatch(setMessageToSend(getCreateNewBranchPrompt()));
onClose();
};
@@ -1,4 +1,5 @@
import { useTranslation } from "react-i18next";
import { useDispatch } from "react-redux";
import { ContextMenu } from "#/ui/context-menu";
import { ContextMenuListItem } from "../context-menu/context-menu-list-item";
import { ToolsContextMenuIconText } from "./tools-context-menu-icon-text";
@@ -8,7 +9,7 @@ import PrStatusIcon from "#/icons/pr-status.svg?react";
import DocumentIcon from "#/icons/document.svg?react";
import WaterIcon from "#/icons/u-water.svg?react";
import { I18nKey } from "#/i18n/declaration";
import { useConversationStore } from "#/state/conversation-store";
import { setMessageToSend } from "#/state/conversation-slice";
import { REPO_SUGGESTIONS } from "#/utils/suggestions/repo-suggestions";
import { CONTEXT_MENU_ICON_TEXT_CLASSNAME } from "#/utils/constants";
@@ -21,22 +22,22 @@ interface MacrosSubmenuProps {
export function MacrosSubmenu({ onClose }: MacrosSubmenuProps) {
const { t } = useTranslation();
const { setMessageToSend } = useConversationStore();
const dispatch = useDispatch();
const onIncreaseTestCoverage = () => {
setMessageToSend(REPO_SUGGESTIONS.INCREASE_TEST_COVERAGE);
dispatch(setMessageToSend(REPO_SUGGESTIONS.INCREASE_TEST_COVERAGE));
onClose();
};
const onFixReadme = () => {
setMessageToSend(REPO_SUGGESTIONS.FIX_README);
dispatch(setMessageToSend(REPO_SUGGESTIONS.FIX_README));
onClose();
};
const onAutoMergePRs = () => {
setMessageToSend(REPO_SUGGESTIONS.AUTO_MERGE_PRS);
dispatch(setMessageToSend(REPO_SUGGESTIONS.AUTO_MERGE_PRS));
onClose();
};
const onCleanDependencies = () => {
setMessageToSend(REPO_SUGGESTIONS.CLEAN_DEPENDENCIES);
dispatch(setMessageToSend(REPO_SUGGESTIONS.CLEAN_DEPENDENCIES));
onClose();
};
@@ -20,7 +20,7 @@ export function ConversationPanelWrapper({
return ReactDOM.createPortal(
<div
className={cn(
"absolute h-full w-full left-0 top-0 z-[9999] bg-black/80 rounded-xl",
"absolute h-full w-full left-0 top-0 z-20 bg-black/80 rounded-xl",
pathname === "/" && "bottom-0 top-0 md:top-3 md:bottom-3 h-auto",
)}
>
@@ -1,4 +1,3 @@
import { cn } from "#/utils/utils";
import { ChatInterface } from "../../chat/chat-interface";
interface ChatInterfaceWrapperProps {
@@ -8,16 +7,15 @@ interface ChatInterfaceWrapperProps {
export function ChatInterfaceWrapper({
isRightPanelShown,
}: ChatInterfaceWrapperProps) {
return (
<div className="flex justify-center w-full h-full">
<div
className={cn(
"w-full transition-all duration-300 ease-in-out",
isRightPanelShown ? "max-w-4xl" : "max-w-6xl",
)}
>
<ChatInterface />
if (!isRightPanelShown) {
return (
<div className="flex justify-center w-full h-full">
<div className="w-full max-w-[768px]">
<ChatInterface />
</div>
</div>
</div>
);
);
}
return <ChatInterface />;
}
@@ -1,11 +1,14 @@
import { useSelector } from "react-redux";
import { useWindowSize } from "@uidotdev/usehooks";
import { RootState } from "#/store";
import { MobileLayout } from "./mobile-layout";
import { DesktopLayout } from "./desktop-layout";
import { useConversationStore } from "#/state/conversation-store";
export function ConversationMain() {
const { width } = useWindowSize();
const { isRightPanelShown } = useConversationStore();
const isRightPanelShown = useSelector(
(state: RootState) => state.conversation.isRightPanelShown,
);
if (width && width <= 1024) {
return <MobileLayout isRightPanelShown={isRightPanelShown} />;
@@ -1,64 +1,35 @@
import { cn } from "#/utils/utils";
import { Panel, PanelGroup, PanelResizeHandle } from "react-resizable-panels";
import { ChatInterfaceWrapper } from "./chat-interface-wrapper";
import { ConversationTabContent } from "../conversation-tabs/conversation-tab-content/conversation-tab-content";
import { ResizeHandle } from "../../../ui/resize-handle";
import { useResizablePanels } from "#/hooks/use-resizable-panels";
interface DesktopLayoutProps {
isRightPanelShown: boolean;
}
export function DesktopLayout({ isRightPanelShown }: DesktopLayoutProps) {
const { leftWidth, rightWidth, isDragging, containerRef, handleMouseDown } =
useResizablePanels({
defaultLeftWidth: 50,
minLeftWidth: 30,
maxLeftWidth: 80,
storageKey: "desktop-layout-panel-width",
});
return (
<div className="h-full flex flex-col overflow-hidden">
<div
ref={containerRef}
className="flex flex-1 transition-all duration-300 ease-in-out overflow-hidden"
style={{
// Only apply smooth transitions when not dragging
transitionProperty: isDragging ? "none" : "all",
}}
>
{/* Left Panel (Chat) */}
<div
className="flex flex-col bg-base overflow-hidden transition-all duration-300 ease-in-out"
style={{
width: isRightPanelShown ? `${leftWidth}%` : "100%",
transitionProperty: isDragging ? "none" : "all",
}}
>
<ChatInterfaceWrapper isRightPanelShown={isRightPanelShown} />
</div>
{/* Resize Handle */}
{isRightPanelShown && <ResizeHandle onMouseDown={handleMouseDown} />}
{/* Right Panel */}
<div
className={cn(
"transition-all duration-300 ease-in-out overflow-hidden",
isRightPanelShown
? "translate-x-0 opacity-100"
: "w-0 translate-x-full opacity-0",
)}
style={{
width: isRightPanelShown ? `${rightWidth}%` : "0%",
transitionProperty: isDragging ? "opacity, transform" : "all",
}}
>
<div className="flex flex-col flex-1 gap-3 min-w-max h-full">
<ConversationTabContent />
</div>
</div>
</div>
</div>
<PanelGroup
direction="horizontal"
className="grow h-full min-h-0 min-w-0"
autoSaveId="react-resizable-panels:layout"
>
<Panel minSize={30} maxSize={80} className="overflow-hidden bg-base">
<ChatInterfaceWrapper isRightPanelShown={isRightPanelShown} />
</Panel>
{isRightPanelShown && (
<>
<PanelResizeHandle className="cursor-ew-resize" />
<Panel
minSize={20}
maxSize={70}
className="flex flex-col overflow-hidden"
>
<div className="flex flex-col flex-1 gap-3">
<ConversationTabContent />
</div>
</Panel>
</>
)}
</PanelGroup>
);
}
@@ -8,30 +8,20 @@ interface MobileLayoutProps {
export function MobileLayout({ isRightPanelShown }: MobileLayoutProps) {
return (
<div className="relative h-full flex flex-col overflow-hidden">
{/* Chat area - shrinks when panel slides up */}
<div className="flex flex-col gap-3 overflow-auto w-full">
<div
className={cn(
"flex-1 bg-base overflow-hidden transition-all duration-300 ease-in-out",
isRightPanelShown ? "flex-[0.6]" : "flex-1",
"overflow-hidden w-full bg-base min-h-[600px]",
!isRightPanelShown && "h-full",
)}
>
<ChatInterface />
</div>
{/* Bottom panel - slides up from bottom */}
<div
className={cn(
"absolute bottom-0 left-0 right-0 transition-all duration-300 ease-in-out overflow-hidden",
isRightPanelShown
? "h-[40%] translate-y-0 opacity-100"
: "h-0 translate-y-full opacity-0",
)}
>
<div className="h-full flex flex-col gap-3 pt-2">
{isRightPanelShown && (
<div className="h-full w-full min-h-[494px] flex flex-col gap-3">
<ConversationTabContent />
</div>
</div>
)}
</div>
);
}
@@ -1,5 +1,7 @@
import { lazy, useMemo } from "react";
import { useTranslation } from "react-i18next";
import { useSelector } from "react-redux";
import { RootState } from "#/store";
import { ConversationLoading } from "../../conversation-loading";
import { I18nKey } from "#/i18n/declaration";
import { TabWrapper } from "./tab-wrapper";
@@ -7,7 +9,6 @@ import { TabContainer } from "./tab-container";
import { TabContentArea } from "./tab-content-area";
import { ConversationTabTitle } from "../conversation-tab-title";
import Terminal from "#/components/features/terminal/terminal";
import { useConversationStore } from "#/state/conversation-store";
// Lazy load all tab components
const EditorTab = lazy(() => import("#/routes/changes-tab"));
@@ -15,9 +16,15 @@ const BrowserTab = lazy(() => import("#/routes/browser-tab"));
const JupyterTab = lazy(() => import("#/routes/jupyter-tab"));
const ServedTab = lazy(() => import("#/routes/served-tab"));
const VSCodeTab = lazy(() => import("#/routes/vscode-tab"));
const TasksTab = lazy(() => import("#/routes/tasks-tab"));
export function ConversationTabContent() {
const { selectedTab, shouldShownAgentLoading } = useConversationStore();
const selectedTab = useSelector(
(state: RootState) => state.conversation.selectedTab,
);
const { shouldShownAgentLoading } = useSelector(
(state: RootState) => state.conversation,
);
const { t } = useTranslation();
@@ -28,6 +35,7 @@ export function ConversationTabContent() {
const isServedActive = selectedTab === "served";
const isVSCodeActive = selectedTab === "vscode";
const isTerminalActive = selectedTab === "terminal";
const isTasksActive = selectedTab === "tasks";
// Define tab configurations
const tabs = [
@@ -49,6 +57,7 @@ export function ConversationTabContent() {
component: Terminal,
isActive: isTerminalActive,
},
{ key: "tasks", component: TasksTab, isActive: isTasksActive },
];
const conversationTabTitle = useMemo(() => {
@@ -70,6 +79,9 @@ export function ConversationTabContent() {
if (isTerminalActive) {
return t(I18nKey.COMMON$TERMINAL);
}
if (isTasksActive) {
return t(I18nKey.COMMON$TASKS);
}
return "";
}, [
isEditorActive,
@@ -78,6 +90,7 @@ export function ConversationTabContent() {
isServedActive,
isVSCodeActive,
isTerminalActive,
isTasksActive,
]);
if (shouldShownAgentLoading) {
@@ -5,12 +5,14 @@ type ConversationTabNavProps = {
icon: ComponentType<{ className: string }>;
onClick(): void;
isActive?: boolean;
"data-testid"?: string;
};
export function ConversationTabNav({
icon: Icon,
onClick,
isActive,
"data-testid": dataTestId,
}: ConversationTabNavProps) {
return (
<button
@@ -18,6 +20,7 @@ export function ConversationTabNav({
onClick={() => {
onClick();
}}
data-testid={dataTestId}
className={cn(
"p-1 rounded-md cursor-pointer",
"text-[#9299AA] bg-[#0D0F11]",
@@ -1,4 +1,5 @@
import { useEffect } from "react";
import { useDispatch, useSelector } from "react-redux";
import { useTranslation } from "react-i18next";
import { useLocalStorage } from "@uidotdev/usehooks";
import JupyterIcon from "#/icons/jupyter.svg?react";
@@ -7,23 +8,28 @@ import GlobeIcon from "#/icons/globe.svg?react";
import ServerIcon from "#/icons/server.svg?react";
import GitChanges from "#/icons/git_changes.svg?react";
import VSCodeIcon from "#/icons/vscode.svg?react";
import ListIcon from "#/icons/list.svg?react";
import { cn } from "#/utils/utils";
import { ConversationTabNav } from "./conversation-tab-nav";
import { ChatActionTooltip } from "../../chat/chat-action-tooltip";
import { I18nKey } from "#/i18n/declaration";
import { VSCodeTooltipContent } from "./vscode-tooltip-content";
import {
useConversationStore,
setHasRightPanelToggled,
setSelectedTab,
setIsRightPanelShown,
type ConversationTab,
} from "#/state/conversation-store";
} from "#/state/conversation-slice";
import { RootState } from "#/store";
export function ConversationTabs() {
const {
selectedTab,
isRightPanelShown,
setHasRightPanelToggled,
setSelectedTab,
} = useConversationStore();
const dispatch = useDispatch();
const selectedTab = useSelector(
(state: RootState) => state.conversation.selectedTab,
);
const { isRightPanelShown } = useSelector(
(state: RootState) => state.conversation,
);
// Persist selectedTab and isRightPanelShown in localStorage
const [persistedSelectedTab, setPersistedSelectedTab] =
@@ -36,22 +42,18 @@ export function ConversationTabs() {
useLocalStorage<boolean>("conversation-right-panel-shown", true);
const onTabChange = (value: ConversationTab | null) => {
setSelectedTab(value);
dispatch(setSelectedTab(value));
// Persist the selected tab to localStorage
setPersistedSelectedTab(value);
};
// Initialize Zustand state from localStorage on component mount
// Initialize Redux state from localStorage on component mount
useEffect(() => {
// Initialize selectedTab from localStorage if available
setSelectedTab(persistedSelectedTab);
setHasRightPanelToggled(persistedIsRightPanelShown);
}, [
setSelectedTab,
setHasRightPanelToggled,
persistedSelectedTab,
persistedIsRightPanelShown,
]);
dispatch(setSelectedTab(persistedSelectedTab));
dispatch(setIsRightPanelShown(persistedIsRightPanelShown));
dispatch(setHasRightPanelToggled(persistedIsRightPanelShown));
}, []);
useEffect(() => {
const handlePanelVisibilityChange = () => {
@@ -71,13 +73,13 @@ export function ConversationTabs() {
const onTabSelected = (tab: ConversationTab) => {
if (selectedTab === tab && isRightPanelShown) {
// If clicking the same active tab, close the drawer
setHasRightPanelToggled(false);
dispatch(setHasRightPanelToggled(false));
setPersistedIsRightPanelShown(false);
} else {
// If clicking a different tab or drawer is closed, open drawer and select tab
onTabChange(tab);
if (!isRightPanelShown) {
setHasRightPanelToggled(true);
dispatch(setHasRightPanelToggled(true));
setPersistedIsRightPanelShown(true);
}
}
@@ -129,8 +131,19 @@ export function ConversationTabs() {
tooltipContent: t(I18nKey.COMMON$BROWSER),
tooltipAriaLabel: t(I18nKey.COMMON$BROWSER),
},
{
isActive: isTabActive("tasks"),
icon: ListIcon,
onClick: () => onTabSelected("tasks"),
tooltipContent: t(I18nKey.COMMON$TASKS),
tooltipAriaLabel: t(I18nKey.COMMON$TASKS),
},
];
// Debug logging to help troubleshoot tab visibility
console.log("ConversationTabs: Rendering", tabs.length, "tabs");
console.log("ConversationTabs: Tasks tab config:", tabs[tabs.length - 1]);
return (
<div
className={cn(
@@ -152,6 +165,7 @@ export function ConversationTabs() {
icon={icon}
onClick={onClick}
isActive={isActive}
data-testid={index === tabs.length - 1 ? "tasks-tab" : undefined}
/>
</ChatActionTooltip>
),
@@ -1,5 +1,5 @@
import React from "react";
import { Cell } from "#/state/jupyter-store";
import { Cell } from "#/state/jupyter-slice";
import { JupyterLine, parseCellContent } from "#/utils/parse-cell-content";
import { JupytrerCellInput } from "./jupyter-cell-input";
import { JupyterCellOutput } from "./jupyter-cell-output";
@@ -9,14 +9,13 @@ import { RUNTIME_INACTIVE_STATES } from "#/types/agent-state";
import { I18nKey } from "#/i18n/declaration";
import JupyterLargeIcon from "#/icons/jupyter-large.svg?react";
import { WaitingForRuntimeMessage } from "../chat/waiting-for-runtime-message";
import { useJupyterStore } from "#/state/jupyter-store";
interface JupyterEditorProps {
maxWidth: number;
}
export function JupyterEditor({ maxWidth }: JupyterEditorProps) {
const cells = useJupyterStore((state) => state.cells);
const cells = useSelector((state: RootState) => state.jupyter?.cells ?? []);
const { curAgentState } = useSelector((state: RootState) => state.agent);
const jupyterRef = React.useRef<HTMLDivElement>(null);
@@ -1,6 +1,11 @@
import { useTranslation } from "react-i18next";
import { useDispatch, useSelector } from "react-redux";
import { I18nKey } from "#/i18n/declaration";
import { useMicroagentManagementStore } from "#/state/microagent-management-store";
import {
setAddMicroagentModalVisible,
setSelectedRepository,
} from "#/state/microagent-management-slice";
import { RootState } from "#/store";
import { GitRepository } from "#/types/git";
interface MicroagentManagementAddMicroagentButtonProps {
@@ -12,16 +17,16 @@ export function MicroagentManagementAddMicroagentButton({
}: MicroagentManagementAddMicroagentButtonProps) {
const { t } = useTranslation();
const {
addMicroagentModalVisible,
setAddMicroagentModalVisible,
setSelectedRepository,
} = useMicroagentManagementStore();
const { addMicroagentModalVisible } = useSelector(
(state: RootState) => state.microagentManagement,
);
const dispatch = useDispatch();
const handleClick = (e: React.MouseEvent<HTMLButtonElement>) => {
e.stopPropagation();
setAddMicroagentModalVisible(!addMicroagentModalVisible);
setSelectedRepository(repository);
dispatch(setAddMicroagentModalVisible(!addMicroagentModalVisible));
dispatch(setSelectedRepository(repository));
};
return (
@@ -1,9 +1,15 @@
import React, { useEffect, useState } from "react";
import { useTranslation } from "react-i18next";
import { useDispatch, useSelector } from "react-redux";
import { MicroagentManagementSidebar } from "./microagent-management-sidebar";
import { MicroagentManagementMain } from "./microagent-management-main";
import { MicroagentManagementUpsertMicroagentModal } from "./microagent-management-upsert-microagent-modal";
import { useMicroagentManagementStore } from "#/state/microagent-management-store";
import { RootState } from "#/store";
import {
setAddMicroagentModalVisible,
setUpdateMicroagentModalVisible,
setLearnThisRepoModalVisible,
} from "#/state/microagent-management-slice";
import { useCreateConversationAndSubscribeMultiple } from "#/hooks/use-create-conversation-and-subscribe-multiple";
import {
LearnThisRepoFormData,
@@ -100,15 +106,14 @@ export function MicroagentManagementContent() {
updateMicroagentModalVisible,
selectedRepository,
learnThisRepoModalVisible,
setAddMicroagentModalVisible,
setUpdateMicroagentModalVisible,
setLearnThisRepoModalVisible,
} = useMicroagentManagementStore();
} = useSelector((state: RootState) => state.microagentManagement);
const { providers } = useUserProviders();
const { t } = useTranslation();
const dispatch = useDispatch();
const { createConversationAndSubscribe, isPending } =
useCreateConversationAndSubscribeMultiple();
@@ -125,9 +130,9 @@ export function MicroagentManagementContent() {
const hideUpsertMicroagentModal = (isUpdate: boolean = false) => {
if (isUpdate) {
setUpdateMicroagentModalVisible(false);
dispatch(setUpdateMicroagentModalVisible(false));
} else {
setAddMicroagentModalVisible(false);
dispatch(setAddMicroagentModalVisible(false));
}
};
@@ -259,7 +264,7 @@ export function MicroagentManagementContent() {
};
const hideLearnThisRepoModal = () => {
setLearnThisRepoModalVisible(false);
dispatch(setLearnThisRepoModalVisible(false));
};
const handleLearnThisRepoConfirm = (formData: LearnThisRepoFormData) => {
@@ -1,13 +1,16 @@
import { useTranslation } from "react-i18next";
import { useSelector } from "react-redux";
import { RootState } from "#/store";
import { I18nKey } from "#/i18n/declaration";
import { BrandButton } from "../settings/brand-button";
import { Loader } from "#/components/shared/loader";
import { useMicroagentManagementStore } from "#/state/microagent-management-store";
export function MicroagentManagementConversationStopped() {
const { t } = useTranslation();
const { selectedMicroagentItem } = useMicroagentManagementStore();
const { selectedMicroagentItem } = useSelector(
(state: RootState) => state.microagentManagement,
);
const { conversation } = selectedMicroagentItem ?? {};
@@ -1,13 +1,16 @@
import { useTranslation } from "react-i18next";
import { useSelector } from "react-redux";
import { RootState } from "#/store";
import { I18nKey } from "#/i18n/declaration";
import { BrandButton } from "../settings/brand-button";
import { Loader } from "#/components/shared/loader";
import { useMicroagentManagementStore } from "#/state/microagent-management-store";
export function MicroagentManagementError() {
const { t } = useTranslation();
const { selectedMicroagentItem } = useMicroagentManagementStore();
const { selectedMicroagentItem } = useSelector(
(state: RootState) => state.microagentManagement,
);
const { conversation } = selectedMicroagentItem ?? {};
@@ -1,11 +1,12 @@
import { useState } from "react";
import { useTranslation } from "react-i18next";
import { useSelector } from "react-redux";
import { FaCircleInfo } from "react-icons/fa6";
import { ModalBackdrop } from "#/components/shared/modals/modal-backdrop";
import { ModalBody } from "#/components/shared/modals/modal-body";
import { BrandButton } from "../settings/brand-button";
import { I18nKey } from "#/i18n/declaration";
import { useMicroagentManagementStore } from "#/state/microagent-management-store";
import { RootState } from "#/store";
import XIcon from "#/icons/x.svg?react";
import { cn, getRepoMdCreatePrompt } from "#/utils/utils";
import { LearnThisRepoFormData } from "#/types/microagent-management";
@@ -25,7 +26,9 @@ export function MicroagentManagementLearnThisRepoModal({
const [query, setQuery] = useState<string>("");
const { selectedRepository } = useMicroagentManagementStore();
const { selectedRepository } = useSelector(
(state: RootState) => state.microagentManagement,
);
const onSubmit = (event: React.FormEvent<HTMLFormElement>) => {
event.preventDefault();
@@ -1,6 +1,10 @@
import { useDispatch } from "react-redux";
import { useTranslation } from "react-i18next";
import { I18nKey } from "#/i18n/declaration";
import { useMicroagentManagementStore } from "#/state/microagent-management-store";
import {
setLearnThisRepoModalVisible,
setSelectedRepository,
} from "#/state/microagent-management-slice";
import { GitRepository } from "#/types/git";
interface MicroagentManagementLearnThisRepoProps {
@@ -10,13 +14,12 @@ interface MicroagentManagementLearnThisRepoProps {
export function MicroagentManagementLearnThisRepo({
repository,
}: MicroagentManagementLearnThisRepoProps) {
const { setLearnThisRepoModalVisible, setSelectedRepository } =
useMicroagentManagementStore();
const dispatch = useDispatch();
const { t } = useTranslation();
const handleClick = () => {
setLearnThisRepoModalVisible(true);
setSelectedRepository(repository);
dispatch(setLearnThisRepoModalVisible(true));
dispatch(setSelectedRepository(repository));
};
return (
@@ -1,4 +1,5 @@
import { useMicroagentManagementStore } from "#/state/microagent-management-store";
import { useSelector } from "react-redux";
import { RootState } from "#/store";
import { MicroagentManagementDefault } from "./microagent-management-default";
import { MicroagentManagementOpeningPr } from "./microagent-management-opening-pr";
import { MicroagentManagementReviewPr } from "./microagent-management-review-pr";
@@ -7,7 +8,9 @@ import { MicroagentManagementError } from "./microagent-management-error";
import { MicroagentManagementConversationStopped } from "./microagent-management-conversation-stopped";
export function MicroagentManagementMain() {
const { selectedMicroagentItem } = useMicroagentManagementStore();
const { selectedMicroagentItem } = useSelector(
(state: RootState) => state.microagentManagement,
);
const { microagent, conversation } = selectedMicroagentItem ?? {};
@@ -1,9 +1,14 @@
import { useMemo } from "react";
import { useDispatch, useSelector } from "react-redux";
import { useTranslation } from "react-i18next";
import { I18nKey } from "#/i18n/declaration";
import { RepositoryMicroagent } from "#/types/microagent-management";
import { Conversation } from "#/api/open-hands.types";
import { useMicroagentManagementStore } from "#/state/microagent-management-store";
import {
setSelectedMicroagentItem,
setSelectedRepository,
} from "#/state/microagent-management-slice";
import { RootState } from "#/store";
import { cn } from "#/utils/utils";
import { GitRepository } from "#/types/git";
@@ -20,11 +25,11 @@ export function MicroagentManagementMicroagentCard({
}: MicroagentManagementMicroagentCardProps) {
const { t } = useTranslation();
const {
selectedMicroagentItem,
setSelectedMicroagentItem,
setSelectedRepository,
} = useMicroagentManagementStore();
const { selectedMicroagentItem } = useSelector(
(state: RootState) => state.microagentManagement,
);
const dispatch = useDispatch();
const {
status: conversationStatus,
@@ -78,18 +83,20 @@ export function MicroagentManagementMicroagentCard({
}, [microagent, conversation, selectedMicroagentItem]);
const onMicroagentCardClicked = () => {
setSelectedMicroagentItem(
microagent
? {
microagent,
conversation: undefined,
}
: {
microagent: undefined,
conversation,
},
dispatch(
setSelectedMicroagentItem(
microagent
? {
microagent,
conversation: null,
}
: {
microagent: null,
conversation,
},
),
);
setSelectedRepository(repository);
dispatch(setSelectedRepository(repository));
};
return (
@@ -1,13 +1,16 @@
import { useTranslation } from "react-i18next";
import { useSelector } from "react-redux";
import { RootState } from "#/store";
import { I18nKey } from "#/i18n/declaration";
import { BrandButton } from "../settings/brand-button";
import { Loader } from "#/components/shared/loader";
import { useMicroagentManagementStore } from "#/state/microagent-management-store";
export function MicroagentManagementOpeningPr() {
const { t } = useTranslation();
const { selectedMicroagentItem } = useMicroagentManagementStore();
const { selectedMicroagentItem } = useSelector(
(state: RootState) => state.microagentManagement,
);
const { conversation } = selectedMicroagentItem ?? {};
@@ -1,12 +1,14 @@
import { useTranslation } from "react-i18next";
import { useEffect } from "react";
import { useDispatch, useSelector } from "react-redux";
import { Spinner } from "@heroui/react";
import { MicroagentManagementMicroagentCard } from "./microagent-management-microagent-card";
import { MicroagentManagementLearnThisRepo } from "./microagent-management-learn-this-repo";
import { useRepositoryMicroagents } from "#/hooks/query/use-repository-microagents";
import { useMicroagentManagementConversations } from "#/hooks/query/use-microagent-management-conversations";
import { GitRepository } from "#/types/git";
import { useMicroagentManagementStore } from "#/state/microagent-management-store";
import { RootState } from "#/store";
import { setSelectedMicroagentItem } from "#/state/microagent-management-slice";
import { cn } from "#/utils/utils";
import { I18nKey } from "#/i18n/declaration";
@@ -17,8 +19,11 @@ interface MicroagentManagementRepoMicroagentsProps {
export function MicroagentManagementRepoMicroagents({
repository,
}: MicroagentManagementRepoMicroagentsProps) {
const { selectedMicroagentItem, setSelectedMicroagentItem } =
useMicroagentManagementStore();
const { selectedMicroagentItem } = useSelector(
(state: RootState) => state.microagentManagement,
);
const dispatch = useDispatch();
const { t } = useTranslation();
@@ -55,22 +60,26 @@ export function MicroagentManagementRepoMicroagents({
conversation.conversation_id === selectedConversation.conversation_id,
);
if (latestSelectedConversation) {
setSelectedMicroagentItem({
microagent: undefined,
conversation: latestSelectedConversation,
});
dispatch(
setSelectedMicroagentItem({
microagent: null,
conversation: latestSelectedConversation,
}),
);
}
}
}, [conversations]);
useEffect(
() => () => {
setSelectedMicroagentItem({
microagent: undefined,
conversation: undefined,
});
dispatch(
setSelectedMicroagentItem({
microagent: null,
conversation: null,
}),
);
},
[setSelectedMicroagentItem],
[],
);
// Show loading only when both queries are loading
@@ -1,14 +1,17 @@
import { useSelector } from "react-redux";
import { useTranslation } from "react-i18next";
import { I18nKey } from "#/i18n/declaration";
import { BrandButton } from "../settings/brand-button";
import { getProviderName, constructPullRequestUrl } from "#/utils/utils";
import { Provider } from "#/types/settings";
import { useMicroagentManagementStore } from "#/state/microagent-management-store";
import { RootState } from "#/store";
export function MicroagentManagementReviewPr() {
const { t } = useTranslation();
const { selectedMicroagentItem } = useMicroagentManagementStore();
const { selectedMicroagentItem } = useSelector(
(state: RootState) => state.microagentManagement,
);
const { conversation } = selectedMicroagentItem ?? {};
@@ -1,8 +1,9 @@
import { Tab, Tabs } from "@heroui/react";
import { useTranslation } from "react-i18next";
import { useSelector } from "react-redux";
import { MicroagentManagementRepositories } from "./microagent-management-repositories";
import { I18nKey } from "#/i18n/declaration";
import { useMicroagentManagementStore } from "#/state/microagent-management-store";
import { RootState } from "#/store";
interface MicroagentManagementSidebarTabsProps {
isSearchLoading?: boolean;
@@ -14,7 +15,7 @@ export function MicroagentManagementSidebarTabs({
const { t } = useTranslation();
const { repositories, personalRepositories, organizationRepositories } =
useMicroagentManagementStore();
useSelector((state: RootState) => state.microagentManagement);
return (
<div className="flex w-full flex-col">

Some files were not shown because too many files have changed in this diff Show More