mirror of
https://github.com/All-Hands-AI/OpenHands.git
synced 2026-04-29 03:00:45 -04:00
Compare commits
141 Commits
add-postho
...
openhands-
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
530153f54b | ||
|
|
89f8e162da | ||
|
|
b0a9938e6c | ||
|
|
039fe295a4 | ||
|
|
8f3ff1210e | ||
|
|
5e1e685493 | ||
|
|
e9f2b72ea5 | ||
|
|
986b90be0a | ||
|
|
bf9f2aa7a5 | ||
|
|
b3bd3924a0 | ||
|
|
0de50153a0 | ||
|
|
a04024a239 | ||
|
|
1e509a70d4 | ||
|
|
300a59853b | ||
|
|
2514b200c5 | ||
|
|
52d881c98d | ||
|
|
6ac23aea80 | ||
|
|
0412949018 | ||
|
|
5b5adc5c7b | ||
|
|
0b40f6fac8 | ||
|
|
44d488b718 | ||
|
|
4f9120ffc6 | ||
|
|
50426edaa1 | ||
|
|
a792f84a83 | ||
|
|
cd9d96766c | ||
|
|
14564b25d6 | ||
|
|
0637b5b912 | ||
|
|
20bf48b693 | ||
|
|
5b3270be2d | ||
|
|
ae43744052 | ||
|
|
b5d7e428d1 | ||
|
|
d65ea313e8 | ||
|
|
a09ecadba6 | ||
|
|
358166feb2 | ||
|
|
85e2b73eb4 | ||
|
|
c18475ddc2 | ||
|
|
06fcf54475 | ||
|
|
f751f8ab37 | ||
|
|
523c6d03c1 | ||
|
|
0e0f043e59 | ||
|
|
91c691d526 | ||
|
|
c6092291ce | ||
|
|
a2c55cfdef | ||
|
|
76cad626ed | ||
|
|
7c23993344 | ||
|
|
b669715416 | ||
|
|
7292122b72 | ||
|
|
992ae15c78 | ||
|
|
f2b4772ac2 | ||
|
|
9b9b1291fc | ||
|
|
6171395ef9 | ||
|
|
d270476d6c | ||
|
|
f1f7dca009 | ||
|
|
45f572f268 | ||
|
|
4b124d5906 | ||
|
|
988d4aa679 | ||
|
|
b452fe273c | ||
|
|
9544b37c8a | ||
|
|
0491357fef | ||
|
|
fedd517a71 | ||
|
|
9cbed8802f | ||
|
|
c2e1babd76 | ||
|
|
cc8b677f3e | ||
|
|
78e3f82de1 | ||
|
|
20ca2cd8b9 | ||
|
|
ea0fcd6002 | ||
|
|
6bcebd4b9d | ||
|
|
93e9db3206 | ||
|
|
caf34d83bd | ||
|
|
49d3cd0863 | ||
|
|
34989f8e96 | ||
|
|
9274664302 | ||
|
|
437f0a0154 | ||
|
|
9d79bf5fff | ||
|
|
4c62b1d428 | ||
|
|
b2a4b4ed90 | ||
|
|
9262babc3b | ||
|
|
1c80ded753 | ||
|
|
4de8c4d6b1 | ||
|
|
91f2254039 | ||
|
|
66fd156c65 | ||
|
|
4ec16f3c2e | ||
|
|
628003abef | ||
|
|
07e400b73d | ||
|
|
7e14a512e0 | ||
|
|
d7e8f843ad | ||
|
|
e69ae81ad2 | ||
|
|
03c5db32e6 | ||
|
|
5e5bf23f9c | ||
|
|
e0fcd7a61e | ||
|
|
e9989d1085 | ||
|
|
49c515b252 | ||
|
|
2d05578c21 | ||
|
|
d05a6f30e1 | ||
|
|
10c81c39fb | ||
|
|
2d599349ef | ||
|
|
33caf5c6ca | ||
|
|
a9850766a7 | ||
|
|
77e2416def | ||
|
|
02af9865ec | ||
|
|
75ca2aa6b1 | ||
|
|
d820661592 | ||
|
|
1ff351a4f1 | ||
|
|
78b8e58561 | ||
|
|
fddbfce51a | ||
|
|
20d3766451 | ||
|
|
72b5e18898 | ||
|
|
03b8b8c19a | ||
|
|
7c2f1b075e | ||
|
|
883da1b28c | ||
|
|
bb98d94b35 | ||
|
|
d114c45135 | ||
|
|
36e092e0ac | ||
|
|
e2bb69908a | ||
|
|
cd33c5eac7 | ||
|
|
0f8a139fb5 | ||
|
|
ced4ee3038 | ||
|
|
cc19dac939 | ||
|
|
4c7c73a6f2 | ||
|
|
0493fea9fc | ||
|
|
1c2db9f468 | ||
|
|
c210230802 | ||
|
|
9b12989a36 | ||
|
|
f1bcd72cd8 | ||
|
|
a5c57fbd3a | ||
|
|
513f7ab7e7 | ||
|
|
53c0c5a07b | ||
|
|
d924e7cea5 | ||
|
|
7910a90522 | ||
|
|
35d49f6941 | ||
|
|
e359a4affa | ||
|
|
159f79f9d8 | ||
|
|
827c19ccd9 | ||
|
|
588b9f34be | ||
|
|
fb02fefaca | ||
|
|
856d5ff976 | ||
|
|
eb4aeb3922 | ||
|
|
4b47e5215b | ||
|
|
0087082643 | ||
|
|
e698a393b2 | ||
|
|
d48e2a4cf1 |
3
.github/ISSUE_TEMPLATE/feature_request.md
vendored
3
.github/ISSUE_TEMPLATE/feature_request.md
vendored
@@ -12,3 +12,6 @@ assignees: ''
|
||||
**Describe the UX or technical implementation you have in mind**
|
||||
|
||||
**Additional context**
|
||||
|
||||
|
||||
### If you find this feature request or enhancement useful, make sure to add a 👍 to the issue
|
||||
|
||||
53
.github/workflows/dummy-agent-test.yml
vendored
53
.github/workflows/dummy-agent-test.yml
vendored
@@ -1,53 +0,0 @@
|
||||
# Workflow that uses the DummyAgent to run a simple task
|
||||
name: Run E2E test with dummy agent
|
||||
|
||||
# Always run on "main"
|
||||
# Always run on PRs
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- main
|
||||
pull_request:
|
||||
|
||||
# If triggered by a PR, it will be in the same group. However, each commit on main will be in its own unique group
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ (github.head_ref && github.ref) || github.run_id }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
test:
|
||||
runs-on: blacksmith-4vcpu-ubuntu-2204
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- name: Set up Docker Buildx
|
||||
id: buildx
|
||||
uses: docker/setup-buildx-action@v3
|
||||
- name: Install tmux
|
||||
run: sudo apt-get update && sudo apt-get install -y tmux
|
||||
- name: Setup Node.js
|
||||
uses: useblacksmith/setup-node@v5
|
||||
with:
|
||||
node-version: '22.x'
|
||||
- name: Install poetry via pipx
|
||||
run: pipx install poetry
|
||||
- name: Set up Python
|
||||
uses: useblacksmith/setup-python@v6
|
||||
with:
|
||||
python-version: '3.12'
|
||||
cache: 'poetry'
|
||||
- name: Install Python dependencies using Poetry
|
||||
run: poetry install --without evaluation
|
||||
- name: Build Environment
|
||||
run: make build
|
||||
- name: Run tests
|
||||
run: |
|
||||
set -e
|
||||
SANDBOX_FORCE_REBUILD_RUNTIME=True poetry run python3 openhands/core/main.py -t "do a flip" -d ./workspace/ -c DummyAgent
|
||||
- name: Check exit code
|
||||
run: |
|
||||
if [ $? -ne 0 ]; then
|
||||
echo "Test failed"
|
||||
exit 1
|
||||
else
|
||||
echo "Test passed"
|
||||
fi
|
||||
105
.github/workflows/ghcr-build.yml
vendored
105
.github/workflows/ghcr-build.yml
vendored
@@ -29,7 +29,7 @@ env:
|
||||
|
||||
jobs:
|
||||
define-matrix:
|
||||
runs-on: blacksmith-4vcpu-ubuntu-2204
|
||||
runs-on: blacksmith
|
||||
outputs:
|
||||
base_image: ${{ steps.define-base-images.outputs.base_image }}
|
||||
steps:
|
||||
@@ -137,38 +137,56 @@ jobs:
|
||||
with:
|
||||
path: |
|
||||
~/.cache/pypoetry
|
||||
~/.cache/ms-playwright
|
||||
~/.virtualenvs
|
||||
key: ${{ runner.os }}-poetry-${{ hashFiles('**/poetry.lock') }}
|
||||
# This is the one that saves the cache, the others set 'lookup-only: true'
|
||||
restore-keys: |
|
||||
${{ runner.os }}-poetry-
|
||||
- name: Install poetry via pipx
|
||||
run: pipx install poetry
|
||||
- name: Install Python dependencies using Poetry
|
||||
run: make install-python-dependencies
|
||||
run: make install-python-dependencies POETRY_GROUP=main INSTALL_PLAYWRIGHT=0
|
||||
- name: Create source distribution and Dockerfile
|
||||
run: poetry run python3 openhands/runtime/utils/runtime_build.py --base_image ${{ matrix.base_image.image }} --build_folder containers/runtime --force_rebuild
|
||||
- name: Lowercase Repository Owner
|
||||
run: |
|
||||
echo REPO_OWNER=$(echo ${{ github.repository_owner }} | tr '[:upper:]' '[:lower:]') >> $GITHUB_ENV
|
||||
- name: Short SHA
|
||||
run: |
|
||||
echo SHORT_SHA=$(git rev-parse --short "$RELEVANT_SHA") >> $GITHUB_ENV
|
||||
- name: Determine docker build params
|
||||
if: github.event.pull_request.head.repo.fork != true
|
||||
shell: bash
|
||||
run: |
|
||||
./containers/build.sh -i runtime -o ${{ env.REPO_OWNER }} -t ${{ matrix.base_image.tag }} --dry
|
||||
|
||||
DOCKER_BUILD_JSON=$(jq -c . < docker-build-dry.json)
|
||||
echo "DOCKER_TAGS=$(echo "$DOCKER_BUILD_JSON" | jq -r '.tags | join(",")')" >> $GITHUB_ENV
|
||||
echo "DOCKER_PLATFORM=$(echo "$DOCKER_BUILD_JSON" | jq -r '.platform')" >> $GITHUB_ENV
|
||||
echo "DOCKER_BUILD_ARGS=$(echo "$DOCKER_BUILD_JSON" | jq -r '.build_args | join(",")')" >> $GITHUB_ENV
|
||||
- name: Build and push runtime image ${{ matrix.base_image.image }}
|
||||
if: github.event.pull_request.head.repo.fork != true
|
||||
run: |
|
||||
./containers/build.sh -i runtime -o ${{ env.REPO_OWNER }} --push -t ${{ matrix.base_image.tag }}
|
||||
# Forked repos can't push to GHCR, so we need to upload the image as an artifact
|
||||
uses: useblacksmith/build-push-action@v1
|
||||
with:
|
||||
push: true
|
||||
tags: ${{ env.DOCKER_TAGS }}
|
||||
platforms: ${{ env.DOCKER_PLATFORM }}
|
||||
build-args: ${{ env.DOCKER_BUILD_ARGS }}
|
||||
context: containers/runtime
|
||||
provenance: false
|
||||
# Forked repos can't push to GHCR, so we just build in order to populate the cache for rebuilding
|
||||
- name: Build runtime image ${{ matrix.base_image.image }} for fork
|
||||
if: github.event.pull_request.head.repo.fork
|
||||
uses: docker/build-push-action@v6
|
||||
uses: useblacksmith/build-push-action@v1
|
||||
with:
|
||||
tags: ghcr.io/${{ env.REPO_OWNER }}/runtime:${{ env.RELEVANT_SHA }}-${{ matrix.base_image.tag }}
|
||||
outputs: type=docker,dest=/tmp/runtime-${{ matrix.base_image.tag }}.tar
|
||||
context: containers/runtime
|
||||
- name: Upload runtime image for fork
|
||||
- name: Upload runtime source for fork
|
||||
if: github.event.pull_request.head.repo.fork
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: runtime-${{ matrix.base_image.tag }}
|
||||
path: /tmp/runtime-${{ matrix.base_image.tag }}.tar
|
||||
name: runtime-src-${{ matrix.base_image.tag }}
|
||||
path: containers/runtime
|
||||
|
||||
verify_hash_equivalence_in_runtime_and_app:
|
||||
name: Verify Hash Equivalence in Runtime and Docker images
|
||||
@@ -189,9 +207,9 @@ jobs:
|
||||
with:
|
||||
path: |
|
||||
~/.cache/pypoetry
|
||||
~/.cache/ms-playwright
|
||||
~/.virtualenvs
|
||||
key: ${{ runner.os }}-poetry-${{ hashFiles('**/poetry.lock') }}
|
||||
lookup-only: true
|
||||
restore-keys: |
|
||||
${{ runner.os }}-poetry-
|
||||
- name: Set up Python
|
||||
@@ -201,7 +219,7 @@ jobs:
|
||||
- name: Install poetry via pipx
|
||||
run: pipx install poetry
|
||||
- name: Install Python dependencies using Poetry
|
||||
run: make install-python-dependencies
|
||||
run: make install-python-dependencies POETRY_GROUP=main INSTALL_PLAYWRIGHT=0
|
||||
- name: Get hash in App Image
|
||||
run: |
|
||||
echo "Hash from app image: ${{ needs.ghcr_build_app.outputs.hash_from_app_image }}"
|
||||
@@ -229,7 +247,7 @@ jobs:
|
||||
test_runtime_root:
|
||||
name: RT Unit Tests (Root)
|
||||
needs: [ghcr_build_runtime, define-matrix]
|
||||
runs-on: blacksmith-4vcpu-ubuntu-2204
|
||||
runs-on: blacksmith-8vcpu-ubuntu-2204
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
@@ -239,25 +257,31 @@ jobs:
|
||||
- name: Set up Docker Buildx
|
||||
id: buildx
|
||||
uses: docker/setup-buildx-action@v3
|
||||
# Forked repos can't push to GHCR, so we need to download the image as an artifact
|
||||
- name: Download runtime image for fork
|
||||
- name: Download runtime source for fork
|
||||
if: github.event.pull_request.head.repo.fork
|
||||
uses: actions/download-artifact@v4
|
||||
with:
|
||||
name: runtime-${{ matrix.base_image.tag }}
|
||||
path: /tmp
|
||||
- name: Load runtime image for fork
|
||||
if: github.event.pull_request.head.repo.fork
|
||||
name: runtime-src-${{ matrix.base_image.tag }}
|
||||
path: containers/runtime
|
||||
- name: Lowercase Repository Owner
|
||||
run: |
|
||||
docker load --input /tmp/runtime-${{ matrix.base_image.tag }}.tar
|
||||
echo REPO_OWNER=$(echo ${{ github.repository_owner }} | tr '[:upper:]' '[:lower:]') >> $GITHUB_ENV
|
||||
# Forked repos can't push to GHCR, so we need to rebuild using cache
|
||||
- name: Build runtime image ${{ matrix.base_image.image }} for fork
|
||||
if: github.event.pull_request.head.repo.fork
|
||||
uses: useblacksmith/build-push-action@v1
|
||||
with:
|
||||
load: true
|
||||
tags: ghcr.io/${{ env.REPO_OWNER }}/runtime:${{ env.RELEVANT_SHA }}-${{ matrix.base_image.tag }}
|
||||
context: containers/runtime
|
||||
- name: Cache Poetry dependencies
|
||||
uses: useblacksmith/cache@v5
|
||||
with:
|
||||
path: |
|
||||
~/.cache/pypoetry
|
||||
~/.cache/ms-playwright
|
||||
~/.virtualenvs
|
||||
key: ${{ runner.os }}-poetry-${{ hashFiles('**/poetry.lock') }}
|
||||
lookup-only: true
|
||||
restore-keys: |
|
||||
${{ runner.os }}-poetry-
|
||||
- name: Set up Python
|
||||
@@ -267,10 +291,7 @@ jobs:
|
||||
- name: Install poetry via pipx
|
||||
run: pipx install poetry
|
||||
- name: Install Python dependencies using Poetry
|
||||
run: make install-python-dependencies
|
||||
- name: Lowercase Repository Owner
|
||||
run: |
|
||||
echo REPO_OWNER=$(echo ${{ github.repository_owner }} | tr '[:upper:]' '[:lower:]') >> $GITHUB_ENV
|
||||
run: make install-python-dependencies POETRY_GROUP=main,test,runtime INSTALL_PLAYWRIGHT=0
|
||||
- name: Run docker runtime tests
|
||||
run: |
|
||||
# We install pytest-xdist in order to run tests across CPUs
|
||||
@@ -289,7 +310,7 @@ jobs:
|
||||
SANDBOX_RUNTIME_CONTAINER_IMAGE=$image_name \
|
||||
TEST_IN_CI=true \
|
||||
RUN_AS_OPENHANDS=false \
|
||||
poetry run pytest -n 3 -raRs --reruns 2 --reruns-delay 5 --cov=openhands --cov-report=xml -s ./tests/runtime --ignore=tests/runtime/test_browsergym_envs.py
|
||||
poetry run pytest -n 7 -raRs --reruns 2 --reruns-delay 5 --cov=openhands --cov-report=xml -s ./tests/runtime --ignore=tests/runtime/test_browsergym_envs.py --durations=10
|
||||
- name: Upload coverage to Codecov
|
||||
uses: codecov/codecov-action@v5
|
||||
env:
|
||||
@@ -298,7 +319,7 @@ jobs:
|
||||
# Run unit tests with the Docker runtime Docker images as openhands user
|
||||
test_runtime_oh:
|
||||
name: RT Unit Tests (openhands)
|
||||
runs-on: blacksmith-4vcpu-ubuntu-2204
|
||||
runs-on: blacksmith-8vcpu-ubuntu-2204
|
||||
needs: [ghcr_build_runtime, define-matrix]
|
||||
strategy:
|
||||
matrix:
|
||||
@@ -308,17 +329,23 @@ jobs:
|
||||
- name: Set up Docker Buildx
|
||||
id: buildx
|
||||
uses: docker/setup-buildx-action@v3
|
||||
# Forked repos can't push to GHCR, so we need to download the image as an artifact
|
||||
- name: Download runtime image for fork
|
||||
- name: Download runtime source for fork
|
||||
if: github.event.pull_request.head.repo.fork
|
||||
uses: actions/download-artifact@v4
|
||||
with:
|
||||
name: runtime-${{ matrix.base_image.tag }}
|
||||
path: /tmp
|
||||
- name: Load runtime image for fork
|
||||
if: github.event.pull_request.head.repo.fork
|
||||
name: runtime-src-${{ matrix.base_image.tag }}
|
||||
path: containers/runtime
|
||||
- name: Lowercase Repository Owner
|
||||
run: |
|
||||
docker load --input /tmp/runtime-${{ matrix.base_image.tag }}.tar
|
||||
echo REPO_OWNER=$(echo ${{ github.repository_owner }} | tr '[:upper:]' '[:lower:]') >> $GITHUB_ENV
|
||||
# Forked repos can't push to GHCR, so we need to rebuild using cache
|
||||
- name: Build runtime image ${{ matrix.base_image.image }} for fork
|
||||
if: github.event.pull_request.head.repo.fork
|
||||
uses: useblacksmith/build-push-action@v1
|
||||
with:
|
||||
load: true
|
||||
tags: ghcr.io/${{ env.REPO_OWNER }}/runtime:${{ env.RELEVANT_SHA }}-${{ matrix.base_image.tag }}
|
||||
context: containers/runtime
|
||||
- name: Cache Poetry dependencies
|
||||
uses: useblacksmith/cache@v5
|
||||
with:
|
||||
@@ -326,6 +353,7 @@ jobs:
|
||||
~/.cache/pypoetry
|
||||
~/.virtualenvs
|
||||
key: ${{ runner.os }}-poetry-${{ hashFiles('**/poetry.lock') }}
|
||||
lookup-only: true
|
||||
restore-keys: |
|
||||
${{ runner.os }}-poetry-
|
||||
- name: Set up Python
|
||||
@@ -335,10 +363,7 @@ jobs:
|
||||
- name: Install poetry via pipx
|
||||
run: pipx install poetry
|
||||
- name: Install Python dependencies using Poetry
|
||||
run: make install-python-dependencies
|
||||
- name: Lowercase Repository Owner
|
||||
run: |
|
||||
echo REPO_OWNER=$(echo ${{ github.repository_owner }} | tr '[:upper:]' '[:lower:]') >> $GITHUB_ENV
|
||||
run: make install-python-dependencies POETRY_GROUP=main,test,runtime INSTALL_PLAYWRIGHT=0
|
||||
- name: Run runtime tests
|
||||
run: |
|
||||
# We install pytest-xdist in order to run tests across CPUs
|
||||
@@ -354,7 +379,7 @@ jobs:
|
||||
SANDBOX_RUNTIME_CONTAINER_IMAGE=$image_name \
|
||||
TEST_IN_CI=true \
|
||||
RUN_AS_OPENHANDS=true \
|
||||
poetry run pytest -n 3 -raRs --reruns 2 --reruns-delay 5 --cov=openhands --cov-report=xml -s ./tests/runtime --ignore=tests/runtime/test_browsergym_envs.py
|
||||
poetry run pytest -n 7 -raRs --reruns 2 --reruns-delay 5 --cov=openhands --cov-report=xml -s ./tests/runtime --ignore=tests/runtime/test_browsergym_envs.py --durations=10
|
||||
- name: Upload coverage to Codecov
|
||||
uses: codecov/codecov-action@v5
|
||||
env:
|
||||
|
||||
11
.github/workflows/openhands-resolver.yml
vendored
11
.github/workflows/openhands-resolver.yml
vendored
@@ -16,6 +16,11 @@ on:
|
||||
type: string
|
||||
default: "main"
|
||||
description: "Target branch to pull and create PR against"
|
||||
pr_type:
|
||||
required: false
|
||||
type: string
|
||||
default: "draft"
|
||||
description: "The PR type that is going to be created (draft, ready)"
|
||||
LLM_MODEL:
|
||||
required: false
|
||||
type: string
|
||||
@@ -174,7 +179,7 @@ jobs:
|
||||
|
||||
echo "MAX_ITERATIONS=${{ inputs.max_iterations || 50 }}" >> $GITHUB_ENV
|
||||
echo "SANDBOX_ENV_GITHUB_TOKEN=${{ secrets.PAT_TOKEN || github.token }}" >> $GITHUB_ENV
|
||||
echo "SANDBOX_ENV_BASE_CONTAINER_IMAGE=${{ inputs.base_container_image }}" >> $GITHUB_ENV
|
||||
echo "SANDBOX_BASE_CONTAINER_IMAGE=${{ inputs.base_container_image }}" >> $GITHUB_ENV
|
||||
|
||||
# Set branch variables
|
||||
echo "TARGET_BRANCH=${{ inputs.target_branch || 'main' }}" >> $GITHUB_ENV
|
||||
@@ -280,9 +285,9 @@ jobs:
|
||||
cd /tmp && python -m openhands.resolver.send_pull_request \
|
||||
--issue-number ${{ env.ISSUE_NUMBER }} \
|
||||
--target-branch ${{ env.TARGET_BRANCH }} \
|
||||
--pr-type draft \
|
||||
--pr-type ${{ inputs.pr_type || 'draft' }} \
|
||||
--reviewer ${{ github.actor }} | tee pr_result.txt && \
|
||||
grep "draft created" pr_result.txt | sed 's/.*\///g' > pr_number.txt
|
||||
grep "PR created" pr_result.txt | sed 's/.*\///g' > pr_number.txt
|
||||
else
|
||||
cd /tmp && python -m openhands.resolver.send_pull_request \
|
||||
--issue-number ${{ env.ISSUE_NUMBER }} \
|
||||
|
||||
@@ -54,3 +54,20 @@ Frontend:
|
||||
## Template for Github Pull Request
|
||||
|
||||
If you are starting a pull request (PR), please follow the template in `.github/pull_request_template.md`.
|
||||
|
||||
## Implementation Details
|
||||
|
||||
These details may or may not be useful for your current task.
|
||||
|
||||
### Frontend
|
||||
|
||||
#### Action Handling:
|
||||
- Actions are defined in `frontend/src/types/action-type.ts`
|
||||
- The `HANDLED_ACTIONS` array in `frontend/src/state/chat-slice.ts` determines which actions are displayed as collapsible UI elements
|
||||
- To add a new action type to the UI:
|
||||
1. Add the action type to the `HANDLED_ACTIONS` array
|
||||
2. Implement the action handling in `addAssistantAction` function in chat-slice.ts
|
||||
3. Add a translation key in the format `ACTION_MESSAGE$ACTION_NAME` to the i18n files
|
||||
- Actions with `thought` property are displayed in the UI based on their action type:
|
||||
- Regular actions (like "run", "edit") display the thought as a separate message
|
||||
- Special actions (like "think") are displayed as collapsible elements only
|
||||
|
||||
@@ -118,7 +118,7 @@ poetry run pytest ./tests/unit/test_*.py
|
||||
To reduce build time (e.g., if no changes were made to the client-runtime component), you can use an existing Docker container image by
|
||||
setting the SANDBOX_RUNTIME_CONTAINER_IMAGE environment variable to the desired Docker image.
|
||||
|
||||
Example: `export SANDBOX_RUNTIME_CONTAINER_IMAGE=ghcr.io/all-hands-ai/runtime:0.31-nikolaik`
|
||||
Example: `export SANDBOX_RUNTIME_CONTAINER_IMAGE=ghcr.io/all-hands-ai/runtime:0.33-nikolaik`
|
||||
|
||||
## Develop inside Docker container
|
||||
|
||||
|
||||
35
Makefile
35
Makefile
@@ -133,20 +133,29 @@ install-python-dependencies:
|
||||
export HNSWLIB_NO_NATIVE=1; \
|
||||
poetry run pip install chroma-hnswlib; \
|
||||
fi
|
||||
@poetry install
|
||||
@if [ -f "/etc/manjaro-release" ]; then \
|
||||
echo "$(BLUE)Detected Manjaro Linux. Installing Playwright dependencies...$(RESET)"; \
|
||||
poetry run pip install playwright; \
|
||||
poetry run playwright install chromium; \
|
||||
@if [ -n "${POETRY_GROUP}" ]; then \
|
||||
echo "Installing only POETRY_GROUP=${POETRY_GROUP}"; \
|
||||
poetry install --only $${POETRY_GROUP}; \
|
||||
else \
|
||||
if [ ! -f cache/playwright_chromium_is_installed.txt ]; then \
|
||||
echo "Running playwright install --with-deps chromium..."; \
|
||||
poetry run playwright install --with-deps chromium; \
|
||||
mkdir -p cache; \
|
||||
touch cache/playwright_chromium_is_installed.txt; \
|
||||
poetry install; \
|
||||
fi
|
||||
@if [ "${INSTALL_PLAYWRIGHT}" != "false" ] && [ "${INSTALL_PLAYWRIGHT}" != "0" ]; then \
|
||||
if [ -f "/etc/manjaro-release" ]; then \
|
||||
echo "$(BLUE)Detected Manjaro Linux. Installing Playwright dependencies...$(RESET)"; \
|
||||
poetry run pip install playwright; \
|
||||
poetry run playwright install chromium; \
|
||||
else \
|
||||
echo "Setup already done. Skipping playwright installation."; \
|
||||
if [ ! -f cache/playwright_chromium_is_installed.txt ]; then \
|
||||
echo "Running playwright install --with-deps chromium..."; \
|
||||
poetry run playwright install --with-deps chromium; \
|
||||
mkdir -p cache; \
|
||||
touch cache/playwright_chromium_is_installed.txt; \
|
||||
else \
|
||||
echo "Setup already done. Skipping playwright installation."; \
|
||||
fi \
|
||||
fi \
|
||||
else \
|
||||
echo "Skipping Playwright installation (INSTALL_PLAYWRIGHT=${INSTALL_PLAYWRIGHT})."; \
|
||||
fi
|
||||
@echo "$(GREEN)Python dependencies installed successfully.$(RESET)"
|
||||
|
||||
@@ -166,7 +175,7 @@ install-pre-commit-hooks:
|
||||
|
||||
lint-backend:
|
||||
@echo "$(YELLOW)Running linters...$(RESET)"
|
||||
@poetry run pre-commit run --files openhands/**/* agenthub/**/* evaluation/**/* --show-diff-on-failure --config $(PRE_COMMIT_CONFIG_PATH)
|
||||
@poetry run pre-commit run --files openhands/**/* evaluation/**/* tests/**/* --show-diff-on-failure --config $(PRE_COMMIT_CONFIG_PATH)
|
||||
|
||||
lint-frontend:
|
||||
@echo "$(YELLOW)Running linters for frontend...$(RESET)"
|
||||
@@ -185,7 +194,7 @@ test:
|
||||
|
||||
build-frontend:
|
||||
@echo "$(YELLOW)Building frontend...$(RESET)"
|
||||
@cd frontend && npm run build
|
||||
@cd frontend && npm run prepare && npm run build
|
||||
|
||||
# Start backend
|
||||
start-backend:
|
||||
|
||||
47
README.md
47
README.md
@@ -18,7 +18,7 @@
|
||||
<br/>
|
||||
<a href="https://docs.all-hands.dev/modules/usage/getting-started"><img src="https://img.shields.io/badge/Documentation-000?logo=googledocs&logoColor=FFE165&style=for-the-badge" alt="Check out the documentation"></a>
|
||||
<a href="https://arxiv.org/abs/2407.16741"><img src="https://img.shields.io/badge/Paper%20on%20Arxiv-000?logoColor=FFE165&logo=arxiv&style=for-the-badge" alt="Paper on Arxiv"></a>
|
||||
<a href="https://huggingface.co/spaces/OpenHands/evaluation"><img src="https://img.shields.io/badge/Benchmark%20score-000?logoColor=FFE165&logo=huggingface&style=for-the-badge" alt="Evaluation Benchmark Score"></a>
|
||||
<a href="https://docs.google.com/spreadsheets/d/1wOUdFCMyY6Nt0AIqF705KN4JKOWgeI4wUGUP60krXXs/edit?gid=0#gid=0"><img src="https://img.shields.io/badge/Benchmark%20score-000?logoColor=FFE165&logo=huggingface&style=for-the-badge" alt="Evaluation Benchmark Score"></a>
|
||||
<hr>
|
||||
</div>
|
||||
|
||||
@@ -27,7 +27,7 @@ Welcome to OpenHands (formerly OpenDevin), a platform for software development a
|
||||
OpenHands agents can do anything a human developer can: modify code, run commands, browse the web,
|
||||
call APIs, and yes—even copy code snippets from StackOverflow.
|
||||
|
||||
Learn more at [docs.all-hands.dev](https://docs.all-hands.dev), or jump to the [Quick Start](#-quick-start).
|
||||
Learn more at [docs.all-hands.dev](https://docs.all-hands.dev), or [sign up for OpenHands Cloud](https://app.all-hands.dev) to get started.
|
||||
|
||||
> [!IMPORTANT]
|
||||
> Using OpenHands for work? We'd love to chat! Fill out
|
||||
@@ -36,37 +36,50 @@ Learn more at [docs.all-hands.dev](https://docs.all-hands.dev), or jump to the [
|
||||
|
||||

|
||||
|
||||
## ⚡ Quick Start
|
||||
## ☁️ OpenHands Cloud
|
||||
The easiest way to get started with OpenHands is on [OpenHands Cloud](https://app.all-hands.dev),
|
||||
which comes with $50 in free credits for new users.
|
||||
|
||||
The easiest way to run OpenHands is in Docker.
|
||||
## 💻 Running OpenHands Locally
|
||||
|
||||
OpenHands can also run on your local system using Docker.
|
||||
See the [Running OpenHands](https://docs.all-hands.dev/modules/usage/installation) guide for
|
||||
system requirements and more information.
|
||||
|
||||
> [!WARNING]
|
||||
> On a public network? See our [Hardened Docker Installation Guide](https://docs.all-hands.dev/modules/usage/runtimes/docker#hardened-docker-installation)
|
||||
> to secure your deployment by restricting network binding and implementing additional security measures.
|
||||
|
||||
|
||||
```bash
|
||||
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.31-nikolaik
|
||||
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.33-nikolaik
|
||||
|
||||
docker run -it --rm --pull=always \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.31-nikolaik \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.33-nikolaik \
|
||||
-e LOG_ALL_EVENTS=true \
|
||||
-v /var/run/docker.sock:/var/run/docker.sock \
|
||||
-v ~/.openhands-state:/.openhands-state \
|
||||
-p 3000:3000 \
|
||||
--add-host host.docker.internal:host-gateway \
|
||||
--name openhands-app \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.31
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.33
|
||||
```
|
||||
|
||||
> [!WARNING]
|
||||
> On a public network? See our [Hardened Docker Installation](https://docs.all-hands.dev/modules/usage/runtimes/docker#hardened-docker-installation) guide
|
||||
> to secure your deployment by restricting network binding and implementing additional security measures.
|
||||
|
||||
You'll find OpenHands running at [http://localhost:3000](http://localhost:3000)!
|
||||
|
||||
Finally, you'll need a model provider and API key.
|
||||
When you open the application, you'll be asked to choose an LLM provider and add an API key.
|
||||
[Anthropic's Claude 3.5 Sonnet](https://www.anthropic.com/api) (`anthropic/claude-3-5-sonnet-20241022`)
|
||||
works best, but you have [many options](https://docs.all-hands.dev/modules/usage/llms).
|
||||
|
||||
---
|
||||
## 💡 Other ways to run OpenHands
|
||||
|
||||
> [!CAUTION]
|
||||
> OpenHands is meant to be run by a single user on their local workstation.
|
||||
> It is not appropriate for multi-tenant deployments where multiple users share the same instance. There is no built-in authentication, isolation, or scalability.
|
||||
>
|
||||
> If you're interested in running OpenHands in a multi-tenant environment, please
|
||||
> [get in touch with us](https://docs.google.com/forms/d/e/1FAIpQLSet3VbGaz8z32gW9Wm-Grl4jpt5WgMXPgJ4EDPVmCETCBpJtQ/viewform)
|
||||
> for advanced deployment options.
|
||||
|
||||
You can also [connect OpenHands to your local filesystem](https://docs.all-hands.dev/modules/usage/runtimes/docker#connecting-to-your-filesystem),
|
||||
run OpenHands in a scriptable [headless mode](https://docs.all-hands.dev/modules/usage/how-to/headless-mode),
|
||||
@@ -75,14 +88,6 @@ or run it on tagged issues with [a github action](https://docs.all-hands.dev/mod
|
||||
|
||||
Visit [Running OpenHands](https://docs.all-hands.dev/modules/usage/installation) for more information and setup instructions.
|
||||
|
||||
> [!CAUTION]
|
||||
> OpenHands is meant to be run by a single user on their local workstation.
|
||||
> It is not appropriate for multi-tenant deployments where multiple users share the same instance. There is no built-in isolation or scalability.
|
||||
>
|
||||
> If you're interested in running OpenHands in a multi-tenant environment, please
|
||||
> [get in touch with us](https://docs.google.com/forms/d/e/1FAIpQLSet3VbGaz8z32gW9Wm-Grl4jpt5WgMXPgJ4EDPVmCETCBpJtQ/viewform)
|
||||
> for advanced deployment options.
|
||||
|
||||
If you want to modify the OpenHands source code, check out [Development.md](https://github.com/All-Hands-AI/OpenHands/blob/main/Development.md).
|
||||
|
||||
Having issues? The [Troubleshooting Guide](https://docs.all-hands.dev/modules/usage/troubleshooting) can help.
|
||||
|
||||
@@ -216,13 +216,26 @@ model = "gpt-4o"
|
||||
[agent]
|
||||
|
||||
# Whether the browsing tool is enabled
|
||||
codeact_enable_browsing = true
|
||||
enable_browsing = true
|
||||
|
||||
# Whether the LLM draft editor is enabled
|
||||
codeact_enable_llm_editor = false
|
||||
enable_llm_editor = false
|
||||
|
||||
# Whether the standard editor tool (str_replace_editor) is enabled
|
||||
# Only has an effect if enable_llm_editor is False
|
||||
enable_editor = true
|
||||
|
||||
# Whether the IPython tool is enabled
|
||||
codeact_enable_jupyter = true
|
||||
enable_jupyter = true
|
||||
|
||||
# Whether the command tool is enabled
|
||||
enable_cmd = true
|
||||
|
||||
# Whether the think tool is enabled
|
||||
enable_think = true
|
||||
|
||||
# Whether the finish tool is enabled
|
||||
enable_finish = true
|
||||
|
||||
# LLM config group to use
|
||||
#llm_config = 'your-llm-config-group'
|
||||
|
||||
@@ -7,15 +7,17 @@ org_name=""
|
||||
push=0
|
||||
load=0
|
||||
tag_suffix=""
|
||||
dry_run=0
|
||||
|
||||
# Function to display usage information
|
||||
usage() {
|
||||
echo "Usage: $0 -i <image_name> [-o <org_name>] [--push] [--load] [-t <tag_suffix>]"
|
||||
echo "Usage: $0 -i <image_name> [-o <org_name>] [--push] [--load] [-t <tag_suffix>] [--dry]"
|
||||
echo " -i: Image name (required)"
|
||||
echo " -o: Organization name"
|
||||
echo " --push: Push the image"
|
||||
echo " --load: Load the image"
|
||||
echo " -t: Tag suffix"
|
||||
echo " --dry: Don't build, only create build-args.json"
|
||||
exit 1
|
||||
}
|
||||
|
||||
@@ -27,6 +29,7 @@ while [[ $# -gt 0 ]]; do
|
||||
--push) push=1; shift ;;
|
||||
--load) load=1; shift ;;
|
||||
-t) tag_suffix="$2"; shift 2 ;;
|
||||
--dry) dry_run=1; shift ;;
|
||||
*) usage ;;
|
||||
esac
|
||||
done
|
||||
@@ -113,10 +116,13 @@ echo "Repo: $DOCKER_REPOSITORY"
|
||||
echo "Base dir: $DOCKER_BASE_DIR"
|
||||
|
||||
args=""
|
||||
full_tags=()
|
||||
for tag in "${tags[@]}"; do
|
||||
args+=" -t $DOCKER_REPOSITORY:$tag"
|
||||
full_tags+=("$DOCKER_REPOSITORY:$tag")
|
||||
done
|
||||
|
||||
|
||||
if [[ $push -eq 1 ]]; then
|
||||
args+=" --push"
|
||||
args+=" --cache-to=type=registry,ref=$DOCKER_REPOSITORY:$cache_tag,mode=max"
|
||||
@@ -136,6 +142,26 @@ else
|
||||
# For push or without load, build for multiple platforms
|
||||
platform="linux/amd64,linux/arm64"
|
||||
fi
|
||||
if [[ $dry_run -eq 1 ]]; then
|
||||
echo "Dry Run is enabled. Writing build config to docker-build-dry.json"
|
||||
jq -n \
|
||||
--argjson tags "$(printf '%s\n' "${full_tags[@]}" | jq -R . | jq -s .)" \
|
||||
--arg platform "$platform" \
|
||||
--arg openhands_build_version "$OPENHANDS_BUILD_VERSION" \
|
||||
--arg dockerfile "$dir/Dockerfile" \
|
||||
'{
|
||||
tags: $tags,
|
||||
platform: $platform,
|
||||
build_args: [
|
||||
"OPENHANDS_BUILD_VERSION=" + $openhands_build_version
|
||||
],
|
||||
dockerfile: $dockerfile
|
||||
}' > docker-build-dry.json
|
||||
|
||||
exit 0
|
||||
fi
|
||||
|
||||
|
||||
|
||||
echo "Building for platform(s): $platform"
|
||||
|
||||
|
||||
@@ -11,7 +11,7 @@ services:
|
||||
- BACKEND_HOST=${BACKEND_HOST:-"0.0.0.0"}
|
||||
- SANDBOX_API_HOSTNAME=host.docker.internal
|
||||
#
|
||||
- SANDBOX_RUNTIME_CONTAINER_IMAGE=${SANDBOX_RUNTIME_CONTAINER_IMAGE:-ghcr.io/all-hands-ai/runtime:0.31-nikolaik}
|
||||
- SANDBOX_RUNTIME_CONTAINER_IMAGE=${SANDBOX_RUNTIME_CONTAINER_IMAGE:-ghcr.io/all-hands-ai/runtime:0.33-nikolaik}
|
||||
- SANDBOX_USER_ID=${SANDBOX_USER_ID:-1234}
|
||||
- WORKSPACE_MOUNT_PATH=${WORKSPACE_BASE:-$PWD/workspace}
|
||||
ports:
|
||||
|
||||
@@ -7,7 +7,7 @@ services:
|
||||
image: openhands:latest
|
||||
container_name: openhands-app-${DATE:-}
|
||||
environment:
|
||||
- SANDBOX_RUNTIME_CONTAINER_IMAGE=${SANDBOX_RUNTIME_CONTAINER_IMAGE:-docker.all-hands.dev/all-hands-ai/runtime:0.31-nikolaik}
|
||||
- SANDBOX_RUNTIME_CONTAINER_IMAGE=${SANDBOX_RUNTIME_CONTAINER_IMAGE:-docker.all-hands.dev/all-hands-ai/runtime:0.33-nikolaik}
|
||||
#- SANDBOX_USER_ID=${SANDBOX_USER_ID:-1234} # enable this only if you want a specific non-root sandbox user but you will have to manually adjust permissions of openhands-state for this user
|
||||
- WORKSPACE_MOUNT_PATH=${WORKSPACE_BASE:-$PWD/workspace}
|
||||
ports:
|
||||
|
||||
@@ -354,12 +354,12 @@ Les options de configuration de l'agent sont définies dans les sections `[agent
|
||||
- Valeur par défaut : `true`
|
||||
- Description : Si l'appel de fonction est activé
|
||||
|
||||
- `codeact_enable_browsing`
|
||||
- `enable_browsing`
|
||||
- Type : `bool`
|
||||
- Valeur par défaut : `false`
|
||||
- Description : Si le délégué de navigation est activé dans l'espace d'action (fonctionne uniquement avec l'appel de fonction)
|
||||
|
||||
- `codeact_enable_llm_editor`
|
||||
- `enable_llm_editor`
|
||||
- Type : `bool`
|
||||
- Valeur par défaut : `false`
|
||||
- Description : Si l'éditeur LLM est activé dans l'espace d'action (fonctionne uniquement avec l'appel de fonction)
|
||||
|
||||
@@ -52,7 +52,7 @@ LLM_API_KEY="sk_test_12345"
|
||||
```bash
|
||||
docker run -it \
|
||||
--pull=always \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.31-nikolaik \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.33-nikolaik \
|
||||
-e SANDBOX_USER_ID=$(id -u) \
|
||||
-e WORKSPACE_MOUNT_PATH=$WORKSPACE_BASE \
|
||||
-e LLM_API_KEY=$LLM_API_KEY \
|
||||
@@ -61,7 +61,7 @@ docker run -it \
|
||||
-v /var/run/docker.sock:/var/run/docker.sock \
|
||||
--add-host host.docker.internal:host-gateway \
|
||||
--name openhands-app-$(date +%Y%m%d%H%M%S) \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.31 \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.33 \
|
||||
python -m openhands.core.cli
|
||||
```
|
||||
|
||||
|
||||
@@ -21,14 +21,18 @@ OpenHands fournit un mode Interface Graphique (GUI) convivial pour interagir ave
|
||||
3. Entrez la `Clé API` correspondante pour le fournisseur choisi.
|
||||
4. Cliquez sur "Enregistrer" pour appliquer les paramètres.
|
||||
|
||||
### Configuration du Jeton GitHub
|
||||
### Jetons de Contrôle de Version
|
||||
|
||||
OpenHands prend en charge plusieurs fournisseurs de contrôle de version. Vous pouvez configurer des jetons pour plusieurs fournisseurs simultanément.
|
||||
|
||||
#### Configuration du Jeton GitHub
|
||||
|
||||
OpenHands exporte automatiquement un `GITHUB_TOKEN` vers l'environnement shell s'il est disponible. Cela peut se produire de deux manières :
|
||||
|
||||
1. **Localement (OSS)** : L'utilisateur saisit directement son jeton GitHub
|
||||
2. **En ligne (SaaS)** : Le jeton est obtenu via l'authentification OAuth GitHub
|
||||
|
||||
#### Configuration d'un Jeton GitHub Local
|
||||
##### Configuration d'un Jeton GitHub Local
|
||||
|
||||
1. **Générer un Personal Access Token (PAT)** :
|
||||
- Allez dans Paramètres GitHub > Paramètres développeur > Personal Access Tokens > Tokens (classique)
|
||||
@@ -40,11 +44,11 @@ OpenHands exporte automatiquement un `GITHUB_TOKEN` vers l'environnement shell s
|
||||
|
||||
2. **Entrer le Jeton dans OpenHands** :
|
||||
- Cliquez sur le bouton Paramètres (icône d'engrenage) en haut à droite
|
||||
- Accédez à la section "GitHub"
|
||||
- Accédez à la section "Git Provider Settings"
|
||||
- Collez votre jeton dans le champ "Jeton GitHub"
|
||||
- Cliquez sur "Enregistrer" pour appliquer les modifications
|
||||
|
||||
#### Politiques de Jetons Organisationnels
|
||||
##### Politiques de Jetons Organisationnels
|
||||
|
||||
Si vous travaillez avec des dépôts organisationnels, une configuration supplémentaire peut être nécessaire :
|
||||
|
||||
@@ -59,7 +63,7 @@ Si vous travaillez avec des dépôts organisationnels, une configuration supplé
|
||||
- Si nécessaire, cliquez sur "Activer SSO" à côté de votre organisation
|
||||
- Terminez le processus d'autorisation SSO
|
||||
|
||||
#### Authentification OAuth (Mode En Ligne)
|
||||
##### Authentification OAuth (Mode En Ligne)
|
||||
|
||||
Lorsque vous utilisez OpenHands en mode en ligne, le flux OAuth GitHub :
|
||||
|
||||
@@ -74,7 +78,7 @@ Lorsque vous utilisez OpenHands en mode en ligne, le flux OAuth GitHub :
|
||||
- Autorisez OpenHands à accéder à votre compte GitHub
|
||||
- Si vous utilisez une organisation, autorisez l'accès à l'organisation si vous y êtes invité
|
||||
|
||||
#### Dépannage
|
||||
##### Dépannage
|
||||
|
||||
Problèmes courants et solutions :
|
||||
|
||||
@@ -95,6 +99,43 @@ Problèmes courants et solutions :
|
||||
- Vérifiez la console du navigateur pour tout message d'erreur
|
||||
- Utilisez le bouton "Tester la connexion" dans les paramètres s'il est disponible
|
||||
|
||||
#### Configuration du Jeton GitLab
|
||||
|
||||
OpenHands exporte automatiquement un `GITLAB_TOKEN` vers l'environnement shell, uniquement pour les installations locales, s'il est disponible.
|
||||
|
||||
##### Configuration d'un Jeton GitLab
|
||||
|
||||
1. **Générer un Personal Access Token (PAT)** :
|
||||
- Sur GitLab, allez dans Paramètres utilisateur > Jetons d'accès
|
||||
- Créez un nouveau jeton avec les portées suivantes :
|
||||
- `api` (Accès API)
|
||||
- `read_user` (Lecture des informations utilisateur)
|
||||
- `read_repository` (Lecture du dépôt)
|
||||
- `write_repository` (Écriture du dépôt)
|
||||
- Définissez une date d'expiration ou laissez vide pour un jeton sans expiration
|
||||
|
||||
2. **Entrer le Jeton dans OpenHands** :
|
||||
- Cliquez sur le bouton Paramètres (icône d'engrenage)
|
||||
- Accédez à la section `Git Provider Settings`
|
||||
- Collez votre jeton dans le champ `Jeton GitLab`
|
||||
- Si vous utilisez GitLab auto-hébergé, entrez l'URL de votre instance GitLab
|
||||
- Cliquez sur `Enregistrer les modifications` pour appliquer les changements
|
||||
|
||||
##### Dépannage
|
||||
|
||||
Problèmes courants et solutions :
|
||||
|
||||
1. **Jeton Non Reconnu** :
|
||||
- Assurez-vous que le jeton est correctement enregistré dans les paramètres
|
||||
- Vérifiez que le jeton n'a pas expiré
|
||||
- Vérifiez que le jeton a les portées requises
|
||||
- Pour les instances auto-hébergées, vérifiez l'URL correcte de l'instance
|
||||
|
||||
2. **Accès Refusé** :
|
||||
- Vérifiez les permissions d'accès au projet
|
||||
- Vérifiez si le jeton possède les portées nécessaires
|
||||
- Pour les dépôts de groupe/organisation, assurez-vous d'avoir les accès appropriés
|
||||
|
||||
### Paramètres Avancés
|
||||
|
||||
1. Basculez sur `Options Avancées` pour accéder aux paramètres supplémentaires.
|
||||
|
||||
@@ -46,7 +46,7 @@ LLM_API_KEY="sk_test_12345"
|
||||
```bash
|
||||
docker run -it \
|
||||
--pull=always \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.31-nikolaik \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.33-nikolaik \
|
||||
-e SANDBOX_USER_ID=$(id -u) \
|
||||
-e WORKSPACE_MOUNT_PATH=$WORKSPACE_BASE \
|
||||
-e LLM_API_KEY=$LLM_API_KEY \
|
||||
@@ -56,6 +56,6 @@ docker run -it \
|
||||
-v /var/run/docker.sock:/var/run/docker.sock \
|
||||
--add-host host.docker.internal:host-gateway \
|
||||
--name openhands-app-$(date +%Y%m%d%H%M%S) \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.31 \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.33 \
|
||||
python -m openhands.core.main -t "write a bash script that prints hi" --no-auto-continue
|
||||
```
|
||||
|
||||
@@ -13,16 +13,16 @@
|
||||
La façon la plus simple d'exécuter OpenHands est avec Docker.
|
||||
|
||||
```bash
|
||||
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.31-nikolaik
|
||||
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.33-nikolaik
|
||||
|
||||
docker run -it --rm --pull=always \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.31-nikolaik \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.33-nikolaik \
|
||||
-e LOG_ALL_EVENTS=true \
|
||||
-v /var/run/docker.sock:/var/run/docker.sock \
|
||||
-p 3000:3000 \
|
||||
--add-host host.docker.internal:host-gateway \
|
||||
--name openhands-app \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.31
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.33
|
||||
```
|
||||
|
||||
Vous pouvez également exécuter OpenHands en mode [headless scriptable](https://docs.all-hands.dev/modules/usage/how-to/headless-mode), en tant que [CLI interactive](https://docs.all-hands.dev/modules/usage/how-to/cli-mode), ou en utilisant l'[Action GitHub OpenHands](https://docs.all-hands.dev/modules/usage/how-to/github-action).
|
||||
|
||||
@@ -13,7 +13,7 @@ C'est le Runtime par défaut qui est utilisé lorsque vous démarrez OpenHands.
|
||||
|
||||
```
|
||||
docker run # ...
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.31-nikolaik \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.33-nikolaik \
|
||||
-v /var/run/docker.sock:/var/run/docker.sock \
|
||||
# ...
|
||||
```
|
||||
|
||||
@@ -348,12 +348,12 @@ dockerコマンドで使用する場合は、`-e LLM_<option>`として渡しま
|
||||
- デフォルト値: `true`
|
||||
- 説明: 関数呼び出しが有効かどうか
|
||||
|
||||
- `codeact_enable_browsing`
|
||||
- `enable_browsing`
|
||||
- 型: `bool`
|
||||
- デフォルト値: `false`
|
||||
- 説明: アクションスペースでブラウジングデリゲートが有効かどうか(関数呼び出しでのみ機能)
|
||||
|
||||
- `codeact_enable_llm_editor`
|
||||
- `enable_llm_editor`
|
||||
- 型: `bool`
|
||||
- デフォルト値: `false`
|
||||
- 説明: アクションスペースでLLMエディタが有効かどうか(関数呼び出しでのみ機能)
|
||||
|
||||
@@ -34,7 +34,7 @@ Docker で OpenHands を CLI モードで実行するには:
|
||||
```bash
|
||||
docker run -it \
|
||||
--pull=always \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.31-nikolaik \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.33-nikolaik \
|
||||
-e SANDBOX_USER_ID=$(id -u) \
|
||||
-e WORKSPACE_MOUNT_PATH=$WORKSPACE_BASE \
|
||||
-e LLM_API_KEY=$LLM_API_KEY \
|
||||
@@ -44,7 +44,7 @@ docker run -it \
|
||||
-v ~/.openhands-state:/.openhands-state \
|
||||
--add-host host.docker.internal:host-gateway \
|
||||
--name openhands-app-$(date +%Y%m%d%H%M%S) \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.31 \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.33 \
|
||||
python -m openhands.core.cli
|
||||
```
|
||||
|
||||
|
||||
@@ -16,7 +16,11 @@ OpenHandsは、AI アシスタントとやり取りするためのグラフィ
|
||||
3. 選択したプロバイダーに対応する`API Key`を入力します。
|
||||
4. `Save Changes`をクリックして設定を適用します。
|
||||
|
||||
### GitHubトークンの設定
|
||||
### バージョン管理トークン
|
||||
|
||||
OpenHandsは複数のバージョン管理プロバイダーをサポートしています。複数のプロバイダーのトークンを同時に設定できます。
|
||||
|
||||
#### GitHubトークンの設定
|
||||
|
||||
OpenHandsは、利用可能な場合、自動的に`GITHUB_TOKEN`をシェル環境にエクスポートします。これは2つの方法で行われます。
|
||||
|
||||
@@ -34,7 +38,7 @@ OpenHandsは、利用可能な場合、自動的に`GITHUB_TOKEN`をシェル環
|
||||
- Minimal Permissions(検索用に**Meta Data = Read-only**を選択し、ブランチ作成用に**Pull Requests = Read and Write**、**Content = Read and Write**を選択します)
|
||||
2. **OpenHandsにトークンを入力**:
|
||||
- 設定ボタン(歯車アイコン)をクリックします。
|
||||
- `GitHub Settings`セクションに移動します。
|
||||
- `Git Provider Settings`セクションに移動します。
|
||||
- `GitHub Token`フィールドにトークンを貼り付けます。
|
||||
- `Save Changes`をクリックして変更を適用します。
|
||||
</details>
|
||||
@@ -94,6 +98,46 @@ OpenHandsは、利用可能な場合、自動的に`GITHUB_TOKEN`をシェル環
|
||||
- 組織を使用している場合は、プロンプトが表示されたら組織へのアクセスを承認します。
|
||||
</details>
|
||||
|
||||
#### GitLabトークンの設定
|
||||
|
||||
OpenHandsは、利用可能な場合、ローカルインストールのみ、自動的に`GITLAB_TOKEN`をシェル環境にエクスポートします。
|
||||
|
||||
<details>
|
||||
<summary>GitLabトークンの設定</summary>
|
||||
|
||||
1. **Personal Access Token(PAT)の生成**:
|
||||
- GitLabで、User Settings > Access Tokensに移動します。
|
||||
- 以下のスコープを持つ新しいトークンを作成します:
|
||||
- `api`(APIアクセス)
|
||||
- `read_user`(ユーザー情報の読み取り)
|
||||
- `read_repository`(リポジトリ読み取り)
|
||||
- `write_repository`(リポジトリ書き込み)
|
||||
- 有効期限を設定するか、無期限トークンの場合は空白のままにします。
|
||||
2. **OpenHandsにトークンを入力**:
|
||||
- 設定ボタン(歯車アイコン)をクリックします。
|
||||
- `Git Provider Settings`セクションに移動します。
|
||||
- `GitLab Token`フィールドにトークンを貼り付けます。
|
||||
- セルフホスト型GitLabを使用している場合は、GitLabインスタンスのURLを入力します。
|
||||
- `Save Changes`をクリックして変更を適用します。
|
||||
</details>
|
||||
|
||||
<details>
|
||||
<summary>トラブルシューティング</summary>
|
||||
|
||||
一般的な問題と解決策:
|
||||
|
||||
- **トークンが認識されない**:
|
||||
- トークンが設定に正しく保存されていることを確認します。
|
||||
- トークンの有効期限が切れていないことを確認します。
|
||||
- トークンに必要なスコープがあることを確認します。
|
||||
- セルフホスト型インスタンスの場合は、正しいインスタンスURLを確認します。
|
||||
|
||||
- **アクセスが拒否された**:
|
||||
- プロジェクトのアクセス権限を確認します。
|
||||
- トークンに必要なスコープがあるかどうかを確認します。
|
||||
- グループ/組織のリポジトリの場合は、適切なアクセス権があることを確認します。
|
||||
</details>
|
||||
|
||||
### 高度な設定
|
||||
|
||||
1. 設定ページ内で、`Advanced`オプションを切り替えて追加の設定にアクセスします。
|
||||
|
||||
@@ -31,7 +31,7 @@ DockerでOpenHandsをヘッドレスモードで実行するには:
|
||||
```bash
|
||||
docker run -it \
|
||||
--pull=always \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.31-nikolaik \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.33-nikolaik \
|
||||
-e SANDBOX_USER_ID=$(id -u) \
|
||||
-e WORKSPACE_MOUNT_PATH=$WORKSPACE_BASE \
|
||||
-e LLM_API_KEY=$LLM_API_KEY \
|
||||
@@ -42,7 +42,7 @@ docker run -it \
|
||||
-v ~/.openhands-state:/.openhands-state \
|
||||
--add-host host.docker.internal:host-gateway \
|
||||
--name openhands-app-$(date +%Y%m%d%H%M%S) \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.31 \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.33 \
|
||||
python -m openhands.core.main -t "write a bash script that prints hi"
|
||||
```
|
||||
|
||||
|
||||
@@ -21,8 +21,8 @@ OpenHandsがリポジトリで動作する際:
|
||||
```
|
||||
---
|
||||
name: <Microagentの名前>
|
||||
type: <MicroAgentのタイプ>
|
||||
version: <MicroAgentのバージョン>
|
||||
type: <Microagentのタイプ>
|
||||
version: <Microagentのバージョン>
|
||||
agent: <エージェントのタイプ (通常はCodeActAgent)>
|
||||
triggers:
|
||||
- <オプション: microagentをトリガーするキーワード。トリガーを削除すると、常に含まれるようになります>
|
||||
|
||||
@@ -25,7 +25,7 @@ nikolaik の `SANDBOX_RUNTIME_CONTAINER_IMAGE` は、ランタイムサーバー
|
||||
|
||||
```bash
|
||||
docker run # ...
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.31-nikolaik \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.33-nikolaik \
|
||||
-e SANDBOX_USER_ID=$(id -u) \
|
||||
-e WORKSPACE_MOUNT_PATH=$WORKSPACE_BASE \
|
||||
-v $WORKSPACE_BASE:/opt/workspace_base \
|
||||
@@ -82,5 +82,5 @@ docker network create openhands-network
|
||||
# 分離されたネットワークで OpenHands を実行
|
||||
docker run # ... \
|
||||
--network openhands-network \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.31
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.33
|
||||
```
|
||||
|
||||
@@ -7,7 +7,7 @@ O GitHub Cloud Resolver automatiza correções de código e fornece assistência
|
||||
## Configuração
|
||||
|
||||
O Resolvedor do GitHub na Nuvem está disponível automaticamente quando você
|
||||
[concede acesso ao repositório do OpenHands Cloud](./openhands-cloud.md#adding-repository-access).
|
||||
[concede acesso ao repositório do OpenHands Cloud](./openhands-cloud#adding-repository-access).
|
||||
|
||||
## Uso
|
||||
|
||||
|
||||
@@ -292,17 +292,17 @@ As opções de configuração do agente são definidas nas seções `[agent]` e
|
||||
- Padrão: `true`
|
||||
- Descrição: Se a chamada de função está habilitada
|
||||
|
||||
- `codeact_enable_browsing`
|
||||
- `enable_browsing`
|
||||
- Tipo: `bool`
|
||||
- Padrão: `false`
|
||||
- Descrição: Se o delegado de navegação está habilitado no espaço de ação (funciona apenas com chamada de função)
|
||||
|
||||
- `codeact_enable_llm_editor`
|
||||
- `enable_llm_editor`
|
||||
- Tipo: `bool`
|
||||
- Padrão: `false`
|
||||
- Descrição: Se o editor LLM está habilitado no espaço de ação (funciona apenas com chamada de função)
|
||||
|
||||
- `codeact_enable_jupyter`
|
||||
- `enable_jupyter`
|
||||
- Tipo: `bool`
|
||||
- Padrão: `false`
|
||||
- Descrição: Se o Jupyter está habilitado no espaço de ação
|
||||
|
||||
@@ -35,7 +35,7 @@ Para executar o OpenHands no modo CLI com Docker:
|
||||
```bash
|
||||
docker run -it \
|
||||
--pull=always \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.31-nikolaik \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.33-nikolaik \
|
||||
-e SANDBOX_USER_ID=$(id -u) \
|
||||
-e WORKSPACE_MOUNT_PATH=$WORKSPACE_BASE \
|
||||
-e LLM_API_KEY=$LLM_API_KEY \
|
||||
@@ -45,7 +45,7 @@ docker run -it \
|
||||
-v ~/.openhands-state:/.openhands-state \
|
||||
--add-host host.docker.internal:host-gateway \
|
||||
--name openhands-app-$(date +%Y%m%d%H%M%S) \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.31 \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.33 \
|
||||
python -m openhands.core.cli
|
||||
```
|
||||
|
||||
|
||||
@@ -17,7 +17,11 @@ O OpenHands fornece um modo de Interface Gráfica do Usuário (GUI) para interag
|
||||
3. Insira a `Chave de API` correspondente para o provedor escolhido.
|
||||
4. Clique em `Salvar Alterações` para aplicar as configurações.
|
||||
|
||||
### Configuração do Token do GitHub
|
||||
### Tokens de Controle de Versão
|
||||
|
||||
O OpenHands suporta múltiplos provedores de controle de versão. Você pode configurar tokens para vários provedores simultaneamente.
|
||||
|
||||
#### Configuração do Token do GitHub
|
||||
|
||||
O OpenHands exporta automaticamente um `GITHUB_TOKEN` para o ambiente shell se ele estiver disponível. Isso pode acontecer de duas maneiras:
|
||||
|
||||
@@ -35,7 +39,7 @@ O OpenHands exporta automaticamente um `GITHUB_TOKEN` para o ambiente shell se e
|
||||
- Minimal Permissions (Selecione **Meta Data = Read-only** para pesquisa, **Pull Requests = Read and Write**, **Content = Read and Write** para criação de branches)
|
||||
2. **Insira o Token no OpenHands**:
|
||||
- Clique no botão Settings (ícone de engrenagem).
|
||||
- Navegue até a seção `GitHub Settings`.
|
||||
- Navegue até a seção `Git Provider Settings`.
|
||||
- Cole seu token no campo `GitHub Token`.
|
||||
- Clique em `Save Changes` para aplicar as alterações.
|
||||
</details>
|
||||
@@ -95,6 +99,46 @@ O OpenHands exporta automaticamente um `GITHUB_TOKEN` para o ambiente shell se e
|
||||
- Se estiver usando uma organização, autorize o acesso à organização se solicitado.
|
||||
</details>
|
||||
|
||||
#### Configuração do Token do GitLab
|
||||
|
||||
O OpenHands exporta automaticamente um `GITLAB_TOKEN` para o ambiente shell, apenas para instalações locais, se ele estiver disponível.
|
||||
|
||||
<details>
|
||||
<summary>Configurando um Token do GitLab</summary>
|
||||
|
||||
1. **Gere um Personal Access Token (PAT)**:
|
||||
- No GitLab, vá para User Settings > Access Tokens.
|
||||
- Crie um novo token com os seguintes escopos:
|
||||
- `api` (Acesso à API)
|
||||
- `read_user` (Leitura de informações do usuário)
|
||||
- `read_repository` (Leitura do repositório)
|
||||
- `write_repository` (Escrita no repositório)
|
||||
- Defina uma data de expiração ou deixe em branco para um token sem expiração.
|
||||
2. **Insira o Token no OpenHands**:
|
||||
- Clique no botão Settings (ícone de engrenagem).
|
||||
- Navegue até a seção `Git Provider Settings`.
|
||||
- Cole seu token no campo `GitLab Token`.
|
||||
- Se estiver usando GitLab auto-hospedado, insira a URL da sua instância GitLab.
|
||||
- Clique em `Save Changes` para aplicar as alterações.
|
||||
</details>
|
||||
|
||||
<details>
|
||||
<summary>Solução de Problemas</summary>
|
||||
|
||||
Problemas comuns e soluções:
|
||||
|
||||
- **Token Não Reconhecido**:
|
||||
- Certifique-se de que o token esteja salvo corretamente nas configurações.
|
||||
- Verifique se o token não expirou.
|
||||
- Verifique se o token possui os escopos necessários.
|
||||
- Para instâncias auto-hospedadas, verifique a URL correta da instância.
|
||||
|
||||
- **Acesso Negado**:
|
||||
- Verifique as permissões de acesso ao projeto.
|
||||
- Verifique se o token possui os escopos necessários.
|
||||
- Para repositórios de grupo/organização, certifique-se de ter o acesso adequado.
|
||||
</details>
|
||||
|
||||
### Configurações Avançadas
|
||||
|
||||
1. Dentro da página Settings, ative as opções `Advanced` para acessar configurações adicionais.
|
||||
|
||||
@@ -32,7 +32,7 @@ Para executar o OpenHands no modo Headless com Docker:
|
||||
```bash
|
||||
docker run -it \
|
||||
--pull=always \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.31-nikolaik \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.33-nikolaik \
|
||||
-e SANDBOX_USER_ID=$(id -u) \
|
||||
-e WORKSPACE_MOUNT_PATH=$WORKSPACE_BASE \
|
||||
-e LLM_API_KEY=$LLM_API_KEY \
|
||||
@@ -43,7 +43,7 @@ docker run -it \
|
||||
-v ~/.openhands-state:/.openhands-state \
|
||||
--add-host host.docker.internal:host-gateway \
|
||||
--name openhands-app-$(date +%Y%m%d%H%M%S) \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.31 \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.33 \
|
||||
python -m openhands.core.main -t "escreva um script bash que imprima oi"
|
||||
```
|
||||
|
||||
|
||||
@@ -58,17 +58,17 @@
|
||||
A maneira mais fácil de executar o OpenHands é no Docker.
|
||||
|
||||
```bash
|
||||
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.31-nikolaik
|
||||
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.33-nikolaik
|
||||
|
||||
docker run -it --rm --pull=always \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.31-nikolaik \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.33-nikolaik \
|
||||
-e LOG_ALL_EVENTS=true \
|
||||
-v /var/run/docker.sock:/var/run/docker.sock \
|
||||
-v ~/.openhands-state:/.openhands-state \
|
||||
-p 3000:3000 \
|
||||
--add-host host.docker.internal:host-gateway \
|
||||
--name openhands-app \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.31
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.33
|
||||
```
|
||||
|
||||
Você encontrará o OpenHands em execução em http://localhost:3000!
|
||||
|
||||
@@ -21,8 +21,8 @@ Todos os microagentes usam arquivos markdown com frontmatter YAML que possuem in
|
||||
```
|
||||
---
|
||||
name: <Nome do microagente>
|
||||
type: <Tipo do MicroAgent>
|
||||
version: <Versão do MicroAgent>
|
||||
type: <Tipo do Microagent>
|
||||
version: <Versão do Microagent>
|
||||
agent: <O tipo de agente (normalmente CodeActAgent)>
|
||||
triggers:
|
||||
- <Palavras-chave opcionais que acionam o microagente. Se os gatilhos forem removidos, ele sempre será incluído>
|
||||
|
||||
@@ -13,7 +13,7 @@ Este é o Runtime padrão que é usado quando você inicia o OpenHands. Você po
|
||||
|
||||
```
|
||||
docker run # ...
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.31-nikolaik \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.33-nikolaik \
|
||||
-v /var/run/docker.sock:/var/run/docker.sock \
|
||||
# ...
|
||||
```
|
||||
|
||||
@@ -349,17 +349,17 @@ Agent 配置选项在 `config.toml` 文件的 `[agent]` 和 `[agent.<agent_name>
|
||||
- 默认值: `true`
|
||||
- 描述: 是否启用函数调用
|
||||
|
||||
- `codeact_enable_browsing`
|
||||
- `enable_browsing`
|
||||
- 类型: `bool`
|
||||
- 默认值: `false`
|
||||
- 描述: 是否在 action space 中启用浏览代理(仅适用于函数调用)
|
||||
|
||||
- `codeact_enable_llm_editor`
|
||||
- `enable_llm_editor`
|
||||
- 类型: `bool`
|
||||
- 默认值: `false`
|
||||
- 描述: 是否在 action space 中启用 LLM 编辑器(仅适用于函数调用)
|
||||
|
||||
- `codeact_enable_jupyter`
|
||||
- `enable_jupyter`
|
||||
- 类型: `bool`
|
||||
- 默认值: `false`
|
||||
- 描述: 是否在 action space 中启用 Jupyter
|
||||
|
||||
@@ -50,7 +50,7 @@ LLM_API_KEY="sk_test_12345"
|
||||
```bash
|
||||
docker run -it \
|
||||
--pull=always \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.31-nikolaik \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.33-nikolaik \
|
||||
-e SANDBOX_USER_ID=$(id -u) \
|
||||
-e WORKSPACE_MOUNT_PATH=$WORKSPACE_BASE \
|
||||
-e LLM_API_KEY=$LLM_API_KEY \
|
||||
@@ -59,7 +59,7 @@ docker run -it \
|
||||
-v /var/run/docker.sock:/var/run/docker.sock \
|
||||
--add-host host.docker.internal:host-gateway \
|
||||
--name openhands-app-$(date +%Y%m%d%H%M%S) \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.31 \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.33 \
|
||||
python -m openhands.core.cli
|
||||
```
|
||||
|
||||
|
||||
@@ -19,14 +19,18 @@ OpenHands 提供了一个用户友好的图形用户界面(GUI)模式,用
|
||||
3. 输入所选提供商对应的 `API Key`。
|
||||
4. 点击"保存"应用设置。
|
||||
|
||||
### GitHub Token 设置
|
||||
### 版本控制令牌
|
||||
|
||||
OpenHands 支持多个版本控制提供商。您可以同时配置多个提供商的令牌。
|
||||
|
||||
#### GitHub Token 设置
|
||||
|
||||
如果可用,OpenHands 会自动将 `GITHUB_TOKEN` 导出到 shell 环境中。这可以通过两种方式实现:
|
||||
|
||||
1. **本地(OSS)**:用户直接输入他们的 GitHub token
|
||||
2. **在线(SaaS)**:通过 GitHub OAuth 身份验证获取 token
|
||||
|
||||
#### 设置本地 GitHub Token
|
||||
##### 设置本地 GitHub Token
|
||||
|
||||
1. **生成个人访问令牌(PAT)**:
|
||||
- 转到 GitHub 设置 > 开发者设置 > 个人访问令牌 > 令牌(经典)
|
||||
@@ -38,11 +42,11 @@ OpenHands 提供了一个用户友好的图形用户界面(GUI)模式,用
|
||||
|
||||
2. **在 OpenHands 中输入令牌**:
|
||||
- 点击右上角的设置按钮(齿轮图标)
|
||||
- 导航到"GitHub"部分
|
||||
- 导航到"Git Provider Settings"部分
|
||||
- 将令牌粘贴到"GitHub Token"字段中
|
||||
- 点击"保存"应用更改
|
||||
|
||||
#### 组织令牌策略
|
||||
##### 组织令牌策略
|
||||
|
||||
如果您使用组织仓库,可能需要额外的设置:
|
||||
|
||||
@@ -57,7 +61,7 @@ OpenHands 提供了一个用户友好的图形用户界面(GUI)模式,用
|
||||
- 如果需要,点击组织旁边的"启用 SSO"
|
||||
- 完成 SSO 授权过程
|
||||
|
||||
#### OAuth 身份验证(在线模式)
|
||||
##### OAuth 身份验证(在线模式)
|
||||
|
||||
在在线模式下使用 OpenHands 时,GitHub OAuth 流程:
|
||||
|
||||
@@ -72,7 +76,7 @@ OpenHands 提供了一个用户友好的图形用户界面(GUI)模式,用
|
||||
- 授权 OpenHands 访问您的 GitHub 帐户
|
||||
- 如果使用组织,在出现提示时授权组织访问
|
||||
|
||||
#### 故障排除
|
||||
##### 故障排除
|
||||
|
||||
常见问题和解决方案:
|
||||
|
||||
@@ -93,6 +97,43 @@ OpenHands 提供了一个用户友好的图形用户界面(GUI)模式,用
|
||||
- 检查浏览器控制台中是否有任何错误消息
|
||||
- 如果可用,使用设置中的"测试连接"按钮
|
||||
|
||||
#### GitLab Token 设置
|
||||
|
||||
OpenHands 会自动将 `GITLAB_TOKEN` 导出到 shell 环境中,仅适用于本地安装,如果它可用的话。
|
||||
|
||||
##### 设置 GitLab Token
|
||||
|
||||
1. **生成个人访问令牌(PAT)**:
|
||||
- 在 GitLab 中,转到用户设置 > 访问令牌
|
||||
- 创建具有以下范围的新令牌:
|
||||
- `api`(API 访问)
|
||||
- `read_user`(读取用户信息)
|
||||
- `read_repository`(读取仓库)
|
||||
- `write_repository`(写入仓库)
|
||||
- 设置过期日期或留空以获取永不过期的令牌
|
||||
|
||||
2. **在 OpenHands 中输入令牌**:
|
||||
- 点击设置按钮(齿轮图标)
|
||||
- 导航到 `Git Provider Settings` 部分
|
||||
- 将令牌粘贴到 `GitLab Token` 字段中
|
||||
- 如果使用自托管 GitLab,请输入您的 GitLab 实例 URL
|
||||
- 点击 `Save Changes` 应用更改
|
||||
|
||||
##### 故障排除
|
||||
|
||||
常见问题和解决方案:
|
||||
|
||||
1. **令牌无法识别**:
|
||||
- 确保令牌已正确保存在设置中
|
||||
- 检查令牌是否已过期
|
||||
- 验证令牌是否具有所需的范围
|
||||
- 对于自托管实例,验证正确的实例 URL
|
||||
|
||||
2. **访问被拒绝**:
|
||||
- 验证项目访问权限
|
||||
- 检查令牌是否具有必要的范围
|
||||
- 对于组/组织仓库,确保您拥有适当的访问权限
|
||||
|
||||
### 高级设置
|
||||
|
||||
1. 切换`高级选项`以访问其他设置。
|
||||
|
||||
@@ -47,7 +47,7 @@ LLM_API_KEY="sk_test_12345"
|
||||
```bash
|
||||
docker run -it \
|
||||
--pull=always \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.31-nikolaik \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.33-nikolaik \
|
||||
-e SANDBOX_USER_ID=$(id -u) \
|
||||
-e WORKSPACE_MOUNT_PATH=$WORKSPACE_BASE \
|
||||
-e LLM_API_KEY=$LLM_API_KEY \
|
||||
@@ -57,6 +57,6 @@ docker run -it \
|
||||
-v /var/run/docker.sock:/var/run/docker.sock \
|
||||
--add-host host.docker.internal:host-gateway \
|
||||
--name openhands-app-$(date +%Y%m%d%H%M%S) \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.31 \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.33 \
|
||||
python -m openhands.core.main -t "write a bash script that prints hi" --no-auto-continue
|
||||
```
|
||||
|
||||
@@ -11,16 +11,16 @@
|
||||
在 Docker 中运行 OpenHands 是最简单的方式。
|
||||
|
||||
```bash
|
||||
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.31-nikolaik
|
||||
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.33-nikolaik
|
||||
|
||||
docker run -it --rm --pull=always \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.31-nikolaik \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.33-nikolaik \
|
||||
-e LOG_ALL_EVENTS=true \
|
||||
-v /var/run/docker.sock:/var/run/docker.sock \
|
||||
-p 3000:3000 \
|
||||
--add-host host.docker.internal:host-gateway \
|
||||
--name openhands-app \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.31
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.33
|
||||
```
|
||||
|
||||
你也可以在可脚本化的[无头模式](https://docs.all-hands.dev/modules/usage/how-to/headless-mode)下运行 OpenHands,作为[交互式 CLI](https://docs.all-hands.dev/modules/usage/how-to/cli-mode),或使用 [OpenHands GitHub Action](https://docs.all-hands.dev/modules/usage/how-to/github-action)。
|
||||
|
||||
@@ -11,7 +11,7 @@
|
||||
|
||||
```
|
||||
docker run # ...
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.31-nikolaik \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.33-nikolaik \
|
||||
-v /var/run/docker.sock:/var/run/docker.sock \
|
||||
# ...
|
||||
```
|
||||
|
||||
@@ -5,7 +5,7 @@ The GitHub Resolver automates code fixes and provides intelligent assistance for
|
||||
## Setup
|
||||
|
||||
The Cloud GitHub Resolver is available automatically when you
|
||||
[grant OpenHands Cloud repository access](./openhands-cloud.md#adding-repository-access).
|
||||
[grant OpenHands Cloud repository access](./openhands-cloud#adding-repository-access).
|
||||
|
||||
## Usage
|
||||
|
||||
|
||||
@@ -19,9 +19,12 @@ After visiting OpenHands Cloud, you will be asked to connect with your GitHub ac
|
||||
### Adding Repository Access
|
||||
|
||||
You can grant OpenHands specific repository access:
|
||||
1. Click the `Select a GitHub project` dropdown, select `Add more repositories...`.
|
||||
1. Click the `Select a Git project` dropdown, select `Add more repositories...`.
|
||||
2. Select the organization, then choose the specific repositories to grant OpenHands access to.
|
||||
- Openhands requests short-lived tokens (8-hour expiry) with these permissions:
|
||||
<details>
|
||||
<summary>Permission Details for Repository Access</summary>
|
||||
|
||||
Openhands requests short-lived tokens (8-hour expiry) with these permissions:
|
||||
- Actions: Read and write
|
||||
- Administration: Read-only
|
||||
- Commit statuses: Read and write
|
||||
@@ -31,13 +34,22 @@ You can grant OpenHands specific repository access:
|
||||
- Pull requests: Read and write
|
||||
- Webhooks: Read and write
|
||||
- Workflows: Read and write
|
||||
- Repository access for a user is granted based on:
|
||||
|
||||
Repository access for a user is granted based on:
|
||||
- Granted permission for the repository.
|
||||
- User's GitHub permissions (owner/collaborator).
|
||||
</details>
|
||||
|
||||
3. Click on `Install & Authorize`.
|
||||
|
||||
### Modifying Repository Access
|
||||
|
||||
You can modify repository access at any time by:
|
||||
* Using the same `Select a GitHub project > Add more repositories` workflow, or
|
||||
* Using the same `Select a Git project > Add more repositories` workflow, or
|
||||
* Visiting the Settings page and selecting `Configure GitHub Repositories` under the `GitHub Settings` section.
|
||||
|
||||
## Conversation Persistence
|
||||
|
||||
- Conversations List – Displays only the 10 most recent conversations initiated within the past 10 days.
|
||||
- Workspaces – Conversation workspaces are retained for 14 days.
|
||||
- Runtimes – Runtimes remain active ("warm") for 30 minutes. After this period, resuming a conversation may take 1–2 minutes.
|
||||
@@ -291,17 +291,17 @@ The agent configuration options are defined in the `[agent]` and `[agent.<agent_
|
||||
- Default: `true`
|
||||
- Description: Whether function calling is enabled
|
||||
|
||||
- `codeact_enable_browsing`
|
||||
- `enable_browsing`
|
||||
- Type: `bool`
|
||||
- Default: `false`
|
||||
- Description: Whether browsing delegate is enabled in the action space (only works with function calling)
|
||||
|
||||
- `codeact_enable_llm_editor`
|
||||
- `enable_llm_editor`
|
||||
- Type: `bool`
|
||||
- Default: `false`
|
||||
- Description: Whether LLM editor is enabled in the action space (only works with function calling)
|
||||
|
||||
- `codeact_enable_jupyter`
|
||||
- `enable_jupyter`
|
||||
- Type: `bool`
|
||||
- Default: `false`
|
||||
- Description: Whether Jupyter is enabled in the action space
|
||||
|
||||
@@ -35,7 +35,7 @@ To run OpenHands in CLI mode with Docker:
|
||||
```bash
|
||||
docker run -it \
|
||||
--pull=always \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.31-nikolaik \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.33-nikolaik \
|
||||
-e SANDBOX_USER_ID=$(id -u) \
|
||||
-e WORKSPACE_MOUNT_PATH=$WORKSPACE_BASE \
|
||||
-e LLM_API_KEY=$LLM_API_KEY \
|
||||
@@ -45,8 +45,11 @@ docker run -it \
|
||||
-v ~/.openhands-state:/.openhands-state \
|
||||
--add-host host.docker.internal:host-gateway \
|
||||
--name openhands-app-$(date +%Y%m%d%H%M%S) \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.31 \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.33 \
|
||||
python -m openhands.core.cli
|
||||
```
|
||||
|
||||
This command will start an interactive session in Docker where you can input tasks and receive responses from OpenHands.
|
||||
|
||||
The `-e SANDBOX_USER_ID=$(id -u)` is passed to the Docker command to ensure the sandbox user matches the host user’s
|
||||
permissions. This prevents the agent from creating root-owned files in the mounted workspace.
|
||||
|
||||
@@ -1,7 +1,8 @@
|
||||
# Custom Sandbox
|
||||
|
||||
:::note
|
||||
This guide is for users that would like to use their own custom Docker image for the runtime, e.g. with certain tools or programming languages pre-installed
|
||||
This guide is for users that would like to use their own custom Docker image for the runtime. For example
|
||||
with certain tools or programming languages pre-installed.
|
||||
:::
|
||||
|
||||
The sandbox is where the agent performs its tasks. Instead of running commands directly on your computer
|
||||
|
||||
@@ -18,11 +18,14 @@ OpenHands provides a Graphical User Interface (GUI) mode for interacting with th
|
||||
3. Enter the corresponding `API Key` for your chosen provider.
|
||||
4. Click `Save Changes` to apply the settings.
|
||||
|
||||
### GitHub Token Setup
|
||||
### Version Control Tokens
|
||||
|
||||
OpenHands automatically exports a `GITHUB_TOKEN` to the shell environment if it is available. This can happen in two ways:
|
||||
OpenHands supports multiple version control providers. You can configure tokens for multiple providers simultaneously.
|
||||
|
||||
#### GitHub Token Setup
|
||||
|
||||
OpenHands automatically exports a `GITHUB_TOKEN` to the shell environment if provided:
|
||||
|
||||
**Local Installation**: The user directly inputs their GitHub token.
|
||||
<details>
|
||||
<summary>Setting Up a GitHub Token</summary>
|
||||
|
||||
@@ -36,9 +39,8 @@ OpenHands automatically exports a `GITHUB_TOKEN` to the shell environment if it
|
||||
- Minimal Permissions ( Select `Meta Data = Read-only` read for search, `Pull Requests = Read and Write` and `Content = Read and Write` for branch creation)
|
||||
2. **Enter Token in OpenHands**:
|
||||
- Click the Settings button (gear icon).
|
||||
- Navigate to the `GitHub Settings` section.
|
||||
- Paste your token in the `GitHub Token` field.
|
||||
- Click `Save Changes` to apply the changes.
|
||||
- Click `Save` to apply the changes.
|
||||
</details>
|
||||
|
||||
<details>
|
||||
@@ -79,21 +81,43 @@ OpenHands automatically exports a `GITHUB_TOKEN` to the shell environment if it
|
||||
- Check the browser console for any error messages.
|
||||
</details>
|
||||
|
||||
**OpenHands Cloud**: The token is obtained through GitHub OAuth authentication.
|
||||
#### GitLab Token Setup
|
||||
|
||||
OpenHands automatically exports a `GITLAB_TOKEN` to the shell environment if provided:
|
||||
|
||||
<details>
|
||||
<summary>OAuth Authentication</summary>
|
||||
<summary>Setting Up a GitLab Token</summary>
|
||||
|
||||
When using OpenHands Cloud, the GitHub OAuth flow requests the following permissions:
|
||||
- Repository access (read/write)
|
||||
- Workflow management
|
||||
- Organization read access
|
||||
1. **Generate a Personal Access Token (PAT)**:
|
||||
- On GitLab, go to User Settings > Access Tokens.
|
||||
- Create a new token with the following scopes:
|
||||
- `api` (API access)
|
||||
- `read_user` (Read user information)
|
||||
- `read_repository` (Read repository)
|
||||
- `write_repository` (Write repository)
|
||||
- Set an expiration date or leave it blank for a non-expiring token.
|
||||
2. **Enter Token in OpenHands**:
|
||||
- Click the Settings button (gear icon).
|
||||
- Paste your token in the `GitLab Token` field.
|
||||
- Enter your GitLab instance URL if using self-hosted GitLab.
|
||||
- Click `Save` to apply the changes.
|
||||
</details>
|
||||
|
||||
To authenticate OpenHands:
|
||||
- Click `Sign in with GitHub` when prompted.
|
||||
- Review the requested permissions.
|
||||
- Authorize OpenHands to access your GitHub account.
|
||||
- If using an organization, authorize organization access if prompted.
|
||||
<details>
|
||||
<summary>Troubleshooting</summary>
|
||||
|
||||
Common issues and solutions:
|
||||
|
||||
- **Token Not Recognized**:
|
||||
- Ensure the token is properly saved in settings.
|
||||
- Check that the token hasn't expired.
|
||||
- Verify the token has the required scopes.
|
||||
- For self-hosted instances, verify the correct instance URL.
|
||||
|
||||
- **Access Denied**:
|
||||
- Verify project access permissions.
|
||||
- Check if the token has the necessary scopes.
|
||||
- For group/organization repositories, ensure you have proper access.
|
||||
</details>
|
||||
|
||||
### Advanced Settings
|
||||
|
||||
@@ -32,7 +32,7 @@ To run OpenHands in Headless mode with Docker:
|
||||
```bash
|
||||
docker run -it \
|
||||
--pull=always \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.31-nikolaik \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.33-nikolaik \
|
||||
-e SANDBOX_USER_ID=$(id -u) \
|
||||
-e WORKSPACE_MOUNT_PATH=$WORKSPACE_BASE \
|
||||
-e LLM_API_KEY=$LLM_API_KEY \
|
||||
@@ -43,10 +43,13 @@ docker run -it \
|
||||
-v ~/.openhands-state:/.openhands-state \
|
||||
--add-host host.docker.internal:host-gateway \
|
||||
--name openhands-app-$(date +%Y%m%d%H%M%S) \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.31 \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.33 \
|
||||
python -m openhands.core.main -t "write a bash script that prints hi"
|
||||
```
|
||||
|
||||
The `-e SANDBOX_USER_ID=$(id -u)` is passed to the Docker command to ensure the sandbox user matches the host user’s
|
||||
permissions. This prevents the agent from creating root-owned files in the mounted workspace.
|
||||
|
||||
## Advanced Headless Configurations
|
||||
|
||||
To view all available configuration options for headless mode, run the Python command with the `--help` flag.
|
||||
|
||||
@@ -58,17 +58,17 @@ A system with a modern processor and a minimum of **4GB RAM** is recommended to
|
||||
The easiest way to run OpenHands is in Docker.
|
||||
|
||||
```bash
|
||||
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.31-nikolaik
|
||||
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.33-nikolaik
|
||||
|
||||
docker run -it --rm --pull=always \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.31-nikolaik \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.33-nikolaik \
|
||||
-e LOG_ALL_EVENTS=true \
|
||||
-v /var/run/docker.sock:/var/run/docker.sock \
|
||||
-v ~/.openhands-state:/.openhands-state \
|
||||
-p 3000:3000 \
|
||||
--add-host host.docker.internal:host-gateway \
|
||||
--name openhands-app \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.31
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.33
|
||||
```
|
||||
|
||||
You'll find OpenHands running at http://localhost:3000!
|
||||
|
||||
@@ -12,6 +12,9 @@
|
||||
* Modify files
|
||||
* Upload and download files
|
||||
|
||||
### Terminal
|
||||
- A space for OpenHands and users to run terminal commands.
|
||||
|
||||
### Jupyter
|
||||
- Shows all Python commands that were executed by OpenHands.
|
||||
- Particularly handy when using OpenHands to perform data visualization tasks.
|
||||
@@ -23,6 +26,3 @@
|
||||
### Browser
|
||||
- Used by OpenHands to browse websites.
|
||||
- The browser is non-interactive.
|
||||
|
||||
### Terminal
|
||||
- A space for OpenHands and users to run terminal commands.
|
||||
|
||||
@@ -17,6 +17,8 @@ Based on these findings and community feedback, the following models have been v
|
||||
- [gemini/gemini-2.5-pro](https://blog.google/technology/google-deepmind/gemini-model-thinking-updates-march-2025/)
|
||||
- [deepseek/deepseek-chat](https://api-docs.deepseek.com/)
|
||||
- [openai/o3-mini](https://openai.com/index/openai-o3-mini/)
|
||||
- [openai/o3](https://openai.com/index/introducing-o3-and-o4-mini/)
|
||||
- [openai/o4-mini](https://openai.com/index/introducing-o3-and-o4-mini/)
|
||||
- [all-hands/openhands-lm-32b-v0.1](https://www.all-hands.dev/blog/introducing-openhands-lm-32b----a-strong-open-coding-agent-model) -- available through [OpenRouter](https://openrouter.ai/all-hands/openhands-lm-32b-v0.1)
|
||||
|
||||
|
||||
|
||||
@@ -15,7 +15,7 @@ It is highly recommended that you use GPUs to serve local models for optimal exp
|
||||
For example, to download [OpenHands LM 32B v0.1](https://huggingface.co/all-hands/openhands-lm-32b-v0.1):
|
||||
|
||||
```bash
|
||||
huggingface-cli download all-hands/openhands-lm-32b-v0.1 --local-dir my_folder/openhands-lm-32b-v0.1
|
||||
huggingface-cli download all-hands/openhands-lm-32b-v0.1 --local-dir all-hands/openhands-lm-32b-v0.1
|
||||
```
|
||||
|
||||
## Create an OpenAI-Compatible Endpoint With a Model Serving Framework
|
||||
@@ -27,7 +27,7 @@ huggingface-cli download all-hands/openhands-lm-32b-v0.1 --local-dir my_folder/o
|
||||
|
||||
```bash
|
||||
SGLANG_ALLOW_OVERWRITE_LONGER_CONTEXT_LEN=1 python3 -m sglang.launch_server \
|
||||
--model my_folder/openhands-lm-32b-v0.1 \
|
||||
--model all-hands/openhands-lm-32b-v0.1 \
|
||||
--served-model-name openhands-lm-32b-v0.1 \
|
||||
--port 8000 \
|
||||
--tp 2 --dp 1 \
|
||||
@@ -41,7 +41,7 @@ SGLANG_ALLOW_OVERWRITE_LONGER_CONTEXT_LEN=1 python3 -m sglang.launch_server \
|
||||
- Example launch command for OpenHands LM 32B (with at least 2 GPUs):
|
||||
|
||||
```bash
|
||||
vllm serve my_folder/openhands-lm-32b-v0.1 \
|
||||
vllm serve all-hands/openhands-lm-32b-v0.1 \
|
||||
--host 0.0.0.0 --port 8000 \
|
||||
--api-key mykey \
|
||||
--tensor-parallel-size 2 \
|
||||
@@ -67,7 +67,7 @@ Ensure `config.toml` exists by running `make setup-config` which will create one
|
||||
workspace_base="/path/to/your/workspace"
|
||||
|
||||
[llm]
|
||||
embedding_model="local"
|
||||
model="openhands-lm-32b-v0.1"
|
||||
ollama_base_url="http://localhost:8000"
|
||||
```
|
||||
|
||||
|
||||
@@ -35,8 +35,8 @@ A useful feature is the ability to connect to your local filesystem. To mount yo
|
||||
Be careful! There's nothing stopping the OpenHands agent from deleting or modifying
|
||||
any files that are mounted into its workspace.
|
||||
|
||||
This setup can cause some issues with file permissions (hence the `SANDBOX_USER_ID` variable)
|
||||
but seems to work well on most systems.
|
||||
The `-e SANDBOX_USER_ID=$(id -u)` is passed to the Docker command to ensure the sandbox user matches the host user’s
|
||||
permissions. This prevents the agent from creating root-owned files in the mounted workspace.
|
||||
|
||||
## Hardened Docker Installation
|
||||
|
||||
|
||||
@@ -20,3 +20,18 @@ Try these in order:
|
||||
* If using Docker Desktop, ensure `Settings > Advanced > Allow the default Docker socket to be used` is enabled.
|
||||
* Depending on your configuration you may need `Settings > Resources > Network > Enable host networking` enabled in Docker Desktop.
|
||||
* Reinstall Docker Desktop.
|
||||
|
||||
### Permission Error
|
||||
|
||||
**Description**
|
||||
|
||||
On initial prompt, an error is seen with `Permission Denied` or `PermissionError`.
|
||||
|
||||
**Resolution**
|
||||
|
||||
* Check if the `~/.openhands-state` is owned by `root`. If so, you can:
|
||||
* Change the directory's ownership: `sudo chown <user>:<user> ~/.openhands-state`.
|
||||
* or update permissions on the directory: `sudo chmod 777 ~/.openhands-state`
|
||||
* or delete it if you don’t need previous data. OpenHands will recreate it. You'll need to re-enter LLM settings.
|
||||
* If mounting a local directory, ensure your `WORKSPACE_BASE` has the necessary permissions for the user running
|
||||
OpenHands.
|
||||
|
||||
8
docs/package-lock.json
generated
8
docs/package-lock.json
generated
@@ -24,7 +24,7 @@
|
||||
"@docusaurus/module-type-aliases": "^3.5.1",
|
||||
"@docusaurus/tsconfig": "^3.7.0",
|
||||
"@docusaurus/types": "^3.5.1",
|
||||
"typescript": "~5.8.2"
|
||||
"typescript": "~5.8.3"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=18.0"
|
||||
@@ -17638,9 +17638,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/typescript": {
|
||||
"version": "5.8.2",
|
||||
"resolved": "https://registry.npmjs.org/typescript/-/typescript-5.8.2.tgz",
|
||||
"integrity": "sha512-aJn6wq13/afZp/jT9QZmwEjDqqvSGp1VT5GVg+f/t6/oVyrgXM6BY1h9BRh/O5p3PlUPAe+WuiEZOmb/49RqoQ==",
|
||||
"version": "5.8.3",
|
||||
"resolved": "https://registry.npmjs.org/typescript/-/typescript-5.8.3.tgz",
|
||||
"integrity": "sha512-p1diW6TqL9L07nNxvRMM7hMMw4c5XOo/1ibL4aAIGmSAt9slTE1Xgw5KWuof2uTOvCg9BY7ZRi+GaF+7sfgPeQ==",
|
||||
"license": "Apache-2.0",
|
||||
"bin": {
|
||||
"tsc": "bin/tsc",
|
||||
|
||||
@@ -31,7 +31,7 @@
|
||||
"@docusaurus/module-type-aliases": "^3.5.1",
|
||||
"@docusaurus/tsconfig": "^3.7.0",
|
||||
"@docusaurus/types": "^3.5.1",
|
||||
"typescript": "~5.8.2"
|
||||
"typescript": "~5.8.3"
|
||||
},
|
||||
"browserslist": {
|
||||
"production": [
|
||||
|
||||
BIN
docs/static/img/oh-features.png
vendored
BIN
docs/static/img/oh-features.png
vendored
Binary file not shown.
|
Before Width: | Height: | Size: 148 KiB After Width: | Height: | Size: 121 KiB |
@@ -9307,10 +9307,10 @@ typedarray-to-buffer@^3.1.5:
|
||||
dependencies:
|
||||
is-typedarray "^1.0.0"
|
||||
|
||||
typescript@~5.8.2:
|
||||
version "5.8.2"
|
||||
resolved "https://registry.npmjs.org/typescript/-/typescript-5.8.2.tgz"
|
||||
integrity sha512-aJn6wq13/afZp/jT9QZmwEjDqqvSGp1VT5GVg+f/t6/oVyrgXM6BY1h9BRh/O5p3PlUPAe+WuiEZOmb/49RqoQ==
|
||||
typescript@~5.8.3:
|
||||
version "5.8.3"
|
||||
resolved "https://registry.yarnpkg.com/typescript/-/typescript-5.8.3.tgz#92f8a3e5e3cf497356f4178c34cd65a7f5e8440e"
|
||||
integrity sha512-p1diW6TqL9L07nNxvRMM7hMMw4c5XOo/1ibL4aAIGmSAt9slTE1Xgw5KWuof2uTOvCg9BY7ZRi+GaF+7sfgPeQ==
|
||||
|
||||
undici-types@~5.26.4:
|
||||
version "5.26.5"
|
||||
|
||||
@@ -131,9 +131,9 @@ def get_config(
|
||||
)
|
||||
)
|
||||
agent_config = AgentConfig(
|
||||
codeact_enable_jupyter=False,
|
||||
codeact_enable_browsing=RUN_WITH_BROWSING,
|
||||
codeact_enable_llm_editor=False,
|
||||
enable_jupyter=False,
|
||||
enable_browsing=RUN_WITH_BROWSING,
|
||||
enable_llm_editor=False,
|
||||
)
|
||||
config.set_agent_config(agent_config)
|
||||
return config
|
||||
|
||||
@@ -79,8 +79,8 @@ def get_config(
|
||||
agent_config.enable_prompt_extensions = False
|
||||
agent_config = AgentConfig(
|
||||
function_calling=False,
|
||||
codeact_enable_jupyter=True,
|
||||
codeact_enable_browsing_delegate=True,
|
||||
enable_jupyter=True,
|
||||
enable_browsing=True,
|
||||
)
|
||||
config.set_agent_config(agent_config)
|
||||
return config
|
||||
|
||||
1
evaluation/benchmarks/lca_ci_build_repair/.gitignore
vendored
Normal file
1
evaluation/benchmarks/lca_ci_build_repair/.gitignore
vendored
Normal file
@@ -0,0 +1 @@
|
||||
config.yaml
|
||||
35
evaluation/benchmarks/lca_ci_build_repair/README.MD
Normal file
35
evaluation/benchmarks/lca_ci_build_repair/README.MD
Normal file
@@ -0,0 +1,35 @@
|
||||
# CI Builds Repair Benchmark Integration
|
||||
|
||||
This module integrates the CI Builds Repair benchmark developed by [JetBrains-Research](https://github.com/JetBrains-Research/lca-baselines/tree/main/ci-builds-repair/ci-builds-repair-benchmark).
|
||||
|
||||
For more information, refer to the [GitHub repository](https://github.com/JetBrains-Research/lca-baselines/tree/main/ci-builds-repair/ci-builds-repair-benchmark) and the associated [research paper](https://arxiv.org/abs/2406.11612).
|
||||
See notice below for details
|
||||
|
||||
## Setup
|
||||
|
||||
Before running any scripts, make sure to configure the benchmark by setting up `config.yaml`.
|
||||
This benchmark pushes to JetBrains' private GitHub repository. You will to request a `token_gh` provided by their team, to run this benchmark.
|
||||
|
||||
## Inference
|
||||
|
||||
To run inference with your model:
|
||||
|
||||
```bash
|
||||
./evaluation/benchmarks/lca_ci_build_repair/scripts/run_infer.sh llm.yourmodel
|
||||
```
|
||||
|
||||
## Evaluation
|
||||
|
||||
To evaluate the predictions:
|
||||
|
||||
```bash
|
||||
./evaluation/benchmarks/lca_ci_build_repair/scripts/eval_infer.sh predictions_path_containing_output
|
||||
```
|
||||
|
||||
## Results
|
||||
The benchmark contains 68 instances, we skip instances #126 and #145, and only run 66 instances due to dockerization errors.
|
||||
|
||||
Due to running in live GitHub machines, the benchmark is sensitive to the date it is run. Even the golden patches in the dataset might present failures due to updates.
|
||||
For example, on 2025-04-09, running the benchmark against the golden patches gave 57/67 successes, with 1 job left in the waiting list.
|
||||
|
||||
On 2025-04-10, running the benchmark full with OH and no oracle, 37 succeeded. That is 54% of the complete set of 68 instances and 64% of the 57 that succeed with golden patches.
|
||||
@@ -0,0 +1,11 @@
|
||||
LCA_PATH: path #where to clone lca-ci rep
|
||||
model_name: OpenHands
|
||||
benchmark_owner: ICML-25-BenchName-builds-repair
|
||||
token_gh: your_token
|
||||
#for lca-ci-repo
|
||||
repos_folder: /path/to/repos # here the cloned repos would be stored
|
||||
out_folder: /out/folder # here the result files would be stored
|
||||
data_cache_dir: /data/cache/dir/ # here the cached dataset would be stored
|
||||
username_gh: username-gh # your GitHub username
|
||||
# test_username: test_user # username that would be displayed in the benchmark. Optional. If ommitted, username_gh would be used
|
||||
language: Python # dataset language (now only Python is available)
|
||||
242
evaluation/benchmarks/lca_ci_build_repair/eval_infer.py
Normal file
242
evaluation/benchmarks/lca_ci_build_repair/eval_infer.py
Normal file
@@ -0,0 +1,242 @@
|
||||
"""Implements evaluation on JetBrains CI builds repair baselines
|
||||
|
||||
Please see https://github.com/JetBrains-Research/lca-baselines/tree/main/ci-builds-repair
|
||||
and https://huggingface.co/datasets/JetBrains-Research/lca-ci-builds-repair
|
||||
|
||||
TODOs:
|
||||
- Add more flags
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
import ruamel.yaml
|
||||
|
||||
from evaluation.utils.shared import (
|
||||
EvalMetadata,
|
||||
get_default_sandbox_config_for_eval,
|
||||
make_metadata,
|
||||
)
|
||||
from openhands.core.config import (
|
||||
AppConfig,
|
||||
LLMConfig,
|
||||
get_parser,
|
||||
load_app_config,
|
||||
)
|
||||
from openhands.core.logger import openhands_logger as logger
|
||||
from openhands.core.main import create_runtime
|
||||
from openhands.events.action import CmdRunAction
|
||||
from openhands.events.observation import CmdOutputObservation
|
||||
from openhands.runtime.base import Runtime
|
||||
from openhands.utils.async_utils import call_async_from_sync
|
||||
|
||||
|
||||
def get_config(
|
||||
metadata: EvalMetadata,
|
||||
) -> AppConfig:
|
||||
sandbox_config = get_default_sandbox_config_for_eval()
|
||||
sandbox_config.base_container_image = 'python:3.12-bookworm'
|
||||
config = AppConfig(
|
||||
default_agent=metadata.agent_class,
|
||||
run_as_openhands=False,
|
||||
runtime='docker',
|
||||
max_iterations=metadata.max_iterations,
|
||||
sandbox=sandbox_config,
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
)
|
||||
config.set_llm_config(metadata.llm_config)
|
||||
agent_config = config.get_agent_config(metadata.agent_class)
|
||||
agent_config.enable_prompt_extensions = False
|
||||
return config
|
||||
|
||||
|
||||
config = load_app_config()
|
||||
|
||||
|
||||
def load_bench_config():
|
||||
script_dir = os.path.dirname(
|
||||
os.path.abspath(__file__)
|
||||
) # Get the absolute path of the script
|
||||
config_path = os.path.join(script_dir, 'config.yaml')
|
||||
yaml = ruamel.yaml.YAML(typ='rt')
|
||||
with open(config_path, 'r') as file:
|
||||
return yaml.load(file)
|
||||
|
||||
|
||||
bench_config = load_bench_config()
|
||||
|
||||
|
||||
def run_eval(
|
||||
runtime: Runtime,
|
||||
):
|
||||
"""Run the evaluation and create report"""
|
||||
logger.info(f"{'-' * 50} BEGIN Runtime Initialization Fn {'-' * 50}")
|
||||
obs: CmdOutputObservation
|
||||
|
||||
lca_path = bench_config['LCA_PATH']
|
||||
lca_ci_path = os.path.join(
|
||||
lca_path, 'lca-baselines', 'ci-builds-repair', 'ci-builds-repair-benchmark'
|
||||
)
|
||||
|
||||
model_name = bench_config['model_name']
|
||||
|
||||
action = CmdRunAction(command=f'mkdir {lca_path}')
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
assert obs.exit_code == 0
|
||||
|
||||
action = CmdRunAction(command=f'cd {lca_path}')
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
assert obs.exit_code == 0
|
||||
|
||||
lca_repo_url = 'https://github.com/juanmichelini/lca-baselines'
|
||||
action = CmdRunAction(command=f'git clone {lca_repo_url}')
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
assert obs.exit_code == 0
|
||||
|
||||
action = CmdRunAction(command=f'cd {lca_ci_path}')
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
assert obs.exit_code == 0
|
||||
|
||||
action = CmdRunAction(command='git switch open-hands-integration')
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
assert obs.exit_code == 0
|
||||
|
||||
script_dir = os.path.dirname(
|
||||
os.path.abspath(__file__)
|
||||
) # Get the absolute path of the script
|
||||
config_path = os.path.join(script_dir, 'config.yaml')
|
||||
runtime.copy_to(config_path, lca_ci_path)
|
||||
|
||||
token_gh = bench_config['token_gh']
|
||||
commandf = f'export TOKEN_GH={token_gh}'
|
||||
action = CmdRunAction(command=commandf)
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
|
||||
action = CmdRunAction(command='poetry install')
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
|
||||
# Set up the task environment
|
||||
commandf = f'poetry run python run_eval_jobs.py --model-name "{model_name}" --config-path "{lca_ci_path}/config.yaml" --job-ids-file "/tmp/output_lca.jsonl" --result-filename "testfile.jsonl" > /tmp/single_output.txt'
|
||||
action = CmdRunAction(command=commandf)
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
logger.info(f'run_eval_jobs.py gave {obs.content} !')
|
||||
# assert obs.exit_code == 0
|
||||
|
||||
commandf = 'cat /tmp/single_output.txt'
|
||||
action = CmdRunAction(command=commandf)
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
logger.info(f' {commandf} gave {obs.content}!')
|
||||
|
||||
testfile_path = os.path.join(bench_config['out_folder'], 'testfile.jsonl')
|
||||
commandf = f'cat {testfile_path}'
|
||||
action = CmdRunAction(command=commandf)
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
report_str = obs.content
|
||||
|
||||
logger.info(f"{'-' * 50} END Runtime Initialization Fn {'-' * 50}")
|
||||
return report_str
|
||||
|
||||
|
||||
def process_predictions(predictions_path: str):
|
||||
output_path = Path(predictions_path)
|
||||
if output_path.suffix != '.jsonl':
|
||||
raise ValueError('output_path must end in .jsonl')
|
||||
|
||||
output_lca_path = output_path.with_name(output_path.stem + '_lca.jsonl')
|
||||
|
||||
with output_path.open() as infile, output_lca_path.open('w') as outfile:
|
||||
for line in infile:
|
||||
data = json.loads(line)
|
||||
json.dump(data.get('test_result'), outfile)
|
||||
outfile.write('\n')
|
||||
|
||||
return str(output_lca_path)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
parser = get_parser()
|
||||
parser.add_argument(
|
||||
'-s',
|
||||
'--eval-split',
|
||||
type=str,
|
||||
default='test',
|
||||
choices=['test'],
|
||||
help='data split to evaluate on, must be test',
|
||||
)
|
||||
parser.add_argument(
|
||||
'--predictions-path',
|
||||
type=str,
|
||||
help='Path to the directory containing the output.jsonl with the predictions.',
|
||||
)
|
||||
args, _ = parser.parse_known_args()
|
||||
|
||||
data_split = args.eval_split
|
||||
|
||||
llm_config = LLMConfig(model='dummy_model')
|
||||
|
||||
metadata = make_metadata(
|
||||
llm_config,
|
||||
f'jetbrains-lca-ci--{data_split}',
|
||||
args.agent_cls,
|
||||
args.max_iterations,
|
||||
args.eval_note,
|
||||
args.predictions_path,
|
||||
)
|
||||
|
||||
# prepare image
|
||||
config = get_config(metadata)
|
||||
runtime = create_runtime(config)
|
||||
call_async_from_sync(runtime.connect)
|
||||
logger.info('Converting output.jsonl into output_lca.jsonl')
|
||||
predictions_lca_path = process_predictions(
|
||||
os.path.join(args.predictions_path, 'output.jsonl')
|
||||
)
|
||||
runtime.copy_to(predictions_lca_path, '/tmp')
|
||||
|
||||
# get results
|
||||
results_str = run_eval(runtime)
|
||||
results_path = os.path.join(args.predictions_path, 'results.jsonl')
|
||||
with open(results_path, 'w') as file:
|
||||
file.write(results_str)
|
||||
logger.info(f'Saved results to {results_path}')
|
||||
|
||||
# make a summary
|
||||
resolved_instances = []
|
||||
unresolved_instances = []
|
||||
for line in results_str.strip().splitlines():
|
||||
data = json.loads(line)
|
||||
conclusion = data.get('conclusion')
|
||||
if conclusion == 'success':
|
||||
resolved_instances.append(data)
|
||||
elif conclusion == 'failure':
|
||||
unresolved_instances.append(data)
|
||||
|
||||
completed_instances = resolved_instances + unresolved_instances
|
||||
|
||||
report = {
|
||||
'success': len(resolved_instances),
|
||||
'failure': len(unresolved_instances),
|
||||
'resolved_instances': resolved_instances,
|
||||
'unresolved_instances': unresolved_instances,
|
||||
'completed_instances': completed_instances,
|
||||
}
|
||||
|
||||
print(f'Results: {report}')
|
||||
report_path = os.path.join(args.predictions_path, 'report.jsonl')
|
||||
with open(report_path, 'w') as out_f:
|
||||
out_f.write(json.dumps(report) + '\n')
|
||||
|
||||
logger.info(f'Saved report of results in swebench format to {report_path}')
|
||||
406
evaluation/benchmarks/lca_ci_build_repair/run_infer.py
Normal file
406
evaluation/benchmarks/lca_ci_build_repair/run_infer.py
Normal file
@@ -0,0 +1,406 @@
|
||||
"""Implements inference on JetBrains CI builds repair baselines
|
||||
|
||||
Please see https://github.com/JetBrains-Research/lca-baselines/tree/main/ci-builds-repair
|
||||
and https://huggingface.co/datasets/JetBrains-Research/lca-ci-builds-repair
|
||||
|
||||
TODOs:
|
||||
- Add EXP_NAME
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import os
|
||||
from typing import Any
|
||||
|
||||
import pandas as pd
|
||||
import ruamel.yaml
|
||||
from datasets import load_dataset
|
||||
|
||||
from evaluation.utils.shared import (
|
||||
EvalMetadata,
|
||||
EvalOutput,
|
||||
codeact_user_response,
|
||||
compatibility_for_eval_history_pairs,
|
||||
get_default_sandbox_config_for_eval,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
run_evaluation,
|
||||
)
|
||||
from openhands.controller.state.state import State
|
||||
from openhands.core.config import (
|
||||
AppConfig,
|
||||
get_llm_config_arg,
|
||||
get_parser,
|
||||
load_app_config,
|
||||
)
|
||||
from openhands.core.logger import openhands_logger as logger
|
||||
from openhands.core.main import create_runtime, run_controller
|
||||
from openhands.events.action import CmdRunAction, MessageAction
|
||||
from openhands.events.observation import CmdOutputObservation
|
||||
from openhands.runtime.base import Runtime
|
||||
from openhands.utils.async_utils import call_async_from_sync
|
||||
|
||||
|
||||
def get_config(
|
||||
metadata: EvalMetadata,
|
||||
) -> AppConfig:
|
||||
sandbox_config = get_default_sandbox_config_for_eval()
|
||||
sandbox_config.base_container_image = 'python:3.12-bookworm'
|
||||
config = AppConfig(
|
||||
default_agent=metadata.agent_class,
|
||||
run_as_openhands=False,
|
||||
runtime='docker',
|
||||
max_iterations=metadata.max_iterations,
|
||||
sandbox=sandbox_config,
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
)
|
||||
config.set_llm_config(metadata.llm_config)
|
||||
agent_config = config.get_agent_config(metadata.agent_class)
|
||||
agent_config.enable_prompt_extensions = False
|
||||
return config
|
||||
|
||||
|
||||
config = load_app_config()
|
||||
|
||||
|
||||
def load_bench_config():
|
||||
script_dir = os.path.dirname(
|
||||
os.path.abspath(__file__)
|
||||
) # Get the absolute path of the script
|
||||
config_path = os.path.join(script_dir, 'config.yaml')
|
||||
yaml = ruamel.yaml.YAML(typ='rt')
|
||||
with open(config_path, 'r') as file:
|
||||
return yaml.load(file)
|
||||
|
||||
|
||||
bench_config = load_bench_config()
|
||||
|
||||
AGENT_CLS_TO_FAKE_USER_RESPONSE_FN = {
|
||||
'CodeActAgent': codeact_user_response,
|
||||
}
|
||||
|
||||
AGENT_CLS_TO_INST_SUFFIX = {
|
||||
'CodeActAgent': 'When you think you have completed the task, please finish the interaction using the "finish" tool.\n'
|
||||
}
|
||||
|
||||
|
||||
def initialize_runtime(
|
||||
runtime: Runtime,
|
||||
instance: pd.Series,
|
||||
):
|
||||
"""Initialize the runtime for the agent.
|
||||
|
||||
This function is called before the runtime is used to run the agent.
|
||||
"""
|
||||
logger.info(f"{'-' * 50} BEGIN Runtime Initialization Fn {'-' * 50}")
|
||||
obs: CmdOutputObservation
|
||||
|
||||
lca_path = bench_config['LCA_PATH']
|
||||
lca_ci_path = os.path.join(
|
||||
lca_path, 'lca-baselines', 'ci-builds-repair', 'ci-builds-repair-benchmark'
|
||||
)
|
||||
|
||||
repo_name = instance['repo_name']
|
||||
repos_path = bench_config['repos_folder']
|
||||
repo_owner = instance['repo_owner']
|
||||
repo_path = os.path.join(repos_path, f'{repo_owner}__{repo_name}')
|
||||
model_name = bench_config['model_name']
|
||||
|
||||
action = CmdRunAction(command=f'mkdir {lca_path}')
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
assert obs.exit_code == 0
|
||||
|
||||
action = CmdRunAction(command=f'cd {lca_path}')
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
assert obs.exit_code == 0
|
||||
|
||||
lca_repo_url = 'https://github.com/juanmichelini/lca-baselines'
|
||||
action = CmdRunAction(command=f'git clone {lca_repo_url}')
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
assert obs.exit_code == 0
|
||||
|
||||
action = CmdRunAction(command=f'cd {lca_ci_path}')
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
assert obs.exit_code == 0
|
||||
|
||||
action = CmdRunAction(command='git switch open-hands-integration')
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
assert obs.exit_code == 0
|
||||
|
||||
script_dir = os.path.dirname(
|
||||
os.path.abspath(__file__)
|
||||
) # Get the absolute path of the script
|
||||
config_path = os.path.join(script_dir, 'config.yaml')
|
||||
with open(config_path, 'r') as file:
|
||||
config_as_text = file.read()
|
||||
|
||||
commandf = f"echo '{config_as_text}' > config.yaml"
|
||||
action = CmdRunAction(command=commandf)
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
|
||||
token_gh = bench_config['token_gh']
|
||||
commandf = f'export TOKEN_GH={token_gh}'
|
||||
action = CmdRunAction(command=commandf)
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
|
||||
action = CmdRunAction(command='poetry install')
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
|
||||
# Set up the task environment
|
||||
commandf = f'poetry run python run_get_datapoint.py --model-name {model_name} --id {instance["id"]} > branch_name.txt'
|
||||
action = CmdRunAction(command=commandf)
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
if obs.exit_code != 0:
|
||||
print(f'run_get_datapoint.py failed at {instance["id"]} with {obs.content}')
|
||||
assert obs.exit_code == 0
|
||||
|
||||
commandf = 'cat branch_name.txt'
|
||||
action = CmdRunAction(command=commandf)
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
bench_config['user_branch_name'] = obs.content
|
||||
|
||||
# Navigate to the task's code path
|
||||
action = CmdRunAction(command=f'cd {repo_path}')
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
|
||||
logger.info(f"{'-' * 50} END Runtime Initialization Fn {'-' * 50}")
|
||||
|
||||
|
||||
def complete_runtime(
|
||||
runtime: Runtime,
|
||||
instance: pd.Series,
|
||||
) -> dict[str, Any]:
|
||||
"""Complete the runtime for the agent.
|
||||
|
||||
This function is called before the runtime is used to run the agent.
|
||||
If you need to do something in the sandbox to get the correctness metric after
|
||||
the agent has run, modify this function.
|
||||
"""
|
||||
logger.info(f"{'-' * 50} BEGIN Runtime Completion Fn {'-' * 50}")
|
||||
obs: CmdOutputObservation
|
||||
|
||||
model_name = bench_config['model_name']
|
||||
|
||||
lca_path = bench_config['LCA_PATH']
|
||||
lca_ci_path = os.path.join(
|
||||
lca_path, 'lca-baselines', 'ci-builds-repair', 'ci-builds-repair-benchmark'
|
||||
)
|
||||
|
||||
user_branch_name = bench_config['user_branch_name']
|
||||
|
||||
token_gh = bench_config['token_gh']
|
||||
commandf = f'export TOKEN_GH={token_gh}'
|
||||
action = CmdRunAction(command=commandf)
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
|
||||
# Navigate to the lca-baseslines scripts path
|
||||
action = CmdRunAction(command=f'cd {lca_ci_path}')
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
assert obs.exit_code == 0
|
||||
|
||||
commandf = f'poetry run python run_push_datapoint.py --id {instance["id"]} --model-name {model_name} --user-branch-name {user_branch_name} > single_output.json'
|
||||
logger.info(f'Running push script: {commandf}')
|
||||
action = CmdRunAction(command=commandf)
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
# assert obs.exit_code == 0
|
||||
|
||||
commandf = 'cat single_output.json'
|
||||
action = CmdRunAction(command=commandf)
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
result = json.loads(obs.content)
|
||||
|
||||
logger.info(f"{'-' * 50} END Runtime Completion Fn {'-' * 50}")
|
||||
|
||||
return result
|
||||
|
||||
|
||||
def process_instance(instance: Any, metadata: EvalMetadata, reset_logger: bool = True):
|
||||
config = get_config(metadata)
|
||||
|
||||
# Setup the logger properly, so you can run multi-processing to parallelize the evaluation
|
||||
if reset_logger:
|
||||
log_dir = os.path.join(metadata.eval_output_dir, 'infer_logs')
|
||||
reset_logger_for_multiprocessing(logger, instance['instance_id'], log_dir)
|
||||
else:
|
||||
logger.info(f'Starting evaluation for instance {instance["instance_id"]}.')
|
||||
|
||||
repo_name = instance['repo_name']
|
||||
repo_workflow = instance['workflow_path']
|
||||
repo_logs = instance['logs']
|
||||
repos_path = bench_config['repos_folder']
|
||||
repo_owner = instance['repo_owner']
|
||||
repo_path = os.path.join(repos_path, f'{repo_owner}__{repo_name}')
|
||||
|
||||
# Prepare the task instruction
|
||||
instruction_no_oracle = f"""
|
||||
<uploaded_files>
|
||||
{repo_path}
|
||||
</uploaded_files>
|
||||
|
||||
I've uploaded a python code repository in the directory {repo_path}, Consider the following issue:
|
||||
|
||||
<issue_description>
|
||||
The repository must pass the CI workflow {repo_workflow}.
|
||||
but it gave the following error
|
||||
{repo_logs}
|
||||
</issue_description>
|
||||
|
||||
Can you help me implement the necessary changes to the repository so that the requirements specified in the <issue_description> are met?
|
||||
I've already taken care of all changes to any of the test files described in the <issue_description>. This means you DON'T have to modify the testing logic or any of the tests in any way!
|
||||
Also the development Python environment is already set up for you (i.e., all dependencies already installed), so you don't need to install other packages.
|
||||
Your task is to make the minimal changes to non-test files in the {repo_path} directory to ensure the <issue_description> is satisfied.
|
||||
|
||||
Follow these phases to resolve the issue:
|
||||
|
||||
Phase 1. READING: read the problem and reword it in clearer terms
|
||||
1.1 If there are code or config snippets. Express in words any best practices or conventions in them.
|
||||
1.2 Hightlight message errors, method names, variables, file names, stack traces, and technical details.
|
||||
1.3 Explain the problem in clear terms.
|
||||
1.4 Enumerate the steps to reproduce the problem.
|
||||
1.5 Hightlight any best practices to take into account when testing and fixing the issue
|
||||
|
||||
Phase 2. RUNNING: install and run the tests on the repository
|
||||
2.1 Follow the readme
|
||||
2.2 Install the environment and anything needed
|
||||
2.2 Iterate and figure out how to run the tests
|
||||
|
||||
Phase 3. EXPLORATION: find the files that are related to the problem and possible solutions
|
||||
3.1 Use `grep` to search for relevant methods, classes, keywords and error messages.
|
||||
3.2 Identify all files related to the problem statement.
|
||||
3.3 Propose the methods and files to fix the issue and explain why.
|
||||
3.4 From the possible file locations, select the most likely location to fix the issue.
|
||||
|
||||
Phase 4. TEST CREATION: before implementing any fix, create a script to reproduce and verify the issue.
|
||||
4.1 Look at existing test files in the repository to understand the test format/structure.
|
||||
4.2 Create a minimal reproduction script that reproduces the located issue.
|
||||
4.3 Run the reproduction script to confirm you are reproducing the issue.
|
||||
4.4 Adjust the reproduction script as necessary.
|
||||
|
||||
Phase 5. FIX ANALYSIS: state clearly the problem and how to fix it
|
||||
5.1 State clearly what the problem is.
|
||||
5.2 State clearly where the problem is located.
|
||||
5.3 State clearly how the test reproduces the issue.
|
||||
5.4 State clearly the best practices to take into account in the fix.
|
||||
5.5 State clearly how to fix the problem.
|
||||
|
||||
Phase 6. FIX IMPLEMENTATION: Edit the source code to implement your chosen solution.
|
||||
6.1 Make minimal, focused changes to fix the issue.
|
||||
|
||||
Phase 7. VERIFICATION: Test your implementation thoroughly.
|
||||
7.1 Run your reproduction script to verify the fix works.
|
||||
7.2 Add edge cases to your test script to ensure comprehensive coverage.
|
||||
7.3 Run existing tests related to the modified code to ensure you haven't broken anything. Run any tests in the repository related to:
|
||||
7.2.1 The issue you are fixing
|
||||
7.2.2 The files you modified
|
||||
7.2.3 The functions you changed
|
||||
7.4 If any tests fail, revise your implementation until all tests pass
|
||||
|
||||
Phase 8. REVIEW: Carefully re-read the problem description and compare your changes with the base commit {instance["sha_fail"]}.
|
||||
8.1 Ensure you've fully addressed all requirements.
|
||||
|
||||
Once all phases are done, announce: 'Agent Task Complete'.
|
||||
Be thorough in your exploration, testing, and reasoning. It's fine if your thinking process is lengthy - quality and completeness are more important than brevity.
|
||||
"""
|
||||
runtime = create_runtime(config)
|
||||
call_async_from_sync(runtime.connect)
|
||||
initialize_runtime(runtime, instance)
|
||||
|
||||
# Run the agent
|
||||
state: State | None = asyncio.run(
|
||||
run_controller(
|
||||
config=config,
|
||||
initial_user_action=MessageAction(content=instruction_no_oracle),
|
||||
runtime=runtime,
|
||||
fake_user_response_fn=AGENT_CLS_TO_FAKE_USER_RESPONSE_FN.get(
|
||||
metadata.agent_class
|
||||
),
|
||||
)
|
||||
)
|
||||
assert state is not None
|
||||
metrics = state.metrics.get() if state.metrics else {}
|
||||
|
||||
test_result = complete_runtime(runtime, instance)
|
||||
|
||||
# history is now available as a stream of events, rather than list of pairs of (Action, Observation)
|
||||
# for compatibility with the existing output format, we can remake the pairs here
|
||||
# remove when it becomes unnecessary
|
||||
histories = compatibility_for_eval_history_pairs(state.history)
|
||||
|
||||
# Save the output
|
||||
output = EvalOutput(
|
||||
instance_id=instance['instance_id'],
|
||||
# instance=instance.to_dict(orient='recorods'),
|
||||
instruction=instruction_no_oracle,
|
||||
metadata=metadata,
|
||||
history=histories,
|
||||
test_result=test_result,
|
||||
metrics=metrics,
|
||||
)
|
||||
return output
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
parser = get_parser()
|
||||
parser.add_argument(
|
||||
'-s',
|
||||
'--eval-split',
|
||||
type=str,
|
||||
default='test',
|
||||
choices=['test'],
|
||||
help='data split to evaluate on, must be test',
|
||||
)
|
||||
args, _ = parser.parse_known_args()
|
||||
|
||||
data_split = args.eval_split
|
||||
|
||||
bench = load_dataset(
|
||||
'JetBrains-Research/lca-ci-builds-repair', split=data_split
|
||||
).to_pandas()
|
||||
# todo: see why 126 is giving problems on inference
|
||||
# todo: see why 145 is giving problems on eval
|
||||
bench = bench[bench['id'] != 126]
|
||||
bench = bench[bench['id'] != 145]
|
||||
# bench = bench.iloc[0:56]
|
||||
# add column instnace_id for compatibility with oh repo, old id column must be kept for lca repo
|
||||
bench['instance_id'] = bench['id'].astype(str)
|
||||
|
||||
llm_config = None
|
||||
if args.llm_config:
|
||||
llm_config = get_llm_config_arg(args.llm_config)
|
||||
# modify_params must be False for evaluation purpose, for reproducibility and accurancy of results
|
||||
llm_config.modify_params = False
|
||||
if llm_config is None:
|
||||
raise ValueError(f'Could not find LLM config: --llm_config {args.llm_config}')
|
||||
|
||||
metadata = make_metadata(
|
||||
llm_config,
|
||||
f'jetbrains-lca-ci--{data_split}',
|
||||
args.agent_cls,
|
||||
args.max_iterations,
|
||||
args.eval_note,
|
||||
args.eval_output_dir,
|
||||
)
|
||||
output_file = os.path.join(metadata.eval_output_dir, 'output.jsonl')
|
||||
instances = prepare_dataset(bench, output_file, args.eval_n_limit)
|
||||
|
||||
run_evaluation(
|
||||
instances, metadata, output_file, args.eval_num_workers, process_instance
|
||||
)
|
||||
33
evaluation/benchmarks/lca_ci_build_repair/scripts/eval_infer.sh
Executable file
33
evaluation/benchmarks/lca_ci_build_repair/scripts/eval_infer.sh
Executable file
@@ -0,0 +1,33 @@
|
||||
#!/usr/bin/env bash
|
||||
set -eo pipefail
|
||||
|
||||
source "evaluation/utils/version_control.sh"
|
||||
|
||||
PROCESS_FILEPATH=$1
|
||||
if [ -z "$PROCESS_FILEPATH" ]; then
|
||||
echo "Error: PROCESS_FILEPATH is empty. Usage: ./eval_infer.sh <output_file> [instance_id] [dataset_name] [split]"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
get_openhands_version
|
||||
|
||||
PROCESS_FILEPATH=$(realpath $PROCESS_FILEPATH)
|
||||
echo "OPENHANDS_VERSION: $OPENHANDS_VERSION"
|
||||
echo "PROCESS_FILEPATH: $PROCESS_FILEPATH"
|
||||
|
||||
EVAL_NOTE="$OPENHANDS_VERSION"
|
||||
if [ -n "$EXP_NAME" ]; then
|
||||
EVAL_NOTE="$EVAL_NOTE-$EXP_NAME"
|
||||
fi
|
||||
|
||||
function run_eval() {
|
||||
COMMAND="poetry run python ./evaluation/benchmarks/lca_ci_build_repair/eval_infer.py \
|
||||
--predictions-path $PROCESS_FILEPATH "
|
||||
|
||||
echo "RUNNING: $COMMAND"
|
||||
# Run the command
|
||||
eval $COMMAND
|
||||
}
|
||||
|
||||
unset SANDBOX_ENV_GITHUB_TOKEN # prevent the agent from using the github token to push
|
||||
run_eval
|
||||
27
evaluation/benchmarks/lca_ci_build_repair/scripts/run_infer.sh
Executable file
27
evaluation/benchmarks/lca_ci_build_repair/scripts/run_infer.sh
Executable file
@@ -0,0 +1,27 @@
|
||||
#!/usr/bin/env bash
|
||||
set -eo pipefail
|
||||
|
||||
source "evaluation/utils/version_control.sh"
|
||||
|
||||
MODEL_CONFIG=$1
|
||||
|
||||
get_openhands_version
|
||||
|
||||
echo "OPENHANDS_VERSION: $OPENHANDS_VERSION"
|
||||
echo "MODEL_CONFIG: $MODEL_CONFIG"
|
||||
|
||||
EVAL_NOTE="$OPENHANDS_VERSION"
|
||||
if [ -n "$EXP_NAME" ]; then
|
||||
EVAL_NOTE="$EVAL_NOTE-$EXP_NAME"
|
||||
fi
|
||||
|
||||
function run_eval() {
|
||||
COMMAND="poetry run python ./evaluation/benchmarks/lca_ci_build_repair/run_infer.py \
|
||||
--llm-config $MODEL_CONFIG "
|
||||
|
||||
# Run the command
|
||||
eval $COMMAND
|
||||
}
|
||||
|
||||
#unset SANDBOX_ENV_GITHUB_TOKEN # prevent the agent from using the github token to push
|
||||
run_eval
|
||||
60
evaluation/benchmarks/lca_ci_build_repair/setup.py
Normal file
60
evaluation/benchmarks/lca_ci_build_repair/setup.py
Normal file
@@ -0,0 +1,60 @@
|
||||
"""Installs LCA CI Build Repair benchmark with scripts for OH integration."""
|
||||
|
||||
import os
|
||||
import shutil
|
||||
import subprocess
|
||||
|
||||
import yaml
|
||||
|
||||
|
||||
def setup():
|
||||
# Read config.yaml
|
||||
print('Reading config.yaml')
|
||||
script_dir = os.path.dirname(
|
||||
os.path.abspath(__file__)
|
||||
) # Get the absolute path of the script
|
||||
config_path = os.path.join(script_dir, 'config.yaml')
|
||||
with open(config_path, 'r') as f:
|
||||
config = yaml.safe_load(f)
|
||||
|
||||
lca_path = config['LCA_PATH']
|
||||
lca_ci_path = os.path.join(
|
||||
lca_path, 'lca-baselines', 'ci-builds-repair', 'ci-builds-repair-benchmark'
|
||||
)
|
||||
repo_url = 'https://github.com/juanmichelini/lca-baselines'
|
||||
|
||||
# Clone the repository to LCA_CI_PATH
|
||||
print(f'Cloning lca-baselines repository from {repo_url} into {lca_path}')
|
||||
result = subprocess.run(
|
||||
['git', 'clone', repo_url], cwd=lca_path, capture_output=True, text=True
|
||||
)
|
||||
if result.returncode != 0:
|
||||
print(f'Warning cloning repository: {result.stderr}')
|
||||
|
||||
# Clone the repository to LCA_CI_PATH
|
||||
print('Switching branches')
|
||||
result = subprocess.run(
|
||||
['git', 'switch', 'open-hands-integration'],
|
||||
cwd=lca_ci_path,
|
||||
capture_output=True,
|
||||
text=True,
|
||||
)
|
||||
if result.returncode != 0:
|
||||
print(f'Warning switching repository: {result.stderr}')
|
||||
|
||||
# Move and rename config_lca.yaml (overwrite if exists)
|
||||
lca_ci_config_path = os.path.join(lca_ci_path, 'config.yaml')
|
||||
print(f'Copying config.yaml to {lca_ci_config_path}')
|
||||
shutil.copy(config_path, lca_ci_config_path)
|
||||
|
||||
# Run poetry install in LCA_CI_PATH
|
||||
print(f"Running 'poetry install' in {lca_ci_path}")
|
||||
result = subprocess.run(
|
||||
['poetry', 'install'], cwd=lca_ci_path, capture_output=True, text=True
|
||||
)
|
||||
if result.returncode != 0:
|
||||
print(f'Warning during poetry install: {result.stderr}')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
setup()
|
||||
@@ -2,6 +2,8 @@
|
||||
|
||||
This folder contains the evaluation harness that we built on top of the original [SWE-Bench benchmark](https://www.swebench.com/) ([paper](https://arxiv.org/abs/2310.06770)).
|
||||
|
||||
**UPDATE (4/8/2025): We now support running SWT-Bench evaluation! For more details, checkout [the corresponding section](#SWT-Bench-Evaluation).**
|
||||
|
||||
**UPDATE (03/27/2025): We now support SWE-Bench multimodal evaluation! Simply use "princeton-nlp/SWE-bench_Multimodal" as the dataset name in the `run_infer.sh` script to evaluate on multimodal instances.**
|
||||
|
||||
**UPDATE (2/18/2025): We now support running SWE-Gym using the same evaluation harness here. For more details, checkout [this README](./SWE-Gym.md).**
|
||||
@@ -141,7 +143,7 @@ With `output.jsonl` file, you can run `eval_infer.sh` to evaluate generated patc
|
||||
./evaluation/benchmarks/swe_bench/scripts/eval_infer.sh $YOUR_OUTPUT_JSONL [instance_id] [dataset_name] [split]
|
||||
|
||||
# Example
|
||||
./evaluation/benchmarks/swe_bench/scripts/eval_infer.sh evaluation/evaluation_outputs/outputs/swe_bench/CodeActAgent/gpt-4-1106-preview_maxiter_50_N_v1.0/output.jsonl
|
||||
./evaluation/benchmarks/swe_bench/scripts/eval_infer.sh evaluation/evaluation_outputs/outputs/princeton-nlp__SWE-bench_Lite/CodeActAgent/gpt-4-1106-preview_maxiter_50_N_v1.0/output.jsonl
|
||||
```
|
||||
|
||||
The script now accepts optional arguments:
|
||||
@@ -182,3 +184,58 @@ To clean-up all existing runtimes that you've already started, run:
|
||||
```bash
|
||||
ALLHANDS_API_KEY="YOUR-API-KEY" ./evaluation/utils/scripts/cleanup_remote_runtime.sh
|
||||
```
|
||||
|
||||
## SWT-Bench Evaluation
|
||||
|
||||
[SWT-Bench](https://swtbench.com/) ([paper](https://arxiv.org/abs/2406.12952)) is a benchmark for evaluating the capability of LLMs at creating unit tests. It is performed on the same instances as SWE-Bench, but requires a separate evaluation harness to capture coverage and issue reproduction. We therefore detail below how to leverage the inference script in this folder to run inference on SWT-Bench and how to use the SWT-Bench evaluation harness to evaluate them.
|
||||
|
||||
### Run inference on SWT-Bench
|
||||
|
||||
To run inference on SWT-Bench, you can use the same `run_infer.sh` script as described for evaluation on plain SWE-Bench. The only differences is that you need to specify the `mode` parameter to `swt` or `swt-ci` when running the script. For example, to run inference on SWT-Bench Verified, run the following command:
|
||||
|
||||
```bash
|
||||
./evaluation/benchmarks/swe_bench/scripts/run_infer.sh [model_config] [git-version] [agent] [eval_limit] [max_iter] [num_workers] [swe-dataset] test 1 swt
|
||||
|
||||
# Example - This runs evaluation on CodeActAgent for 500 instances on "SWT-bench_Verified"'s test set (corresponding to SWE-bench_Verified), with max 100 iteration per instances, with 1 number of workers running in parallel
|
||||
./evaluation/benchmarks/swe_bench/scripts/run_infer.sh llm.eval_gpt4o-2024-11-20 HEAD CodeActAgent 500 100 1 princeton-nlp/SWE-bench_Verified test 1 swt
|
||||
```
|
||||
|
||||
The two modes `swt` and `swt-ci` have the following effect:
|
||||
- `swt`: This mode will change the prompt to instruct the agent to generate reproducing test cases instead of resolving the issue.
|
||||
- `swt-ci`: In addition to the changes by `swt`, this mode sets up the CI environment by i) pre-installing the environment in the docker image, such that the test framework can be executed without errors and ii) telling the model the exact command to run the test framework.
|
||||
|
||||
### Run evaluation for SWT-bench
|
||||
|
||||
The evaluation of these results is done leveraging [the SWT-Bench evaluation harness](https://github.com/logic-star-ai/swt-bench/tree/master).
|
||||
|
||||
#### Extracting results into SWT-Bench harness format
|
||||
In order to run evaluation of the obtained inference results in the SWT-Bench harness, we transform the results to a format that the SWT-Bench evaluation harness expects.
|
||||
|
||||
```bash
|
||||
python3 evaluation/benchmarks/swe_bench/scripts/swtbench/convert.py --prediction_file [output.jsonl] > [output_swt.jsonl]
|
||||
|
||||
# Example
|
||||
python3 evaluation/benchmarks/swe_bench/scripts/swtbench/convert.py --prediction_file "evaluation/evaluation_outputs/outputs/princeton-nlp__SWE-bench_Verified-test/CodeActAgent/gpt-4o-2024-11-20_maxiter_100_N_v0.31.0-no-hint-swt-run_1/output.jsonl" > OpenHands-gpt-4o-2024-11-20.jsonl
|
||||
```
|
||||
|
||||
#### Running the results in SWT-Bench
|
||||
|
||||
Next, we run the [SWT-Bench evaluation harness](https://github.com/logic-star-ai/swt-bench/tree/master) with these results.
|
||||
First set-up and validate the setup as described in the harness [here](https://github.com/logic-star-ai/swt-bench/tree/master?tab=readme-ov-file#-set-up).
|
||||
Then, run the evaluation with the following command:
|
||||
|
||||
```bash
|
||||
# Example
|
||||
python3 -m src.main \
|
||||
--dataset_name princeton-nlp/SWE-bench_Verified \
|
||||
--predictions_path <pathTo>/OpenHands-gpt-4o-2024-11-20.jsonl \
|
||||
--max_workers 12 \
|
||||
--run_id OpenHands-CodeAct-gpt-4o-2024-11-20 --patch_types vanilla --build_mode api
|
||||
```
|
||||
|
||||
The results of the evaluation can be obtained by running the reporting script of the harness.
|
||||
|
||||
```bash
|
||||
# Example
|
||||
python -m src.report run_instance_swt_logs/OpenHands-CodeAct-gpt-4o-2024-11-20/OpenHands__CodeActAgent__gpt-4o-2024-11-20 --dataset verified
|
||||
```
|
||||
|
||||
52
evaluation/benchmarks/swe_bench/binary_patch_utils.py
Normal file
52
evaluation/benchmarks/swe_bench/binary_patch_utils.py
Normal file
@@ -0,0 +1,52 @@
|
||||
"""
|
||||
Utilities for handling binary files and patch generation in SWE-bench evaluation.
|
||||
"""
|
||||
|
||||
|
||||
def remove_binary_diffs(patch_text):
|
||||
"""
|
||||
Remove binary file diffs from a git patch.
|
||||
|
||||
Args:
|
||||
patch_text (str): The git patch text
|
||||
|
||||
Returns:
|
||||
str: The cleaned patch text with binary diffs removed
|
||||
"""
|
||||
lines = patch_text.splitlines()
|
||||
cleaned_lines = []
|
||||
block = []
|
||||
is_binary_block = False
|
||||
|
||||
for line in lines:
|
||||
if line.startswith('diff --git '):
|
||||
if block and not is_binary_block:
|
||||
cleaned_lines.extend(block)
|
||||
block = [line]
|
||||
is_binary_block = False
|
||||
elif 'Binary files' in line:
|
||||
is_binary_block = True
|
||||
block.append(line)
|
||||
else:
|
||||
block.append(line)
|
||||
|
||||
if block and not is_binary_block:
|
||||
cleaned_lines.extend(block)
|
||||
return '\n'.join(cleaned_lines)
|
||||
|
||||
|
||||
def remove_binary_files_from_git():
|
||||
"""
|
||||
Generate a bash command to remove binary files from git staging.
|
||||
|
||||
Returns:
|
||||
str: A bash command that removes binary files from git staging
|
||||
"""
|
||||
return """
|
||||
for file in $(git status --porcelain | grep -E "^(M| M|\\?\\?|A| A)" | cut -c4-); do
|
||||
if [ -f "$file" ] && (file "$file" | grep -q "executable" || git check-attr binary "$file" | grep -q "binary: set"); then
|
||||
git rm -f "$file" 2>/dev/null || rm -f "$file"
|
||||
echo "Removed: $file"
|
||||
fi
|
||||
done
|
||||
""".strip()
|
||||
842
evaluation/benchmarks/swe_bench/resource/swt_bench_constants.py
Normal file
842
evaluation/benchmarks/swe_bench/resource/swt_bench_constants.py
Normal file
@@ -0,0 +1,842 @@
|
||||
# Based on https://github.com/logic-star-ai/swt-bench/blob/master/src/constants.py
|
||||
|
||||
# Constants - Installation Specifications
|
||||
MAP_VERSION_TO_INSTALL_SKLEARN = {
|
||||
k: {
|
||||
'python': '3.6',
|
||||
'packages': 'numpy scipy cython pytest pandas matplotlib',
|
||||
'install': 'python -m pip install -v --no-use-pep517 --no-build-isolation -e .',
|
||||
'pip_packages': [
|
||||
'cython',
|
||||
'numpy==1.19.2',
|
||||
'setuptools',
|
||||
'scipy==1.5.2',
|
||||
],
|
||||
}
|
||||
for k in ['0.20', '0.21', '0.22']
|
||||
}
|
||||
MAP_VERSION_TO_INSTALL_SKLEARN.update(
|
||||
{
|
||||
k: {
|
||||
'python': '3.9',
|
||||
'packages': "'numpy==1.19.2' 'scipy==1.5.2' 'cython==3.0.10' pytest 'pandas<2.0.0' 'matplotlib<3.9.0' setuptools pytest joblib threadpoolctl",
|
||||
'install': 'python -m pip install -v --no-use-pep517 --no-build-isolation -e .',
|
||||
'pip_packages': ['cython', 'setuptools', 'numpy', 'scipy'],
|
||||
}
|
||||
for k in ['1.3', '1.4']
|
||||
}
|
||||
)
|
||||
MAP_VERSION_TO_INSTALL_FLASK = {
|
||||
'2.0': {
|
||||
'python': '3.9',
|
||||
'packages': 'requirements.txt',
|
||||
'install': 'python -m pip install -e .',
|
||||
'pip_packages': [
|
||||
'setuptools==70.0.0',
|
||||
'Werkzeug==2.3.7',
|
||||
'Jinja2==3.0.1',
|
||||
'itsdangerous==2.1.2',
|
||||
'click==8.0.1',
|
||||
'MarkupSafe==2.1.3',
|
||||
],
|
||||
},
|
||||
'2.1': {
|
||||
'python': '3.10',
|
||||
'packages': 'requirements.txt',
|
||||
'install': 'python -m pip install -e .',
|
||||
'pip_packages': [
|
||||
'click==8.1.3',
|
||||
'itsdangerous==2.1.2',
|
||||
'Jinja2==3.1.2',
|
||||
'MarkupSafe==2.1.1',
|
||||
'Werkzeug==2.3.7',
|
||||
],
|
||||
},
|
||||
}
|
||||
MAP_VERSION_TO_INSTALL_FLASK.update(
|
||||
{
|
||||
k: {
|
||||
'python': '3.11',
|
||||
'packages': 'requirements.txt',
|
||||
'install': 'python -m pip install -e .',
|
||||
'pip_packages': [
|
||||
'click==8.1.3',
|
||||
'itsdangerous==2.1.2',
|
||||
'Jinja2==3.1.2',
|
||||
'MarkupSafe==2.1.1',
|
||||
'Werkzeug==2.3.7',
|
||||
],
|
||||
}
|
||||
for k in ['2.2', '2.3']
|
||||
}
|
||||
)
|
||||
MAP_VERSION_TO_INSTALL_DJANGO = {
|
||||
k: {
|
||||
'python': '3.5',
|
||||
'packages': 'requirements.txt',
|
||||
'pre_install': [
|
||||
'apt-get update && apt-get install -y locales',
|
||||
"echo 'en_US UTF-8' > /etc/locale.gen",
|
||||
'locale-gen en_US.UTF-8',
|
||||
],
|
||||
'install': 'python setup.py install',
|
||||
'pip_packages': ['setuptools'],
|
||||
'eval_commands': [
|
||||
'export LANG=en_US.UTF-8',
|
||||
'export LC_ALL=en_US.UTF-8',
|
||||
'export PYTHONIOENCODING=utf8',
|
||||
'export LANGUAGE=en_US:en',
|
||||
],
|
||||
}
|
||||
for k in ['1.7', '1.8', '1.9', '1.10', '1.11', '2.0', '2.1', '2.2']
|
||||
}
|
||||
MAP_VERSION_TO_INSTALL_DJANGO.update(
|
||||
{
|
||||
k: {'python': '3.5', 'install': 'python setup.py install'}
|
||||
for k in ['1.4', '1.5', '1.6']
|
||||
}
|
||||
)
|
||||
MAP_VERSION_TO_INSTALL_DJANGO.update(
|
||||
{
|
||||
k: {
|
||||
'python': '3.6',
|
||||
'packages': 'requirements.txt',
|
||||
'install': 'python -m pip install -e .',
|
||||
'eval_commands': [
|
||||
"sed -i '/en_US.UTF-8/s/^# //g' /etc/locale.gen && locale-gen",
|
||||
'export LANG=en_US.UTF-8',
|
||||
'export LANGUAGE=en_US:en',
|
||||
'export LC_ALL=en_US.UTF-8',
|
||||
],
|
||||
}
|
||||
for k in ['3.0', '3.1', '3.2']
|
||||
}
|
||||
)
|
||||
MAP_VERSION_TO_INSTALL_DJANGO.update(
|
||||
{
|
||||
k: {
|
||||
'python': '3.8',
|
||||
'packages': 'requirements.txt',
|
||||
'install': 'python -m pip install -e .',
|
||||
}
|
||||
for k in ['4.0']
|
||||
}
|
||||
)
|
||||
MAP_VERSION_TO_INSTALL_DJANGO.update(
|
||||
{
|
||||
k: {
|
||||
'python': '3.9',
|
||||
'packages': 'requirements.txt',
|
||||
'install': 'python -m pip install -e .',
|
||||
}
|
||||
for k in ['4.1', '4.2']
|
||||
}
|
||||
)
|
||||
MAP_VERSION_TO_INSTALL_DJANGO.update(
|
||||
{
|
||||
k: {
|
||||
'python': '3.11',
|
||||
'packages': 'requirements.txt',
|
||||
'install': 'python -m pip install -e .',
|
||||
}
|
||||
for k in ['5.0']
|
||||
}
|
||||
)
|
||||
MAP_VERSION_TO_INSTALL_REQUESTS = {
|
||||
k: {'python': '3.9', 'packages': 'pytest', 'install': 'python -m pip install .'}
|
||||
for k in ['0.7', '0.8', '0.9', '0.11', '0.13', '0.14', '1.1', '1.2', '2.0', '2.2']
|
||||
+ ['2.3', '2.4', '2.5', '2.7', '2.8', '2.9', '2.10', '2.11', '2.12', '2.17']
|
||||
+ ['2.18', '2.19', '2.22', '2.26', '2.25', '2.27', '3.0']
|
||||
}
|
||||
MAP_VERSION_TO_INSTALL_SEABORN = {
|
||||
k: {
|
||||
'python': '3.9',
|
||||
'install': 'python -m pip install -e .',
|
||||
'pip_packages': [
|
||||
'contourpy==1.1.0',
|
||||
'cycler==0.11.0',
|
||||
'fonttools==4.42.1',
|
||||
'importlib-resources==6.0.1',
|
||||
'kiwisolver==1.4.5',
|
||||
'matplotlib==3.7.2',
|
||||
'numpy==1.25.2',
|
||||
'packaging==23.1',
|
||||
'pandas==1.3.5', # 2.0.3
|
||||
'pillow==10.0.0',
|
||||
'pyparsing==3.0.9',
|
||||
'pytest',
|
||||
'python-dateutil==2.8.2',
|
||||
'pytz==2023.3.post1',
|
||||
'scipy==1.11.2',
|
||||
'six==1.16.0',
|
||||
'tzdata==2023.1',
|
||||
'zipp==3.16.2',
|
||||
],
|
||||
}
|
||||
for k in ['0.11']
|
||||
}
|
||||
MAP_VERSION_TO_INSTALL_SEABORN.update(
|
||||
{
|
||||
k: {
|
||||
'python': '3.9',
|
||||
'install': 'python -m pip install -e .[dev]',
|
||||
'pip_packages': [
|
||||
'contourpy==1.1.0',
|
||||
'cycler==0.11.0',
|
||||
'fonttools==4.42.1',
|
||||
'importlib-resources==6.0.1',
|
||||
'kiwisolver==1.4.5',
|
||||
'matplotlib==3.7.2',
|
||||
'numpy==1.25.2',
|
||||
'packaging==23.1',
|
||||
'pandas==2.0.0',
|
||||
'pillow==10.0.0',
|
||||
'pyparsing==3.0.9',
|
||||
'pytest',
|
||||
'python-dateutil==2.8.2',
|
||||
'pytz==2023.3.post1',
|
||||
'scipy==1.11.2',
|
||||
'six==1.16.0',
|
||||
'tzdata==2023.1',
|
||||
'zipp==3.16.2',
|
||||
],
|
||||
}
|
||||
for k in ['0.12', '0.13']
|
||||
}
|
||||
)
|
||||
MAP_VERSION_TO_INSTALL_PYTEST = {
|
||||
k: {'python': '3.9', 'install': 'python -m pip install -e .'}
|
||||
for k in [
|
||||
'4.4',
|
||||
'4.5',
|
||||
'4.6',
|
||||
'5.0',
|
||||
'5.1',
|
||||
'5.2',
|
||||
'5.3',
|
||||
'5.4',
|
||||
'6.0',
|
||||
'6.2',
|
||||
'6.3',
|
||||
'7.0',
|
||||
'7.1',
|
||||
'7.2',
|
||||
'7.4',
|
||||
'8.0',
|
||||
]
|
||||
}
|
||||
MAP_VERSION_TO_INSTALL_PYTEST['4.4']['pip_packages'] = [
|
||||
'atomicwrites==1.4.1',
|
||||
'attrs==23.1.0',
|
||||
'more-itertools==10.1.0',
|
||||
'pluggy==0.13.1',
|
||||
'py==1.11.0',
|
||||
'setuptools==68.0.0',
|
||||
'six==1.16.0',
|
||||
]
|
||||
MAP_VERSION_TO_INSTALL_PYTEST['4.5']['pip_packages'] = [
|
||||
'atomicwrites==1.4.1',
|
||||
'attrs==23.1.0',
|
||||
'more-itertools==10.1.0',
|
||||
'pluggy==0.11.0',
|
||||
'py==1.11.0',
|
||||
'setuptools==68.0.0',
|
||||
'six==1.16.0',
|
||||
'wcwidth==0.2.6',
|
||||
]
|
||||
MAP_VERSION_TO_INSTALL_PYTEST['4.6']['pip_packages'] = [
|
||||
'atomicwrites==1.4.1',
|
||||
'attrs==23.1.0',
|
||||
'more-itertools==10.1.0',
|
||||
'packaging==23.1',
|
||||
'pluggy==0.13.1',
|
||||
'py==1.11.0',
|
||||
'six==1.16.0',
|
||||
'wcwidth==0.2.6',
|
||||
]
|
||||
for k in ['5.0', '5.1', '5.2']:
|
||||
MAP_VERSION_TO_INSTALL_PYTEST[k]['pip_packages'] = [
|
||||
'atomicwrites==1.4.1',
|
||||
'attrs==23.1.0',
|
||||
'more-itertools==10.1.0',
|
||||
'packaging==23.1',
|
||||
'pluggy==0.13.1',
|
||||
'py==1.11.0',
|
||||
'wcwidth==0.2.6',
|
||||
]
|
||||
MAP_VERSION_TO_INSTALL_PYTEST['5.3']['pip_packages'] = [
|
||||
'attrs==23.1.0',
|
||||
'more-itertools==10.1.0',
|
||||
'packaging==23.1',
|
||||
'pluggy==0.13.1',
|
||||
'py==1.11.0',
|
||||
'wcwidth==0.2.6',
|
||||
]
|
||||
MAP_VERSION_TO_INSTALL_PYTEST['5.4']['pip_packages'] = [
|
||||
'py==1.11.0',
|
||||
'packaging==23.1',
|
||||
'attrs==23.1.0',
|
||||
'more-itertools==10.1.0',
|
||||
'pluggy==0.13.1',
|
||||
]
|
||||
MAP_VERSION_TO_INSTALL_PYTEST['6.0']['pip_packages'] = [
|
||||
'attrs==23.1.0',
|
||||
'iniconfig==2.0.0',
|
||||
'more-itertools==10.1.0',
|
||||
'packaging==23.1',
|
||||
'pluggy==0.13.1',
|
||||
'py==1.11.0',
|
||||
'toml==0.10.2',
|
||||
]
|
||||
for k in ['6.2', '6.3']:
|
||||
MAP_VERSION_TO_INSTALL_PYTEST[k]['pip_packages'] = [
|
||||
'attrs==23.1.0',
|
||||
'iniconfig==2.0.0',
|
||||
'packaging==23.1',
|
||||
'pluggy==0.13.1',
|
||||
'py==1.11.0',
|
||||
'toml==0.10.2',
|
||||
]
|
||||
MAP_VERSION_TO_INSTALL_PYTEST['7.0']['pip_packages'] = [
|
||||
'attrs==23.1.0',
|
||||
'iniconfig==2.0.0',
|
||||
'packaging==23.1',
|
||||
'pluggy==0.13.1',
|
||||
'py==1.11.0',
|
||||
]
|
||||
for k in ['7.1', '7.2']:
|
||||
MAP_VERSION_TO_INSTALL_PYTEST[k]['pip_packages'] = [
|
||||
'attrs==23.1.0',
|
||||
'iniconfig==2.0.0',
|
||||
'packaging==23.1',
|
||||
'pluggy==0.13.1',
|
||||
'py==1.11.0',
|
||||
'tomli==2.0.1',
|
||||
]
|
||||
MAP_VERSION_TO_INSTALL_PYTEST['7.4']['pip_packages'] = [
|
||||
'iniconfig==2.0.0',
|
||||
'packaging==23.1',
|
||||
'pluggy==1.3.0',
|
||||
'exceptiongroup==1.1.3',
|
||||
'tomli==2.0.1',
|
||||
]
|
||||
MAP_VERSION_TO_INSTALL_PYTEST['8.0']['pip_packages'] = [
|
||||
'iniconfig==2.0.0',
|
||||
'packaging==23.1',
|
||||
'pluggy==1.3.0',
|
||||
'exceptiongroup==1.1.3',
|
||||
'tomli==2.0.1',
|
||||
]
|
||||
MAP_VERSION_TO_INSTALL_MATPLOTLIB = {
|
||||
k: {
|
||||
'python': '3.11',
|
||||
'packages': 'environment.yml',
|
||||
'install': 'python -m pip install -e .',
|
||||
'pre_install': [
|
||||
'apt-get -y update && apt-get -y upgrade && apt-get install -y imagemagick ffmpeg texlive texlive-latex-extra texlive-fonts-recommended texlive-xetex texlive-luatex cm-super dvipng'
|
||||
],
|
||||
'pip_packages': [
|
||||
'contourpy==1.1.0',
|
||||
'cycler==0.11.0',
|
||||
'fonttools==4.42.1',
|
||||
'ghostscript',
|
||||
'kiwisolver==1.4.5',
|
||||
'numpy==1.25.2',
|
||||
'packaging==23.1',
|
||||
'pillow==10.0.0',
|
||||
'pikepdf',
|
||||
'pyparsing==3.0.9',
|
||||
'python-dateutil==2.8.2',
|
||||
'six==1.16.0',
|
||||
'setuptools==68.1.2',
|
||||
'setuptools-scm==7.1.0',
|
||||
'typing-extensions==4.7.1',
|
||||
],
|
||||
}
|
||||
for k in ['3.5', '3.6', '3.7']
|
||||
}
|
||||
MAP_VERSION_TO_INSTALL_MATPLOTLIB.update(
|
||||
{
|
||||
k: {
|
||||
'python': '3.8',
|
||||
'packages': 'requirements.txt',
|
||||
'install': 'python -m pip install -e .',
|
||||
'pre_install': [
|
||||
'apt-get -y update && apt-get -y upgrade && apt-get install -y imagemagick ffmpeg libfreetype6-dev pkg-config texlive texlive-latex-extra texlive-fonts-recommended texlive-xetex texlive-luatex cm-super'
|
||||
],
|
||||
'pip_packages': ['pytest', 'ipython'],
|
||||
}
|
||||
for k in ['3.1', '3.2', '3.3', '3.4']
|
||||
}
|
||||
)
|
||||
MAP_VERSION_TO_INSTALL_MATPLOTLIB.update(
|
||||
{
|
||||
k: {
|
||||
'python': '3.7',
|
||||
'packages': 'requirements.txt',
|
||||
'install': 'python -m pip install -e .',
|
||||
'pre_install': [
|
||||
'apt-get -y update && apt-get -y upgrade && apt-get install -y imagemagick ffmpeg libfreetype6-dev pkg-config'
|
||||
],
|
||||
'pip_packages': ['pytest'],
|
||||
}
|
||||
for k in ['3.0']
|
||||
}
|
||||
)
|
||||
MAP_VERSION_TO_INSTALL_MATPLOTLIB.update(
|
||||
{
|
||||
k: {
|
||||
'python': '3.5',
|
||||
'install': 'python setup.py build; python setup.py install',
|
||||
'pre_install': [
|
||||
'apt-get -y update && apt-get -y upgrade && && apt-get install -y imagemagick ffmpeg'
|
||||
],
|
||||
'pip_packages': ['pytest'],
|
||||
'execute_test_as_nonroot': True,
|
||||
}
|
||||
for k in ['2.0', '2.1', '2.2', '1.0', '1.1', '1.2', '1.3', '1.4', '1.5']
|
||||
}
|
||||
)
|
||||
MAP_VERSION_TO_INSTALL_SPHINX = {
|
||||
k: {
|
||||
'python': '3.9',
|
||||
'pip_packages': ['tox==4.16.0', 'tox-current-env==0.0.11'],
|
||||
'install': 'python -m pip install -e .[test]',
|
||||
'pre_install': ["sed -i 's/pytest/pytest -rA/' tox.ini"],
|
||||
}
|
||||
for k in ['1.5', '1.6', '1.7', '1.8', '2.0', '2.1', '2.2', '2.3', '2.4', '3.0']
|
||||
+ ['3.1', '3.2', '3.3', '3.4', '3.5', '4.0', '4.1', '4.2', '4.3', '4.4']
|
||||
+ ['4.5', '5.0', '5.1', '5.2', '5.3', '6.0', '6.2', '7.0', '7.1', '7.2']
|
||||
}
|
||||
for k in ['3.0', '3.1', '3.2', '3.3', '3.4', '3.5', '4.0', '4.1', '4.2', '4.3', '4.4']:
|
||||
MAP_VERSION_TO_INSTALL_SPHINX[k]['pre_install'].extend(
|
||||
[
|
||||
"sed -i 's/Jinja2>=2.3/Jinja2<3.0/' setup.py",
|
||||
"sed -i 's/sphinxcontrib-applehelp/sphinxcontrib-applehelp<=1.0.7/' setup.py",
|
||||
"sed -i 's/sphinxcontrib-devhelp/sphinxcontrib-devhelp<=1.0.5/' setup.py",
|
||||
"sed -i 's/sphinxcontrib-qthelp/sphinxcontrib-qthelp<=1.0.6/' setup.py",
|
||||
"sed -i 's/alabaster>=0.7,<0.8/alabaster>=0.7,<0.7.12/' setup.py",
|
||||
"sed -i \"s/'packaging',/'packaging', 'markupsafe<=2.0.1',/\" setup.py",
|
||||
]
|
||||
)
|
||||
if k in ['4.2', '4.3', '4.4']:
|
||||
MAP_VERSION_TO_INSTALL_SPHINX[k]['pre_install'].extend(
|
||||
[
|
||||
"sed -i 's/sphinxcontrib-htmlhelp>=2.0.0/sphinxcontrib-htmlhelp>=2.0.0,<=2.0.4/' setup.py",
|
||||
"sed -i 's/sphinxcontrib-serializinghtml>=1.1.5/sphinxcontrib-serializinghtml>=1.1.5,<=1.1.9/' setup.py",
|
||||
]
|
||||
)
|
||||
elif k == '4.1':
|
||||
MAP_VERSION_TO_INSTALL_SPHINX[k]['pre_install'].extend(
|
||||
[
|
||||
(
|
||||
"grep -q 'sphinxcontrib-htmlhelp>=2.0.0' setup.py && "
|
||||
"sed -i 's/sphinxcontrib-htmlhelp>=2.0.0/sphinxcontrib-htmlhelp>=2.0.0,<=2.0.4/' setup.py || "
|
||||
"sed -i 's/sphinxcontrib-htmlhelp/sphinxcontrib-htmlhelp<=2.0.4/' setup.py"
|
||||
),
|
||||
(
|
||||
"grep -q 'sphinxcontrib-serializinghtml>=1.1.5' setup.py && "
|
||||
"sed -i 's/sphinxcontrib-serializinghtml>=1.1.5/sphinxcontrib-serializinghtml>=1.1.5,<=1.1.9/' setup.py || "
|
||||
"sed -i 's/sphinxcontrib-serializinghtml/sphinxcontrib-serializinghtml<=1.1.9/' setup.py"
|
||||
),
|
||||
]
|
||||
)
|
||||
else:
|
||||
MAP_VERSION_TO_INSTALL_SPHINX[k]['pre_install'].extend(
|
||||
[
|
||||
"sed -i 's/sphinxcontrib-htmlhelp/sphinxcontrib-htmlhelp<=2.0.4/' setup.py",
|
||||
"sed -i 's/sphinxcontrib-serializinghtml/sphinxcontrib-serializinghtml<=1.1.9/' setup.py",
|
||||
]
|
||||
)
|
||||
MAP_VERSION_TO_INSTALL_SPHINX['7.2']['pre_install'] += [
|
||||
'apt-get update && apt-get install -y graphviz'
|
||||
]
|
||||
MAP_VERSION_TO_INSTALL_ASTROPY = {
|
||||
k: {
|
||||
'python': '3.9',
|
||||
'install': 'python -m pip install -e .[test] --verbose',
|
||||
'pip_packages': [
|
||||
'attrs==23.1.0',
|
||||
'exceptiongroup==1.1.3',
|
||||
'execnet==2.0.2',
|
||||
'hypothesis==6.82.6',
|
||||
'iniconfig==2.0.0',
|
||||
'numpy==1.25.2',
|
||||
'packaging==23.1',
|
||||
'pluggy==1.3.0',
|
||||
'psutil==5.9.5',
|
||||
'pyerfa==2.0.0.3',
|
||||
'pytest-arraydiff==0.5.0',
|
||||
'pytest-astropy-header==0.2.2',
|
||||
'pytest-astropy==0.10.0',
|
||||
'pytest-cov==4.1.0',
|
||||
'pytest-doctestplus==1.0.0',
|
||||
'pytest-filter-subpackage==0.1.2',
|
||||
'pytest-mock==3.11.1',
|
||||
'pytest-openfiles==0.5.0',
|
||||
'pytest-remotedata==0.4.0',
|
||||
'pytest-xdist==3.3.1',
|
||||
'pytest==7.4.0',
|
||||
'PyYAML==6.0.1',
|
||||
'setuptools==68.0.0',
|
||||
'sortedcontainers==2.4.0',
|
||||
'tomli==2.0.1',
|
||||
],
|
||||
}
|
||||
for k in ['0.1', '0.2', '0.3', '0.4', '1.1', '1.2', '1.3', '3.0', '3.1', '3.2']
|
||||
+ ['4.1', '4.2', '4.3', '5.0', '5.1', '5.2']
|
||||
}
|
||||
for k in ['4.1', '4.2', '4.3', '5.0', '5.1', '5.2']:
|
||||
MAP_VERSION_TO_INSTALL_ASTROPY[k]['pre_install'] = [
|
||||
'sed -i \'s/requires = \\["setuptools",/requires = \\["setuptools==68.0.0",/\' pyproject.toml'
|
||||
]
|
||||
MAP_VERSION_TO_INSTALL_SYMPY = {
|
||||
k: {
|
||||
'python': '3.9',
|
||||
'packages': 'mpmath flake8',
|
||||
'pip_packages': ['mpmath==1.3.0', 'flake8-comprehensions'],
|
||||
'install': 'python -m pip install -e .',
|
||||
}
|
||||
for k in ['0.7', '1.0', '1.1', '1.10', '1.11', '1.12', '1.2', '1.4', '1.5', '1.6']
|
||||
+ ['1.7', '1.8', '1.9']
|
||||
}
|
||||
MAP_VERSION_TO_INSTALL_SYMPY.update(
|
||||
{
|
||||
k: {
|
||||
'python': '3.9',
|
||||
'packages': 'requirements.txt',
|
||||
'install': 'python -m pip install -e .',
|
||||
'pip_packages': ['mpmath==1.3.0'],
|
||||
}
|
||||
for k in ['1.13']
|
||||
}
|
||||
)
|
||||
MAP_VERSION_TO_INSTALL_PYLINT = {
|
||||
k: {
|
||||
'python': '3.9',
|
||||
'packages': 'requirements.txt',
|
||||
'install': 'python -m pip install -e .',
|
||||
}
|
||||
for k in [
|
||||
'2.10',
|
||||
'2.11',
|
||||
'2.13',
|
||||
'2.14',
|
||||
'2.15',
|
||||
'2.16',
|
||||
'2.17',
|
||||
'2.8',
|
||||
'2.9',
|
||||
'3.0',
|
||||
]
|
||||
}
|
||||
MAP_VERSION_TO_INSTALL_PYLINT['2.8']['pip_packages'] = ['pyenchant==3.2']
|
||||
MAP_VERSION_TO_INSTALL_PYLINT['2.8']['pre_install'] = [
|
||||
'apt-get update && apt-get install -y libenchant-2-dev hunspell-en-us'
|
||||
]
|
||||
MAP_VERSION_TO_INSTALL_PYLINT.update(
|
||||
{
|
||||
k: {
|
||||
**MAP_VERSION_TO_INSTALL_PYLINT[k],
|
||||
'pip_packages': ['astroid==3.0.0a6', 'setuptools'],
|
||||
}
|
||||
for k in ['3.0']
|
||||
}
|
||||
)
|
||||
|
||||
MAP_VERSION_TO_INSTALL_XARRAY = {
|
||||
k: {
|
||||
'python': '3.10',
|
||||
'packages': 'environment.yml',
|
||||
'install': 'python -m pip install -e .',
|
||||
'pip_packages': [
|
||||
'numpy==1.23.0',
|
||||
'packaging==23.1',
|
||||
'pandas==1.5.3',
|
||||
'pytest==7.4.0',
|
||||
'python-dateutil==2.8.2',
|
||||
'pytz==2023.3',
|
||||
'six==1.16.0',
|
||||
'scipy==1.11.1',
|
||||
'setuptools==68.0.0',
|
||||
],
|
||||
'no_use_env': True,
|
||||
}
|
||||
for k in ['0.12', '0.18', '0.19', '0.20', '2022.03', '2022.06', '2022.09']
|
||||
}
|
||||
|
||||
MAP_VERSION_TO_INSTALL_SQLFLUFF = {
|
||||
k: {
|
||||
'python': '3.9',
|
||||
'packages': 'requirements.txt',
|
||||
'install': 'python -m pip install -e .',
|
||||
}
|
||||
for k in [
|
||||
'0.10',
|
||||
'0.11',
|
||||
'0.12',
|
||||
'0.13',
|
||||
'0.4',
|
||||
'0.5',
|
||||
'0.6',
|
||||
'0.8',
|
||||
'0.9',
|
||||
'1.0',
|
||||
'1.1',
|
||||
'1.2',
|
||||
'1.3',
|
||||
'1.4',
|
||||
'2.0',
|
||||
'2.1',
|
||||
'2.2',
|
||||
]
|
||||
}
|
||||
MAP_VERSION_TO_INSTALL_DBT_CORE = {
|
||||
k: {
|
||||
'python': '3.9',
|
||||
'packages': 'requirements.txt',
|
||||
'install': 'python -m pip install -e .',
|
||||
}
|
||||
for k in [
|
||||
'0.13',
|
||||
'0.14',
|
||||
'0.15',
|
||||
'0.16',
|
||||
'0.17',
|
||||
'0.18',
|
||||
'0.19',
|
||||
'0.20',
|
||||
'0.21',
|
||||
'1.0',
|
||||
'1.1',
|
||||
'1.2',
|
||||
'1.3',
|
||||
'1.4',
|
||||
'1.5',
|
||||
'1.6',
|
||||
'1.7',
|
||||
]
|
||||
}
|
||||
MAP_VERSION_TO_INSTALL_PYVISTA = {
|
||||
k: {
|
||||
'python': '3.9',
|
||||
'install': 'python -m pip install -e .',
|
||||
'pip_packages': ['pytest'],
|
||||
}
|
||||
for k in ['0.20', '0.21', '0.22', '0.23']
|
||||
}
|
||||
MAP_VERSION_TO_INSTALL_PYVISTA.update(
|
||||
{
|
||||
k: {
|
||||
'python': '3.9',
|
||||
'packages': 'requirements.txt',
|
||||
'install': 'python -m pip install -e .',
|
||||
'pip_packages': ['pytest'],
|
||||
}
|
||||
for k in [
|
||||
'0.24',
|
||||
'0.25',
|
||||
'0.26',
|
||||
'0.27',
|
||||
'0.28',
|
||||
'0.29',
|
||||
'0.30',
|
||||
'0.31',
|
||||
'0.32',
|
||||
'0.33',
|
||||
'0.34',
|
||||
'0.35',
|
||||
'0.36',
|
||||
'0.37',
|
||||
'0.38',
|
||||
'0.39',
|
||||
'0.40',
|
||||
'0.41',
|
||||
'0.42',
|
||||
'0.43',
|
||||
]
|
||||
}
|
||||
)
|
||||
MAP_VERSION_TO_INSTALL_ASTROID = {
|
||||
k: {
|
||||
'python': '3.9',
|
||||
'install': 'python -m pip install -e .',
|
||||
'pip_packages': ['pytest'],
|
||||
}
|
||||
for k in [
|
||||
'2.10',
|
||||
'2.12',
|
||||
'2.13',
|
||||
'2.14',
|
||||
'2.15',
|
||||
'2.16',
|
||||
'2.5',
|
||||
'2.6',
|
||||
'2.7',
|
||||
'2.8',
|
||||
'2.9',
|
||||
'3.0',
|
||||
]
|
||||
}
|
||||
MAP_VERSION_TO_INSTALL_MARSHMALLOW = {
|
||||
k: {
|
||||
'python': '3.9',
|
||||
'install': "python -m pip install -e '.[dev]'",
|
||||
}
|
||||
for k in [
|
||||
'2.18',
|
||||
'2.19',
|
||||
'2.20',
|
||||
'3.0',
|
||||
'3.1',
|
||||
'3.10',
|
||||
'3.11',
|
||||
'3.12',
|
||||
'3.13',
|
||||
'3.15',
|
||||
'3.16',
|
||||
'3.19',
|
||||
'3.2',
|
||||
'3.4',
|
||||
'3.8',
|
||||
'3.9',
|
||||
]
|
||||
}
|
||||
MAP_VERSION_TO_INSTALL_PVLIB = {
|
||||
k: {
|
||||
'python': '3.9',
|
||||
'install': 'python -m pip install -e .[all]',
|
||||
'packages': 'pandas scipy',
|
||||
'pip_packages': ['jupyter', 'ipython', 'matplotlib', 'pytest', 'flake8'],
|
||||
}
|
||||
for k in ['0.1', '0.2', '0.3', '0.4', '0.5', '0.6', '0.7', '0.8', '0.9']
|
||||
}
|
||||
MAP_VERSION_TO_INSTALL_PYDICOM = {
|
||||
k: {'python': '3.6', 'install': 'python -m pip install -e .', 'packages': 'numpy'}
|
||||
for k in [
|
||||
'1.0',
|
||||
'1.1',
|
||||
'1.2',
|
||||
'1.3',
|
||||
'1.4',
|
||||
'2.0',
|
||||
'2.1',
|
||||
'2.2',
|
||||
'2.3',
|
||||
'2.4',
|
||||
'3.0',
|
||||
]
|
||||
}
|
||||
MAP_VERSION_TO_INSTALL_PYDICOM.update(
|
||||
{k: {**MAP_VERSION_TO_INSTALL_PYDICOM[k], 'python': '3.8'} for k in ['1.4', '2.0']}
|
||||
)
|
||||
MAP_VERSION_TO_INSTALL_PYDICOM.update(
|
||||
{k: {**MAP_VERSION_TO_INSTALL_PYDICOM[k], 'python': '3.9'} for k in ['2.1', '2.2']}
|
||||
)
|
||||
MAP_VERSION_TO_INSTALL_PYDICOM.update(
|
||||
{k: {**MAP_VERSION_TO_INSTALL_PYDICOM[k], 'python': '3.10'} for k in ['2.3']}
|
||||
)
|
||||
MAP_VERSION_TO_INSTALL_PYDICOM.update(
|
||||
{k: {**MAP_VERSION_TO_INSTALL_PYDICOM[k], 'python': '3.11'} for k in ['2.4', '3.0']}
|
||||
)
|
||||
MAP_VERSION_TO_INSTALL_HUMANEVAL = {k: {'python': '3.9'} for k in ['1.0']}
|
||||
MAP_VERSION_TO_INSTALL_HUMANEVAL_FIX = {
|
||||
k: {'python': '3.10', 'packages': 'pytest'} for k in ['0.0.1']
|
||||
}
|
||||
|
||||
# Constants - Task Instance Instllation Environment
|
||||
MAP_VERSION_TO_INSTALL = {
|
||||
'astropy/astropy': MAP_VERSION_TO_INSTALL_ASTROPY,
|
||||
'dbt-labs/dbt-core': MAP_VERSION_TO_INSTALL_DBT_CORE,
|
||||
'django/django': MAP_VERSION_TO_INSTALL_DJANGO,
|
||||
'matplotlib/matplotlib': MAP_VERSION_TO_INSTALL_MATPLOTLIB,
|
||||
'marshmallow-code/marshmallow': MAP_VERSION_TO_INSTALL_MARSHMALLOW,
|
||||
'mwaskom/seaborn': MAP_VERSION_TO_INSTALL_SEABORN,
|
||||
'pallets/flask': MAP_VERSION_TO_INSTALL_FLASK,
|
||||
'psf/requests': MAP_VERSION_TO_INSTALL_REQUESTS,
|
||||
'pvlib/pvlib-python': MAP_VERSION_TO_INSTALL_PVLIB,
|
||||
'pydata/xarray': MAP_VERSION_TO_INSTALL_XARRAY,
|
||||
'pydicom/pydicom': MAP_VERSION_TO_INSTALL_PYDICOM,
|
||||
'pylint-dev/astroid': MAP_VERSION_TO_INSTALL_ASTROID,
|
||||
'pylint-dev/pylint': MAP_VERSION_TO_INSTALL_PYLINT,
|
||||
'pytest-dev/pytest': MAP_VERSION_TO_INSTALL_PYTEST,
|
||||
'pyvista/pyvista': MAP_VERSION_TO_INSTALL_PYVISTA,
|
||||
'scikit-learn/scikit-learn': MAP_VERSION_TO_INSTALL_SKLEARN,
|
||||
'sphinx-doc/sphinx': MAP_VERSION_TO_INSTALL_SPHINX,
|
||||
'sqlfluff/sqlfluff': MAP_VERSION_TO_INSTALL_SQLFLUFF,
|
||||
'swe-bench/humaneval': MAP_VERSION_TO_INSTALL_HUMANEVAL,
|
||||
'nielstron/humaneval_fix': MAP_VERSION_TO_INSTALL_HUMANEVAL_FIX,
|
||||
'sympy/sympy': MAP_VERSION_TO_INSTALL_SYMPY,
|
||||
}
|
||||
|
||||
# Constants - Repository Specific Installation Instructions
|
||||
MAP_REPO_TO_INSTALL = {}
|
||||
|
||||
# Constants - Task Instance Test Frameworks
|
||||
TEST_PYTEST_VERBOSE = 'pytest -rA --tb=long -p no:cacheprovider'
|
||||
MAP_REPO_TO_TEST_FRAMEWORK_VERBOSE = {
|
||||
'astropy/astropy': {
|
||||
k: TEST_PYTEST_VERBOSE for k in MAP_VERSION_TO_INSTALL_ASTROPY.keys()
|
||||
},
|
||||
'django/django': {
|
||||
k: './tests/runtests.py --verbosity 2 --settings=test_sqlite --parallel 1'
|
||||
for k in MAP_VERSION_TO_INSTALL_DJANGO.keys()
|
||||
},
|
||||
'marshmallow-code/marshmallow': {
|
||||
k: TEST_PYTEST_VERBOSE for k in MAP_VERSION_TO_INSTALL_MARSHMALLOW.keys()
|
||||
},
|
||||
'matplotlib/matplotlib': {
|
||||
k: TEST_PYTEST_VERBOSE for k in MAP_VERSION_TO_INSTALL_MATPLOTLIB.keys()
|
||||
},
|
||||
'mwaskom/seaborn': {
|
||||
k: 'pytest -rA --tb=long' for k in MAP_VERSION_TO_INSTALL_SEABORN.keys()
|
||||
},
|
||||
'pallets/flask': {
|
||||
k: TEST_PYTEST_VERBOSE for k in MAP_VERSION_TO_INSTALL_FLASK.keys()
|
||||
},
|
||||
'psf/requests': {
|
||||
k: TEST_PYTEST_VERBOSE for k in MAP_VERSION_TO_INSTALL_REQUESTS.keys()
|
||||
},
|
||||
'pvlib/pvlib-python': {
|
||||
k: TEST_PYTEST_VERBOSE for k in MAP_VERSION_TO_INSTALL_PVLIB.keys()
|
||||
},
|
||||
'pydata/xarray': {
|
||||
k: TEST_PYTEST_VERBOSE for k in MAP_VERSION_TO_INSTALL_XARRAY.keys()
|
||||
},
|
||||
'pydicom/pydicom': {
|
||||
k: TEST_PYTEST_VERBOSE for k in MAP_VERSION_TO_INSTALL_PYDICOM.keys()
|
||||
},
|
||||
'pylint-dev/astroid': {
|
||||
k: TEST_PYTEST_VERBOSE for k in MAP_VERSION_TO_INSTALL_ASTROID.keys()
|
||||
},
|
||||
'pylint-dev/pylint': {
|
||||
k: TEST_PYTEST_VERBOSE for k in MAP_VERSION_TO_INSTALL_PYLINT.keys()
|
||||
},
|
||||
'pytest-dev/pytest': {
|
||||
k: 'pytest -rA --tb=long' for k in MAP_VERSION_TO_INSTALL_PYTEST.keys()
|
||||
},
|
||||
'pyvista/pyvista': {
|
||||
k: TEST_PYTEST_VERBOSE for k in MAP_VERSION_TO_INSTALL_PYVISTA.keys()
|
||||
},
|
||||
'scikit-learn/scikit-learn': {
|
||||
k: TEST_PYTEST_VERBOSE for k in MAP_VERSION_TO_INSTALL_SKLEARN.keys()
|
||||
},
|
||||
'sphinx-doc/sphinx': {
|
||||
k: 'tox -epy39 -v --' for k in MAP_VERSION_TO_INSTALL_SPHINX.keys()
|
||||
},
|
||||
'sqlfluff/sqlfluff': {
|
||||
k: TEST_PYTEST_VERBOSE for k in MAP_VERSION_TO_INSTALL_SQLFLUFF.keys()
|
||||
},
|
||||
'swe-bench/humaneval': {
|
||||
k: 'python' for k in MAP_VERSION_TO_INSTALL_HUMANEVAL.keys()
|
||||
},
|
||||
'nielstron/humaneval_fix': {
|
||||
k: TEST_PYTEST_VERBOSE for k in MAP_VERSION_TO_INSTALL_HUMANEVAL.keys()
|
||||
},
|
||||
'sympy/sympy': {
|
||||
k: 'bin/test -C --verbose' for k in MAP_VERSION_TO_INSTALL_SYMPY.keys()
|
||||
},
|
||||
}
|
||||
MAP_REPO_TO_TEST_FRAMEWORK_VERBOSE['django/django']['1.9'] = (
|
||||
'./tests/runtests.py --verbosity 2'
|
||||
)
|
||||
@@ -3,16 +3,25 @@ import copy
|
||||
import json
|
||||
import os
|
||||
import tempfile
|
||||
from typing import Any
|
||||
from typing import Any, Literal
|
||||
|
||||
import pandas as pd
|
||||
import toml
|
||||
from datasets import load_dataset
|
||||
|
||||
import openhands.agenthub
|
||||
from evaluation.benchmarks.swe_bench.binary_patch_utils import (
|
||||
remove_binary_diffs,
|
||||
remove_binary_files_from_git,
|
||||
)
|
||||
from evaluation.benchmarks.swe_bench.resource.mapping import (
|
||||
get_instance_resource_factor,
|
||||
)
|
||||
from evaluation.benchmarks.swe_bench.resource.swt_bench_constants import (
|
||||
MAP_REPO_TO_INSTALL,
|
||||
MAP_REPO_TO_TEST_FRAMEWORK_VERBOSE,
|
||||
MAP_VERSION_TO_INSTALL,
|
||||
)
|
||||
from evaluation.utils.shared import (
|
||||
EvalException,
|
||||
EvalMetadata,
|
||||
@@ -38,8 +47,12 @@ from openhands.core.config import (
|
||||
from openhands.core.logger import openhands_logger as logger
|
||||
from openhands.core.main import create_runtime, run_controller
|
||||
from openhands.critic import AgentFinishedCritic
|
||||
from openhands.events.action import CmdRunAction, MessageAction
|
||||
from openhands.events.observation import CmdOutputObservation, ErrorObservation
|
||||
from openhands.events.action import CmdRunAction, FileReadAction, MessageAction
|
||||
from openhands.events.observation import (
|
||||
CmdOutputObservation,
|
||||
ErrorObservation,
|
||||
FileReadObservation,
|
||||
)
|
||||
from openhands.events.serialization.event import event_from_dict, event_to_dict
|
||||
from openhands.runtime.base import Runtime
|
||||
from openhands.utils.async_utils import call_async_from_sync
|
||||
@@ -47,6 +60,7 @@ from openhands.utils.shutdown_listener import sleep_if_should_continue
|
||||
|
||||
USE_HINT_TEXT = os.environ.get('USE_HINT_TEXT', 'false').lower() == 'true'
|
||||
RUN_WITH_BROWSING = os.environ.get('RUN_WITH_BROWSING', 'false').lower() == 'true'
|
||||
BenchMode = Literal['swe', 'swt', 'swt-ci']
|
||||
|
||||
|
||||
AGENT_CLS_TO_FAKE_USER_RESPONSE_FN = {
|
||||
@@ -60,7 +74,36 @@ def _get_swebench_workspace_dir_name(instance: pd.Series) -> str:
|
||||
|
||||
def get_instruction(instance: pd.Series, metadata: EvalMetadata) -> MessageAction:
|
||||
workspace_dir_name = _get_swebench_workspace_dir_name(instance)
|
||||
instruction = f"""
|
||||
mode = metadata.details['mode']
|
||||
if mode.startswith('swt'):
|
||||
test_instructions = (
|
||||
f'The following command can be used to run the tests: `{list(MAP_REPO_TO_TEST_FRAMEWORK_VERBOSE[instance.repo].values())[0]}`. Make sure they fail in the expected way.\n'
|
||||
if mode.endswith('ci')
|
||||
else ''
|
||||
)
|
||||
instruction = f"""\
|
||||
<uploaded_files>
|
||||
/workspace/{workspace_dir_name}
|
||||
</uploaded_files>
|
||||
I've uploaded a python code repository in the directory {workspace_dir_name}. Consider the following issue description:
|
||||
|
||||
<issue_description>
|
||||
{instance.problem_statement}
|
||||
</issue_description>
|
||||
|
||||
|
||||
Can you help me implement the necessary changes to the repository to test whether the issue in <issue_description> was resolved?
|
||||
I will take care of all changes to any of the non-test files. This means you DON'T have to modify the actual logic and ONLY have to update test logic and tests!
|
||||
Your task is to make the minimal changes to tests files in the /workspace directory to reproduce the issue in the <issue_description>, i.e., such that the generated tests fail in the current state (where the issue is unresolved) and pass when the issue will be resolved.
|
||||
Follow these steps to reproduce the issue:
|
||||
1. As a first step, it might be a good idea to explore the repo to familiarize yourself with its structure.
|
||||
2. Create a script `reproduction.py` to reproduce the error and execute it with `python reproduction.py` using the BashTool, to confirm the error
|
||||
3. Edit the sourcecode of the repo to integrate your reproduction script into the test framework
|
||||
4. Run the test framework and make sure your tests fail! Only submit FAILING tests! Never submit passing tests.
|
||||
{test_instructions}Your thinking should be thorough and so it's fine if it's very long.
|
||||
"""
|
||||
else:
|
||||
instruction = f"""
|
||||
<uploaded_files>
|
||||
/workspace/{workspace_dir_name}
|
||||
</uploaded_files>
|
||||
@@ -76,39 +119,54 @@ I've already taken care of all changes to any of the test files described in the
|
||||
Also the development Python environment is already set up for you (i.e., all dependencies already installed), so you don't need to install other packages.
|
||||
Your task is to make the minimal changes to non-test files in the /workspace/{workspace_dir_name} directory to ensure the <issue_description> is satisfied.
|
||||
|
||||
Follow these steps to resolve the issue:
|
||||
Follow these phases to resolve the issue:
|
||||
|
||||
1. EXPLORATION: First, thoroughly explore the repository structure using tools like `find` and `grep`.
|
||||
- Identify all files mentioned in the problem statement
|
||||
- Locate where the issue occurs in the codebase
|
||||
- Understand the surrounding context and dependencies
|
||||
- Use `grep` to search for relevant functions, classes, or error messages
|
||||
Phase 1. READING: read the problem and reword it in clearer terms
|
||||
1.1 If there are code or config snippets. Express in words any best practices or conventions in them.
|
||||
1.2 Hightlight message errors, method names, variables, file names, stack traces, and technical details.
|
||||
1.3 Explain the problem in clear terms.
|
||||
1.4 Enumerate the steps to reproduce the problem.
|
||||
1.5 Hightlight any best practices to take into account when testing and fixing the issue
|
||||
|
||||
2. ANALYSIS: Based on your exploration, think carefully about the problem and propose 2-5 possible approaches to fix the issue.
|
||||
- Analyze the root cause of the problem
|
||||
- Consider trade-offs between different solutions
|
||||
- Select the most promising approach and explain your reasoning
|
||||
Phase 2. RUNNING: install and run the tests on the repository
|
||||
2.1 Follow the readme
|
||||
2.2 Install the environment and anything needed
|
||||
2.2 Iterate and figure out how to run the tests
|
||||
|
||||
3. TEST CREATION: Before implementing any fix, create a script to reproduce and verify the issue.
|
||||
- Look at existing test files in the repository to understand the test format/structure
|
||||
- Create a minimal reproduction script that demonstrates the issue
|
||||
- Run your script to confirm the error exists
|
||||
Phase 3. EXPLORATION: find the files that are related to the problem and possible solutions
|
||||
3.1 Use `grep` to search for relevant methods, classes, keywords and error messages.
|
||||
3.2 Identify all files related to the problem statement.
|
||||
3.3 Propose the methods and files to fix the issue and explain why.
|
||||
3.4 From the possible file locations, select the most likely location to fix the issue.
|
||||
|
||||
4. IMPLEMENTATION: Edit the source code to implement your chosen solution.
|
||||
- Make minimal, focused changes to fix the issue
|
||||
Phase 4. TEST CREATION: before implementing any fix, create a script to reproduce and verify the issue.
|
||||
4.1 Look at existing test files in the repository to understand the test format/structure.
|
||||
4.2 Create a minimal reproduction script that reproduces the located issue.
|
||||
4.3 Run the reproduction script to confirm you are reproducing the issue.
|
||||
4.4 Adjust the reproduction script as necessary.
|
||||
|
||||
5. VERIFICATION: Test your implementation thoroughly.
|
||||
- Run your reproduction script to verify the fix works
|
||||
- Add edge cases to your test script to ensure comprehensive coverage
|
||||
- Run existing tests related to the modified code to ensure you haven't broken anything
|
||||
Phase 5. FIX ANALYSIS: state clearly the problem and how to fix it
|
||||
5.1 State clearly what the problem is.
|
||||
5.2 State clearly where the problem is located.
|
||||
5.3 State clearly how the test reproduces the issue.
|
||||
5.4 State clearly the best practices to take into account in the fix.
|
||||
5.5 State clearly how to fix the problem.
|
||||
|
||||
6. FINAL REVIEW: Carefully re-read the problem description and compare your changes with the base commit {instance["base_commit"]}.
|
||||
- Ensure you've fully addressed all requirements
|
||||
- Run any tests in the repository related to:
|
||||
* The issue you are fixing
|
||||
* The files you modified
|
||||
* The functions you changed
|
||||
- If any tests fail, revise your implementation until all tests pass
|
||||
Phase 6. FIX IMPLEMENTATION: Edit the source code to implement your chosen solution.
|
||||
6.1 Make minimal, focused changes to fix the issue.
|
||||
|
||||
Phase 7. VERIFICATION: Test your implementation thoroughly.
|
||||
7.1 Run your reproduction script to verify the fix works.
|
||||
7.2 Add edge cases to your test script to ensure comprehensive coverage.
|
||||
7.3 Run existing tests related to the modified code to ensure you haven't broken anything.
|
||||
|
||||
8. FINAL REVIEW: Carefully re-read the problem description and compare your changes with the base commit {instance["base_commit"]}.
|
||||
8.1 Ensure you've fully addressed all requirements.
|
||||
8.2 Run any tests in the repository related to:
|
||||
8.2.1 The issue you are fixing
|
||||
8.2.2 The files you modified
|
||||
8.2.3 The functions you changed
|
||||
8.3 If any tests fail, revise your implementation until all tests pass
|
||||
|
||||
Be thorough in your exploration, testing, and reasoning. It's fine if your thinking process is lengthy - quality and completeness are more important than brevity.
|
||||
"""
|
||||
@@ -202,9 +260,9 @@ def get_config(
|
||||
)
|
||||
)
|
||||
agent_config = AgentConfig(
|
||||
codeact_enable_jupyter=False,
|
||||
codeact_enable_browsing=RUN_WITH_BROWSING,
|
||||
codeact_enable_llm_editor=False,
|
||||
enable_jupyter=False,
|
||||
enable_browsing=RUN_WITH_BROWSING,
|
||||
enable_llm_editor=False,
|
||||
condenser=metadata.condenser_config,
|
||||
enable_prompt_extensions=False,
|
||||
)
|
||||
@@ -215,6 +273,7 @@ def get_config(
|
||||
def initialize_runtime(
|
||||
runtime: Runtime,
|
||||
instance: pd.Series, # this argument is not required
|
||||
metadata: EvalMetadata,
|
||||
):
|
||||
"""Initialize the runtime for the agent.
|
||||
|
||||
@@ -226,16 +285,17 @@ def initialize_runtime(
|
||||
workspace_dir_name = _get_swebench_workspace_dir_name(instance)
|
||||
obs: CmdOutputObservation
|
||||
|
||||
# Set instance id
|
||||
# Set instance id and git configuration
|
||||
action = CmdRunAction(
|
||||
command=f"""echo 'export SWE_INSTANCE_ID={instance['instance_id']}' >> ~/.bashrc && echo 'export PIP_CACHE_DIR=~/.cache/pip' >> ~/.bashrc && echo "alias git='git --no-pager'" >> ~/.bashrc"""
|
||||
command=f"""echo 'export SWE_INSTANCE_ID={instance['instance_id']}' >> ~/.bashrc && echo 'export PIP_CACHE_DIR=~/.cache/pip' >> ~/.bashrc && echo "alias git='git --no-pager'" >> ~/.bashrc && git config --global core.pager "" && git config --global diff.binary false"""
|
||||
)
|
||||
action.set_hard_timeout(600)
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert_and_raise(
|
||||
obs.exit_code == 0, f'Failed to export SWE_INSTANCE_ID: {str(obs)}'
|
||||
obs.exit_code == 0,
|
||||
f'Failed to export SWE_INSTANCE_ID and configure git: {str(obs)}',
|
||||
)
|
||||
|
||||
action = CmdRunAction(command="""export USER=$(whoami); echo USER=${USER} """)
|
||||
@@ -331,6 +391,30 @@ def initialize_runtime(
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert_and_raise(obs.exit_code == 0, f'Failed to remove git remotes: {str(obs)}')
|
||||
|
||||
if metadata.details['mode'] == 'swt-ci':
|
||||
# set up repo
|
||||
setup_commands = []
|
||||
if instance['repo'] in MAP_REPO_TO_INSTALL:
|
||||
setup_commands.append(MAP_REPO_TO_INSTALL[instance['repo']])
|
||||
|
||||
# Run pre-install set up if provided
|
||||
install = MAP_VERSION_TO_INSTALL.get(instance['repo'], {}).get(
|
||||
instance['version'], []
|
||||
)
|
||||
if 'pre_install' in install:
|
||||
for pre_install in install['pre_install']:
|
||||
setup_commands.append(pre_install)
|
||||
|
||||
if 'install' in install:
|
||||
setup_commands.append(install['install'])
|
||||
|
||||
for command in setup_commands:
|
||||
action = CmdRunAction(command=command)
|
||||
action.set_hard_timeout(600)
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
|
||||
if 'multimodal' not in metadata.dataset.lower():
|
||||
# Only for non-multimodal datasets, we need to activate the testbed environment for Python
|
||||
# SWE-Bench multimodal datasets are not using the testbed environment
|
||||
@@ -452,11 +536,22 @@ def complete_runtime(
|
||||
f'Failed to git add -A: {str(obs)}',
|
||||
)
|
||||
|
||||
# Remove binary files from git staging
|
||||
action = CmdRunAction(command=remove_binary_files_from_git())
|
||||
action.set_hard_timeout(600)
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert_and_raise(
|
||||
isinstance(obs, CmdOutputObservation) and obs.exit_code == 0,
|
||||
f'Failed to remove binary files: {str(obs)}',
|
||||
)
|
||||
|
||||
n_retries = 0
|
||||
git_patch = None
|
||||
while n_retries < 5:
|
||||
action = CmdRunAction(
|
||||
command=f'git diff --no-color --cached {instance["base_commit"]}'
|
||||
command=f'git diff --no-color --cached {instance["base_commit"]} > patch.diff'
|
||||
)
|
||||
action.set_hard_timeout(max(300 + 100 * n_retries, 600))
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
@@ -465,8 +560,28 @@ def complete_runtime(
|
||||
n_retries += 1
|
||||
if isinstance(obs, CmdOutputObservation):
|
||||
if obs.exit_code == 0:
|
||||
git_patch = obs.content.strip()
|
||||
break
|
||||
# Read the patch file
|
||||
action = FileReadAction(path='patch.diff')
|
||||
action.set_hard_timeout(max(300 + 100 * n_retries, 600))
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
if isinstance(obs, FileReadObservation):
|
||||
git_patch = obs.content
|
||||
break
|
||||
elif isinstance(obs, ErrorObservation):
|
||||
# Fall back to cat "patch.diff" to get the patch
|
||||
assert 'File could not be decoded as utf-8' in obs.content
|
||||
action = CmdRunAction(command='cat patch.diff')
|
||||
action.set_hard_timeout(max(300 + 100 * n_retries, 600))
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
assert isinstance(obs, CmdOutputObservation) and obs.exit_code == 0
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
git_patch = obs.content
|
||||
break
|
||||
else:
|
||||
assert_and_raise(False, f'Unexpected observation type: {str(obs)}')
|
||||
else:
|
||||
logger.info('Failed to get git diff, retrying...')
|
||||
sleep_if_should_continue(10)
|
||||
@@ -478,6 +593,9 @@ def complete_runtime(
|
||||
|
||||
assert_and_raise(git_patch is not None, 'Failed to get git diff (None)')
|
||||
|
||||
# Remove binary diffs from the patch
|
||||
git_patch = remove_binary_diffs(git_patch)
|
||||
|
||||
logger.info('-' * 30)
|
||||
logger.info('END Runtime Completion Fn')
|
||||
logger.info('-' * 30)
|
||||
@@ -519,7 +637,7 @@ def process_instance(
|
||||
call_async_from_sync(runtime.connect)
|
||||
|
||||
try:
|
||||
initialize_runtime(runtime, instance)
|
||||
initialize_runtime(runtime, instance, metadata)
|
||||
|
||||
message_action = get_instruction(instance, metadata)
|
||||
|
||||
@@ -619,6 +737,13 @@ if __name__ == '__main__':
|
||||
default='test',
|
||||
help='split to evaluate on',
|
||||
)
|
||||
parser.add_argument(
|
||||
'--mode',
|
||||
type=str,
|
||||
default='swe',
|
||||
choices=['swe', 'swt', 'swt-ci'],
|
||||
help="mode to run the evaluation, either 'swe', 'swt', or 'swt-ci'",
|
||||
)
|
||||
args, _ = parser.parse_known_args()
|
||||
|
||||
# NOTE: It is preferable to load datasets from huggingface datasets and perform post-processing
|
||||
@@ -655,7 +780,7 @@ if __name__ == '__main__':
|
||||
if llm_config is None:
|
||||
raise ValueError(f'Could not find LLM config: --llm_config {args.llm_config}')
|
||||
|
||||
details = {}
|
||||
details = {'mode': args.mode}
|
||||
_agent_cls = openhands.agenthub.Agent.get_cls(args.agent_cls)
|
||||
|
||||
dataset_descrption = (
|
||||
@@ -802,7 +927,11 @@ if __name__ == '__main__':
|
||||
with open(cur_output_file, 'r') as f:
|
||||
for line in f:
|
||||
instance = json.loads(line)
|
||||
if instance['instance_id'] not in added_instance_ids:
|
||||
# Also make sure git_patch is not empty - otherwise we fall back to previous attempt (empty patch is worse than anything else)
|
||||
if (
|
||||
instance['instance_id'] not in added_instance_ids
|
||||
and instance['test_result'].get('git_patch', '').strip()
|
||||
):
|
||||
fout.write(line)
|
||||
added_instance_ids.add(instance['instance_id'])
|
||||
logger.info(
|
||||
|
||||
@@ -12,6 +12,7 @@ NUM_WORKERS=$6
|
||||
DATASET=$7
|
||||
SPLIT=$8
|
||||
N_RUNS=$9
|
||||
MODE=${10}
|
||||
|
||||
if [ -z "$NUM_WORKERS" ]; then
|
||||
NUM_WORKERS=1
|
||||
@@ -45,6 +46,11 @@ if [ -z "$SPLIT" ]; then
|
||||
SPLIT="test"
|
||||
fi
|
||||
|
||||
if [ -z "$MODE" ]; then
|
||||
MODE="swe"
|
||||
echo "MODE not specified, use default $MODE"
|
||||
fi
|
||||
|
||||
export RUN_WITH_BROWSING=$RUN_WITH_BROWSING
|
||||
echo "RUN_WITH_BROWSING: $RUN_WITH_BROWSING"
|
||||
|
||||
@@ -55,6 +61,10 @@ echo "OPENHANDS_VERSION: $OPENHANDS_VERSION"
|
||||
echo "MODEL_CONFIG: $MODEL_CONFIG"
|
||||
echo "DATASET: $DATASET"
|
||||
echo "SPLIT: $SPLIT"
|
||||
echo "MAX_ITER: $MAX_ITER"
|
||||
echo "NUM_WORKERS: $NUM_WORKERS"
|
||||
echo "COMMIT_HASH: $COMMIT_HASH"
|
||||
echo "MODE: $MODE"
|
||||
|
||||
# Default to NOT use Hint
|
||||
if [ -z "$USE_HINT_TEXT" ]; then
|
||||
@@ -74,9 +84,13 @@ fi
|
||||
if [ -n "$EXP_NAME" ]; then
|
||||
EVAL_NOTE="$EVAL_NOTE-$EXP_NAME"
|
||||
fi
|
||||
# if mode != swe, add mode to the eval note
|
||||
if [ "$MODE" != "swe" ]; then
|
||||
EVAL_NOTE="${EVAL_NOTE}-${MODE}"
|
||||
fi
|
||||
|
||||
function run_eval() {
|
||||
local eval_note=$1
|
||||
local eval_note="${1}"
|
||||
COMMAND="poetry run python evaluation/benchmarks/swe_bench/run_infer.py \
|
||||
--agent-cls $AGENT \
|
||||
--llm-config $MODEL_CONFIG \
|
||||
@@ -84,7 +98,8 @@ function run_eval() {
|
||||
--eval-num-workers $NUM_WORKERS \
|
||||
--eval-note $eval_note \
|
||||
--dataset $DATASET \
|
||||
--split $SPLIT"
|
||||
--split $SPLIT \
|
||||
--mode $MODE"
|
||||
|
||||
if [ -n "$EVAL_LIMIT" ]; then
|
||||
echo "EVAL_LIMIT: $EVAL_LIMIT"
|
||||
|
||||
95
evaluation/benchmarks/swe_bench/scripts/swtbench/convert.py
Normal file
95
evaluation/benchmarks/swe_bench/scripts/swtbench/convert.py
Normal file
@@ -0,0 +1,95 @@
|
||||
import argparse
|
||||
import json
|
||||
import logging
|
||||
|
||||
import unidiff
|
||||
|
||||
from evaluation.benchmarks.swe_bench.resource.swt_bench_constants import (
|
||||
MAP_VERSION_TO_INSTALL,
|
||||
)
|
||||
|
||||
_LOGGER = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def remove_setup_files(model_patch: str, instance: dict, delete_setup_changes: bool):
|
||||
"""Discard all changes that a patch applies to files changes by the pre_install script and that are reproduction scripts (top-level script)"""
|
||||
setup_files = ['setup.py', 'tox.ini', 'pyproject.toml']
|
||||
pre_install = (
|
||||
MAP_VERSION_TO_INSTALL.get(instance['repo'], {})
|
||||
.get(instance['version'], {})
|
||||
.get('pre_install', [])
|
||||
)
|
||||
relevant_files = (
|
||||
[
|
||||
file
|
||||
for file in setup_files
|
||||
if any(file in install and 'sed' in install for install in pre_install)
|
||||
]
|
||||
if delete_setup_changes
|
||||
else []
|
||||
)
|
||||
for i in range(10):
|
||||
try:
|
||||
# Appearently outputs.jsonl has .strip() applied, so we try to reconstruct the original patch by adding auxiliary whitespace
|
||||
patch = unidiff.PatchSet(model_patch + i * '\n')
|
||||
break
|
||||
except unidiff.UnidiffParseError:
|
||||
pass
|
||||
|
||||
to_delete = []
|
||||
for i, file in enumerate(patch):
|
||||
if (
|
||||
any(f in file.source_file for f in relevant_files)
|
||||
or file.target_file.count('/') == 1
|
||||
):
|
||||
to_delete.append(i)
|
||||
for i in reversed(to_delete):
|
||||
del patch[i]
|
||||
return str(patch)
|
||||
|
||||
|
||||
def main(
|
||||
prediction_file: str,
|
||||
):
|
||||
"""Main function to extract the model patches from the OpenHands prediction file and turn them into the expected SWT-Bench format."""
|
||||
with open(prediction_file) as f:
|
||||
for line in f:
|
||||
pred = json.loads(line)
|
||||
try:
|
||||
git_diff = pred['test_result']['git_patch']
|
||||
except KeyError:
|
||||
_LOGGER.warning(
|
||||
'Warning: No git diff found for instance %s', pred['instance_id']
|
||||
)
|
||||
continue
|
||||
ci_mode = pred['metadata']['details'].get('mode', '') == 'swt-ci'
|
||||
try:
|
||||
git_diff = remove_setup_files(git_diff, pred['instance'], ci_mode)
|
||||
except: # noqa: E722
|
||||
_LOGGER.warning(
|
||||
'Warning: Invalid git diff found for instance %s',
|
||||
pred['instance_id'],
|
||||
)
|
||||
print(
|
||||
json.dumps(
|
||||
{
|
||||
'instance_id': pred['instance_id'],
|
||||
'model_name_or_path': f'{pred["metadata"]["llm_config"]["openrouter_app_name"]}__{pred["metadata"]["agent_class"]}__{pred["metadata"]["llm_config"]["model"]}',
|
||||
'model_patch': git_diff,
|
||||
'full_output': json.dumps(pred),
|
||||
}
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument(
|
||||
'--prediction_file',
|
||||
type=str,
|
||||
required=True,
|
||||
help='Path to the prediction file (.../outputs.jsonl)',
|
||||
)
|
||||
args = parser.parse_args()
|
||||
|
||||
main(args.prediction_file)
|
||||
@@ -158,9 +158,9 @@ def get_config(
|
||||
)
|
||||
)
|
||||
agent_config = AgentConfig(
|
||||
codeact_enable_jupyter=False,
|
||||
codeact_enable_browsing=RUN_WITH_BROWSING,
|
||||
codeact_enable_llm_editor=False,
|
||||
enable_jupyter=False,
|
||||
enable_browsing=RUN_WITH_BROWSING,
|
||||
enable_llm_editor=False,
|
||||
condenser=metadata.condenser_config,
|
||||
enable_prompt_extensions=False,
|
||||
)
|
||||
|
||||
@@ -62,9 +62,9 @@ def get_config(
|
||||
)
|
||||
)
|
||||
agent_config = AgentConfig(
|
||||
codeact_enable_jupyter=True,
|
||||
codeact_enable_browsing=True,
|
||||
codeact_enable_llm_editor=False,
|
||||
enable_jupyter=True,
|
||||
enable_browsing=True,
|
||||
enable_llm_editor=False,
|
||||
)
|
||||
config.set_agent_config(agent_config)
|
||||
return config
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
{
|
||||
"parser": "@typescript-eslint/parser",
|
||||
"parserOptions": {
|
||||
"project": "./tsconfig.json"
|
||||
"project": "./tsconfig.json",
|
||||
},
|
||||
"extends": [
|
||||
"airbnb",
|
||||
@@ -11,15 +11,12 @@
|
||||
"plugin:@typescript-eslint/recommended",
|
||||
"plugin:react/recommended",
|
||||
"plugin:react-hooks/recommended",
|
||||
"plugin:@tanstack/query/recommended"
|
||||
],
|
||||
"plugins": [
|
||||
"prettier"
|
||||
"plugin:@tanstack/query/recommended",
|
||||
],
|
||||
"plugins": ["prettier", "unused-imports"],
|
||||
"rules": {
|
||||
"prettier/prettier": [
|
||||
"error"
|
||||
],
|
||||
"unused-imports/no-unused-imports": "error",
|
||||
"prettier/prettier": ["error"],
|
||||
// Resolves https://stackoverflow.com/questions/59265981/typescript-eslint-missing-file-extension-ts-import-extensions/59268871#59268871
|
||||
"import/extensions": [
|
||||
"error",
|
||||
@@ -27,32 +24,26 @@
|
||||
{
|
||||
"": "never",
|
||||
"ts": "never",
|
||||
"tsx": "never"
|
||||
}
|
||||
]
|
||||
"tsx": "never",
|
||||
},
|
||||
],
|
||||
},
|
||||
"settings": {
|
||||
"react": {
|
||||
"version": "detect"
|
||||
}
|
||||
"version": "detect",
|
||||
},
|
||||
},
|
||||
"overrides": [
|
||||
{
|
||||
"files": [
|
||||
"*.ts",
|
||||
"*.tsx"
|
||||
],
|
||||
"files": ["*.ts", "*.tsx"],
|
||||
"rules": {
|
||||
// Allow state modification in reduce and Redux reducers
|
||||
"no-param-reassign": [
|
||||
"error",
|
||||
{
|
||||
"props": true,
|
||||
"ignorePropertyModificationsFor": [
|
||||
"acc",
|
||||
"state"
|
||||
]
|
||||
}
|
||||
"ignorePropertyModificationsFor": ["acc", "state"],
|
||||
},
|
||||
],
|
||||
// For https://stackoverflow.com/questions/55844608/stuck-with-eslint-error-i-e-separately-loops-should-be-avoided-in-favor-of-arra
|
||||
"no-restricted-syntax": "off",
|
||||
@@ -66,24 +57,19 @@
|
||||
2,
|
||||
{
|
||||
"required": {
|
||||
"some": [
|
||||
"nesting",
|
||||
"id"
|
||||
]
|
||||
}
|
||||
}
|
||||
"some": ["nesting", "id"],
|
||||
},
|
||||
},
|
||||
],
|
||||
"react/prop-types": "off",
|
||||
"react/no-array-index-key": "off",
|
||||
"react-hooks/exhaustive-deps": "off",
|
||||
"import/no-extraneous-dependencies": "off",
|
||||
"react/react-in-jsx-scope": "off"
|
||||
"react/react-in-jsx-scope": "off",
|
||||
},
|
||||
"parserOptions": {
|
||||
"project": [
|
||||
"**/tsconfig.json"
|
||||
]
|
||||
}
|
||||
}
|
||||
]
|
||||
"project": ["**/tsconfig.json"],
|
||||
},
|
||||
},
|
||||
],
|
||||
}
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
#!/bin/sh
|
||||
cd frontend
|
||||
lint-staged
|
||||
vitest run
|
||||
npm run check-unlocalized-strings
|
||||
npx lint-staged
|
||||
|
||||
@@ -37,4 +37,4 @@ describe("CopyToClipboardButton", () => {
|
||||
const button = screen.getByTestId("copy-to-clipboard");
|
||||
expect(button).toHaveAttribute("aria-label", "BUTTON$COPIED");
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
@@ -217,6 +217,17 @@ describe("ChatInput", () => {
|
||||
expect(onImagePaste).toHaveBeenCalledWith([file]);
|
||||
});
|
||||
|
||||
it("should use the default maxRows value", () => {
|
||||
// We can't directly test the maxRows prop as it's not exposed in the DOM
|
||||
// Instead, we'll verify the component renders with the default props
|
||||
render(<ChatInput onSubmit={onSubmitMock} />);
|
||||
const textarea = screen.getByRole("textbox");
|
||||
expect(textarea).toBeInTheDocument();
|
||||
|
||||
// The actual verification of maxRows=16 is handled internally by the TextareaAutosize component
|
||||
// and affects how many rows the textarea can expand to
|
||||
});
|
||||
|
||||
it("should not submit when Enter is pressed during IME composition", async () => {
|
||||
const user = userEvent.setup();
|
||||
render(<ChatInput onSubmit={onSubmitMock} />);
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
import { afterEach, beforeAll, describe, expect, it, vi } from "vitest";
|
||||
import type { Message } from "#/message";
|
||||
import { act, screen, waitFor, within } from "@testing-library/react";
|
||||
import userEvent from "@testing-library/user-event";
|
||||
import { renderWithProviders } from "test-utils";
|
||||
import type { Message } from "#/message";
|
||||
import { addUserMessage } from "#/state/chat-slice";
|
||||
import { SUGGESTIONS } from "#/utils/suggestions";
|
||||
import * as ChatSlice from "#/state/chat-slice";
|
||||
@@ -45,7 +45,15 @@ describe("Empty state", () => {
|
||||
it("should render suggestions if empty", () => {
|
||||
const { store } = renderWithProviders(<ChatInterface />, {
|
||||
preloadedState: {
|
||||
chat: { messages: [] },
|
||||
chat: {
|
||||
messages: [],
|
||||
systemMessage: {
|
||||
content: "",
|
||||
tools: [],
|
||||
openhands_version: null,
|
||||
agent_class: null
|
||||
}
|
||||
},
|
||||
},
|
||||
});
|
||||
|
||||
@@ -68,7 +76,15 @@ describe("Empty state", () => {
|
||||
it("should render the default suggestions", () => {
|
||||
renderWithProviders(<ChatInterface />, {
|
||||
preloadedState: {
|
||||
chat: { messages: [] },
|
||||
chat: {
|
||||
messages: [],
|
||||
systemMessage: {
|
||||
content: "",
|
||||
tools: [],
|
||||
openhands_version: null,
|
||||
agent_class: null
|
||||
}
|
||||
},
|
||||
},
|
||||
});
|
||||
|
||||
@@ -98,7 +114,15 @@ describe("Empty state", () => {
|
||||
const user = userEvent.setup();
|
||||
const { store } = renderWithProviders(<ChatInterface />, {
|
||||
preloadedState: {
|
||||
chat: { messages: [] },
|
||||
chat: {
|
||||
messages: [],
|
||||
systemMessage: {
|
||||
content: "",
|
||||
tools: [],
|
||||
openhands_version: null,
|
||||
agent_class: null
|
||||
}
|
||||
},
|
||||
},
|
||||
});
|
||||
|
||||
@@ -127,7 +151,15 @@ describe("Empty state", () => {
|
||||
const user = userEvent.setup();
|
||||
const { rerender } = renderWithProviders(<ChatInterface />, {
|
||||
preloadedState: {
|
||||
chat: { messages: [] },
|
||||
chat: {
|
||||
messages: [],
|
||||
systemMessage: {
|
||||
content: "",
|
||||
tools: [],
|
||||
openhands_version: null,
|
||||
agent_class: null
|
||||
}
|
||||
},
|
||||
},
|
||||
});
|
||||
|
||||
|
||||
@@ -95,6 +95,23 @@ describe("ExpandableMessage", () => {
|
||||
expect(screen.queryByTestId("status-icon")).not.toBeInTheDocument();
|
||||
});
|
||||
|
||||
it("should render with neutral border and no icon for action messages with undefined success (timeout case)", () => {
|
||||
renderWithProviders(
|
||||
<ExpandableMessage
|
||||
id="OBSERVATION_MESSAGE$RUN"
|
||||
message="Command timed out"
|
||||
type="action"
|
||||
success={undefined}
|
||||
/>,
|
||||
);
|
||||
const element = screen.getByText("OBSERVATION_MESSAGE$RUN");
|
||||
const container = element.closest(
|
||||
"div.flex.gap-2.items-center.justify-start",
|
||||
);
|
||||
expect(container).toHaveClass("border-neutral-300");
|
||||
expect(screen.queryByTestId("status-icon")).not.toBeInTheDocument();
|
||||
});
|
||||
|
||||
it("should render the out of credits message when the user is out of credits", async () => {
|
||||
const getConfigSpy = vi.spyOn(OpenHands, "getConfig");
|
||||
// @ts-expect-error - We only care about the APP_MODE and FEATURE_FLAGS fields
|
||||
|
||||
@@ -3,34 +3,46 @@ import { it, describe, expect, vi, beforeAll, afterAll } from "vitest";
|
||||
import userEvent from "@testing-library/user-event";
|
||||
import { AuthModal } from "#/components/features/waitlist/auth-modal";
|
||||
import * as CaptureConsent from "#/utils/handle-capture-consent";
|
||||
import * as AuthHook from "#/context/auth-context";
|
||||
|
||||
describe("AuthModal", () => {
|
||||
beforeAll(() => {
|
||||
vi.stubGlobal("location", { href: "" });
|
||||
vi.spyOn(AuthHook, "useAuth").mockReturnValue({
|
||||
providersAreSet: false,
|
||||
setProvidersAreSet: vi.fn(),
|
||||
providerTokensSet: [],
|
||||
setProviderTokensSet: vi.fn()
|
||||
});
|
||||
});
|
||||
|
||||
afterAll(() => {
|
||||
vi.unstubAllGlobals();
|
||||
vi.restoreAllMocks();
|
||||
});
|
||||
|
||||
it("should render a tos checkbox that is unchecked by default", () => {
|
||||
render(<AuthModal githubAuthUrl={null} />);
|
||||
render(<AuthModal githubAuthUrl={null} appMode="saas" />);
|
||||
const checkbox = screen.getByRole("checkbox");
|
||||
|
||||
expect(checkbox).not.toBeChecked();
|
||||
});
|
||||
|
||||
it("should only enable the GitHub button if the tos checkbox is checked", async () => {
|
||||
it("should only enable the identity provider buttons if the tos checkbox is checked", async () => {
|
||||
const user = userEvent.setup();
|
||||
render(<AuthModal githubAuthUrl={null} />);
|
||||
const checkbox = screen.getByRole("checkbox");
|
||||
const button = screen.getByRole("button", { name: "GITHUB$CONNECT_TO_GITHUB" });
|
||||
render(<AuthModal githubAuthUrl={null} appMode="saas" />);
|
||||
|
||||
expect(button).toBeDisabled();
|
||||
const checkbox = screen.getByRole("checkbox");
|
||||
const githubButton = screen.getByRole("button", { name: "GITHUB$CONNECT_TO_GITHUB" });
|
||||
const gitlabButton = screen.getByRole("button", { name: "GITLAB$CONNECT_TO_GITLAB" });
|
||||
|
||||
expect(githubButton).toBeDisabled();
|
||||
expect(gitlabButton).toBeDisabled();
|
||||
|
||||
await user.click(checkbox);
|
||||
|
||||
expect(button).not.toBeDisabled();
|
||||
expect(githubButton).not.toBeDisabled();
|
||||
expect(gitlabButton).not.toBeDisabled();
|
||||
});
|
||||
|
||||
it("should set user analytics consent to true when the user checks the tos checkbox", async () => {
|
||||
@@ -40,7 +52,7 @@ describe("AuthModal", () => {
|
||||
);
|
||||
|
||||
const user = userEvent.setup();
|
||||
render(<AuthModal githubAuthUrl="mock-url" />);
|
||||
render(<AuthModal githubAuthUrl="mock-url" appMode="saas" />);
|
||||
|
||||
const checkbox = screen.getByRole("checkbox");
|
||||
await user.click(checkbox);
|
||||
|
||||
@@ -76,11 +76,11 @@ describe("ConversationCard", () => {
|
||||
const card = screen.getByTestId("conversation-card");
|
||||
|
||||
within(card).getByText("Conversation 1");
|
||||
|
||||
|
||||
// Just check that the card contains the expected text content
|
||||
expect(card).toHaveTextContent("Created");
|
||||
expect(card).toHaveTextContent("ago");
|
||||
|
||||
|
||||
// Use a regex to match the time part since it might have whitespace
|
||||
const timeRegex = new RegExp(formatTimeDelta(new Date("2021-10-01T12:00:00Z")));
|
||||
expect(card).toHaveTextContent(timeRegex);
|
||||
|
||||
@@ -56,12 +56,16 @@ describe("GitRepositorySelector", () => {
|
||||
full_name: "test/repo1",
|
||||
git_provider: "github" as Provider,
|
||||
stargazers_count: 100,
|
||||
is_public: true,
|
||||
pushed_at: "2023-01-01T00:00:00Z",
|
||||
},
|
||||
{
|
||||
id: 2,
|
||||
full_name: "test/repo2",
|
||||
git_provider: "github" as Provider,
|
||||
stargazers_count: 200,
|
||||
is_public: true,
|
||||
pushed_at: "2023-01-02T00:00:00Z",
|
||||
},
|
||||
];
|
||||
|
||||
|
||||
@@ -0,0 +1,70 @@
|
||||
import { QueryClientProvider, QueryClient } from "@tanstack/react-query";
|
||||
import { render, screen } from "@testing-library/react";
|
||||
import { Provider } from "react-redux";
|
||||
import { createRoutesStub } from "react-router";
|
||||
import { setupStore } from "test-utils";
|
||||
import { describe, expect, it, vi } from "vitest";
|
||||
import userEvent from "@testing-library/user-event";
|
||||
import { AuthProvider } from "#/context/auth-context";
|
||||
import { HomeHeader } from "#/components/features/home/home-header";
|
||||
import OpenHands from "#/api/open-hands";
|
||||
|
||||
const renderHomeHeader = () => {
|
||||
const RouterStub = createRoutesStub([
|
||||
{
|
||||
Component: HomeHeader,
|
||||
path: "/",
|
||||
},
|
||||
{
|
||||
Component: () => <div data-testid="conversation-screen" />,
|
||||
path: "/conversations/:conversationId",
|
||||
},
|
||||
]);
|
||||
|
||||
return render(<RouterStub />, {
|
||||
wrapper: ({ children }) => (
|
||||
<Provider store={setupStore()}>
|
||||
<AuthProvider initialProvidersAreSet>
|
||||
<QueryClientProvider client={new QueryClient()}>
|
||||
{children}
|
||||
</QueryClientProvider>
|
||||
</AuthProvider>
|
||||
</Provider>
|
||||
),
|
||||
});
|
||||
};
|
||||
|
||||
describe("HomeHeader", () => {
|
||||
it("should create an empty conversation and redirect when pressing the launch from scratch button", async () => {
|
||||
const createConversationSpy = vi.spyOn(OpenHands, "createConversation");
|
||||
|
||||
renderHomeHeader();
|
||||
|
||||
const launchButton = screen.getByRole("button", {
|
||||
name: /launch from scratch/i,
|
||||
});
|
||||
await userEvent.click(launchButton);
|
||||
|
||||
expect(createConversationSpy).toHaveBeenCalledExactlyOnceWith(
|
||||
undefined,
|
||||
undefined,
|
||||
[],
|
||||
undefined,
|
||||
);
|
||||
|
||||
// expect to be redirected to /conversations/:conversationId
|
||||
await screen.findByTestId("conversation-screen");
|
||||
});
|
||||
|
||||
it("should change the launch button text to 'Loading...' when creating a conversation", async () => {
|
||||
renderHomeHeader();
|
||||
|
||||
const launchButton = screen.getByRole("button", {
|
||||
name: /launch from scratch/i,
|
||||
});
|
||||
await userEvent.click(launchButton);
|
||||
|
||||
expect(launchButton).toHaveTextContent(/Loading/i);
|
||||
expect(launchButton).toBeDisabled();
|
||||
});
|
||||
});
|
||||
@@ -0,0 +1,216 @@
|
||||
import { render, screen, waitFor, within } from "@testing-library/react";
|
||||
import { describe, expect, it, vi } from "vitest";
|
||||
import userEvent from "@testing-library/user-event";
|
||||
import { QueryClientProvider, QueryClient } from "@tanstack/react-query";
|
||||
import { setupStore } from "test-utils";
|
||||
import { Provider } from "react-redux";
|
||||
import { createRoutesStub } from "react-router";
|
||||
import OpenHands from "#/api/open-hands";
|
||||
import { AuthProvider } from "#/context/auth-context";
|
||||
import { GitRepository } from "#/types/git";
|
||||
import * as GitService from "#/api/git";
|
||||
import { RepoConnector } from "#/components/features/home/repo-connector";
|
||||
|
||||
const renderRepoConnector = (initialProvidersAreSet = true) => {
|
||||
const mockRepoSelection = vi.fn();
|
||||
const RouterStub = createRoutesStub([
|
||||
{
|
||||
Component: () => <RepoConnector onRepoSelection={mockRepoSelection} />,
|
||||
path: "/",
|
||||
},
|
||||
{
|
||||
Component: () => <div data-testid="conversation-screen" />,
|
||||
path: "/conversations/:conversationId",
|
||||
},
|
||||
{
|
||||
Component: () => <div data-testid="settings-screen" />,
|
||||
path: "/settings",
|
||||
},
|
||||
]);
|
||||
|
||||
return render(<RouterStub />, {
|
||||
wrapper: ({ children }) => (
|
||||
<Provider store={setupStore()}>
|
||||
<AuthProvider initialProvidersAreSet={initialProvidersAreSet}>
|
||||
<QueryClientProvider client={new QueryClient()}>
|
||||
{children}
|
||||
</QueryClientProvider>
|
||||
</AuthProvider>
|
||||
</Provider>
|
||||
),
|
||||
});
|
||||
};
|
||||
|
||||
const MOCK_RESPOSITORIES: GitRepository[] = [
|
||||
{
|
||||
id: 1,
|
||||
full_name: "rbren/polaris",
|
||||
git_provider: "github",
|
||||
is_public: true,
|
||||
},
|
||||
{
|
||||
id: 2,
|
||||
full_name: "All-Hands-AI/OpenHands",
|
||||
git_provider: "github",
|
||||
is_public: true,
|
||||
},
|
||||
];
|
||||
|
||||
describe("RepoConnector", () => {
|
||||
it("should render the repository connector section", () => {
|
||||
renderRepoConnector();
|
||||
screen.getByTestId("repo-connector");
|
||||
});
|
||||
|
||||
it("should render the available repositories in the dropdown", async () => {
|
||||
const retrieveUserGitRepositoriesSpy = vi.spyOn(
|
||||
GitService,
|
||||
"retrieveUserGitRepositories",
|
||||
);
|
||||
retrieveUserGitRepositoriesSpy.mockResolvedValue({
|
||||
data: MOCK_RESPOSITORIES,
|
||||
nextPage: null,
|
||||
});
|
||||
|
||||
renderRepoConnector();
|
||||
|
||||
const dropdown = screen.getByTestId("repo-dropdown");
|
||||
await userEvent.click(dropdown);
|
||||
|
||||
await waitFor(() => {
|
||||
screen.getByText("rbren/polaris");
|
||||
screen.getByText("All-Hands-AI/OpenHands");
|
||||
});
|
||||
});
|
||||
|
||||
it("should only enable the launch button if a repo is selected", async () => {
|
||||
const retrieveUserGitRepositoriesSpy = vi.spyOn(
|
||||
GitService,
|
||||
"retrieveUserGitRepositories",
|
||||
);
|
||||
retrieveUserGitRepositoriesSpy.mockResolvedValue({
|
||||
data: MOCK_RESPOSITORIES,
|
||||
nextPage: null,
|
||||
});
|
||||
|
||||
renderRepoConnector();
|
||||
|
||||
const launchButton = screen.getByTestId("repo-launch-button");
|
||||
expect(launchButton).toBeDisabled();
|
||||
|
||||
const dropdown = screen.getByTestId("repo-dropdown");
|
||||
await userEvent.click(dropdown);
|
||||
await userEvent.click(screen.getByText("rbren/polaris"));
|
||||
|
||||
expect(launchButton).toBeEnabled();
|
||||
});
|
||||
|
||||
it("should render the 'add git(hub|lab) repos' links if saas mode", async () => {
|
||||
const getConfiSpy = vi.spyOn(OpenHands, "getConfig");
|
||||
// @ts-expect-error - only return the APP_MODE
|
||||
getConfiSpy.mockResolvedValue({
|
||||
APP_MODE: "saas",
|
||||
});
|
||||
|
||||
renderRepoConnector();
|
||||
|
||||
await screen.findByText("Add GitHub repos");
|
||||
});
|
||||
|
||||
it("should not render the 'add git(hub|lab) repos' links if oss mode", async () => {
|
||||
const getConfiSpy = vi.spyOn(OpenHands, "getConfig");
|
||||
// @ts-expect-error - only return the APP_MODE
|
||||
getConfiSpy.mockResolvedValue({
|
||||
APP_MODE: "oss",
|
||||
});
|
||||
|
||||
renderRepoConnector();
|
||||
|
||||
expect(screen.queryByText("Add GitHub repos")).not.toBeInTheDocument();
|
||||
expect(screen.queryByText("Add GitLab repos")).not.toBeInTheDocument();
|
||||
});
|
||||
|
||||
it("should create a conversation and redirect with the selected repo when pressing the launch button", async () => {
|
||||
const createConversationSpy = vi.spyOn(OpenHands, "createConversation");
|
||||
|
||||
renderRepoConnector();
|
||||
|
||||
const repoConnector = screen.getByTestId("repo-connector");
|
||||
const launchButton =
|
||||
within(repoConnector).getByTestId("repo-launch-button");
|
||||
await userEvent.click(launchButton);
|
||||
|
||||
// repo not selected yet
|
||||
expect(createConversationSpy).not.toHaveBeenCalled();
|
||||
|
||||
// select a repository from the dropdown
|
||||
const dropdown = within(repoConnector).getByTestId("repo-dropdown");
|
||||
await userEvent.click(dropdown);
|
||||
|
||||
const repoOption = screen.getByText("rbren/polaris");
|
||||
await userEvent.click(repoOption);
|
||||
await userEvent.click(launchButton);
|
||||
|
||||
expect(createConversationSpy).toHaveBeenCalledExactlyOnceWith(
|
||||
{
|
||||
full_name: "rbren/polaris",
|
||||
git_provider: "github",
|
||||
id: 1,
|
||||
is_public: true,
|
||||
},
|
||||
undefined,
|
||||
[],
|
||||
undefined,
|
||||
);
|
||||
});
|
||||
|
||||
it("should change the launch button text to 'Loading...' when creating a conversation", async () => {
|
||||
const retrieveUserGitRepositoriesSpy = vi.spyOn(
|
||||
GitService,
|
||||
"retrieveUserGitRepositories",
|
||||
);
|
||||
retrieveUserGitRepositoriesSpy.mockResolvedValue({
|
||||
data: MOCK_RESPOSITORIES,
|
||||
nextPage: null,
|
||||
});
|
||||
|
||||
renderRepoConnector();
|
||||
|
||||
const launchButton = screen.getByTestId("repo-launch-button");
|
||||
|
||||
const dropdown = screen.getByTestId("repo-dropdown");
|
||||
await userEvent.click(dropdown);
|
||||
await userEvent.click(screen.getByText("rbren/polaris"));
|
||||
|
||||
await userEvent.click(launchButton);
|
||||
expect(launchButton).toBeDisabled();
|
||||
expect(launchButton).toHaveTextContent(/Loading/i);
|
||||
});
|
||||
|
||||
it("should not display a button to settings if the user is signed in with their git provider", async () => {
|
||||
renderRepoConnector(true);
|
||||
expect(
|
||||
screen.queryByTestId("navigate-to-settings-button"),
|
||||
).not.toBeInTheDocument();
|
||||
});
|
||||
|
||||
it("should display a button to settings if the user needs to sign in with their git provider", async () => {
|
||||
renderRepoConnector(false);
|
||||
|
||||
const goToSettingsButton = await screen.findByTestId(
|
||||
"navigate-to-settings-button",
|
||||
);
|
||||
const dropdown = screen.queryByTestId("repo-dropdown");
|
||||
const launchButton = screen.queryByTestId("repo-launch-button");
|
||||
const providerLinks = screen.queryAllByText(/add git(hub|lab) repos/i);
|
||||
|
||||
expect(dropdown).not.toBeInTheDocument();
|
||||
expect(launchButton).not.toBeInTheDocument();
|
||||
expect(providerLinks.length).toBe(0);
|
||||
|
||||
expect(goToSettingsButton).toBeInTheDocument();
|
||||
|
||||
await userEvent.click(goToSettingsButton);
|
||||
await screen.findByTestId("settings-screen");
|
||||
});
|
||||
});
|
||||
186
frontend/__tests__/components/features/home/task-card.test.tsx
Normal file
186
frontend/__tests__/components/features/home/task-card.test.tsx
Normal file
@@ -0,0 +1,186 @@
|
||||
import { render, screen } from "@testing-library/react";
|
||||
import { beforeEach, describe, expect, it, vi } from "vitest";
|
||||
import { QueryClient, QueryClientProvider } from "@tanstack/react-query";
|
||||
import userEvent from "@testing-library/user-event";
|
||||
import { Provider } from "react-redux";
|
||||
import { createRoutesStub } from "react-router";
|
||||
import { setupStore } from "test-utils";
|
||||
import { SuggestedTask } from "#/components/features/home/tasks/task.types";
|
||||
import OpenHands from "#/api/open-hands";
|
||||
import { AuthProvider } from "#/context/auth-context";
|
||||
import { TaskCard } from "#/components/features/home/tasks/task-card";
|
||||
import * as GitService from "#/api/git";
|
||||
import { GitRepository } from "#/types/git";
|
||||
import {
|
||||
getFailingChecksPrompt,
|
||||
getMergeConflictPrompt,
|
||||
getOpenIssuePrompt,
|
||||
getUnresolvedCommentsPrompt,
|
||||
} from "#/components/features/home/tasks/get-prompt-for-query";
|
||||
|
||||
const MOCK_TASK_1: SuggestedTask = {
|
||||
issue_number: 123,
|
||||
repo: "repo1",
|
||||
title: "Task 1",
|
||||
task_type: "MERGE_CONFLICTS",
|
||||
};
|
||||
|
||||
const MOCK_TASK_2: SuggestedTask = {
|
||||
issue_number: 456,
|
||||
repo: "repo2",
|
||||
title: "Task 2",
|
||||
task_type: "FAILING_CHECKS",
|
||||
};
|
||||
|
||||
const MOCK_TASK_3: SuggestedTask = {
|
||||
issue_number: 789,
|
||||
repo: "repo3",
|
||||
title: "Task 3",
|
||||
task_type: "UNRESOLVED_COMMENTS",
|
||||
};
|
||||
|
||||
const MOCK_TASK_4: SuggestedTask = {
|
||||
issue_number: 101112,
|
||||
repo: "repo4",
|
||||
title: "Task 4",
|
||||
task_type: "OPEN_ISSUE",
|
||||
};
|
||||
|
||||
const MOCK_RESPOSITORIES: GitRepository[] = [
|
||||
{ id: 1, full_name: "repo1", git_provider: "github", is_public: true },
|
||||
{ id: 2, full_name: "repo2", git_provider: "github", is_public: true },
|
||||
{ id: 3, full_name: "repo3", git_provider: "gitlab", is_public: true },
|
||||
{ id: 4, full_name: "repo4", git_provider: "gitlab", is_public: true },
|
||||
];
|
||||
|
||||
const renderTaskCard = (task = MOCK_TASK_1) => {
|
||||
const RouterStub = createRoutesStub([
|
||||
{
|
||||
Component: () => <TaskCard task={task} />,
|
||||
path: "/",
|
||||
},
|
||||
{
|
||||
Component: () => <div data-testid="conversation-screen" />,
|
||||
path: "/conversations/:conversationId",
|
||||
},
|
||||
]);
|
||||
|
||||
return render(<RouterStub />, {
|
||||
wrapper: ({ children }) => (
|
||||
<Provider store={setupStore()}>
|
||||
<AuthProvider initialProvidersAreSet>
|
||||
<QueryClientProvider client={new QueryClient()}>
|
||||
{children}
|
||||
</QueryClientProvider>
|
||||
</AuthProvider>
|
||||
</Provider>
|
||||
),
|
||||
});
|
||||
};
|
||||
|
||||
describe("TaskCard", () => {
|
||||
it("format the issue id", async () => {
|
||||
renderTaskCard();
|
||||
|
||||
const taskId = screen.getByTestId("task-id");
|
||||
expect(taskId).toHaveTextContent(/#123/i);
|
||||
});
|
||||
|
||||
it("should call createConversation when clicking the launch button", async () => {
|
||||
const createConversationSpy = vi.spyOn(OpenHands, "createConversation");
|
||||
|
||||
renderTaskCard();
|
||||
|
||||
const launchButton = screen.getByTestId("task-launch-button");
|
||||
await userEvent.click(launchButton);
|
||||
|
||||
expect(createConversationSpy).toHaveBeenCalled();
|
||||
});
|
||||
|
||||
describe("creating conversation prompts", () => {
|
||||
beforeEach(() => {
|
||||
const retrieveUserGitRepositoriesSpy = vi.spyOn(
|
||||
GitService,
|
||||
"retrieveUserGitRepositories",
|
||||
);
|
||||
retrieveUserGitRepositoriesSpy.mockResolvedValue({
|
||||
data: MOCK_RESPOSITORIES,
|
||||
nextPage: null,
|
||||
});
|
||||
});
|
||||
|
||||
it("should call create conversation with the merge conflict prompt", async () => {
|
||||
const createConversationSpy = vi.spyOn(OpenHands, "createConversation");
|
||||
|
||||
renderTaskCard(MOCK_TASK_1);
|
||||
|
||||
const launchButton = screen.getByTestId("task-launch-button");
|
||||
await userEvent.click(launchButton);
|
||||
|
||||
expect(createConversationSpy).toHaveBeenCalledWith(
|
||||
MOCK_RESPOSITORIES[0],
|
||||
getMergeConflictPrompt(MOCK_TASK_1.issue_number, MOCK_TASK_1.repo),
|
||||
[],
|
||||
undefined,
|
||||
);
|
||||
});
|
||||
|
||||
it("should call create conversation with the failing checks prompt", async () => {
|
||||
const createConversationSpy = vi.spyOn(OpenHands, "createConversation");
|
||||
|
||||
renderTaskCard(MOCK_TASK_2);
|
||||
|
||||
const launchButton = screen.getByTestId("task-launch-button");
|
||||
await userEvent.click(launchButton);
|
||||
|
||||
expect(createConversationSpy).toHaveBeenCalledWith(
|
||||
MOCK_RESPOSITORIES[1],
|
||||
getFailingChecksPrompt(MOCK_TASK_2.issue_number, MOCK_TASK_2.repo),
|
||||
[],
|
||||
undefined,
|
||||
);
|
||||
});
|
||||
|
||||
it("should call create conversation with the unresolved comments prompt", async () => {
|
||||
const createConversationSpy = vi.spyOn(OpenHands, "createConversation");
|
||||
|
||||
renderTaskCard(MOCK_TASK_3);
|
||||
|
||||
const launchButton = screen.getByTestId("task-launch-button");
|
||||
await userEvent.click(launchButton);
|
||||
|
||||
expect(createConversationSpy).toHaveBeenCalledWith(
|
||||
MOCK_RESPOSITORIES[2],
|
||||
getUnresolvedCommentsPrompt(MOCK_TASK_3.issue_number, MOCK_TASK_3.repo),
|
||||
[],
|
||||
undefined,
|
||||
);
|
||||
});
|
||||
|
||||
it("should call create conversation with the open issue prompt", async () => {
|
||||
const createConversationSpy = vi.spyOn(OpenHands, "createConversation");
|
||||
|
||||
renderTaskCard(MOCK_TASK_4);
|
||||
|
||||
const launchButton = screen.getByTestId("task-launch-button");
|
||||
await userEvent.click(launchButton);
|
||||
|
||||
expect(createConversationSpy).toHaveBeenCalledWith(
|
||||
MOCK_RESPOSITORIES[3],
|
||||
getOpenIssuePrompt(MOCK_TASK_4.issue_number, MOCK_TASK_4.repo),
|
||||
[],
|
||||
undefined,
|
||||
);
|
||||
});
|
||||
});
|
||||
|
||||
it("should disable the launch button and update text content when creating a conversation", async () => {
|
||||
renderTaskCard();
|
||||
|
||||
const launchButton = screen.getByTestId("task-launch-button");
|
||||
await userEvent.click(launchButton);
|
||||
|
||||
expect(launchButton).toHaveTextContent(/Loading/i);
|
||||
expect(launchButton).toBeDisabled();
|
||||
});
|
||||
});
|
||||
@@ -0,0 +1,113 @@
|
||||
import { render, screen, waitFor } from "@testing-library/react";
|
||||
import { afterEach, describe, expect, it, vi } from "vitest";
|
||||
import { QueryClient, QueryClientProvider } from "@tanstack/react-query";
|
||||
import { Provider } from "react-redux";
|
||||
import { createRoutesStub } from "react-router";
|
||||
import { setupStore } from "test-utils";
|
||||
import userEvent from "@testing-library/user-event";
|
||||
import { TaskSuggestions } from "#/components/features/home/tasks/task-suggestions";
|
||||
import { SuggestionsService } from "#/api/suggestions-service/suggestions-service.api";
|
||||
import { MOCK_TASKS } from "#/mocks/task-suggestions-handlers";
|
||||
import { AuthProvider } from "#/context/auth-context";
|
||||
|
||||
const renderTaskSuggestions = (initialProvidersAreSet = true) => {
|
||||
const RouterStub = createRoutesStub([
|
||||
{
|
||||
Component: TaskSuggestions,
|
||||
path: "/",
|
||||
},
|
||||
{
|
||||
Component: () => <div data-testid="conversation-screen" />,
|
||||
path: "/conversations/:conversationId",
|
||||
},
|
||||
{
|
||||
Component: () => <div data-testid="settings-screen" />,
|
||||
path: "/settings",
|
||||
},
|
||||
]);
|
||||
|
||||
return render(<RouterStub />, {
|
||||
wrapper: ({ children }) => (
|
||||
<Provider store={setupStore()}>
|
||||
<AuthProvider initialProvidersAreSet={initialProvidersAreSet}>
|
||||
<QueryClientProvider client={new QueryClient()}>
|
||||
{children}
|
||||
</QueryClientProvider>
|
||||
</AuthProvider>
|
||||
</Provider>
|
||||
),
|
||||
});
|
||||
};
|
||||
|
||||
describe("TaskSuggestions", () => {
|
||||
const getSuggestedTasksSpy = vi.spyOn(
|
||||
SuggestionsService,
|
||||
"getSuggestedTasks",
|
||||
);
|
||||
|
||||
afterEach(() => {
|
||||
vi.clearAllMocks();
|
||||
});
|
||||
|
||||
it("should render the task suggestions section", () => {
|
||||
renderTaskSuggestions();
|
||||
screen.getByTestId("task-suggestions");
|
||||
});
|
||||
|
||||
it("should render an empty message if there are no tasks", async () => {
|
||||
getSuggestedTasksSpy.mockResolvedValue([]);
|
||||
renderTaskSuggestions();
|
||||
await screen.findByText(/No tasks available/i);
|
||||
});
|
||||
|
||||
it("should render the task groups with the correct titles", async () => {
|
||||
getSuggestedTasksSpy.mockResolvedValue(MOCK_TASKS);
|
||||
renderTaskSuggestions();
|
||||
|
||||
await waitFor(() => {
|
||||
MOCK_TASKS.forEach((taskGroup) => {
|
||||
screen.getByText(taskGroup.title);
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
it("should render the task cards with the correct task details", async () => {
|
||||
getSuggestedTasksSpy.mockResolvedValue(MOCK_TASKS);
|
||||
renderTaskSuggestions();
|
||||
|
||||
await waitFor(() => {
|
||||
MOCK_TASKS.forEach((task) => {
|
||||
screen.getByText(task.title);
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
it("should render skeletons when loading", async () => {
|
||||
getSuggestedTasksSpy.mockResolvedValue(MOCK_TASKS);
|
||||
renderTaskSuggestions();
|
||||
|
||||
const skeletons = screen.getAllByTestId("task-group-skeleton");
|
||||
expect(skeletons.length).toBeGreaterThan(0);
|
||||
|
||||
await waitFor(() => {
|
||||
MOCK_TASKS.forEach((taskGroup) => {
|
||||
screen.getByText(taskGroup.title);
|
||||
});
|
||||
});
|
||||
|
||||
expect(screen.queryByTestId("task-group-skeleton")).not.toBeInTheDocument();
|
||||
});
|
||||
|
||||
it("should display a button to settings if the user needs to sign in with their git provider", async () => {
|
||||
renderTaskSuggestions(false);
|
||||
|
||||
expect(getSuggestedTasksSpy).not.toHaveBeenCalled();
|
||||
const goToSettingsButton = await screen.findByTestId(
|
||||
"navigate-to-settings-button",
|
||||
);
|
||||
expect(goToSettingsButton).toBeInTheDocument();
|
||||
|
||||
await userEvent.click(goToSettingsButton);
|
||||
await screen.findByTestId("settings-screen");
|
||||
});
|
||||
});
|
||||
@@ -61,25 +61,25 @@ describe("PaymentForm", () => {
|
||||
renderPaymentForm();
|
||||
|
||||
const topUpInput = await screen.findByTestId("top-up-input");
|
||||
await user.type(topUpInput, "50.12");
|
||||
await user.type(topUpInput, "50");
|
||||
|
||||
const topUpButton = screen.getByText("PAYMENT$ADD_CREDIT");
|
||||
await user.click(topUpButton);
|
||||
|
||||
expect(createCheckoutSessionSpy).toHaveBeenCalledWith(50.12);
|
||||
expect(createCheckoutSessionSpy).toHaveBeenCalledWith(50);
|
||||
});
|
||||
|
||||
it("should round the top-up amount to two decimal places", async () => {
|
||||
it("should only accept integer values", async () => {
|
||||
const user = userEvent.setup();
|
||||
renderPaymentForm();
|
||||
|
||||
const topUpInput = await screen.findByTestId("top-up-input");
|
||||
await user.type(topUpInput, "50.125456");
|
||||
await user.type(topUpInput, "50");
|
||||
|
||||
const topUpButton = screen.getByText("PAYMENT$ADD_CREDIT");
|
||||
await user.click(topUpButton);
|
||||
|
||||
expect(createCheckoutSessionSpy).toHaveBeenCalledWith(50.13);
|
||||
expect(createCheckoutSessionSpy).toHaveBeenCalledWith(50);
|
||||
});
|
||||
|
||||
it("should disable the top-up button if the user enters an invalid amount", async () => {
|
||||
@@ -100,7 +100,7 @@ describe("PaymentForm", () => {
|
||||
renderPaymentForm();
|
||||
|
||||
const topUpInput = await screen.findByTestId("top-up-input");
|
||||
await user.type(topUpInput, "50.12");
|
||||
await user.type(topUpInput, "50");
|
||||
|
||||
const topUpButton = screen.getByText("PAYMENT$ADD_CREDIT");
|
||||
await user.click(topUpButton);
|
||||
@@ -114,7 +114,7 @@ describe("PaymentForm", () => {
|
||||
renderPaymentForm();
|
||||
|
||||
const topUpInput = await screen.findByTestId("top-up-input");
|
||||
await user.type(topUpInput, "-50.12");
|
||||
await user.type(topUpInput, "-50");
|
||||
|
||||
const topUpButton = screen.getByText("PAYMENT$ADD_CREDIT");
|
||||
await user.click(topUpButton);
|
||||
@@ -139,6 +139,8 @@ describe("PaymentForm", () => {
|
||||
const user = userEvent.setup();
|
||||
renderPaymentForm();
|
||||
|
||||
// With type="number", the browser would prevent non-numeric input,
|
||||
// but we'll test the validation logic anyway
|
||||
const topUpInput = await screen.findByTestId("top-up-input");
|
||||
await user.type(topUpInput, "abc");
|
||||
|
||||
@@ -160,5 +162,19 @@ describe("PaymentForm", () => {
|
||||
|
||||
expect(createCheckoutSessionSpy).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
test("user enters a decimal value", async () => {
|
||||
const user = userEvent.setup();
|
||||
renderPaymentForm();
|
||||
|
||||
// With step="1", the browser would validate this, but we'll test our validation logic
|
||||
const topUpInput = await screen.findByTestId("top-up-input");
|
||||
await user.type(topUpInput, "50.5");
|
||||
|
||||
const topUpButton = screen.getByText("PAYMENT$ADD_CREDIT");
|
||||
await user.click(topUpButton);
|
||||
|
||||
expect(createCheckoutSessionSpy).not.toHaveBeenCalled();
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
@@ -1,9 +1,18 @@
|
||||
import { render, screen } from "@testing-library/react";
|
||||
import { describe, it, expect } from "vitest";
|
||||
import { describe, it, expect, vi } from "vitest";
|
||||
import { Messages } from "#/components/features/chat/messages";
|
||||
import type { Message } from "#/message";
|
||||
import { renderWithProviders } from "test-utils";
|
||||
|
||||
// Mock the useParams hook to provide a conversationId
|
||||
vi.mock("react-router", async () => {
|
||||
const actual = await vi.importActual<typeof import("react-router")>("react-router");
|
||||
return {
|
||||
...actual,
|
||||
useParams: () => ({ conversationId: "test-conversation-id" }),
|
||||
};
|
||||
});
|
||||
|
||||
describe("File Operations Messages", () => {
|
||||
it("should show success indicator for successful file read operation", () => {
|
||||
const messages: Message[] = [
|
||||
|
||||
@@ -5,7 +5,7 @@ import { Command, appendInput, appendOutput } from "#/state/command-slice";
|
||||
import Terminal from "#/components/features/terminal/terminal";
|
||||
|
||||
const renderTerminal = (commands: Command[] = []) =>
|
||||
renderWithProviders(<Terminal secrets={[]} />, {
|
||||
renderWithProviders(<Terminal />, {
|
||||
preloadedState: {
|
||||
cmd: {
|
||||
commands,
|
||||
@@ -121,7 +121,7 @@ describe.skip("Terminal", () => {
|
||||
|
||||
// This test fails because it expects `disposeMock` to have been called before the component is unmounted.
|
||||
it.skip("should dispose the terminal on unmount", () => {
|
||||
const { unmount } = renderWithProviders(<Terminal secrets={[]} />);
|
||||
const { unmount } = renderWithProviders(<Terminal />);
|
||||
|
||||
expect(mockTerminal.dispose).not.toHaveBeenCalled();
|
||||
|
||||
|
||||
@@ -1,31 +1,31 @@
|
||||
import { beforeAll, describe, expect, it, vi } from "vitest";
|
||||
import { render } from "@testing-library/react";
|
||||
import { afterEach } from "node:test";
|
||||
import { ReactNode } from "react";
|
||||
import { useTerminal } from "#/hooks/use-terminal";
|
||||
import { Command } from "#/state/command-slice";
|
||||
import { AgentState } from "#/types/agent-state";
|
||||
import { renderWithProviders } from "../../test-utils";
|
||||
|
||||
// Mock the WsClient context
|
||||
vi.mock("#/context/ws-client-provider", () => ({
|
||||
useWsClient: () => ({
|
||||
send: vi.fn(),
|
||||
status: "CONNECTED",
|
||||
isLoadingMessages: false,
|
||||
events: [],
|
||||
}),
|
||||
}));
|
||||
|
||||
interface TestTerminalComponentProps {
|
||||
commands: Command[];
|
||||
secrets: string[];
|
||||
}
|
||||
|
||||
function TestTerminalComponent({
|
||||
commands,
|
||||
secrets,
|
||||
}: TestTerminalComponentProps) {
|
||||
const ref = useTerminal({ commands, secrets, disabled: false });
|
||||
const ref = useTerminal({ commands });
|
||||
return <div ref={ref} />;
|
||||
}
|
||||
|
||||
interface WrapperProps {
|
||||
children: ReactNode;
|
||||
}
|
||||
|
||||
function Wrapper({ children }: WrapperProps) {
|
||||
return <div>{children}</div>;
|
||||
}
|
||||
|
||||
describe("useTerminal", () => {
|
||||
const mockTerminal = vi.hoisted(() => ({
|
||||
loadAddon: vi.fn(),
|
||||
@@ -57,8 +57,11 @@ describe("useTerminal", () => {
|
||||
});
|
||||
|
||||
it("should render", () => {
|
||||
render(<TestTerminalComponent commands={[]} secrets={[]} />, {
|
||||
wrapper: Wrapper,
|
||||
renderWithProviders(<TestTerminalComponent commands={[]} />, {
|
||||
preloadedState: {
|
||||
agent: { curAgentState: AgentState.RUNNING },
|
||||
cmd: { commands: [] },
|
||||
},
|
||||
});
|
||||
});
|
||||
|
||||
@@ -68,15 +71,19 @@ describe("useTerminal", () => {
|
||||
{ content: "hello", type: "output" },
|
||||
];
|
||||
|
||||
render(<TestTerminalComponent commands={commands} secrets={[]} />, {
|
||||
wrapper: Wrapper,
|
||||
renderWithProviders(<TestTerminalComponent commands={commands} />, {
|
||||
preloadedState: {
|
||||
agent: { curAgentState: AgentState.RUNNING },
|
||||
cmd: { commands },
|
||||
},
|
||||
});
|
||||
|
||||
expect(mockTerminal.writeln).toHaveBeenNthCalledWith(1, "echo hello");
|
||||
expect(mockTerminal.writeln).toHaveBeenNthCalledWith(2, "hello");
|
||||
});
|
||||
|
||||
it("should hide secrets in the terminal", () => {
|
||||
// This test is no longer relevant as secrets filtering has been removed
|
||||
it.skip("should hide secrets in the terminal", () => {
|
||||
const secret = "super_secret_github_token";
|
||||
const anotherSecret = "super_secret_another_token";
|
||||
const commands: Command[] = [
|
||||
@@ -87,23 +94,18 @@ describe("useTerminal", () => {
|
||||
{ content: secret, type: "output" },
|
||||
];
|
||||
|
||||
render(
|
||||
renderWithProviders(
|
||||
<TestTerminalComponent
|
||||
commands={commands}
|
||||
secrets={[secret, anotherSecret]}
|
||||
/>,
|
||||
{
|
||||
wrapper: Wrapper,
|
||||
preloadedState: {
|
||||
agent: { curAgentState: AgentState.RUNNING },
|
||||
cmd: { commands },
|
||||
},
|
||||
},
|
||||
);
|
||||
|
||||
// BUG: `vi.clearAllMocks()` does not clear the number of calls
|
||||
// therefore, we need to assume the order of the calls based
|
||||
// on the test order
|
||||
expect(mockTerminal.writeln).toHaveBeenNthCalledWith(
|
||||
3,
|
||||
`export GITHUB_TOKEN=${"*".repeat(10)},${"*".repeat(10)},${"*".repeat(10)}`,
|
||||
);
|
||||
expect(mockTerminal.writeln).toHaveBeenNthCalledWith(4, "*".repeat(10));
|
||||
// This test is no longer relevant as secrets filtering has been removed
|
||||
});
|
||||
});
|
||||
|
||||
370
frontend/__tests__/routes/home-screen.test.tsx
Normal file
370
frontend/__tests__/routes/home-screen.test.tsx
Normal file
@@ -0,0 +1,370 @@
|
||||
import { render, screen, waitFor, within } from "@testing-library/react";
|
||||
import { beforeEach, describe, expect, it, vi } from "vitest";
|
||||
import { QueryClientProvider, QueryClient } from "@tanstack/react-query";
|
||||
import userEvent from "@testing-library/user-event";
|
||||
import { createRoutesStub } from "react-router";
|
||||
import { Provider } from "react-redux";
|
||||
import { setupStore } from "test-utils";
|
||||
import { AxiosError } from "axios";
|
||||
import HomeScreen from "#/routes/home";
|
||||
import { AuthProvider } from "#/context/auth-context";
|
||||
import * as GitService from "#/api/git";
|
||||
import { GitRepository } from "#/types/git";
|
||||
import OpenHands from "#/api/open-hands";
|
||||
import MainApp from "#/routes/root-layout";
|
||||
|
||||
const createAxiosNotFoundErrorObject = () =>
|
||||
new AxiosError(
|
||||
"Request failed with status code 404",
|
||||
"ERR_BAD_REQUEST",
|
||||
undefined,
|
||||
undefined,
|
||||
{
|
||||
status: 404,
|
||||
statusText: "Not Found",
|
||||
data: { message: "Settings not found" },
|
||||
headers: {},
|
||||
// @ts-expect-error - we only need the response object for this test
|
||||
config: {},
|
||||
},
|
||||
);
|
||||
|
||||
const RouterStub = createRoutesStub([
|
||||
{
|
||||
Component: MainApp,
|
||||
path: "/",
|
||||
children: [
|
||||
{
|
||||
Component: HomeScreen,
|
||||
path: "/",
|
||||
},
|
||||
{
|
||||
Component: () => <div data-testid="conversation-screen" />,
|
||||
path: "/conversations/:conversationId",
|
||||
},
|
||||
{
|
||||
Component: () => <div data-testid="settings-screen" />,
|
||||
path: "/settings",
|
||||
},
|
||||
],
|
||||
},
|
||||
]);
|
||||
|
||||
const renderHomeScreen = (initialProvidersAreSet = true) =>
|
||||
render(<RouterStub />, {
|
||||
wrapper: ({ children }) => (
|
||||
<Provider store={setupStore()}>
|
||||
<AuthProvider initialProvidersAreSet={initialProvidersAreSet}>
|
||||
<QueryClientProvider client={new QueryClient()}>
|
||||
{children}
|
||||
</QueryClientProvider>
|
||||
</AuthProvider>
|
||||
</Provider>
|
||||
),
|
||||
});
|
||||
|
||||
const MOCK_RESPOSITORIES: GitRepository[] = [
|
||||
{
|
||||
id: 1,
|
||||
full_name: "octocat/hello-world",
|
||||
git_provider: "github",
|
||||
is_public: true,
|
||||
},
|
||||
{
|
||||
id: 2,
|
||||
full_name: "octocat/earth",
|
||||
git_provider: "github",
|
||||
is_public: true,
|
||||
},
|
||||
];
|
||||
|
||||
describe("HomeScreen", () => {
|
||||
it("should render", () => {
|
||||
renderHomeScreen();
|
||||
screen.getByTestId("home-screen");
|
||||
});
|
||||
|
||||
it("should render the repository connector and suggested tasks sections", async () => {
|
||||
renderHomeScreen();
|
||||
|
||||
screen.getByTestId("repo-connector");
|
||||
screen.getByTestId("task-suggestions");
|
||||
});
|
||||
|
||||
it("should filter the suggested tasks based on the selected repository", async () => {
|
||||
const retrieveUserGitRepositoriesSpy = vi.spyOn(
|
||||
GitService,
|
||||
"retrieveUserGitRepositories",
|
||||
);
|
||||
retrieveUserGitRepositoriesSpy.mockResolvedValue({
|
||||
data: MOCK_RESPOSITORIES,
|
||||
nextPage: null,
|
||||
});
|
||||
|
||||
renderHomeScreen();
|
||||
|
||||
const taskSuggestions = screen.getByTestId("task-suggestions");
|
||||
|
||||
// Initially, all tasks should be visible
|
||||
await waitFor(() => {
|
||||
within(taskSuggestions).getByText("octocat/hello-world");
|
||||
within(taskSuggestions).getByText("octocat/earth");
|
||||
});
|
||||
|
||||
// Select a repository from the dropdown
|
||||
const repoConnector = screen.getByTestId("repo-connector");
|
||||
|
||||
const dropdown = within(repoConnector).getByTestId("repo-dropdown");
|
||||
await userEvent.click(dropdown);
|
||||
|
||||
const repoOption = screen.getAllByText("octocat/hello-world")[1];
|
||||
await userEvent.click(repoOption);
|
||||
|
||||
// After selecting a repository, only tasks related to that repository should be visible
|
||||
await waitFor(() => {
|
||||
within(taskSuggestions).getByText("octocat/hello-world");
|
||||
expect(
|
||||
within(taskSuggestions).queryByText("octocat/earth"),
|
||||
).not.toBeInTheDocument();
|
||||
});
|
||||
});
|
||||
|
||||
it("should reset the filtered tasks when the selected repository is cleared", async () => {
|
||||
const retrieveUserGitRepositoriesSpy = vi.spyOn(
|
||||
GitService,
|
||||
"retrieveUserGitRepositories",
|
||||
);
|
||||
retrieveUserGitRepositoriesSpy.mockResolvedValue({
|
||||
data: MOCK_RESPOSITORIES,
|
||||
nextPage: null,
|
||||
});
|
||||
|
||||
renderHomeScreen();
|
||||
|
||||
const taskSuggestions = screen.getByTestId("task-suggestions");
|
||||
|
||||
// Initially, all tasks should be visible
|
||||
await waitFor(() => {
|
||||
within(taskSuggestions).getByText("octocat/hello-world");
|
||||
within(taskSuggestions).getByText("octocat/earth");
|
||||
});
|
||||
|
||||
// Select a repository from the dropdown
|
||||
const repoConnector = screen.getByTestId("repo-connector");
|
||||
|
||||
const dropdown = within(repoConnector).getByTestId("repo-dropdown");
|
||||
await userEvent.click(dropdown);
|
||||
|
||||
const repoOption = screen.getAllByText("octocat/hello-world")[1];
|
||||
await userEvent.click(repoOption);
|
||||
|
||||
// After selecting a repository, only tasks related to that repository should be visible
|
||||
await waitFor(() => {
|
||||
within(taskSuggestions).getByText("octocat/hello-world");
|
||||
expect(
|
||||
within(taskSuggestions).queryByText("octocat/earth"),
|
||||
).not.toBeInTheDocument();
|
||||
});
|
||||
|
||||
// Clear the selected repository
|
||||
await userEvent.clear(dropdown);
|
||||
|
||||
// All tasks should be visible again
|
||||
await waitFor(() => {
|
||||
within(taskSuggestions).getByText("octocat/hello-world");
|
||||
within(taskSuggestions).getByText("octocat/earth");
|
||||
});
|
||||
});
|
||||
|
||||
describe("launch buttons", () => {
|
||||
const setupLaunchButtons = async () => {
|
||||
let headerLaunchButton = screen.getByTestId("header-launch-button");
|
||||
let repoLaunchButton = screen.getByTestId("repo-launch-button");
|
||||
let tasksLaunchButtons =
|
||||
await screen.findAllByTestId("task-launch-button");
|
||||
|
||||
// Select a repository from the dropdown to enable the repo launch button
|
||||
const repoConnector = screen.getByTestId("repo-connector");
|
||||
const dropdown = within(repoConnector).getByTestId("repo-dropdown");
|
||||
await userEvent.click(dropdown);
|
||||
const repoOption = screen.getAllByText("octocat/hello-world")[1];
|
||||
await userEvent.click(repoOption);
|
||||
|
||||
expect(headerLaunchButton).not.toBeDisabled();
|
||||
expect(repoLaunchButton).not.toBeDisabled();
|
||||
tasksLaunchButtons.forEach((button) => {
|
||||
expect(button).not.toBeDisabled();
|
||||
});
|
||||
|
||||
headerLaunchButton = screen.getByTestId("header-launch-button");
|
||||
repoLaunchButton = screen.getByTestId("repo-launch-button");
|
||||
tasksLaunchButtons = await screen.findAllByTestId("task-launch-button");
|
||||
|
||||
return {
|
||||
headerLaunchButton,
|
||||
repoLaunchButton,
|
||||
tasksLaunchButtons,
|
||||
};
|
||||
};
|
||||
|
||||
beforeEach(() => {
|
||||
const retrieveUserGitRepositoriesSpy = vi.spyOn(
|
||||
GitService,
|
||||
"retrieveUserGitRepositories",
|
||||
);
|
||||
retrieveUserGitRepositoriesSpy.mockResolvedValue({
|
||||
data: MOCK_RESPOSITORIES,
|
||||
nextPage: null,
|
||||
});
|
||||
});
|
||||
|
||||
it("should disable the other launch buttons when the header launch button is clicked", async () => {
|
||||
renderHomeScreen();
|
||||
const { headerLaunchButton, repoLaunchButton } =
|
||||
await setupLaunchButtons();
|
||||
|
||||
const tasksLaunchButtonsAfter =
|
||||
await screen.findAllByTestId("task-launch-button");
|
||||
|
||||
// All other buttons should be disabled when the header button is clicked
|
||||
await userEvent.click(headerLaunchButton);
|
||||
|
||||
expect(headerLaunchButton).toBeDisabled();
|
||||
expect(repoLaunchButton).toBeDisabled();
|
||||
tasksLaunchButtonsAfter.forEach((button) => {
|
||||
expect(button).toBeDisabled();
|
||||
});
|
||||
});
|
||||
|
||||
it("should disable the other launch buttons when the repo launch button is clicked", async () => {
|
||||
renderHomeScreen();
|
||||
const { headerLaunchButton, repoLaunchButton } =
|
||||
await setupLaunchButtons();
|
||||
|
||||
const tasksLaunchButtonsAfter =
|
||||
await screen.findAllByTestId("task-launch-button");
|
||||
|
||||
// All other buttons should be disabled when the repo button is clicked
|
||||
await userEvent.click(repoLaunchButton);
|
||||
|
||||
expect(headerLaunchButton).toBeDisabled();
|
||||
expect(repoLaunchButton).toBeDisabled();
|
||||
tasksLaunchButtonsAfter.forEach((button) => {
|
||||
expect(button).toBeDisabled();
|
||||
});
|
||||
});
|
||||
|
||||
it("should disable the other launch buttons when any task launch button is clicked", async () => {
|
||||
renderHomeScreen();
|
||||
const { headerLaunchButton, repoLaunchButton, tasksLaunchButtons } =
|
||||
await setupLaunchButtons();
|
||||
|
||||
const tasksLaunchButtonsAfter =
|
||||
await screen.findAllByTestId("task-launch-button");
|
||||
|
||||
// All other buttons should be disabled when the task button is clicked
|
||||
await userEvent.click(tasksLaunchButtons[0]);
|
||||
|
||||
expect(headerLaunchButton).toBeDisabled();
|
||||
expect(repoLaunchButton).toBeDisabled();
|
||||
tasksLaunchButtonsAfter.forEach((button) => {
|
||||
expect(button).toBeDisabled();
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
it("should hide the suggested tasks section if not authed with git(hub|lab)", async () => {
|
||||
renderHomeScreen(false);
|
||||
|
||||
const taskSuggestions = screen.queryByTestId("task-suggestions");
|
||||
const repoConnector = screen.getByTestId("repo-connector");
|
||||
|
||||
expect(taskSuggestions).not.toBeInTheDocument();
|
||||
expect(repoConnector).toBeInTheDocument();
|
||||
});
|
||||
});
|
||||
|
||||
describe("Settings 404", () => {
|
||||
const getConfigSpy = vi.spyOn(OpenHands, "getConfig");
|
||||
const getSettingsSpy = vi.spyOn(OpenHands, "getSettings");
|
||||
|
||||
it("should open the settings modal if GET /settings fails with a 404", async () => {
|
||||
const error = createAxiosNotFoundErrorObject();
|
||||
getSettingsSpy.mockRejectedValue(error);
|
||||
|
||||
renderHomeScreen();
|
||||
|
||||
const settingsModal = await screen.findByTestId("ai-config-modal");
|
||||
expect(settingsModal).toBeInTheDocument();
|
||||
});
|
||||
|
||||
it("should navigate to the settings screen when clicking the advanced settings button", async () => {
|
||||
const error = createAxiosNotFoundErrorObject();
|
||||
getSettingsSpy.mockRejectedValue(error);
|
||||
|
||||
const user = userEvent.setup();
|
||||
renderHomeScreen();
|
||||
|
||||
const settingsScreen = screen.queryByTestId("settings-screen");
|
||||
expect(settingsScreen).not.toBeInTheDocument();
|
||||
|
||||
const settingsModal = await screen.findByTestId("ai-config-modal");
|
||||
expect(settingsModal).toBeInTheDocument();
|
||||
|
||||
const advancedSettingsButton = await screen.findByTestId(
|
||||
"advanced-settings-link",
|
||||
);
|
||||
await user.click(advancedSettingsButton);
|
||||
|
||||
const settingsScreenAfter = await screen.findByTestId("settings-screen");
|
||||
expect(settingsScreenAfter).toBeInTheDocument();
|
||||
|
||||
const settingsModalAfter = screen.queryByTestId("ai-config-modal");
|
||||
expect(settingsModalAfter).not.toBeInTheDocument();
|
||||
});
|
||||
|
||||
it("should not open the settings modal if GET /settings fails but is SaaS mode", async () => {
|
||||
// @ts-expect-error - we only need APP_MODE for this test
|
||||
getConfigSpy.mockResolvedValue({
|
||||
APP_MODE: "saas",
|
||||
FEATURE_FLAGS: {
|
||||
ENABLE_BILLING: false,
|
||||
HIDE_LLM_SETTINGS: false,
|
||||
},
|
||||
});
|
||||
const error = createAxiosNotFoundErrorObject();
|
||||
getSettingsSpy.mockRejectedValue(error);
|
||||
|
||||
renderHomeScreen();
|
||||
|
||||
// small hack to wait for the modal to not appear
|
||||
await expect(
|
||||
screen.findByTestId("ai-config-modal", {}, { timeout: 1000 }),
|
||||
).rejects.toThrow();
|
||||
});
|
||||
});
|
||||
|
||||
describe("Setup Payment modal", () => {
|
||||
const getConfigSpy = vi.spyOn(OpenHands, "getConfig");
|
||||
const getSettingsSpy = vi.spyOn(OpenHands, "getSettings");
|
||||
|
||||
it("should only render if SaaS mode and is new user", async () => {
|
||||
// @ts-expect-error - we only need the APP_MODE for this test
|
||||
getConfigSpy.mockResolvedValue({
|
||||
APP_MODE: "saas",
|
||||
FEATURE_FLAGS: {
|
||||
ENABLE_BILLING: true,
|
||||
HIDE_LLM_SETTINGS: false,
|
||||
},
|
||||
});
|
||||
const error = createAxiosNotFoundErrorObject();
|
||||
getSettingsSpy.mockRejectedValue(error);
|
||||
|
||||
renderHomeScreen();
|
||||
|
||||
const setupPaymentModal = await screen.findByTestId(
|
||||
"proceed-to-stripe-button",
|
||||
);
|
||||
expect(setupPaymentModal).toBeInTheDocument();
|
||||
});
|
||||
});
|
||||
@@ -1,177 +0,0 @@
|
||||
import { createRoutesStub } from "react-router";
|
||||
import { afterEach, describe, expect, it, vi } from "vitest";
|
||||
import { renderWithProviders } from "test-utils";
|
||||
import userEvent from "@testing-library/user-event";
|
||||
import { screen } from "@testing-library/react";
|
||||
import { AxiosError } from "axios";
|
||||
import MainApp from "#/routes/root-layout";
|
||||
import SettingsScreen from "#/routes/settings";
|
||||
import Home from "#/routes/home";
|
||||
import OpenHands from "#/api/open-hands";
|
||||
|
||||
const createAxiosNotFoundErrorObject = () =>
|
||||
new AxiosError(
|
||||
"Request failed with status code 404",
|
||||
"ERR_BAD_REQUEST",
|
||||
undefined,
|
||||
undefined,
|
||||
{
|
||||
status: 404,
|
||||
statusText: "Not Found",
|
||||
data: { message: "Settings not found" },
|
||||
headers: {},
|
||||
// @ts-expect-error - we only need the response object for this test
|
||||
config: {},
|
||||
},
|
||||
);
|
||||
|
||||
const getSettingsSpy = vi.spyOn(OpenHands, "getSettings");
|
||||
|
||||
const RouterStub = createRoutesStub([
|
||||
{
|
||||
// layout route
|
||||
Component: MainApp,
|
||||
path: "/",
|
||||
children: [
|
||||
{
|
||||
// home route
|
||||
Component: Home,
|
||||
path: "/",
|
||||
},
|
||||
{
|
||||
Component: SettingsScreen,
|
||||
path: "/settings",
|
||||
},
|
||||
],
|
||||
},
|
||||
]);
|
||||
|
||||
afterEach(() => {
|
||||
vi.clearAllMocks();
|
||||
});
|
||||
|
||||
describe("Home Screen", () => {
|
||||
const getConfigSpy = vi.spyOn(OpenHands, "getConfig");
|
||||
|
||||
it("should render the home screen", () => {
|
||||
renderWithProviders(<RouterStub initialEntries={["/"]} />);
|
||||
});
|
||||
|
||||
it("should navigate to the settings screen when the settings button is clicked", async () => {
|
||||
const user = userEvent.setup();
|
||||
renderWithProviders(<RouterStub initialEntries={["/"]} />);
|
||||
|
||||
const settingsButton = await screen.findByTestId("settings-button");
|
||||
await user.click(settingsButton);
|
||||
|
||||
const settingsScreen = await screen.findByTestId("settings-screen");
|
||||
expect(settingsScreen).toBeInTheDocument();
|
||||
});
|
||||
|
||||
it("should navigate to the settings when pressing 'Connect to GitHub' if the user isn't authenticated", async () => {
|
||||
// @ts-expect-error - we only need APP_MODE for this test
|
||||
getConfigSpy.mockResolvedValue({
|
||||
APP_MODE: "oss",
|
||||
FEATURE_FLAGS: {
|
||||
ENABLE_BILLING: false,
|
||||
HIDE_LLM_SETTINGS: false,
|
||||
},
|
||||
});
|
||||
const user = userEvent.setup();
|
||||
renderWithProviders(<RouterStub initialEntries={["/"]} />);
|
||||
|
||||
const connectToGitHubButton =
|
||||
await screen.findByTestId("connect-to-github");
|
||||
await user.click(connectToGitHubButton);
|
||||
|
||||
const settingsScreen = await screen.findByTestId("settings-screen");
|
||||
expect(settingsScreen).toBeInTheDocument();
|
||||
});
|
||||
});
|
||||
|
||||
describe("Settings 404", () => {
|
||||
const getConfigSpy = vi.spyOn(OpenHands, "getConfig");
|
||||
|
||||
it("should open the settings modal if GET /settings fails with a 404", async () => {
|
||||
const error = createAxiosNotFoundErrorObject();
|
||||
getSettingsSpy.mockRejectedValue(error);
|
||||
|
||||
renderWithProviders(<RouterStub initialEntries={["/"]} />);
|
||||
|
||||
const settingsModal = await screen.findByTestId("ai-config-modal");
|
||||
expect(settingsModal).toBeInTheDocument();
|
||||
});
|
||||
|
||||
it("should navigate to the settings screen when clicking the advanced settings button", async () => {
|
||||
const error = createAxiosNotFoundErrorObject();
|
||||
getSettingsSpy.mockRejectedValue(error);
|
||||
|
||||
const user = userEvent.setup();
|
||||
renderWithProviders(<RouterStub initialEntries={["/"]} />);
|
||||
|
||||
const settingsScreen = screen.queryByTestId("settings-screen");
|
||||
expect(settingsScreen).not.toBeInTheDocument();
|
||||
|
||||
const settingsModal = await screen.findByTestId("ai-config-modal");
|
||||
expect(settingsModal).toBeInTheDocument();
|
||||
|
||||
const advancedSettingsButton = await screen.findByTestId(
|
||||
"advanced-settings-link",
|
||||
);
|
||||
await user.click(advancedSettingsButton);
|
||||
|
||||
const settingsScreenAfter = await screen.findByTestId("settings-screen");
|
||||
expect(settingsScreenAfter).toBeInTheDocument();
|
||||
|
||||
const settingsModalAfter = screen.queryByTestId("ai-config-modal");
|
||||
expect(settingsModalAfter).not.toBeInTheDocument();
|
||||
});
|
||||
|
||||
it("should not open the settings modal if GET /settings fails but is SaaS mode", async () => {
|
||||
// @ts-expect-error - we only need APP_MODE for this test
|
||||
getConfigSpy.mockResolvedValue({
|
||||
APP_MODE: "saas",
|
||||
FEATURE_FLAGS: {
|
||||
ENABLE_BILLING: false,
|
||||
HIDE_LLM_SETTINGS: false,
|
||||
},
|
||||
});
|
||||
const error = createAxiosNotFoundErrorObject();
|
||||
getSettingsSpy.mockRejectedValue(error);
|
||||
|
||||
renderWithProviders(<RouterStub initialEntries={["/"]} />);
|
||||
|
||||
// small hack to wait for the modal to not appear
|
||||
await expect(
|
||||
screen.findByTestId("ai-config-modal", {}, { timeout: 1000 }),
|
||||
).rejects.toThrow();
|
||||
});
|
||||
});
|
||||
|
||||
describe("Setup Payment modal", () => {
|
||||
const getConfigSpy = vi.spyOn(OpenHands, "getConfig");
|
||||
|
||||
afterEach(() => {
|
||||
vi.resetAllMocks();
|
||||
});
|
||||
|
||||
it("should only render if SaaS mode and is new user", async () => {
|
||||
// @ts-expect-error - we only need the APP_MODE for this test
|
||||
getConfigSpy.mockResolvedValue({
|
||||
APP_MODE: "saas",
|
||||
FEATURE_FLAGS: {
|
||||
ENABLE_BILLING: true,
|
||||
HIDE_LLM_SETTINGS: false,
|
||||
},
|
||||
});
|
||||
const error = createAxiosNotFoundErrorObject();
|
||||
getSettingsSpy.mockRejectedValue(error);
|
||||
|
||||
renderWithProviders(<RouterStub initialEntries={["/"]} />);
|
||||
|
||||
const setupPaymentModal = await screen.findByTestId(
|
||||
"proceed-to-stripe-button",
|
||||
);
|
||||
expect(setupPaymentModal).toBeInTheDocument();
|
||||
});
|
||||
});
|
||||
51
frontend/__tests__/services/observations.test.ts
Normal file
51
frontend/__tests__/services/observations.test.ts
Normal file
@@ -0,0 +1,51 @@
|
||||
import { beforeEach, describe, expect, it, vi } from "vitest";
|
||||
import { handleObservationMessage } from "#/services/observations";
|
||||
import store from "#/store";
|
||||
import { ObservationMessage } from "#/types/message";
|
||||
|
||||
// Mock dependencies
|
||||
vi.mock("#/store", () => ({
|
||||
default: {
|
||||
dispatch: vi.fn(),
|
||||
},
|
||||
}));
|
||||
|
||||
describe("Observations Service", () => {
|
||||
beforeEach(() => {
|
||||
vi.clearAllMocks();
|
||||
});
|
||||
|
||||
describe("handleObservationMessage", () => {
|
||||
const createErrorMessage = (): ObservationMessage => ({
|
||||
id: 14,
|
||||
timestamp: "2025-04-14T13:37:54.451843",
|
||||
message: "The action has not been executed.",
|
||||
cause: 12,
|
||||
observation: "error",
|
||||
content: "The action has not been executed.",
|
||||
extras: {
|
||||
error_id: "",
|
||||
metadata: {},
|
||||
},
|
||||
});
|
||||
|
||||
it("should dispatch error messages exactly once", () => {
|
||||
const errorMessage = createErrorMessage();
|
||||
|
||||
handleObservationMessage(errorMessage);
|
||||
|
||||
expect(store.dispatch).toHaveBeenCalledTimes(1);
|
||||
expect(store.dispatch).toHaveBeenCalledWith({
|
||||
type: "chat/addAssistantObservation",
|
||||
payload: expect.objectContaining({
|
||||
observation: "error",
|
||||
content: "The action has not been executed.",
|
||||
source: "user",
|
||||
extras: {
|
||||
error_id: "",
|
||||
},
|
||||
}),
|
||||
});
|
||||
});
|
||||
});
|
||||
});
|
||||
@@ -2,10 +2,7 @@ import { render, screen } from "@testing-library/react";
|
||||
import { test, expect, describe, vi } from "vitest";
|
||||
import { InteractiveChatBox } from "#/components/features/chat/interactive-chat-box";
|
||||
import { ChatInput } from "#/components/features/chat/chat-input";
|
||||
import path from 'path';
|
||||
import { scanDirectoryForUnlocalizedStrings } from "#/utils/scan-unlocalized-strings-ast";
|
||||
|
||||
// Mock react-i18next
|
||||
vi.mock("react-i18next", () => ({
|
||||
useTranslation: () => ({
|
||||
t: (key: string) => key,
|
||||
@@ -15,22 +12,17 @@ vi.mock("react-i18next", () => ({
|
||||
describe("Check for hardcoded English strings", () => {
|
||||
test("InteractiveChatBox should not have hardcoded English strings", () => {
|
||||
const { container } = render(
|
||||
<InteractiveChatBox
|
||||
onSubmit={() => {}}
|
||||
onStop={() => {}}
|
||||
/>
|
||||
<InteractiveChatBox onSubmit={() => {}} onStop={() => {}} />,
|
||||
);
|
||||
|
||||
// Get all text content
|
||||
const text = container.textContent;
|
||||
|
||||
// List of English strings that should be translated
|
||||
const hardcodedStrings = [
|
||||
"What do you want to build?",
|
||||
];
|
||||
const hardcodedStrings = ["What do you want to build?"];
|
||||
|
||||
// Check each string
|
||||
hardcodedStrings.forEach(str => {
|
||||
hardcodedStrings.forEach((str) => {
|
||||
expect(text).not.toContain(str);
|
||||
});
|
||||
});
|
||||
@@ -39,23 +31,4 @@ describe("Check for hardcoded English strings", () => {
|
||||
render(<ChatInput onSubmit={() => {}} />);
|
||||
screen.getByPlaceholderText("SUGGESTIONS$WHAT_TO_BUILD");
|
||||
});
|
||||
|
||||
test("No unlocalized strings should exist in frontend code", () => {
|
||||
const srcPath = path.resolve(__dirname, '../../src');
|
||||
|
||||
// Get unlocalized strings using the AST scanner
|
||||
// The scanner now properly handles CSS classes using AST information
|
||||
const results = scanDirectoryForUnlocalizedStrings(srcPath);
|
||||
|
||||
// If we found any unlocalized strings, format them for output
|
||||
if (results.size > 0) {
|
||||
const formattedResults = Array.from(results.entries())
|
||||
.map(([file, strings]) => `\n${file}:\n ${strings.join('\n ')}`)
|
||||
.join('\n');
|
||||
|
||||
throw new Error(
|
||||
`Found unlocalized strings in the following files:${formattedResults}`
|
||||
);
|
||||
}
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
91
frontend/__tests__/utils/group-suggested-tasks.test.ts
Normal file
91
frontend/__tests__/utils/group-suggested-tasks.test.ts
Normal file
@@ -0,0 +1,91 @@
|
||||
import { expect, test } from "vitest";
|
||||
import {
|
||||
SuggestedTask,
|
||||
SuggestedTaskGroup,
|
||||
} from "#/components/features/home/tasks/task.types";
|
||||
import { groupSuggestedTasks } from "#/utils/group-suggested-tasks";
|
||||
|
||||
const rawTasks: SuggestedTask[] = [
|
||||
{
|
||||
issue_number: 1,
|
||||
repo: "repo1",
|
||||
title: "Task 1",
|
||||
task_type: "MERGE_CONFLICTS",
|
||||
},
|
||||
{
|
||||
issue_number: 2,
|
||||
repo: "repo1",
|
||||
title: "Task 2",
|
||||
task_type: "FAILING_CHECKS",
|
||||
},
|
||||
{
|
||||
issue_number: 3,
|
||||
repo: "repo2",
|
||||
title: "Task 3",
|
||||
task_type: "UNRESOLVED_COMMENTS",
|
||||
},
|
||||
{
|
||||
issue_number: 4,
|
||||
repo: "repo2",
|
||||
title: "Task 4",
|
||||
task_type: "OPEN_ISSUE",
|
||||
},
|
||||
{
|
||||
issue_number: 5,
|
||||
repo: "repo3",
|
||||
title: "Task 5",
|
||||
task_type: "FAILING_CHECKS",
|
||||
},
|
||||
];
|
||||
|
||||
const groupedTasks: SuggestedTaskGroup[] = [
|
||||
{
|
||||
title: "repo1",
|
||||
tasks: [
|
||||
{
|
||||
issue_number: 1,
|
||||
repo: "repo1",
|
||||
title: "Task 1",
|
||||
task_type: "MERGE_CONFLICTS",
|
||||
},
|
||||
{
|
||||
issue_number: 2,
|
||||
repo: "repo1",
|
||||
title: "Task 2",
|
||||
task_type: "FAILING_CHECKS",
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
title: "repo2",
|
||||
tasks: [
|
||||
{
|
||||
issue_number: 3,
|
||||
repo: "repo2",
|
||||
title: "Task 3",
|
||||
task_type: "UNRESOLVED_COMMENTS",
|
||||
},
|
||||
{
|
||||
issue_number: 4,
|
||||
repo: "repo2",
|
||||
title: "Task 4",
|
||||
task_type: "OPEN_ISSUE",
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
title: "repo3",
|
||||
tasks: [
|
||||
{
|
||||
issue_number: 5,
|
||||
repo: "repo3",
|
||||
title: "Task 5",
|
||||
task_type: "FAILING_CHECKS",
|
||||
},
|
||||
],
|
||||
},
|
||||
];
|
||||
|
||||
test("groupSuggestedTasks", () => {
|
||||
expect(groupSuggestedTasks(rawTasks)).toEqual(groupedTasks);
|
||||
});
|
||||
3988
frontend/package-lock.json
generated
3988
frontend/package-lock.json
generated
File diff suppressed because it is too large
Load Diff
@@ -1,51 +1,53 @@
|
||||
{
|
||||
"name": "openhands-frontend",
|
||||
"version": "0.31.0",
|
||||
"version": "0.33.0",
|
||||
"private": true,
|
||||
"type": "module",
|
||||
"engines": {
|
||||
"node": ">=20.0.0"
|
||||
},
|
||||
"dependencies": {
|
||||
"@heroui/react": "2.7.5",
|
||||
"@heroui/react": "2.7.6",
|
||||
"@microlink/react-json-view": "^1.26.1",
|
||||
"@monaco-editor/react": "^4.7.0-rc.0",
|
||||
"@react-router/node": "^7.4.0",
|
||||
"@react-router/serve": "^7.4.0",
|
||||
"@react-types/shared": "^3.28.0",
|
||||
"@reduxjs/toolkit": "^2.6.1",
|
||||
"@stripe/react-stripe-js": "^3.5.1",
|
||||
"@stripe/stripe-js": "^6.1.0",
|
||||
"@tanstack/react-query": "^5.69.0",
|
||||
"@vitejs/plugin-react": "^4.3.2",
|
||||
"@react-router/node": "^7.5.1",
|
||||
"@react-router/serve": "^7.5.1",
|
||||
"@react-types/shared": "^3.29.0",
|
||||
"@reduxjs/toolkit": "^2.7.0",
|
||||
"@stripe/react-stripe-js": "^3.6.0",
|
||||
"@stripe/stripe-js": "^7.1.0",
|
||||
"@tanstack/react-query": "^5.74.4",
|
||||
"@vitejs/plugin-react": "^4.4.0",
|
||||
"@xterm/addon-fit": "^0.10.0",
|
||||
"@xterm/xterm": "^5.4.0",
|
||||
"axios": "^1.8.4",
|
||||
"clsx": "^2.1.1",
|
||||
"eslint-config-airbnb-typescript": "^18.0.0",
|
||||
"framer-motion": "^12.6.2",
|
||||
"i18next": "^24.2.3",
|
||||
"i18next-browser-languagedetector": "^8.0.4",
|
||||
"framer-motion": "^12.7.4",
|
||||
"i18next": "^25.0.0",
|
||||
"i18next-browser-languagedetector": "^8.0.5",
|
||||
"i18next-http-backend": "^3.0.2",
|
||||
"isbot": "^5.1.25",
|
||||
"jose": "^6.0.10",
|
||||
"lucide-react": "^0.501.0",
|
||||
"monaco-editor": "^0.52.2",
|
||||
"posthog-js": "^1.233.1",
|
||||
"react": "^19.0.0",
|
||||
"react-dom": "^19.0.0",
|
||||
"posthog-js": "^1.236.2",
|
||||
"react": "^19.1.0",
|
||||
"react-dom": "^19.1.0",
|
||||
"react-highlight": "^0.15.0",
|
||||
"react-hot-toast": "^2.5.1",
|
||||
"react-i18next": "^15.4.1",
|
||||
"react-icons": "^5.5.0",
|
||||
"react-markdown": "^10.1.0",
|
||||
"react-redux": "^9.2.0",
|
||||
"react-router": "^7.4.0",
|
||||
"react-router": "^7.5.1",
|
||||
"react-syntax-highlighter": "^15.6.1",
|
||||
"react-textarea-autosize": "^8.5.8",
|
||||
"react-textarea-autosize": "^8.5.9",
|
||||
"remark-gfm": "^4.0.1",
|
||||
"sirv-cli": "^3.0.1",
|
||||
"socket.io-client": "^4.8.1",
|
||||
"tailwind-merge": "^3.0.2",
|
||||
"vite": "^6.2.3",
|
||||
"tailwind-merge": "^3.2.0",
|
||||
"vite": "^6.3.2",
|
||||
"web-vitals": "^3.5.2",
|
||||
"ws": "^8.18.1"
|
||||
},
|
||||
@@ -53,7 +55,7 @@
|
||||
"dev": "npm run make-i18n && cross-env VITE_MOCK_API=false react-router dev",
|
||||
"dev:mock": "npm run make-i18n && cross-env VITE_MOCK_API=true VITE_MOCK_SAAS=false react-router dev",
|
||||
"dev:mock:saas": "npm run make-i18n && cross-env VITE_MOCK_API=true VITE_MOCK_SAAS=true react-router dev",
|
||||
"build": "npm run make-i18n && npm run typecheck && react-router build",
|
||||
"build": "npm run make-i18n && react-router build",
|
||||
"start": "npx sirv-cli build/ --single",
|
||||
"test": "vitest run",
|
||||
"test:e2e": "playwright test",
|
||||
@@ -62,15 +64,11 @@
|
||||
"preview": "vite preview",
|
||||
"make-i18n": "node scripts/make-i18n-translations.cjs",
|
||||
"prelint": "npm run make-i18n",
|
||||
"lint": "eslint src --ext .ts,.tsx,.js && prettier --check src/**/*.{ts,tsx}",
|
||||
"lint": "npm run typecheck && eslint src --ext .ts,.tsx,.js && prettier --check src/**/*.{ts,tsx}",
|
||||
"lint:fix": "eslint src --ext .ts,.tsx,.js --fix && prettier --write src/**/*.{ts,tsx}",
|
||||
"prepare": "cd .. && husky frontend/.husky",
|
||||
"typecheck": "react-router typegen && tsc"
|
||||
},
|
||||
"husky": {
|
||||
"hooks": {
|
||||
"pre-commit": "npm run test && lint-staged"
|
||||
}
|
||||
"typecheck": "react-router typegen && tsc",
|
||||
"check-unlocalized-strings": "node scripts/check-unlocalized-strings.cjs"
|
||||
},
|
||||
"lint-staged": {
|
||||
"src/**/*.{ts,tsx,js}": [
|
||||
@@ -83,43 +81,44 @@
|
||||
"@babel/traverse": "^7.27.0",
|
||||
"@babel/types": "^7.27.0",
|
||||
"@mswjs/socket.io-binding": "^0.1.1",
|
||||
"@playwright/test": "^1.51.1",
|
||||
"@react-router/dev": "^7.4.0",
|
||||
"@playwright/test": "^1.52.0",
|
||||
"@react-router/dev": "^7.5.1",
|
||||
"@tailwindcss/typography": "^0.5.16",
|
||||
"@tanstack/eslint-plugin-query": "^5.68.0",
|
||||
"@tanstack/eslint-plugin-query": "^5.73.3",
|
||||
"@testing-library/dom": "^10.4.0",
|
||||
"@testing-library/jest-dom": "^6.6.1",
|
||||
"@testing-library/react": "^16.2.0",
|
||||
"@testing-library/react": "^16.3.0",
|
||||
"@testing-library/user-event": "^14.6.1",
|
||||
"@types/node": "^22.13.14",
|
||||
"@types/react": "^19.0.12",
|
||||
"@types/react-dom": "^19.0.3",
|
||||
"@types/node": "^22.14.1",
|
||||
"@types/react": "^19.1.2",
|
||||
"@types/react-dom": "^19.1.1",
|
||||
"@types/react-highlight": "^0.12.8",
|
||||
"@types/react-syntax-highlighter": "^15.5.13",
|
||||
"@types/ws": "^8.18.0",
|
||||
"@types/ws": "^8.18.1",
|
||||
"@typescript-eslint/eslint-plugin": "^7.18.0",
|
||||
"@typescript-eslint/parser": "^7.18.0",
|
||||
"@vitest/coverage-v8": "^3.0.9",
|
||||
"@vitest/coverage-v8": "^3.1.1",
|
||||
"autoprefixer": "^10.4.21",
|
||||
"cross-env": "^7.0.3",
|
||||
"eslint": "^8.57.0",
|
||||
"eslint-config-airbnb": "^19.0.4",
|
||||
"eslint-config-airbnb-typescript": "^18.0.0",
|
||||
"eslint-config-prettier": "^10.1.1",
|
||||
"eslint-config-prettier": "^10.1.2",
|
||||
"eslint-plugin-import": "^2.29.1",
|
||||
"eslint-plugin-jsx-a11y": "^6.10.2",
|
||||
"eslint-plugin-prettier": "^5.2.5",
|
||||
"eslint-plugin-react": "^7.37.4",
|
||||
"eslint-plugin-prettier": "^5.2.6",
|
||||
"eslint-plugin-react": "^7.37.5",
|
||||
"eslint-plugin-react-hooks": "^4.6.2",
|
||||
"husky": "^9.1.6",
|
||||
"jsdom": "^26.0.0",
|
||||
"lint-staged": "^15.5.0",
|
||||
"eslint-plugin-unused-imports": "^4.1.4",
|
||||
"husky": "^9.1.7",
|
||||
"jsdom": "^26.1.0",
|
||||
"lint-staged": "^15.5.1",
|
||||
"msw": "^2.6.6",
|
||||
"postcss": "^8.5.2",
|
||||
"prettier": "^3.5.3",
|
||||
"stripe": "^17.7.0",
|
||||
"stripe": "^18.0.0",
|
||||
"tailwindcss": "^3.4.17",
|
||||
"typescript": "^5.8.2",
|
||||
"typescript": "^5.8.3",
|
||||
"vite-plugin-svgr": "^4.2.0",
|
||||
"vite-tsconfig-paths": "^5.1.4",
|
||||
"vitest": "^3.0.2"
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
* - Please do NOT serve this file on production.
|
||||
*/
|
||||
|
||||
const PACKAGE_VERSION = '2.7.3'
|
||||
const PACKAGE_VERSION = '2.7.5'
|
||||
const INTEGRITY_CHECKSUM = '00729d72e3b82faf54ca8b9621dbb96f'
|
||||
const IS_MOCKED_RESPONSE = Symbol('isMockedResponse')
|
||||
const activeClientIds = new Set()
|
||||
|
||||
709
frontend/scripts/check-unlocalized-strings.cjs
Executable file
709
frontend/scripts/check-unlocalized-strings.cjs
Executable file
@@ -0,0 +1,709 @@
|
||||
#!/usr/bin/env node
|
||||
|
||||
/**
|
||||
* Pre-commit hook script to check for unlocalized strings in the frontend code
|
||||
* This script is based on the test in __tests__/utils/check-hardcoded-strings.test.tsx
|
||||
*/
|
||||
|
||||
const path = require('path');
|
||||
const fs = require('fs');
|
||||
const parser = require('@babel/parser');
|
||||
const traverse = require('@babel/traverse').default;
|
||||
|
||||
// Files/directories to ignore
|
||||
const IGNORE_PATHS = [
|
||||
// Build and dependency files
|
||||
"node_modules",
|
||||
"dist",
|
||||
".git",
|
||||
"test",
|
||||
"__tests__",
|
||||
".d.ts",
|
||||
"i18n",
|
||||
"package.json",
|
||||
"package-lock.json",
|
||||
"tsconfig.json",
|
||||
|
||||
// Internal code that doesn't need localization
|
||||
"mocks", // Mock data
|
||||
"assets", // SVG paths and CSS classes
|
||||
"types", // Type definitions and constants
|
||||
"state", // Redux state management
|
||||
"api", // API endpoints
|
||||
"services", // Internal services
|
||||
"hooks", // React hooks
|
||||
"context", // React context
|
||||
"store", // Redux store
|
||||
"routes.ts", // Route definitions
|
||||
"root.tsx", // Root component
|
||||
"entry.client.tsx", // Client entry point
|
||||
"utils/scan-unlocalized-strings.ts", // Original scanner
|
||||
"utils/scan-unlocalized-strings-ast.ts", // This file itself
|
||||
];
|
||||
|
||||
// Extensions to scan
|
||||
const SCAN_EXTENSIONS = [".ts", ".tsx", ".js", ".jsx"];
|
||||
|
||||
// Attributes that typically don't contain user-facing text
|
||||
const NON_TEXT_ATTRIBUTES = [
|
||||
"className",
|
||||
"i18nKey",
|
||||
"testId",
|
||||
"id",
|
||||
"name",
|
||||
"type",
|
||||
"href",
|
||||
"src",
|
||||
"alt",
|
||||
"placeholder",
|
||||
"rel",
|
||||
"target",
|
||||
"style",
|
||||
"onClick",
|
||||
"onChange",
|
||||
"onSubmit",
|
||||
"data-testid",
|
||||
"aria-label",
|
||||
"aria-labelledby",
|
||||
"aria-describedby",
|
||||
"aria-hidden",
|
||||
"role",
|
||||
];
|
||||
|
||||
function shouldIgnorePath(filePath) {
|
||||
return IGNORE_PATHS.some((ignore) => filePath.includes(ignore));
|
||||
}
|
||||
|
||||
// Check if a string looks like a translation key
|
||||
// Translation keys typically use dots, underscores, or are all caps
|
||||
// Also check for the pattern with $ which is used in our translation keys
|
||||
function isLikelyTranslationKey(str) {
|
||||
return (
|
||||
/^[A-Z0-9_$.]+$/.test(str) ||
|
||||
str.includes(".") ||
|
||||
/[A-Z0-9_]+\$[A-Z0-9_]+/.test(str)
|
||||
);
|
||||
}
|
||||
|
||||
// Check if a string is a raw translation key that should be wrapped in t()
|
||||
function isRawTranslationKey(str) {
|
||||
// Check for our specific translation key pattern (e.g., "SETTINGS$GITHUB_SETTINGS")
|
||||
// Exclude specific keys that are already properly used with i18next.t() in the code
|
||||
const excludedKeys = [
|
||||
"STATUS$ERROR_LLM_OUT_OF_CREDITS",
|
||||
"ERROR$GENERIC",
|
||||
"GITHUB$AUTH_SCOPE",
|
||||
];
|
||||
|
||||
if (excludedKeys.includes(str)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return /^[A-Z0-9_]+\$[A-Z0-9_]+$/.test(str);
|
||||
}
|
||||
|
||||
// Specific technical strings that should be excluded from localization
|
||||
const EXCLUDED_TECHNICAL_STRINGS = [
|
||||
"openid email profile", // OAuth scope string - not user-facing
|
||||
"OPEN_ISSUE", // Task type identifier, not a UI string
|
||||
];
|
||||
|
||||
function isExcludedTechnicalString(str) {
|
||||
return EXCLUDED_TECHNICAL_STRINGS.includes(str);
|
||||
}
|
||||
|
||||
function isCommonDevelopmentString(str) {
|
||||
// Technical patterns that are definitely not UI strings
|
||||
const technicalPatterns = [
|
||||
// URLs and paths
|
||||
/^https?:\/\//, // URLs
|
||||
/^\/[a-zA-Z0-9_\-./]*$/, // File paths
|
||||
/^[a-zA-Z0-9_-]+\.[a-zA-Z0-9_-]+$/, // File extensions, class names
|
||||
/^@[a-zA-Z0-9/-]+$/, // Import paths
|
||||
/^#\/[a-zA-Z0-9/-]+$/, // Alias imports
|
||||
/^[a-zA-Z0-9/-]+\/[a-zA-Z0-9/-]+$/, // Module paths
|
||||
/^data:image\/[a-zA-Z0-9;,]+$/, // Data URLs
|
||||
/^application\/[a-zA-Z0-9-]+$/, // MIME types
|
||||
/^!\[image]\(data:image\/png;base64,$/, // Markdown image with base64 data
|
||||
|
||||
// Numbers, IDs, and technical values
|
||||
/^\d+(\.\d+)?$/, // Numbers
|
||||
/^#[0-9a-fA-F]{3,8}$/, // Color codes
|
||||
/^[a-zA-Z0-9_-]+=[a-zA-Z0-9_-]+$/, // Key-value pairs
|
||||
/^mm:ss$/, // Time format
|
||||
/^[a-zA-Z0-9]+\/[a-zA-Z0-9-]+$/, // Provider/model format
|
||||
/^\?[a-zA-Z0-9_-]+$/, // URL parameters
|
||||
/^[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}$/i, // UUID
|
||||
/^[A-Za-z0-9+/=]+$/, // Base64
|
||||
|
||||
// HTML and CSS selectors
|
||||
/^[a-z]+(\[[^\]]+\])+$/, // CSS attribute selectors
|
||||
/^[a-z]+:[a-z-]+$/, // CSS pseudo-selectors
|
||||
/^[a-z]+\.[a-z0-9_-]+$/, // CSS class selectors
|
||||
/^[a-z]+#[a-z0-9_-]+$/, // CSS ID selectors
|
||||
/^[a-z]+\s*>\s*[a-z]+$/, // CSS child selectors
|
||||
/^[a-z]+\s+[a-z]+$/, // CSS descendant selectors
|
||||
|
||||
// CSS and styling patterns
|
||||
/^[a-z0-9-]+:[a-z0-9-]+$/, // CSS property:value
|
||||
/^[a-z0-9-]+:[a-z0-9-]+;[a-z0-9-]+:[a-z0-9-]+$/, // Multiple CSS properties
|
||||
];
|
||||
|
||||
// File extensions and media types
|
||||
const fileExtensionPattern =
|
||||
/^\.(png|jpg|jpeg|gif|svg|webp|bmp|ico|pdf|mp4|webm|ogg|mp3|wav|json|xml|csv|txt|md|html|css|js|jsx|ts|tsx)$/i;
|
||||
if (fileExtensionPattern.test(str)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// AI model and provider patterns
|
||||
const aiRelatedPattern =
|
||||
/^(AI|OpenAI|VertexAI|PaLM|Gemini|Anthropic|Anyscale|Databricks|Ollama|FriendliAI|Groq|DeepInfra|AI21|Replicate|OpenRouter|Azure|AWS|SageMaker|Bedrock|Mistral|Perplexity|Fireworks|Cloudflare|Workers|Voyage|claude-|gpt-|o1-|o3-)/i;
|
||||
if (aiRelatedPattern.test(str)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// CSS units and values
|
||||
const cssUnitsPattern =
|
||||
/(px|rem|em|vh|vw|vmin|vmax|ch|ex|fr|deg|rad|turn|grad|ms|s)$/;
|
||||
const cssValuesPattern =
|
||||
/(rgb|rgba|hsl|hsla|#[0-9a-fA-F]+|solid|absolute|relative|sticky|fixed|static|block|inline|flex|grid|none|auto|hidden|visible)/;
|
||||
|
||||
if (cssUnitsPattern.test(str) || cssValuesPattern.test(str)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// Check for CSS class strings with brackets (common in the codebase)
|
||||
if (
|
||||
str.includes("[") &&
|
||||
str.includes("]") &&
|
||||
(str.includes("px") ||
|
||||
str.includes("rem") ||
|
||||
str.includes("em") ||
|
||||
str.includes("w-") ||
|
||||
str.includes("h-") ||
|
||||
str.includes("p-") ||
|
||||
str.includes("m-"))
|
||||
) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// Check for CSS class strings with specific patterns
|
||||
if (
|
||||
str.includes("border-") ||
|
||||
str.includes("rounded-") ||
|
||||
str.includes("cursor-") ||
|
||||
str.includes("opacity-") ||
|
||||
str.includes("disabled:") ||
|
||||
str.includes("hover:") ||
|
||||
str.includes("focus-within:") ||
|
||||
str.includes("first-of-type:") ||
|
||||
str.includes("last-of-type:") ||
|
||||
str.includes("group-data-")
|
||||
) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// Check if it looks like a Tailwind class string
|
||||
if (/^[a-z0-9-]+(\s+[a-z0-9-]+)*$/.test(str)) {
|
||||
// Common Tailwind prefixes and patterns
|
||||
const tailwindPrefixes = [
|
||||
"bg-", "text-", "border-", "rounded-", "p-", "m-", "px-", "py-", "mx-", "my-",
|
||||
"w-", "h-", "min-w-", "min-h-", "max-w-", "max-h-", "flex-", "grid-", "gap-",
|
||||
"space-", "items-", "justify-", "self-", "col-", "row-", "order-", "object-",
|
||||
"overflow-", "opacity-", "z-", "top-", "right-", "bottom-", "left-", "inset-",
|
||||
"font-", "tracking-", "leading-", "list-", "placeholder-", "shadow-", "ring-",
|
||||
"transition-", "duration-", "ease-", "delay-", "animate-", "scale-", "rotate-",
|
||||
"translate-", "skew-", "origin-", "cursor-", "select-", "resize-", "fill-", "stroke-",
|
||||
];
|
||||
|
||||
// Check if any word in the string starts with a Tailwind prefix
|
||||
const words = str.split(/\s+/);
|
||||
for (const word of words) {
|
||||
for (const prefix of tailwindPrefixes) {
|
||||
if (word.startsWith(prefix)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Check for Tailwind modifiers
|
||||
const tailwindModifiers = [
|
||||
"hover:", "focus:", "active:", "disabled:", "visited:", "first:", "last:",
|
||||
"odd:", "even:", "group-hover:", "focus-within:", "focus-visible:", "motion-safe:",
|
||||
"motion-reduce:", "dark:", "light:", "sm:", "md:", "lg:", "xl:", "2xl:",
|
||||
];
|
||||
|
||||
for (const word of words) {
|
||||
for (const modifier of tailwindModifiers) {
|
||||
if (word.includes(modifier)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Check for CSS property combinations
|
||||
const cssProperties = [
|
||||
"border", "rounded", "px", "py", "mx", "my", "p", "m", "w", "h", "flex",
|
||||
"grid", "gap", "transition", "duration", "font", "leading", "tracking",
|
||||
];
|
||||
|
||||
// If the string contains multiple CSS properties, it's likely a CSS class string
|
||||
let cssPropertyCount = 0;
|
||||
for (const word of words) {
|
||||
if (
|
||||
cssProperties.some(
|
||||
(prop) => word === prop || word.startsWith(`${prop}-`),
|
||||
)
|
||||
) {
|
||||
cssPropertyCount += 1;
|
||||
}
|
||||
}
|
||||
|
||||
if (cssPropertyCount >= 2) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
// Check for specific CSS class patterns that appear in the test failures
|
||||
if (
|
||||
str.match(
|
||||
/^(border|rounded|flex|grid|transition|duration|ease|hover:|focus:|active:|disabled:|placeholder:|text-|bg-|w-|h-|p-|m-|gap-|items-|justify-|self-|overflow-|cursor-|opacity-|z-|top-|right-|bottom-|left-|inset-|font-|tracking-|leading-|whitespace-|break-|truncate|shadow-|ring-|outline-|animate-|transform|rotate-|scale-|skew-|translate-|origin-|first-of-type:|last-of-type:|group-data-|max-|min-|px-|py-|mx-|my-|grow|shrink|resize-|underline|italic|normal)/,
|
||||
)
|
||||
) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// HTML tags and attributes
|
||||
if (
|
||||
/^<[a-z0-9]+(?:\s[^>]*)?>.*<\/[a-z0-9]+>$/i.test(str) ||
|
||||
/^<[a-z0-9]+ [^>]+\/>$/i.test(str)
|
||||
) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// Check for specific patterns in suggestions and examples
|
||||
if (
|
||||
str.includes("* ") &&
|
||||
(str.includes("create a") ||
|
||||
str.includes("build a") ||
|
||||
str.includes("make a"))
|
||||
) {
|
||||
// This is likely a suggestion or example, not a UI string
|
||||
return false;
|
||||
}
|
||||
|
||||
// Check for specific technical identifiers from the test failures
|
||||
if (
|
||||
/^(download_via_vscode_button_clicked|open-vscode-error-|set-indicator|settings_saved|openhands-trace-|provider-item-|last_browser_action_error)$/.test(
|
||||
str,
|
||||
)
|
||||
) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// Check for URL paths and query parameters
|
||||
if (
|
||||
str.startsWith("?") ||
|
||||
str.startsWith("/") ||
|
||||
str.includes("auth.") ||
|
||||
str.includes("$1auth.")
|
||||
) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// Check for specific strings that should be excluded
|
||||
if (
|
||||
str === "Cache Hit:" ||
|
||||
str === "Cache Write:" ||
|
||||
str === "ADD_DOCS" ||
|
||||
str === "ADD_DOCKERFILE" ||
|
||||
str === "Verified" ||
|
||||
str === "Others" ||
|
||||
str === "Feedback" ||
|
||||
str === "JSON File" ||
|
||||
str === "mt-0.5 md:mt-0"
|
||||
) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// Check for long suggestion texts
|
||||
if (
|
||||
str.length > 100 &&
|
||||
(str.includes("Please write a bash script") ||
|
||||
str.includes("Please investigate the repo") ||
|
||||
str.includes("Please push the changes") ||
|
||||
str.includes("Examine the dependencies") ||
|
||||
str.includes("Investigate the documentation") ||
|
||||
str.includes("Investigate the current repo") ||
|
||||
str.includes("I want to create a Hello World app") ||
|
||||
str.includes("I want to create a VueJS app") ||
|
||||
str.includes("This should be a client-only app"))
|
||||
) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// Check for specific error messages and UI text
|
||||
if (
|
||||
str === "All data associated with this project will be lost." ||
|
||||
str === "You will lose any unsaved information." ||
|
||||
str ===
|
||||
"This conversation does not exist, or you do not have permission to access it." ||
|
||||
str === "Failed to fetch settings. Please try reloading." ||
|
||||
str ===
|
||||
"If you tell OpenHands to start a web server, the app will appear here." ||
|
||||
str ===
|
||||
"Your browser doesn't support downloading files. Please use Chrome, Edge, or another browser that supports the File System Access API." ||
|
||||
str ===
|
||||
"Something went wrong while fetching settings. Please reload the page." ||
|
||||
str ===
|
||||
"To help us improve, we collect feedback from your interactions to improve our prompts. By submitting this form, you consent to us collecting this data." ||
|
||||
str === "Please push the latest changes to the existing pull request."
|
||||
) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// Check against all technical patterns
|
||||
return technicalPatterns.some((pattern) => pattern.test(str));
|
||||
}
|
||||
|
||||
function isLikelyUserFacingText(str) {
|
||||
// Basic validation - skip very short strings or strings without letters
|
||||
if (!str || str.length <= 2 || !/[a-zA-Z]/.test(str)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Check if it's a specifically excluded technical string
|
||||
if (isExcludedTechnicalString(str)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Check if it's a raw translation key that should be wrapped in t()
|
||||
if (isRawTranslationKey(str)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// Check if it's a translation key pattern (e.g., "SETTINGS$BASE_URL")
|
||||
// These should be wrapped in t() or use I18nKey enum
|
||||
if (isLikelyTranslationKey(str) && /^[A-Z0-9_]+\$[A-Z0-9_]+$/.test(str)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// First, check if it's a common development string (not user-facing)
|
||||
if (isCommonDevelopmentString(str)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Multi-word phrases are likely UI text
|
||||
const hasMultipleWords = /\s+/.test(str) && str.split(/\s+/).length > 1;
|
||||
|
||||
// Sentences and questions are likely UI text
|
||||
const hasPunctuation = /[?!.,:]/.test(str);
|
||||
const isCapitalizedPhrase = /^[A-Z]/.test(str) && hasMultipleWords;
|
||||
const isTitleCase = hasMultipleWords && /\s[A-Z]/.test(str);
|
||||
const hasSentenceStructure = /^[A-Z].*[.!?]$/.test(str); // Starts with capital, ends with punctuation
|
||||
const hasQuestionForm =
|
||||
/^(What|How|Why|When|Where|Who|Can|Could|Would|Will|Is|Are|Do|Does|Did|Should|May|Might)/.test(
|
||||
str,
|
||||
);
|
||||
|
||||
// Product names and camelCase identifiers are likely UI text
|
||||
const hasInternalCapitals = /[a-z][A-Z]/.test(str); // CamelCase product names
|
||||
|
||||
// Instruction text patterns are likely UI text
|
||||
const looksLikeInstruction =
|
||||
/^(Enter|Type|Select|Choose|Provide|Specify|Search|Find|Input|Add|Write|Describe|Set|Pick|Browse|Upload|Download|Click|Tap|Press|Go to|Visit|Open|Close)/i.test(
|
||||
str,
|
||||
);
|
||||
|
||||
// Error and status messages are likely UI text
|
||||
const looksLikeErrorOrStatus =
|
||||
/(failed|error|invalid|required|missing|incorrect|wrong|unavailable|not found|not available|try again|success|completed|finished|done|saved|updated|created|deleted|removed|added)/i.test(
|
||||
str,
|
||||
);
|
||||
|
||||
// Single word check - assume it's UI text unless proven otherwise
|
||||
const isSingleWord =
|
||||
!str.includes(" ") && str.length > 1 && /^[a-zA-Z]+$/.test(str);
|
||||
|
||||
// For single words, we need to be more careful
|
||||
if (isSingleWord) {
|
||||
// Skip common programming terms and variable names
|
||||
const isCommonProgrammingTerm =
|
||||
/^(null|undefined|true|false|function|class|interface|type|enum|const|let|var|return|import|export|default|async|await|try|catch|finally|throw|new|this|super|extends|implements|instanceof|typeof|void|delete|in|of|for|while|do|if|else|switch|case|break|continue|yield|static|get|set|public|private|protected|readonly|abstract|implements|namespace|module|declare|as|from|with)$/i.test(
|
||||
str,
|
||||
);
|
||||
|
||||
if (isCommonProgrammingTerm) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Skip common variable name patterns
|
||||
const looksLikeVariableName =
|
||||
/^[a-z][a-zA-Z0-9]*$/.test(str) && str.length <= 20;
|
||||
|
||||
if (looksLikeVariableName) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Skip common CSS values
|
||||
const isCommonCssValue =
|
||||
/^(auto|none|hidden|visible|block|inline|flex|grid|row|column|wrap|nowrap|center|start|end|stretch|cover|contain|fixed|absolute|relative|static|sticky|pointer|default|inherit|initial|unset)$/i.test(
|
||||
str,
|
||||
);
|
||||
|
||||
if (isCommonCssValue) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Skip common file extensions
|
||||
const isFileExtension = /^\.[a-z0-9]+$/i.test(str);
|
||||
if (isFileExtension) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Skip common abbreviations
|
||||
const isCommonAbbreviation =
|
||||
/^(id|src|href|url|alt|img|btn|nav|div|span|ul|li|ol|dl|dt|dd|svg|png|jpg|gif|pdf|doc|txt|md|js|ts|jsx|tsx|css|scss|less|html|xml|json|yaml|yml|toml|csv|mp3|mp4|wav|avi|mov|mpeg|webm|webp|ttf|woff|eot|otf)$/i.test(
|
||||
str,
|
||||
);
|
||||
|
||||
if (isCommonAbbreviation) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// If it's a single word that's not a programming term, variable name, CSS value, file extension, or abbreviation,
|
||||
// it might be UI text, but we'll be conservative and return false
|
||||
return false;
|
||||
}
|
||||
|
||||
// If it has multiple words, punctuation, or looks like a sentence, it's likely UI text
|
||||
return (
|
||||
hasMultipleWords ||
|
||||
hasPunctuation ||
|
||||
isCapitalizedPhrase ||
|
||||
isTitleCase ||
|
||||
hasSentenceStructure ||
|
||||
hasQuestionForm ||
|
||||
hasInternalCapitals ||
|
||||
looksLikeInstruction ||
|
||||
looksLikeErrorOrStatus
|
||||
);
|
||||
}
|
||||
|
||||
function isInTranslationContext(path) {
|
||||
// Check if the JSX text is inside a <Trans> component
|
||||
let current = path;
|
||||
while (current.parentPath) {
|
||||
if (
|
||||
current.isJSXElement() &&
|
||||
current.node.openingElement &&
|
||||
current.node.openingElement.name &&
|
||||
current.node.openingElement.name.name === "Trans"
|
||||
) {
|
||||
return true;
|
||||
}
|
||||
current = current.parentPath;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
function scanFileForUnlocalizedStrings(filePath) {
|
||||
// Skip all suggestion files as they contain special strings
|
||||
if (filePath.includes("suggestions")) {
|
||||
return [];
|
||||
}
|
||||
|
||||
try {
|
||||
const content = fs.readFileSync(filePath, "utf-8");
|
||||
const unlocalizedStrings = [];
|
||||
|
||||
// Skip files that are too large
|
||||
if (content.length > 1000000) {
|
||||
console.warn(`Skipping large file: ${filePath}`);
|
||||
return [];
|
||||
}
|
||||
|
||||
try {
|
||||
// Parse the file
|
||||
const ast = parser.parse(content, {
|
||||
sourceType: "module",
|
||||
plugins: ["jsx", "typescript", "classProperties", "decorators-legacy"],
|
||||
});
|
||||
|
||||
// Traverse the AST
|
||||
traverse(ast, {
|
||||
// Find JSX text content
|
||||
JSXText(jsxTextPath) {
|
||||
const text = jsxTextPath.node.value.trim();
|
||||
if (
|
||||
text &&
|
||||
isLikelyUserFacingText(text) &&
|
||||
!isInTranslationContext(jsxTextPath)
|
||||
) {
|
||||
unlocalizedStrings.push(text);
|
||||
}
|
||||
},
|
||||
|
||||
// Find string literals in JSX attributes
|
||||
JSXAttribute(jsxAttrPath) {
|
||||
const attrName = jsxAttrPath.node.name.name.toString();
|
||||
|
||||
// Skip technical attributes that don't contain user-facing text
|
||||
if (NON_TEXT_ATTRIBUTES.includes(attrName)) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Skip styling attributes
|
||||
if (
|
||||
attrName === "className" ||
|
||||
attrName === "class" ||
|
||||
attrName === "style"
|
||||
) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Skip data attributes and event handlers
|
||||
if (attrName.startsWith("data-") || attrName.startsWith("on")) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Check the attribute value
|
||||
const value = jsxAttrPath.node.value;
|
||||
if (value && value.type === "StringLiteral") {
|
||||
const text = value.value.trim();
|
||||
if (text && isLikelyUserFacingText(text)) {
|
||||
unlocalizedStrings.push(text);
|
||||
}
|
||||
}
|
||||
},
|
||||
|
||||
// Find string literals in code
|
||||
StringLiteral(stringPath) {
|
||||
// Skip if parent is JSX attribute (already handled above)
|
||||
if (stringPath.parent.type === "JSXAttribute") {
|
||||
return;
|
||||
}
|
||||
|
||||
// Skip if parent is import/export declaration
|
||||
if (
|
||||
stringPath.parent.type === "ImportDeclaration" ||
|
||||
stringPath.parent.type === "ExportDeclaration"
|
||||
) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Skip if parent is object property key
|
||||
if (
|
||||
stringPath.parent.type === "ObjectProperty" &&
|
||||
stringPath.parent.key === stringPath.node
|
||||
) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Skip if inside a t() call or Trans component
|
||||
let isInsideTranslation = false;
|
||||
let current = stringPath;
|
||||
|
||||
while (current.parentPath && !isInsideTranslation) {
|
||||
// Check for t() function call
|
||||
if (
|
||||
current.parent.type === "CallExpression" &&
|
||||
current.parent.callee &&
|
||||
((current.parent.callee.type === "Identifier" &&
|
||||
current.parent.callee.name === "t") ||
|
||||
(current.parent.callee.type === "MemberExpression" &&
|
||||
current.parent.callee.property &&
|
||||
current.parent.callee.property.name === "t"))
|
||||
) {
|
||||
isInsideTranslation = true;
|
||||
break;
|
||||
}
|
||||
|
||||
// Check for <Trans> component
|
||||
if (
|
||||
current.parent.type === "JSXElement" &&
|
||||
current.parent.openingElement &&
|
||||
current.parent.openingElement.name &&
|
||||
current.parent.openingElement.name.name === "Trans"
|
||||
) {
|
||||
isInsideTranslation = true;
|
||||
break;
|
||||
}
|
||||
|
||||
current = current.parentPath;
|
||||
}
|
||||
|
||||
if (!isInsideTranslation) {
|
||||
const text = stringPath.node.value.trim();
|
||||
if (text && isLikelyUserFacingText(text)) {
|
||||
unlocalizedStrings.push(text);
|
||||
}
|
||||
}
|
||||
},
|
||||
});
|
||||
|
||||
return unlocalizedStrings;
|
||||
} catch (error) {
|
||||
console.error(`Error parsing file ${filePath}:`, error);
|
||||
return [];
|
||||
}
|
||||
} catch (error) {
|
||||
console.error(`Error reading file ${filePath}:`, error);
|
||||
return [];
|
||||
}
|
||||
}
|
||||
|
||||
function scanDirectoryForUnlocalizedStrings(dirPath) {
|
||||
const results = new Map();
|
||||
|
||||
function scanDir(currentPath) {
|
||||
const entries = fs.readdirSync(currentPath, { withFileTypes: true });
|
||||
|
||||
for (const entry of entries) {
|
||||
const fullPath = path.join(currentPath, entry.name);
|
||||
|
||||
if (!shouldIgnorePath(fullPath)) {
|
||||
if (entry.isDirectory()) {
|
||||
scanDir(fullPath);
|
||||
} else if (
|
||||
entry.isFile() &&
|
||||
SCAN_EXTENSIONS.includes(path.extname(fullPath))
|
||||
) {
|
||||
const unlocalized = scanFileForUnlocalizedStrings(fullPath);
|
||||
if (unlocalized.length > 0) {
|
||||
results.set(fullPath, unlocalized);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
scanDir(dirPath);
|
||||
return results;
|
||||
}
|
||||
|
||||
// Run the check
|
||||
try {
|
||||
const srcPath = path.resolve(__dirname, '../src');
|
||||
console.log('Checking for unlocalized strings in frontend code...');
|
||||
|
||||
// Get unlocalized strings using the AST scanner
|
||||
const results = scanDirectoryForUnlocalizedStrings(srcPath);
|
||||
|
||||
// If we found any unlocalized strings, format them for output and exit with error
|
||||
if (results.size > 0) {
|
||||
const formattedResults = Array.from(results.entries())
|
||||
.map(([file, strings]) => `\n${file}:\n ${strings.join('\n ')}`)
|
||||
.join('\n');
|
||||
|
||||
console.error(`Error: Found unlocalized strings in the following files:${formattedResults}`);
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
console.log('✅ No unlocalized strings found in frontend code.');
|
||||
process.exit(0);
|
||||
} catch (error) {
|
||||
console.error('Error running unlocalized strings check:', error);
|
||||
process.exit(1);
|
||||
}
|
||||
@@ -8,6 +8,8 @@ import {
|
||||
Conversation,
|
||||
ResultSet,
|
||||
GetTrajectoryResponse,
|
||||
GitChangeDiff,
|
||||
GitChange,
|
||||
} from "./open-hands.types";
|
||||
import { openHands } from "./open-hands-axios";
|
||||
import { ApiSettings, PostApiSettings } from "#/types/settings";
|
||||
@@ -277,6 +279,26 @@ class OpenHands {
|
||||
appMode === "saas" ? "/api/logout" : "/api/unset-settings-tokens";
|
||||
await openHands.post(endpoint);
|
||||
}
|
||||
|
||||
static async getGitChanges(conversationId: string): Promise<GitChange[]> {
|
||||
const { data } = await openHands.get<GitChange[]>(
|
||||
`/api/conversations/${conversationId}/git/changes`,
|
||||
);
|
||||
return data;
|
||||
}
|
||||
|
||||
static async getGitChangeDiff(
|
||||
conversationId: string,
|
||||
path: string,
|
||||
): Promise<GitChangeDiff> {
|
||||
const { data } = await openHands.get<GitChangeDiff>(
|
||||
`/api/conversations/${conversationId}/git/diff`,
|
||||
{
|
||||
params: { path },
|
||||
},
|
||||
);
|
||||
return data;
|
||||
}
|
||||
}
|
||||
|
||||
export default OpenHands;
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user