mirror of
https://github.com/All-Hands-AI/OpenHands.git
synced 2026-04-29 03:00:45 -04:00
Compare commits
193 Commits
0.22.0
...
improve-ag
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
6ba79c454b | ||
|
|
fbc06f42aa | ||
|
|
f35ed5e277 | ||
|
|
6787a3adf7 | ||
|
|
fa50e0c9b9 | ||
|
|
f4c5bbda19 | ||
|
|
6562297615 | ||
|
|
0217a7cfbd | ||
|
|
aa15c9d385 | ||
|
|
8ad89e368a | ||
|
|
29ba94fc0f | ||
|
|
8956f92f6a | ||
|
|
753e3c4205 | ||
|
|
ecd573febc | ||
|
|
325a558fbc | ||
|
|
666c186826 | ||
|
|
2d2dbf1561 | ||
|
|
abac25cc4c | ||
|
|
70b21d16bd | ||
|
|
bf82f75ae4 | ||
|
|
a8bce3724f | ||
|
|
e109f7e58e | ||
|
|
a20f299579 | ||
|
|
bf77da7849 | ||
|
|
869ea59ecd | ||
|
|
f093c14ad3 | ||
|
|
9d3a0a02b8 | ||
|
|
35bab5070d | ||
|
|
d03b9775b5 | ||
|
|
fab4532f6b | ||
|
|
d33913e036 | ||
|
|
e52aee168e | ||
|
|
c27b191358 | ||
|
|
22c5ad85d9 | ||
|
|
0180ce77b1 | ||
|
|
2f14e53746 | ||
|
|
52723061b1 | ||
|
|
42f1fc92fa | ||
|
|
3f8bc8a7ea | ||
|
|
f869ad995c | ||
|
|
74c942c911 | ||
|
|
eed7e2dd6e | ||
|
|
663e36109c | ||
|
|
e92e4a1cbc | ||
|
|
61ce673400 | ||
|
|
b95840db0c | ||
|
|
003ebc0ded | ||
|
|
df8bbc2b67 | ||
|
|
eb5be2ab63 | ||
|
|
81f2b08a89 | ||
|
|
cb72a06ca3 | ||
|
|
340c2310d1 | ||
|
|
f4e5fb2873 | ||
|
|
1a7003a705 | ||
|
|
8d097efb4f | ||
|
|
2e98fc8fb3 | ||
|
|
e3e00ed70a | ||
|
|
96d1992823 | ||
|
|
7a3a0d8c0c | ||
|
|
fdffca18e0 | ||
|
|
b10416e0a3 | ||
|
|
1f462d2417 | ||
|
|
0a6ff463db | ||
|
|
9ff15bf94f | ||
|
|
6c48013601 | ||
|
|
07fcb786af | ||
|
|
ce42e22105 | ||
|
|
14ee6d7afe | ||
|
|
57391d6e66 | ||
|
|
a7bb73ded2 | ||
|
|
f4b123f73b | ||
|
|
ae31a24c29 | ||
|
|
3a478c2303 | ||
|
|
82b5325792 | ||
|
|
265e8ae5f4 | ||
|
|
0cbf50576d | ||
|
|
745038b394 | ||
|
|
b018567d53 | ||
|
|
30e39e85d0 | ||
|
|
4443417c75 | ||
|
|
efbff2e655 | ||
|
|
63565982aa | ||
|
|
b07fddcb71 | ||
|
|
99b50d038e | ||
|
|
1ddfa99c57 | ||
|
|
0c03e257b7 | ||
|
|
85e3a00d9d | ||
|
|
edd51102ad | ||
|
|
f5fccab1f6 | ||
|
|
ef12bc5381 | ||
|
|
b197e0af47 | ||
|
|
341b695ad3 | ||
|
|
d46d99a35e | ||
|
|
653168fc3d | ||
|
|
cb5e7f0130 | ||
|
|
312b9fbfb1 | ||
|
|
ba599c7dd6 | ||
|
|
7e359eda4a | ||
|
|
f7c806c119 | ||
|
|
ff25e794ef | ||
|
|
a371562d94 | ||
|
|
425ccc9b1f | ||
|
|
1afe7f1058 | ||
|
|
3188646195 | ||
|
|
6772227c9d | ||
|
|
6a6dc93e03 | ||
|
|
1a715d2ec4 | ||
|
|
4615548477 | ||
|
|
b12b426e3d | ||
|
|
a1107a2c30 | ||
|
|
af0becd65b | ||
|
|
13839b4273 | ||
|
|
7860055f8c | ||
|
|
2b40a92943 | ||
|
|
6c88b10c59 | ||
|
|
8688634950 | ||
|
|
6e35ac49c1 | ||
|
|
9bdc8dda6c | ||
|
|
75f3f282af | ||
|
|
4a5891cbea | ||
|
|
707cb07f4f | ||
|
|
61c709b7c7 | ||
|
|
1c72676483 | ||
|
|
52ac2729f7 | ||
|
|
5fa2634d60 | ||
|
|
478b225d11 | ||
|
|
ce82545437 | ||
|
|
93d2e4a338 | ||
|
|
ff48f8beba | ||
|
|
e930cd0aef | ||
|
|
6655ec0731 | ||
|
|
669e284dc5 | ||
|
|
8140d2e05a | ||
|
|
ed68034427 | ||
|
|
2832dba27a | ||
|
|
5491ad3318 | ||
|
|
e47aaba4ca | ||
|
|
fe8b92743b | ||
|
|
0d312a645a | ||
|
|
f564939780 | ||
|
|
be7007bcca | ||
|
|
240d1c972c | ||
|
|
a7239ce799 | ||
|
|
7c16ca8f27 | ||
|
|
7151f75340 | ||
|
|
f24fbec165 | ||
|
|
4dbe831d42 | ||
|
|
90bbd4edbe | ||
|
|
cc104b2e44 | ||
|
|
4adef574c0 | ||
|
|
7d09a158c3 | ||
|
|
bbfdc62139 | ||
|
|
622fc5213d | ||
|
|
6d62be518b | ||
|
|
e487008e74 | ||
|
|
62402cd617 | ||
|
|
be522f1fb9 | ||
|
|
4ef09ab897 | ||
|
|
32c5fde562 | ||
|
|
a593d9bc6d | ||
|
|
eb8d1600c3 | ||
|
|
3b0bbce54a | ||
|
|
19e0c32eb7 | ||
|
|
17a4100feb | ||
|
|
47b84189a3 | ||
|
|
7f4b5476dc | ||
|
|
0c84fe58dd | ||
|
|
575f4fd347 | ||
|
|
f7934bed80 | ||
|
|
e01fdf2a11 | ||
|
|
fd73f4210e | ||
|
|
1bccfb3492 | ||
|
|
27fdae6ecc | ||
|
|
5dd4810f58 | ||
|
|
83724100e5 | ||
|
|
6b243155f4 | ||
|
|
173f824704 | ||
|
|
8f881c4df1 | ||
|
|
d0276d1925 | ||
|
|
6e90c30be4 | ||
|
|
8ff0e027a6 | ||
|
|
c54911d877 | ||
|
|
0afe889ccd | ||
|
|
c9f16248d0 | ||
|
|
99d2d01e1a | ||
|
|
36090ad8ff | ||
|
|
1a9971b1bf | ||
|
|
473fcae57e | ||
|
|
a6eed5b7e9 | ||
|
|
b64d130a6e | ||
|
|
a253713ce2 | ||
|
|
94d833cb5f | ||
|
|
6909075be8 |
15
.github/pull_request_template.md
vendored
15
.github/pull_request_template.md
vendored
@@ -1,11 +1,12 @@
|
||||
**End-user friendly description of the problem this fixes or functionality that this introduces**
|
||||
|
||||
- [ ] Include this change in the Release Notes. If checked, you must provide an **end-user friendly** description for your change below
|
||||
|
||||
---
|
||||
**Give a summary of what the PR does, explaining any non-trivial design decisions**
|
||||
- [ ] This change is worth documenting at https://docs.all-hands.dev/
|
||||
- [ ] Include this change in the Release Notes. If checked, you **must** provide an **end-user friendly** description for your change below
|
||||
|
||||
**End-user friendly description of the problem this fixes or functionality that this introduces.**
|
||||
|
||||
|
||||
---
|
||||
**Link of any specific issues this addresses**
|
||||
**Give a summary of what the PR does, explaining any non-trivial design decisions.**
|
||||
|
||||
|
||||
---
|
||||
**Link of any specific issues this addresses.**
|
||||
|
||||
18
.github/workflows/dummy-agent-test.yml
vendored
18
.github/workflows/dummy-agent-test.yml
vendored
@@ -19,25 +19,15 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- name: Free Disk Space (Ubuntu)
|
||||
uses: jlumbroso/free-disk-space@main
|
||||
with:
|
||||
# this might remove tools that are actually needed,
|
||||
# if set to "true" but frees about 6 GB
|
||||
tool-cache: true
|
||||
# all of these default to true, but feel free to set to
|
||||
# "false" if necessary for your workflow
|
||||
android: true
|
||||
dotnet: true
|
||||
haskell: true
|
||||
large-packages: true
|
||||
docker-images: false
|
||||
swap-storage: true
|
||||
- name: Set up Docker Buildx
|
||||
id: buildx
|
||||
uses: docker/setup-buildx-action@v3
|
||||
- name: Install tmux
|
||||
run: sudo apt-get update && sudo apt-get install -y tmux
|
||||
- name: Setup Node.js
|
||||
uses: actions/setup-node@v4
|
||||
with:
|
||||
node-version: '22.x'
|
||||
- name: Install poetry via pipx
|
||||
run: pipx install poetry
|
||||
- name: Set up Python
|
||||
|
||||
70
.github/workflows/ghcr-build.yml
vendored
70
.github/workflows/ghcr-build.yml
vendored
@@ -41,22 +41,8 @@ jobs:
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
- name: Free Disk Space (Ubuntu)
|
||||
uses: jlumbroso/free-disk-space@main
|
||||
with:
|
||||
# this might remove tools that are actually needed,
|
||||
# if set to "true" but frees about 6 GB
|
||||
tool-cache: true
|
||||
# all of these default to true, but feel free to set to
|
||||
# "false" if necessary for your workflow
|
||||
android: true
|
||||
dotnet: true
|
||||
haskell: true
|
||||
large-packages: true
|
||||
docker-images: false
|
||||
swap-storage: true
|
||||
- name: Set up QEMU
|
||||
uses: docker/setup-qemu-action@v3.3.0
|
||||
uses: docker/setup-qemu-action@v3.4.0
|
||||
with:
|
||||
image: tonistiigi/binfmt:latest
|
||||
- name: Login to GHCR
|
||||
@@ -104,22 +90,8 @@ jobs:
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
- name: Free Disk Space (Ubuntu)
|
||||
uses: jlumbroso/free-disk-space@main
|
||||
with:
|
||||
# this might remove tools that are actually needed,
|
||||
# if set to "true" but frees about 6 GB
|
||||
tool-cache: true
|
||||
# all of these default to true, but feel free to set to
|
||||
# "false" if necessary for your workflow
|
||||
android: true
|
||||
dotnet: true
|
||||
haskell: true
|
||||
large-packages: true
|
||||
docker-images: false
|
||||
swap-storage: true
|
||||
- name: Set up QEMU
|
||||
uses: docker/setup-qemu-action@v3.3.0
|
||||
uses: docker/setup-qemu-action@v3.4.0
|
||||
with:
|
||||
image: tonistiigi/binfmt:latest
|
||||
- name: Login to GHCR
|
||||
@@ -219,7 +191,7 @@ jobs:
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Run unit tests with the EventStream runtime Docker images as root
|
||||
# Run unit tests with the Docker runtime Docker images as root
|
||||
test_runtime_root:
|
||||
name: RT Unit Tests (Root)
|
||||
needs: [ghcr_build_runtime]
|
||||
@@ -230,20 +202,6 @@ jobs:
|
||||
base_image: ['nikolaik']
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- name: Free Disk Space (Ubuntu)
|
||||
uses: jlumbroso/free-disk-space@main
|
||||
with:
|
||||
# this might remove tools that are actually needed,
|
||||
# if set to "true" but frees about 6 GB
|
||||
tool-cache: true
|
||||
# all of these default to true, but feel free to set to
|
||||
# "false" if necessary for your workflow
|
||||
android: true
|
||||
dotnet: true
|
||||
haskell: true
|
||||
large-packages: true
|
||||
docker-images: false
|
||||
swap-storage: true
|
||||
- name: Set up Docker Buildx
|
||||
id: buildx
|
||||
uses: docker/setup-buildx-action@v3
|
||||
@@ -275,7 +233,7 @@ jobs:
|
||||
run: pipx install poetry
|
||||
- name: Install Python dependencies using Poetry
|
||||
run: make install-python-dependencies
|
||||
- name: Run runtime tests
|
||||
- name: Run docker runtime tests
|
||||
run: |
|
||||
# We install pytest-xdist in order to run tests across CPUs
|
||||
poetry run pip install pytest-xdist
|
||||
@@ -286,7 +244,7 @@ jobs:
|
||||
image_name=ghcr.io/${{ github.repository_owner }}/runtime:${{ env.RELEVANT_SHA }}-${{ matrix.base_image }}
|
||||
image_name=$(echo $image_name | tr '[:upper:]' '[:lower:]')
|
||||
|
||||
TEST_RUNTIME=eventstream \
|
||||
TEST_RUNTIME=docker \
|
||||
SANDBOX_USER_ID=$(id -u) \
|
||||
SANDBOX_RUNTIME_CONTAINER_IMAGE=$image_name \
|
||||
TEST_IN_CI=true \
|
||||
@@ -297,7 +255,7 @@ jobs:
|
||||
env:
|
||||
CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
|
||||
|
||||
# Run unit tests with the EventStream runtime Docker images as openhands user
|
||||
# Run unit tests with the Docker runtime Docker images as openhands user
|
||||
test_runtime_oh:
|
||||
name: RT Unit Tests (openhands)
|
||||
runs-on: ubuntu-latest
|
||||
@@ -307,20 +265,6 @@ jobs:
|
||||
base_image: ['nikolaik']
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- name: Free Disk Space (Ubuntu)
|
||||
uses: jlumbroso/free-disk-space@main
|
||||
with:
|
||||
# this might remove tools that are actually needed,
|
||||
# if set to "true" but frees about 6 GB
|
||||
tool-cache: true
|
||||
# all of these default to true, but feel free to set to
|
||||
# "false" if necessary for your workflow
|
||||
android: true
|
||||
dotnet: true
|
||||
haskell: true
|
||||
large-packages: true
|
||||
docker-images: false
|
||||
swap-storage: true
|
||||
- name: Set up Docker Buildx
|
||||
id: buildx
|
||||
uses: docker/setup-buildx-action@v3
|
||||
@@ -363,7 +307,7 @@ jobs:
|
||||
image_name=ghcr.io/${{ github.repository_owner }}/runtime:${{ env.RELEVANT_SHA }}-${{ matrix.base_image }}
|
||||
image_name=$(echo $image_name | tr '[:upper:]' '[:lower:]')
|
||||
|
||||
TEST_RUNTIME=eventstream \
|
||||
TEST_RUNTIME=docker \
|
||||
SANDBOX_USER_ID=$(id -u) \
|
||||
SANDBOX_RUNTIME_CONTAINER_IMAGE=$image_name \
|
||||
TEST_IN_CI=true \
|
||||
|
||||
28
.github/workflows/openhands-resolver.yml
vendored
28
.github/workflows/openhands-resolver.yml
vendored
@@ -20,6 +20,10 @@ on:
|
||||
required: false
|
||||
type: string
|
||||
default: "anthropic/claude-3-5-sonnet-20241022"
|
||||
LLM_API_VERSION:
|
||||
required: false
|
||||
type: string
|
||||
default: ""
|
||||
base_container_image:
|
||||
required: false
|
||||
type: string
|
||||
@@ -84,12 +88,10 @@ jobs:
|
||||
run: |
|
||||
python -m pip index versions openhands-ai > openhands_versions.txt
|
||||
OPENHANDS_VERSION=$(head -n 1 openhands_versions.txt | awk '{print $2}' | tr -d '()')
|
||||
# Ensure requirements.txt ends with newline before appending
|
||||
if [ -f requirements.txt ] && [ -s requirements.txt ]; then
|
||||
sed -i -e '$a\' requirements.txt
|
||||
fi
|
||||
echo "openhands-ai==${OPENHANDS_VERSION}" >> requirements.txt
|
||||
cat requirements.txt
|
||||
|
||||
# Create a new requirements.txt locally within the workflow, ensuring no reference to the repo's file
|
||||
echo "openhands-ai==${OPENHANDS_VERSION}" > /tmp/requirements.txt
|
||||
cat /tmp/requirements.txt
|
||||
|
||||
- name: Cache pip dependencies
|
||||
if: |
|
||||
@@ -107,15 +109,16 @@ jobs:
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: ${{ env.pythonLocation }}/lib/python3.12/site-packages/*
|
||||
key: ${{ runner.os }}-pip-openhands-resolver-${{ hashFiles('requirements.txt') }}
|
||||
key: ${{ runner.os }}-pip-openhands-resolver-${{ hashFiles('/tmp/requirements.txt') }}
|
||||
restore-keys: |
|
||||
${{ runner.os }}-pip-openhands-resolver-${{ hashFiles('requirements.txt') }}
|
||||
${{ runner.os }}-pip-openhands-resolver-${{ hashFiles('/tmp/requirements.txt') }}
|
||||
|
||||
- name: Check required environment variables
|
||||
env:
|
||||
LLM_MODEL: ${{ secrets.LLM_MODEL || inputs.LLM_MODEL }}
|
||||
LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
|
||||
LLM_BASE_URL: ${{ secrets.LLM_BASE_URL }}
|
||||
LLM_API_VERSION: ${{ inputs.LLM_API_VERSION }}
|
||||
PAT_TOKEN: ${{ secrets.PAT_TOKEN }}
|
||||
PAT_USERNAME: ${{ secrets.PAT_USERNAME }}
|
||||
GITHUB_TOKEN: ${{ github.token }}
|
||||
@@ -172,7 +175,7 @@ jobs:
|
||||
echo "SANDBOX_ENV_BASE_CONTAINER_IMAGE=${{ inputs.base_container_image }}" >> $GITHUB_ENV
|
||||
|
||||
# Set branch variables
|
||||
echo "TARGET_BRANCH=${{ inputs.target_branch }}" >> $GITHUB_ENV
|
||||
echo "TARGET_BRANCH=${{ inputs.target_branch || 'main' }}" >> $GITHUB_ENV
|
||||
|
||||
- name: Comment on issue with start message
|
||||
uses: actions/github-script@v7
|
||||
@@ -220,16 +223,18 @@ jobs:
|
||||
} else {
|
||||
console.log("Installing from requirements.txt...");
|
||||
await exec.exec("python -m pip install --upgrade pip");
|
||||
await exec.exec("pip install -r requirements.txt");
|
||||
await exec.exec("pip install -r /tmp/requirements.txt");
|
||||
}
|
||||
|
||||
- name: Attempt to resolve issue
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.PAT_TOKEN || github.token }}
|
||||
GITHUB_USERNAME: ${{ secrets.PAT_USERNAME || 'openhands-agent' }}
|
||||
GIT_USERNAME: ${{ secrets.PAT_USERNAME || 'openhands-agent' }}
|
||||
LLM_MODEL: ${{ secrets.LLM_MODEL || inputs.LLM_MODEL }}
|
||||
LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
|
||||
LLM_BASE_URL: ${{ secrets.LLM_BASE_URL }}
|
||||
LLM_API_VERSION: ${{ inputs.LLM_API_VERSION }}
|
||||
PYTHONPATH: ""
|
||||
run: |
|
||||
cd /tmp && python -m openhands.resolver.resolve_issue \
|
||||
@@ -262,14 +267,17 @@ jobs:
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.PAT_TOKEN || github.token }}
|
||||
GITHUB_USERNAME: ${{ secrets.PAT_USERNAME || 'openhands-agent' }}
|
||||
GIT_USERNAME: ${{ secrets.PAT_USERNAME || 'openhands-agent' }}
|
||||
LLM_MODEL: ${{ secrets.LLM_MODEL || inputs.LLM_MODEL }}
|
||||
LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
|
||||
LLM_BASE_URL: ${{ secrets.LLM_BASE_URL }}
|
||||
LLM_API_VERSION: ${{ inputs.LLM_API_VERSION }}
|
||||
PYTHONPATH: ""
|
||||
run: |
|
||||
if [ "${{ steps.check_result.outputs.RESOLUTION_SUCCESS }}" == "true" ]; then
|
||||
cd /tmp && python -m openhands.resolver.send_pull_request \
|
||||
--issue-number ${{ env.ISSUE_NUMBER }} \
|
||||
--target-branch ${{ env.TARGET_BRANCH }} \
|
||||
--pr-type draft \
|
||||
--reviewer ${{ github.actor }} | tee pr_result.txt && \
|
||||
grep "draft created" pr_result.txt | sed 's/.*\///g' > pr_number.txt
|
||||
|
||||
98
.github/workflows/py-unit-tests-mac.yml
vendored
98
.github/workflows/py-unit-tests-mac.yml
vendored
@@ -1,98 +0,0 @@
|
||||
# Workflow that runs python unit tests on mac
|
||||
name: Run Python Unit Tests Mac
|
||||
|
||||
# This job is flaky so only run it nightly
|
||||
on:
|
||||
schedule:
|
||||
- cron: '0 0 * * *'
|
||||
|
||||
jobs:
|
||||
# Run python unit tests on macOS
|
||||
test-on-macos:
|
||||
name: Python Unit Tests on macOS
|
||||
runs-on: macos-14
|
||||
env:
|
||||
INSTALL_DOCKER: '1' # Set to '0' to skip Docker installation
|
||||
strategy:
|
||||
matrix:
|
||||
python-version: ['3.12']
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- name: Set up Python ${{ matrix.python-version }}
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
- name: Cache Poetry dependencies
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: |
|
||||
~/.cache/pypoetry
|
||||
~/.virtualenvs
|
||||
key: ${{ runner.os }}-poetry-${{ hashFiles('**/poetry.lock') }}
|
||||
restore-keys: |
|
||||
${{ runner.os }}-poetry-
|
||||
- name: Install tmux
|
||||
run: brew install tmux
|
||||
- name: Install poetry via pipx
|
||||
run: pipx install poetry
|
||||
- name: Install Python dependencies using Poetry
|
||||
run: poetry install --without evaluation,llama-index
|
||||
- name: Install & Start Docker
|
||||
if: env.INSTALL_DOCKER == '1'
|
||||
run: |
|
||||
INSTANCE_NAME="colima-${GITHUB_RUN_ID}"
|
||||
|
||||
# Uninstall colima to upgrade to the latest version
|
||||
if brew list colima &>/dev/null; then
|
||||
brew uninstall colima
|
||||
# unlinking colima dependency: go
|
||||
brew uninstall go@1.21
|
||||
fi
|
||||
rm -rf ~/.colima ~/.lima
|
||||
brew install --HEAD colima
|
||||
brew install docker
|
||||
|
||||
start_colima() {
|
||||
# Find a free port in the range 10000-20000
|
||||
RANDOM_PORT=$((RANDOM % 10001 + 10000))
|
||||
|
||||
# Original line:
|
||||
if ! colima start --network-address --arch x86_64 --cpu=1 --memory=1 --verbose --ssh-port $RANDOM_PORT; then
|
||||
echo "Failed to start Colima."
|
||||
return 1
|
||||
fi
|
||||
return 0
|
||||
}
|
||||
|
||||
# Attempt to start Colima for 5 total attempts:
|
||||
ATTEMPT_LIMIT=5
|
||||
for ((i=1; i<=ATTEMPT_LIMIT; i++)); do
|
||||
|
||||
if start_colima; then
|
||||
echo "Colima started successfully."
|
||||
break
|
||||
else
|
||||
colima stop -f
|
||||
sleep 10
|
||||
colima delete -f
|
||||
if [ $i -eq $ATTEMPT_LIMIT ]; then
|
||||
exit 1
|
||||
fi
|
||||
sleep 10
|
||||
fi
|
||||
done
|
||||
|
||||
# For testcontainers to find the Colima socket
|
||||
# https://github.com/abiosoft/colima/blob/main/docs/FAQ.md#cannot-connect-to-the-docker-daemon-at-unixvarrundockersock-is-the-docker-daemon-running
|
||||
sudo ln -sf $HOME/.colima/default/docker.sock /var/run/docker.sock
|
||||
- name: Build Environment
|
||||
run: make build
|
||||
- name: Set up Docker Buildx
|
||||
id: buildx
|
||||
uses: docker/setup-buildx-action@v3
|
||||
- name: Run Tests
|
||||
run: poetry run pytest --forked --cov=openhands --cov-report=xml ./tests/unit --ignore=tests/unit/test_memory.py
|
||||
- name: Upload coverage to Codecov
|
||||
uses: codecov/codecov-action@v5
|
||||
env:
|
||||
CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
|
||||
6
.github/workflows/py-unit-tests.yml
vendored
6
.github/workflows/py-unit-tests.yml
vendored
@@ -32,6 +32,10 @@ jobs:
|
||||
uses: docker/setup-buildx-action@v3
|
||||
- name: Install tmux
|
||||
run: sudo apt-get update && sudo apt-get install -y tmux
|
||||
- name: Setup Node.js
|
||||
uses: actions/setup-node@v4
|
||||
with:
|
||||
node-version: '22.x'
|
||||
- name: Install poetry via pipx
|
||||
run: pipx install poetry
|
||||
- name: Set up Python
|
||||
@@ -44,7 +48,7 @@ jobs:
|
||||
- name: Build Environment
|
||||
run: make build
|
||||
- name: Run Tests
|
||||
run: poetry run pytest --forked -n auto --cov=openhands --cov-report=xml -svv ./tests/unit --ignore=tests/unit/test_memory.py
|
||||
run: poetry run pytest --forked -n auto --cov=openhands --cov-report=xml -svv ./tests/unit --ignore=tests/unit/test_long_term_memory.py
|
||||
- name: Upload coverage to Codecov
|
||||
uses: codecov/codecov-action@v5
|
||||
env:
|
||||
|
||||
1
.github/workflows/stale.yml
vendored
1
.github/workflows/stale.yml
vendored
@@ -19,3 +19,4 @@ jobs:
|
||||
close-issue-message: 'This issue was closed because it has been stalled for over 30 days with no activity.'
|
||||
close-pr-message: 'This PR was closed because it has been stalled for over 30 days with no activity.'
|
||||
days-before-close: 7
|
||||
operations-per-run: 150
|
||||
|
||||
172
.openhands/microagents/glossary.md
Normal file
172
.openhands/microagents/glossary.md
Normal file
@@ -0,0 +1,172 @@
|
||||
# OpenHands Glossary
|
||||
|
||||
### Agent
|
||||
The core AI entity in OpenHands that can perform software development tasks by interacting with tools, browsing the web, and modifying code.
|
||||
|
||||
#### Agent Controller
|
||||
A component that manages the agent's lifecycle, handles its state, and coordinates interactions between the agent and various tools.
|
||||
|
||||
#### Agent Delegation
|
||||
The ability of an agent to hand off specific tasks to other specialized agents for better task completion.
|
||||
|
||||
#### Agent Hub
|
||||
A central registry of different agent types and their capabilities, allowing for easy agent selection and instantiation.
|
||||
|
||||
#### Agent Skill
|
||||
A specific capability or function that an agent can perform, such as file manipulation, web browsing, or code editing.
|
||||
|
||||
#### Agent State
|
||||
The current context and status of an agent, including its memory, active tools, and ongoing tasks.
|
||||
|
||||
#### CodeAct Agent
|
||||
[A generalist agent in OpenHands](https://arxiv.org/abs/2407.16741) designed to perform tasks by editing and executing code.
|
||||
|
||||
### Browser
|
||||
A system for web-based interactions and tasks.
|
||||
|
||||
#### Browser Gym
|
||||
A testing and evaluation environment for browser-based agent interactions and tasks.
|
||||
|
||||
#### Web Browser Tool
|
||||
A tool that enables agents to interact with web pages and perform web-based tasks.
|
||||
|
||||
### Commands
|
||||
Terminal and execution related functionality.
|
||||
|
||||
#### Bash Session
|
||||
A persistent terminal session that maintains state and history for bash command execution.
|
||||
This uses tmux under the hood.
|
||||
|
||||
### Configuration
|
||||
System-wide settings and options.
|
||||
|
||||
#### Agent Configuration
|
||||
Settings that define an agent's behavior, capabilities, and limitations, including available tools and runtime settings.
|
||||
|
||||
#### Configuration Options
|
||||
Settings that control various aspects of OpenHands behavior, including runtime, security, and agent settings.
|
||||
|
||||
#### LLM Config
|
||||
Configuration settings for language models used by agents, including model selection and parameters.
|
||||
|
||||
#### LLM Draft Config
|
||||
Settings for draft mode operations with language models, typically used for faster, lower-quality responses.
|
||||
|
||||
#### Runtime Configuration
|
||||
Settings that define how the runtime environment should be set up and operated.
|
||||
|
||||
#### Security Options
|
||||
Configuration settings that control security features and restrictions.
|
||||
|
||||
### Conversation
|
||||
A sequence of interactions between a user and an agent, including messages, actions, and their results.
|
||||
|
||||
#### Conversation Info
|
||||
Metadata about a conversation, including its status, participants, and timeline.
|
||||
|
||||
#### Conversation Manager
|
||||
A component that handles the creation, storage, and retrieval of conversations.
|
||||
|
||||
#### Conversation Metadata
|
||||
Additional information about conversations, such as tags, timestamps, and related resources.
|
||||
|
||||
#### Conversation Status
|
||||
The current state of a conversation, including whether it's active, completed, or failed.
|
||||
|
||||
#### Conversation Store
|
||||
A storage system for maintaining conversation history and related data.
|
||||
|
||||
### Events
|
||||
|
||||
#### Event
|
||||
Every Conversation comprises a series of Events. Each Event is either an Action or an Observation.
|
||||
|
||||
#### Event Stream
|
||||
A continuous flow of events that represents the ongoing activities and interactions in the system.
|
||||
|
||||
#### Action
|
||||
A specific operation or command that an agent executes through available tools, such as running a command or editing a file.
|
||||
|
||||
#### Observation
|
||||
The response or result returned by a tool after an agent's action, providing feedback about the action's outcome.
|
||||
|
||||
### Interface
|
||||
Different ways to interact with OpenHands.
|
||||
|
||||
#### CLI Mode
|
||||
A command-line interface mode for interacting with OpenHands agents without a graphical interface.
|
||||
|
||||
#### GUI Mode
|
||||
A graphical user interface mode for interacting with OpenHands agents through a web interface.
|
||||
|
||||
#### Headless Mode
|
||||
A mode of operation where OpenHands runs without a user interface, suitable for automation and scripting.
|
||||
|
||||
### Agent Memory
|
||||
The system that decides which parts of the Event Stream (i.e. the conversation history) should be passed into each LLM prompt.
|
||||
|
||||
#### Memory Store
|
||||
A storage system for maintaining agent memory and context across sessions.
|
||||
|
||||
#### Condenser
|
||||
A component that processes and summarizes conversation history to maintain context while staying within token limits.
|
||||
|
||||
#### Truncation
|
||||
A very simple Condenser strategy. Reduces conversation history or content to stay within token limits.
|
||||
|
||||
### Microagent
|
||||
A specialized prompt that enhances OpenHands with domain-specific knowledge, repository-specific context, and task-specific workflows.
|
||||
|
||||
#### Microagent Registry
|
||||
A central repository of available microagents and their configurations.
|
||||
|
||||
#### Public Microagent
|
||||
A general-purpose microagent available to all OpenHands users, triggered by specific keywords.
|
||||
|
||||
#### Repository Microagent
|
||||
A type of microagent that provides repository-specific context and guidelines, stored in the `.openhands/microagents/` directory.
|
||||
|
||||
### Prompt
|
||||
Components for managing and processing prompts.
|
||||
|
||||
#### Prompt Caching
|
||||
A system for caching and reusing common prompts to improve performance.
|
||||
|
||||
#### Prompt Manager
|
||||
A component that handles the loading, processing, and management of prompts used by agents, including microagents.
|
||||
|
||||
#### Response Parsing
|
||||
The process of interpreting and structuring responses from language models and tools.
|
||||
|
||||
### Runtime
|
||||
The execution environment where agents perform their tasks, which can be local, remote, or containerized.
|
||||
|
||||
#### Action Execution Server
|
||||
A REST API that receives agent actions (e.g. bash commands, python code, browsing actions), executes them in the runtime environment, and returns the results.
|
||||
|
||||
#### Action Execution Client
|
||||
A component that handles the execution of actions in the runtime environment, managing the communication between the agent and the runtime.
|
||||
|
||||
#### Docker Runtime
|
||||
A containerized runtime environment that provides isolation and reproducibility for agent operations.
|
||||
|
||||
#### E2B Runtime
|
||||
A specialized runtime environment built on E2B for secure and isolated code execution.
|
||||
|
||||
#### Local Runtime
|
||||
A runtime environment that executes on the local machine, suitable for development and testing.
|
||||
|
||||
#### Modal Runtime
|
||||
A runtime environment built on Modal for scalable and distributed agent operations.
|
||||
|
||||
#### Remote Runtime
|
||||
A sandboxed environment that executes code and commands remotely, providing isolation and security for agent operations.
|
||||
|
||||
#### Runtime Builder
|
||||
A component that builds a Docker image for the Action Execution Server based on a user-specified base image.
|
||||
|
||||
### Security
|
||||
Security-related components and features.
|
||||
|
||||
#### Security Analyzer
|
||||
A component that checks agent actions for potential security risks.
|
||||
@@ -100,7 +100,7 @@ poetry run pytest ./tests/unit/test_*.py
|
||||
To reduce build time (e.g., if no changes were made to the client-runtime component), you can use an existing Docker container image by
|
||||
setting the SANDBOX_RUNTIME_CONTAINER_IMAGE environment variable to the desired Docker image.
|
||||
|
||||
Example: `export SANDBOX_RUNTIME_CONTAINER_IMAGE=ghcr.io/all-hands-ai/runtime:0.21-nikolaik`
|
||||
Example: `export SANDBOX_RUNTIME_CONTAINER_IMAGE=ghcr.io/all-hands-ai/runtime:0.26-nikolaik`
|
||||
|
||||
## Develop inside Docker container
|
||||
|
||||
|
||||
6
Makefile
6
Makefile
@@ -1,4 +1,4 @@
|
||||
SHELL=/bin/bash
|
||||
SHELL=/usr/bin/env bash
|
||||
# Makefile for OpenHands project
|
||||
|
||||
# Variables
|
||||
@@ -81,10 +81,10 @@ check-nodejs:
|
||||
@if command -v node > /dev/null; then \
|
||||
NODE_VERSION=$(shell node --version | sed -E 's/v//g'); \
|
||||
IFS='.' read -r -a NODE_VERSION_ARRAY <<< "$$NODE_VERSION"; \
|
||||
if [ "$${NODE_VERSION_ARRAY[0]}" -ge 20 ]; then \
|
||||
if [ "$${NODE_VERSION_ARRAY[0]}" -ge 22 ]; then \
|
||||
echo "$(BLUE)Node.js $$NODE_VERSION is already installed.$(RESET)"; \
|
||||
else \
|
||||
echo "$(RED)Node.js 20.x or later is required. Please install Node.js 20.x or later to continue.$(RESET)"; \
|
||||
echo "$(RED)Node.js 22.x or later is required. Please install Node.js 22.x or later to continue.$(RESET)"; \
|
||||
exit 1; \
|
||||
fi; \
|
||||
else \
|
||||
|
||||
@@ -43,17 +43,17 @@ See the [Running OpenHands](https://docs.all-hands.dev/modules/usage/installatio
|
||||
system requirements and more information.
|
||||
|
||||
```bash
|
||||
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.21-nikolaik
|
||||
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.26-nikolaik
|
||||
|
||||
docker run -it --rm --pull=always \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.21-nikolaik \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.26-nikolaik \
|
||||
-e LOG_ALL_EVENTS=true \
|
||||
-v /var/run/docker.sock:/var/run/docker.sock \
|
||||
-v ~/.openhands-state:/.openhands-state \
|
||||
-p 3000:3000 \
|
||||
--add-host host.docker.internal:host-gateway \
|
||||
--name openhands-app \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.21
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.26
|
||||
```
|
||||
|
||||
You'll find OpenHands running at [http://localhost:3000](http://localhost:3000)!
|
||||
|
||||
3
build.sh
3
build.sh
@@ -1,5 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
set -e
|
||||
|
||||
cp pyproject.toml poetry.lock openhands
|
||||
poetry build -v
|
||||
|
||||
@@ -17,6 +17,12 @@
|
||||
#modal_api_token_id = ""
|
||||
#modal_api_token_secret = ""
|
||||
|
||||
# API key for Daytona
|
||||
#daytona_api_key = ""
|
||||
|
||||
# Daytona Target
|
||||
#daytona_target = ""
|
||||
|
||||
# Base path for the workspace
|
||||
workspace_base = "./workspace"
|
||||
|
||||
@@ -104,7 +110,7 @@ workspace_base = "./workspace"
|
||||
#aws_secret_access_key = ""
|
||||
|
||||
# API key to use (For Headless / CLI only - In Web this is overridden by Session Init)
|
||||
api_key = "your-api-key"
|
||||
api_key = ""
|
||||
|
||||
# API base URL (For Headless / CLI only - In Web this is overridden by Session Init)
|
||||
#base_url = ""
|
||||
@@ -195,7 +201,7 @@ model = "gpt-4o"
|
||||
#native_tool_calling = None
|
||||
|
||||
[llm.gpt4o-mini]
|
||||
api_key = "your-api-key"
|
||||
api_key = ""
|
||||
model = "gpt-4o"
|
||||
|
||||
|
||||
@@ -234,6 +240,10 @@ codeact_enable_jupyter = true
|
||||
# List of microagents to disable
|
||||
#disabled_microagents = []
|
||||
|
||||
# Whether history should be truncated to continue the session when hitting LLM context
|
||||
# length limit
|
||||
enable_history_truncation = true
|
||||
|
||||
[agent.RepoExplorerAgent]
|
||||
# Example: use a cheaper model for RepoExplorerAgent to reduce cost, especially
|
||||
# useful when an agent doesn't demand high quality but uses a lot of tokens
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
set -eo pipefail
|
||||
|
||||
# Initialize variables with default values
|
||||
|
||||
@@ -11,7 +11,7 @@ services:
|
||||
- BACKEND_HOST=${BACKEND_HOST:-"0.0.0.0"}
|
||||
- SANDBOX_API_HOSTNAME=host.docker.internal
|
||||
#
|
||||
- SANDBOX_RUNTIME_CONTAINER_IMAGE=${SANDBOX_RUNTIME_CONTAINER_IMAGE:-ghcr.io/all-hands-ai/runtime:0.21-nikolaik}
|
||||
- SANDBOX_RUNTIME_CONTAINER_IMAGE=${SANDBOX_RUNTIME_CONTAINER_IMAGE:-ghcr.io/all-hands-ai/runtime:0.26-nikolaik}
|
||||
- SANDBOX_USER_ID=${SANDBOX_USER_ID:-1234}
|
||||
- WORKSPACE_MOUNT_PATH=${WORKSPACE_BASE:-$PWD/workspace}
|
||||
ports:
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
set -o pipefail
|
||||
|
||||
function get_docker() {
|
||||
|
||||
@@ -24,3 +24,6 @@ inline-quotes = "single"
|
||||
|
||||
[format]
|
||||
quote-style = "single"
|
||||
|
||||
[lint.flake8-bugbear]
|
||||
extend-immutable-calls = ["Depends", "fastapi.Depends", "fastapi.params.Depends"]
|
||||
|
||||
@@ -7,7 +7,7 @@ services:
|
||||
image: openhands:latest
|
||||
container_name: openhands-app-${DATE:-}
|
||||
environment:
|
||||
- SANDBOX_RUNTIME_CONTAINER_IMAGE=${SANDBOX_RUNTIME_CONTAINER_IMAGE:-docker.all-hands.dev/all-hands-ai/runtime:0.21-nikolaik}
|
||||
- SANDBOX_RUNTIME_CONTAINER_IMAGE=${SANDBOX_RUNTIME_CONTAINER_IMAGE:-docker.all-hands.dev/all-hands-ai/runtime:0.26-nikolaik}
|
||||
#- SANDBOX_USER_ID=${SANDBOX_USER_ID:-1234} # enable this only if you want a specific non-root sandbox user but you will have to manually adjust permissions of openhands-state for this user
|
||||
- WORKSPACE_MOUNT_PATH=${WORKSPACE_BASE:-$PWD/workspace}
|
||||
ports:
|
||||
|
||||
@@ -46,3 +46,11 @@ docker run -it \
|
||||
-e THAT=that
|
||||
...
|
||||
```
|
||||
|
||||
### Referring to UI Elements
|
||||
|
||||
When referencing UI elements, use ``.
|
||||
|
||||
Example:
|
||||
1. Toggle the `Advanced` option
|
||||
2. Enter your model in the `Custom Model` textbox.
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
|
||||
|
||||
# 📦 Runtime EventStream
|
||||
# 📦 Runtime Docker
|
||||
|
||||
Le Runtime EventStream d'OpenHands est le composant principal qui permet l'exécution sécurisée et flexible des actions des agents d'IA.
|
||||
Le Runtime Docker d'OpenHands est le composant principal qui permet l'exécution sécurisée et flexible des actions des agents d'IA.
|
||||
Il crée un environnement en bac à sable (sandbox) en utilisant Docker, où du code arbitraire peut être exécuté en toute sécurité sans risquer le système hôte.
|
||||
|
||||
## Pourquoi avons-nous besoin d'un runtime en bac à sable ?
|
||||
|
||||
@@ -163,7 +163,7 @@ Les options de configuration de base sont définies dans la section `[core]` du
|
||||
|
||||
- `runtime`
|
||||
- Type : `str`
|
||||
- Valeur par défaut : `"eventstream"`
|
||||
- Valeur par défaut : `"docker"`
|
||||
- Description : Environnement d'exécution
|
||||
|
||||
- `default_agent`
|
||||
|
||||
@@ -52,7 +52,7 @@ LLM_API_KEY="sk_test_12345"
|
||||
```bash
|
||||
docker run -it \
|
||||
--pull=always \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.21-nikolaik \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.26-nikolaik \
|
||||
-e SANDBOX_USER_ID=$(id -u) \
|
||||
-e WORKSPACE_MOUNT_PATH=$WORKSPACE_BASE \
|
||||
-e LLM_API_KEY=$LLM_API_KEY \
|
||||
@@ -61,7 +61,7 @@ docker run -it \
|
||||
-v /var/run/docker.sock:/var/run/docker.sock \
|
||||
--add-host host.docker.internal:host-gateway \
|
||||
--name openhands-app-$(date +%Y%m%d%H%M%S) \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.21 \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.26 \
|
||||
python -m openhands.core.cli
|
||||
```
|
||||
|
||||
|
||||
@@ -114,7 +114,7 @@ Pour créer un workflow d'évaluation pour votre benchmark, suivez ces étapes :
|
||||
def get_config(instance: pd.Series, metadata: EvalMetadata) -> AppConfig:
|
||||
config = AppConfig(
|
||||
default_agent=metadata.agent_class,
|
||||
runtime='eventstream',
|
||||
runtime='docker',
|
||||
max_iterations=metadata.max_iterations,
|
||||
sandbox=SandboxConfig(
|
||||
base_container_image='your_container_image',
|
||||
|
||||
@@ -46,7 +46,7 @@ LLM_API_KEY="sk_test_12345"
|
||||
```bash
|
||||
docker run -it \
|
||||
--pull=always \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.21-nikolaik \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.26-nikolaik \
|
||||
-e SANDBOX_USER_ID=$(id -u) \
|
||||
-e WORKSPACE_MOUNT_PATH=$WORKSPACE_BASE \
|
||||
-e LLM_API_KEY=$LLM_API_KEY \
|
||||
@@ -56,6 +56,6 @@ docker run -it \
|
||||
-v /var/run/docker.sock:/var/run/docker.sock \
|
||||
--add-host host.docker.internal:host-gateway \
|
||||
--name openhands-app-$(date +%Y%m%d%H%M%S) \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.21 \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.26 \
|
||||
python -m openhands.core.main -t "write a bash script that prints hi" --no-auto-continue
|
||||
```
|
||||
|
||||
@@ -13,16 +13,16 @@
|
||||
La façon la plus simple d'exécuter OpenHands est avec Docker.
|
||||
|
||||
```bash
|
||||
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.21-nikolaik
|
||||
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.26-nikolaik
|
||||
|
||||
docker run -it --rm --pull=always \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.21-nikolaik \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.26-nikolaik \
|
||||
-e LOG_ALL_EVENTS=true \
|
||||
-v /var/run/docker.sock:/var/run/docker.sock \
|
||||
-p 3000:3000 \
|
||||
--add-host host.docker.internal:host-gateway \
|
||||
--name openhands-app \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.21
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.26
|
||||
```
|
||||
|
||||
Vous pouvez également exécuter OpenHands en mode [headless scriptable](https://docs.all-hands.dev/modules/usage/how-to/headless-mode), en tant que [CLI interactive](https://docs.all-hands.dev/modules/usage/how-to/cli-mode), ou en utilisant l'[Action GitHub OpenHands](https://docs.all-hands.dev/modules/usage/how-to/github-action).
|
||||
|
||||
@@ -13,7 +13,7 @@ C'est le Runtime par défaut qui est utilisé lorsque vous démarrez OpenHands.
|
||||
|
||||
```
|
||||
docker run # ...
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.21-nikolaik \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.26-nikolaik \
|
||||
-v /var/run/docker.sock:/var/run/docker.sock \
|
||||
# ...
|
||||
```
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
以下是翻译后的内容:
|
||||
|
||||
# 📦 EventStream 运行时
|
||||
# 📦 Docker 运行时
|
||||
|
||||
OpenHands EventStream 运行时是实现 AI 代理操作安全灵活执行的核心组件。
|
||||
OpenHands Docker 运行时是实现 AI 代理操作安全灵活执行的核心组件。
|
||||
它使用 Docker 创建一个沙盒环境,可以安全地运行任意代码而不会危及主机系统。
|
||||
|
||||
## 为什么我们需要沙盒运行时?
|
||||
|
||||
@@ -162,7 +162,7 @@
|
||||
|
||||
- `runtime`
|
||||
- 类型: `str`
|
||||
- 默认值: `"eventstream"`
|
||||
- 默认值: `"docker"`
|
||||
- 描述: 运行时环境
|
||||
|
||||
- `default_agent`
|
||||
|
||||
@@ -50,7 +50,7 @@ LLM_API_KEY="sk_test_12345"
|
||||
```bash
|
||||
docker run -it \
|
||||
--pull=always \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.21-nikolaik \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.26-nikolaik \
|
||||
-e SANDBOX_USER_ID=$(id -u) \
|
||||
-e WORKSPACE_MOUNT_PATH=$WORKSPACE_BASE \
|
||||
-e LLM_API_KEY=$LLM_API_KEY \
|
||||
@@ -59,7 +59,7 @@ docker run -it \
|
||||
-v /var/run/docker.sock:/var/run/docker.sock \
|
||||
--add-host host.docker.internal:host-gateway \
|
||||
--name openhands-app-$(date +%Y%m%d%H%M%S) \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.21 \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.26 \
|
||||
python -m openhands.core.cli
|
||||
```
|
||||
|
||||
|
||||
@@ -112,7 +112,7 @@ OpenHands 的主要入口点在 `openhands/core/main.py` 中。以下是它的
|
||||
def get_config(instance: pd.Series, metadata: EvalMetadata) -> AppConfig:
|
||||
config = AppConfig(
|
||||
default_agent=metadata.agent_class,
|
||||
runtime='eventstream',
|
||||
runtime='docker',
|
||||
max_iterations=metadata.max_iterations,
|
||||
sandbox=SandboxConfig(
|
||||
base_container_image='your_container_image',
|
||||
|
||||
@@ -47,7 +47,7 @@ LLM_API_KEY="sk_test_12345"
|
||||
```bash
|
||||
docker run -it \
|
||||
--pull=always \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.21-nikolaik \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.26-nikolaik \
|
||||
-e SANDBOX_USER_ID=$(id -u) \
|
||||
-e WORKSPACE_MOUNT_PATH=$WORKSPACE_BASE \
|
||||
-e LLM_API_KEY=$LLM_API_KEY \
|
||||
@@ -57,6 +57,6 @@ docker run -it \
|
||||
-v /var/run/docker.sock:/var/run/docker.sock \
|
||||
--add-host host.docker.internal:host-gateway \
|
||||
--name openhands-app-$(date +%Y%m%d%H%M%S) \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.21 \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.26 \
|
||||
python -m openhands.core.main -t "write a bash script that prints hi" --no-auto-continue
|
||||
```
|
||||
|
||||
@@ -11,16 +11,16 @@
|
||||
在 Docker 中运行 OpenHands 是最简单的方式。
|
||||
|
||||
```bash
|
||||
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.21-nikolaik
|
||||
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.26-nikolaik
|
||||
|
||||
docker run -it --rm --pull=always \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.21-nikolaik \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.26-nikolaik \
|
||||
-e LOG_ALL_EVENTS=true \
|
||||
-v /var/run/docker.sock:/var/run/docker.sock \
|
||||
-p 3000:3000 \
|
||||
--add-host host.docker.internal:host-gateway \
|
||||
--name openhands-app \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.21
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.26
|
||||
```
|
||||
|
||||
你也可以在可脚本化的[无头模式](https://docs.all-hands.dev/modules/usage/how-to/headless-mode)下运行 OpenHands,作为[交互式 CLI](https://docs.all-hands.dev/modules/usage/how-to/cli-mode),或使用 [OpenHands GitHub Action](https://docs.all-hands.dev/modules/usage/how-to/github-action)。
|
||||
|
||||
@@ -11,7 +11,7 @@
|
||||
|
||||
```
|
||||
docker run # ...
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.21-nikolaik \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.26-nikolaik \
|
||||
-v /var/run/docker.sock:/var/run/docker.sock \
|
||||
# ...
|
||||
```
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
# 📦 EventStream Runtime
|
||||
# 📦 Docker Runtime
|
||||
|
||||
The OpenHands EventStream Runtime is the core component that enables secure and flexible execution of AI agent's action.
|
||||
The OpenHands Docker Runtime is the core component that enables secure and flexible execution of AI agent's action.
|
||||
It creates a sandboxed environment using Docker, where arbitrary code can be run safely without risking the host system.
|
||||
|
||||
## Why do we need a sandboxed runtime?
|
||||
@@ -54,14 +54,13 @@ graph TD
|
||||
6. Action Execution: The runtime client receives actions from the backend, executes them in the sandboxed environment, and sends back observations
|
||||
7. Observation Return: The action execution server sends execution results back to the OpenHands backend as observations
|
||||
|
||||
|
||||
The role of the client:
|
||||
|
||||
- It acts as an intermediary between the OpenHands backend and the sandboxed environment
|
||||
- It executes various types of actions (shell commands, file operations, Python code, etc.) safely within the container
|
||||
- It manages the state of the sandboxed environment, including the current working directory and loaded plugins
|
||||
- It formats and returns observations to the backend, ensuring a consistent interface for processing results
|
||||
|
||||
|
||||
## How OpenHands builds and maintains OH Runtime images
|
||||
|
||||
OpenHands' approach to building and managing runtime images ensures efficiency, consistency, and flexibility in creating and maintaining Docker images for both production and development environments.
|
||||
@@ -78,16 +77,15 @@ Tags may be in one of 2 formats:
|
||||
- **Source Tag**: `oh_v{openhands_version}_{16_digit_lock_hash}_{16_digit_source_hash}`
|
||||
(e.g.: `oh_v0.9.9_1234567890abcdef_1234567890abcdef`)
|
||||
|
||||
|
||||
#### Source Tag - Most Specific
|
||||
|
||||
This is the first 16 digits of the MD5 of the directory hash for the source directory. This gives a hash
|
||||
for only the openhands source
|
||||
|
||||
|
||||
#### Lock Tag
|
||||
|
||||
This hash is built from the first 16 digits of the MD5 of:
|
||||
|
||||
- The name of the base image upon which the image was built (e.g.: `nikolaik/python-nodejs:python3.12-nodejs22`)
|
||||
- The content of the `pyproject.toml` included in the image.
|
||||
- The content of the `poetry.lock` included in the image.
|
||||
|
||||
21
docs/modules/usage/cloud/cloud-github-resolver.md
Normal file
21
docs/modules/usage/cloud/cloud-github-resolver.md
Normal file
@@ -0,0 +1,21 @@
|
||||
# Cloud GitHub Resolver
|
||||
|
||||
The GitHub Resolver automates code fixes and provides intelligent assistance for your repositories.
|
||||
|
||||
## Setup
|
||||
|
||||
The Cloud Github Resolver is available automatically when you
|
||||
[grant OpenHands Cloud repository access](./openhands-cloud.md#adding-repositories).
|
||||
|
||||
## Usage
|
||||
|
||||
### Issues
|
||||
|
||||
On your repository, label an issue with `openhands`. OpenHands will attempt to fix the issue.
|
||||
|
||||
### Pull Requests
|
||||
|
||||
In order to get OpenHands to work on pull requests, use `@openhands` in top level or single inline comments to:
|
||||
- Ask questions
|
||||
- Request updates
|
||||
- Get code explanations
|
||||
39
docs/modules/usage/cloud/openhands-cloud.md
Normal file
39
docs/modules/usage/cloud/openhands-cloud.md
Normal file
@@ -0,0 +1,39 @@
|
||||
# Openhands Cloud
|
||||
|
||||
OpenHands Cloud is the cloud hosted version of OpenHands by All Hands AI.
|
||||
|
||||
## Accessing OpenHands Cloud
|
||||
|
||||
Currently, users are being admitted to access OpenHands Cloud in waves. To sign up,
|
||||
[join the waitlist](https://www.all-hands.dev/join-waitlist). Once you are approved, you will get an email with
|
||||
instructions on how to access it.
|
||||
|
||||
## Getting Started
|
||||
|
||||
After visiting OpenHands Cloud, you will be asked to connect with your GitHub account:
|
||||
1. After reading and accepting the terms of service, click `Connect to GitHub`.
|
||||
2. Review the permissions requested by OpenHands and then click `Authorize OpenHands by All Hands AI`.
|
||||
- OpenHands will require some permissions from your GitHub account. To read more about these permissions,
|
||||
you can click the `Learn more` link on the GitHub authorize page.
|
||||
|
||||
## Adding Repositories
|
||||
|
||||
You can grant OpenHands specific repository access:
|
||||
1. Under the `Select a GitHub project` dropdown, select `Add more repositories...`.
|
||||
2. Select the organization, then choose the specific repositories to grant OpenHands access to.
|
||||
- Openhands requests short-lived tokens (8-hour expiry) with these permissions:
|
||||
- Actions: Read and write
|
||||
- Administration: Read-only
|
||||
- Commit statuses: Read and write
|
||||
- Contents: Read and write
|
||||
- Issues: Read and write
|
||||
- Metadata: Read-only
|
||||
- Pull requests: Read and write
|
||||
- Webhooks: Read and write
|
||||
- Workflows: Read and write
|
||||
- Repository access for a user is granted based on:
|
||||
- Granted permission for the repository.
|
||||
- User's GitHub permissions (owner/collaborator).
|
||||
|
||||
You can manage repository access any time by following the above workflow or visiting the Settings page and selecting
|
||||
`Configure GitHub Repositories` under the `GitHub Settings` section.
|
||||
@@ -126,7 +126,7 @@ The core configuration options are defined in the `[core]` section of the `confi
|
||||
|
||||
- `runtime`
|
||||
- Type: `str`
|
||||
- Default: `"eventstream"`
|
||||
- Default: `"docker"`
|
||||
- Description: Runtime environment
|
||||
|
||||
- `default_agent`
|
||||
@@ -340,6 +340,11 @@ The agent configuration options are defined in the `[agent]` and `[agent.<agent_
|
||||
- Default: `false`
|
||||
- Description: Whether Jupyter is enabled in the action space
|
||||
|
||||
- `enable_history_truncation`
|
||||
- Type: `bool`
|
||||
- Default: `true`
|
||||
- Description: Whether history should be truncated to continue the session when hitting LLM context length limit
|
||||
|
||||
### Microagent Usage
|
||||
- `enable_prompt_extensions`
|
||||
- Type: `bool`
|
||||
|
||||
@@ -35,7 +35,7 @@ To run OpenHands in CLI mode with Docker:
|
||||
```bash
|
||||
docker run -it \
|
||||
--pull=always \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.21-nikolaik \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.26-nikolaik \
|
||||
-e SANDBOX_USER_ID=$(id -u) \
|
||||
-e WORKSPACE_MOUNT_PATH=$WORKSPACE_BASE \
|
||||
-e LLM_API_KEY=$LLM_API_KEY \
|
||||
@@ -45,7 +45,7 @@ docker run -it \
|
||||
-v ~/.openhands-state:/.openhands-state \
|
||||
--add-host host.docker.internal:host-gateway \
|
||||
--name openhands-app-$(date +%Y%m%d%H%M%S) \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.21 \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.26 \
|
||||
python -m openhands.core.cli
|
||||
```
|
||||
|
||||
|
||||
@@ -41,8 +41,16 @@ docker build -t custom-image .
|
||||
|
||||
This will produce a new image called `custom-image`, which will be available in Docker.
|
||||
|
||||
> Note that in the configuration described in this document, OpenHands will run as user "openhands" inside the
|
||||
> sandbox and thus all packages installed via the docker file should be available to all users on the system, not just root.
|
||||
## Using the Docker Command
|
||||
|
||||
When running OpenHands using [the docker command](/modules/usage/installation#start-the-app), replace
|
||||
`-e SANDBOX_RUNTIME_CONTAINER_IMAGE=...` with `-e SANDBOX_BASE_CONTAINER_IMAGE=<custom image name>`:
|
||||
|
||||
```commandline
|
||||
docker run -it --rm --pull=always \
|
||||
-e SANDBOX_BASE_CONTAINER_IMAGE=custom-image \
|
||||
...
|
||||
```
|
||||
|
||||
## Using the Development Workflow
|
||||
|
||||
|
||||
@@ -112,7 +112,7 @@ To create an evaluation workflow for your benchmark, follow these steps:
|
||||
def get_config(instance: pd.Series, metadata: EvalMetadata) -> AppConfig:
|
||||
config = AppConfig(
|
||||
default_agent=metadata.agent_class,
|
||||
runtime='eventstream',
|
||||
runtime='docker',
|
||||
max_iterations=metadata.max_iterations,
|
||||
sandbox=SandboxConfig(
|
||||
base_container_image='your_container_image',
|
||||
|
||||
@@ -42,9 +42,10 @@ You can provide custom directions for OpenHands by following the [README for the
|
||||
Github resolver will automatically check for valid [repository secrets](https://docs.github.com/en/actions/security-for-github-actions/security-guides/using-secrets-in-github-actions?tool=webui#creating-secrets-for-a-repository) or [repository variables](https://docs.github.com/en/actions/writing-workflows/choosing-what-your-workflow-does/store-information-in-variables#creating-configuration-variables-for-a-repository) to customize its behavior.
|
||||
The customization options you can set are:
|
||||
|
||||
| **Attribute name** | **Type** | **Purpose** | **Example** |
|
||||
|----------------------------------| -------- |-------------------------------------------------------------------------------------------------------------|------------------------------------------------------|
|
||||
| `LLM_MODEL` | Variable | Set the LLM to use with OpenHands | `LLM_MODEL="anthropic/claude-3-5-sonnet-20241022"` |
|
||||
| `OPENHANDS_MAX_ITER` | Variable | Set max limit for agent iterations | `OPENHANDS_MAX_ITER=10` |
|
||||
| `OPENHANDS_MACRO` | Variable | Customize default macro for invoking the resolver | `OPENHANDS_MACRO=@resolveit` |
|
||||
| `OPENHANDS_BASE_CONTAINER_IMAGE` | Variable | Custom Sandbox ([learn more](https://docs.all-hands.dev/modules/usage/how-to/custom-sandbox-guide)) | `OPENHANDS_BASE_CONTAINER_IMAGE="custom_image"` |
|
||||
| **Attribute name** | **Type** | **Purpose** | **Example** |
|
||||
| -------------------------------- | -------- | --------------------------------------------------------------------------------------------------- | -------------------------------------------------- |
|
||||
| `LLM_MODEL` | Variable | Set the LLM to use with OpenHands | `LLM_MODEL="anthropic/claude-3-5-sonnet-20241022"` |
|
||||
| `OPENHANDS_MAX_ITER` | Variable | Set max limit for agent iterations | `OPENHANDS_MAX_ITER=10` |
|
||||
| `OPENHANDS_MACRO` | Variable | Customize default macro for invoking the resolver | `OPENHANDS_MACRO=@resolveit` |
|
||||
| `OPENHANDS_BASE_CONTAINER_IMAGE` | Variable | Custom Sandbox ([learn more](https://docs.all-hands.dev/modules/usage/how-to/custom-sandbox-guide)) | `OPENHANDS_BASE_CONTAINER_IMAGE="custom_image"` |
|
||||
| `TARGET_BRANCH` | Variable | Merge to branch other than `main` | `TARGET_BRANCH="dev"` |
|
||||
|
||||
@@ -1,9 +1,6 @@
|
||||
# GUI Mode
|
||||
|
||||
## Introduction
|
||||
|
||||
OpenHands provides a user-friendly Graphical User Interface (GUI) mode for interacting with the AI assistant.
|
||||
This mode offers an intuitive way to set up the environment, manage settings, and communicate with the AI.
|
||||
OpenHands provides a Graphical User Interface (GUI) mode for interacting with the AI assistant.
|
||||
|
||||
## Installation and Setup
|
||||
|
||||
@@ -14,104 +11,95 @@ This mode offers an intuitive way to set up the environment, manage settings, an
|
||||
|
||||
### Initial Setup
|
||||
|
||||
1. Upon first launch, you'll see a settings modal.
|
||||
2. Select an `LLM Provider` and `LLM Model` from the dropdown menus.
|
||||
1. Upon first launch, you'll see a settings page.
|
||||
2. Select an `LLM Provider` and `LLM Model` from the dropdown menus. If the required model does not exist in the list,
|
||||
toggle `Advanced` options and enter it with the correct prefix in the `Custom Model` text box.
|
||||
3. Enter the corresponding `API Key` for your chosen provider.
|
||||
4. Click "Save" to apply the settings.
|
||||
4. Click `Save Changes` to apply the settings.
|
||||
|
||||
### GitHub Token Setup
|
||||
|
||||
OpenHands automatically exports a `GITHUB_TOKEN` to the shell environment if it is available. This can happen in two ways:
|
||||
|
||||
- **Locally (OSS)**: The user directly inputs their GitHub token.
|
||||
- **Online (SaaS)**: The token is obtained through GitHub OAuth authentication.
|
||||
|
||||
#### Setting Up a Local GitHub Token
|
||||
|
||||
1. **Generate a Personal Access Token (PAT)**:
|
||||
- Go to GitHub Settings > Developer Settings > Personal Access Tokens > Tokens (classic).
|
||||
- Click "Generate new token (classic)".
|
||||
- **Local Installation**: The user directly inputs their GitHub token.
|
||||
<details>
|
||||
<summary>Setting Up a GitHub Token</summary>
|
||||
1. **Generate a Personal Access Token (PAT)**:
|
||||
- On GitHub, go to Settings > Developer Settings > Personal Access Tokens > Tokens (classic).
|
||||
- Click `Generate new token (classic)`.
|
||||
- Required scopes:
|
||||
- `repo` (Full control of private repositories)
|
||||
- `workflow` (Update GitHub Action workflows)
|
||||
- `read:org` (Read organization data)
|
||||
2. **Enter Token in OpenHands**:
|
||||
- Click the Settings button (gear icon).
|
||||
- Navigate to the `GitHub Settings` section.
|
||||
- Paste your token in the `GitHub Token` field.
|
||||
- Click `Save Changes` to apply the changes.
|
||||
</details>
|
||||
|
||||
2. **Enter Token in OpenHands**:
|
||||
- Click the Settings button (gear icon) in the top right.
|
||||
- Navigate to the "GitHub" section.
|
||||
- Paste your token in the "GitHub Token" field.
|
||||
- Click "Save" to apply the changes.
|
||||
<details>
|
||||
<summary>Organizational Token Policies</summary>
|
||||
|
||||
#### Organizational Token Policies
|
||||
If you're working with organizational repositories, additional setup may be required:
|
||||
|
||||
If you're working with organizational repositories, additional setup may be required:
|
||||
|
||||
1. **Check Organization Requirements**:
|
||||
1. **Check Organization Requirements**:
|
||||
- Organization admins may enforce specific token policies.
|
||||
- Some organizations require tokens to be created with SSO enabled.
|
||||
- Review your organization's [token policy settings](https://docs.github.com/en/organizations/managing-programmatic-access-to-your-organization/setting-a-personal-access-token-policy-for-your-organization).
|
||||
|
||||
2. **Verify Organization Access**:
|
||||
2. **Verify Organization Access**:
|
||||
- Go to your token settings on GitHub.
|
||||
- Look for the organization under "Organization access".
|
||||
- If required, click "Enable SSO" next to your organization.
|
||||
- Look for the organization under `Organization access`.
|
||||
- If required, click `Enable SSO` next to your organization.
|
||||
- Complete the SSO authorization process.
|
||||
</details>
|
||||
|
||||
#### OAuth Authentication (Online Mode)
|
||||
<details>
|
||||
<summary>Troubleshooting</summary>
|
||||
|
||||
When using OpenHands in online mode, the GitHub OAuth flow:
|
||||
Common issues and solutions:
|
||||
|
||||
1. Requests the following permissions:
|
||||
- **Token Not Recognized**:
|
||||
- Ensure the token is properly saved in settings.
|
||||
- Check that the token hasn't expired.
|
||||
- Verify the token has the required scopes.
|
||||
- Try regenerating the token.
|
||||
|
||||
- **Organization Access Denied**:
|
||||
- Check if SSO is required but not enabled.
|
||||
- Verify organization membership.
|
||||
- Contact organization admin if token policies are blocking access.
|
||||
|
||||
- **Verifying Token Works**:
|
||||
- The app will show a green checkmark if the token is valid.
|
||||
- Try accessing a repository to confirm permissions.
|
||||
- Check the browser console for any error messages.
|
||||
</details>
|
||||
|
||||
- **OpenHands Cloud**: The token is obtained through GitHub OAuth authentication.
|
||||
|
||||
<details>
|
||||
<summary>OAuth Authentication</summary>
|
||||
|
||||
When using OpenHands Cloud, the GitHub OAuth flow requests the following permissions:
|
||||
- Repository access (read/write)
|
||||
- Workflow management
|
||||
- Organization read access
|
||||
|
||||
2. Authentication steps:
|
||||
- Click "Sign in with GitHub" when prompted.
|
||||
To authenticate OpenHands:
|
||||
- Click `Sign in with GitHub` when prompted.
|
||||
- Review the requested permissions.
|
||||
- Authorize OpenHands to access your GitHub account.
|
||||
- If using an organization, authorize organization access if prompted.
|
||||
|
||||
#### Troubleshooting
|
||||
|
||||
Common issues and solutions:
|
||||
|
||||
- **Token Not Recognized**:
|
||||
- Ensure the token is properly saved in settings.
|
||||
- Check that the token hasn't expired.
|
||||
- Verify the token has the required scopes.
|
||||
- Try regenerating the token.
|
||||
|
||||
- **Organization Access Denied**:
|
||||
- Check if SSO is required but not enabled.
|
||||
- Verify organization membership.
|
||||
- Contact organization admin if token policies are blocking access.
|
||||
|
||||
- **Verifying Token Works**:
|
||||
- The app will show a green checkmark if the token is valid.
|
||||
- Try accessing a repository to confirm permissions.
|
||||
- Check the browser console for any error messages.
|
||||
- Use the "Test Connection" button in settings if available.
|
||||
</details>
|
||||
|
||||
### Advanced Settings
|
||||
|
||||
1. Toggle `Advanced Options` to access additional settings.
|
||||
1. Inside the Settings page, toggle `Advanced` options to access additional settings.
|
||||
2. Use the `Custom Model` text box to manually enter a model if it's not in the list.
|
||||
3. Specify a `Base URL` if required by your LLM provider.
|
||||
|
||||
### Main Interface
|
||||
|
||||
The main interface consists of several key components:
|
||||
|
||||
- **Chat Window**: The central area where you can view the conversation history with the AI assistant.
|
||||
- **Input Box**: Located at the bottom of the screen, use this to type your messages or commands to the AI.
|
||||
- **Send Button**: Click this to send your message to the AI.
|
||||
- **Settings Button**: A gear icon that opens the settings modal, allowing you to adjust your configuration at any time.
|
||||
- **Workspace Panel**: Displays the files and folders in your workspace, allowing you to navigate and view files, or the agent's past commands or web browsing history.
|
||||
|
||||
### Interacting with the AI
|
||||
|
||||
1. Type your question, request, or task description in the input box.
|
||||
1. Type your prompt in the input box.
|
||||
2. Click the send button or press Enter to submit your message.
|
||||
3. The AI will process your input and provide a response in the chat window.
|
||||
4. You can continue the conversation by asking follow-up questions or providing additional information.
|
||||
|
||||
@@ -32,7 +32,7 @@ To run OpenHands in Headless mode with Docker:
|
||||
```bash
|
||||
docker run -it \
|
||||
--pull=always \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.21-nikolaik \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.26-nikolaik \
|
||||
-e SANDBOX_USER_ID=$(id -u) \
|
||||
-e WORKSPACE_MOUNT_PATH=$WORKSPACE_BASE \
|
||||
-e LLM_API_KEY=$LLM_API_KEY \
|
||||
@@ -43,7 +43,7 @@ docker run -it \
|
||||
-v ~/.openhands-state:/.openhands-state \
|
||||
--add-host host.docker.internal:host-gateway \
|
||||
--name openhands-app-$(date +%Y%m%d%H%M%S) \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.21 \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.26 \
|
||||
python -m openhands.core.main -t "write a bash script that prints hi"
|
||||
```
|
||||
|
||||
|
||||
@@ -6,11 +6,14 @@
|
||||
- Linux
|
||||
- Windows with [WSL](https://learn.microsoft.com/en-us/windows/wsl/install) and [Docker Desktop support](https://docs.docker.com/desktop/setup/install/windows-install/#system-requirements)
|
||||
|
||||
A system with a modern processor and a minimum of **4GB RAM** is recommended to run OpenHands.
|
||||
|
||||
## Prerequisites
|
||||
|
||||
<details>
|
||||
<summary>MacOS</summary>
|
||||
### Docker Desktop
|
||||
|
||||
**Docker Desktop**
|
||||
|
||||
1. [Install Docker Desktop on Mac](https://docs.docker.com/desktop/setup/install/mac-install).
|
||||
2. Open Docker Desktop, go to `Settings > Advanced` and ensure `Allow the default Docker socket to be used` is enabled.
|
||||
@@ -23,7 +26,7 @@
|
||||
Tested with Ubuntu 22.04.
|
||||
:::
|
||||
|
||||
### Docker Desktop
|
||||
**Docker Desktop**
|
||||
|
||||
1. [Install Docker Desktop on Linux](https://docs.docker.com/desktop/setup/install/linux/).
|
||||
|
||||
@@ -31,18 +34,23 @@
|
||||
|
||||
<details>
|
||||
<summary>Windows</summary>
|
||||
### WSL
|
||||
|
||||
**WSL**
|
||||
|
||||
1. [Install WSL](https://learn.microsoft.com/en-us/windows/wsl/install).
|
||||
2. Run `wsl --version` in powershell and confirm `Default Version: 2`.
|
||||
|
||||
### Docker Desktop
|
||||
**Docker Desktop**
|
||||
|
||||
1. [Install Docker Desktop on Windows](https://docs.docker.com/desktop/setup/install/windows-install).
|
||||
2. Open Docker Desktop, go to `Settings` and confirm the following:
|
||||
- General: `Use the WSL 2 based engine` is enabled.
|
||||
- Resources > WSL Integration: `Enable integration with my default WSL distro` is enabled.
|
||||
|
||||
:::note
|
||||
The docker command below to start the app must be run inside the WSL terminal.
|
||||
:::
|
||||
|
||||
</details>
|
||||
|
||||
## Start the App
|
||||
@@ -50,17 +58,17 @@
|
||||
The easiest way to run OpenHands is in Docker.
|
||||
|
||||
```bash
|
||||
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.21-nikolaik
|
||||
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.26-nikolaik
|
||||
|
||||
docker run -it --rm --pull=always \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.21-nikolaik \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.26-nikolaik \
|
||||
-e LOG_ALL_EVENTS=true \
|
||||
-v /var/run/docker.sock:/var/run/docker.sock \
|
||||
-v ~/.openhands-state:/.openhands-state \
|
||||
-p 3000:3000 \
|
||||
--add-host host.docker.internal:host-gateway \
|
||||
--name openhands-app \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.21
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.26
|
||||
```
|
||||
|
||||
You'll find OpenHands running at http://localhost:3000!
|
||||
@@ -72,24 +80,22 @@ or run it on tagged issues with [a github action](https://docs.all-hands.dev/mod
|
||||
|
||||
## Setup
|
||||
|
||||
Upon launching OpenHands, you'll see a settings modal. You **must** select an `LLM Provider` and `LLM Model` and enter a corresponding `API Key`.
|
||||
Upon launching OpenHands, you'll see a Settings page. You **must** select an `LLM Provider` and `LLM Model` and enter a corresponding `API Key`.
|
||||
These can be changed at any time by selecting the `Settings` button (gear icon) in the UI.
|
||||
|
||||
If the required `LLM Model` does not exist in the list, you can toggle `Advanced Options` and manually enter it with the correct prefix
|
||||
If the required model does not exist in the list, you can toggle `Advanced` options and manually enter it with the correct prefix
|
||||
in the `Custom Model` text box.
|
||||
The `Advanced Options` also allow you to specify a `Base URL` if required.
|
||||
The `Advanced` options also allow you to specify a `Base URL` if required.
|
||||
|
||||
<div style={{ display: 'flex', justifyContent: 'center', gap: '20px' }}>
|
||||
<img src="/img/settings-screenshot.png" alt="settings-modal" width="340" />
|
||||
<img src="/img/settings-advanced.png" alt="settings-modal" width="335" />
|
||||
</div>
|
||||
Now you're ready to [get started with OpenHands](./getting-started).
|
||||
|
||||
## Versions
|
||||
|
||||
The command above pulls the most recent stable release of OpenHands. You have other options as well:
|
||||
- For a specific release, use `docker.all-hands.dev/all-hands-ai/openhands:$VERSION`, replacing $VERSION with the version number.
|
||||
- We use semver, and release major, minor, and patch tags. So `0.9` will automatically point to the latest `0.9.x` release, and `0` will point to the latest `0.x.x` release.
|
||||
- For the most up-to-date development version, you can use `docker.all-hands.dev/all-hands-ai/openhands:main`. This version is unstable and is recommended for testing or development purposes only.
|
||||
The [docker command above](./installation#start-the-app) pulls the most recent stable release of OpenHands. You have other options as well:
|
||||
- For a specific release, replace $VERSION in `openhands:$VERSION` and `runtime:$VERSION`, with the version number.
|
||||
We use SemVer so `0.9` will automatically point to the latest `0.9.x` release, and `0` will point to the latest `0.x.x` release.
|
||||
- For the most up-to-date development version, replace $VERSION in `openhands:$VERSION` and `runtime:$VERSION`, with `main`.
|
||||
This version is unstable and is recommended for testing or development purposes only.
|
||||
|
||||
You can choose the tag that best suits your needs based on stability requirements and desired features.
|
||||
|
||||
|
||||
@@ -25,7 +25,7 @@ You will need your ChatGPT deployment name which can be found on the deployments
|
||||
<deployment-name> below.
|
||||
:::
|
||||
|
||||
1. Enable `Advanced Options`
|
||||
1. Enable `Advanced` options
|
||||
2. Set the following:
|
||||
- `Custom Model` to azure/<deployment-name>
|
||||
- `Base URL` to your Azure API Base URL (e.g. `https://example-endpoint.openai.azure.com`)
|
||||
|
||||
@@ -10,7 +10,7 @@ OpenHands uses LiteLLM to make calls to Google's chat models. You can find their
|
||||
When running OpenHands, you'll need to set the following in the OpenHands UI through the Settings:
|
||||
- `LLM Provider` to `Gemini`
|
||||
- `LLM Model` to the model you will be using.
|
||||
If the model is not in the list, toggle `Advanced Options`, and enter it in `Custom Model` (e.g. gemini/<model-name> like `gemini/gemini-1.5-pro`).
|
||||
If the model is not in the list, toggle `Advanced` options, and enter it in `Custom Model` (e.g. gemini/<model-name> like `gemini/gemini-1.5-pro`).
|
||||
- `API Key` to your Gemini API key
|
||||
|
||||
## VertexAI - Google Cloud Platform Configs
|
||||
@@ -27,4 +27,4 @@ VERTEXAI_LOCATION="<your-gcp-location>"
|
||||
Then set the following in the OpenHands UI through the Settings:
|
||||
- `LLM Provider` to `VertexAI`
|
||||
- `LLM Model` to the model you will be using.
|
||||
If the model is not in the list, toggle `Advanced Options`, and enter it in `Custom Model` (e.g. vertex_ai/<model-name>).
|
||||
If the model is not in the list, toggle `Advanced` options, and enter it in `Custom Model` (e.g. vertex_ai/<model-name>).
|
||||
|
||||
@@ -8,7 +8,7 @@ When running OpenHands, you'll need to set the following in the OpenHands UI thr
|
||||
- `LLM Provider` to `Groq`
|
||||
- `LLM Model` to the model you will be using. [Visit here to see the list of
|
||||
models that Groq hosts](https://console.groq.com/docs/models). If the model is not in the list, toggle
|
||||
`Advanced Options`, and enter it in `Custom Model` (e.g. groq/<model-name> like `groq/llama3-70b-8192`).
|
||||
`Advanced` options, and enter it in `Custom Model` (e.g. groq/<model-name> like `groq/llama3-70b-8192`).
|
||||
- `API key` to your Groq API key. To find or create your Groq API Key, [see here](https://console.groq.com/keys).
|
||||
|
||||
|
||||
@@ -17,7 +17,7 @@ models that Groq hosts](https://console.groq.com/docs/models). If the model is n
|
||||
|
||||
The Groq endpoint for chat completion is [mostly OpenAI-compatible](https://console.groq.com/docs/openai). Therefore, you can access Groq models as you
|
||||
would access any OpenAI-compatible endpoint. In the OpenHands UI through the Settings:
|
||||
1. Enable `Advanced Options`
|
||||
1. Enable `Advanced` options
|
||||
2. Set the following:
|
||||
- `Custom Model` to the prefix `openai/` + the model you will be using (e.g. `openai/llama3-70b-8192`)
|
||||
- `Base URL` to `https://api.groq.com/openai/v1`
|
||||
|
||||
@@ -8,7 +8,7 @@ To use LiteLLM proxy with OpenHands, you need to:
|
||||
|
||||
1. Set up a LiteLLM proxy server (see [LiteLLM documentation](https://docs.litellm.ai/docs/proxy/quick_start))
|
||||
2. When running OpenHands, you'll need to set the following in the OpenHands UI through the Settings:
|
||||
* Enable `Advanced Options`
|
||||
* Enable `Advanced` options
|
||||
* `Custom Model` to the prefix `litellm_proxy/` + the model you will be using (e.g. `litellm_proxy/anthropic.claude-3-5-sonnet-20241022-v2:0`)
|
||||
* `Base URL` to your LiteLLM proxy URL (e.g. `https://your-litellm-proxy.com`)
|
||||
* `API Key` to your LiteLLM proxy API key
|
||||
|
||||
@@ -38,7 +38,7 @@ The following can be set in the OpenHands UI through the Settings:
|
||||
- `LLM Provider`
|
||||
- `LLM Model`
|
||||
- `API Key`
|
||||
- `Base URL` (through `Advanced Settings`)
|
||||
- `Base URL` (through `Advanced` settings)
|
||||
|
||||
There are some settings that may be necessary for some LLMs/providers that cannot be set through the UI. Instead, these
|
||||
can be set through environment variables passed to the [docker run command](/modules/usage/installation#start-the-app)
|
||||
@@ -63,22 +63,22 @@ We have a few guides for running OpenHands with specific model providers:
|
||||
### API retries and rate limits
|
||||
|
||||
LLM providers typically have rate limits, sometimes very low, and may require retries. OpenHands will automatically
|
||||
retry requests if it receives a Rate Limit Error (429 error code), API connection error, or other transient errors.
|
||||
retry requests if it receives a Rate Limit Error (429 error code).
|
||||
|
||||
You can customize these options as you need for the provider you're using. Check their documentation, and set the
|
||||
following environment variables to control the number of retries and the time between retries:
|
||||
|
||||
- `LLM_NUM_RETRIES` (Default of 8)
|
||||
- `LLM_RETRY_MIN_WAIT` (Default of 15 seconds)
|
||||
- `LLM_RETRY_MAX_WAIT` (Default of 120 seconds)
|
||||
- `LLM_NUM_RETRIES` (Default of 4 times)
|
||||
- `LLM_RETRY_MIN_WAIT` (Default of 5 seconds)
|
||||
- `LLM_RETRY_MAX_WAIT` (Default of 30 seconds)
|
||||
- `LLM_RETRY_MULTIPLIER` (Default of 2)
|
||||
|
||||
If you are running OpenHands in development mode, you can also set these options in the `config.toml` file:
|
||||
|
||||
```toml
|
||||
[llm]
|
||||
num_retries = 8
|
||||
retry_min_wait = 15
|
||||
retry_max_wait = 120
|
||||
num_retries = 4
|
||||
retry_min_wait = 5
|
||||
retry_max_wait = 30
|
||||
retry_multiplier = 2
|
||||
```
|
||||
|
||||
@@ -8,7 +8,7 @@ When running OpenHands, you'll need to set the following in the OpenHands UI thr
|
||||
* `LLM Provider` to `OpenAI`
|
||||
* `LLM Model` to the model you will be using.
|
||||
[Visit here to see a full list of OpenAI models that LiteLLM supports.](https://docs.litellm.ai/docs/providers/openai#openai-chat-completion-models)
|
||||
If the model is not in the list, toggle `Advanced Options`, and enter it in `Custom Model` (e.g. openai/<model-name> like `openai/gpt-4o`).
|
||||
If the model is not in the list, toggle `Advanced` options, and enter it in `Custom Model` (e.g. openai/<model-name> like `openai/gpt-4o`).
|
||||
* `API Key` to your OpenAI API key. To find or create your OpenAI Project API Key, [see here](https://platform.openai.com/api-keys).
|
||||
|
||||
## Using OpenAI-Compatible Endpoints
|
||||
@@ -18,7 +18,7 @@ Just as for OpenAI Chat completions, we use LiteLLM for OpenAI-compatible endpoi
|
||||
## Using an OpenAI Proxy
|
||||
|
||||
If you're using an OpenAI proxy, in the OpenHands UI through the Settings:
|
||||
1. Enable `Advanced Options`
|
||||
1. Enable `Advanced` options
|
||||
2. Set the following:
|
||||
- `Custom Model` to openai/<model-name> (e.g. `openai/gpt-4o` or openai/<proxy-prefix>/<model-name>)
|
||||
- `Base URL` to the URL of your OpenAI proxy
|
||||
|
||||
@@ -8,5 +8,5 @@ When running OpenHands, you'll need to set the following in the OpenHands UI thr
|
||||
* `LLM Provider` to `OpenRouter`
|
||||
* `LLM Model` to the model you will be using.
|
||||
[Visit here to see a full list of OpenRouter models](https://openrouter.ai/models).
|
||||
If the model is not in the list, toggle `Advanced Options`, and enter it in `Custom Model` (e.g. openrouter/<model-name> like `openrouter/anthropic/claude-3.5-sonnet`).
|
||||
If the model is not in the list, toggle `Advanced` options, and enter it in `Custom Model` (e.g. openrouter/<model-name> like `openrouter/anthropic/claude-3.5-sonnet`).
|
||||
* `API Key` to your OpenRouter API key.
|
||||
|
||||
@@ -16,7 +16,7 @@ some flags being passed to `docker run` that make this possible:
|
||||
|
||||
```
|
||||
docker run # ...
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.21-nikolaik \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.26-nikolaik \
|
||||
-v /var/run/docker.sock:/var/run/docker.sock \
|
||||
# ...
|
||||
```
|
||||
|
||||
@@ -42,7 +42,7 @@ const sidebars: SidebarsConfig = {
|
||||
id: 'usage/prompting/microagents-public',
|
||||
},
|
||||
],
|
||||
}
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
@@ -66,9 +66,26 @@ const sidebars: SidebarsConfig = {
|
||||
},
|
||||
{
|
||||
type: 'doc',
|
||||
label: 'Github Actions',
|
||||
label: 'Github Action',
|
||||
id: 'usage/how-to/github-action',
|
||||
},
|
||||
{
|
||||
type: 'category',
|
||||
label: 'Cloud',
|
||||
items: [
|
||||
{
|
||||
type: 'doc',
|
||||
label: 'Openhands Cloud',
|
||||
id: 'usage/cloud/openhands-cloud',
|
||||
},
|
||||
|
||||
{
|
||||
type: 'doc',
|
||||
label: 'Cloud GitHub Resolver',
|
||||
id: 'usage/cloud/cloud-github-resolver',
|
||||
},
|
||||
],
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
@@ -185,7 +202,7 @@ const sidebars: SidebarsConfig = {
|
||||
type: 'doc',
|
||||
label: 'About',
|
||||
id: 'usage/about',
|
||||
}
|
||||
},
|
||||
],
|
||||
};
|
||||
|
||||
|
||||
@@ -23,6 +23,17 @@ export default function Home(): JSX.Element {
|
||||
})}
|
||||
>
|
||||
<HomepageHeader />
|
||||
<div style={{ textAlign: 'center', padding: '2rem' }}>
|
||||
<br />
|
||||
<h2>Most Popular Links</h2>
|
||||
<ul style={{ listStyleType: 'none'}}>
|
||||
<li><a href="/modules/usage/Installation">How to Run OpenHands</a></li>
|
||||
<li><a href="/modules/usage/prompting/microagents-repo">Customizing OpenHands to a repository</a></li>
|
||||
<li><a href="/modules/usage/how-to/github-action">Integrating OpenHands with Github</a></li>
|
||||
<li><a href="/modules/usage/llms#model-recommendations">Recommended models to use</a></li>
|
||||
<li><a href="/modules/usage/runtimes#connecting-to-your-filesystem">Connecting OpenHands to your filesystem</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
</Layout>
|
||||
);
|
||||
}
|
||||
|
||||
BIN
docs/static/img/settings-advanced.png
vendored
BIN
docs/static/img/settings-advanced.png
vendored
Binary file not shown.
|
Before Width: | Height: | Size: 28 KiB |
BIN
docs/static/img/settings-screenshot.png
vendored
BIN
docs/static/img/settings-screenshot.png
vendored
Binary file not shown.
|
Before Width: | Height: | Size: 33 KiB |
@@ -20,6 +20,8 @@ To evaluate an agent, you can provide the agent's name to the `run_infer.py` pro
|
||||
### Evaluating Different LLMs
|
||||
|
||||
OpenHands in development mode uses `config.toml` to keep track of most configuration.
|
||||
**IMPORTANT: For evaluation, only the LLM section in `config.toml` will be used. Other configurations, such as `save_trajectory_path`, are not applied during evaluation.**
|
||||
|
||||
Here's an example configuration file you can use to define and use multiple LLMs:
|
||||
|
||||
```toml
|
||||
@@ -40,6 +42,8 @@ api_key = "XXX"
|
||||
temperature = 0.0
|
||||
```
|
||||
|
||||
For other configurations specific to evaluation, such as `save_trajectory_path`, these are typically set in the `get_config` function of the respective `run_infer.py` file for each benchmark.
|
||||
|
||||
## Supported Benchmarks
|
||||
|
||||
The OpenHands evaluation harness supports a wide variety of benchmarks across [software engineering](#software-engineering), [web browsing](#web-browsing), [miscellaneous assistance](#misc-assistance), and [real-world](#real-world) tasks.
|
||||
|
||||
@@ -9,6 +9,7 @@ from evaluation.utils.shared import (
|
||||
EvalMetadata,
|
||||
EvalOutput,
|
||||
compatibility_for_eval_history_pairs,
|
||||
get_default_sandbox_config_for_eval,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
@@ -17,7 +18,6 @@ from evaluation.utils.shared import (
|
||||
from openhands.controller.state.state import State
|
||||
from openhands.core.config import (
|
||||
AppConfig,
|
||||
SandboxConfig,
|
||||
get_llm_config_arg,
|
||||
get_parser,
|
||||
)
|
||||
@@ -60,16 +60,14 @@ AGENT_CLS_TO_INST_SUFFIX = {
|
||||
def get_config(
|
||||
metadata: EvalMetadata,
|
||||
) -> AppConfig:
|
||||
sandbox_config = get_default_sandbox_config_for_eval()
|
||||
sandbox_config.base_container_image = 'python:3.12-bookworm'
|
||||
config = AppConfig(
|
||||
default_agent=metadata.agent_class,
|
||||
run_as_openhands=False,
|
||||
runtime='docker',
|
||||
max_iterations=metadata.max_iterations,
|
||||
sandbox=SandboxConfig(
|
||||
base_container_image='python:3.12-bookworm',
|
||||
enable_auto_lint=False,
|
||||
use_host_network=False,
|
||||
),
|
||||
sandbox=sandbox_config,
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
set -eo pipefail
|
||||
|
||||
source "evaluation/utils/version_control.sh"
|
||||
|
||||
@@ -17,6 +17,7 @@ from evaluation.utils.shared import (
|
||||
EvalMetadata,
|
||||
EvalOutput,
|
||||
compatibility_for_eval_history_pairs,
|
||||
get_default_sandbox_config_for_eval,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
@@ -25,7 +26,6 @@ from evaluation.utils.shared import (
|
||||
from openhands.controller.state.state import State
|
||||
from openhands.core.config import (
|
||||
AppConfig,
|
||||
SandboxConfig,
|
||||
get_llm_config_arg,
|
||||
parse_arguments,
|
||||
)
|
||||
@@ -40,20 +40,15 @@ from openhands.utils.async_utils import call_async_from_sync
|
||||
def get_config(
|
||||
metadata: EvalMetadata,
|
||||
) -> AppConfig:
|
||||
sandbox_config = get_default_sandbox_config_for_eval()
|
||||
sandbox_config.base_container_image = 'python:3.12-slim'
|
||||
|
||||
config = AppConfig(
|
||||
default_agent=metadata.agent_class,
|
||||
run_as_openhands=False,
|
||||
runtime=os.environ.get('RUNTIME', 'docker'),
|
||||
max_iterations=metadata.max_iterations,
|
||||
sandbox=SandboxConfig(
|
||||
base_container_image='python:3.12-slim',
|
||||
enable_auto_lint=True,
|
||||
use_host_network=False,
|
||||
api_key=os.environ.get('ALLHANDS_API_KEY', None),
|
||||
remote_runtime_api_url=os.environ.get('SANDBOX_REMOTE_RUNTIME_API_URL'),
|
||||
keep_runtime_alive=False,
|
||||
remote_runtime_init_timeout=3600,
|
||||
),
|
||||
sandbox=sandbox_config,
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
set -eo pipefail
|
||||
|
||||
source "evaluation/utils/version_control.sh"
|
||||
|
||||
@@ -16,6 +16,7 @@ from evaluation.utils.shared import (
|
||||
EvalMetadata,
|
||||
EvalOutput,
|
||||
compatibility_for_eval_history_pairs,
|
||||
get_default_sandbox_config_for_eval,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
@@ -24,7 +25,6 @@ from evaluation.utils.shared import (
|
||||
from openhands.controller.state.state import State
|
||||
from openhands.core.config import (
|
||||
AppConfig,
|
||||
SandboxConfig,
|
||||
get_llm_config_arg,
|
||||
load_from_toml,
|
||||
parse_arguments,
|
||||
@@ -47,21 +47,14 @@ SKIP_NUM = (
|
||||
def get_config(
|
||||
metadata: EvalMetadata,
|
||||
) -> AppConfig:
|
||||
sandbox_config = get_default_sandbox_config_for_eval()
|
||||
sandbox_config.base_container_image = 'python:3.11-bookworm'
|
||||
config = AppConfig(
|
||||
default_agent=metadata.agent_class,
|
||||
run_as_openhands=False,
|
||||
runtime=os.environ.get('RUNTIME', 'docker'),
|
||||
max_iterations=metadata.max_iterations,
|
||||
sandbox=SandboxConfig(
|
||||
base_container_image='python:3.11-bookworm',
|
||||
enable_auto_lint=True,
|
||||
use_host_network=False,
|
||||
timeout=100,
|
||||
api_key=os.environ.get('ALLHANDS_API_KEY', None),
|
||||
remote_runtime_api_url=os.environ.get('SANDBOX_REMOTE_RUNTIME_API_URL'),
|
||||
keep_runtime_alive=False,
|
||||
remote_runtime_init_timeout=1800,
|
||||
),
|
||||
sandbox=sandbox_config,
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
set -eo pipefail
|
||||
|
||||
source "evaluation/utils/version_control.sh"
|
||||
|
||||
@@ -14,6 +14,7 @@ from evaluation.utils.shared import (
|
||||
EvalOutput,
|
||||
codeact_user_response,
|
||||
compatibility_for_eval_history_pairs,
|
||||
get_default_sandbox_config_for_eval,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
@@ -22,7 +23,6 @@ from evaluation.utils.shared import (
|
||||
from openhands.controller.state.state import State
|
||||
from openhands.core.config import (
|
||||
AppConfig,
|
||||
SandboxConfig,
|
||||
get_llm_config_arg,
|
||||
parse_arguments,
|
||||
)
|
||||
@@ -57,17 +57,15 @@ def get_config(
|
||||
metadata: EvalMetadata,
|
||||
) -> AppConfig:
|
||||
BIOCODER_BENCH_CONTAINER_IMAGE = 'public.ecr.aws/i5g0m1f6/eval_biocoder:v1.0'
|
||||
sandbox_config = get_default_sandbox_config_for_eval()
|
||||
sandbox_config.base_container_image = BIOCODER_BENCH_CONTAINER_IMAGE
|
||||
|
||||
config = AppConfig(
|
||||
default_agent=metadata.agent_class,
|
||||
run_as_openhands=False,
|
||||
runtime='docker',
|
||||
max_iterations=metadata.max_iterations,
|
||||
sandbox=SandboxConfig(
|
||||
base_container_image=BIOCODER_BENCH_CONTAINER_IMAGE,
|
||||
enable_auto_lint=True,
|
||||
use_host_network=False,
|
||||
),
|
||||
sandbox=sandbox_config,
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
set -eo pipefail
|
||||
|
||||
source "evaluation/utils/version_control.sh"
|
||||
|
||||
@@ -17,6 +17,7 @@ from evaluation.utils.shared import (
|
||||
EvalMetadata,
|
||||
EvalOutput,
|
||||
compatibility_for_eval_history_pairs,
|
||||
get_default_sandbox_config_for_eval,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
@@ -25,7 +26,6 @@ from evaluation.utils.shared import (
|
||||
from openhands.controller.state.state import State
|
||||
from openhands.core.config import (
|
||||
AppConfig,
|
||||
SandboxConfig,
|
||||
get_llm_config_arg,
|
||||
parse_arguments,
|
||||
)
|
||||
@@ -71,16 +71,15 @@ AGENT_CLS_TO_INST_SUFFIX = {
|
||||
def get_config(
|
||||
metadata: EvalMetadata,
|
||||
) -> AppConfig:
|
||||
sandbox_config = get_default_sandbox_config_for_eval()
|
||||
sandbox_config.base_container_image = 'python:3.12-bookworm'
|
||||
|
||||
config = AppConfig(
|
||||
default_agent=metadata.agent_class,
|
||||
run_as_openhands=False,
|
||||
runtime='docker',
|
||||
max_iterations=metadata.max_iterations,
|
||||
sandbox=SandboxConfig(
|
||||
base_container_image='python:3.12-bookworm',
|
||||
enable_auto_lint=True,
|
||||
use_host_network=False,
|
||||
),
|
||||
sandbox=sandbox_config,
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
set -eo pipefail
|
||||
|
||||
source "evaluation/utils/version_control.sh"
|
||||
|
||||
@@ -10,6 +10,7 @@ from evaluation.utils.shared import (
|
||||
EvalMetadata,
|
||||
EvalOutput,
|
||||
compatibility_for_eval_history_pairs,
|
||||
get_default_sandbox_config_for_eval,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
@@ -18,7 +19,6 @@ from evaluation.utils.shared import (
|
||||
from openhands.controller.state.state import State
|
||||
from openhands.core.config import (
|
||||
AppConfig,
|
||||
SandboxConfig,
|
||||
get_llm_config_arg,
|
||||
parse_arguments,
|
||||
)
|
||||
@@ -36,16 +36,14 @@ def get_config(
|
||||
assert (
|
||||
metadata.max_iterations == 1
|
||||
), 'max_iterations must be 1 for browsing delegation evaluation.'
|
||||
sandbox_config = get_default_sandbox_config_for_eval()
|
||||
sandbox_config.base_container_image = 'python:3.12-bookworm'
|
||||
config = AppConfig(
|
||||
default_agent=metadata.agent_class,
|
||||
run_as_openhands=False,
|
||||
runtime='docker',
|
||||
max_iterations=metadata.max_iterations,
|
||||
sandbox=SandboxConfig(
|
||||
base_container_image='python:3.12-bookworm',
|
||||
enable_auto_lint=False,
|
||||
use_host_network=False,
|
||||
),
|
||||
sandbox=sandbox_config,
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
)
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
set -eo pipefail
|
||||
|
||||
source "evaluation/utils/version_control.sh"
|
||||
|
||||
@@ -15,6 +15,7 @@ from evaluation.utils.shared import (
|
||||
EvalOutput,
|
||||
assert_and_raise,
|
||||
codeact_user_response,
|
||||
get_default_sandbox_config_for_eval,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
@@ -25,7 +26,6 @@ from openhands.controller.state.state import State
|
||||
from openhands.core.config import (
|
||||
AgentConfig,
|
||||
AppConfig,
|
||||
SandboxConfig,
|
||||
get_llm_config_arg,
|
||||
get_parser,
|
||||
)
|
||||
@@ -105,9 +105,7 @@ def get_config(
|
||||
instance: pd.Series,
|
||||
metadata: EvalMetadata,
|
||||
) -> AppConfig:
|
||||
# COMMIT0_CONTAINER_IMAGE = 'wentingzhao/'
|
||||
assert USE_INSTANCE_IMAGE
|
||||
# We use a different instance image for the each instance of commit0 eval
|
||||
repo_name = instance['repo'].split('/')[1]
|
||||
base_container_image = get_instance_docker_image(repo_name)
|
||||
logger.info(
|
||||
@@ -115,27 +113,16 @@ def get_config(
|
||||
f'Please make sure this image exists. '
|
||||
f'Submit an issue on https://github.com/All-Hands-AI/OpenHands if you run into any issues.'
|
||||
)
|
||||
# else:
|
||||
# raise
|
||||
# base_container_image = SWE_BENCH_CONTAINER_IMAGE
|
||||
# logger.info(f'Using swe-bench container image: {base_container_image}')
|
||||
|
||||
sandbox_config = get_default_sandbox_config_for_eval()
|
||||
sandbox_config.base_container_image = base_container_image
|
||||
|
||||
config = AppConfig(
|
||||
default_agent=metadata.agent_class,
|
||||
run_as_openhands=False,
|
||||
max_iterations=metadata.max_iterations,
|
||||
runtime=os.environ.get('RUNTIME', 'docker'),
|
||||
sandbox=SandboxConfig(
|
||||
base_container_image=base_container_image,
|
||||
enable_auto_lint=True,
|
||||
use_host_network=False,
|
||||
# large enough timeout, since some testcases take very long to run
|
||||
timeout=300,
|
||||
api_key=os.environ.get('ALLHANDS_API_KEY', None),
|
||||
remote_runtime_api_url=os.environ.get('SANDBOX_REMOTE_RUNTIME_API_URL'),
|
||||
keep_runtime_alive=False,
|
||||
remote_runtime_init_timeout=3600,
|
||||
),
|
||||
sandbox=sandbox_config,
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
set -eo pipefail
|
||||
|
||||
source "evaluation/utils/version_control.sh"
|
||||
|
||||
@@ -16,6 +16,7 @@ from evaluation.utils.shared import (
|
||||
EvalOutput,
|
||||
codeact_user_response,
|
||||
compatibility_for_eval_history_pairs,
|
||||
get_default_sandbox_config_for_eval,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
@@ -25,7 +26,6 @@ from openhands.controller.state.state import State
|
||||
from openhands.core.config import (
|
||||
AgentConfig,
|
||||
AppConfig,
|
||||
SandboxConfig,
|
||||
get_llm_config_arg,
|
||||
parse_arguments,
|
||||
)
|
||||
@@ -62,16 +62,14 @@ AGENT_CLS_TO_INST_SUFFIX = {
|
||||
def get_config(
|
||||
metadata: EvalMetadata,
|
||||
) -> AppConfig:
|
||||
sandbox_config = get_default_sandbox_config_for_eval()
|
||||
sandbox_config.base_container_image = 'python:3.12-bookworm'
|
||||
config = AppConfig(
|
||||
default_agent=metadata.agent_class,
|
||||
run_as_openhands=False,
|
||||
runtime='docker',
|
||||
max_iterations=metadata.max_iterations,
|
||||
sandbox=SandboxConfig(
|
||||
base_container_image='python:3.12-bookworm',
|
||||
enable_auto_lint=True,
|
||||
use_host_network=False,
|
||||
),
|
||||
sandbox=sandbox_config,
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
set -eo pipefail
|
||||
|
||||
source "evaluation/utils/version_control.sh"
|
||||
|
||||
@@ -13,6 +13,7 @@ from evaluation.utils.shared import (
|
||||
EvalOutput,
|
||||
codeact_user_response,
|
||||
compatibility_for_eval_history_pairs,
|
||||
get_default_sandbox_config_for_eval,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
@@ -21,10 +22,10 @@ from evaluation.utils.shared import (
|
||||
from openhands.controller.state.state import State
|
||||
from openhands.core.config import (
|
||||
AppConfig,
|
||||
SandboxConfig,
|
||||
get_llm_config_arg,
|
||||
get_parser,
|
||||
)
|
||||
from openhands.core.config.utils import get_agent_config_arg
|
||||
from openhands.core.logger import openhands_logger as logger
|
||||
from openhands.core.main import create_runtime, run_controller
|
||||
from openhands.events.action import AgentFinishAction, CmdRunAction, MessageAction
|
||||
@@ -47,23 +48,25 @@ AGENT_CLS_TO_INST_SUFFIX = {
|
||||
def get_config(
|
||||
metadata: EvalMetadata,
|
||||
) -> AppConfig:
|
||||
sandbox_config = get_default_sandbox_config_for_eval()
|
||||
sandbox_config.base_container_image = 'python:3.12-bookworm'
|
||||
config = AppConfig(
|
||||
default_agent=metadata.agent_class,
|
||||
run_as_openhands=False,
|
||||
runtime='docker',
|
||||
max_iterations=metadata.max_iterations,
|
||||
sandbox=SandboxConfig(
|
||||
base_container_image='python:3.12-bookworm',
|
||||
enable_auto_lint=True,
|
||||
use_host_network=False,
|
||||
),
|
||||
sandbox=sandbox_config,
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
)
|
||||
config.set_llm_config(metadata.llm_config)
|
||||
agent_config = config.get_agent_config(metadata.agent_class)
|
||||
agent_config.enable_prompt_extensions = False
|
||||
if metadata.agent_config:
|
||||
config.set_agent_config(metadata.agent_config, metadata.agent_class)
|
||||
else:
|
||||
logger.info('Agent config not provided, using default settings')
|
||||
agent_config = config.get_agent_config(metadata.agent_class)
|
||||
agent_config.enable_prompt_extensions = False
|
||||
return config
|
||||
|
||||
|
||||
@@ -237,6 +240,10 @@ if __name__ == '__main__':
|
||||
)
|
||||
args, _ = parser.parse_known_args()
|
||||
|
||||
agent_config = None
|
||||
if args.agent_config:
|
||||
agent_config = get_agent_config_arg(args.agent_config)
|
||||
|
||||
llm_config = None
|
||||
if args.llm_config:
|
||||
llm_config = get_llm_config_arg(args.llm_config)
|
||||
@@ -255,6 +262,7 @@ if __name__ == '__main__':
|
||||
eval_output_dir=args.eval_output_dir,
|
||||
data_split=args.data_split,
|
||||
details={'gaia-level': args.level},
|
||||
agent_config=agent_config,
|
||||
)
|
||||
|
||||
dataset = load_dataset('gaia-benchmark/GAIA', args.level)
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
set -eo pipefail
|
||||
|
||||
source "evaluation/utils/version_control.sh"
|
||||
@@ -9,6 +9,7 @@ AGENT=$3
|
||||
EVAL_LIMIT=$4
|
||||
LEVELS=$5
|
||||
NUM_WORKERS=$6
|
||||
AGENT_CONFIG=$7
|
||||
|
||||
if [ -z "$NUM_WORKERS" ]; then
|
||||
NUM_WORKERS=1
|
||||
@@ -49,5 +50,9 @@ if [ -n "$EVAL_LIMIT" ]; then
|
||||
COMMAND="$COMMAND --eval-n-limit $EVAL_LIMIT"
|
||||
fi
|
||||
|
||||
if [ -n "$AGENT_CONFIG" ]; then
|
||||
echo "AGENT_CONFIG: $AGENT_CONFIG"
|
||||
COMMAND="$COMMAND --agent-config $AGENT_CONFIG"
|
||||
|
||||
# Run the command
|
||||
eval $COMMAND
|
||||
|
||||
@@ -11,6 +11,7 @@ from evaluation.utils.shared import (
|
||||
EvalOutput,
|
||||
codeact_user_response,
|
||||
compatibility_for_eval_history_pairs,
|
||||
get_default_sandbox_config_for_eval,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
@@ -19,7 +20,6 @@ from evaluation.utils.shared import (
|
||||
from openhands.controller.state.state import State
|
||||
from openhands.core.config import (
|
||||
AppConfig,
|
||||
SandboxConfig,
|
||||
get_llm_config_arg,
|
||||
get_parser,
|
||||
)
|
||||
@@ -40,16 +40,14 @@ AGENT_CLS_TO_INST_SUFFIX = {
|
||||
def get_config(
|
||||
metadata: EvalMetadata,
|
||||
) -> AppConfig:
|
||||
sandbox_config = get_default_sandbox_config_for_eval()
|
||||
sandbox_config.base_container_image = 'python:3.12-bookworm'
|
||||
config = AppConfig(
|
||||
default_agent=metadata.agent_class,
|
||||
run_as_openhands=False,
|
||||
runtime='docker',
|
||||
max_iterations=metadata.max_iterations,
|
||||
sandbox=SandboxConfig(
|
||||
base_container_image='python:3.12-bookworm',
|
||||
enable_auto_lint=True,
|
||||
use_host_network=False,
|
||||
),
|
||||
sandbox=sandbox_config,
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
set -eo pipefail
|
||||
|
||||
source "evaluation/utils/version_control.sh"
|
||||
|
||||
@@ -29,6 +29,7 @@ from evaluation.utils.shared import (
|
||||
EvalMetadata,
|
||||
EvalOutput,
|
||||
compatibility_for_eval_history_pairs,
|
||||
get_default_sandbox_config_for_eval,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
@@ -37,7 +38,6 @@ from evaluation.utils.shared import (
|
||||
from openhands.controller.state.state import State
|
||||
from openhands.core.config import (
|
||||
AppConfig,
|
||||
SandboxConfig,
|
||||
get_llm_config_arg,
|
||||
get_parser,
|
||||
)
|
||||
@@ -61,16 +61,14 @@ ACTION_FORMAT = """
|
||||
def get_config(
|
||||
metadata: EvalMetadata,
|
||||
) -> AppConfig:
|
||||
sandbox_config = get_default_sandbox_config_for_eval()
|
||||
sandbox_config.base_container_image = 'python:3.12-bookworm'
|
||||
config = AppConfig(
|
||||
default_agent=metadata.agent_class,
|
||||
run_as_openhands=False,
|
||||
runtime='docker',
|
||||
max_iterations=metadata.max_iterations,
|
||||
sandbox=SandboxConfig(
|
||||
base_container_image='python:3.12-bookworm',
|
||||
enable_auto_lint=True,
|
||||
use_host_network=False,
|
||||
),
|
||||
sandbox=sandbox_config,
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
set -eo pipefail
|
||||
|
||||
source "evaluation/utils/version_control.sh"
|
||||
|
||||
@@ -22,6 +22,7 @@ from evaluation.utils.shared import (
|
||||
EvalOutput,
|
||||
codeact_user_response,
|
||||
compatibility_for_eval_history_pairs,
|
||||
get_default_sandbox_config_for_eval,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
@@ -30,7 +31,6 @@ from evaluation.utils.shared import (
|
||||
from openhands.controller.state.state import State
|
||||
from openhands.core.config import (
|
||||
AppConfig,
|
||||
SandboxConfig,
|
||||
get_llm_config_arg,
|
||||
parse_arguments,
|
||||
)
|
||||
@@ -82,16 +82,14 @@ AGENT_CLS_TO_INST_SUFFIX = {
|
||||
def get_config(
|
||||
metadata: EvalMetadata,
|
||||
) -> AppConfig:
|
||||
sandbox_config = get_default_sandbox_config_for_eval()
|
||||
sandbox_config.base_container_image = 'python:3.12-bookworm'
|
||||
config = AppConfig(
|
||||
default_agent=metadata.agent_class,
|
||||
run_as_openhands=False,
|
||||
runtime='docker',
|
||||
max_iterations=metadata.max_iterations,
|
||||
sandbox=SandboxConfig(
|
||||
base_container_image='python:3.12-bookworm',
|
||||
enable_auto_lint=True,
|
||||
use_host_network=False,
|
||||
),
|
||||
sandbox=sandbox_config,
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
set -eo pipefail
|
||||
|
||||
source "evaluation/utils/version_control.sh"
|
||||
|
||||
@@ -9,6 +9,7 @@ from evaluation.utils.shared import (
|
||||
EvalOutput,
|
||||
codeact_user_response,
|
||||
compatibility_for_eval_history_pairs,
|
||||
get_default_sandbox_config_for_eval,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
@@ -17,7 +18,6 @@ from evaluation.utils.shared import (
|
||||
from openhands.controller.state.state import State
|
||||
from openhands.core.config import (
|
||||
AppConfig,
|
||||
SandboxConfig,
|
||||
get_llm_config_arg,
|
||||
get_parser,
|
||||
)
|
||||
@@ -45,17 +45,18 @@ AGENT_CLS_TO_INST_SUFFIX = {
|
||||
def get_config(
|
||||
metadata: EvalMetadata,
|
||||
) -> AppConfig:
|
||||
sandbox_config = get_default_sandbox_config_for_eval()
|
||||
sandbox_config.base_container_image = 'xingyaoww/od-eval-logic-reasoning:v1.0'
|
||||
sandbox_config.runtime_extra_deps = (
|
||||
'$OH_INTERPRETER_PATH -m pip install scitools-pyke'
|
||||
)
|
||||
|
||||
config = AppConfig(
|
||||
default_agent=metadata.agent_class,
|
||||
run_as_openhands=False,
|
||||
runtime='docker',
|
||||
max_iterations=metadata.max_iterations,
|
||||
sandbox=SandboxConfig(
|
||||
base_container_image='xingyaoww/od-eval-logic-reasoning:v1.0',
|
||||
enable_auto_lint=True,
|
||||
use_host_network=False,
|
||||
runtime_extra_deps='$OH_INTERPRETER_PATH -m pip install scitools-pyke',
|
||||
),
|
||||
sandbox=sandbox_config,
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
set -eo pipefail
|
||||
|
||||
source "evaluation/utils/version_control.sh"
|
||||
|
||||
@@ -12,6 +12,7 @@ from evaluation.utils.shared import (
|
||||
EvalOutput,
|
||||
codeact_user_response,
|
||||
compatibility_for_eval_history_pairs,
|
||||
get_default_sandbox_config_for_eval,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
@@ -21,7 +22,6 @@ from evaluation.utils.shared import (
|
||||
from openhands.controller.state.state import State
|
||||
from openhands.core.config import (
|
||||
AppConfig,
|
||||
SandboxConfig,
|
||||
get_llm_config_arg,
|
||||
parse_arguments,
|
||||
)
|
||||
@@ -55,22 +55,14 @@ def get_config(
|
||||
metadata: EvalMetadata,
|
||||
env_id: str,
|
||||
) -> AppConfig:
|
||||
sandbox_config = get_default_sandbox_config_for_eval()
|
||||
sandbox_config.base_container_image = 'xingyaoww/od-eval-miniwob:v1.0'
|
||||
config = AppConfig(
|
||||
default_agent=metadata.agent_class,
|
||||
run_as_openhands=False,
|
||||
runtime=os.environ.get('RUNTIME', 'docker'),
|
||||
max_iterations=metadata.max_iterations,
|
||||
sandbox=SandboxConfig(
|
||||
base_container_image='xingyaoww/od-eval-miniwob:v1.0',
|
||||
enable_auto_lint=True,
|
||||
use_host_network=False,
|
||||
browsergym_eval_env=env_id,
|
||||
api_key=os.environ.get('ALLHANDS_API_KEY', None),
|
||||
remote_runtime_api_url=os.environ.get('SANDBOX_REMOTE_RUNTIME_API_URL'),
|
||||
remote_runtime_init_timeout=1800,
|
||||
keep_runtime_alive=False,
|
||||
timeout=120,
|
||||
),
|
||||
sandbox=sandbox_config,
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
set -eo pipefail
|
||||
|
||||
source "evaluation/utils/version_control.sh"
|
||||
|
||||
@@ -14,6 +14,7 @@ from evaluation.utils.shared import (
|
||||
EvalMetadata,
|
||||
EvalOutput,
|
||||
compatibility_for_eval_history_pairs,
|
||||
get_default_sandbox_config_for_eval,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
@@ -22,7 +23,6 @@ from evaluation.utils.shared import (
|
||||
from openhands.controller.state.state import State
|
||||
from openhands.core.config import (
|
||||
AppConfig,
|
||||
SandboxConfig,
|
||||
get_llm_config_arg,
|
||||
get_parser,
|
||||
)
|
||||
@@ -103,17 +103,18 @@ def load_incontext_example(task_name: str, with_tool: bool = True):
|
||||
def get_config(
|
||||
metadata: EvalMetadata,
|
||||
) -> AppConfig:
|
||||
sandbox_config = get_default_sandbox_config_for_eval()
|
||||
sandbox_config.base_container_image = 'xingyaoww/od-eval-mint:v1.0'
|
||||
sandbox_config.runtime_extra_deps = (
|
||||
f'$OH_INTERPRETER_PATH -m pip install {" ".join(MINT_DEPENDENCIES)}'
|
||||
)
|
||||
|
||||
config = AppConfig(
|
||||
default_agent=metadata.agent_class,
|
||||
run_as_openhands=False,
|
||||
runtime='docker',
|
||||
max_iterations=metadata.max_iterations,
|
||||
sandbox=SandboxConfig(
|
||||
base_container_image='xingyaoww/od-eval-mint:v1.0',
|
||||
enable_auto_lint=True,
|
||||
use_host_network=False,
|
||||
runtime_extra_deps=f'$OH_INTERPRETER_PATH -m pip install {" ".join(MINT_DEPENDENCIES)}',
|
||||
),
|
||||
sandbox=sandbox_config,
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
set -eo pipefail
|
||||
|
||||
source "evaluation/utils/version_control.sh"
|
||||
|
||||
@@ -25,6 +25,7 @@ from evaluation.utils.shared import (
|
||||
EvalOutput,
|
||||
codeact_user_response,
|
||||
compatibility_for_eval_history_pairs,
|
||||
get_default_sandbox_config_for_eval,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
@@ -33,7 +34,6 @@ from evaluation.utils.shared import (
|
||||
from openhands.controller.state.state import State
|
||||
from openhands.core.config import (
|
||||
AppConfig,
|
||||
SandboxConfig,
|
||||
get_llm_config_arg,
|
||||
get_parser,
|
||||
load_app_config,
|
||||
@@ -77,16 +77,14 @@ ID2CONDA = {
|
||||
def get_config(
|
||||
metadata: EvalMetadata,
|
||||
) -> AppConfig:
|
||||
sandbox_config = get_default_sandbox_config_for_eval()
|
||||
sandbox_config.base_container_image = 'public.ecr.aws/i5g0m1f6/ml-bench'
|
||||
config = AppConfig(
|
||||
default_agent=metadata.agent_class,
|
||||
run_as_openhands=False,
|
||||
runtime='docker',
|
||||
max_iterations=metadata.max_iterations,
|
||||
sandbox=SandboxConfig(
|
||||
base_container_image='public.ecr.aws/i5g0m1f6/ml-bench',
|
||||
enable_auto_lint=True,
|
||||
use_host_network=False,
|
||||
),
|
||||
sandbox=sandbox_config,
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
|
||||
# Step 1: Stop all running containers
|
||||
echo "Stopping all running containers..."
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
|
||||
RESULT_FILE=$1
|
||||
MODEL_CONFIG=$2
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
set -eo pipefail
|
||||
|
||||
source "evaluation/utils/version_control.sh"
|
||||
|
||||
@@ -11,6 +11,7 @@ from evaluation.utils.shared import (
|
||||
EvalOutput,
|
||||
codeact_user_response,
|
||||
compatibility_for_eval_history_pairs,
|
||||
get_default_sandbox_config_for_eval,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
@@ -20,7 +21,6 @@ from evaluation.utils.shared import (
|
||||
from openhands.controller.state.state import State
|
||||
from openhands.core.config import (
|
||||
AppConfig,
|
||||
SandboxConfig,
|
||||
get_llm_config_arg,
|
||||
get_parser,
|
||||
)
|
||||
@@ -59,21 +59,17 @@ def get_config(
|
||||
metadata: EvalMetadata,
|
||||
instance_id: str,
|
||||
) -> AppConfig:
|
||||
sandbox_config = get_default_sandbox_config_for_eval()
|
||||
sandbox_config.base_container_image = (
|
||||
'docker.io/xingyaoww/openhands-eval-scienceagentbench'
|
||||
)
|
||||
config = AppConfig(
|
||||
default_agent=metadata.agent_class,
|
||||
run_as_openhands=False,
|
||||
runtime=os.environ.get('RUNTIME', 'docker'),
|
||||
max_budget_per_task=4,
|
||||
max_iterations=metadata.max_iterations,
|
||||
sandbox=SandboxConfig(
|
||||
base_container_image='docker.io/xingyaoww/openhands-eval-scienceagentbench',
|
||||
enable_auto_lint=True,
|
||||
use_host_network=False,
|
||||
timeout=300,
|
||||
api_key=os.environ.get('ALLHANDS_API_KEY', None),
|
||||
remote_runtime_api_url=os.environ.get('SANDBOX_REMOTE_RUNTIME_API_URL'),
|
||||
keep_runtime_alive=False,
|
||||
),
|
||||
sandbox=sandbox_config,
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
set -eo pipefail
|
||||
|
||||
source "evaluation/utils/version_control.sh"
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
import json
|
||||
import os
|
||||
import subprocess
|
||||
import tempfile
|
||||
import time
|
||||
from functools import partial
|
||||
@@ -21,13 +22,14 @@ from evaluation.benchmarks.swe_bench.run_infer import get_instance_docker_image
|
||||
from evaluation.utils.shared import (
|
||||
EvalMetadata,
|
||||
EvalOutput,
|
||||
get_default_sandbox_config_for_eval,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
run_evaluation,
|
||||
)
|
||||
from openhands.core.config import (
|
||||
AppConfig,
|
||||
SandboxConfig,
|
||||
LLMConfig,
|
||||
get_parser,
|
||||
)
|
||||
from openhands.core.logger import openhands_logger as logger
|
||||
@@ -79,22 +81,16 @@ def get_config(metadata: EvalMetadata, instance: pd.Series) -> AppConfig:
|
||||
f'Please make sure this image exists. '
|
||||
f'Submit an issue on https://github.com/All-Hands-AI/OpenHands if you run into any issues.'
|
||||
)
|
||||
sandbox_config = get_default_sandbox_config_for_eval()
|
||||
sandbox_config.base_container_image = base_container_image
|
||||
sandbox_config.remote_runtime_resource_factor = get_instance_resource_factor(
|
||||
dataset_name=metadata.dataset,
|
||||
instance_id=instance['instance_id'],
|
||||
)
|
||||
config = AppConfig(
|
||||
run_as_openhands=False,
|
||||
runtime=os.environ.get('RUNTIME', 'docker'),
|
||||
sandbox=SandboxConfig(
|
||||
base_container_image=base_container_image,
|
||||
use_host_network=False,
|
||||
# large enough timeout, since some testcases take very long to run
|
||||
timeout=600,
|
||||
api_key=os.environ.get('ALLHANDS_API_KEY', None),
|
||||
remote_runtime_api_url=os.environ.get('SANDBOX_REMOTE_RUNTIME_API_URL'),
|
||||
remote_runtime_init_timeout=3600,
|
||||
remote_runtime_resource_factor=get_instance_resource_factor(
|
||||
dataset_name=metadata.dataset,
|
||||
instance_id=instance['instance_id'],
|
||||
),
|
||||
),
|
||||
sandbox=sandbox_config,
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
@@ -412,6 +408,21 @@ if __name__ == '__main__':
|
||||
with open(metadata_filepath, 'r') as metadata_file:
|
||||
data = metadata_file.read()
|
||||
metadata = EvalMetadata.model_validate_json(data)
|
||||
else:
|
||||
# Initialize with a dummy metadata when file doesn't exist
|
||||
metadata = EvalMetadata(
|
||||
agent_class='dummy_agent', # Placeholder agent class
|
||||
llm_config=LLMConfig(model='dummy_model'), # Minimal LLM config
|
||||
max_iterations=1, # Minimal iterations
|
||||
eval_output_dir=os.path.dirname(
|
||||
args.input_file
|
||||
), # Use input file dir as output dir
|
||||
start_time=time.strftime('%Y-%m-%d %H:%M:%S'), # Current time
|
||||
git_commit=subprocess.check_output(['git', 'rev-parse', 'HEAD'])
|
||||
.decode('utf-8')
|
||||
.strip(), # Current commit
|
||||
dataset=args.dataset, # Dataset name from args
|
||||
)
|
||||
|
||||
# The evaluation harness constrains the signature of `process_instance_func` but we need to
|
||||
# pass extra information. Build a new function object to avoid issues with multiprocessing.
|
||||
|
||||
@@ -7,6 +7,7 @@ This file tracks the resource requirements of different instances.
|
||||
|
||||
import json
|
||||
import os
|
||||
|
||||
from openhands.core.logger import openhands_logger as logger
|
||||
|
||||
CUR_DIR = os.path.dirname(os.path.abspath(__file__))
|
||||
|
||||
@@ -1 +0,0 @@
|
||||
{"pydata__xarray-6721": 8, "pytest-dev__pytest-7236": 8, "matplotlib__matplotlib-24627": 4, "django__django-15561": 4, "django__django-15098": 4, "django__django-14771": 4, "sympy__sympy-21612": 4, "sympy__sympy-15345": 4, "psf__requests-5414": 4, "astropy__astropy-14508": 2, "django__django-11451": 2, "django__django-11477": 2, "django__django-10880": 2, "django__django-11163": 2, "django__django-11815": 2, "astropy__astropy-14369": 2, "django__django-10097": 2, "django__django-10554": 2, "django__django-12304": 2, "django__django-12325": 2, "django__django-11551": 2, "django__django-11734": 2, "django__django-13109": 2, "django__django-13089": 2, "django__django-13343": 2, "django__django-13363": 2, "django__django-13809": 2, "django__django-13810": 2, "django__django-13786": 2, "django__django-13807": 2, "django__django-14493": 2, "django__django-11820": 2, "django__django-11951": 2, "django__django-11964": 2, "astropy__astropy-14309": 2, "astropy__astropy-14365": 2, "astropy__astropy-12907": 2, "astropy__astropy-14182": 2, "django__django-15161": 2, "django__django-15128": 2, "django__django-14999": 2, "django__django-14915": 2, "django__django-14752": 2, "django__django-14765": 2, "django__django-14089": 2, "django__django-15252": 2, "django__django-15380": 2, "django__django-15382": 2, "django__django-15499": 2, "django__django-15467": 2, "django__django-15280": 2, "django__django-15315": 2, "django__django-15277": 2, "django__django-15268": 2, "django__django-15629": 2, "django__django-15695": 2, "django__django-15732": 2, "django__django-15863": 2, "django__django-16082": 2, "django__django-16145": 2, "django__django-16256": 2, "django__django-16429": 2, "django__django-16454": 2, "django__django-16493": 2, "matplotlib__matplotlib-13989": 2, "matplotlib__matplotlib-20488": 2, "django__django-15503": 2, "django__django-15525": 2, "django__django-15375": 2, "django__django-15278": 2, "matplotlib__matplotlib-21568": 2, "matplotlib__matplotlib-20859": 2, "matplotlib__matplotlib-20826": 2, "matplotlib__matplotlib-20676": 2, "matplotlib__matplotlib-23412": 2, "matplotlib__matplotlib-22719": 2, "matplotlib__matplotlib-23299": 2, "matplotlib__matplotlib-22865": 2, "matplotlib__matplotlib-24149": 2, "matplotlib__matplotlib-24177": 2, "matplotlib__matplotlib-24570": 2, "matplotlib__matplotlib-24637": 2, "matplotlib__matplotlib-24970": 2, "matplotlib__matplotlib-23476": 2, "matplotlib__matplotlib-24026": 2, "matplotlib__matplotlib-23314": 2, "matplotlib__matplotlib-25332": 2, "matplotlib__matplotlib-25311": 2, "matplotlib__matplotlib-25122": 2, "matplotlib__matplotlib-25479": 2, "matplotlib__matplotlib-26342": 2, "psf__requests-2317": 2, "matplotlib__matplotlib-25960": 2, "matplotlib__matplotlib-25775": 2, "pydata__xarray-4356": 2, "pydata__xarray-4075": 2, "pydata__xarray-6461": 2, "pydata__xarray-4687": 2, "pydata__xarray-6599": 2, "pylint-dev__pylint-4661": 2, "django__django-15554": 2, "django__django-15563": 2, "pytest-dev__pytest-5262": 2, "pytest-dev__pytest-10081": 2, "scikit-learn__scikit-learn-12973": 2, "scikit-learn__scikit-learn-13124": 2, "scikit-learn__scikit-learn-13779": 2, "scikit-learn__scikit-learn-14141": 2, "scikit-learn__scikit-learn-13439": 2, "scikit-learn__scikit-learn-13496": 2, "scikit-learn__scikit-learn-15100": 2, "scikit-learn__scikit-learn-25102": 2, "scikit-learn__scikit-learn-25232": 2, "scikit-learn__scikit-learn-25747": 2, "scikit-learn__scikit-learn-26323": 2, "scikit-learn__scikit-learn-9288": 2, "scikit-learn__scikit-learn-14496": 2, "scikit-learn__scikit-learn-14629": 2, "sphinx-doc__sphinx-8265": 2, "sphinx-doc__sphinx-8548": 2, "sphinx-doc__sphinx-8593": 2, "sphinx-doc__sphinx-8595": 2, "sphinx-doc__sphinx-8621": 2, "sphinx-doc__sphinx-8638": 2, "sphinx-doc__sphinx-9229": 2, "sphinx-doc__sphinx-9281": 2, "sphinx-doc__sphinx-9461": 2, "sphinx-doc__sphinx-9591": 2, "sphinx-doc__sphinx-9658": 2, "sphinx-doc__sphinx-9673": 2, "sympy__sympy-12096": 2, "sympy__sympy-12481": 2, "sphinx-doc__sphinx-10323": 2, "sphinx-doc__sphinx-7590": 2, "sympy__sympy-13877": 2, "sympy__sympy-12489": 2, "sympy__sympy-15809": 2, "sympy__sympy-14711": 2, "sympy__sympy-16597": 2, "sympy__sympy-16766": 2, "sympy__sympy-16792": 2, "sympy__sympy-15875": 2, "sympy__sympy-17655": 2, "sympy__sympy-18189": 2, "sympy__sympy-18763": 2, "sympy__sympy-19040": 2, "sympy__sympy-19495": 2, "sympy__sympy-19637": 2, "sympy__sympy-19783": 2, "sympy__sympy-17630": 2, "sympy__sympy-20428": 2, "sympy__sympy-20590": 2, "sympy__sympy-20801": 2, "sympy__sympy-21379": 2, "sympy__sympy-21847": 2, "sympy__sympy-22456": 2, "sympy__sympy-22714": 2, "sympy__sympy-22914": 2, "sympy__sympy-23262": 2, "sympy__sympy-23413": 2, "sympy__sympy-23534": 2, "sympy__sympy-24066": 2, "sympy__sympy-24213": 2, "sympy__sympy-24443": 2, "sympy__sympy-24562": 2, "sympy__sympy-24661": 2}
|
||||
@@ -18,6 +18,7 @@ from evaluation.utils.shared import (
|
||||
EvalOutput,
|
||||
assert_and_raise,
|
||||
codeact_user_response,
|
||||
get_default_sandbox_config_for_eval,
|
||||
get_metrics,
|
||||
is_fatal_evaluation_error,
|
||||
make_metadata,
|
||||
@@ -30,7 +31,6 @@ from openhands.controller.state.state import State
|
||||
from openhands.core.config import (
|
||||
AgentConfig,
|
||||
AppConfig,
|
||||
SandboxConfig,
|
||||
get_llm_config_arg,
|
||||
get_parser,
|
||||
)
|
||||
@@ -122,28 +122,23 @@ def get_config(
|
||||
base_container_image = SWE_BENCH_CONTAINER_IMAGE
|
||||
logger.info(f'Using swe-bench container image: {base_container_image}')
|
||||
|
||||
sandbox_config = get_default_sandbox_config_for_eval()
|
||||
sandbox_config.base_container_image = base_container_image
|
||||
sandbox_config.enable_auto_lint = True
|
||||
sandbox_config.use_host_network = False
|
||||
# Add platform to the sandbox config to solve issue 4401
|
||||
sandbox_config.platform = 'linux/amd64'
|
||||
sandbox_config.remote_runtime_resource_factor = get_instance_resource_factor(
|
||||
dataset_name=metadata.dataset,
|
||||
instance_id=instance['instance_id'],
|
||||
)
|
||||
|
||||
config = AppConfig(
|
||||
default_agent=metadata.agent_class,
|
||||
run_as_openhands=False,
|
||||
max_iterations=metadata.max_iterations,
|
||||
runtime=os.environ.get('RUNTIME', 'docker'),
|
||||
sandbox=SandboxConfig(
|
||||
base_container_image=base_container_image,
|
||||
enable_auto_lint=True,
|
||||
use_host_network=False,
|
||||
# large enough timeout, since some testcases take very long to run
|
||||
timeout=300,
|
||||
# Add platform to the sandbox config to solve issue 4401
|
||||
platform='linux/amd64',
|
||||
api_key=os.environ.get('ALLHANDS_API_KEY', None),
|
||||
remote_runtime_api_url=os.environ.get('SANDBOX_REMOTE_RUNTIME_API_URL'),
|
||||
keep_runtime_alive=False,
|
||||
remote_runtime_init_timeout=3600,
|
||||
remote_runtime_resource_factor=get_instance_resource_factor(
|
||||
dataset_name=metadata.dataset,
|
||||
instance_id=instance['instance_id'],
|
||||
),
|
||||
),
|
||||
sandbox=sandbox_config,
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
@@ -329,6 +324,22 @@ def complete_runtime(
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
|
||||
if obs.exit_code == -1:
|
||||
# The previous command is still running
|
||||
# We need to kill previous command
|
||||
logger.info('The previous command is still running, trying to kill it...')
|
||||
action = CmdRunAction(command='C-c')
|
||||
obs = runtime.run_action(action)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
|
||||
# Then run the command again
|
||||
action = CmdRunAction(command=f'cd /workspace/{workspace_dir_name}')
|
||||
action.set_hard_timeout(600)
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
|
||||
assert_and_raise(
|
||||
isinstance(obs, CmdOutputObservation) and obs.exit_code == 0,
|
||||
f'Failed to cd to /workspace/{workspace_dir_name}: {str(obs)}',
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
set -e
|
||||
|
||||
LEVEL=$1
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
|
||||
# This is ONLY used for pushing docker images created by https://github.com/princeton-nlp/SWE-bench/blob/main/docs/20240627_docker/README.md
|
||||
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
|
||||
FOLDER_PATH=$1
|
||||
NEW_FOLDER_PATH=${FOLDER_PATH}.swebench_submission
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user