Compare commits

..

92 Commits

Author SHA1 Message Date
Robert Brennan
c0d7f6d56b random changes to agent 2024-10-28 16:19:15 -04:00
Robert Brennan
fdb385ab93 Simplify makefile (#4591) 2024-10-28 13:10:32 -04:00
sp.wack
13d101e092 fix(frontend): Record events sent to WS (#4596) 2024-10-28 15:53:31 +00:00
sp.wack
6cf3728247 test(frontend): Test, refactor, and improve the chat interface (#4549) 2024-10-28 17:26:28 +04:00
sp.wack
ae188458ef chore(frontend): Remove root level package.json (#4590) 2024-10-28 16:42:17 +04:00
Robert Brennan
a20da54e3a Remove verbose log from agent controller (#4585) 2024-10-27 15:50:23 +00:00
Mahmoud Sehsah
2a6740f4ba fix(builder): Build the runtime with docker version that contains (-) in the version name (#4580) 2024-10-27 02:54:52 +01:00
Ryan H. Tran
5ba7bc6be1 Mention build-essential dependency for ubuntu in dev doc (#4511) 2024-10-26 20:17:43 -05:00
Xingyao Wang
98d4884ced fix(controller): stop when run into loop (#4579) 2024-10-26 19:40:58 -05:00
Xingyao Wang
be3cbb045e fix(controllor): make agent controller stops when encounter fatal observation (#4573) 2024-10-26 13:28:27 -05:00
dependabot[bot]
8bfd2fcf4f chore(deps): bump the version-all group across 1 directory with 8 updates (#4564)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-10-25 20:07:16 +02:00
tofarr
d4e3982a6b Small refactor : EventStream as a dataclass (#4557) 2024-10-25 17:31:20 +00:00
Xingyao Wang
1f23dc89b6 fix(eval): add runtime.connect to all eval harness (#4565) 2024-10-26 00:41:30 +08:00
Xingyao Wang
7340b78962 feat(eval): rewrite log_completions to save completions to directory (#4566) 2024-10-25 16:36:11 +00:00
tofarr
c3da25febc Fix for docker leak (#4560) 2024-10-25 15:53:39 +00:00
Robert Brennan
8d2b2d4318 Refactor runtime to add a connect method (#4410)
Co-authored-by: Tim O'Farrell <tofarr@gmail.com>
2024-10-25 09:02:19 -04:00
tofarr
c4f5c07be1 Refactor: shorter syntax (#4558) 2024-10-25 06:45:28 -06:00
Xingyao Wang
349e2dbe50 refactor: move bash related logic into BashSession for cleaner code (#4527)
Co-authored-by: Tim O'Farrell <tofarr@gmail.com>
2024-10-25 20:44:25 +08:00
Xingyao Wang
dcd4b04f57 feat(llm): update prompt caching list to include new sonnet (#4552) 2024-10-25 20:36:35 +08:00
sp.wack
78eacc4489 fix(frontend): Fix loader checking unset config variable in window (#4546)
Co-authored-by: Robert Brennan <accounts@rbren.io>
2024-10-25 08:14:40 -04:00
tofarr
60990c128a Feature: Minor refactor of SessionManager to make it a dataclass (#4553) 2024-10-24 14:32:05 -06:00
Robert Brennan
c4c25ea229 Minor fixes for GitHub credential exchange (#4554) 2024-10-24 16:29:03 -04:00
tofarr
930726f4e8 Fix for issue where we hammer docker needlessly (#4551) 2024-10-24 20:03:35 +00:00
tofarr
ee2c2ff2b8 Feat changed "is_confirmed" to "confirmation_state" (#4508) 2024-10-24 13:35:14 -06:00
Robert Brennan
8c064fe3df add catch all route, disable caching (#4547) 2024-10-24 15:06:17 -04:00
sp.wack
e878741ae7 test(frontend): Test, refactor, and improve the chat input (#4535) 2024-10-24 18:19:41 +04:00
tofarr
90e2bf4883 Split bash commands by the new line character (#4462) 2024-10-24 07:44:38 -06:00
dependabot[bot]
615b94cf2f chore(deps): bump the version-all group across 1 directory with 19 updates (#4531)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-10-23 21:28:02 +02:00
Graham Neubig
ce2430180f Update README.md to fix miniwob name (#4534) 2024-10-23 18:24:43 +00:00
Xingyao Wang
eaea94cc1b fix(remote runtime): retry on 429 error on remote build & log retries (#4532) 2024-10-24 02:07:11 +08:00
sp.wack
385cc8f512 [ALL-561] feat(frontend|backend): Display error messages in the chat (#4509) 2024-10-23 18:56:00 +04:00
Xingyao Wang
2d5b360505 refactor: re-organize different runtime implementations into an impl folder (#4346)
Co-authored-by: Graham Neubig <neubig@gmail.com>
2024-10-23 10:10:03 +00:00
mamoodi
9b6fd239d0 Release 0.11.0 (#4523) 2024-10-22 16:43:13 -04:00
sp.wack
dd15845b91 [ALL-570] fix(frontend): Don't wrap filenames in the file explorer (#4521) 2024-10-22 23:31:42 +04:00
sp.wack
64adb64fef [ALL-597] fix(frontend): Fetch config.json locally (#4522) 2024-10-22 23:31:29 +04:00
Yashwanth S C
6573304014 fix(frontend): Error when API key is not entered is not clear (#4429) 2024-10-22 22:23:09 +04:00
sp.wack
29ddcdaf46 [ALL-469] fix(frontend): Indicate that import projects require zips (#4515) 2024-10-22 22:15:08 +04:00
mamoodi
d0bbad8eda Remove settings base container as it is not supported (#4520) 2024-10-22 18:14:59 +00:00
sp.wack
7b81df2a94 [ALL-596] fix(frontend): Fix import project from sending request before runtime is active (#4513) 2024-10-22 18:04:49 +00:00
mamoodi
550044454c Revert docker install in OpenHands app image (#4519) 2024-10-22 13:46:19 -04:00
sp.wack
3927fc3616 [ALL-594] chore(frontend): Add frontend error handling for failed requests (#4501) 2024-10-22 20:05:59 +04:00
sp.wack
864f81bc71 test(frontend): User actions and friends (#4497) 2024-10-22 20:04:07 +04:00
Graham Neubig
54250e3fe2 Update evaluation README.md structure (#4516) 2024-10-22 14:42:22 +00:00
Xingyao Wang
da548d308c [agent] LLM-based editing (#3985)
Co-authored-by: Tim O'Farrell <tofarr@gmail.com>
Co-authored-by: Engel Nyst <enyst@users.noreply.github.com>
Co-authored-by: Robert Brennan <accounts@rbren.io>
Co-authored-by: Graham Neubig <neubig@gmail.com>
2024-10-22 04:51:44 +08:00
sp.wack
6fe5482b20 [ALL-571] chore(frontend): Move saas-related configs to config.json (#4496) 2024-10-21 14:59:20 +00:00
dependabot[bot]
520586a89c chore(deps): bump @mdx-js/react from 3.0.1 to 3.1.0 in /docs in the version-all group (#4478)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-10-21 09:21:33 +04:00
Xingyao Wang
263798584e fix(runtime): replace codec error in pexcept (#4493) 2024-10-20 12:51:05 +08:00
Alejandro Cuadron Lafuente
a9a593bb21 [Fix] Added support to specify the platform on which the runtime image should be built. (#4402)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Xingyao Wang <xingyao@all-hands.dev>
Co-authored-by: mamoodi <mamoodiha@gmail.com>
Co-authored-by: tofarr <tofarr@gmail.com>
Co-authored-by: Robert Brennan <contact@rbren.io>
2024-10-20 09:19:05 +08:00
tobitege
6471d0f94d .gitignore: ignore all node_modules folders (#4491) 2024-10-20 09:17:45 +08:00
sp.wack
5cc16cb82a fix(frontend): Fix waitlist logic (#4492) 2024-10-19 14:20:54 -04:00
Robert Brennan
cc68756b26 fix freeze on zip-files endpoint (#4487) 2024-10-18 15:29:07 -04:00
Xingyao Wang
126bf316bc fix(docker): Dockerfile failed to build on RemoteRuntime (#4481)
Co-authored-by: tofarr <tofarr@gmail.com>
2024-10-19 03:28:39 +08:00
Xingyao Wang
91308ba4dc feat: clean-up retries RemoteRuntime & add FatalErrorObservation (#4485) 2024-10-18 17:23:13 +00:00
Graham Neubig
b660aa99b8 Fix issue #4480: '[Bug]: Being blocked by cloudflare results in futile retries (#4482)
Co-authored-by: openhands <openhands@all-hands.dev>
2024-10-18 13:04:34 -04:00
sp.wack
cf793582a7 [ALL-543] feat(frontend): Setup auth route, replace loading spinner, add new route (#4448) 2024-10-18 19:32:46 +04:00
Robert Brennan
56fe905241 reduce dependabot frequency (#4305) 2024-10-18 11:21:15 -04:00
mamoodi
02abf60433 Run flaky mac tests nightly (#4470) 2024-10-18 10:38:40 -04:00
mamoodi
e6a5e39047 Update docs associated with new UI (#4469) 2024-10-18 10:19:56 -04:00
mamoodi
feee509de7 Update leftover versions (#4468) 2024-10-18 09:28:53 -04:00
Robert Brennan
fd6facbf03 update contributing docs (#4438)
Co-authored-by: Engel Nyst <enyst@users.noreply.github.com>
2024-10-18 05:08:54 +02:00
dependabot[bot]
1ea3087eec chore(deps): bump modal from 0.64.182 to 0.64.192 (#4460)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-10-18 05:02:20 +02:00
dependabot[bot]
2e09b4f95e chore(deps-dev): bump torch from 2.2.2 to 2.5.0 (#4459)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-10-18 05:01:27 +02:00
mamoodi
d2d55f5ea2 Update custom sandbox doc (#4332) 2024-10-17 18:23:57 -04:00
mamoodi
0e467b1429 Release 0.10.0 (#4463) 2024-10-17 22:23:40 +00:00
Xingyao Wang
ec3152b6e1 linter: only lint on updated lines in the new file (#4409) 2024-10-17 15:57:03 -04:00
sp.wack
642e01b673 fix(frontend): Update build directory and referenced paths (#4461) 2024-10-17 23:24:49 +04:00
sp.wack
6cb174b7d1 [ALL-557] feat(frontend): Add save and discard actions to the editor (#4442)
Co-authored-by: mamoodi <mamoodiha@gmail.com>
2024-10-17 17:14:55 +00:00
Robert Brennan
154854bbe3 run in dev mode in makefile (#4452) 2024-10-17 12:40:47 -04:00
sp.wack
678630c5bd fix(frontend): Catch config fetch error and set default fallback (#4453) 2024-10-17 16:17:44 +00:00
dependabot[bot]
ad800bf373 chore(deps): bump litellm from 1.49.5 to 1.49.6 (#4458)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-10-17 17:51:45 +02:00
dependabot[bot]
206788a0e8 chore(deps): bump react-syntax-highlighter from 15.5.0 to 15.6.1 in /frontend (#4457)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-10-17 15:31:07 +00:00
dependabot[bot]
ca3fbb2a80 chore(deps-dev): bump @types/node from 22.7.5 to 22.7.6 in /frontend (#4455)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-10-17 15:29:23 +00:00
dependabot[bot]
cc500a622a chore(deps-dev): bump @testing-library/jest-dom from 6.5.0 to 6.6.1 in /frontend (#4456)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-10-17 15:29:01 +00:00
tofarr
5fb3dece93 Feat: Divided docker layer to make it easier to cache (#4313)
Co-authored-by: Xingyao Wang <xingyao@all-hands.dev>
2024-10-17 15:08:56 +00:00
sp.wack
83c096b974 [ALL-551] chore(frontend): Retrieve APP_MODE from the server (#4423) 2024-10-17 18:35:21 +04:00
Xingyao Wang
015df47e53 chore: remove integration tests from CI to unblock (#4451) 2024-10-17 14:19:53 +00:00
Jiayi Pan
c1b323a076 Show actual dataset name in swebench log directory (#4417) 2024-10-17 10:32:38 +08:00
Xingyao Wang
84a578ad20 [test] remove integration tests from CI & move them into evaluation (#4447) 2024-10-17 05:38:23 +08:00
dependabot[bot]
8e5db345b2 chore(deps): bump boto3 from 1.35.40 to 1.35.42 (#4445)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-10-16 22:51:40 +02:00
dependabot[bot]
f61266841c chore(deps): bump browsergym from 0.8.0 to 0.8.1 (#4437)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-10-16 22:50:39 +02:00
dependabot[bot]
277d991b37 chore(deps): bump fastapi from 0.115.0 to 0.115.2 (#4370)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-10-16 22:02:26 +02:00
Engel Nyst
20aa66d5e2 Bump Mac version in CI (#4441) 2024-10-16 21:52:21 +02:00
dependabot[bot]
9bc6252967 chore(deps): bump anthropic from 0.36.0 to 0.36.1 (#4436)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-10-16 21:25:00 +02:00
Alejandro Cuadron Lafuente
bb416009c5 [Fix] Fixed the inputs to the ManagerAgent (#4427)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Xingyao Wang <xingyao@all-hands.dev>
Co-authored-by: mamoodi <mamoodiha@gmail.com>
Co-authored-by: tofarr <tofarr@gmail.com>
Co-authored-by: Robert Brennan <contact@rbren.io>
2024-10-16 20:47:46 +02:00
Robert Brennan
226ea545fa Add workflow scope to GitHub authentication URL (#4439)
Co-authored-by: openhands <openhands@all-hands.dev>
2024-10-16 14:41:46 -04:00
tofarr
e12bff5189 Fix: Removed flaky test (#4444) 2024-10-16 18:10:27 +00:00
dependabot[bot]
23d3becf1d chore(deps): bump litellm from 1.49.4 to 1.49.5 (#4431)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-10-16 18:36:39 +02:00
Robert Brennan
be79ccdb39 fix default host (#4413) 2024-10-16 10:56:42 -04:00
sp.wack
2277897f86 feat(frontend): Improve file based routing (#4317) 2024-10-16 18:54:15 +04:00
tofarr
be9619be3a Feat faster unit tests 2 (#4418) 2024-10-16 08:40:53 -06:00
tofarr
cb58dab82b Fix loop graceful shutdown (#4394) 2024-10-16 08:40:33 -06:00
sp.wack
8ab293a667 fix(frontend): Fix request headers (#4422) 2024-10-16 14:22:18 +00:00
459 changed files with 18261 additions and 38570 deletions

View File

@@ -1,21 +1,35 @@
# To get started with Dependabot version updates, you'll need to specify which
# package ecosystems to update and where the package manifests are located.
# Please see the documentation for all configuration options:
# https://docs.github.com/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file
version: 2
updates:
- package-ecosystem: "pip"
directory: "/"
schedule:
interval: "daily"
open-pull-requests-limit: 20
open-pull-requests-limit: 1
groups:
# put packages in their own group if they have a history of breaking the build or needing to be reverted
pre-commit:
patterns:
- "pre-commit"
llama:
patterns:
- "llama*"
chromadb:
patterns:
- "chromadb"
security-all:
applies-to: "security-updates"
patterns:
- "*"
version-all:
applies-to: "version-updates"
patterns:
- "*"
- package-ecosystem: "npm"
directory: "/frontend"
schedule:
interval: "daily"
open-pull-requests-limit: 20
open-pull-requests-limit: 1
groups:
docusaurus:
patterns:
@@ -23,12 +37,21 @@ updates:
eslint:
patterns:
- "*eslint*"
security-all:
applies-to: "security-updates"
patterns:
- "*"
version-all:
applies-to: "version-updates"
patterns:
- "*"
- package-ecosystem: "npm"
directory: "/docs"
schedule:
interval: "daily"
open-pull-requests-limit: 20
interval: "weekly"
day: "wednesday"
open-pull-requests-limit: 1
groups:
docusaurus:
patterns:
@@ -36,3 +59,11 @@ updates:
eslint:
patterns:
- "*eslint*"
security-all:
applies-to: "security-updates"
patterns:
- "*"
version-all:
applies-to: "version-updates"
patterns:
- "*"

View File

@@ -88,14 +88,6 @@ jobs:
hash_from_app_image=$(cat docker-outputs.txt | grep "Hash for docker build directory" | awk -F "): " '{print $2}' | uniq | head -n1)
echo "hash_from_app_image=$hash_from_app_image" >> $GITHUB_OUTPUT
echo "Hash from app image: $hash_from_app_image"
# This test should move when we have a test suite for the app image
- name: Test docker in App Image
run: |
# Lowercase the repository owner
export REPO_OWNER=${{ github.repository_owner }}
REPO_OWNER=$(echo $REPO_OWNER | tr '[:upper:]' '[:lower:]')
docker run -e SANDBOX_USER_ID=0 -v /var/run/docker.sock:/var/run/docker.sock ghcr.io/${REPO_OWNER}/openhands:${{ env.RELEVANT_SHA }} /bin/bash -c "docker run hello-world"
# Builds the runtime Docker images
ghcr_build_runtime:
@@ -384,78 +376,6 @@ jobs:
env:
CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
# Run integration tests with the eventstream runtime Docker image
runtime_integration_tests_on_linux:
name: RT Integration Tests (Linux)
runs-on: ubuntu-latest
needs: [ghcr_build_runtime]
strategy:
fail-fast: false
matrix:
base_image: ['nikolaik']
steps:
- uses: actions/checkout@v4
- name: Free Disk Space (Ubuntu)
uses: jlumbroso/free-disk-space@main
with:
# this might remove tools that are actually needed,
# if set to "true" but frees about 6 GB
tool-cache: true
# all of these default to true, but feel free to set to
# "false" if necessary for your workflow
android: true
dotnet: true
haskell: true
large-packages: true
docker-images: false
swap-storage: true
- name: Set up Docker Buildx
id: buildx
uses: docker/setup-buildx-action@v3
# Forked repos can't push to GHCR, so we need to download the image as an artifact
- name: Download runtime image for fork
if: github.event.pull_request.head.repo.fork
uses: actions/download-artifact@v4
with:
name: runtime-${{ matrix.base_image }}
path: /tmp
- name: Load runtime image for fork
if: github.event.pull_request.head.repo.fork
run: |
docker load --input /tmp/runtime-${{ matrix.base_image }}.tar
- name: Cache Poetry dependencies
uses: actions/cache@v4
with:
path: |
~/.cache/pypoetry
~/.virtualenvs
key: ${{ runner.os }}-poetry-${{ hashFiles('**/poetry.lock') }}
restore-keys: |
${{ runner.os }}-poetry-
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: '3.12'
- name: Install poetry via pipx
run: pipx install poetry
- name: Install Python dependencies using Poetry
run: make install-python-dependencies
- name: Run integration tests
run: |
image_name=ghcr.io/${{ github.repository_owner }}/runtime:${{ env.RELEVANT_SHA }}-${{ matrix.base_image }}
image_name=$(echo $image_name | tr '[:upper:]' '[:lower:]')
TEST_RUNTIME=eventstream \
SANDBOX_USER_ID=$(id -u) \
SANDBOX_RUNTIME_CONTAINER_IMAGE=$image_name \
TEST_IN_CI=true \
TEST_ONLY=true \
./tests/integration/regenerate.sh
- name: Upload coverage to Codecov
uses: codecov/codecov-action@v4
env:
CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
# The two following jobs (named identically) are to check whether all the runtime tests have passed as the
# "All Runtime Tests Passed" is a required job for PRs to merge
# Due to this bug: https://github.com/actions/runner/issues/2566, we want to create a job that runs when the
@@ -464,7 +384,7 @@ jobs:
name: All Runtime Tests Passed
if: ${{ !cancelled() && !contains(needs.*.result, 'failure') && !contains(needs.*.result, 'cancelled') }}
runs-on: ubuntu-latest
needs: [test_runtime_root, test_runtime_oh, runtime_integration_tests_on_linux, verify_hash_equivalence_in_runtime_and_app]
needs: [test_runtime_root, test_runtime_oh, verify_hash_equivalence_in_runtime_and_app]
steps:
- name: All tests passed
run: echo "All runtime tests have passed successfully!"
@@ -473,7 +393,7 @@ jobs:
name: All Runtime Tests Passed
if: ${{ cancelled() || contains(needs.*.result, 'failure') || contains(needs.*.result, 'cancelled') }}
runs-on: ubuntu-latest
needs: [test_runtime_root, test_runtime_oh, runtime_integration_tests_on_linux, verify_hash_equivalence_in_runtime_and_app]
needs: [test_runtime_root, test_runtime_oh, verify_hash_equivalence_in_runtime_and_app]
steps:
- name: Some tests failed
run: |

96
.github/workflows/py-unit-tests-mac.yml vendored Normal file
View File

@@ -0,0 +1,96 @@
# Workflow that runs python unit tests on mac
name: Run Python Unit Tests Mac
# This job is flaky so only run it nightly
on:
schedule:
- cron: '0 0 * * *'
jobs:
# Run python unit tests on macOS
test-on-macos:
name: Python Unit Tests on macOS
runs-on: macos-14
env:
INSTALL_DOCKER: '1' # Set to '0' to skip Docker installation
strategy:
matrix:
python-version: ['3.12']
steps:
- uses: actions/checkout@v4
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
- name: Cache Poetry dependencies
uses: actions/cache@v4
with:
path: |
~/.cache/pypoetry
~/.virtualenvs
key: ${{ runner.os }}-poetry-${{ hashFiles('**/poetry.lock') }}
restore-keys: |
${{ runner.os }}-poetry-
- name: Install poetry via pipx
run: pipx install poetry
- name: Install Python dependencies using Poetry
run: poetry install --without evaluation,llama-index
- name: Install & Start Docker
if: env.INSTALL_DOCKER == '1'
run: |
INSTANCE_NAME="colima-${GITHUB_RUN_ID}"
# Uninstall colima to upgrade to the latest version
if brew list colima &>/dev/null; then
brew uninstall colima
# unlinking colima dependency: go
brew uninstall go@1.21
fi
rm -rf ~/.colima ~/.lima
brew install --HEAD colima
brew install docker
start_colima() {
# Find a free port in the range 10000-20000
RANDOM_PORT=$((RANDOM % 10001 + 10000))
# Original line:
if ! colima start --network-address --arch x86_64 --cpu=1 --memory=1 --verbose --ssh-port $RANDOM_PORT; then
echo "Failed to start Colima."
return 1
fi
return 0
}
# Attempt to start Colima for 5 total attempts:
ATTEMPT_LIMIT=5
for ((i=1; i<=ATTEMPT_LIMIT; i++)); do
if start_colima; then
echo "Colima started successfully."
break
else
colima stop -f
sleep 10
colima delete -f
if [ $i -eq $ATTEMPT_LIMIT ]; then
exit 1
fi
sleep 10
fi
done
# For testcontainers to find the Colima socket
# https://github.com/abiosoft/colima/blob/main/docs/FAQ.md#cannot-connect-to-the-docker-daemon-at-unixvarrundockersock-is-the-docker-daemon-running
sudo ln -sf $HOME/.colima/default/docker.sock /var/run/docker.sock
- name: Build Environment
run: make build
- name: Set up Docker Buildx
id: buildx
uses: docker/setup-buildx-action@v3
- name: Run Tests
run: poetry run pytest --forked --cov=openhands --cov-report=xml ./tests/unit --ignore=tests/unit/test_memory.py
- name: Upload coverage to Codecov
uses: codecov/codecov-action@v4
env:
CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}

View File

@@ -16,94 +16,6 @@ concurrency:
cancel-in-progress: true
jobs:
# Run python unit tests on macOS
test-on-macos:
name: Python Unit Tests on macOS
runs-on: macos-12
env:
INSTALL_DOCKER: '1' # Set to '0' to skip Docker installation
strategy:
matrix:
python-version: ['3.12']
steps:
- uses: actions/checkout@v4
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
- name: Cache Poetry dependencies
uses: actions/cache@v4
with:
path: |
~/.cache/pypoetry
~/.virtualenvs
key: ${{ runner.os }}-poetry-${{ hashFiles('**/poetry.lock') }}
restore-keys: |
${{ runner.os }}-poetry-
- name: Install poetry via pipx
run: pipx install poetry
- name: Install Python dependencies using Poetry
run: poetry install --without evaluation,llama-index
- name: Install & Start Docker
if: env.INSTALL_DOCKER == '1'
run: |
INSTANCE_NAME="colima-${GITHUB_RUN_ID}"
# Uninstall colima to upgrade to the latest version
if brew list colima &>/dev/null; then
brew uninstall colima
# unlinking colima dependency: go
brew uninstall go@1.21
fi
rm -rf ~/.colima ~/.lima
brew install --HEAD colima
brew install docker
start_colima() {
# Find a free port in the range 10000-20000
RANDOM_PORT=$((RANDOM % 10001 + 10000))
# Original line:
if ! colima start --network-address --arch x86_64 --cpu=1 --memory=1 --verbose --ssh-port $RANDOM_PORT; then
echo "Failed to start Colima."
return 1
fi
return 0
}
# Attempt to start Colima for 5 total attempts:
ATTEMPT_LIMIT=5
for ((i=1; i<=ATTEMPT_LIMIT; i++)); do
if start_colima; then
echo "Colima started successfully."
break
else
colima stop -f
sleep 10
colima delete -f
if [ $i -eq $ATTEMPT_LIMIT ]; then
exit 1
fi
sleep 10
fi
done
# For testcontainers to find the Colima socket
# https://github.com/abiosoft/colima/blob/main/docs/FAQ.md#cannot-connect-to-the-docker-daemon-at-unixvarrundockersock-is-the-docker-daemon-running
sudo ln -sf $HOME/.colima/default/docker.sock /var/run/docker.sock
- name: Build Environment
run: make build
- name: Set up Docker Buildx
id: buildx
uses: docker/setup-buildx-action@v3
- name: Run Tests
run: poetry run pytest --forked --cov=openhands --cov-report=xml ./tests/unit --ignore=tests/unit/test_memory.py
- name: Upload coverage to Codecov
uses: codecov/codecov-action@v4
env:
CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
# Run python unit tests on Linux
test-on-linux:
name: Python Unit Tests on Linux

View File

@@ -1,73 +0,0 @@
name: Regenerate Integration Tests
on:
workflow_dispatch:
inputs:
debug:
description: 'Enable debug mode'
type: boolean
default: true
log_to_file:
description: 'Enable logging to file'
type: boolean
default: true
force_regenerate_tests:
description: 'Force regeneration of tests'
type: boolean
default: false
force_use_llm:
description: 'Force use of LLM'
type: boolean
default: false
jobs:
regenerate_integration_tests:
if: github.ref != 'refs/heads/main'
runs-on: ubuntu-latest
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: Set up Docker Buildx
id: buildx
uses: docker/setup-buildx-action@v3
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: "3.12"
- name: Cache Poetry dependencies
uses: actions/cache@v4
with:
path: |
~/.cache/pypoetry
~/.virtualenvs
key: ${{ runner.os }}-poetry-${{ hashFiles('**/poetry.lock') }}
restore-keys: |
${{ runner.os }}-poetry-
- name: Install poetry via pipx
run: pipx install poetry
- name: Install Python dependencies using Poetry
run: make install-python-dependencies
- name: Build Environment
run: make build
- name: Regenerate integration tests
run: |
DEBUG=${{ inputs.debug }} \
LOG_TO_FILE=${{ inputs.log_to_file }} \
FORCE_REGENERATE=${{ inputs.force_regenerate_tests }} \
FORCE_USE_LLM=${{ inputs.force_use_llm }} \
./tests/integration/regenerate.sh
- name: Commit changes
run: |
if git diff --quiet --exit-code; then
echo "No changes to commit"
exit 0
fi
git config --global user.name 'github-actions[bot]'
git config --global user.email 'github-actions[bot]@users.noreply.github.com'
git add .
# run it twice in case pre-commit makes changes
git commit -am "Regenerate integration tests" || git commit -am "Regenerate integration tests"
git push

2
.gitignore vendored
View File

@@ -178,7 +178,6 @@ evaluation/toolqa/data
# frontend
# dependencies
frontend/node_modules
frontend/.pnp
frontend/bun.lockb
frontend/yarn.lock
@@ -228,3 +227,4 @@ runtime_*.tar
containers/runtime/Dockerfile
containers/runtime/project.tar.gz
containers/runtime/code
**/node_modules/

View File

@@ -2,14 +2,6 @@
Thanks for your interest in contributing to OpenHands! We welcome and appreciate contributions.
## How Can I Contribute?
There are many ways that you can contribute:
1. **Download and use** OpenHands, and send [issues](https://github.com/All-Hands-AI/OpenHands/issues) when you encounter something that isn't working or a feature that you'd like to see.
2. **Send feedback** after each session by [clicking the thumbs-up thumbs-down buttons](https://docs.all-hands.dev/modules/usage/feedback), so we can see where things are working and failing, and also build an open dataset for training code agents.
3. **Improve the Codebase** by sending PRs (see details below). In particular, we have some [good first issues](https://github.com/All-Hands-AI/OpenHands/labels/good%20first%20issue) that may be ones to start on.
## Understanding OpenHands's CodeBase
To understand the codebase, please refer to the README in each module:
@@ -19,79 +11,61 @@ To understand the codebase, please refer to the README in each module:
- [agenthub](./openhands/agenthub/README.md)
- [server](./openhands/server/README.md)
## Setting up your development environment
We have a separate doc [Development.md](https://github.com/All-Hands-AI/OpenHands/blob/main/Development.md) that tells you how to set up a development workflow.
## How can I contribute?
There are many ways that you can contribute:
1. **Download and use** OpenHands, and send [issues](https://github.com/All-Hands-AI/OpenHands/issues) when you encounter something that isn't working or a feature that you'd like to see.
2. **Send feedback** after each session by [clicking the thumbs-up thumbs-down buttons](https://docs.all-hands.dev/modules/usage/feedback), so we can see where things are working and failing, and also build an open dataset for training code agents.
3. **Improve the Codebase** by sending PRs (see details below). In particular, we have some [good first issues](https://github.com/All-Hands-AI/OpenHands/labels/good%20first%20issue) that may be ones to start on.
## What can I build?
Here are a few ways you can help improve the codebase.
#### UI/UX
We're always looking to improve the look and feel of the application. If you've got a small fix
for something that's bugging you, feel free to open up a PR that changes the `./frontend` directory.
If you're looking to make a bigger change, add a new UI element, or significantly alter the style
of the application, please open an issue first, or better, join the #frontend channel in our Slack
to gather consensus from our design team first.
#### Improving the agent
Our main agent is the CodeAct agent. You can [see its prompts here](https://github.com/All-Hands-AI/OpenHands/tree/main/openhands/agenthub/codeact_agent)
Changes to these prompts, and to the underlying behavior in Python, can have a huge impact on user experience.
You can try modifying the prompts to see how they change the behavior of the agent as you use the app
locally, but we will need to do an end-to-end evaluation of any changes here to ensure that the agent
is getting better over time.
We use the [SWE-bench](https://www.swebench.com/) benchmark to test our agent. You can join the #evaluation
channel in Slack to learn more.
#### Adding a new agent
You may want to experiment with building new types of agents. You can add an agent to `openhands/agenthub`
to help expand the capabilities of OpenHands.
#### Adding a new runtime
The agent needs a place to run code and commands. When you run OpenHands on your laptop, it uses a Docker container
to do this by default. But there are other ways of creating a sandbox for the agent.
If you work for a company that provides a cloud-based runtime, you could help us add support for that runtime
by implementing the [interface specified here](https://github.com/All-Hands-AI/OpenHands/blob/main/openhands/runtime/runtime.py).
#### Testing
When you write code, it is also good to write tests. Please navigate to the `tests` folder to see existing test suites.
At the moment, we have two kinds of tests: `unit` and `integration`. Please refer to the README for each test suite. These tests also run on GitHub's continuous integration to ensure quality of the project.
## Sending Pull Requests to OpenHands
### 1. Fork the Official Repository
Fork the [OpenHands repository](https://github.com/All-Hands-AI/OpenHands) into your own account.
Clone your own forked repository into your local environment:
You'll need to fork our repository to send us a Pull Request. You can learn more
about how to fork a GitHub repo and open a PR with your changes in [this article](https://medium.com/swlh/forks-and-pull-requests-how-to-contribute-to-github-repos-8843fac34ce8)
```shell
git clone git@github.com:<YOUR-USERNAME>/OpenHands.git
```
### 2. Configure Git
Set the official repository as your [upstream](https://www.atlassian.com/git/tutorials/git-forks-and-upstreams) to synchronize with the latest update in the official repository.
Add the original repository as upstream:
```shell
cd OpenHands
git remote add upstream git@github.com:All-Hands-AI/OpenHands.git
```
Verify that the remote is set:
```shell
git remote -v
```
You should see both `origin` and `upstream` in the output.
### 3. Synchronize with Official Repository
Synchronize latest commit with official repository before coding:
```shell
git fetch upstream
git checkout main
git merge upstream/main
git push origin main
```
### 4. Set up the Development Environment
We have a separate doc [Development.md](https://github.com/All-Hands-AI/OpenHands/blob/main/Development.md) that tells you how to set up a development workflow.
### 5. Write Code and Commit It
Once you have done this, you can write code, test it, and commit it to a branch (replace `my_branch` with an appropriate name):
```shell
git checkout -b my_branch
git add .
git commit
git push origin my_branch
```
### 6. Open a Pull Request
* On GitHub, go to the page of your forked repository, and create a Pull Request:
- Click on `Branches`
- Click on the `...` beside your branch and click on `New pull request`
- Set `base repository` to `All-Hands-AI/OpenHands`
- Set `base` to `main`
- Click `Create pull request`
The PR should appear in [OpenHands PRs](https://github.com/All-Hands-AI/OpenHands/pulls).
Then the OpenHands team will review your code.
## PR Rules
### 1. Pull Request title
### Pull Request title
As described [here](https://github.com/commitizen/conventional-commit-types/blob/master/index.json), a valid PR title should begin with one of the following prefixes:
- `feat`: A new feature
@@ -112,6 +86,9 @@ For example, a PR title could be:
You may also check out previous PRs in the [PR list](https://github.com/All-Hands-AI/OpenHands/pulls).
### 2. Pull Request description
### Pull Request description
- If your PR is small (such as a typo fix), you can go brief.
- If it contains a lot of changes, it's better to write more details.
If your changes are user-facing (e.g. a new feature in the UI, a change in behavior, or a bugfix)
please include a short message that we can add to our changelog.

View File

@@ -5,12 +5,14 @@ Otherwise, you can clone the OpenHands project directly.
## Start the server for development
### 1. Requirements
* Linux, Mac OS, or [WSL on Windows](https://learn.microsoft.com/en-us/windows/wsl/install) [ Ubuntu <= 22.04]
* Linux, Mac OS, or [WSL on Windows](https://learn.microsoft.com/en-us/windows/wsl/install) [Ubuntu <= 22.04]
* [Docker](https://docs.docker.com/engine/install/) (For those on MacOS, make sure to allow the default Docker socket to be used from advanced settings!)
* [Python](https://www.python.org/downloads/) = 3.12
* [NodeJS](https://nodejs.org/en/download/package-manager) >= 18.17.1
* [Poetry](https://python-poetry.org/docs/#installing-with-the-official-installer) >= 1.8
* netcat => sudo apt-get install netcat
* OS-specific dependencies:
- Ubuntu: build-essential => `sudo apt-get install build-essential`
- WSL: netcat => `sudo apt-get install netcat`
Make sure you have all these dependencies installed before moving on to `make build`.
@@ -91,9 +93,6 @@ To run tests, refer to the following:
poetry run pytest ./tests/unit/test_*.py
```
#### Integration tests
Please refer to [this README](./tests/integration/README.md) for details.
### 9. Add or update dependency
1. Add your dependency in `pyproject.toml` or use `poetry add xxx`
2. Update the poetry.lock file via `poetry lock --no-update`

View File

@@ -195,7 +195,7 @@ start-backend:
# Start frontend
start-frontend:
@echo "$(YELLOW)Starting frontend...$(RESET)"
@cd frontend && VITE_BACKEND_HOST=$(BACKEND_HOST_PORT) VITE_FRONTEND_PORT=$(FRONTEND_PORT) npm run start -- --port $(FRONTEND_PORT)
@cd frontend && VITE_BACKEND_HOST=$(BACKEND_HOST_PORT) VITE_FRONTEND_PORT=$(FRONTEND_PORT) npm run dev -- --port $(FRONTEND_PORT) --host $(BACKEND_HOST)
# Common setup for running the app (non-callable)
_run_setup:
@@ -214,7 +214,7 @@ _run_setup:
run:
@echo "$(YELLOW)Running the app...$(RESET)"
@$(MAKE) -s _run_setup
@cd frontend && echo "$(BLUE)Starting frontend with npm...$(RESET)" && npm run start -- --port $(FRONTEND_PORT)
@$(MAKE) -s start-frontend
@echo "$(GREEN)Application started successfully.$(RESET)"
# Run the app (in docker)

View File

@@ -42,10 +42,10 @@ system requirements and more information.
```bash
export WORKSPACE_BASE=$(pwd)/workspace
docker pull ghcr.io/all-hands-ai/runtime:0.9-nikolaik
docker pull ghcr.io/all-hands-ai/runtime:0.11-nikolaik
docker run -it --pull=always \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=ghcr.io/all-hands-ai/runtime:0.9-nikolaik \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=ghcr.io/all-hands-ai/runtime:0.11-nikolaik \
-e SANDBOX_USER_ID=$(id -u) \
-e WORKSPACE_MOUNT_PATH=$WORKSPACE_BASE \
-v $WORKSPACE_BASE:/opt/workspace_base \
@@ -53,7 +53,7 @@ docker run -it --pull=always \
-p 3000:3000 \
--add-host host.docker.internal:host-gateway \
--name openhands-app-$(date +%Y%m%d%H%M%S) \
ghcr.io/all-hands-ai/openhands:0.9
ghcr.io/all-hands-ai/openhands:0.11
```
You'll find OpenHands running at [http://localhost:3000](http://localhost:3000)!

View File

@@ -172,11 +172,9 @@ model = "gpt-4o"
#disable_vision = true
[llm.gpt4o-mini]
# API key to use
api_key = "your-api-key"
model = "gpt-4o"
# Model to use
model = "gpt-4o-mini"
#################################### Agent ###################################
# Configuration for agents (group name starts with 'agent')

View File

@@ -8,7 +8,7 @@ RUN npm install -g npm@10.5.1
RUN npm ci
COPY ./frontend ./
RUN npm run make-i18n && npm run build
RUN npm run build
FROM python:3.12.3-slim AS backend-builder
@@ -46,14 +46,6 @@ RUN mkdir -p $WORKSPACE_BASE
RUN apt-get update -y \
&& apt-get install -y curl ssh sudo
# Install Docker - https://docs.docker.com/engine/install/debian/
RUN apt-get install ca-certificates curl \
&& curl -fsSL https://download.docker.com/linux/debian/gpg -o /etc/apt/keyrings/docker.asc \
&& chmod a+r /etc/apt/keyrings/docker.asc \
&& echo "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.asc] https://download.docker.com/linux/debian bookworm stable" | sudo tee /etc/apt/sources.list.d/docker.list > /dev/null \
&& apt-get update \
&& apt install -y docker-ce
# Default is 1000, but OSX is often 501
RUN sed -i 's/^UID_MIN.*/UID_MIN 499/' /etc/login.defs
# Default is 60000, but we've seen up to 200000
@@ -90,7 +82,7 @@ RUN python openhands/core/download.py # No-op to download assets
# openhands:openhands -> openhands:app
RUN find /app \! -group app -exec chgrp app {} +
COPY --chown=openhands:app --chmod=770 --from=frontend-builder /app/build/client ./frontend/build
COPY --chown=openhands:app --chmod=770 --from=frontend-builder /app/build ./frontend/build
COPY --chown=openhands:app --chmod=770 ./containers/app/entrypoint.sh /app/entrypoint.sh
USER root

View File

@@ -21,7 +21,7 @@ The OpenHands Runtime system uses a client-server architecture implemented with
graph TD
A[User-provided Custom Docker Image] --> B[OpenHands Backend]
B -->|Builds| C[OH Runtime Image]
C -->|Launches| D[Runtime Client]
C -->|Launches| D[Action Executor]
D -->|Initializes| E[Browser]
D -->|Initializes| F[Bash Shell]
D -->|Initializes| G[Plugins]
@@ -49,10 +49,10 @@ graph TD
1. User Input: The user provides a custom base Docker image
2. Image Building: OpenHands builds a new Docker image (the "OH runtime image") based on the user-provided image. This new image includes OpenHands-specific code, primarily the "runtime client"
3. Container Launch: When OpenHands starts, it launches a Docker container using the OH runtime image
4. Client Initialization: The runtime client initializes inside the container, setting up necessary components like a bash shell and loading any specified plugins
5. Communication: The OpenHands backend (`runtime.py`) communicates with the runtime client over RESTful API, sending actions and receiving observations
4. Action Execution Server Initialization: The action execution server initializes an `ActionExecutor` inside the container, setting up necessary components like a bash shell and loading any specified plugins
5. Communication: The OpenHands backend (`openhands/runtime/impl/eventstream/eventstream_runtime.py`) communicates with the action execution server over RESTful API, sending actions and receiving observations
6. Action Execution: The runtime client receives actions from the backend, executes them in the sandboxed environment, and sends back observations
7. Observation Return: The client sends execution results back to the OpenHands backend as observations
7. Observation Return: The action execution server sends execution results back to the OpenHands backend as observations
The role of the client:
@@ -70,74 +70,46 @@ Check out the [relevant code](https://github.com/All-Hands-AI/OpenHands/blob/mai
### Image Tagging System
OpenHands uses a dual-tagging system for its runtime images to balance reproducibility with flexibility:
OpenHands uses a dual-tagging system for its runtime images to balance reproducibility with flexibility.
Tags may be in one of 2 formats:
1. Hash-based tag: `{target_image_repo}:{target_image_hash_tag}`.
Example: `runtime:abc123def456`
- **Generic**: `oh_v{openhands_version}_{16_digit_lock_hash}` (e.g.: `oh_v0.9.9_1234567890abcdef`)
- **Specific**: `oh_v{openhands_version}_{16_digit_lock_hash}_{16_digit_source_hash}`
(e.g.: `oh_v0.9.9_1234567890abcdef_1234567890abcdef`)
- This tag is based on the MD5 hash of the Docker build folder, which includes the source code (of runtime client and related dependencies) and Dockerfile
- Identical hash tags guarantee that the images were built with exactly the same source code and Dockerfile
- This ensures reproducibility; the same hash always means the same image contents
#### Lock Hash
2. Generic tag: `{target_image_repo}:{target_image_tag}`.
Example: `runtime:oh_v0.9.3_ubuntu_tag_22.04`
This hash is built from the first 16 digits of the MD5 of:
- The name of the base image upon which the image was built (e.g.: `nikolaik/python-nodejs:python3.12-nodejs22`)
- The content of the `pyproject.toml` included in the image.
- The content of the `poetry.lock` included in the image.
- This tag follows the format: `runtime:oh_v{OH_VERSION}_{BASE_IMAGE_NAME}_tag_{BASE_IMAGE_TAG}`
- It represents the latest build for a particular base image and OpenHands version combination
- This tag is updated whenever a new image is built from the same base image, even if the source code changes
This effectively gives a hash for the dependencies of Openhands independent of the source code.
The hash-based tag ensures reproducibility, while the generic tag provides a stable reference to the latest version of a particular configuration. This dual-tagging approach allows OpenHands to efficiently manage both development and production environments.
#### Source Hash
### Build Process
This is the first 16 digits of the MD5 of the directory hash for the source directory. This gives a hash
for only the openhands source
1. Image Naming Convention:
- Hash-based tag: `{target_image_repo}:{target_image_hash_tag}`.
Example: `runtime:abc123def456`
- Generic tag: `{target_image_repo}:{target_image_tag}`.
Example: `runtime:oh_v0.9.3_ubuntu_tag_22.04`
#### Build Process
2. Build Process:
- a. Convert the base image name to an OH runtime image name
Example: `ubuntu:22.04` -> `runtime:oh_v0.9.3_ubuntu_tag_22.04`
- b. Generate a build context (Dockerfile and OpenHands source code) and calculate its hash
- c. Check for an existing image with the calculated hash
- d. If not found, check for a recent compatible image to use as a base
- e. If no compatible image exists, build from scratch using the original base image
- f. Tag the new image with both hash-based and generic tags
When generating an image...
3. Image Reuse and Rebuilding Logic:
The system follows these steps to determine whether to build a new image or use an existing one from a user-provided (base) image (e.g., `ubuntu:22.04`):
- a. If an image exists with the same hash (e.g., `runtime:abc123def456`), it will be reused as is
- b. If the exact hash is not found, the system will try to rebuild using the latest generic image (e.g., `runtime:oh_v0.9.3_ubuntu_tag_22.04`) as a base. This saves time by leveraging existing dependencies
- c. If neither the hash-tagged nor the generic-tagged image is found, the system will build the image completely from scratch
- OpenHands first checks whether an image with the same **Specific** tag exists. If there is such an image,
no build is performed - the existing image is used.
- OpenHands next checks whether an image with the **Generic** tag exists. If there is such an image,
OpenHands builds a new image based upon it, bypassing all installation steps (like `poetry install` and
`apt-get`) except a final operation to copy the current source code. The new image is tagged with a
**Specific** tag only.
- If neither a **Specific** nor **Generic** tag exists, a brand new image is built based upon the base
image (Which is a slower operation). This new image is tagged with both the **Generic** and **Specific**
tags.
4. Caching and Efficiency:
- The system attempts to reuse existing images when possible to save build time
- If an exact match (by hash) is found, it's used without rebuilding
- If a compatible image is found, it's used as a base for rebuilding, saving time on dependency installation
Here's a flowchart illustrating the build process:
```mermaid
flowchart TD
A[Start] --> B{Convert base image name}
B --> |ubuntu:22.04 -> runtime:oh_v0.9.3_ubuntu_tag_22.04| C[Generate build context and hash]
C --> D{Check for existing image with hash}
D -->|Found runtime:abc123def456| E[Use existing image]
D -->|Not found| F{Check for runtime:oh_v0.9.3_ubuntu_tag_22.04}
F -->|Found| G[Rebuild based on recent image]
F -->|Not found| H[Build from scratch]
G --> I[Tag with hash and generic tags]
H --> I
E --> J[End]
I --> J
```
This approach ensures that:
This dual-tagging approach allows OpenHands to efficiently manage both development and production environments.
1. Identical source code and Dockerfile always produce the same image (via hash-based tags)
2. The system can quickly rebuild images when minor changes occur (by leveraging recent compatible images)
3. The generic tag (e.g., `runtime:oh_v0.9.3_ubuntu_tag_22.04`) always points to the latest build for a particular base image and OpenHands version combination
3. The generic tag (e.g., `runtime:oh_v0.9.3_1234567890abcdef`) always points to the latest build for a particular base image and OpenHands version combination
## Runtime Plugin System

View File

@@ -57,7 +57,7 @@ docker run -it \
-v /var/run/docker.sock:/var/run/docker.sock \
--add-host host.docker.internal:host-gateway \
--name openhands-app-$(date +%Y%m%d%H%M%S) \
ghcr.io/all-hands-ai/openhands:0.9 \
ghcr.io/all-hands-ai/openhands:0.11 \
python -m openhands.core.cli
```

View File

@@ -1,81 +1,64 @@
# Custom Sandbox
The sandbox is where the agent does its work. Instead of running commands directly on your computer
(which could be dangerous), the agent runs them inside of a Docker container.
The sandbox is where the agent performs its tasks. Instead of running commands directly on your computer
(which could be risky), the agent runs them inside a Docker container.
The default OpenHands sandbox (`python-nodejs:python3.12-nodejs22`
from [nikolaik/python-nodejs](https://hub.docker.com/r/nikolaik/python-nodejs)) comes with some packages installed such
as python and Node.js but your use case may need additional software installed by default.
as python and Node.js but may need other software installed by default.
There are two ways you can do so:
You have two options for customization:
1. Use an existing image from docker hub.
2. Creating your own custom docker image and using it.
1. Use an existing image with the required software.
2. Create your own custom Docker image.
If you want to take the first approach, you can skip the `Create Your Docker Image` section.
## Setup
Make sure you are able to run OpenHands using the [Development.md](https://github.com/All-Hands-AI/OpenHands/blob/main/Development.md) first.
If you choose the first option, you can skip the `Create Your Docker Image` section.
## Create Your Docker Image
To create a custom docker image, it must be debian/ubuntu based.
To create a custom Docker image, it must be Debian based.
For example, if we want OpenHands to have access to the `node` binary, we would use the following Dockerfile:
For example, if you want OpenHands to have `ruby` installed, create a `Dockerfile` with the following content:
```dockerfile
# Start with latest ubuntu image
FROM ubuntu:latest
FROM debian:latest
# Run needed updates
RUN apt-get update && apt-get install -y
# Install node
RUN apt-get install -y nodejs
# Install required packages
RUN apt-get update && apt-get install -y ruby
```
Next build your docker image with the name of your choice, for example `custom_image`.
Save this file in a folder. Then, build your Docker image (e.g., named custom-image) by navigating to the folder in
the terminal and running::
```bash
docker build -t custom-image .
```
To do this you can create a directory and put your file inside it with the name `Dockerfile`, and inside the directory run the following command:
This will produce a new image called `custom-image`, which will be available in Docker.
> Note that in the configuration described in this document, OpenHands will run as user "openhands" inside the
> sandbox and thus all packages installed via the docker file should be available to all users on the system, not just root.
## Using the Development Workflow
### Setup
First, ensure you can run OpenHands by following the instructions in [Development.md](https://github.com/All-Hands-AI/OpenHands/blob/main/Development.md).
### Specify the Base Sandbox Image
In the `config.toml` file within the OpenHands directory, set the `sandbox_base_container_image` to the image you want to use.
This can be an image youve already pulled or one youve built:
```bash
docker build -t custom_image .
```
This will produce a new image called ```custom_image``` that will be available in Docker Engine.
> Note that in the configuration described in this document, OpenHands will run as user "openhands" inside the sandbox and thus all packages installed via the docker file should be available to all users on the system, not just root.
>
> Installing with apt-get above installs node for all users.
## Specify your sandbox image in config.toml file
OpenHands configuration occurs via the top-level `config.toml` file.
Create a `config.toml` file in the OpenHands directory and enter these contents:
```toml
[core]
workspace_base="./workspace"
run_as_openhands=true
sandbox_base_container_image="custom_image"
...
sandbox_base_container_image="custom-image"
```
For `sandbox_base_container_image`, you can specify either:
### Run
1. The name of your custom image that you built in the previous step (e.g., `”custom_image”`)
2. A pre-existing image from Docker Hub (e.g., `”node:20”` if you want a sandbox with Node.js pre-installed)
## Run
Run OpenHands by running ```make run``` in the top level directory.
Navigate to ```localhost:3001``` and check if your desired dependencies are available.
In the case of the example above, running ```node -v``` in the terminal produces ```v20.15.0```.
Congratulations!
## Technical Explanation
Please refer to [custom docker image section of the runtime documentation](https://docs.all-hands.dev/modules/usage/architecture/runtime#advanced-how-openhands-builds-and-maintains-od-runtime-images) for more details.

View File

@@ -134,9 +134,11 @@ To create an evaluation workflow for your benchmark, follow these steps:
4. Create a function to process each instance:
```python
from openhands.utils.async_utils import call_async_from_sync
def process_instance(instance: pd.Series, metadata: EvalMetadata) -> EvalOutput:
config = get_config(instance, metadata)
runtime = create_runtime(config)
call_async_from_sync(runtime.connect)
initialize_runtime(runtime, instance)
instruction = get_instruction(instance, metadata)

View File

@@ -51,6 +51,6 @@ docker run -it \
-v /var/run/docker.sock:/var/run/docker.sock \
--add-host host.docker.internal:host-gateway \
--name openhands-app-$(date +%Y%m%d%H%M%S) \
ghcr.io/all-hands-ai/openhands:0.9 \
ghcr.io/all-hands-ai/openhands:0.11 \
python -m openhands.core.main -t "write a bash script that prints hi"
```

View File

@@ -14,10 +14,10 @@ existing code that you'd like to modify.
```bash
export WORKSPACE_BASE=$(pwd)/workspace
docker pull ghcr.io/all-hands-ai/runtime:0.9-nikolaik
docker pull ghcr.io/all-hands-ai/runtime:0.11-nikolaik
docker run -it --pull=always \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=ghcr.io/all-hands-ai/runtime:0.9-nikolaik \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=ghcr.io/all-hands-ai/runtime:0.11-nikolaik \
-e SANDBOX_USER_ID=$(id -u) \
-e WORKSPACE_MOUNT_PATH=$WORKSPACE_BASE \
-v $WORKSPACE_BASE:/opt/workspace_base \
@@ -25,7 +25,7 @@ docker run -it --pull=always \
-p 3000:3000 \
--add-host host.docker.internal:host-gateway \
--name openhands-app-$(date +%Y%m%d%H%M%S) \
ghcr.io/all-hands-ai/openhands:0.9
ghcr.io/all-hands-ai/openhands:0.11
```
You can also run OpenHands in a scriptable [headless mode](https://docs.all-hands.dev/modules/usage/how-to/headless-mode), as an [interactive CLI](https://docs.all-hands.dev/modules/usage/how-to/cli-mode), or using the [OpenHands GitHub Action](https://docs.all-hands.dev/modules/usage/how-to/github-action).

View File

@@ -5,7 +5,7 @@ OpenHands uses LiteLLM to make calls to Azure's chat models. You can find their
## Azure OpenAI Configuration
When running OpenHands, you'll need to set the following environment variable using `-e` in the
[docker run command](/modules/usage/installation):
[docker run command](/modules/usage/installation#start-the-app):
```
LLM_API_VERSION="<api-version>" # e.g. "2023-05-15"
@@ -37,7 +37,7 @@ OpenHands uses llama-index for embeddings. You can find their documentation on A
### Azure OpenAI Configuration
When running OpenHands, set the following environment variables using `-e` in the
[docker run command](/modules/usage/installation):
[docker run command](/modules/usage/installation#start-the-app):
```
LLM_EMBEDDING_MODEL="azureopenai"

View File

@@ -16,7 +16,7 @@ If the model is not in the list, toggle `Advanced Options`, and enter it in `Cus
## VertexAI - Google Cloud Platform Configs
To use Vertex AI through Google Cloud Platform when running OpenHands, you'll need to set the following environment
variables using `-e` in the [docker run command](/modules/usage/installation):
variables using `-e` in the [docker run command](/modules/usage/installation#start-the-app):
```
GOOGLE_APPLICATION_CREDENTIALS="<json-dump-of-gcp-service-account-json>"

View File

@@ -48,7 +48,7 @@ The following can be set in the OpenHands UI through the Settings:
- `Base URL` (through `Advanced Settings`)
There are some settings that may be necessary for some LLMs/providers that cannot be set through the UI. Instead, these
can be set through environment variables passed to the [docker run command](/modules/usage/installation)
can be set through environment variables passed to the [docker run command](/modules/usage/installation#start-the-app)
using `-e`:
- `LLM_API_VERSION`

View File

@@ -12,7 +12,7 @@
"@docusaurus/plugin-content-pages": "^3.5.2",
"@docusaurus/preset-classic": "^3.5.2",
"@docusaurus/theme-mermaid": "^3.5.2",
"@mdx-js/react": "^3.0.0",
"@mdx-js/react": "^3.1.0",
"clsx": "^2.0.0",
"prism-react-renderer": "^2.4.0",
"react": "^18.3.1",
@@ -2883,9 +2883,9 @@
}
},
"node_modules/@mdx-js/react": {
"version": "3.0.1",
"resolved": "https://registry.npmjs.org/@mdx-js/react/-/react-3.0.1.tgz",
"integrity": "sha512-9ZrPIU4MGf6et1m1ov3zKf+q9+deetI51zprKB1D/z3NOb+rUxxtEl3mCjW5wTGh6VhRdwPueh1oRzi6ezkA8A==",
"version": "3.1.0",
"resolved": "https://registry.npmjs.org/@mdx-js/react/-/react-3.1.0.tgz",
"integrity": "sha512-QjHtSaoameoalGnKDT3FoIl4+9RwyTmo9ZJGBdLOks/YOiWHoRDI3PUwEzOE7kEmGcV3AFcp9K6dYu9rEuKLAQ==",
"dependencies": {
"@types/mdx": "^2.0.0"
},

View File

@@ -19,7 +19,7 @@
"@docusaurus/plugin-content-pages": "^3.5.2",
"@docusaurus/preset-classic": "^3.5.2",
"@docusaurus/theme-mermaid": "^3.5.2",
"@mdx-js/react": "^3.0.0",
"@mdx-js/react": "^3.1.0",
"clsx": "^2.0.0",
"prism-react-renderer": "^2.4.0",
"react": "^18.3.1",

Binary file not shown.

Before

Width:  |  Height:  |  Size: 24 KiB

After

Width:  |  Height:  |  Size: 28 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 26 KiB

After

Width:  |  Height:  |  Size: 33 KiB

9093
docs/yarn.lock Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -23,6 +23,7 @@ from openhands.core.config import (
from openhands.core.logger import openhands_logger as logger
from openhands.core.main import create_runtime, run_controller
from openhands.events.action import MessageAction
from openhands.utils.async_utils import call_async_from_sync
game = None
@@ -119,6 +120,7 @@ def process_instance(
# Here's how you can run the agent (similar to the `main` function) and get the final task state
runtime = create_runtime(config)
call_async_from_sync(runtime.connect)
state: State | None = asyncio.run(
run_controller(

View File

@@ -2,19 +2,47 @@
This folder contains code and resources to run experiments and evaluations.
## Logistics
## For Benchmark Users
To better organize the evaluation folder, we should follow the rules below:
### Setup
- Each subfolder contains a specific benchmark or experiment. For example, `evaluation/swe_bench` should contain
all the preprocessing/evaluation/analysis scripts.
- Raw data and experimental records should not be stored within this repo.
- For model outputs, they should be stored at [this huggingface space](https://huggingface.co/spaces/OpenHands/evaluation) for visualization.
- Important data files of manageable size and analysis scripts (e.g., jupyter notebooks) can be directly uploaded to this repo.
Before starting evaluation, follow the instructions here [here](https://github.com/All-Hands-AI/OpenHands/blob/main/Development.md) to setup your local development environment and LLM.
Once you are done with setup, you can follow the benchmark-specific instructions in each subdirectory of the evaluation directory.
Generally these will involve running `run_infer.py` to perform inference with the agents.
### Implementing and Evaluating an Agent
To add an agent to OpenHands, you will need to implement it in the [agenthub directory](https://github.com/All-Hands-AI/OpenHands/tree/main/openhands/agenthub). There is a README there with more information.
To evaluate an agent, you can provide the agent's name to the `run_infer.py` program.
### Evaluating Different LLMs
OpenHands in development mode uses `config.toml` to keep track of most configuration.
Here's an example configuration file you can use to define and use multiple LLMs:
```toml
[llm]
# IMPORTANT: add your API key here, and set the model to the one you want to evaluate
model = "gpt-4o-2024-05-13"
api_key = "sk-XXX"
[llm.eval_gpt4_1106_preview_llm]
model = "gpt-4-1106-preview"
api_key = "XXX"
temperature = 0.0
[llm.eval_some_openai_compatible_model_llm]
model = "openai/MODEL_NAME"
base_url = "https://OPENAI_COMPATIBLE_URL/v1"
api_key = "XXX"
temperature = 0.0
```
## Supported Benchmarks
To learn more about how to integrate your benchmark into OpenHands, check out [tutorial here](https://docs.all-hands.dev/modules/usage/how-to/evaluation-harness).
The OpenHands evaluation harness supports a wide variety of benchmarks across software engineering, web browsing, and miscellaneous assistance tasks.
### Software Engineering
@@ -41,36 +69,19 @@ To learn more about how to integrate your benchmark into OpenHands, check out [t
- Entity deduction Arena (EDA): [`evaluation/EDA`](./EDA)
- ProofWriter: [`evaluation/logic_reasoning`](./logic_reasoning)
## Before everything begins: Setup Environment and LLM Configuration
Please follow instruction [here](https://github.com/All-Hands-AI/OpenHands/blob/main/Development.md) to setup your local development environment and LLM.
OpenHands in development mode uses `config.toml` to keep track of most configurations.
Here's an example configuration file you can use to define and use multiple LLMs:
```toml
[llm]
# IMPORTANT: add your API key here, and set the model to the one you want to evaluate
model = "gpt-4o-2024-05-13"
api_key = "sk-XXX"
[llm.eval_gpt4_1106_preview_llm]
model = "gpt-4-1106-preview"
api_key = "XXX"
temperature = 0.0
[llm.eval_some_openai_compatible_model_llm]
model = "openai/MODEL_NAME"
base_url = "https://OPENAI_COMPATIBLE_URL/v1"
api_key = "XXX"
temperature = 0.0
```
### Result Visualization
## Result Visualization
Check [this huggingface space](https://huggingface.co/spaces/OpenHands/evaluation) for visualization of existing experimental results.
### Upload your results
You can start your own fork of [our huggingface evaluation outputs](https://huggingface.co/spaces/OpenHands/evaluation) and submit a PR of your evaluation results to our hosted huggingface repo via PR following the guide [here](https://huggingface.co/docs/hub/en/repositories-pull-requests-discussions#pull-requests-and-discussions).
## For Benchmark Developers
To learn more about how to integrate your benchmark into OpenHands, check out [tutorial here](https://docs.all-hands.dev/modules/usage/how-to/evaluation-harness). Briefly,
- Each subfolder contains a specific benchmark or experiment. For example, `evaluation/swe_bench` should contain
all the preprocessing/evaluation/analysis scripts.
- Raw data and experimental records should not be stored within this repo.
- For model outputs, they should be stored at [this huggingface space](https://huggingface.co/spaces/OpenHands/evaluation) for visualization.
- Important data files of manageable size and analysis scripts (e.g., jupyter notebooks) can be directly uploaded to this repo.

View File

@@ -32,7 +32,8 @@ from openhands.core.logger import openhands_logger as logger
from openhands.core.main import create_runtime, run_controller
from openhands.events.action import AgentFinishAction, CmdRunAction, MessageAction
from openhands.events.observation import CmdOutputObservation
from openhands.runtime.runtime import Runtime
from openhands.runtime.base import Runtime
from openhands.utils.async_utils import call_async_from_sync
def get_config(
@@ -210,6 +211,7 @@ def process_instance(
# =============================================
runtime: Runtime = create_runtime(config)
call_async_from_sync(runtime.connect)
initialize_runtime(runtime, instance=instance)

View File

@@ -32,7 +32,8 @@ from openhands.core.logger import openhands_logger as logger
from openhands.core.main import create_runtime, run_controller
from openhands.events.action import CmdRunAction, MessageAction
from openhands.events.observation import CmdOutputObservation
from openhands.runtime.runtime import Runtime
from openhands.runtime.base import Runtime
from openhands.utils.async_utils import call_async_from_sync
# Configure visibility of unit tests to the Agent.
USE_UNIT_TESTS = os.environ.get('USE_UNIT_TESTS', 'false').lower() == 'true'
@@ -48,13 +49,14 @@ def get_config(
config = AppConfig(
default_agent=metadata.agent_class,
run_as_openhands=False,
runtime='eventstream',
runtime=os.environ.get('RUNTIME', 'eventstream'),
max_iterations=metadata.max_iterations,
sandbox=SandboxConfig(
base_container_image='python:3.11-bookworm',
enable_auto_lint=True,
use_host_network=False,
timeout=100,
api_key=os.environ.get('ALLHANDS_API_KEY', None),
),
# do not mount workspace
workspace_base=None,
@@ -186,7 +188,9 @@ def process_instance(
signature_file=f'{instance.instance_name}.py',
)
if USE_UNIT_TESTS:
print(f'\nInstruction to run test_file: {instance.instance_name}_test.py\n')
logger.info(
f'\nInstruction to run test_file: {instance.instance_name}_test.py\n'
)
instruction += (
f'Use `python -m unittest {instance.instance_name}_test.py` to run the test_file '
'and verify the correctness of your solution. DO NOT EDIT the test file.\n\n'
@@ -204,6 +208,7 @@ def process_instance(
# =============================================
runtime: Runtime = create_runtime(config)
call_async_from_sync(runtime.connect)
initialize_runtime(runtime, instance=instance)

View File

@@ -29,7 +29,8 @@ from openhands.core.logger import openhands_logger as logger
from openhands.core.main import create_runtime, run_controller
from openhands.events.action import CmdRunAction, MessageAction
from openhands.events.observation import CmdOutputObservation
from openhands.runtime.runtime import Runtime
from openhands.runtime.base import Runtime
from openhands.utils.async_utils import call_async_from_sync
AGENT_CLS_TO_FAKE_USER_RESPONSE_FN = {
'CodeActAgent': functools.partial(
@@ -275,7 +276,7 @@ def process_instance(
instruction += AGENT_CLS_TO_INST_SUFFIX[metadata.agent_class]
runtime = create_runtime(config)
call_async_from_sync(runtime.connect)
initialize_runtime(runtime, instance)
# Here's how you can run the agent (similar to the `main` function) and get the final task state

View File

@@ -32,7 +32,8 @@ from openhands.core.logger import openhands_logger as logger
from openhands.core.main import create_runtime, run_controller
from openhands.events.action import CmdRunAction, MessageAction
from openhands.events.observation import CmdOutputObservation
from openhands.runtime.runtime import Runtime
from openhands.runtime.base import Runtime
from openhands.utils.async_utils import call_async_from_sync
def codeact_user_response(state: State) -> str:
@@ -403,6 +404,7 @@ def process_instance(
instruction += AGENT_CLS_TO_INST_SUFFIX[metadata.agent_class]
runtime = create_runtime(config)
call_async_from_sync(runtime.connect)
initialize_runtime(runtime, instance)
# Here's how you can run the agent (similar to the `main` function) and get the final task state

View File

@@ -28,7 +28,8 @@ from openhands.core.logger import openhands_logger as logger
from openhands.core.main import create_runtime, run_controller
from openhands.events.action import AgentFinishAction, CmdRunAction, MessageAction
from openhands.events.observation import CmdOutputObservation
from openhands.runtime.runtime import Runtime
from openhands.runtime.base import Runtime
from openhands.utils.async_utils import call_async_from_sync
DATASET_CACHE_DIR = os.path.join(os.path.dirname(__file__), 'data')
@@ -142,6 +143,7 @@ def process_instance(
logger.info(f'Instruction:\n{instruction}', extra={'msg_type': 'OBSERVATION'})
runtime = create_runtime(config)
call_async_from_sync(runtime.connect)
initialize_runtime(runtime, instance)
# Here's how you can run the agent (similar to the `main` function) and get the final task state

View File

@@ -25,6 +25,7 @@ from openhands.core.config import (
from openhands.core.logger import openhands_logger as logger
from openhands.core.main import create_runtime, run_controller
from openhands.events.action import MessageAction
from openhands.utils.async_utils import call_async_from_sync
AGENT_CLS_TO_FAKE_USER_RESPONSE_FN = {
'CodeActAgent': codeact_user_response,
@@ -81,6 +82,7 @@ def process_instance(
# Here's how you can run the agent (similar to the `main` function) and get the final task state
runtime = create_runtime(config)
call_async_from_sync(runtime.connect)
state: State | None = asyncio.run(
run_controller(
config=config,

View File

@@ -48,6 +48,7 @@ from openhands.events.action import (
MessageAction,
)
from openhands.events.observation import Observation
from openhands.utils.async_utils import call_async_from_sync
ACTION_FORMAT = """
<<FINAL_ANSWER||
@@ -215,7 +216,7 @@ Ok now its time to start solving the question. Good luck!
"""
runtime = create_runtime(config)
call_async_from_sync(runtime.connect)
state: State | None = asyncio.run(
run_controller(
config=config,

View File

@@ -37,7 +37,8 @@ from openhands.core.logger import openhands_logger as logger
from openhands.core.main import create_runtime, run_controller
from openhands.events.action import CmdRunAction, MessageAction
from openhands.events.observation import CmdOutputObservation
from openhands.runtime.runtime import Runtime
from openhands.runtime.base import Runtime
from openhands.utils.async_utils import call_async_from_sync
IMPORT_HELPER = {
'python': [
@@ -233,6 +234,7 @@ def process_instance(
# Here's how you can run the agent (similar to the `main` function) and get the final task state
runtime = create_runtime(config)
call_async_from_sync(runtime.connect)
initialize_runtime(runtime, instance)
state: State | None = asyncio.run(
run_controller(

View File

@@ -0,0 +1,69 @@
# Integration tests
This directory implements integration tests that [was running in CI](https://github.com/All-Hands-AI/OpenHands/tree/23d3becf1d6f5d07e592f7345750c314a826b4e9/tests/integration).
[PR 3985](https://github.com/All-Hands-AI/OpenHands/pull/3985) introduce LLM-based editing, which requires access to LLM to perform edit. Hence, we remove integration tests from CI and intend to run them as nightly evaluation to ensure the quality of OpenHands softwares.
## To add new tests
Each test is a file named like `tXX_testname.py` where `XX` is a number.
Make sure to name the file for each test to start with `t` and ends with `.py`.
Each test should be structured as a subclass of [`BaseIntegrationTest`](./tests/base.py), where you need to implement `initialize_runtime` that setup the runtime enviornment before test, and `verify_result` that takes in a `Runtime` and history of `Event` and return a `TestResult`. See [t01_fix_simple_typo.py](./tests/t01_fix_simple_typo.py) and [t05_simple_browsing.py](./tests/t05_simple_browsing.py) for two representative examples.
```python
class TestResult(BaseModel):
success: bool
reason: str | None = None
class BaseIntegrationTest(ABC):
"""Base class for integration tests."""
INSTRUCTION: str
@classmethod
@abstractmethod
def initialize_runtime(cls, runtime: Runtime) -> None:
"""Initialize the runtime for the test to run."""
pass
@classmethod
@abstractmethod
def verify_result(cls, runtime: Runtime, histories: list[Event]) -> TestResult:
"""Verify the result of the test.
This method will be called after the agent performs the task on the runtime.
"""
pass
```
## Setup Environment and LLM Configuration
Please follow instruction [here](../README.md#setup) to setup your local
development environment and LLM.
## Start the evaluation
```bash
./evaluation/integration_tests/scripts/run_infer.sh [model_config] [git-version] [agent] [eval_limit] [eval-num-workers] [eval_ids]
```
- `model_config`, e.g. `eval_gpt4_1106_preview`, is the config group name for
your LLM settings, as defined in your `config.toml`.
- `git-version`, e.g. `HEAD`, is the git commit hash of the OpenHands version
you would like to evaluate. It could also be a release tag like `0.9.0`.
- `agent`, e.g. `CodeActAgent`, is the name of the agent for benchmarks,
defaulting to `CodeActAgent`.
- `eval_limit`, e.g. `10`, limits the evaluation to the first `eval_limit`
instances. By default, the script evaluates the entire Exercism test set
(133 issues). Note: in order to use `eval_limit`, you must also set `agent`.
- `eval-num-workers`: the number of workers to use for evaluation. Default: `1`.
- `eval_ids`, e.g. `"1,3,10"`, limits the evaluation to instances with the
given IDs (comma separated).
Example:
```bash
./evaluation/integration_tests/scripts/run_infer.sh llm.claude-35-sonnet-eval HEAD CodeActAgent
```

View File

View File

@@ -0,0 +1,213 @@
import asyncio
import importlib.util
import os
import pandas as pd
from evaluation.integration_tests.tests.base import BaseIntegrationTest, TestResult
from evaluation.utils.shared import (
EvalMetadata,
EvalOutput,
codeact_user_response,
make_metadata,
prepare_dataset,
reset_logger_for_multiprocessing,
run_evaluation,
)
from openhands.controller.state.state import State
from openhands.core.config import (
AppConfig,
SandboxConfig,
get_llm_config_arg,
parse_arguments,
)
from openhands.core.logger import openhands_logger as logger
from openhands.core.main import create_runtime, run_controller
from openhands.events.action import MessageAction
from openhands.runtime.base import Runtime
from openhands.utils.async_utils import call_async_from_sync
FAKE_RESPONSES = {
'CodeActAgent': codeact_user_response,
}
def get_config(
metadata: EvalMetadata,
instance_id: str,
) -> AppConfig:
config = AppConfig(
default_agent=metadata.agent_class,
run_as_openhands=False,
runtime='eventstream',
max_iterations=metadata.max_iterations,
sandbox=SandboxConfig(
# use default base_container_image
enable_auto_lint=True,
use_host_network=False,
timeout=100,
),
# do not mount workspace
workspace_base=None,
workspace_mount_path=None,
)
if metadata.llm_config.log_completions:
metadata.llm_config.log_completions_folder = os.path.join(
metadata.eval_output_dir, 'llm_completions', instance_id
)
logger.info(
f'Logging LLM completions for instance {instance_id} to '
f'{metadata.llm_config.log_completions_folder}'
)
config.set_llm_config(metadata.llm_config)
return config
def process_instance(
instance: pd.Series,
metadata: EvalMetadata,
reset_logger: bool = True,
) -> EvalOutput:
config = get_config(metadata, instance.instance_id)
# Setup the logger properly, so you can run multi-processing to parallelize the evaluation
if reset_logger:
log_dir = os.path.join(metadata.eval_output_dir, 'infer_logs')
reset_logger_for_multiprocessing(logger, str(instance.instance_id), log_dir)
else:
logger.info(
f'\nStarting evaluation for instance {str(instance.instance_id)}.\n'
)
# =============================================
# import test instance
# =============================================
instance_id = instance.instance_id
spec = importlib.util.spec_from_file_location(instance_id, instance.file_path)
test_module = importlib.util.module_from_spec(spec)
spec.loader.exec_module(test_module)
assert hasattr(
test_module, 'Test'
), f'Test module {instance_id} does not have a Test class'
test_class: type[BaseIntegrationTest] = test_module.Test
assert issubclass(
test_class, BaseIntegrationTest
), f'Test class {instance_id} does not inherit from BaseIntegrationTest'
instruction = test_class.INSTRUCTION
# =============================================
# create sandbox and run the agent
# =============================================
runtime: Runtime = create_runtime(config)
call_async_from_sync(runtime.connect)
test_class.initialize_runtime(runtime)
# Here's how you can run the agent (similar to the `main` function) and get the final task state
state: State | None = asyncio.run(
run_controller(
config=config,
initial_user_action=MessageAction(content=instruction),
runtime=runtime,
fake_user_response_fn=FAKE_RESPONSES[metadata.agent_class],
)
)
if state is None:
raise ValueError('State should not be None.')
# # =============================================
# # result evaluation
# # =============================================
histories = state.history.get_events()
test_result: TestResult = test_class.verify_result(runtime, histories)
metrics = state.metrics.get() if state.metrics else None
# Save the output
output = EvalOutput(
instance_id=str(instance.instance_id),
instance=instance.to_dict(),
instruction=instruction,
metadata=metadata,
history=histories,
metrics=metrics,
error=state.last_error if state and state.last_error else None,
test_result=test_result.model_dump(),
)
return output
def load_integration_tests() -> pd.DataFrame:
"""Load tests from python files under ./tests"""
cur_dir = os.path.dirname(os.path.abspath(__file__))
test_dir = os.path.join(cur_dir, 'tests')
test_files = [
os.path.join(test_dir, f)
for f in os.listdir(test_dir)
if f.startswith('t') and f.endswith('.py')
]
df = pd.DataFrame(test_files, columns=['file_path'])
df['instance_id'] = df['file_path'].apply(
lambda x: os.path.basename(x).rstrip('.py')
)
return df
if __name__ == '__main__':
args = parse_arguments()
integration_tests = load_integration_tests()
llm_config = None
if args.llm_config:
llm_config = get_llm_config_arg(args.llm_config)
if llm_config is None:
raise ValueError(f'Could not find LLM config: --llm_config {args.llm_config}')
metadata = make_metadata(
llm_config,
'integration_tests',
args.agent_cls,
args.max_iterations,
args.eval_note,
args.eval_output_dir,
)
output_file = os.path.join(metadata.eval_output_dir, 'output.jsonl')
# Parse dataset IDs if provided
eval_ids = None
if args.eval_ids:
eval_ids = str(args.eval_ids).split(',')
logger.info(f'\nUsing specific dataset IDs: {eval_ids}\n')
instances = prepare_dataset(
integration_tests,
output_file,
args.eval_n_limit,
eval_ids=eval_ids,
)
run_evaluation(
instances,
metadata,
output_file,
args.eval_num_workers,
process_instance,
)
df = pd.read_json(output_file, lines=True, orient='records')
df['success'] = df['test_result'].apply(lambda x: x['success'])
df['reason'] = df['test_result'].apply(lambda x: x['reason'])
logger.info('-' * 100)
logger.info(
f'Success rate: {df["success"].mean():.2%} ({df["success"].sum()}/{len(df)})'
)
logger.info(
'\nEvaluation Results:'
+ '\n'
+ df[['instance_id', 'success', 'reason']].to_string(index=False)
)
logger.info('-' * 100)

View File

@@ -0,0 +1,61 @@
#!/bin/bash
set -eo pipefail
source "evaluation/utils/version_control.sh"
MODEL_CONFIG=$1
COMMIT_HASH=$2
AGENT=$3
EVAL_LIMIT=$4
NUM_WORKERS=$5
EVAL_IDS=$6
if [ -z "$NUM_WORKERS" ]; then
NUM_WORKERS=1
echo "Number of workers not specified, use default $NUM_WORKERS"
fi
checkout_eval_branch
if [ -z "$AGENT" ]; then
echo "Agent not specified, use default CodeActAgent"
AGENT="CodeActAgent"
fi
get_agent_version
echo "AGENT: $AGENT"
echo "AGENT_VERSION: $AGENT_VERSION"
echo "MODEL_CONFIG: $MODEL_CONFIG"
EVAL_NOTE=$AGENT_VERSION
# Default to NOT use unit tests.
if [ -z "$USE_UNIT_TESTS" ]; then
export USE_UNIT_TESTS=false
fi
echo "USE_UNIT_TESTS: $USE_UNIT_TESTS"
# If use unit tests, set EVAL_NOTE to the commit hash
if [ "$USE_UNIT_TESTS" = true ]; then
EVAL_NOTE=$EVAL_NOTE-w-test
fi
# export PYTHONPATH=evaluation/integration_tests:\$PYTHONPATH
COMMAND="poetry run python evaluation/integration_tests/run_infer.py \
--agent-cls $AGENT \
--llm-config $MODEL_CONFIG \
--max-iterations 10 \
--eval-num-workers $NUM_WORKERS \
--eval-note $EVAL_NOTE"
if [ -n "$EVAL_LIMIT" ]; then
echo "EVAL_LIMIT: $EVAL_LIMIT"
COMMAND="$COMMAND --eval-n-limit $EVAL_LIMIT"
fi
if [ -n "$EVAL_IDS" ]; then
echo "EVAL_IDS: $EVAL_IDS"
COMMAND="$COMMAND --eval-ids $EVAL_IDS"
fi
# Run the command
eval $COMMAND

View File

@@ -0,0 +1,32 @@
from abc import ABC, abstractmethod
from pydantic import BaseModel
from openhands.events.event import Event
from openhands.runtime.base import Runtime
class TestResult(BaseModel):
success: bool
reason: str | None = None
class BaseIntegrationTest(ABC):
"""Base class for integration tests."""
INSTRUCTION: str
@classmethod
@abstractmethod
def initialize_runtime(cls, runtime: Runtime) -> None:
"""Initialize the runtime for the test to run."""
pass
@classmethod
@abstractmethod
def verify_result(cls, runtime: Runtime, histories: list[Event]) -> TestResult:
"""Verify the result of the test.
This method will be called after the agent performs the task on the runtime.
"""
pass

View File

@@ -0,0 +1,39 @@
import os
import tempfile
from evaluation.integration_tests.tests.base import BaseIntegrationTest, TestResult
from openhands.events.action import CmdRunAction
from openhands.events.event import Event
from openhands.runtime.base import Runtime
class Test(BaseIntegrationTest):
INSTRUCTION = 'Fix typos in bad.txt.'
@classmethod
def initialize_runtime(cls, runtime: Runtime) -> None:
# create a file with a typo in /workspace/bad.txt
with tempfile.TemporaryDirectory() as temp_dir:
temp_file_path = os.path.join(temp_dir, 'bad.txt')
with open(temp_file_path, 'w') as f:
f.write('This is a stupid typoo.\nReally?\nNo mor typos!\nEnjoy!')
# Copy the file to the desired location
runtime.copy_to(temp_file_path, '/workspace')
@classmethod
def verify_result(cls, runtime: Runtime, histories: list[Event]) -> TestResult:
# check if the file /workspace/bad.txt has been fixed
action = CmdRunAction(command='cat /workspace/bad.txt', keep_prompt=False)
obs = runtime.run_action(action)
if obs.exit_code != 0:
return TestResult(
success=False, reason=f'Failed to run command: {obs.content}'
)
# check if the file /workspace/bad.txt has been fixed
if (
obs.content.strip().replace('\r\n', '\n')
== 'This is a stupid typo.\nReally?\nNo more typos!\nEnjoy!'
):
return TestResult(success=True)
return TestResult(success=False, reason=f'File not fixed: {obs.content}')

View File

@@ -0,0 +1,40 @@
from evaluation.integration_tests.tests.base import BaseIntegrationTest, TestResult
from evaluation.utils.shared import assert_and_raise
from openhands.events.action import CmdRunAction
from openhands.events.event import Event
from openhands.runtime.base import Runtime
class Test(BaseIntegrationTest):
INSTRUCTION = "Write a shell script '/workspace/hello.sh' that prints 'hello'."
@classmethod
def initialize_runtime(cls, runtime: Runtime) -> None:
action = CmdRunAction(command='mkdir -p /workspace', keep_prompt=False)
obs = runtime.run_action(action)
assert_and_raise(obs.exit_code == 0, f'Failed to run command: {obs.content}')
@classmethod
def verify_result(cls, runtime: Runtime, histories: list[Event]) -> TestResult:
# check if the file /workspace/hello.sh exists
action = CmdRunAction(command='cat /workspace/hello.sh', keep_prompt=False)
obs = runtime.run_action(action)
if obs.exit_code != 0:
return TestResult(
success=False,
reason=f'Failed to cat /workspace/hello.sh: {obs.content}.',
)
# execute the script
action = CmdRunAction(command='bash /workspace/hello.sh', keep_prompt=False)
obs = runtime.run_action(action)
if obs.exit_code != 0:
return TestResult(
success=False,
reason=f'Failed to execute /workspace/hello.sh: {obs.content}.',
)
if obs.content.strip() != 'hello':
return TestResult(
success=False, reason=f'Script did not print "hello": {obs.content}.'
)
return TestResult(success=True)

View File

@@ -0,0 +1,43 @@
from evaluation.integration_tests.tests.base import BaseIntegrationTest, TestResult
from evaluation.utils.shared import assert_and_raise
from openhands.events.action import CmdRunAction
from openhands.events.event import Event
from openhands.runtime.base import Runtime
class Test(BaseIntegrationTest):
INSTRUCTION = "Use Jupyter IPython to write a text file containing 'hello world' to '/workspace/test.txt'."
@classmethod
def initialize_runtime(cls, runtime: Runtime) -> None:
action = CmdRunAction(command='mkdir -p /workspace', keep_prompt=False)
obs = runtime.run_action(action)
assert_and_raise(obs.exit_code == 0, f'Failed to run command: {obs.content}')
@classmethod
def verify_result(cls, runtime: Runtime, histories: list[Event]) -> TestResult:
# check if the file /workspace/hello.sh exists
action = CmdRunAction(command='cat /workspace/test.txt', keep_prompt=False)
obs = runtime.run_action(action)
if obs.exit_code != 0:
return TestResult(
success=False,
reason=f'Failed to cat /workspace/test.txt: {obs.content}.',
)
# execute the script
action = CmdRunAction(command='cat /workspace/test.txt', keep_prompt=False)
obs = runtime.run_action(action)
if obs.exit_code != 0:
return TestResult(
success=False,
reason=f'Failed to cat /workspace/test.txt: {obs.content}.',
)
if 'hello world' not in obs.content.strip():
return TestResult(
success=False,
reason=f'File did not contain "hello world": {obs.content}.',
)
return TestResult(success=True)

View File

@@ -0,0 +1,58 @@
from evaluation.integration_tests.tests.base import BaseIntegrationTest, TestResult
from evaluation.utils.shared import assert_and_raise
from openhands.events.action import CmdRunAction
from openhands.events.event import Event
from openhands.runtime.base import Runtime
class Test(BaseIntegrationTest):
INSTRUCTION = 'Write a git commit message for the current staging area and commit the changes.'
@classmethod
def initialize_runtime(cls, runtime: Runtime) -> None:
action = CmdRunAction(command='mkdir -p /workspace', keep_prompt=False)
obs = runtime.run_action(action)
assert_and_raise(obs.exit_code == 0, f'Failed to run command: {obs.content}')
# git init
action = CmdRunAction(command='git init', keep_prompt=False)
obs = runtime.run_action(action)
assert_and_raise(obs.exit_code == 0, f'Failed to run command: {obs.content}')
# create README.md
action = CmdRunAction(
command='echo \'print("hello world")\' > hello.py', keep_prompt=False
)
obs = runtime.run_action(action)
assert_and_raise(obs.exit_code == 0, f'Failed to run command: {obs.content}')
# git add README.md
action = CmdRunAction(command='git add hello.py', keep_prompt=False)
obs = runtime.run_action(action)
assert_and_raise(obs.exit_code == 0, f'Failed to run command: {obs.content}')
@classmethod
def verify_result(cls, runtime: Runtime, histories: list[Event]) -> TestResult:
# check if the file /workspace/hello.py exists
action = CmdRunAction(command='cat /workspace/hello.py', keep_prompt=False)
obs = runtime.run_action(action)
if obs.exit_code != 0:
return TestResult(
success=False,
reason=f'Failed to cat /workspace/hello.py: {obs.content}.',
)
# check if the staging area is empty
action = CmdRunAction(command='git status', keep_prompt=False)
obs = runtime.run_action(action)
if obs.exit_code != 0:
return TestResult(
success=False, reason=f'Failed to git status: {obs.content}.'
)
if 'nothing to commit, working tree clean' in obs.content.strip():
return TestResult(success=True)
return TestResult(
success=False,
reason=f'Failed to check for "nothing to commit, working tree clean": {obs.content}.',
)

View File

@@ -0,0 +1,134 @@
import os
import tempfile
from evaluation.integration_tests.tests.base import BaseIntegrationTest, TestResult
from evaluation.utils.shared import assert_and_raise
from openhands.events.action import AgentFinishAction, CmdRunAction, MessageAction
from openhands.events.event import Event
from openhands.events.observation import AgentDelegateObservation
from openhands.runtime.base import Runtime
HTML_FILE = """
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>The Ultimate Answer</title>
<style>
body {
display: flex;
justify-content: center;
align-items: center;
height: 100vh;
margin: 0;
background: linear-gradient(to right, #1e3c72, #2a5298);
color: #fff;
font-family: 'Arial', sans-serif;
text-align: center;
}
.container {
text-align: center;
padding: 20px;
background: rgba(255, 255, 255, 0.1);
border-radius: 10px;
box-shadow: 0 0 10px rgba(0, 0, 0, 0.2);
}
h1 {
font-size: 36px;
margin-bottom: 20px;
}
p {
font-size: 18px;
margin-bottom: 30px;
}
#showButton {
padding: 10px 20px;
font-size: 16px;
color: #1e3c72;
background: #fff;
border: none;
border-radius: 5px;
cursor: pointer;
transition: background 0.3s ease;
}
#showButton:hover {
background: #f0f0f0;
}
#result {
margin-top: 20px;
font-size: 24px;
}
</style>
</head>
<body>
<div class="container">
<h1>The Ultimate Answer</h1>
<p>Click the button to reveal the answer to life, the universe, and everything.</p>
<button id="showButton">Click me</button>
<div id="result"></div>
</div>
<script>
document.getElementById('showButton').addEventListener('click', function() {
document.getElementById('result').innerText = 'The answer is OpenHands is all you need!';
});
</script>
</body>
</html>
"""
class Test(BaseIntegrationTest):
INSTRUCTION = 'Browse localhost:8000, and tell me the ultimate answer to life.'
@classmethod
def initialize_runtime(cls, runtime: Runtime) -> None:
action = CmdRunAction(command='mkdir -p /workspace', keep_prompt=False)
obs = runtime.run_action(action)
assert_and_raise(obs.exit_code == 0, f'Failed to run command: {obs.content}')
action = CmdRunAction(command='mkdir -p /tmp/server', keep_prompt=False)
obs = runtime.run_action(action)
assert_and_raise(obs.exit_code == 0, f'Failed to run command: {obs.content}')
# create a file with a typo in /workspace/bad.txt
with tempfile.TemporaryDirectory() as temp_dir:
temp_file_path = os.path.join(temp_dir, 'index.html')
with open(temp_file_path, 'w') as f:
f.write(HTML_FILE)
# Copy the file to the desired location
runtime.copy_to(temp_file_path, '/tmp/server')
# create README.md
action = CmdRunAction(
command='cd /tmp/server && nohup python3 -m http.server 8000 &',
keep_prompt=False,
)
obs = runtime.run_action(action)
@classmethod
def verify_result(cls, runtime: Runtime, histories: list[Event]) -> TestResult:
# check if the "The answer is OpenHands is all you need!" is in any message
message_actions = [
event
for event in histories
if isinstance(
event, (MessageAction, AgentFinishAction, AgentDelegateObservation)
)
]
for event in message_actions:
if isinstance(event, AgentDelegateObservation):
content = event.content
elif isinstance(event, AgentFinishAction):
content = event.outputs.get('content', '')
elif isinstance(event, MessageAction):
content = event.content
else:
raise ValueError(f'Unknown event type: {type(event)}')
if 'OpenHands is all you need!' in content:
return TestResult(success=True)
return TestResult(
success=False,
reason=f'The answer is not found in any message. Total messages: {len(message_actions)}. Messages: {message_actions}',
)

View File

@@ -29,7 +29,8 @@ from openhands.events.action import (
MessageAction,
)
from openhands.events.observation import CmdOutputObservation
from openhands.runtime.runtime import Runtime
from openhands.runtime.base import Runtime
from openhands.utils.async_utils import call_async_from_sync
AGENT_CLS_TO_FAKE_USER_RESPONSE_FN = {
'CodeActAgent': codeact_user_response,
@@ -202,6 +203,7 @@ def process_instance(
instruction += AGENT_CLS_TO_INST_SUFFIX[metadata.agent_class]
runtime = create_runtime(config)
call_async_from_sync(runtime.connect)
initialize_runtime(runtime, instance)
# Here's how you can run the agent (similar to the `main` function) and get the final task state

View File

@@ -1,4 +1,4 @@
# WebArena Evaluation with OpenHands Browsing Agents
# Mini-World of Bits Evaluation with OpenHands Browsing Agents
This folder contains evaluation for [MiniWoB++](https://miniwob.farama.org/) benchmark, powered by [BrowserGym](https://github.com/ServiceNow/BrowserGym) for easy evaluation of how well an agent capable of browsing can perform on synthetic web browsing tasks.

View File

@@ -30,11 +30,12 @@ from openhands.events.action import (
MessageAction,
)
from openhands.events.observation import CmdOutputObservation
from openhands.runtime.base import Runtime
from openhands.runtime.browser.browser_env import (
BROWSER_EVAL_GET_GOAL_ACTION,
BROWSER_EVAL_GET_REWARDS_ACTION,
)
from openhands.runtime.runtime import Runtime
from openhands.utils.async_utils import call_async_from_sync
SUPPORTED_AGENT_CLS = {'BrowsingAgent'}
@@ -127,6 +128,7 @@ def process_instance(
logger.info(f'Starting evaluation for instance {env_id}.')
runtime = create_runtime(config)
call_async_from_sync(runtime.connect)
task_str = initialize_runtime(runtime)
state: State | None = asyncio.run(
run_controller(

View File

@@ -32,7 +32,8 @@ from openhands.events.action import (
MessageAction,
)
from openhands.events.observation import CmdOutputObservation
from openhands.runtime.runtime import Runtime
from openhands.runtime.base import Runtime
from openhands.utils.async_utils import call_async_from_sync
def codeact_user_response_mint(state: State, task: Task, task_config: dict[str, int]):
@@ -176,6 +177,7 @@ def process_instance(
)
runtime = create_runtime(config)
call_async_from_sync(runtime.connect)
initialize_runtime(runtime)
state: State | None = asyncio.run(

View File

@@ -131,11 +131,9 @@ class MultipleChoiceTask(Task):
def compare_two_numbers(p, gt):
if isinstance(p, int) or isinstance(p, float):
if isinstance(p, (int, float)):
pass
elif isinstance(p, list) or isinstance(p, bool) or isinstance(p, str):
return False
elif isinstance(p, tuple) or isinstance(p, complex) or isinstance(p, dict):
elif isinstance(p, (bool, complex, dict, list, str, tuple)):
return False
else:
raise ValueError(p)
@@ -227,8 +225,8 @@ class TheoremqaTask(Task):
prediction = prediction.replace('°', '')
# Detect the boolean keyword in the generation
if prediction in ['true', 'yes', 'false', 'no']:
if prediction == 'true' or prediction == 'yes':
if prediction in ('true', 'yes', 'false', 'no'):
if prediction in ('true', 'yes'):
prediction = 'True'
else:
prediction = 'False'
@@ -342,7 +340,7 @@ class TheoremqaTask(Task):
answer_type = self._answer_type
gt = self.extract_answer(self.reference)
if isinstance(prediction, (str, int, float)) or isinstance(prediction, list):
if isinstance(prediction, (str, int, float, list)):
# Comparing prediction against the reference
if answer_type in ['bool', 'option', 'Option']:
cur_correct = int(prediction == f'({gt})') or int(prediction == gt)

View File

@@ -41,7 +41,8 @@ from openhands.core.logger import openhands_logger as logger
from openhands.core.main import create_runtime, run_controller
from openhands.events.action import CmdRunAction, MessageAction
from openhands.events.observation import CmdOutputObservation
from openhands.runtime.runtime import Runtime
from openhands.runtime.base import Runtime
from openhands.utils.async_utils import call_async_from_sync
config = load_app_config()
@@ -233,6 +234,7 @@ def process_instance(instance: Any, metadata: EvalMetadata, reset_logger: bool =
instruction += AGENT_CLS_TO_INST_SUFFIX[metadata.agent_class]
runtime = create_runtime(config)
call_async_from_sync(runtime.connect)
initialize_runtime(runtime, instance)
# Run the agent

View File

@@ -28,6 +28,7 @@ from openhands.core.logger import openhands_logger as logger
from openhands.core.main import create_runtime
from openhands.events.action import CmdRunAction
from openhands.events.observation import CmdOutputObservation
from openhands.utils.async_utils import call_async_from_sync
# TODO: migrate all swe-bench docker to ghcr.io/openhands
DOCKER_IMAGE_PREFIX = os.environ.get('EVAL_DOCKER_IMAGE_PREFIX', 'docker.io/xingyaoww/')
@@ -128,7 +129,7 @@ def process_instance(
)
runtime = create_runtime(config)
call_async_from_sync(runtime.connect)
# Get patch and save it to /tmp/patch.diff
with tempfile.TemporaryDirectory() as temp_dir:
# Patch file

View File

@@ -11,6 +11,7 @@ from datasets import load_dataset
import openhands.agenthub
from evaluation.swe_bench.prompt import CODEACT_SWE_PROMPT
from evaluation.utils.shared import (
EvalException,
EvalMetadata,
EvalOutput,
assert_and_raise,
@@ -32,8 +33,9 @@ from openhands.core.main import create_runtime, run_controller
from openhands.events.action import CmdRunAction, MessageAction
from openhands.events.observation import CmdOutputObservation, ErrorObservation
from openhands.events.serialization.event import event_to_dict
from openhands.runtime.runtime import Runtime
from openhands.runtime.base import Runtime
from openhands.runtime.utils.shutdown_listener import sleep_if_should_continue
from openhands.utils.async_utils import call_async_from_sync
USE_HINT_TEXT = os.environ.get('USE_HINT_TEXT', 'false').lower() == 'true'
USE_INSTANCE_IMAGE = os.environ.get('USE_INSTANCE_IMAGE', 'false').lower() == 'true'
@@ -80,8 +82,10 @@ def get_instruction(instance: pd.Series, metadata: EvalMetadata):
instruction += f'# Hints\n{instance.hints_text}\n\n'
instruction += (
'IMPORTANT: You should ONLY interact with the environment provided to you AND NEVER ASK FOR HUMAN HELP.\n'
'You should NOT modify any existing test case files. If needed, you can add new test cases in a NEW file to reproduce the issue.\n'
'You SHOULD INCLUDE PROPER INDENTATION in your edit commands.\n'
'You should NOT modify any existing test case files. You SHOULD add new test in a NEW file to reproduce the issue.\n'
'You should verify that the issue is resolved and any new tests you create pass successfully.\n'
'You should NEVER use web browsing or any other web-based tools.\n'
'You should ALWAYS use the default Python interpreter available in the <execute_bash> environment to run code related to the provided issue and/or repository.\n'
)
# NOTE: You can actually set slightly different instruction for different agents
@@ -122,7 +126,6 @@ def get_config(
config = AppConfig(
default_agent=metadata.agent_class,
run_as_openhands=False,
max_budget_per_task=4,
max_iterations=metadata.max_iterations,
runtime=os.environ.get('RUNTIME', 'eventstream'),
sandbox=SandboxConfig(
@@ -131,6 +134,8 @@ def get_config(
use_host_network=False,
# large enough timeout, since some testcases take very long to run
timeout=300,
# Add platform to the sandbox config to solve issue 4401
platform='linux/amd64',
api_key=os.environ.get('ALLHANDS_API_KEY', None),
remote_runtime_api_url=os.environ.get('SANDBOX_REMOTE_RUNTIME_API_URL'),
keep_remote_runtime_alive=False,
@@ -139,6 +144,14 @@ def get_config(
workspace_base=None,
workspace_mount_path=None,
)
if metadata.llm_config.log_completions:
metadata.llm_config.log_completions_folder = os.path.join(
metadata.eval_output_dir, 'llm_completions', instance['instance_id']
)
logger.info(
f'Logging LLM completions for instance {instance["instance_id"]} to '
f'{metadata.llm_config.log_completions_folder}'
)
config.set_llm_config(metadata.llm_config)
return config
@@ -166,7 +179,7 @@ def initialize_runtime(
obs = runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert_and_raise(
obs.exit_code == 0, f'Failed to export SWE_INSTANCE_ID: {obs.content}'
obs.exit_code == 0, f'Failed to export SWE_INSTANCE_ID: {str(obs)}'
)
action = CmdRunAction(command="""export USER=$(whoami); echo USER=${USER} """)
@@ -174,7 +187,7 @@ def initialize_runtime(
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert_and_raise(obs.exit_code == 0, f'Failed to export USER: {obs.content}')
assert_and_raise(obs.exit_code == 0, f'Failed to export USER: {str(obs)}')
if USE_INSTANCE_IMAGE:
# inject the init script
@@ -188,7 +201,7 @@ def initialize_runtime(
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert_and_raise(
obs.exit_code == 0,
f'Failed to create /swe_util/eval_data/instances: {obs.content}',
f'Failed to create /swe_util/eval_data/instances: {str(obs)}',
)
swe_instance_json_name = 'swe-bench-instance.json'
@@ -215,16 +228,16 @@ def initialize_runtime(
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert_and_raise(obs.exit_code == 0, f'Failed to cat ~/.bashrc: {obs.content}')
assert_and_raise(obs.exit_code == 0, f'Failed to cat ~/.bashrc: {str(obs)}')
action = CmdRunAction(command='source ~/.bashrc')
action.timeout = 600
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert_and_raise(
obs.exit_code == 0, f'Failed to source ~/.bashrc: {obs.content}'
)
if isinstance(obs, ErrorObservation):
logger.error(f'Failed to source ~/.bashrc: {str(obs)}')
assert_and_raise(obs.exit_code == 0, f'Failed to source ~/.bashrc: {str(obs)}')
action = CmdRunAction(command='source /swe_util/instance_swe_entry.sh')
action.timeout = 3600
@@ -233,7 +246,7 @@ def initialize_runtime(
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert_and_raise(
obs.exit_code == 0,
f'Failed to source /swe_util/instance_swe_entry.sh: {obs.content}',
f'Failed to source /swe_util/instance_swe_entry.sh: {str(obs)}',
)
else:
action = CmdRunAction(command='source /swe_util/swe_entry.sh')
@@ -243,7 +256,7 @@ def initialize_runtime(
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert_and_raise(
obs.exit_code == 0,
f'Failed to source /swe_util/swe_entry.sh: {obs.content}',
f'Failed to source /swe_util/swe_entry.sh: {str(obs)}',
)
action = CmdRunAction(command=f'cd /workspace/{workspace_dir_name}')
@@ -253,7 +266,7 @@ def initialize_runtime(
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert_and_raise(
obs.exit_code == 0,
f'Failed to cd to /workspace/{workspace_dir_name}: {obs.content}',
f'Failed to cd to /workspace/{workspace_dir_name}: {str(obs)}',
)
action = CmdRunAction(command='git reset --hard')
@@ -261,7 +274,7 @@ def initialize_runtime(
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert_and_raise(obs.exit_code == 0, f'Failed to git reset --hard: {obs.content}')
assert_and_raise(obs.exit_code == 0, f'Failed to git reset --hard: {str(obs)}')
action = CmdRunAction(
command='for remote_name in $(git remote); do git remote remove "${remote_name}"; done'
@@ -270,7 +283,7 @@ def initialize_runtime(
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert_and_raise(obs.exit_code == 0, f'Failed to remove git remotes: {obs.content}')
assert_and_raise(obs.exit_code == 0, f'Failed to remove git remotes: {str(obs)}')
logger.info('-' * 30)
logger.info('END Runtime Initialization Fn')
@@ -300,7 +313,7 @@ def complete_runtime(
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert_and_raise(
obs.exit_code == 0,
f'Failed to cd to /workspace/{workspace_dir_name}: {obs.content}',
f'Failed to cd to /workspace/{workspace_dir_name}: {str(obs)}',
)
action = CmdRunAction(command='git config --global core.pager ""')
@@ -310,7 +323,7 @@ def complete_runtime(
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert_and_raise(
obs.exit_code == 0,
f'Failed to git config --global core.pager "": {obs.content}',
f'Failed to git config --global core.pager "": {str(obs)}',
)
action = CmdRunAction(command='git add -A')
@@ -318,7 +331,7 @@ def complete_runtime(
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert_and_raise(obs.exit_code == 0, f'Failed to git add -A: {obs.content}')
assert_and_raise(obs.exit_code == 0, f'Failed to git add -A: {str(obs)}')
n_retries = 0
git_patch = None
@@ -343,7 +356,9 @@ def complete_runtime(
logger.error(f'Error occurred: {obs.content}. Retrying...')
sleep_if_should_continue(10)
else:
assert_and_raise(False, f'Unexpected observation type: {type(obs)}')
assert_and_raise(False, f'Unexpected observation type: {str(obs)}')
assert_and_raise(git_patch is not None, 'Failed to get git diff (None)')
logger.info('-' * 30)
logger.info('END Runtime Completion Fn')
@@ -366,6 +381,7 @@ def process_instance(
logger.info(f'Starting evaluation for instance {instance.instance_id}.')
runtime = create_runtime(config)
call_async_from_sync(runtime.connect)
try:
initialize_runtime(runtime, instance)
@@ -384,6 +400,13 @@ def process_instance(
)
)
# if fatal error, throw EvalError to trigger re-run
if (
state.last_error
and 'fatal error during agent execution' in state.last_error
):
raise EvalException('Fatal error detected: ' + state.last_error)
# ======= THIS IS SWE-Bench specific =======
# Get git patch
return_val = complete_runtime(runtime, instance)
@@ -419,7 +442,6 @@ def process_instance(
metadata=metadata,
history=histories,
metrics=metrics,
llm_completions=state.extra_data.get('llm_completions', []),
error=state.last_error if state and state.last_error else None,
)
return output
@@ -472,14 +494,13 @@ if __name__ == '__main__':
details = {}
_agent_cls = openhands.agenthub.Agent.get_cls(args.agent_cls)
if hasattr(_agent_cls, 'system_message'):
details['system_message'] = _agent_cls.system_message
if hasattr(_agent_cls, 'in_context_example'):
details['in_context_example'] = _agent_cls.in_context_example
dataset_descrption = (
args.dataset.replace('/', '__') + '-' + args.split.replace('/', '__')
)
metadata = make_metadata(
llm_config,
'swe-bench-lite',
dataset_descrption,
args.agent_cls,
args.max_iterations,
args.eval_note,

View File

@@ -0,0 +1,67 @@
#!/usr/bin/env python3
import argparse
import pandas as pd
parser = argparse.ArgumentParser(
description='Compare two swe_bench output JSONL files and print the resolved diff'
)
parser.add_argument('input_file_1', type=str)
parser.add_argument('input_file_2', type=str)
args = parser.parse_args()
df1 = pd.read_json(args.input_file_1, orient='records', lines=True)
df2 = pd.read_json(args.input_file_2, orient='records', lines=True)
# Get the intersection of the instance_ids
df = pd.merge(df1, df2, on='instance_id', how='inner')
def _get_resolved(report):
if report is None:
return False
if isinstance(report, float):
return False
else:
return report.get('resolved', False)
df['resolved_x'] = df['report_x'].apply(_get_resolved)
df['resolved_y'] = df['report_y'].apply(_get_resolved)
df['diff'] = df.apply(lambda x: x['resolved_x'] != x['resolved_y'], axis=1)
df_diff = df[df['diff']].sort_values(
by=['resolved_x', 'resolved_y'], ascending=[False, False]
)
# skip if any of the resolved is nan, which means one of the eval is not finished yet
df_diff = df_diff[df_diff['resolved_x'].notna() & df_diff['resolved_y'].notna()]
print(f'X={args.input_file_1}')
print(f'Y={args.input_file_2}')
print(f'# diff={df_diff.shape[0]}')
df_diff = df_diff[['instance_id', 'resolved_x', 'resolved_y', 'report_x', 'report_y']]
# x resolved but y not
print('-' * 100)
df_diff_x_only = df_diff[df_diff['resolved_x'] & ~df_diff['resolved_y']].sort_values(
by='instance_id'
)
print(f'# x resolved but y not={df_diff_x_only.shape[0]}')
print(df_diff_x_only[['instance_id', 'report_x', 'report_y']])
# y resolved but x not
print('-' * 100)
df_diff_y_only = df_diff[~df_diff['resolved_x'] & df_diff['resolved_y']].sort_values(
by='instance_id'
)
print(f'# y resolved but x not={df_diff_y_only.shape[0]}')
print(df_diff_y_only[['instance_id', 'report_x', 'report_y']])
# get instance_id from df_diff_y_only
print('-' * 100)
print('Instances that x resolved but y not:')
print(df_diff_x_only['instance_id'].tolist())
print('-' * 100)
print('Instances that y resolved but x not:')
print(df_diff_y_only['instance_id'].tolist())

View File

@@ -3,6 +3,9 @@ import argparse
import json
from collections import Counter
from openhands.events.serialization import event_from_dict
from openhands.events.utils import get_pairs_from_events
ERROR_KEYWORDS = [
'Agent encountered an error while processing the last action',
'APIError',
@@ -26,8 +29,37 @@ if __name__ == '__main__':
error_counter = Counter()
main_agent_cost = []
editor_cost = []
num_turns = []
for line in lines:
_d = json.loads(line)
# Cost
costs = _d['metrics'].get('costs', [])
_cur_main_agent_cost = 0
_cur_editor_cost = 0
for cost in costs:
if isinstance(cost, float):
# backward compatible
_cur_main_agent_cost += cost
else:
if 'draft_editor' in cost['model']:
_cur_editor_cost += cost['cost']
else:
_cur_main_agent_cost += cost['cost']
main_agent_cost.append(_cur_main_agent_cost)
editor_cost.append(_cur_editor_cost)
# Turn status
history = _d.get('history', [])
events = [event_from_dict(event) for event in history]
pairs = get_pairs_from_events(events)
num_turns.append(len(pairs))
# Patch & resolve status
patch = _d.get('test_result', {}).get('git_patch', '')
if patch == '':
num_empty_patch += 1
@@ -38,6 +70,7 @@ if __name__ == '__main__':
if resolved:
num_resolved += 1
# Error
error = _d.get('error', None)
if error is not None and isinstance(error, str):
@@ -70,7 +103,17 @@ if __name__ == '__main__':
print(
f'# of loop: {num_agent_stuck_in_loop} / {num_lines} ({num_agent_stuck_in_loop / num_lines * 100:.2f}%)'
)
assert len(num_turns) == num_lines
assert len(main_agent_cost) == num_lines
assert len(editor_cost) == num_lines
print(f'Avg. num of turns per instance: {sum(num_turns) / num_lines:.2f}')
print(f'Avg. agent cost per instance: {sum(main_agent_cost) / num_lines:.2f} USD')
print(f'Avg. editor cost per instance: {sum(editor_cost) / num_lines:.2f} USD')
print(
f'Avg. total cost per instance: {(sum(main_agent_cost) + sum(editor_cost)) / num_lines:.2f} USD'
)
print('-' * 100)
print('Detailed error breakdown:')
for error, count in error_counter.items():
print(f'{error}: {count} ({count / num_lines * 100:.2f}%)')
print('-' * 100)

View File

@@ -25,8 +25,8 @@ if [ -z "$AGENT" ]; then
fi
if [ -z "$MAX_ITER" ]; then
echo "MAX_ITER not specified, use default 30"
MAX_ITER=30
echo "MAX_ITER not specified, use default 100"
MAX_ITER=100
fi
if [ -z "$USE_INSTANCE_IMAGE" ]; then

View File

@@ -25,7 +25,8 @@ from openhands.core.logger import openhands_logger as logger
from openhands.core.main import create_runtime, run_controller
from openhands.events.action import CmdRunAction, MessageAction
from openhands.events.observation import CmdOutputObservation
from openhands.runtime.runtime import Runtime
from openhands.runtime.base import Runtime
from openhands.utils.async_utils import call_async_from_sync
AGENT_CLS_TO_FAKE_USER_RESPONSE_FN = {
'CodeActAgent': codeact_user_response,
@@ -103,6 +104,7 @@ def process_instance(instance: Any, metadata: EvalMetadata, reset_logger: bool =
logger.info(f'Instruction:\n{instruction}', extra={'msg_type': 'OBSERVATION'})
runtime = create_runtime(config)
call_async_from_sync(runtime.connect)
initialize_runtime(runtime)
# Here's how you can run the agent (similar to the `main` function) and get the final task state

View File

@@ -61,7 +61,6 @@ class EvalOutput(BaseModel):
history: (
list[dict[str, Any]] | list[tuple[dict[str, Any], dict[str, Any]]] | None
) = None
llm_completions: list[dict[str, Any]] | None = None
metrics: dict[str, Any] | None = None
error: str | None = None

View File

@@ -30,11 +30,12 @@ from openhands.events.action import (
MessageAction,
)
from openhands.events.observation import CmdOutputObservation
from openhands.runtime.base import Runtime
from openhands.runtime.browser.browser_env import (
BROWSER_EVAL_GET_GOAL_ACTION,
BROWSER_EVAL_GET_REWARDS_ACTION,
)
from openhands.runtime.runtime import Runtime
from openhands.utils.async_utils import call_async_from_sync
SUPPORTED_AGENT_CLS = {'BrowsingAgent'}
@@ -143,6 +144,7 @@ def process_instance(
logger.info(f'Starting evaluation for instance {env_id}.')
runtime = create_runtime(config)
call_async_from_sync(runtime.connect)
task_str = initialize_runtime(runtime)
state: State | None = asyncio.run(

View File

@@ -1,6 +1,2 @@
VITE_BACKEND_BASE_URL="localhost:3000" # Backend URL without protocol (e.g. localhost:3000)
VITE_MOCK_API="false" # true or false
# GitHub OAuth
VITE_GITHUB_CLIENT_ID=""
VITE_APP_MODE="oss" # "oss" or "saas"

View File

@@ -0,0 +1,73 @@
import { render, screen } from "@testing-library/react";
import userEvent from "@testing-library/user-event";
import { describe, it, expect, test } from "vitest";
import { ChatMessage } from "#/components/chat-message";
describe("ChatMessage", () => {
it("should render a user message", () => {
render(<ChatMessage type="user" message="Hello, World!" />);
expect(screen.getByTestId("user-message")).toBeInTheDocument();
expect(screen.getByText("Hello, World!")).toBeInTheDocument();
});
it("should render an assistant message", () => {
render(<ChatMessage type="assistant" message="Hello, World!" />);
expect(screen.getByTestId("assistant-message")).toBeInTheDocument();
expect(screen.getByText("Hello, World!")).toBeInTheDocument();
});
it.skip("should support code syntax highlighting", () => {
const code = "```js\nconsole.log('Hello, World!')\n```";
render(<ChatMessage type="user" message={code} />);
// SyntaxHighlighter breaks the code blocks into "tokens"
expect(screen.getByText("console")).toBeInTheDocument();
expect(screen.getByText("log")).toBeInTheDocument();
expect(screen.getByText("'Hello, World!'")).toBeInTheDocument();
});
it.todo("should support markdown content");
it("should render the copy to clipboard button when the user hovers over the message", async () => {
const user = userEvent.setup();
render(<ChatMessage type="user" message="Hello, World!" />);
const message = screen.getByText("Hello, World!");
expect(screen.getByTestId("copy-to-clipboard")).not.toBeVisible();
await user.hover(message);
expect(screen.getByTestId("copy-to-clipboard")).toBeVisible();
});
it("should copy content to clipboard", async () => {
const user = userEvent.setup();
render(<ChatMessage type="user" message="Hello, World!" />);
const copyToClipboardButton = screen.getByTestId("copy-to-clipboard");
await user.click(copyToClipboardButton);
expect(navigator.clipboard.readText()).resolves.toBe("Hello, World!");
});
// BUG: vi.useFakeTimers() seems to break the tests
it.todo(
"should display a checkmark for 200ms and disable the button after copying content to clipboard",
);
it("should display an error toast if copying content to clipboard fails", async () => {});
test.todo("push a toast after successfully copying content to clipboard");
it("should render a component passed as a prop", () => {
function Component() {
return <div data-testid="custom-component">Custom Component</div>;
}
render(
<ChatMessage type="user" message="Hello, World">
<Component />
</ChatMessage>,
);
expect(screen.getByTestId("custom-component")).toBeInTheDocument();
});
});

View File

@@ -1,28 +0,0 @@
import { screen } from "@testing-library/react";
import { describe, expect, it } from "vitest";
import { renderWithProviders } from "test-utils";
import Chat from "#/components/chat/Chat";
const MESSAGES: Message[] = [
{
sender: "assistant",
content: "Hello!",
imageUrls: [],
timestamp: new Date().toISOString(),
},
{
sender: "user",
content: "Hi!",
imageUrls: [],
timestamp: new Date().toISOString(),
},
];
describe("Chat", () => {
it("should render chat messages", () => {
renderWithProviders(<Chat messages={MESSAGES} />);
const messages = screen.getAllByTestId("article");
expect(messages).toHaveLength(MESSAGES.length);
});
});

View File

@@ -1,119 +0,0 @@
import userEvent from "@testing-library/user-event";
import { render, screen } from "@testing-library/react";
import { describe, afterEach, vi, it, expect } from "vitest";
import ChatInput from "#/components/chat/ChatInput";
describe.skip("ChatInput", () => {
afterEach(() => {
vi.clearAllMocks();
});
const onSendMessage = vi.fn();
it("should render a textarea", () => {
render(<ChatInput onSendMessage={onSendMessage} />);
expect(screen.getByRole("textbox")).toBeInTheDocument();
});
it("should be able to be set as disabled", async () => {
const user = userEvent.setup();
render(<ChatInput disabled onSendMessage={onSendMessage} />);
const textarea = screen.getByRole("textbox");
const button = screen.getByRole("button");
expect(textarea).not.toBeDisabled(); // user can still type
expect(button).toBeDisabled(); // user cannot submit
await user.type(textarea, "Hello, world!");
await user.keyboard("{Enter}");
expect(onSendMessage).not.toHaveBeenCalled();
});
it("should render with a placeholder", () => {
render(<ChatInput onSendMessage={onSendMessage} />);
const textarea = screen.getByPlaceholderText(
/CHAT_INTERFACE\$INPUT_PLACEHOLDER/i,
);
expect(textarea).toBeInTheDocument();
});
it("should render a send button", () => {
render(<ChatInput onSendMessage={onSendMessage} />);
expect(screen.getByRole("button")).toBeInTheDocument();
});
it("should call sendChatMessage with the input when the send button is clicked", async () => {
const user = userEvent.setup();
render(<ChatInput onSendMessage={onSendMessage} />);
const textarea = screen.getByRole("textbox");
const button = screen.getByRole("button");
await user.type(textarea, "Hello, world!");
await user.click(button);
expect(onSendMessage).toHaveBeenCalledWith("Hello, world!", []);
// Additionally, check if it was called exactly once
expect(onSendMessage).toHaveBeenCalledTimes(1);
});
it("should be able to send a message when the enter key is pressed", async () => {
const user = userEvent.setup();
render(<ChatInput onSendMessage={onSendMessage} />);
const textarea = screen.getByRole("textbox");
await user.type(textarea, "Hello, world!");
await user.keyboard("{Enter}");
expect(onSendMessage).toHaveBeenCalledWith("Hello, world!", []);
});
it("should NOT send a message when shift + enter is pressed", async () => {
const user = userEvent.setup();
render(<ChatInput onSendMessage={onSendMessage} />);
const textarea = screen.getByRole("textbox");
await user.type(textarea, "Hello, world!");
await user.keyboard("{Shift>} {Enter}"); // Shift + Enter
expect(onSendMessage).not.toHaveBeenCalled();
});
it("should NOT send an empty message", async () => {
const user = userEvent.setup();
render(<ChatInput onSendMessage={onSendMessage} />);
const textarea = screen.getByRole("textbox");
const button = screen.getByRole("button");
await user.type(textarea, " ");
// with enter key
await user.keyboard("{Enter}");
expect(onSendMessage).not.toHaveBeenCalled();
// with button click
await user.click(button);
expect(onSendMessage).not.toHaveBeenCalled();
});
it("should clear the input message after sending a message", async () => {
const user = userEvent.setup();
render(<ChatInput onSendMessage={onSendMessage} />);
const textarea = screen.getByRole("textbox");
const button = screen.getByRole("button");
await user.type(textarea, "Hello, world!");
expect(textarea).toHaveValue("Hello, world!");
await user.click(button);
expect(textarea).toHaveValue("");
});
// this is already implemented but need to figure out how to test it
it.todo(
"should NOT send a message when the enter key is pressed while composing",
);
});

View File

@@ -1,148 +0,0 @@
import { screen, act } from "@testing-library/react";
import { describe, expect, it, vi } from "vitest";
import userEvent from "@testing-library/user-event";
import { renderWithProviders } from "test-utils";
import { createMemoryRouter, RouterProvider } from "react-router-dom";
import { addAssistantMessage } from "#/state/chatSlice";
import AgentState from "#/types/AgentState";
import ChatInterface from "#/components/chat/ChatInterface";
const router = createMemoryRouter([
{
path: "/",
element: <ChatInterface />,
},
]);
/// <reference types="vitest" />
interface CustomMatchers<R = unknown> {
toMatchMessageEvent(expected: string): R;
}
declare module "vitest" {
interface Assertion<T> extends CustomMatchers<T> {}
// @ts-expect-error - recursively references itself
interface AsymmetricMatchersContaining extends CustomMatchers {}
}
// This is for the scrollview ref in Chat.tsx
// TODO: Move this into test setup
HTMLElement.prototype.scrollTo = vi.fn().mockImplementation(() => {});
const TEST_TIMESTAMP = new Date().toISOString();
describe.skip("ChatInterface", () => {
// TODO: replace below with e.g. fake timers
// https://vitest.dev/guide/mocking#timers
// https://vitest.dev/api/vi.html#vi-usefaketimers
// Custom matcher for testing message events
expect.extend({
toMatchMessageEvent(received, expected) {
const receivedObj = JSON.parse(received);
const expectedObj = JSON.parse(expected);
// Compare everything except the timestamp
const { timestamp: receivedTimestamp, ...receivedRest } =
receivedObj.args;
const { timestamp: expectedTimestamp, ...expectedRest } =
expectedObj.args;
const pass =
this.equals(receivedRest, expectedRest) &&
typeof receivedTimestamp === "string";
return {
pass,
message: () =>
pass
? `expected ${received} not to match the structure of ${expected} (ignoring exact timestamp)`
: `expected ${received} to match the structure of ${expected} (ignoring exact timestamp)`,
};
},
});
it("should render empty message list and input", () => {
renderWithProviders(<ChatInterface />);
expect(screen.queryAllByTestId("article")).toHaveLength(0);
});
it("should render user and assistant messages", () => {
const { store } = renderWithProviders(<RouterProvider router={router} />, {
preloadedState: {
chat: {
messages: [
{
sender: "user",
content: "Hello",
imageUrls: [],
timestamp: TEST_TIMESTAMP,
},
],
},
},
});
expect(screen.getAllByTestId("article")).toHaveLength(1);
expect(screen.getByText("Hello")).toBeInTheDocument();
act(() => {
// simulate assistant response
store.dispatch(addAssistantMessage("Hello to you!"));
});
expect(screen.getAllByTestId("article")).toHaveLength(2);
expect(screen.getByText("Hello to you!")).toBeInTheDocument();
});
it("should send the user message as an event to the Session when the agent state is INIT", async () => {
const user = userEvent.setup();
renderWithProviders(<RouterProvider router={router} />, {
preloadedState: {
agent: {
curAgentState: AgentState.INIT,
},
},
});
const input = screen.getByRole("textbox");
await user.type(input, "my message");
await user.keyboard("{Enter}");
});
it("should send the user message as an event to the Session when the agent state is AWAITING_USER_INPUT", async () => {
const user = userEvent.setup();
renderWithProviders(<RouterProvider router={router} />, {
preloadedState: {
agent: {
curAgentState: AgentState.AWAITING_USER_INPUT,
},
},
});
const input = screen.getByRole("textbox");
await user.type(input, "my message");
await user.keyboard("{Enter}");
});
it("should disable the user input if agent is not initialized", async () => {
const user = userEvent.setup();
renderWithProviders(<RouterProvider router={router} />, {
preloadedState: {
agent: {
curAgentState: AgentState.LOADING,
},
},
});
const input = screen.getByRole("textbox");
await user.type(input, "my message");
await user.keyboard("{Enter}");
const submitButton = screen.getByLabelText(
"CHAT_INTERFACE$TOOLTIP_SEND_MESSAGE",
);
expect(submitButton).toBeDisabled();
});
it.todo("test scroll-related behaviour");
});

View File

@@ -1,200 +0,0 @@
import { fireEvent, render, screen, within } from "@testing-library/react";
import { describe, it, expect, vi } from "vitest";
import userEvent from "@testing-library/user-event";
import toast from "#/utils/toast";
import ChatMessage from "#/components/chat/ChatMessage";
describe("Message", () => {
it("should render a user message", () => {
render(
<ChatMessage
message={{
sender: "user",
content: "Hello",
imageUrls: [],
timestamp: new Date().toISOString(),
}}
isLastMessage={false}
/>,
);
expect(screen.getByTestId("article")).toBeInTheDocument();
expect(screen.getByTestId("article")).toHaveClass("self-end"); // user message should be on the right side
});
it("should render an assistant message", () => {
render(
<ChatMessage
message={{
sender: "assistant",
content: "Hi",
imageUrls: [],
timestamp: new Date().toISOString(),
}}
isLastMessage={false}
/>,
);
expect(screen.getByTestId("article")).toBeInTheDocument();
expect(screen.getByTestId("article")).not.toHaveClass("self-end"); // assistant message should be on the left side
});
it("should render markdown content", () => {
render(
<ChatMessage
message={{
sender: "user",
content: "```js\nconsole.log('Hello')\n```",
imageUrls: [],
timestamp: new Date().toISOString(),
}}
isLastMessage={false}
/>,
);
// SyntaxHighlighter breaks the code blocks into "tokens"
expect(screen.getByText("console")).toBeInTheDocument();
expect(screen.getByText("log")).toBeInTheDocument();
expect(screen.getByText("'Hello'")).toBeInTheDocument();
});
describe("copy to clipboard", () => {
const toastInfoSpy = vi.spyOn(toast, "info");
const toastErrorSpy = vi.spyOn(toast, "error");
it("should copy any message to clipboard", async () => {
const user = userEvent.setup();
render(
<ChatMessage
message={{
sender: "user",
content: "Hello",
imageUrls: [],
timestamp: new Date().toISOString(),
}}
isLastMessage={false}
/>,
);
const message = screen.getByTestId("article");
let copyButton = within(message).queryByTestId("copy-button");
expect(copyButton).not.toBeInTheDocument();
// I am using `fireEvent` here because `userEvent.hover()` seems to interfere with the
// `userEvent.click()` call later on
fireEvent.mouseEnter(message);
copyButton = within(message).getByTestId("copy-button");
await user.click(copyButton);
expect(navigator.clipboard.readText()).resolves.toBe("Hello");
expect(toastInfoSpy).toHaveBeenCalled();
});
it("should show an error message when the message cannot be copied", async () => {
const user = userEvent.setup();
render(
<ChatMessage
message={{
sender: "user",
content: "Hello",
imageUrls: [],
timestamp: new Date().toISOString(),
}}
isLastMessage={false}
/>,
);
const message = screen.getByTestId("article");
fireEvent.mouseEnter(message);
const copyButton = within(message).getByTestId("copy-button");
const clipboardSpy = vi
.spyOn(navigator.clipboard, "writeText")
.mockRejectedValue(new Error("Failed to copy"));
await user.click(copyButton);
expect(clipboardSpy).toHaveBeenCalled();
expect(toastErrorSpy).toHaveBeenCalled();
});
});
describe("confirmation buttons", () => {
const expectButtonsNotToBeRendered = () => {
expect(
screen.queryByTestId("action-confirm-button"),
).not.toBeInTheDocument();
expect(
screen.queryByTestId("action-reject-button"),
).not.toBeInTheDocument();
};
it.skip("should display confirmation buttons for the last assistant message", () => {
// it should not render buttons if the message is not the last one
const { rerender } = render(
<ChatMessage
message={{
sender: "assistant",
content: "Are you sure?",
imageUrls: [],
timestamp: new Date().toISOString(),
}}
isLastMessage={false}
awaitingUserConfirmation
/>,
);
expectButtonsNotToBeRendered();
// it should not render buttons if the message is not from the assistant
rerender(
<ChatMessage
message={{
sender: "user",
content: "Yes",
imageUrls: [],
timestamp: new Date().toISOString(),
}}
isLastMessage
awaitingUserConfirmation
/>,
);
expectButtonsNotToBeRendered();
// it should not render buttons if the message is not awaiting user confirmation
rerender(
<ChatMessage
message={{
sender: "assistant",
content: "Are you sure?",
imageUrls: [],
timestamp: new Date().toISOString(),
}}
isLastMessage
awaitingUserConfirmation={false}
/>,
);
expectButtonsNotToBeRendered();
// it should render buttons if all conditions are met
rerender(
<ChatMessage
message={{
sender: "assistant",
content: "Are you sure?",
imageUrls: [],
timestamp: new Date().toISOString(),
}}
isLastMessage
awaitingUserConfirmation
/>,
);
const confirmButton = screen.getByTestId("action-confirm-button");
const rejectButton = screen.getByTestId("action-reject-button");
expect(confirmButton).toBeInTheDocument();
expect(rejectButton).toBeInTheDocument();
});
});
});

View File

@@ -0,0 +1,161 @@
import userEvent from "@testing-library/user-event";
import { render, screen } from "@testing-library/react";
import { describe, afterEach, vi, it, expect } from "vitest";
import { ChatInput } from "#/components/chat-input";
describe("ChatInput", () => {
const onSubmitMock = vi.fn();
afterEach(() => {
vi.clearAllMocks();
});
it("should render a textarea", () => {
render(<ChatInput onSubmit={onSubmitMock} />);
expect(screen.getByTestId("chat-input")).toBeInTheDocument();
expect(screen.getByRole("textbox")).toBeInTheDocument();
});
it("should call onSubmit when the user types and presses enter", async () => {
const user = userEvent.setup();
render(<ChatInput onSubmit={onSubmitMock} />);
const textarea = screen.getByRole("textbox");
await user.type(textarea, "Hello, world!");
await user.keyboard("{Enter}");
expect(onSubmitMock).toHaveBeenCalledWith("Hello, world!");
});
it("should call onSubmit when pressing the submit button", async () => {
const user = userEvent.setup();
render(<ChatInput onSubmit={onSubmitMock} />);
const textarea = screen.getByRole("textbox");
const button = screen.getByRole("button");
await user.type(textarea, "Hello, world!");
await user.click(button);
expect(onSubmitMock).toHaveBeenCalledWith("Hello, world!");
});
it("should not call onSubmit when the message is empty", async () => {
const user = userEvent.setup();
render(<ChatInput onSubmit={onSubmitMock} />);
const button = screen.getByRole("button");
await user.click(button);
expect(onSubmitMock).not.toHaveBeenCalled();
await user.keyboard("{Enter}");
expect(onSubmitMock).not.toHaveBeenCalled();
});
it("should disable submit", async () => {
const user = userEvent.setup();
render(<ChatInput disabled onSubmit={onSubmitMock} />);
const button = screen.getByRole("button");
const textarea = screen.getByRole("textbox");
await user.type(textarea, "Hello, world!");
expect(button).toBeDisabled();
await user.click(button);
expect(onSubmitMock).not.toHaveBeenCalled();
await user.keyboard("{Enter}");
expect(onSubmitMock).not.toHaveBeenCalled();
});
it("should render a placeholder", () => {
render(
<ChatInput placeholder="Enter your message" onSubmit={onSubmitMock} />,
);
const textarea = screen.getByPlaceholderText("Enter your message");
expect(textarea).toBeInTheDocument();
});
it("should create a newline instead of submitting when shift + enter is pressed", async () => {
const user = userEvent.setup();
render(<ChatInput onSubmit={onSubmitMock} />);
const textarea = screen.getByRole("textbox");
await user.type(textarea, "Hello, world!");
await user.keyboard("{Shift>} {Enter}"); // Shift + Enter
expect(onSubmitMock).not.toHaveBeenCalled();
// expect(textarea).toHaveValue("Hello, world!\n");
});
it("should clear the input message after sending a message", async () => {
const user = userEvent.setup();
render(<ChatInput onSubmit={onSubmitMock} />);
const textarea = screen.getByRole("textbox");
const button = screen.getByRole("button");
await user.type(textarea, "Hello, world!");
await user.keyboard("{Enter}");
expect(textarea).toHaveValue("");
await user.type(textarea, "Hello, world!");
await user.click(button);
expect(textarea).toHaveValue("");
});
it("should hide the submit button", () => {
render(<ChatInput onSubmit={onSubmitMock} showButton={false} />);
expect(screen.queryByRole("button")).not.toBeInTheDocument();
});
it("should call onChange when the user types", async () => {
const user = userEvent.setup();
const onChangeMock = vi.fn();
render(<ChatInput onSubmit={onSubmitMock} onChange={onChangeMock} />);
const textarea = screen.getByRole("textbox");
await user.type(textarea, "Hello, world!");
expect(onChangeMock).toHaveBeenCalledTimes("Hello, world!".length);
});
it("should have set the passed value", () => {
render(<ChatInput value="Hello, world!" onSubmit={onSubmitMock} />);
const textarea = screen.getByRole("textbox");
expect(textarea).toHaveValue("Hello, world!");
});
it("should display the stop button and trigger the callback", async () => {
const user = userEvent.setup();
const onStopMock = vi.fn();
render(
<ChatInput onSubmit={onSubmitMock} button="stop" onStop={onStopMock} />,
);
const stopButton = screen.getByTestId("stop-button");
await user.click(stopButton);
expect(onStopMock).toHaveBeenCalledOnce();
});
it("should call onFocus and onBlur when the textarea is focused and blurred", async () => {
const user = userEvent.setup();
const onFocusMock = vi.fn();
const onBlurMock = vi.fn();
render(
<ChatInput
onSubmit={onSubmitMock}
onFocus={onFocusMock}
onBlur={onBlurMock}
/>,
);
const textarea = screen.getByRole("textbox");
await user.click(textarea);
expect(onFocusMock).toHaveBeenCalledOnce();
await user.tab();
expect(onBlurMock).toHaveBeenCalledOnce();
});
});

View File

@@ -0,0 +1,185 @@
import { afterEach, describe, expect, it, vi } from "vitest";
import { render, screen, within } from "@testing-library/react";
import userEvent from "@testing-library/user-event";
import { ChatInterface } from "#/components/chat-interface";
import { SocketProvider } from "#/context/socket";
// eslint-disable-next-line @typescript-eslint/no-unused-vars
const renderChatInterface = (messages: (Message | ErrorMessage)[]) =>
render(<ChatInterface />, { wrapper: SocketProvider });
describe.skip("ChatInterface", () => {
afterEach(() => {
vi.clearAllMocks();
});
it.todo("should render suggestions if empty");
it("should render messages", () => {
const messages: Message[] = [
{
sender: "user",
content: "Hello",
imageUrls: [],
timestamp: new Date().toISOString(),
},
{
sender: "assistant",
content: "Hi",
imageUrls: [],
timestamp: new Date().toISOString(),
},
];
renderChatInterface(messages);
expect(screen.getAllByTestId(/-message/)).toHaveLength(2);
});
it("should render a chat input", () => {
const messages: Message[] = [];
renderChatInterface(messages);
expect(screen.getByTestId("chat-input")).toBeInTheDocument();
});
it.todo("should call socket send when submitting a message", async () => {
const user = userEvent.setup();
const messages: Message[] = [];
renderChatInterface(messages);
const input = screen.getByTestId("chat-input");
await user.type(input, "Hello");
await user.keyboard("{Enter}");
// spy on send and expect to have been called
});
it("should render an image carousel with a message", () => {
let messages: Message[] = [
{
sender: "assistant",
content: "Here are some images",
imageUrls: [],
timestamp: new Date().toISOString(),
},
];
const { rerender } = renderChatInterface(messages);
expect(screen.queryByTestId("image-carousel")).not.toBeInTheDocument();
messages = [
{
sender: "assistant",
content: "Here are some images",
imageUrls: ["image1", "image2"],
timestamp: new Date().toISOString(),
},
];
rerender(<ChatInterface />);
const imageCarousel = screen.getByTestId("image-carousel");
expect(imageCarousel).toBeInTheDocument();
expect(within(imageCarousel).getAllByTestId("image-preview")).toHaveLength(
2,
);
});
it.todo("should render confirmation buttons");
it("should render a 'continue' action when there are more than 2 messages and awaiting user input", () => {
const messages: Message[] = [
{
sender: "assistant",
content: "Hello",
imageUrls: [],
timestamp: new Date().toISOString(),
},
{
sender: "user",
content: "Hi",
imageUrls: [],
timestamp: new Date().toISOString(),
},
];
const { rerender } = renderChatInterface(messages);
expect(
screen.queryByTestId("continue-action-button"),
).not.toBeInTheDocument();
messages.push({
sender: "assistant",
content: "How can I help you?",
imageUrls: [],
timestamp: new Date().toISOString(),
});
rerender(<ChatInterface />);
expect(screen.getByTestId("continue-action-button")).toBeInTheDocument();
});
it("should render inline errors", () => {
const messages: (Message | ErrorMessage)[] = [
{
sender: "assistant",
content: "Hello",
imageUrls: [],
timestamp: new Date().toISOString(),
},
{
error: "Woops!",
message: "Something went wrong",
},
];
renderChatInterface(messages);
const error = screen.getByTestId("error-message");
expect(within(error).getByText("Woops!")).toBeInTheDocument();
expect(within(error).getByText("Something went wrong")).toBeInTheDocument();
});
it("should render feedback actions if there are more than 3 messages", () => {
const messages: Message[] = [
{
sender: "assistant",
content: "Hello",
imageUrls: [],
timestamp: new Date().toISOString(),
},
{
sender: "user",
content: "Hi",
imageUrls: [],
timestamp: new Date().toISOString(),
},
{
sender: "assistant",
content: "How can I help you?",
imageUrls: [],
timestamp: new Date().toISOString(),
},
];
const { rerender } = renderChatInterface(messages);
expect(screen.queryByTestId("feedback-actions")).not.toBeInTheDocument();
messages.push({
sender: "user",
content: "I need help",
imageUrls: [],
timestamp: new Date().toISOString(),
});
rerender(<ChatInterface />);
expect(screen.getByTestId("feedback-actions")).toBeInTheDocument();
});
describe("feedback", () => {
it.todo("should open the feedback modal when a feedback action is clicked");
it.todo(
"should submit feedback and hide the actions when feedback is shared",
);
it.todo("should render the actions once more after new messages are added");
});
});

View File

@@ -0,0 +1,99 @@
import { render, screen } from "@testing-library/react";
import userEvent from "@testing-library/user-event";
import { afterEach, describe, expect, it, test, vi } from "vitest";
import { AccountSettingsContextMenu } from "#/components/context-menu/account-settings-context-menu";
describe("AccountSettingsContextMenu", () => {
const user = userEvent.setup();
const onClickAccountSettingsMock = vi.fn();
const onLogoutMock = vi.fn();
const onCloseMock = vi.fn();
afterEach(() => {
onClickAccountSettingsMock.mockClear();
onLogoutMock.mockClear();
onCloseMock.mockClear();
});
it("should always render the right options", () => {
render(
<AccountSettingsContextMenu
onClickAccountSettings={onClickAccountSettingsMock}
onLogout={onLogoutMock}
onClose={onCloseMock}
isLoggedIn
/>,
);
expect(
screen.getByTestId("account-settings-context-menu"),
).toBeInTheDocument();
expect(screen.getByText("Account Settings")).toBeInTheDocument();
expect(screen.getByText("Logout")).toBeInTheDocument();
});
it("should call onClickAccountSettings when the account settings option is clicked", async () => {
render(
<AccountSettingsContextMenu
onClickAccountSettings={onClickAccountSettingsMock}
onLogout={onLogoutMock}
onClose={onCloseMock}
isLoggedIn
/>,
);
const accountSettingsOption = screen.getByText("Account Settings");
await user.click(accountSettingsOption);
expect(onClickAccountSettingsMock).toHaveBeenCalledOnce();
});
it("should call onLogout when the logout option is clicked", async () => {
render(
<AccountSettingsContextMenu
onClickAccountSettings={onClickAccountSettingsMock}
onLogout={onLogoutMock}
onClose={onCloseMock}
isLoggedIn
/>,
);
const logoutOption = screen.getByText("Logout");
await user.click(logoutOption);
expect(onLogoutMock).toHaveBeenCalledOnce();
});
test("onLogout should be disabled if the user is not logged in", async () => {
render(
<AccountSettingsContextMenu
onClickAccountSettings={onClickAccountSettingsMock}
onLogout={onLogoutMock}
onClose={onCloseMock}
isLoggedIn={false}
/>,
);
const logoutOption = screen.getByText("Logout");
await user.click(logoutOption);
expect(onLogoutMock).not.toHaveBeenCalled();
});
it("should call onClose when clicking outside of the element", async () => {
render(
<AccountSettingsContextMenu
onClickAccountSettings={onClickAccountSettingsMock}
onLogout={onLogoutMock}
onClose={onCloseMock}
isLoggedIn
/>,
);
const accountSettingsButton = screen.getByText("Account Settings");
await user.click(accountSettingsButton);
await user.click(document.body);
expect(onCloseMock).toHaveBeenCalledOnce();
});
});

View File

@@ -0,0 +1,41 @@
import { describe, it, expect, vi } from "vitest";
import { render, screen } from "@testing-library/react";
import userEvent from "@testing-library/user-event";
import { ContextMenuListItem } from "#/components/context-menu/context-menu-list-item";
describe("ContextMenuListItem", () => {
it("should render the component with the children", () => {
render(<ContextMenuListItem onClick={vi.fn}>Test</ContextMenuListItem>);
expect(screen.getByTestId("context-menu-list-item")).toBeInTheDocument();
expect(screen.getByText("Test")).toBeInTheDocument();
});
it("should call the onClick callback when clicked", async () => {
const user = userEvent.setup();
const onClickMock = vi.fn();
render(
<ContextMenuListItem onClick={onClickMock}>Test</ContextMenuListItem>,
);
const element = screen.getByTestId("context-menu-list-item");
await user.click(element);
expect(onClickMock).toHaveBeenCalledOnce();
});
it("should not call the onClick callback when clicked and the button is disabled", async () => {
const user = userEvent.setup();
const onClickMock = vi.fn();
render(
<ContextMenuListItem onClick={onClickMock} isDisabled>
Test
</ContextMenuListItem>,
);
const element = screen.getByTestId("context-menu-list-item");
await user.click(element);
expect(onClickMock).not.toHaveBeenCalled();
});
});

View File

@@ -0,0 +1,55 @@
import { render, screen, within } from "@testing-library/react";
import userEvent from "@testing-library/user-event";
import { afterEach, describe, expect, it, vi } from "vitest";
import { FeedbackActions } from "#/components/feedback-actions";
describe("FeedbackActions", () => {
const user = userEvent.setup();
const onPositiveFeedback = vi.fn();
const onNegativeFeedback = vi.fn();
afterEach(() => {
vi.clearAllMocks();
});
it("should render correctly", () => {
render(
<FeedbackActions
onPositiveFeedback={onPositiveFeedback}
onNegativeFeedback={onNegativeFeedback}
/>,
);
const actions = screen.getByTestId("feedback-actions");
within(actions).getByTestId("positive-feedback");
within(actions).getByTestId("negative-feedback");
});
it("should call onPositiveFeedback when positive feedback is clicked", async () => {
render(
<FeedbackActions
onPositiveFeedback={onPositiveFeedback}
onNegativeFeedback={onNegativeFeedback}
/>,
);
const positiveFeedback = screen.getByTestId("positive-feedback");
await user.click(positiveFeedback);
expect(onPositiveFeedback).toHaveBeenCalled();
});
it("should call onNegativeFeedback when negative feedback is clicked", async () => {
render(
<FeedbackActions
onPositiveFeedback={onPositiveFeedback}
onNegativeFeedback={onNegativeFeedback}
/>,
);
const negativeFeedback = screen.getByTestId("negative-feedback");
await user.click(negativeFeedback);
expect(onNegativeFeedback).toHaveBeenCalled();
});
});

View File

@@ -0,0 +1,108 @@
import { render, screen } from "@testing-library/react";
import userEvent from "@testing-library/user-event";
import { afterEach, describe, expect, it, vi } from "vitest";
import { FeedbackForm } from "#/components/feedback-form";
describe("FeedbackForm", () => {
const user = userEvent.setup();
const onSubmitMock = vi.fn();
const onCloseMock = vi.fn();
afterEach(() => {
vi.clearAllMocks();
});
it("should render correctly", () => {
render(<FeedbackForm onSubmit={onSubmitMock} onClose={onCloseMock} />);
screen.getByLabelText("Email");
screen.getByLabelText("Private");
screen.getByLabelText("Public");
screen.getByRole("button", { name: "Submit" });
screen.getByRole("button", { name: "Cancel" });
});
it("should switch between private and public permissions", async () => {
render(<FeedbackForm onSubmit={onSubmitMock} onClose={onCloseMock} />);
const privateRadio = screen.getByLabelText("Private");
const publicRadio = screen.getByLabelText("Public");
expect(privateRadio).toBeChecked(); // private is the default value
expect(publicRadio).not.toBeChecked();
await user.click(publicRadio);
expect(publicRadio).toBeChecked();
expect(privateRadio).not.toBeChecked();
await user.click(privateRadio);
expect(privateRadio).toBeChecked();
expect(publicRadio).not.toBeChecked();
});
it("should call onSubmit when the form is submitted", async () => {
render(<FeedbackForm onSubmit={onSubmitMock} onClose={onCloseMock} />);
const email = screen.getByLabelText("Email");
await user.type(email, "test@test.test");
await user.click(screen.getByRole("button", { name: "Submit" }));
expect(onSubmitMock).toHaveBeenCalledWith("private", "test@test.test"); // private is the default value
});
it("should not call onSubmit when the email is invalid", async () => {
render(<FeedbackForm onSubmit={onSubmitMock} onClose={onCloseMock} />);
const email = screen.getByLabelText("Email");
const submitButton = screen.getByRole("button", { name: "Submit" });
await user.click(submitButton);
expect(onSubmitMock).not.toHaveBeenCalled();
await user.type(email, "test");
await user.click(submitButton);
expect(onSubmitMock).not.toHaveBeenCalled();
});
it("should submit public permissions when the public radio is checked", async () => {
render(<FeedbackForm onSubmit={onSubmitMock} onClose={onCloseMock} />);
const email = screen.getByLabelText("Email");
const publicRadio = screen.getByLabelText("Public");
await user.type(email, "test@test.test");
await user.click(publicRadio);
await user.click(screen.getByRole("button", { name: "Submit" }));
expect(onSubmitMock).toHaveBeenCalledWith("public", "test@test.test");
});
it("should call onClose when the close button is clicked", async () => {
render(<FeedbackForm onSubmit={onSubmitMock} onClose={onCloseMock} />);
await user.click(screen.getByRole("button", { name: "Cancel" }));
expect(onSubmitMock).not.toHaveBeenCalled();
expect(onCloseMock).toHaveBeenCalled();
});
it("should disable the buttons if isSubmitting is true", () => {
const { rerender } = render(
<FeedbackForm onSubmit={onSubmitMock} onClose={onCloseMock} />,
);
const submitButton = screen.getByRole("button", { name: "Submit" });
const cancelButton = screen.getByRole("button", { name: "Cancel" });
expect(submitButton).not.toBeDisabled();
expect(cancelButton).not.toBeDisabled();
rerender(
<FeedbackForm
onSubmit={onSubmitMock}
onClose={onCloseMock}
isSubmitting
/>,
);
expect(submitButton).toBeDisabled();
expect(cancelButton).toBeDisabled();
});
});

View File

@@ -16,7 +16,7 @@ vi.mock("../../services/fileService", async () => ({
}));
const renderFileExplorerWithRunningAgentState = () =>
renderWithProviders(<FileExplorer />, {
renderWithProviders(<FileExplorer error={null} />, {
preloadedState: {
agent: {
curAgentState: AgentState.RUNNING,

View File

@@ -0,0 +1,37 @@
import { render, screen } from "@testing-library/react";
import userEvent from "@testing-library/user-event";
import { describe, expect, it, vi } from "vitest";
import { ImagePreview } from "#/components/image-preview";
describe("ImagePreview", () => {
it("should render an image", () => {
render(
<ImagePreview src="https://example.com/image.jpg" onRemove={vi.fn} />,
);
const img = screen.getByRole("img");
expect(screen.getByTestId("image-preview")).toBeInTheDocument();
expect(img).toHaveAttribute("src", "https://example.com/image.jpg");
});
it("should call onRemove when the close button is clicked", async () => {
const user = userEvent.setup();
const onRemoveMock = vi.fn();
render(
<ImagePreview
src="https://example.com/image.jpg"
onRemove={onRemoveMock}
/>,
);
const closeButton = screen.getByRole("button");
await user.click(closeButton);
expect(onRemoveMock).toHaveBeenCalledOnce();
});
it("shoud not display the close button when onRemove is not provided", () => {
render(<ImagePreview src="https://example.com/image.jpg" />);
expect(screen.queryByRole("button")).not.toBeInTheDocument();
});
});

View File

@@ -0,0 +1,119 @@
import { render, screen, within } from "@testing-library/react";
import userEvent from "@testing-library/user-event";
import { afterEach, beforeAll, describe, expect, it, vi } from "vitest";
import { InteractiveChatBox } from "#/components/interactive-chat-box";
describe("InteractiveChatBox", () => {
const onSubmitMock = vi.fn();
const onStopMock = vi.fn();
beforeAll(() => {
global.URL.createObjectURL = vi
.fn()
.mockReturnValue("blob:http://example.com");
});
afterEach(() => {
vi.clearAllMocks();
});
it("should render", () => {
render(<InteractiveChatBox onSubmit={onSubmitMock} onStop={onStopMock} />);
const chatBox = screen.getByTestId("interactive-chat-box");
within(chatBox).getByTestId("chat-input");
within(chatBox).getByTestId("upload-image-input");
});
it("should display the image previews when images are uploaded", async () => {
const user = userEvent.setup();
render(<InteractiveChatBox onSubmit={onSubmitMock} onStop={onStopMock} />);
const file = new File(["(⌐□_□)"], "chucknorris.png", { type: "image/png" });
const input = screen.getByTestId("upload-image-input");
expect(screen.queryAllByTestId("image-preview")).toHaveLength(0);
await user.upload(input, file);
expect(screen.queryAllByTestId("image-preview")).toHaveLength(1);
const files = [
new File(["(⌐□_□)"], "chucknorris2.png", { type: "image/png" }),
new File(["(⌐□_□)"], "chucknorris3.png", { type: "image/png" }),
];
await user.upload(input, files);
expect(screen.queryAllByTestId("image-preview")).toHaveLength(3);
});
it("should remove the image preview when the close button is clicked", async () => {
const user = userEvent.setup();
render(<InteractiveChatBox onSubmit={onSubmitMock} onStop={onStopMock} />);
const file = new File(["(⌐□_□)"], "chucknorris.png", { type: "image/png" });
const input = screen.getByTestId("upload-image-input");
await user.upload(input, file);
expect(screen.queryAllByTestId("image-preview")).toHaveLength(1);
const imagePreview = screen.getByTestId("image-preview");
const closeButton = within(imagePreview).getByRole("button");
await user.click(closeButton);
expect(screen.queryAllByTestId("image-preview")).toHaveLength(0);
});
it("should call onSubmit with the message and images", async () => {
const user = userEvent.setup();
render(<InteractiveChatBox onSubmit={onSubmitMock} onStop={onStopMock} />);
const textarea = within(screen.getByTestId("chat-input")).getByRole(
"textbox",
);
const input = screen.getByTestId("upload-image-input");
const file = new File(["(⌐□_□)"], "chucknorris.png", { type: "image/png" });
await user.upload(input, file);
await user.type(textarea, "Hello, world!");
await user.keyboard("{Enter}");
expect(onSubmitMock).toHaveBeenCalledWith("Hello, world!", [file]);
// clear images after submission
expect(screen.queryAllByTestId("image-preview")).toHaveLength(0);
});
it("should disable the submit button", async () => {
const user = userEvent.setup();
render(
<InteractiveChatBox
isDisabled
onSubmit={onSubmitMock}
onStop={onStopMock}
/>,
);
const button = screen.getByRole("button");
expect(button).toBeDisabled();
await user.click(button);
expect(onSubmitMock).not.toHaveBeenCalled();
});
it("should display the stop button if set and call onStop when clicked", async () => {
const user = userEvent.setup();
render(
<InteractiveChatBox
mode="stop"
onSubmit={onSubmitMock}
onStop={onStopMock}
/>,
);
const stopButton = screen.getByTestId("stop-button");
expect(stopButton).toBeInTheDocument();
await user.click(stopButton);
expect(onStopMock).toHaveBeenCalledOnce();
});
});

View File

@@ -1,193 +0,0 @@
import { render, screen, within } from "@testing-library/react";
import { Mock, afterEach, describe, expect, it, vi } from "vitest";
import userEvent from "@testing-library/user-event";
import toast from "react-hot-toast";
import FeedbackModal from "#/components/modals/feedback/FeedbackModal";
import OpenHands from "#/api/open-hands";
describe.skip("FeedbackModal", () => {
Storage.prototype.setItem = vi.fn();
Storage.prototype.getItem = vi.fn();
vi.mock("#/services/feedbackService", () => ({
sendFeedback: vi.fn(),
}));
vi.mock("#/services/auth", () => ({
getToken: vi.fn().mockReturnValue("some-token"),
}));
// mock Session class
vi.mock("#/services/session", () => ({
default: {
_history: [
{ args: { LLM_API_KEY: "DANGER-key-should-not-be-here" } },
{ content: "Hello" },
],
},
}));
afterEach(() => {
vi.clearAllMocks();
});
it("should render the feedback model when open", () => {
const { rerender } = render(
<FeedbackModal
polarity="positive"
isOpen={false}
onOpenChange={vi.fn}
onSendFeedback={vi.fn}
/>,
);
expect(screen.queryByTestId("feedback-modal")).not.toBeInTheDocument();
rerender(
<FeedbackModal
polarity="positive"
isOpen
onOpenChange={vi.fn}
onSendFeedback={vi.fn}
/>,
);
expect(screen.getByTestId("feedback-modal")).toBeInTheDocument();
});
it("should display an error if the email is invalid when submitting", async () => {
const user = userEvent.setup();
render(
<FeedbackModal
polarity="positive"
isOpen
onOpenChange={vi.fn}
onSendFeedback={vi.fn}
/>,
);
const submitButton = screen.getByRole("button", {
name: "FEEDBACK$SHARE_LABEL",
});
await user.click(submitButton);
expect(screen.getByTestId("invalid-email-message")).toBeInTheDocument();
expect(OpenHands.sendFeedback).not.toHaveBeenCalled();
});
it("should call sendFeedback with the correct data when the share button is clicked", async () => {
const user = userEvent.setup();
render(
<FeedbackModal
polarity="negative"
isOpen
onOpenChange={vi.fn}
onSendFeedback={vi.fn}
/>,
);
const submitButton = screen.getByRole("button", {
name: "FEEDBACK$SHARE_LABEL",
});
const email = "example@example.com";
const emailInput = screen.getByTestId("email-input");
await user.type(emailInput, email);
// select public
const permissionsGroup = screen.getByTestId("permissions-group");
const publicOption = within(permissionsGroup).getByRole("radio", {
name: "FEEDBACK$PUBLIC_LABEL",
});
expect(publicOption).not.toBeChecked();
await user.click(publicOption);
expect(publicOption).toBeChecked();
await user.click(submitButton);
expect(
screen.queryByTestId("invalid-email-message"),
).not.toBeInTheDocument();
expect(OpenHands.sendFeedback).toHaveBeenCalledWith({
email,
permissions: "public",
feedback: "negative",
trajectory: [{ args: {} }, { content: "Hello" }], // api key should be removed
token: "some-token",
version: "1.0",
});
});
it("should store the users email in local state for later use", async () => {
const email = "example@example.com";
const user = userEvent.setup();
const { rerender } = render(
<FeedbackModal
polarity="negative"
isOpen
onOpenChange={vi.fn}
onSendFeedback={vi.fn}
/>,
);
expect(localStorage.getItem).toHaveBeenCalledWith("feedback-email");
const emailInput = screen.getByTestId("email-input");
expect(emailInput).toHaveValue("");
await user.type(emailInput, email);
expect(emailInput).toHaveValue(email);
const submitButton = screen.getByRole("button", {
name: "FEEDBACK$SHARE_LABEL",
});
await user.click(submitButton);
expect(localStorage.setItem).toHaveBeenCalledWith("feedback-email", email);
rerender(
<FeedbackModal
polarity="positive"
isOpen
onOpenChange={vi.fn}
onSendFeedback={vi.fn}
/>,
);
const emailInputAfterClose = screen.getByTestId("email-input");
expect(emailInputAfterClose).toHaveValue(email);
});
// TODO: figure out how to properly mock toast
it.skip("should display a success toast when the feedback is shared successfully", async () => {
(OpenHands.sendFeedback as Mock).mockResolvedValue({
statusCode: 200,
body: {
message: "Feedback shared",
feedback_id: "some-id",
password: "some-password",
},
});
const user = userEvent.setup();
render(
<FeedbackModal
polarity="negative"
isOpen
onOpenChange={vi.fn}
onSendFeedback={vi.fn}
/>,
);
const submitButton = screen.getByRole("button", {
name: "FEEDBACK$SHARE_LABEL",
});
const email = "example@example.com";
const emailInput = screen.getByTestId("email-input");
await user.type(emailInput, email);
await user.click(submitButton);
expect(toast).toHaveBeenCalled();
});
});

View File

@@ -0,0 +1,9 @@
import { describe, it } from "vitest";
describe("AIConfigForm", () => {
it.todo("should render the AI config form");
it.todo("should toggle the advanced settings when clicked");
it.todo("should call the onSubmit callback when the form is submitted");
it.todo("should call the onReset callback when the reset button is clicked");
it.todo("should call the onClose callback when the close button is clicked");
});

View File

@@ -0,0 +1,9 @@
import { describe, it } from "vitest";
describe("DropdownInput", () => {
it.todo("should render the input");
it.todo("should render the placeholder");
it.todo("should render the dropdown when clicked");
it.todo("should select an option when clicked");
it.todo("should filter the options when typing");
});

View File

@@ -0,0 +1,12 @@
import { describe, it } from "vitest";
describe("ModelSelector", () => {
it.todo("should render the model selector");
it.todo("should display and select the providers");
it.todo("should display and select the models");
it.todo("should disable the models if a provider is not selected");
it.todo("should disable the inputs if isDisabled is true");
it.todo(
"should set the selected model and provider if the currentModel prop is set",
);
});

View File

@@ -0,0 +1,71 @@
import { render, screen } from "@testing-library/react";
import userEvent from "@testing-library/user-event";
import { afterEach, describe, expect, it, vi } from "vitest";
import { UploadImageInput } from "#/components/upload-image-input";
describe("UploadImageInput", () => {
const user = userEvent.setup();
const onUploadMock = vi.fn();
afterEach(() => {
vi.clearAllMocks();
});
it("should render an input", () => {
render(<UploadImageInput onUpload={onUploadMock} />);
expect(screen.getByTestId("upload-image-input")).toBeInTheDocument();
});
it("should call onUpload when a file is selected", async () => {
render(<UploadImageInput onUpload={onUploadMock} />);
const file = new File(["(⌐□_□)"], "chucknorris.png", { type: "image/png" });
const input = screen.getByTestId("upload-image-input");
await user.upload(input, file);
expect(onUploadMock).toHaveBeenNthCalledWith(1, [file]);
});
it("should call onUpload when multiple files are selected", async () => {
render(<UploadImageInput onUpload={onUploadMock} />);
const files = [
new File(["(⌐□_□)"], "chucknorris.png", { type: "image/png" }),
new File(["(⌐□_□)"], "chucknorris2.png", { type: "image/png" }),
];
const input = screen.getByTestId("upload-image-input");
await user.upload(input, files);
expect(onUploadMock).toHaveBeenNthCalledWith(1, files);
});
it("should not upload any file that is not an image", async () => {
render(<UploadImageInput onUpload={onUploadMock} />);
const file = new File(["(⌐□_□)"], "chucknorris.txt", {
type: "text/plain",
});
const input = screen.getByTestId("upload-image-input");
await user.upload(input, file);
expect(onUploadMock).not.toHaveBeenCalled();
});
it("should render custom labels", () => {
const { rerender } = render(<UploadImageInput onUpload={onUploadMock} />);
expect(screen.getByTestId("default-label")).toBeInTheDocument();
function CustomLabel() {
return <span>Custom label</span>;
}
rerender(
<UploadImageInput onUpload={onUploadMock} label={<CustomLabel />} />,
);
expect(screen.getByText("Custom label")).toBeInTheDocument();
expect(screen.queryByTestId("default-label")).not.toBeInTheDocument();
});
});

View File

@@ -0,0 +1,132 @@
import { render, screen } from "@testing-library/react";
import { describe, expect, it, test, vi, afterEach } from "vitest";
import userEvent from "@testing-library/user-event";
import * as Remix from "@remix-run/react";
import { UserActions } from "#/components/user-actions";
describe("UserActions", () => {
const user = userEvent.setup();
const onClickAccountSettingsMock = vi.fn();
const onLogoutMock = vi.fn();
const useFetcherSpy = vi.spyOn(Remix, "useFetcher");
// @ts-expect-error - Only returning the relevant properties for the test
useFetcherSpy.mockReturnValue({ state: "idle" });
afterEach(() => {
onClickAccountSettingsMock.mockClear();
onLogoutMock.mockClear();
useFetcherSpy.mockClear();
});
it("should render", () => {
render(
<UserActions
onClickAccountSettings={onClickAccountSettingsMock}
onLogout={onLogoutMock}
/>,
);
expect(screen.getByTestId("user-actions")).toBeInTheDocument();
expect(screen.getByTestId("user-avatar")).toBeInTheDocument();
});
it("should toggle the user menu when the user avatar is clicked", async () => {
render(
<UserActions
onClickAccountSettings={onClickAccountSettingsMock}
onLogout={onLogoutMock}
/>,
);
const userAvatar = screen.getByTestId("user-avatar");
await user.click(userAvatar);
expect(
screen.getByTestId("account-settings-context-menu"),
).toBeInTheDocument();
await user.click(userAvatar);
expect(
screen.queryByTestId("account-settings-context-menu"),
).not.toBeInTheDocument();
});
it("should call onClickAccountSettings and close the menu when the account settings option is clicked", async () => {
render(
<UserActions
onClickAccountSettings={onClickAccountSettingsMock}
onLogout={onLogoutMock}
/>,
);
const userAvatar = screen.getByTestId("user-avatar");
await user.click(userAvatar);
const accountSettingsOption = screen.getByText("Account Settings");
await user.click(accountSettingsOption);
expect(onClickAccountSettingsMock).toHaveBeenCalledOnce();
expect(
screen.queryByTestId("account-settings-context-menu"),
).not.toBeInTheDocument();
});
it("should call onLogout and close the menu when the logout option is clicked", async () => {
render(
<UserActions
onClickAccountSettings={onClickAccountSettingsMock}
onLogout={onLogoutMock}
user={{ avatar_url: "https://example.com/avatar.png" }}
/>,
);
const userAvatar = screen.getByTestId("user-avatar");
await user.click(userAvatar);
const logoutOption = screen.getByText("Logout");
await user.click(logoutOption);
expect(onLogoutMock).toHaveBeenCalledOnce();
expect(
screen.queryByTestId("account-settings-context-menu"),
).not.toBeInTheDocument();
});
test("onLogout should not be called when the user is not logged in", async () => {
render(
<UserActions
onClickAccountSettings={onClickAccountSettingsMock}
onLogout={onLogoutMock}
/>,
);
const userAvatar = screen.getByTestId("user-avatar");
await user.click(userAvatar);
const logoutOption = screen.getByText("Logout");
await user.click(logoutOption);
expect(onLogoutMock).not.toHaveBeenCalled();
});
it("should display the loading spinner", () => {
// @ts-expect-error - Only returning the relevant properties for the test
useFetcherSpy.mockReturnValue({ state: "loading" });
render(
<UserActions
onClickAccountSettings={onClickAccountSettingsMock}
onLogout={onLogoutMock}
user={{ avatar_url: "https://example.com/avatar.png" }}
/>,
);
const userAvatar = screen.getByTestId("user-avatar");
user.click(userAvatar);
expect(screen.getByTestId("loading-spinner")).toBeInTheDocument();
expect(screen.queryByAltText("user avatar")).not.toBeInTheDocument();
});
});

View File

@@ -0,0 +1,68 @@
import { render, screen } from "@testing-library/react";
import userEvent from "@testing-library/user-event";
import { afterEach, describe, expect, it, vi } from "vitest";
import { UserAvatar } from "#/components/user-avatar";
describe("UserAvatar", () => {
const onClickMock = vi.fn();
afterEach(() => {
onClickMock.mockClear();
});
it("(default) should render the placeholder avatar when the user is logged out", () => {
render(<UserAvatar onClick={onClickMock} />);
expect(screen.getByTestId("user-avatar")).toBeInTheDocument();
expect(
screen.getByLabelText("user avatar placeholder"),
).toBeInTheDocument();
});
it("should call onClick when clicked", async () => {
const user = userEvent.setup();
render(<UserAvatar onClick={onClickMock} />);
const userAvatarContainer = screen.getByTestId("user-avatar");
await user.click(userAvatarContainer);
expect(onClickMock).toHaveBeenCalledOnce();
});
it("should display the user's avatar when available", () => {
render(
<UserAvatar
onClick={onClickMock}
avatarUrl="https://example.com/avatar.png"
/>,
);
expect(screen.getByAltText("user avatar")).toBeInTheDocument();
expect(
screen.queryByLabelText("user avatar placeholder"),
).not.toBeInTheDocument();
});
it("should display a loading spinner instead of an avatar when isLoading is true", () => {
const { rerender } = render(<UserAvatar onClick={onClickMock} />);
expect(screen.queryByTestId("loading-spinner")).not.toBeInTheDocument();
expect(
screen.getByLabelText("user avatar placeholder"),
).toBeInTheDocument();
rerender(<UserAvatar onClick={onClickMock} isLoading />);
expect(screen.getByTestId("loading-spinner")).toBeInTheDocument();
expect(
screen.queryByLabelText("user avatar placeholder"),
).not.toBeInTheDocument();
rerender(
<UserAvatar
onClick={onClickMock}
avatarUrl="https://example.com/avatar.png"
isLoading
/>,
);
expect(screen.getByTestId("loading-spinner")).toBeInTheDocument();
expect(screen.queryByAltText("user avatar")).not.toBeInTheDocument();
});
});

View File

@@ -0,0 +1,36 @@
import { render, screen } from "@testing-library/react";
import userEvent from "@testing-library/user-event";
import { expect, test, vi } from "vitest";
import { useClickOutsideElement } from "#/hooks/useClickOutsideElement";
interface ClickOutsideTestComponentProps {
callback: () => void;
}
function ClickOutsideTestComponent({
callback,
}: ClickOutsideTestComponentProps) {
const ref = useClickOutsideElement<HTMLDivElement>(callback);
return (
<div>
<div data-testid="inside-element" ref={ref} />
<div data-testid="outside-element" />
</div>
);
}
test("call the callback when the element is clicked outside", async () => {
const user = userEvent.setup();
const callback = vi.fn();
render(<ClickOutsideTestComponent callback={callback} />);
const insideElement = screen.getByTestId("inside-element");
const outsideElement = screen.getByTestId("outside-element");
await user.click(insideElement);
expect(callback).not.toHaveBeenCalled();
await user.click(outsideElement);
expect(callback).toHaveBeenCalled();
});

View File

@@ -0,0 +1,35 @@
import { describe, it, test } from "vitest";
describe("frontend/routes/_oh", () => {
describe("brand logo", () => {
it.todo("should not do anything if the user is in the main screen");
it.todo(
"should be clickable and redirect to the main screen if the user is not in the main screen",
);
});
describe("user menu", () => {
it.todo("should open the user menu when clicked");
describe("logged out", () => {
it.todo("should display a placeholder");
test.todo("the logout option in the user menu should be disabled");
});
describe("logged in", () => {
it.todo("should display the user's avatar");
it.todo("should log the user out when the logout option is clicked");
});
});
describe("config", () => {
it.todo("should open the config modal when clicked");
it.todo(
"should not save the config and close the config modal when the close button is clicked",
);
it.todo(
"should save the config when the save button is clicked and close the modal",
);
it.todo("should warn the user about saving the config when in /app");
});
});

View File

@@ -1,43 +0,0 @@
import { createRemixStub } from "@remix-run/testing";
import { describe, expect, it } from "vitest";
import { screen, within } from "@testing-library/react";
import { renderWithProviders } from "test-utils";
import userEvent from "@testing-library/user-event";
import CodeEditor from "#/routes/app._index/route";
const RemixStub = createRemixStub([{ path: "/app", Component: CodeEditor }]);
describe.skip("CodeEditor", () => {
it("should render", async () => {
renderWithProviders(<RemixStub initialEntries={["/app"]} />);
await screen.findByTestId("file-explorer");
expect(screen.getByTestId("code-editor-empty-message")).toBeInTheDocument();
});
it("should retrieve the files", async () => {
renderWithProviders(<RemixStub initialEntries={["/app"]} />);
const explorer = await screen.findByTestId("file-explorer");
const files = within(explorer).getAllByTestId("tree-node");
// request mocked with msw
expect(files).toHaveLength(3);
});
it("should open a file", async () => {
const user = userEvent.setup();
renderWithProviders(<RemixStub initialEntries={["/app"]} />);
const explorer = await screen.findByTestId("file-explorer");
const files = within(explorer).getAllByTestId("tree-node");
await user.click(files[0]);
// check if the file is opened
expect(
screen.queryByTestId("code-editor-empty-message"),
).not.toBeInTheDocument();
const editor = await screen.findByTestId("code-editor");
expect(
within(editor).getByText(/content of file1.ts/i),
).toBeInTheDocument();
});
});

View File

@@ -1,56 +0,0 @@
import { createRemixStub } from "@remix-run/testing";
import { beforeAll, describe, expect, it, vi } from "vitest";
import { render, screen, waitFor } from "@testing-library/react";
import { ws } from "msw";
import { setupServer } from "msw/node";
import App from "#/routes/app";
import AgentState from "#/types/AgentState";
import { AgentStateChangeObservation } from "#/types/core/observations";
const RemixStub = createRemixStub([{ path: "/app", Component: App }]);
describe.skip("App", () => {
const agent = ws.link("ws://localhost:3001/ws");
const server = setupServer();
beforeAll(() => {
// mock `dom.scrollTo`
HTMLElement.prototype.scrollTo = vi.fn().mockImplementation(() => {});
});
it("should render", async () => {
render(<RemixStub initialEntries={["/app"]} />);
await waitFor(() => {
expect(screen.getByTestId("app")).toBeInTheDocument();
expect(
screen.getByText(/INITIALIZING_AGENT_LOADING_MESSAGE/i),
).toBeInTheDocument();
});
});
it("should establish a ws connection and send the init message", async () => {
server.use(
agent.addEventListener("connection", ({ client }) => {
client.send(
JSON.stringify({
id: 1,
cause: 0,
message: "AGENT_INIT_MESSAGE",
source: "agent",
timestamp: new Date().toISOString(),
observation: "agent_state_changed",
content: "AGENT_INIT_MESSAGE",
extras: { agent_state: AgentState.INIT },
} satisfies AgentStateChangeObservation),
);
}),
);
render(<RemixStub initialEntries={["/app"]} />);
await waitFor(() => {
expect(screen.getByText(/AGENT_INIT_MESSAGE/i)).toBeInTheDocument();
});
});
});

View File

@@ -1,50 +0,0 @@
import { createRemixStub } from "@remix-run/testing";
import { describe, expect, it } from "vitest";
import { render, screen } from "@testing-library/react";
import userEvent from "@testing-library/user-event";
import Home from "#/routes/_index/route";
const renderRemixStub = (config?: { authenticated: boolean }) =>
createRemixStub([
{
path: "/",
Component: Home,
loader: () => ({
ghToken: config?.authenticated ? "ghp_123456" : null,
}),
},
]);
describe.skip("Home (_index)", () => {
it("should render", async () => {
const RemixStub = renderRemixStub();
render(<RemixStub />);
await screen.findByText(/let's start building/i);
});
it("should load the gh repos if a token is present", async () => {
const user = userEvent.setup();
const RemixStub = renderRemixStub({ authenticated: true });
render(<RemixStub />);
const repos = await screen.findByPlaceholderText(
/select a github project/i,
);
await user.click(repos);
// mocked responses from msw
screen.getByText(/octocat\/hello-world/i);
screen.getByText(/octocat\/earth/i);
});
it("should not load the gh repos if a token is not present", async () => {
const RemixStub = renderRemixStub();
render(<RemixStub />);
const repos = await screen.findByPlaceholderText(
/select a github project/i,
);
await userEvent.click(repos);
expect(screen.queryByText(/octocat\/hello-world/i)).not.toBeInTheDocument();
expect(screen.queryByText(/octocat\/earth/i)).not.toBeInTheDocument();
});
});

View File

@@ -1,40 +0,0 @@
import { describe, expect, it } from "vitest";
import { createRemixStub } from "@remix-run/testing";
import { render, screen } from "@testing-library/react";
import userEvent from "@testing-library/user-event";
import App, { clientLoader } from "#/root";
const RemixStub = createRemixStub([
{
path: "/",
Component: App,
loader: clientLoader,
},
]);
describe.skip("Root", () => {
it("should render", async () => {
render(<RemixStub />);
await screen.findByTestId("link-to-main");
});
describe("Auth Modal", () => {
it("should display the auth modal on first time visit", async () => {
render(<RemixStub />);
await screen.findByTestId("auth-modal");
});
it("should close the auth modal on accepting the terms", async () => {
const user = userEvent.setup();
render(<RemixStub />);
await screen.findByTestId("auth-modal");
await user.click(screen.getByTestId("accept-terms"));
await user.click(screen.getByRole("button", { name: /continue/i }));
expect(screen.queryByTestId("auth-modal")).not.toBeInTheDocument();
expect(screen.getByTestId("link-to-main")).toBeInTheDocument();
});
it.todo("should not display the auth modal on subsequent visits");
});
});

4
frontend/global.d.ts vendored Normal file
View File

@@ -0,0 +1,4 @@
interface Window {
__APP_MODE__?: "saas" | "oss";
__GITHUB_CLIENT_ID__?: string | null;
}

View File

@@ -1,12 +1,12 @@
{
"name": "openhands-frontend",
"version": "0.9.8",
"version": "0.11.0",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "openhands-frontend",
"version": "0.9.8",
"version": "0.11.0",
"dependencies": {
"@monaco-editor/react": "^4.6.0",
"@nextui-org/react": "^2.4.8",
@@ -35,7 +35,8 @@
"react-markdown": "^9.0.1",
"react-redux": "^9.1.2",
"react-router-dom": "^6.26.1",
"react-syntax-highlighter": "^15.5.0",
"react-syntax-highlighter": "^15.6.1",
"react-textarea-autosize": "^8.5.4",
"remark-gfm": "^4.0.0",
"sirv-cli": "^3.0.0",
"tailwind-merge": "^2.5.4",
@@ -47,10 +48,10 @@
"@remix-run/dev": "^2.11.2",
"@remix-run/testing": "^2.11.2",
"@tailwindcss/typography": "^0.5.15",
"@testing-library/jest-dom": "^6.5.0",
"@testing-library/jest-dom": "^6.6.1",
"@testing-library/react": "^16.0.1",
"@testing-library/user-event": "^14.5.2",
"@types/node": "^22.7.5",
"@types/node": "^22.7.6",
"@types/react": "^18.3.11",
"@types/react-dom": "^18.3.0",
"@types/react-highlight": "^0.12.8",
@@ -1601,9 +1602,9 @@
}
},
"node_modules/@jspm/core": {
"version": "2.0.1",
"resolved": "https://registry.npmjs.org/@jspm/core/-/core-2.0.1.tgz",
"integrity": "sha512-Lg3PnLp0QXpxwLIAuuJboLeRaIhrgJjeuh797QADg3xz8wGLugQOS5DpsE8A6i6Adgzf+bacllkKZG3J0tGfDw==",
"version": "2.1.0",
"resolved": "https://registry.npmjs.org/@jspm/core/-/core-2.1.0.tgz",
"integrity": "sha512-3sRl+pkyFY/kLmHl0cgHiFp2xEqErA8N3ECjMs7serSUBmoJ70lBa0PG5t0IM6WJgdZNyyI0R8YFfi5wM8+mzg==",
"dev": true
},
"node_modules/@mdx-js/mdx": {
@@ -3560,15 +3561,15 @@
}
},
"node_modules/@react-aria/grid": {
"version": "3.10.4",
"resolved": "https://registry.npmjs.org/@react-aria/grid/-/grid-3.10.4.tgz",
"integrity": "sha512-3AjJ0hwRhOCIHThIZrGWrjAuKDpaZuBkODW3dvgLqtsNm3tL46DI6U9O3vfp8lNbrWMsXJgjRXwvXvdv0/gwCA==",
"version": "3.10.5",
"resolved": "https://registry.npmjs.org/@react-aria/grid/-/grid-3.10.5.tgz",
"integrity": "sha512-9sLa+rpLgRZk7VX+tvdSudn1tdVgolVzhDLGWd95yS4UtPVMihTMGBrRoByY57Wxvh1V+7Ptw8kc6tsRSotYKg==",
"dependencies": {
"@react-aria/focus": "^3.18.3",
"@react-aria/focus": "^3.18.4",
"@react-aria/i18n": "^3.12.3",
"@react-aria/interactions": "^3.22.3",
"@react-aria/interactions": "^3.22.4",
"@react-aria/live-announcer": "^3.4.0",
"@react-aria/selection": "^3.20.0",
"@react-aria/selection": "^3.20.1",
"@react-aria/utils": "^3.25.3",
"@react-stately/collections": "^3.11.0",
"@react-stately/grid": "^3.9.3",
@@ -3584,11 +3585,11 @@
}
},
"node_modules/@react-aria/grid/node_modules/@react-aria/focus": {
"version": "3.18.3",
"resolved": "https://registry.npmjs.org/@react-aria/focus/-/focus-3.18.3.tgz",
"integrity": "sha512-WKUElg+5zS0D3xlVn8MntNnkzJql2J6MuzAMP8Sv5WTgFDse/XGR842dsxPTIyKKdrWVCRegCuwa4m3n/GzgJw==",
"version": "3.18.4",
"resolved": "https://registry.npmjs.org/@react-aria/focus/-/focus-3.18.4.tgz",
"integrity": "sha512-91J35077w9UNaMK1cpMUEFRkNNz0uZjnSwiyBCFuRdaVuivO53wNC9XtWSDNDdcO5cGy87vfJRVAiyoCn/mjqA==",
"dependencies": {
"@react-aria/interactions": "^3.22.3",
"@react-aria/interactions": "^3.22.4",
"@react-aria/utils": "^3.25.3",
"@react-types/shared": "^3.25.0",
"@swc/helpers": "^0.5.0",
@@ -3617,9 +3618,9 @@
}
},
"node_modules/@react-aria/grid/node_modules/@react-aria/interactions": {
"version": "3.22.3",
"resolved": "https://registry.npmjs.org/@react-aria/interactions/-/interactions-3.22.3.tgz",
"integrity": "sha512-RRUb/aG+P0IKTIWikY/SylB6bIbLZeztnZY2vbe7RAG5MgVaCgn5HQ45SI15GlTmhsFG8CnF6slJsUFJiNHpbQ==",
"version": "3.22.4",
"resolved": "https://registry.npmjs.org/@react-aria/interactions/-/interactions-3.22.4.tgz",
"integrity": "sha512-E0vsgtpItmknq/MJELqYJwib+YN18Qag8nroqwjk1qOnBa9ROIkUhWJerLi1qs5diXq9LHKehZDXRlwPvdEFww==",
"dependencies": {
"@react-aria/ssr": "^3.9.6",
"@react-aria/utils": "^3.25.3",
@@ -3631,13 +3632,13 @@
}
},
"node_modules/@react-aria/grid/node_modules/@react-aria/selection": {
"version": "3.20.0",
"resolved": "https://registry.npmjs.org/@react-aria/selection/-/selection-3.20.0.tgz",
"integrity": "sha512-h3giMcXo4SMZRL5HrqZvOLNTsdh5jCXwLUx0wpj/2EF0tcYQL6WDfn1iJ+rHARkUIs7X70fUV8iwlbUySZy1xg==",
"version": "3.20.1",
"resolved": "https://registry.npmjs.org/@react-aria/selection/-/selection-3.20.1.tgz",
"integrity": "sha512-My0w8UC/7PAkz/1yZUjr2VRuzDZz1RrbgTqP36j5hsJx8RczDTjI4TmKtQNKG0ggaP4w83G2Og5JPTq3w3LMAw==",
"dependencies": {
"@react-aria/focus": "^3.18.3",
"@react-aria/focus": "^3.18.4",
"@react-aria/i18n": "^3.12.3",
"@react-aria/interactions": "^3.22.3",
"@react-aria/interactions": "^3.22.4",
"@react-aria/utils": "^3.25.3",
"@react-stately/selection": "^3.17.0",
"@react-types/shared": "^3.25.0",
@@ -4110,12 +4111,12 @@
}
},
"node_modules/@react-aria/toggle": {
"version": "3.10.8",
"resolved": "https://registry.npmjs.org/@react-aria/toggle/-/toggle-3.10.8.tgz",
"integrity": "sha512-N6WTgE8ByMYY+ZygUUPGON2vW5NrxwU91H98+Nozl+Rq6ZYR2fD9i8oRtLtrYPxjU2HmaFwDyQdWvmMJZuDxig==",
"version": "3.10.9",
"resolved": "https://registry.npmjs.org/@react-aria/toggle/-/toggle-3.10.9.tgz",
"integrity": "sha512-dtfnyIU2/kcH9rFAiB48diSmaXDv45K7UCuTkMQLjbQa3QHC1oYNbleVN/VdGyAMBsIWtfl8L4uuPrAQmDV/bg==",
"dependencies": {
"@react-aria/focus": "^3.18.3",
"@react-aria/interactions": "^3.22.3",
"@react-aria/focus": "^3.18.4",
"@react-aria/interactions": "^3.22.4",
"@react-aria/utils": "^3.25.3",
"@react-stately/toggle": "^3.7.8",
"@react-types/checkbox": "^3.8.4",
@@ -4127,11 +4128,11 @@
}
},
"node_modules/@react-aria/toggle/node_modules/@react-aria/focus": {
"version": "3.18.3",
"resolved": "https://registry.npmjs.org/@react-aria/focus/-/focus-3.18.3.tgz",
"integrity": "sha512-WKUElg+5zS0D3xlVn8MntNnkzJql2J6MuzAMP8Sv5WTgFDse/XGR842dsxPTIyKKdrWVCRegCuwa4m3n/GzgJw==",
"version": "3.18.4",
"resolved": "https://registry.npmjs.org/@react-aria/focus/-/focus-3.18.4.tgz",
"integrity": "sha512-91J35077w9UNaMK1cpMUEFRkNNz0uZjnSwiyBCFuRdaVuivO53wNC9XtWSDNDdcO5cGy87vfJRVAiyoCn/mjqA==",
"dependencies": {
"@react-aria/interactions": "^3.22.3",
"@react-aria/interactions": "^3.22.4",
"@react-aria/utils": "^3.25.3",
"@react-types/shared": "^3.25.0",
"@swc/helpers": "^0.5.0",
@@ -4142,9 +4143,9 @@
}
},
"node_modules/@react-aria/toggle/node_modules/@react-aria/interactions": {
"version": "3.22.3",
"resolved": "https://registry.npmjs.org/@react-aria/interactions/-/interactions-3.22.3.tgz",
"integrity": "sha512-RRUb/aG+P0IKTIWikY/SylB6bIbLZeztnZY2vbe7RAG5MgVaCgn5HQ45SI15GlTmhsFG8CnF6slJsUFJiNHpbQ==",
"version": "3.22.4",
"resolved": "https://registry.npmjs.org/@react-aria/interactions/-/interactions-3.22.4.tgz",
"integrity": "sha512-E0vsgtpItmknq/MJELqYJwib+YN18Qag8nroqwjk1qOnBa9ROIkUhWJerLi1qs5diXq9LHKehZDXRlwPvdEFww==",
"dependencies": {
"@react-aria/ssr": "^3.9.6",
"@react-aria/utils": "^3.25.3",
@@ -5814,9 +5815,9 @@
}
},
"node_modules/@testing-library/jest-dom": {
"version": "6.5.0",
"resolved": "https://registry.npmjs.org/@testing-library/jest-dom/-/jest-dom-6.5.0.tgz",
"integrity": "sha512-xGGHpBXYSHUUr6XsKBfs85TWlYKpTc37cSBBVrXcib2MkHLboWlkClhWF37JKlDb9KEq3dHs+f2xR7XJEWGBxA==",
"version": "6.6.1",
"resolved": "https://registry.npmjs.org/@testing-library/jest-dom/-/jest-dom-6.6.1.tgz",
"integrity": "sha512-mNYIiAuP4yJwV2zBRQCV7PHoQwbb6/8TfMpPcwSUzcSVDJHWOXt6hjNtIN1v5knDmimYnjJxKhsoVd4LVGIO+w==",
"dev": true,
"dependencies": {
"@adobe/css-tools": "^4.4.0",
@@ -6028,9 +6029,9 @@
}
},
"node_modules/@types/node": {
"version": "22.7.5",
"resolved": "https://registry.npmjs.org/@types/node/-/node-22.7.5.tgz",
"integrity": "sha512-jML7s2NAzMWc//QSJ1a3prpk78cOPchGvXJsC3C6R6PSMoooztvRVQEz89gmBTBY1SPMaqo5teB4uNHPdetShQ==",
"version": "22.7.6",
"resolved": "https://registry.npmjs.org/@types/node/-/node-22.7.6.tgz",
"integrity": "sha512-/d7Rnj0/ExXDMcioS78/kf1lMzYk4BZV8MZGTBKzTGZ6/406ukkbYlIsZmMPhcR5KlkunDHQLrtAVmSq7r+mSw==",
"devOptional": true,
"dependencies": {
"undici-types": "~6.19.2"
@@ -6619,9 +6620,9 @@
}
},
"node_modules/acorn": {
"version": "8.12.1",
"resolved": "https://registry.npmjs.org/acorn/-/acorn-8.12.1.tgz",
"integrity": "sha512-tcpGyI9zbizT9JbV6oYE477V6mTlXvvi0T0G3SNIYE2apm/G5huBa1+K89VGeovbg+jycCrfhl3ADxErOuO6Jg==",
"version": "8.13.0",
"resolved": "https://registry.npmjs.org/acorn/-/acorn-8.13.0.tgz",
"integrity": "sha512-8zSiw54Oxrdym50NlZ9sUusyO1Z1ZchgRLWRaK6c86XJFClyCgFKetdowBg5bKxyp/u+CDBJG4Mpp0m3HLZl9w==",
"dev": true,
"bin": {
"acorn": "bin/acorn"
@@ -7325,9 +7326,9 @@
}
},
"node_modules/caniuse-lite": {
"version": "1.0.30001668",
"resolved": "https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.30001668.tgz",
"integrity": "sha512-nWLrdxqCdblixUO+27JtGJJE/txpJlyUy5YN1u53wLZkP0emYCo5zgS6QYft7VUYR42LGgi/S5hdLZTrnyIddw==",
"version": "1.0.30001669",
"resolved": "https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.30001669.tgz",
"integrity": "sha512-DlWzFDJqstqtIVx1zeSpIMLjunf5SmwOw0N2Ck/QSQdS8PLS4+9HrLaYei4w8BIAL7IB/UEDu889d8vhCTPA0w==",
"funding": [
{
"type": "opencollective",
@@ -8396,9 +8397,9 @@
"integrity": "sha512-WMwm9LhRUo+WUaRN+vRuETqG89IgZphVSNkdFgeb6sS/E4OrDIN7t48CAewSHXc6C8lefD8KKfr5vY61brQlow=="
},
"node_modules/electron-to-chromium": {
"version": "1.5.36",
"resolved": "https://registry.npmjs.org/electron-to-chromium/-/electron-to-chromium-1.5.36.tgz",
"integrity": "sha512-HYTX8tKge/VNp6FGO+f/uVDmUkq+cEfcxYhKf15Akc4M5yxt5YmorwlAitKWjWhWQnKcDRBAQKXkhqqXMqcrjw=="
"version": "1.5.39",
"resolved": "https://registry.npmjs.org/electron-to-chromium/-/electron-to-chromium-1.5.39.tgz",
"integrity": "sha512-4xkpSR6CjuiaNyvwiWDI85N9AxsvbPawB8xc7yzLPonYTuP19BVgYweKyUMFtHEZgIcHWMt1ks5Cqx2m+6/Grg=="
},
"node_modules/emoji-regex": {
"version": "9.2.2",
@@ -9840,9 +9841,9 @@
}
},
"node_modules/framer-motion": {
"version": "11.11.8",
"resolved": "https://registry.npmjs.org/framer-motion/-/framer-motion-11.11.8.tgz",
"integrity": "sha512-mnGQNEoz99GtFXBBPw+Ag5K4FcfP5XrXxrxHz+iE4Lmg7W3sf2gKmGuvfkZCW/yIfcdv5vJd6KiSPETH1Pw68Q==",
"version": "11.11.9",
"resolved": "https://registry.npmjs.org/framer-motion/-/framer-motion-11.11.9.tgz",
"integrity": "sha512-XpdZseuCrZehdHGuW22zZt3SF5g6AHJHJi7JwQIigOznW4Jg1n0oGPMJQheMaKLC+0rp5gxUKMRYI6ytd3q4RQ==",
"peer": true,
"dependencies": {
"tslib": "^2.4.0"
@@ -10321,9 +10322,9 @@
}
},
"node_modules/hast-util-to-jsx-runtime": {
"version": "2.3.1",
"resolved": "https://registry.npmjs.org/hast-util-to-jsx-runtime/-/hast-util-to-jsx-runtime-2.3.1.tgz",
"integrity": "sha512-Rbemi1rzrkysSin0FDHZfsxYPoqLGHFfxFm28aOBHPibT7aqjy7kUgY636se9xbuCWUsFpWAYlmtGHQakiqtEA==",
"version": "2.3.2",
"resolved": "https://registry.npmjs.org/hast-util-to-jsx-runtime/-/hast-util-to-jsx-runtime-2.3.2.tgz",
"integrity": "sha512-1ngXYb+V9UT5h+PxNRa1O1FYguZK/XL+gkeqvp7EdHlB9oHUG0eYRo/vY5inBdcqo3RkPMC58/H94HvkbfGdyg==",
"dependencies": {
"@types/estree": "^1.0.0",
"@types/hast": "^3.0.0",
@@ -11043,6 +11044,11 @@
"node": "*"
}
},
"node_modules/highlightjs-vue": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/highlightjs-vue/-/highlightjs-vue-1.0.0.tgz",
"integrity": "sha512-PDEfEF102G23vHmPhLyPboFCD+BkMGu+GuJe2d9/eH4FsCwvgBpnc9n0pGE+ffKdph38s6foEZiEjdgHdzp+IA=="
},
"node_modules/hosted-git-info": {
"version": "6.1.1",
"resolved": "https://registry.npmjs.org/hosted-git-info/-/hosted-git-info-6.1.1.tgz",
@@ -20734,12 +20740,13 @@
}
},
"node_modules/react-syntax-highlighter": {
"version": "15.5.0",
"resolved": "https://registry.npmjs.org/react-syntax-highlighter/-/react-syntax-highlighter-15.5.0.tgz",
"integrity": "sha512-+zq2myprEnQmH5yw6Gqc8lD55QHnpKaU8TOcFeC/Lg/MQSs8UknEA0JC4nTZGFAXC2J2Hyj/ijJ7NlabyPi2gg==",
"version": "15.6.1",
"resolved": "https://registry.npmjs.org/react-syntax-highlighter/-/react-syntax-highlighter-15.6.1.tgz",
"integrity": "sha512-OqJ2/vL7lEeV5zTJyG7kmARppUjiB9h9udl4qHQjjgEos66z00Ia0OckwYfRxCSFrW8RJIBnsBwQsHZbVPspqg==",
"dependencies": {
"@babel/runtime": "^7.3.1",
"highlight.js": "^10.4.1",
"highlightjs-vue": "^1.0.0",
"lowlight": "^1.17.0",
"prismjs": "^1.27.0",
"refractor": "^3.6.0"
@@ -22713,13 +22720,17 @@
}
},
"node_modules/string.prototype.includes": {
"version": "2.0.0",
"resolved": "https://registry.npmjs.org/string.prototype.includes/-/string.prototype.includes-2.0.0.tgz",
"integrity": "sha512-E34CkBgyeqNDcrbU76cDjL5JLcVrtSdYq0MEh/B10r17pRP4ciHLwTgnuLV8Ay6cgEMLkcBkFCKyFZ43YldYzg==",
"version": "2.0.1",
"resolved": "https://registry.npmjs.org/string.prototype.includes/-/string.prototype.includes-2.0.1.tgz",
"integrity": "sha512-o7+c9bW6zpAdJHTtujeePODAhkuicdAryFsfVKwA+wGw89wJ4GTY484WTucM9hLtDEOpOvI+aHnzqnC5lHp4Rg==",
"dev": true,
"dependencies": {
"define-properties": "^1.1.3",
"es-abstract": "^1.17.5"
"call-bind": "^1.0.7",
"define-properties": "^1.2.1",
"es-abstract": "^1.23.3"
},
"engines": {
"node": ">= 0.4"
}
},
"node_modules/string.prototype.matchall": {
@@ -23499,9 +23510,9 @@
}
},
"node_modules/tslib": {
"version": "2.7.0",
"resolved": "https://registry.npmjs.org/tslib/-/tslib-2.7.0.tgz",
"integrity": "sha512-gLXCKdN1/j47AiHiOkJN69hJmcbGTHI0ImLmbYLHykhgeN0jVGola9yVjFgzCUklsZQMW55o+dW7IXv3RCXDzA=="
"version": "2.8.0",
"resolved": "https://registry.npmjs.org/tslib/-/tslib-2.8.0.tgz",
"integrity": "sha512-jWVzBLplnCmoaTr13V9dYbiQ99wvZRd0vNWaDRg+aVYRcjDF3nDksxFDE/+fkXnKhpnUUkmx5pK/v8mCtLVqZA=="
},
"node_modules/turbo-stream": {
"version": "2.4.0",
@@ -23661,9 +23672,9 @@
}
},
"node_modules/undici": {
"version": "6.20.0",
"resolved": "https://registry.npmjs.org/undici/-/undici-6.20.0.tgz",
"integrity": "sha512-AITZfPuxubm31Sx0vr8bteSalEbs9wQb/BOBi9FPlD9Qpd6HxZ4Q0+hI742jBhkPb4RT2v5MQzaW5VhRVyj+9A==",
"version": "6.20.1",
"resolved": "https://registry.npmjs.org/undici/-/undici-6.20.1.tgz",
"integrity": "sha512-AjQF1QsmqfJys+LXfGTNum+qw4S88CojRInG/6t31W/1fk6G59s92bnAvGz5Cmur+kQv2SURXEvvudLmbrE8QA==",
"engines": {
"node": ">=18.17"
}

View File

@@ -1,6 +1,6 @@
{
"name": "openhands-frontend",
"version": "0.9.8",
"version": "0.11.0",
"private": true,
"type": "module",
"engines": {
@@ -34,7 +34,8 @@
"react-markdown": "^9.0.1",
"react-redux": "^9.1.2",
"react-router-dom": "^6.26.1",
"react-syntax-highlighter": "^15.5.0",
"react-syntax-highlighter": "^15.6.1",
"react-textarea-autosize": "^8.5.4",
"remark-gfm": "^4.0.0",
"sirv-cli": "^3.0.0",
"tailwind-merge": "^2.5.4",
@@ -46,7 +47,7 @@
"dev": "npm run make-i18n && VITE_MOCK_API=false remix vite:dev",
"dev:mock": "npm run make-i18n && VITE_MOCK_API=true remix vite:dev",
"build": "npm run make-i18n && tsc && remix vite:build",
"start": "npx sirv-cli build/client/ --single",
"start": "npx sirv-cli build/ --single",
"test": "vitest run",
"test:coverage": "npm run make-i18n && vitest run --coverage",
"dev_wsl": "VITE_WATCH_USE_POLLING=true vite",
@@ -72,10 +73,10 @@
"@remix-run/dev": "^2.11.2",
"@remix-run/testing": "^2.11.2",
"@tailwindcss/typography": "^0.5.15",
"@testing-library/jest-dom": "^6.5.0",
"@testing-library/jest-dom": "^6.6.1",
"@testing-library/react": "^16.0.1",
"@testing-library/user-event": "^14.5.2",
"@types/node": "^22.7.5",
"@types/node": "^22.7.6",
"@types/react": "^18.3.11",
"@types/react-dom": "^18.3.0",
"@types/react-highlight": "^0.12.8",

View File

@@ -0,0 +1,4 @@
{
"APP_MODE": "oss",
"GITHUB_CLIENT_ID": ""
}

View File

@@ -1,3 +0,0 @@
{
"users": []
}

View File

@@ -6,6 +6,7 @@ import {
FeedbackResponse,
GitHubAccessTokenResponse,
ErrorResponse,
GetConfigResponse,
} from "./open-hands.types";
/**
@@ -60,6 +61,15 @@ class OpenHands {
return response.json();
}
static async getConfig(): Promise<GetConfigResponse> {
const response = await fetch("config.json", {
headers: {
"Cache-Control": "no-cache",
},
});
return response.json();
}
/**
* Retrieve the list of files available in the workspace
* @param token User token provided by the server
@@ -71,7 +81,9 @@ class OpenHands {
if (path) url.searchParams.append("path", path);
const response = await fetch(url.toString(), {
headers: OpenHands.generateHeaders(token),
headers: {
Authorization: `Bearer ${token}`,
},
});
return response.json();
@@ -87,7 +99,9 @@ class OpenHands {
const url = new URL(`${OpenHands.BASE_URL}/api/select-file`);
url.searchParams.append("file", path);
const response = await fetch(url.toString(), {
headers: OpenHands.generateHeaders(token),
headers: {
Authorization: `Bearer ${token}`,
},
});
const data = await response.json();
@@ -109,7 +123,10 @@ class OpenHands {
const response = await fetch(`${OpenHands.BASE_URL}/api/save-file`, {
method: "POST",
body: JSON.stringify({ filePath: path, content }),
headers: OpenHands.generateHeaders(token),
headers: {
Authorization: `Bearer ${token}`,
"Content-Type": "application/json",
},
});
return response.json();
@@ -130,8 +147,10 @@ class OpenHands {
const response = await fetch(`${OpenHands.BASE_URL}/api/upload-files`, {
method: "POST",
headers: OpenHands.generateHeaders(token),
body: formData,
headers: {
Authorization: `Bearer ${token}`,
},
});
return response.json();
@@ -144,8 +163,11 @@ class OpenHands {
*/
static async getWorkspaceZip(token: string): Promise<Blob> {
const response = await fetch(`${OpenHands.BASE_URL}/api/zip-directory`, {
headers: OpenHands.generateHeaders(token),
headers: {
Authorization: `Bearer ${token}`,
},
});
return response.blob();
}
@@ -158,12 +180,14 @@ class OpenHands {
static async sendFeedback(
token: string,
data: Feedback,
// TODO: Type the response
): Promise<FeedbackResponse> {
const response = await fetch(`${OpenHands.BASE_URL}/api/submit-feedback`, {
method: "POST",
headers: OpenHands.generateHeaders(token),
body: JSON.stringify(data),
headers: {
Authorization: `Bearer ${token}`,
"Content-Type": "application/json",
},
});
return response.json();
@@ -177,23 +201,32 @@ class OpenHands {
static async getGitHubAccessToken(
code: string,
): Promise<GitHubAccessTokenResponse> {
const response = await fetch(`${OpenHands.BASE_URL}/github/callback`, {
const response = await fetch(`${OpenHands.BASE_URL}/api/github/callback`, {
method: "POST",
body: JSON.stringify({ code }),
headers: {
"Content-Type": "application/json",
},
});
return response.json();
}
/**
* Generate the headers for the request
* @param token User token provided by the server
* @returns Headers for the request
* Check if the user is authenticated
* @param login The user's GitHub login handle
* @returns Whether the user is authenticated
*/
private static generateHeaders(token: string) {
return {
Authorization: `Bearer ${token}`,
};
static async isAuthenticated(login: string): Promise<boolean> {
const response = await fetch(`${OpenHands.BASE_URL}/api/authenticate`, {
method: "POST",
body: JSON.stringify({ login }),
headers: {
"Content-Type": "application/json",
},
});
return response.status === 200;
}
}

View File

@@ -35,3 +35,8 @@ export interface Feedback {
permissions: "public" | "private";
trajectory: unknown[];
}
export interface GetConfigResponse {
APP_MODE: "saas" | "oss";
GITHUB_CLIENT_ID: string | null;
}

View File

@@ -0,0 +1,28 @@
interface ChevronLeftProps {
width?: number;
height?: number;
active?: boolean;
}
export function ChevronLeft({
width = 20,
height = 20,
active,
}: ChevronLeftProps) {
return (
<svg
width={width}
height={height}
viewBox={`0 0 ${width} ${height}`}
fill="none"
xmlns="http://www.w3.org/2000/svg"
>
<path
fillRule="evenodd"
clipRule="evenodd"
d="M11.204 15.0037L6.65511 9.99993L11.204 4.99617L12.1289 5.83701L8.34444 9.99993L12.1289 14.1628L11.204 15.0037Z"
fill={active ? "#D4D4D4" : "#525252"}
/>
</svg>
);
}

View File

@@ -0,0 +1,28 @@
interface ChevronRightProps {
width?: number;
height?: number;
active?: boolean;
}
export function ChevronRight({
width = 20,
height = 20,
active,
}: ChevronRightProps) {
return (
<svg
width={width}
height={height}
viewBox={`0 0 ${width} ${height}`}
fill="none"
xmlns="http://www.w3.org/2000/svg"
>
<path
fillRule="evenodd"
clipRule="evenodd"
d="M8.79602 4.99634L13.3449 10.0001L8.79602 15.0038L7.87109 14.163L11.6556 10.0001L7.87109 5.83718L8.79602 4.99634Z"
fill={active ? "#D4D4D4" : "#525252"}
/>
</svg>
);
}

View File

@@ -0,0 +1,5 @@
<svg width="11" height="11" viewBox="0 0 11 11" fill="none" xmlns="http://www.w3.org/2000/svg">
<path fill-rule="evenodd" clip-rule="evenodd"
d="M5.69949 5.72974L7.91965 7.9505L8.35077 7.51999L6.13001 5.29922L8.35077 3.07907L7.92026 2.64795L5.69949 4.86871L3.47934 2.64795L3.04883 3.07907L5.26898 5.29922L3.04883 7.51938L3.47934 7.9505L5.69949 5.72974Z"
fill="black" />
</svg>

After

Width:  |  Height:  |  Size: 387 B

View File

@@ -0,0 +1,10 @@
import Clip from "#/assets/clip.svg?react";
export function AttachImageLabel() {
return (
<div className="flex self-start items-center text-[#A3A3A3] text-xs leading-[18px] -tracking-[0.08px] cursor-pointer">
<Clip width={16} height={16} />
Attach images
</div>
);
}

Some files were not shown because too many files have changed in this diff Show More