Compare commits

..

1 Commits

Author SHA1 Message Date
amanape 5c798fe7a6 Setup basic auth guard 2024-10-16 12:25:48 +04:00
459 changed files with 38562 additions and 18253 deletions
+9 -40
View File
@@ -1,35 +1,21 @@
# To get started with Dependabot version updates, you'll need to specify which
# package ecosystems to update and where the package manifests are located.
# Please see the documentation for all configuration options:
# https://docs.github.com/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file
version: 2
updates:
- package-ecosystem: "pip"
directory: "/"
schedule:
interval: "daily"
open-pull-requests-limit: 1
groups:
# put packages in their own group if they have a history of breaking the build or needing to be reverted
pre-commit:
patterns:
- "pre-commit"
llama:
patterns:
- "llama*"
chromadb:
patterns:
- "chromadb"
security-all:
applies-to: "security-updates"
patterns:
- "*"
version-all:
applies-to: "version-updates"
patterns:
- "*"
open-pull-requests-limit: 20
- package-ecosystem: "npm"
directory: "/frontend"
schedule:
interval: "daily"
open-pull-requests-limit: 1
open-pull-requests-limit: 20
groups:
docusaurus:
patterns:
@@ -37,21 +23,12 @@ updates:
eslint:
patterns:
- "*eslint*"
security-all:
applies-to: "security-updates"
patterns:
- "*"
version-all:
applies-to: "version-updates"
patterns:
- "*"
- package-ecosystem: "npm"
directory: "/docs"
schedule:
interval: "weekly"
day: "wednesday"
open-pull-requests-limit: 1
interval: "daily"
open-pull-requests-limit: 20
groups:
docusaurus:
patterns:
@@ -59,11 +36,3 @@ updates:
eslint:
patterns:
- "*eslint*"
security-all:
applies-to: "security-updates"
patterns:
- "*"
version-all:
applies-to: "version-updates"
patterns:
- "*"
+82 -2
View File
@@ -88,6 +88,14 @@ jobs:
hash_from_app_image=$(cat docker-outputs.txt | grep "Hash for docker build directory" | awk -F "): " '{print $2}' | uniq | head -n1)
echo "hash_from_app_image=$hash_from_app_image" >> $GITHUB_OUTPUT
echo "Hash from app image: $hash_from_app_image"
# Smoke test for Docker-in-Docker from the app image.
# Move this once the app image has a test suite of its own.
- name: Test docker in App Image
  run: |
    # Lowercase the repository owner before using it in the image reference
    export REPO_OWNER=${{ github.repository_owner }}
    REPO_OWNER=$(echo $REPO_OWNER | tr '[:upper:]' '[:lower:]')
    # Mount the host Docker socket and run hello-world from inside the app image
    docker run \
      -e SANDBOX_USER_ID=0 \
      -v /var/run/docker.sock:/var/run/docker.sock \
      ghcr.io/${REPO_OWNER}/openhands:${{ env.RELEVANT_SHA }} \
      /bin/bash -c "docker run hello-world"
# Builds the runtime Docker images
ghcr_build_runtime:
@@ -376,6 +384,78 @@ jobs:
env:
CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
# Run integration tests with the eventstream runtime Docker image
runtime_integration_tests_on_linux:
  name: RT Integration Tests (Linux)
  runs-on: ubuntu-latest
  # Needs the runtime image produced by the build job
  needs:
    - ghcr_build_runtime
  strategy:
    # Let every matrix entry finish even if another one fails
    fail-fast: false
    matrix:
      base_image:
        - 'nikolaik'
  steps:
    - uses: actions/checkout@v4

    - name: Free Disk Space (Ubuntu)
      uses: jlumbroso/free-disk-space@main
      with:
        # Frees about 6 GB, but may remove tools that are actually needed
        tool-cache: true
        # Everything below defaults to true; set an entry to "false" if the
        # workflow turns out to depend on what it removes
        android: true
        dotnet: true
        haskell: true
        large-packages: true
        docker-images: false
        swap-storage: true

    - name: Set up Docker Buildx
      id: buildx
      uses: docker/setup-buildx-action@v3

    # Forked repos can't push to GHCR, so the image is passed along as an
    # artifact and loaded into the local Docker daemon instead
    - name: Download runtime image for fork
      if: github.event.pull_request.head.repo.fork
      uses: actions/download-artifact@v4
      with:
        name: runtime-${{ matrix.base_image }}
        path: /tmp

    - name: Load runtime image for fork
      if: github.event.pull_request.head.repo.fork
      run: |
        docker load --input /tmp/runtime-${{ matrix.base_image }}.tar

    - name: Cache Poetry dependencies
      uses: actions/cache@v4
      with:
        path: |
          ~/.cache/pypoetry
          ~/.virtualenvs
        key: ${{ runner.os }}-poetry-${{ hashFiles('**/poetry.lock') }}
        restore-keys: |
          ${{ runner.os }}-poetry-

    - name: Set up Python
      uses: actions/setup-python@v5
      with:
        python-version: '3.12'

    - name: Install poetry via pipx
      run: pipx install poetry

    - name: Install Python dependencies using Poetry
      run: make install-python-dependencies

    - name: Run integration tests
      run: |
        # Build the lowercase runtime image reference for this matrix entry
        image_name=ghcr.io/${{ github.repository_owner }}/runtime:${{ env.RELEVANT_SHA }}-${{ matrix.base_image }}
        image_name=$(echo $image_name | tr '[:upper:]' '[:lower:]')

        # Settings are passed to regenerate.sh as environment variables
        TEST_RUNTIME=eventstream \
        SANDBOX_USER_ID=$(id -u) \
        SANDBOX_RUNTIME_CONTAINER_IMAGE=$image_name \
        TEST_IN_CI=true \
        TEST_ONLY=true \
        ./tests/integration/regenerate.sh

    - name: Upload coverage to Codecov
      uses: codecov/codecov-action@v4
      env:
        CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
# The two following jobs (named identically) are to check whether all the runtime tests have passed as the
# "All Runtime Tests Passed" is a required job for PRs to merge
# Due to this bug: https://github.com/actions/runner/issues/2566, we want to create a job that runs when the
@@ -384,7 +464,7 @@ jobs:
name: All Runtime Tests Passed
if: ${{ !cancelled() && !contains(needs.*.result, 'failure') && !contains(needs.*.result, 'cancelled') }}
runs-on: ubuntu-latest
needs: [test_runtime_root, test_runtime_oh, verify_hash_equivalence_in_runtime_and_app]
needs: [test_runtime_root, test_runtime_oh, runtime_integration_tests_on_linux, verify_hash_equivalence_in_runtime_and_app]
steps:
- name: All tests passed
run: echo "All runtime tests have passed successfully!"
@@ -393,7 +473,7 @@ jobs:
name: All Runtime Tests Passed
if: ${{ cancelled() || contains(needs.*.result, 'failure') || contains(needs.*.result, 'cancelled') }}
runs-on: ubuntu-latest
needs: [test_runtime_root, test_runtime_oh, verify_hash_equivalence_in_runtime_and_app]
needs: [test_runtime_root, test_runtime_oh, runtime_integration_tests_on_linux, verify_hash_equivalence_in_runtime_and_app]
steps:
- name: Some tests failed
run: |
-96
View File
@@ -1,96 +0,0 @@
# Workflow that runs python unit tests on mac
name: Run Python Unit Tests Mac

# This job is flaky so only run it nightly
on:
  schedule:
    - cron: '0 0 * * *'

jobs:
  # Run python unit tests on macOS
  test-on-macos:
    name: Python Unit Tests on macOS
    runs-on: macos-14
    env:
      INSTALL_DOCKER: '1' # Set to '0' to skip Docker installation
    strategy:
      matrix:
        python-version: ['3.12']
    steps:
      - uses: actions/checkout@v4

      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Cache Poetry dependencies
        uses: actions/cache@v4
        with:
          path: |
            ~/.cache/pypoetry
            ~/.virtualenvs
          key: ${{ runner.os }}-poetry-${{ hashFiles('**/poetry.lock') }}
          restore-keys: |
            ${{ runner.os }}-poetry-

      - name: Install poetry via pipx
        run: pipx install poetry

      - name: Install Python dependencies using Poetry
        run: poetry install --without evaluation,llama-index

      - name: Install & Start Docker
        if: env.INSTALL_DOCKER == '1'
        run: |
          # Uninstall colima to upgrade to the latest version
          if brew list colima &>/dev/null; then
            brew uninstall colima
            # unlinking colima dependency: go
            brew uninstall go@1.21
          fi
          rm -rf ~/.colima ~/.lima
          brew install --HEAD colima
          brew install docker

          start_colima() {
            # Pick a random SSH port in the range 10000-20000. The port is not
            # checked for availability; a failed start is retried by the loop
            # below with a newly drawn port.
            RANDOM_PORT=$((RANDOM % 10001 + 10000))
            if ! colima start --network-address --arch x86_64 --cpu=1 --memory=1 --verbose --ssh-port $RANDOM_PORT; then
              echo "Failed to start Colima."
              return 1
            fi
            return 0
          }

          # Attempt to start Colima for 5 total attempts:
          ATTEMPT_LIMIT=5
          for ((i=1; i<=ATTEMPT_LIMIT; i++)); do
            if start_colima; then
              echo "Colima started successfully."
              break
            else
              # Cleanup is best-effort: the step shell exits on the first failing
              # command, so a failed stop/delete must not end the retry loop early.
              colima stop -f || true
              sleep 10
              colima delete -f || true
              if [ $i -eq $ATTEMPT_LIMIT ]; then
                exit 1
              fi
              sleep 10
            fi
          done

          # For testcontainers to find the Colima socket
          # https://github.com/abiosoft/colima/blob/main/docs/FAQ.md#cannot-connect-to-the-docker-daemon-at-unixvarrundockersock-is-the-docker-daemon-running
          sudo ln -sf $HOME/.colima/default/docker.sock /var/run/docker.sock

      - name: Build Environment
        run: make build

      - name: Set up Docker Buildx
        id: buildx
        uses: docker/setup-buildx-action@v3

      - name: Run Tests
        run: poetry run pytest --forked --cov=openhands --cov-report=xml ./tests/unit --ignore=tests/unit/test_memory.py

      - name: Upload coverage to Codecov
        uses: codecov/codecov-action@v4
        env:
          CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
+88
View File
@@ -16,6 +16,94 @@ concurrency:
cancel-in-progress: true
jobs:
# Run python unit tests on macOS
test-on-macos:
  name: Python Unit Tests on macOS
  runs-on: macos-12
  env:
    INSTALL_DOCKER: '1' # Set to '0' to skip Docker installation
  strategy:
    matrix:
      python-version: ['3.12']
  steps:
    - uses: actions/checkout@v4

    - name: Set up Python ${{ matrix.python-version }}
      uses: actions/setup-python@v5
      with:
        python-version: ${{ matrix.python-version }}

    - name: Cache Poetry dependencies
      uses: actions/cache@v4
      with:
        path: |
          ~/.cache/pypoetry
          ~/.virtualenvs
        key: ${{ runner.os }}-poetry-${{ hashFiles('**/poetry.lock') }}
        restore-keys: |
          ${{ runner.os }}-poetry-

    - name: Install poetry via pipx
      run: pipx install poetry

    - name: Install Python dependencies using Poetry
      run: poetry install --without evaluation,llama-index

    - name: Install & Start Docker
      if: env.INSTALL_DOCKER == '1'
      run: |
        # Uninstall colima to upgrade to the latest version
        if brew list colima &>/dev/null; then
          brew uninstall colima
          # unlinking colima dependency: go
          brew uninstall go@1.21
        fi
        rm -rf ~/.colima ~/.lima
        brew install --HEAD colima
        brew install docker

        start_colima() {
          # Pick a random SSH port in the range 10000-20000. The port is not
          # checked for availability; a failed start is retried by the loop
          # below with a newly drawn port.
          RANDOM_PORT=$((RANDOM % 10001 + 10000))
          if ! colima start --network-address --arch x86_64 --cpu=1 --memory=1 --verbose --ssh-port $RANDOM_PORT; then
            echo "Failed to start Colima."
            return 1
          fi
          return 0
        }

        # Attempt to start Colima for 5 total attempts:
        ATTEMPT_LIMIT=5
        for ((i=1; i<=ATTEMPT_LIMIT; i++)); do
          if start_colima; then
            echo "Colima started successfully."
            break
          else
            # Cleanup is best-effort: the step shell exits on the first failing
            # command, so a failed stop/delete must not end the retry loop early.
            colima stop -f || true
            sleep 10
            colima delete -f || true
            if [ $i -eq $ATTEMPT_LIMIT ]; then
              exit 1
            fi
            sleep 10
          fi
        done

        # For testcontainers to find the Colima socket
        # https://github.com/abiosoft/colima/blob/main/docs/FAQ.md#cannot-connect-to-the-docker-daemon-at-unixvarrundockersock-is-the-docker-daemon-running
        sudo ln -sf $HOME/.colima/default/docker.sock /var/run/docker.sock

    - name: Build Environment
      run: make build

    - name: Set up Docker Buildx
      id: buildx
      uses: docker/setup-buildx-action@v3

    - name: Run Tests
      run: poetry run pytest --forked --cov=openhands --cov-report=xml ./tests/unit --ignore=tests/unit/test_memory.py

    - name: Upload coverage to Codecov
      uses: codecov/codecov-action@v4
      env:
        CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
# Run python unit tests on Linux
test-on-linux:
name: Python Unit Tests on Linux
@@ -0,0 +1,73 @@
# Manually triggered workflow that regenerates the integration tests on the
# current branch and pushes the result back to that branch.
name: Regenerate Integration Tests

on:
  workflow_dispatch:
    inputs:
      debug:
        description: 'Enable debug mode'
        type: boolean
        default: true
      log_to_file:
        description: 'Enable logging to file'
        type: boolean
        default: true
      force_regenerate_tests:
        description: 'Force regeneration of tests'
        type: boolean
        default: false
      force_use_llm:
        description: 'Force use of LLM'
        type: boolean
        default: false

jobs:
  regenerate_integration_tests:
    # Never run against main: the job pushes a commit to the branch it runs on
    if: github.ref != 'refs/heads/main'
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Set up Docker Buildx
        id: buildx
        uses: docker/setup-buildx-action@v3

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.12"

      - name: Cache Poetry dependencies
        uses: actions/cache@v4
        with:
          path: |
            ~/.cache/pypoetry
            ~/.virtualenvs
          key: ${{ runner.os }}-poetry-${{ hashFiles('**/poetry.lock') }}
          restore-keys: |
            ${{ runner.os }}-poetry-

      - name: Install poetry via pipx
        run: pipx install poetry

      - name: Install Python dependencies using Poetry
        run: make install-python-dependencies

      - name: Build Environment
        run: make build

      - name: Regenerate integration tests
        run: |
          # The workflow inputs are handed to the script as environment variables
          DEBUG=${{ inputs.debug }} \
          LOG_TO_FILE=${{ inputs.log_to_file }} \
          FORCE_REGENERATE=${{ inputs.force_regenerate_tests }} \
          FORCE_USE_LLM=${{ inputs.force_use_llm }} \
          ./tests/integration/regenerate.sh

      - name: Commit changes
        run: |
          # Stage first so that newly created (untracked) files count as changes;
          # an unstaged `git diff` only compares tracked files.
          git add .
          if git diff --cached --quiet --exit-code; then
            echo "No changes to commit"
            exit 0
          fi
          git config --global user.name 'github-actions[bot]'
          git config --global user.email 'github-actions[bot]@users.noreply.github.com'
          # run it twice in case pre-commit makes changes
          git commit -am "Regenerate integration tests" || git commit -am "Regenerate integration tests"
          git push
+1 -1
View File
@@ -178,6 +178,7 @@ evaluation/toolqa/data
# frontend
# dependencies
frontend/node_modules
frontend/.pnp
frontend/bun.lockb
frontend/yarn.lock
@@ -227,4 +228,3 @@ runtime_*.tar
containers/runtime/Dockerfile
containers/runtime/project.tar.gz
containers/runtime/code
**/node_modules/
+75 -52
View File
@@ -2,6 +2,14 @@
Thanks for your interest in contributing to OpenHands! We welcome and appreciate contributions.
## How Can I Contribute?
There are many ways that you can contribute:
1. **Download and use** OpenHands, and send [issues](https://github.com/All-Hands-AI/OpenHands/issues) when you encounter something that isn't working or a feature that you'd like to see.
2. **Send feedback** after each session by [clicking the thumbs-up/thumbs-down buttons](https://docs.all-hands.dev/modules/usage/feedback), so we can see where things are working and failing, and also build an open dataset for training code agents.
3. **Improve the Codebase** by sending PRs (see details below). In particular, we have some [good first issues](https://github.com/All-Hands-AI/OpenHands/labels/good%20first%20issue) that may be ones to start on.
## Understanding OpenHands's CodeBase
To understand the codebase, please refer to the README in each module:
@@ -11,61 +19,79 @@ To understand the codebase, please refer to the README in each module:
- [agenthub](./openhands/agenthub/README.md)
- [server](./openhands/server/README.md)
## Setting up your development environment
We have a separate doc [Development.md](https://github.com/All-Hands-AI/OpenHands/blob/main/Development.md) that tells you how to set up a development workflow.
## How can I contribute?
There are many ways that you can contribute:
1. **Download and use** OpenHands, and send [issues](https://github.com/All-Hands-AI/OpenHands/issues) when you encounter something that isn't working or a feature that you'd like to see.
2. **Send feedback** after each session by [clicking the thumbs-up/thumbs-down buttons](https://docs.all-hands.dev/modules/usage/feedback), so we can see where things are working and failing, and also build an open dataset for training code agents.
3. **Improve the Codebase** by sending PRs (see details below). In particular, we have some [good first issues](https://github.com/All-Hands-AI/OpenHands/labels/good%20first%20issue) that may be ones to start on.
## What can I build?
Here are a few ways you can help improve the codebase.
#### UI/UX
We're always looking to improve the look and feel of the application. If you've got a small fix
for something that's bugging you, feel free to open up a PR that changes the `./frontend` directory.
If you're looking to make a bigger change, add a new UI element, or significantly alter the style
of the application, please open an issue first, or better, join the #frontend channel in our Slack
to gather consensus from our design team first.
#### Improving the agent
Our main agent is the CodeAct agent. You can [see its prompts here](https://github.com/All-Hands-AI/OpenHands/tree/main/openhands/agenthub/codeact_agent).
Changes to these prompts, and to the underlying behavior in Python, can have a huge impact on user experience.
You can try modifying the prompts to see how they change the behavior of the agent as you use the app
locally, but we will need to do an end-to-end evaluation of any changes here to ensure that the agent
is getting better over time.
We use the [SWE-bench](https://www.swebench.com/) benchmark to test our agent. You can join the #evaluation
channel in Slack to learn more.
#### Adding a new agent
You may want to experiment with building new types of agents. You can add an agent to `openhands/agenthub`
to help expand the capabilities of OpenHands.
#### Adding a new runtime
The agent needs a place to run code and commands. When you run OpenHands on your laptop, it uses a Docker container
to do this by default. But there are other ways of creating a sandbox for the agent.
If you work for a company that provides a cloud-based runtime, you could help us add support for that runtime
by implementing the [interface specified here](https://github.com/All-Hands-AI/OpenHands/blob/main/openhands/runtime/runtime.py).
#### Testing
When you write code, it is also good to write tests. Please navigate to the `tests` folder to see existing test suites.
At the moment, we have two kinds of tests: `unit` and `integration`. Please refer to the README for each test suite. These tests also run on GitHub's continuous integration to ensure the quality of the project.
## Sending Pull Requests to OpenHands
You'll need to fork our repository to send us a Pull Request. You can learn more
about how to fork a GitHub repo and open a PR with your changes in [this article](https://medium.com/swlh/forks-and-pull-requests-how-to-contribute-to-github-repos-8843fac34ce8).
### 1. Fork the Official Repository
Fork the [OpenHands repository](https://github.com/All-Hands-AI/OpenHands) into your own account.
Clone your own forked repository into your local environment:
### Pull Request title
```shell
git clone git@github.com:<YOUR-USERNAME>/OpenHands.git
```
### 2. Configure Git
Set the official repository as your [upstream](https://www.atlassian.com/git/tutorials/git-forks-and-upstreams) to synchronize with the latest update in the official repository.
Add the original repository as upstream:
```shell
cd OpenHands
git remote add upstream git@github.com:All-Hands-AI/OpenHands.git
```
Verify that the remote is set:
```shell
git remote -v
```
You should see both `origin` and `upstream` in the output.
### 3. Synchronize with Official Repository
Synchronize with the latest commits in the official repository before coding:
```shell
git fetch upstream
git checkout main
git merge upstream/main
git push origin main
```
### 4. Set up the Development Environment
We have a separate doc [Development.md](https://github.com/All-Hands-AI/OpenHands/blob/main/Development.md) that tells you how to set up a development workflow.
### 5. Write Code and Commit It
Once you have done this, you can write code, test it, and commit it to a branch (replace `my_branch` with an appropriate name):
```shell
git checkout -b my_branch
git add .
git commit
git push origin my_branch
```
### 6. Open a Pull Request
* On GitHub, go to the page of your forked repository, and create a Pull Request:
- Click on `Branches`
- Click on the `...` beside your branch and click on `New pull request`
- Set `base repository` to `All-Hands-AI/OpenHands`
- Set `base` to `main`
- Click `Create pull request`
The PR should appear in [OpenHands PRs](https://github.com/All-Hands-AI/OpenHands/pulls).
Then the OpenHands team will review your code.
## PR Rules
### 1. Pull Request title
As described [here](https://github.com/commitizen/conventional-commit-types/blob/master/index.json), a valid PR title should begin with one of the following prefixes:
- `feat`: A new feature
@@ -86,9 +112,6 @@ For example, a PR title could be:
You may also check out previous PRs in the [PR list](https://github.com/All-Hands-AI/OpenHands/pulls).
### Pull Request description
### 2. Pull Request description
- If your PR is small (such as a typo fix), you can go brief.
- If it contains a lot of changes, it's better to write more details.
If your changes are user-facing (e.g. a new feature in the UI, a change in behavior, or a bugfix)
please include a short message that we can add to our changelog.
+5 -4
View File
@@ -5,14 +5,12 @@ Otherwise, you can clone the OpenHands project directly.
## Start the server for development
### 1. Requirements
* Linux, Mac OS, or [WSL on Windows](https://learn.microsoft.com/en-us/windows/wsl/install) [Ubuntu <= 22.04]
* Linux, Mac OS, or [WSL on Windows](https://learn.microsoft.com/en-us/windows/wsl/install) [ Ubuntu <= 22.04]
* [Docker](https://docs.docker.com/engine/install/) (For those on MacOS, make sure to allow the default Docker socket to be used from advanced settings!)
* [Python](https://www.python.org/downloads/) = 3.12
* [NodeJS](https://nodejs.org/en/download/package-manager) >= 18.17.1
* [Poetry](https://python-poetry.org/docs/#installing-with-the-official-installer) >= 1.8
* OS-specific dependencies:
- Ubuntu: build-essential => `sudo apt-get install build-essential`
- WSL: netcat => `sudo apt-get install netcat`
* netcat => `sudo apt-get install netcat`
Make sure you have all these dependencies installed before moving on to `make build`.
@@ -93,6 +91,9 @@ To run tests, refer to the following:
poetry run pytest ./tests/unit/test_*.py
```
#### Integration tests
Please refer to [this README](./tests/integration/README.md) for details.
### 9. Add or update dependency
1. Add your dependency in `pyproject.toml` or use `poetry add xxx`
2. Update the poetry.lock file via `poetry lock --no-update`
+2 -2
View File
@@ -195,7 +195,7 @@ start-backend:
# Start frontend
start-frontend:
@echo "$(YELLOW)Starting frontend...$(RESET)"
@cd frontend && VITE_BACKEND_HOST=$(BACKEND_HOST_PORT) VITE_FRONTEND_PORT=$(FRONTEND_PORT) npm run dev -- --port $(FRONTEND_PORT) --host $(BACKEND_HOST)
@cd frontend && VITE_BACKEND_HOST=$(BACKEND_HOST_PORT) VITE_FRONTEND_PORT=$(FRONTEND_PORT) npm run start -- --port $(FRONTEND_PORT)
# Common setup for running the app (non-callable)
_run_setup:
@@ -214,7 +214,7 @@ _run_setup:
run:
@echo "$(YELLOW)Running the app...$(RESET)"
@$(MAKE) -s _run_setup
@$(MAKE) -s start-frontend
@cd frontend && echo "$(BLUE)Starting frontend with npm...$(RESET)" && npm run start -- --port $(FRONTEND_PORT)
@echo "$(GREEN)Application started successfully.$(RESET)"
# Run the app (in docker)
+3 -3
View File
@@ -42,10 +42,10 @@ system requirements and more information.
```bash
export WORKSPACE_BASE=$(pwd)/workspace
docker pull ghcr.io/all-hands-ai/runtime:0.11-nikolaik
docker pull ghcr.io/all-hands-ai/runtime:0.9-nikolaik
docker run -it --pull=always \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=ghcr.io/all-hands-ai/runtime:0.11-nikolaik \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=ghcr.io/all-hands-ai/runtime:0.9-nikolaik \
-e SANDBOX_USER_ID=$(id -u) \
-e WORKSPACE_MOUNT_PATH=$WORKSPACE_BASE \
-v $WORKSPACE_BASE:/opt/workspace_base \
@@ -53,7 +53,7 @@ docker run -it --pull=always \
-p 3000:3000 \
--add-host host.docker.internal:host-gateway \
--name openhands-app-$(date +%Y%m%d%H%M%S) \
ghcr.io/all-hands-ai/openhands:0.11
ghcr.io/all-hands-ai/openhands:0.9
```
You'll find OpenHands running at [http://localhost:3000](http://localhost:3000)!
+3 -1
View File
@@ -172,9 +172,11 @@ model = "gpt-4o"
#disable_vision = true
[llm.gpt4o-mini]
# API key to use
api_key = "your-api-key"
model = "gpt-4o"
# Model to use
model = "gpt-4o-mini"
#################################### Agent ###################################
# Configuration for agents (group name starts with 'agent')
+10 -2
View File
@@ -8,7 +8,7 @@ RUN npm install -g npm@10.5.1
RUN npm ci
COPY ./frontend ./
RUN npm run build
RUN npm run make-i18n && npm run build
FROM python:3.12.3-slim AS backend-builder
@@ -46,6 +46,14 @@ RUN mkdir -p $WORKSPACE_BASE
RUN apt-get update -y \
&& apt-get install -y curl ssh sudo
# Install Docker - https://docs.docker.com/engine/install/debian/
# Every apt call passes -y so the image build never waits on a prompt, and the
# keyrings directory is created explicitly instead of being assumed to exist.
# The build already runs as root here, so the repo list is written directly
# rather than through `sudo tee`.
RUN apt-get install -y ca-certificates curl \
    && install -m 0755 -d /etc/apt/keyrings \
    && curl -fsSL https://download.docker.com/linux/debian/gpg -o /etc/apt/keyrings/docker.asc \
    && chmod a+r /etc/apt/keyrings/docker.asc \
    && echo "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.asc] https://download.docker.com/linux/debian bookworm stable" > /etc/apt/sources.list.d/docker.list \
    && apt-get update \
    && apt-get install -y docker-ce
# Default is 1000, but OSX is often 501
RUN sed -i 's/^UID_MIN.*/UID_MIN 499/' /etc/login.defs
# Default is 60000, but we've seen up to 200000
@@ -82,7 +90,7 @@ RUN python openhands/core/download.py # No-op to download assets
# openhands:openhands -> openhands:app
RUN find /app \! -group app -exec chgrp app {} +
COPY --chown=openhands:app --chmod=770 --from=frontend-builder /app/build ./frontend/build
COPY --chown=openhands:app --chmod=770 --from=frontend-builder /app/build/client ./frontend/build
COPY --chown=openhands:app --chmod=770 ./containers/app/entrypoint.sh /app/entrypoint.sh
USER root
+59 -31
View File
@@ -21,7 +21,7 @@ The OpenHands Runtime system uses a client-server architecture implemented with
graph TD
A[User-provided Custom Docker Image] --> B[OpenHands Backend]
B -->|Builds| C[OH Runtime Image]
C -->|Launches| D[Action Executor]
C -->|Launches| D[Runtime Client]
D -->|Initializes| E[Browser]
D -->|Initializes| F[Bash Shell]
D -->|Initializes| G[Plugins]
@@ -49,10 +49,10 @@ graph TD
1. User Input: The user provides a custom base Docker image
2. Image Building: OpenHands builds a new Docker image (the "OH runtime image") based on the user-provided image. This new image includes OpenHands-specific code, primarily the "runtime client"
3. Container Launch: When OpenHands starts, it launches a Docker container using the OH runtime image
4. Action Execution Server Initialization: The action execution server initializes an `ActionExecutor` inside the container, setting up necessary components like a bash shell and loading any specified plugins
5. Communication: The OpenHands backend (`openhands/runtime/impl/eventstream/eventstream_runtime.py`) communicates with the action execution server over RESTful API, sending actions and receiving observations
4. Client Initialization: The runtime client initializes inside the container, setting up necessary components like a bash shell and loading any specified plugins
5. Communication: The OpenHands backend (`runtime.py`) communicates with the runtime client over RESTful API, sending actions and receiving observations
6. Action Execution: The runtime client receives actions from the backend, executes them in the sandboxed environment, and sends back observations
7. Observation Return: The action execution server sends execution results back to the OpenHands backend as observations
7. Observation Return: The client sends execution results back to the OpenHands backend as observations
The role of the client:
@@ -70,46 +70,74 @@ Check out the [relevant code](https://github.com/All-Hands-AI/OpenHands/blob/mai
### Image Tagging System
OpenHands uses a dual-tagging system for its runtime images to balance reproducibility with flexibility.
Tags may be in one of 2 formats:
OpenHands uses a dual-tagging system for its runtime images to balance reproducibility with flexibility:
- **Generic**: `oh_v{openhands_version}_{16_digit_lock_hash}` (e.g.: `oh_v0.9.9_1234567890abcdef`)
- **Specific**: `oh_v{openhands_version}_{16_digit_lock_hash}_{16_digit_source_hash}`
(e.g.: `oh_v0.9.9_1234567890abcdef_1234567890abcdef`)
1. Hash-based tag: `{target_image_repo}:{target_image_hash_tag}`.
Example: `runtime:abc123def456`
#### Lock Hash
- This tag is based on the MD5 hash of the Docker build folder, which includes the source code (of runtime client and related dependencies) and Dockerfile
- Identical hash tags guarantee that the images were built with exactly the same source code and Dockerfile
- This ensures reproducibility; the same hash always means the same image contents
This hash is built from the first 16 digits of the MD5 of:
- The name of the base image upon which the image was built (e.g.: `nikolaik/python-nodejs:python3.12-nodejs22`)
- The content of the `pyproject.toml` included in the image.
- The content of the `poetry.lock` included in the image.
2. Generic tag: `{target_image_repo}:{target_image_tag}`.
Example: `runtime:oh_v0.9.3_ubuntu_tag_22.04`
This effectively gives a hash for the dependencies of OpenHands, independent of the source code.
- This tag follows the format: `runtime:oh_v{OH_VERSION}_{BASE_IMAGE_NAME}_tag_{BASE_IMAGE_TAG}`
- It represents the latest build for a particular base image and OpenHands version combination
- This tag is updated whenever a new image is built from the same base image, even if the source code changes
#### Source Hash
The hash-based tag ensures reproducibility, while the generic tag provides a stable reference to the latest version of a particular configuration. This dual-tagging approach allows OpenHands to efficiently manage both development and production environments.
This is the first 16 digits of the MD5 of the directory hash for the source directory. This gives a hash
for only the OpenHands source.
### Build Process
#### Build Process
1. Image Naming Convention:
- Hash-based tag: `{target_image_repo}:{target_image_hash_tag}`.
Example: `runtime:abc123def456`
- Generic tag: `{target_image_repo}:{target_image_tag}`.
Example: `runtime:oh_v0.9.3_ubuntu_tag_22.04`
When generating an image...
2. Build Process:
- a. Convert the base image name to an OH runtime image name
Example: `ubuntu:22.04` -> `runtime:oh_v0.9.3_ubuntu_tag_22.04`
- b. Generate a build context (Dockerfile and OpenHands source code) and calculate its hash
- c. Check for an existing image with the calculated hash
- d. If not found, check for a recent compatible image to use as a base
- e. If no compatible image exists, build from scratch using the original base image
- f. Tag the new image with both hash-based and generic tags
- OpenHands first checks whether an image with the same **Specific** tag exists. If there is such an image,
no build is performed - the existing image is used.
- OpenHands next checks whether an image with the **Generic** tag exists. If there is such an image,
OpenHands builds a new image based upon it, bypassing all installation steps (like `poetry install` and
`apt-get`) except a final operation to copy the current source code. The new image is tagged with a
**Specific** tag only.
- If neither a **Specific** nor **Generic** tag exists, a brand new image is built based upon the base
image (which is a slower operation). This new image is tagged with both the **Generic** and **Specific**
tags.
3. Image Reuse and Rebuilding Logic:
The system follows these steps to determine whether to build a new image or use an existing one from a user-provided (base) image (e.g., `ubuntu:22.04`):
- a. If an image exists with the same hash (e.g., `runtime:abc123def456`), it will be reused as is
- b. If the exact hash is not found, the system will try to rebuild using the latest generic image (e.g., `runtime:oh_v0.9.3_ubuntu_tag_22.04`) as a base. This saves time by leveraging existing dependencies
- c. If neither the hash-tagged nor the generic-tagged image is found, the system will build the image completely from scratch
This dual-tagging approach allows OpenHands to efficiently manage both development and production environments.
4. Caching and Efficiency:
- The system attempts to reuse existing images when possible to save build time
- If an exact match (by hash) is found, it's used without rebuilding
- If a compatible image is found, it's used as a base for rebuilding, saving time on dependency installation
Here's a flowchart illustrating the build process:
```mermaid
flowchart TD
A[Start] --> B{Convert base image name}
B --> |ubuntu:22.04 -> runtime:oh_v0.9.3_ubuntu_tag_22.04| C[Generate build context and hash]
C --> D{Check for existing image with hash}
D -->|Found runtime:abc123def456| E[Use existing image]
D -->|Not found| F{Check for runtime:oh_v0.9.3_ubuntu_tag_22.04}
F -->|Found| G[Rebuild based on recent image]
F -->|Not found| H[Build from scratch]
G --> I[Tag with hash and generic tags]
H --> I
E --> J[End]
I --> J
```
This approach ensures that:
1. Identical source code and Dockerfile always produce the same image (via hash-based tags)
2. The system can quickly rebuild images when minor changes occur (by leveraging recent compatible images)
3. The generic tag (e.g., `runtime:oh_v0.9.3_1234567890abcdef`) always points to the latest build for a particular base image and OpenHands version combination
3. The generic tag (e.g., `runtime:oh_v0.9.3_ubuntu_tag_22.04`) always points to the latest build for a particular base image and OpenHands version combination
## Runtime Plugin System
+1 -1
View File
@@ -57,7 +57,7 @@ docker run -it \
-v /var/run/docker.sock:/var/run/docker.sock \
--add-host host.docker.internal:host-gateway \
--name openhands-app-$(date +%Y%m%d%H%M%S) \
ghcr.io/all-hands-ai/openhands:0.11 \
ghcr.io/all-hands-ai/openhands:0.9 \
python -m openhands.core.cli
```
@@ -1,64 +1,81 @@
# Custom Sandbox
The sandbox is where the agent performs its tasks. Instead of running commands directly on your computer
(which could be risky), the agent runs them inside a Docker container.
The sandbox is where the agent does its work. Instead of running commands directly on your computer
(which could be dangerous), the agent runs them inside of a Docker container.
The default OpenHands sandbox (`python-nodejs:python3.12-nodejs22`
from [nikolaik/python-nodejs](https://hub.docker.com/r/nikolaik/python-nodejs)) comes with some packages installed such
as python and Node.js but may need other software installed by default.
as python and Node.js but your use case may need additional software installed by default.
You have two options for customization:
There are two ways you can do so:
1. Use an existing image with the required software.
2. Create your own custom Docker image.
1. Use an existing image from docker hub.
2. Creating your own custom docker image and using it.
If you choose the first option, you can skip the `Create Your Docker Image` section.
If you want to take the first approach, you can skip the `Create Your Docker Image` section.
## Setup
Make sure you are able to run OpenHands using the [Development.md](https://github.com/All-Hands-AI/OpenHands/blob/main/Development.md) first.
## Create Your Docker Image
To create a custom Docker image, it must be Debian based.
To create a custom docker image, it must be debian/ubuntu based.
For example, if you want OpenHands to have `ruby` installed, create a `Dockerfile` with the following content:
For example, if we want OpenHands to have access to the `node` binary, we would use the following Dockerfile:
```dockerfile
FROM debian:latest
# Start with latest ubuntu image
FROM ubuntu:latest
# Install required packages
RUN apt-get update && apt-get install -y ruby
# Run needed updates
RUN apt-get update && apt-get install -y
# Install node
RUN apt-get install -y nodejs
```
Save this file in a folder. Then, build your Docker image (e.g., named custom-image) by navigating to the folder in
the terminal and running:
Next build your docker image with the name of your choice, for example `custom_image`.
To do this you can create a directory and put your file inside it with the name `Dockerfile`, and inside the directory run the following command:
```bash
docker build -t custom-image .
docker build -t custom_image .
```
This will produce a new image called `custom-image`, which will be available in Docker.
This will produce a new image called ```custom_image``` that will be available in Docker Engine.
> Note that in the configuration described in this document, OpenHands will run as user "openhands" inside the
> sandbox and thus all packages installed via the docker file should be available to all users on the system, not just root.
> Note that in the configuration described in this document, OpenHands will run as user "openhands" inside the sandbox and thus all packages installed via the docker file should be available to all users on the system, not just root.
>
> Installing with apt-get above installs node for all users.
## Using the Development Workflow
## Specify your sandbox image in config.toml file
### Setup
OpenHands configuration occurs via the top-level `config.toml` file.
First, ensure you can run OpenHands by following the instructions in [Development.md](https://github.com/All-Hands-AI/OpenHands/blob/main/Development.md).
Create a `config.toml` file in the OpenHands directory and enter these contents:
### Specify the Base Sandbox Image
In the `config.toml` file within the OpenHands directory, set the `sandbox_base_container_image` to the image you want to use.
This can be an image you've already pulled or one you've built:
```bash
```toml
[core]
...
sandbox_base_container_image="custom-image"
workspace_base="./workspace"
run_as_openhands=true
sandbox_base_container_image="custom_image"
```
### Run
For `sandbox_base_container_image`, you can specify either:
1. The name of your custom image that you built in the previous step (e.g., `"custom_image"`)
2. A pre-existing image from Docker Hub (e.g., `"node:20"` if you want a sandbox with Node.js pre-installed)
## Run
Run OpenHands by running ```make run``` in the top level directory.
Navigate to ```localhost:3001``` and check if your desired dependencies are available.
In the case of the example above, running ```node -v``` in the terminal produces ```v20.15.0```.
Congratulations!
## Technical Explanation
Please refer to [custom docker image section of the runtime documentation](https://docs.all-hands.dev/modules/usage/architecture/runtime#advanced-how-openhands-builds-and-maintains-od-runtime-images) for more details.
@@ -134,11 +134,9 @@ To create an evaluation workflow for your benchmark, follow these steps:
4. Create a function to process each instance:
```python
from openhands.utils.async_utils import call_async_from_sync
def process_instance(instance: pd.Series, metadata: EvalMetadata) -> EvalOutput:
config = get_config(instance, metadata)
runtime = create_runtime(config)
call_async_from_sync(runtime.connect)
initialize_runtime(runtime, instance)
instruction = get_instruction(instance, metadata)
+1 -1
View File
@@ -51,6 +51,6 @@ docker run -it \
-v /var/run/docker.sock:/var/run/docker.sock \
--add-host host.docker.internal:host-gateway \
--name openhands-app-$(date +%Y%m%d%H%M%S) \
ghcr.io/all-hands-ai/openhands:0.11 \
ghcr.io/all-hands-ai/openhands:0.9 \
python -m openhands.core.main -t "write a bash script that prints hi"
```
+3 -3
View File
@@ -14,10 +14,10 @@ existing code that you'd like to modify.
```bash
export WORKSPACE_BASE=$(pwd)/workspace
docker pull ghcr.io/all-hands-ai/runtime:0.11-nikolaik
docker pull ghcr.io/all-hands-ai/runtime:0.9-nikolaik
docker run -it --pull=always \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=ghcr.io/all-hands-ai/runtime:0.11-nikolaik \
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=ghcr.io/all-hands-ai/runtime:0.9-nikolaik \
-e SANDBOX_USER_ID=$(id -u) \
-e WORKSPACE_MOUNT_PATH=$WORKSPACE_BASE \
-v $WORKSPACE_BASE:/opt/workspace_base \
@@ -25,7 +25,7 @@ docker run -it --pull=always \
-p 3000:3000 \
--add-host host.docker.internal:host-gateway \
--name openhands-app-$(date +%Y%m%d%H%M%S) \
ghcr.io/all-hands-ai/openhands:0.11
ghcr.io/all-hands-ai/openhands:0.9
```
You can also run OpenHands in a scriptable [headless mode](https://docs.all-hands.dev/modules/usage/how-to/headless-mode), as an [interactive CLI](https://docs.all-hands.dev/modules/usage/how-to/cli-mode), or using the [OpenHands GitHub Action](https://docs.all-hands.dev/modules/usage/how-to/github-action).
+2 -2
View File
@@ -5,7 +5,7 @@ OpenHands uses LiteLLM to make calls to Azure's chat models. You can find their
## Azure OpenAI Configuration
When running OpenHands, you'll need to set the following environment variable using `-e` in the
[docker run command](/modules/usage/installation#start-the-app):
[docker run command](/modules/usage/installation):
```
LLM_API_VERSION="<api-version>" # e.g. "2023-05-15"
@@ -37,7 +37,7 @@ OpenHands uses llama-index for embeddings. You can find their documentation on A
### Azure OpenAI Configuration
When running OpenHands, set the following environment variables using `-e` in the
[docker run command](/modules/usage/installation#start-the-app):
[docker run command](/modules/usage/installation):
```
LLM_EMBEDDING_MODEL="azureopenai"
+1 -1
View File
@@ -16,7 +16,7 @@ If the model is not in the list, toggle `Advanced Options`, and enter it in `Cus
## VertexAI - Google Cloud Platform Configs
To use Vertex AI through Google Cloud Platform when running OpenHands, you'll need to set the following environment
variables using `-e` in the [docker run command](/modules/usage/installation#start-the-app):
variables using `-e` in the [docker run command](/modules/usage/installation):
```
GOOGLE_APPLICATION_CREDENTIALS="<json-dump-of-gcp-service-account-json>"
+1 -1
View File
@@ -48,7 +48,7 @@ The following can be set in the OpenHands UI through the Settings:
- `Base URL` (through `Advanced Settings`)
There are some settings that may be necessary for some LLMs/providers that cannot be set through the UI. Instead, these
can be set through environment variables passed to the [docker run command](/modules/usage/installation#start-the-app)
can be set through environment variables passed to the [docker run command](/modules/usage/installation)
using `-e`:
- `LLM_API_VERSION`
+4 -4
View File
@@ -12,7 +12,7 @@
"@docusaurus/plugin-content-pages": "^3.5.2",
"@docusaurus/preset-classic": "^3.5.2",
"@docusaurus/theme-mermaid": "^3.5.2",
"@mdx-js/react": "^3.1.0",
"@mdx-js/react": "^3.0.0",
"clsx": "^2.0.0",
"prism-react-renderer": "^2.4.0",
"react": "^18.3.1",
@@ -2883,9 +2883,9 @@
}
},
"node_modules/@mdx-js/react": {
"version": "3.1.0",
"resolved": "https://registry.npmjs.org/@mdx-js/react/-/react-3.1.0.tgz",
"integrity": "sha512-QjHtSaoameoalGnKDT3FoIl4+9RwyTmo9ZJGBdLOks/YOiWHoRDI3PUwEzOE7kEmGcV3AFcp9K6dYu9rEuKLAQ==",
"version": "3.0.1",
"resolved": "https://registry.npmjs.org/@mdx-js/react/-/react-3.0.1.tgz",
"integrity": "sha512-9ZrPIU4MGf6et1m1ov3zKf+q9+deetI51zprKB1D/z3NOb+rUxxtEl3mCjW5wTGh6VhRdwPueh1oRzi6ezkA8A==",
"dependencies": {
"@types/mdx": "^2.0.0"
},
+1 -1
View File
@@ -19,7 +19,7 @@
"@docusaurus/plugin-content-pages": "^3.5.2",
"@docusaurus/preset-classic": "^3.5.2",
"@docusaurus/theme-mermaid": "^3.5.2",
"@mdx-js/react": "^3.1.0",
"@mdx-js/react": "^3.0.0",
"clsx": "^2.0.0",
"prism-react-renderer": "^2.4.0",
"react": "^18.3.1",
Binary file not shown.

Before

Width:  |  Height:  |  Size: 28 KiB

After

Width:  |  Height:  |  Size: 24 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 33 KiB

After

Width:  |  Height:  |  Size: 26 KiB

-9093
View File
File diff suppressed because it is too large Load Diff
-2
View File
@@ -23,7 +23,6 @@ from openhands.core.config import (
from openhands.core.logger import openhands_logger as logger
from openhands.core.main import create_runtime, run_controller
from openhands.events.action import MessageAction
from openhands.utils.async_utils import call_async_from_sync
game = None
@@ -120,7 +119,6 @@ def process_instance(
# Here's how you can run the agent (similar to the `main` function) and get the final task state
runtime = create_runtime(config)
call_async_from_sync(runtime.connect)
state: State | None = asyncio.run(
run_controller(
+37 -48
View File
@@ -2,47 +2,19 @@
This folder contains code and resources to run experiments and evaluations.
## For Benchmark Users
## Logistics
### Setup
To better organize the evaluation folder, we should follow the rules below:
Before starting evaluation, follow the instructions [here](https://github.com/All-Hands-AI/OpenHands/blob/main/Development.md) to set up your local development environment and LLM.
Once you are done with setup, you can follow the benchmark-specific instructions in each subdirectory of the evaluation directory.
Generally these will involve running `run_infer.py` to perform inference with the agents.
### Implementing and Evaluating an Agent
To add an agent to OpenHands, you will need to implement it in the [agenthub directory](https://github.com/All-Hands-AI/OpenHands/tree/main/openhands/agenthub). There is a README there with more information.
To evaluate an agent, you can provide the agent's name to the `run_infer.py` program.
### Evaluating Different LLMs
OpenHands in development mode uses `config.toml` to keep track of most configuration.
Here's an example configuration file you can use to define and use multiple LLMs:
```toml
[llm]
# IMPORTANT: add your API key here, and set the model to the one you want to evaluate
model = "gpt-4o-2024-05-13"
api_key = "sk-XXX"
[llm.eval_gpt4_1106_preview_llm]
model = "gpt-4-1106-preview"
api_key = "XXX"
temperature = 0.0
[llm.eval_some_openai_compatible_model_llm]
model = "openai/MODEL_NAME"
base_url = "https://OPENAI_COMPATIBLE_URL/v1"
api_key = "XXX"
temperature = 0.0
```
- Each subfolder contains a specific benchmark or experiment. For example, `evaluation/swe_bench` should contain
all the preprocessing/evaluation/analysis scripts.
- Raw data and experimental records should not be stored within this repo.
- For model outputs, they should be stored at [this huggingface space](https://huggingface.co/spaces/OpenHands/evaluation) for visualization.
- Important data files of manageable size and analysis scripts (e.g., jupyter notebooks) can be directly uploaded to this repo.
## Supported Benchmarks
The OpenHands evaluation harness supports a wide variety of benchmarks across software engineering, web browsing, and miscellaneous assistance tasks.
To learn more about how to integrate your benchmark into OpenHands, check out [tutorial here](https://docs.all-hands.dev/modules/usage/how-to/evaluation-harness).
### Software Engineering
@@ -69,19 +41,36 @@ The OpenHands evaluation harness supports a wide variety of benchmarks across so
- Entity deduction Arena (EDA): [`evaluation/EDA`](./EDA)
- ProofWriter: [`evaluation/logic_reasoning`](./logic_reasoning)
## Result Visualization
## Before everything begins: Setup Environment and LLM Configuration
Please follow the instructions [here](https://github.com/All-Hands-AI/OpenHands/blob/main/Development.md) to set up your local development environment and LLM.
OpenHands in development mode uses `config.toml` to keep track of most configurations.
Here's an example configuration file you can use to define and use multiple LLMs:
```toml
[llm]
# IMPORTANT: add your API key here, and set the model to the one you want to evaluate
model = "gpt-4o-2024-05-13"
api_key = "sk-XXX"
[llm.eval_gpt4_1106_preview_llm]
model = "gpt-4-1106-preview"
api_key = "XXX"
temperature = 0.0
[llm.eval_some_openai_compatible_model_llm]
model = "openai/MODEL_NAME"
base_url = "https://OPENAI_COMPATIBLE_URL/v1"
api_key = "XXX"
temperature = 0.0
```
### Result Visualization
Check [this huggingface space](https://huggingface.co/spaces/OpenHands/evaluation) for visualization of existing experimental results.
### Upload your results
You can start your own fork of [our huggingface evaluation outputs](https://huggingface.co/spaces/OpenHands/evaluation) and submit a PR with your evaluation results to our hosted huggingface repo, following the guide [here](https://huggingface.co/docs/hub/en/repositories-pull-requests-discussions#pull-requests-and-discussions).
## For Benchmark Developers
To learn more about how to integrate your benchmark into OpenHands, check out [tutorial here](https://docs.all-hands.dev/modules/usage/how-to/evaluation-harness). Briefly,
- Each subfolder contains a specific benchmark or experiment. For example, `evaluation/swe_bench` should contain
all the preprocessing/evaluation/analysis scripts.
- Raw data and experimental records should not be stored within this repo.
- For model outputs, they should be stored at [this huggingface space](https://huggingface.co/spaces/OpenHands/evaluation) for visualization.
- Important data files of manageable size and analysis scripts (e.g., jupyter notebooks) can be directly uploaded to this repo.
+1 -3
View File
@@ -32,8 +32,7 @@ from openhands.core.logger import openhands_logger as logger
from openhands.core.main import create_runtime, run_controller
from openhands.events.action import AgentFinishAction, CmdRunAction, MessageAction
from openhands.events.observation import CmdOutputObservation
from openhands.runtime.base import Runtime
from openhands.utils.async_utils import call_async_from_sync
from openhands.runtime.runtime import Runtime
def get_config(
@@ -211,7 +210,6 @@ def process_instance(
# =============================================
runtime: Runtime = create_runtime(config)
call_async_from_sync(runtime.connect)
initialize_runtime(runtime, instance=instance)
+3 -8
View File
@@ -32,8 +32,7 @@ from openhands.core.logger import openhands_logger as logger
from openhands.core.main import create_runtime, run_controller
from openhands.events.action import CmdRunAction, MessageAction
from openhands.events.observation import CmdOutputObservation
from openhands.runtime.base import Runtime
from openhands.utils.async_utils import call_async_from_sync
from openhands.runtime.runtime import Runtime
# Configure visibility of unit tests to the Agent.
USE_UNIT_TESTS = os.environ.get('USE_UNIT_TESTS', 'false').lower() == 'true'
@@ -49,14 +48,13 @@ def get_config(
config = AppConfig(
default_agent=metadata.agent_class,
run_as_openhands=False,
runtime=os.environ.get('RUNTIME', 'eventstream'),
runtime='eventstream',
max_iterations=metadata.max_iterations,
sandbox=SandboxConfig(
base_container_image='python:3.11-bookworm',
enable_auto_lint=True,
use_host_network=False,
timeout=100,
api_key=os.environ.get('ALLHANDS_API_KEY', None),
),
# do not mount workspace
workspace_base=None,
@@ -188,9 +186,7 @@ def process_instance(
signature_file=f'{instance.instance_name}.py',
)
if USE_UNIT_TESTS:
logger.info(
f'\nInstruction to run test_file: {instance.instance_name}_test.py\n'
)
print(f'\nInstruction to run test_file: {instance.instance_name}_test.py\n')
instruction += (
f'Use `python -m unittest {instance.instance_name}_test.py` to run the test_file '
'and verify the correctness of your solution. DO NOT EDIT the test file.\n\n'
@@ -208,7 +204,6 @@ def process_instance(
# =============================================
runtime: Runtime = create_runtime(config)
call_async_from_sync(runtime.connect)
initialize_runtime(runtime, instance=instance)
+2 -3
View File
@@ -29,8 +29,7 @@ from openhands.core.logger import openhands_logger as logger
from openhands.core.main import create_runtime, run_controller
from openhands.events.action import CmdRunAction, MessageAction
from openhands.events.observation import CmdOutputObservation
from openhands.runtime.base import Runtime
from openhands.utils.async_utils import call_async_from_sync
from openhands.runtime.runtime import Runtime
AGENT_CLS_TO_FAKE_USER_RESPONSE_FN = {
'CodeActAgent': functools.partial(
@@ -276,7 +275,7 @@ def process_instance(
instruction += AGENT_CLS_TO_INST_SUFFIX[metadata.agent_class]
runtime = create_runtime(config)
call_async_from_sync(runtime.connect)
initialize_runtime(runtime, instance)
# Here's how you can run the agent (similar to the `main` function) and get the final task state
+1 -3
View File
@@ -32,8 +32,7 @@ from openhands.core.logger import openhands_logger as logger
from openhands.core.main import create_runtime, run_controller
from openhands.events.action import CmdRunAction, MessageAction
from openhands.events.observation import CmdOutputObservation
from openhands.runtime.base import Runtime
from openhands.utils.async_utils import call_async_from_sync
from openhands.runtime.runtime import Runtime
def codeact_user_response(state: State) -> str:
@@ -404,7 +403,6 @@ def process_instance(
instruction += AGENT_CLS_TO_INST_SUFFIX[metadata.agent_class]
runtime = create_runtime(config)
call_async_from_sync(runtime.connect)
initialize_runtime(runtime, instance)
# Here's how you can run the agent (similar to the `main` function) and get the final task state
+1 -3
View File
@@ -28,8 +28,7 @@ from openhands.core.logger import openhands_logger as logger
from openhands.core.main import create_runtime, run_controller
from openhands.events.action import AgentFinishAction, CmdRunAction, MessageAction
from openhands.events.observation import CmdOutputObservation
from openhands.runtime.base import Runtime
from openhands.utils.async_utils import call_async_from_sync
from openhands.runtime.runtime import Runtime
DATASET_CACHE_DIR = os.path.join(os.path.dirname(__file__), 'data')
@@ -143,7 +142,6 @@ def process_instance(
logger.info(f'Instruction:\n{instruction}', extra={'msg_type': 'OBSERVATION'})
runtime = create_runtime(config)
call_async_from_sync(runtime.connect)
initialize_runtime(runtime, instance)
# Here's how you can run the agent (similar to the `main` function) and get the final task state
-2
View File
@@ -25,7 +25,6 @@ from openhands.core.config import (
from openhands.core.logger import openhands_logger as logger
from openhands.core.main import create_runtime, run_controller
from openhands.events.action import MessageAction
from openhands.utils.async_utils import call_async_from_sync
AGENT_CLS_TO_FAKE_USER_RESPONSE_FN = {
'CodeActAgent': codeact_user_response,
@@ -82,7 +81,6 @@ def process_instance(
# Here's how you can run the agent (similar to the `main` function) and get the final task state
runtime = create_runtime(config)
call_async_from_sync(runtime.connect)
state: State | None = asyncio.run(
run_controller(
config=config,
+1 -2
View File
@@ -48,7 +48,6 @@ from openhands.events.action import (
MessageAction,
)
from openhands.events.observation import Observation
from openhands.utils.async_utils import call_async_from_sync
ACTION_FORMAT = """
<<FINAL_ANSWER||
@@ -216,7 +215,7 @@ Ok now its time to start solving the question. Good luck!
"""
runtime = create_runtime(config)
call_async_from_sync(runtime.connect)
state: State | None = asyncio.run(
run_controller(
config=config,
+1 -3
View File
@@ -37,8 +37,7 @@ from openhands.core.logger import openhands_logger as logger
from openhands.core.main import create_runtime, run_controller
from openhands.events.action import CmdRunAction, MessageAction
from openhands.events.observation import CmdOutputObservation
from openhands.runtime.base import Runtime
from openhands.utils.async_utils import call_async_from_sync
from openhands.runtime.runtime import Runtime
IMPORT_HELPER = {
'python': [
@@ -234,7 +233,6 @@ def process_instance(
# Here's how you can run the agent (similar to the `main` function) and get the final task state
runtime = create_runtime(config)
call_async_from_sync(runtime.connect)
initialize_runtime(runtime, instance)
state: State | None = asyncio.run(
run_controller(
-69
View File
@@ -1,69 +0,0 @@
# Integration tests
This directory implements the integration tests that [were running in CI](https://github.com/All-Hands-AI/OpenHands/tree/23d3becf1d6f5d07e592f7345750c314a826b4e9/tests/integration).
[PR 3985](https://github.com/All-Hands-AI/OpenHands/pull/3985) introduced LLM-based editing, which requires access to an LLM to perform edits. Hence, we removed the integration tests from CI and intend to run them as a nightly evaluation to ensure the quality of the OpenHands software.
## To add new tests
Each test is a file named like `tXX_testname.py` where `XX` is a number.
Make sure the file name for each test starts with `t` and ends with `.py`.
Each test should be structured as a subclass of [`BaseIntegrationTest`](./tests/base.py), where you need to implement `initialize_runtime`, which sets up the runtime environment before the test, and `verify_result`, which takes in a `Runtime` and the history of `Event`s and returns a `TestResult`. See [t01_fix_simple_typo.py](./tests/t01_fix_simple_typo.py) and [t05_simple_browsing.py](./tests/t05_simple_browsing.py) for two representative examples.
```python
class TestResult(BaseModel):
success: bool
reason: str | None = None
class BaseIntegrationTest(ABC):
"""Base class for integration tests."""
INSTRUCTION: str
@classmethod
@abstractmethod
def initialize_runtime(cls, runtime: Runtime) -> None:
"""Initialize the runtime for the test to run."""
pass
@classmethod
@abstractmethod
def verify_result(cls, runtime: Runtime, histories: list[Event]) -> TestResult:
"""Verify the result of the test.
This method will be called after the agent performs the task on the runtime.
"""
pass
```
## Setup Environment and LLM Configuration
Please follow the instructions [here](../README.md#setup) to set up your local
development environment and LLM.
## Start the evaluation
```bash
./evaluation/integration_tests/scripts/run_infer.sh [model_config] [git-version] [agent] [eval_limit] [eval-num-workers] [eval_ids]
```
- `model_config`, e.g. `eval_gpt4_1106_preview`, is the config group name for
your LLM settings, as defined in your `config.toml`.
- `git-version`, e.g. `HEAD`, is the git commit hash of the OpenHands version
you would like to evaluate. It could also be a release tag like `0.9.0`.
- `agent`, e.g. `CodeActAgent`, is the name of the agent for benchmarks,
defaulting to `CodeActAgent`.
- `eval_limit`, e.g. `10`, limits the evaluation to the first `eval_limit`
instances. By default, the script evaluates every integration test found in
the `tests` directory. Note: in order to use `eval_limit`, you must also set `agent`.
- `eval-num-workers`: the number of workers to use for evaluation. Default: `1`.
- `eval_ids`, e.g. `"1,3,10"`, limits the evaluation to instances with the
given IDs (comma separated).
Example:
```bash
./evaluation/integration_tests/scripts/run_infer.sh llm.claude-35-sonnet-eval HEAD CodeActAgent
```
-213
View File
@@ -1,213 +0,0 @@
import asyncio
import importlib.util
import os
import pandas as pd
from evaluation.integration_tests.tests.base import BaseIntegrationTest, TestResult
from evaluation.utils.shared import (
EvalMetadata,
EvalOutput,
codeact_user_response,
make_metadata,
prepare_dataset,
reset_logger_for_multiprocessing,
run_evaluation,
)
from openhands.controller.state.state import State
from openhands.core.config import (
AppConfig,
SandboxConfig,
get_llm_config_arg,
parse_arguments,
)
from openhands.core.logger import openhands_logger as logger
from openhands.core.main import create_runtime, run_controller
from openhands.events.action import MessageAction
from openhands.runtime.base import Runtime
from openhands.utils.async_utils import call_async_from_sync
FAKE_RESPONSES = {
'CodeActAgent': codeact_user_response,
}
def get_config(
    metadata: EvalMetadata,
    instance_id: str,
) -> AppConfig:
    """Assemble the ``AppConfig`` used to run one integration test.

    Args:
        metadata: Evaluation metadata; supplies the agent class, the iteration
            cap, the output directory and the LLM configuration.
        instance_id: Identifier of the test instance; used as the sub-folder
            name for LLM completion logs.

    Returns:
        An ``AppConfig`` with ``metadata.llm_config`` installed on it.
    """
    llm_settings = metadata.llm_config

    # No base_container_image is passed, so the sandbox uses its default image.
    sandbox_settings = SandboxConfig(
        enable_auto_lint=True,
        use_host_network=False,
        timeout=100,
    )
    config = AppConfig(
        default_agent=metadata.agent_class,
        run_as_openhands=False,
        runtime='eventstream',
        max_iterations=metadata.max_iterations,
        sandbox=sandbox_settings,
        # The workspace is deliberately not mounted.
        workspace_base=None,
        workspace_mount_path=None,
    )

    # When completion logging is enabled, give each instance its own folder
    # under the evaluation output directory.
    if llm_settings.log_completions:
        llm_settings.log_completions_folder = os.path.join(
            metadata.eval_output_dir, 'llm_completions', instance_id
        )
        logger.info(
            f'Logging LLM completions for instance {instance_id} to '
            f'{llm_settings.log_completions_folder}'
        )

    config.set_llm_config(llm_settings)
    return config
def process_instance(
    instance: pd.Series,
    metadata: EvalMetadata,
    reset_logger: bool = True,
) -> EvalOutput:
    """Run a single integration test end to end and package its outcome.

    Args:
        instance: Row describing the test; ``instance_id`` and ``file_path``
            are read from it.
        metadata: Evaluation metadata shared by every instance of the run.
        reset_logger: When true, re-point the logger at a per-instance log
            under ``<eval_output_dir>/infer_logs`` so the evaluation can be
            parallelized with multi-processing; otherwise only log a start
            message.

    Returns:
        An ``EvalOutput`` holding the instruction, event history, metrics,
        last error (if any) and the dumped ``TestResult``.

    Raises:
        AssertionError: If the test file has no ``Test`` attribute or it is
            not a ``BaseIntegrationTest`` subclass.
        ValueError: If the controller returns no state.
    """
    instance_id = instance.instance_id
    config = get_config(metadata, instance_id)

    if not reset_logger:
        logger.info(f'\nStarting evaluation for instance {str(instance_id)}.\n')
    else:
        infer_log_dir = os.path.join(metadata.eval_output_dir, 'infer_logs')
        reset_logger_for_multiprocessing(logger, str(instance_id), infer_log_dir)

    # ---------------------------------------------
    # Load the test module straight from its file path
    # ---------------------------------------------
    module_spec = importlib.util.spec_from_file_location(
        instance_id, instance.file_path
    )
    loaded_module = importlib.util.module_from_spec(module_spec)
    module_spec.loader.exec_module(loaded_module)

    assert hasattr(
        loaded_module, 'Test'
    ), f'Test module {instance_id} does not have a Test class'
    test_class: type[BaseIntegrationTest] = loaded_module.Test
    assert issubclass(
        test_class, BaseIntegrationTest
    ), f'Test class {instance_id} does not inherit from BaseIntegrationTest'

    instruction = test_class.INSTRUCTION

    # ---------------------------------------------
    # Create the sandbox, let the test seed it, then run the agent
    # ---------------------------------------------
    runtime: Runtime = create_runtime(config)
    call_async_from_sync(runtime.connect)
    test_class.initialize_runtime(runtime)

    # Mirrors the `main` function: drive the controller to the final state.
    controller_run = run_controller(
        config=config,
        initial_user_action=MessageAction(content=instruction),
        runtime=runtime,
        fake_user_response_fn=FAKE_RESPONSES[metadata.agent_class],
    )
    state: State | None = asyncio.run(controller_run)
    if state is None:
        raise ValueError('State should not be None.')

    # ---------------------------------------------
    # Evaluate the result
    # ---------------------------------------------
    histories = state.history.get_events()
    test_result: TestResult = test_class.verify_result(runtime, histories)

    if state.metrics:
        metrics = state.metrics.get()
    else:
        metrics = None

    if state and state.last_error:
        last_error = state.last_error
    else:
        last_error = None

    # Bundle everything into the record that gets saved for this instance.
    return EvalOutput(
        instance_id=str(instance_id),
        instance=instance.to_dict(),
        instruction=instruction,
        metadata=metadata,
        history=histories,
        metrics=metrics,
        error=last_error,
        test_result=test_result.model_dump(),
    )
def load_integration_tests() -> pd.DataFrame:
    """Discover the integration tests stored in the ``tests`` folder next to this file.

    A file counts as a test when its name starts with ``t`` and ends with
    ``.py``.

    Returns:
        A DataFrame with one row per test file and two columns:
        ``file_path`` (path of the test file) and ``instance_id`` (the file
        name without its ``.py`` extension). Rows are sorted by file name so
        that "the first N instances" is reproducible across machines.
    """
    cur_dir = os.path.dirname(os.path.abspath(__file__))
    test_dir = os.path.join(cur_dir, 'tests')

    # os.listdir returns entries in arbitrary order; sort for determinism.
    test_files = [
        os.path.join(test_dir, name)
        for name in sorted(os.listdir(test_dir))
        if name.startswith('t') and name.endswith('.py')
    ]

    df = pd.DataFrame(test_files, columns=['file_path'])
    # splitext drops exactly the extension. The previous `rstrip('.py')`
    # stripped any trailing '.', 'p' or 'y' characters, so a name such as
    # 't02_happy.py' was mangled into 't02_ha'.
    df['instance_id'] = df['file_path'].apply(
        lambda path: os.path.splitext(os.path.basename(path))[0]
    )
    return df
if __name__ == '__main__':
    # Script entry point: discover the tests, resolve the LLM config, run the
    # evaluation, then log a success-rate summary read back from the output.
    args = parse_arguments()
    integration_tests = load_integration_tests()

    llm_config = get_llm_config_arg(args.llm_config) if args.llm_config else None
    if llm_config is None:
        raise ValueError(f'Could not find LLM config: --llm_config {args.llm_config}')

    metadata = make_metadata(
        llm_config,
        'integration_tests',
        args.agent_cls,
        args.max_iterations,
        args.eval_note,
        args.eval_output_dir,
    )
    output_file = os.path.join(metadata.eval_output_dir, 'output.jsonl')

    # Optional comma-separated list of instance IDs to restrict the run to.
    eval_ids = str(args.eval_ids).split(',') if args.eval_ids else None
    if eval_ids is not None:
        logger.info(f'\nUsing specific dataset IDs: {eval_ids}\n')

    instances = prepare_dataset(
        integration_tests,
        output_file,
        args.eval_n_limit,
        eval_ids=eval_ids,
    )

    run_evaluation(
        instances,
        metadata,
        output_file,
        args.eval_num_workers,
        process_instance,
    )

    # Read the JSONL output back and flatten the nested test_result fields.
    results = pd.read_json(output_file, lines=True, orient='records')
    for field in ('success', 'reason'):
        results[field] = results['test_result'].apply(lambda x, key=field: x[key])

    separator = '-' * 100
    passed = results['success']
    logger.info(separator)
    logger.info(
        f'Success rate: {passed.mean():.2%} ({passed.sum()}/{len(results)})'
    )
    summary_table = results[['instance_id', 'success', 'reason']].to_string(index=False)
    logger.info('\nEvaluation Results:' + '\n' + summary_table)
    logger.info(separator)
@@ -1,61 +0,0 @@
#!/bin/bash
# Launches evaluation/integration_tests/run_infer.py through poetry.
# Positional arguments:
#   $1 MODEL_CONFIG  value passed to --llm-config
#   $2 COMMIT_HASH   not referenced again in this script; presumably consumed
#                    by checkout_eval_branch - verify in version_control.sh
#   $3 AGENT         value passed to --agent-cls (default: CodeActAgent)
#   $4 EVAL_LIMIT    optional, passed to --eval-n-limit
#   $5 NUM_WORKERS   value passed to --eval-num-workers (default: 1)
#   $6 EVAL_IDS      optional, passed to --eval-ids
# Abort on the first failing command, including failures inside pipelines.
set -eo pipefail
# Path is relative, so the script must be run from the repository root.
# NOTE(review): checkout_eval_branch and get_agent_version are not defined in
# this file; presumably they come from this sourced helper - confirm.
source "evaluation/utils/version_control.sh"
MODEL_CONFIG=$1
COMMIT_HASH=$2
AGENT=$3
EVAL_LIMIT=$4
NUM_WORKERS=$5
EVAL_IDS=$6
# Fall back to a single worker when none was requested.
if [ -z "$NUM_WORKERS" ]; then
NUM_WORKERS=1
echo "Number of workers not specified, use default $NUM_WORKERS"
fi
checkout_eval_branch
# Fall back to the default agent when none was requested.
if [ -z "$AGENT" ]; then
echo "Agent not specified, use default CodeActAgent"
AGENT="CodeActAgent"
fi
# NOTE(review): AGENT_VERSION is never assigned in this script; presumably
# get_agent_version sets it - confirm.
get_agent_version
echo "AGENT: $AGENT"
echo "AGENT_VERSION: $AGENT_VERSION"
echo "MODEL_CONFIG: $MODEL_CONFIG"
# The evaluation note starts out as the agent version.
EVAL_NOTE=$AGENT_VERSION
# Default to NOT use unit tests.
if [ -z "$USE_UNIT_TESTS" ]; then
export USE_UNIT_TESTS=false
fi
echo "USE_UNIT_TESTS: $USE_UNIT_TESTS"
# When unit tests are enabled, append "-w-test" to EVAL_NOTE.
if [ "$USE_UNIT_TESTS" = true ]; then
EVAL_NOTE=$EVAL_NOTE-w-test
fi
# export PYTHONPATH=evaluation/integration_tests:\$PYTHONPATH
# Build the command as a single string; --max-iterations is fixed at 10.
COMMAND="poetry run python evaluation/integration_tests/run_infer.py \
--agent-cls $AGENT \
--llm-config $MODEL_CONFIG \
--max-iterations 10 \
--eval-num-workers $NUM_WORKERS \
--eval-note $EVAL_NOTE"
# Append the optional flags only when their arguments were supplied.
if [ -n "$EVAL_LIMIT" ]; then
echo "EVAL_LIMIT: $EVAL_LIMIT"
COMMAND="$COMMAND --eval-n-limit $EVAL_LIMIT"
fi
if [ -n "$EVAL_IDS" ]; then
echo "EVAL_IDS: $EVAL_IDS"
COMMAND="$COMMAND --eval-ids $EVAL_IDS"
fi
# Run the command
# NOTE(review): $COMMAND is unquoted and re-parsed by eval, so arguments
# containing spaces or shell metacharacters will be word-split/interpreted.
eval $COMMAND
@@ -1,32 +0,0 @@
from abc import ABC, abstractmethod
from pydantic import BaseModel
from openhands.events.event import Event
from openhands.runtime.base import Runtime
class TestResult(BaseModel):
    """Outcome of verifying one integration test."""

    # Whether the verification passed.
    success: bool
    # Optional human-readable explanation; defaults to None.
    reason: str | None = None
class BaseIntegrationTest(ABC):
    """Abstract contract implemented by every integration test case.

    A concrete test assigns ``INSTRUCTION`` and implements the two abstract
    classmethods declared below.
    """

    # Task text for the test case (presumably the prompt handed to the agent;
    # confirm against the evaluation driver).
    INSTRUCTION: str

    @classmethod
    @abstractmethod
    def initialize_runtime(cls, runtime: Runtime) -> None:
        """Set up ``runtime`` so that the test can run.

        Args:
            runtime: Runtime the test will be executed against.
        """

    @classmethod
    @abstractmethod
    def verify_result(cls, runtime: Runtime, histories: list[Event]) -> TestResult:
        """Check the outcome of the test.

        Called after the agent has performed the task on ``runtime``.

        Args:
            runtime: Runtime the agent worked in.
            histories: Events recorded for the run.

        Returns:
            A ``TestResult`` describing success or failure.
        """
@@ -1,39 +0,0 @@
import os
import tempfile
from evaluation.integration_tests.tests.base import BaseIntegrationTest, TestResult
from openhands.events.action import CmdRunAction
from openhands.events.event import Event
from openhands.runtime.base import Runtime
class Test(BaseIntegrationTest):
    """Integration test: the typos seeded in /workspace/bad.txt must be fixed."""

    INSTRUCTION = 'Fix typos in bad.txt.'

    @classmethod
    def initialize_runtime(cls, runtime: Runtime) -> None:
        """Seed the runtime with a ``bad.txt`` that contains typos.

        The file is written to a local temporary directory and then handed to
        ``runtime.copy_to`` with '/workspace' as the destination.
        """
        with tempfile.TemporaryDirectory() as scratch_dir:
            local_path = os.path.join(scratch_dir, 'bad.txt')
            with open(local_path, 'w') as handle:
                handle.write('This is a stupid typoo.\nReally?\nNo mor typos!\nEnjoy!')

            # Copy while the temporary directory still exists.
            runtime.copy_to(local_path, '/workspace')

    @classmethod
    def verify_result(cls, runtime: Runtime, histories: list[Event]) -> TestResult:
        """Read /workspace/bad.txt and compare it with the corrected text.

        Returns:
            A failing ``TestResult`` when the file cannot be read or its
            content (stripped, with CRLF normalised to LF) differs from the
            expected text; a successful one otherwise.
        """
        obs = runtime.run_action(
            CmdRunAction(command='cat /workspace/bad.txt', keep_prompt=False)
        )
        if obs.exit_code != 0:
            return TestResult(
                success=False, reason=f'Failed to run command: {obs.content}'
            )

        normalized = obs.content.strip().replace('\r\n', '\n')
        if normalized != 'This is a stupid typo.\nReally?\nNo more typos!\nEnjoy!':
            return TestResult(success=False, reason=f'File not fixed: {obs.content}')
        return TestResult(success=True)
@@ -1,40 +0,0 @@
from evaluation.integration_tests.tests.base import BaseIntegrationTest, TestResult
from evaluation.utils.shared import assert_and_raise
from openhands.events.action import CmdRunAction
from openhands.events.event import Event
from openhands.runtime.base import Runtime
class Test(BaseIntegrationTest):
    """Integration test: /workspace/hello.sh must exist and print 'hello'."""

    INSTRUCTION = "Write a shell script '/workspace/hello.sh' that prints 'hello'."

    @classmethod
    def initialize_runtime(cls, runtime: Runtime) -> None:
        """Make sure the /workspace directory exists in the runtime."""
        obs = runtime.run_action(
            CmdRunAction(command='mkdir -p /workspace', keep_prompt=False)
        )
        assert_and_raise(obs.exit_code == 0, f'Failed to run command: {obs.content}')

    @classmethod
    def verify_result(cls, runtime: Runtime, histories: list[Event]) -> TestResult:
        """Check that the script is readable, runs, and prints exactly 'hello'.

        Returns:
            A failing ``TestResult`` naming the first step that went wrong
            (reading the script, running it, or matching its output), or a
            successful one when all three steps pass.
        """
        # Step 1: the script must be readable.
        read_obs = runtime.run_action(
            CmdRunAction(command='cat /workspace/hello.sh', keep_prompt=False)
        )
        if read_obs.exit_code != 0:
            return TestResult(
                success=False,
                reason=f'Failed to cat /workspace/hello.sh: {read_obs.content}.',
            )

        # Step 2: the script must run successfully under bash.
        run_obs = runtime.run_action(
            CmdRunAction(command='bash /workspace/hello.sh', keep_prompt=False)
        )
        if run_obs.exit_code != 0:
            return TestResult(
                success=False,
                reason=f'Failed to execute /workspace/hello.sh: {run_obs.content}.',
            )

        # Step 3: apart from surrounding whitespace, the output must be 'hello'.
        if run_obs.content.strip() == 'hello':
            return TestResult(success=True)
        return TestResult(
            success=False, reason=f'Script did not print "hello": {run_obs.content}.'
        )
@@ -1,43 +0,0 @@
from evaluation.integration_tests.tests.base import BaseIntegrationTest, TestResult
from evaluation.utils.shared import assert_and_raise
from openhands.events.action import CmdRunAction
from openhands.events.event import Event
from openhands.runtime.base import Runtime
class Test(BaseIntegrationTest):
    """Integration test: /workspace/test.txt must contain 'hello world'.

    Only the resulting file content is verified; nothing here checks which
    tool the agent used to write it.
    """

    INSTRUCTION = "Use Jupyter IPython to write a text file containing 'hello world' to '/workspace/test.txt'."

    @classmethod
    def initialize_runtime(cls, runtime: Runtime) -> None:
        """Make sure the /workspace directory exists in the runtime."""
        action = CmdRunAction(command='mkdir -p /workspace', keep_prompt=False)
        obs = runtime.run_action(action)
        assert_and_raise(obs.exit_code == 0, f'Failed to run command: {obs.content}')

    @classmethod
    def verify_result(cls, runtime: Runtime, histories: list[Event]) -> TestResult:
        """Read /workspace/test.txt and check that it contains 'hello world'.

        Args:
            runtime: Runtime the agent worked in.
            histories: Events recorded for the run (unused by this check).

        Returns:
            A failing ``TestResult`` when the file cannot be read or does not
            contain 'hello world'; a successful one otherwise.
        """
        # A single `cat` both proves the file exists and yields its content;
        # the same command used to be issued twice with identical handling.
        action = CmdRunAction(command='cat /workspace/test.txt', keep_prompt=False)
        obs = runtime.run_action(action)
        if obs.exit_code != 0:
            return TestResult(
                success=False,
                reason=f'Failed to cat /workspace/test.txt: {obs.content}.',
            )

        if 'hello world' not in obs.content.strip():
            return TestResult(
                success=False,
                reason=f'File did not contain "hello world": {obs.content}.',
            )
        return TestResult(success=True)
@@ -1,58 +0,0 @@
from evaluation.integration_tests.tests.base import BaseIntegrationTest, TestResult
from evaluation.utils.shared import assert_and_raise
from openhands.events.action import CmdRunAction
from openhands.events.event import Event
from openhands.runtime.base import Runtime
class Test(BaseIntegrationTest):
    """Integration test: a staged hello.py must end up committed."""

    INSTRUCTION = 'Write a git commit message for the current staging area and commit the changes.'

    @classmethod
    def initialize_runtime(cls, runtime: Runtime) -> None:
        """Create a git repository with hello.py staged but not committed.

        The commands run one after another; the first non-zero exit code is
        reported through ``assert_and_raise``.
        """
        # NOTE(review): `git init`, the `echo` and `git add` use the runtime's
        # current directory, while verify_result reads /workspace/hello.py --
        # this assumes the working directory is /workspace; confirm.
        setup_commands = (
            'mkdir -p /workspace',
            'git init',
            'echo \'print("hello world")\' > hello.py',
            'git add hello.py',
        )
        for command in setup_commands:
            obs = runtime.run_action(CmdRunAction(command=command, keep_prompt=False))
            assert_and_raise(
                obs.exit_code == 0, f'Failed to run command: {obs.content}'
            )

    @classmethod
    def verify_result(cls, runtime: Runtime, histories: list[Event]) -> TestResult:
        """Check that hello.py still exists and that nothing is left to commit.

        Returns:
            A failing ``TestResult`` when /workspace/hello.py cannot be read,
            when `git status` fails, or when its output lacks
            'nothing to commit, working tree clean'; a successful one otherwise.
        """
        # The file must still be present.
        file_obs = runtime.run_action(
            CmdRunAction(command='cat /workspace/hello.py', keep_prompt=False)
        )
        if file_obs.exit_code != 0:
            return TestResult(
                success=False,
                reason=f'Failed to cat /workspace/hello.py: {file_obs.content}.',
            )

        # `git status` must report a clean tree.
        status_obs = runtime.run_action(
            CmdRunAction(command='git status', keep_prompt=False)
        )
        if status_obs.exit_code != 0:
            return TestResult(
                success=False, reason=f'Failed to git status: {status_obs.content}.'
            )

        if 'nothing to commit, working tree clean' not in status_obs.content.strip():
            return TestResult(
                success=False,
                reason=f'Failed to check for "nothing to commit, working tree clean": {status_obs.content}.',
            )
        return TestResult(success=True)
@@ -1,134 +0,0 @@
import os
import tempfile
from evaluation.integration_tests.tests.base import BaseIntegrationTest, TestResult
from evaluation.utils.shared import assert_and_raise
from openhands.events.action import AgentFinishAction, CmdRunAction, MessageAction
from openhands.events.event import Event
from openhands.events.observation import AgentDelegateObservation
from openhands.runtime.base import Runtime
HTML_FILE = """
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>The Ultimate Answer</title>
<style>
body {
display: flex;
justify-content: center;
align-items: center;
height: 100vh;
margin: 0;
background: linear-gradient(to right, #1e3c72, #2a5298);
color: #fff;
font-family: 'Arial', sans-serif;
text-align: center;
}
.container {
text-align: center;
padding: 20px;
background: rgba(255, 255, 255, 0.1);
border-radius: 10px;
box-shadow: 0 0 10px rgba(0, 0, 0, 0.2);
}
h1 {
font-size: 36px;
margin-bottom: 20px;
}
p {
font-size: 18px;
margin-bottom: 30px;
}
#showButton {
padding: 10px 20px;
font-size: 16px;
color: #1e3c72;
background: #fff;
border: none;
border-radius: 5px;
cursor: pointer;
transition: background 0.3s ease;
}
#showButton:hover {
background: #f0f0f0;
}
#result {
margin-top: 20px;
font-size: 24px;
}
</style>
</head>
<body>
<div class="container">
<h1>The Ultimate Answer</h1>
<p>Click the button to reveal the answer to life, the universe, and everything.</p>
<button id="showButton">Click me</button>
<div id="result"></div>
</div>
<script>
document.getElementById('showButton').addEventListener('click', function() {
document.getElementById('result').innerText = 'The answer is OpenHands is all you need!';
});
</script>
</body>
</html>
"""
class Test(BaseIntegrationTest):
    """Integration test: the agent must report the answer shown by a local web page."""

    INSTRUCTION = 'Browse localhost:8000, and tell me the ultimate answer to life.'

    @classmethod
    def initialize_runtime(cls, runtime: Runtime) -> None:
        """Create the directories, upload index.html and start an HTTP server.

        The page content comes from ``HTML_FILE``; the server is started in
        the background from /tmp/server on port 8000.
        """
        for directory_command in ('mkdir -p /workspace', 'mkdir -p /tmp/server'):
            obs = runtime.run_action(
                CmdRunAction(command=directory_command, keep_prompt=False)
            )
            assert_and_raise(
                obs.exit_code == 0, f'Failed to run command: {obs.content}'
            )

        # Write the page locally, then hand it to the runtime while the
        # temporary directory still exists.
        with tempfile.TemporaryDirectory() as scratch_dir:
            page_path = os.path.join(scratch_dir, 'index.html')
            with open(page_path, 'w') as handle:
                handle.write(HTML_FILE)
            runtime.copy_to(page_path, '/tmp/server')

        # Launch the server in the background; its result is not inspected.
        runtime.run_action(
            CmdRunAction(
                command='cd /tmp/server && nohup python3 -m http.server 8000 &',
                keep_prompt=False,
            )
        )

    @classmethod
    def verify_result(cls, runtime: Runtime, histories: list[Event]) -> TestResult:
        """Search the agent's messages for the expected answer.

        Message, finish and delegate-observation events are inspected; the
        test passes as soon as one of them contains
        'OpenHands is all you need!'.

        Returns:
            A successful ``TestResult`` on the first match, otherwise a
            failing one that lists the inspected events.
        """
        relevant_types = (MessageAction, AgentFinishAction, AgentDelegateObservation)
        message_actions = [
            event for event in histories if isinstance(event, relevant_types)
        ]

        for event in message_actions:
            if isinstance(event, AgentDelegateObservation):
                content = event.content
            elif isinstance(event, AgentFinishAction):
                # Finish actions carry their text in the `outputs` mapping.
                content = event.outputs.get('content', '')
            else:
                # Only MessageAction remains after the filter above.
                content = event.content

            if 'OpenHands is all you need!' in content:
                return TestResult(success=True)

        return TestResult(
            success=False,
            reason=f'The answer is not found in any message. Total messages: {len(message_actions)}. Messages: {message_actions}',
        )
+1 -3
View File
@@ -29,8 +29,7 @@ from openhands.events.action import (
MessageAction,
)
from openhands.events.observation import CmdOutputObservation
from openhands.runtime.base import Runtime
from openhands.utils.async_utils import call_async_from_sync
from openhands.runtime.runtime import Runtime
AGENT_CLS_TO_FAKE_USER_RESPONSE_FN = {
'CodeActAgent': codeact_user_response,
@@ -203,7 +202,6 @@ def process_instance(
instruction += AGENT_CLS_TO_INST_SUFFIX[metadata.agent_class]
runtime = create_runtime(config)
call_async_from_sync(runtime.connect)
initialize_runtime(runtime, instance)
# Here's how you can run the agent (similar to the `main` function) and get the final task state
+1 -1
View File
@@ -1,4 +1,4 @@
# Mini-World of Bits Evaluation with OpenHands Browsing Agents
# WebArena Evaluation with OpenHands Browsing Agents
This folder contains evaluation for [MiniWoB++](https://miniwob.farama.org/) benchmark, powered by [BrowserGym](https://github.com/ServiceNow/BrowserGym) for easy evaluation of how well an agent capable of browsing can perform on synthetic web browsing tasks.
+1 -3
View File
@@ -30,12 +30,11 @@ from openhands.events.action import (
MessageAction,
)
from openhands.events.observation import CmdOutputObservation
from openhands.runtime.base import Runtime
from openhands.runtime.browser.browser_env import (
BROWSER_EVAL_GET_GOAL_ACTION,
BROWSER_EVAL_GET_REWARDS_ACTION,
)
from openhands.utils.async_utils import call_async_from_sync
from openhands.runtime.runtime import Runtime
SUPPORTED_AGENT_CLS = {'BrowsingAgent'}
@@ -128,7 +127,6 @@ def process_instance(
logger.info(f'Starting evaluation for instance {env_id}.')
runtime = create_runtime(config)
call_async_from_sync(runtime.connect)
task_str = initialize_runtime(runtime)
state: State | None = asyncio.run(
run_controller(
+1 -3
View File
@@ -32,8 +32,7 @@ from openhands.events.action import (
MessageAction,
)
from openhands.events.observation import CmdOutputObservation
from openhands.runtime.base import Runtime
from openhands.utils.async_utils import call_async_from_sync
from openhands.runtime.runtime import Runtime
def codeact_user_response_mint(state: State, task: Task, task_config: dict[str, int]):
@@ -177,7 +176,6 @@ def process_instance(
)
runtime = create_runtime(config)
call_async_from_sync(runtime.connect)
initialize_runtime(runtime)
state: State | None = asyncio.run(
+7 -5
View File
@@ -131,9 +131,11 @@ class MultipleChoiceTask(Task):
def compare_two_numbers(p, gt):
if isinstance(p, (int, float)):
if isinstance(p, int) or isinstance(p, float):
pass
elif isinstance(p, (bool, complex, dict, list, str, tuple)):
elif isinstance(p, list) or isinstance(p, bool) or isinstance(p, str):
return False
elif isinstance(p, tuple) or isinstance(p, complex) or isinstance(p, dict):
return False
else:
raise ValueError(p)
@@ -225,8 +227,8 @@ class TheoremqaTask(Task):
prediction = prediction.replace('°', '')
# Detect the boolean keyword in the generation
if prediction in ('true', 'yes', 'false', 'no'):
if prediction in ('true', 'yes'):
if prediction in ['true', 'yes', 'false', 'no']:
if prediction == 'true' or prediction == 'yes':
prediction = 'True'
else:
prediction = 'False'
@@ -340,7 +342,7 @@ class TheoremqaTask(Task):
answer_type = self._answer_type
gt = self.extract_answer(self.reference)
if isinstance(prediction, (str, int, float, list)):
if isinstance(prediction, (str, int, float)) or isinstance(prediction, list):
# Comparing prediction against the reference
if answer_type in ['bool', 'option', 'Option']:
cur_correct = int(prediction == f'({gt})') or int(prediction == gt)
+1 -3
View File
@@ -41,8 +41,7 @@ from openhands.core.logger import openhands_logger as logger
from openhands.core.main import create_runtime, run_controller
from openhands.events.action import CmdRunAction, MessageAction
from openhands.events.observation import CmdOutputObservation
from openhands.runtime.base import Runtime
from openhands.utils.async_utils import call_async_from_sync
from openhands.runtime.runtime import Runtime
config = load_app_config()
@@ -234,7 +233,6 @@ def process_instance(instance: Any, metadata: EvalMetadata, reset_logger: bool =
instruction += AGENT_CLS_TO_INST_SUFFIX[metadata.agent_class]
runtime = create_runtime(config)
call_async_from_sync(runtime.connect)
initialize_runtime(runtime, instance)
# Run the agent
+1 -2
View File
@@ -28,7 +28,6 @@ from openhands.core.logger import openhands_logger as logger
from openhands.core.main import create_runtime
from openhands.events.action import CmdRunAction
from openhands.events.observation import CmdOutputObservation
from openhands.utils.async_utils import call_async_from_sync
# TODO: migrate all swe-bench docker to ghcr.io/openhands
DOCKER_IMAGE_PREFIX = os.environ.get('EVAL_DOCKER_IMAGE_PREFIX', 'docker.io/xingyaoww/')
@@ -129,7 +128,7 @@ def process_instance(
)
runtime = create_runtime(config)
call_async_from_sync(runtime.connect)
# Get patch and save it to /tmp/patch.diff
with tempfile.TemporaryDirectory() as temp_dir:
# Patch file
+26 -47
View File
@@ -11,7 +11,6 @@ from datasets import load_dataset
import openhands.agenthub
from evaluation.swe_bench.prompt import CODEACT_SWE_PROMPT
from evaluation.utils.shared import (
EvalException,
EvalMetadata,
EvalOutput,
assert_and_raise,
@@ -33,9 +32,8 @@ from openhands.core.main import create_runtime, run_controller
from openhands.events.action import CmdRunAction, MessageAction
from openhands.events.observation import CmdOutputObservation, ErrorObservation
from openhands.events.serialization.event import event_to_dict
from openhands.runtime.base import Runtime
from openhands.runtime.runtime import Runtime
from openhands.runtime.utils.shutdown_listener import sleep_if_should_continue
from openhands.utils.async_utils import call_async_from_sync
USE_HINT_TEXT = os.environ.get('USE_HINT_TEXT', 'false').lower() == 'true'
USE_INSTANCE_IMAGE = os.environ.get('USE_INSTANCE_IMAGE', 'false').lower() == 'true'
@@ -82,10 +80,8 @@ def get_instruction(instance: pd.Series, metadata: EvalMetadata):
instruction += f'# Hints\n{instance.hints_text}\n\n'
instruction += (
'IMPORTANT: You should ONLY interact with the environment provided to you AND NEVER ASK FOR HUMAN HELP.\n'
'You should NOT modify any existing test case files. You SHOULD add new test in a NEW file to reproduce the issue.\n'
'You should verify that the issue is resolved and any new tests you create pass successfully.\n'
'You should NEVER use web browsing or any other web-based tools.\n'
'You should ALWAYS use the default Python interpreter available in the <execute_bash> environment to run code related to the provided issue and/or repository.\n'
'You should NOT modify any existing test case files. If needed, you can add new test cases in a NEW file to reproduce the issue.\n'
'You SHOULD INCLUDE PROPER INDENTATION in your edit commands.\n'
)
# NOTE: You can actually set slightly different instruction for different agents
@@ -126,6 +122,7 @@ def get_config(
config = AppConfig(
default_agent=metadata.agent_class,
run_as_openhands=False,
max_budget_per_task=4,
max_iterations=metadata.max_iterations,
runtime=os.environ.get('RUNTIME', 'eventstream'),
sandbox=SandboxConfig(
@@ -134,8 +131,6 @@ def get_config(
use_host_network=False,
# large enough timeout, since some testcases take very long to run
timeout=300,
# Add platform to the sandbox config to solve issue 4401
platform='linux/amd64',
api_key=os.environ.get('ALLHANDS_API_KEY', None),
remote_runtime_api_url=os.environ.get('SANDBOX_REMOTE_RUNTIME_API_URL'),
keep_remote_runtime_alive=False,
@@ -144,14 +139,6 @@ def get_config(
workspace_base=None,
workspace_mount_path=None,
)
if metadata.llm_config.log_completions:
metadata.llm_config.log_completions_folder = os.path.join(
metadata.eval_output_dir, 'llm_completions', instance['instance_id']
)
logger.info(
f'Logging LLM completions for instance {instance["instance_id"]} to '
f'{metadata.llm_config.log_completions_folder}'
)
config.set_llm_config(metadata.llm_config)
return config
@@ -179,7 +166,7 @@ def initialize_runtime(
obs = runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert_and_raise(
obs.exit_code == 0, f'Failed to export SWE_INSTANCE_ID: {str(obs)}'
obs.exit_code == 0, f'Failed to export SWE_INSTANCE_ID: {obs.content}'
)
action = CmdRunAction(command="""export USER=$(whoami); echo USER=${USER} """)
@@ -187,7 +174,7 @@ def initialize_runtime(
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert_and_raise(obs.exit_code == 0, f'Failed to export USER: {str(obs)}')
assert_and_raise(obs.exit_code == 0, f'Failed to export USER: {obs.content}')
if USE_INSTANCE_IMAGE:
# inject the init script
@@ -201,7 +188,7 @@ def initialize_runtime(
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert_and_raise(
obs.exit_code == 0,
f'Failed to create /swe_util/eval_data/instances: {str(obs)}',
f'Failed to create /swe_util/eval_data/instances: {obs.content}',
)
swe_instance_json_name = 'swe-bench-instance.json'
@@ -228,16 +215,16 @@ def initialize_runtime(
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert_and_raise(obs.exit_code == 0, f'Failed to cat ~/.bashrc: {str(obs)}')
assert_and_raise(obs.exit_code == 0, f'Failed to cat ~/.bashrc: {obs.content}')
action = CmdRunAction(command='source ~/.bashrc')
action.timeout = 600
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
if isinstance(obs, ErrorObservation):
logger.error(f'Failed to source ~/.bashrc: {str(obs)}')
assert_and_raise(obs.exit_code == 0, f'Failed to source ~/.bashrc: {str(obs)}')
assert_and_raise(
obs.exit_code == 0, f'Failed to source ~/.bashrc: {obs.content}'
)
action = CmdRunAction(command='source /swe_util/instance_swe_entry.sh')
action.timeout = 3600
@@ -246,7 +233,7 @@ def initialize_runtime(
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert_and_raise(
obs.exit_code == 0,
f'Failed to source /swe_util/instance_swe_entry.sh: {str(obs)}',
f'Failed to source /swe_util/instance_swe_entry.sh: {obs.content}',
)
else:
action = CmdRunAction(command='source /swe_util/swe_entry.sh')
@@ -256,7 +243,7 @@ def initialize_runtime(
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert_and_raise(
obs.exit_code == 0,
f'Failed to source /swe_util/swe_entry.sh: {str(obs)}',
f'Failed to source /swe_util/swe_entry.sh: {obs.content}',
)
action = CmdRunAction(command=f'cd /workspace/{workspace_dir_name}')
@@ -266,7 +253,7 @@ def initialize_runtime(
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert_and_raise(
obs.exit_code == 0,
f'Failed to cd to /workspace/{workspace_dir_name}: {str(obs)}',
f'Failed to cd to /workspace/{workspace_dir_name}: {obs.content}',
)
action = CmdRunAction(command='git reset --hard')
@@ -274,7 +261,7 @@ def initialize_runtime(
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert_and_raise(obs.exit_code == 0, f'Failed to git reset --hard: {str(obs)}')
assert_and_raise(obs.exit_code == 0, f'Failed to git reset --hard: {obs.content}')
action = CmdRunAction(
command='for remote_name in $(git remote); do git remote remove "${remote_name}"; done'
@@ -283,7 +270,7 @@ def initialize_runtime(
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert_and_raise(obs.exit_code == 0, f'Failed to remove git remotes: {str(obs)}')
assert_and_raise(obs.exit_code == 0, f'Failed to remove git remotes: {obs.content}')
logger.info('-' * 30)
logger.info('END Runtime Initialization Fn')
@@ -313,7 +300,7 @@ def complete_runtime(
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert_and_raise(
obs.exit_code == 0,
f'Failed to cd to /workspace/{workspace_dir_name}: {str(obs)}',
f'Failed to cd to /workspace/{workspace_dir_name}: {obs.content}',
)
action = CmdRunAction(command='git config --global core.pager ""')
@@ -323,7 +310,7 @@ def complete_runtime(
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert_and_raise(
obs.exit_code == 0,
f'Failed to git config --global core.pager "": {str(obs)}',
f'Failed to git config --global core.pager "": {obs.content}',
)
action = CmdRunAction(command='git add -A')
@@ -331,7 +318,7 @@ def complete_runtime(
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert_and_raise(obs.exit_code == 0, f'Failed to git add -A: {str(obs)}')
assert_and_raise(obs.exit_code == 0, f'Failed to git add -A: {obs.content}')
n_retries = 0
git_patch = None
@@ -356,9 +343,7 @@ def complete_runtime(
logger.error(f'Error occurred: {obs.content}. Retrying...')
sleep_if_should_continue(10)
else:
assert_and_raise(False, f'Unexpected observation type: {str(obs)}')
assert_and_raise(git_patch is not None, 'Failed to get git diff (None)')
assert_and_raise(False, f'Unexpected observation type: {type(obs)}')
logger.info('-' * 30)
logger.info('END Runtime Completion Fn')
@@ -381,7 +366,6 @@ def process_instance(
logger.info(f'Starting evaluation for instance {instance.instance_id}.')
runtime = create_runtime(config)
call_async_from_sync(runtime.connect)
try:
initialize_runtime(runtime, instance)
@@ -400,13 +384,6 @@ def process_instance(
)
)
# if fatal error, throw EvalError to trigger re-run
if (
state.last_error
and 'fatal error during agent execution' in state.last_error
):
raise EvalException('Fatal error detected: ' + state.last_error)
# ======= THIS IS SWE-Bench specific =======
# Get git patch
return_val = complete_runtime(runtime, instance)
@@ -442,6 +419,7 @@ def process_instance(
metadata=metadata,
history=histories,
metrics=metrics,
llm_completions=state.extra_data.get('llm_completions', []),
error=state.last_error if state and state.last_error else None,
)
return output
@@ -494,13 +472,14 @@ if __name__ == '__main__':
details = {}
_agent_cls = openhands.agenthub.Agent.get_cls(args.agent_cls)
if hasattr(_agent_cls, 'system_message'):
details['system_message'] = _agent_cls.system_message
if hasattr(_agent_cls, 'in_context_example'):
details['in_context_example'] = _agent_cls.in_context_example
dataset_descrption = (
args.dataset.replace('/', '__') + '-' + args.split.replace('/', '__')
)
metadata = make_metadata(
llm_config,
dataset_descrption,
'swe-bench-lite',
args.agent_cls,
args.max_iterations,
args.eval_note,
@@ -1,67 +0,0 @@
#!/usr/bin/env python3
import argparse
import pandas as pd
# Command line: two positional paths to swe_bench output JSONL files.
parser = argparse.ArgumentParser(
    description='Compare two swe_bench output JSONL files and print the resolved diff'
)
parser.add_argument('input_file_1', type=str)
parser.add_argument('input_file_2', type=str)
args = parser.parse_args()

# Load both files; each line of a file is one JSON record.
df1, df2 = (
    pd.read_json(path, orient='records', lines=True)
    for path in (args.input_file_1, args.input_file_2)
)

# Keep only instance_ids present in both files. Columns that exist on both
# sides get the pandas default suffixes: _x for file 1, _y for file 2.
df = df1.merge(df2, on='instance_id', how='inner')
def _get_resolved(report):
if report is None:
return False
if isinstance(report, float):
return False
else:
return report.get('resolved', False)
# Reduce every report to a boolean "resolved" flag. The explicit bool dtype
# keeps the `~` / `&` masks below well defined even when the merged frame is
# empty or a report stores a non-boolean value under 'resolved'.
df['resolved_x'] = df['report_x'].apply(_get_resolved).astype(bool)
df['resolved_y'] = df['report_y'].apply(_get_resolved).astype(bool)

# Vectorised comparison: unlike a row-wise `df.apply(..., axis=1)`, this always
# yields a Series, including when `df` has no rows.
df['diff'] = df['resolved_x'] != df['resolved_y']
df_diff = df[df['diff']].sort_values(
    by=['resolved_x', 'resolved_y'], ascending=[False, False]
)
# Safety net against NaN flags. The flags are booleans (missing reports map
# to False), so this normally filters nothing.
df_diff = df_diff[df_diff['resolved_x'].notna() & df_diff['resolved_y'].notna()]

print(f'X={args.input_file_1}')
print(f'Y={args.input_file_2}')
print(f'# diff={df_diff.shape[0]}')
df_diff = df_diff[['instance_id', 'resolved_x', 'resolved_y', 'report_x', 'report_y']]

# x resolved but y not
print('-' * 100)
df_diff_x_only = df_diff[df_diff['resolved_x'] & ~df_diff['resolved_y']].sort_values(
    by='instance_id'
)
print(f'# x resolved but y not={df_diff_x_only.shape[0]}')
print(df_diff_x_only[['instance_id', 'report_x', 'report_y']])

# y resolved but x not
print('-' * 100)
df_diff_y_only = df_diff[~df_diff['resolved_x'] & df_diff['resolved_y']].sort_values(
    by='instance_id'
)
print(f'# y resolved but x not={df_diff_y_only.shape[0]}')
print(df_diff_y_only[['instance_id', 'report_x', 'report_y']])

# Plain lists of the instance ids in each group.
print('-' * 100)
print('Instances that x resolved but y not:')
print(df_diff_x_only['instance_id'].tolist())

print('-' * 100)
print('Instances that y resolved but x not:')
print(df_diff_y_only['instance_id'].tolist())
@@ -3,9 +3,6 @@ import argparse
import json
from collections import Counter
from openhands.events.serialization import event_from_dict
from openhands.events.utils import get_pairs_from_events
ERROR_KEYWORDS = [
'Agent encountered an error while processing the last action',
'APIError',
@@ -29,37 +26,8 @@ if __name__ == '__main__':
error_counter = Counter()
main_agent_cost = []
editor_cost = []
num_turns = []
for line in lines:
_d = json.loads(line)
# Cost
costs = _d['metrics'].get('costs', [])
_cur_main_agent_cost = 0
_cur_editor_cost = 0
for cost in costs:
if isinstance(cost, float):
# backward compatible
_cur_main_agent_cost += cost
else:
if 'draft_editor' in cost['model']:
_cur_editor_cost += cost['cost']
else:
_cur_main_agent_cost += cost['cost']
main_agent_cost.append(_cur_main_agent_cost)
editor_cost.append(_cur_editor_cost)
# Turn status
history = _d.get('history', [])
events = [event_from_dict(event) for event in history]
pairs = get_pairs_from_events(events)
num_turns.append(len(pairs))
# Patch & resolve status
patch = _d.get('test_result', {}).get('git_patch', '')
if patch == '':
num_empty_patch += 1
@@ -70,7 +38,6 @@ if __name__ == '__main__':
if resolved:
num_resolved += 1
# Error
error = _d.get('error', None)
if error is not None and isinstance(error, str):
@@ -103,17 +70,7 @@ if __name__ == '__main__':
print(
f'# of loop: {num_agent_stuck_in_loop} / {num_lines} ({num_agent_stuck_in_loop / num_lines * 100:.2f}%)'
)
assert len(num_turns) == num_lines
assert len(main_agent_cost) == num_lines
assert len(editor_cost) == num_lines
print(f'Avg. num of turns per instance: {sum(num_turns) / num_lines:.2f}')
print(f'Avg. agent cost per instance: {sum(main_agent_cost) / num_lines:.2f} USD')
print(f'Avg. editor cost per instance: {sum(editor_cost) / num_lines:.2f} USD')
print(
f'Avg. total cost per instance: {(sum(main_agent_cost) + sum(editor_cost)) / num_lines:.2f} USD'
)
print('-' * 100)
print('Detailed error breakdown:')
for error, count in error_counter.items():
print(f'{error}: {count} ({count / num_lines * 100:.2f}%)')
print('-' * 100)
+2 -2
View File
@@ -25,8 +25,8 @@ if [ -z "$AGENT" ]; then
fi
if [ -z "$MAX_ITER" ]; then
echo "MAX_ITER not specified, use default 100"
MAX_ITER=100
echo "MAX_ITER not specified, use default 30"
MAX_ITER=30
fi
if [ -z "$USE_INSTANCE_IMAGE" ]; then
+1 -3
View File
@@ -25,8 +25,7 @@ from openhands.core.logger import openhands_logger as logger
from openhands.core.main import create_runtime, run_controller
from openhands.events.action import CmdRunAction, MessageAction
from openhands.events.observation import CmdOutputObservation
from openhands.runtime.base import Runtime
from openhands.utils.async_utils import call_async_from_sync
from openhands.runtime.runtime import Runtime
AGENT_CLS_TO_FAKE_USER_RESPONSE_FN = {
'CodeActAgent': codeact_user_response,
@@ -104,7 +103,6 @@ def process_instance(instance: Any, metadata: EvalMetadata, reset_logger: bool =
logger.info(f'Instruction:\n{instruction}', extra={'msg_type': 'OBSERVATION'})
runtime = create_runtime(config)
call_async_from_sync(runtime.connect)
initialize_runtime(runtime)
# Here's how you can run the agent (similar to the `main` function) and get the final task state
+1
View File
@@ -61,6 +61,7 @@ class EvalOutput(BaseModel):
history: (
list[dict[str, Any]] | list[tuple[dict[str, Any], dict[str, Any]]] | None
) = None
llm_completions: list[dict[str, Any]] | None = None
metrics: dict[str, Any] | None = None
error: str | None = None
+1 -3
View File
@@ -30,12 +30,11 @@ from openhands.events.action import (
MessageAction,
)
from openhands.events.observation import CmdOutputObservation
from openhands.runtime.base import Runtime
from openhands.runtime.browser.browser_env import (
BROWSER_EVAL_GET_GOAL_ACTION,
BROWSER_EVAL_GET_REWARDS_ACTION,
)
from openhands.utils.async_utils import call_async_from_sync
from openhands.runtime.runtime import Runtime
SUPPORTED_AGENT_CLS = {'BrowsingAgent'}
@@ -144,7 +143,6 @@ def process_instance(
logger.info(f'Starting evaluation for instance {env_id}.')
runtime = create_runtime(config)
call_async_from_sync(runtime.connect)
task_str = initialize_runtime(runtime)
state: State | None = asyncio.run(
+4
View File
@@ -1,2 +1,6 @@
VITE_BACKEND_BASE_URL="localhost:3000" # Backend URL without protocol (e.g. localhost:3000)
VITE_MOCK_API="false" # true or false
# GitHub OAuth
VITE_GITHUB_CLIENT_ID=""
VITE_APP_MODE="oss" # "oss" or "saas"
@@ -1,73 +0,0 @@
import { render, screen } from "@testing-library/react";
import userEvent from "@testing-library/user-event";
import { describe, it, expect, test } from "vitest";
import { ChatMessage } from "#/components/chat-message";
// Unit tests for the ChatMessage component: rendering per message type,
// the hover-revealed copy button, clipboard copying, and custom children.
describe("ChatMessage", () => {
  it("should render a user message", () => {
    render(<ChatMessage type="user" message="Hello, World!" />);
    expect(screen.getByTestId("user-message")).toBeInTheDocument();
    expect(screen.getByText("Hello, World!")).toBeInTheDocument();
  });

  it("should render an assistant message", () => {
    render(<ChatMessage type="assistant" message="Hello, World!" />);
    expect(screen.getByTestId("assistant-message")).toBeInTheDocument();
    expect(screen.getByText("Hello, World!")).toBeInTheDocument();
  });

  it.skip("should support code syntax highlighting", () => {
    const code = "```js\nconsole.log('Hello, World!')\n```";
    render(<ChatMessage type="user" message={code} />);

    // SyntaxHighlighter breaks the code blocks into "tokens"
    expect(screen.getByText("console")).toBeInTheDocument();
    expect(screen.getByText("log")).toBeInTheDocument();
    expect(screen.getByText("'Hello, World!'")).toBeInTheDocument();
  });

  it.todo("should support markdown content");

  it("should render the copy to clipboard button when the user hovers over the message", async () => {
    const user = userEvent.setup();
    render(<ChatMessage type="user" message="Hello, World!" />);
    const message = screen.getByText("Hello, World!");

    // The button is in the DOM but hidden until the message is hovered.
    expect(screen.getByTestId("copy-to-clipboard")).not.toBeVisible();

    await user.hover(message);
    expect(screen.getByTestId("copy-to-clipboard")).toBeVisible();
  });

  it("should copy content to clipboard", async () => {
    const user = userEvent.setup();
    render(<ChatMessage type="user" message="Hello, World!" />);
    const copyToClipboardButton = screen.getByTestId("copy-to-clipboard");

    await user.click(copyToClipboardButton);

    // `.resolves` yields a promise; it must be awaited, otherwise the test
    // can complete before the clipboard contents are actually compared.
    await expect(navigator.clipboard.readText()).resolves.toBe(
      "Hello, World!",
    );
  });

  // BUG: vi.useFakeTimers() seems to break the tests
  it.todo(
    "should display a checkmark for 200ms and disable the button after copying content to clipboard",
  );

  // Declared as a todo rather than an empty test body: an empty test is
  // reported as passing even though nothing is verified.
  it.todo(
    "should display an error toast if copying content to clipboard fails",
  );

  test.todo("push a toast after successfully copying content to clipboard");

  it("should render a component passed as a prop", () => {
    function Component() {
      return <div data-testid="custom-component">Custom Component</div>;
    }

    render(
      <ChatMessage type="user" message="Hello, World">
        <Component />
      </ChatMessage>,
    );

    expect(screen.getByTestId("custom-component")).toBeInTheDocument();
  });
});
@@ -0,0 +1,28 @@
import { screen } from "@testing-library/react";
import { describe, expect, it } from "vitest";
import { renderWithProviders } from "test-utils";
import Chat from "#/components/chat/Chat";
// Fixture: one assistant message followed by one user message.
const MESSAGES: Message[] = [
  {
    sender: "assistant",
    content: "Hello!",
    imageUrls: [],
    timestamp: new Date().toISOString(),
  },
  {
    sender: "user",
    content: "Hi!",
    imageUrls: [],
    timestamp: new Date().toISOString(),
  },
];

// Checks that Chat renders as many "article" elements as it receives messages.
describe("Chat", () => {
  it("should render chat messages", () => {
    renderWithProviders(<Chat messages={MESSAGES} />);

    expect(screen.getAllByTestId("article")).toHaveLength(MESSAGES.length);
  });
});
@@ -0,0 +1,119 @@
import userEvent from "@testing-library/user-event";
import { render, screen } from "@testing-library/react";
import { describe, afterEach, vi, it, expect } from "vitest";
import ChatInput from "#/components/chat/ChatInput";
// Tests for the ChatInput textarea and its send button.
// The whole suite is currently skipped via `describe.skip`.
describe.skip("ChatInput", () => {
  afterEach(() => {
    vi.clearAllMocks();
  });

  // Shared spy; its call history is cleared after every test by the hook above.
  const onSendMessage = vi.fn();

  it("should render a textarea", () => {
    render(<ChatInput onSendMessage={onSendMessage} />);
    expect(screen.getByRole("textbox")).toBeInTheDocument();
  });

  it("should be able to be set as disabled", async () => {
    const user = userEvent.setup();
    render(<ChatInput disabled onSendMessage={onSendMessage} />);

    const textarea = screen.getByRole("textbox");
    const button = screen.getByRole("button");

    expect(textarea).not.toBeDisabled(); // user can still type
    expect(button).toBeDisabled(); // user cannot submit

    await user.type(textarea, "Hello, world!");
    await user.keyboard("{Enter}");

    expect(onSendMessage).not.toHaveBeenCalled();
  });

  it("should render with a placeholder", () => {
    render(<ChatInput onSendMessage={onSendMessage} />);

    // The placeholder is matched against the raw i18n key.
    const textarea = screen.getByPlaceholderText(
      /CHAT_INTERFACE\$INPUT_PLACEHOLDER/i,
    );
    expect(textarea).toBeInTheDocument();
  });

  it("should render a send button", () => {
    render(<ChatInput onSendMessage={onSendMessage} />);
    expect(screen.getByRole("button")).toBeInTheDocument();
  });

  it("should call sendChatMessage with the input when the send button is clicked", async () => {
    const user = userEvent.setup();
    render(<ChatInput onSendMessage={onSendMessage} />);

    const textarea = screen.getByRole("textbox");
    const button = screen.getByRole("button");

    await user.type(textarea, "Hello, world!");
    await user.click(button);

    expect(onSendMessage).toHaveBeenCalledWith("Hello, world!", []);
    // Additionally, check if it was called exactly once
    expect(onSendMessage).toHaveBeenCalledTimes(1);
  });

  it("should be able to send a message when the enter key is pressed", async () => {
    const user = userEvent.setup();
    render(<ChatInput onSendMessage={onSendMessage} />);
    const textarea = screen.getByRole("textbox");

    await user.type(textarea, "Hello, world!");
    await user.keyboard("{Enter}");

    expect(onSendMessage).toHaveBeenCalledWith("Hello, world!", []);
  });

  it("should NOT send a message when shift + enter is pressed", async () => {
    const user = userEvent.setup();
    render(<ChatInput onSendMessage={onSendMessage} />);
    const textarea = screen.getByRole("textbox");

    await user.type(textarea, "Hello, world!");
    // Hold Shift, press Enter, release Shift. The previous sequence had a
    // space between the descriptors, which typed a literal space into the
    // textarea and never released Shift.
    await user.keyboard("{Shift>}{Enter}{/Shift}");

    expect(onSendMessage).not.toHaveBeenCalled();
  });

  it("should NOT send an empty message", async () => {
    const user = userEvent.setup();
    render(<ChatInput onSendMessage={onSendMessage} />);
    const textarea = screen.getByRole("textbox");
    const button = screen.getByRole("button");

    // Whitespace-only input counts as empty.
    await user.type(textarea, " ");

    // with enter key
    await user.keyboard("{Enter}");
    expect(onSendMessage).not.toHaveBeenCalled();

    // with button click
    await user.click(button);
    expect(onSendMessage).not.toHaveBeenCalled();
  });

  it("should clear the input message after sending a message", async () => {
    const user = userEvent.setup();
    render(<ChatInput onSendMessage={onSendMessage} />);
    const textarea = screen.getByRole("textbox");
    const button = screen.getByRole("button");

    await user.type(textarea, "Hello, world!");
    expect(textarea).toHaveValue("Hello, world!");

    await user.click(button);
    expect(textarea).toHaveValue("");
  });

  // this is already implemented but need to figure out how to test it
  it.todo(
    "should NOT send a message when the enter key is pressed while composing",
  );
});
@@ -0,0 +1,148 @@
import { screen, act } from "@testing-library/react";
import { describe, expect, it, vi } from "vitest";
import userEvent from "@testing-library/user-event";
import { renderWithProviders } from "test-utils";
import { createMemoryRouter, RouterProvider } from "react-router-dom";
import { addAssistantMessage } from "#/state/chatSlice";
import AgentState from "#/types/AgentState";
import ChatInterface from "#/components/chat/ChatInterface";
const router = createMemoryRouter([
{
path: "/",
element: <ChatInterface />,
},
]);
// NOTE(review): this triple-slash directive appears after the imports and the
// `router` statement; TypeScript only honours such directives at the very top
// of a file, so here it is presumably treated as a plain comment — confirm
// and move it to the first line if the reference is actually needed.
/// <reference types="vitest" />
// Shape of the custom matcher registered with `expect.extend` in this file.
interface CustomMatchers<R = unknown> {
// Takes the expected event as a string (the matcher JSON-parses both sides).
toMatchMessageEvent(expected: string): R;
}
// Module augmentation so `expect(...).toMatchMessageEvent(...)` type-checks.
declare module "vitest" {
interface Assertion<T> extends CustomMatchers<T> {}
// @ts-expect-error - recursively references itself
interface AsymmetricMatchersContaining extends CustomMatchers {}
}
// This is for the scrollview ref in Chat.tsx
// TODO: Move this into test setup
HTMLElement.prototype.scrollTo = vi.fn().mockImplementation(() => {});
const TEST_TIMESTAMP = new Date().toISOString();
// Tests for the ChatInterface component rendered with a preloaded store.
// The whole suite is currently skipped via `describe.skip`.
describe.skip("ChatInterface", () => {
// TODO: replace below with e.g. fake timers
// https://vitest.dev/guide/mocking#timers
// https://vitest.dev/api/vi.html#vi-usefaketimers
// Custom matcher for testing message events
// Both arguments are JSON strings; their `args` objects are compared with the
// `timestamp` field removed, and the received timestamp only has to be a string.
// NOTE(review): no test in this suite calls `toMatchMessageEvent` — confirm
// whether the matcher is still needed.
expect.extend({
toMatchMessageEvent(received, expected) {
const receivedObj = JSON.parse(received);
const expectedObj = JSON.parse(expected);
// Compare everything except the timestamp
const { timestamp: receivedTimestamp, ...receivedRest } =
receivedObj.args;
const { timestamp: expectedTimestamp, ...expectedRest } =
expectedObj.args;
const pass =
this.equals(receivedRest, expectedRest) &&
typeof receivedTimestamp === "string";
return {
pass,
message: () =>
pass
? `expected ${received} not to match the structure of ${expected} (ignoring exact timestamp)`
: `expected ${received} to match the structure of ${expected} (ignoring exact timestamp)`,
};
},
});
// Unlike the other tests, this one renders the component directly rather than
// through the memory router.
it("should render empty message list and input", () => {
renderWithProviders(<ChatInterface />);
expect(screen.queryAllByTestId("article")).toHaveLength(0);
});
// Starts with one preloaded user message, then dispatches an assistant message
// to the store and expects a second article to appear.
it("should render user and assistant messages", () => {
const { store } = renderWithProviders(<RouterProvider router={router} />, {
preloadedState: {
chat: {
messages: [
{
sender: "user",
content: "Hello",
imageUrls: [],
timestamp: TEST_TIMESTAMP,
},
],
},
},
});
expect(screen.getAllByTestId("article")).toHaveLength(1);
expect(screen.getByText("Hello")).toBeInTheDocument();
act(() => {
// simulate assistant response
store.dispatch(addAssistantMessage("Hello to you!"));
});
expect(screen.getAllByTestId("article")).toHaveLength(2);
expect(screen.getByText("Hello to you!")).toBeInTheDocument();
});
// NOTE(review): this test types and submits a message but contains no
// assertion, so it cannot fail on the behaviour its title describes.
it("should send the user message as an event to the Session when the agent state is INIT", async () => {
const user = userEvent.setup();
renderWithProviders(<RouterProvider router={router} />, {
preloadedState: {
agent: {
curAgentState: AgentState.INIT,
},
},
});
const input = screen.getByRole("textbox");
await user.type(input, "my message");
await user.keyboard("{Enter}");
});
// NOTE(review): same as above — no assertion follows the submit.
it("should send the user message as an event to the Session when the agent state is AWAITING_USER_INPUT", async () => {
const user = userEvent.setup();
renderWithProviders(<RouterProvider router={router} />, {
preloadedState: {
agent: {
curAgentState: AgentState.AWAITING_USER_INPUT,
},
},
});
const input = screen.getByRole("textbox");
await user.type(input, "my message");
await user.keyboard("{Enter}");
});
// With the agent in the LOADING state the send button is expected to be disabled.
it("should disable the user input if agent is not initialized", async () => {
const user = userEvent.setup();
renderWithProviders(<RouterProvider router={router} />, {
preloadedState: {
agent: {
curAgentState: AgentState.LOADING,
},
},
});
const input = screen.getByRole("textbox");
await user.type(input, "my message");
await user.keyboard("{Enter}");
// The button is located by its aria-label, matched against the raw i18n key.
const submitButton = screen.getByLabelText(
"CHAT_INTERFACE$TOOLTIP_SEND_MESSAGE",
);
expect(submitButton).toBeDisabled();
});
it.todo("test scroll-related behaviour");
});
@@ -0,0 +1,200 @@
import { fireEvent, render, screen, within } from "@testing-library/react";
import { describe, it, expect, vi } from "vitest";
import userEvent from "@testing-library/user-event";
import toast from "#/utils/toast";
import ChatMessage from "#/components/chat/ChatMessage";
// Tests for the chat ChatMessage component: alignment per sender, markdown
// rendering, copy-to-clipboard behaviour, and the confirmation buttons.
describe("Message", () => {
  // Builds the `message` prop; only the sender and content vary between tests.
  const buildMessage = (sender: "user" | "assistant", content: string) => ({
    sender,
    content,
    imageUrls: [] as string[],
    timestamp: new Date().toISOString(),
  });

  it("should render a user message", () => {
    render(
      <ChatMessage
        message={buildMessage("user", "Hello")}
        isLastMessage={false}
      />,
    );

    expect(screen.getByTestId("article")).toBeInTheDocument();
    expect(screen.getByTestId("article")).toHaveClass("self-end"); // user message should be on the right side
  });

  it("should render an assistant message", () => {
    render(
      <ChatMessage
        message={buildMessage("assistant", "Hi")}
        isLastMessage={false}
      />,
    );

    expect(screen.getByTestId("article")).toBeInTheDocument();
    expect(screen.getByTestId("article")).not.toHaveClass("self-end"); // assistant message should be on the left side
  });

  it("should render markdown content", () => {
    render(
      <ChatMessage
        message={buildMessage("user", "```js\nconsole.log('Hello')\n```")}
        isLastMessage={false}
      />,
    );

    // SyntaxHighlighter breaks the code blocks into "tokens"
    expect(screen.getByText("console")).toBeInTheDocument();
    expect(screen.getByText("log")).toBeInTheDocument();
    expect(screen.getByText("'Hello'")).toBeInTheDocument();
  });

  describe("copy to clipboard", () => {
    // Spies are created once for the group and shared by both tests.
    const toastInfoSpy = vi.spyOn(toast, "info");
    const toastErrorSpy = vi.spyOn(toast, "error");

    it("should copy any message to clipboard", async () => {
      const user = userEvent.setup();
      render(
        <ChatMessage
          message={buildMessage("user", "Hello")}
          isLastMessage={false}
        />,
      );

      const message = screen.getByTestId("article");
      let copyButton = within(message).queryByTestId("copy-button");
      // The copy button is not rendered until the message is hovered.
      expect(copyButton).not.toBeInTheDocument();

      // I am using `fireEvent` here because `userEvent.hover()` seems to interfere with the
      // `userEvent.click()` call later on
      fireEvent.mouseEnter(message);

      copyButton = within(message).getByTestId("copy-button");
      await user.click(copyButton);

      // `.resolves` yields a promise; it must be awaited, otherwise the test
      // can complete before the clipboard contents are actually compared.
      await expect(navigator.clipboard.readText()).resolves.toBe("Hello");
      expect(toastInfoSpy).toHaveBeenCalled();
    });

    it("should show an error message when the message cannot be copied", async () => {
      const user = userEvent.setup();
      render(
        <ChatMessage
          message={buildMessage("user", "Hello")}
          isLastMessage={false}
        />,
      );

      const message = screen.getByTestId("article");
      fireEvent.mouseEnter(message);

      const copyButton = within(message).getByTestId("copy-button");
      // Force the clipboard write to reject so the error path is exercised.
      const clipboardSpy = vi
        .spyOn(navigator.clipboard, "writeText")
        .mockRejectedValue(new Error("Failed to copy"));

      await user.click(copyButton);

      expect(clipboardSpy).toHaveBeenCalled();
      expect(toastErrorSpy).toHaveBeenCalled();
    });
  });

  describe("confirmation buttons", () => {
    // Asserts that neither the confirm nor the reject button is in the DOM.
    const expectButtonsNotToBeRendered = () => {
      expect(
        screen.queryByTestId("action-confirm-button"),
      ).not.toBeInTheDocument();
      expect(
        screen.queryByTestId("action-reject-button"),
      ).not.toBeInTheDocument();
    };

    it.skip("should display confirmation buttons for the last assistant message", () => {
      // it should not render buttons if the message is not the last one
      const { rerender } = render(
        <ChatMessage
          message={buildMessage("assistant", "Are you sure?")}
          isLastMessage={false}
          awaitingUserConfirmation
        />,
      );
      expectButtonsNotToBeRendered();

      // it should not render buttons if the message is not from the assistant
      rerender(
        <ChatMessage
          message={buildMessage("user", "Yes")}
          isLastMessage
          awaitingUserConfirmation
        />,
      );
      expectButtonsNotToBeRendered();

      // it should not render buttons if the message is not awaiting user confirmation
      rerender(
        <ChatMessage
          message={buildMessage("assistant", "Are you sure?")}
          isLastMessage
          awaitingUserConfirmation={false}
        />,
      );
      expectButtonsNotToBeRendered();

      // it should render buttons if all conditions are met
      rerender(
        <ChatMessage
          message={buildMessage("assistant", "Are you sure?")}
          isLastMessage
          awaitingUserConfirmation
        />,
      );

      const confirmButton = screen.getByTestId("action-confirm-button");
      const rejectButton = screen.getByTestId("action-reject-button");

      expect(confirmButton).toBeInTheDocument();
      expect(rejectButton).toBeInTheDocument();
    });
  });
});
@@ -1,161 +0,0 @@
import userEvent from "@testing-library/user-event";
import { render, screen } from "@testing-library/react";
import { describe, afterEach, vi, it, expect } from "vitest";
import { ChatInput } from "#/components/chat-input";
// Tests for the ChatInput component: submitting via Enter or the button,
// disabled state, placeholder, controlled value, stop button, and the
// focus/blur/change callbacks.
describe("ChatInput", () => {
  // Shared spy; its call history is cleared after every test.
  const onSubmitMock = vi.fn();

  afterEach(() => {
    vi.clearAllMocks();
  });

  it("should render a textarea", () => {
    render(<ChatInput onSubmit={onSubmitMock} />);
    expect(screen.getByTestId("chat-input")).toBeInTheDocument();
    expect(screen.getByRole("textbox")).toBeInTheDocument();
  });

  it("should call onSubmit when the user types and presses enter", async () => {
    const user = userEvent.setup();
    render(<ChatInput onSubmit={onSubmitMock} />);
    const textarea = screen.getByRole("textbox");

    await user.type(textarea, "Hello, world!");
    await user.keyboard("{Enter}");

    expect(onSubmitMock).toHaveBeenCalledWith("Hello, world!");
  });

  it("should call onSubmit when pressing the submit button", async () => {
    const user = userEvent.setup();
    render(<ChatInput onSubmit={onSubmitMock} />);
    const textarea = screen.getByRole("textbox");
    const button = screen.getByRole("button");

    await user.type(textarea, "Hello, world!");
    await user.click(button);

    expect(onSubmitMock).toHaveBeenCalledWith("Hello, world!");
  });

  it("should not call onSubmit when the message is empty", async () => {
    const user = userEvent.setup();
    render(<ChatInput onSubmit={onSubmitMock} />);
    const button = screen.getByRole("button");

    await user.click(button);
    expect(onSubmitMock).not.toHaveBeenCalled();

    await user.keyboard("{Enter}");
    expect(onSubmitMock).not.toHaveBeenCalled();
  });

  it("should disable submit", async () => {
    const user = userEvent.setup();
    render(<ChatInput disabled onSubmit={onSubmitMock} />);

    const button = screen.getByRole("button");
    const textarea = screen.getByRole("textbox");

    await user.type(textarea, "Hello, world!");

    expect(button).toBeDisabled();
    await user.click(button);
    expect(onSubmitMock).not.toHaveBeenCalled();

    await user.keyboard("{Enter}");
    expect(onSubmitMock).not.toHaveBeenCalled();
  });

  it("should render a placeholder", () => {
    render(
      <ChatInput placeholder="Enter your message" onSubmit={onSubmitMock} />,
    );

    const textarea = screen.getByPlaceholderText("Enter your message");
    expect(textarea).toBeInTheDocument();
  });

  it("should create a newline instead of submitting when shift + enter is pressed", async () => {
    const user = userEvent.setup();
    render(<ChatInput onSubmit={onSubmitMock} />);
    const textarea = screen.getByRole("textbox");

    await user.type(textarea, "Hello, world!");
    // Hold Shift, press Enter, release Shift. The previous sequence had a
    // space between the descriptors, which typed a literal space into the
    // textarea and never released Shift.
    await user.keyboard("{Shift>}{Enter}{/Shift}");

    expect(onSubmitMock).not.toHaveBeenCalled();
    // NOTE(review): left disabled as in the original; re-check whether it
    // passes now that the stray space is no longer typed.
    // expect(textarea).toHaveValue("Hello, world!\n");
  });

  it("should clear the input message after sending a message", async () => {
    const user = userEvent.setup();
    render(<ChatInput onSubmit={onSubmitMock} />);
    const textarea = screen.getByRole("textbox");
    const button = screen.getByRole("button");

    await user.type(textarea, "Hello, world!");
    await user.keyboard("{Enter}");
    expect(textarea).toHaveValue("");

    await user.type(textarea, "Hello, world!");
    await user.click(button);
    expect(textarea).toHaveValue("");
  });

  it("should hide the submit button", () => {
    render(<ChatInput onSubmit={onSubmitMock} showButton={false} />);
    expect(screen.queryByRole("button")).not.toBeInTheDocument();
  });

  it("should call onChange when the user types", async () => {
    const user = userEvent.setup();
    const onChangeMock = vi.fn();
    render(<ChatInput onSubmit={onSubmitMock} onChange={onChangeMock} />);
    const textarea = screen.getByRole("textbox");

    await user.type(textarea, "Hello, world!");

    // One change event is expected per typed character.
    expect(onChangeMock).toHaveBeenCalledTimes("Hello, world!".length);
  });

  it("should have set the passed value", () => {
    render(<ChatInput value="Hello, world!" onSubmit={onSubmitMock} />);
    const textarea = screen.getByRole("textbox");

    expect(textarea).toHaveValue("Hello, world!");
  });

  it("should display the stop button and trigger the callback", async () => {
    const user = userEvent.setup();
    const onStopMock = vi.fn();
    render(
      <ChatInput onSubmit={onSubmitMock} button="stop" onStop={onStopMock} />,
    );
    const stopButton = screen.getByTestId("stop-button");

    await user.click(stopButton);
    expect(onStopMock).toHaveBeenCalledOnce();
  });

  it("should call onFocus and onBlur when the textarea is focused and blurred", async () => {
    const user = userEvent.setup();
    const onFocusMock = vi.fn();
    const onBlurMock = vi.fn();
    render(
      <ChatInput
        onSubmit={onSubmitMock}
        onFocus={onFocusMock}
        onBlur={onBlurMock}
      />,
    );
    const textarea = screen.getByRole("textbox");

    await user.click(textarea);
    expect(onFocusMock).toHaveBeenCalledOnce();

    // Tabbing away moves focus off the textarea.
    await user.tab();
    expect(onBlurMock).toHaveBeenCalledOnce();
  });
});
@@ -1,185 +0,0 @@
import { afterEach, describe, expect, it, vi } from "vitest";
import { render, screen, within } from "@testing-library/react";
import userEvent from "@testing-library/user-event";
import { ChatInterface } from "#/components/chat-interface";
import { SocketProvider } from "#/context/socket";
// Renders <ChatInterface /> with SocketProvider as the wrapper.
// NOTE(review): the `messages` argument is accepted but never used in the body
// (hence the lint suppression below), so the fixtures built by callers do not
// reach the rendered component — confirm how messages are meant to be injected.
// eslint-disable-next-line @typescript-eslint/no-unused-vars
const renderChatInterface = (messages: (Message | ErrorMessage)[]) =>
render(<ChatInterface />, { wrapper: SocketProvider });
// Skipped suite for ChatInterface: message list, chat input, image carousel,
// "continue" action, inline errors, and feedback actions.
describe.skip("ChatInterface", () => {
  // Fixture factory for a single chat message stamped with the current time.
  const makeMessage = (
    sender: Message["sender"],
    content: string,
    imageUrls: string[] = [],
  ): Message => ({
    sender,
    content,
    imageUrls,
    timestamp: new Date().toISOString(),
  });

  afterEach(() => {
    vi.clearAllMocks();
  });

  it.todo("should render suggestions if empty");

  it("should render messages", () => {
    const fixtures: Message[] = [
      makeMessage("user", "Hello"),
      makeMessage("assistant", "Hi"),
    ];
    renderChatInterface(fixtures);

    expect(screen.getAllByTestId(/-message/)).toHaveLength(2);
  });

  it("should render a chat input", () => {
    const fixtures: Message[] = [];
    renderChatInterface(fixtures);

    expect(screen.getByTestId("chat-input")).toBeInTheDocument();
  });

  it.todo("should call socket send when submitting a message", async () => {
    const user = userEvent.setup();
    const fixtures: Message[] = [];
    renderChatInterface(fixtures);

    const chatInput = screen.getByTestId("chat-input");
    await user.type(chatInput, "Hello");
    await user.keyboard("{Enter}");

    // spy on send and expect to have been called
  });

  it("should render an image carousel with a message", () => {
    let fixtures: Message[] = [makeMessage("assistant", "Here are some images")];
    const { rerender } = renderChatInterface(fixtures);

    expect(screen.queryByTestId("image-carousel")).not.toBeInTheDocument();

    fixtures = [
      makeMessage("assistant", "Here are some images", ["image1", "image2"]),
    ];
    rerender(<ChatInterface />);

    const carousel = screen.getByTestId("image-carousel");
    expect(carousel).toBeInTheDocument();
    expect(within(carousel).getAllByTestId("image-preview")).toHaveLength(2);
  });

  it.todo("should render confirmation buttons");

  it("should render a 'continue' action when there are more than 2 messages and awaiting user input", () => {
    const fixtures: Message[] = [
      makeMessage("assistant", "Hello"),
      makeMessage("user", "Hi"),
    ];
    const { rerender } = renderChatInterface(fixtures);

    expect(
      screen.queryByTestId("continue-action-button"),
    ).not.toBeInTheDocument();

    fixtures.push(makeMessage("assistant", "How can I help you?"));
    rerender(<ChatInterface />);

    expect(screen.getByTestId("continue-action-button")).toBeInTheDocument();
  });

  it("should render inline errors", () => {
    const fixtures: (Message | ErrorMessage)[] = [
      makeMessage("assistant", "Hello"),
      {
        error: "Woops!",
        message: "Something went wrong",
      },
    ];
    renderChatInterface(fixtures);

    const errorElement = screen.getByTestId("error-message");
    expect(within(errorElement).getByText("Woops!")).toBeInTheDocument();
    expect(
      within(errorElement).getByText("Something went wrong"),
    ).toBeInTheDocument();
  });

  it("should render feedback actions if there are more than 3 messages", () => {
    const fixtures: Message[] = [
      makeMessage("assistant", "Hello"),
      makeMessage("user", "Hi"),
      makeMessage("assistant", "How can I help you?"),
    ];
    const { rerender } = renderChatInterface(fixtures);

    expect(screen.queryByTestId("feedback-actions")).not.toBeInTheDocument();

    fixtures.push(makeMessage("user", "I need help"));
    rerender(<ChatInterface />);

    expect(screen.getByTestId("feedback-actions")).toBeInTheDocument();
  });

  describe("feedback", () => {
    it.todo("should open the feedback modal when a feedback action is clicked");
    it.todo(
      "should submit feedback and hide the actions when feedback is shared",
    );
    it.todo("should render the actions once more after new messages are added");
  });
});
@@ -1,99 +0,0 @@
import { render, screen } from "@testing-library/react";
import userEvent from "@testing-library/user-event";
import { afterEach, describe, expect, it, test, vi } from "vitest";
import { AccountSettingsContextMenu } from "#/components/context-menu/account-settings-context-menu";
// Tests for AccountSettingsContextMenu: rendered options, click callbacks,
// the logged-out state, and closing on an outside click.
describe("AccountSettingsContextMenu", () => {
  const user = userEvent.setup();
  const onClickAccountSettingsMock = vi.fn();
  const onLogoutMock = vi.fn();
  const onCloseMock = vi.fn();

  // Renders the menu wired to the shared mocks; logged in unless told otherwise.
  const renderMenu = (isLoggedIn = true) =>
    render(
      <AccountSettingsContextMenu
        onClickAccountSettings={onClickAccountSettingsMock}
        onLogout={onLogoutMock}
        onClose={onCloseMock}
        isLoggedIn={isLoggedIn}
      />,
    );

  afterEach(() => {
    [onClickAccountSettingsMock, onLogoutMock, onCloseMock].forEach((mock) =>
      mock.mockClear(),
    );
  });

  it("should always render the right options", () => {
    renderMenu();

    expect(
      screen.getByTestId("account-settings-context-menu"),
    ).toBeInTheDocument();
    expect(screen.getByText("Account Settings")).toBeInTheDocument();
    expect(screen.getByText("Logout")).toBeInTheDocument();
  });

  it("should call onClickAccountSettings when the account settings option is clicked", async () => {
    renderMenu();

    await user.click(screen.getByText("Account Settings"));

    expect(onClickAccountSettingsMock).toHaveBeenCalledOnce();
  });

  it("should call onLogout when the logout option is clicked", async () => {
    renderMenu();

    await user.click(screen.getByText("Logout"));

    expect(onLogoutMock).toHaveBeenCalledOnce();
  });

  test("onLogout should be disabled if the user is not logged in", async () => {
    renderMenu(false);

    await user.click(screen.getByText("Logout"));

    expect(onLogoutMock).not.toHaveBeenCalled();
  });

  it("should call onClose when clicking outside of the element", async () => {
    renderMenu();

    // Click inside the menu first, then on the document body outside it.
    await user.click(screen.getByText("Account Settings"));
    await user.click(document.body);

    expect(onCloseMock).toHaveBeenCalledOnce();
  });
});
@@ -1,41 +0,0 @@
import { describe, it, expect, vi } from "vitest";
import { render, screen } from "@testing-library/react";
import userEvent from "@testing-library/user-event";
import { ContextMenuListItem } from "#/components/context-menu/context-menu-list-item";
// Tests for ContextMenuListItem: renders its children and forwards clicks
// unless the item is disabled.
describe("ContextMenuListItem", () => {
  const ITEM_TEST_ID = "context-menu-list-item";

  it("should render the component with the children", () => {
    render(<ContextMenuListItem onClick={vi.fn}>Test</ContextMenuListItem>);

    expect(screen.getByTestId(ITEM_TEST_ID)).toBeInTheDocument();
    expect(screen.getByText("Test")).toBeInTheDocument();
  });

  it("should call the onClick callback when clicked", async () => {
    const user = userEvent.setup();
    const clickHandler = vi.fn();
    render(
      <ContextMenuListItem onClick={clickHandler}>Test</ContextMenuListItem>,
    );

    await user.click(screen.getByTestId(ITEM_TEST_ID));

    expect(clickHandler).toHaveBeenCalledOnce();
  });

  it("should not call the onClick callback when clicked and the button is disabled", async () => {
    const user = userEvent.setup();
    const clickHandler = vi.fn();
    render(
      <ContextMenuListItem onClick={clickHandler} isDisabled>
        Test
      </ContextMenuListItem>,
    );

    await user.click(screen.getByTestId(ITEM_TEST_ID));

    expect(clickHandler).not.toHaveBeenCalled();
  });
});
@@ -1,55 +0,0 @@
import { render, screen, within } from "@testing-library/react";
import userEvent from "@testing-library/user-event";
import { afterEach, describe, expect, it, vi } from "vitest";
import { FeedbackActions } from "#/components/feedback-actions";
// Tests for FeedbackActions: both buttons render and each one triggers its
// own callback.
describe("FeedbackActions", () => {
  const user = userEvent.setup();
  const onPositiveFeedback = vi.fn();
  const onNegativeFeedback = vi.fn();

  // Renders the component wired to the two shared mocks.
  const renderActions = () =>
    render(
      <FeedbackActions
        onPositiveFeedback={onPositiveFeedback}
        onNegativeFeedback={onNegativeFeedback}
      />,
    );

  afterEach(() => {
    vi.clearAllMocks();
  });

  it("should render correctly", () => {
    renderActions();

    // `getByTestId` throws when the element is missing, so these lookups
    // act as the assertions.
    const container = screen.getByTestId("feedback-actions");
    within(container).getByTestId("positive-feedback");
    within(container).getByTestId("negative-feedback");
  });

  it("should call onPositiveFeedback when positive feedback is clicked", async () => {
    renderActions();

    await user.click(screen.getByTestId("positive-feedback"));

    expect(onPositiveFeedback).toHaveBeenCalled();
  });

  it("should call onNegativeFeedback when negative feedback is clicked", async () => {
    renderActions();

    await user.click(screen.getByTestId("negative-feedback"));

    expect(onNegativeFeedback).toHaveBeenCalled();
  });
});
@@ -1,108 +0,0 @@
import { render, screen } from "@testing-library/react";
import userEvent from "@testing-library/user-event";
import { afterEach, describe, expect, it, vi } from "vitest";
import { FeedbackForm } from "#/components/feedback-form";
// Tests for FeedbackForm: field rendering, the private/public permission
// toggle, submit validation, cancel, and the submitting (disabled) state.
describe("FeedbackForm", () => {
  const user = userEvent.setup();
  const onSubmitMock = vi.fn();
  const onCloseMock = vi.fn();

  // Small helpers so every test renders and queries the form the same way.
  const renderForm = () =>
    render(<FeedbackForm onSubmit={onSubmitMock} onClose={onCloseMock} />);
  const getSubmitButton = () => screen.getByRole("button", { name: "Submit" });
  const getCancelButton = () => screen.getByRole("button", { name: "Cancel" });

  afterEach(() => {
    vi.clearAllMocks();
  });

  it("should render correctly", () => {
    renderForm();

    // `getBy*` queries throw when nothing matches, so the lookups themselves
    // serve as the assertions here.
    screen.getByLabelText("Email");
    screen.getByLabelText("Private");
    screen.getByLabelText("Public");

    getSubmitButton();
    getCancelButton();
  });

  it("should switch between private and public permissions", async () => {
    renderForm();
    const privateOption = screen.getByLabelText("Private");
    const publicOption = screen.getByLabelText("Public");

    expect(privateOption).toBeChecked(); // private is the default value
    expect(publicOption).not.toBeChecked();

    await user.click(publicOption);
    expect(publicOption).toBeChecked();
    expect(privateOption).not.toBeChecked();

    await user.click(privateOption);
    expect(privateOption).toBeChecked();
    expect(publicOption).not.toBeChecked();
  });

  it("should call onSubmit when the form is submitted", async () => {
    renderForm();

    await user.type(screen.getByLabelText("Email"), "test@test.test");
    await user.click(getSubmitButton());

    expect(onSubmitMock).toHaveBeenCalledWith("private", "test@test.test"); // private is the default value
  });

  it("should not call onSubmit when the email is invalid", async () => {
    renderForm();
    const emailField = screen.getByLabelText("Email");
    const submit = getSubmitButton();

    // Empty email.
    await user.click(submit);
    expect(onSubmitMock).not.toHaveBeenCalled();

    // Malformed email.
    await user.type(emailField, "test");
    await user.click(submit);
    expect(onSubmitMock).not.toHaveBeenCalled();
  });

  it("should submit public permissions when the public radio is checked", async () => {
    renderForm();
    const emailField = screen.getByLabelText("Email");
    const publicOption = screen.getByLabelText("Public");

    await user.type(emailField, "test@test.test");
    await user.click(publicOption);
    await user.click(getSubmitButton());

    expect(onSubmitMock).toHaveBeenCalledWith("public", "test@test.test");
  });

  it("should call onClose when the close button is clicked", async () => {
    renderForm();

    await user.click(getCancelButton());

    expect(onSubmitMock).not.toHaveBeenCalled();
    expect(onCloseMock).toHaveBeenCalled();
  });

  it("should disable the buttons if isSubmitting is true", () => {
    const { rerender } = renderForm();
    const submit = getSubmitButton();
    const cancel = getCancelButton();

    expect(submit).not.toBeDisabled();
    expect(cancel).not.toBeDisabled();

    rerender(
      <FeedbackForm
        onSubmit={onSubmitMock}
        onClose={onCloseMock}
        isSubmitting
      />,
    );

    expect(submit).toBeDisabled();
    expect(cancel).toBeDisabled();
  });
});
@@ -16,7 +16,7 @@ vi.mock("../../services/fileService", async () => ({
}));
const renderFileExplorerWithRunningAgentState = () =>
renderWithProviders(<FileExplorer error={null} />, {
renderWithProviders(<FileExplorer />, {
preloadedState: {
agent: {
curAgentState: AgentState.RUNNING,
@@ -1,37 +0,0 @@
import { render, screen } from "@testing-library/react";
import userEvent from "@testing-library/user-event";
import { describe, expect, it, vi } from "vitest";
import { ImagePreview } from "#/components/image-preview";
/**
 * Unit tests for ImagePreview: image rendering, the remove callback, and
 * hiding the close button when no callback is supplied.
 */
describe("ImagePreview", () => {
  it("should render an image", () => {
    // Pass a mock instance (`vi.fn()`), not the `vi.fn` factory itself.
    render(
      <ImagePreview src="https://example.com/image.jpg" onRemove={vi.fn()} />,
    );
    const img = screen.getByRole("img");

    expect(screen.getByTestId("image-preview")).toBeInTheDocument();
    expect(img).toHaveAttribute("src", "https://example.com/image.jpg");
  });

  it("should call onRemove when the close button is clicked", async () => {
    const user = userEvent.setup();
    const onRemoveMock = vi.fn();
    render(
      <ImagePreview
        src="https://example.com/image.jpg"
        onRemove={onRemoveMock}
      />,
    );

    const closeButton = screen.getByRole("button");
    await user.click(closeButton);

    expect(onRemoveMock).toHaveBeenCalledOnce();
  });

  it("should not display the close button when onRemove is not provided", () => {
    render(<ImagePreview src="https://example.com/image.jpg" />);
    expect(screen.queryByRole("button")).not.toBeInTheDocument();
  });
});
@@ -1,119 +0,0 @@
import { render, screen, within } from "@testing-library/react";
import userEvent from "@testing-library/user-event";
import { afterEach, beforeAll, describe, expect, it, vi } from "vitest";
import { InteractiveChatBox } from "#/components/interactive-chat-box";
/**
 * InteractiveChatBox behaviour: rendering, image upload previews, submission,
 * the disabled state, and the stop mode.
 */
describe("InteractiveChatBox", () => {
  const onSubmitMock = vi.fn();
  const onStopMock = vi.fn();

  /** Creates an in-memory PNG file with the given name. */
  const makePng = (name: string) =>
    new File(["(⌐□_□)"], name, { type: "image/png" });

  /** Returns every image preview currently in the document. */
  const findPreviews = () => screen.queryAllByTestId("image-preview");

  beforeAll(() => {
    // Replace URL.createObjectURL with a mock returning a fixed blob URL.
    global.URL.createObjectURL = vi
      .fn()
      .mockReturnValue("blob:http://example.com");
  });

  afterEach(() => {
    vi.clearAllMocks();
  });

  it("should render", () => {
    render(<InteractiveChatBox onSubmit={onSubmitMock} onStop={onStopMock} />);

    const container = screen.getByTestId("interactive-chat-box");
    within(container).getByTestId("chat-input");
    within(container).getByTestId("upload-image-input");
  });

  it("should display the image previews when images are uploaded", async () => {
    const user = userEvent.setup();
    render(<InteractiveChatBox onSubmit={onSubmitMock} onStop={onStopMock} />);

    const firstImage = makePng("chucknorris.png");
    const uploadInput = screen.getByTestId("upload-image-input");
    expect(findPreviews()).toHaveLength(0);

    await user.upload(uploadInput, firstImage);
    expect(findPreviews()).toHaveLength(1);

    // A second upload adds to the existing previews.
    const moreImages = [
      makePng("chucknorris2.png"),
      makePng("chucknorris3.png"),
    ];
    await user.upload(uploadInput, moreImages);
    expect(findPreviews()).toHaveLength(3);
  });

  it("should remove the image preview when the close button is clicked", async () => {
    const user = userEvent.setup();
    render(<InteractiveChatBox onSubmit={onSubmitMock} onStop={onStopMock} />);

    const image = makePng("chucknorris.png");
    const uploadInput = screen.getByTestId("upload-image-input");
    await user.upload(uploadInput, image);
    expect(findPreviews()).toHaveLength(1);

    const preview = screen.getByTestId("image-preview");
    const removeButton = within(preview).getByRole("button");
    await user.click(removeButton);

    expect(findPreviews()).toHaveLength(0);
  });

  it("should call onSubmit with the message and images", async () => {
    const user = userEvent.setup();
    render(<InteractiveChatBox onSubmit={onSubmitMock} onStop={onStopMock} />);

    const chatInput = screen.getByTestId("chat-input");
    const messageBox = within(chatInput).getByRole("textbox");
    const uploadInput = screen.getByTestId("upload-image-input");
    const image = makePng("chucknorris.png");

    await user.upload(uploadInput, image);
    await user.type(messageBox, "Hello, world!");
    await user.keyboard("{Enter}");

    expect(onSubmitMock).toHaveBeenCalledWith("Hello, world!", [image]);
    // clear images after submission
    expect(findPreviews()).toHaveLength(0);
  });

  it("should disable the submit button", async () => {
    const user = userEvent.setup();
    render(
      <InteractiveChatBox
        isDisabled
        onSubmit={onSubmitMock}
        onStop={onStopMock}
      />,
    );

    const submitButton = screen.getByRole("button");
    expect(submitButton).toBeDisabled();

    await user.click(submitButton);
    expect(onSubmitMock).not.toHaveBeenCalled();
  });

  it("should display the stop button if set and call onStop when clicked", async () => {
    const user = userEvent.setup();
    render(
      <InteractiveChatBox
        mode="stop"
        onSubmit={onSubmitMock}
        onStop={onStopMock}
      />,
    );

    const stopControl = screen.getByTestId("stop-button");
    expect(stopControl).toBeInTheDocument();

    await user.click(stopControl);
    expect(onStopMock).toHaveBeenCalledOnce();
  });
});
@@ -0,0 +1,193 @@
import { render, screen, within } from "@testing-library/react";
import { Mock, afterEach, describe, expect, it, vi } from "vitest";
import userEvent from "@testing-library/user-event";
import toast from "react-hot-toast";
import FeedbackModal from "#/components/modals/feedback/FeedbackModal";
import OpenHands from "#/api/open-hands";
/**
 * FeedbackModal tests (currently skipped): visibility, email validation,
 * the payload sent on share, and persisting the email via localStorage.
 *
 * NOTE(review): "#/services/feedbackService" is mocked below, but the
 * assertions target `OpenHands.sendFeedback` from "#/api/open-hands", which
 * is not mocked in this block — confirm before un-skipping.
 */
describe.skip("FeedbackModal", () => {
  // Replace the Storage methods with mocks so localStorage calls can be asserted.
  Storage.prototype.setItem = vi.fn();
  Storage.prototype.getItem = vi.fn();

  vi.mock("#/services/feedbackService", () => ({
    sendFeedback: vi.fn(),
  }));

  vi.mock("#/services/auth", () => ({
    getToken: vi.fn().mockReturnValue("some-token"),
  }));

  // mock Session class
  vi.mock("#/services/session", () => ({
    default: {
      _history: [
        { args: { LLM_API_KEY: "DANGER-key-should-not-be-here" } },
        { content: "Hello" },
      ],
    },
  }));

  /**
   * Builds the modal element under test. Each callback is a fresh mock
   * instance (`vi.fn()`), not the `vi.fn` factory itself.
   */
  const modal = (polarity: "positive" | "negative", isOpen = true) => (
    <FeedbackModal
      polarity={polarity}
      isOpen={isOpen}
      onOpenChange={vi.fn()}
      onSendFeedback={vi.fn()}
    />
  );

  /** Looks up the share (submit) button by its i18n key label. */
  const getShareButton = () =>
    screen.getByRole("button", { name: "FEEDBACK$SHARE_LABEL" });

  afterEach(() => {
    vi.clearAllMocks();
  });

  it("should render the feedback modal when open", () => {
    const { rerender } = render(modal("positive", false));
    expect(screen.queryByTestId("feedback-modal")).not.toBeInTheDocument();

    rerender(modal("positive"));
    expect(screen.getByTestId("feedback-modal")).toBeInTheDocument();
  });

  it("should display an error if the email is invalid when submitting", async () => {
    const user = userEvent.setup();
    render(modal("positive"));

    // Submitting with an empty email field must be rejected.
    await user.click(getShareButton());

    expect(screen.getByTestId("invalid-email-message")).toBeInTheDocument();
    expect(OpenHands.sendFeedback).not.toHaveBeenCalled();
  });

  it("should call sendFeedback with the correct data when the share button is clicked", async () => {
    const user = userEvent.setup();
    render(modal("negative"));

    const submitButton = getShareButton();
    const email = "example@example.com";
    const emailInput = screen.getByTestId("email-input");
    await user.type(emailInput, email);

    // select public
    const permissionsGroup = screen.getByTestId("permissions-group");
    const publicOption = within(permissionsGroup).getByRole("radio", {
      name: "FEEDBACK$PUBLIC_LABEL",
    });
    expect(publicOption).not.toBeChecked();
    await user.click(publicOption);
    expect(publicOption).toBeChecked();

    await user.click(submitButton);

    expect(
      screen.queryByTestId("invalid-email-message"),
    ).not.toBeInTheDocument();
    expect(OpenHands.sendFeedback).toHaveBeenCalledWith({
      email,
      permissions: "public",
      feedback: "negative",
      trajectory: [{ args: {} }, { content: "Hello" }], // api key should be removed
      token: "some-token",
      version: "1.0",
    });
  });

  it("should store the user's email in local storage for later use", async () => {
    const email = "example@example.com";
    const user = userEvent.setup();
    const { rerender } = render(modal("negative"));

    expect(localStorage.getItem).toHaveBeenCalledWith("feedback-email");

    const emailInput = screen.getByTestId("email-input");
    expect(emailInput).toHaveValue("");
    await user.type(emailInput, email);
    expect(emailInput).toHaveValue(email);

    await user.click(getShareButton());
    expect(localStorage.setItem).toHaveBeenCalledWith("feedback-email", email);

    rerender(modal("positive"));

    const emailInputAfterClose = screen.getByTestId("email-input");
    expect(emailInputAfterClose).toHaveValue(email);
  });

  // TODO: figure out how to properly mock toast
  it.skip("should display a success toast when the feedback is shared successfully", async () => {
    (OpenHands.sendFeedback as Mock).mockResolvedValue({
      statusCode: 200,
      body: {
        message: "Feedback shared",
        feedback_id: "some-id",
        password: "some-password",
      },
    });

    const user = userEvent.setup();
    render(modal("negative"));

    const submitButton = getShareButton();
    const email = "example@example.com";
    const emailInput = screen.getByTestId("email-input");
    await user.type(emailInput, email);

    await user.click(submitButton);

    expect(toast).toHaveBeenCalled();
  });
});
@@ -1,9 +0,0 @@
import { describe, it } from "vitest";
// Placeholder suite: every case is registered as a todo, in this order.
describe("AIConfigForm", () => {
  const pendingCases = [
    "should render the AI config form",
    "should toggle the advanced settings when clicked",
    "should call the onSubmit callback when the form is submitted",
    "should call the onReset callback when the reset button is clicked",
    "should call the onClose callback when the close button is clicked",
  ];

  pendingCases.forEach((title) => {
    it.todo(title);
  });
});
@@ -1,9 +0,0 @@
import { describe, it } from "vitest";
// Placeholder suite: every case is registered as a todo, in this order.
describe("DropdownInput", () => {
  const pendingCases = [
    "should render the input",
    "should render the placeholder",
    "should render the dropdown when clicked",
    "should select an option when clicked",
    "should filter the options when typing",
  ];

  pendingCases.forEach((title) => {
    it.todo(title);
  });
});
@@ -1,12 +0,0 @@
import { describe, it } from "vitest";
// Placeholder suite: every case is registered as a todo, in this order.
describe("ModelSelector", () => {
  const pendingCases = [
    "should render the model selector",
    "should display and select the providers",
    "should display and select the models",
    "should disable the models if a provider is not selected",
    "should disable the inputs if isDisabled is true",
    "should set the selected model and provider if the currentModel prop is set",
  ];

  pendingCases.forEach((title) => {
    it.todo(title);
  });
});
@@ -1,71 +0,0 @@
import { render, screen } from "@testing-library/react";
import userEvent from "@testing-library/user-event";
import { afterEach, describe, expect, it, vi } from "vitest";
import { UploadImageInput } from "#/components/upload-image-input";
/**
 * UploadImageInput behaviour: rendering, single and multiple uploads,
 * rejecting non-image files, and custom labels.
 */
describe("UploadImageInput", () => {
  const user = userEvent.setup();
  const onUploadMock = vi.fn();

  /** Renders the component with the shared mock and returns its file input. */
  const renderAndGetInput = () => {
    render(<UploadImageInput onUpload={onUploadMock} />);
    return screen.getByTestId("upload-image-input");
  };

  afterEach(() => {
    vi.clearAllMocks();
  });

  it("should render an input", () => {
    render(<UploadImageInput onUpload={onUploadMock} />);
    expect(screen.getByTestId("upload-image-input")).toBeInTheDocument();
  });

  it("should call onUpload when a file is selected", async () => {
    const uploadInput = renderAndGetInput();
    const image = new File(["(⌐□_□)"], "chucknorris.png", {
      type: "image/png",
    });

    await user.upload(uploadInput, image);

    expect(onUploadMock).toHaveBeenNthCalledWith(1, [image]);
  });

  it("should call onUpload when multiple files are selected", async () => {
    const uploadInput = renderAndGetInput();
    const images = [
      new File(["(⌐□_□)"], "chucknorris.png", { type: "image/png" }),
      new File(["(⌐□_□)"], "chucknorris2.png", { type: "image/png" }),
    ];

    await user.upload(uploadInput, images);

    expect(onUploadMock).toHaveBeenNthCalledWith(1, images);
  });

  it("should not upload any file that is not an image", async () => {
    const uploadInput = renderAndGetInput();
    const textFile = new File(["(⌐□_□)"], "chucknorris.txt", {
      type: "text/plain",
    });

    await user.upload(uploadInput, textFile);

    expect(onUploadMock).not.toHaveBeenCalled();
  });

  it("should render custom labels", () => {
    const { rerender } = render(<UploadImageInput onUpload={onUploadMock} />);
    expect(screen.getByTestId("default-label")).toBeInTheDocument();

    const CustomLabel = () => <span>Custom label</span>;
    rerender(
      <UploadImageInput onUpload={onUploadMock} label={<CustomLabel />} />,
    );

    // The custom label replaces the default one.
    expect(screen.getByText("Custom label")).toBeInTheDocument();
    expect(screen.queryByTestId("default-label")).not.toBeInTheDocument();
  });
});
@@ -1,132 +0,0 @@
import { render, screen } from "@testing-library/react";
import { describe, expect, it, test, vi, afterEach } from "vitest";
import userEvent from "@testing-library/user-event";
import * as Remix from "@remix-run/react";
import { UserActions } from "#/components/user-actions";
/**
 * UserActions behaviour: rendering, toggling the context menu, the account
 * settings and logout callbacks, and the loading spinner.
 */
describe("UserActions", () => {
  const user = userEvent.setup();
  const onClickAccountSettingsMock = vi.fn();
  const onLogoutMock = vi.fn();

  const useFetcherSpy = vi.spyOn(Remix, "useFetcher");
  // @ts-expect-error - Only returning the relevant properties for the test
  useFetcherSpy.mockReturnValue({ state: "idle" });

  /** Renders UserActions with the shared mocks and an optional user. */
  const renderUserActions = (loggedInUser?: { avatar_url: string }) =>
    render(
      <UserActions
        onClickAccountSettings={onClickAccountSettingsMock}
        onLogout={onLogoutMock}
        user={loggedInUser}
      />,
    );

  /** Asserts that the context menu is not in the document. */
  const expectMenuClosed = () =>
    expect(
      screen.queryByTestId("account-settings-context-menu"),
    ).not.toBeInTheDocument();

  afterEach(() => {
    onClickAccountSettingsMock.mockClear();
    onLogoutMock.mockClear();
    useFetcherSpy.mockClear();
  });

  it("should render", () => {
    renderUserActions();

    expect(screen.getByTestId("user-actions")).toBeInTheDocument();
    expect(screen.getByTestId("user-avatar")).toBeInTheDocument();
  });

  it("should toggle the user menu when the user avatar is clicked", async () => {
    renderUserActions();

    const userAvatar = screen.getByTestId("user-avatar");
    await user.click(userAvatar);
    expect(
      screen.getByTestId("account-settings-context-menu"),
    ).toBeInTheDocument();

    await user.click(userAvatar);
    expectMenuClosed();
  });

  it("should call onClickAccountSettings and close the menu when the account settings option is clicked", async () => {
    renderUserActions();

    const userAvatar = screen.getByTestId("user-avatar");
    await user.click(userAvatar);

    const accountSettingsOption = screen.getByText("Account Settings");
    await user.click(accountSettingsOption);

    expect(onClickAccountSettingsMock).toHaveBeenCalledOnce();
    expectMenuClosed();
  });

  it("should call onLogout and close the menu when the logout option is clicked", async () => {
    renderUserActions({ avatar_url: "https://example.com/avatar.png" });

    const userAvatar = screen.getByTestId("user-avatar");
    await user.click(userAvatar);

    const logoutOption = screen.getByText("Logout");
    await user.click(logoutOption);

    expect(onLogoutMock).toHaveBeenCalledOnce();
    expectMenuClosed();
  });

  test("onLogout should not be called when the user is not logged in", async () => {
    renderUserActions();

    const userAvatar = screen.getByTestId("user-avatar");
    await user.click(userAvatar);

    const logoutOption = screen.getByText("Logout");
    await user.click(logoutOption);

    expect(onLogoutMock).not.toHaveBeenCalled();
  });

  it("should display the loading spinner", async () => {
    // @ts-expect-error - Only returning the relevant properties for the test
    useFetcherSpy.mockReturnValue({ state: "loading" });

    renderUserActions({ avatar_url: "https://example.com/avatar.png" });

    const userAvatar = screen.getByTestId("user-avatar");
    // user-event calls are asynchronous; await so the click finishes before
    // the assertions run and no promise is left floating after the test.
    await user.click(userAvatar);

    expect(screen.getByTestId("loading-spinner")).toBeInTheDocument();
    expect(screen.queryByAltText("user avatar")).not.toBeInTheDocument();
  });
});
@@ -1,68 +0,0 @@
import { render, screen } from "@testing-library/react";
import userEvent from "@testing-library/user-event";
import { afterEach, describe, expect, it, vi } from "vitest";
import { UserAvatar } from "#/components/user-avatar";
/**
 * UserAvatar behaviour: the placeholder, the click callback, the avatar
 * image, and the loading spinner.
 */
describe("UserAvatar", () => {
  const onClickMock = vi.fn();
  const AVATAR_URL = "https://example.com/avatar.png";

  /** Queries the placeholder by its label; returns null when absent. */
  const queryPlaceholder = () =>
    screen.queryByLabelText("user avatar placeholder");

  afterEach(() => {
    onClickMock.mockClear();
  });

  it("(default) should render the placeholder avatar when the user is logged out", () => {
    render(<UserAvatar onClick={onClickMock} />);

    expect(screen.getByTestId("user-avatar")).toBeInTheDocument();
    expect(
      screen.getByLabelText("user avatar placeholder"),
    ).toBeInTheDocument();
  });

  it("should call onClick when clicked", async () => {
    const user = userEvent.setup();
    render(<UserAvatar onClick={onClickMock} />);

    const avatarContainer = screen.getByTestId("user-avatar");
    await user.click(avatarContainer);

    expect(onClickMock).toHaveBeenCalledOnce();
  });

  it("should display the user's avatar when available", () => {
    render(<UserAvatar onClick={onClickMock} avatarUrl={AVATAR_URL} />);

    expect(screen.getByAltText("user avatar")).toBeInTheDocument();
    expect(queryPlaceholder()).not.toBeInTheDocument();
  });

  it("should display a loading spinner instead of an avatar when isLoading is true", () => {
    const view = render(<UserAvatar onClick={onClickMock} />);
    expect(screen.queryByTestId("loading-spinner")).not.toBeInTheDocument();
    expect(
      screen.getByLabelText("user avatar placeholder"),
    ).toBeInTheDocument();

    // Loading without an avatar URL: spinner replaces the placeholder.
    view.rerender(<UserAvatar onClick={onClickMock} isLoading />);
    expect(screen.getByTestId("loading-spinner")).toBeInTheDocument();
    expect(queryPlaceholder()).not.toBeInTheDocument();

    // Loading with an avatar URL: spinner replaces the avatar image.
    view.rerender(
      <UserAvatar onClick={onClickMock} avatarUrl={AVATAR_URL} isLoading />,
    );
    expect(screen.getByTestId("loading-spinner")).toBeInTheDocument();
    expect(screen.queryByAltText("user avatar")).not.toBeInTheDocument();
  });
});
@@ -1,36 +0,0 @@
import { render, screen } from "@testing-library/react";
import userEvent from "@testing-library/user-event";
import { expect, test, vi } from "vitest";
import { useClickOutsideElement } from "#/hooks/useClickOutsideElement";
/** Props for the harness component that exercises useClickOutsideElement. */
interface ClickOutsideTestComponentProps {
  callback: () => void;
}

/**
 * Test harness: attaches the ref returned by useClickOutsideElement to the
 * "inside" div and renders a sibling "outside" div to click on.
 */
function ClickOutsideTestComponent(props: ClickOutsideTestComponentProps) {
  const insideRef = useClickOutsideElement<HTMLDivElement>(props.callback);

  return (
    <div>
      <div data-testid="inside-element" ref={insideRef} />
      <div data-testid="outside-element" />
    </div>
  );
}
// Clicking the ref'd element must not fire the callback; clicking a sibling must.
test("call the callback when the element is clicked outside", async () => {
  const onOutsideClick = vi.fn();
  const user = userEvent.setup();
  render(<ClickOutsideTestComponent callback={onOutsideClick} />);

  const inside = screen.getByTestId("inside-element");
  const outside = screen.getByTestId("outside-element");

  await user.click(inside);
  expect(onOutsideClick).not.toHaveBeenCalled();

  await user.click(outside);
  expect(onOutsideClick).toHaveBeenCalled();
});
-35
View File
@@ -1,35 +0,0 @@
import { describe, it, test } from "vitest";
// Placeholder suite for the _oh route: every case is registered as a todo.
describe("frontend/routes/_oh", () => {
  /** Registers each title as a todo test, in the order given. */
  const todoAll = (...titles: string[]) => {
    titles.forEach((title) => {
      it.todo(title);
    });
  };

  describe("brand logo", () => {
    todoAll(
      "should not do anything if the user is in the main screen",
      "should be clickable and redirect to the main screen if the user is not in the main screen",
    );
  });

  describe("user menu", () => {
    todoAll("should open the user menu when clicked");

    describe("logged out", () => {
      todoAll(
        "should display a placeholder",
        "the logout option in the user menu should be disabled",
      );
    });

    describe("logged in", () => {
      todoAll(
        "should display the user's avatar",
        "should log the user out when the logout option is clicked",
      );
    });
  });

  describe("config", () => {
    todoAll(
      "should open the config modal when clicked",
      "should not save the config and close the config modal when the close button is clicked",
      "should save the config when the save button is clicked and close the modal",
      "should warn the user about saving the config when in /app",
    );
  });
});
@@ -0,0 +1,43 @@
import { createRemixStub } from "@remix-run/testing";
import { describe, expect, it } from "vitest";
import { screen, within } from "@testing-library/react";
import { renderWithProviders } from "test-utils";
import userEvent from "@testing-library/user-event";
import CodeEditor from "#/routes/app._index/route";
// Route stub that mounts the CodeEditor route component at /app.
const RemixStub = createRemixStub([{ path: "/app", Component: CodeEditor }]);

/**
 * CodeEditor route tests (currently skipped): initial render, listing files
 * in the explorer, and opening a file.
 */
describe.skip("CodeEditor", () => {
  /** Mounts the stub at /app and waits for the file explorer to appear. */
  const mountAndFindExplorer = () => {
    renderWithProviders(<RemixStub initialEntries={["/app"]} />);
    return screen.findByTestId("file-explorer");
  };

  it("should render", async () => {
    await mountAndFindExplorer();
    expect(screen.getByTestId("code-editor-empty-message")).toBeInTheDocument();
  });

  it("should retrieve the files", async () => {
    const explorer = await mountAndFindExplorer();
    const treeNodes = within(explorer).getAllByTestId("tree-node");

    // request mocked with msw
    expect(treeNodes).toHaveLength(3);
  });

  it("should open a file", async () => {
    const user = userEvent.setup();
    const explorer = await mountAndFindExplorer();
    const treeNodes = within(explorer).getAllByTestId("tree-node");

    await user.click(treeNodes[0]);

    // check if the file is opened
    expect(
      screen.queryByTestId("code-editor-empty-message"),
    ).not.toBeInTheDocument();

    const editor = await screen.findByTestId("code-editor");
    expect(
      within(editor).getByText(/content of file1.ts/i),
    ).toBeInTheDocument();
  });
});
+56
View File
@@ -0,0 +1,56 @@
import { createRemixStub } from "@remix-run/testing";
import { beforeAll, describe, expect, it, vi } from "vitest";
import { render, screen, waitFor } from "@testing-library/react";
import { ws } from "msw";
import { setupServer } from "msw/node";
import App from "#/routes/app";
import AgentState from "#/types/AgentState";
import { AgentStateChangeObservation } from "#/types/core/observations";
// Route stub that mounts the App route component at /app.
const RemixStub = createRemixStub([{ path: "/app", Component: App }]);

/**
 * App route tests (currently skipped): initial render and the websocket
 * init message.
 *
 * NOTE(review): `server.listen()` is never called in this block — presumably
 * the server is started elsewhere, or this is why the suite is skipped; confirm.
 */
describe.skip("App", () => {
  const agent = ws.link("ws://localhost:3001/ws");
  const server = setupServer();

  beforeAll(() => {
    // mock `dom.scrollTo`
    HTMLElement.prototype.scrollTo = vi.fn().mockImplementation(() => {});
  });

  it("should render", async () => {
    render(<RemixStub initialEntries={["/app"]} />);

    await waitFor(() => {
      expect(screen.getByTestId("app")).toBeInTheDocument();
      expect(
        screen.getByText(/INITIALIZING_AGENT_LOADING_MESSAGE/i),
      ).toBeInTheDocument();
    });
  });

  it("should establish a ws connection and send the init message", async () => {
    server.use(
      agent.addEventListener("connection", ({ client }) => {
        // Sent to the client as soon as it connects.
        const initObservation = {
          id: 1,
          cause: 0,
          message: "AGENT_INIT_MESSAGE",
          source: "agent",
          timestamp: new Date().toISOString(),
          observation: "agent_state_changed",
          content: "AGENT_INIT_MESSAGE",
          extras: { agent_state: AgentState.INIT },
        } satisfies AgentStateChangeObservation;

        client.send(JSON.stringify(initObservation));
      }),
    );

    render(<RemixStub initialEntries={["/app"]} />);

    await waitFor(() => {
      expect(screen.getByText(/AGENT_INIT_MESSAGE/i)).toBeInTheDocument();
    });
  });
});
+50
View File
@@ -0,0 +1,50 @@
import { createRemixStub } from "@remix-run/testing";
import { describe, expect, it } from "vitest";
import { render, screen } from "@testing-library/react";
import userEvent from "@testing-library/user-event";
import Home from "#/routes/_index/route";
/**
 * Builds a Remix stub that serves the Home route at "/". The loader returns
 * a fixed `ghToken` when `config.authenticated` is truthy, otherwise `null`.
 */
const renderRemixStub = (config?: { authenticated: boolean }) => {
  const loader = () => ({
    ghToken: config?.authenticated ? "ghp_123456" : null,
  });

  return createRemixStub([{ path: "/", Component: Home, loader }]);
};
/**
 * Home (_index) route tests (currently skipped): initial render and loading
 * GitHub repositories depending on whether a token is present.
 */
describe.skip("Home (_index)", () => {
  it("should render", async () => {
    const RemixStub = renderRemixStub();
    render(<RemixStub />);
    await screen.findByText(/let's start building/i);
  });

  it("should load the gh repos if a token is present", async () => {
    const user = userEvent.setup();
    const RemixStub = renderRemixStub({ authenticated: true });
    render(<RemixStub />);

    const repos = await screen.findByPlaceholderText(
      /select a github project/i,
    );
    await user.click(repos);

    // mocked responses from msw
    screen.getByText(/octocat\/hello-world/i);
    screen.getByText(/octocat\/earth/i);
  });

  it("should not load the gh repos if a token is not present", async () => {
    // Use a setup() instance, consistent with the sibling test above.
    const user = userEvent.setup();
    const RemixStub = renderRemixStub();
    render(<RemixStub />);

    const repos = await screen.findByPlaceholderText(
      /select a github project/i,
    );
    await user.click(repos);

    expect(screen.queryByText(/octocat\/hello-world/i)).not.toBeInTheDocument();
    expect(screen.queryByText(/octocat\/earth/i)).not.toBeInTheDocument();
  });
});
+40
View File
@@ -0,0 +1,40 @@
import { describe, expect, it } from "vitest";
import { createRemixStub } from "@remix-run/testing";
import { render, screen } from "@testing-library/react";
import userEvent from "@testing-library/user-event";
import App, { clientLoader } from "#/root";
// Route stub that mounts the root App at "/" with its clientLoader.
const RemixStub = createRemixStub([
  { path: "/", Component: App, loader: clientLoader },
]);

/**
 * Root route tests (currently skipped): initial render and the auth modal
 * flow on a first visit.
 */
describe.skip("Root", () => {
  it("should render", async () => {
    render(<RemixStub />);
    await screen.findByTestId("link-to-main");
  });

  describe("Auth Modal", () => {
    it("should display the auth modal on first time visit", async () => {
      render(<RemixStub />);
      await screen.findByTestId("auth-modal");
    });

    it("should close the auth modal on accepting the terms", async () => {
      const user = userEvent.setup();
      render(<RemixStub />);
      await screen.findByTestId("auth-modal");

      // Accept the terms, then continue.
      const termsCheckbox = screen.getByTestId("accept-terms");
      await user.click(termsCheckbox);
      const continueButton = screen.getByRole("button", { name: /continue/i });
      await user.click(continueButton);

      expect(screen.queryByTestId("auth-modal")).not.toBeInTheDocument();
      expect(screen.getByTestId("link-to-main")).toBeInTheDocument();
    });

    it.todo("should not display the auth modal on subsequent visits");
  });
});
-4
View File
@@ -1,4 +0,0 @@
/** Optional properties declared on the global `Window` object. */
interface Window {
  /** Either "saas" or "oss"; may be absent. */
  __APP_MODE__?: "oss" | "saas";
  /** A string, `null`, or absent. */
  __GITHUB_CLIENT_ID__?: null | string;
}
+73 -84
View File
@@ -1,12 +1,12 @@
{
"name": "openhands-frontend",
"version": "0.11.0",
"version": "0.9.8",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "openhands-frontend",
"version": "0.11.0",
"version": "0.9.8",
"dependencies": {
"@monaco-editor/react": "^4.6.0",
"@nextui-org/react": "^2.4.8",
@@ -35,8 +35,7 @@
"react-markdown": "^9.0.1",
"react-redux": "^9.1.2",
"react-router-dom": "^6.26.1",
"react-syntax-highlighter": "^15.6.1",
"react-textarea-autosize": "^8.5.4",
"react-syntax-highlighter": "^15.5.0",
"remark-gfm": "^4.0.0",
"sirv-cli": "^3.0.0",
"tailwind-merge": "^2.5.4",
@@ -48,10 +47,10 @@
"@remix-run/dev": "^2.11.2",
"@remix-run/testing": "^2.11.2",
"@tailwindcss/typography": "^0.5.15",
"@testing-library/jest-dom": "^6.6.1",
"@testing-library/jest-dom": "^6.5.0",
"@testing-library/react": "^16.0.1",
"@testing-library/user-event": "^14.5.2",
"@types/node": "^22.7.6",
"@types/node": "^22.7.5",
"@types/react": "^18.3.11",
"@types/react-dom": "^18.3.0",
"@types/react-highlight": "^0.12.8",
@@ -1602,9 +1601,9 @@
}
},
"node_modules/@jspm/core": {
"version": "2.1.0",
"resolved": "https://registry.npmjs.org/@jspm/core/-/core-2.1.0.tgz",
"integrity": "sha512-3sRl+pkyFY/kLmHl0cgHiFp2xEqErA8N3ECjMs7serSUBmoJ70lBa0PG5t0IM6WJgdZNyyI0R8YFfi5wM8+mzg==",
"version": "2.0.1",
"resolved": "https://registry.npmjs.org/@jspm/core/-/core-2.0.1.tgz",
"integrity": "sha512-Lg3PnLp0QXpxwLIAuuJboLeRaIhrgJjeuh797QADg3xz8wGLugQOS5DpsE8A6i6Adgzf+bacllkKZG3J0tGfDw==",
"dev": true
},
"node_modules/@mdx-js/mdx": {
@@ -3561,15 +3560,15 @@
}
},
"node_modules/@react-aria/grid": {
"version": "3.10.5",
"resolved": "https://registry.npmjs.org/@react-aria/grid/-/grid-3.10.5.tgz",
"integrity": "sha512-9sLa+rpLgRZk7VX+tvdSudn1tdVgolVzhDLGWd95yS4UtPVMihTMGBrRoByY57Wxvh1V+7Ptw8kc6tsRSotYKg==",
"version": "3.10.4",
"resolved": "https://registry.npmjs.org/@react-aria/grid/-/grid-3.10.4.tgz",
"integrity": "sha512-3AjJ0hwRhOCIHThIZrGWrjAuKDpaZuBkODW3dvgLqtsNm3tL46DI6U9O3vfp8lNbrWMsXJgjRXwvXvdv0/gwCA==",
"dependencies": {
"@react-aria/focus": "^3.18.4",
"@react-aria/focus": "^3.18.3",
"@react-aria/i18n": "^3.12.3",
"@react-aria/interactions": "^3.22.4",
"@react-aria/interactions": "^3.22.3",
"@react-aria/live-announcer": "^3.4.0",
"@react-aria/selection": "^3.20.1",
"@react-aria/selection": "^3.20.0",
"@react-aria/utils": "^3.25.3",
"@react-stately/collections": "^3.11.0",
"@react-stately/grid": "^3.9.3",
@@ -3585,11 +3584,11 @@
}
},
"node_modules/@react-aria/grid/node_modules/@react-aria/focus": {
"version": "3.18.4",
"resolved": "https://registry.npmjs.org/@react-aria/focus/-/focus-3.18.4.tgz",
"integrity": "sha512-91J35077w9UNaMK1cpMUEFRkNNz0uZjnSwiyBCFuRdaVuivO53wNC9XtWSDNDdcO5cGy87vfJRVAiyoCn/mjqA==",
"version": "3.18.3",
"resolved": "https://registry.npmjs.org/@react-aria/focus/-/focus-3.18.3.tgz",
"integrity": "sha512-WKUElg+5zS0D3xlVn8MntNnkzJql2J6MuzAMP8Sv5WTgFDse/XGR842dsxPTIyKKdrWVCRegCuwa4m3n/GzgJw==",
"dependencies": {
"@react-aria/interactions": "^3.22.4",
"@react-aria/interactions": "^3.22.3",
"@react-aria/utils": "^3.25.3",
"@react-types/shared": "^3.25.0",
"@swc/helpers": "^0.5.0",
@@ -3618,9 +3617,9 @@
}
},
"node_modules/@react-aria/grid/node_modules/@react-aria/interactions": {
"version": "3.22.4",
"resolved": "https://registry.npmjs.org/@react-aria/interactions/-/interactions-3.22.4.tgz",
"integrity": "sha512-E0vsgtpItmknq/MJELqYJwib+YN18Qag8nroqwjk1qOnBa9ROIkUhWJerLi1qs5diXq9LHKehZDXRlwPvdEFww==",
"version": "3.22.3",
"resolved": "https://registry.npmjs.org/@react-aria/interactions/-/interactions-3.22.3.tgz",
"integrity": "sha512-RRUb/aG+P0IKTIWikY/SylB6bIbLZeztnZY2vbe7RAG5MgVaCgn5HQ45SI15GlTmhsFG8CnF6slJsUFJiNHpbQ==",
"dependencies": {
"@react-aria/ssr": "^3.9.6",
"@react-aria/utils": "^3.25.3",
@@ -3632,13 +3631,13 @@
}
},
"node_modules/@react-aria/grid/node_modules/@react-aria/selection": {
"version": "3.20.1",
"resolved": "https://registry.npmjs.org/@react-aria/selection/-/selection-3.20.1.tgz",
"integrity": "sha512-My0w8UC/7PAkz/1yZUjr2VRuzDZz1RrbgTqP36j5hsJx8RczDTjI4TmKtQNKG0ggaP4w83G2Og5JPTq3w3LMAw==",
"version": "3.20.0",
"resolved": "https://registry.npmjs.org/@react-aria/selection/-/selection-3.20.0.tgz",
"integrity": "sha512-h3giMcXo4SMZRL5HrqZvOLNTsdh5jCXwLUx0wpj/2EF0tcYQL6WDfn1iJ+rHARkUIs7X70fUV8iwlbUySZy1xg==",
"dependencies": {
"@react-aria/focus": "^3.18.4",
"@react-aria/focus": "^3.18.3",
"@react-aria/i18n": "^3.12.3",
"@react-aria/interactions": "^3.22.4",
"@react-aria/interactions": "^3.22.3",
"@react-aria/utils": "^3.25.3",
"@react-stately/selection": "^3.17.0",
"@react-types/shared": "^3.25.0",
@@ -4111,12 +4110,12 @@
}
},
"node_modules/@react-aria/toggle": {
"version": "3.10.9",
"resolved": "https://registry.npmjs.org/@react-aria/toggle/-/toggle-3.10.9.tgz",
"integrity": "sha512-dtfnyIU2/kcH9rFAiB48diSmaXDv45K7UCuTkMQLjbQa3QHC1oYNbleVN/VdGyAMBsIWtfl8L4uuPrAQmDV/bg==",
"version": "3.10.8",
"resolved": "https://registry.npmjs.org/@react-aria/toggle/-/toggle-3.10.8.tgz",
"integrity": "sha512-N6WTgE8ByMYY+ZygUUPGON2vW5NrxwU91H98+Nozl+Rq6ZYR2fD9i8oRtLtrYPxjU2HmaFwDyQdWvmMJZuDxig==",
"dependencies": {
"@react-aria/focus": "^3.18.4",
"@react-aria/interactions": "^3.22.4",
"@react-aria/focus": "^3.18.3",
"@react-aria/interactions": "^3.22.3",
"@react-aria/utils": "^3.25.3",
"@react-stately/toggle": "^3.7.8",
"@react-types/checkbox": "^3.8.4",
@@ -4128,11 +4127,11 @@
}
},
"node_modules/@react-aria/toggle/node_modules/@react-aria/focus": {
"version": "3.18.4",
"resolved": "https://registry.npmjs.org/@react-aria/focus/-/focus-3.18.4.tgz",
"integrity": "sha512-91J35077w9UNaMK1cpMUEFRkNNz0uZjnSwiyBCFuRdaVuivO53wNC9XtWSDNDdcO5cGy87vfJRVAiyoCn/mjqA==",
"version": "3.18.3",
"resolved": "https://registry.npmjs.org/@react-aria/focus/-/focus-3.18.3.tgz",
"integrity": "sha512-WKUElg+5zS0D3xlVn8MntNnkzJql2J6MuzAMP8Sv5WTgFDse/XGR842dsxPTIyKKdrWVCRegCuwa4m3n/GzgJw==",
"dependencies": {
"@react-aria/interactions": "^3.22.4",
"@react-aria/interactions": "^3.22.3",
"@react-aria/utils": "^3.25.3",
"@react-types/shared": "^3.25.0",
"@swc/helpers": "^0.5.0",
@@ -4143,9 +4142,9 @@
}
},
"node_modules/@react-aria/toggle/node_modules/@react-aria/interactions": {
"version": "3.22.4",
"resolved": "https://registry.npmjs.org/@react-aria/interactions/-/interactions-3.22.4.tgz",
"integrity": "sha512-E0vsgtpItmknq/MJELqYJwib+YN18Qag8nroqwjk1qOnBa9ROIkUhWJerLi1qs5diXq9LHKehZDXRlwPvdEFww==",
"version": "3.22.3",
"resolved": "https://registry.npmjs.org/@react-aria/interactions/-/interactions-3.22.3.tgz",
"integrity": "sha512-RRUb/aG+P0IKTIWikY/SylB6bIbLZeztnZY2vbe7RAG5MgVaCgn5HQ45SI15GlTmhsFG8CnF6slJsUFJiNHpbQ==",
"dependencies": {
"@react-aria/ssr": "^3.9.6",
"@react-aria/utils": "^3.25.3",
@@ -5815,9 +5814,9 @@
}
},
"node_modules/@testing-library/jest-dom": {
"version": "6.6.1",
"resolved": "https://registry.npmjs.org/@testing-library/jest-dom/-/jest-dom-6.6.1.tgz",
"integrity": "sha512-mNYIiAuP4yJwV2zBRQCV7PHoQwbb6/8TfMpPcwSUzcSVDJHWOXt6hjNtIN1v5knDmimYnjJxKhsoVd4LVGIO+w==",
"version": "6.5.0",
"resolved": "https://registry.npmjs.org/@testing-library/jest-dom/-/jest-dom-6.5.0.tgz",
"integrity": "sha512-xGGHpBXYSHUUr6XsKBfs85TWlYKpTc37cSBBVrXcib2MkHLboWlkClhWF37JKlDb9KEq3dHs+f2xR7XJEWGBxA==",
"dev": true,
"dependencies": {
"@adobe/css-tools": "^4.4.0",
@@ -6029,9 +6028,9 @@
}
},
"node_modules/@types/node": {
"version": "22.7.6",
"resolved": "https://registry.npmjs.org/@types/node/-/node-22.7.6.tgz",
"integrity": "sha512-/d7Rnj0/ExXDMcioS78/kf1lMzYk4BZV8MZGTBKzTGZ6/406ukkbYlIsZmMPhcR5KlkunDHQLrtAVmSq7r+mSw==",
"version": "22.7.5",
"resolved": "https://registry.npmjs.org/@types/node/-/node-22.7.5.tgz",
"integrity": "sha512-jML7s2NAzMWc//QSJ1a3prpk78cOPchGvXJsC3C6R6PSMoooztvRVQEz89gmBTBY1SPMaqo5teB4uNHPdetShQ==",
"devOptional": true,
"dependencies": {
"undici-types": "~6.19.2"
@@ -6620,9 +6619,9 @@
}
},
"node_modules/acorn": {
"version": "8.13.0",
"resolved": "https://registry.npmjs.org/acorn/-/acorn-8.13.0.tgz",
"integrity": "sha512-8zSiw54Oxrdym50NlZ9sUusyO1Z1ZchgRLWRaK6c86XJFClyCgFKetdowBg5bKxyp/u+CDBJG4Mpp0m3HLZl9w==",
"version": "8.12.1",
"resolved": "https://registry.npmjs.org/acorn/-/acorn-8.12.1.tgz",
"integrity": "sha512-tcpGyI9zbizT9JbV6oYE477V6mTlXvvi0T0G3SNIYE2apm/G5huBa1+K89VGeovbg+jycCrfhl3ADxErOuO6Jg==",
"dev": true,
"bin": {
"acorn": "bin/acorn"
@@ -7326,9 +7325,9 @@
}
},
"node_modules/caniuse-lite": {
"version": "1.0.30001669",
"resolved": "https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.30001669.tgz",
"integrity": "sha512-DlWzFDJqstqtIVx1zeSpIMLjunf5SmwOw0N2Ck/QSQdS8PLS4+9HrLaYei4w8BIAL7IB/UEDu889d8vhCTPA0w==",
"version": "1.0.30001668",
"resolved": "https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.30001668.tgz",
"integrity": "sha512-nWLrdxqCdblixUO+27JtGJJE/txpJlyUy5YN1u53wLZkP0emYCo5zgS6QYft7VUYR42LGgi/S5hdLZTrnyIddw==",
"funding": [
{
"type": "opencollective",
@@ -8397,9 +8396,9 @@
"integrity": "sha512-WMwm9LhRUo+WUaRN+vRuETqG89IgZphVSNkdFgeb6sS/E4OrDIN7t48CAewSHXc6C8lefD8KKfr5vY61brQlow=="
},
"node_modules/electron-to-chromium": {
"version": "1.5.39",
"resolved": "https://registry.npmjs.org/electron-to-chromium/-/electron-to-chromium-1.5.39.tgz",
"integrity": "sha512-4xkpSR6CjuiaNyvwiWDI85N9AxsvbPawB8xc7yzLPonYTuP19BVgYweKyUMFtHEZgIcHWMt1ks5Cqx2m+6/Grg=="
"version": "1.5.36",
"resolved": "https://registry.npmjs.org/electron-to-chromium/-/electron-to-chromium-1.5.36.tgz",
"integrity": "sha512-HYTX8tKge/VNp6FGO+f/uVDmUkq+cEfcxYhKf15Akc4M5yxt5YmorwlAitKWjWhWQnKcDRBAQKXkhqqXMqcrjw=="
},
"node_modules/emoji-regex": {
"version": "9.2.2",
@@ -9841,9 +9840,9 @@
}
},
"node_modules/framer-motion": {
"version": "11.11.9",
"resolved": "https://registry.npmjs.org/framer-motion/-/framer-motion-11.11.9.tgz",
"integrity": "sha512-XpdZseuCrZehdHGuW22zZt3SF5g6AHJHJi7JwQIigOznW4Jg1n0oGPMJQheMaKLC+0rp5gxUKMRYI6ytd3q4RQ==",
"version": "11.11.8",
"resolved": "https://registry.npmjs.org/framer-motion/-/framer-motion-11.11.8.tgz",
"integrity": "sha512-mnGQNEoz99GtFXBBPw+Ag5K4FcfP5XrXxrxHz+iE4Lmg7W3sf2gKmGuvfkZCW/yIfcdv5vJd6KiSPETH1Pw68Q==",
"peer": true,
"dependencies": {
"tslib": "^2.4.0"
@@ -10322,9 +10321,9 @@
}
},
"node_modules/hast-util-to-jsx-runtime": {
"version": "2.3.2",
"resolved": "https://registry.npmjs.org/hast-util-to-jsx-runtime/-/hast-util-to-jsx-runtime-2.3.2.tgz",
"integrity": "sha512-1ngXYb+V9UT5h+PxNRa1O1FYguZK/XL+gkeqvp7EdHlB9oHUG0eYRo/vY5inBdcqo3RkPMC58/H94HvkbfGdyg==",
"version": "2.3.1",
"resolved": "https://registry.npmjs.org/hast-util-to-jsx-runtime/-/hast-util-to-jsx-runtime-2.3.1.tgz",
"integrity": "sha512-Rbemi1rzrkysSin0FDHZfsxYPoqLGHFfxFm28aOBHPibT7aqjy7kUgY636se9xbuCWUsFpWAYlmtGHQakiqtEA==",
"dependencies": {
"@types/estree": "^1.0.0",
"@types/hast": "^3.0.0",
@@ -11044,11 +11043,6 @@
"node": "*"
}
},
"node_modules/highlightjs-vue": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/highlightjs-vue/-/highlightjs-vue-1.0.0.tgz",
"integrity": "sha512-PDEfEF102G23vHmPhLyPboFCD+BkMGu+GuJe2d9/eH4FsCwvgBpnc9n0pGE+ffKdph38s6foEZiEjdgHdzp+IA=="
},
"node_modules/hosted-git-info": {
"version": "6.1.1",
"resolved": "https://registry.npmjs.org/hosted-git-info/-/hosted-git-info-6.1.1.tgz",
@@ -20740,13 +20734,12 @@
}
},
"node_modules/react-syntax-highlighter": {
"version": "15.6.1",
"resolved": "https://registry.npmjs.org/react-syntax-highlighter/-/react-syntax-highlighter-15.6.1.tgz",
"integrity": "sha512-OqJ2/vL7lEeV5zTJyG7kmARppUjiB9h9udl4qHQjjgEos66z00Ia0OckwYfRxCSFrW8RJIBnsBwQsHZbVPspqg==",
"version": "15.5.0",
"resolved": "https://registry.npmjs.org/react-syntax-highlighter/-/react-syntax-highlighter-15.5.0.tgz",
"integrity": "sha512-+zq2myprEnQmH5yw6Gqc8lD55QHnpKaU8TOcFeC/Lg/MQSs8UknEA0JC4nTZGFAXC2J2Hyj/ijJ7NlabyPi2gg==",
"dependencies": {
"@babel/runtime": "^7.3.1",
"highlight.js": "^10.4.1",
"highlightjs-vue": "^1.0.0",
"lowlight": "^1.17.0",
"prismjs": "^1.27.0",
"refractor": "^3.6.0"
@@ -22720,17 +22713,13 @@
}
},
"node_modules/string.prototype.includes": {
"version": "2.0.1",
"resolved": "https://registry.npmjs.org/string.prototype.includes/-/string.prototype.includes-2.0.1.tgz",
"integrity": "sha512-o7+c9bW6zpAdJHTtujeePODAhkuicdAryFsfVKwA+wGw89wJ4GTY484WTucM9hLtDEOpOvI+aHnzqnC5lHp4Rg==",
"version": "2.0.0",
"resolved": "https://registry.npmjs.org/string.prototype.includes/-/string.prototype.includes-2.0.0.tgz",
"integrity": "sha512-E34CkBgyeqNDcrbU76cDjL5JLcVrtSdYq0MEh/B10r17pRP4ciHLwTgnuLV8Ay6cgEMLkcBkFCKyFZ43YldYzg==",
"dev": true,
"dependencies": {
"call-bind": "^1.0.7",
"define-properties": "^1.2.1",
"es-abstract": "^1.23.3"
},
"engines": {
"node": ">= 0.4"
"define-properties": "^1.1.3",
"es-abstract": "^1.17.5"
}
},
"node_modules/string.prototype.matchall": {
@@ -23510,9 +23499,9 @@
}
},
"node_modules/tslib": {
"version": "2.8.0",
"resolved": "https://registry.npmjs.org/tslib/-/tslib-2.8.0.tgz",
"integrity": "sha512-jWVzBLplnCmoaTr13V9dYbiQ99wvZRd0vNWaDRg+aVYRcjDF3nDksxFDE/+fkXnKhpnUUkmx5pK/v8mCtLVqZA=="
"version": "2.7.0",
"resolved": "https://registry.npmjs.org/tslib/-/tslib-2.7.0.tgz",
"integrity": "sha512-gLXCKdN1/j47AiHiOkJN69hJmcbGTHI0ImLmbYLHykhgeN0jVGola9yVjFgzCUklsZQMW55o+dW7IXv3RCXDzA=="
},
"node_modules/turbo-stream": {
"version": "2.4.0",
@@ -23672,9 +23661,9 @@
}
},
"node_modules/undici": {
"version": "6.20.1",
"resolved": "https://registry.npmjs.org/undici/-/undici-6.20.1.tgz",
"integrity": "sha512-AjQF1QsmqfJys+LXfGTNum+qw4S88CojRInG/6t31W/1fk6G59s92bnAvGz5Cmur+kQv2SURXEvvudLmbrE8QA==",
"version": "6.20.0",
"resolved": "https://registry.npmjs.org/undici/-/undici-6.20.0.tgz",
"integrity": "sha512-AITZfPuxubm31Sx0vr8bteSalEbs9wQb/BOBi9FPlD9Qpd6HxZ4Q0+hI742jBhkPb4RT2v5MQzaW5VhRVyj+9A==",
"engines": {
"node": ">=18.17"
}
+5 -6
View File
@@ -1,6 +1,6 @@
{
"name": "openhands-frontend",
"version": "0.11.0",
"version": "0.9.8",
"private": true,
"type": "module",
"engines": {
@@ -34,8 +34,7 @@
"react-markdown": "^9.0.1",
"react-redux": "^9.1.2",
"react-router-dom": "^6.26.1",
"react-syntax-highlighter": "^15.6.1",
"react-textarea-autosize": "^8.5.4",
"react-syntax-highlighter": "^15.5.0",
"remark-gfm": "^4.0.0",
"sirv-cli": "^3.0.0",
"tailwind-merge": "^2.5.4",
@@ -47,7 +46,7 @@
"dev": "npm run make-i18n && VITE_MOCK_API=false remix vite:dev",
"dev:mock": "npm run make-i18n && VITE_MOCK_API=true remix vite:dev",
"build": "npm run make-i18n && tsc && remix vite:build",
"start": "npx sirv-cli build/ --single",
"start": "npx sirv-cli build/client/ --single",
"test": "vitest run",
"test:coverage": "npm run make-i18n && vitest run --coverage",
"dev_wsl": "VITE_WATCH_USE_POLLING=true vite",
@@ -73,10 +72,10 @@
"@remix-run/dev": "^2.11.2",
"@remix-run/testing": "^2.11.2",
"@tailwindcss/typography": "^0.5.15",
"@testing-library/jest-dom": "^6.6.1",
"@testing-library/jest-dom": "^6.5.0",
"@testing-library/react": "^16.0.1",
"@testing-library/user-event": "^14.5.2",
"@types/node": "^22.7.6",
"@types/node": "^22.7.5",
"@types/react": "^18.3.11",
"@types/react-dom": "^18.3.0",
"@types/react-highlight": "^0.12.8",
-4
View File
@@ -1,4 +0,0 @@
{
"APP_MODE": "oss",
"GITHUB_CLIENT_ID": ""
}
+3
View File
@@ -0,0 +1,3 @@
{
"users": []
}
+15 -48
View File
@@ -6,7 +6,6 @@ import {
FeedbackResponse,
GitHubAccessTokenResponse,
ErrorResponse,
GetConfigResponse,
} from "./open-hands.types";
/**
@@ -61,15 +60,6 @@ class OpenHands {
return response.json();
}
static async getConfig(): Promise<GetConfigResponse> {
const response = await fetch("config.json", {
headers: {
"Cache-Control": "no-cache",
},
});
return response.json();
}
/**
* Retrieve the list of files available in the workspace
* @param token User token provided by the server
@@ -81,9 +71,7 @@ class OpenHands {
if (path) url.searchParams.append("path", path);
const response = await fetch(url.toString(), {
headers: {
Authorization: `Bearer ${token}`,
},
headers: OpenHands.generateHeaders(token),
});
return response.json();
@@ -99,9 +87,7 @@ class OpenHands {
const url = new URL(`${OpenHands.BASE_URL}/api/select-file`);
url.searchParams.append("file", path);
const response = await fetch(url.toString(), {
headers: {
Authorization: `Bearer ${token}`,
},
headers: OpenHands.generateHeaders(token),
});
const data = await response.json();
@@ -123,10 +109,7 @@ class OpenHands {
const response = await fetch(`${OpenHands.BASE_URL}/api/save-file`, {
method: "POST",
body: JSON.stringify({ filePath: path, content }),
headers: {
Authorization: `Bearer ${token}`,
"Content-Type": "application/json",
},
headers: OpenHands.generateHeaders(token),
});
return response.json();
@@ -147,10 +130,8 @@ class OpenHands {
const response = await fetch(`${OpenHands.BASE_URL}/api/upload-files`, {
method: "POST",
headers: OpenHands.generateHeaders(token),
body: formData,
headers: {
Authorization: `Bearer ${token}`,
},
});
return response.json();
@@ -163,11 +144,8 @@ class OpenHands {
*/
static async getWorkspaceZip(token: string): Promise<Blob> {
const response = await fetch(`${OpenHands.BASE_URL}/api/zip-directory`, {
headers: {
Authorization: `Bearer ${token}`,
},
headers: OpenHands.generateHeaders(token),
});
return response.blob();
}
@@ -180,14 +158,12 @@ class OpenHands {
static async sendFeedback(
token: string,
data: Feedback,
// TODO: Type the response
): Promise<FeedbackResponse> {
const response = await fetch(`${OpenHands.BASE_URL}/api/submit-feedback`, {
method: "POST",
headers: OpenHands.generateHeaders(token),
body: JSON.stringify(data),
headers: {
Authorization: `Bearer ${token}`,
"Content-Type": "application/json",
},
});
return response.json();
@@ -201,32 +177,23 @@ class OpenHands {
static async getGitHubAccessToken(
code: string,
): Promise<GitHubAccessTokenResponse> {
const response = await fetch(`${OpenHands.BASE_URL}/api/github/callback`, {
const response = await fetch(`${OpenHands.BASE_URL}/github/callback`, {
method: "POST",
body: JSON.stringify({ code }),
headers: {
"Content-Type": "application/json",
},
});
return response.json();
}
/**
* Check if the user is authenticated
* @param login The user's GitHub login handle
* @returns Whether the user is authenticated
* Generate the headers for the request
* @param token User token provided by the server
* @returns Headers for the request
*/
static async isAuthenticated(login: string): Promise<boolean> {
const response = await fetch(`${OpenHands.BASE_URL}/api/authenticate`, {
method: "POST",
body: JSON.stringify({ login }),
headers: {
"Content-Type": "application/json",
},
});
return response.status === 200;
private static generateHeaders(token: string) {
return {
Authorization: `Bearer ${token}`,
};
}
}
-5
View File
@@ -35,8 +35,3 @@ export interface Feedback {
permissions: "public" | "private";
trajectory: unknown[];
}
/**
 * Shape of the JSON document that `OpenHands.getConfig()` resolves to.
 */
export interface GetConfigResponse {
// Application mode; restricted to the two literals below.
APP_MODE: "saas" | "oss";
// GitHub client ID; may be null.
// NOTE(review): presumably the OAuth app client ID used for GitHub login — confirm.
GITHUB_CLIENT_ID: string | null;
}
-28
View File
@@ -1,28 +0,0 @@
/** Props accepted by the ChevronLeft icon. */
interface ChevronLeftProps {
  /** Value for the `<svg>` width attribute; defaults to 20. */
  width?: number;
  /** Value for the `<svg>` height attribute; defaults to 20. */
  height?: number;
  /** When truthy the chevron is filled with #D4D4D4, otherwise #525252. */
  active?: boolean;
}

/**
 * Left-pointing chevron icon rendered as an inline SVG.
 *
 * The path data is drawn on a fixed 20x20 grid, so the viewBox is pinned to
 * that grid. Deriving the viewBox from `width`/`height` (as was done before)
 * changes the coordinate system instead of scaling the glyph, which leaves the
 * chevron at its original size in the top-left corner for any non-default size.
 */
export function ChevronLeft({
  width = 20,
  height = 20,
  active,
}: ChevronLeftProps) {
  return (
    <svg
      width={width}
      height={height}
      viewBox="0 0 20 20"
      fill="none"
      xmlns="http://www.w3.org/2000/svg"
    >
      <path
        fillRule="evenodd"
        clipRule="evenodd"
        d="M11.204 15.0037L6.65511 9.99993L11.204 4.99617L12.1289 5.83701L8.34444 9.99993L12.1289 14.1628L11.204 15.0037Z"
        fill={active ? "#D4D4D4" : "#525252"}
      />
    </svg>
  );
}
-28
View File
@@ -1,28 +0,0 @@
/** Props accepted by the ChevronRight icon. */
interface ChevronRightProps {
  /** Value for the `<svg>` width attribute; defaults to 20. */
  width?: number;
  /** Value for the `<svg>` height attribute; defaults to 20. */
  height?: number;
  /** When truthy the chevron is filled with #D4D4D4, otherwise #525252. */
  active?: boolean;
}

/**
 * Right-pointing chevron icon rendered as an inline SVG.
 *
 * The path data is drawn on a fixed 20x20 grid, so the viewBox is pinned to
 * that grid. Deriving the viewBox from `width`/`height` (as was done before)
 * changes the coordinate system instead of scaling the glyph, which leaves the
 * chevron at its original size in the top-left corner for any non-default size.
 */
export function ChevronRight({
  width = 20,
  height = 20,
  active,
}: ChevronRightProps) {
  return (
    <svg
      width={width}
      height={height}
      viewBox="0 0 20 20"
      fill="none"
      xmlns="http://www.w3.org/2000/svg"
    >
      <path
        fillRule="evenodd"
        clipRule="evenodd"
        d="M8.79602 4.99634L13.3449 10.0001L8.79602 15.0038L7.87109 14.163L11.6556 10.0001L7.87109 5.83718L8.79602 4.99634Z"
        fill={active ? "#D4D4D4" : "#525252"}
      />
    </svg>
  );
}
-5
View File
@@ -1,5 +0,0 @@
<svg width="11" height="11" viewBox="0 0 11 11" fill="none" xmlns="http://www.w3.org/2000/svg">
<path fill-rule="evenodd" clip-rule="evenodd"
d="M5.69949 5.72974L7.91965 7.9505L8.35077 7.51999L6.13001 5.29922L8.35077 3.07907L7.92026 2.64795L5.69949 4.86871L3.47934 2.64795L3.04883 3.07907L5.26898 5.29922L3.04883 7.51938L3.47934 7.9505L5.69949 5.72974Z"
fill="black" />
</svg>

Before

Width:  |  Height:  |  Size: 387 B

@@ -2,4 +2,4 @@
<path
d="M11.8749 1.75H3.91861C3.47998 1.75015 3.05528 1.90407 2.71841 2.18499C2.38154 2.4659 2.15382 2.85603 2.07486 3.2875L1.28111 7.6625C1.23166 7.93277 1.2422 8.21062 1.31201 8.47636C1.38182 8.74211 1.50917 8.98927 1.68507 9.20035C1.86097 9.41142 2.08111 9.58126 2.32991 9.69785C2.57872 9.81443 2.8501 9.87491 3.12486 9.875H5.97486L5.62486 10.7688C5.47928 11.1601 5.4308 11.5809 5.48357 11.995C5.53635 12.4092 5.68881 12.8044 5.92787 13.1467C6.16694 13.489 6.48547 13.7683 6.85615 13.9604C7.22683 14.1526 7.63859 14.2519 8.05611 14.25C8.17634 14.2497 8.29394 14.2148 8.39482 14.1494C8.4957 14.084 8.57557 13.9909 8.62486 13.8813L10.4061 9.875H11.8749C12.3721 9.875 12.8491 9.67746 13.2007 9.32583C13.5523 8.97419 13.7499 8.49728 13.7499 8V3.625C13.7499 3.12772 13.5523 2.65081 13.2007 2.29917C12.8491 1.94754 12.3721 1.75 11.8749 1.75ZM9.37486 9.11875L7.67486 12.9438C7.50092 12.8911 7.3396 12.8034 7.20083 12.6861C7.06206 12.5688 6.94878 12.4242 6.86798 12.2615C6.78717 12.0987 6.74055 11.9211 6.73099 11.7396C6.72143 11.5581 6.74912 11.3766 6.81236 11.2062L7.14361 10.3125C7.2142 10.1236 7.23803 9.92041 7.21307 9.72029C7.18811 9.52018 7.1151 9.32907 7.00028 9.16329C6.88546 8.9975 6.73223 8.86196 6.55367 8.76823C6.37511 8.67449 6.17653 8.62535 5.97486 8.625H3.12486C3.03304 8.62515 2.94232 8.60507 2.85914 8.56618C2.77597 8.52729 2.70238 8.47055 2.64361 8.4C2.58341 8.33042 2.5393 8.24841 2.51445 8.15982C2.4896 8.07123 2.48462 7.97824 2.49986 7.8875L3.29361 3.5125C3.32024 3.3669 3.39767 3.23548 3.51212 3.14162C3.62657 3.04777 3.77062 2.99759 3.91861 3H9.37486V9.11875ZM12.4999 8C12.4999 8.16576 12.434 8.32473 12.3168 8.44194C12.1996 8.55915 12.0406 8.625 11.8749 8.625H10.6249V3H11.8749C12.0406 3 12.1996 3.06585 12.3168 3.18306C12.434 3.30027 12.4999 3.45924 12.4999 3.625V8Z"
fill="white" />
</svg>
</svg>

Before

Width:  |  Height:  |  Size: 1.9 KiB

After

Width:  |  Height:  |  Size: 1.9 KiB

Some files were not shown because too many files have changed in this diff Show More