mirror of
https://github.com/All-Hands-AI/OpenHands.git
synced 2026-04-29 03:00:45 -04:00
Compare commits
1 Commits
openhands-
...
openhands-
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
f6e4160884 |
23
.github/workflows/eval-runner.yml
vendored
23
.github/workflows/eval-runner.yml
vendored
@@ -1,4 +1,4 @@
|
||||
name: Run SWE-Bench Evaluation
|
||||
name: Run Evaluation
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
@@ -58,6 +58,24 @@ jobs:
|
||||
echo "api_key = \"$DEEPSEEK_API_KEY\"" >> config.toml
|
||||
echo "temperature = 0.0" >> config.toml
|
||||
|
||||
- name: Run integration test evaluation
|
||||
env:
|
||||
ALLHANDS_API_KEY: ${{ secrets.ALLHANDS_EVAL_RUNTIME_API_KEY }}
|
||||
RUNTIME: remote
|
||||
SANDBOX_REMOTE_RUNTIME_API_URL: https://runtime.eval.all-hands.dev
|
||||
EVAL_DOCKER_IMAGE_PREFIX: us-central1-docker.pkg.dev/evaluation-092424/swe-bench-images
|
||||
|
||||
run: |
|
||||
poetry run ./evaluation/integration_tests/scripts/run_infer.sh llm.eval HEAD CodeActAgent '' $N_PROCESSES
|
||||
|
||||
# get evaluation report
|
||||
REPORT_FILE=$(find evaluation/evaluation_outputs/outputs/integration_tests/CodeActAgent/deepseek-chat_maxiter_10_N* -name "report.md" -type f | head -n 1)
|
||||
echo "REPORT_FILE: $REPORT_FILE"
|
||||
echo "INTEGRATION_TEST_REPORT<<EOF" >> $GITHUB_ENV
|
||||
cat $REPORT_FILE >> $GITHUB_ENV
|
||||
echo >> $GITHUB_ENV
|
||||
echo "EOF" >> $GITHUB_ENV
|
||||
|
||||
- name: Run SWE-Bench evaluation
|
||||
env:
|
||||
ALLHANDS_API_KEY: ${{ secrets.ALLHANDS_EVAL_RUNTIME_API_KEY }}
|
||||
@@ -125,6 +143,9 @@ jobs:
|
||||
**SWE-Bench Evaluation Report**
|
||||
${{ env.SWEBENCH_REPORT }}
|
||||
---
|
||||
**Integration Tests Evaluation Report**
|
||||
${{ env.INTEGRATION_TEST_REPORT }}
|
||||
---
|
||||
You can download the full evaluation outputs [here](${{ env.ARTIFACT_URL }}).
|
||||
|
||||
- name: Post to a Slack channel
|
||||
|
||||
3
.github/workflows/fe-unit-tests.yml
vendored
3
.github/workflows/fe-unit-tests.yml
vendored
@@ -35,9 +35,6 @@ jobs:
|
||||
- name: Install dependencies
|
||||
working-directory: ./frontend
|
||||
run: npm ci
|
||||
- name: Run TypeScript compilation
|
||||
working-directory: ./frontend
|
||||
run: npm run make-i18n && tsc
|
||||
- name: Run tests and collect coverage
|
||||
working-directory: ./frontend
|
||||
run: npm run test:coverage
|
||||
|
||||
4
.github/workflows/ghcr-build.yml
vendored
4
.github/workflows/ghcr-build.yml
vendored
@@ -291,7 +291,7 @@ jobs:
|
||||
SANDBOX_RUNTIME_CONTAINER_IMAGE=$image_name \
|
||||
TEST_IN_CI=true \
|
||||
RUN_AS_OPENHANDS=false \
|
||||
poetry run pytest -n 3 -raRs --reruns 2 --reruns-delay 5 --cov=openhands --cov-report=xml -s ./tests/runtime --ignore=tests/runtime/test_browsergym_envs.py
|
||||
poetry run pytest -n 3 -raRs --reruns 2 --reruns-delay 5 --cov=openhands --cov-report=xml -s ./tests/runtime
|
||||
- name: Upload coverage to Codecov
|
||||
uses: codecov/codecov-action@v4
|
||||
env:
|
||||
@@ -368,7 +368,7 @@ jobs:
|
||||
SANDBOX_RUNTIME_CONTAINER_IMAGE=$image_name \
|
||||
TEST_IN_CI=true \
|
||||
RUN_AS_OPENHANDS=true \
|
||||
poetry run pytest -n 3 -raRs --reruns 2 --reruns-delay 5 --cov=openhands --cov-report=xml -s ./tests/runtime --ignore=tests/runtime/test_browsergym_envs.py
|
||||
poetry run pytest -n 3 -raRs --reruns 2 --reruns-delay 5 --cov=openhands --cov-report=xml -s ./tests/runtime
|
||||
- name: Upload coverage to Codecov
|
||||
uses: codecov/codecov-action@v4
|
||||
env:
|
||||
|
||||
158
.github/workflows/integration-runner.yml
vendored
158
.github/workflows/integration-runner.yml
vendored
@@ -1,158 +0,0 @@
|
||||
name: Run Integration Tests
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
types: [labeled]
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
reason:
|
||||
description: 'Reason for manual trigger'
|
||||
required: true
|
||||
default: ''
|
||||
schedule:
|
||||
- cron: '30 22 * * *' # Runs at 10:30pm UTC every day
|
||||
|
||||
env:
|
||||
N_PROCESSES: 10 # Global configuration for number of parallel processes for evaluation
|
||||
|
||||
jobs:
|
||||
run-integration-tests:
|
||||
if: github.event.label.name == 'integration-test' || github.event_name == 'workflow_dispatch' || github.event_name == 'schedule'
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
contents: "read"
|
||||
id-token: "write"
|
||||
pull-requests: "write"
|
||||
issues: "write"
|
||||
strategy:
|
||||
matrix:
|
||||
python-version: ["3.12"]
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Install poetry via pipx
|
||||
run: pipx install poetry
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
cache: "poetry"
|
||||
|
||||
- name: Comment on PR if 'integration-test' label is present
|
||||
if: github.event_name == 'pull_request' && github.event.label.name == 'integration-test'
|
||||
uses: KeisukeYamashita/create-comment@v1
|
||||
with:
|
||||
unique: false
|
||||
comment: |
|
||||
Hi! I started running the integration tests on your PR. You will receive a comment with the results shortly.
|
||||
|
||||
- name: Install Python dependencies using Poetry
|
||||
run: poetry install --without evaluation,llama-index
|
||||
|
||||
- name: Configure config.toml for testing with Haiku
|
||||
env:
|
||||
LLM_MODEL: "litellm_proxy/claude-3-5-haiku-20241022"
|
||||
LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
|
||||
LLM_BASE_URL: ${{ secrets.LLM_BASE_URL }}
|
||||
run: |
|
||||
echo "[llm.eval]" > config.toml
|
||||
echo "model = \"$LLM_MODEL\"" >> config.toml
|
||||
echo "api_key = \"$LLM_API_KEY\"" >> config.toml
|
||||
echo "base_url = \"$LLM_BASE_URL\"" >> config.toml
|
||||
echo "temperature = 0.0" >> config.toml
|
||||
|
||||
- name: Build environment
|
||||
run: make build
|
||||
|
||||
- name: Run integration test evaluation for Haiku
|
||||
env:
|
||||
SANDBOX_FORCE_REBUILD_RUNTIME: True
|
||||
run: |
|
||||
poetry run ./evaluation/integration_tests/scripts/run_infer.sh llm.eval HEAD CodeActAgent '' $N_PROCESSES '' 'haiku_run'
|
||||
|
||||
# get integration tests report
|
||||
REPORT_FILE_HAIKU=$(find evaluation/evaluation_outputs/outputs/integration_tests/CodeActAgent/*haiku*_maxiter_10_N* -name "report.md" -type f | head -n 1)
|
||||
echo "REPORT_FILE: $REPORT_FILE_HAIKU"
|
||||
echo "INTEGRATION_TEST_REPORT_HAIKU<<EOF" >> $GITHUB_ENV
|
||||
cat $REPORT_FILE_HAIKU >> $GITHUB_ENV
|
||||
echo >> $GITHUB_ENV
|
||||
echo "EOF" >> $GITHUB_ENV
|
||||
|
||||
- name: Wait a little bit
|
||||
run: sleep 10
|
||||
|
||||
- name: Configure config.toml for testing with DeepSeek
|
||||
env:
|
||||
LLM_MODEL: "litellm_proxy/deepseek-chat"
|
||||
LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
|
||||
LLM_BASE_URL: ${{ secrets.LLM_BASE_URL }}
|
||||
run: |
|
||||
echo "[llm.eval]" > config.toml
|
||||
echo "model = \"$LLM_MODEL\"" >> config.toml
|
||||
echo "api_key = \"$LLM_API_KEY\"" >> config.toml
|
||||
echo "base_url = \"$LLM_BASE_URL\"" >> config.toml
|
||||
echo "temperature = 0.0" >> config.toml
|
||||
|
||||
- name: Run integration test evaluation for DeepSeek
|
||||
env:
|
||||
SANDBOX_FORCE_REBUILD_RUNTIME: True
|
||||
run: |
|
||||
poetry run ./evaluation/integration_tests/scripts/run_infer.sh llm.eval HEAD CodeActAgent '' $N_PROCESSES '' 'deepseek_run'
|
||||
|
||||
# get integration tests report
|
||||
REPORT_FILE_DEEPSEEK=$(find evaluation/evaluation_outputs/outputs/integration_tests/CodeActAgent/deepseek*_maxiter_10_N* -name "report.md" -type f | head -n 1)
|
||||
echo "REPORT_FILE: $REPORT_FILE_DEEPSEEK"
|
||||
echo "INTEGRATION_TEST_REPORT_DEEPSEEK<<EOF" >> $GITHUB_ENV
|
||||
cat $REPORT_FILE_DEEPSEEK >> $GITHUB_ENV
|
||||
echo >> $GITHUB_ENV
|
||||
echo "EOF" >> $GITHUB_ENV
|
||||
|
||||
- name: Create archive of evaluation outputs
|
||||
run: |
|
||||
TIMESTAMP=$(date +'%y-%m-%d-%H-%M')
|
||||
cd evaluation/evaluation_outputs/outputs # Change to the outputs directory
|
||||
tar -czvf ../../../integration_tests_${TIMESTAMP}.tar.gz integration_tests/CodeActAgent/* # Only include the actual result directories
|
||||
|
||||
- name: Upload evaluation results as artifact
|
||||
uses: actions/upload-artifact@v4
|
||||
id: upload_results_artifact
|
||||
with:
|
||||
name: integration-test-outputs-${{ github.run_id }}-${{ github.run_attempt }}
|
||||
path: integration_tests_*.tar.gz
|
||||
|
||||
- name: Get artifact URLs
|
||||
run: |
|
||||
echo "ARTIFACT_URL=${{ steps.upload_results_artifact.outputs.artifact-url }}" >> $GITHUB_ENV
|
||||
|
||||
- name: Set timestamp and trigger reason
|
||||
run: |
|
||||
echo "TIMESTAMP=$(date +'%Y-%m-%d-%H-%M')" >> $GITHUB_ENV
|
||||
if [[ "${{ github.event_name }}" == "pull_request" ]]; then
|
||||
echo "TRIGGER_REASON=pr-${{ github.event.pull_request.number }}" >> $GITHUB_ENV
|
||||
elif [[ "${{ github.event_name }}" == "workflow_dispatch" ]]; then
|
||||
echo "TRIGGER_REASON=manual-${{ github.event.inputs.reason }}" >> $GITHUB_ENV
|
||||
else
|
||||
echo "TRIGGER_REASON=nightly-scheduled" >> $GITHUB_ENV
|
||||
fi
|
||||
|
||||
- name: Comment with results and artifact link
|
||||
id: create_comment
|
||||
uses: KeisukeYamashita/create-comment@v1
|
||||
with:
|
||||
# if triggered by PR, use PR number, otherwise use 5318 as fallback issue number for manual triggers
|
||||
number: ${{ github.event_name == 'pull_request' && github.event.pull_request.number || 5318 }}
|
||||
unique: false
|
||||
comment: |
|
||||
Trigger by: ${{ github.event_name == 'pull_request' && format('Pull Request (integration-test label on PR #{0})', github.event.pull_request.number) || (github.event_name == 'workflow_dispatch' && format('Manual Trigger: {0}', github.event.inputs.reason)) || 'Nightly Scheduled Run' }}
|
||||
Commit: ${{ github.sha }}
|
||||
**Integration Tests Report (Haiku)**
|
||||
Haiku LLM Test Results:
|
||||
${{ env.INTEGRATION_TEST_REPORT_HAIKU }}
|
||||
---
|
||||
**Integration Tests Report (DeepSeek)**
|
||||
DeepSeek LLM Test Results:
|
||||
${{ env.INTEGRATION_TEST_REPORT_DEEPSEEK }}
|
||||
---
|
||||
Download testing outputs (includes both Haiku and DeepSeek results): [Download](${{ steps.upload_results_artifact.outputs.artifact-url }})
|
||||
39
.github/workflows/lint-fix.yml
vendored
39
.github/workflows/lint-fix.yml
vendored
@@ -5,10 +5,9 @@ on:
|
||||
types: [labeled]
|
||||
|
||||
jobs:
|
||||
# Frontend lint fixes
|
||||
lint-fix-frontend:
|
||||
lint-fix:
|
||||
if: github.event.label.name == 'lint-fix'
|
||||
name: Fix frontend linting issues
|
||||
name: Fix linting issues
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
contents: write
|
||||
@@ -21,6 +20,7 @@ jobs:
|
||||
fetch-depth: 0
|
||||
token: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
# Frontend lint fixes
|
||||
- name: Install Node.js 20
|
||||
uses: actions/setup-node@v4
|
||||
with:
|
||||
@@ -34,36 +34,7 @@ jobs:
|
||||
cd frontend
|
||||
npm run lint:fix
|
||||
|
||||
# Commit and push changes if any
|
||||
- name: Check for changes
|
||||
id: git-check
|
||||
run: |
|
||||
git diff --quiet || echo "changes=true" >> $GITHUB_OUTPUT
|
||||
- name: Commit and push if there are changes
|
||||
if: steps.git-check.outputs.changes == 'true'
|
||||
run: |
|
||||
git config --local user.email "openhands@all-hands.dev"
|
||||
git config --local user.name "OpenHands Bot"
|
||||
git add -A
|
||||
git commit -m "🤖 Auto-fix frontend linting issues"
|
||||
git push
|
||||
|
||||
# Python lint fixes
|
||||
lint-fix-python:
|
||||
if: github.event.label.name == 'lint-fix'
|
||||
name: Fix Python linting issues
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
contents: write
|
||||
pull-requests: write
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
ref: ${{ github.head_ref }}
|
||||
repository: ${{ github.event.pull_request.head.repo.full_name }}
|
||||
fetch-depth: 0
|
||||
token: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
# Python lint fixes
|
||||
- name: Set up python
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
@@ -87,5 +58,5 @@ jobs:
|
||||
git config --local user.email "openhands@all-hands.dev"
|
||||
git config --local user.name "OpenHands Bot"
|
||||
git add -A
|
||||
git commit -m "🤖 Auto-fix Python linting issues"
|
||||
git commit -m "🤖 Auto-fix linting issues"
|
||||
git push
|
||||
|
||||
3
.github/workflows/lint.yml
vendored
3
.github/workflows/lint.yml
vendored
@@ -30,11 +30,10 @@ jobs:
|
||||
run: |
|
||||
cd frontend
|
||||
npm install --frozen-lockfile
|
||||
- name: Lint and TypeScript compilation
|
||||
- name: Lint
|
||||
run: |
|
||||
cd frontend
|
||||
npm run lint
|
||||
npm run make-i18n && tsc
|
||||
|
||||
# Run lint on the python code
|
||||
lint-python:
|
||||
|
||||
6
.github/workflows/openhands-resolver.yml
vendored
6
.github/workflows/openhands-resolver.yml
vendored
@@ -16,11 +16,6 @@ on:
|
||||
type: string
|
||||
default: "main"
|
||||
description: "Target branch to pull and create PR against"
|
||||
base_container_image:
|
||||
required: false
|
||||
type: string
|
||||
default: ""
|
||||
description: "Custom sandbox env"
|
||||
secrets:
|
||||
LLM_MODEL:
|
||||
required: true
|
||||
@@ -144,7 +139,6 @@ jobs:
|
||||
|
||||
echo "MAX_ITERATIONS=${{ inputs.max_iterations || 50 }}" >> $GITHUB_ENV
|
||||
echo "SANDBOX_ENV_GITHUB_TOKEN=${{ secrets.GITHUB_TOKEN }}" >> $GITHUB_ENV
|
||||
echo "SANDBOX_ENV_BASE_CONTAINER_IMAGE=${{ inputs.base_container_image }}" >> $GITHUB_ENV
|
||||
|
||||
# Set branch variables
|
||||
echo "TARGET_BRANCH=${{ inputs.target_branch }}" >> $GITHUB_ENV
|
||||
|
||||
@@ -21,14 +21,14 @@ There are many ways that you can contribute:
|
||||
|
||||
1. **Download and use** OpenHands, and send [issues](https://github.com/All-Hands-AI/OpenHands/issues) when you encounter something that isn't working or a feature that you'd like to see.
|
||||
2. **Send feedback** after each session by [clicking the thumbs-up thumbs-down buttons](https://docs.all-hands.dev/modules/usage/feedback), so we can see where things are working and failing, and also build an open dataset for training code agents.
|
||||
3. **Improve the Codebase** by sending [PRs](#sending-pull-requests-to-openhands) (see details below). In particular, we have some [good first issues](https://github.com/All-Hands-AI/OpenHands/labels/good%20first%20issue) that may be ones to start on.
|
||||
3. **Improve the Codebase** by sending PRs (see details below). In particular, we have some [good first issues](https://github.com/All-Hands-AI/OpenHands/labels/good%20first%20issue) that may be ones to start on.
|
||||
|
||||
## What can I build?
|
||||
Here are a few ways you can help improve the codebase.
|
||||
|
||||
#### UI/UX
|
||||
We're always looking to improve the look and feel of the application. If you've got a small fix
|
||||
for something that's bugging you, feel free to open up a PR that changes the [`./frontend`](./frontend) directory.
|
||||
for something that's bugging you, feel free to open up a PR that changes the `./frontend` directory.
|
||||
|
||||
If you're looking to make a bigger change, add a new UI element, or significantly alter the style
|
||||
of the application, please open an issue first, or better, join the #frontend channel in our Slack
|
||||
@@ -46,7 +46,7 @@ We use the [SWE-bench](https://www.swebench.com/) benchmark to test our agent. Y
|
||||
channel in Slack to learn more.
|
||||
|
||||
#### Adding a new agent
|
||||
You may want to experiment with building new types of agents. You can add an agent to [`openhands/agenthub`](./openhands/agenthub)
|
||||
You may want to experiment with building new types of agents. You can add an agent to `openhands/agenthub`
|
||||
to help expand the capabilities of OpenHands.
|
||||
|
||||
#### Adding a new runtime
|
||||
@@ -57,8 +57,8 @@ If you work for a company that provides a cloud-based runtime, you could help us
|
||||
by implementing the [interface specified here](https://github.com/All-Hands-AI/OpenHands/blob/main/openhands/runtime/base.py).
|
||||
|
||||
#### Testing
|
||||
When you write code, it is also good to write tests. Please navigate to the [`./tests`](./tests) folder to see existing test suites.
|
||||
At the moment, we have two kinds of tests: [`unit`](./tests/unit) and [`integration`](./evaluation/integration_tests). Please refer to the README for each test suite. These tests also run on GitHub's continuous integration to ensure quality of the project.
|
||||
When you write code, it is also good to write tests. Please navigate to the `tests` folder to see existing test suites.
|
||||
At the moment, we have two kinds of tests: `unit` and `integration`. Please refer to the README for each test suite. These tests also run on GitHub's continuous integration to ensure quality of the project.
|
||||
|
||||
## Sending Pull Requests to OpenHands
|
||||
|
||||
@@ -103,7 +103,7 @@ Further, if you see an issue you like, please leave a "thumbs-up" or a comment,
|
||||
|
||||
### Making Pull Requests
|
||||
|
||||
We're generally happy to consider all [PRs](https://github.com/All-Hands-AI/OpenHands/pulls), with the evaluation process varying based on the type of change:
|
||||
We're generally happy to consider all PRs, with the evaluation process varying based on the type of change:
|
||||
|
||||
#### For Small Improvements
|
||||
|
||||
|
||||
2
Makefile
2
Makefile
@@ -133,7 +133,7 @@ install-python-dependencies:
|
||||
export HNSWLIB_NO_NATIVE=1; \
|
||||
poetry run pip install chroma-hnswlib; \
|
||||
fi
|
||||
@poetry install --without llama-index,evaluation
|
||||
@poetry install --without llama-index
|
||||
@if [ -f "/etc/manjaro-release" ]; then \
|
||||
echo "$(BLUE)Detected Manjaro Linux. Installing Playwright dependencies...$(RESET)"; \
|
||||
poetry run pip install playwright; \
|
||||
|
||||
@@ -12,7 +12,7 @@
|
||||
<a href="https://codecov.io/github/All-Hands-AI/OpenHands?branch=main"><img alt="CodeCov" src="https://img.shields.io/codecov/c/github/All-Hands-AI/OpenHands?style=for-the-badge&color=blue"></a>
|
||||
<a href="https://github.com/All-Hands-AI/OpenHands/blob/main/LICENSE"><img src="https://img.shields.io/github/license/All-Hands-AI/OpenHands?style=for-the-badge&color=blue" alt="MIT License"></a>
|
||||
<br/>
|
||||
<a href="https://join.slack.com/t/openhands-ai/shared_invite/zt-2vbfigwev-G03twSpXaErwzYVD4CFiBg"><img src="https://img.shields.io/badge/Slack-Join%20Us-red?logo=slack&logoColor=white&style=for-the-badge" alt="Join our Slack community"></a>
|
||||
<a href="https://join.slack.com/t/openhands-ai/shared_invite/zt-2tom0er4l-JeNUGHt_AxpEfIBstbLPiw"><img src="https://img.shields.io/badge/Slack-Join%20Us-red?logo=slack&logoColor=white&style=for-the-badge" alt="Join our Slack community"></a>
|
||||
<a href="https://discord.gg/ESHStjSjD4"><img src="https://img.shields.io/badge/Discord-Join%20Us-purple?logo=discord&logoColor=white&style=for-the-badge" alt="Join our Discord community"></a>
|
||||
<a href="https://github.com/All-Hands-AI/OpenHands/blob/main/CREDITS.md"><img src="https://img.shields.io/badge/Project-Credits-blue?style=for-the-badge&color=FFE165&logo=github&logoColor=white" alt="Credits"></a>
|
||||
<br/>
|
||||
@@ -82,7 +82,7 @@ troubleshooting resources, and advanced configuration options.
|
||||
OpenHands is a community-driven project, and we welcome contributions from everyone. We do most of our communication
|
||||
through Slack, so this is the best place to start, but we also are happy to have you contact us on Discord or Github:
|
||||
|
||||
- [Join our Slack workspace](https://join.slack.com/t/openhands-ai/shared_invite/zt-2vbfigwev-G03twSpXaErwzYVD4CFiBg) - Here we talk about research, architecture, and future development.
|
||||
- [Join our Slack workspace](https://join.slack.com/t/openhands-ai/shared_invite/zt-2tom0er4l-JeNUGHt_AxpEfIBstbLPiw) - Here we talk about research, architecture, and future development.
|
||||
- [Join our Discord server](https://discord.gg/ESHStjSjD4) - This is a community-run server for general discussion, questions, and feedback.
|
||||
- [Read or post Github Issues](https://github.com/All-Hands-AI/OpenHands/issues) - Check out the issues we're working on, or add your own ideas.
|
||||
|
||||
|
||||
@@ -5,24 +5,24 @@ FROM ubuntu:22.04 AS dind
|
||||
|
||||
# https://docs.docker.com/engine/install/ubuntu/
|
||||
RUN apt-get update && apt-get install -y \
|
||||
ca-certificates \
|
||||
curl \
|
||||
&& install -m 0755 -d /etc/apt/keyrings \
|
||||
&& curl -fsSL https://download.docker.com/linux/ubuntu/gpg -o /etc/apt/keyrings/docker.asc \
|
||||
&& chmod a+r /etc/apt/keyrings/docker.asc \
|
||||
&& echo \
|
||||
"deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.asc] https://download.docker.com/linux/ubuntu \
|
||||
$(. /etc/os-release && echo "$VERSION_CODENAME") stable" | tee /etc/apt/sources.list.d/docker.list > /dev/null
|
||||
ca-certificates \
|
||||
curl \
|
||||
&& install -m 0755 -d /etc/apt/keyrings \
|
||||
&& curl -fsSL https://download.docker.com/linux/ubuntu/gpg -o /etc/apt/keyrings/docker.asc \
|
||||
&& chmod a+r /etc/apt/keyrings/docker.asc \
|
||||
&& echo \
|
||||
"deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.asc] https://download.docker.com/linux/ubuntu \
|
||||
$(. /etc/os-release && echo "$VERSION_CODENAME") stable" | tee /etc/apt/sources.list.d/docker.list > /dev/null
|
||||
|
||||
RUN apt-get update && apt-get install -y \
|
||||
docker-ce \
|
||||
docker-ce-cli \
|
||||
containerd.io \
|
||||
docker-buildx-plugin \
|
||||
docker-compose-plugin \
|
||||
&& rm -rf /var/lib/apt/lists/* \
|
||||
&& apt-get clean \
|
||||
&& apt-get autoremove -y
|
||||
docker-ce \
|
||||
docker-ce-cli \
|
||||
containerd.io \
|
||||
docker-buildx-plugin \
|
||||
docker-compose-plugin \
|
||||
&& rm -rf /var/lib/apt/lists/* \
|
||||
&& apt-get clean \
|
||||
&& apt-get autoremove -y
|
||||
|
||||
###
|
||||
FROM dind AS openhands
|
||||
@@ -31,25 +31,25 @@ ENV DEBIAN_FRONTEND=noninteractive
|
||||
|
||||
#
|
||||
RUN apt-get update && apt-get install -y \
|
||||
bash \
|
||||
bash \
|
||||
build-essential \
|
||||
curl \
|
||||
git \
|
||||
git-lfs \
|
||||
software-properties-common \
|
||||
make \
|
||||
git \
|
||||
git-lfs \
|
||||
software-properties-common \
|
||||
make \
|
||||
netcat \
|
||||
sudo \
|
||||
wget \
|
||||
&& rm -rf /var/lib/apt/lists/* \
|
||||
&& apt-get clean \
|
||||
&& apt-get autoremove -y
|
||||
wget \
|
||||
&& rm -rf /var/lib/apt/lists/* \
|
||||
&& apt-get clean \
|
||||
&& apt-get autoremove -y
|
||||
|
||||
# https://github.com/cli/cli/blob/trunk/docs/install_linux.md
|
||||
RUN curl -fsSL https://cli.github.com/packages/githubcli-archive-keyring.gpg | dd of=/usr/share/keyrings/githubcli-archive-keyring.gpg \
|
||||
&& chmod go+r /usr/share/keyrings/githubcli-archive-keyring.gpg \
|
||||
&& echo "deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/githubcli-archive-keyring.gpg] https://cli.github.com/packages stable main" | tee /etc/apt/sources.list.d/github-cli.list > /dev/null \
|
||||
&& apt-get update && apt-get -y install \
|
||||
&& chmod go+r /usr/share/keyrings/githubcli-archive-keyring.gpg \
|
||||
&& echo "deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/githubcli-archive-keyring.gpg] https://cli.github.com/packages stable main" | tee /etc/apt/sources.list.d/github-cli.list > /dev/null \
|
||||
&& apt-get update && apt-get -y install \
|
||||
gh \
|
||||
&& rm -rf /var/lib/apt/lists/* \
|
||||
&& apt-get clean \
|
||||
@@ -92,17 +92,17 @@ EOF
|
||||
FROM openhands AS dev
|
||||
|
||||
RUN apt-get update && apt-get install -y \
|
||||
dnsutils \
|
||||
file \
|
||||
iproute2 \
|
||||
jq \
|
||||
lsof \
|
||||
ripgrep \
|
||||
silversearcher-ag \
|
||||
vim \
|
||||
&& rm -rf /var/lib/apt/lists/* \
|
||||
&& apt-get clean \
|
||||
&& apt-get autoremove -y
|
||||
dnsutils \
|
||||
file \
|
||||
iproute2 \
|
||||
jq \
|
||||
lsof \
|
||||
ripgrep \
|
||||
silversearcher-ag \
|
||||
vim \
|
||||
&& rm -rf /var/lib/apt/lists/* \
|
||||
&& apt-get clean \
|
||||
&& apt-get autoremove -y
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
|
||||
@@ -27,7 +27,7 @@ Pour plus de détails, veuillez consulter [ce document](https://github.com/All-H
|
||||
|
||||
Nous avons à la fois un espace de travail Slack pour la collaboration sur la construction d'OpenHands et un serveur Discord pour discuter de tout ce qui est lié, par exemple, à ce projet, LLM, agent, etc.
|
||||
|
||||
- [Espace de travail Slack](https://join.slack.com/t/openhands-ai/shared_invite/zt-2vbfigwev-G03twSpXaErwzYVD4CFiBg)
|
||||
- [Espace de travail Slack](https://join.slack.com/t/opendevin/shared_invite/zt-2oikve2hu-UDxHeo8nsE69y6T7yFX_BA)
|
||||
- [Serveur Discord](https://discord.gg/ESHStjSjD4)
|
||||
|
||||
Si vous souhaitez contribuer, n'hésitez pas à rejoindre notre communauté. Simplifions ensemble l'ingénierie logicielle !
|
||||
|
||||
@@ -27,7 +27,7 @@ OpenHands 是一个社区驱动的项目,我们欢迎每个人的贡献。无
|
||||
|
||||
我们有 Slack 工作区用于协作构建 OpenHands,也有 Discord 服务器用于讨论任何相关的内容,例如此项目、大语言模型、代理等。
|
||||
|
||||
- [Slack 工作区](https://join.slack.com/t/openhands-ai/shared_invite/zt-2vbfigwev-G03twSpXaErwzYVD4CFiBg)
|
||||
- [Slack 工作区](https://join.slack.com/t/opendevin/shared_invite/zt-2oikve2hu-UDxHeo8nsE69y6T7yFX_BA)
|
||||
- [Discord 服务器](https://discord.gg/ESHStjSjD4)
|
||||
|
||||
如果你想做出贡献,欢迎加入我们的社区。让我们一起简化软件工程!
|
||||
|
||||
@@ -1,12 +1,13 @@
|
||||
---
|
||||
id: configuration-options
|
||||
title: Configuration Options
|
||||
sidebar_label: Configuration Options
|
||||
---
|
||||
|
||||
# Configuration Options
|
||||
|
||||
This guide details all configuration options available for OpenHands, helping you customize its behavior and integrate it with other services.
|
||||
|
||||
:::note
|
||||
If you are running in [GUI Mode](https://docs.all-hands.dev/modules/usage/how-to/gui-mode), the settings available in the Settings UI will always
|
||||
take precedence.
|
||||
:::
|
||||
|
||||
---
|
||||
|
||||
# Table of Contents
|
||||
@@ -47,10 +48,10 @@ take precedence.
|
||||
- [Confirmation Mode](#confirmation-mode)
|
||||
- [Security Analyzer](#security-analyzer)
|
||||
|
||||
|
||||
---
|
||||
|
||||
## Core Configuration
|
||||
|
||||
The core configuration options are defined in the `[core]` section of the `config.toml` file.
|
||||
|
||||
**API Keys**
|
||||
@@ -177,11 +178,8 @@ The core configuration options are defined in the `[core]` section of the `confi
|
||||
- Description: JWT secret for authentication. Please set it to your own value.
|
||||
|
||||
## LLM Configuration
|
||||
|
||||
The LLM (Large Language Model) configuration options are defined in the `[llm]` section of the `config.toml` file.
|
||||
|
||||
To use these with the docker command, pass in `-e LLM_<option>`. Example: `-e LLM_NUM_RETRIES`.
|
||||
|
||||
**AWS Credentials**
|
||||
- `aws_access_key_id`
|
||||
- Type: `str`
|
||||
@@ -326,7 +324,6 @@ To use these with the docker command, pass in `-e LLM_<option>`. Example: `-e LL
|
||||
- Description: If model is vision capable, this option allows to disable image processing (useful for cost reduction)
|
||||
|
||||
## Agent Configuration
|
||||
|
||||
The agent configuration options are defined in the `[agent]` and `[agent.<agent_name>]` sections of the `config.toml` file.
|
||||
|
||||
**Microagent Configuration**
|
||||
@@ -385,11 +382,8 @@ The agent configuration options are defined in the `[agent]` and `[agent.<agent_
|
||||
- Description: A list of microagents to disable
|
||||
|
||||
## Sandbox Configuration
|
||||
|
||||
The sandbox configuration options are defined in the `[sandbox]` section of the `config.toml` file.
|
||||
|
||||
To use these with the docker command, pass in `-e SANDBOX_<option>`. Example: `-e SANDBOX_TIMEOUT`.
|
||||
|
||||
**Execution**
|
||||
- `timeout`
|
||||
- Type: `int`
|
||||
@@ -442,11 +436,8 @@ To use these with the docker command, pass in `-e SANDBOX_<option>`. Example: `-
|
||||
- Description: BrowserGym environment to use for evaluation
|
||||
|
||||
## Security Configuration
|
||||
|
||||
The security configuration options are defined in the `[security]` section of the `config.toml` file.
|
||||
|
||||
To use these with the docker command, pass in `-e SECURITY_<option>`. Example: `-e SECURITY_CONFIRMATION_MODE`.
|
||||
|
||||
**Confirmation Mode**
|
||||
- `confirmation_mode`
|
||||
- Type: `bool`
|
||||
@@ -459,6 +450,7 @@ To use these with the docker command, pass in `-e SECURITY_<option>`. Example: `
|
||||
- Default: `""`
|
||||
- Description: The security analyzer to use
|
||||
|
||||
|
||||
---
|
||||
|
||||
> **Note**: Adjust configurations carefully, especially for memory, security, and network-related settings to ensure optimal performance and security.
|
||||
|
||||
@@ -37,15 +37,12 @@ the [README for the OpenHands Resolver](https://github.com/All-Hands-AI/OpenHand
|
||||
|
||||
You can provide custom directions for OpenHands by following the [README for the resolver](https://github.com/All-Hands-AI/OpenHands/blob/main/openhands/resolver/README.md#providing-custom-instructions).
|
||||
|
||||
### Custom configurations
|
||||
### Configure custom macro
|
||||
|
||||
Github resolver will automatically check for valid [repository secrets](https://docs.github.com/en/actions/security-for-github-actions/security-guides/using-secrets-in-github-actions?tool=webui#creating-secrets-for-a-repository) or [repository variables](https://docs.github.com/en/actions/writing-workflows/choosing-what-your-workflow-does/store-information-in-variables#creating-configuration-variables-for-a-repository) to customize its behavior. The customization options you can set are:
|
||||
To customize the default macro (`@openhands-agent`):
|
||||
|
||||
| **Attribute name** | **Type** | **Purpose** | **Example** |
|
||||
| -------------------------------- | -------- | --------------------------------------------------------------------------------------------------- | ----------------------------------------------- |
|
||||
| `OPENHANDS_MAX_ITER` | Variable | Set max limit for agent iterations | `OPENHANDS_MAX_ITER=10` |
|
||||
| `OPENHANDS_MACRO` | Variable | Customize default macro for invoking the resolver | `OPENHANDS_MACRO=@resolveit` |
|
||||
| `OPENHANDS_BASE_CONTAINER_IMAGE` | Variable | Custom Sandbox ([learn more](https://docs.all-hands.dev/modules/usage/how-to/custom-sandbox-guide)) | `OPENHANDS_BASE_CONTAINER_IMAGE="custom_image"` |
|
||||
1. [Create a repository variable](https://docs.github.com/en/actions/writing-workflows/choosing-what-your-workflow-does/store-information-in-variables#creating-configuration-variables-for-a-repository) named `OPENHANDS_MACRO`
|
||||
2. Assign the variable a custom value
|
||||
|
||||
## Writing Effective .openhands_instructions Files
|
||||
|
||||
@@ -58,7 +55,6 @@ The `.openhands_instructions` file is a file that you can put in the root direct
|
||||
2. **Repository Structure**: Explain the key directories and their purposes, especially highlighting where different types of code (e.g., frontend, backend) are located.
|
||||
|
||||
3. **Development Workflows**: Document the essential commands for:
|
||||
|
||||
- Building and setting up the project
|
||||
- Running tests
|
||||
- Linting and code quality checks
|
||||
@@ -73,29 +69,24 @@ The `.openhands_instructions` file is a file that you can put in the root direct
|
||||
|
||||
```markdown
|
||||
# Repository Overview
|
||||
|
||||
[Brief description of the project]
|
||||
|
||||
## General Setup
|
||||
|
||||
- Main build command
|
||||
- Development environment setup
|
||||
- Pre-commit checks
|
||||
|
||||
## Backend
|
||||
|
||||
- Location and structure
|
||||
- Testing instructions
|
||||
- Environment requirements
|
||||
|
||||
## Frontend
|
||||
|
||||
- Setup prerequisites
|
||||
- Build and test commands
|
||||
- Environment variables
|
||||
|
||||
## Additional Guidelines
|
||||
|
||||
- Code style requirements
|
||||
- Special considerations
|
||||
- Common workflows
|
||||
|
||||
9
docs/package-lock.json
generated
9
docs/package-lock.json
generated
@@ -24,7 +24,7 @@
|
||||
"@docusaurus/module-type-aliases": "^3.5.1",
|
||||
"@docusaurus/tsconfig": "^3.6.3",
|
||||
"@docusaurus/types": "^3.5.1",
|
||||
"typescript": "~5.7.2"
|
||||
"typescript": "~5.6.3"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=18.0"
|
||||
@@ -16985,10 +16985,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/typescript": {
|
||||
"version": "5.7.2",
|
||||
"resolved": "https://registry.npmjs.org/typescript/-/typescript-5.7.2.tgz",
|
||||
"integrity": "sha512-i5t66RHxDvVN40HfDd1PsEThGNnlMCMT3jMUuoh9/0TaqWevNontacunWyN02LA9/fIbEWlcHZcgTKb9QoaLfg==",
|
||||
"license": "Apache-2.0",
|
||||
"version": "5.6.3",
|
||||
"resolved": "https://registry.npmjs.org/typescript/-/typescript-5.6.3.tgz",
|
||||
"integrity": "sha512-hjcS1mhfuyi4WW8IWtjP7brDrG2cuDZukyrYrSauoXGNgx0S7zceP07adYkJycEr56BOUTNPzbInooiN3fn1qw==",
|
||||
"bin": {
|
||||
"tsc": "bin/tsc",
|
||||
"tsserver": "bin/tsserver"
|
||||
|
||||
@@ -31,7 +31,7 @@
|
||||
"@docusaurus/module-type-aliases": "^3.5.1",
|
||||
"@docusaurus/tsconfig": "^3.6.3",
|
||||
"@docusaurus/types": "^3.5.1",
|
||||
"typescript": "~5.7.2"
|
||||
"typescript": "~5.6.3"
|
||||
},
|
||||
"browserslist": {
|
||||
"production": [
|
||||
|
||||
@@ -44,6 +44,17 @@ const sidebars: SidebarsConfig = {
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
type: 'category',
|
||||
label: 'Configuration Options',
|
||||
items: [
|
||||
{
|
||||
type: 'doc',
|
||||
label: 'Overview',
|
||||
id: 'usage/configuration-options',
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
type: 'category',
|
||||
label: 'Advanced Configuration',
|
||||
@@ -100,11 +111,6 @@ const sidebars: SidebarsConfig = {
|
||||
label: 'Runtime Configuration',
|
||||
id: 'usage/runtimes',
|
||||
},
|
||||
{
|
||||
type: 'doc',
|
||||
label: 'Configuration Options',
|
||||
id: 'usage/configuration-options',
|
||||
},
|
||||
{
|
||||
type: 'doc',
|
||||
label: 'Custom Sandbox',
|
||||
|
||||
@@ -8,7 +8,7 @@ function CustomFooter() {
|
||||
<footer className="custom-footer">
|
||||
<div className="footer-content">
|
||||
<div className="footer-icons">
|
||||
<a href="https://join.slack.com/t/openhands-ai/shared_invite/zt-2vbfigwev-G03twSpXaErwzYVD4CFiBg" target="_blank" rel="noopener noreferrer">
|
||||
<a href="https://join.slack.com/t/opendevin/shared_invite/zt-2oikve2hu-UDxHeo8nsE69y6T7yFX_BA" target="_blank" rel="noopener noreferrer">
|
||||
<FaSlack />
|
||||
</a>
|
||||
<a href="https://discord.gg/ESHStjSjD4" target="_blank" rel="noopener noreferrer">
|
||||
|
||||
@@ -23,7 +23,7 @@ export function HomepageHeader() {
|
||||
<a href="https://codecov.io/github/All-Hands-AI/OpenHands?branch=main"><img alt="CodeCov" src="https://img.shields.io/codecov/c/github/All-Hands-AI/OpenHands?style=for-the-badge&color=blue" /></a>
|
||||
<a href="https://github.com/All-Hands-AI/OpenHands/blob/main/LICENSE"><img src="https://img.shields.io/github/license/All-Hands-AI/OpenHands?style=for-the-badge&color=blue" alt="MIT License" /></a>
|
||||
<br/>
|
||||
<a href="https://join.slack.com/t/openhands-ai/shared_invite/zt-2vbfigwev-G03twSpXaErwzYVD4CFiBg"><img src="https://img.shields.io/badge/Slack-Join%20Us-red?logo=slack&logoColor=white&style=for-the-badge" alt="Join our Slack community" /></a>
|
||||
<a href="https://join.slack.com/t/opendevin/shared_invite/zt-2oikve2hu-UDxHeo8nsE69y6T7yFX_BA"><img src="https://img.shields.io/badge/Slack-Join%20Us-red?logo=slack&logoColor=white&style=for-the-badge" alt="Join our Slack community" /></a>
|
||||
<a href="https://discord.gg/ESHStjSjD4"><img src="https://img.shields.io/badge/Discord-Join%20Us-purple?logo=discord&logoColor=white&style=for-the-badge" alt="Join our Discord community" /></a>
|
||||
<a href="https://github.com/All-Hands-AI/OpenHands/blob/main/CREDITS.md"><img src="https://img.shields.io/badge/Project-Credits-blue?style=for-the-badge&color=FFE165&logo=github&logoColor=white" alt="Credits" /></a>
|
||||
<br/>
|
||||
|
||||
10172
docs/yarn.lock
Normal file
10172
docs/yarn.lock
Normal file
File diff suppressed because it is too large
Load Diff
@@ -6,9 +6,9 @@ This folder contains code and resources to run experiments and evaluations.
|
||||
|
||||
### Setup
|
||||
|
||||
Before starting evaluation, follow the instructions [here](https://github.com/All-Hands-AI/OpenHands/blob/main/Development.md) to setup your local development environment and LLM.
|
||||
Before starting evaluation, follow the instructions here [here](https://github.com/All-Hands-AI/OpenHands/blob/main/Development.md) to setup your local development environment and LLM.
|
||||
|
||||
Once you are done with setup, you can follow the benchmark-specific instructions in each subdirectory of the [evaluation directory](#supported-benchmarks).
|
||||
Once you are done with setup, you can follow the benchmark-specific instructions in each subdirectory of the evaluation directory.
|
||||
Generally these will involve running `run_infer.py` to perform inference with the agents.
|
||||
|
||||
### Implementing and Evaluating an Agent
|
||||
@@ -42,7 +42,7 @@ temperature = 0.0
|
||||
|
||||
## Supported Benchmarks
|
||||
|
||||
The OpenHands evaluation harness supports a wide variety of benchmarks across [software engineering](#software-engineering), [web browsing](#web-browsing), and [miscellaneous assistance](#misc-assistance) tasks.
|
||||
The OpenHands evaluation harness supports a wide variety of benchmarks across software engineering, web browsing, and miscellaneous assistance tasks.
|
||||
|
||||
### Software Engineering
|
||||
|
||||
@@ -83,7 +83,7 @@ You can start your own fork of [our huggingface evaluation outputs](https://hugg
|
||||
|
||||
To learn more about how to integrate your benchmark into OpenHands, check out [tutorial here](https://docs.all-hands.dev/modules/usage/how-to/evaluation-harness). Briefly,
|
||||
|
||||
- Each subfolder contains a specific benchmark or experiment. For example, [`evaluation/benchmarks/swe_bench`](./benchmarks/swe_bench) should contain
|
||||
- Each subfolder contains a specific benchmark or experiment. For example, `evaluation/benchmarks/swe_bench` should contain
|
||||
all the preprocessing/evaluation/analysis scripts.
|
||||
- Raw data and experimental records should not be stored within this repo.
|
||||
- For model outputs, they should be stored at [this huggingface space](https://huggingface.co/spaces/OpenHands/evaluation) for visualization.
|
||||
|
||||
@@ -21,7 +21,7 @@ if [ -z "$AGENT" ]; then
|
||||
AGENT="CodeActAgent"
|
||||
fi
|
||||
|
||||
get_openhands_version
|
||||
get_agent_version
|
||||
|
||||
if [ -z "$DATASET" ]; then
|
||||
echo "Dataset not specified, use default 'things'"
|
||||
@@ -34,9 +34,12 @@ if [ -z "$OPENAI_API_KEY" ]; then
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# IMPORTANT: Because Agent's prompt changes fairly often in the rapidly evolving codebase of OpenHands
|
||||
# We need to track the version of Agent in the evaluation to make sure results are comparable
|
||||
AGENT_VERSION=v$(poetry run python -c "import openhands.agenthub; from openhands.controller.agent import Agent; print(Agent.get_cls('$AGENT').VERSION)")
|
||||
|
||||
echo "AGENT: $AGENT"
|
||||
echo "OPENHANDS_VERSION: $OPENHANDS_VERSION"
|
||||
echo "AGENT_VERSION: $AGENT_VERSION"
|
||||
echo "MODEL_CONFIG: $MODEL_CONFIG"
|
||||
echo "DATASET: $DATASET"
|
||||
|
||||
@@ -48,7 +51,7 @@ COMMAND="poetry run python evaluation/benchmarks/EDA/run_infer.py \
|
||||
--max-iterations 20 \
|
||||
--OPENAI_API_KEY $OPENAI_API_KEY \
|
||||
--eval-num-workers $NUM_WORKERS \
|
||||
--eval-note ${OPENHANDS_VERSION}_${DATASET}"
|
||||
--eval-note ${AGENT_VERSION}_${DATASET}"
|
||||
|
||||
if [ -n "$EVAL_LIMIT" ]; then
|
||||
echo "EVAL_LIMIT: $EVAL_LIMIT"
|
||||
|
||||
@@ -20,10 +20,10 @@ if [ -z "$AGENT" ]; then
|
||||
AGENT="CodeActAgent"
|
||||
fi
|
||||
|
||||
get_openhands_version
|
||||
get_agent_version
|
||||
|
||||
echo "AGENT: $AGENT"
|
||||
echo "OPENHANDS_VERSION: $OPENHANDS_VERSION"
|
||||
echo "AGENT_VERSION: $AGENT_VERSION"
|
||||
echo "MODEL_CONFIG: $MODEL_CONFIG"
|
||||
|
||||
COMMAND="export PYTHONPATH=evaluation/benchmarks/agent_bench:\$PYTHONPATH && poetry run python evaluation/benchmarks/agent_bench/run_infer.py \
|
||||
@@ -31,7 +31,7 @@ COMMAND="export PYTHONPATH=evaluation/benchmarks/agent_bench:\$PYTHONPATH && poe
|
||||
--llm-config $MODEL_CONFIG \
|
||||
--max-iterations 30 \
|
||||
--eval-num-workers $NUM_WORKERS \
|
||||
--eval-note $OPENHANDS_VERSION"
|
||||
--eval-note $AGENT_VERSION"
|
||||
|
||||
if [ -n "$EVAL_LIMIT" ]; then
|
||||
echo "EVAL_LIMIT: $EVAL_LIMIT"
|
||||
|
||||
@@ -21,13 +21,13 @@ if [ -z "$AGENT" ]; then
|
||||
AGENT="CodeActAgent"
|
||||
fi
|
||||
|
||||
get_openhands_version
|
||||
get_agent_version
|
||||
|
||||
echo "AGENT: $AGENT"
|
||||
echo "OPENHANDS_VERSION: $OPENHANDS_VERSION"
|
||||
echo "AGENT_VERSION: $AGENT_VERSION"
|
||||
echo "MODEL_CONFIG: $MODEL_CONFIG"
|
||||
|
||||
EVAL_NOTE=$OPENHANDS_VERSION
|
||||
EVAL_NOTE=$AGENT_VERSION
|
||||
|
||||
# Default to NOT use unit tests.
|
||||
if [ -z "$USE_UNIT_TESTS" ]; then
|
||||
|
||||
@@ -21,10 +21,10 @@ if [ -z "$AGENT" ]; then
|
||||
AGENT="CodeActAgent"
|
||||
fi
|
||||
|
||||
get_openhands_version
|
||||
get_agent_version
|
||||
|
||||
echo "AGENT: $AGENT"
|
||||
echo "OPENHANDS_VERSION: $OPENHANDS_VERSION"
|
||||
echo "AGENT_VERSION: $AGENT_VERSION"
|
||||
echo "MODEL_CONFIG: $MODEL_CONFIG"
|
||||
echo "DATASET: $DATASET"
|
||||
|
||||
@@ -33,7 +33,7 @@ COMMAND="poetry run python evaluation/benchmarks/biocoder/run_infer.py \
|
||||
--llm-config $MODEL_CONFIG \
|
||||
--max-iterations 10 \
|
||||
--eval-num-workers $NUM_WORKERS \
|
||||
--eval-note ${OPENHANDS_VERSION}_${DATASET}"
|
||||
--eval-note ${AGENT_VERSION}_${DATASET}"
|
||||
|
||||
if [ -n "$EVAL_LIMIT" ]; then
|
||||
echo "EVAL_LIMIT: $EVAL_LIMIT"
|
||||
|
||||
@@ -20,10 +20,10 @@ if [ -z "$AGENT" ]; then
|
||||
AGENT="CodeActAgent"
|
||||
fi
|
||||
|
||||
get_openhands_version
|
||||
get_agent_version
|
||||
|
||||
echo "AGENT: $AGENT"
|
||||
echo "OPENHANDS_VERSION: $OPENHANDS_VERSION"
|
||||
echo "AGENT_VERSION: $AGENT_VERSION"
|
||||
echo "MODEL_CONFIG: $MODEL_CONFIG"
|
||||
|
||||
COMMAND="poetry run python evaluation/benchmarks/bird/run_infer.py \
|
||||
@@ -31,7 +31,7 @@ COMMAND="poetry run python evaluation/benchmarks/bird/run_infer.py \
|
||||
--llm-config $MODEL_CONFIG \
|
||||
--max-iterations 5 \
|
||||
--eval-num-workers $NUM_WORKERS \
|
||||
--eval-note $OPENHANDS_VERSION" \
|
||||
--eval-note $AGENT_VERSION" \
|
||||
|
||||
if [ -n "$EVAL_LIMIT" ]; then
|
||||
echo "EVAL_LIMIT: $EVAL_LIMIT"
|
||||
|
||||
@@ -20,13 +20,13 @@ if [ -z "$AGENT" ]; then
|
||||
AGENT="CodeActAgent"
|
||||
fi
|
||||
|
||||
get_openhands_version
|
||||
get_agent_version
|
||||
|
||||
echo "AGENT: $AGENT"
|
||||
echo "OPENHANDS_VERSION: $OPENHANDS_VERSION"
|
||||
echo "AGENT_VERSION: $AGENT_VERSION"
|
||||
echo "MODEL_CONFIG: $MODEL_CONFIG"
|
||||
|
||||
EVAL_NOTE="$OPENHANDS_VERSION"
|
||||
EVAL_NOTE="$AGENT_VERSION"
|
||||
|
||||
COMMAND="poetry run python evaluation/benchmarks/browsing_delegation/run_infer.py \
|
||||
--agent-cls $AGENT \
|
||||
|
||||
@@ -61,10 +61,10 @@ echo "USE_INSTANCE_IMAGE: $USE_INSTANCE_IMAGE"
|
||||
export RUN_WITH_BROWSING=$RUN_WITH_BROWSING
|
||||
echo "RUN_WITH_BROWSING: $RUN_WITH_BROWSING"
|
||||
|
||||
get_openhands_version
|
||||
get_agent_version
|
||||
|
||||
echo "AGENT: $AGENT"
|
||||
echo "OPENHANDS_VERSION: $OPENHANDS_VERSION"
|
||||
echo "AGENT_VERSION: $AGENT_VERSION"
|
||||
echo "MODEL_CONFIG: $MODEL_CONFIG"
|
||||
echo "DATASET: $DATASET"
|
||||
echo "HF SPLIT: $SPLIT"
|
||||
@@ -75,7 +75,7 @@ if [ -z "$USE_HINT_TEXT" ]; then
|
||||
export USE_HINT_TEXT=false
|
||||
fi
|
||||
echo "USE_HINT_TEXT: $USE_HINT_TEXT"
|
||||
EVAL_NOTE="$OPENHANDS_VERSION"
|
||||
EVAL_NOTE="$AGENT_VERSION"
|
||||
# if not using Hint, add -no-hint to the eval note
|
||||
if [ "$USE_HINT_TEXT" = false ]; then
|
||||
EVAL_NOTE="$EVAL_NOTE-no-hint"
|
||||
|
||||
@@ -23,10 +23,10 @@ if [ -z "$AGENT" ]; then
|
||||
AGENT="CodeActAgent"
|
||||
fi
|
||||
|
||||
get_openhands_version
|
||||
get_agent_version
|
||||
|
||||
echo "AGENT: $AGENT"
|
||||
echo "OPENHANDS_VERSION: $OPENHANDS_VERSION"
|
||||
echo "AGENT_VERSION: $AGENT_VERSION"
|
||||
echo "MODEL_CONFIG: $MODEL_CONFIG"
|
||||
|
||||
COMMAND="poetry run python evaluation/benchmarks/discoverybench/run_infer.py \
|
||||
@@ -35,7 +35,7 @@ COMMAND="poetry run python evaluation/benchmarks/discoverybench/run_infer.py \
|
||||
--max-iterations 10 \
|
||||
--max-chars 10000000 \
|
||||
--eval-num-workers $NUM_WORKERS \
|
||||
--eval-note $OPENHANDS_VERSION"
|
||||
--eval-note $AGENT_VERSION"
|
||||
|
||||
if [ -n "$EVAL_LIMIT" ]; then
|
||||
echo "EVAL_LIMIT: $EVAL_LIMIT"
|
||||
|
||||
@@ -21,17 +21,17 @@ if [ -z "$AGENT" ]; then
|
||||
AGENT="CodeActAgent"
|
||||
fi
|
||||
|
||||
get_openhands_version
|
||||
get_agent_version
|
||||
|
||||
if [ -z "$LEVELS" ]; then
|
||||
LEVELS="2023_level1"
|
||||
echo "Levels not specified, use default $LEVELS"
|
||||
fi
|
||||
|
||||
get_openhands_version
|
||||
get_agent_version
|
||||
|
||||
echo "AGENT: $AGENT"
|
||||
echo "OPENHANDS_VERSION: $OPENHANDS_VERSION"
|
||||
echo "AGENT_VERSION: $AGENT_VERSION"
|
||||
echo "MODEL_CONFIG: $MODEL_CONFIG"
|
||||
echo "LEVELS: $LEVELS"
|
||||
|
||||
@@ -42,7 +42,7 @@ COMMAND="poetry run python ./evaluation/benchmarks/gaia/run_infer.py \
|
||||
--level $LEVELS \
|
||||
--data-split validation \
|
||||
--eval-num-workers $NUM_WORKERS \
|
||||
--eval-note ${OPENHANDS_VERSION}_${LEVELS}"
|
||||
--eval-note ${AGENT_VERSION}_${LEVELS}"
|
||||
|
||||
if [ -n "$EVAL_LIMIT" ]; then
|
||||
echo "EVAL_LIMIT: $EVAL_LIMIT"
|
||||
|
||||
@@ -21,7 +21,7 @@ if [ -z "$AGENT" ]; then
|
||||
AGENT="CodeActAgent"
|
||||
fi
|
||||
|
||||
get_openhands_version
|
||||
get_agent_version
|
||||
|
||||
if [ -z "$HUBS" ]; then
|
||||
HUBS="hf,torch,tf"
|
||||
@@ -29,7 +29,7 @@ if [ -z "$HUBS" ]; then
|
||||
fi
|
||||
|
||||
echo "AGENT: $AGENT"
|
||||
echo "OPENHANDS_VERSION: $OPENHANDS_VERSION"
|
||||
echo "AGENT_VERSION: $AGENT_VERSION"
|
||||
echo "MODEL_CONFIG: $MODEL_CONFIG"
|
||||
echo "HUBS: $HUBS"
|
||||
|
||||
@@ -40,7 +40,7 @@ COMMAND="poetry run python evaluation/benchmarks/gorilla/run_infer.py \
|
||||
--hubs $HUBS \
|
||||
--data-split validation \
|
||||
--eval-num-workers $NUM_WORKERS \
|
||||
--eval-note ${OPENHANDS_VERSION}_${LEVELS}"
|
||||
--eval-note ${AGENT_VERSION}_${LEVELS}"
|
||||
|
||||
if [ -n "$EVAL_LIMIT" ]; then
|
||||
echo "EVAL_LIMIT: $EVAL_LIMIT"
|
||||
|
||||
@@ -27,10 +27,10 @@ if [ -z "$DATA_SPLIT" ]; then
|
||||
DATA_SPLIT="gpqa_diamond"
|
||||
fi
|
||||
|
||||
get_openhands_version
|
||||
get_agent_version
|
||||
|
||||
echo "AGENT: $AGENT"
|
||||
echo "OPENHANDS_VERSION: $OPENHANDS_VERSION"
|
||||
echo "AGENT_VERSION: $AGENT_VERSION"
|
||||
echo "MODEL_CONFIG: $MODEL_CONFIG"
|
||||
|
||||
COMMAND="poetry run python evaluation/benchmarks/gpqa/run_infer.py \
|
||||
@@ -39,7 +39,7 @@ COMMAND="poetry run python evaluation/benchmarks/gpqa/run_infer.py \
|
||||
--max-iterations 10 \
|
||||
--eval-num-workers $NUM_WORKERS \
|
||||
--data-split $DATA_SPLIT \
|
||||
--eval-note $OPENHANDS_VERSION"
|
||||
--eval-note $AGENT_VERSION"
|
||||
|
||||
if [ -n "$EVAL_LIMIT" ]; then
|
||||
echo "EVAL_LIMIT: $EVAL_LIMIT"
|
||||
|
||||
@@ -58,10 +58,10 @@ if [ -z "$AGENT" ]; then
|
||||
AGENT="CodeActAgent"
|
||||
fi
|
||||
|
||||
get_openhands_version
|
||||
get_agent_version
|
||||
|
||||
echo "AGENT: $AGENT"
|
||||
echo "OPENHANDS_VERSION: $OPENHANDS_VERSION"
|
||||
echo "AGENT_VERSION: $AGENT_VERSION"
|
||||
echo "MODEL_CONFIG: $MODEL_CONFIG"
|
||||
|
||||
COMMAND="poetry run python evaluation/benchmarks/humanevalfix/run_infer.py \
|
||||
@@ -69,7 +69,7 @@ COMMAND="poetry run python evaluation/benchmarks/humanevalfix/run_infer.py \
|
||||
--llm-config $MODEL_CONFIG \
|
||||
--max-iterations 10 \
|
||||
--eval-num-workers $NUM_WORKERS \
|
||||
--eval-note $OPENHANDS_VERSION"
|
||||
--eval-note $AGENT_VERSION"
|
||||
|
||||
if [ -n "$EVAL_LIMIT" ]; then
|
||||
echo "EVAL_LIMIT: $EVAL_LIMIT"
|
||||
|
||||
@@ -28,10 +28,10 @@ if [ -z "$DATASET" ]; then
|
||||
DATASET="ProofWriter"
|
||||
fi
|
||||
|
||||
get_openhands_version
|
||||
get_agent_version
|
||||
|
||||
echo "AGENT: $AGENT"
|
||||
echo "OPENHANDS_VERSION: $OPENHANDS_VERSION"
|
||||
echo "AGENT_VERSION: $AGENT_VERSION"
|
||||
echo "MODEL_CONFIG: $MODEL_CONFIG"
|
||||
|
||||
COMMAND="poetry run python evaluation/benchmarks/logic_reasoning/run_infer.py \
|
||||
@@ -40,7 +40,7 @@ COMMAND="poetry run python evaluation/benchmarks/logic_reasoning/run_infer.py \
|
||||
--dataset $DATASET \
|
||||
--max-iterations 10 \
|
||||
--eval-num-workers $NUM_WORKERS \
|
||||
--eval-note $OPENHANDS_VERSION"
|
||||
--eval-note $AGENT_VERSION"
|
||||
|
||||
if [ -n "$EVAL_LIMIT" ]; then
|
||||
echo "EVAL_LIMIT: $EVAL_LIMIT"
|
||||
|
||||
@@ -25,13 +25,13 @@ if [ -z "$AGENT" ]; then
|
||||
AGENT="BrowsingAgent"
|
||||
fi
|
||||
|
||||
get_openhands_version
|
||||
get_agent_version
|
||||
|
||||
echo "AGENT: $AGENT"
|
||||
echo "OPENHANDS_VERSION: $OPENHANDS_VERSION"
|
||||
echo "AGENT_VERSION: $AGENT_VERSION"
|
||||
echo "MODEL_CONFIG: $MODEL_CONFIG"
|
||||
|
||||
EVAL_NOTE="${OPENHANDS_VERSION}_${NOTE}"
|
||||
EVAL_NOTE="${AGENT_VERSION}_${NOTE}"
|
||||
|
||||
COMMAND="export PYTHONPATH=evaluation/benchmarks/miniwob:\$PYTHONPATH && poetry run python evaluation/benchmarks/miniwob/run_infer.py \
|
||||
--agent-cls $AGENT \
|
||||
|
||||
@@ -18,10 +18,10 @@ checkout_eval_branch
|
||||
# Only 'CodeActAgent' is supported for MINT now
|
||||
AGENT="CodeActAgent"
|
||||
|
||||
get_openhands_version
|
||||
get_agent_version
|
||||
|
||||
echo "AGENT: $AGENT"
|
||||
echo "OPENHANDS_VERSION: $OPENHANDS_VERSION"
|
||||
echo "AGENT_VERSION: $AGENT_VERSION"
|
||||
|
||||
export PYTHONPATH=$(pwd)
|
||||
|
||||
|
||||
@@ -26,10 +26,10 @@ if [ -z "$AGENT" ]; then
|
||||
AGENT="CodeActAgent"
|
||||
fi
|
||||
|
||||
get_openhands_version
|
||||
get_agent_version
|
||||
|
||||
echo "AGENT: $AGENT"
|
||||
echo "OPENHANDS_VERSION: $OPENHANDS_VERSION"
|
||||
echo "AGENT_VERSION: $AGENT_VERSION"
|
||||
echo "MODEL_CONFIG: $MODEL_CONFIG"
|
||||
|
||||
COMMAND="poetry run python evaluation/benchmarks/ml_bench/run_infer.py \
|
||||
@@ -37,7 +37,7 @@ COMMAND="poetry run python evaluation/benchmarks/ml_bench/run_infer.py \
|
||||
--llm-config $MODEL_CONFIG \
|
||||
--max-iterations 10 \
|
||||
--eval-num-workers $NUM_WORKERS \
|
||||
--eval-note $OPENHANDS_VERSION"
|
||||
--eval-note $AGENT_VERSION"
|
||||
|
||||
if [ -n "$EVAL_LIMIT" ]; then
|
||||
echo "EVAL_LIMIT: $EVAL_LIMIT"
|
||||
|
||||
@@ -26,10 +26,10 @@ if [ -z "$USE_KNOWLEDGE" ]; then
|
||||
USE_KNOWLEDGE=false
|
||||
fi
|
||||
|
||||
get_openhands_version
|
||||
get_agent_version
|
||||
|
||||
echo "AGENT: $AGENT"
|
||||
echo "OPENHANDS_VERSION: $OPENHANDS_VERSION"
|
||||
echo "AGENT_VERSION: $AGENT_VERSION"
|
||||
echo "MODEL_CONFIG: $MODEL_CONFIG"
|
||||
|
||||
COMMAND="poetry run python evaluation/benchmarks/scienceagentbench/run_infer.py \
|
||||
@@ -38,7 +38,7 @@ COMMAND="poetry run python evaluation/benchmarks/scienceagentbench/run_infer.py
|
||||
--use_knowledge $USE_KNOWLEDGE \
|
||||
--max-iterations 30 \
|
||||
--eval-num-workers $NUM_WORKERS \
|
||||
--eval-note $OPENHANDS_VERSION" \
|
||||
--eval-note $AGENT_VERSION" \
|
||||
|
||||
if [ -n "$EVAL_LIMIT" ]; then
|
||||
echo "EVAL_LIMIT: $EVAL_LIMIT"
|
||||
|
||||
@@ -1,12 +1,8 @@
|
||||
#!/usr/bin/env python3
|
||||
import argparse
|
||||
import glob
|
||||
import json
|
||||
import os
|
||||
from collections import Counter
|
||||
|
||||
import pandas as pd
|
||||
|
||||
from openhands.events.serialization import event_from_dict
|
||||
from openhands.events.utils import get_pairs_from_events
|
||||
|
||||
@@ -14,21 +10,25 @@ ERROR_KEYWORDS = [
|
||||
'Agent encountered an error while processing the last action',
|
||||
'APIError',
|
||||
'Action execution failed',
|
||||
'litellm.Timeout: APITimeoutError',
|
||||
]
|
||||
|
||||
if __name__ == '__main__':
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument('output_file', type=str, help='The file to summarize')
|
||||
args = parser.parse_args()
|
||||
|
||||
def process_file(file_path):
|
||||
with open(file_path, 'r') as file:
|
||||
with open(args.output_file, 'r') as file:
|
||||
lines = file.readlines()
|
||||
|
||||
num_lines = len(lines)
|
||||
num_error_lines = 0
|
||||
num_agent_stuck_in_loop = 0
|
||||
|
||||
num_resolved = 0
|
||||
num_empty_patch = 0
|
||||
num_unfinished_runs = 0
|
||||
|
||||
error_counter = Counter()
|
||||
|
||||
main_agent_cost = []
|
||||
editor_cost = []
|
||||
num_turns = []
|
||||
@@ -36,11 +36,6 @@ def process_file(file_path):
|
||||
for line in lines:
|
||||
_d = json.loads(line)
|
||||
|
||||
if 'metrics' not in _d or _d['metrics'] is None:
|
||||
# this is a failed run
|
||||
num_unfinished_runs += 1
|
||||
continue
|
||||
|
||||
# Cost
|
||||
costs = _d['metrics'].get('costs', [])
|
||||
_cur_main_agent_cost = 0
|
||||
@@ -94,180 +89,30 @@ def process_file(file_path):
|
||||
num_error_lines += 1
|
||||
break
|
||||
|
||||
return {
|
||||
'file_path': file_path,
|
||||
'total_instances': num_lines,
|
||||
'resolved': {
|
||||
'count': num_resolved,
|
||||
'percentage': (num_resolved / num_lines * 100) if num_lines > 0 else 0,
|
||||
},
|
||||
'empty_patches': {
|
||||
'count': num_empty_patch,
|
||||
'percentage': (num_empty_patch / num_lines * 100) if num_lines > 0 else 0,
|
||||
},
|
||||
'unfinished_runs': {
|
||||
'count': num_unfinished_runs,
|
||||
'percentage': (num_unfinished_runs / num_lines * 100)
|
||||
if num_lines > 0
|
||||
else 0,
|
||||
},
|
||||
'errors': {
|
||||
'total': num_error_lines,
|
||||
'percentage': (num_error_lines / num_lines * 100) if num_lines > 0 else 0,
|
||||
'stuck_in_loop': {
|
||||
'count': num_agent_stuck_in_loop,
|
||||
'percentage': (num_agent_stuck_in_loop / num_lines * 100)
|
||||
if num_lines > 0
|
||||
else 0,
|
||||
},
|
||||
'breakdown': {
|
||||
str(error): {
|
||||
'count': count,
|
||||
'percentage': (count / num_lines * 100) if num_lines > 0 else 0,
|
||||
}
|
||||
for error, count in error_counter.items()
|
||||
},
|
||||
},
|
||||
'statistics': {
|
||||
'avg_turns': sum(num_turns) / num_lines if num_lines > 0 else 0,
|
||||
'costs': {
|
||||
'main_agent': sum(main_agent_cost) / num_lines if num_lines > 0 else 0,
|
||||
'editor': sum(editor_cost) / num_lines if num_lines > 0 else 0,
|
||||
'total': (sum(main_agent_cost) + sum(editor_cost)) / num_lines
|
||||
if num_lines > 0
|
||||
else 0,
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
def aggregate_directory(input_path) -> pd.DataFrame:
|
||||
# Process all output.jsonl files in subdirectories
|
||||
pattern = os.path.join(input_path, '**/output.jsonl')
|
||||
files = glob.glob(pattern, recursive=True)
|
||||
print(f'Processing {len(files)} files from directory {input_path}')
|
||||
|
||||
# Process each file silently and collect results
|
||||
results = []
|
||||
for file_path in files:
|
||||
try:
|
||||
result = process_file(file_path)
|
||||
results.append(result)
|
||||
except Exception as e:
|
||||
print(f'Error processing {file_path}: {str(e)}')
|
||||
import traceback
|
||||
|
||||
traceback.print_exc()
|
||||
continue
|
||||
|
||||
# Convert results to pandas DataFrame and sort by resolve rate
|
||||
df = pd.DataFrame(results)
|
||||
|
||||
# Extract directory name from file path
|
||||
df['directory'] = df['file_path'].apply(
|
||||
lambda x: os.path.basename(os.path.dirname(x))
|
||||
# print the error counter (with percentage)
|
||||
print(
|
||||
f'Number of resolved: {num_resolved} / {num_lines} ({num_resolved / num_lines * 100:.2f}%)'
|
||||
)
|
||||
print(
|
||||
f'Number of empty patch: {num_empty_patch} / {num_lines} ({num_empty_patch / num_lines * 100:.2f}%)'
|
||||
)
|
||||
print(
|
||||
f'Number of error lines: {num_error_lines} / {num_lines} ({num_error_lines / num_lines * 100:.2f}%)'
|
||||
)
|
||||
print(
|
||||
f'Number of agent stuck in loop: {num_agent_stuck_in_loop} / {num_lines} ({num_agent_stuck_in_loop / num_lines * 100:.2f}%)'
|
||||
)
|
||||
assert len(num_turns) == num_lines
|
||||
assert len(main_agent_cost) == num_lines
|
||||
assert len(editor_cost) == num_lines
|
||||
print('## Statistics')
|
||||
print(f'Avg. num of turns per instance: {sum(num_turns) / num_lines:.2f}')
|
||||
print(f'Avg. agent cost per instance: {sum(main_agent_cost) / num_lines:.2f} USD')
|
||||
print(f'Avg. editor cost per instance: {sum(editor_cost) / num_lines:.2f} USD')
|
||||
print(
|
||||
f'Avg. total cost per instance: {(sum(main_agent_cost) + sum(editor_cost)) / num_lines:.2f} USD'
|
||||
)
|
||||
|
||||
df['resolve_rate'] = df['resolved'].apply(lambda x: x['percentage'])
|
||||
df['empty_patch_rate'] = df['empty_patches'].apply(lambda x: x['percentage'])
|
||||
df['unfinished_rate'] = df['unfinished_runs'].apply(lambda x: x['percentage'])
|
||||
df['avg_turns'] = df['statistics'].apply(lambda x: x['avg_turns'])
|
||||
df['error_rate'] = df['errors'].apply(lambda x: x['percentage'])
|
||||
df['avg_cost'] = df['statistics'].apply(lambda x: x['costs']['total'])
|
||||
|
||||
df = df.sort_values('resolve_rate', ascending=False)
|
||||
|
||||
return df
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument(
|
||||
'input_path', type=str, help='The file or directory to summarize'
|
||||
)
|
||||
parser.add_argument(
|
||||
'--output',
|
||||
type=str,
|
||||
help='Output JSONL file for results',
|
||||
default='summary_results.jsonl',
|
||||
)
|
||||
args = parser.parse_args()
|
||||
|
||||
if os.path.isdir(args.input_path):
|
||||
df = aggregate_directory(args.input_path)
|
||||
# Create the summary string
|
||||
columns = [
|
||||
'directory',
|
||||
'resolve_rate',
|
||||
'empty_patch_rate',
|
||||
'unfinished_rate',
|
||||
'error_rate',
|
||||
'avg_turns',
|
||||
'avg_cost',
|
||||
'total_instances',
|
||||
]
|
||||
summary_str = df[columns].to_string(
|
||||
float_format=lambda x: '{:.2f}'.format(x),
|
||||
formatters={
|
||||
'directory': lambda x: x[:90]
|
||||
}, # Truncate directory names to 20 chars
|
||||
index=False,
|
||||
)
|
||||
|
||||
# Print to console
|
||||
print('\nResults summary (sorted by resolve rate):')
|
||||
print(summary_str)
|
||||
|
||||
# Save to text file
|
||||
txt_output = args.output.rsplit('.', 1)[0] + '.txt'
|
||||
with open(txt_output, 'w') as f:
|
||||
f.write('Results summary (sorted by resolve rate):\n')
|
||||
f.write(summary_str)
|
||||
|
||||
# Save
|
||||
df.to_json(args.output, lines=True, orient='records')
|
||||
df[columns].to_csv(args.output.rsplit('.', 1)[0] + '.csv', index=False)
|
||||
else:
|
||||
# Process single file with detailed output
|
||||
results = []
|
||||
try:
|
||||
result = process_file(args.input_path)
|
||||
results.append(result)
|
||||
|
||||
# Print detailed results for single file
|
||||
print(f'\nResults for {args.input_path}:')
|
||||
print(
|
||||
f"Number of resolved: {result['resolved']['count']} / {result['total_instances']} ({result['resolved']['percentage']:.2f}%)"
|
||||
)
|
||||
print(
|
||||
f"Number of empty patch: {result['empty_patches']['count']} / {result['total_instances']} ({result['empty_patches']['percentage']:.2f}%)"
|
||||
)
|
||||
print(
|
||||
f"Number of error lines: {result['errors']['total']} / {result['total_instances']} ({result['errors']['percentage']:.2f}%)"
|
||||
)
|
||||
print(
|
||||
f"Number of agent stuck in loop: {result['errors']['stuck_in_loop']['count']} / {result['total_instances']} ({result['errors']['stuck_in_loop']['percentage']:.2f}%)"
|
||||
)
|
||||
print(
|
||||
f"Number of unfinished runs: {result['unfinished_runs']['count']} / {result['total_instances']} ({result['unfinished_runs']['percentage']:.2f}%)"
|
||||
)
|
||||
print('## Statistics')
|
||||
print(
|
||||
f"Avg. num of turns per instance: {result['statistics']['avg_turns']:.2f}"
|
||||
)
|
||||
print(
|
||||
f"Avg. agent cost per instance: {result['statistics']['costs']['main_agent']:.2f} USD"
|
||||
)
|
||||
print(
|
||||
f"Avg. editor cost per instance: {result['statistics']['costs']['editor']:.2f} USD"
|
||||
)
|
||||
print(
|
||||
f"Avg. total cost per instance: {result['statistics']['costs']['total']:.2f} USD"
|
||||
)
|
||||
|
||||
print('## Detailed error breakdown:')
|
||||
for error, data in result['errors']['breakdown'].items():
|
||||
print(f"{error}: {data['count']} ({data['percentage']:.2f}%)")
|
||||
|
||||
except Exception as e:
|
||||
print(f'Error processing {args.input_path}: {str(e)}')
|
||||
print('## Detailed error breakdown:')
|
||||
for error, count in error_counter.items():
|
||||
print(f'{error}: {count} ({count / num_lines * 100:.2f}%)')
|
||||
|
||||
@@ -55,10 +55,10 @@ echo "USE_INSTANCE_IMAGE: $USE_INSTANCE_IMAGE"
|
||||
export RUN_WITH_BROWSING=$RUN_WITH_BROWSING
|
||||
echo "RUN_WITH_BROWSING: $RUN_WITH_BROWSING"
|
||||
|
||||
get_openhands_version
|
||||
get_agent_version
|
||||
|
||||
echo "AGENT: $AGENT"
|
||||
echo "OPENHANDS_VERSION: $OPENHANDS_VERSION"
|
||||
echo "AGENT_VERSION: $AGENT_VERSION"
|
||||
echo "MODEL_CONFIG: $MODEL_CONFIG"
|
||||
echo "DATASET: $DATASET"
|
||||
echo "SPLIT: $SPLIT"
|
||||
@@ -68,7 +68,7 @@ if [ -z "$USE_HINT_TEXT" ]; then
|
||||
export USE_HINT_TEXT=false
|
||||
fi
|
||||
echo "USE_HINT_TEXT: $USE_HINT_TEXT"
|
||||
EVAL_NOTE="$OPENHANDS_VERSION"
|
||||
EVAL_NOTE="$AGENT_VERSION"
|
||||
# if not using Hint, add -no-hint to the eval note
|
||||
if [ "$USE_HINT_TEXT" = false ]; then
|
||||
EVAL_NOTE="$EVAL_NOTE-no-hint"
|
||||
|
||||
@@ -38,10 +38,10 @@ if [ -z "$WOLFRAM_APPID" ]; then
|
||||
echo "WOLFRAM_APPID not specified"
|
||||
fi
|
||||
|
||||
get_openhands_version
|
||||
get_agent_version
|
||||
|
||||
echo "AGENT: $AGENT"
|
||||
echo "OPENHANDS_VERSION: $OPENHANDS_VERSION"
|
||||
echo "AGENT_VERSION: $AGENT_VERSION"
|
||||
echo "MODEL_CONFIG: $MODEL_CONFIG"
|
||||
echo "DATASET: $DATASET"
|
||||
echo "HARDNESS: $HARDNESS"
|
||||
@@ -56,7 +56,7 @@ COMMAND="poetry run python evaluation/benchmarks/toolqa/run_infer.py \
|
||||
--wolfram_alpha_appid $WOLFRAM_APPID\
|
||||
--data-split validation \
|
||||
--eval-num-workers $NUM_WORKERS \
|
||||
--eval-note ${OPENHANDS_VERSION}_${LEVELS}"
|
||||
--eval-note ${AGENT_VERSION}_${LEVELS}"
|
||||
|
||||
if [ -n "$EVAL_LIMIT" ]; then
|
||||
echo "EVAL_LIMIT: $EVAL_LIMIT"
|
||||
|
||||
@@ -27,13 +27,13 @@ if [ -z "$AGENT" ]; then
|
||||
AGENT="BrowsingAgent"
|
||||
fi
|
||||
|
||||
get_openhands_version
|
||||
get_agent_version
|
||||
|
||||
echo "AGENT: $AGENT"
|
||||
echo "OPENHANDS_VERSION: $OPENHANDS_VERSION"
|
||||
echo "AGENT_VERSION: $AGENT_VERSION"
|
||||
echo "MODEL_CONFIG: $MODEL_CONFIG"
|
||||
|
||||
EVAL_NOTE="$OPENHANDS_VERSION"
|
||||
EVAL_NOTE="$AGENT_VERSION"
|
||||
|
||||
COMMAND="poetry run python evaluation/benchmarks/webarena/run_infer.py \
|
||||
--agent-cls $AGENT \
|
||||
|
||||
@@ -48,19 +48,13 @@ def get_config(
|
||||
# use default base_container_image
|
||||
enable_auto_lint=True,
|
||||
use_host_network=False,
|
||||
timeout=300,
|
||||
# Add platform to the sandbox config to solve issue 4401
|
||||
platform='linux/amd64',
|
||||
timeout=100,
|
||||
api_key=os.environ.get('ALLHANDS_API_KEY', None),
|
||||
remote_runtime_api_url=os.environ.get('SANDBOX_REMOTE_RUNTIME_API_URL'),
|
||||
keep_runtime_alive=False,
|
||||
remote_runtime_init_timeout=3600,
|
||||
),
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
# debug
|
||||
debug=True,
|
||||
)
|
||||
config.set_llm_config(
|
||||
update_llm_config_for_completions_logging(
|
||||
@@ -113,37 +107,31 @@ def process_instance(
|
||||
# =============================================
|
||||
# create sandbox and run the agent
|
||||
# =============================================
|
||||
|
||||
runtime: Runtime = create_runtime(config)
|
||||
call_async_from_sync(runtime.connect)
|
||||
try:
|
||||
test_class.initialize_runtime(runtime)
|
||||
|
||||
# Here's how you can run the agent (similar to the `main` function) and get the final task state
|
||||
state: State | None = asyncio.run(
|
||||
run_controller(
|
||||
config=config,
|
||||
initial_user_action=MessageAction(content=instruction),
|
||||
runtime=runtime,
|
||||
fake_user_response_fn=FAKE_RESPONSES[metadata.agent_class],
|
||||
)
|
||||
test_class.initialize_runtime(runtime)
|
||||
|
||||
# Here's how you can run the agent (similar to the `main` function) and get the final task state
|
||||
state: State | None = asyncio.run(
|
||||
run_controller(
|
||||
config=config,
|
||||
initial_user_action=MessageAction(content=instruction),
|
||||
runtime=runtime,
|
||||
fake_user_response_fn=FAKE_RESPONSES[metadata.agent_class],
|
||||
)
|
||||
if state is None:
|
||||
raise ValueError('State should not be None.')
|
||||
)
|
||||
if state is None:
|
||||
raise ValueError('State should not be None.')
|
||||
|
||||
# # =============================================
|
||||
# # result evaluation
|
||||
# # =============================================
|
||||
# # =============================================
|
||||
# # result evaluation
|
||||
# # =============================================
|
||||
|
||||
histories = state.history
|
||||
|
||||
# some basic check
|
||||
logger.info(f'Total events in history: {len(histories)}')
|
||||
assert len(histories) > 0, 'History should not be empty'
|
||||
|
||||
test_result: TestResult = test_class.verify_result(runtime, histories)
|
||||
metrics = state.metrics.get() if state.metrics else None
|
||||
finally:
|
||||
runtime.close()
|
||||
histories = [event_to_dict(event) for event in state.history]
|
||||
test_result: TestResult = test_class.verify_result(runtime, histories)
|
||||
metrics = state.metrics.get() if state.metrics else None
|
||||
|
||||
# Save the output
|
||||
output = EvalOutput(
|
||||
@@ -151,7 +139,7 @@ def process_instance(
|
||||
instance=instance.to_dict(),
|
||||
instruction=instruction,
|
||||
metadata=metadata,
|
||||
history=[event_to_dict(event) for event in histories],
|
||||
history=histories,
|
||||
metrics=metrics,
|
||||
error=state.last_error if state and state.last_error else None,
|
||||
test_result=test_result.model_dump(),
|
||||
@@ -218,8 +206,6 @@ if __name__ == '__main__':
|
||||
)
|
||||
|
||||
df = pd.read_json(output_file, lines=True, orient='records')
|
||||
|
||||
# record success and reason for failure for the final report
|
||||
df['success'] = df['test_result'].apply(lambda x: x['success'])
|
||||
df['reason'] = df['test_result'].apply(lambda x: x['reason'])
|
||||
logger.info('-' * 100)
|
||||
@@ -233,16 +219,9 @@ if __name__ == '__main__':
|
||||
)
|
||||
logger.info('-' * 100)
|
||||
|
||||
# record cost for each instance, with 3 decimal places
|
||||
df['cost'] = df['metrics'].apply(lambda x: round(x['accumulated_cost'], 3))
|
||||
logger.info(f'Total cost: USD {df["cost"].sum():.2f}')
|
||||
|
||||
report_file = os.path.join(metadata.eval_output_dir, 'report.md')
|
||||
with open(report_file, 'w') as f:
|
||||
f.write(
|
||||
f'Success rate: {df["success"].mean():.2%} ({df["success"].sum()}/{len(df)})\n'
|
||||
)
|
||||
f.write(f'\nTotal cost: USD {df["cost"].sum():.2f}\n')
|
||||
f.write(
|
||||
df[['instance_id', 'success', 'reason', 'cost']].to_markdown(index=False)
|
||||
)
|
||||
f.write(df[['instance_id', 'success', 'reason']].to_markdown(index=False))
|
||||
|
||||
@@ -21,13 +21,13 @@ if [ -z "$AGENT" ]; then
|
||||
AGENT="CodeActAgent"
|
||||
fi
|
||||
|
||||
get_openhands_version
|
||||
get_agent_version
|
||||
|
||||
echo "AGENT: $AGENT"
|
||||
echo "OPENHANDS_VERSION: $OPENHANDS_VERSION"
|
||||
echo "AGENT_VERSION: $AGENT_VERSION"
|
||||
echo "MODEL_CONFIG: $MODEL_CONFIG"
|
||||
|
||||
EVAL_NOTE=$OPENHANDS_VERSION
|
||||
EVAL_NOTE=$AGENT_VERSION
|
||||
|
||||
# Default to NOT use unit tests.
|
||||
if [ -z "$USE_UNIT_TESTS" ]; then
|
||||
|
||||
@@ -108,8 +108,6 @@ class Test(BaseIntegrationTest):
|
||||
|
||||
@classmethod
|
||||
def verify_result(cls, runtime: Runtime, histories: list[Event]) -> TestResult:
|
||||
from openhands.core.logger import openhands_logger as logger
|
||||
|
||||
# check if the "The answer is OpenHands is all you need!" is in any message
|
||||
message_actions = [
|
||||
event
|
||||
@@ -118,29 +116,19 @@ class Test(BaseIntegrationTest):
|
||||
event, (MessageAction, AgentFinishAction, AgentDelegateObservation)
|
||||
)
|
||||
]
|
||||
logger.debug(f'Total message-like events: {len(message_actions)}')
|
||||
|
||||
for event in message_actions:
|
||||
try:
|
||||
if isinstance(event, AgentDelegateObservation):
|
||||
content = event.content
|
||||
elif isinstance(event, AgentFinishAction):
|
||||
content = event.outputs.get('content', '')
|
||||
elif isinstance(event, MessageAction):
|
||||
content = event.content
|
||||
else:
|
||||
logger.warning(f'Unexpected event type: {type(event)}')
|
||||
continue
|
||||
if isinstance(event, AgentDelegateObservation):
|
||||
content = event.content
|
||||
elif isinstance(event, AgentFinishAction):
|
||||
content = event.outputs.get('content', '')
|
||||
elif isinstance(event, MessageAction):
|
||||
content = event.content
|
||||
else:
|
||||
raise ValueError(f'Unknown event type: {type(event)}')
|
||||
|
||||
if 'OpenHands is all you need!' in content:
|
||||
return TestResult(success=True)
|
||||
except Exception as e:
|
||||
logger.error(f'Error processing event: {e}')
|
||||
|
||||
logger.debug(
|
||||
f'Total messages: {len(message_actions)}. Messages: {message_actions}'
|
||||
)
|
||||
if 'OpenHands is all you need!' in content:
|
||||
return TestResult(success=True)
|
||||
return TestResult(
|
||||
success=False,
|
||||
reason=f'The answer is not found in any message. Total messages: {len(message_actions)}.',
|
||||
reason=f'The answer is not found in any message. Total messages: {len(message_actions)}. Messages: {message_actions}',
|
||||
)
|
||||
|
||||
@@ -14,9 +14,7 @@ class Test(BaseIntegrationTest):
|
||||
|
||||
@classmethod
|
||||
def verify_result(cls, runtime: Runtime, histories: list[Event]) -> TestResult:
|
||||
from openhands.core.logger import openhands_logger as logger
|
||||
|
||||
# check if the license information is in any message
|
||||
# check if the "The answer is OpenHands is all you need!" is in any message
|
||||
message_actions = [
|
||||
event
|
||||
for event in histories
|
||||
@@ -24,35 +22,23 @@ class Test(BaseIntegrationTest):
|
||||
event, (MessageAction, AgentFinishAction, AgentDelegateObservation)
|
||||
)
|
||||
]
|
||||
logger.info(f'Total message-like events: {len(message_actions)}')
|
||||
|
||||
for event in message_actions:
|
||||
try:
|
||||
if isinstance(event, AgentDelegateObservation):
|
||||
content = event.content
|
||||
elif isinstance(event, AgentFinishAction):
|
||||
content = event.outputs.get('content', '')
|
||||
if event.thought:
|
||||
content += f'\n\n{event.thought}'
|
||||
elif isinstance(event, MessageAction):
|
||||
content = event.content
|
||||
else:
|
||||
logger.warning(f'Unexpected event type: {type(event)}')
|
||||
continue
|
||||
if isinstance(event, AgentDelegateObservation):
|
||||
content = event.content
|
||||
elif isinstance(event, AgentFinishAction):
|
||||
content = event.outputs.get('content', '')
|
||||
elif isinstance(event, MessageAction):
|
||||
content = event.content
|
||||
else:
|
||||
raise ValueError(f'Unknown event type: {type(event)}')
|
||||
|
||||
if (
|
||||
'non-commercial' in content
|
||||
or 'MIT' in content
|
||||
or 'Apache 2.0' in content
|
||||
):
|
||||
return TestResult(success=True)
|
||||
except Exception as e:
|
||||
logger.error(f'Error processing event: {e}')
|
||||
|
||||
logger.debug(
|
||||
f'Total messages: {len(message_actions)}. Messages: {message_actions}'
|
||||
)
|
||||
if (
|
||||
'non-commercial' in content
|
||||
or 'MIT' in content
|
||||
or 'Apache 2.0' in content
|
||||
):
|
||||
return TestResult(success=True)
|
||||
return TestResult(
|
||||
success=False,
|
||||
reason=f'The answer is not found in any message. Total messages: {len(message_actions)}.',
|
||||
reason=f'The answer is not found in any message. Total messages: {len(message_actions)}. Messages: {message_actions}',
|
||||
)
|
||||
|
||||
@@ -39,8 +39,8 @@ checkout_original_branch() {
|
||||
git checkout $current_branch
|
||||
}
|
||||
|
||||
get_openhands_version() {
|
||||
get_agent_version() {
|
||||
# IMPORTANT: Because Agent's prompt changes fairly often in the rapidly evolving codebase of OpenHands
|
||||
# We need to track the version of Agent in the evaluation to make sure results are comparable
|
||||
OPENHANDS_VERSION=v$(poetry run python -c "from openhands import get_version; print(get_version())")
|
||||
AGENT_VERSION=v$(poetry run python -c "import openhands.agenthub; from openhands.controller.agent import Agent; print(Agent.get_cls('$AGENT').VERSION)")
|
||||
}
|
||||
|
||||
1
frontend/.gitignore
vendored
1
frontend/.gitignore
vendored
@@ -7,4 +7,3 @@ node_modules/
|
||||
/playwright-report/
|
||||
/blob-report/
|
||||
/playwright/.cache/
|
||||
.react-router/
|
||||
|
||||
@@ -9,7 +9,6 @@ This is the frontend of the OpenHands project. It is a React application that pr
|
||||
- Remix SPA Mode (React + Vite + React Router)
|
||||
- TypeScript
|
||||
- Redux
|
||||
- TanStack Query
|
||||
- Tailwind CSS
|
||||
- i18next
|
||||
- React Testing Library
|
||||
@@ -86,7 +85,7 @@ frontend
|
||||
├── src
|
||||
│ ├── api # API calls
|
||||
│ ├── assets
|
||||
│ ├── components
|
||||
│ ├── components # Reusable components
|
||||
│ ├── context # Local state management
|
||||
│ ├── hooks # Custom hooks
|
||||
│ ├── i18n # Internationalization
|
||||
@@ -100,18 +99,6 @@ frontend
|
||||
└── .env.sample # Sample environment variables
|
||||
```
|
||||
|
||||
#### Components
|
||||
|
||||
Components are organized into folders based on their **domain**, **feature**, or **shared functionality**.
|
||||
|
||||
```sh
|
||||
components
|
||||
├── features # Domain-specific components
|
||||
├── layout
|
||||
├── modals
|
||||
└── ui # Shared UI components
|
||||
```
|
||||
|
||||
### Features
|
||||
|
||||
- Real-time updates with WebSockets
|
||||
|
||||
@@ -1,8 +1,7 @@
|
||||
import { screen } from "@testing-library/react";
|
||||
import { describe, it, expect } from "vitest";
|
||||
import { renderWithProviders } from "../../test-utils";
|
||||
import { BrowserPanel } from "#/components/features/browser/browser";
|
||||
|
||||
import BrowserPanel from "#/components/browser";
|
||||
|
||||
describe("Browser", () => {
|
||||
it("renders a message if no screenshotSrc is provided", () => {
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import { render, screen } from "@testing-library/react";
|
||||
import userEvent from "@testing-library/user-event";
|
||||
import { describe, it, expect, test } from "vitest";
|
||||
import { ChatMessage } from "#/components/features/chat/chat-message";
|
||||
import { ChatMessage } from "#/components/chat-message";
|
||||
|
||||
describe("ChatMessage", () => {
|
||||
it("should render a user message", () => {
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import userEvent from "@testing-library/user-event";
|
||||
import { fireEvent, render, screen } from "@testing-library/react";
|
||||
import { describe, afterEach, vi, it, expect } from "vitest";
|
||||
import { ChatInput } from "#/components/features/chat/chat-input";
|
||||
import { ChatInput } from "#/components/chat-input";
|
||||
|
||||
describe("ChatInput", () => {
|
||||
const onSubmitMock = vi.fn();
|
||||
|
||||
@@ -6,10 +6,10 @@ import { addUserMessage } from "#/state/chat-slice";
|
||||
import { SUGGESTIONS } from "#/utils/suggestions";
|
||||
import * as ChatSlice from "#/state/chat-slice";
|
||||
import { WsClientProviderStatus } from "#/context/ws-client-provider";
|
||||
import { ChatInterface } from "#/components/features/chat/chat-interface";
|
||||
import { ChatInterface } from "#/routes/_oh.app/chat-interface";
|
||||
|
||||
// eslint-disable-next-line @typescript-eslint/no-unused-vars
|
||||
const renderChatInterface = (messages: (Message)[]) =>
|
||||
const renderChatInterface = (messages: (Message | ErrorMessage)[]) =>
|
||||
renderWithProviders(<ChatInterface />);
|
||||
|
||||
describe("Empty state", () => {
|
||||
@@ -26,8 +26,8 @@ describe("Empty state", () => {
|
||||
}));
|
||||
|
||||
beforeAll(() => {
|
||||
vi.mock("react-router", async (importActual) => ({
|
||||
...(await importActual<typeof import("react-router")>()),
|
||||
vi.mock("@remix-run/react", async (importActual) => ({
|
||||
...(await importActual<typeof import("@remix-run/react")>()),
|
||||
useRouteLoaderData: vi.fn(() => ({})),
|
||||
}));
|
||||
|
||||
@@ -56,7 +56,6 @@ describe("Empty state", () => {
|
||||
content: "Hello",
|
||||
imageUrls: [],
|
||||
timestamp: new Date().toISOString(),
|
||||
pending: true,
|
||||
}),
|
||||
);
|
||||
});
|
||||
@@ -173,14 +172,12 @@ describe.skip("ChatInterface", () => {
|
||||
content: "Hello",
|
||||
imageUrls: [],
|
||||
timestamp: new Date().toISOString(),
|
||||
pending: true,
|
||||
},
|
||||
{
|
||||
sender: "assistant",
|
||||
content: "Hi",
|
||||
imageUrls: [],
|
||||
timestamp: new Date().toISOString(),
|
||||
pending: true,
|
||||
},
|
||||
];
|
||||
renderChatInterface(messages);
|
||||
@@ -214,7 +211,6 @@ describe.skip("ChatInterface", () => {
|
||||
content: "Here are some images",
|
||||
imageUrls: [],
|
||||
timestamp: new Date().toISOString(),
|
||||
pending: true,
|
||||
},
|
||||
];
|
||||
const { rerender } = renderChatInterface(messages);
|
||||
@@ -227,7 +223,6 @@ describe.skip("ChatInterface", () => {
|
||||
content: "Here are some images",
|
||||
imageUrls: ["image1", "image2"],
|
||||
timestamp: new Date().toISOString(),
|
||||
pending: true,
|
||||
},
|
||||
];
|
||||
|
||||
@@ -249,14 +244,12 @@ describe.skip("ChatInterface", () => {
|
||||
content: "Hello",
|
||||
imageUrls: [],
|
||||
timestamp: new Date().toISOString(),
|
||||
pending: true,
|
||||
},
|
||||
{
|
||||
sender: "user",
|
||||
content: "Hi",
|
||||
imageUrls: [],
|
||||
timestamp: new Date().toISOString(),
|
||||
pending: true,
|
||||
},
|
||||
];
|
||||
const { rerender } = renderChatInterface(messages);
|
||||
@@ -269,7 +262,6 @@ describe.skip("ChatInterface", () => {
|
||||
content: "How can I help you?",
|
||||
imageUrls: [],
|
||||
timestamp: new Date().toISOString(),
|
||||
pending: true,
|
||||
});
|
||||
|
||||
rerender(<ChatInterface />);
|
||||
@@ -278,19 +270,17 @@ describe.skip("ChatInterface", () => {
|
||||
});
|
||||
|
||||
it("should render inline errors", () => {
|
||||
const messages: (Message)[] = [
|
||||
const messages: (Message | ErrorMessage)[] = [
|
||||
{
|
||||
sender: "assistant",
|
||||
content: "Hello",
|
||||
imageUrls: [],
|
||||
timestamp: new Date().toISOString(),
|
||||
pending: true,
|
||||
},
|
||||
{
|
||||
type: "error",
|
||||
content: "Something went wrong",
|
||||
sender: "assistant",
|
||||
timestamp: new Date().toISOString(),
|
||||
error: true,
|
||||
id: "",
|
||||
message: "Something went wrong",
|
||||
},
|
||||
];
|
||||
renderChatInterface(messages);
|
||||
@@ -300,8 +290,8 @@ describe.skip("ChatInterface", () => {
|
||||
});
|
||||
|
||||
it("should render both GitHub buttons initially when ghToken is available", () => {
|
||||
vi.mock("react-router", async (importActual) => ({
|
||||
...(await importActual<typeof import("react-router")>()),
|
||||
vi.mock("@remix-run/react", async (importActual) => ({
|
||||
...(await importActual<typeof import("@remix-run/react")>()),
|
||||
useRouteLoaderData: vi.fn(() => ({ ghToken: "test-token" })),
|
||||
}));
|
||||
|
||||
@@ -311,7 +301,6 @@ describe.skip("ChatInterface", () => {
|
||||
content: "Hello",
|
||||
imageUrls: [],
|
||||
timestamp: new Date().toISOString(),
|
||||
pending: true,
|
||||
},
|
||||
];
|
||||
renderChatInterface(messages);
|
||||
@@ -326,8 +315,8 @@ describe.skip("ChatInterface", () => {
|
||||
});
|
||||
|
||||
it("should render only 'Push changes to PR' button after PR is created", async () => {
|
||||
vi.mock("react-router", async (importActual) => ({
|
||||
...(await importActual<typeof import("react-router")>()),
|
||||
vi.mock("@remix-run/react", async (importActual) => ({
|
||||
...(await importActual<typeof import("@remix-run/react")>()),
|
||||
useRouteLoaderData: vi.fn(() => ({ ghToken: "test-token" })),
|
||||
}));
|
||||
|
||||
@@ -337,7 +326,6 @@ describe.skip("ChatInterface", () => {
|
||||
content: "Hello",
|
||||
imageUrls: [],
|
||||
timestamp: new Date().toISOString(),
|
||||
pending: true,
|
||||
},
|
||||
];
|
||||
const { rerender } = renderChatInterface(messages);
|
||||
@@ -370,21 +358,18 @@ describe.skip("ChatInterface", () => {
|
||||
content: "Hello",
|
||||
imageUrls: [],
|
||||
timestamp: new Date().toISOString(),
|
||||
pending: true,
|
||||
},
|
||||
{
|
||||
sender: "user",
|
||||
content: "Hi",
|
||||
imageUrls: [],
|
||||
timestamp: new Date().toISOString(),
|
||||
pending: true,
|
||||
},
|
||||
{
|
||||
sender: "assistant",
|
||||
content: "How can I help you?",
|
||||
imageUrls: [],
|
||||
timestamp: new Date().toISOString(),
|
||||
pending: true,
|
||||
},
|
||||
];
|
||||
const { rerender } = renderChatInterface(messages);
|
||||
@@ -395,7 +380,6 @@ describe.skip("ChatInterface", () => {
|
||||
content: "I need help",
|
||||
imageUrls: [],
|
||||
timestamp: new Date().toISOString(),
|
||||
pending: true,
|
||||
});
|
||||
|
||||
rerender(<ChatInterface />);
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import { render, screen } from "@testing-library/react";
|
||||
import userEvent from "@testing-library/user-event";
|
||||
import { afterEach, describe, expect, it, test, vi } from "vitest";
|
||||
import { AccountSettingsContextMenu } from "#/components/features/context-menu/account-settings-context-menu";
|
||||
import { AccountSettingsContextMenu } from "#/components/context-menu/account-settings-context-menu";
|
||||
|
||||
describe("AccountSettingsContextMenu", () => {
|
||||
const user = userEvent.setup();
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import { describe, it, expect, vi } from "vitest";
|
||||
import { render, screen } from "@testing-library/react";
|
||||
import userEvent from "@testing-library/user-event";
|
||||
import { ContextMenuListItem } from "#/components/features/context-menu/context-menu-list-item";
|
||||
import { ContextMenuListItem } from "#/components/context-menu/context-menu-list-item";
|
||||
|
||||
describe("ContextMenuListItem", () => {
|
||||
it("should render the component with the children", () => {
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import { render, screen, within } from "@testing-library/react";
|
||||
import userEvent from "@testing-library/user-event";
|
||||
import { afterEach, describe, expect, it, vi } from "vitest";
|
||||
import { FeedbackActions } from "#/components/features/feedback/feedback-actions";
|
||||
import { FeedbackActions } from "#/components/feedback-actions";
|
||||
|
||||
describe("FeedbackActions", () => {
|
||||
const user = userEvent.setup();
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
import { screen } from "@testing-library/react";
|
||||
import { render, screen } from "@testing-library/react";
|
||||
import userEvent from "@testing-library/user-event";
|
||||
import { afterEach, describe, expect, it, vi } from "vitest";
|
||||
import { renderWithProviders } from "test-utils";
|
||||
import { FeedbackForm } from "#/components/features/feedback/feedback-form";
|
||||
import { FeedbackForm } from "#/components/feedback-form";
|
||||
|
||||
describe("FeedbackForm", () => {
|
||||
const user = userEvent.setup();
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import { screen } from "@testing-library/react";
|
||||
import { renderWithProviders } from "test-utils";
|
||||
import { describe, afterEach, vi, it, expect } from "vitest";
|
||||
import { ExplorerTree } from "#/components/features/file-explorer/explorer-tree";
|
||||
import ExplorerTree from "#/components/file-explorer/explorer-tree";
|
||||
|
||||
const FILES = ["file-1-1.ts", "folder-1-2"];
|
||||
|
||||
|
||||
@@ -4,8 +4,8 @@ import { renderWithProviders } from "test-utils";
|
||||
import { describe, it, expect, vi, Mock, afterEach } from "vitest";
|
||||
import toast from "#/utils/toast";
|
||||
import AgentState from "#/types/agent-state";
|
||||
import FileExplorer from "#/components/file-explorer/file-explorer";
|
||||
import OpenHands from "#/api/open-hands";
|
||||
import { FileExplorer } from "#/components/features/file-explorer/file-explorer";
|
||||
|
||||
const toastSpy = vi.spyOn(toast, "error");
|
||||
const uploadFilesSpy = vi.spyOn(OpenHands, "uploadFiles");
|
||||
|
||||
@@ -2,7 +2,7 @@ import { screen } from "@testing-library/react";
|
||||
import userEvent from "@testing-library/user-event";
|
||||
import { renderWithProviders } from "test-utils";
|
||||
import { vi, describe, afterEach, it, expect } from "vitest";
|
||||
import TreeNode from "#/components/features/file-explorer/tree-node";
|
||||
import TreeNode from "#/components/file-explorer/tree-node";
|
||||
import OpenHands from "#/api/open-hands";
|
||||
|
||||
const getFileSpy = vi.spyOn(OpenHands, "getFile");
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import { ImagePreview } from "#/components/features/images/image-preview";
|
||||
import { render, screen } from "@testing-library/react";
|
||||
import userEvent from "@testing-library/user-event";
|
||||
import { describe, expect, it, vi } from "vitest";
|
||||
import { ImagePreview } from "#/components/image-preview";
|
||||
|
||||
describe("ImagePreview", () => {
|
||||
it("should render an image", () => {
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import { render, screen, within } from "@testing-library/react";
|
||||
import userEvent from "@testing-library/user-event";
|
||||
import { afterEach, beforeAll, describe, expect, it, vi } from "vitest";
|
||||
import { InteractiveChatBox } from "#/components/features/chat/interactive-chat-box";
|
||||
import { InteractiveChatBox } from "#/components/interactive-chat-box";
|
||||
|
||||
describe("InteractiveChatBox", () => {
|
||||
const onSubmitMock = vi.fn();
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import { render, screen, act } from "@testing-library/react";
|
||||
import userEvent from "@testing-library/user-event";
|
||||
import { describe, it, vi, expect } from "vitest";
|
||||
import { BaseModal } from "#/components/shared/modals/base-modal/base-modal";
|
||||
import BaseModal from "#/components/modals/base-modal/base-modal";
|
||||
|
||||
describe("BaseModal", () => {
|
||||
it("should render if the modal is open", () => {
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import { describe, it, expect } from "vitest";
|
||||
import { render, screen } from "@testing-library/react";
|
||||
import userEvent from "@testing-library/user-event";
|
||||
import { ModelSelector } from "#/components/shared/modals/settings/model-selector";
|
||||
import { ModelSelector } from "#/components/modals/settings/model-selector";
|
||||
|
||||
describe("ModelSelector", () => {
|
||||
const models = {
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import { render, screen } from "@testing-library/react";
|
||||
import userEvent from "@testing-library/user-event";
|
||||
import { afterEach, describe, expect, it, vi } from "vitest";
|
||||
import { SuggestionItem } from "#/components/features/suggestions/suggestion-item";
|
||||
import { SuggestionItem } from "#/components/suggestion-item";
|
||||
|
||||
describe("SuggestionItem", () => {
|
||||
const suggestionItem = { label: "suggestion1", value: "a long text value" };
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import { render, screen } from "@testing-library/react";
|
||||
import userEvent from "@testing-library/user-event";
|
||||
import { afterEach, describe, expect, it, vi } from "vitest";
|
||||
import { Suggestions } from "#/components/features/suggestions/suggestions";
|
||||
import { Suggestions } from "#/components/suggestions";
|
||||
|
||||
describe("Suggestions", () => {
|
||||
const firstSuggestion = {
|
||||
|
||||
@@ -2,7 +2,7 @@ import { act, screen } from "@testing-library/react";
|
||||
import { renderWithProviders } from "test-utils";
|
||||
import { vi, describe, afterEach, it, expect } from "vitest";
|
||||
import { Command, appendInput, appendOutput } from "#/state/command-slice";
|
||||
import Terminal from "#/components/features/terminal/terminal";
|
||||
import Terminal from "#/components/terminal/terminal";
|
||||
|
||||
global.ResizeObserver = vi.fn().mockImplementation(() => ({
|
||||
observe: vi.fn(),
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import { render, screen } from "@testing-library/react";
|
||||
import userEvent from "@testing-library/user-event";
|
||||
import { afterEach, describe, expect, it, vi } from "vitest";
|
||||
import { UploadImageInput } from "#/components/features/images/upload-image-input";
|
||||
import { UploadImageInput } from "#/components/upload-image-input";
|
||||
|
||||
describe("UploadImageInput", () => {
|
||||
const user = userEvent.setup();
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import { render, screen } from "@testing-library/react";
|
||||
import { describe, expect, it, test, vi, afterEach } from "vitest";
|
||||
import userEvent from "@testing-library/user-event";
|
||||
import { UserActions } from "#/components/features/sidebar/user-actions";
|
||||
import { UserActions } from "#/components/user-actions";
|
||||
|
||||
describe("UserActions", () => {
|
||||
const user = userEvent.setup();
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import { render, screen } from "@testing-library/react";
|
||||
import userEvent from "@testing-library/user-event";
|
||||
import { afterEach, describe, expect, it, vi } from "vitest";
|
||||
import { UserAvatar } from "#/components/features/sidebar/user-avatar";
|
||||
import { UserAvatar } from "#/components/user-avatar";
|
||||
|
||||
describe("UserAvatar", () => {
|
||||
const onClickMock = vi.fn();
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
import { act, renderHook } from "@testing-library/react";
|
||||
import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
|
||||
import { useRate } from "#/hooks/use-rate";
|
||||
import { useRate } from "#/utils/use-rate";
|
||||
|
||||
describe("useRate", () => {
|
||||
beforeEach(() => {
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
import { afterEach, beforeAll, describe, expect, it, vi } from "vitest";
|
||||
import { createRoutesStub } from "react-router";
|
||||
import { createRemixStub } from "@remix-run/testing";
|
||||
import { screen, waitFor, within } from "@testing-library/react";
|
||||
import { renderWithProviders } from "test-utils";
|
||||
import userEvent from "@testing-library/user-event";
|
||||
@@ -8,7 +8,7 @@ import * as CaptureConsent from "#/utils/handle-capture-consent";
|
||||
import i18n from "#/i18n";
|
||||
|
||||
describe("frontend/routes/_oh", () => {
|
||||
const RouteStub = createRoutesStub([{ Component: MainApp, path: "/" }]);
|
||||
const RemixStub = createRemixStub([{ Component: MainApp, path: "/" }]);
|
||||
|
||||
const { userIsAuthenticatedMock, settingsAreUpToDateMock } = vi.hoisted(
|
||||
() => ({
|
||||
@@ -34,26 +34,26 @@ describe("frontend/routes/_oh", () => {
|
||||
});
|
||||
|
||||
it("should render", async () => {
|
||||
renderWithProviders(<RouteStub />);
|
||||
renderWithProviders(<RemixStub />);
|
||||
await screen.findByTestId("root-layout");
|
||||
});
|
||||
|
||||
it("should render the AI config modal if the user is authed", async () => {
|
||||
// Our mock return value is true by default
|
||||
renderWithProviders(<RouteStub />);
|
||||
renderWithProviders(<RemixStub />);
|
||||
await screen.findByTestId("ai-config-modal");
|
||||
});
|
||||
|
||||
it("should render the AI config modal if settings are not up-to-date", async () => {
|
||||
settingsAreUpToDateMock.mockReturnValue(false);
|
||||
renderWithProviders(<RouteStub />);
|
||||
renderWithProviders(<RemixStub />);
|
||||
|
||||
await screen.findByTestId("ai-config-modal");
|
||||
});
|
||||
|
||||
it("should not render the AI config modal if the settings are up-to-date", async () => {
|
||||
settingsAreUpToDateMock.mockReturnValue(true);
|
||||
renderWithProviders(<RouteStub />);
|
||||
renderWithProviders(<RemixStub />);
|
||||
|
||||
await waitFor(() => {
|
||||
expect(screen.queryByTestId("ai-config-modal")).not.toBeInTheDocument();
|
||||
@@ -67,7 +67,7 @@ describe("frontend/routes/_oh", () => {
|
||||
"handleCaptureConsent",
|
||||
);
|
||||
|
||||
renderWithProviders(<RouteStub />);
|
||||
renderWithProviders(<RemixStub />);
|
||||
|
||||
// The user has not consented to tracking
|
||||
const consentForm = await screen.findByTestId("user-capture-consent-form");
|
||||
@@ -89,7 +89,7 @@ describe("frontend/routes/_oh", () => {
|
||||
|
||||
it("should not render the user consent form if the user has already made a decision", async () => {
|
||||
localStorage.setItem("analytics-consent", "true");
|
||||
renderWithProviders(<RouteStub />);
|
||||
renderWithProviders(<RemixStub />);
|
||||
|
||||
await waitFor(() => {
|
||||
expect(
|
||||
@@ -98,15 +98,14 @@ describe("frontend/routes/_oh", () => {
|
||||
});
|
||||
});
|
||||
|
||||
// TODO: Likely failing due to how tokens are now handled in context. Move to e2e tests
|
||||
it.skip("should render a new project button if a token is set", async () => {
|
||||
it("should render a new project button if a token is set", async () => {
|
||||
localStorage.setItem("token", "test-token");
|
||||
const { rerender } = renderWithProviders(<RouteStub />);
|
||||
const { rerender } = renderWithProviders(<RemixStub />);
|
||||
|
||||
await screen.findByTestId("new-project-button");
|
||||
|
||||
localStorage.removeItem("token");
|
||||
rerender(<RouteStub />);
|
||||
rerender(<RemixStub />);
|
||||
|
||||
await waitFor(() => {
|
||||
expect(
|
||||
@@ -118,17 +117,17 @@ describe("frontend/routes/_oh", () => {
|
||||
// TODO: Move to e2e tests
|
||||
it.skip("should update the i18n language when the language settings change", async () => {
|
||||
const changeLanguageSpy = vi.spyOn(i18n, "changeLanguage");
|
||||
const { rerender } = renderWithProviders(<RouteStub />);
|
||||
const { rerender } = renderWithProviders(<RemixStub />);
|
||||
|
||||
// The default language is English
|
||||
expect(changeLanguageSpy).toHaveBeenCalledWith("en");
|
||||
|
||||
localStorage.setItem("LANGUAGE", "es");
|
||||
|
||||
rerender(<RouteStub />);
|
||||
rerender(<RemixStub />);
|
||||
expect(changeLanguageSpy).toHaveBeenCalledWith("es");
|
||||
|
||||
rerender(<RouteStub />);
|
||||
rerender(<RemixStub />);
|
||||
// The language has not changed, so the spy should not have been called again
|
||||
expect(changeLanguageSpy).toHaveBeenCalledTimes(2);
|
||||
});
|
||||
@@ -139,7 +138,7 @@ describe("frontend/routes/_oh", () => {
|
||||
localStorage.setItem("ghToken", "test-token");
|
||||
|
||||
// const logoutCleanupSpy = vi.spyOn(LogoutCleanup, "logoutCleanup");
|
||||
renderWithProviders(<RouteStub />);
|
||||
renderWithProviders(<RemixStub />);
|
||||
|
||||
const userActions = await screen.findByTestId("user-actions");
|
||||
const userAvatar = within(userActions).getByTestId("user-avatar");
|
||||
|
||||
10707
frontend/package-lock.json
generated
10707
frontend/package-lock.json
generated
File diff suppressed because it is too large
Load Diff
@@ -9,15 +9,15 @@
|
||||
"dependencies": {
|
||||
"@monaco-editor/react": "^4.6.0",
|
||||
"@nextui-org/react": "^2.4.8",
|
||||
"@react-router/node": "^7.0.1",
|
||||
"@react-router/serve": "^7.0.1",
|
||||
"@react-types/shared": "^3.25.0",
|
||||
"@reduxjs/toolkit": "^2.3.0",
|
||||
"@remix-run/node": "^2.11.2",
|
||||
"@remix-run/react": "^2.11.2",
|
||||
"@remix-run/serve": "^2.11.2",
|
||||
"@tanstack/react-query": "^5.60.5",
|
||||
"@vitejs/plugin-react": "^4.3.2",
|
||||
"@xterm/addon-fit": "^0.10.0",
|
||||
"@xterm/xterm": "^5.4.0",
|
||||
"axios": "^1.7.7",
|
||||
"clsx": "^2.1.1",
|
||||
"eslint-config-airbnb-typescript": "^18.0.0",
|
||||
"i18next": "^23.15.2",
|
||||
@@ -35,7 +35,7 @@
|
||||
"react-icons": "^5.3.0",
|
||||
"react-markdown": "^9.0.1",
|
||||
"react-redux": "^9.1.2",
|
||||
"react-router": "^7.0.1",
|
||||
"react-router-dom": "^6.26.1",
|
||||
"react-syntax-highlighter": "^15.6.1",
|
||||
"react-textarea-autosize": "^8.5.4",
|
||||
"remark-gfm": "^4.0.0",
|
||||
@@ -47,9 +47,9 @@
|
||||
"ws": "^8.18.0"
|
||||
},
|
||||
"scripts": {
|
||||
"dev": "npm run make-i18n && cross-env VITE_MOCK_API=false react-router dev",
|
||||
"dev:mock": "npm run make-i18n && cross-env VITE_MOCK_API=true react-router dev",
|
||||
"build": "npm run make-i18n && tsc && react-router build",
|
||||
"dev": "npm run make-i18n && cross-env VITE_MOCK_API=false remix vite:dev",
|
||||
"dev:mock": "npm run make-i18n && cross-env VITE_MOCK_API=true remix vite:dev",
|
||||
"build": "npm run make-i18n && tsc && remix vite:build",
|
||||
"start": "npx sirv-cli build/ --single",
|
||||
"test": "vitest run",
|
||||
"test:e2e": "playwright test",
|
||||
@@ -60,8 +60,7 @@
|
||||
"prelint": "npm run make-i18n",
|
||||
"lint": "eslint src --ext .ts,.tsx,.js && prettier --check src/**/*.{ts,tsx}",
|
||||
"lint:fix": "eslint src --ext .ts,.tsx,.js --fix && prettier --write src/**/*.{ts,tsx}",
|
||||
"prepare": "cd .. && husky frontend/.husky",
|
||||
"typecheck": "react-router typegen && tsc"
|
||||
"prepare": "cd .. && husky frontend/.husky"
|
||||
},
|
||||
"husky": {
|
||||
"hooks": {
|
||||
@@ -76,7 +75,8 @@
|
||||
},
|
||||
"devDependencies": {
|
||||
"@playwright/test": "^1.48.2",
|
||||
"@react-router/dev": "^7.0.1",
|
||||
"@remix-run/dev": "^2.11.2",
|
||||
"@remix-run/testing": "^2.11.2",
|
||||
"@tailwindcss/typography": "^0.5.15",
|
||||
"@tanstack/eslint-plugin-query": "^5.60.1",
|
||||
"@testing-library/jest-dom": "^6.6.1",
|
||||
|
||||
@@ -1,35 +0,0 @@
|
||||
import type { Config } from "@react-router/dev/config";
|
||||
|
||||
/**
|
||||
* This script is used to unpack the client directory from the frontend build directory.
|
||||
* Remix SPA mode builds the client directory into the build directory. This function
|
||||
* moves the contents of the client directory to the build directory and then removes the
|
||||
* client directory.
|
||||
*
|
||||
* This script is used in the buildEnd function of the Vite config.
|
||||
*/
|
||||
const unpackClientDirectory = async () => {
|
||||
const fs = await import("fs");
|
||||
const path = await import("path");
|
||||
|
||||
const buildDir = path.resolve(__dirname, "build");
|
||||
const clientDir = path.resolve(buildDir, "client");
|
||||
|
||||
const files = await fs.promises.readdir(clientDir);
|
||||
await Promise.all(
|
||||
files.map((file) =>
|
||||
fs.promises.rename(
|
||||
path.resolve(clientDir, file),
|
||||
path.resolve(buildDir, file),
|
||||
),
|
||||
),
|
||||
);
|
||||
|
||||
await fs.promises.rmdir(clientDir);
|
||||
};
|
||||
|
||||
export default {
|
||||
appDirectory: "src",
|
||||
buildEnd: unpackClientDirectory,
|
||||
ssr: false,
|
||||
} satisfies Config;
|
||||
@@ -1,21 +0,0 @@
|
||||
import axios from "axios";
|
||||
|
||||
const github = axios.create({
|
||||
baseURL: "https://api.github.com",
|
||||
headers: {
|
||||
Accept: "application/vnd.github+json",
|
||||
"X-GitHub-Api-Version": "2022-11-28",
|
||||
},
|
||||
});
|
||||
|
||||
const setAuthTokenHeader = (token: string) => {
|
||||
github.defaults.headers.common.Authorization = `Bearer ${token}`;
|
||||
};
|
||||
|
||||
const removeAuthTokenHeader = () => {
|
||||
if (github.defaults.headers.common.Authorization) {
|
||||
delete github.defaults.headers.common.Authorization;
|
||||
}
|
||||
};
|
||||
|
||||
export { github, setAuthTokenHeader, removeAuthTokenHeader };
|
||||
@@ -1,5 +1,14 @@
|
||||
import { extractNextPageFromLink } from "#/utils/extract-next-page-from-link";
|
||||
import { github } from "./github-axios-instance";
|
||||
/**
|
||||
* Generates the headers for the GitHub API
|
||||
* @param token The GitHub token
|
||||
* @returns The headers for the GitHub API
|
||||
*/
|
||||
const generateGitHubAPIHeaders = (token: string) =>
|
||||
({
|
||||
Accept: "application/vnd.github+json",
|
||||
Authorization: `Bearer ${token}`,
|
||||
"X-GitHub-Api-Version": "2022-11-28",
|
||||
}) as const;
|
||||
|
||||
/**
|
||||
* Checks if the data is a GitHub error response
|
||||
@@ -17,31 +26,18 @@ export const isGitHubErrorReponse = <T extends object | Array<unknown>>(
|
||||
* @returns A list of repositories or an error response
|
||||
*/
|
||||
export const retrieveGitHubUserRepositories = async (
|
||||
token: string,
|
||||
page = 1,
|
||||
per_page = 30,
|
||||
) => {
|
||||
const response = await github.get<GitHubRepository[]>("/user/repos", {
|
||||
params: {
|
||||
sort: "pushed",
|
||||
page,
|
||||
per_page,
|
||||
},
|
||||
transformResponse: (data) => {
|
||||
const parsedData: GitHubRepository[] | GitHubErrorReponse =
|
||||
JSON.parse(data);
|
||||
): Promise<Response> => {
|
||||
const url = new URL("https://api.github.com/user/repos");
|
||||
url.searchParams.append("sort", "pushed"); // sort by most recently pushed
|
||||
url.searchParams.append("page", page.toString());
|
||||
url.searchParams.append("per_page", per_page.toString());
|
||||
|
||||
if (isGitHubErrorReponse(parsedData)) {
|
||||
throw new Error(parsedData.message);
|
||||
}
|
||||
|
||||
return parsedData;
|
||||
},
|
||||
return fetch(url.toString(), {
|
||||
headers: generateGitHubAPIHeaders(token),
|
||||
});
|
||||
|
||||
const link = response.headers.link ?? "";
|
||||
const nextPage = extractNextPageFromLink(link);
|
||||
|
||||
return { data: response.data, nextPage };
|
||||
};
|
||||
|
||||
/**
|
||||
@@ -49,54 +45,55 @@ export const retrieveGitHubUserRepositories = async (
|
||||
* @param token The GitHub token
|
||||
* @returns The authenticated user or an error response
|
||||
*/
|
||||
export const retrieveGitHubUser = async () => {
|
||||
const response = await github.get<GitHubUser>("/user", {
|
||||
transformResponse: (data) => {
|
||||
const parsedData: GitHubUser | GitHubErrorReponse = JSON.parse(data);
|
||||
|
||||
if (isGitHubErrorReponse(parsedData)) {
|
||||
throw new Error(parsedData.message);
|
||||
}
|
||||
|
||||
return parsedData;
|
||||
},
|
||||
export const retrieveGitHubUser = async (
|
||||
token: string,
|
||||
): Promise<GitHubUser | GitHubErrorReponse> => {
|
||||
const response = await fetch("https://api.github.com/user", {
|
||||
headers: generateGitHubAPIHeaders(token),
|
||||
});
|
||||
|
||||
const { data } = response;
|
||||
if (!response.ok) {
|
||||
throw new Error("Failed to retrieve user data");
|
||||
}
|
||||
|
||||
const user: GitHubUser = {
|
||||
id: data.id,
|
||||
login: data.login,
|
||||
avatar_url: data.avatar_url,
|
||||
company: data.company,
|
||||
name: data.name,
|
||||
email: data.email,
|
||||
const data = await response.json();
|
||||
|
||||
if (!isGitHubErrorReponse(data)) {
|
||||
// Only return the necessary user data
|
||||
const user: GitHubUser = {
|
||||
id: data.id,
|
||||
login: data.login,
|
||||
avatar_url: data.avatar_url,
|
||||
company: data.company,
|
||||
name: data.name,
|
||||
email: data.email,
|
||||
};
|
||||
|
||||
return user;
|
||||
}
|
||||
|
||||
const error: GitHubErrorReponse = {
|
||||
message: data.message,
|
||||
documentation_url: data.documentation_url,
|
||||
status: response.status,
|
||||
};
|
||||
|
||||
return user;
|
||||
return error;
|
||||
};
|
||||
|
||||
export const retrieveLatestGitHubCommit = async (
|
||||
token: string,
|
||||
repository: string,
|
||||
): Promise<GitHubCommit> => {
|
||||
const response = await github.get<GitHubCommit>(
|
||||
`/repos/${repository}/commits`,
|
||||
{
|
||||
params: {
|
||||
per_page: 1,
|
||||
},
|
||||
transformResponse: (data) => {
|
||||
const parsedData: GitHubCommit[] | GitHubErrorReponse =
|
||||
JSON.parse(data);
|
||||
): Promise<GitHubCommit[] | GitHubErrorReponse> => {
|
||||
const url = new URL(`https://api.github.com/repos/${repository}/commits`);
|
||||
url.searchParams.append("per_page", "1");
|
||||
const response = await fetch(url.toString(), {
|
||||
headers: generateGitHubAPIHeaders(token),
|
||||
});
|
||||
|
||||
if (isGitHubErrorReponse(parsedData)) {
|
||||
throw new Error(parsedData.message);
|
||||
}
|
||||
if (!response.ok) {
|
||||
throw new Error("Failed to retrieve latest commit");
|
||||
}
|
||||
|
||||
return parsedData[0];
|
||||
},
|
||||
},
|
||||
);
|
||||
|
||||
return response.data;
|
||||
return response.json();
|
||||
};
|
||||
|
||||
@@ -1,30 +0,0 @@
|
||||
import { openHands } from "./open-hands-axios";
|
||||
|
||||
class InvariantService {
|
||||
static async getPolicy() {
|
||||
const { data } = await openHands.get("/api/security/policy");
|
||||
return data.policy;
|
||||
}
|
||||
|
||||
static async getRiskSeverity() {
|
||||
const { data } = await openHands.get("/api/security/settings");
|
||||
return data.RISK_SEVERITY;
|
||||
}
|
||||
|
||||
static async getTraces() {
|
||||
const { data } = await openHands.get("/api/security/export-trace");
|
||||
return data;
|
||||
}
|
||||
|
||||
static async updatePolicy(policy: string) {
|
||||
await openHands.post("/api/security/policy", { policy });
|
||||
}
|
||||
|
||||
static async updateRiskSeverity(riskSeverity: number) {
|
||||
await openHands.post("/api/security/settings", {
|
||||
RISK_SEVERITY: riskSeverity,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
export default InvariantService;
|
||||
@@ -1,23 +0,0 @@
|
||||
import axios from "axios";
|
||||
|
||||
export const openHands = axios.create();
|
||||
|
||||
export const setAuthTokenHeader = (token: string) => {
|
||||
openHands.defaults.headers.common.Authorization = `Bearer ${token}`;
|
||||
};
|
||||
|
||||
export const setGitHubTokenHeader = (token: string) => {
|
||||
openHands.defaults.headers.common["X-GitHub-Token"] = token;
|
||||
};
|
||||
|
||||
export const removeAuthTokenHeader = () => {
|
||||
if (openHands.defaults.headers.common.Authorization) {
|
||||
delete openHands.defaults.headers.common.Authorization;
|
||||
}
|
||||
};
|
||||
|
||||
export const removeGitHubTokenHeader = () => {
|
||||
if (openHands.defaults.headers.common["X-GitHub-Token"]) {
|
||||
delete openHands.defaults.headers.common["X-GitHub-Token"];
|
||||
}
|
||||
};
|
||||
@@ -1,3 +1,4 @@
|
||||
import { request } from "#/services/api";
|
||||
import {
|
||||
SaveFileSuccessResponse,
|
||||
FileUploadSuccessResponse,
|
||||
@@ -7,9 +8,7 @@ import {
|
||||
ErrorResponse,
|
||||
GetConfigResponse,
|
||||
GetVSCodeUrlResponse,
|
||||
AuthenticateResponse,
|
||||
} from "./open-hands.types";
|
||||
import { openHands } from "./open-hands-axios";
|
||||
|
||||
class OpenHands {
|
||||
/**
|
||||
@@ -17,8 +16,13 @@ class OpenHands {
|
||||
* @returns List of models available
|
||||
*/
|
||||
static async getModels(): Promise<string[]> {
|
||||
const { data } = await openHands.get<string[]>("/api/options/models");
|
||||
return data;
|
||||
const response = await fetch("/api/options/models");
|
||||
|
||||
if (!response.ok) {
|
||||
throw new Error("Failed to fetch models");
|
||||
}
|
||||
|
||||
return response.json();
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -26,8 +30,13 @@ class OpenHands {
|
||||
* @returns List of agents available
|
||||
*/
|
||||
static async getAgents(): Promise<string[]> {
|
||||
const { data } = await openHands.get<string[]>("/api/options/agents");
|
||||
return data;
|
||||
const response = await fetch("/api/options/agents");
|
||||
|
||||
if (!response.ok) {
|
||||
throw new Error("Failed to fetch agents");
|
||||
}
|
||||
|
||||
return response.json();
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -35,15 +44,23 @@ class OpenHands {
|
||||
* @returns List of security analyzers available
|
||||
*/
|
||||
static async getSecurityAnalyzers(): Promise<string[]> {
|
||||
const { data } = await openHands.get<string[]>(
|
||||
"/api/options/security-analyzers",
|
||||
);
|
||||
return data;
|
||||
const response = await fetch("/api/options/security-analyzers");
|
||||
|
||||
if (!response.ok) {
|
||||
throw new Error("Failed to fetch security analyzers");
|
||||
}
|
||||
|
||||
return response.json();
|
||||
}
|
||||
|
||||
static async getConfig(): Promise<GetConfigResponse> {
|
||||
const { data } = await openHands.get<GetConfigResponse>("/config.json");
|
||||
return data;
|
||||
const response = await fetch("/config.json");
|
||||
|
||||
if (!response.ok) {
|
||||
throw new Error("Failed to fetch config");
|
||||
}
|
||||
|
||||
return response.json();
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -51,11 +68,21 @@ class OpenHands {
|
||||
* @param path Path to list files from
|
||||
* @returns List of files available in the given path. If path is not provided, it lists all the files in the workspace
|
||||
*/
|
||||
static async getFiles(path?: string): Promise<string[]> {
|
||||
const { data } = await openHands.get<string[]>("/api/list-files", {
|
||||
params: { path },
|
||||
static async getFiles(token: string, path?: string): Promise<string[]> {
|
||||
const url = new URL("/api/list-files", window.location.origin);
|
||||
if (path) url.searchParams.append("path", path);
|
||||
|
||||
const response = await fetch(url.toString(), {
|
||||
headers: {
|
||||
Authorization: `Bearer ${token}`,
|
||||
},
|
||||
});
|
||||
return data;
|
||||
|
||||
if (!response.ok) {
|
||||
throw new Error("Failed to fetch files");
|
||||
}
|
||||
|
||||
return response.json();
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -63,11 +90,21 @@ class OpenHands {
|
||||
* @param path Full path of the file to retrieve
|
||||
* @returns Content of the file
|
||||
*/
|
||||
static async getFile(path: string): Promise<string> {
|
||||
const { data } = await openHands.get<{ code: string }>("/api/select-file", {
|
||||
params: { file: path },
|
||||
static async getFile(token: string, path: string): Promise<string> {
|
||||
const url = new URL("/api/select-file", window.location.origin);
|
||||
url.searchParams.append("file", path);
|
||||
|
||||
const response = await fetch(url.toString(), {
|
||||
headers: {
|
||||
Authorization: `Bearer ${token}`,
|
||||
},
|
||||
});
|
||||
|
||||
if (!response.ok) {
|
||||
throw new Error("Failed to fetch file");
|
||||
}
|
||||
|
||||
const data = await response.json();
|
||||
return data.code;
|
||||
}
|
||||
|
||||
@@ -78,17 +115,31 @@ class OpenHands {
|
||||
* @returns Success message or error message
|
||||
*/
|
||||
static async saveFile(
|
||||
token: string,
|
||||
path: string,
|
||||
content: string,
|
||||
): Promise<SaveFileSuccessResponse> {
|
||||
const { data } = await openHands.post<
|
||||
SaveFileSuccessResponse | ErrorResponse
|
||||
>("/api/save-file", {
|
||||
filePath: path,
|
||||
content,
|
||||
const response = await fetch("/api/save-file", {
|
||||
method: "POST",
|
||||
body: JSON.stringify({ filePath: path, content }),
|
||||
headers: {
|
||||
"Content-Type": "application/json",
|
||||
Authorization: `Bearer ${token}`,
|
||||
},
|
||||
});
|
||||
|
||||
if ("error" in data) throw new Error(data.error);
|
||||
if (!response.ok) {
|
||||
throw new Error("Failed to save file");
|
||||
}
|
||||
|
||||
const data = (await response.json()) as
|
||||
| SaveFileSuccessResponse
|
||||
| ErrorResponse;
|
||||
|
||||
if ("error" in data) {
|
||||
throw new Error(data.error);
|
||||
}
|
||||
|
||||
return data;
|
||||
}
|
||||
|
||||
@@ -97,15 +148,33 @@ class OpenHands {
|
||||
* @param file File to upload
|
||||
* @returns Success message or error message
|
||||
*/
|
||||
static async uploadFiles(files: File[]): Promise<FileUploadSuccessResponse> {
|
||||
static async uploadFiles(
|
||||
token: string,
|
||||
files: File[],
|
||||
): Promise<FileUploadSuccessResponse> {
|
||||
const formData = new FormData();
|
||||
files.forEach((file) => formData.append("files", file));
|
||||
|
||||
const { data } = await openHands.post<
|
||||
FileUploadSuccessResponse | ErrorResponse
|
||||
>("/api/upload-files", formData);
|
||||
const response = await fetch("/api/upload-files", {
|
||||
method: "POST",
|
||||
body: formData,
|
||||
headers: {
|
||||
Authorization: `Bearer ${token}`,
|
||||
},
|
||||
});
|
||||
|
||||
if (!response.ok) {
|
||||
throw new Error("Failed to upload files");
|
||||
}
|
||||
|
||||
const data = (await response.json()) as
|
||||
| FileUploadSuccessResponse
|
||||
| ErrorResponse;
|
||||
|
||||
if ("error" in data) {
|
||||
throw new Error(data.error);
|
||||
}
|
||||
|
||||
if ("error" in data) throw new Error(data.error);
|
||||
return data;
|
||||
}
|
||||
|
||||
@@ -114,12 +183,24 @@ class OpenHands {
|
||||
* @param data Feedback data
|
||||
* @returns The stored feedback data
|
||||
*/
|
||||
static async submitFeedback(feedback: Feedback): Promise<FeedbackResponse> {
|
||||
const { data } = await openHands.post<FeedbackResponse>(
|
||||
"/api/submit-feedback",
|
||||
feedback,
|
||||
);
|
||||
return data;
|
||||
static async submitFeedback(
|
||||
token: string,
|
||||
feedback: Feedback,
|
||||
): Promise<FeedbackResponse> {
|
||||
const response = await fetch("/api/submit-feedback", {
|
||||
method: "POST",
|
||||
body: JSON.stringify(feedback),
|
||||
headers: {
|
||||
"Content-Type": "application/json",
|
||||
Authorization: `Bearer ${token}`,
|
||||
},
|
||||
});
|
||||
|
||||
if (!response.ok) {
|
||||
throw new Error("Failed to submit feedback");
|
||||
}
|
||||
|
||||
return response.json();
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -127,13 +208,19 @@ class OpenHands {
|
||||
* @returns Response with authentication status and user info if successful
|
||||
*/
|
||||
static async authenticate(
|
||||
gitHubToken: string,
|
||||
appMode: GetConfigResponse["APP_MODE"],
|
||||
): Promise<boolean> {
|
||||
if (appMode === "oss") return true;
|
||||
|
||||
const response =
|
||||
await openHands.post<AuthenticateResponse>("/api/authenticate");
|
||||
return response.status === 200;
|
||||
const response = await fetch("/api/authenticate", {
|
||||
method: "POST",
|
||||
headers: {
|
||||
"X-GitHub-Token": gitHubToken,
|
||||
},
|
||||
});
|
||||
|
||||
return response.ok;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -141,10 +228,8 @@ class OpenHands {
|
||||
* @returns Blob of the workspace zip
|
||||
*/
|
||||
static async getWorkspaceZip(): Promise<Blob> {
|
||||
const response = await openHands.get("/api/zip-directory", {
|
||||
responseType: "blob",
|
||||
});
|
||||
return response.data;
|
||||
const response = await request(`/api/zip-directory`, {}, false, true);
|
||||
return response.blob();
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -154,13 +239,19 @@ class OpenHands {
|
||||
static async getGitHubAccessToken(
|
||||
code: string,
|
||||
): Promise<GitHubAccessTokenResponse> {
|
||||
const { data } = await openHands.post<GitHubAccessTokenResponse>(
|
||||
"/api/github/callback",
|
||||
{
|
||||
code,
|
||||
const response = await fetch("/api/github/callback", {
|
||||
method: "POST",
|
||||
body: JSON.stringify({ code }),
|
||||
headers: {
|
||||
"Content-Type": "application/json",
|
||||
},
|
||||
);
|
||||
return data;
|
||||
});
|
||||
|
||||
if (!response.ok) {
|
||||
throw new Error("Failed to get GitHub access token");
|
||||
}
|
||||
|
||||
return response.json();
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -168,15 +259,12 @@ class OpenHands {
|
||||
* @returns VSCode URL
|
||||
*/
|
||||
static async getVSCodeUrl(): Promise<GetVSCodeUrlResponse> {
|
||||
const { data } =
|
||||
await openHands.get<GetVSCodeUrlResponse>("/api/vscode-url");
|
||||
return data;
|
||||
return request(`/api/vscode-url`, {}, false, false, 1);
|
||||
}
|
||||
|
||||
static async getRuntimeId(): Promise<{ runtime_id: string }> {
|
||||
const { data } = await openHands.get<{ runtime_id: string }>(
|
||||
"/api/conversation",
|
||||
);
|
||||
const data = await request("/api/conversation");
|
||||
|
||||
return data;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -51,8 +51,3 @@ export interface GetVSCodeUrlResponse {
|
||||
vscode_url: string | null;
|
||||
error?: string;
|
||||
}
|
||||
|
||||
export interface AuthenticateResponse {
|
||||
message?: string;
|
||||
error?: string;
|
||||
}
|
||||
|
||||
110
frontend/src/components/agent-control-bar.tsx
Normal file
110
frontend/src/components/agent-control-bar.tsx
Normal file
@@ -0,0 +1,110 @@
|
||||
import { Tooltip } from "@nextui-org/react";
|
||||
import React from "react";
|
||||
import { useSelector } from "react-redux";
|
||||
import PauseIcon from "#/assets/pause";
|
||||
import PlayIcon from "#/assets/play";
|
||||
import { generateAgentStateChangeEvent } from "#/services/agent-state-service";
|
||||
import { RootState } from "#/store";
|
||||
import AgentState from "#/types/agent-state";
|
||||
import { useWsClient } from "#/context/ws-client-provider";
|
||||
|
||||
const IgnoreTaskStateMap: Record<string, AgentState[]> = {
|
||||
[AgentState.PAUSED]: [
|
||||
AgentState.INIT,
|
||||
AgentState.PAUSED,
|
||||
AgentState.STOPPED,
|
||||
AgentState.FINISHED,
|
||||
AgentState.REJECTED,
|
||||
AgentState.AWAITING_USER_INPUT,
|
||||
AgentState.AWAITING_USER_CONFIRMATION,
|
||||
],
|
||||
[AgentState.RUNNING]: [
|
||||
AgentState.INIT,
|
||||
AgentState.RUNNING,
|
||||
AgentState.STOPPED,
|
||||
AgentState.FINISHED,
|
||||
AgentState.REJECTED,
|
||||
AgentState.AWAITING_USER_INPUT,
|
||||
AgentState.AWAITING_USER_CONFIRMATION,
|
||||
],
|
||||
[AgentState.STOPPED]: [AgentState.INIT, AgentState.STOPPED],
|
||||
[AgentState.USER_CONFIRMED]: [AgentState.RUNNING],
|
||||
[AgentState.USER_REJECTED]: [AgentState.RUNNING],
|
||||
[AgentState.AWAITING_USER_CONFIRMATION]: [],
|
||||
};
|
||||
|
||||
interface ActionButtonProps {
|
||||
isDisabled?: boolean;
|
||||
content: string;
|
||||
action: AgentState;
|
||||
handleAction: (action: AgentState) => void;
|
||||
large?: boolean;
|
||||
}
|
||||
|
||||
function ActionButton({
|
||||
isDisabled = false,
|
||||
content,
|
||||
action,
|
||||
handleAction,
|
||||
children,
|
||||
large = false,
|
||||
}: React.PropsWithChildren<ActionButtonProps>) {
|
||||
return (
|
||||
<Tooltip content={content} closeDelay={100}>
|
||||
<button
|
||||
onClick={() => handleAction(action)}
|
||||
disabled={isDisabled}
|
||||
className={`
|
||||
relative overflow-visible cursor-default hover:cursor-pointer group
|
||||
disabled:cursor-not-allowed
|
||||
${large ? "rounded-full bg-neutral-800 p-3" : ""}
|
||||
transition-all duration-300 ease-in-out
|
||||
`}
|
||||
type="button"
|
||||
>
|
||||
<span className="relative z-10 group-hover:filter group-hover:drop-shadow-[0_0_5px_rgba(255,64,0,0.4)]">
|
||||
{children}
|
||||
</span>
|
||||
<span className="absolute -inset-[5px] border-2 border-red-400/40 rounded-full opacity-0 group-hover:opacity-100 transition-opacity duration-300 ease-in-out" />
|
||||
</button>
|
||||
</Tooltip>
|
||||
);
|
||||
}
|
||||
|
||||
function AgentControlBar() {
|
||||
const { send } = useWsClient();
|
||||
const { curAgentState } = useSelector((state: RootState) => state.agent);
|
||||
|
||||
const handleAction = (action: AgentState) => {
|
||||
if (!IgnoreTaskStateMap[action].includes(curAgentState)) {
|
||||
send(generateAgentStateChangeEvent(action));
|
||||
}
|
||||
};
|
||||
|
||||
return (
|
||||
<div className="flex justify-between items-center gap-20">
|
||||
<ActionButton
|
||||
isDisabled={
|
||||
curAgentState !== AgentState.RUNNING &&
|
||||
curAgentState !== AgentState.PAUSED
|
||||
}
|
||||
content={
|
||||
curAgentState === AgentState.PAUSED
|
||||
? "Resume the agent task"
|
||||
: "Pause the current task"
|
||||
}
|
||||
action={
|
||||
curAgentState === AgentState.PAUSED
|
||||
? AgentState.RUNNING
|
||||
: AgentState.PAUSED
|
||||
}
|
||||
handleAction={handleAction}
|
||||
large
|
||||
>
|
||||
{curAgentState === AgentState.PAUSED ? <PlayIcon /> : <PauseIcon />}
|
||||
</ActionButton>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
export default AgentControlBar;
|
||||
132
frontend/src/components/agent-status-bar.tsx
Normal file
132
frontend/src/components/agent-status-bar.tsx
Normal file
@@ -0,0 +1,132 @@
|
||||
import React, { useEffect } from "react";
|
||||
import { useTranslation } from "react-i18next";
|
||||
import { useSelector } from "react-redux";
|
||||
import toast from "react-hot-toast";
|
||||
import { I18nKey } from "#/i18n/declaration";
|
||||
import { RootState } from "#/store";
|
||||
import AgentState from "#/types/agent-state";
|
||||
import beep from "#/utils/beep";
|
||||
|
||||
enum IndicatorColor {
|
||||
BLUE = "bg-blue-500",
|
||||
GREEN = "bg-green-500",
|
||||
ORANGE = "bg-orange-500",
|
||||
YELLOW = "bg-yellow-500",
|
||||
RED = "bg-red-500",
|
||||
DARK_ORANGE = "bg-orange-800",
|
||||
}
|
||||
|
||||
function AgentStatusBar() {
|
||||
const { t, i18n } = useTranslation();
|
||||
const { curAgentState } = useSelector((state: RootState) => state.agent);
|
||||
const { curStatusMessage } = useSelector((state: RootState) => state.status);
|
||||
|
||||
const AgentStatusMap: {
|
||||
[k: string]: { message: string; indicator: IndicatorColor };
|
||||
} = {
|
||||
[AgentState.INIT]: {
|
||||
message: t(I18nKey.CHAT_INTERFACE$AGENT_INIT_MESSAGE),
|
||||
indicator: IndicatorColor.BLUE,
|
||||
},
|
||||
[AgentState.RUNNING]: {
|
||||
message: t(I18nKey.CHAT_INTERFACE$AGENT_RUNNING_MESSAGE),
|
||||
indicator: IndicatorColor.GREEN,
|
||||
},
|
||||
[AgentState.AWAITING_USER_INPUT]: {
|
||||
message: t(I18nKey.CHAT_INTERFACE$AGENT_AWAITING_USER_INPUT_MESSAGE),
|
||||
indicator: IndicatorColor.ORANGE,
|
||||
},
|
||||
[AgentState.PAUSED]: {
|
||||
message: t(I18nKey.CHAT_INTERFACE$AGENT_PAUSED_MESSAGE),
|
||||
indicator: IndicatorColor.YELLOW,
|
||||
},
|
||||
[AgentState.LOADING]: {
|
||||
message: t(I18nKey.CHAT_INTERFACE$INITIALIZING_AGENT_LOADING_MESSAGE),
|
||||
indicator: IndicatorColor.DARK_ORANGE,
|
||||
},
|
||||
[AgentState.STOPPED]: {
|
||||
message: t(I18nKey.CHAT_INTERFACE$AGENT_STOPPED_MESSAGE),
|
||||
indicator: IndicatorColor.RED,
|
||||
},
|
||||
[AgentState.FINISHED]: {
|
||||
message: t(I18nKey.CHAT_INTERFACE$AGENT_FINISHED_MESSAGE),
|
||||
indicator: IndicatorColor.GREEN,
|
||||
},
|
||||
[AgentState.REJECTED]: {
|
||||
message: t(I18nKey.CHAT_INTERFACE$AGENT_REJECTED_MESSAGE),
|
||||
indicator: IndicatorColor.YELLOW,
|
||||
},
|
||||
[AgentState.ERROR]: {
|
||||
message: t(I18nKey.CHAT_INTERFACE$AGENT_ERROR_MESSAGE),
|
||||
indicator: IndicatorColor.RED,
|
||||
},
|
||||
[AgentState.AWAITING_USER_CONFIRMATION]: {
|
||||
message: t(
|
||||
I18nKey.CHAT_INTERFACE$AGENT_AWAITING_USER_CONFIRMATION_MESSAGE,
|
||||
),
|
||||
indicator: IndicatorColor.ORANGE,
|
||||
},
|
||||
[AgentState.USER_CONFIRMED]: {
|
||||
message: t(I18nKey.CHAT_INTERFACE$AGENT_ACTION_USER_CONFIRMED_MESSAGE),
|
||||
indicator: IndicatorColor.GREEN,
|
||||
},
|
||||
[AgentState.USER_REJECTED]: {
|
||||
message: t(I18nKey.CHAT_INTERFACE$AGENT_ACTION_USER_REJECTED_MESSAGE),
|
||||
indicator: IndicatorColor.RED,
|
||||
},
|
||||
};
|
||||
|
||||
// TODO: Extend the agent status, e.g.:
|
||||
// - Agent is typing
|
||||
// - Agent is initializing
|
||||
// - Agent is thinking
|
||||
// - Agent is ready
|
||||
// - Agent is not available
|
||||
useEffect(() => {
|
||||
if (
|
||||
curAgentState === AgentState.AWAITING_USER_INPUT ||
|
||||
curAgentState === AgentState.ERROR ||
|
||||
curAgentState === AgentState.INIT
|
||||
) {
|
||||
if (document.cookie.indexOf("audio") !== -1) beep();
|
||||
}
|
||||
}, [curAgentState]);
|
||||
|
||||
const [statusMessage, setStatusMessage] = React.useState<string>("");
|
||||
|
||||
React.useEffect(() => {
|
||||
let message = curStatusMessage.message || "";
|
||||
if (curStatusMessage?.id) {
|
||||
const id = curStatusMessage.id.trim();
|
||||
if (i18n.exists(id)) {
|
||||
message = t(curStatusMessage.id.trim()) || message;
|
||||
}
|
||||
}
|
||||
if (curStatusMessage?.type === "error") {
|
||||
toast.error(message);
|
||||
return;
|
||||
}
|
||||
if (curAgentState === AgentState.LOADING && message.trim()) {
|
||||
setStatusMessage(message);
|
||||
} else {
|
||||
setStatusMessage(AgentStatusMap[curAgentState].message);
|
||||
}
|
||||
}, [curStatusMessage.id]);
|
||||
|
||||
React.useEffect(() => {
|
||||
setStatusMessage(AgentStatusMap[curAgentState].message);
|
||||
}, [curAgentState]);
|
||||
|
||||
return (
|
||||
<div className="flex flex-col items-center">
|
||||
<div className="flex items-center bg-neutral-800 px-2 py-1 text-gray-400 rounded-[100px] text-sm gap-[6px]">
|
||||
<div
|
||||
className={`w-2 h-2 rounded-full animate-pulse ${AgentStatusMap[curAgentState].indicator}`}
|
||||
/>
|
||||
<span className="text-sm text-stone-400">{statusMessage}</span>
|
||||
</div>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
export default AgentStatusBar;
|
||||
@@ -1,64 +0,0 @@
|
||||
import { I18nKey } from "#/i18n/declaration";
|
||||
import AgentState from "#/types/agent-state";
|
||||
|
||||
enum IndicatorColor {
|
||||
BLUE = "bg-blue-500",
|
||||
GREEN = "bg-green-500",
|
||||
ORANGE = "bg-orange-500",
|
||||
YELLOW = "bg-yellow-500",
|
||||
RED = "bg-red-500",
|
||||
DARK_ORANGE = "bg-orange-800",
|
||||
}
|
||||
|
||||
export const AGENT_STATUS_MAP: {
|
||||
[k: string]: { message: string; indicator: IndicatorColor };
|
||||
} = {
|
||||
[AgentState.INIT]: {
|
||||
message: I18nKey.CHAT_INTERFACE$AGENT_INIT_MESSAGE,
|
||||
indicator: IndicatorColor.BLUE,
|
||||
},
|
||||
[AgentState.RUNNING]: {
|
||||
message: I18nKey.CHAT_INTERFACE$AGENT_RUNNING_MESSAGE,
|
||||
indicator: IndicatorColor.GREEN,
|
||||
},
|
||||
[AgentState.AWAITING_USER_INPUT]: {
|
||||
message: I18nKey.CHAT_INTERFACE$AGENT_AWAITING_USER_INPUT_MESSAGE,
|
||||
indicator: IndicatorColor.ORANGE,
|
||||
},
|
||||
[AgentState.PAUSED]: {
|
||||
message: I18nKey.CHAT_INTERFACE$AGENT_PAUSED_MESSAGE,
|
||||
indicator: IndicatorColor.YELLOW,
|
||||
},
|
||||
[AgentState.LOADING]: {
|
||||
message: I18nKey.CHAT_INTERFACE$INITIALIZING_AGENT_LOADING_MESSAGE,
|
||||
indicator: IndicatorColor.DARK_ORANGE,
|
||||
},
|
||||
[AgentState.STOPPED]: {
|
||||
message: I18nKey.CHAT_INTERFACE$AGENT_STOPPED_MESSAGE,
|
||||
indicator: IndicatorColor.RED,
|
||||
},
|
||||
[AgentState.FINISHED]: {
|
||||
message: I18nKey.CHAT_INTERFACE$AGENT_FINISHED_MESSAGE,
|
||||
indicator: IndicatorColor.GREEN,
|
||||
},
|
||||
[AgentState.REJECTED]: {
|
||||
message: I18nKey.CHAT_INTERFACE$AGENT_REJECTED_MESSAGE,
|
||||
indicator: IndicatorColor.YELLOW,
|
||||
},
|
||||
[AgentState.ERROR]: {
|
||||
message: I18nKey.CHAT_INTERFACE$AGENT_ERROR_MESSAGE,
|
||||
indicator: IndicatorColor.RED,
|
||||
},
|
||||
[AgentState.AWAITING_USER_CONFIRMATION]: {
|
||||
message: I18nKey.CHAT_INTERFACE$AGENT_AWAITING_USER_CONFIRMATION_MESSAGE,
|
||||
indicator: IndicatorColor.ORANGE,
|
||||
},
|
||||
[AgentState.USER_CONFIRMED]: {
|
||||
message: I18nKey.CHAT_INTERFACE$AGENT_ACTION_USER_CONFIRMED_MESSAGE,
|
||||
indicator: IndicatorColor.GREEN,
|
||||
},
|
||||
[AgentState.USER_REJECTED]: {
|
||||
message: I18nKey.CHAT_INTERFACE$AGENT_ACTION_USER_REJECTED_MESSAGE,
|
||||
indicator: IndicatorColor.RED,
|
||||
},
|
||||
};
|
||||
@@ -1,10 +1,10 @@
|
||||
import { ModalButton } from "#/components/shared/buttons/modal-button";
|
||||
import { ModalBackdrop } from "./modals/modal-backdrop";
|
||||
import ModalBody from "./modals/modal-body";
|
||||
import ModalButton from "./buttons/modal-button";
|
||||
import {
|
||||
BaseModalTitle,
|
||||
BaseModalDescription,
|
||||
} from "#/components/shared/modals/confirmation-modals/base-modal";
|
||||
import { ModalBackdrop } from "#/components/shared/modals/modal-backdrop";
|
||||
import { ModalBody } from "#/components/shared/modals/modal-body";
|
||||
} from "./modals/confirmation-modals/base-modal";
|
||||
import { handleCaptureConsent } from "#/utils/handle-capture-consent";
|
||||
|
||||
interface AnalyticsConsentFormModalProps {
|
||||
@@ -1,9 +1,12 @@
|
||||
import { useTranslation } from "react-i18next";
|
||||
import { IoIosGlobe } from "react-icons/io";
|
||||
import { useSelector } from "react-redux";
|
||||
import { I18nKey } from "#/i18n/declaration";
|
||||
import { RootState } from "#/store";
|
||||
import { BrowserSnapshot } from "./browser-snapshot";
|
||||
import { EmptyBrowserMessage } from "./empty-browser-message";
|
||||
|
||||
export function BrowserPanel() {
|
||||
function BrowserPanel() {
|
||||
const { t } = useTranslation();
|
||||
|
||||
const { url, screenshotSrc } = useSelector(
|
||||
(state: RootState) => state.browser,
|
||||
);
|
||||
@@ -20,11 +23,21 @@ export function BrowserPanel() {
|
||||
</div>
|
||||
<div className="overflow-y-auto grow scrollbar-hide rounded-xl">
|
||||
{screenshotSrc ? (
|
||||
<BrowserSnapshot src={imgSrc} />
|
||||
<img
|
||||
src={imgSrc}
|
||||
style={{ objectFit: "contain", width: "100%", height: "auto" }}
|
||||
className="rounded-xl"
|
||||
alt="Browser Screenshot"
|
||||
/>
|
||||
) : (
|
||||
<EmptyBrowserMessage />
|
||||
<div className="flex flex-col items-center h-full justify-center">
|
||||
<IoIosGlobe size={100} />
|
||||
{t(I18nKey.BROWSER$EMPTY_MESSAGE)}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
export default BrowserPanel;
|
||||
@@ -13,7 +13,7 @@ interface ModalButtonProps {
|
||||
intent?: string;
|
||||
}
|
||||
|
||||
export function ModalButton({
|
||||
function ModalButton({
|
||||
testId,
|
||||
variant = "default",
|
||||
onClick,
|
||||
@@ -45,3 +45,5 @@ export function ModalButton({
|
||||
</button>
|
||||
);
|
||||
}
|
||||
|
||||
export default ModalButton;
|
||||
@@ -1,8 +1,7 @@
|
||||
import React from "react";
|
||||
import TextareaAutosize from "react-textarea-autosize";
|
||||
import ArrowSendIcon from "#/icons/arrow-send.svg?react";
|
||||
import { cn } from "#/utils/utils";
|
||||
import { SubmitButton } from "#/components/shared/buttons/submit-button";
|
||||
import { StopButton } from "#/components/shared/buttons/stop-button";
|
||||
|
||||
interface ChatInputProps {
|
||||
name?: string;
|
||||
@@ -133,10 +132,27 @@ export function ChatInput({
|
||||
{showButton && (
|
||||
<div className={buttonClassName}>
|
||||
{button === "submit" && (
|
||||
<SubmitButton isDisabled={disabled} onClick={handleSubmitMessage} />
|
||||
<button
|
||||
aria-label="Send"
|
||||
disabled={disabled}
|
||||
onClick={handleSubmitMessage}
|
||||
type="submit"
|
||||
className="border border-white rounded-lg w-6 h-6 hover:bg-neutral-500 focus:bg-neutral-500 flex items-center justify-center"
|
||||
>
|
||||
<ArrowSendIcon />
|
||||
</button>
|
||||
)}
|
||||
{button === "stop" && (
|
||||
<StopButton isDisabled={disabled} onClick={onStop} />
|
||||
<button
|
||||
data-testid="stop-button"
|
||||
aria-label="Stop"
|
||||
disabled={disabled}
|
||||
onClick={onStop}
|
||||
type="button"
|
||||
className="border border-white rounded-lg w-6 h-6 hover:bg-neutral-500 focus:bg-neutral-500 flex items-center justify-center"
|
||||
>
|
||||
<div className="w-[10px] h-[10px] bg-white" />
|
||||
</button>
|
||||
)}
|
||||
</div>
|
||||
)}
|
||||
@@ -1,10 +1,11 @@
|
||||
import React from "react";
|
||||
import Markdown from "react-markdown";
|
||||
import remarkGfm from "remark-gfm";
|
||||
import { code } from "../markdown/code";
|
||||
import CheckmarkIcon from "#/icons/checkmark.svg?react";
|
||||
import CopyIcon from "#/icons/copy.svg?react";
|
||||
import { code } from "./markdown/code";
|
||||
import { cn } from "#/utils/utils";
|
||||
import { ul, ol } from "../markdown/list";
|
||||
import { CopyToClipboardButton } from "#/components/shared/buttons/copy-to-clipboard-button";
|
||||
import { ul, ol } from "./markdown/list";
|
||||
|
||||
interface ChatMessageProps {
|
||||
type: "user" | "assistant";
|
||||
@@ -47,15 +48,26 @@ export function ChatMessage({
|
||||
"rounded-xl relative",
|
||||
"flex flex-col gap-2",
|
||||
type === "user" && " max-w-[305px] p-4 bg-neutral-700 self-end",
|
||||
type === "assistant" && "mt-6 max-w-full bg-tranparent",
|
||||
type === "assistant" && "pb-4 max-w-full bg-tranparent",
|
||||
)}
|
||||
>
|
||||
<CopyToClipboardButton
|
||||
isHidden={!isHovering}
|
||||
isDisabled={isCopy}
|
||||
<button
|
||||
hidden={!isHovering}
|
||||
disabled={isCopy}
|
||||
data-testid="copy-to-clipboard"
|
||||
type="button"
|
||||
onClick={handleCopyToClipboard}
|
||||
mode={isCopy ? "copied" : "copy"}
|
||||
/>
|
||||
className={cn(
|
||||
"bg-neutral-700 border border-neutral-600 rounded p-1",
|
||||
"absolute top-1 right-1",
|
||||
)}
|
||||
>
|
||||
{!isCopy ? (
|
||||
<CopyIcon width={15} height={15} />
|
||||
) : (
|
||||
<CheckmarkIcon width={15} height={15} />
|
||||
)}
|
||||
</button>
|
||||
<Markdown
|
||||
className="text-sm overflow-auto"
|
||||
components={{
|
||||
64
frontend/src/components/chat/confirmation-buttons.tsx
Normal file
64
frontend/src/components/chat/confirmation-buttons.tsx
Normal file
@@ -0,0 +1,64 @@
|
||||
import { Tooltip } from "@nextui-org/react";
|
||||
import { useTranslation } from "react-i18next";
|
||||
import ConfirmIcon from "#/assets/confirm";
|
||||
import RejectIcon from "#/assets/reject";
|
||||
import { I18nKey } from "#/i18n/declaration";
|
||||
import AgentState from "#/types/agent-state";
|
||||
import { generateAgentStateChangeEvent } from "#/services/agent-state-service";
|
||||
import { useWsClient } from "#/context/ws-client-provider";
|
||||
|
||||
interface ActionTooltipProps {
|
||||
type: "confirm" | "reject";
|
||||
onClick: () => void;
|
||||
}
|
||||
|
||||
function ActionTooltip({ type, onClick }: ActionTooltipProps) {
|
||||
const { t } = useTranslation();
|
||||
|
||||
const content =
|
||||
type === "confirm"
|
||||
? t(I18nKey.CHAT_INTERFACE$USER_CONFIRMED)
|
||||
: t(I18nKey.CHAT_INTERFACE$USER_REJECTED);
|
||||
|
||||
return (
|
||||
<Tooltip content={content} closeDelay={100}>
|
||||
<button
|
||||
data-testid={`action-${type}-button`}
|
||||
type="button"
|
||||
aria-label={type === "confirm" ? "Confirm action" : "Reject action"}
|
||||
className="bg-neutral-700 rounded-full p-1 hover:bg-neutral-800"
|
||||
onClick={onClick}
|
||||
>
|
||||
{type === "confirm" ? <ConfirmIcon /> : <RejectIcon />}
|
||||
</button>
|
||||
</Tooltip>
|
||||
);
|
||||
}
|
||||
|
||||
function ConfirmationButtons() {
|
||||
const { t } = useTranslation();
|
||||
const { send } = useWsClient();
|
||||
|
||||
const handleStateChange = (state: AgentState) => {
|
||||
const event = generateAgentStateChangeEvent(state);
|
||||
send(event);
|
||||
};
|
||||
|
||||
return (
|
||||
<div className="flex justify-between items-center pt-4">
|
||||
<p>{t(I18nKey.CHAT_INTERFACE$USER_ASK_CONFIRMATION)}</p>
|
||||
<div className="flex items-center gap-3">
|
||||
<ActionTooltip
|
||||
type="confirm"
|
||||
onClick={() => handleStateChange(AgentState.USER_CONFIRMED)}
|
||||
/>
|
||||
<ActionTooltip
|
||||
type="reject"
|
||||
onClick={() => handleStateChange(AgentState.USER_REJECTED)}
|
||||
/>
|
||||
</div>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
export default ConfirmationButtons;
|
||||
12
frontend/src/components/chat/message.d.ts
vendored
Normal file
12
frontend/src/components/chat/message.d.ts
vendored
Normal file
@@ -0,0 +1,12 @@
|
||||
type Message = {
|
||||
sender: "user" | "assistant";
|
||||
content: string;
|
||||
imageUrls: string[];
|
||||
timestamp: string;
|
||||
};
|
||||
|
||||
type ErrorMessage = {
|
||||
error: boolean;
|
||||
id?: string;
|
||||
message: string;
|
||||
};
|
||||
@@ -1,4 +1,6 @@
|
||||
export function TypingIndicator() {
|
||||
import React from "react";
|
||||
|
||||
function TypingIndicator(): React.ReactElement {
|
||||
return (
|
||||
<div className="flex items-center space-x-1.5 bg-neutral-700 px-3 py-1.5 rounded-full">
|
||||
<span
|
||||
@@ -16,3 +18,5 @@ export function TypingIndicator() {
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
export default TypingIndicator;
|
||||
@@ -1,6 +1,14 @@
|
||||
import { NavLink } from "@remix-run/react";
|
||||
import clsx from "clsx";
|
||||
import React from "react";
|
||||
import { NavTab } from "./nav-tab";
|
||||
|
||||
function BetaBadge() {
|
||||
return (
|
||||
<span className="text-[11px] leading-5 text-root-primary bg-neutral-400 px-1 rounded-xl">
|
||||
Beta
|
||||
</span>
|
||||
);
|
||||
}
|
||||
|
||||
interface ContainerProps {
|
||||
label?: string;
|
||||
@@ -30,7 +38,23 @@ export function Container({
|
||||
{labels && (
|
||||
<div className="flex text-xs h-[36px]">
|
||||
{labels.map(({ label: l, to, icon, isBeta }) => (
|
||||
<NavTab key={to} to={to} label={l} icon={icon} isBeta={isBeta} />
|
||||
<NavLink
|
||||
end
|
||||
key={to}
|
||||
to={to}
|
||||
className={({ isActive }) =>
|
||||
clsx(
|
||||
"px-2 border-b border-r border-neutral-600 bg-root-primary flex-1",
|
||||
"first-of-type:rounded-tl-xl last-of-type:rounded-tr-xl last-of-type:border-r-0",
|
||||
"flex items-center gap-2",
|
||||
isActive && "bg-root-secondary",
|
||||
)
|
||||
}
|
||||
>
|
||||
{icon}
|
||||
{l}
|
||||
{isBeta && <BetaBadge />}
|
||||
</NavLink>
|
||||
))}
|
||||
</div>
|
||||
)}
|
||||
@@ -11,7 +11,7 @@ export function ContinueButton({ onClick }: ContinueButtonProps) {
|
||||
type="button"
|
||||
onClick={onClick}
|
||||
className={cn(
|
||||
"button-base px-2 py-1",
|
||||
"px-2 py-1 bg-neutral-700 border border-neutral-600 rounded",
|
||||
"text-[11px] leading-4 tracking-[0.01em] font-[500]",
|
||||
"flex items-center gap-2",
|
||||
)}
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user