mirror of
https://github.com/All-Hands-AI/OpenHands.git
synced 2026-04-29 03:00:45 -04:00
Compare commits
77 Commits
e2e-gitlab
...
feat/pause
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
cedd94009b | ||
|
|
23713bfe8c | ||
|
|
81829289ab | ||
|
|
9709431874 | ||
|
|
0e9906f41e | ||
|
|
9ac9a47207 | ||
|
|
75653e805a | ||
|
|
9630b536cd | ||
|
|
6f5c8186b8 | ||
|
|
36e0d8d3da | ||
|
|
e68abf8d75 | ||
|
|
93ef1b0cda | ||
|
|
77b5c6b161 | ||
|
|
57aa7d5c12 | ||
|
|
50391ecdf3 | ||
|
|
672650d3d9 | ||
|
|
9afedea170 | ||
|
|
c0bb84dfa2 | ||
|
|
18b5139237 | ||
|
|
4849369ede | ||
|
|
b082ccc0fb | ||
|
|
b0007076c0 | ||
|
|
4a4f213f57 | ||
|
|
f9099fe6db | ||
|
|
8f46a0a7a3 | ||
|
|
55d204ae1b | ||
|
|
4d7cd228da | ||
|
|
a3f92df4b3 | ||
|
|
e41f8f5215 | ||
|
|
6448f5a681 | ||
|
|
5fcc648d5f | ||
|
|
c9d96038c1 | ||
|
|
408af4e012 | ||
|
|
d9ac2faff6 | ||
|
|
64383a66e2 | ||
|
|
7fbcb29499 | ||
|
|
e7aae1495c | ||
|
|
d33f27d141 | ||
|
|
d08851859b | ||
|
|
7f4d311294 | ||
|
|
049f058ed1 | ||
|
|
bb6cf5a816 | ||
|
|
d9bc5824a0 | ||
|
|
fd5b5075d6 | ||
|
|
f5cd7b256d | ||
|
|
df86fd275d | ||
|
|
d22a2e39e7 | ||
|
|
ca424ec15d | ||
|
|
4507a25b85 | ||
|
|
d9cf5b7302 | ||
|
|
2a86e32263 | ||
|
|
b311ae6e15 | ||
|
|
adb773789a | ||
|
|
91d3d1d20a | ||
|
|
e9e2c98946 | ||
|
|
7861c1ddf7 | ||
|
|
5ce5469bfa | ||
|
|
4a3f5dd9b4 | ||
|
|
bc8b995dd3 | ||
|
|
07c4742496 | ||
|
|
b5887f8a9d | ||
|
|
0166df6575 | ||
|
|
e03a1f4e37 | ||
|
|
c763f0e368 | ||
|
|
bb0e24d23b | ||
|
|
aa6b454772 | ||
|
|
0297b3da18 | ||
|
|
476954f3a4 | ||
|
|
f296d7bde5 | ||
|
|
f866b3f8ea | ||
|
|
36d31b74f7 | ||
|
|
634a7691a2 | ||
|
|
81ba4399fa | ||
|
|
875036d920 | ||
|
|
39333dd5de | ||
|
|
3660933d59 | ||
|
|
baf2cc5c7e |
5
.github/workflows/e2e-tests.yml
vendored
5
.github/workflows/e2e-tests.yml
vendored
@@ -22,7 +22,7 @@ jobs:
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Install poetry via pipx
|
||||
uses: abatilo/actions-poetry@v3
|
||||
uses: abatilo/actions-poetry@v4
|
||||
with:
|
||||
poetry-version: 2.1.3
|
||||
|
||||
@@ -169,7 +169,6 @@ jobs:
|
||||
- name: Run end-to-end tests
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.E2E_TEST_GITHUB_TOKEN }}
|
||||
GITLAB_TOKEN: ${{ secrets.GITLAB_TOKEN }}
|
||||
LLM_MODEL: ${{ secrets.LLM_MODEL || 'gpt-4o' }}
|
||||
LLM_API_KEY: ${{ secrets.LLM_API_KEY || 'test-key' }}
|
||||
LLM_BASE_URL: ${{ secrets.LLM_BASE_URL }}
|
||||
@@ -188,7 +187,7 @@ jobs:
|
||||
test_settings.py::test_github_token_configuration \
|
||||
test_conversation.py::test_conversation_start \
|
||||
test_browsing_catchphrase.py::test_browsing_catchphrase \
|
||||
test_gitlab_integration.py::test_gitlab_repository_cloning \
|
||||
test_multi_conversation_resume.py::test_multi_conversation_resume \
|
||||
-v --no-header --capture=no --timeout=900
|
||||
|
||||
- name: Upload test results
|
||||
|
||||
2
.github/workflows/py-tests.yml
vendored
2
.github/workflows/py-tests.yml
vendored
@@ -73,7 +73,7 @@ jobs:
|
||||
- name: Install Python dependencies using Poetry
|
||||
run: poetry install --with dev,test,runtime
|
||||
- name: Run Windows unit tests
|
||||
run: poetry run pytest -svv tests/unit/test_windows_bash.py
|
||||
run: poetry run pytest -svv tests/unit/runtime/utils/test_windows_bash.py
|
||||
env:
|
||||
PYTHONPATH: ".;$env:PYTHONPATH"
|
||||
DEBUG: "1"
|
||||
|
||||
2
.github/workflows/stale.yml
vendored
2
.github/workflows/stale.yml
vendored
@@ -15,7 +15,7 @@ jobs:
|
||||
stale-issue-message: 'This issue is stale because it has been open for 40 days with no activity. Remove the stale label or leave a comment, otherwise it will be closed in 10 days.'
|
||||
stale-pr-message: 'This PR is stale because it has been open for 40 days with no activity. Remove the stale label or leave a comment, otherwise it will be closed in 10 days.'
|
||||
days-before-stale: 40
|
||||
exempt-issue-labels: 'roadmap'
|
||||
exempt-issue-labels: roadmap,backlog
|
||||
close-issue-message: 'This issue was automatically closed due to 50 days of inactivity. We do this to help keep the issues somewhat manageable and focus on active issues.'
|
||||
close-pr-message: 'This PR was closed because it had no activity for 50 days. If you feel this was closed in error, and you would like to continue the PR, please resubmit or let us know.'
|
||||
days-before-close: 10
|
||||
|
||||
50
.github/workflows/welcome-good-first-issue.yml
vendored
Normal file
50
.github/workflows/welcome-good-first-issue.yml
vendored
Normal file
@@ -0,0 +1,50 @@
|
||||
name: Welcome Good First Issue
|
||||
|
||||
on:
|
||||
issues:
|
||||
types: [labeled]
|
||||
|
||||
permissions:
|
||||
issues: write
|
||||
|
||||
jobs:
|
||||
comment-on-good-first-issue:
|
||||
if: github.event.label.name == 'good first issue'
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Check if welcome comment already exists
|
||||
id: check_comment
|
||||
uses: actions/github-script@v7
|
||||
with:
|
||||
result-encoding: string
|
||||
script: |
|
||||
const issueNumber = context.issue.number;
|
||||
const comments = await github.rest.issues.listComments({
|
||||
...context.repo,
|
||||
issue_number: issueNumber
|
||||
});
|
||||
|
||||
const alreadyCommented = comments.data.some(
|
||||
(comment) =>
|
||||
comment.body.includes('<!-- auto-comment:good-first-issue -->')
|
||||
);
|
||||
|
||||
return alreadyCommented ? 'true' : 'false';
|
||||
|
||||
- name: Leave welcome comment
|
||||
if: steps.check_comment.outputs.result == 'false'
|
||||
uses: actions/github-script@v7
|
||||
with:
|
||||
script: |
|
||||
const repoUrl = `https://github.com/${context.repo.owner}/${context.repo.repo}`;
|
||||
|
||||
await github.rest.issues.createComment({
|
||||
...context.repo,
|
||||
issue_number: context.issue.number,
|
||||
body: "🙌 **Hey there, future contributor!** 🙌\n\n" +
|
||||
"This issue has been labeled as **good first issue**, which means it's a great place to get started with the OpenHands project.\n\n" +
|
||||
"If you're interested in working on it, feel free to! No need to ask for permission.\n\n" +
|
||||
"Be sure to check out our [development setup guide](" + repoUrl + "/blob/main/Development.md) to get your environment set up, and follow our [contribution guidelines](" + repoUrl + "/blob/main/CONTRIBUTING.md) when you're ready to submit a fix.\n\n" +
|
||||
"🙌 Happy hacking! 🙌\n\n" +
|
||||
"<!-- auto-comment:good-first-issue -->"
|
||||
});
|
||||
2
.gitignore
vendored
2
.gitignore
vendored
@@ -257,3 +257,5 @@ containers/runtime/code
|
||||
|
||||
# test results
|
||||
test-results
|
||||
.sessions
|
||||
.eval_sessions
|
||||
|
||||
@@ -159,7 +159,7 @@ poetry run pytest ./tests/unit/test_*.py
|
||||
To reduce build time (e.g., if no changes were made to the client-runtime component), you can use an existing Docker
|
||||
container image by setting the SANDBOX_RUNTIME_CONTAINER_IMAGE environment variable to the desired Docker image.
|
||||
|
||||
Example: `export SANDBOX_RUNTIME_CONTAINER_IMAGE=ghcr.io/all-hands-ai/runtime:0.53-nikolaik`
|
||||
Example: `export SANDBOX_RUNTIME_CONTAINER_IMAGE=ghcr.io/all-hands-ai/runtime:0.54-nikolaik`
|
||||
|
||||
## Develop inside Docker container
|
||||
|
||||
|
||||
@@ -79,17 +79,17 @@ You'll find OpenHands running at [http://localhost:3000](http://localhost:3000)
|
||||
You can also run OpenHands directly with Docker:
|
||||
|
||||
```bash
|
||||
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.53-nikolaik
|
||||
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.54-nikolaik
|
||||
|
||||
docker run -it --rm --pull=always \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.53-nikolaik \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.54-nikolaik \
|
||||
-e LOG_ALL_EVENTS=true \
|
||||
-v /var/run/docker.sock:/var/run/docker.sock \
|
||||
-v ~/.openhands:/.openhands \
|
||||
-p 3000:3000 \
|
||||
--add-host host.docker.internal:host-gateway \
|
||||
--name openhands-app \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.53
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.54
|
||||
```
|
||||
|
||||
</details>
|
||||
|
||||
@@ -51,17 +51,17 @@ OpenHands也可以使用Docker在本地系统上运行。
|
||||
|
||||
|
||||
```bash
|
||||
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.53-nikolaik
|
||||
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.54-nikolaik
|
||||
|
||||
docker run -it --rm --pull=always \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.53-nikolaik \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.54-nikolaik \
|
||||
-e LOG_ALL_EVENTS=true \
|
||||
-v /var/run/docker.sock:/var/run/docker.sock \
|
||||
-v ~/.openhands:/.openhands \
|
||||
-p 3000:3000 \
|
||||
--add-host host.docker.internal:host-gateway \
|
||||
--name openhands-app \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.53
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.54
|
||||
```
|
||||
|
||||
> **注意**: 如果您在0.44版本之前使用过OpenHands,您可能需要运行 `mv ~/.openhands-state ~/.openhands` 来将对话历史迁移到新位置。
|
||||
|
||||
@@ -42,17 +42,17 @@ OpenHandsはDockerを利用してローカル環境でも実行できます。
|
||||
> 公共ネットワークで実行していますか?[Hardened Docker Installation Guide](https://docs.all-hands.dev/usage/runtimes/docker#hardened-docker-installation)を参照して、ネットワークバインディングの制限や追加のセキュリティ対策を実施してください。
|
||||
|
||||
```bash
|
||||
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.53-nikolaik
|
||||
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.54-nikolaik
|
||||
|
||||
docker run -it --rm --pull=always \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.53-nikolaik \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.54-nikolaik \
|
||||
-e LOG_ALL_EVENTS=true \
|
||||
-v /var/run/docker.sock:/var/run/docker.sock \
|
||||
-v ~/.openhands:/.openhands \
|
||||
-p 3000:3000 \
|
||||
--add-host host.docker.internal:host-gateway \
|
||||
--name openhands-app \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.53
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.54
|
||||
```
|
||||
|
||||
**注**: バージョン0.44以前のOpenHandsを使用していた場合は、会話履歴を移行するために `mv ~/.openhands-state ~/.openhands` を実行してください。
|
||||
|
||||
@@ -363,10 +363,11 @@ classpath = "my_package.my_module.MyCustomAgent"
|
||||
#confirmation_mode = false
|
||||
|
||||
# The security analyzer to use (For Headless / CLI only - In Web this is overridden by Session Init)
|
||||
#security_analyzer = ""
|
||||
# Available options: 'llm' (default), 'invariant'
|
||||
#security_analyzer = "llm"
|
||||
|
||||
# Whether to enable security analyzer
|
||||
#enable_security_analyzer = false
|
||||
#enable_security_analyzer = true
|
||||
|
||||
#################################### Condenser #################################
|
||||
# Condensers control how conversation history is managed and compressed when
|
||||
|
||||
@@ -21,7 +21,7 @@ ENV POETRY_NO_INTERACTION=1 \
|
||||
POETRY_CACHE_DIR=/tmp/poetry_cache
|
||||
|
||||
RUN apt-get update -y \
|
||||
&& apt-get install -y curl make git build-essential \
|
||||
&& apt-get install -y curl make git build-essential jq gettext \
|
||||
&& python3 -m pip install poetry --break-system-packages
|
||||
|
||||
COPY pyproject.toml poetry.lock ./
|
||||
@@ -58,34 +58,34 @@ RUN sed -i 's/^UID_MIN.*/UID_MIN 499/' /etc/login.defs
|
||||
# Default is 60000, but we've seen up to 200000
|
||||
RUN sed -i 's/^UID_MAX.*/UID_MAX 1000000/' /etc/login.defs
|
||||
|
||||
RUN groupadd --gid $OPENHANDS_USER_ID app
|
||||
RUN groupadd --gid $OPENHANDS_USER_ID openhands
|
||||
RUN useradd -l -m -u $OPENHANDS_USER_ID --gid $OPENHANDS_USER_ID -s /bin/bash openhands && \
|
||||
usermod -aG app openhands && \
|
||||
usermod -aG openhands openhands && \
|
||||
usermod -aG sudo openhands && \
|
||||
echo '%sudo ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers
|
||||
RUN chown -R openhands:app /app && chmod -R 770 /app
|
||||
RUN sudo chown -R openhands:app $WORKSPACE_BASE && sudo chmod -R 770 $WORKSPACE_BASE
|
||||
RUN chown -R openhands:openhands /app && chmod -R 770 /app
|
||||
RUN sudo chown -R openhands:openhands $WORKSPACE_BASE && sudo chmod -R 770 $WORKSPACE_BASE
|
||||
USER openhands
|
||||
|
||||
ENV VIRTUAL_ENV=/app/.venv \
|
||||
PATH="/app/.venv/bin:$PATH" \
|
||||
PYTHONPATH='/app'
|
||||
|
||||
COPY --chown=openhands:app --chmod=770 --from=backend-builder ${VIRTUAL_ENV} ${VIRTUAL_ENV}
|
||||
COPY --chown=openhands:openhands --chmod=770 --from=backend-builder ${VIRTUAL_ENV} ${VIRTUAL_ENV}
|
||||
|
||||
COPY --chown=openhands:app --chmod=770 ./microagents ./microagents
|
||||
COPY --chown=openhands:app --chmod=770 ./openhands ./openhands
|
||||
COPY --chown=openhands:app --chmod=777 ./openhands/runtime/plugins ./openhands/runtime/plugins
|
||||
COPY --chown=openhands:app pyproject.toml poetry.lock README.md MANIFEST.in LICENSE ./
|
||||
COPY --chown=openhands:openhands --chmod=770 ./microagents ./microagents
|
||||
COPY --chown=openhands:openhands --chmod=770 ./openhands ./openhands
|
||||
COPY --chown=openhands:openhands --chmod=777 ./openhands/runtime/plugins ./openhands/runtime/plugins
|
||||
COPY --chown=openhands:openhands pyproject.toml poetry.lock README.md MANIFEST.in LICENSE ./
|
||||
|
||||
# This is run as "openhands" user, and will create __pycache__ with openhands:openhands ownership
|
||||
RUN python openhands/core/download.py # No-op to download assets
|
||||
# Add this line to set group ownership of all files/directories not already in "app" group
|
||||
# openhands:openhands -> openhands:app
|
||||
RUN find /app \! -group app -exec chgrp app {} +
|
||||
# openhands:openhands -> openhands:openhands
|
||||
RUN find /app \! -group openhands -exec chgrp openhands {} +
|
||||
|
||||
COPY --chown=openhands:app --chmod=770 --from=frontend-builder /app/build ./frontend/build
|
||||
COPY --chown=openhands:app --chmod=770 ./containers/app/entrypoint.sh /app/entrypoint.sh
|
||||
COPY --chown=openhands:openhands --chmod=770 --from=frontend-builder /app/build ./frontend/build
|
||||
COPY --chown=openhands:openhands --chmod=770 ./containers/app/entrypoint.sh /app/entrypoint.sh
|
||||
|
||||
USER root
|
||||
|
||||
|
||||
@@ -54,7 +54,7 @@ else
|
||||
fi
|
||||
fi
|
||||
fi
|
||||
usermod -aG app enduser
|
||||
usermod -aG openhands enduser
|
||||
# get the user group of /var/run/docker.sock and set openhands to that group
|
||||
DOCKER_SOCKET_GID=$(stat -c '%g' /var/run/docker.sock)
|
||||
echo "Docker socket group id: $DOCKER_SOCKET_GID"
|
||||
|
||||
@@ -12,7 +12,7 @@ services:
|
||||
- SANDBOX_API_HOSTNAME=host.docker.internal
|
||||
- DOCKER_HOST_ADDR=host.docker.internal
|
||||
#
|
||||
- SANDBOX_RUNTIME_CONTAINER_IMAGE=${SANDBOX_RUNTIME_CONTAINER_IMAGE:-ghcr.io/all-hands-ai/runtime:0.53-nikolaik}
|
||||
- SANDBOX_RUNTIME_CONTAINER_IMAGE=${SANDBOX_RUNTIME_CONTAINER_IMAGE:-ghcr.io/all-hands-ai/runtime:0.54-nikolaik}
|
||||
- SANDBOX_USER_ID=${SANDBOX_USER_ID:-1234}
|
||||
- WORKSPACE_MOUNT_PATH=${WORKSPACE_BASE:-$PWD/workspace}
|
||||
ports:
|
||||
|
||||
@@ -7,7 +7,7 @@ services:
|
||||
image: openhands:latest
|
||||
container_name: openhands-app-${DATE:-}
|
||||
environment:
|
||||
- SANDBOX_RUNTIME_CONTAINER_IMAGE=${SANDBOX_RUNTIME_CONTAINER_IMAGE:-docker.all-hands.dev/all-hands-ai/runtime:0.53-nikolaik}
|
||||
- SANDBOX_RUNTIME_CONTAINER_IMAGE=${SANDBOX_RUNTIME_CONTAINER_IMAGE:-docker.all-hands.dev/all-hands-ai/runtime:0.54-nikolaik}
|
||||
#- SANDBOX_USER_ID=${SANDBOX_USER_ID:-1234} # enable this only if you want a specific non-root sandbox user but you will have to manually adjust permissions of ~/.openhands for this user
|
||||
- WORKSPACE_MOUNT_PATH=${WORKSPACE_BASE:-$PWD/workspace}
|
||||
ports:
|
||||
|
||||
4744
docs/openapi.json
4744
docs/openapi.json
File diff suppressed because it is too large
Load Diff
@@ -1,5 +1,5 @@
|
||||
---
|
||||
title: Jira Data Center Integration (Beta)
|
||||
title: Jira Data Center Integration (Coming soon...)
|
||||
description: Complete guide for setting up Jira Data Center integration with OpenHands Cloud, including service account creation, personal access token generation, webhook configuration, and workspace integration setup.
|
||||
---
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
---
|
||||
title: Jira Cloud Integration
|
||||
title: Jira Cloud Integration (Coming soon...)
|
||||
description: Complete guide for setting up Jira Cloud integration with OpenHands Cloud, including service account creation, API token generation, webhook configuration, and workspace integration setup.
|
||||
---
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
---
|
||||
title: Linear Integration
|
||||
title: Linear Integration (Coming soon...)
|
||||
description: Complete guide for setting up Linear integration with OpenHands Cloud, including service account creation, API key generation, webhook configuration, and workspace integration setup.
|
||||
---
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
---
|
||||
title: Project Management Tool Integrations
|
||||
title: Project Management Tool Integrations (Coming soon...)
|
||||
description: Overview of OpenHands Cloud integrations with project management platforms including Jira Cloud, Jira Data Center, and Linear. Learn about setup requirements, usage methods, and troubleshooting.
|
||||
---
|
||||
|
||||
@@ -18,9 +18,9 @@ Integration requires two levels of setup:
|
||||
2. **Workspace Integration** - Self-service configuration through the OpenHands Cloud UI to link your OpenHands account to the target workspace
|
||||
|
||||
### Platform-Specific Setup Guides:
|
||||
- [Jira Cloud Integration](./jira-integration.md)
|
||||
- [Jira Data Center Integration](./jira-dc-integration.md)
|
||||
- [Linear Integration](./linear-integration.md)
|
||||
- [Jira Cloud Integration (Coming soon...)](./jira-integration.md)
|
||||
- [Jira Data Center Integration (Coming soon...)](./jira-dc-integration.md)
|
||||
- [Linear Integration (Coming soon...)](./linear-integration.md)
|
||||
|
||||
## Usage
|
||||
|
||||
|
||||
52
docs/usage/confirmation-mode.mdx
Normal file
52
docs/usage/confirmation-mode.mdx
Normal file
@@ -0,0 +1,52 @@
|
||||
# Confirmation Mode and Security Analyzers
|
||||
|
||||
OpenHands provides a security framework to help protect users from potentially risky actions through **Confirmation Mode** and **Security Analyzers**. This system analyzes agent actions and prompts users for confirmation when high-risk operations are detected.
|
||||
|
||||
## Overview
|
||||
|
||||
The security system consists of two main components:
|
||||
|
||||
1. **Confirmation Mode**: When enabled, the agent will pause and ask for user confirmation before executing actions that are flagged as high-risk by the security analyzer.
|
||||
|
||||
2. **Security Analyzers**: These are modules that evaluate the risk level of agent actions and determine whether user confirmation is required.
|
||||
|
||||
## Configuration
|
||||
|
||||
### CLI
|
||||
In CLI mode, confirmation is enabled by default. You will have an option to uses the LLM Analyzer and will automatically confirm LOW and MEDIUM risk actions, only prompting for HIGH risk actions.
|
||||
|
||||
## Security Analyzers
|
||||
|
||||
OpenHands includes multiple analyzers:
|
||||
|
||||
- **No Analyzer**: Do not use any security analyzer. The agent will prompt you to confirm *EVERY* action.
|
||||
- **LLM Risk Analyzer** (default): Uses the same LLM as the agent to assess action risk levels
|
||||
- **Invariant Analyzer**: Uses Invariant Labs' policy engine to evaluate action traces against security policies
|
||||
|
||||
### LLM Risk Analyzer
|
||||
The default analyzer that leverages the agent's LLM to evaluate the security risk of each action. It considers the action type, parameters, and context to assign risk levels.
|
||||
|
||||
### Invariant Analyzer
|
||||
An advanced analyzer that:
|
||||
- Collects conversation events and parses them into a trace
|
||||
- Checks the trace against an Invariant policy to classify risk (low, medium, high)
|
||||
- Manages an Invariant server container automatically if needed
|
||||
- Supports optional browsing-alignment and harmful-content checks
|
||||
|
||||
## How It Works
|
||||
|
||||
1. **Action Analysis**: When the agent wants to perform an action, the selected security analyzer evaluates its risk level.
|
||||
|
||||
2. **Risk Assessment**: The analyzer returns one of three risk levels:
|
||||
- **LOW**: Action proceeds without confirmation
|
||||
- **MEDIUM**: Action proceeds without confirmation (may be configurable in future)
|
||||
- **HIGH**: Action is paused, and user confirmation is requested
|
||||
|
||||
3. **User Confirmation**: For high-risk actions, a confirmation dialog appears with:
|
||||
- Description of the action
|
||||
- Risk assessment explanation
|
||||
- Options to approve or deny action
|
||||
|
||||
4. **Action Execution**: Based on user response:
|
||||
- **Approve**: Action proceeds as planned
|
||||
- **Deny**: Action is cancelled
|
||||
@@ -87,19 +87,13 @@ source ~/.bashrc # or source ~/.zshrc
|
||||
|
||||
</AccordionGroup>
|
||||
|
||||
3. Launch an interactive OpenHands conversation from the command line:
|
||||
```bash
|
||||
# If using uvx (recommended)
|
||||
uvx --python 3.12 --from openhands-ai openhands
|
||||
```
|
||||
|
||||
<Note>
|
||||
If you have cloned the repository, you can also run the CLI directly using Poetry:
|
||||
|
||||
poetry run openhands
|
||||
</Note>
|
||||
|
||||
4. Set your model, API key, and other preferences using the UI (or alternatively environment variables, below).
|
||||
3. Set your model, API key, and other preferences using the UI (or alternatively environment variables, below).
|
||||
|
||||
This command opens an interactive prompt where you can type tasks or commands and get responses from OpenHands.
|
||||
The first time you run the CLI, it will take you through configuring the required LLM
|
||||
@@ -119,7 +113,7 @@ The conversation history will be saved in `~/.openhands/sessions`.
|
||||
```bash
|
||||
docker run -it \
|
||||
--pull=always \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.53-nikolaik \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.54-nikolaik \
|
||||
-e SANDBOX_USER_ID=$(id -u) \
|
||||
-e SANDBOX_VOLUMES=$SANDBOX_VOLUMES \
|
||||
-e LLM_API_KEY=$LLM_API_KEY \
|
||||
@@ -128,8 +122,8 @@ docker run -it \
|
||||
-v ~/.openhands:/.openhands \
|
||||
--add-host host.docker.internal:host-gateway \
|
||||
--name openhands-app-$(date +%Y%m%d%H%M%S) \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.53 \
|
||||
python -m openhands.cli.main --override-cli-mode true
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.54 \
|
||||
python -m openhands.cli.entry --override-cli-mode true
|
||||
```
|
||||
|
||||
<Note>
|
||||
|
||||
@@ -61,7 +61,7 @@ export GITHUB_TOKEN="your-token" # Required for repository operations
|
||||
# Run OpenHands
|
||||
docker run -it \
|
||||
--pull=always \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.53-nikolaik \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.54-nikolaik \
|
||||
-e SANDBOX_USER_ID=$(id -u) \
|
||||
-e SANDBOX_VOLUMES=$SANDBOX_VOLUMES \
|
||||
-e LLM_API_KEY=$LLM_API_KEY \
|
||||
@@ -73,7 +73,7 @@ docker run -it \
|
||||
-v ~/.openhands:/.openhands \
|
||||
--add-host host.docker.internal:host-gateway \
|
||||
--name openhands-app-$(date +%Y%m%d%H%M%S) \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.53 \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.54 \
|
||||
python -m openhands.core.main -t "write a bash script that prints hi"
|
||||
```
|
||||
|
||||
|
||||
@@ -68,23 +68,23 @@ Download and install the LM Studio desktop app from [lmstudio.ai](https://lmstud
|
||||
1. Check [the installation guide](/usage/local-setup) and ensure all prerequisites are met before running OpenHands, then run:
|
||||
|
||||
```bash
|
||||
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.53-nikolaik
|
||||
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.54-nikolaik
|
||||
|
||||
docker run -it --rm --pull=always \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.53-nikolaik \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.54-nikolaik \
|
||||
-e LOG_ALL_EVENTS=true \
|
||||
-v /var/run/docker.sock:/var/run/docker.sock \
|
||||
-v ~/.openhands:/.openhands \
|
||||
-p 3000:3000 \
|
||||
--add-host host.docker.internal:host-gateway \
|
||||
--name openhands-app \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.53
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.54
|
||||
```
|
||||
|
||||
2. Wait until the server is running (see log below):
|
||||
```
|
||||
Digest: sha256:e72f9baecb458aedb9afc2cd5bc935118d1868719e55d50da73190d3a85c674f
|
||||
Status: Image is up to date for docker.all-hands.dev/all-hands-ai/openhands:0.53
|
||||
Status: Image is up to date for docker.all-hands.dev/all-hands-ai/openhands:0.54
|
||||
Starting OpenHands...
|
||||
Running OpenHands as root
|
||||
14:22:13 - openhands:INFO: server_config.py:50 - Using config class None
|
||||
|
||||
@@ -45,6 +45,13 @@ A system with a modern processor and a minimum of **4GB RAM** is recommended to
|
||||
1. [Install WSL](https://learn.microsoft.com/en-us/windows/wsl/install).
|
||||
2. Run `wsl --version` in powershell and confirm `Default Version: 2`.
|
||||
|
||||
**Ubuntu (Linux Distribution)**
|
||||
|
||||
1. Install Ubuntu: `wsl --install -d Ubuntu` in PowerShell as Administrator.
|
||||
2. Restart computer when prompted.
|
||||
3. Open Ubuntu from Start menu to complete setup.
|
||||
4. Verify installation: `wsl --list` should show Ubuntu.
|
||||
|
||||
**Docker Desktop**
|
||||
|
||||
1. [Install Docker Desktop on Windows](https://docs.docker.com/desktop/setup/install/windows-install).
|
||||
@@ -53,7 +60,7 @@ A system with a modern processor and a minimum of **4GB RAM** is recommended to
|
||||
- Resources > WSL Integration: `Enable integration with my default WSL distro` is enabled.
|
||||
|
||||
<Note>
|
||||
The docker command below to start the app must be run inside the WSL terminal.
|
||||
The docker command below to start the app must be run inside the WSL terminal. Use `wsl -d Ubuntu` in PowerShell or search "Ubuntu" in the Start menu to access the Ubuntu terminal.
|
||||
</Note>
|
||||
|
||||
**Alternative: Windows without WSL**
|
||||
@@ -109,17 +116,17 @@ Note that you'll still need `uv` installed for the default MCP servers to work p
|
||||
<Accordion title="Docker Command (Click to expand)">
|
||||
|
||||
```bash
|
||||
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.53-nikolaik
|
||||
docker pull docker.all-hands.dev/all-hands-ai/runtime:0.54-nikolaik
|
||||
|
||||
docker run -it --rm --pull=always \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.53-nikolaik \
|
||||
-e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:0.54-nikolaik \
|
||||
-e LOG_ALL_EVENTS=true \
|
||||
-v /var/run/docker.sock:/var/run/docker.sock \
|
||||
-v ~/.openhands:/.openhands \
|
||||
-p 3000:3000 \
|
||||
--add-host host.docker.internal:host-gateway \
|
||||
--name openhands-app \
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.53
|
||||
docker.all-hands.dev/all-hands-ai/openhands:0.54
|
||||
```
|
||||
|
||||
</Accordion>
|
||||
|
||||
@@ -130,3 +130,28 @@ docker run # ... \
|
||||
<Note>
|
||||
**Docker Desktop Required**: Network isolation features, including custom networks and `host.docker.internal` routing, require Docker Desktop. Docker Engine alone does not support these features on localhost across custom networks. If you're using Docker Engine without Docker Desktop, network isolation may not work as expected.
|
||||
</Note>
|
||||
|
||||
### Sidecar Containers
|
||||
|
||||
If you want to run sidecar containers to the sandbox 'runner' containers without exposing the sandbox containers to the host network, you can use the `SANDBOX_ADDITIONAL_NETWORKS` environment variable to specify additional Docker network names that should be added to the sandbox containers.
|
||||
|
||||
```bash
|
||||
docker network create openhands-sccache
|
||||
|
||||
docker run -d \
|
||||
--hostname openhandsredis \
|
||||
--network openhands-sccache \
|
||||
redis
|
||||
|
||||
docker run # ...
|
||||
-e SANDBOX_ADDITIONAL_NETWORKS='["openhands-sccache"]' \
|
||||
# ...
|
||||
```
|
||||
|
||||
Then all sandbox instances will have to access a shared redis instance at `openhandsredis:6379`.
|
||||
|
||||
#### Docker Compose gotcha
|
||||
|
||||
Note that Docker Compose adds a prefix (a scope) by default to created networks, which is not taken into account by the additional networks config. Therefore when using docker compose you have to either:
|
||||
- specify a network name via the `name` field to remove the scoping (https://docs.docker.com/reference/compose-file/networks/#name)
|
||||
- or provide the scope within the given config (e.g. `SANDBOX_ADDITIONAL_NETWORKS: '["myscope_openhands-sccache"]'` where `myscope` is the docker-compose assigned prefix).
|
||||
@@ -22,7 +22,7 @@ SDK to spawn and control these sandboxes.
|
||||
|
||||
You can use the E2B CLI to create a custom sandbox with a Dockerfile. Read the full guide
|
||||
[here](https://e2b.dev/docs/guide/custom-sandbox). The premade OpenHands sandbox for E2B is set up in the `containers`
|
||||
directory. and it's called `openhands`.
|
||||
directory, and it's called `openhands`.
|
||||
|
||||
## Debugging
|
||||
|
||||
|
||||
@@ -9,7 +9,8 @@ from evaluation.utils.shared import (
|
||||
EvalMetadata,
|
||||
EvalOutput,
|
||||
compatibility_for_eval_history_pairs,
|
||||
get_default_sandbox_config_for_eval,
|
||||
get_metrics,
|
||||
get_openhands_config_for_eval,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
@@ -60,18 +61,15 @@ AGENT_CLS_TO_INST_SUFFIX = {
|
||||
def get_config(
|
||||
metadata: EvalMetadata,
|
||||
) -> OpenHandsConfig:
|
||||
sandbox_config = get_default_sandbox_config_for_eval()
|
||||
sandbox_config.base_container_image = 'python:3.12-bookworm'
|
||||
config = OpenHandsConfig(
|
||||
default_agent=metadata.agent_class,
|
||||
run_as_openhands=False,
|
||||
# Create config with EDA-specific container image
|
||||
config = get_openhands_config_for_eval(
|
||||
metadata=metadata,
|
||||
runtime='docker',
|
||||
max_iterations=metadata.max_iterations,
|
||||
sandbox=sandbox_config,
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
)
|
||||
|
||||
# Override the container image for EDA
|
||||
config.sandbox.base_container_image = 'python:3.12-bookworm'
|
||||
|
||||
config.set_llm_config(metadata.llm_config)
|
||||
agent_config = config.get_agent_config(metadata.agent_class)
|
||||
agent_config.enable_prompt_extensions = False
|
||||
@@ -146,7 +144,7 @@ def process_instance(
|
||||
|
||||
logger.info(f'Final message: {final_message} | Ground truth: {instance["text"]}')
|
||||
test_result = game.reward()
|
||||
metrics = state.metrics.get() if state.metrics else None
|
||||
metrics = get_metrics(state)
|
||||
|
||||
# history is now available as a stream of events, rather than list of pairs of (Action, Observation)
|
||||
# for compatibility with the existing output format, we can remake the pairs here
|
||||
|
||||
@@ -17,7 +17,8 @@ from evaluation.utils.shared import (
|
||||
EvalMetadata,
|
||||
EvalOutput,
|
||||
compatibility_for_eval_history_pairs,
|
||||
get_default_sandbox_config_for_eval,
|
||||
get_metrics,
|
||||
get_openhands_config_for_eval,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
@@ -40,19 +41,12 @@ from openhands.utils.async_utils import call_async_from_sync
|
||||
def get_config(
|
||||
metadata: EvalMetadata,
|
||||
) -> OpenHandsConfig:
|
||||
sandbox_config = get_default_sandbox_config_for_eval()
|
||||
sandbox_config.base_container_image = 'python:3.12-slim'
|
||||
# Create config with agent_bench-specific container image
|
||||
config = get_openhands_config_for_eval(metadata=metadata)
|
||||
|
||||
# Override the container image for agent_bench
|
||||
config.sandbox.base_container_image = 'python:3.12-slim'
|
||||
|
||||
config = OpenHandsConfig(
|
||||
default_agent=metadata.agent_class,
|
||||
run_as_openhands=False,
|
||||
runtime=os.environ.get('RUNTIME', 'docker'),
|
||||
max_iterations=metadata.max_iterations,
|
||||
sandbox=sandbox_config,
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
)
|
||||
config.set_llm_config(metadata.llm_config)
|
||||
agent_config = config.get_agent_config(metadata.agent_class)
|
||||
agent_config.enable_prompt_extensions = False
|
||||
@@ -273,7 +267,7 @@ def process_instance(
|
||||
# remove when it becomes unnecessary
|
||||
histories = compatibility_for_eval_history_pairs(state.history)
|
||||
|
||||
metrics = state.metrics.get() if state.metrics else None
|
||||
metrics = get_metrics(state)
|
||||
|
||||
# Save the output
|
||||
output = EvalOutput(
|
||||
|
||||
@@ -17,6 +17,8 @@ from evaluation.utils.shared import (
|
||||
EvalOutput,
|
||||
compatibility_for_eval_history_pairs,
|
||||
get_default_sandbox_config_for_eval,
|
||||
get_metrics,
|
||||
get_openhands_config_for_eval,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
@@ -49,15 +51,10 @@ def get_config(
|
||||
) -> OpenHandsConfig:
|
||||
sandbox_config = get_default_sandbox_config_for_eval()
|
||||
sandbox_config.base_container_image = 'python:3.11-bookworm'
|
||||
config = OpenHandsConfig(
|
||||
default_agent=metadata.agent_class,
|
||||
run_as_openhands=False,
|
||||
config = get_openhands_config_for_eval(
|
||||
metadata=metadata,
|
||||
sandbox_config=sandbox_config,
|
||||
runtime=os.environ.get('RUNTIME', 'docker'),
|
||||
max_iterations=metadata.max_iterations,
|
||||
sandbox=sandbox_config,
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
)
|
||||
config.set_llm_config(metadata.llm_config)
|
||||
agent_config = config.get_agent_config(metadata.agent_class)
|
||||
@@ -246,7 +243,7 @@ def process_instance(
|
||||
# for compatibility with the existing output format, we can remake the pairs here
|
||||
# remove when it becomes unnecessary
|
||||
histories = compatibility_for_eval_history_pairs(state.history)
|
||||
metrics = state.metrics.get() if state.metrics else None
|
||||
metrics = get_metrics(state)
|
||||
|
||||
# Save the output
|
||||
output = EvalOutput(
|
||||
|
||||
@@ -15,6 +15,8 @@ from evaluation.utils.shared import (
|
||||
codeact_user_response,
|
||||
compatibility_for_eval_history_pairs,
|
||||
get_default_sandbox_config_for_eval,
|
||||
get_metrics,
|
||||
get_openhands_config_for_eval,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
@@ -60,15 +62,10 @@ def get_config(
|
||||
sandbox_config = get_default_sandbox_config_for_eval()
|
||||
sandbox_config.base_container_image = BIOCODER_BENCH_CONTAINER_IMAGE
|
||||
|
||||
config = OpenHandsConfig(
|
||||
default_agent=metadata.agent_class,
|
||||
run_as_openhands=False,
|
||||
config = get_openhands_config_for_eval(
|
||||
metadata=metadata,
|
||||
runtime='docker',
|
||||
max_iterations=metadata.max_iterations,
|
||||
sandbox=sandbox_config,
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
sandbox_config=sandbox_config,
|
||||
)
|
||||
config.set_llm_config(metadata.llm_config)
|
||||
agent_config = config.get_agent_config(metadata.agent_class)
|
||||
@@ -294,7 +291,7 @@ def process_instance(
|
||||
raise ValueError('State should not be None.')
|
||||
|
||||
test_result = complete_runtime(runtime, instance)
|
||||
metrics = state.metrics.get() if state.metrics else None
|
||||
metrics = get_metrics(state)
|
||||
# history is now available as a stream of events, rather than list of pairs of (Action, Observation)
|
||||
# for compatibility with the existing output format, we can remake the pairs here
|
||||
# remove when it becomes unnecessary
|
||||
|
||||
@@ -18,6 +18,8 @@ from evaluation.utils.shared import (
|
||||
EvalOutput,
|
||||
compatibility_for_eval_history_pairs,
|
||||
get_default_sandbox_config_for_eval,
|
||||
get_metrics,
|
||||
get_openhands_config_for_eval,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
@@ -74,15 +76,10 @@ def get_config(
|
||||
sandbox_config = get_default_sandbox_config_for_eval()
|
||||
sandbox_config.base_container_image = 'python:3.12-bookworm'
|
||||
|
||||
config = OpenHandsConfig(
|
||||
default_agent=metadata.agent_class,
|
||||
run_as_openhands=False,
|
||||
config = get_openhands_config_for_eval(
|
||||
metadata=metadata,
|
||||
runtime='docker',
|
||||
max_iterations=metadata.max_iterations,
|
||||
sandbox=sandbox_config,
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
sandbox_config=sandbox_config,
|
||||
)
|
||||
config.set_llm_config(metadata.llm_config)
|
||||
agent_config = config.get_agent_config(metadata.agent_class)
|
||||
@@ -422,7 +419,7 @@ def process_instance(
|
||||
# You can simply get the LAST `MessageAction` from the returned `state.history` and parse it for evaluation.
|
||||
if state is None:
|
||||
raise ValueError('State should not be None.')
|
||||
metrics = state.metrics.get() if state.metrics else None
|
||||
metrics = get_metrics(state)
|
||||
|
||||
# history is now available as a stream of events, rather than list of pairs of (Action, Observation)
|
||||
# for compatibility with the existing output format, we can remake the pairs here
|
||||
|
||||
@@ -11,6 +11,8 @@ from evaluation.utils.shared import (
|
||||
EvalOutput,
|
||||
compatibility_for_eval_history_pairs,
|
||||
get_default_sandbox_config_for_eval,
|
||||
get_metrics,
|
||||
get_openhands_config_for_eval,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
@@ -39,14 +41,8 @@ def get_config(
|
||||
)
|
||||
sandbox_config = get_default_sandbox_config_for_eval()
|
||||
sandbox_config.base_container_image = 'python:3.12-bookworm'
|
||||
config = OpenHandsConfig(
|
||||
default_agent=metadata.agent_class,
|
||||
run_as_openhands=False,
|
||||
runtime='docker',
|
||||
max_iterations=metadata.max_iterations,
|
||||
sandbox=sandbox_config,
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
config = get_openhands_config_for_eval(
|
||||
metadata=metadata, runtime='docker', sandbox_config=sandbox_config
|
||||
)
|
||||
config.set_llm_config(metadata.llm_config)
|
||||
agent_config = config.get_agent_config(metadata.agent_class)
|
||||
@@ -88,7 +84,7 @@ def process_instance(
|
||||
if state is None:
|
||||
raise ValueError('State should not be None.')
|
||||
|
||||
metrics = state.metrics.get() if state.metrics else None
|
||||
metrics = get_metrics(state)
|
||||
# history is now available as a stream of events, rather than list of pairs of (Action, Observation)
|
||||
# for compatibility with the existing output format, we can remake the pairs here
|
||||
# remove when it becomes unnecessary
|
||||
|
||||
@@ -16,6 +16,8 @@ from evaluation.utils.shared import (
|
||||
assert_and_raise,
|
||||
codeact_user_response,
|
||||
get_default_sandbox_config_for_eval,
|
||||
get_metrics,
|
||||
get_openhands_config_for_eval,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
@@ -113,16 +115,11 @@ def get_config(
|
||||
sandbox_config = get_default_sandbox_config_for_eval()
|
||||
sandbox_config.base_container_image = base_container_image
|
||||
|
||||
config = OpenHandsConfig(
|
||||
default_agent=metadata.agent_class,
|
||||
run_as_openhands=False,
|
||||
max_iterations=metadata.max_iterations,
|
||||
enable_browser=RUN_WITH_BROWSING,
|
||||
config = get_openhands_config_for_eval(
|
||||
metadata=metadata,
|
||||
sandbox_config=sandbox_config,
|
||||
runtime=os.environ.get('RUNTIME', 'docker'),
|
||||
sandbox=sandbox_config,
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
enable_browser=RUN_WITH_BROWSING,
|
||||
)
|
||||
config.set_llm_config(
|
||||
update_llm_config_for_completions_logging(
|
||||
@@ -480,7 +477,7 @@ def process_instance(
|
||||
|
||||
# NOTE: this is NO LONGER the event stream, but an agent history that includes delegate agent's events
|
||||
histories = [event_to_dict(event) for event in state.history]
|
||||
metrics = state.metrics.get() if state.metrics else None
|
||||
metrics = get_metrics(state)
|
||||
|
||||
# Save the output
|
||||
output = EvalOutput(
|
||||
|
||||
@@ -17,6 +17,8 @@ from evaluation.utils.shared import (
|
||||
codeact_user_response,
|
||||
compatibility_for_eval_history_pairs,
|
||||
get_default_sandbox_config_for_eval,
|
||||
get_metrics,
|
||||
get_openhands_config_for_eval,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
@@ -64,15 +66,10 @@ def get_config(
|
||||
) -> OpenHandsConfig:
|
||||
sandbox_config = get_default_sandbox_config_for_eval()
|
||||
sandbox_config.base_container_image = 'python:3.12-bookworm'
|
||||
config = OpenHandsConfig(
|
||||
default_agent=metadata.agent_class,
|
||||
run_as_openhands=False,
|
||||
config = get_openhands_config_for_eval(
|
||||
metadata=metadata,
|
||||
runtime='docker',
|
||||
max_iterations=metadata.max_iterations,
|
||||
sandbox=sandbox_config,
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
sandbox_config=sandbox_config,
|
||||
)
|
||||
config.set_llm_config(metadata.llm_config)
|
||||
agent_config = config.get_agent_config(metadata.agent_class)
|
||||
@@ -294,7 +291,7 @@ def process_instance(
|
||||
if state is None:
|
||||
raise ValueError('State should not be None.')
|
||||
|
||||
metrics = state.metrics.get() if state.metrics else None
|
||||
metrics = get_metrics(state)
|
||||
test_result = complete_runtime(state)
|
||||
|
||||
# history is now available as a stream of events, rather than list of pairs of (Action, Observation)
|
||||
|
||||
@@ -22,6 +22,8 @@ from evaluation.utils.shared import (
|
||||
codeact_user_response,
|
||||
compatibility_for_eval_history_pairs,
|
||||
get_default_sandbox_config_for_eval,
|
||||
get_metrics,
|
||||
get_openhands_config_for_eval,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
@@ -59,15 +61,10 @@ def get_config(
|
||||
) -> OpenHandsConfig:
|
||||
sandbox_config = get_default_sandbox_config_for_eval()
|
||||
sandbox_config.base_container_image = 'nikolaik/python-nodejs:python3.12-nodejs22'
|
||||
config = OpenHandsConfig(
|
||||
default_agent=metadata.agent_class,
|
||||
run_as_openhands=False,
|
||||
config = get_openhands_config_for_eval(
|
||||
metadata=metadata,
|
||||
sandbox_config=sandbox_config,
|
||||
runtime='docker',
|
||||
max_iterations=metadata.max_iterations,
|
||||
sandbox=sandbox_config,
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
)
|
||||
config.set_llm_config(metadata.llm_config)
|
||||
if metadata.agent_config:
|
||||
@@ -269,7 +266,7 @@ Here is the task:
|
||||
'model_answer': model_answer,
|
||||
'ground_truth': instance['Final answer'],
|
||||
}
|
||||
metrics = state.metrics.get() if state.metrics else None
|
||||
metrics = get_metrics(state)
|
||||
|
||||
# history is now available as a stream of events, rather than list of pairs of (Action, Observation)
|
||||
# for compatibility with the existing output format, we can remake the pairs here
|
||||
|
||||
@@ -12,6 +12,8 @@ from evaluation.utils.shared import (
|
||||
codeact_user_response,
|
||||
compatibility_for_eval_history_pairs,
|
||||
get_default_sandbox_config_for_eval,
|
||||
get_metrics,
|
||||
get_openhands_config_for_eval,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
@@ -42,15 +44,10 @@ def get_config(
|
||||
) -> OpenHandsConfig:
|
||||
sandbox_config = get_default_sandbox_config_for_eval()
|
||||
sandbox_config.base_container_image = 'python:3.12-bookworm'
|
||||
config = OpenHandsConfig(
|
||||
default_agent=metadata.agent_class,
|
||||
run_as_openhands=False,
|
||||
config = get_openhands_config_for_eval(
|
||||
metadata=metadata,
|
||||
runtime='docker',
|
||||
max_iterations=metadata.max_iterations,
|
||||
sandbox=sandbox_config,
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
sandbox_config=sandbox_config,
|
||||
)
|
||||
config.set_llm_config(metadata.llm_config)
|
||||
agent_config = config.get_agent_config(metadata.agent_class)
|
||||
@@ -108,7 +105,7 @@ def process_instance(
|
||||
# attempt to parse model_answer
|
||||
ast_eval_fn = instance['ast_eval']
|
||||
correct, hallucination = ast_eval_fn(instance_id, model_answer_raw)
|
||||
metrics = state.metrics.get() if state.metrics else None
|
||||
metrics = get_metrics(state)
|
||||
logger.info(
|
||||
f'Final message: {model_answer_raw} | Correctness: {correct} | Hallucination: {hallucination}'
|
||||
)
|
||||
|
||||
@@ -30,6 +30,8 @@ from evaluation.utils.shared import (
|
||||
EvalOutput,
|
||||
compatibility_for_eval_history_pairs,
|
||||
get_default_sandbox_config_for_eval,
|
||||
get_metrics,
|
||||
get_openhands_config_for_eval,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
@@ -63,15 +65,10 @@ def get_config(
|
||||
) -> OpenHandsConfig:
|
||||
sandbox_config = get_default_sandbox_config_for_eval()
|
||||
sandbox_config.base_container_image = 'python:3.12-bookworm'
|
||||
config = OpenHandsConfig(
|
||||
default_agent=metadata.agent_class,
|
||||
run_as_openhands=False,
|
||||
config = get_openhands_config_for_eval(
|
||||
metadata=metadata,
|
||||
runtime='docker',
|
||||
max_iterations=metadata.max_iterations,
|
||||
sandbox=sandbox_config,
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
sandbox_config=sandbox_config,
|
||||
)
|
||||
config.set_llm_config(metadata.llm_config)
|
||||
agent_config = config.get_agent_config(metadata.agent_class)
|
||||
@@ -292,7 +289,7 @@ Ok now its time to start solving the question. Good luck!
|
||||
if state is None:
|
||||
raise ValueError('State should not be None.')
|
||||
|
||||
metrics = state.metrics.get() if state.metrics else None
|
||||
metrics = get_metrics(state)
|
||||
|
||||
# Save the output
|
||||
output = EvalOutput(
|
||||
|
||||
@@ -23,6 +23,8 @@ from evaluation.utils.shared import (
|
||||
codeact_user_response,
|
||||
compatibility_for_eval_history_pairs,
|
||||
get_default_sandbox_config_for_eval,
|
||||
get_metrics,
|
||||
get_openhands_config_for_eval,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
@@ -84,15 +86,10 @@ def get_config(
|
||||
) -> OpenHandsConfig:
|
||||
sandbox_config = get_default_sandbox_config_for_eval()
|
||||
sandbox_config.base_container_image = 'python:3.12-bookworm'
|
||||
config = OpenHandsConfig(
|
||||
default_agent=metadata.agent_class,
|
||||
run_as_openhands=False,
|
||||
config = get_openhands_config_for_eval(
|
||||
metadata=metadata,
|
||||
runtime='docker',
|
||||
max_iterations=metadata.max_iterations,
|
||||
sandbox=sandbox_config,
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
sandbox_config=sandbox_config,
|
||||
)
|
||||
config.set_llm_config(metadata.llm_config)
|
||||
agent_config = config.get_agent_config(metadata.agent_class)
|
||||
@@ -248,7 +245,7 @@ def process_instance(
|
||||
|
||||
if state is None:
|
||||
raise ValueError('State should not be None.')
|
||||
metrics = state.metrics.get() if state.metrics else None
|
||||
metrics = get_metrics(state)
|
||||
test_result = complete_runtime(runtime, instance)
|
||||
|
||||
# history is now available as a stream of events, rather than list of pairs of (Action, Observation)
|
||||
|
||||
@@ -16,6 +16,7 @@ import ruamel.yaml
|
||||
from evaluation.utils.shared import (
|
||||
EvalMetadata,
|
||||
get_default_sandbox_config_for_eval,
|
||||
get_openhands_config_for_eval,
|
||||
make_metadata,
|
||||
)
|
||||
from openhands.core.config import (
|
||||
@@ -37,15 +38,10 @@ def get_config(
|
||||
) -> OpenHandsConfig:
|
||||
sandbox_config = get_default_sandbox_config_for_eval()
|
||||
sandbox_config.base_container_image = 'python:3.12-bookworm'
|
||||
config = OpenHandsConfig(
|
||||
default_agent=metadata.agent_class,
|
||||
run_as_openhands=False,
|
||||
config = get_openhands_config_for_eval(
|
||||
metadata=metadata,
|
||||
runtime='docker',
|
||||
max_iterations=metadata.max_iterations,
|
||||
sandbox=sandbox_config,
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
sandbox_config=sandbox_config,
|
||||
)
|
||||
config.set_llm_config(metadata.llm_config)
|
||||
agent_config = config.get_agent_config(metadata.agent_class)
|
||||
|
||||
@@ -22,6 +22,8 @@ from evaluation.utils.shared import (
|
||||
codeact_user_response,
|
||||
compatibility_for_eval_history_pairs,
|
||||
get_default_sandbox_config_for_eval,
|
||||
get_metrics,
|
||||
get_openhands_config_for_eval,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
@@ -47,15 +49,10 @@ def get_config(
|
||||
) -> OpenHandsConfig:
|
||||
sandbox_config = get_default_sandbox_config_for_eval()
|
||||
sandbox_config.base_container_image = 'python:3.12-bookworm'
|
||||
config = OpenHandsConfig(
|
||||
default_agent=metadata.agent_class,
|
||||
run_as_openhands=False,
|
||||
config = get_openhands_config_for_eval(
|
||||
metadata=metadata,
|
||||
runtime='docker',
|
||||
max_iterations=metadata.max_iterations,
|
||||
sandbox=sandbox_config,
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
sandbox_config=sandbox_config,
|
||||
)
|
||||
config.set_llm_config(metadata.llm_config)
|
||||
agent_config = config.get_agent_config(metadata.agent_class)
|
||||
@@ -335,7 +332,7 @@ Be thorough in your exploration, testing, and reasoning. It's fine if your think
|
||||
)
|
||||
)
|
||||
assert state is not None
|
||||
metrics = state.metrics.get() if state.metrics else {}
|
||||
metrics = get_metrics(state)
|
||||
|
||||
test_result = complete_runtime(runtime, instance)
|
||||
|
||||
|
||||
@@ -10,6 +10,8 @@ from evaluation.utils.shared import (
|
||||
codeact_user_response,
|
||||
compatibility_for_eval_history_pairs,
|
||||
get_default_sandbox_config_for_eval,
|
||||
get_metrics,
|
||||
get_openhands_config_for_eval,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
@@ -51,15 +53,10 @@ def get_config(
|
||||
'$OH_INTERPRETER_PATH -m pip install scitools-pyke'
|
||||
)
|
||||
|
||||
config = OpenHandsConfig(
|
||||
default_agent=metadata.agent_class,
|
||||
run_as_openhands=False,
|
||||
config = get_openhands_config_for_eval(
|
||||
metadata=metadata,
|
||||
runtime='docker',
|
||||
max_iterations=metadata.max_iterations,
|
||||
sandbox=sandbox_config,
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
sandbox_config=sandbox_config,
|
||||
)
|
||||
config.set_llm_config(metadata.llm_config)
|
||||
agent_config = config.get_agent_config(metadata.agent_class)
|
||||
@@ -247,7 +244,7 @@ def process_instance(
|
||||
)
|
||||
test_result['final_message'] = final_message
|
||||
|
||||
metrics = state.metrics.get() if state.metrics else None
|
||||
metrics = get_metrics(state)
|
||||
# history is now available as a stream of events, rather than list of pairs of (Action, Observation)
|
||||
# for compatibility with the existing output format, we can remake the pairs here
|
||||
# remove when it becomes unnecessary
|
||||
|
||||
@@ -13,6 +13,8 @@ from evaluation.utils.shared import (
|
||||
codeact_user_response,
|
||||
compatibility_for_eval_history_pairs,
|
||||
get_default_sandbox_config_for_eval,
|
||||
get_metrics,
|
||||
get_openhands_config_for_eval,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
@@ -57,15 +59,10 @@ def get_config(
|
||||
) -> OpenHandsConfig:
|
||||
sandbox_config = get_default_sandbox_config_for_eval()
|
||||
sandbox_config.base_container_image = 'xingyaoww/od-eval-miniwob:v1.0'
|
||||
config = OpenHandsConfig(
|
||||
default_agent=metadata.agent_class,
|
||||
run_as_openhands=False,
|
||||
config = get_openhands_config_for_eval(
|
||||
metadata=metadata,
|
||||
runtime=os.environ.get('RUNTIME', 'docker'),
|
||||
max_iterations=metadata.max_iterations,
|
||||
sandbox=sandbox_config,
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
sandbox_config=sandbox_config,
|
||||
)
|
||||
config.set_llm_config(
|
||||
update_llm_config_for_completions_logging(
|
||||
@@ -174,7 +171,7 @@ def process_instance(
|
||||
if state is None:
|
||||
raise ValueError('State should not be None.')
|
||||
|
||||
metrics = state.metrics.get() if state.metrics else None
|
||||
metrics = get_metrics(state)
|
||||
|
||||
# Instruction is the first message from the USER
|
||||
instruction = ''
|
||||
|
||||
@@ -15,6 +15,8 @@ from evaluation.utils.shared import (
|
||||
EvalOutput,
|
||||
compatibility_for_eval_history_pairs,
|
||||
get_default_sandbox_config_for_eval,
|
||||
get_metrics,
|
||||
get_openhands_config_for_eval,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
@@ -109,15 +111,10 @@ def get_config(
|
||||
f'$OH_INTERPRETER_PATH -m pip install {" ".join(MINT_DEPENDENCIES)}'
|
||||
)
|
||||
|
||||
config = OpenHandsConfig(
|
||||
default_agent=metadata.agent_class,
|
||||
run_as_openhands=False,
|
||||
config = get_openhands_config_for_eval(
|
||||
metadata=metadata,
|
||||
runtime='docker',
|
||||
max_iterations=metadata.max_iterations,
|
||||
sandbox=sandbox_config,
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
sandbox_config=sandbox_config,
|
||||
)
|
||||
config.set_llm_config(metadata.llm_config)
|
||||
agent_config = config.get_agent_config(metadata.agent_class)
|
||||
@@ -205,7 +202,7 @@ def process_instance(
|
||||
task_state = state.extra_data['task_state']
|
||||
logger.info('Task state: ' + str(task_state.to_dict()))
|
||||
|
||||
metrics = state.metrics.get() if state.metrics else None
|
||||
metrics = get_metrics(state)
|
||||
|
||||
# history is now available as a stream of events, rather than list of pairs of (Action, Observation)
|
||||
# for compatibility with the existing output format, we can remake the pairs here
|
||||
|
||||
@@ -26,6 +26,8 @@ from evaluation.utils.shared import (
|
||||
codeact_user_response,
|
||||
compatibility_for_eval_history_pairs,
|
||||
get_default_sandbox_config_for_eval,
|
||||
get_metrics,
|
||||
get_openhands_config_for_eval,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
@@ -79,15 +81,10 @@ def get_config(
|
||||
) -> OpenHandsConfig:
|
||||
sandbox_config = get_default_sandbox_config_for_eval()
|
||||
sandbox_config.base_container_image = 'public.ecr.aws/i5g0m1f6/ml-bench'
|
||||
config = OpenHandsConfig(
|
||||
default_agent=metadata.agent_class,
|
||||
run_as_openhands=False,
|
||||
config = get_openhands_config_for_eval(
|
||||
metadata=metadata,
|
||||
runtime='docker',
|
||||
max_iterations=metadata.max_iterations,
|
||||
sandbox=sandbox_config,
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
sandbox_config=sandbox_config,
|
||||
)
|
||||
config.set_llm_config(metadata.llm_config)
|
||||
agent_config = config.get_agent_config(metadata.agent_class)
|
||||
@@ -250,7 +247,7 @@ def process_instance(instance: Any, metadata: EvalMetadata, reset_logger: bool =
|
||||
)
|
||||
)
|
||||
assert state is not None
|
||||
metrics = state.metrics.get() if state.metrics else {}
|
||||
metrics = get_metrics(state)
|
||||
|
||||
test_result = complete_runtime(runtime)
|
||||
|
||||
|
||||
@@ -23,6 +23,7 @@ from evaluation.utils.shared import (
|
||||
EvalMetadata,
|
||||
EvalOutput,
|
||||
get_default_sandbox_config_for_eval,
|
||||
get_openhands_config_for_eval,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
run_evaluation,
|
||||
@@ -87,13 +88,9 @@ def get_config(metadata: EvalMetadata, instance: pd.Series) -> OpenHandsConfig:
|
||||
dataset_name=metadata.dataset,
|
||||
instance_id=instance['instance_id'],
|
||||
)
|
||||
config = OpenHandsConfig(
|
||||
run_as_openhands=False,
|
||||
config = get_openhands_config_for_eval(
|
||||
runtime=os.environ.get('RUNTIME', 'docker'),
|
||||
sandbox=sandbox_config,
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
sandbox_config=sandbox_config,
|
||||
)
|
||||
return config
|
||||
|
||||
|
||||
@@ -21,6 +21,7 @@ from evaluation.utils.shared import (
|
||||
codeact_user_response,
|
||||
get_default_sandbox_config_for_eval,
|
||||
get_metrics,
|
||||
get_openhands_config_for_eval,
|
||||
is_fatal_evaluation_error,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
@@ -341,16 +342,11 @@ def get_config(
|
||||
instance_id=instance['instance_id'],
|
||||
)
|
||||
|
||||
config = OpenHandsConfig(
|
||||
default_agent=metadata.agent_class,
|
||||
run_as_openhands=False,
|
||||
max_iterations=metadata.max_iterations,
|
||||
config = get_openhands_config_for_eval(
|
||||
metadata=metadata,
|
||||
enable_browser=RUN_WITH_BROWSING,
|
||||
runtime=os.environ.get('RUNTIME', 'docker'),
|
||||
sandbox=sandbox_config,
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
sandbox_config=sandbox_config,
|
||||
)
|
||||
config.set_llm_config(
|
||||
update_llm_config_for_completions_logging(
|
||||
|
||||
@@ -31,6 +31,7 @@ from evaluation.utils.shared import (
|
||||
codeact_user_response,
|
||||
get_default_sandbox_config_for_eval,
|
||||
get_metrics,
|
||||
get_openhands_config_for_eval,
|
||||
is_fatal_evaluation_error,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
@@ -174,15 +175,10 @@ def get_config(
|
||||
instance_id=instance['instance_id'],
|
||||
)
|
||||
|
||||
config = OpenHandsConfig(
|
||||
default_agent=metadata.agent_class,
|
||||
run_as_openhands=False,
|
||||
max_iterations=metadata.max_iterations,
|
||||
config = get_openhands_config_for_eval(
|
||||
metadata=metadata,
|
||||
runtime=os.environ.get('RUNTIME', 'docker'),
|
||||
sandbox=sandbox_config,
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
sandbox_config=sandbox_config,
|
||||
)
|
||||
|
||||
config.set_llm_config(
|
||||
|
||||
@@ -12,6 +12,8 @@ from evaluation.utils.shared import (
|
||||
codeact_user_response,
|
||||
compatibility_for_eval_history_pairs,
|
||||
get_default_sandbox_config_for_eval,
|
||||
get_metrics,
|
||||
get_openhands_config_for_eval,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
@@ -63,16 +65,10 @@ def get_config(
|
||||
sandbox_config.base_container_image = (
|
||||
'docker.io/xingyaoww/openhands-eval-scienceagentbench'
|
||||
)
|
||||
config = OpenHandsConfig(
|
||||
default_agent=metadata.agent_class,
|
||||
run_as_openhands=False,
|
||||
config = get_openhands_config_for_eval(
|
||||
metadata=metadata,
|
||||
runtime=os.environ.get('RUNTIME', 'docker'),
|
||||
max_budget_per_task=4,
|
||||
max_iterations=metadata.max_iterations,
|
||||
sandbox=sandbox_config,
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
sandbox_config=sandbox_config,
|
||||
)
|
||||
config.set_llm_config(
|
||||
update_llm_config_for_completions_logging(
|
||||
@@ -218,7 +214,7 @@ If the program uses some packages that are incompatible, please figure out alter
|
||||
# You can simply get the LAST `MessageAction` from the returned `state.history` and parse it for evaluation.
|
||||
if state is None:
|
||||
raise ValueError('State should not be None.')
|
||||
metrics = state.metrics.get() if state.metrics else None
|
||||
metrics = get_metrics(state)
|
||||
|
||||
# history is now available as a stream of events, rather than list of pairs of (Action, Observation)
|
||||
# for compatibility with the existing output format, we can remake the pairs here
|
||||
|
||||
@@ -19,6 +19,7 @@ from evaluation.utils.shared import (
|
||||
EvalMetadata,
|
||||
EvalOutput,
|
||||
get_default_sandbox_config_for_eval,
|
||||
get_openhands_config_for_eval,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
run_evaluation,
|
||||
@@ -83,13 +84,9 @@ def get_config(metadata: EvalMetadata, instance: pd.Series) -> OpenHandsConfig:
|
||||
dataset_name=metadata.dataset,
|
||||
instance_id=instance['instance_id'],
|
||||
)
|
||||
config = OpenHandsConfig(
|
||||
run_as_openhands=False,
|
||||
config = get_openhands_config_for_eval(
|
||||
runtime=os.environ.get('RUNTIME', 'docker'),
|
||||
sandbox=sandbox_config,
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
sandbox_config=sandbox_config,
|
||||
)
|
||||
return config
|
||||
|
||||
|
||||
@@ -32,6 +32,7 @@ from evaluation.utils.shared import (
|
||||
codeact_user_response,
|
||||
get_default_sandbox_config_for_eval,
|
||||
get_metrics,
|
||||
get_openhands_config_for_eval,
|
||||
is_fatal_evaluation_error,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
@@ -227,16 +228,11 @@ def get_config(
|
||||
instance_id=instance['instance_id'],
|
||||
)
|
||||
|
||||
config = OpenHandsConfig(
|
||||
default_agent=metadata.agent_class,
|
||||
run_as_openhands=False,
|
||||
max_iterations=metadata.max_iterations,
|
||||
config = get_openhands_config_for_eval(
|
||||
metadata=metadata,
|
||||
enable_browser=RUN_WITH_BROWSING,
|
||||
runtime=os.environ.get('RUNTIME', 'docker'),
|
||||
sandbox=sandbox_config,
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
sandbox_config=sandbox_config,
|
||||
)
|
||||
|
||||
config.set_llm_config(
|
||||
|
||||
@@ -21,6 +21,7 @@ from evaluation.utils.shared import (
|
||||
EvalException,
|
||||
EvalMetadata,
|
||||
EvalOutput,
|
||||
get_metrics,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
@@ -179,7 +180,7 @@ def process_instance(
|
||||
raise ValueError('State should not be None.')
|
||||
|
||||
histories = [event_to_dict(event) for event in state.history]
|
||||
metrics = state.metrics.get() if state.metrics else None
|
||||
metrics = get_metrics(state)
|
||||
|
||||
# Save the output
|
||||
instruction = message_action.content
|
||||
|
||||
@@ -20,6 +20,7 @@ from evaluation.utils.shared import (
|
||||
codeact_user_response,
|
||||
get_default_sandbox_config_for_eval,
|
||||
get_metrics,
|
||||
get_openhands_config_for_eval,
|
||||
is_fatal_evaluation_error,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
@@ -199,16 +200,11 @@ def get_config(
|
||||
'REPO_PATH': f'/workspace/{workspace_dir_name}/',
|
||||
}
|
||||
|
||||
config = OpenHandsConfig(
|
||||
default_agent=metadata.agent_class,
|
||||
run_as_openhands=False,
|
||||
max_iterations=metadata.max_iterations,
|
||||
config = get_openhands_config_for_eval(
|
||||
metadata=metadata,
|
||||
enable_browser=RUN_WITH_BROWSING,
|
||||
runtime=os.environ.get('RUNTIME', 'docker'),
|
||||
sandbox=sandbox_config,
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
sandbox_config=sandbox_config,
|
||||
)
|
||||
config.set_llm_config(
|
||||
update_llm_config_for_completions_logging(
|
||||
|
||||
@@ -13,6 +13,7 @@ N_RUNS=${4:-1}
|
||||
export EXP_NAME=$EXP_NAME
|
||||
# use 2x resources for rollout since some codebases are pretty resource-intensive
|
||||
export DEFAULT_RUNTIME_RESOURCE_FACTOR=2
|
||||
export ITERATIVE_EVAL_MODE=false
|
||||
echo "MODEL: $MODEL"
|
||||
echo "EXP_NAME: $EXP_NAME"
|
||||
DATASET="SWE-Gym/SWE-Gym" # change this to the "/SWE-Gym-Lite" if you want to rollout the lite subset
|
||||
|
||||
@@ -37,6 +37,7 @@ from evaluation.benchmarks.testgeneval.utils import load_testgeneval_dataset
|
||||
from evaluation.utils.shared import (
|
||||
EvalMetadata,
|
||||
EvalOutput,
|
||||
get_openhands_config_for_eval,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
run_evaluation,
|
||||
@@ -58,20 +59,21 @@ def get_config(instance: pd.Series) -> OpenHandsConfig:
|
||||
f'Invalid container image for instance {instance["instance_id_swebench"]}.'
|
||||
)
|
||||
logger.info(f'Using instance container image: {base_container_image}.')
|
||||
return OpenHandsConfig(
|
||||
run_as_openhands=False,
|
||||
runtime=os.environ.get('RUNTIME', 'eventstream'),
|
||||
sandbox=SandboxConfig(
|
||||
base_container_image=base_container_image,
|
||||
use_host_network=False,
|
||||
timeout=1800,
|
||||
api_key=os.environ.get('ALLHANDS_API_KEY'),
|
||||
remote_runtime_api_url=os.environ.get(
|
||||
'SANDBOX_REMOTE_RUNTIME_API_URL', 'http://localhost:8000'
|
||||
),
|
||||
|
||||
# Create custom sandbox config for testgeneval with specific requirements
|
||||
sandbox_config = SandboxConfig(
|
||||
base_container_image=base_container_image,
|
||||
use_host_network=False,
|
||||
timeout=1800, # Longer timeout than default (300)
|
||||
api_key=os.environ.get('ALLHANDS_API_KEY'),
|
||||
remote_runtime_api_url=os.environ.get(
|
||||
'SANDBOX_REMOTE_RUNTIME_API_URL', 'http://localhost:8000'
|
||||
),
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
)
|
||||
|
||||
return get_openhands_config_for_eval(
|
||||
sandbox_config=sandbox_config,
|
||||
runtime=os.environ.get('RUNTIME', 'docker'), # Different default runtime
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -25,6 +25,7 @@ from evaluation.utils.shared import (
|
||||
assert_and_raise,
|
||||
codeact_user_response,
|
||||
get_metrics,
|
||||
get_openhands_config_for_eval,
|
||||
is_fatal_evaluation_error,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
@@ -126,29 +127,26 @@ def get_config(
|
||||
f'Submit an issue on https://github.com/All-Hands-AI/OpenHands if you run into any issues.'
|
||||
)
|
||||
|
||||
config = OpenHandsConfig(
|
||||
default_agent=metadata.agent_class,
|
||||
run_as_openhands=False,
|
||||
max_iterations=metadata.max_iterations,
|
||||
runtime=os.environ.get('RUNTIME', 'eventstream'),
|
||||
sandbox=SandboxConfig(
|
||||
base_container_image=base_container_image,
|
||||
enable_auto_lint=True,
|
||||
use_host_network=False,
|
||||
# large enough timeout, since some testcases take very long to run
|
||||
timeout=300,
|
||||
# Add platform to the sandbox config to solve issue 4401
|
||||
platform='linux/amd64',
|
||||
api_key=os.environ.get('ALLHANDS_API_KEY', None),
|
||||
remote_runtime_api_url=os.environ.get(
|
||||
'SANDBOX_REMOTE_RUNTIME_API_URL', 'http://localhost:8000'
|
||||
),
|
||||
keep_runtime_alive=False,
|
||||
remote_runtime_init_timeout=3600,
|
||||
sandbox_config = SandboxConfig(
|
||||
base_container_image=base_container_image,
|
||||
enable_auto_lint=True,
|
||||
use_host_network=False,
|
||||
# large enough timeout, since some testcases take very long to run
|
||||
timeout=300,
|
||||
# Add platform to the sandbox config to solve issue 4401
|
||||
platform='linux/amd64',
|
||||
api_key=os.environ.get('ALLHANDS_API_KEY', None),
|
||||
remote_runtime_api_url=os.environ.get(
|
||||
'SANDBOX_REMOTE_RUNTIME_API_URL', 'http://localhost:8000'
|
||||
),
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
keep_runtime_alive=False,
|
||||
remote_runtime_init_timeout=3600,
|
||||
)
|
||||
|
||||
config = get_openhands_config_for_eval(
|
||||
metadata=metadata,
|
||||
sandbox_config=sandbox_config,
|
||||
runtime=os.environ.get('RUNTIME', 'docker'),
|
||||
)
|
||||
config.set_llm_config(
|
||||
update_llm_config_for_completions_logging(
|
||||
|
||||
@@ -12,7 +12,10 @@ import tempfile
|
||||
import yaml
|
||||
from browsing import pre_login
|
||||
|
||||
from evaluation.utils.shared import get_default_sandbox_config_for_eval
|
||||
from evaluation.utils.shared import (
|
||||
get_default_sandbox_config_for_eval,
|
||||
get_openhands_config_for_eval,
|
||||
)
|
||||
from openhands.controller.state.state import State
|
||||
from openhands.core.config import (
|
||||
LLMConfig,
|
||||
@@ -42,19 +45,17 @@ def get_config(
|
||||
sandbox_config.enable_auto_lint = True
|
||||
# If the web services are running on the host machine, this must be set to True
|
||||
sandbox_config.use_host_network = True
|
||||
config = OpenHandsConfig(
|
||||
run_as_openhands=False,
|
||||
max_budget_per_task=4,
|
||||
config = get_openhands_config_for_eval(
|
||||
max_iterations=100,
|
||||
save_trajectory_path=os.path.join(
|
||||
mount_path_on_host, f'traj_{task_short_name}.json'
|
||||
),
|
||||
sandbox=sandbox_config,
|
||||
# we mount trajectories path so that trajectories, generated by OpenHands
|
||||
# controller, can be accessible to the evaluator file in the runtime container
|
||||
sandbox_config=sandbox_config,
|
||||
workspace_mount_path=mount_path_on_host,
|
||||
workspace_mount_path_in_sandbox='/outputs',
|
||||
)
|
||||
config.save_trajectory_path = os.path.join(
|
||||
mount_path_on_host, f'traj_{task_short_name}.json'
|
||||
)
|
||||
config.max_budget_per_task = 4
|
||||
config.set_llm_config(llm_config)
|
||||
if agent_config:
|
||||
config.set_agent_config(agent_config)
|
||||
|
||||
@@ -11,6 +11,8 @@ from evaluation.utils.shared import (
|
||||
codeact_user_response,
|
||||
compatibility_for_eval_history_pairs,
|
||||
get_default_sandbox_config_for_eval,
|
||||
get_metrics,
|
||||
get_openhands_config_for_eval,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
@@ -43,15 +45,10 @@ def get_config(
|
||||
) -> OpenHandsConfig:
|
||||
sandbox_config = get_default_sandbox_config_for_eval()
|
||||
sandbox_config.base_container_image = 'python:3.12-bookworm'
|
||||
config = OpenHandsConfig(
|
||||
default_agent=metadata.agent_class,
|
||||
run_as_openhands=False,
|
||||
config = get_openhands_config_for_eval(
|
||||
metadata=metadata,
|
||||
runtime='docker',
|
||||
max_iterations=metadata.max_iterations,
|
||||
sandbox=sandbox_config,
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
sandbox_config=sandbox_config,
|
||||
)
|
||||
config.set_llm_config(metadata.llm_config)
|
||||
agent_config = config.get_agent_config(metadata.agent_class)
|
||||
@@ -134,7 +131,7 @@ def process_instance(instance: Any, metadata: EvalMetadata, reset_logger: bool =
|
||||
correct = eval_answer(str(model_answer_raw), str(answer))
|
||||
logger.info(f'Final message: {model_answer_raw} | Correctness: {correct}')
|
||||
|
||||
metrics = state.metrics.get() if state.metrics else None
|
||||
metrics = get_metrics(state)
|
||||
|
||||
# history is now available as a stream of events, rather than list of pairs of (Action, Observation)
|
||||
# for compatibility with the existing output format, we can remake the pairs here
|
||||
|
||||
@@ -20,6 +20,7 @@ from evaluation.utils.shared import (
|
||||
codeact_user_response,
|
||||
get_default_sandbox_config_for_eval,
|
||||
get_metrics,
|
||||
get_openhands_config_for_eval,
|
||||
is_fatal_evaluation_error,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
@@ -160,16 +161,11 @@ def get_config(
|
||||
instance_id=instance['instance_id'],
|
||||
)
|
||||
|
||||
config = OpenHandsConfig(
|
||||
default_agent=metadata.agent_class,
|
||||
run_as_openhands=False,
|
||||
max_iterations=metadata.max_iterations,
|
||||
config = get_openhands_config_for_eval(
|
||||
metadata=metadata,
|
||||
enable_browser=RUN_WITH_BROWSING,
|
||||
runtime=os.environ.get('RUNTIME', 'docker'),
|
||||
sandbox=sandbox_config,
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
sandbox_config=sandbox_config,
|
||||
)
|
||||
config.set_llm_config(
|
||||
update_llm_config_for_completions_logging(
|
||||
|
||||
@@ -12,6 +12,8 @@ from evaluation.utils.shared import (
|
||||
EvalOutput,
|
||||
compatibility_for_eval_history_pairs,
|
||||
get_default_sandbox_config_for_eval,
|
||||
get_metrics,
|
||||
get_openhands_config_for_eval,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
@@ -72,16 +74,10 @@ def get_config(
|
||||
'VWA_WIKIPEDIA': f'{base_url}:8888',
|
||||
'VWA_HOMEPAGE': f'{base_url}:4399',
|
||||
}
|
||||
config = OpenHandsConfig(
|
||||
default_agent=metadata.agent_class,
|
||||
run_as_openhands=False,
|
||||
config = get_openhands_config_for_eval(
|
||||
metadata=metadata,
|
||||
runtime='docker',
|
||||
max_iterations=metadata.max_iterations,
|
||||
sandbox=sandbox_config,
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
attach_to_existing=True,
|
||||
sandbox_config=sandbox_config,
|
||||
)
|
||||
config.set_llm_config(
|
||||
update_llm_config_for_completions_logging(
|
||||
@@ -179,7 +175,7 @@ def process_instance(
|
||||
if state is None:
|
||||
raise ValueError('State should not be None.')
|
||||
|
||||
metrics = state.metrics.get() if state.metrics else None
|
||||
metrics = get_metrics(state)
|
||||
|
||||
# Instruction obtained from the first message from the USER
|
||||
instruction = ''
|
||||
|
||||
@@ -12,6 +12,8 @@ from evaluation.utils.shared import (
|
||||
EvalOutput,
|
||||
compatibility_for_eval_history_pairs,
|
||||
get_default_sandbox_config_for_eval,
|
||||
get_metrics,
|
||||
get_openhands_config_for_eval,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
@@ -64,15 +66,10 @@ def get_config(
|
||||
'MAP': f'{base_url}:3000',
|
||||
'HOMEPAGE': f'{base_url}:4399',
|
||||
}
|
||||
config = OpenHandsConfig(
|
||||
default_agent=metadata.agent_class,
|
||||
run_as_openhands=False,
|
||||
config = get_openhands_config_for_eval(
|
||||
metadata=metadata,
|
||||
runtime='docker',
|
||||
max_iterations=metadata.max_iterations,
|
||||
sandbox=sandbox_config,
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
sandbox_config=sandbox_config,
|
||||
)
|
||||
config.set_llm_config(metadata.llm_config)
|
||||
agent_config = config.get_agent_config(metadata.agent_class)
|
||||
@@ -163,7 +160,7 @@ def process_instance(
|
||||
if state is None:
|
||||
raise ValueError('State should not be None.')
|
||||
|
||||
metrics = state.metrics.get() if state.metrics else None
|
||||
metrics = get_metrics(state)
|
||||
|
||||
# Instruction is the first message from the USER
|
||||
instruction = ''
|
||||
|
||||
@@ -9,6 +9,8 @@ from evaluation.utils.shared import (
|
||||
EvalMetadata,
|
||||
EvalOutput,
|
||||
get_default_sandbox_config_for_eval,
|
||||
get_metrics,
|
||||
get_openhands_config_for_eval,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
@@ -44,18 +46,12 @@ def get_config(
|
||||
) -> OpenHandsConfig:
|
||||
sandbox_config = get_default_sandbox_config_for_eval()
|
||||
sandbox_config.platform = 'linux/amd64'
|
||||
config = OpenHandsConfig(
|
||||
default_agent=metadata.agent_class,
|
||||
run_as_openhands=False,
|
||||
config = get_openhands_config_for_eval(
|
||||
metadata=metadata,
|
||||
runtime=os.environ.get('RUNTIME', 'docker'),
|
||||
max_iterations=metadata.max_iterations,
|
||||
sandbox=sandbox_config,
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
# debug
|
||||
debug=True,
|
||||
sandbox_config=sandbox_config,
|
||||
)
|
||||
config.debug = True
|
||||
config.set_llm_config(
|
||||
update_llm_config_for_completions_logging(
|
||||
metadata.llm_config, metadata.eval_output_dir, instance_id
|
||||
@@ -135,7 +131,7 @@ def process_instance(
|
||||
assert len(histories) > 0, 'History should not be empty'
|
||||
|
||||
test_result: TestResult = test_class.verify_result(runtime, histories)
|
||||
metrics = state.metrics.get() if state.metrics else None
|
||||
metrics = get_metrics(state)
|
||||
finally:
|
||||
runtime.close()
|
||||
|
||||
|
||||
209
evaluation/utils/scripts/aggregate_token_usage.py
Executable file
209
evaluation/utils/scripts/aggregate_token_usage.py
Executable file
@@ -0,0 +1,209 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Script to aggregate token usage metrics from LLM completion files.
|
||||
|
||||
Usage:
|
||||
python aggregate_token_usage.py <directory_path> [--input-cost <cost>] [--output-cost <cost>] [--cached-cost <cost>]
|
||||
|
||||
Arguments:
|
||||
directory_path: Path to the directory containing completion files
|
||||
--input-cost: Cost per input token (default: 0.0)
|
||||
--output-cost: Cost per output token (default: 0.0)
|
||||
--cached-cost: Cost per cached token (default: 0.0)
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def aggregate_token_usage(
|
||||
directory_path, input_cost=0.0, output_cost=0.0, cached_cost=0.0
|
||||
):
|
||||
"""
|
||||
Aggregate token usage metrics from all JSON completion files in the directory.
|
||||
|
||||
Args:
|
||||
directory_path (str): Path to directory containing completion files
|
||||
input_cost (float): Cost per input token
|
||||
output_cost (float): Cost per output token
|
||||
cached_cost (float): Cost per cached token
|
||||
"""
|
||||
|
||||
# Initialize counters
|
||||
totals = {
|
||||
'input_tokens': 0,
|
||||
'output_tokens': 0,
|
||||
'cached_tokens': 0,
|
||||
'total_tokens': 0,
|
||||
'files_processed': 0,
|
||||
'files_with_errors': 0,
|
||||
'cost': 0,
|
||||
}
|
||||
|
||||
# Find all JSON files recursively
|
||||
json_files = list(Path(directory_path).rglob('*.json'))
|
||||
|
||||
print(f'Found {len(json_files)} JSON files to process...')
|
||||
|
||||
for json_file in json_files:
|
||||
try:
|
||||
with open(json_file, 'r', encoding='utf-8') as f:
|
||||
data = json.load(f)
|
||||
|
||||
# Look for usage data in response or fncall_response
|
||||
usage_data = None
|
||||
if (
|
||||
'response' in data
|
||||
and isinstance(data['response'], dict)
|
||||
and 'usage' in data['response']
|
||||
):
|
||||
usage_data = data['response']['usage']
|
||||
elif (
|
||||
'fncall_response' in data
|
||||
and isinstance(data['fncall_response'], dict)
|
||||
and 'usage' in data['fncall_response']
|
||||
):
|
||||
usage_data = data['fncall_response']['usage']
|
||||
|
||||
if usage_data:
|
||||
# Extract token counts
|
||||
completion_tokens = usage_data.get('completion_tokens', 0)
|
||||
prompt_tokens = usage_data.get('prompt_tokens', 0)
|
||||
cached_tokens = usage_data.get('cached_tokens', 0)
|
||||
|
||||
# Handle cases where cached_tokens might be in prompt_tokens_details
|
||||
if cached_tokens == 0 and 'prompt_tokens_details' in usage_data:
|
||||
details = usage_data['prompt_tokens_details']
|
||||
if isinstance(details, dict) and 'cached_tokens' in details:
|
||||
cached_tokens = details.get('cached_tokens', 0) or 0
|
||||
|
||||
# Calculate non-cached input tokens
|
||||
non_cached_input = prompt_tokens - cached_tokens
|
||||
|
||||
# Update totals
|
||||
totals['input_tokens'] += non_cached_input
|
||||
totals['output_tokens'] += completion_tokens
|
||||
totals['cached_tokens'] += cached_tokens
|
||||
totals['total_tokens'] += prompt_tokens + completion_tokens
|
||||
|
||||
if 'cost' in data:
|
||||
totals['cost'] += data['cost']
|
||||
totals['files_processed'] += 1
|
||||
|
||||
# Progress indicator
|
||||
if totals['files_processed'] % 1000 == 0:
|
||||
print(f'Processed {totals["files_processed"]} files...')
|
||||
|
||||
except Exception as e:
|
||||
totals['files_with_errors'] += 1
|
||||
if totals['files_with_errors'] <= 5: # Only show first 5 errors
|
||||
print(f'Error processing {json_file}: {e}')
|
||||
|
||||
# Calculate costs
|
||||
input_cost_total = totals['input_tokens'] * input_cost
|
||||
output_cost_total = totals['output_tokens'] * output_cost
|
||||
cached_cost_total = totals['cached_tokens'] * cached_cost
|
||||
total_cost = input_cost_total + output_cost_total + cached_cost_total
|
||||
|
||||
# Print results
|
||||
print('\n' + '=' * 60)
|
||||
print('TOKEN USAGE AGGREGATION RESULTS')
|
||||
print('=' * 60)
|
||||
print(f'Files processed: {totals["files_processed"]:,}')
|
||||
print(f'Files with errors: {totals["files_with_errors"]:,}')
|
||||
print()
|
||||
print('TOKEN COUNTS:')
|
||||
print(f' Input tokens (non-cached): {totals["input_tokens"]:,}')
|
||||
print(f' Output tokens: {totals["output_tokens"]:,}')
|
||||
print(f' Cached tokens: {totals["cached_tokens"]:,}')
|
||||
print(f' Total tokens: {totals["total_tokens"]:,}')
|
||||
print(f' Total costs (based on returned value): ${totals["cost"]:.6f}')
|
||||
print()
|
||||
|
||||
if input_cost > 0 or output_cost > 0 or cached_cost > 0:
|
||||
print('COST CALCULATED BASED ON PROVIDED RATE:')
|
||||
print(
|
||||
f' Input cost: ${input_cost_total:.6f} ({totals["input_tokens"]:,} × ${input_cost:.6f})'
|
||||
)
|
||||
print(
|
||||
f' Output cost: ${output_cost_total:.6f} ({totals["output_tokens"]:,} × ${output_cost:.6f})'
|
||||
)
|
||||
print(
|
||||
f' Cached cost: ${cached_cost_total:.6f} ({totals["cached_tokens"]:,} × ${cached_cost:.6f})'
|
||||
)
|
||||
print(f' Total cost: ${total_cost:.6f}')
|
||||
print()
|
||||
|
||||
print('SUMMARY:')
|
||||
print(
|
||||
f' Total input tokens: {totals["input_tokens"] + totals["cached_tokens"]:,}'
|
||||
)
|
||||
print(f' Total output tokens: {totals["output_tokens"]:,}')
|
||||
print(f' Grand total tokens: {totals["total_tokens"]:,}')
|
||||
|
||||
return totals
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(
|
||||
description='Aggregate token usage metrics from LLM completion files',
|
||||
formatter_class=argparse.RawDescriptionHelpFormatter,
|
||||
epilog="""
|
||||
Examples:
|
||||
python aggregate_token_usage.py /path/to/completions
|
||||
python aggregate_token_usage.py /path/to/completions --input-cost 0.000001 --output-cost 0.000002
|
||||
python aggregate_token_usage.py /path/to/completions --input-cost 0.000001 --output-cost 0.000002 --cached-cost 0.0000005
|
||||
""",
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
'directory_path', help='Path to directory containing completion files'
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
'--input-cost',
|
||||
type=float,
|
||||
default=0.0,
|
||||
help='Cost per input token (default: 0.0)',
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
'--output-cost',
|
||||
type=float,
|
||||
default=0.0,
|
||||
help='Cost per output token (default: 0.0)',
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
'--cached-cost',
|
||||
type=float,
|
||||
default=0.0,
|
||||
help='Cost per cached token (default: 0.0)',
|
||||
)
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
# Validate directory path
|
||||
if not os.path.exists(args.directory_path):
|
||||
print(f"Error: Directory '{args.directory_path}' does not exist.")
|
||||
return 1
|
||||
|
||||
if not os.path.isdir(args.directory_path):
|
||||
print(f"Error: '{args.directory_path}' is not a directory.")
|
||||
return 1
|
||||
|
||||
# Run aggregation
|
||||
try:
|
||||
aggregate_token_usage(
|
||||
args.directory_path, args.input_cost, args.output_cost, args.cached_cost
|
||||
)
|
||||
return 0
|
||||
except Exception as e:
|
||||
print(f'Error during aggregation: {e}')
|
||||
return 1
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
exit(main())
|
||||
@@ -668,8 +668,23 @@ def is_fatal_runtime_error(error: str | None) -> bool:
|
||||
|
||||
|
||||
def get_metrics(state: State) -> dict[str, Any]:
|
||||
"""Extract metrics from the state."""
|
||||
metrics = state.metrics.get() if state.metrics else {}
|
||||
"""Extract metrics for evaluations.
|
||||
|
||||
Prefer ConversationStats (source of truth) and fall back to state.metrics for
|
||||
backward compatibility.
|
||||
"""
|
||||
metrics: dict[str, Any]
|
||||
try:
|
||||
if getattr(state, 'conversation_stats', None):
|
||||
combined = state.conversation_stats.get_combined_metrics()
|
||||
metrics = combined.get()
|
||||
elif getattr(state, 'metrics', None):
|
||||
metrics = state.metrics.get()
|
||||
else:
|
||||
metrics = {}
|
||||
except Exception:
|
||||
metrics = state.metrics.get() if getattr(state, 'metrics', None) else {}
|
||||
|
||||
metrics['condenser'] = get_condensation_metadata(state)
|
||||
return metrics
|
||||
|
||||
@@ -688,3 +703,79 @@ def get_default_sandbox_config_for_eval() -> SandboxConfig:
|
||||
remote_runtime_enable_retries=True,
|
||||
remote_runtime_class='sysbox',
|
||||
)
|
||||
|
||||
|
||||
def get_openhands_config_for_eval(
|
||||
metadata: EvalMetadata | None = None,
|
||||
sandbox_config: SandboxConfig | None = None,
|
||||
runtime: str | None = None,
|
||||
max_iterations: int | None = None,
|
||||
default_agent: str | None = None,
|
||||
enable_browser: bool = False,
|
||||
workspace_base: str | None = None,
|
||||
workspace_mount_path: str | None = None,
|
||||
):
|
||||
"""Create an OpenHandsConfig with common patterns used across evaluation scripts.
|
||||
|
||||
This function provides a standardized way to create OpenHands configurations
|
||||
for evaluation runs, with sensible defaults that match the patterns used in
|
||||
most run_infer.py scripts. Individual evaluation scripts can override specific
|
||||
attributes as needed.
|
||||
|
||||
Args:
|
||||
metadata: EvalMetadata containing agent class, max iterations, etc.
|
||||
sandbox_config: Custom sandbox config. If None, uses get_default_sandbox_config_for_eval()
|
||||
runtime: Runtime type. If None, uses environment RUNTIME or 'docker'
|
||||
max_iterations: Max iterations for the agent. If None, uses metadata.max_iterations
|
||||
default_agent: Agent class name. If None, uses metadata.agent_class
|
||||
enable_browser: Whether to enable browser functionality
|
||||
workspace_base: Workspace base path. Defaults to None
|
||||
workspace_mount_path: Workspace mount path. Defaults to None
|
||||
|
||||
Returns:
|
||||
OpenHandsConfig: Configured for evaluation with eval-specific overrides applied
|
||||
"""
|
||||
# Defer import to avoid circular imports at module load time
|
||||
from openhands.core.config.openhands_config import (
|
||||
OpenHandsConfig as _OHConfig, # type: ignore
|
||||
)
|
||||
|
||||
# Use provided sandbox config or get default
|
||||
if sandbox_config is None:
|
||||
sandbox_config = get_default_sandbox_config_for_eval()
|
||||
|
||||
# Extract values from metadata if provided
|
||||
if metadata is not None:
|
||||
if max_iterations is None:
|
||||
max_iterations = metadata.max_iterations
|
||||
if default_agent is None:
|
||||
default_agent = metadata.agent_class
|
||||
|
||||
# Use environment runtime or default
|
||||
if runtime is None:
|
||||
runtime = os.environ.get('RUNTIME', 'docker')
|
||||
|
||||
# Provide sensible defaults if still None
|
||||
if default_agent is None:
|
||||
default_agent = 'CodeActAgent'
|
||||
if max_iterations is None:
|
||||
max_iterations = 50
|
||||
|
||||
# Always use repo-local .eval_sessions directory (absolute path)
|
||||
eval_store = os.path.abspath(os.path.join(os.getcwd(), '.eval_sessions'))
|
||||
|
||||
# Create the base config with evaluation-specific overrides
|
||||
config = _OHConfig(
|
||||
default_agent=default_agent,
|
||||
run_as_openhands=False,
|
||||
runtime=runtime,
|
||||
max_iterations=max_iterations,
|
||||
enable_browser=enable_browser,
|
||||
sandbox=sandbox_config,
|
||||
workspace_base=workspace_base,
|
||||
workspace_mount_path=workspace_mount_path,
|
||||
file_store='local',
|
||||
file_store_path=eval_store,
|
||||
)
|
||||
|
||||
return config
|
||||
|
||||
@@ -232,13 +232,16 @@ describe("RepositorySelectionForm", () => {
|
||||
renderForm();
|
||||
|
||||
const dropdown = await screen.findByTestId("repo-dropdown");
|
||||
const input = dropdown.querySelector('input[type="text"]') as HTMLInputElement;
|
||||
const input = dropdown.querySelector(
|
||||
'input[type="text"]',
|
||||
) as HTMLInputElement;
|
||||
expect(input).toBeInTheDocument();
|
||||
|
||||
await userEvent.type(input, "https://github.com/kubernetes/kubernetes");
|
||||
expect(searchGitReposSpy).toHaveBeenLastCalledWith(
|
||||
"kubernetes/kubernetes",
|
||||
3,
|
||||
"github",
|
||||
);
|
||||
});
|
||||
|
||||
@@ -268,13 +271,16 @@ describe("RepositorySelectionForm", () => {
|
||||
renderForm();
|
||||
|
||||
const dropdown = await screen.findByTestId("repo-dropdown");
|
||||
const input = dropdown.querySelector('input[type="text"]') as HTMLInputElement;
|
||||
const input = dropdown.querySelector(
|
||||
'input[type="text"]',
|
||||
) as HTMLInputElement;
|
||||
expect(input).toBeInTheDocument();
|
||||
|
||||
await userEvent.type(input, "https://github.com/kubernetes/kubernetes");
|
||||
expect(searchGitReposSpy).toHaveBeenLastCalledWith(
|
||||
"kubernetes/kubernetes",
|
||||
3,
|
||||
"github",
|
||||
);
|
||||
});
|
||||
});
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -79,6 +79,35 @@ describe("Content", () => {
|
||||
expect(screen.getByTestId("set-indicator")).toBeInTheDocument();
|
||||
});
|
||||
});
|
||||
|
||||
it("should conditionally show security analyzer based on confirmation mode", async () => {
|
||||
renderLlmSettingsScreen();
|
||||
await screen.findByTestId("llm-settings-screen");
|
||||
|
||||
const confirmation = screen.getByTestId("enable-confirmation-mode-switch");
|
||||
|
||||
// Initially confirmation mode is false, so security analyzer should not be visible
|
||||
expect(confirmation).not.toBeChecked();
|
||||
expect(
|
||||
screen.queryByTestId("security-analyzer-input"),
|
||||
).not.toBeInTheDocument();
|
||||
|
||||
// Enable confirmation mode
|
||||
await userEvent.click(confirmation);
|
||||
expect(confirmation).toBeChecked();
|
||||
|
||||
// Security analyzer should now be visible
|
||||
screen.getByTestId("security-analyzer-input");
|
||||
|
||||
// Disable confirmation mode again
|
||||
await userEvent.click(confirmation);
|
||||
expect(confirmation).not.toBeChecked();
|
||||
|
||||
// Security analyzer should be hidden again
|
||||
expect(
|
||||
screen.queryByTestId("security-analyzer-input"),
|
||||
).not.toBeInTheDocument();
|
||||
});
|
||||
});
|
||||
|
||||
describe("Advanced form", () => {
|
||||
@@ -107,7 +136,6 @@ describe("Content", () => {
|
||||
within(advancedForm).getByTestId("llm-api-key-input");
|
||||
within(advancedForm).getByTestId("llm-api-key-help-anchor-advanced");
|
||||
within(advancedForm).getByTestId("agent-input");
|
||||
within(advancedForm).getByTestId("enable-confirmation-mode-switch");
|
||||
within(advancedForm).getByTestId("enable-memory-condenser-switch");
|
||||
|
||||
await userEvent.click(advancedSwitch);
|
||||
@@ -130,9 +158,6 @@ describe("Content", () => {
|
||||
const baseUrl = screen.getByTestId("base-url-input");
|
||||
const apiKey = screen.getByTestId("llm-api-key-input");
|
||||
const agent = screen.getByTestId("agent-input");
|
||||
const confirmation = screen.getByTestId(
|
||||
"enable-confirmation-mode-switch",
|
||||
);
|
||||
const condensor = screen.getByTestId("enable-memory-condenser-switch");
|
||||
|
||||
expect(model).toHaveValue("openhands/claude-sonnet-4-20250514");
|
||||
@@ -140,15 +165,7 @@ describe("Content", () => {
|
||||
expect(apiKey).toHaveValue("");
|
||||
expect(apiKey).toHaveProperty("placeholder", "");
|
||||
expect(agent).toHaveValue("CodeActAgent");
|
||||
expect(confirmation).not.toBeChecked();
|
||||
expect(condensor).toBeChecked();
|
||||
|
||||
// check that security analyzer is present
|
||||
expect(
|
||||
screen.queryByTestId("security-analyzer-input"),
|
||||
).not.toBeInTheDocument();
|
||||
await userEvent.click(confirmation);
|
||||
screen.getByTestId("security-analyzer-input");
|
||||
});
|
||||
|
||||
it("should render the advanced form if existings settings are advanced", async () => {
|
||||
@@ -177,7 +194,7 @@ describe("Content", () => {
|
||||
agent: "CoActAgent",
|
||||
confirmation_mode: true,
|
||||
enable_default_condenser: false,
|
||||
security_analyzer: "mock-invariant",
|
||||
security_analyzer: "none",
|
||||
});
|
||||
|
||||
renderLlmSettingsScreen();
|
||||
@@ -203,7 +220,7 @@ describe("Content", () => {
|
||||
expect(agent).toHaveValue("CoActAgent");
|
||||
expect(confirmation).toBeChecked();
|
||||
expect(condensor).not.toBeChecked();
|
||||
expect(securityAnalyzer).toHaveValue("mock-invariant");
|
||||
expect(securityAnalyzer).toHaveValue("SETTINGS$SECURITY_ANALYZER_NONE");
|
||||
});
|
||||
});
|
||||
});
|
||||
@@ -293,7 +310,7 @@ describe("Form submission", () => {
|
||||
// select security analyzer
|
||||
const securityAnalyzer = screen.getByTestId("security-analyzer-input");
|
||||
await userEvent.click(securityAnalyzer);
|
||||
const securityAnalyzerOption = screen.getByText("mock-invariant");
|
||||
const securityAnalyzerOption = screen.getByText("SETTINGS$SECURITY_ANALYZER_NONE");
|
||||
await userEvent.click(securityAnalyzerOption);
|
||||
|
||||
const submitButton = screen.getByTestId("submit-button");
|
||||
@@ -306,7 +323,7 @@ describe("Form submission", () => {
|
||||
agent: "CoActAgent",
|
||||
confirmation_mode: true,
|
||||
enable_default_condenser: false,
|
||||
security_analyzer: "mock-invariant",
|
||||
security_analyzer: null,
|
||||
}),
|
||||
);
|
||||
});
|
||||
@@ -375,9 +392,11 @@ describe("Form submission", () => {
|
||||
const baseUrl = await screen.findByTestId("base-url-input");
|
||||
const apiKey = await screen.findByTestId("llm-api-key-input");
|
||||
const agent = await screen.findByTestId("agent-input");
|
||||
const confirmation = await screen.findByTestId("enable-confirmation-mode-switch");
|
||||
const condensor = await screen.findByTestId("enable-memory-condenser-switch");
|
||||
|
||||
// Confirmation mode switch is now in basic settings, always visible
|
||||
const confirmation = await screen.findByTestId("enable-confirmation-mode-switch");
|
||||
|
||||
// enter custom model
|
||||
await userEvent.type(model, "-mini");
|
||||
expect(model).toHaveValue("openai/gpt-4o-mini");
|
||||
@@ -451,14 +470,17 @@ describe("Form submission", () => {
|
||||
// select security analyzer
|
||||
const securityAnalyzer = await screen.findByTestId("security-analyzer-input");
|
||||
await userEvent.click(securityAnalyzer);
|
||||
const securityAnalyzerOption = screen.getByText("mock-invariant");
|
||||
const securityAnalyzerOption = screen.getByText("SETTINGS$SECURITY_ANALYZER_NONE");
|
||||
await userEvent.click(securityAnalyzerOption);
|
||||
expect(securityAnalyzer).toHaveValue("mock-invariant");
|
||||
expect(securityAnalyzer).toHaveValue("SETTINGS$SECURITY_ANALYZER_NONE");
|
||||
|
||||
expect(submitButton).not.toBeDisabled();
|
||||
|
||||
await userEvent.clear(securityAnalyzer);
|
||||
expect(securityAnalyzer).toHaveValue("");
|
||||
// revert back to original value
|
||||
await userEvent.click(securityAnalyzer);
|
||||
const originalSecurityAnalyzerOption = screen.getByText("SETTINGS$SECURITY_ANALYZER_LLM_DEFAULT");
|
||||
await userEvent.click(originalSecurityAnalyzerOption);
|
||||
expect(securityAnalyzer).toHaveValue("SETTINGS$SECURITY_ANALYZER_LLM_DEFAULT");
|
||||
expect(submitButton).toBeDisabled();
|
||||
});
|
||||
|
||||
@@ -552,7 +574,7 @@ describe("Form submission", () => {
|
||||
expect.objectContaining({
|
||||
llm_model: "openhands/claude-sonnet-4-20250514",
|
||||
llm_base_url: "",
|
||||
confirmation_mode: false,
|
||||
confirmation_mode: true, // Confirmation mode is now a basic setting, should be preserved
|
||||
}),
|
||||
);
|
||||
});
|
||||
|
||||
@@ -107,9 +107,7 @@ describe("Content", () => {
|
||||
expect(screen.queryByTestId("add-secret-button")).not.toBeInTheDocument(),
|
||||
);
|
||||
const button = await screen.findByTestId("connect-git-button");
|
||||
await userEvent.click(button);
|
||||
|
||||
screen.getByTestId("git-settings-screen");
|
||||
expect(button).toHaveAttribute("href", "/settings/integrations");
|
||||
});
|
||||
|
||||
it("should render an empty table when there are no existing secrets", async () => {
|
||||
|
||||
@@ -136,7 +136,7 @@ describe("Settings Screen", () => {
|
||||
"secrets",
|
||||
"api keys",
|
||||
];
|
||||
const sectionsToExclude = ["llm", "mcp"];
|
||||
const sectionsToExclude = ["llm"];
|
||||
|
||||
renderSettingsScreen();
|
||||
|
||||
|
||||
@@ -29,23 +29,5 @@ describe("hasAdvancedSettingsSet", () => {
|
||||
}),
|
||||
).toBe(true);
|
||||
});
|
||||
|
||||
test("CONFIRMATION_MODE is true", () => {
|
||||
expect(
|
||||
hasAdvancedSettingsSet({
|
||||
...DEFAULT_SETTINGS,
|
||||
CONFIRMATION_MODE: true,
|
||||
}),
|
||||
).toBe(true);
|
||||
});
|
||||
|
||||
test("SECURITY_ANALYZER is set", () => {
|
||||
expect(
|
||||
hasAdvancedSettingsSet({
|
||||
...DEFAULT_SETTINGS,
|
||||
SECURITY_ANALYZER: "test",
|
||||
}),
|
||||
).toBe(true);
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
682
frontend/package-lock.json
generated
682
frontend/package-lock.json
generated
File diff suppressed because it is too large
Load Diff
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "openhands-frontend",
|
||||
"version": "0.53.0",
|
||||
"version": "0.54.0",
|
||||
"private": true,
|
||||
"type": "module",
|
||||
"engines": {
|
||||
@@ -11,17 +11,17 @@
|
||||
"@heroui/use-infinite-scroll": "^2.2.10",
|
||||
"@microlink/react-json-view": "^1.26.2",
|
||||
"@monaco-editor/react": "^4.7.0-rc.0",
|
||||
"@react-router/node": "^7.8.0",
|
||||
"@react-router/serve": "^7.8.0",
|
||||
"@react-types/shared": "^3.31.0",
|
||||
"@react-router/node": "^7.8.2",
|
||||
"@react-router/serve": "^7.8.2",
|
||||
"@react-types/shared": "^3.32.0",
|
||||
"@reduxjs/toolkit": "^2.8.2",
|
||||
"@stripe/react-stripe-js": "^3.9.0",
|
||||
"@stripe/stripe-js": "^7.8.0",
|
||||
"@stripe/react-stripe-js": "^3.9.2",
|
||||
"@stripe/stripe-js": "^7.9.0",
|
||||
"@tailwindcss/postcss": "^4.1.12",
|
||||
"@tailwindcss/vite": "^4.1.12",
|
||||
"@tanstack/react-query": "^5.85.3",
|
||||
"@tanstack/react-query": "^5.85.5",
|
||||
"@uidotdev/usehooks": "^2.4.1",
|
||||
"@vitejs/plugin-react": "^5.0.0",
|
||||
"@vitejs/plugin-react": "^5.0.2",
|
||||
"@xterm/addon-fit": "^0.10.0",
|
||||
"@xterm/xterm": "^5.4.0",
|
||||
"axios": "^1.11.0",
|
||||
@@ -29,32 +29,32 @@
|
||||
"date-fns": "^4.1.0",
|
||||
"eslint-config-airbnb-typescript": "^18.0.0",
|
||||
"framer-motion": "^12.23.12",
|
||||
"i18next": "^25.3.6",
|
||||
"i18next": "^25.4.2",
|
||||
"i18next-browser-languagedetector": "^8.2.0",
|
||||
"i18next-http-backend": "^3.0.2",
|
||||
"isbot": "^5.1.29",
|
||||
"jose": "^6.0.12",
|
||||
"lucide-react": "^0.539.0",
|
||||
"isbot": "^5.1.30",
|
||||
"jose": "^6.1.0",
|
||||
"lucide-react": "^0.542.0",
|
||||
"monaco-editor": "^0.52.2",
|
||||
"posthog-js": "^1.260.1",
|
||||
"posthog-js": "^1.261.0",
|
||||
"react": "^19.1.1",
|
||||
"react-dom": "^19.1.1",
|
||||
"react-highlight": "^0.15.0",
|
||||
"react-hot-toast": "^2.5.1",
|
||||
"react-i18next": "^15.6.1",
|
||||
"react-hot-toast": "^2.6.0",
|
||||
"react-i18next": "^15.7.2",
|
||||
"react-icons": "^5.5.0",
|
||||
"react-markdown": "^10.1.0",
|
||||
"react-redux": "^9.2.0",
|
||||
"react-router": "^7.8.0",
|
||||
"react-router": "^7.8.2",
|
||||
"react-select": "^5.10.2",
|
||||
"react-syntax-highlighter": "^15.6.1",
|
||||
"react-syntax-highlighter": "^15.6.6",
|
||||
"react-textarea-autosize": "^8.5.9",
|
||||
"remark-breaks": "^4.0.0",
|
||||
"remark-gfm": "^4.0.1",
|
||||
"sirv-cli": "^3.0.1",
|
||||
"socket.io-client": "^4.8.1",
|
||||
"tailwind-merge": "^3.3.1",
|
||||
"vite": "^7.1.1",
|
||||
"vite": "^7.1.3",
|
||||
"web-vitals": "^5.1.0",
|
||||
"ws": "^8.18.2"
|
||||
},
|
||||
@@ -88,17 +88,17 @@
|
||||
"@babel/traverse": "^7.28.3",
|
||||
"@babel/types": "^7.28.2",
|
||||
"@mswjs/socket.io-binding": "^0.2.0",
|
||||
"@playwright/test": "^1.54.2",
|
||||
"@react-router/dev": "^7.8.0",
|
||||
"@playwright/test": "^1.55.0",
|
||||
"@react-router/dev": "^7.8.2",
|
||||
"@tailwindcss/typography": "^0.5.16",
|
||||
"@tanstack/eslint-plugin-query": "^5.83.1",
|
||||
"@testing-library/dom": "^10.4.1",
|
||||
"@testing-library/jest-dom": "^6.7.0",
|
||||
"@testing-library/jest-dom": "^6.8.0",
|
||||
"@testing-library/react": "^16.3.0",
|
||||
"@testing-library/user-event": "^14.6.1",
|
||||
"@types/node": "^24.2.0",
|
||||
"@types/react": "^19.1.9",
|
||||
"@types/react-dom": "^19.1.7",
|
||||
"@types/node": "^24.3.0",
|
||||
"@types/react": "^19.1.12",
|
||||
"@types/react-dom": "^19.1.9",
|
||||
"@types/react-highlight": "^0.12.8",
|
||||
"@types/react-syntax-highlighter": "^15.5.13",
|
||||
"@types/ws": "^8.18.1",
|
||||
@@ -117,16 +117,16 @@
|
||||
"eslint-plugin-prettier": "^5.5.4",
|
||||
"eslint-plugin-react": "^7.37.5",
|
||||
"eslint-plugin-react-hooks": "^4.6.2",
|
||||
"eslint-plugin-unused-imports": "^4.1.4",
|
||||
"eslint-plugin-unused-imports": "^4.2.0",
|
||||
"husky": "^9.1.7",
|
||||
"jsdom": "^26.1.0",
|
||||
"lint-staged": "^16.1.4",
|
||||
"msw": "^2.6.6",
|
||||
"prettier": "^3.6.2",
|
||||
"stripe": "^18.4.0",
|
||||
"stripe": "^18.5.0",
|
||||
"tailwindcss": "^4.1.8",
|
||||
"typescript": "^5.9.2",
|
||||
"vite-plugin-svgr": "^4.2.0",
|
||||
"vite-plugin-svgr": "^4.5.0",
|
||||
"vite-tsconfig-paths": "^5.1.4",
|
||||
"vitest": "^3.0.2"
|
||||
},
|
||||
|
||||
@@ -726,6 +726,27 @@ class OpenHands {
|
||||
);
|
||||
return data;
|
||||
}
|
||||
|
||||
static async getMicroagentManagementConversations(
|
||||
selectedRepository: string,
|
||||
pageId?: string,
|
||||
limit: number = 100,
|
||||
): Promise<Conversation[]> {
|
||||
const params: Record<string, string | number> = {
|
||||
limit,
|
||||
selected_repository: selectedRepository,
|
||||
};
|
||||
|
||||
if (pageId) {
|
||||
params.page_id = pageId;
|
||||
}
|
||||
|
||||
const { data } = await openHands.get<ResultSet<Conversation>>(
|
||||
"/api/microagent-management/conversations",
|
||||
{ params },
|
||||
);
|
||||
return data.results;
|
||||
}
|
||||
}
|
||||
|
||||
export default OpenHands;
|
||||
|
||||
@@ -10,6 +10,7 @@ function PauseIcon() {
|
||||
stroke="currentColor"
|
||||
className="w-5 h-5"
|
||||
>
|
||||
<circle cx="12" cy="12" r="10" fill="#e5e7eb" />
|
||||
<path
|
||||
strokeLinecap="round"
|
||||
strokeLinejoin="round"
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import { useMemo } from "react";
|
||||
import { StylesConfig } from "react-select";
|
||||
import { Provider } from "../../types/settings";
|
||||
import { ReactSelectDropdown, SelectOption } from "./react-select-dropdown";
|
||||
|
||||
@@ -11,6 +12,8 @@ export interface GitProviderDropdownProps {
|
||||
disabled?: boolean;
|
||||
isLoading?: boolean;
|
||||
onChange?: (provider: Provider | null) => void;
|
||||
classNamePrefix?: string;
|
||||
styles?: StylesConfig<SelectOption, false>;
|
||||
}
|
||||
|
||||
export function GitProviderDropdown({
|
||||
@@ -22,6 +25,8 @@ export function GitProviderDropdown({
|
||||
disabled = false,
|
||||
isLoading = false,
|
||||
onChange,
|
||||
classNamePrefix,
|
||||
styles,
|
||||
}: GitProviderDropdownProps) {
|
||||
const options: SelectOption[] = useMemo(
|
||||
() =>
|
||||
@@ -53,6 +58,8 @@ export function GitProviderDropdown({
|
||||
isSearchable={false}
|
||||
isLoading={isLoading}
|
||||
onChange={handleChange}
|
||||
classNamePrefix={classNamePrefix}
|
||||
styles={styles}
|
||||
/>
|
||||
);
|
||||
}
|
||||
|
||||
@@ -1,7 +1,9 @@
|
||||
import { useCallback, useMemo, useRef } from "react";
|
||||
import { useCallback, useMemo, useState } from "react";
|
||||
import { useTranslation } from "react-i18next";
|
||||
import { Provider } from "../../types/settings";
|
||||
import { useGitRepositories } from "../../hooks/query/use-git-repositories";
|
||||
import { useSearchRepositories } from "../../hooks/query/use-search-repositories";
|
||||
import { useDebounce } from "../../hooks/use-debounce";
|
||||
import OpenHands from "../../api/open-hands";
|
||||
import { GitRepository } from "../../types/git";
|
||||
import {
|
||||
@@ -19,10 +21,6 @@ export interface GitRepositoryDropdownProps {
|
||||
onChange?: (repository?: GitRepository) => void;
|
||||
}
|
||||
|
||||
interface SearchCache {
|
||||
[key: string]: GitRepository[];
|
||||
}
|
||||
|
||||
export function GitRepositoryDropdown({
|
||||
provider,
|
||||
value,
|
||||
@@ -33,6 +31,20 @@ export function GitRepositoryDropdown({
|
||||
onChange,
|
||||
}: GitRepositoryDropdownProps) {
|
||||
const { t } = useTranslation();
|
||||
const [searchInput, setSearchInput] = useState("");
|
||||
const debouncedSearchInput = useDebounce(searchInput, 300);
|
||||
|
||||
// Process search input to handle URLs
|
||||
const processedSearchInput = useMemo(() => {
|
||||
if (debouncedSearchInput.startsWith("https://")) {
|
||||
const match = debouncedSearchInput.match(
|
||||
/https:\/\/[^/]+\/([^/]+\/[^/]+)/,
|
||||
);
|
||||
return match ? match[1] : debouncedSearchInput;
|
||||
}
|
||||
return debouncedSearchInput;
|
||||
}, [debouncedSearchInput]);
|
||||
|
||||
const {
|
||||
data,
|
||||
fetchNextPage,
|
||||
@@ -45,6 +57,10 @@ export function GitRepositoryDropdown({
|
||||
enabled: !disabled,
|
||||
});
|
||||
|
||||
// Search query for processed input (handles URLs)
|
||||
const { data: searchData, isLoading: isSearchLoading } =
|
||||
useSearchRepositories(processedSearchInput, provider);
|
||||
|
||||
const allOptions: AsyncSelectOption[] = useMemo(
|
||||
() =>
|
||||
data?.pages
|
||||
@@ -58,90 +74,83 @@ export function GitRepositoryDropdown({
|
||||
[data],
|
||||
);
|
||||
|
||||
// Keep track of search results
|
||||
const searchCache = useRef<SearchCache>({});
|
||||
const searchOptions: AsyncSelectOption[] = useMemo(
|
||||
() =>
|
||||
searchData
|
||||
? searchData.map((repo) => ({
|
||||
value: repo.id,
|
||||
label: repo.full_name,
|
||||
}))
|
||||
: [],
|
||||
[searchData],
|
||||
);
|
||||
|
||||
const selectedOption = useMemo(() => {
|
||||
// First check in loaded pages
|
||||
const option = allOptions.find((opt) => opt.value === value);
|
||||
if (option) return option;
|
||||
|
||||
// If not found, check in search cache
|
||||
const repo = Object.values(searchCache.current)
|
||||
.flat()
|
||||
.find((r) => r.id === value);
|
||||
|
||||
if (repo) {
|
||||
return {
|
||||
value: repo.id,
|
||||
label: repo.full_name,
|
||||
};
|
||||
}
|
||||
// If not found, check in search results
|
||||
const searchOption = searchOptions.find((opt) => opt.value === value);
|
||||
if (searchOption) return searchOption;
|
||||
|
||||
return null;
|
||||
}, [allOptions, value]);
|
||||
}, [allOptions, searchOptions, value]);
|
||||
|
||||
const loadOptions = useCallback(
|
||||
async (inputValue: string): Promise<AsyncSelectOption[]> => {
|
||||
// Update search input to trigger debounced search
|
||||
setSearchInput(inputValue);
|
||||
|
||||
// If empty input, show all loaded options
|
||||
if (!inputValue.trim()) {
|
||||
return allOptions;
|
||||
}
|
||||
|
||||
// If it looks like a URL, pass the full URL to the API along with the provider
|
||||
// For very short inputs, do local filtering
|
||||
if (inputValue.length < 2) {
|
||||
return allOptions.filter((option) =>
|
||||
option.label.toLowerCase().includes(inputValue.toLowerCase()),
|
||||
);
|
||||
}
|
||||
|
||||
// Handle URL inputs by performing direct search
|
||||
if (inputValue.startsWith("https://")) {
|
||||
try {
|
||||
const searchResults = await OpenHands.searchGitRepositories(
|
||||
inputValue,
|
||||
3,
|
||||
provider,
|
||||
);
|
||||
// Cache by URL to preserve mapping
|
||||
searchCache.current[inputValue] = searchResults;
|
||||
return searchResults.map((repo) => ({
|
||||
value: repo.id,
|
||||
label: repo.full_name,
|
||||
}));
|
||||
} catch (_) {
|
||||
// Fallback: attempt with extracted path if server doesn't support URL search
|
||||
const match = inputValue.match(/https:\/\/[^/]+\/([^/]+\/[^/]+)/);
|
||||
if (match) {
|
||||
const repoName = match[1];
|
||||
const searchResults = await OpenHands.searchGitRepositories(
|
||||
const match = inputValue.match(/https:\/\/[^/]+\/([^/]+\/[^/]+)/);
|
||||
if (match) {
|
||||
const repoName = match[1];
|
||||
try {
|
||||
// Perform direct search for URL-based inputs
|
||||
const repositories = await OpenHands.searchGitRepositories(
|
||||
repoName,
|
||||
3,
|
||||
provider,
|
||||
);
|
||||
searchCache.current[repoName] = searchResults;
|
||||
return searchResults.map((repo) => ({
|
||||
value: repo.id,
|
||||
return repositories.map((repo) => ({
|
||||
value: repo.full_name,
|
||||
label: repo.full_name,
|
||||
data: repo,
|
||||
}));
|
||||
} catch (error) {
|
||||
// Fall back to local filtering if search fails
|
||||
return allOptions.filter((option) =>
|
||||
option.label.toLowerCase().includes(repoName.toLowerCase()),
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// For any other input, search via API for the selected provider
|
||||
if (inputValue.length >= 2) {
|
||||
const searchResults = await OpenHands.searchGitRepositories(
|
||||
inputValue,
|
||||
10,
|
||||
provider,
|
||||
);
|
||||
// Cache the search results
|
||||
searchCache.current[inputValue] = searchResults;
|
||||
return searchResults.map((repo) => ({
|
||||
value: repo.id,
|
||||
label: repo.full_name,
|
||||
}));
|
||||
// For regular text inputs, use hook-based search results if available
|
||||
if (searchOptions.length > 0 && processedSearchInput === inputValue) {
|
||||
return searchOptions;
|
||||
}
|
||||
|
||||
// For very short inputs, do local filtering
|
||||
// Fallback to local filtering while search is loading
|
||||
return allOptions.filter((option) =>
|
||||
option.label.toLowerCase().includes(inputValue.toLowerCase()),
|
||||
);
|
||||
},
|
||||
[allOptions, provider],
|
||||
[allOptions, searchOptions, processedSearchInput, provider],
|
||||
);
|
||||
|
||||
const handleChange = (option: AsyncSelectOption | null) => {
|
||||
@@ -157,9 +166,7 @@ export function GitRepositoryDropdown({
|
||||
|
||||
// If not found, check in search results
|
||||
if (!repo) {
|
||||
repo = Object.values(searchCache.current)
|
||||
.flat()
|
||||
.find((r) => r.id === option.value);
|
||||
repo = searchData?.find((r) => r.id === option.value);
|
||||
}
|
||||
|
||||
onChange?.(repo);
|
||||
@@ -182,7 +189,7 @@ export function GitRepositoryDropdown({
|
||||
errorMessage={errorMessage}
|
||||
disabled={disabled}
|
||||
isClearable={false}
|
||||
isLoading={isLoading || isLoading || isFetchingNextPage}
|
||||
isLoading={isLoading || isFetchingNextPage || isSearchLoading}
|
||||
cacheOptions
|
||||
defaultOptions={allOptions}
|
||||
onChange={handleChange}
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
import { useMemo } from "react";
|
||||
import Select from "react-select";
|
||||
import Select, { StylesConfig } from "react-select";
|
||||
import { cn } from "#/utils/utils";
|
||||
import { SelectOptionBase, getCustomStyles } from "./react-select-styles";
|
||||
|
||||
@@ -17,6 +17,8 @@ export interface ReactSelectDropdownProps {
|
||||
isSearchable?: boolean;
|
||||
isLoading?: boolean;
|
||||
onChange?: (option: SelectOption | null) => void;
|
||||
classNamePrefix?: string;
|
||||
styles?: StylesConfig<SelectOption, false>;
|
||||
}
|
||||
|
||||
export function ReactSelectDropdown({
|
||||
@@ -31,6 +33,8 @@ export function ReactSelectDropdown({
|
||||
isSearchable = true,
|
||||
isLoading = false,
|
||||
onChange,
|
||||
classNamePrefix,
|
||||
styles,
|
||||
}: ReactSelectDropdownProps) {
|
||||
const customStyles = useMemo(() => getCustomStyles<SelectOption>(), []);
|
||||
|
||||
@@ -46,8 +50,9 @@ export function ReactSelectDropdown({
|
||||
isSearchable={isSearchable}
|
||||
isLoading={isLoading}
|
||||
onChange={onChange}
|
||||
styles={customStyles}
|
||||
styles={styles || customStyles}
|
||||
className="w-full"
|
||||
classNamePrefix={classNamePrefix}
|
||||
/>
|
||||
{errorMessage && (
|
||||
<p className="text-red-500 text-sm mt-1">{errorMessage}</p>
|
||||
|
||||
@@ -90,3 +90,26 @@ export const getCustomStyles = <T extends SelectOptionBase>(): StylesConfig<
|
||||
color: "#B7BDC2", // tertiary-light
|
||||
}),
|
||||
});
|
||||
|
||||
export const getGitProviderMicroagentManagementCustomStyles = <
|
||||
T extends SelectOptionBase,
|
||||
>(): StylesConfig<T, false> => ({
|
||||
...getCustomStyles<T>(),
|
||||
control: (provided, state) => ({
|
||||
...provided,
|
||||
backgroundColor: state.isDisabled ? "#363636" : "#454545", // darker tertiary when disabled
|
||||
border: "1px solid #717888",
|
||||
borderRadius: "0.125rem",
|
||||
minHeight: "2.5rem",
|
||||
padding: "0 0.5rem",
|
||||
boxShadow: "none",
|
||||
opacity: state.isDisabled ? 0.6 : 1,
|
||||
cursor: state.isDisabled ? "not-allowed" : "pointer",
|
||||
"&:hover": {
|
||||
borderColor: "#717888",
|
||||
},
|
||||
"& .git-provider-dropdown__value-container": {
|
||||
padding: "2px 0",
|
||||
},
|
||||
}),
|
||||
});
|
||||
|
||||
@@ -9,6 +9,7 @@ import { CopyToClipboardButton } from "#/components/shared/buttons/copy-to-clipb
|
||||
import { anchor } from "../markdown/anchor";
|
||||
import { OpenHandsSourceType } from "#/types/core/base";
|
||||
import { paragraph } from "../markdown/paragraph";
|
||||
import { TooltipButton } from "#/components/shared/buttons/tooltip-button";
|
||||
|
||||
interface ChatMessageProps {
|
||||
type: OpenHandsSourceType;
|
||||
@@ -16,6 +17,7 @@ interface ChatMessageProps {
|
||||
actions?: Array<{
|
||||
icon: React.ReactNode;
|
||||
onClick: () => void;
|
||||
tooltip?: string;
|
||||
}>;
|
||||
}
|
||||
|
||||
@@ -66,17 +68,35 @@ export function ChatMessage({
|
||||
"items-center gap-1",
|
||||
)}
|
||||
>
|
||||
{actions?.map((action, index) => (
|
||||
<button
|
||||
key={index}
|
||||
type="button"
|
||||
onClick={action.onClick}
|
||||
className="button-base p-1 cursor-pointer"
|
||||
aria-label={`Action ${index + 1}`}
|
||||
>
|
||||
{action.icon}
|
||||
</button>
|
||||
))}
|
||||
{actions?.map((action, index) =>
|
||||
action.tooltip ? (
|
||||
<TooltipButton
|
||||
key={index}
|
||||
tooltip={action.tooltip}
|
||||
ariaLabel={action.tooltip}
|
||||
placement="top"
|
||||
>
|
||||
<button
|
||||
type="button"
|
||||
onClick={action.onClick}
|
||||
className="button-base p-1 cursor-pointer"
|
||||
aria-label={`Action ${index + 1}`}
|
||||
>
|
||||
{action.icon}
|
||||
</button>
|
||||
</TooltipButton>
|
||||
) : (
|
||||
<button
|
||||
key={index}
|
||||
type="button"
|
||||
onClick={action.onClick}
|
||||
className="button-base p-1 cursor-pointer"
|
||||
aria-label={`Action ${index + 1}`}
|
||||
>
|
||||
{action.icon}
|
||||
</button>
|
||||
),
|
||||
)}
|
||||
|
||||
<CopyToClipboardButton
|
||||
isHidden={!isHovering}
|
||||
|
||||
@@ -72,6 +72,9 @@ const getRecallObservationContent = (event: RecallObservation): string => {
|
||||
if (event.extras.repo_instructions) {
|
||||
content += `\n\n**Repository Instructions:**\n\n${event.extras.repo_instructions}`;
|
||||
}
|
||||
if (event.extras.conversation_instructions) {
|
||||
content += `\n\n**Conversation Instructions:**\n\n${event.extras.conversation_instructions}`;
|
||||
}
|
||||
if (event.extras.additional_agent_instructions) {
|
||||
content += `\n\n**Additional Instructions:**\n\n${event.extras.additional_agent_instructions}`;
|
||||
}
|
||||
|
||||
@@ -46,6 +46,7 @@ interface EventMessageProps {
|
||||
actions?: Array<{
|
||||
icon: React.ReactNode;
|
||||
onClick: () => void;
|
||||
tooltip?: string;
|
||||
}>;
|
||||
isInLast10Actions: boolean;
|
||||
}
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import React from "react";
|
||||
import { useTranslation } from "react-i18next";
|
||||
import { createPortal } from "react-dom";
|
||||
import { OpenHandsAction } from "#/types/core/actions";
|
||||
import { OpenHandsObservation } from "#/types/core/observations";
|
||||
@@ -24,6 +25,17 @@ import { AgentState } from "#/types/agent-state";
|
||||
import { getFirstPRUrl } from "#/utils/parse-pr-url";
|
||||
import MemoryIcon from "#/icons/memory_icon.svg?react";
|
||||
|
||||
const isErrorEvent = (evt: unknown): evt is { error: true; message: string } =>
|
||||
typeof evt === "object" &&
|
||||
evt !== null &&
|
||||
"error" in evt &&
|
||||
evt.error === true;
|
||||
|
||||
const isAgentStatusError = (evt: unknown): boolean =>
|
||||
isOpenHandsEvent(evt) &&
|
||||
isAgentStateChangeObservation(evt) &&
|
||||
evt.extras.agent_state === AgentState.ERROR;
|
||||
|
||||
interface MessagesProps {
|
||||
messages: (OpenHandsAction | OpenHandsObservation)[];
|
||||
isAwaitingUserConfirmation: boolean;
|
||||
@@ -31,8 +43,11 @@ interface MessagesProps {
|
||||
|
||||
export const Messages: React.FC<MessagesProps> = React.memo(
|
||||
({ messages, isAwaitingUserConfirmation }) => {
|
||||
const { createConversationAndSubscribe, isPending } =
|
||||
useCreateConversationAndSubscribeMultiple();
|
||||
const {
|
||||
createConversationAndSubscribe,
|
||||
isPending,
|
||||
unsubscribeFromConversation,
|
||||
} = useCreateConversationAndSubscribeMultiple();
|
||||
const { getOptimisticUserMessage } = useOptimisticUserMessage();
|
||||
const { conversationId } = useConversationId();
|
||||
const { data: conversation } = useUserConversation(conversationId);
|
||||
@@ -48,6 +63,8 @@ export const Messages: React.FC<MessagesProps> = React.memo(
|
||||
EventMicroagentStatus[]
|
||||
>([]);
|
||||
|
||||
const { t } = useTranslation();
|
||||
|
||||
const actionHasObservationPair = React.useCallback(
|
||||
(event: OpenHandsAction | OpenHandsObservation): boolean => {
|
||||
if (isOpenHandsAction(event)) {
|
||||
@@ -93,20 +110,6 @@ export const Messages: React.FC<MessagesProps> = React.memo(
|
||||
|
||||
const handleMicroagentEvent = React.useCallback(
|
||||
(socketEvent: unknown, microagentConversationId: string) => {
|
||||
// Handle error events
|
||||
const isErrorEvent = (
|
||||
evt: unknown,
|
||||
): evt is { error: true; message: string } =>
|
||||
typeof evt === "object" &&
|
||||
evt !== null &&
|
||||
"error" in evt &&
|
||||
evt.error === true;
|
||||
|
||||
const isAgentStatusError = (evt: unknown): boolean =>
|
||||
isOpenHandsEvent(evt) &&
|
||||
isAgentStateChangeObservation(evt) &&
|
||||
evt.extras.agent_state === AgentState.ERROR;
|
||||
|
||||
if (isErrorEvent(socketEvent) || isAgentStatusError(socketEvent)) {
|
||||
setMicroagentStatuses((prev) =>
|
||||
prev.map((statusEntry) =>
|
||||
@@ -119,7 +122,11 @@ export const Messages: React.FC<MessagesProps> = React.memo(
|
||||
isOpenHandsEvent(socketEvent) &&
|
||||
isAgentStateChangeObservation(socketEvent)
|
||||
) {
|
||||
if (socketEvent.extras.agent_state === AgentState.FINISHED) {
|
||||
// Handle completion states
|
||||
if (
|
||||
socketEvent.extras.agent_state === AgentState.FINISHED ||
|
||||
socketEvent.extras.agent_state === AgentState.AWAITING_USER_INPUT
|
||||
) {
|
||||
setMicroagentStatuses((prev) =>
|
||||
prev.map((statusEntry) =>
|
||||
statusEntry.conversationId === microagentConversationId
|
||||
@@ -127,6 +134,8 @@ export const Messages: React.FC<MessagesProps> = React.memo(
|
||||
: statusEntry,
|
||||
),
|
||||
);
|
||||
|
||||
unsubscribeFromConversation(microagentConversationId);
|
||||
}
|
||||
} else if (
|
||||
isOpenHandsEvent(socketEvent) &&
|
||||
@@ -147,9 +156,27 @@ export const Messages: React.FC<MessagesProps> = React.memo(
|
||||
),
|
||||
);
|
||||
}
|
||||
|
||||
unsubscribeFromConversation(microagentConversationId);
|
||||
} else {
|
||||
// For any other event, transition from WAITING to CREATING if still waiting
|
||||
setMicroagentStatuses((prev) => {
|
||||
const currentStatus = prev.find(
|
||||
(entry) => entry.conversationId === microagentConversationId,
|
||||
)?.status;
|
||||
|
||||
if (currentStatus === MicroagentStatus.WAITING) {
|
||||
return prev.map((statusEntry) =>
|
||||
statusEntry.conversationId === microagentConversationId
|
||||
? { ...statusEntry, status: MicroagentStatus.CREATING }
|
||||
: statusEntry,
|
||||
);
|
||||
}
|
||||
return prev; // No change needed
|
||||
});
|
||||
}
|
||||
},
|
||||
[setMicroagentStatuses],
|
||||
[setMicroagentStatuses, unsubscribeFromConversation],
|
||||
);
|
||||
|
||||
const handleLaunchMicroagent = (
|
||||
@@ -178,13 +205,13 @@ export const Messages: React.FC<MessagesProps> = React.memo(
|
||||
},
|
||||
onSuccessCallback: (newConversationId: string) => {
|
||||
setShowLaunchMicroagentModal(false);
|
||||
// Update status with conversation ID
|
||||
// Update status with conversation ID - start with WAITING
|
||||
setMicroagentStatuses((prev) => [
|
||||
...prev.filter((status) => status.eventId !== selectedEventId),
|
||||
{
|
||||
eventId: selectedEventId,
|
||||
conversationId: newConversationId,
|
||||
status: MicroagentStatus.CREATING,
|
||||
status: MicroagentStatus.WAITING,
|
||||
},
|
||||
]);
|
||||
},
|
||||
@@ -219,6 +246,7 @@ export const Messages: React.FC<MessagesProps> = React.memo(
|
||||
setSelectedEventId(message.id);
|
||||
setShowLaunchMicroagentModal(true);
|
||||
},
|
||||
tooltip: t("MICROAGENT$ADD_TO_MEMORY"),
|
||||
},
|
||||
]
|
||||
: undefined
|
||||
|
||||
@@ -76,6 +76,10 @@ export function LaunchMicroagentModal({
|
||||
</button>
|
||||
</div>
|
||||
|
||||
<span className="text-sm text-[#A3A3A3] font-normal leading-5">
|
||||
{t("MICROAGENT$DEFINITION")}
|
||||
</span>
|
||||
|
||||
<form
|
||||
data-testid="launch-microagent-modal"
|
||||
onSubmit={onSubmit}
|
||||
|
||||
@@ -19,6 +19,8 @@ export function MicroagentStatusIndicator({
|
||||
|
||||
const getStatusText = () => {
|
||||
switch (status) {
|
||||
case MicroagentStatus.WAITING:
|
||||
return t("MICROAGENT$STATUS_WAITING");
|
||||
case MicroagentStatus.CREATING:
|
||||
return t("MICROAGENT$STATUS_CREATING");
|
||||
case MicroagentStatus.COMPLETED:
|
||||
@@ -35,6 +37,8 @@ export function MicroagentStatusIndicator({
|
||||
|
||||
const getStatusIcon = () => {
|
||||
switch (status) {
|
||||
case MicroagentStatus.WAITING:
|
||||
return <Spinner size="sm" />;
|
||||
case MicroagentStatus.CREATING:
|
||||
return <Spinner size="sm" />;
|
||||
case MicroagentStatus.COMPLETED:
|
||||
|
||||
@@ -10,6 +10,11 @@ interface ConversationCreatedToastProps {
|
||||
onClose: () => void;
|
||||
}
|
||||
|
||||
interface ConversationStartingToastProps {
|
||||
conversationId: string;
|
||||
onClose: () => void;
|
||||
}
|
||||
|
||||
function ConversationCreatedToast({
|
||||
conversationId,
|
||||
onClose,
|
||||
@@ -37,6 +42,33 @@ function ConversationCreatedToast({
|
||||
);
|
||||
}
|
||||
|
||||
function ConversationStartingToast({
|
||||
conversationId,
|
||||
onClose,
|
||||
}: ConversationStartingToastProps) {
|
||||
const { t } = useTranslation();
|
||||
return (
|
||||
<div className="flex items-start gap-2">
|
||||
<Spinner size="sm" />
|
||||
<div>
|
||||
{t("MICROAGENT$CONVERSATION_STARTING")}
|
||||
<br />
|
||||
<a
|
||||
href={`/conversations/${conversationId}`}
|
||||
target="_blank"
|
||||
rel="noopener noreferrer"
|
||||
className="underline"
|
||||
>
|
||||
{t("MICROAGENT$VIEW_CONVERSATION")}
|
||||
</a>
|
||||
</div>
|
||||
<button type="button" onClick={onClose}>
|
||||
<CloseIcon />
|
||||
</button>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
interface ConversationFinishedToastProps {
|
||||
conversationId: string;
|
||||
onClose: () => void;
|
||||
@@ -78,10 +110,18 @@ function ConversationErroredToast({
|
||||
errorMessage,
|
||||
onClose,
|
||||
}: ConversationErroredToastProps) {
|
||||
const { t } = useTranslation();
|
||||
|
||||
// Check if the error message is a translation key
|
||||
const displayMessage =
|
||||
errorMessage === "MICROAGENT$UNKNOWN_ERROR"
|
||||
? t(errorMessage)
|
||||
: errorMessage;
|
||||
|
||||
return (
|
||||
<div className="flex items-start gap-2">
|
||||
<SuccessIndicator status="error" />
|
||||
<div>{errorMessage}</div>
|
||||
<div>{displayMessage}</div>
|
||||
<button type="button" onClick={onClose}>
|
||||
<CloseIcon />
|
||||
</button>
|
||||
@@ -136,3 +176,18 @@ export const renderConversationErroredToast = (
|
||||
duration: 5000,
|
||||
},
|
||||
);
|
||||
|
||||
export const renderConversationStartingToast = (conversationId: string) =>
|
||||
toast(
|
||||
(toastInstance) => (
|
||||
<ConversationStartingToast
|
||||
conversationId={conversationId}
|
||||
onClose={() => toast.dismiss(toastInstance.id)}
|
||||
/>
|
||||
),
|
||||
{
|
||||
...TOAST_OPTIONS,
|
||||
id: `starting-${conversationId}`,
|
||||
duration: 10000, // Show for 10 seconds or until dismissed
|
||||
},
|
||||
);
|
||||
|
||||
@@ -7,11 +7,10 @@ import { ConversationCard } from "../conversation-panel/conversation-card";
|
||||
import { Provider } from "#/types/settings";
|
||||
|
||||
interface ControlsProps {
|
||||
setSecurityOpen: (isOpen: boolean) => void;
|
||||
showSecurityLock: boolean;
|
||||
}
|
||||
|
||||
export function Controls({ setSecurityOpen, showSecurityLock }: ControlsProps) {
|
||||
export function Controls({ showSecurityLock }: ControlsProps) {
|
||||
const { data: conversation } = useActiveConversation();
|
||||
const [contextMenuOpen, setContextMenuOpen] = React.useState(false);
|
||||
|
||||
@@ -21,9 +20,7 @@ export function Controls({ setSecurityOpen, showSecurityLock }: ControlsProps) {
|
||||
<AgentControlBar />
|
||||
<AgentStatusBar />
|
||||
|
||||
{showSecurityLock && (
|
||||
<SecurityLock onClick={() => setSecurityOpen(true)} />
|
||||
)}
|
||||
{showSecurityLock && <SecurityLock />}
|
||||
</div>
|
||||
|
||||
<ConversationCard
|
||||
|
||||
@@ -1,17 +1,28 @@
|
||||
import { IoLockClosed } from "react-icons/io5";
|
||||
import { Tooltip } from "@heroui/react";
|
||||
import { useTranslation } from "react-i18next";
|
||||
import { Link } from "react-router";
|
||||
import { I18nKey } from "#/i18n/declaration";
|
||||
|
||||
interface SecurityLockProps {
|
||||
onClick: () => void;
|
||||
}
|
||||
export function SecurityLock() {
|
||||
const { t } = useTranslation();
|
||||
|
||||
export function SecurityLock({ onClick }: SecurityLockProps) {
|
||||
return (
|
||||
<div
|
||||
className="cursor-pointer hover:opacity-80 transition-all"
|
||||
style={{ marginRight: "8px" }}
|
||||
onClick={onClick}
|
||||
<Tooltip
|
||||
content={
|
||||
<div className="max-w-xs p-2">
|
||||
{t(I18nKey.SETTINGS$CONFIRMATION_MODE_LOCK_TOOLTIP)}
|
||||
</div>
|
||||
}
|
||||
placement="top"
|
||||
>
|
||||
<IoLockClosed size={20} />
|
||||
</div>
|
||||
<Link
|
||||
to="/settings"
|
||||
className="mr-2 cursor-pointer hover:opacity-80 transition-all"
|
||||
aria-label={t(I18nKey.SETTINGS$TITLE)}
|
||||
>
|
||||
<IoLockClosed size={20} />
|
||||
</Link>
|
||||
</Tooltip>
|
||||
);
|
||||
}
|
||||
|
||||
@@ -23,9 +23,9 @@ export function ConfirmStopModal({
|
||||
<ModalBackdrop>
|
||||
<ModalBody className="items-start border border-tertiary">
|
||||
<div className="flex flex-col gap-2">
|
||||
<BaseModalTitle title={t(I18nKey.CONVERSATION$CONFIRM_STOP)} />
|
||||
<BaseModalTitle title={t(I18nKey.CONVERSATION$CONFIRM_PAUSE)} />
|
||||
<BaseModalDescription
|
||||
description={t(I18nKey.CONVERSATION$STOP_WARNING)}
|
||||
description={t(I18nKey.CONVERSATION$PAUSE_WARNING)}
|
||||
/>
|
||||
</div>
|
||||
<div
|
||||
|
||||
@@ -129,7 +129,7 @@ export function ConversationCardContextMenu({
|
||||
|
||||
{onStop && (
|
||||
<ContextMenuListItem testId="stop-button" onClick={onStop}>
|
||||
<ContextMenuIconText icon={Power} text={t(I18nKey.BUTTON$STOP)} />
|
||||
<ContextMenuIconText icon={Power} text={t(I18nKey.BUTTON$PAUSE)} />
|
||||
</ContextMenuListItem>
|
||||
)}
|
||||
|
||||
|
||||
@@ -1,4 +1,6 @@
|
||||
import { ConversationStatus } from "#/types/conversation-status";
|
||||
import ArchivedIcon from "./state-indicators/archived.svg?react";
|
||||
import ErrorIcon from "./state-indicators/error.svg?react";
|
||||
import RunningIcon from "./state-indicators/running.svg?react";
|
||||
import StartingIcon from "./state-indicators/starting.svg?react";
|
||||
import StoppedIcon from "./state-indicators/stopped.svg?react";
|
||||
@@ -9,6 +11,8 @@ const CONVERSATION_STATUS_INDICATORS: Record<ConversationStatus, SVGIcon> = {
|
||||
STOPPED: StoppedIcon,
|
||||
RUNNING: RunningIcon,
|
||||
STARTING: StartingIcon,
|
||||
ARCHIVED: ArchivedIcon,
|
||||
ERROR: ErrorIcon,
|
||||
};
|
||||
|
||||
interface ConversationStateIndicatorProps {
|
||||
|
||||
@@ -0,0 +1 @@
|
||||
<svg xmlns="http://www.w3.org/2000/svg" height="24px" viewBox="0 0 24 24" width="24px" fill="#A7A9AC"><path d="M0 0h24v24H0V0z" fill="none"/><path d="M17 7h-4v1.9h4c1.71 0 3.1 1.39 3.1 3.1 0 1.43-.98 2.63-2.31 2.98l1.46 1.46C20.88 15.61 22 13.95 22 12c0-2.76-2.24-5-5-5zm-1 4h-2.19l2 2H16zM2 4.27l3.11 3.11C3.29 8.12 2 9.91 2 12c0 2.76 2.24 5 5 5h4v-1.9H7c-1.71 0-3.1-1.39-3.1-3.1 0-1.59 1.21-2.9 2.76-3.07L8.73 11H8v2h2.73L13 15.27V17h1.73l4.01 4L20 19.74 3.27 3 2 4.27z"/><path d="M0 24V0" fill="none"/></svg>
|
||||
|
After Width: | Height: | Size: 512 B |
@@ -0,0 +1 @@
|
||||
<svg xmlns="http://www.w3.org/2000/svg" height="24px" viewBox="0 0 24 24" width="24px" fill="#e7000b"><path d="M0 0h24v24H0z" fill="none"/><path d="M12 2C6.48 2 2 6.48 2 12s4.48 10 10 10 10-4.48 10-10S17.52 2 12 2zm1 15h-2v-2h2v2zm0-4h-2V7h2v6z"/></svg>
|
||||
|
After Width: | Height: | Size: 254 B |
@@ -17,7 +17,7 @@ export function MicroagentManagementAccordionTitle({
|
||||
<TooltipButton
|
||||
tooltip={repository.full_name}
|
||||
ariaLabel={repository.full_name}
|
||||
className="text-white text-base font-normal bg-transparent p-0 min-w-0 h-auto cursor-pointer truncate max-w-[232px]"
|
||||
className="text-white text-base font-normal bg-transparent p-0 min-w-0 h-auto cursor-pointer truncate max-w-[194px] translate-y-[-1px]"
|
||||
testId="repository-name-tooltip"
|
||||
placement="bottom"
|
||||
>
|
||||
|
||||
@@ -7,8 +7,6 @@ import {
|
||||
} from "#/state/microagent-management-slice";
|
||||
import { RootState } from "#/store";
|
||||
import { GitRepository } from "#/types/git";
|
||||
import PlusIcon from "#/icons/plus.svg?react";
|
||||
import { TooltipButton } from "#/components/shared/buttons/tooltip-button";
|
||||
|
||||
interface MicroagentManagementAddMicroagentButtonProps {
|
||||
repository: GitRepository;
|
||||
@@ -25,23 +23,22 @@ export function MicroagentManagementAddMicroagentButton({
|
||||
|
||||
const dispatch = useDispatch();
|
||||
|
||||
const handleClick = (e: React.MouseEvent<HTMLDivElement>) => {
|
||||
const handleClick = (e: React.MouseEvent<HTMLButtonElement>) => {
|
||||
e.stopPropagation();
|
||||
dispatch(setAddMicroagentModalVisible(!addMicroagentModalVisible));
|
||||
dispatch(setSelectedRepository(repository));
|
||||
};
|
||||
|
||||
return (
|
||||
<div onClick={handleClick}>
|
||||
<TooltipButton
|
||||
tooltip={t(I18nKey.COMMON$ADD_MICROAGENT)}
|
||||
ariaLabel={t(I18nKey.COMMON$ADD_MICROAGENT)}
|
||||
className="p-0 min-w-0 h-6 w-6 flex items-center justify-center bg-transparent cursor-pointer"
|
||||
testId="add-microagent-button"
|
||||
placement="bottom"
|
||||
>
|
||||
<PlusIcon width={22} height={22} />
|
||||
</TooltipButton>
|
||||
</div>
|
||||
<button
|
||||
type="button"
|
||||
onClick={handleClick}
|
||||
className="translate-y-[-1px]"
|
||||
data-testid="add-microagent-button"
|
||||
>
|
||||
<span className="text-sm font-normal leading-5 text-[#8480FF] cursor-pointer hover:text-[#6C63FF] transition-colors duration-200">
|
||||
{t(I18nKey.COMMON$ADD_MICROAGENT)}
|
||||
</span>
|
||||
</button>
|
||||
);
|
||||
}
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import React, { useEffect, useState } from "react";
|
||||
import { useTranslation } from "react-i18next";
|
||||
import { useDispatch, useSelector } from "react-redux";
|
||||
import { MicroagentManagementSidebar } from "./microagent-management-sidebar";
|
||||
import { MicroagentManagementMain } from "./microagent-management-main";
|
||||
@@ -25,6 +26,13 @@ import { GitRepository } from "#/types/git";
|
||||
import { queryClient } from "#/query-client-config";
|
||||
import { Provider } from "#/types/settings";
|
||||
import { MicroagentManagementLearnThisRepoModal } from "./microagent-management-learn-this-repo-modal";
|
||||
import {
|
||||
displaySuccessToast,
|
||||
displayErrorToast,
|
||||
} from "#/utils/custom-toast-handlers";
|
||||
import { getFirstPRUrl } from "#/utils/parse-pr-url";
|
||||
import { I18nKey } from "#/i18n/declaration";
|
||||
import { useUserProviders } from "#/hooks/use-user-providers";
|
||||
|
||||
// Handle error events
|
||||
const isErrorEvent = (evt: unknown): evt is { error: true; message: string } =>
|
||||
@@ -58,16 +66,10 @@ const getConversationInstructions = (
|
||||
gitProvider: Provider,
|
||||
) => `Create a microagent for the repository ${repositoryName} by following the steps below:
|
||||
|
||||
- Step 1: Create a markdown file inside the .openhands/microagents folder with the name of the microagent (The microagent must be created in the .openhands/microagents folder and should be able to perform the described task when triggered).
|
||||
|
||||
- This is the instructions about what the microagent should do: ${formData.query}
|
||||
|
||||
${
|
||||
- Step 1: Create a markdown file inside the .openhands/microagents folder with the name of the microagent (The microagent must be created in the .openhands/microagents folder and should be able to perform the described task when triggered). This is the instructions about what the microagent should do: ${formData.query}. ${
|
||||
formData.triggers && formData.triggers.length > 0
|
||||
? `
|
||||
- This is the triggers of the microagent: ${formData.triggers.join(", ")}
|
||||
`
|
||||
: "- Please be noted that the microagent doesn't have any triggers."
|
||||
? `This is the triggers of the microagent: ${formData.triggers.join(", ")}`
|
||||
: "Please be noted that the microagent doesn't have any triggers."
|
||||
}
|
||||
|
||||
- Step 2: Create a new branch for the repository ${repositoryName}, must avoid duplicated branches.
|
||||
@@ -84,16 +86,10 @@ const getUpdateConversationInstructions = (
|
||||
) => `Update the microagent for the repository ${repositoryName} by following the steps below:
|
||||
|
||||
|
||||
- Step 1: Update the microagent. This is the path of the microagent: ${formData.microagentPath} (The updated microagent must be in the .openhands/microagents folder and should be able to perform the described task when triggered).
|
||||
|
||||
- This is the updated instructions about what the microagent should do: ${formData.query}
|
||||
|
||||
${
|
||||
- Step 1: Update the microagent. This is the path of the microagent: ${formData.microagentPath} (The updated microagent must be in the .openhands/microagents folder and should be able to perform the described task when triggered). This is the updated instructions about what the microagent should do: ${formData.query}. ${
|
||||
formData.triggers && formData.triggers.length > 0
|
||||
? `
|
||||
- This is the triggers of the microagent: ${formData.triggers.join(", ")}
|
||||
`
|
||||
: "- Please be noted that the microagent doesn't have any triggers."
|
||||
? `This is the triggers of the microagent: ${formData.triggers.join(", ")}`
|
||||
: "Please be noted that the microagent doesn't have any triggers."
|
||||
}
|
||||
|
||||
- Step 2: Create a new branch for the repository ${repositoryName}, must avoid duplicated branches.
|
||||
@@ -112,6 +108,10 @@ export function MicroagentManagementContent() {
|
||||
learnThisRepoModalVisible,
|
||||
} = useSelector((state: RootState) => state.microagentManagement);
|
||||
|
||||
const { providers } = useUserProviders();
|
||||
|
||||
const { t } = useTranslation();
|
||||
|
||||
const dispatch = useDispatch();
|
||||
|
||||
const { createConversationAndSubscribe, isPending } =
|
||||
@@ -159,6 +159,33 @@ export function MicroagentManagementContent() {
|
||||
? (selectedRepository as GitRepository).full_name
|
||||
: "";
|
||||
|
||||
// Check if agent is running and ready to work
|
||||
if (
|
||||
isOpenHandsEvent(socketEvent) &&
|
||||
isAgentStateChangeObservation(socketEvent) &&
|
||||
socketEvent.extras.agent_state === AgentState.RUNNING
|
||||
) {
|
||||
displaySuccessToast(
|
||||
t(I18nKey.MICROAGENT_MANAGEMENT$OPENING_PR_TO_CREATE_MICROAGENT),
|
||||
);
|
||||
}
|
||||
|
||||
// Check if agent has finished and we have a PR
|
||||
if (isOpenHandsEvent(socketEvent) && isFinishAction(socketEvent)) {
|
||||
const prUrl = getFirstPRUrl(socketEvent.args.final_thought || "");
|
||||
if (!prUrl) {
|
||||
// Agent finished but no PR found
|
||||
displaySuccessToast(t(I18nKey.MICROAGENT_MANAGEMENT$PR_NOT_CREATED));
|
||||
}
|
||||
}
|
||||
|
||||
// Handle error events
|
||||
if (isErrorEvent(socketEvent) || isAgentStatusError(socketEvent)) {
|
||||
displayErrorToast(
|
||||
t(I18nKey.MICROAGENT_MANAGEMENT$ERROR_CREATING_MICROAGENT),
|
||||
);
|
||||
}
|
||||
|
||||
if (shouldInvalidateConversationsList(socketEvent)) {
|
||||
invalidateConversationsList(repositoryName);
|
||||
}
|
||||
@@ -250,6 +277,12 @@ export function MicroagentManagementContent() {
|
||||
const repositoryName = repository.full_name;
|
||||
const gitProvider = repository.git_provider;
|
||||
|
||||
const createMicroagent = {
|
||||
repo: repositoryName,
|
||||
git_provider: gitProvider,
|
||||
title: formData.query,
|
||||
};
|
||||
|
||||
// Launch a new conversation to help the user understand the repo
|
||||
createConversationAndSubscribe({
|
||||
query: formData.query,
|
||||
@@ -259,6 +292,7 @@ export function MicroagentManagementContent() {
|
||||
branch: formData.selectedBranch,
|
||||
gitProvider,
|
||||
},
|
||||
createMicroagent,
|
||||
onSuccessCallback: () => {
|
||||
hideLearnThisRepoModal();
|
||||
},
|
||||
@@ -289,11 +323,18 @@ export function MicroagentManagementContent() {
|
||||
</>
|
||||
);
|
||||
|
||||
const providersAreSet = providers.length > 0;
|
||||
|
||||
if (width < 1024) {
|
||||
return (
|
||||
<div className="w-full h-full flex flex-col gap-6">
|
||||
<div className="w-full rounded-lg border border-[#525252] bg-[#24272E] max-h-[494px] min-h-[494px]">
|
||||
<MicroagentManagementSidebar isSmallerScreen />
|
||||
{providersAreSet && (
|
||||
<MicroagentManagementSidebar
|
||||
isSmallerScreen
|
||||
providers={providers}
|
||||
/>
|
||||
)}
|
||||
</div>
|
||||
<div className="w-full rounded-lg border border-[#525252] bg-[#24272E] flex-1 min-h-[494px]">
|
||||
<MicroagentManagementMain />
|
||||
@@ -305,7 +346,7 @@ export function MicroagentManagementContent() {
|
||||
|
||||
return (
|
||||
<div className="w-full h-full flex rounded-lg border border-[#525252] bg-[#24272E] overflow-hidden">
|
||||
<MicroagentManagementSidebar />
|
||||
{providersAreSet && <MicroagentManagementSidebar providers={providers} />}
|
||||
<div className="flex-1">
|
||||
<MicroagentManagementMain />
|
||||
</div>
|
||||
|
||||
@@ -8,7 +8,7 @@ import { BrandButton } from "../settings/brand-button";
|
||||
import { I18nKey } from "#/i18n/declaration";
|
||||
import { RootState } from "#/store";
|
||||
import XIcon from "#/icons/x.svg?react";
|
||||
import { cn } from "#/utils/utils";
|
||||
import { cn, getRepoMdCreatePrompt } from "#/utils/utils";
|
||||
import { LearnThisRepoFormData } from "#/types/microagent-management";
|
||||
import { Branch } from "#/types/git";
|
||||
import { useRepositoryBranches } from "#/hooks/query/use-repository-branches";
|
||||
@@ -76,23 +76,25 @@ export function MicroagentManagementLearnThisRepoModal({
|
||||
const onSubmit = (event: React.FormEvent<HTMLFormElement>) => {
|
||||
event.preventDefault();
|
||||
|
||||
if (!query.trim()) {
|
||||
return;
|
||||
}
|
||||
const finalQuery = getRepoMdCreatePrompt(
|
||||
selectedRepository?.git_provider || "github",
|
||||
query.trim(),
|
||||
);
|
||||
|
||||
onConfirm({
|
||||
query: query.trim(),
|
||||
query: finalQuery,
|
||||
selectedBranch: selectedBranch?.name || "",
|
||||
});
|
||||
};
|
||||
|
||||
const handleConfirm = () => {
|
||||
if (!query.trim()) {
|
||||
return;
|
||||
}
|
||||
const finalQuery = getRepoMdCreatePrompt(
|
||||
selectedRepository?.git_provider || "github",
|
||||
query.trim(),
|
||||
);
|
||||
|
||||
onConfirm({
|
||||
query: query.trim(),
|
||||
query: finalQuery,
|
||||
selectedBranch: selectedBranch?.name || "",
|
||||
});
|
||||
};
|
||||
@@ -244,7 +246,6 @@ export function MicroagentManagementLearnThisRepoModal({
|
||||
onClick={handleConfirm}
|
||||
testId="confirm-button"
|
||||
isDisabled={
|
||||
!query.trim() ||
|
||||
isLoading ||
|
||||
isLoadingBranches ||
|
||||
!selectedBranch ||
|
||||
|
||||
@@ -59,8 +59,10 @@ export function MicroagentManagementMicroagentCard({
|
||||
if (runtimeStatus === "STATUS$ERROR") {
|
||||
return t(I18nKey.MICROAGENT$STATUS_ERROR);
|
||||
}
|
||||
if (conversationStatus === "RUNNING" && runtimeStatus === "STATUS$READY") {
|
||||
return t(I18nKey.MICROAGENT$STATUS_OPENING_PR);
|
||||
if (conversationStatus === "RUNNING") {
|
||||
return runtimeStatus === "STATUS$READY"
|
||||
? t(I18nKey.MICROAGENT$STATUS_OPENING_PR)
|
||||
: t(I18nKey.COMMON$STARTING);
|
||||
}
|
||||
return "";
|
||||
}, [conversationStatus, runtimeStatus, t, hasPr]);
|
||||
|
||||
@@ -1,13 +1,16 @@
|
||||
import { useTranslation } from "react-i18next";
|
||||
import { useEffect } from "react";
|
||||
import { useDispatch, useSelector } from "react-redux";
|
||||
import { Spinner } from "@heroui/react";
|
||||
import { MicroagentManagementMicroagentCard } from "./microagent-management-microagent-card";
|
||||
import { MicroagentManagementLearnThisRepo } from "./microagent-management-learn-this-repo";
|
||||
import { useRepositoryMicroagents } from "#/hooks/query/use-repository-microagents";
|
||||
import { useSearchConversations } from "#/hooks/query/use-search-conversations";
|
||||
import { useMicroagentManagementConversations } from "#/hooks/query/use-microagent-management-conversations";
|
||||
import { GitRepository } from "#/types/git";
|
||||
import { RootState } from "#/store";
|
||||
import { setSelectedMicroagentItem } from "#/state/microagent-management-slice";
|
||||
import { cn } from "#/utils/utils";
|
||||
import { I18nKey } from "#/i18n/declaration";
|
||||
|
||||
interface MicroagentManagementRepoMicroagentsProps {
|
||||
repository: GitRepository;
|
||||
@@ -22,6 +25,8 @@ export function MicroagentManagementRepoMicroagents({
|
||||
|
||||
const dispatch = useDispatch();
|
||||
|
||||
const { t } = useTranslation();
|
||||
|
||||
const { full_name: repositoryName } = repository;
|
||||
|
||||
// Extract owner and repo from repositoryName (format: "owner/repo")
|
||||
@@ -37,9 +42,9 @@ export function MicroagentManagementRepoMicroagents({
|
||||
data: conversations,
|
||||
isLoading: isLoadingConversations,
|
||||
isError: isErrorConversations,
|
||||
} = useSearchConversations(
|
||||
} = useMicroagentManagementConversations(
|
||||
repositoryName,
|
||||
"microagent_management",
|
||||
undefined,
|
||||
1000,
|
||||
true,
|
||||
);
|
||||
@@ -65,6 +70,18 @@ export function MicroagentManagementRepoMicroagents({
|
||||
}
|
||||
}, [conversations]);
|
||||
|
||||
useEffect(
|
||||
() => () => {
|
||||
dispatch(
|
||||
setSelectedMicroagentItem({
|
||||
microagent: null,
|
||||
conversation: null,
|
||||
}),
|
||||
);
|
||||
},
|
||||
[],
|
||||
);
|
||||
|
||||
// Show loading only when both queries are loading
|
||||
const isLoading = isLoadingMicroagents || isLoadingConversations;
|
||||
|
||||
@@ -82,7 +99,7 @@ export function MicroagentManagementRepoMicroagents({
|
||||
// If there's an error with microagents, show the learn this repo component
|
||||
if (isError) {
|
||||
return (
|
||||
<div className="pb-4">
|
||||
<div>
|
||||
<MicroagentManagementLearnThisRepo repository={repository} />
|
||||
</div>
|
||||
);
|
||||
@@ -91,34 +108,47 @@ export function MicroagentManagementRepoMicroagents({
|
||||
const numberOfMicroagents = microagents?.length || 0;
|
||||
const numberOfConversations = conversations?.length || 0;
|
||||
const totalItems = numberOfMicroagents + numberOfConversations;
|
||||
const hasMicroagents = numberOfMicroagents > 0;
|
||||
const hasConversations = numberOfConversations > 0;
|
||||
|
||||
return (
|
||||
<div className="pb-4">
|
||||
<div>
|
||||
{totalItems === 0 && (
|
||||
<MicroagentManagementLearnThisRepo repository={repository} />
|
||||
)}
|
||||
|
||||
{/* Render microagents */}
|
||||
{numberOfMicroagents > 0 &&
|
||||
microagents?.map((microagent) => (
|
||||
<div key={microagent.name} className="pb-4 last:pb-0">
|
||||
<MicroagentManagementMicroagentCard
|
||||
microagent={microagent}
|
||||
repository={repository}
|
||||
/>
|
||||
</div>
|
||||
))}
|
||||
{hasMicroagents && (
|
||||
<div className="flex flex-col">
|
||||
<span className="text-md text-white font-medium leading-5 mb-4">
|
||||
{t(I18nKey.MICROAGENT_MANAGEMENT$EXISTING_MICROAGENTS)}
|
||||
</span>
|
||||
{microagents?.map((microagent) => (
|
||||
<div key={microagent.name} className="pb-4 last:pb-0">
|
||||
<MicroagentManagementMicroagentCard
|
||||
microagent={microagent}
|
||||
repository={repository}
|
||||
/>
|
||||
</div>
|
||||
))}
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Render conversations */}
|
||||
{numberOfConversations > 0 &&
|
||||
conversations?.map((conversation) => (
|
||||
<div key={conversation.conversation_id} className="pb-4 last:pb-0">
|
||||
<MicroagentManagementMicroagentCard
|
||||
conversation={conversation}
|
||||
repository={repository}
|
||||
/>
|
||||
</div>
|
||||
))}
|
||||
{hasConversations && (
|
||||
<div className={cn("flex flex-col", hasMicroagents && "mt-4")}>
|
||||
<span className="text-md text-white font-medium leading-5 mb-4">
|
||||
{t(I18nKey.MICROAGENT_MANAGEMENT$OPEN_MICROAGENT_PULL_REQUESTS)}
|
||||
</span>
|
||||
{conversations?.map((conversation) => (
|
||||
<div key={conversation.conversation_id} className="pb-4 last:pb-0">
|
||||
<MicroagentManagementMicroagentCard
|
||||
conversation={conversation}
|
||||
repository={repository}
|
||||
/>
|
||||
</div>
|
||||
))}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
@@ -1,15 +1,12 @@
|
||||
import { useState, useMemo } from "react";
|
||||
import { useTranslation } from "react-i18next";
|
||||
import { Accordion, AccordionItem } from "@heroui/react";
|
||||
import { MicroagentManagementRepoMicroagents } from "./microagent-management-repo-microagents";
|
||||
import { GitRepository } from "#/types/git";
|
||||
import { cn } from "#/utils/utils";
|
||||
import { TabType } from "#/types/microagent-management";
|
||||
import { MicroagentManagementNoRepositories } from "./microagent-management-no-repositories";
|
||||
import { I18nKey } from "#/i18n/declaration";
|
||||
import { DOCUMENTATION_URL } from "#/utils/constants";
|
||||
import { MicroagentManagementAccordionTitle } from "./microagent-management-accordion-title";
|
||||
import { sanitizeQuery } from "#/utils/sanitize-query";
|
||||
|
||||
type MicroagentManagementRepositoriesProps = {
|
||||
repositories: GitRepository[];
|
||||
@@ -21,23 +18,9 @@ export function MicroagentManagementRepositories({
|
||||
tabType,
|
||||
}: MicroagentManagementRepositoriesProps) {
|
||||
const { t } = useTranslation();
|
||||
const [searchQuery, setSearchQuery] = useState("");
|
||||
|
||||
const numberOfRepoMicroagents = repositories.length;
|
||||
|
||||
// Filter repositories based on search query
|
||||
const filteredRepositories = useMemo(() => {
|
||||
if (!searchQuery.trim()) {
|
||||
return repositories;
|
||||
}
|
||||
|
||||
const sanitizedQuery = sanitizeQuery(searchQuery);
|
||||
return repositories.filter((repository) => {
|
||||
const sanitizedRepoName = sanitizeQuery(repository.full_name);
|
||||
return sanitizedRepoName.includes(sanitizedQuery);
|
||||
});
|
||||
}, [repositories, searchQuery]);
|
||||
|
||||
if (numberOfRepoMicroagents === 0) {
|
||||
if (tabType === "personal") {
|
||||
return (
|
||||
@@ -73,36 +56,19 @@ export function MicroagentManagementRepositories({
|
||||
|
||||
return (
|
||||
<div className="flex flex-col gap-4 w-full">
|
||||
{/* Search Input */}
|
||||
<div className="flex flex-col gap-2 w-full">
|
||||
<label htmlFor="repository-search" className="sr-only">
|
||||
{t(I18nKey.COMMON$SEARCH_REPOSITORIES)}
|
||||
</label>
|
||||
<input
|
||||
id="repository-search"
|
||||
name="repository-search"
|
||||
type="text"
|
||||
placeholder={`${t(I18nKey.COMMON$SEARCH_REPOSITORIES)}...`}
|
||||
value={searchQuery}
|
||||
onChange={(e) => setSearchQuery(e.target.value)}
|
||||
className={cn(
|
||||
"bg-tertiary border border-[#717888] bg-[#454545] w-full rounded-sm p-2 placeholder:italic placeholder:text-tertiary-alt",
|
||||
"disabled:bg-[#2D2F36] disabled:border-[#2D2F36] disabled:cursor-not-allowed",
|
||||
)}
|
||||
/>
|
||||
</div>
|
||||
|
||||
{/* Repositories Accordion */}
|
||||
<Accordion
|
||||
variant="splitted"
|
||||
className="w-full px-0 gap-3"
|
||||
itemClasses={{
|
||||
base: "shadow-none bg-transparent border border-[#ffffff40] rounded-[6px] cursor-pointer",
|
||||
trigger: "cursor-pointer gap-1",
|
||||
base: "shadow-none bg-transparent cursor-pointer px-0",
|
||||
trigger: "cursor-pointer gap-2 py-3",
|
||||
indicator:
|
||||
"flex items-center justify-center p-0.5 pr-[3px] text-white hover:bg-[#454545] rounded transition-colors duration-200 rotate-180",
|
||||
}}
|
||||
selectionMode="multiple"
|
||||
>
|
||||
{filteredRepositories.map((repository) => (
|
||||
{repositories.map((repository) => (
|
||||
<AccordionItem
|
||||
key={repository.id}
|
||||
aria-label={repository.full_name}
|
||||
|
||||
@@ -1,59 +1,109 @@
|
||||
import { useEffect } from "react";
|
||||
import { useEffect, useState, useMemo } from "react";
|
||||
import { useDispatch } from "react-redux";
|
||||
import { useTranslation } from "react-i18next";
|
||||
import { Spinner } from "@heroui/react";
|
||||
import { MicroagentManagementSidebarHeader } from "./microagent-management-sidebar-header";
|
||||
import { MicroagentManagementSidebarTabs } from "./microagent-management-sidebar-tabs";
|
||||
import { useUserRepositories } from "#/hooks/query/use-user-repositories";
|
||||
import { useUserProviders } from "#/hooks/use-user-providers";
|
||||
import { useGitRepositories } from "#/hooks/query/use-git-repositories";
|
||||
import { GitProviderDropdown } from "#/components/common/git-provider-dropdown";
|
||||
import {
|
||||
setPersonalRepositories,
|
||||
setOrganizationRepositories,
|
||||
setRepositories,
|
||||
} from "#/state/microagent-management-slice";
|
||||
import { GitRepository } from "#/types/git";
|
||||
import { Provider } from "#/types/settings";
|
||||
import { cn } from "#/utils/utils";
|
||||
import { sanitizeQuery } from "#/utils/sanitize-query";
|
||||
import { I18nKey } from "#/i18n/declaration";
|
||||
import { getGitProviderMicroagentManagementCustomStyles } from "#/components/common/react-select-styles";
|
||||
|
||||
interface MicroagentManagementSidebarProps {
|
||||
isSmallerScreen?: boolean;
|
||||
providers: Provider[];
|
||||
}
|
||||
|
||||
export function MicroagentManagementSidebar({
|
||||
isSmallerScreen = false,
|
||||
providers,
|
||||
}: MicroagentManagementSidebarProps) {
|
||||
const [selectedProvider, setSelectedProvider] = useState<Provider | null>(
|
||||
providers.length > 0 ? providers[0] : null,
|
||||
);
|
||||
|
||||
const [searchQuery, setSearchQuery] = useState("");
|
||||
|
||||
const dispatch = useDispatch();
|
||||
|
||||
const { t } = useTranslation();
|
||||
const { providers } = useUserProviders();
|
||||
const selectedProvider = providers.length > 0 ? providers[0] : null;
|
||||
const { data: repositories, isLoading } =
|
||||
useUserRepositories(selectedProvider);
|
||||
|
||||
const { data: repositories, isLoading } = useGitRepositories({
|
||||
provider: selectedProvider,
|
||||
pageSize: 200,
|
||||
enabled: !!selectedProvider,
|
||||
});
|
||||
|
||||
// Auto-select provider if there's only one
|
||||
useEffect(() => {
|
||||
if (providers.length > 0 && !selectedProvider) {
|
||||
setSelectedProvider(providers[0]);
|
||||
}
|
||||
}, [providers, selectedProvider]);
|
||||
|
||||
const handleProviderChange = (provider: Provider | null) => {
|
||||
setSelectedProvider(provider);
|
||||
setSearchQuery("");
|
||||
};
|
||||
|
||||
// Filter repositories based on search query
|
||||
const filteredRepositories = useMemo(() => {
|
||||
if (!repositories?.pages) return null;
|
||||
|
||||
// Flatten all pages to get all repositories
|
||||
const allRepositories = repositories.pages.flatMap((page) => page.data);
|
||||
|
||||
if (!searchQuery.trim()) {
|
||||
return allRepositories;
|
||||
}
|
||||
|
||||
const sanitizedQuery = sanitizeQuery(searchQuery);
|
||||
return allRepositories.filter((repository: GitRepository) => {
|
||||
const sanitizedRepoName = sanitizeQuery(repository.full_name);
|
||||
return sanitizedRepoName.includes(sanitizedQuery);
|
||||
});
|
||||
}, [repositories, searchQuery, selectedProvider]);
|
||||
|
||||
useEffect(() => {
|
||||
if (repositories?.pages) {
|
||||
const personalRepos: GitRepository[] = [];
|
||||
const organizationRepos: GitRepository[] = [];
|
||||
const otherRepos: GitRepository[] = [];
|
||||
|
||||
// Flatten all pages to get all repositories
|
||||
const allRepositories = repositories.pages.flatMap((page) => page.data);
|
||||
|
||||
allRepositories.forEach((repo: GitRepository) => {
|
||||
const hasOpenHandsSuffix = repo.full_name.endsWith("/.openhands");
|
||||
|
||||
if (repo.owner_type === "user" && hasOpenHandsSuffix) {
|
||||
personalRepos.push(repo);
|
||||
} else if (repo.owner_type === "organization" && hasOpenHandsSuffix) {
|
||||
organizationRepos.push(repo);
|
||||
} else {
|
||||
otherRepos.push(repo);
|
||||
}
|
||||
});
|
||||
|
||||
dispatch(setPersonalRepositories(personalRepos));
|
||||
dispatch(setOrganizationRepositories(organizationRepos));
|
||||
dispatch(setRepositories(otherRepos));
|
||||
if (!filteredRepositories?.length) {
|
||||
dispatch(setPersonalRepositories([]));
|
||||
dispatch(setOrganizationRepositories([]));
|
||||
dispatch(setRepositories([]));
|
||||
return;
|
||||
}
|
||||
}, [repositories, dispatch]);
|
||||
|
||||
const personalRepos: GitRepository[] = [];
|
||||
const organizationRepos: GitRepository[] = [];
|
||||
const otherRepos: GitRepository[] = [];
|
||||
|
||||
filteredRepositories.forEach((repo: GitRepository) => {
|
||||
const hasOpenHandsSuffix =
|
||||
selectedProvider === "gitlab"
|
||||
? repo.full_name.endsWith("/openhands-config")
|
||||
: repo.full_name.endsWith("/.openhands");
|
||||
|
||||
if (repo.owner_type === "user" && hasOpenHandsSuffix) {
|
||||
personalRepos.push(repo);
|
||||
} else if (repo.owner_type === "organization" && hasOpenHandsSuffix) {
|
||||
organizationRepos.push(repo);
|
||||
} else {
|
||||
otherRepos.push(repo);
|
||||
}
|
||||
});
|
||||
|
||||
dispatch(setPersonalRepositories(personalRepos));
|
||||
dispatch(setOrganizationRepositories(organizationRepos));
|
||||
dispatch(setRepositories(otherRepos));
|
||||
}, [filteredRepositories, selectedProvider, dispatch]);
|
||||
|
||||
return (
|
||||
<div
|
||||
@@ -63,6 +113,41 @@ export function MicroagentManagementSidebar({
|
||||
)}
|
||||
>
|
||||
<MicroagentManagementSidebarHeader />
|
||||
|
||||
{/* Provider Selection */}
|
||||
{providers.length > 1 && (
|
||||
<div className="mt-6">
|
||||
<GitProviderDropdown
|
||||
providers={providers}
|
||||
value={selectedProvider}
|
||||
placeholder="Select Provider"
|
||||
onChange={handleProviderChange}
|
||||
className="w-full"
|
||||
classNamePrefix="git-provider-dropdown"
|
||||
styles={getGitProviderMicroagentManagementCustomStyles()}
|
||||
/>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Search Input */}
|
||||
<div className="flex flex-col gap-2 w-full mt-6">
|
||||
<label htmlFor="repository-search" className="sr-only">
|
||||
{t(I18nKey.COMMON$SEARCH_REPOSITORIES)}
|
||||
</label>
|
||||
<input
|
||||
id="repository-search"
|
||||
name="repository-search"
|
||||
type="text"
|
||||
placeholder={`${t(I18nKey.COMMON$SEARCH_REPOSITORIES)}...`}
|
||||
value={searchQuery}
|
||||
onChange={(e) => setSearchQuery(e.target.value)}
|
||||
className={cn(
|
||||
"bg-tertiary border border-[#717888] bg-[#454545] w-full rounded-sm p-2 placeholder:italic placeholder:text-tertiary-alt",
|
||||
"disabled:bg-[#2D2F36] disabled:border-[#2D2F36] disabled:cursor-not-allowed h-10 box-shadow-none outline-none",
|
||||
)}
|
||||
/>
|
||||
</div>
|
||||
|
||||
{isLoading ? (
|
||||
<div className="flex flex-col items-center justify-center gap-4 flex-1">
|
||||
<Spinner size="sm" />
|
||||
|
||||
@@ -1,8 +1,7 @@
|
||||
import { Tooltip } from "@heroui/react";
|
||||
import { useTranslation } from "react-i18next";
|
||||
import ConfirmIcon from "#/assets/confirm";
|
||||
import RejectIcon from "#/assets/reject";
|
||||
import { I18nKey } from "#/i18n/declaration";
|
||||
import { cn } from "#/utils/utils";
|
||||
|
||||
interface ActionTooltipProps {
|
||||
type: "confirm" | "reject";
|
||||
@@ -12,25 +11,35 @@ interface ActionTooltipProps {
|
||||
export function ActionTooltip({ type, onClick }: ActionTooltipProps) {
|
||||
const { t } = useTranslation();
|
||||
|
||||
const content =
|
||||
type === "confirm"
|
||||
? t(I18nKey.CHAT_INTERFACE$USER_CONFIRMED)
|
||||
: t(I18nKey.CHAT_INTERFACE$USER_REJECTED);
|
||||
const isConfirm = type === "confirm";
|
||||
|
||||
const ariaLabel = isConfirm
|
||||
? t(I18nKey.ACTION$CONFIRM)
|
||||
: t(I18nKey.ACTION$REJECT);
|
||||
|
||||
const content = isConfirm
|
||||
? t(I18nKey.CHAT_INTERFACE$USER_CONFIRMED)
|
||||
: t(I18nKey.CHAT_INTERFACE$USER_REJECTED);
|
||||
|
||||
const buttonLabel = isConfirm
|
||||
? `${t(I18nKey.CHAT_INTERFACE$INPUT_CONTINUE_MESSAGE)} ⌘↩`
|
||||
: `${t(I18nKey.BUTTON$CANCEL)} ⇧⌘⌫`;
|
||||
|
||||
return (
|
||||
<Tooltip content={content} closeDelay={100}>
|
||||
<button
|
||||
data-testid={`action-${type}-button`}
|
||||
type="button"
|
||||
aria-label={
|
||||
aria-label={ariaLabel}
|
||||
className={cn(
|
||||
"rounded px-2 h-6.5 text-sm font-medium leading-5 cursor-pointer hover:opacity-80",
|
||||
type === "confirm"
|
||||
? t(I18nKey.ACTION$CONFIRM)
|
||||
: t(I18nKey.ACTION$REJECT)
|
||||
}
|
||||
className="bg-tertiary rounded-full p-1 hover:bg-base-secondary"
|
||||
? "bg-tertiary text-white"
|
||||
: "bg-white text-[#0D0F11]",
|
||||
)}
|
||||
onClick={onClick}
|
||||
>
|
||||
{type === "confirm" ? <ConfirmIcon /> : <RejectIcon />}
|
||||
{buttonLabel}
|
||||
</button>
|
||||
</Tooltip>
|
||||
);
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user