Compare commits

...

1 Commits

Author SHA1 Message Date
openhands 3bc962b07f Fix blacksmith migration by removing eval-runner.yml 2025-03-19 14:33:58 +00:00
13 changed files with 128 additions and 63 deletions
+1 -1
View File
@@ -46,7 +46,7 @@ on:
jobs:
del_runs:
runs-on: ubuntu-latest
runs-on: blacksmith-4vcpu-ubuntu-2204
permissions:
actions: write
contents: read
+6 -4
View File
@@ -24,20 +24,22 @@ jobs:
build:
if: github.repository == 'All-Hands-AI/OpenHands'
name: Build Docusaurus
runs-on: ubuntu-latest
runs-on: blacksmith-4vcpu-ubuntu-2204
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
- uses: actions/setup-node@v4
- uses: useblacksmith/setup-node@v5
with:
node-version: 18
cache: npm
cache-dependency-path: docs/package-lock.json
- name: Set up Python
uses: actions/setup-python@v5
uses: useblacksmith/setup-python@v6
with:
python-version: '3.12'
- name: Generate Python Docs
run: rm -rf docs/modules/python && pip install pydoc-markdown && pydoc-markdown
- name: Install dependencies
run: cd docs && npm ci
- name: Build website
@@ -52,7 +54,7 @@ jobs:
deploy:
if: github.ref == 'refs/heads/main' && github.repository == 'All-Hands-AI/OpenHands'
name: Deploy to GitHub Pages
runs-on: ubuntu-latest
runs-on: blacksmith-4vcpu-ubuntu-2204
# This job only runs on "main" so only run one of these jobs at a time
# otherwise it will fail if one is already running
concurrency:
+4 -4
View File
@@ -16,7 +16,7 @@ concurrency:
jobs:
test:
runs-on: ubuntu-latest
runs-on: blacksmith-4vcpu-ubuntu-2204
steps:
- uses: actions/checkout@v4
- name: Set up Docker Buildx
@@ -25,18 +25,18 @@ jobs:
- name: Install tmux
run: sudo apt-get update && sudo apt-get install -y tmux
- name: Setup Node.js
uses: actions/setup-node@v4
uses: useblacksmith/setup-node@v5
with:
node-version: '22.x'
- name: Install poetry via pipx
run: pipx install poetry
- name: Set up Python
uses: actions/setup-python@v5
uses: useblacksmith/setup-python@v6
with:
python-version: '3.12'
cache: 'poetry'
- name: Install Python dependencies using Poetry
run: poetry install --without evaluation
run: poetry install --without evaluation,llama-index
- name: Build Environment
run: make build
- name: Run tests
+2 -2
View File
@@ -21,7 +21,7 @@ jobs:
# Run frontend unit tests
fe-test:
name: FE Unit Tests
runs-on: ubuntu-latest
runs-on: blacksmith-4vcpu-ubuntu-2204
strategy:
matrix:
node-version: [20, 22]
@@ -30,7 +30,7 @@ jobs:
- name: Checkout
uses: actions/checkout@v4
- name: Set up Node.js
uses: actions/setup-node@v4
uses: useblacksmith/setup-node@v5
with:
node-version: ${{ matrix.node-version }}
- name: Install dependencies
+16 -16
View File
@@ -32,7 +32,7 @@ jobs:
# Builds the OpenHands Docker images
ghcr_build_app:
name: Build App Image
runs-on: ubuntu-latest
runs-on: blacksmith-4vcpu-ubuntu-2204
permissions:
contents: read
packages: write
@@ -80,7 +80,7 @@ jobs:
# Builds the runtime Docker images
ghcr_build_runtime:
name: Build Image
runs-on: ubuntu-latest
runs-on: blacksmith-4vcpu-ubuntu-2204
permissions:
contents: read
packages: write
@@ -108,11 +108,11 @@ jobs:
id: buildx
uses: docker/setup-buildx-action@v3
- name: Set up Python
uses: actions/setup-python@v5
uses: useblacksmith/setup-python@v6
with:
python-version: '3.12'
- name: Cache Poetry dependencies
uses: actions/cache@v4
uses: useblacksmith/cache@v5
with:
path: |
~/.cache/pypoetry
@@ -150,7 +150,7 @@ jobs:
verify_hash_equivalence_in_runtime_and_app:
name: Verify Hash Equivalence in Runtime and Docker images
runs-on: ubuntu-latest
runs-on: blacksmith-4vcpu-ubuntu-2204
needs: [ghcr_build_runtime, ghcr_build_app]
strategy:
fail-fast: false
@@ -161,7 +161,7 @@ jobs:
with:
ref: ${{ github.event.pull_request.head.sha }}
- name: Cache Poetry dependencies
uses: actions/cache@v4
uses: useblacksmith/cache@v5
with:
path: |
~/.cache/pypoetry
@@ -170,7 +170,7 @@ jobs:
restore-keys: |
${{ runner.os }}-poetry-
- name: Set up Python
uses: actions/setup-python@v5
uses: useblacksmith/setup-python@v6
with:
python-version: '3.12'
- name: Install poetry via pipx
@@ -204,7 +204,7 @@ jobs:
test_runtime_root:
name: RT Unit Tests (Root)
needs: [ghcr_build_runtime]
runs-on: ubuntu-latest
runs-on: blacksmith-4vcpu-ubuntu-2204
strategy:
fail-fast: false
matrix:
@@ -226,7 +226,7 @@ jobs:
run: |
docker load --input /tmp/runtime-${{ matrix.base_image }}.tar
- name: Cache Poetry dependencies
uses: actions/cache@v4
uses: useblacksmith/cache@v5
with:
path: |
~/.cache/pypoetry
@@ -235,7 +235,7 @@ jobs:
restore-keys: |
${{ runner.os }}-poetry-
- name: Set up Python
uses: actions/setup-python@v5
uses: useblacksmith/setup-python@v6
with:
python-version: '3.12'
- name: Install poetry via pipx
@@ -269,7 +269,7 @@ jobs:
# Run unit tests with the Docker runtime Docker images as openhands user
test_runtime_oh:
name: RT Unit Tests (openhands)
runs-on: ubuntu-latest
runs-on: blacksmith-4vcpu-ubuntu-2204
needs: [ghcr_build_runtime]
strategy:
matrix:
@@ -291,7 +291,7 @@ jobs:
run: |
docker load --input /tmp/runtime-${{ matrix.base_image }}.tar
- name: Cache Poetry dependencies
uses: actions/cache@v4
uses: useblacksmith/cache@v5
with:
path: |
~/.cache/pypoetry
@@ -300,7 +300,7 @@ jobs:
restore-keys: |
${{ runner.os }}-poetry-
- name: Set up Python
uses: actions/setup-python@v5
uses: useblacksmith/setup-python@v6
with:
python-version: '3.12'
- name: Install poetry via pipx
@@ -338,7 +338,7 @@ jobs:
runtime_tests_check_success:
name: All Runtime Tests Passed
if: ${{ !cancelled() && !contains(needs.*.result, 'failure') && !contains(needs.*.result, 'cancelled') }}
runs-on: ubuntu-latest
runs-on: blacksmith-4vcpu-ubuntu-2204
needs: [test_runtime_root, test_runtime_oh, verify_hash_equivalence_in_runtime_and_app]
steps:
- name: All tests passed
@@ -347,7 +347,7 @@ jobs:
runtime_tests_check_fail:
name: All Runtime Tests Passed
if: ${{ cancelled() || contains(needs.*.result, 'failure') || contains(needs.*.result, 'cancelled') }}
runs-on: ubuntu-latest
runs-on: blacksmith-4vcpu-ubuntu-2204
needs: [test_runtime_root, test_runtime_oh, verify_hash_equivalence_in_runtime_and_app]
steps:
- name: Some tests failed
@@ -358,7 +358,7 @@ jobs:
name: Update PR Description
if: github.event_name == 'pull_request' && !github.event.pull_request.head.repo.fork && github.actor != 'dependabot[bot]'
needs: [ghcr_build_runtime]
runs-on: ubuntu-latest
runs-on: blacksmith-4vcpu-ubuntu-2204
steps:
- name: Checkout
uses: actions/checkout@v4
+73 -5
View File
@@ -18,7 +18,7 @@ env:
jobs:
run-integration-tests:
if: github.event.label.name == 'integration-test' || github.event_name == 'workflow_dispatch' || github.event_name == 'schedule'
runs-on: ubuntu-latest
runs-on: blacksmith-4vcpu-ubuntu-2204
permissions:
contents: "read"
id-token: "write"
@@ -35,13 +35,13 @@ jobs:
run: pipx install poetry
- name: Set up Python
uses: actions/setup-python@v5
uses: useblacksmith/setup-python@v6
with:
python-version: ${{ matrix.python-version }}
cache: "poetry"
- name: Setup Node.js
uses: actions/setup-node@v4
uses: useblacksmith/setup-node@v5
with:
node-version: '22.x'
@@ -54,7 +54,7 @@ jobs:
Hi! I started running the integration tests on your PR. You will receive a comment with the results shortly.
- name: Install Python dependencies using Poetry
run: poetry install --without evaluation
run: poetry install --without evaluation,llama-index
- name: Configure config.toml for testing with Haiku
env:
@@ -117,6 +117,68 @@ jobs:
echo "EOF" >> $GITHUB_ENV
# -------------------------------------------------------------
# Run DelegatorAgent tests for Haiku, limited to t01 and t02
- name: Wait a little bit (again)
run: sleep 5
- name: Configure config.toml for testing DelegatorAgent (Haiku)
env:
LLM_MODEL: "litellm_proxy/claude-3-5-haiku-20241022"
LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
LLM_BASE_URL: ${{ secrets.LLM_BASE_URL }}
MAX_ITERATIONS: 30
run: |
echo "[llm.eval]" > config.toml
echo "model = \"$LLM_MODEL\"" >> config.toml
echo "api_key = \"$LLM_API_KEY\"" >> config.toml
echo "base_url = \"$LLM_BASE_URL\"" >> config.toml
echo "temperature = 0.0" >> config.toml
- name: Run integration test evaluation for DelegatorAgent (Haiku)
env:
SANDBOX_FORCE_REBUILD_RUNTIME: True
run: |
poetry run ./evaluation/integration_tests/scripts/run_infer.sh llm.eval HEAD DelegatorAgent '' 30 $N_PROCESSES "t01_fix_simple_typo,t02_add_bash_hello" 'delegator_haiku_run'
# Find and export the delegator test results
REPORT_FILE_DELEGATOR_HAIKU=$(find evaluation/evaluation_outputs/outputs/integration_tests/DelegatorAgent/*haiku*_maxiter_30_N* -name "report.md" -type f | head -n 1)
echo "REPORT_FILE_DELEGATOR_HAIKU: $REPORT_FILE_DELEGATOR_HAIKU"
echo "INTEGRATION_TEST_REPORT_DELEGATOR_HAIKU<<EOF" >> $GITHUB_ENV
cat $REPORT_FILE_DELEGATOR_HAIKU >> $GITHUB_ENV
echo >> $GITHUB_ENV
echo "EOF" >> $GITHUB_ENV
# -------------------------------------------------------------
# Run DelegatorAgent tests for DeepSeek, limited to t01 and t02
- name: Wait a little bit (again)
run: sleep 5
- name: Configure config.toml for testing DelegatorAgent (DeepSeek)
env:
LLM_MODEL: "litellm_proxy/deepseek-chat"
LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
LLM_BASE_URL: ${{ secrets.LLM_BASE_URL }}
MAX_ITERATIONS: 30
run: |
echo "[llm.eval]" > config.toml
echo "model = \"$LLM_MODEL\"" >> config.toml
echo "api_key = \"$LLM_API_KEY\"" >> config.toml
echo "base_url = \"$LLM_BASE_URL\"" >> config.toml
echo "temperature = 0.0" >> config.toml
- name: Run integration test evaluation for DelegatorAgent (DeepSeek)
env:
SANDBOX_FORCE_REBUILD_RUNTIME: True
run: |
poetry run ./evaluation/integration_tests/scripts/run_infer.sh llm.eval HEAD DelegatorAgent '' 30 $N_PROCESSES "t01_fix_simple_typo,t02_add_bash_hello" 'delegator_deepseek_run'
# Find and export the delegator test results
REPORT_FILE_DELEGATOR_DEEPSEEK=$(find evaluation/evaluation_outputs/outputs/integration_tests/DelegatorAgent/deepseek*_maxiter_30_N* -name "report.md" -type f | head -n 1)
echo "REPORT_FILE_DELEGATOR_DEEPSEEK: $REPORT_FILE_DELEGATOR_DEEPSEEK"
echo "INTEGRATION_TEST_REPORT_DELEGATOR_DEEPSEEK<<EOF" >> $GITHUB_ENV
cat $REPORT_FILE_DELEGATOR_DEEPSEEK >> $GITHUB_ENV
echo >> $GITHUB_ENV
echo "EOF" >> $GITHUB_ENV
# -------------------------------------------------------------
# Run VisualBrowsingAgent tests for DeepSeek, limited to t05 and t06
- name: Wait a little bit (again)
run: sleep 5
@@ -151,7 +213,7 @@ jobs:
run: |
TIMESTAMP=$(date +'%y-%m-%d-%H-%M')
cd evaluation/evaluation_outputs/outputs # Change to the outputs directory
tar -czvf ../../../integration_tests_${TIMESTAMP}.tar.gz integration_tests/CodeActAgent/* integration_tests/VisualBrowsingAgent/* # Only include the actual result directories
tar -czvf ../../../integration_tests_${TIMESTAMP}.tar.gz integration_tests/CodeActAgent/* integration_tests/DelegatorAgent/* integration_tests/VisualBrowsingAgent/* # Only include the actual result directories
- name: Upload evaluation results as artifact
uses: actions/upload-artifact@v4
@@ -192,6 +254,12 @@ jobs:
**Integration Tests Report (DeepSeek)**
DeepSeek LLM Test Results:
${{ env.INTEGRATION_TEST_REPORT_DEEPSEEK }}
---
**Integration Tests Report Delegator (Haiku)**
${{ env.INTEGRATION_TEST_REPORT_DELEGATOR_HAIKU }}
---
**Integration Tests Report Delegator (DeepSeek)**
${{ env.INTEGRATION_TEST_REPORT_DELEGATOR_DEEPSEEK }}
---
**Integration Tests Report VisualBrowsing (DeepSeek)**
${{ env.INTEGRATION_TEST_REPORT_VISUALBROWSING_DEEPSEEK }}
+4 -4
View File
@@ -9,7 +9,7 @@ jobs:
lint-fix-frontend:
if: github.event.label.name == 'lint-fix'
name: Fix frontend linting issues
runs-on: ubuntu-latest
runs-on: blacksmith-4vcpu-ubuntu-2204
permissions:
contents: write
pull-requests: write
@@ -22,7 +22,7 @@ jobs:
token: ${{ secrets.GITHUB_TOKEN }}
- name: Install Node.js 20
uses: actions/setup-node@v4
uses: useblacksmith/setup-node@v5
with:
node-version: 20
- name: Install frontend dependencies
@@ -52,7 +52,7 @@ jobs:
lint-fix-python:
if: github.event.label.name == 'lint-fix'
name: Fix Python linting issues
runs-on: ubuntu-latest
runs-on: blacksmith-4vcpu-ubuntu-2204
permissions:
contents: write
pull-requests: write
@@ -65,7 +65,7 @@ jobs:
token: ${{ secrets.GITHUB_TOKEN }}
- name: Set up python
uses: actions/setup-python@v5
uses: useblacksmith/setup-python@v6
with:
python-version: 3.12
cache: 'pip'
+6 -6
View File
@@ -19,11 +19,11 @@ jobs:
# Run lint on the frontend code
lint-frontend:
name: Lint frontend
runs-on: ubuntu-latest
runs-on: blacksmith-4vcpu-ubuntu-2204
steps:
- uses: actions/checkout@v4
- name: Install Node.js 20
uses: actions/setup-node@v4
uses: useblacksmith/setup-node@v5
with:
node-version: 20
- name: Install dependencies
@@ -39,13 +39,13 @@ jobs:
# Run lint on the python code
lint-python:
name: Lint python
runs-on: ubuntu-latest
runs-on: blacksmith-4vcpu-ubuntu-2204
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Set up python
uses: actions/setup-python@v5
uses: useblacksmith/setup-python@v6
with:
python-version: 3.12
cache: 'pip'
@@ -57,11 +57,11 @@ jobs:
# Check version consistency across documentation
check-version-consistency:
name: Check version consistency
runs-on: ubuntu-latest
runs-on: blacksmith-4vcpu-ubuntu-2204
steps:
- uses: actions/checkout@v4
- name: Set up python
uses: actions/setup-python@v5
uses: useblacksmith/setup-python@v6
with:
python-version: 3.12
- name: Run version consistency check
+7 -12
View File
@@ -74,13 +74,13 @@ jobs:
(github.event.review.author_association == 'OWNER' || github.event.review.author_association == 'COLLABORATOR' || github.event.review.author_association == 'MEMBER')
)
)
runs-on: ubuntu-latest
runs-on: blacksmith-4vcpu-ubuntu-2204
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: Set up Python
uses: actions/setup-python@v5
uses: useblacksmith/setup-python@v6
with:
python-version: "3.12"
@@ -106,7 +106,7 @@ jobs:
contains(github.event.review.body, '@openhands-agent-exp')
)
)
uses: actions/cache@v4
uses: useblacksmith/cache@v5
with:
path: ${{ env.pythonLocation }}/lib/python3.12/site-packages/*
key: ${{ runner.os }}-pip-openhands-resolver-${{ hashFiles('/tmp/requirements.txt') }}
@@ -295,12 +295,11 @@ jobs:
if: always()
env:
AGENT_RESPONDED: ${{ env.AGENT_RESPONDED || 'false' }}
ISSUE_NUMBER: ${{ env.ISSUE_NUMBER }}
with:
github-token: ${{ secrets.PAT_TOKEN || github.token }}
script: |
const fs = require('fs');
const issueNumber = process.env.ISSUE_NUMBER;
const issueNumber = ${{ env.ISSUE_NUMBER }};
let logContent = '';
try {
@@ -331,15 +330,13 @@ jobs:
if: always() # Comment on issue even if the previous steps fail
env:
AGENT_RESPONDED: ${{ env.AGENT_RESPONDED || 'false' }}
ISSUE_NUMBER: ${{ env.ISSUE_NUMBER }}
RESOLUTION_SUCCESS: ${{ steps.check_result.outputs.RESOLUTION_SUCCESS }}
with:
github-token: ${{ secrets.PAT_TOKEN || github.token }}
script: |
const fs = require('fs');
const path = require('path');
const issueNumber = process.env.ISSUE_NUMBER;
const success = process.env.RESOLUTION_SUCCESS === 'true';
const issueNumber = ${{ env.ISSUE_NUMBER }};
const success = ${{ steps.check_result.outputs.RESOLUTION_SUCCESS }};
let prNumber = '';
let branchName = '';
@@ -404,12 +401,10 @@ jobs:
- name: Fallback Error Comment
uses: actions/github-script@v7
if: ${{ env.AGENT_RESPONDED == 'false' }} # Only run if no conditions were met in previous steps
env:
ISSUE_NUMBER: ${{ env.ISSUE_NUMBER }}
with:
github-token: ${{ secrets.PAT_TOKEN || github.token }}
script: |
const issueNumber = process.env.ISSUE_NUMBER;
const issueNumber = ${{ env.ISSUE_NUMBER }};
github.rest.issues.createComment({
issue_number: issueNumber,
+5 -5
View File
@@ -19,7 +19,7 @@ jobs:
# Run python unit tests on Linux
test-on-linux:
name: Python Unit Tests on Linux
runs-on: ubuntu-latest
runs-on: blacksmith-4vcpu-ubuntu-2204
env:
INSTALL_DOCKER: '0' # Set to '0' to skip Docker installation
strategy:
@@ -33,22 +33,22 @@ jobs:
- name: Install tmux
run: sudo apt-get update && sudo apt-get install -y tmux
- name: Setup Node.js
uses: actions/setup-node@v4
uses: useblacksmith/setup-node@v5
with:
node-version: '22.x'
- name: Install poetry via pipx
run: pipx install poetry
- name: Set up Python
uses: actions/setup-python@v5
uses: useblacksmith/setup-python@v6
with:
python-version: ${{ matrix.python-version }}
cache: 'poetry'
- name: Install Python dependencies using Poetry
run: poetry install --without evaluation
run: poetry install --without evaluation,llama-index
- name: Build Environment
run: make build
- name: Run Tests
run: poetry run pytest --forked -n auto --cov=openhands --cov-report=xml -svv ./tests/unit
run: poetry run pytest --forked -n auto --cov=openhands --cov-report=xml -svv ./tests/unit --ignore=tests/unit/test_long_term_memory.py
- name: Upload coverage to Codecov
uses: codecov/codecov-action@v5
env:
+2 -2
View File
@@ -12,10 +12,10 @@ on:
jobs:
release:
runs-on: ubuntu-latest
runs-on: blacksmith-4vcpu-ubuntu-2204
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
- uses: useblacksmith/setup-python@v6
with:
python-version: 3.12
- name: Install Poetry
+1 -1
View File
@@ -10,7 +10,7 @@ jobs:
trigger-job:
name: Trigger remote eval job
if: ${{ github.event.label.name == 'run-eval-xs' || github.event.label.name == 'run-eval-s' || github.event.label.name == 'run-eval-m' }}
runs-on: ubuntu-latest
runs-on: blacksmith-4vcpu-ubuntu-2204
steps:
- name: Checkout PR branch
+1 -1
View File
@@ -8,7 +8,7 @@ on:
jobs:
stale:
runs-on: ubuntu-latest
runs-on: blacksmith-4vcpu-ubuntu-2204
steps:
- uses: actions/stale@v9
with: