# Run evaluation on a PR, after a release, or manually.
name: Run Eval

# Runs when a PR is labeled with one of the "run-eval-*" labels, when a
# release is published, or when triggered manually.
on:
  pull_request:
    types: [labeled]
  release:
    types: [published]
  workflow_dispatch:
    inputs:
      branch:
        description: 'Branch to evaluate'
        required: true
        default: 'main'
      eval_instances:
        description: 'Number of evaluation instances'
        required: true
        default: '50'
        type: choice
        options:
          - '1'
          - '2'
          - '50'
          - '100'
      reason:
        description: 'Reason for manual trigger'
        required: false
        default: ''

env:
  # Number of the master GitHub issue that collects evaluation results for
  # non-PR triggers (releases and manual runs). Taken from the repository
  # variable of the same name; falls back to '0' when that variable is unset.
  MASTER_EVAL_ISSUE_NUMBER: ${{ vars.MASTER_EVAL_ISSUE_NUMBER || '0' }}

jobs:
  trigger-job:
    name: Trigger remote eval job
    # Only the four "run-eval-<N>" labels start a PR evaluation; release and
    # manual triggers always run.
    if: >-
      ${{
      (github.event_name == 'pull_request' &&
      (github.event.label.name == 'run-eval-1' ||
      github.event.label.name == 'run-eval-2' ||
      github.event.label.name == 'run-eval-50' ||
      github.event.label.name == 'run-eval-100')) ||
      github.event_name == 'release' ||
      github.event_name == 'workflow_dispatch'
      }}
    runs-on: blacksmith-4vcpu-ubuntu-2204
    steps:
      - name: Checkout branch
        uses: actions/checkout@v4
        with:
          # PR head branch for PRs, the requested branch for manual runs,
          # otherwise the ref that triggered the workflow.
          ref: >-
            ${{
            github.event_name == 'pull_request' && github.head_ref ||
            (github.event_name == 'workflow_dispatch' && github.event.inputs.branch) ||
            github.ref
            }}

      # Computes repo_url, eval_branch and eval_instances for the later steps.
      - name: Set evaluation parameters
        id: eval_params
        # Context values are handed to the script through environment
        # variables instead of being expanded into the script text: branch
        # names and workflow inputs are user-controlled, and inline
        # `${{ }}` expansion would let them inject shell commands.
        env:
          EVENT_NAME: ${{ github.event_name }}
          LABEL_NAME: ${{ github.event.label.name }}
          PR_HEAD_REF: ${{ github.head_ref }}
          DISPATCH_BRANCH: ${{ github.event.inputs.branch }}
          DISPATCH_EVAL_INSTANCES: ${{ github.event.inputs.eval_instances }}
          REF_NAME: ${{ github.ref_name }}
          REPO_URL: https://github.com/${{ github.repository }}
        run: |
          echo "Repository URL: $REPO_URL"

          # Determine branch and instance count based on trigger type
          case "$EVENT_NAME" in
            pull_request)
              EVAL_BRANCH="$PR_HEAD_REF"
              echo "PR Branch: $EVAL_BRANCH"
              case "$LABEL_NAME" in
                run-eval-1 | run-eval-2 | run-eval-50 | run-eval-100)
                  # The instance count is the numeric suffix of the label
                  EVAL_INSTANCES="${LABEL_NAME#run-eval-}"
                  ;;
                *)
                  # Guards against the job-level `if` and this mapping drifting apart
                  echo "::error::Unsupported evaluation label: $LABEL_NAME"
                  exit 1
                  ;;
              esac
              ;;
            workflow_dispatch)
              EVAL_BRANCH="$DISPATCH_BRANCH"
              echo "Manual Branch: $EVAL_BRANCH"
              EVAL_INSTANCES="$DISPATCH_EVAL_INSTANCES"
              ;;
            *)
              # For release events, use the tag name and default to 50 instances
              EVAL_BRANCH="$REF_NAME"
              echo "Release Branch/Tag: $EVAL_BRANCH"
              EVAL_INSTANCES="50"
              ;;
          esac

          echo "Evaluation instances: $EVAL_INSTANCES"

          {
            echo "repo_url=$REPO_URL"
            echo "eval_branch=$EVAL_BRANCH"
            echo "eval_instances=$EVAL_INSTANCES"
          } >> "$GITHUB_OUTPUT"

      # Dispatches the create-branch workflow in OpenHands/evaluation and
      # announces the run on Slack.
      - name: Trigger remote job
        # Same rule as above: no `${{ }}` expansion inside the script body.
        env:
          EVENT_NAME: ${{ github.event_name }}
          REPOSITORY: ${{ github.repository }}
          RUN_ID: ${{ github.run_id }}
          RELEASE_TAG: ${{ github.ref_name }}
          EVENT_PR_NUMBER: ${{ github.event.pull_request.number }}
          TRIGGER_REASON: ${{ github.event.inputs.reason || 'No reason provided' }}
          REPO_URL: ${{ steps.eval_params.outputs.repo_url }}
          EVAL_BRANCH: ${{ steps.eval_params.outputs.eval_branch }}
          EVAL_INSTANCES: ${{ steps.eval_params.outputs.eval_instances }}
          PAT_TOKEN: ${{ secrets.PAT_TOKEN }}
          SLACK_TOKEN: ${{ secrets.SLACK_TOKEN }}
        run: |
          # Determine PR number for the remote evaluation system
          if [[ "$EVENT_NAME" == "pull_request" ]]; then
            PR_NUMBER="$EVENT_PR_NUMBER"
          else
            # For non-PR triggers, use the master issue number as PR number
            PR_NUMBER="$MASTER_EVAL_ISSUE_NUMBER"
          fi

          # Build the payload with jq so quotes/backslashes in the branch name
          # are JSON-escaped instead of corrupting the request body.
          # NOTE(review): assumes jq is installed on the runner image - confirm.
          dispatch_payload="$(jq -cn \
            --arg repo "$REPO_URL" \
            --arg branch "$EVAL_BRANCH" \
            --arg pr "$PR_NUMBER" \
            --arg instances "$EVAL_INSTANCES" \
            '{ref: "main", inputs: {"github-repo": $repo, "github-branch": $branch, "pr-number": $pr, "eval-instances": $instances}}')"

          # --fail-with-body makes an HTTP error (e.g. bad token) fail the step
          # instead of silently continuing to announce a run that never started.
          curl --fail-with-body -X POST \
            -H "Authorization: Bearer $PAT_TOKEN" \
            -H "Accept: application/vnd.github+json" \
            -d "$dispatch_payload" \
            https://api.github.com/repos/OpenHands/evaluation/actions/workflows/create-branch.yml/dispatches

          # Send Slack message
          if [[ "$EVENT_NAME" == "pull_request" ]]; then
            TRIGGER_URL="https://github.com/$REPOSITORY/pull/$EVENT_PR_NUMBER"
            slack_text="PR $TRIGGER_URL has triggered evaluation on $EVAL_INSTANCES instances..."
          elif [[ "$EVENT_NAME" == "release" ]]; then
            TRIGGER_URL="https://github.com/$REPOSITORY/releases/tag/$RELEASE_TAG"
            slack_text="Release $TRIGGER_URL has triggered evaluation on $EVAL_INSTANCES instances..."
          else
            TRIGGER_URL="https://github.com/$REPOSITORY/actions/runs/$RUN_ID"
            slack_text="Manual trigger ($TRIGGER_REASON) has triggered evaluation on $EVAL_INSTANCES instances for branch $EVAL_BRANCH..."
          fi

          # The free-text reason may contain quotes; let jq do the escaping
          slack_payload="$(jq -cn --arg text "$slack_text" '{text: $text}')"
          curl -X POST -H 'Content-type: application/json' --data "$slack_payload" \
            "https://hooks.slack.com/services/$SLACK_TOKEN"

      - name: Comment on issue/PR
        uses: KeisukeYamashita/create-comment@v1
        with:
          # For PR triggers, comment on the PR. For other triggers, comment on
          # the master issue.
          number: ${{ github.event_name == 'pull_request' && github.event.pull_request.number || env.MASTER_EVAL_ISSUE_NUMBER }}
          unique: false
          comment: |
            **Evaluation Triggered**

            **Trigger:** ${{ github.event_name == 'pull_request' && format('Pull Request #{0}', github.event.pull_request.number) || (github.event_name == 'release' && 'Release') || format('Manual Trigger: {0}', github.event.inputs.reason || 'No reason provided') }}
            **Branch:** ${{ steps.eval_params.outputs.eval_branch }}
            **Instances:** ${{ steps.eval_params.outputs.eval_instances }}
            **Commit:** ${{ github.sha }}

            Running evaluation on the specified branch. Once eval is done, the results will be posted here.