Add eval_id and sync Skill Tree with Frontend (#5287)

Add eval_id to skill tree

Signed-off-by: Merwane Hamadi <merwanehamadi@gmail.com>
2026-01-09 15:17:59 -05:00 · 2023-09-21 13:36:17 -07:00
parent 295787c948
commit f4e7b1c61c
63 changed files with 2010 additions and 792 deletions
--- a/.github/workflows/benchmark-ci.yml
+++ b/.github/workflows/benchmark-ci.yml
@@ -102,10 +102,9 @@ jobs:
          sh run &
          sleep 20
          set +e # Ignore non-zero exit codes and continue execution
-          echo "Running the following command: ${prefix}agbenchmark --maintain --mock"
-          prefix="poetry run "
+          echo "Running the following command: poetry run agbenchmark --maintain --mock"

-          ${prefix}agbenchmark --maintain --mock
+          poetry run agbenchmark --maintain --mock
          EXIT_CODE=$?
          set -e  # Stop ignoring non-zero exit codes
          # Check if the exit code was 5, and if so, exit with 0 instead
@@ -113,23 +112,37 @@ jobs:
            echo "regression_tests.json is empty."
          fi

-          echo "Running the following command: ${prefix}agbenchmark --mock"
-          ${prefix}agbenchmark --mock
+          echo "Running the following command: poetry run agbenchmark --mock"
+          poetry run agbenchmark --mock

-          echo "Running the following command: ${prefix}agbenchmark --mock --category=retrieval"
-          ${prefix}agbenchmark --mock --category=retrieval
+          echo "Running the following command: poetry run agbenchmark --mock --category=retrieval"
+          poetry run agbenchmark --mock --category=retrieval

-          echo "Running the following command: ${prefix}agbenchmark --mock --category=interface"
-          ${prefix}agbenchmark --mock --category=interface
+          echo "Running the following command: poetry run agbenchmark --mock --category=interface"
+          poetry run agbenchmark --mock --category=interface

-          echo "Running the following command: ${prefix}agbenchmark --mock --category=coding"
-          ${prefix}agbenchmark --mock --category=coding
+          echo "Running the following command: poetry run agbenchmark --mock --category=coding"
+          poetry run agbenchmark --mock --category=coding

-          echo "Running the following command: ${prefix}agbenchmark --test=WriteFile"
-          ${prefix}agbenchmark --test=WriteFile
-          sh run_benchmark &
+          echo "Running the following command: poetry run agbenchmark --test=WriteFile"
+          poetry run agbenchmark --test=WriteFile
+          sh run_benchmark serve &
+          sleep 10
          cd ../../benchmark
          poetry install
-          poetry run pytest tests
+          echo "Adding the BUILD_SKILL_TREE environment variable. This will attempt to add new elements in the skill tree. If new elements are added, the CI fails because they should have been pushed"
+          export BUILD_SKILL_TREE=true
+
+          poetry run agbenchmark --mock
+          poetry run pytest -vv -s tests
+          # Fail if the runs above modified tracked skill-tree files. git prints repo-root-relative paths, so the pattern has no "../" prefix.
+          CHANGED=$(git diff --name-only | grep -E '(agbenchmark/challenges)|(frontend/assets)') || echo "No diffs"
+          if [ -n "$CHANGED" ]; then
+            echo "There are unstaged changes please run agbenchmark and commit those changes since they are needed."
+            echo "$CHANGED"
+            exit 1
+          else
+            echo "No unstaged changes."
+          fi
        env:
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}