ci(classic): update CI workflows for consolidated Poetry project

Update all classic CI workflows to use the single consolidated pyproject.toml at classic/ instead of individual project directories.

Changes:
- classic-autogpt-ci.yml: Run from classic/, update cache key and test paths
- classic-forge-ci.yml: Run from classic/, update cache key and test paths
- classic-benchmark-ci.yml: Run from classic/, use direct-benchmark command
- classic-python-checks.yml: Remove the get-changed-parts job and the per-sub-package matrix; the lint and types jobs now each run once from classic/
- classic-autogpts-ci.yml: Update to use direct-benchmark for smoke tests, drop the agent-name matrix, and raise the minimum Python version from 3.10 to 3.12

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-02 02:45:18 -05:00 · 2026-01-20 00:53:50 -06:00
parent b849eafb7f
commit c1031b286d
5 changed files with 37 additions and 91 deletions
--- a/.github/workflows/classic-autogpt-ci.yml
+++ b/.github/workflows/classic-autogpt-ci.yml
@@ -23,7 +23,7 @@ concurrency:
 defaults:
  run:
    shell: bash
-    working-directory: classic/original_autogpt
+    working-directory: classic

 jobs:
  test:
@@ -63,7 +63,7 @@ jobs:
        uses: actions/cache@v4
        with:
          path: ~/.cache/pypoetry
-          key: poetry-${{ runner.os }}-${{ hashFiles('classic/original_autogpt/poetry.lock') }}
+          key: poetry-${{ runner.os }}-${{ hashFiles('classic/poetry.lock') }}

      - name: Install Poetry
        run: curl -sSL https://install.python-poetry.org | python3 -
@@ -71,17 +71,13 @@ jobs:
      - name: Install Python dependencies
        run: poetry install

-      - name: Install direct_benchmark dependencies
-        working-directory: classic/direct_benchmark
-        run: poetry install
-
      - name: Run pytest with coverage
        run: |
          poetry run pytest -vv \
            --cov=autogpt --cov-branch --cov-report term-missing --cov-report xml \
            --numprocesses=logical --durations=10 \
            --junitxml=junit.xml -o junit_family=legacy \
-            tests/unit tests/integration
+            original_autogpt/tests/unit original_autogpt/tests/integration
        env:
          CI: true
          PLAIN_OUTPUT: True
@@ -107,4 +103,4 @@ jobs:
        uses: actions/upload-artifact@v4
        with:
          name: test-logs
-          path: classic/original_autogpt/logs/
+          path: classic/logs/
--- a/.github/workflows/classic-autogpts-ci.yml
+++ b/.github/workflows/classic-autogpts-ci.yml
@@ -29,13 +29,9 @@ defaults:
 jobs:
  serve-agent-protocol:
    runs-on: ubuntu-latest
-    strategy:
-      matrix:
-        agent-name: [ original_autogpt ]
-      fail-fast: false
    timeout-minutes: 20
    env:
-      min-python-version: '3.10'
+      min-python-version: '3.12'
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
@@ -49,27 +45,22 @@ jobs:
          python-version: ${{ env.min-python-version }}

      - name: Install Poetry
-        working-directory: ./classic/${{ matrix.agent-name }}/
        run: |
          curl -sSL https://install.python-poetry.org | python -

      - name: Install dependencies
-        working-directory: ./classic/${{ matrix.agent-name }}/
        run: poetry install

-      - name: Run regression tests
+      - name: Run smoke tests with direct-benchmark
        run: |
-          cd ${{ matrix.agent-name }}
-          poetry run serve &
-          sleep 10  # Wait for server to start
-          poetry run agbenchmark --mock --test=BasicRetrieval --test=Battleship --test=WebArenaTask_0
-          poetry run agbenchmark --test=WriteFile
+          poetry run direct-benchmark run \
+            --strategies one_shot \
+            --models claude \
+            --tests ReadFile,WriteFile \
+            --json
        env:
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
-          AGENT_NAME: ${{ matrix.agent-name }}
+          ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
          REQUESTS_CA_BUNDLE: /etc/ssl/certs/ca-certificates.crt
-          HELICONE_CACHE_ENABLED: false
-          HELICONE_PROPERTY_AGENT: ${{ matrix.agent-name }}
-          REPORTS_FOLDER: ${{ format('../../reports/{0}', matrix.agent-name) }}
-          TELEMETRY_ENVIRONMENT: autogpt-ci
-          TELEMETRY_OPT_IN: ${{ github.ref_name == 'master' }}
+          NONINTERACTIVE_MODE: "true"
+          CI: true
--- a/.github/workflows/classic-benchmark-ci.yml
+++ b/.github/workflows/classic-benchmark-ci.yml
@@ -36,7 +36,7 @@ jobs:
    defaults:
      run:
        shell: bash
-        working-directory: classic/direct_benchmark
+        working-directory: classic
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
@@ -53,7 +53,7 @@ jobs:
        uses: actions/cache@v4
        with:
          path: ~/.cache/pypoetry
-          key: poetry-${{ runner.os }}-${{ hashFiles('classic/direct_benchmark/poetry.lock') }}
+          key: poetry-${{ runner.os }}-${{ hashFiles('classic/poetry.lock') }}

      - name: Install Poetry
        run: |
@@ -65,14 +65,14 @@ jobs:
      - name: Run basic benchmark tests
        run: |
          echo "Testing ReadFile challenge with one_shot strategy..."
-          poetry run python -m direct_benchmark run \
+          poetry run direct-benchmark run \
            --strategies one_shot \
            --models claude \
            --tests ReadFile \
            --json

          echo "Testing WriteFile challenge..."
-          poetry run python -m direct_benchmark run \
+          poetry run direct-benchmark run \
            --strategies one_shot \
            --models claude \
            --tests WriteFile \
@@ -86,7 +86,7 @@ jobs:
      - name: Test category filtering
        run: |
          echo "Testing coding category..."
-          poetry run python -m direct_benchmark run \
+          poetry run direct-benchmark run \
            --strategies one_shot \
            --models claude \
            --categories coding \
@@ -101,7 +101,7 @@ jobs:
      - name: Test multiple strategies
        run: |
          echo "Testing multiple strategies..."
-          poetry run python -m direct_benchmark run \
+          poetry run direct-benchmark run \
            --strategies one_shot,plan_execute \
            --models claude \
            --tests ReadFile \
@@ -121,7 +121,7 @@ jobs:
    defaults:
      run:
        shell: bash
-        working-directory: classic/direct_benchmark
+        working-directory: classic
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
@@ -144,7 +144,7 @@ jobs:
      - name: Run regression tests
        run: |
          echo "Running regression tests (previously beaten challenges)..."
-          poetry run python -m direct_benchmark run \
+          poetry run direct-benchmark run \
            --strategies one_shot \
            --models claude \
            --maintain \
--- a/.github/workflows/classic-forge-ci.yml
+++ b/.github/workflows/classic-forge-ci.yml
@@ -19,7 +19,7 @@ concurrency:
 defaults:
  run:
    shell: bash
-    working-directory: classic/forge
+    working-directory: classic

 jobs:
  test:
@@ -47,7 +47,7 @@ jobs:
        uses: actions/cache@v4
        with:
          path: ~/.cache/pypoetry
-          key: poetry-${{ runner.os }}-${{ hashFiles('classic/forge/poetry.lock') }}
+          key: poetry-${{ runner.os }}-${{ hashFiles('classic/poetry.lock') }}

      - name: Install Poetry
        run: curl -sSL https://install.python-poetry.org | python3 -
@@ -61,7 +61,7 @@ jobs:
            --cov=forge --cov-branch --cov-report term-missing --cov-report xml \
            --durations=10 \
            --junitxml=junit.xml -o junit_family=legacy \
-            forge
+            forge/forge forge/tests
        env:
          CI: true
          PLAIN_OUTPUT: True
@@ -90,4 +90,4 @@ jobs:
        uses: actions/upload-artifact@v4
        with:
          name: test-logs
-          path: classic/forge/logs/
+          path: classic/logs/
--- a/.github/workflows/classic-python-checks.yml
+++ b/.github/workflows/classic-python-checks.yml
@@ -7,7 +7,9 @@ on:
      - '.github/workflows/classic-python-checks-ci.yml'
      - 'classic/original_autogpt/**'
      - 'classic/forge/**'
-      - 'classic/benchmark/**'
+      - 'classic/direct_benchmark/**'
+      - 'classic/pyproject.toml'
+      - 'classic/poetry.lock'
      - '**.py'
      - '!classic/forge/tests/vcr_cassettes'
  pull_request:
@@ -16,7 +18,9 @@ on:
      - '.github/workflows/classic-python-checks-ci.yml'
      - 'classic/original_autogpt/**'
      - 'classic/forge/**'
-      - 'classic/benchmark/**'
+      - 'classic/direct_benchmark/**'
+      - 'classic/pyproject.toml'
+      - 'classic/poetry.lock'
      - '**.py'
      - '!classic/forge/tests/vcr_cassettes'

@@ -27,45 +31,14 @@ concurrency:
 defaults:
  run:
    shell: bash
+    working-directory: classic

 jobs:
-  get-changed-parts:
-    runs-on: ubuntu-latest
-    steps:
-      - name: Checkout repository
-        uses: actions/checkout@v4
-
-      - id: changes-in
-        name: Determine affected subprojects
-        uses: dorny/paths-filter@v3
-        with:
-          filters: |
-            original_autogpt:
-              - classic/original_autogpt/autogpt/**
-              - classic/original_autogpt/tests/**
-              - classic/original_autogpt/poetry.lock
-            forge:
-              - classic/forge/forge/**
-              - classic/forge/tests/**
-              - classic/forge/poetry.lock
-            benchmark:
-              - classic/benchmark/agbenchmark/**
-              - classic/benchmark/tests/**
-              - classic/benchmark/poetry.lock
-    outputs:
-      changed-parts: ${{ steps.changes-in.outputs.changes }}
-
  lint:
-    needs: get-changed-parts
    runs-on: ubuntu-latest
    env:
      min-python-version: "3.12"

-    strategy:
-      matrix:
-        sub-package: ${{ fromJson(needs.get-changed-parts.outputs.changed-parts) }}
-      fail-fast: false
-
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
@@ -81,43 +54,32 @@ jobs:
        uses: actions/cache@v4
        with:
          path: ~/.cache/pypoetry
-          key: ${{ runner.os }}-poetry-${{ hashFiles(format('{0}/poetry.lock', matrix.sub-package)) }}
+          key: ${{ runner.os }}-poetry-${{ hashFiles('classic/poetry.lock') }}

      - name: Install Poetry
        run: curl -sSL https://install.python-poetry.org | python3 -

-      # Install dependencies
-
      - name: Install Python dependencies
-        run: poetry -C classic/${{ matrix.sub-package }} install
+        run: poetry install

      # Lint

      - name: Lint (isort)
        run: poetry run isort --check .
-        working-directory: classic/${{ matrix.sub-package }}

      - name: Lint (Black)
        if: success() || failure()
        run: poetry run black --check .
-        working-directory: classic/${{ matrix.sub-package }}

      - name: Lint (Flake8)
        if: success() || failure()
        run: poetry run flake8 .
-        working-directory: classic/${{ matrix.sub-package }}

  types:
-    needs: get-changed-parts
    runs-on: ubuntu-latest
    env:
      min-python-version: "3.12"

-    strategy:
-      matrix:
-        sub-package: ${{ fromJson(needs.get-changed-parts.outputs.changed-parts) }}
-      fail-fast: false
-
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
@@ -133,19 +95,16 @@ jobs:
        uses: actions/cache@v4
        with:
          path: ~/.cache/pypoetry
-          key: ${{ runner.os }}-poetry-${{ hashFiles(format('{0}/poetry.lock', matrix.sub-package)) }}
+          key: ${{ runner.os }}-poetry-${{ hashFiles('classic/poetry.lock') }}

      - name: Install Poetry
        run: curl -sSL https://install.python-poetry.org | python3 -

-      # Install dependencies
-
      - name: Install Python dependencies
-        run: poetry -C classic/${{ matrix.sub-package }} install
+        run: poetry install

      # Typecheck

      - name: Typecheck
        if: success() || failure()
        run: poetry run pyright
-        working-directory: classic/${{ matrix.sub-package }}