diff --git a/.github/workflows/classic-autogpt-ci.yml b/.github/workflows/classic-autogpt-ci.yml index 3a1b5c42bf..6d8dee77f0 100644 --- a/.github/workflows/classic-autogpt-ci.yml +++ b/.github/workflows/classic-autogpt-ci.yml @@ -23,7 +23,7 @@ concurrency: defaults: run: shell: bash - working-directory: classic/original_autogpt + working-directory: classic jobs: test: @@ -63,7 +63,7 @@ jobs: uses: actions/cache@v4 with: path: ~/.cache/pypoetry - key: poetry-${{ runner.os }}-${{ hashFiles('classic/original_autogpt/poetry.lock') }} + key: poetry-${{ runner.os }}-${{ hashFiles('classic/poetry.lock') }} - name: Install Poetry run: curl -sSL https://install.python-poetry.org | python3 - @@ -71,17 +71,13 @@ jobs: - name: Install Python dependencies run: poetry install - - name: Install direct_benchmark dependencies - working-directory: classic/direct_benchmark - run: poetry install - - name: Run pytest with coverage run: | poetry run pytest -vv \ --cov=autogpt --cov-branch --cov-report term-missing --cov-report xml \ --numprocesses=logical --durations=10 \ --junitxml=junit.xml -o junit_family=legacy \ - tests/unit tests/integration + original_autogpt/tests/unit original_autogpt/tests/integration env: CI: true PLAIN_OUTPUT: True @@ -107,4 +103,4 @@ jobs: uses: actions/upload-artifact@v4 with: name: test-logs - path: classic/original_autogpt/logs/ + path: classic/logs/ diff --git a/.github/workflows/classic-autogpts-ci.yml b/.github/workflows/classic-autogpts-ci.yml index f12b4c17d6..5d7ff7485c 100644 --- a/.github/workflows/classic-autogpts-ci.yml +++ b/.github/workflows/classic-autogpts-ci.yml @@ -29,13 +29,9 @@ defaults: jobs: serve-agent-protocol: runs-on: ubuntu-latest - strategy: - matrix: - agent-name: [ original_autogpt ] - fail-fast: false timeout-minutes: 20 env: - min-python-version: '3.10' + min-python-version: '3.12' steps: - name: Checkout repository uses: actions/checkout@v4 @@ -49,27 +45,22 @@ jobs: python-version: ${{ env.min-python-version }} - name: Install Poetry - working-directory: ./classic/${{ matrix.agent-name }}/ run: | curl -sSL https://install.python-poetry.org | python - - name: Install dependencies - working-directory: ./classic/${{ matrix.agent-name }}/ run: poetry install - - name: Run regression tests + - name: Run smoke tests with direct-benchmark run: | - cd ${{ matrix.agent-name }} - poetry run serve & - sleep 10 # Wait for server to start - poetry run agbenchmark --mock --test=BasicRetrieval --test=Battleship --test=WebArenaTask_0 - poetry run agbenchmark --test=WriteFile + poetry run direct-benchmark run \ + --strategies one_shot \ + --models claude \ + --tests ReadFile,WriteFile \ + --json env: OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} - AGENT_NAME: ${{ matrix.agent-name }} + ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} REQUESTS_CA_BUNDLE: /etc/ssl/certs/ca-certificates.crt - HELICONE_CACHE_ENABLED: false - HELICONE_PROPERTY_AGENT: ${{ matrix.agent-name }} - REPORTS_FOLDER: ${{ format('../../reports/{0}', matrix.agent-name) }} - TELEMETRY_ENVIRONMENT: autogpt-ci - TELEMETRY_OPT_IN: ${{ github.ref_name == 'master' }} + NONINTERACTIVE_MODE: "true" + CI: true diff --git a/.github/workflows/classic-benchmark-ci.yml b/.github/workflows/classic-benchmark-ci.yml index cf65b3ae48..7a437c996d 100644 --- a/.github/workflows/classic-benchmark-ci.yml +++ b/.github/workflows/classic-benchmark-ci.yml @@ -36,7 +36,7 @@ jobs: defaults: run: shell: bash - working-directory: classic/direct_benchmark + working-directory: classic steps: - name: Checkout repository uses: actions/checkout@v4 @@ -53,7 +53,7 @@ jobs: uses: actions/cache@v4 with: path: ~/.cache/pypoetry - key: poetry-${{ runner.os }}-${{ hashFiles('classic/direct_benchmark/poetry.lock') }} + key: poetry-${{ runner.os }}-${{ hashFiles('classic/poetry.lock') }} - name: Install Poetry run: | @@ -65,14 +65,14 @@ jobs: - name: Run basic benchmark tests run: | echo "Testing ReadFile challenge with one_shot strategy..." - poetry run python -m direct_benchmark run \ + poetry run direct-benchmark run \ --strategies one_shot \ --models claude \ --tests ReadFile \ --json echo "Testing WriteFile challenge..." - poetry run python -m direct_benchmark run \ + poetry run direct-benchmark run \ --strategies one_shot \ --models claude \ --tests WriteFile \ @@ -86,7 +86,7 @@ jobs: - name: Test category filtering run: | echo "Testing coding category..." - poetry run python -m direct_benchmark run \ + poetry run direct-benchmark run \ --strategies one_shot \ --models claude \ --categories coding \ @@ -101,7 +101,7 @@ jobs: - name: Test multiple strategies run: | echo "Testing multiple strategies..." - poetry run python -m direct_benchmark run \ + poetry run direct-benchmark run \ --strategies one_shot,plan_execute \ --models claude \ --tests ReadFile \ @@ -121,7 +121,7 @@ jobs: defaults: run: shell: bash - working-directory: classic/direct_benchmark + working-directory: classic steps: - name: Checkout repository uses: actions/checkout@v4 @@ -144,7 +144,7 @@ jobs: - name: Run regression tests run: | echo "Running regression tests (previously beaten challenges)..." - poetry run python -m direct_benchmark run \ + poetry run direct-benchmark run \ --strategies one_shot \ --models claude \ --maintain \ diff --git a/.github/workflows/classic-forge-ci.yml b/.github/workflows/classic-forge-ci.yml index 6ee9ab81ed..07ceec4f90 100644 --- a/.github/workflows/classic-forge-ci.yml +++ b/.github/workflows/classic-forge-ci.yml @@ -19,7 +19,7 @@ concurrency: defaults: run: shell: bash - working-directory: classic/forge + working-directory: classic jobs: test: @@ -47,7 +47,7 @@ jobs: uses: actions/cache@v4 with: path: ~/.cache/pypoetry - key: poetry-${{ runner.os }}-${{ hashFiles('classic/forge/poetry.lock') }} + key: poetry-${{ runner.os }}-${{ hashFiles('classic/poetry.lock') }} - name: Install Poetry run: curl -sSL https://install.python-poetry.org | python3 - @@ -61,7 +61,7 @@ jobs: --cov=forge --cov-branch --cov-report term-missing --cov-report xml \ --durations=10 \ --junitxml=junit.xml -o junit_family=legacy \ - forge + forge/forge forge/tests env: CI: true PLAIN_OUTPUT: True @@ -90,4 +90,4 @@ jobs: uses: actions/upload-artifact@v4 with: name: test-logs - path: classic/forge/logs/ + path: classic/logs/ diff --git a/.github/workflows/classic-python-checks.yml b/.github/workflows/classic-python-checks.yml index 3b7a1767b0..399764286b 100644 --- a/.github/workflows/classic-python-checks.yml +++ b/.github/workflows/classic-python-checks.yml @@ -7,7 +7,9 @@ on: - '.github/workflows/classic-python-checks-ci.yml' - 'classic/original_autogpt/**' - 'classic/forge/**' - - 'classic/benchmark/**' + - 'classic/direct_benchmark/**' + - 'classic/pyproject.toml' + - 'classic/poetry.lock' - '**.py' - '!classic/forge/tests/vcr_cassettes' pull_request: @@ -16,7 +18,9 @@ on: - '.github/workflows/classic-python-checks-ci.yml' - 'classic/original_autogpt/**' - 'classic/forge/**' - - 'classic/benchmark/**' + - 'classic/direct_benchmark/**' + - 'classic/pyproject.toml' + - 'classic/poetry.lock' - '**.py' - '!classic/forge/tests/vcr_cassettes' @@ -27,45 +31,14 @@ concurrency: defaults: run: shell: bash + working-directory: classic jobs: - get-changed-parts: - runs-on: ubuntu-latest - steps: - - name: Checkout repository - uses: actions/checkout@v4 - - - id: changes-in - name: Determine affected subprojects - uses: dorny/paths-filter@v3 - with: - filters: | - original_autogpt: - - classic/original_autogpt/autogpt/** - - classic/original_autogpt/tests/** - - classic/original_autogpt/poetry.lock - forge: - - classic/forge/forge/** - - classic/forge/tests/** - - classic/forge/poetry.lock - benchmark: - - classic/benchmark/agbenchmark/** - - classic/benchmark/tests/** - - classic/benchmark/poetry.lock - outputs: - changed-parts: ${{ steps.changes-in.outputs.changes }} - lint: - needs: get-changed-parts runs-on: ubuntu-latest env: min-python-version: "3.12" - strategy: - matrix: - sub-package: ${{ fromJson(needs.get-changed-parts.outputs.changed-parts) }} - fail-fast: false - steps: - name: Checkout repository uses: actions/checkout@v4 @@ -81,43 +54,32 @@ jobs: uses: actions/cache@v4 with: path: ~/.cache/pypoetry - key: ${{ runner.os }}-poetry-${{ hashFiles(format('{0}/poetry.lock', matrix.sub-package)) }} + key: ${{ runner.os }}-poetry-${{ hashFiles('classic/poetry.lock') }} - name: Install Poetry run: curl -sSL https://install.python-poetry.org | python3 - - # Install dependencies - - name: Install Python dependencies - run: poetry -C classic/${{ matrix.sub-package }} install + run: poetry install # Lint - name: Lint (isort) run: poetry run isort --check . - working-directory: classic/${{ matrix.sub-package }} - name: Lint (Black) if: success() || failure() run: poetry run black --check . - working-directory: classic/${{ matrix.sub-package }} - name: Lint (Flake8) if: success() || failure() run: poetry run flake8 . - working-directory: classic/${{ matrix.sub-package }} types: - needs: get-changed-parts runs-on: ubuntu-latest env: min-python-version: "3.12" - strategy: - matrix: - sub-package: ${{ fromJson(needs.get-changed-parts.outputs.changed-parts) }} - fail-fast: false - steps: - name: Checkout repository uses: actions/checkout@v4 @@ -133,19 +95,16 @@ jobs: uses: actions/cache@v4 with: path: ~/.cache/pypoetry - key: ${{ runner.os }}-poetry-${{ hashFiles(format('{0}/poetry.lock', matrix.sub-package)) }} + key: ${{ runner.os }}-poetry-${{ hashFiles('classic/poetry.lock') }} - name: Install Poetry run: curl -sSL https://install.python-poetry.org | python3 - - # Install dependencies - - name: Install Python dependencies - run: poetry -C classic/${{ matrix.sub-package }} install + run: poetry install # Typecheck - name: Typecheck if: success() || failure() run: poetry run pyright - working-directory: classic/${{ matrix.sub-package }}