Mirror of https://github.com/Significant-Gravitas/AutoGPT.git, synced 2026-02-03 19:35:15 -05:00.
- **FIX ALL LINT/TYPE ERRORS IN AUTOGPT, FORGE, AND BENCHMARK** ### Linting - Clean up linter configs for `autogpt`, `forge`, and `benchmark` - Add type checking with Pyright - Create unified pre-commit config - Create unified linting and type checking CI workflow ### Testing - Synchronize CI test setups for `autogpt`, `forge`, and `benchmark` - Add missing pytest-cov to benchmark dependencies - Mark GCS tests as slow to speed up pre-commit test runs - Repair `forge` test suite - Add `AgentDB.close()` method for test DB teardown in db_test.py - Use actual temporary dir instead of forge/test_workspace/ - Move left-behind dependencies for moved `forge`-code from autogpt to forge ### Notable type changes - Replace uses of `ChatModelProvider` by `MultiProvider` - Removed unnecessary exports from various __init__.py - Simplify `FileStorage.open_file` signature by removing `IOBase` from return type union - Implement `S3BinaryIOWrapper(BinaryIO)` type interposer for `S3FileStorage` - Expand overloads of `GCSFileStorage.open_file` for improved typing of read and write modes Had to silence type checking for the extra overloads, because (I think) Pyright is reporting a false-positive: https://github.com/microsoft/pyright/issues/8007 - Change `count_tokens`, `get_tokenizer`, `count_message_tokens` methods on `ModelProvider`s from class methods to instance methods - Move `CompletionModelFunction.schema` method -> helper function `format_function_def_for_openai` in `forge.llm.providers.openai` - Rename `ModelProvider` -> `BaseModelProvider` - Rename `ChatModelProvider` -> `BaseChatModelProvider` - Add type `ChatModelProvider` which is a union of all subclasses of `BaseChatModelProvider` ### Removed rather than fixed - Remove deprecated and broken autogpt/agbenchmark_config/benchmarks.py - Various base classes and properties on base classes in `forge.llm.providers.schema` and `forge.models.providers` ### Fixes for other issues that came to light - Clean up 
`forge.agent_protocol.api_router`, `forge.agent_protocol.database`, and `forge.agent.agent` - Add fallback behavior to `ImageGeneratorComponent` - Remove test for deprecated failure behavior - Fix `agbenchmark.challenges.builtin` challenge exclusion mechanism on Windows - Fix `_tool_calls_compat_extract_calls` in `forge.llm.providers.openai` - Add support for `any` (= no type specified) in `JSONSchema.typescript_type`
170 lines · 5.6 KiB · YAML
name: AGBenchmark CI
|
|
|
|
on:
|
|
push:
|
|
branches: [ master, development, ci-test* ]
|
|
paths:
|
|
- 'benchmark/**'
|
|
- .github/workflows/benchmark-ci.yml
|
|
- '!benchmark/reports/**'
|
|
pull_request:
|
|
branches: [ master, development, release-* ]
|
|
paths:
|
|
- 'benchmark/**'
|
|
- '!benchmark/reports/**'
|
|
- .github/workflows/benchmark-ci.yml
|
|
|
|
concurrency:
|
|
group: ${{ format('benchmark-ci-{0}', github.head_ref && format('{0}-{1}', github.event_name, github.event.pull_request.number) || github.sha) }}
|
|
cancel-in-progress: ${{ startsWith(github.event_name, 'pull_request') }}
|
|
|
|
defaults:
|
|
run:
|
|
shell: bash
|
|
|
|
env:
|
|
min-python-version: '3.10'
|
|
|
|
jobs:
|
|
test:
|
|
permissions:
|
|
contents: read
|
|
timeout-minutes: 30
|
|
strategy:
|
|
fail-fast: false
|
|
matrix:
|
|
python-version: ["3.10"]
|
|
platform-os: [ubuntu, macos, macos-arm64, windows]
|
|
runs-on: ${{ matrix.platform-os != 'macos-arm64' && format('{0}-latest', matrix.platform-os) || 'macos-14' }}
|
|
defaults:
|
|
run:
|
|
shell: bash
|
|
working-directory: benchmark
|
|
steps:
|
|
- name: Checkout repository
|
|
uses: actions/checkout@v4
|
|
with:
|
|
fetch-depth: 0
|
|
submodules: true
|
|
|
|
- name: Set up Python ${{ matrix.python-version }}
|
|
uses: actions/setup-python@v5
|
|
with:
|
|
python-version: ${{ matrix.python-version }}
|
|
|
|
- name: Set up Python dependency cache
|
|
# On Windows, unpacking cached dependencies takes longer than just installing them
|
|
if: runner.os != 'Windows'
|
|
uses: actions/cache@v4
|
|
with:
|
|
path: ${{ runner.os == 'macOS' && '~/Library/Caches/pypoetry' || '~/.cache/pypoetry' }}
|
|
key: poetry-${{ runner.os }}-${{ hashFiles('benchmark/poetry.lock') }}
|
|
|
|
- name: Install Poetry (Unix)
|
|
if: runner.os != 'Windows'
|
|
run: |
|
|
curl -sSL https://install.python-poetry.org | python3 -
|
|
|
|
if [ "${{ runner.os }}" = "macOS" ]; then
|
|
PATH="$HOME/.local/bin:$PATH"
|
|
echo "$HOME/.local/bin" >> $GITHUB_PATH
|
|
fi
|
|
|
|
- name: Install Poetry (Windows)
|
|
if: runner.os == 'Windows'
|
|
shell: pwsh
|
|
run: |
|
|
(Invoke-WebRequest -Uri https://install.python-poetry.org -UseBasicParsing).Content | python -
|
|
|
|
$env:PATH += ";$env:APPDATA\Python\Scripts"
|
|
echo "$env:APPDATA\Python\Scripts" >> $env:GITHUB_PATH
|
|
|
|
- name: Install Python dependencies
|
|
run: poetry install
|
|
|
|
- name: Run pytest with coverage
|
|
run: |
|
|
poetry run pytest -vv \
|
|
--cov=agbenchmark --cov-branch --cov-report term-missing --cov-report xml \
|
|
--durations=10 \
|
|
tests
|
|
env:
|
|
CI: true
|
|
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
|
|
|
- name: Upload coverage reports to Codecov
|
|
uses: codecov/codecov-action@v4
|
|
with:
|
|
token: ${{ secrets.CODECOV_TOKEN }}
|
|
flags: agbenchmark,${{ runner.os }}
|
|
|
|
self-test-with-agent:
|
|
runs-on: ubuntu-latest
|
|
strategy:
|
|
matrix:
|
|
agent-name: [ forge ]
|
|
fail-fast: false
|
|
timeout-minutes: 20
|
|
steps:
|
|
- name: Checkout repository
|
|
uses: actions/checkout@v4
|
|
with:
|
|
fetch-depth: 0
|
|
submodules: true
|
|
|
|
- name: Set up Python ${{ env.min-python-version }}
|
|
uses: actions/setup-python@v5
|
|
with:
|
|
python-version: ${{ env.min-python-version }}
|
|
|
|
- name: Install Poetry
|
|
run: |
|
|
curl -sSL https://install.python-poetry.org | python -
|
|
|
|
- name: Run regression tests
|
|
working-directory: .
|
|
run: |
|
|
./run agent start ${{ matrix.agent-name }}
|
|
cd ${{ matrix.agent-name }}
|
|
|
|
set +e # Ignore non-zero exit codes and continue execution
|
|
echo "Running the following command: poetry run agbenchmark --maintain --mock"
|
|
poetry run agbenchmark --maintain --mock
|
|
EXIT_CODE=$?
|
|
set -e # Stop ignoring non-zero exit codes
|
|
# Check if the exit code was 5, and if so, exit with 0 instead
|
|
if [ $EXIT_CODE -eq 5 ]; then
|
|
echo "regression_tests.json is empty."
|
|
fi
|
|
|
|
echo "Running the following command: poetry run agbenchmark --mock"
|
|
poetry run agbenchmark --mock
|
|
|
|
echo "Running the following command: poetry run agbenchmark --mock --category=data"
|
|
poetry run agbenchmark --mock --category=data
|
|
|
|
echo "Running the following command: poetry run agbenchmark --mock --category=coding"
|
|
poetry run agbenchmark --mock --category=coding
|
|
|
|
echo "Running the following command: poetry run agbenchmark --test=WriteFile"
|
|
poetry run agbenchmark --test=WriteFile
|
|
cd ../benchmark
|
|
poetry install
|
|
echo "Adding the BUILD_SKILL_TREE environment variable. This will attempt to add new elements in the skill tree. If new elements are added, the CI fails because they should have been pushed"
|
|
export BUILD_SKILL_TREE=true
|
|
|
|
poetry run agbenchmark --mock
|
|
|
|
CHANGED=$(git diff --name-only | grep -E '(agbenchmark/challenges)|(../frontend/assets)') || echo "No diffs"
|
|
if [ ! -z "$CHANGED" ]; then
|
|
echo "There are unstaged changes please run agbenchmark and commit those changes since they are needed."
|
|
echo "$CHANGED"
|
|
exit 1
|
|
else
|
|
echo "No unstaged changes."
|
|
fi
|
|
env:
|
|
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
|
TELEMETRY_ENVIRONMENT: autogpt-benchmark-ci
|
|
TELEMETRY_OPT_IN: ${{ github.ref_name == 'master' }}
|