fix(classic): resolve CI lint, type, and test failures

- Fix line-too-long in test_permissions.py docstring
- Fix type annotation in validators.py (callable -> Callable)
- Add --fresh flag to benchmark tests to prevent state resumption
- Exclude direct_benchmark/adapters from pyright (optional deps)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Nicholas Tindle
2026-01-29 14:31:11 -06:00
parent 0040636948
commit 791e1d8982
11 changed files with 1772 additions and 1 deletions

View File

@@ -66,6 +66,7 @@ jobs:
run: |
echo "Testing ReadFile challenge with one_shot strategy..."
poetry run direct-benchmark run \
--fresh \
--strategies one_shot \
--models claude \
--tests ReadFile \
@@ -73,6 +74,7 @@ jobs:
echo "Testing WriteFile challenge..."
poetry run direct-benchmark run \
--fresh \
--strategies one_shot \
--models claude \
--tests WriteFile \
@@ -87,6 +89,7 @@ jobs:
run: |
echo "Testing coding category..."
poetry run direct-benchmark run \
--fresh \
--strategies one_shot \
--models claude \
--categories coding \
@@ -102,6 +105,7 @@ jobs:
run: |
echo "Testing multiple strategies..."
poetry run direct-benchmark run \
--fresh \
--strategies one_shot,plan_execute \
--models claude \
--tests ReadFile \
@@ -145,6 +149,7 @@ jobs:
run: |
echo "Running regression tests (previously beaten challenges)..."
poetry run direct-benchmark run \
--fresh \
--strategies one_shot \
--models claude \
--maintain \