mirror of
https://github.com/Significant-Gravitas/AutoGPT.git
synced 2026-02-14 00:35:02 -05:00
- Remove old benchmark/ folder with agbenchmark framework
- Move challenges to direct_benchmark/challenges/
- Move analysis tools (analyze_reports.py, analyze_failures.py) to direct_benchmark/
- Move challenges_already_beaten.json to direct_benchmark/
- Update CI workflow to use direct_benchmark
- Update CLAUDE.md files with new benchmarking instructions
- Add benchmarking section to original_autogpt/CLAUDE.md

The direct_benchmark harness directly instantiates agents without HTTP server overhead, enabling parallel execution with an asyncio semaphore.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
41 lines
943 B
TOML
41 lines
943 B
TOML
# Package metadata for the direct benchmark harness.
[tool.poetry]
name = "direct-benchmark"
version = "0.1.0"
authors = ["Significant Gravitas <support@agpt.co>"]
description = "Direct benchmark harness for AutoGPT - parallel execution without HTTP server"
readme = "README.md"
# The packaged directory is spelled with an underscore, unlike the
# hyphenated distribution name above.
packages = [{ include = "direct_benchmark" }]

[tool.poetry.scripts]
# Console command name mapped to its "module:function" entry point.
direct-benchmark = "direct_benchmark.__main__:main"

[tool.poetry.dependencies]
python = "^3.12"
# Core dependencies
# autogpt-forge and autogpt are resolved from sibling directories by relative
# path; `develop = true` asks Poetry for a development (editable) install.
autogpt-forge = { path = "../forge", develop = true }
autogpt = { path = "../original_autogpt", develop = true }
click = "^8.0"
pydantic = "^2.7.2"
rich = "^13.0"

# Formatting, import-sorting and type-checking tools; each one is configured
# in its own [tool.*] table in this file.
[tool.poetry.group.dev.dependencies]
black = "^23.12.1"
isort = "^5.13.0"
pyright = "^1.1.0"

[tool.black]
line-length = 88
# Matches the `python = "^3.12"` constraint declared for the package.
target-version = ['py312']

[tool.isort]
# Use isort's black-compatible profile so its output agrees with the formatter.
profile = "black"
src_paths = ["direct_benchmark"]

[tool.pyright]
# Matches the `python = "^3.12"` constraint declared for the package.
pythonVersion = "3.12"
typeCheckingMode = "basic"

# PEP 517 build configuration: builds are delegated to the poetry-core backend.
[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"