mirror of
https://github.com/Significant-Gravitas/AutoGPT.git
synced 2026-04-30 03:00:41 -04:00
## Summary
This PR modernizes AutoGPT Classic to make it more useful for day-to-day
autonomous agent development. Major changes include consolidating the
project structure, adding new prompt strategies, modernizing the
benchmark system, and improving the development experience.
**Note: AutoGPT Classic is an experimental, unsupported project
preserved for educational/historical purposes. Dependencies will not be
actively updated.**
## Changes 🏗️
### Project Structure & Build System
- **Consolidated Poetry projects** - Merged `forge/`,
`original_autogpt/`, and benchmark packages into a single
`pyproject.toml` at `classic/` root
- **Removed old benchmark infrastructure** - Deleted the complex
`agbenchmark` package (3000+ lines) in favor of the new
`direct_benchmark` harness
- **Removed frontend** - Deleted `benchmark/frontend/` React app (no
longer needed)
- **Cleaned up CI workflows** - Simplified GitHub Actions workflows for
the consolidated project structure
- **Added CLAUDE.md** - Documentation for working with the codebase
using Claude Code
### New Direct Benchmark System
- **`direct_benchmark` harness** - New streamlined benchmark runner
  with:
  - Rich TUI with multi-panel layout showing parallel test execution
  - Incremental resume and selective reset capabilities
  - CI mode for non-interactive environments
  - Step-level logging with colored prefixes
  - "Would have passed" tracking for timed-out challenges
  - Copy-paste completion blocks for sharing results
### Multiple Prompt Strategies
Added pluggable prompt strategy system supporting:
- **one_shot** - Single-prompt completion
- **plan_execute** - Plan first, then execute steps
- **rewoo** - Reasoning without observation (deferred tool execution)
- **react** - Reason + Act iterative loop
- **lats** - Language Agent Tree Search (MCTS-based exploration)
- **sub_agent** - Multi-agent delegation architecture
- **debate** - Multi-agent debate for consensus
### LLM Provider Improvements
- Added support for modern **Anthropic Claude models**
(claude-3.5-sonnet, claude-3-haiku, etc.)
- Added **Groq** provider support
- Improved tool call error feedback for LLM self-correction
- Fixed deprecated API usage
### Web Components
- **Replaced Selenium with Playwright** for web browsing (better async
support, faster)
- Added **lightweight web fetch component** for simple URL fetching
- **Modernized web search** with tiered provider system (Tavily, Serper,
Google)
### Agent Capabilities
- **Workspace permissions system** - Pattern-based allow/deny lists for
agent commands
- **Rich interactive selector** for command approval with scopes
(once/agent/workspace/deny)
- **TodoComponent** with LLM-powered task decomposition
- **Platform blocks integration** - Connect to AutoGPT Platform API for
additional blocks
- **Sub-agent architecture** - Agents can spawn and coordinate
sub-agents
### Developer Experience
- **Python 3.12+ support** with CI testing on 3.12, 3.13, 3.14
- **Current working directory as default workspace** - Run `autogpt`
from any project directory
- Simplified log format (removed timestamps)
- Improved configuration and setup flow
- External benchmark adapters for GAIA, SWE-bench, and AgentBench
### Bug Fixes
- Fixed N/A command loop when using native tool calling
- Fixed auto-advance plan steps in Plan-Execute strategy
- Fixed approve+feedback to execute command then send feedback
- Fixed parallel tool calls in action history
- Always recreate Docker containers for code execution
- Various pyright type errors resolved
- Linting and formatting issues fixed across codebase
## Test Plan
- [x] CI lint, type, and test checks pass
- [x] Run `poetry install` from `classic/` directory
- [x] Run `poetry run autogpt` and verify CLI starts
- [x] Run `poetry run direct-benchmark run --tests ReadFile` to verify
benchmark works
## Notes
- This is a work-in-progress (WIP) PR containing improvements made for personal use
- The project is marked as **unsupported** - no active maintenance
planned
- Contains known vulnerabilities in dependencies (intentionally not
updated)
<!-- CURSOR_SUMMARY -->
---
> [!NOTE]
> **Medium Risk**
> CI/build workflows are substantially reworked (runner matrix removal,
path/layout changes, new benchmark runner), so breakage is most likely
in automation and packaging rather than runtime behavior.
>
> **Overview**
> **Modernizes the `classic/` project layout and automation around a
single consolidated Poetry project** (root
`classic/pyproject.toml`/`poetry.lock`) and updates docs
(`classic/README.md`, new `classic/CLAUDE.md`) accordingly.
>
> **Replaces the old `agbenchmark` CI usage with `direct-benchmark` in
GitHub Actions**, including new/updated benchmark smoke and regression
workflows, standardized `working-directory: classic`, and a move to
**Python 3.12** on Ubuntu-only runners (plus updated caching, coverage
flags, and required `ANTHROPIC_API_KEY` wiring).
>
> Cleans up repo/dev tooling by removing the classic frontend workflow,
deleting the Forge VCR cassette submodule (`.gitmodules`) and associated
CI steps, consolidating `flake8`/`isort`/`pyright` pre-commit hooks to
run from `classic/`, updating ignores for new report/workspace
artifacts, and updating `classic/Dockerfile.autogpt` to build from
Python 3.12 with the consolidated project structure.
>
> <sup>Written by [Cursor
Bugbot](https://cursor.com/dashboard?tab=bugbot) for commit
de67834dac. This will update automatically
on new commits. Configure
[here](https://cursor.com/dashboard?tab=bugbot).</sup>
<!-- /CURSOR_SUMMARY -->
---------
Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
Co-authored-by: Zamil Majdy <zamil.majdy@agpt.co>
196 lines
4.6 KiB
TOML
196 lines
4.6 KiB
TOML
# Package metadata for the AutoGPT Classic Poetry project.
[tool.poetry]
name = "autogpt-classic"
version = "0.5.0"
description = "AutoGPT Classic - autonomous agent framework"
authors = ["Significant Gravitas <support@agpt.co>"]
license = "MIT"
readme = "README.md"
homepage = "https://github.com/Significant-Gravitas/AutoGPT"
keywords = ["autogpt", "ai", "agents", "autonomous", "llm", "gpt", "openai"]
classifiers = [
    "Development Status :: 4 - Beta",
    "Intended Audience :: Developers",
    "License :: OSI Approved :: MIT License",
    "Operating System :: OS Independent",
    "Programming Language :: Python :: 3",
    "Programming Language :: Python :: 3.12",
    "Programming Language :: Python :: 3.13",
    "Programming Language :: Python :: 3.14",
    "Topic :: Scientific/Engineering :: Artificial Intelligence",
]
# Three importable packages, each collected from its own source directory.
# Note that the `autogpt` package is read from `original_autogpt/`.
packages = [
    { include = "forge", from = "forge" },
    { include = "autogpt", from = "original_autogpt" },
    { include = "direct_benchmark", from = "direct_benchmark" },
]

# Extra project links. The keys are quoted because "Bug Tracker" contains a
# space; "Documentation" is quoted to match.
[tool.poetry.urls]
"Bug Tracker" = "https://github.com/Significant-Gravitas/AutoGPT/issues"
"Documentation" = "https://docs.agpt.co"

# Console entry points, each mapped to a "module:callable" target.
[tool.poetry.scripts]
autogpt = "autogpt.app.cli:cli"
serve = "autogpt.app.cli:serve"
direct-benchmark = "direct_benchmark.__main__:main"

# Runtime dependencies, grouped by purpose.
# NOTE(review): several entries use "*" (any version), so the installed
# version is decided entirely by the lock file - confirm this is intended.
[tool.poetry.dependencies]
python = "^3.12"

# Core networking & web
aiohttp = "^3.8.5"
beautifulsoup4 = "^4.12.2"
requests = "*"
trafilatura = "^2.0"

# LLM providers
anthropic = "^0.45.0"
groq = "^0.8.0"
litellm = "^1.17.9"
openai = "^1.50.0"
tiktoken = ">=0.7.0,<1.0.0"

# Web frameworks
fastapi = "^0.109.1"
hypercorn = "^0.14.4"
uvicorn = { extras = ["standard"], version = ">=0.23.2,<1" }
python-multipart = "^0.0.7"

# Data processing
charset-normalizer = "^3.1.0"
demjson3 = "^3.0.0"
jsonschema = "*"
orjson = "^3.8.10"
pydantic = "^2.7.2"
pyyaml = "^6.0"
toml = "^0.10.2"

# Database & storage
boto3 = "^1.33.6"
chromadb = "^1.4.0"
google-cloud-logging = "^3.8.0"
google-cloud-storage = "^2.13.0"
sqlalchemy = "^2.0.19"

# CLI & UI
click = "^8.0"
colorama = "^0.4.6"
prompt-toolkit = "^3.0.0"
rich = "^13.0"

# Document processing
pypdf = "^3.1.0"
python-docx = "*"
pylatexenc = "*"
Pillow = "*"

# NLP
# The spaCy English model is installed from a release wheel URL; its 3.8.0
# version matches the lower bound of the `spacy` constraint below.
en-core-web-sm = { url = "https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl" }
spacy = "^3.8.0"

# Web automation & search
ddgs = "^9.9"
google-api-python-client = "^2.116"
playwright = "^1.50.0"

# Utilities
distro = "^1.8.0"
docker = "*"
gitpython = "^3.1.32"
gTTS = "^2.3.1"
jinja2 = "^3.1.2"
numpy = ">=2.0.0"
playsound = "~1.2.2"
python-dotenv = "^1.0.0"
sentry-sdk = "^1.40.4"
tenacity = "^8.2.2"
watchdog = "^6.0.0"

# Testing (needed for direct_benchmark challenge evaluation)
pytest = "^8.0"

# Development-only dependencies: linters, type stubs, and test plugins.
[tool.poetry.group.dev.dependencies]
# Formatting & linting
black = "^24.10.0"
flake8 = "^7.0.0"
isort = "^5.13.1"
pre-commit = "^3.3.3"
pyright = "^1.1.364"

# Type stubs
boto3-stubs = { extras = ["s3"], version = "^1.33.6" }
google-api-python-client-stubs = "^1.24"
types-beautifulsoup4 = "*"
types-colorama = "*"
types-Pillow = "*"
types-requests = "^2.31.0.2"

# Testing
pytest-asyncio = "^0.23.3"
pytest-cov = "^5.0.0"
pytest-mock = "*"
pytest-recording = "*"
pytest-xdist = "*"
mock = "^5.1.0"
aiohttp-retry = "^2.9.1"
pytest-rerunfailures = "^16.1"

# Optional "build" group, declared with `optional = true`.
[tool.poetry.group.build]
optional = true

[tool.poetry.group.build.dependencies]
# Pulled from a fork's git repository at revision "main" rather than a
# published release.
# NOTE(review): "main" is presumably a branch, so this reference can move
# between lock updates - confirm that is acceptable.
cx-freeze = { git = "https://github.com/ntindle/cx_Freeze.git", rev = "main" }

# Optional "benchmarks" group, declared with `optional = true`.
[tool.poetry.group.benchmarks]
optional = true

[tool.poetry.group.benchmarks.dependencies]
# External benchmark adapters
datasets = "^2.14"
huggingface-hub = "^0.20"
# SWE-bench evaluation (optional - requires Docker)
# swebench = "^2.0"  # Install separately if needed
# Modal for cloud evaluation (optional)
# modal = "^0.70"  # Install separately if needed

# Build backend declaration: the project is built with poetry-core.
[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"

# Black formatter settings.
[tool.black]
line-length = 88
target-version = ['py312']
# Literal (single-quoted) string so the regex backslash needs no escaping;
# the pattern matches paths ending in .py or .pyi.
include = '\.pyi?$'

# isort settings; the "black" profile is used alongside the Black
# configuration in this file.
[tool.isort]
profile = "black"
skip_glob = ["data"]

# Pyright type-checker settings.
[tool.pyright]
pythonVersion = "3.12"
# Paths skipped by the type checker.
exclude = [
    "data/**",
    "**/node_modules",
    "**/__pycache__",
    "**/.*",
    "direct_benchmark/challenges/**",  # Legacy code with unavailable imports
    "direct_benchmark/direct_benchmark/adapters/**",  # Optional deps (datasets, swebench, modal)
]

# pytest settings.
[tool.pytest.ini_options]
pythonpath = ["."]
testpaths = ["forge/forge", "forge/tests", "original_autogpt/tests"]
asyncio_mode = "auto"
# Custom markers declared for the test suite.
markers = [
    "slow: marks tests as slow (deselect with '-m \"not slow\"')",
    "integration: marks tests as integration tests",
    "requires_agent: marks tests that require a running agent with API keys",
]