refactor(classic): consolidate Poetry projects into single pyproject.toml

Merge forge/, original_autogpt/, and direct_benchmark/ into a single Poetry project to eliminate cross-project path dependency issues.

Changes:
- Create classic/pyproject.toml with merged dependencies from all three projects
- Remove individual pyproject.toml and poetry.lock files from subdirectories
- Update all CLAUDE.md files to reflect commands run from classic/ root
- Update all README.md files with new installation and usage instructions

All packages are now included via the packages directive:
- forge/forge (core agent framework)
- original_autogpt/autogpt (AutoGPT agent)
- direct_benchmark/direct_benchmark (benchmark harness)

CLI entry points preserved: autogpt, serve, direct-benchmark

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-04-08 03:00:28 -04:00 · 2026-01-20 00:49:56 -06:00
parent 89003a585d
commit 572c3f5e0d
14 changed files with 878 additions and 16224 deletions
--- a/classic/CLAUDE.md
+++ b/classic/CLAUDE.md
@@ -9,41 +9,39 @@ AutoGPT Classic is an experimental, **unsupported** project demonstrating autono
 ## Repository Structure

 ```
-/forge            - Core autonomous agent framework (main library)
-/original_autogpt - Original AutoGPT implementation (depends on forge)
-/direct_benchmark - Benchmark harness for testing agent performance
+classic/
+├── pyproject.toml          # Single consolidated Poetry project
+├── poetry.lock             # Single lock file
+├── forge/
+│   └── forge/              # Core agent framework package
+├── original_autogpt/
+│   └── autogpt/            # AutoGPT agent package
+├── direct_benchmark/
+│   └── direct_benchmark/   # Benchmark harness package
+└── benchmark/              # Challenge definitions (data, not code)
 ```

-Each Python subproject has its own `pyproject.toml` and uses Poetry for dependency management.
+All packages are managed by a single `pyproject.toml` at the classic/ root.

 ## Common Commands

 ### Setup & Install
 ```bash
-# Install forge (core library)
-cd forge && poetry install
-
-# Install original_autogpt (includes forge as dependency)
-cd original_autogpt && poetry install
-
-# Install benchmark
-cd benchmark && poetry install
-
-# Install with benchmark support (optional extra)
-cd forge && poetry install --extras benchmark
-cd original_autogpt && poetry install --extras benchmark
+# Install everything from classic/ directory
+cd classic
+poetry install
 ```

 ### Running Agents
 ```bash
-# Run forge agent (from forge directory)
-cd forge && poetry run python -m forge
+# Run forge agent
+poetry run python -m forge

-# Run original autogpt (from original_autogpt directory)
-cd original_autogpt && poetry run serve --debug
+# Run original autogpt server
+poetry run serve --debug

 # Run autogpt CLI
-cd original_autogpt && poetry run autogpt
+poetry run autogpt
 ```

 Agents run on `http://localhost:8000` by default.
@@ -51,33 +49,34 @@ Agents run on `http://localhost:8000` by default.
 ### Benchmarking
 ```bash
 # Run benchmarks
-cd direct_benchmark && poetry run python -m direct_benchmark run
+poetry run direct-benchmark run

 # Run specific strategies and models
-poetry run python -m direct_benchmark run \
+poetry run direct-benchmark run \
    --strategies one_shot,rewoo \
    --models claude \
    --parallel 4

 # Run a single test
-poetry run python -m direct_benchmark run --tests ReadFile
+poetry run direct-benchmark run --tests ReadFile

 # List available commands
-poetry run python -m direct_benchmark --help
+poetry run direct-benchmark --help
 ```

 ### Testing
 ```bash
-cd forge && poetry run pytest                    # All tests
-cd forge && poetry run pytest tests/             # Tests directory only
-cd forge && poetry run pytest -k test_name       # Single test by name
-cd forge && poetry run pytest path/to/test.py   # Specific test file
-cd forge && poetry run pytest --cov             # With coverage
+poetry run pytest                              # All tests
+poetry run pytest forge/                       # Forge tests only
+poetry run pytest original_autogpt/tests/      # AutoGPT tests only
+poetry run pytest -k test_name                 # Single test by name
+poetry run pytest path/to/test.py              # Specific test file
+poetry run pytest --cov                        # With coverage
 ```

 ### Linting & Formatting

-Run from forge/ or original_autogpt/ directory:
+Run from the classic/ directory:

 ```bash
 # Format everything (recommended to run together)
@@ -106,20 +105,21 @@ The `forge` package is the foundation that other components depend on:
 - `forge/config/` - Configuration management

 ### Original AutoGPT
-Depends on forge via local path (`autogpt-forge = { path = "../forge" }`):
- `autogpt/app/` - CLI application entry points
- `autogpt/agents/` - Agent implementations
- `autogpt/agent_factory/` - Agent creation logic
+- `original_autogpt/autogpt/app/` - CLI application entry points
+- `original_autogpt/autogpt/agents/` - Agent implementations
+- `original_autogpt/autogpt/agent_factory/` - Agent creation logic

 ### Direct Benchmark
 Benchmark harness for testing agent performance:
- `direct_benchmark/` - CLI and harness code
+- `direct_benchmark/direct_benchmark/` - CLI and harness code
 - `benchmark/agbenchmark/challenges/` - Test cases organized by category (code, retrieval, data, etc.)
 - Reports generated in `direct_benchmark/reports/`

-### Dependency Chain
-`original_autogpt` → `forge`
-`direct_benchmark` → `original_autogpt` → `forge`
+### Package Structure
+All three packages are included in a single Poetry project. Imports are fully qualified:
+- `from forge.agent.base import BaseAgent`
+- `from autogpt.agents.agent import Agent`
+- `from direct_benchmark.harness import BenchmarkHarness`

 ## Code Style

--- a/classic/README.md
+++ b/classic/README.md
@@ -18,15 +18,21 @@ AutoGPT Classic was one of the first implementations of autonomous AI agents - A

 ## Structure

- `/benchmark` - Performance testing tools
- `/forge` - Core autonomous agent framework
- `/original_autogpt` - Original implementation
+```
+classic/
+├── pyproject.toml          # Single consolidated Poetry project
+├── poetry.lock             # Single lock file
+├── forge/                  # Core autonomous agent framework
+├── original_autogpt/       # Original implementation
+├── direct_benchmark/       # Benchmark harness
+└── benchmark/              # Challenge definitions (data)
+```

 ## Getting Started

 ### Prerequisites

- Python 3.10+
+- Python 3.12+
 - [Poetry](https://python-poetry.org/docs/#installation)

 ### Installation
@@ -36,14 +42,8 @@ AutoGPT Classic was one of the first implementations of autonomous AI agents - A
 git clone https://github.com/Significant-Gravitas/AutoGPT.git
 cd classic

-# Install forge (core library)
-cd forge && poetry install
-
-# Or install original_autogpt (includes forge as dependency)
-cd original_autogpt && poetry install
-
-# Install benchmark (optional)
-cd benchmark && poetry install
+# Install everything
+poetry install
 ```

 ### Configuration
@@ -81,15 +81,17 @@ FILE_STORAGE_BACKEND=local          # local, s3, or gcs

 ### Running

+All commands run from the `classic/` directory:
+
 ```bash
 # Run forge agent
-cd forge && poetry run python -m forge
+poetry run python -m forge

 # Run original autogpt server
-cd original_autogpt && poetry run serve --debug
+poetry run serve --debug

 # Run autogpt CLI
-cd original_autogpt && poetry run autogpt
+poetry run autogpt
 ```

 Agents run on `http://localhost:8000` by default.
@@ -97,14 +99,15 @@ Agents run on `http://localhost:8000` by default.
 ### Benchmarking

 ```bash
-cd benchmark && poetry run agbenchmark
+poetry run direct-benchmark run
 ```

 ### Testing

 ```bash
-cd forge && poetry run pytest
-cd original_autogpt && poetry run pytest
+poetry run pytest                        # All tests
+poetry run pytest forge/                 # Forge tests only
+poetry run pytest original_autogpt/tests/ # AutoGPT tests only
 ```

 ## Workspaces
--- a/classic/direct_benchmark/CLAUDE.md
+++ b/classic/direct_benchmark/CLAUDE.md
@@ -8,33 +8,35 @@ The Direct Benchmark Harness is a high-performance testing framework for AutoGPT

 ## Quick Reference

+All commands run from the `classic/` directory (parent of this directory):
+
 ```bash
-# Install
-cd classic/direct_benchmark
+# Install (one-time setup)
+cd classic
 poetry install

 # Run benchmarks
-poetry run python -m direct_benchmark run
+poetry run direct-benchmark run

 # Run specific strategies and models
-poetry run python -m direct_benchmark run \
+poetry run direct-benchmark run \
    --strategies one_shot,rewoo \
    --models claude,openai \
    --parallel 4

 # Run a single test
-poetry run python -m direct_benchmark run \
+poetry run direct-benchmark run \
    --strategies one_shot \
    --tests ReadFile

 # List available challenges
-poetry run python -m direct_benchmark list-challenges
+poetry run direct-benchmark list-challenges

 # List model presets
-poetry run python -m direct_benchmark list-models
+poetry run direct-benchmark list-models

 # List strategies
-poetry run python -m direct_benchmark list-strategies
+poetry run direct-benchmark list-strategies
 ```

 ## CLI Options
@@ -76,15 +78,15 @@ poetry run python -m direct_benchmark list-strategies
 ### State Management Commands
 ```bash
 # Show current state
-poetry run python -m direct_benchmark state show
+poetry run direct-benchmark state show

 # Clear all state
-poetry run python -m direct_benchmark state clear
+poetry run direct-benchmark state clear

 # Reset specific strategy/model/challenge
-poetry run python -m direct_benchmark state reset --strategy reflexion
-poetry run python -m direct_benchmark state reset --model claude-thinking-25k
-poetry run python -m direct_benchmark state reset --challenge ThreeSum
+poetry run direct-benchmark state reset --strategy reflexion
+poetry run direct-benchmark state reset --model claude-thinking-25k
+poetry run direct-benchmark state reset --challenge ThreeSum
 ```

 ## Available Strategies
@@ -228,7 +230,7 @@ reports/

 ### Run Full Benchmark Suite
 ```bash
-poetry run python -m direct_benchmark run \
+poetry run direct-benchmark run \
    --strategies one_shot,rewoo,plan_execute \
    --models claude \
    --parallel 8
@@ -236,7 +238,7 @@ poetry run python -m direct_benchmark run \

 ### Compare Strategies
 ```bash
-poetry run python -m direct_benchmark run \
+poetry run direct-benchmark run \
    --strategies one_shot,rewoo,plan_execute,reflexion \
    --models claude \
    --tests ReadFile,WriteFile,ThreeSum
@@ -244,7 +246,7 @@ poetry run python -m direct_benchmark run \

 ### Debug a Failing Test
 ```bash
-poetry run python -m direct_benchmark run \
+poetry run direct-benchmark run \
    --strategies one_shot \
    --tests FailingTest \
    --keep-answers \
@@ -257,29 +259,29 @@ State is saved to `.benchmark_state.json` in the reports directory.

 ```bash
 # Run benchmarks - will resume from last run automatically
-poetry run python -m direct_benchmark run \
+poetry run direct-benchmark run \
    --strategies one_shot,reflexion \
    --models claude

 # Start fresh (clear all saved state)
-poetry run python -m direct_benchmark run --fresh \
+poetry run direct-benchmark run --fresh \
    --strategies one_shot,reflexion \
    --models claude

 # Reset specific strategy and re-run
-poetry run python -m direct_benchmark run \
+poetry run direct-benchmark run \
    --reset-strategy reflexion \
    --strategies one_shot,reflexion \
    --models claude

 # Reset specific model and re-run
-poetry run python -m direct_benchmark run \
+poetry run direct-benchmark run \
    --reset-model claude-thinking-25k \
    --strategies one_shot \
    --models claude,claude-thinking-25k

 # Retry only the failures from the last run
-poetry run python -m direct_benchmark run --retry-failures \
+poetry run direct-benchmark run --retry-failures \
    --strategies one_shot,reflexion \
    --models claude
 ```
@@ -287,9 +289,9 @@ poetry run python -m direct_benchmark run --retry-failures \
 ### CI/Scripting Mode
 ```bash
 # JSON output (parseable)
-poetry run python -m direct_benchmark run --json
+poetry run direct-benchmark run --json

 # CI mode - shows completion blocks without Live display
 # Auto-enabled when CI=true env var is set or stdout is not a TTY
-poetry run python -m direct_benchmark run --ci
+poetry run direct-benchmark run --ci
 ```
--- a/classic/direct_benchmark/README.md
+++ b/classic/direct_benchmark/README.md
@@ -13,8 +13,10 @@ High-performance benchmark harness for AutoGPT that directly instantiates agents

 ## Installation

+All commands run from the `classic/` directory (parent of this directory):
+
 ```bash
-cd classic/direct_benchmark
+cd classic
 poetry install
 ```

@@ -22,41 +24,41 @@ poetry install

 ```bash
 # Run benchmarks with default settings
-poetry run python -m direct_benchmark run
+poetry run direct-benchmark run

 # Run specific strategies and models
-poetry run python -m direct_benchmark run \
+poetry run direct-benchmark run \
    --strategies one_shot,rewoo \
    --models claude,openai \
    --parallel 4

 # Run a single test
-poetry run python -m direct_benchmark run \
+poetry run direct-benchmark run \
    --strategies one_shot \
    --tests ReadFile

 # Run multiple attempts per challenge
-poetry run python -m direct_benchmark run \
+poetry run direct-benchmark run \
    --strategies one_shot \
    --attempts 3

 # Run only regression tests (previously beaten)
-poetry run python -m direct_benchmark run --maintain
+poetry run direct-benchmark run --maintain

 # Run only non-regression tests (not consistently beaten)
-poetry run python -m direct_benchmark run --improve
+poetry run direct-benchmark run --improve

 # Run only never-beaten challenges
-poetry run python -m direct_benchmark run --explore
+poetry run direct-benchmark run --explore

 # List available challenges
-poetry run python -m direct_benchmark list-challenges
+poetry run direct-benchmark list-challenges

 # List model presets
-poetry run python -m direct_benchmark list-models
+poetry run direct-benchmark list-models

 # List strategies
-poetry run python -m direct_benchmark list-strategies
+poetry run direct-benchmark list-strategies
 ```

 ## CLI Options
--- a/classic/direct_benchmark/poetry.lock
+++ b/classic/direct_benchmark/poetry.lock
--- a/classic/direct_benchmark/pyproject.toml
+++ b/classic/direct_benchmark/pyproject.toml
@@ -1,42 +0,0 @@
-[tool.poetry]
-name = "direct-benchmark"
-version = "0.1.0"
-authors = ["Significant Gravitas <support@agpt.co>"]
-description = "Direct benchmark harness for AutoGPT - parallel execution without HTTP server"
-readme = "README.md"
-packages = [{ include = "direct_benchmark" }]
-
-[tool.poetry.scripts]
-direct-benchmark = "direct_benchmark.__main__:main"
-
-[tool.poetry.dependencies]
-python = "^3.12"
-# Core dependencies
-autogpt-forge = { path = "../forge", develop = true }
-autogpt = { path = "../original_autogpt", develop = true }
-click = "^8.0"
-pydantic = "^2.7.2"
-rich = "^13.0"
-# Required for evaluating challenges that use pytest-based tests
-pytest = "^8.0"
-
-[tool.poetry.group.dev.dependencies]
-black = "^23.12.1"
-isort = "^5.13.0"
-pyright = "^1.1.0"
-
-[tool.black]
-line-length = 88
-target-version = ['py312']
-
-[tool.isort]
-profile = "black"
-src_paths = ["direct_benchmark"]
-
-[tool.pyright]
-pythonVersion = "3.12"
-typeCheckingMode = "basic"
-
-[build-system]
-requires = ["poetry-core"]
-build-backend = "poetry.core.masonry.api"
--- a/classic/forge/CLAUDE.md
+++ b/classic/forge/CLAUDE.md
@@ -4,13 +4,15 @@ This file provides guidance to Claude Code (claude.ai/code) when working with co

 ## Quick Reference

+All commands run from the `classic/` directory (parent of this directory):
+
 ```bash
 # Run forge agent server (port 8000)
 poetry run python -m forge

-# Run tests
-poetry run pytest
-poetry run pytest --cov=forge
+# Run forge tests
+poetry run pytest forge/
+poetry run pytest forge/ --cov=forge
 poetry run pytest -k test_name
 ```

@@ -317,9 +319,10 @@ GET  /ap/v1/agent/tasks/{id}/artifacts  # List artifacts
 **Fixtures** (`conftest.py`):
 - `storage` - Temporary LocalFileStorage

+Run from the `classic/` directory:
 ```bash
-poetry run pytest                    # All tests
-poetry run pytest --cov=forge        # With coverage
+poetry run pytest forge/                          # All forge tests
+poetry run pytest forge/ --cov=forge              # With coverage
 ```

 **Note**: Tests requiring API keys (OPENAI_API_KEY, ANTHROPIC_API_KEY) will be skipped if not set.
--- a/classic/forge/README.md
+++ b/classic/forge/README.md
@@ -4,8 +4,11 @@ Core autonomous agent framework for building AI agents.

 ## Quick Start

+All commands run from the `classic/` directory (parent of this directory):
+
 ```bash
-# Install
+# Install (one-time setup)
+cd classic
 poetry install

 # Configure
--- a/classic/forge/poetry.lock
+++ b/classic/forge/poetry.lock
--- a/classic/original_autogpt/CLAUDE.md
+++ b/classic/original_autogpt/CLAUDE.md
@@ -4,6 +4,8 @@ This file provides guidance to Claude Code (claude.ai/code) when working with co

 ## Quick Reference

+All commands run from the `classic/` directory (parent of this directory):
+
 ```bash
 # Run interactive CLI
 poetry run autogpt run
@@ -12,8 +14,8 @@ poetry run autogpt run
 poetry run serve --debug

 # Run tests
-poetry run pytest
-poetry run pytest tests/unit/ -v
+poetry run pytest original_autogpt/tests/
+poetry run pytest original_autogpt/tests/unit/ -v
 poetry run pytest -k test_name
 ```

@@ -228,13 +230,13 @@ autogpt run [OPTIONS]
 - `llm_provider` - MultiProvider
 - `agent` - Fully initialized Agent

-**Running**:
+**Running** (from `classic/` directory):
 ```bash
-poetry run pytest                           # All tests
-poetry run pytest tests/unit/ -v            # Unit tests
-poetry run pytest tests/integration/        # Integration tests
-poetry run pytest -k test_config            # By name
-OPENAI_API_KEY=sk-dummy poetry run pytest   # With dummy key
+poetry run pytest original_autogpt/tests/                    # All tests
+poetry run pytest original_autogpt/tests/unit/ -v            # Unit tests
+poetry run pytest original_autogpt/tests/integration/        # Integration tests
+poetry run pytest -k test_config                             # By name
+OPENAI_API_KEY=sk-dummy poetry run pytest original_autogpt/tests/  # With dummy key
 ```

 ## Common Tasks
@@ -275,26 +277,23 @@ TEMPERATURE=0.7

 ## Benchmarking

-Use `direct_benchmark` (sibling directory) to run performance benchmarks:
+Run performance benchmarks from the `classic/` directory:

 ```bash
-# From classic/direct_benchmark/
-cd ../direct_benchmark
-
 # Run a single test
-poetry run python -m direct_benchmark run --tests ReadFile
+poetry run direct-benchmark run --tests ReadFile

 # Run with specific strategies and models
-poetry run python -m direct_benchmark run \
+poetry run direct-benchmark run \
    --strategies one_shot,rewoo \
    --models claude \
    --parallel 4

 # Run regression tests only
-poetry run python -m direct_benchmark run --maintain
+poetry run direct-benchmark run --maintain

 # List available challenges
-poetry run python -m direct_benchmark list-challenges
+poetry run direct-benchmark list-challenges
 ```

 See `direct_benchmark/CLAUDE.md` for full documentation on strategies, model presets, and CLI options.
--- a/classic/original_autogpt/README.md
+++ b/classic/original_autogpt/README.md
@@ -26,13 +26,16 @@ Demo made by <a href=https://twitter.com/BlakeWerlinger>Blake Werlinger</a>
 ## Setting up AutoGPT

 ### Prerequisites
- Python 3.10+
+- Python 3.12+
 - [Poetry](https://python-poetry.org/docs/#installation)
 - OpenAI [API Key](https://platform.openai.com/account/api-keys)

 ### Installation

+All commands run from the `classic/` directory (parent of this directory):
+
 ```bash
+cd classic
 poetry install
 cp .env.template .env
 # Edit .env with your OPENAI_API_KEY
--- a/classic/original_autogpt/pyproject.toml
+++ b/classic/original_autogpt/pyproject.toml
@@ -1,113 +0,0 @@
-[tool.poetry]
-name = "autogpt"
-version = "0.5.0"
-authors = ["Significant Gravitas <support@agpt.co>"]
-readme = "README.md"
-description = "An open-source attempt to make GPT-4 autonomous. Run 'autogpt' in any directory to start."
-homepage = "https://github.com/Significant-Gravitas/AutoGPT/tree/master/autogpt"
-classifiers = [
-    "Programming Language :: Python :: 3",
-    "Programming Language :: Python :: 3.12",
-    "Programming Language :: Python :: 3.13",
-    "Programming Language :: Python :: 3.14",
-    "License :: OSI Approved :: MIT License",
-    "Operating System :: OS Independent",
-    "Development Status :: 4 - Beta",
-    "Intended Audience :: Developers",
-    "Topic :: Scientific/Engineering :: Artificial Intelligence",
-]
-packages = [{ include = "autogpt" }]
-keywords = ["autogpt", "ai", "agents", "autonomous", "llm", "gpt", "openai"]
-
-[tool.poetry.urls]
-"Bug Tracker" = "https://github.com/Significant-Gravitas/AutoGPT/issues"
-"Documentation" = "https://docs.agpt.co"
-
-[tool.poetry.scripts]
-autogpt = "autogpt.app.cli:cli"
-serve = "autogpt.app.cli:serve"
-
-
-[tool.poetry.dependencies]
-python = "^3.12"
-# For development, use local path; for publishing, use versioned package
-autogpt-forge = { path = "../forge", develop = true }
-# autogpt-forge = "^0.2.0"  # Uncomment for PyPI release
-click = "*"
-colorama = "^0.4.6"
-distro = "^1.8.0"
-fastapi = "^0.109.1"
-gitpython = "^3.1.32"
-hypercorn = "^0.14.4"
-openai = "^1.50.0"
-orjson = "^3.8.10"
-pydantic = "^2.7.2"
-python-dotenv = "^1.0.0"
-requests = "*"
-sentry-sdk = "^1.40.4"
-# UI dependencies
-rich = "^13.0"
-prompt-toolkit = "^3.0.0"
-
-# Benchmarking - use direct_benchmark instead (agbenchmark removed)
-
-[tool.poetry.extras]
-# benchmark extra removed - use direct_benchmark instead
-
-[tool.poetry.group.dev.dependencies]
-black = "^23.12.1"
-flake8 = "^7.0.0"
-isort = "^5.13.1"
-pre-commit = "*"
-pyright = "^1.1.364"
-
-# Type stubs
-types-colorama = "*"
-
-# Testing
-pytest = "*"
-pytest-asyncio = "*"
-pytest-cov = "*"
-pytest-mock = "*"
-pytest-recording = "*"
-pytest-xdist = "*"
-aiohttp-retry = "^2.9.1"
-
-[tool.poetry.group.build]
-optional = true
-
-[tool.poetry.group.build.dependencies]
-cx-freeze = { git = "https://github.com/ntindle/cx_Freeze.git", rev = "main" }
-# HACK: switch to cx-freeze release package after #2442 and #2472 are merged: https://github.com/marcelotduarte/cx_Freeze/pulls?q=is:pr+%232442+OR+%232472+
-# cx-freeze = { version = "^7.2.0", optional = true }
-
-
-[build-system]
-requires = ["poetry-core"]
-build-backend = "poetry.core.masonry.api"
-
-
-[tool.black]
-line-length = 88
-target-version = ['py312']
-include = '\.pyi?$'
-
-
-[tool.isort]
-profile = "black"
-skip_glob = ["data"]
-
-
-[tool.pytest.ini_options]
-markers = [
-    "slow: marks tests as slow (deselect with '-m \"not slow\"')",
-    "integration: marks tests as integration tests",
-    "requires_agent: marks tests that require a running agent with API keys",
-]
-asyncio_mode = "auto"
-
-
-[tool.pyright]
-pythonVersion = "3.12"
-exclude = ["data/**", "**/node_modules", "**/__pycache__", "**/.*"]
-ignore = ["../classic/forge/**"]
--- a/classic/original_autogpt/poetry.lock
+++ b/classic/original_autogpt/poetry.lock
--- a/classic/forge/pyproject.toml
+++ b/classic/pyproject.toml
@@ -1,12 +1,12 @@
 [tool.poetry]
-name = "autogpt-forge"
-version = "0.2.0"
-description = "Core library for building autonomous AI agents"
-authors = ["AutoGPT <support@agpt.co>"]
+name = "autogpt-classic"
+version = "0.5.0"
+description = "AutoGPT Classic - autonomous agent framework"
+authors = ["Significant Gravitas <support@agpt.co>"]
 license = "MIT"
 readme = "README.md"
-packages = [{ include = "forge" }]
-keywords = ["autogpt", "ai", "agents", "autonomous", "llm"]
+homepage = "https://github.com/Significant-Gravitas/AutoGPT"
+keywords = ["autogpt", "ai", "agents", "autonomous", "llm", "gpt", "openai"]
 classifiers = [
    "Development Status :: 4 - Beta",
    "Intended Audience :: Developers",
@@ -16,83 +16,130 @@ classifiers = [
    "Programming Language :: Python :: 3.12",
    "Programming Language :: Python :: 3.13",
    "Programming Language :: Python :: 3.14",
+    "Topic :: Scientific/Engineering :: Artificial Intelligence",
+]
+packages = [
+    { include = "forge", from = "forge" },
+    { include = "autogpt", from = "original_autogpt" },
+    { include = "direct_benchmark", from = "direct_benchmark" },
 ]

 [tool.poetry.urls]
-"Homepage" = "https://github.com/Significant-Gravitas/AutoGPT"
 "Bug Tracker" = "https://github.com/Significant-Gravitas/AutoGPT/issues"
+"Documentation" = "https://docs.agpt.co"
+
+[tool.poetry.scripts]
+autogpt = "autogpt.app.cli:cli"
+serve = "autogpt.app.cli:serve"
+direct-benchmark = "direct_benchmark.__main__:main"

 [tool.poetry.dependencies]
 python = "^3.12"
-# agbenchmark removed - use direct_benchmark instead
+
+# Core networking & web
 aiohttp = "^3.8.5"
-anthropic = "^0.45.0"
 beautifulsoup4 = "^4.12.2"
-boto3 = "^1.33.6"
-charset-normalizer = "^3.1.0"
-chromadb = "^1.4.0"
-click = "*"
-colorama = "^0.4.6"
-demjson3 = "^3.0.0"
-docker = "*"
-ddgs = "^9.9"
+requests = "*"
 trafilatura = "^2.0"
-en-core-web-sm = { url = "https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl" }
+
+# LLM providers
+anthropic = "^0.45.0"
+groq = "^0.8.0"
+litellm = "^1.17.9"
+openai = "^1.50.0"
+tiktoken = ">=0.7.0,<1.0.0"
+
+# Web frameworks
 fastapi = "^0.109.1"
-gitpython = "^3.1.32"
-google-api-python-client = "^2.116"
+hypercorn = "^0.14.4"
+uvicorn = { extras = ["standard"], version = ">=0.23.2,<1" }
+python-multipart = "^0.0.7"
+
+# Data processing
+charset-normalizer = "^3.1.0"
+demjson3 = "^3.0.0"
+jsonschema = "*"
+orjson = "^3.8.10"
+pydantic = "^2.7.2"
+pyyaml = "^6.0"
+toml = "^0.10.2"
+
+# Database & storage
+boto3 = "^1.33.6"
+chromadb = "^1.4.0"
 google-cloud-logging = "^3.8.0"
 google-cloud-storage = "^2.13.0"
-groq = "^0.8.0"
+sqlalchemy = "^2.0.19"
+
+# CLI & UI
+click = "^8.0"
+colorama = "^0.4.6"
+prompt-toolkit = "^3.0.0"
+rich = "^13.0"
+
+# Document processing
+pypdf = "^3.1.0"
+python-docx = "*"
+pylatexenc = "*"
+Pillow = "*"
+
+# NLP
+en-core-web-sm = { url = "https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl" }
+spacy = "^3.8.0"
+
+# Web automation & search
+ddgs = "^9.9"
+google-api-python-client = "^2.116"
+playwright = "^1.50.0"
+
+# Utilities
+distro = "^1.8.0"
+docker = "*"
+gitpython = "^3.1.32"
 gTTS = "^2.3.1"
 jinja2 = "^3.1.2"
-jsonschema = "*"
-litellm = "^1.17.9"
 numpy = ">=2.0.0"
-openai = "^1.50.0"
-Pillow = "*"
-playwright = "^1.50.0"
 playsound = "~1.2.2"
-pydantic = "^2.7.2"
-python-docx = "*"
 python-dotenv = "^1.0.0"
-python-multipart = "^0.0.7"
-pylatexenc = "*"
-pypdf = "^3.1.0"
-pyyaml = "^6.0"
-requests = "*"
-sqlalchemy = "^2.0.19"
 sentry-sdk = "^1.40.4"
-spacy = "^3.8.0"
 tenacity = "^8.2.2"
-tiktoken = ">=0.7.0,<1.0.0"
-toml = "^0.10.2"
-uvicorn = { extras = ["standard"], version = ">=0.23.2,<1" }
 watchdog = "^6.0.0"

-[tool.poetry.extras]
-# benchmark extra removed - use direct_benchmark instead
+# Testing (needed for direct_benchmark challenge evaluation)
+pytest = "^8.0"
+

 [tool.poetry.group.dev.dependencies]
+# Formatting & linting
 black = "^23.12.1"
 flake8 = "^7.0.0"
 isort = "^5.13.1"
-pyright = "^1.1.364"
 pre-commit = "^3.3.3"
+pyright = "^1.1.364"

 # Type stubs
 boto3-stubs = { extras = ["s3"], version = "^1.33.6" }
 google-api-python-client-stubs = "^1.24"
 types-beautifulsoup4 = "*"
+types-colorama = "*"
 types-Pillow = "*"
 types-requests = "^2.31.0.2"

 # Testing
-pytest = "^7.4.0"
 pytest-asyncio = "^0.23.3"
 pytest-cov = "^5.0.0"
 pytest-mock = "*"
+pytest-recording = "*"
+pytest-xdist = "*"
 mock = "^5.1.0"
+aiohttp-retry = "^2.9.1"
+
+
+[tool.poetry.group.build]
+optional = true
+
+[tool.poetry.group.build.dependencies]
+cx-freeze = { git = "https://github.com/ntindle/cx_Freeze.git", rev = "main" }


 [build-system]
@@ -108,13 +155,20 @@ include = '\.pyi?$'

 [tool.isort]
 profile = "black"
+skip_glob = ["data"]


 [tool.pyright]
 pythonVersion = "3.12"
+exclude = ["data/**", "**/node_modules", "**/__pycache__", "**/.*"]


 [tool.pytest.ini_options]
-pythonpath = ["forge"]
-testpaths = ["forge", "tests"]
-markers = ["slow"]
+pythonpath = ["."]
+testpaths = ["forge/forge", "forge/tests", "original_autogpt/tests"]
+asyncio_mode = "auto"
+markers = [
+    "slow: marks tests as slow (deselect with '-m \"not slow\"')",
+    "integration: marks tests as integration tests",
+    "requires_agent: marks tests that require a running agent with API keys",
+]