mirror of
https://github.com/Significant-Gravitas/AutoGPT.git
synced 2026-04-08 03:00:28 -04:00
refactor(classic): consolidate Poetry projects into single pyproject.toml
Merge forge/, original_autogpt/, and direct_benchmark/ into a single Poetry project to eliminate cross-project path dependency issues.

Changes:
- Create classic/pyproject.toml with merged dependencies from all three projects
- Remove individual pyproject.toml and poetry.lock files from subdirectories
- Update all CLAUDE.md files to reflect commands run from classic/ root
- Update all README.md files with new installation and usage instructions

All packages are now included via the packages directive:
- forge/forge (core agent framework)
- original_autogpt/autogpt (AutoGPT agent)
- direct_benchmark/direct_benchmark (benchmark harness)

CLI entry points preserved: autogpt, serve, direct-benchmark

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -9,41 +9,39 @@ AutoGPT Classic is an experimental, **unsupported** project demonstrating autono
|
||||
## Repository Structure
|
||||
|
||||
```
|
||||
/forge - Core autonomous agent framework (main library)
|
||||
/original_autogpt - Original AutoGPT implementation (depends on forge)
|
||||
/direct_benchmark - Benchmark harness for testing agent performance
|
||||
classic/
|
||||
├── pyproject.toml # Single consolidated Poetry project
|
||||
├── poetry.lock # Single lock file
|
||||
├── forge/
|
||||
│ └── forge/ # Core agent framework package
|
||||
├── original_autogpt/
|
||||
│ └── autogpt/ # AutoGPT agent package
|
||||
├── direct_benchmark/
|
||||
│ └── direct_benchmark/ # Benchmark harness package
|
||||
└── benchmark/ # Challenge definitions (data, not code)
|
||||
```
|
||||
|
||||
Each Python subproject has its own `pyproject.toml` and uses Poetry for dependency management.
|
||||
All packages are managed by a single `pyproject.toml` at the classic/ root.
|
||||
|
||||
## Common Commands
|
||||
|
||||
### Setup & Install
|
||||
```bash
|
||||
# Install forge (core library)
|
||||
cd forge && poetry install
|
||||
|
||||
# Install original_autogpt (includes forge as dependency)
|
||||
cd original_autogpt && poetry install
|
||||
|
||||
# Install benchmark
|
||||
cd benchmark && poetry install
|
||||
|
||||
# Install with benchmark support (optional extra)
|
||||
cd forge && poetry install --extras benchmark
|
||||
cd original_autogpt && poetry install --extras benchmark
|
||||
# Install everything from classic/ directory
|
||||
cd classic
|
||||
poetry install
|
||||
```
|
||||
|
||||
### Running Agents
|
||||
```bash
|
||||
# Run forge agent (from forge directory)
|
||||
cd forge && poetry run python -m forge
|
||||
# Run forge agent
|
||||
poetry run python -m forge
|
||||
|
||||
# Run original autogpt (from original_autogpt directory)
|
||||
cd original_autogpt && poetry run serve --debug
|
||||
# Run original autogpt server
|
||||
poetry run serve --debug
|
||||
|
||||
# Run autogpt CLI
|
||||
cd original_autogpt && poetry run autogpt
|
||||
poetry run autogpt
|
||||
```
|
||||
|
||||
Agents run on `http://localhost:8000` by default.
|
||||
@@ -51,33 +49,34 @@ Agents run on `http://localhost:8000` by default.
|
||||
### Benchmarking
|
||||
```bash
|
||||
# Run benchmarks
|
||||
cd direct_benchmark && poetry run python -m direct_benchmark run
|
||||
poetry run direct-benchmark run
|
||||
|
||||
# Run specific strategies and models
|
||||
poetry run python -m direct_benchmark run \
|
||||
poetry run direct-benchmark run \
|
||||
--strategies one_shot,rewoo \
|
||||
--models claude \
|
||||
--parallel 4
|
||||
|
||||
# Run a single test
|
||||
poetry run python -m direct_benchmark run --tests ReadFile
|
||||
poetry run direct-benchmark run --tests ReadFile
|
||||
|
||||
# List available commands
|
||||
poetry run python -m direct_benchmark --help
|
||||
poetry run direct-benchmark --help
|
||||
```
|
||||
|
||||
### Testing
|
||||
```bash
|
||||
cd forge && poetry run pytest # All tests
|
||||
cd forge && poetry run pytest tests/ # Tests directory only
|
||||
cd forge && poetry run pytest -k test_name # Single test by name
|
||||
cd forge && poetry run pytest path/to/test.py # Specific test file
|
||||
cd forge && poetry run pytest --cov # With coverage
|
||||
poetry run pytest # All tests
|
||||
poetry run pytest forge/tests/ # Forge tests only
|
||||
poetry run pytest original_autogpt/tests/ # AutoGPT tests only
|
||||
poetry run pytest -k test_name # Single test by name
|
||||
poetry run pytest path/to/test.py # Specific test file
|
||||
poetry run pytest --cov # With coverage
|
||||
```
|
||||
|
||||
### Linting & Formatting
|
||||
|
||||
Run from forge/ or original_autogpt/ directory:
|
||||
Run from the classic/ directory:
|
||||
|
||||
```bash
|
||||
# Format everything (recommended to run together)
|
||||
@@ -106,20 +105,21 @@ The `forge` package is the foundation that other components depend on:
|
||||
- `forge/config/` - Configuration management
|
||||
|
||||
### Original AutoGPT
|
||||
Depends on forge via local path (`autogpt-forge = { path = "../forge" }`):
|
||||
- `autogpt/app/` - CLI application entry points
|
||||
- `autogpt/agents/` - Agent implementations
|
||||
- `autogpt/agent_factory/` - Agent creation logic
|
||||
- `original_autogpt/autogpt/app/` - CLI application entry points
|
||||
- `original_autogpt/autogpt/agents/` - Agent implementations
|
||||
- `original_autogpt/autogpt/agent_factory/` - Agent creation logic
|
||||
|
||||
### Direct Benchmark
|
||||
Benchmark harness for testing agent performance:
|
||||
- `direct_benchmark/` - CLI and harness code
|
||||
- `direct_benchmark/direct_benchmark/` - CLI and harness code
|
||||
- `benchmark/agbenchmark/challenges/` - Test cases organized by category (code, retrieval, data, etc.)
|
||||
- Reports generated in `direct_benchmark/reports/`
|
||||
|
||||
### Dependency Chain
|
||||
`original_autogpt` → `forge`
|
||||
`direct_benchmark` → `original_autogpt` → `forge`
|
||||
### Package Structure
|
||||
All three packages are included in a single Poetry project. Imports are fully qualified:
|
||||
- `from forge.agent.base import BaseAgent`
|
||||
- `from autogpt.agents.agent import Agent`
|
||||
- `from direct_benchmark.harness import BenchmarkHarness`
|
||||
|
||||
## Code Style
|
||||
|
||||
|
||||
@@ -18,15 +18,21 @@ AutoGPT Classic was one of the first implementations of autonomous AI agents - A
|
||||
|
||||
## Structure
|
||||
|
||||
- `/benchmark` - Performance testing tools
|
||||
- `/forge` - Core autonomous agent framework
|
||||
- `/original_autogpt` - Original implementation
|
||||
```
|
||||
classic/
|
||||
├── pyproject.toml # Single consolidated Poetry project
|
||||
├── poetry.lock # Single lock file
|
||||
├── forge/ # Core autonomous agent framework
|
||||
├── original_autogpt/ # Original implementation
|
||||
├── direct_benchmark/ # Benchmark harness
|
||||
└── benchmark/ # Challenge definitions (data)
|
||||
```
|
||||
|
||||
## Getting Started
|
||||
|
||||
### Prerequisites
|
||||
|
||||
- Python 3.10+
|
||||
- Python 3.12+
|
||||
- [Poetry](https://python-poetry.org/docs/#installation)
|
||||
|
||||
### Installation
|
||||
@@ -36,14 +42,8 @@ AutoGPT Classic was one of the first implementations of autonomous AI agents - A
|
||||
git clone https://github.com/Significant-Gravitas/AutoGPT.git
|
||||
cd classic
|
||||
|
||||
# Install forge (core library)
|
||||
cd forge && poetry install
|
||||
|
||||
# Or install original_autogpt (includes forge as dependency)
|
||||
cd original_autogpt && poetry install
|
||||
|
||||
# Install benchmark (optional)
|
||||
cd benchmark && poetry install
|
||||
# Install everything
|
||||
poetry install
|
||||
```
|
||||
|
||||
### Configuration
|
||||
@@ -81,15 +81,17 @@ FILE_STORAGE_BACKEND=local # local, s3, or gcs
|
||||
|
||||
### Running
|
||||
|
||||
All commands run from the `classic/` directory:
|
||||
|
||||
```bash
|
||||
# Run forge agent
|
||||
cd forge && poetry run python -m forge
|
||||
poetry run python -m forge
|
||||
|
||||
# Run original autogpt server
|
||||
cd original_autogpt && poetry run serve --debug
|
||||
poetry run serve --debug
|
||||
|
||||
# Run autogpt CLI
|
||||
cd original_autogpt && poetry run autogpt
|
||||
poetry run autogpt
|
||||
```
|
||||
|
||||
Agents run on `http://localhost:8000` by default.
|
||||
@@ -97,14 +99,15 @@ Agents run on `http://localhost:8000` by default.
|
||||
### Benchmarking
|
||||
|
||||
```bash
|
||||
cd benchmark && poetry run agbenchmark
|
||||
poetry run direct-benchmark run
|
||||
```
|
||||
|
||||
### Testing
|
||||
|
||||
```bash
|
||||
cd forge && poetry run pytest
|
||||
cd original_autogpt && poetry run pytest
|
||||
poetry run pytest # All tests
|
||||
poetry run pytest forge/tests/ # Forge tests only
|
||||
poetry run pytest original_autogpt/tests/ # AutoGPT tests only
|
||||
```
|
||||
|
||||
## Workspaces
|
||||
|
||||
@@ -8,33 +8,35 @@ The Direct Benchmark Harness is a high-performance testing framework for AutoGPT
|
||||
|
||||
## Quick Reference
|
||||
|
||||
All commands run from the `classic/` directory (parent of this directory):
|
||||
|
||||
```bash
|
||||
# Install
|
||||
cd classic/direct_benchmark
|
||||
# Install (one-time setup)
|
||||
cd classic
|
||||
poetry install
|
||||
|
||||
# Run benchmarks
|
||||
poetry run python -m direct_benchmark run
|
||||
poetry run direct-benchmark run
|
||||
|
||||
# Run specific strategies and models
|
||||
poetry run python -m direct_benchmark run \
|
||||
poetry run direct-benchmark run \
|
||||
--strategies one_shot,rewoo \
|
||||
--models claude,openai \
|
||||
--parallel 4
|
||||
|
||||
# Run a single test
|
||||
poetry run python -m direct_benchmark run \
|
||||
poetry run direct-benchmark run \
|
||||
--strategies one_shot \
|
||||
--tests ReadFile
|
||||
|
||||
# List available challenges
|
||||
poetry run python -m direct_benchmark list-challenges
|
||||
poetry run direct-benchmark list-challenges
|
||||
|
||||
# List model presets
|
||||
poetry run python -m direct_benchmark list-models
|
||||
poetry run direct-benchmark list-models
|
||||
|
||||
# List strategies
|
||||
poetry run python -m direct_benchmark list-strategies
|
||||
poetry run direct-benchmark list-strategies
|
||||
```
|
||||
|
||||
## CLI Options
|
||||
@@ -76,15 +78,15 @@ poetry run python -m direct_benchmark list-strategies
|
||||
### State Management Commands
|
||||
```bash
|
||||
# Show current state
|
||||
poetry run python -m direct_benchmark state show
|
||||
poetry run direct-benchmark state show
|
||||
|
||||
# Clear all state
|
||||
poetry run python -m direct_benchmark state clear
|
||||
poetry run direct-benchmark state clear
|
||||
|
||||
# Reset specific strategy/model/challenge
|
||||
poetry run python -m direct_benchmark state reset --strategy reflexion
|
||||
poetry run python -m direct_benchmark state reset --model claude-thinking-25k
|
||||
poetry run python -m direct_benchmark state reset --challenge ThreeSum
|
||||
poetry run direct-benchmark state reset --strategy reflexion
|
||||
poetry run direct-benchmark state reset --model claude-thinking-25k
|
||||
poetry run direct-benchmark state reset --challenge ThreeSum
|
||||
```
|
||||
|
||||
## Available Strategies
|
||||
@@ -228,7 +230,7 @@ reports/
|
||||
|
||||
### Run Full Benchmark Suite
|
||||
```bash
|
||||
poetry run python -m direct_benchmark run \
|
||||
poetry run direct-benchmark run \
|
||||
--strategies one_shot,rewoo,plan_execute \
|
||||
--models claude \
|
||||
--parallel 8
|
||||
@@ -236,7 +238,7 @@ poetry run python -m direct_benchmark run \
|
||||
|
||||
### Compare Strategies
|
||||
```bash
|
||||
poetry run python -m direct_benchmark run \
|
||||
poetry run direct-benchmark run \
|
||||
--strategies one_shot,rewoo,plan_execute,reflexion \
|
||||
--models claude \
|
||||
--tests ReadFile,WriteFile,ThreeSum
|
||||
@@ -244,7 +246,7 @@ poetry run python -m direct_benchmark run \
|
||||
|
||||
### Debug a Failing Test
|
||||
```bash
|
||||
poetry run python -m direct_benchmark run \
|
||||
poetry run direct-benchmark run \
|
||||
--strategies one_shot \
|
||||
--tests FailingTest \
|
||||
--keep-answers \
|
||||
@@ -257,29 +259,29 @@ State is saved to `.benchmark_state.json` in the reports directory.
|
||||
|
||||
```bash
|
||||
# Run benchmarks - will resume from last run automatically
|
||||
poetry run python -m direct_benchmark run \
|
||||
poetry run direct-benchmark run \
|
||||
--strategies one_shot,reflexion \
|
||||
--models claude
|
||||
|
||||
# Start fresh (clear all saved state)
|
||||
poetry run python -m direct_benchmark run --fresh \
|
||||
poetry run direct-benchmark run --fresh \
|
||||
--strategies one_shot,reflexion \
|
||||
--models claude
|
||||
|
||||
# Reset specific strategy and re-run
|
||||
poetry run python -m direct_benchmark run \
|
||||
poetry run direct-benchmark run \
|
||||
--reset-strategy reflexion \
|
||||
--strategies one_shot,reflexion \
|
||||
--models claude
|
||||
|
||||
# Reset specific model and re-run
|
||||
poetry run python -m direct_benchmark run \
|
||||
poetry run direct-benchmark run \
|
||||
--reset-model claude-thinking-25k \
|
||||
--strategies one_shot \
|
||||
--models claude,claude-thinking-25k
|
||||
|
||||
# Retry only the failures from the last run
|
||||
poetry run python -m direct_benchmark run --retry-failures \
|
||||
poetry run direct-benchmark run --retry-failures \
|
||||
--strategies one_shot,reflexion \
|
||||
--models claude
|
||||
```
|
||||
@@ -287,9 +289,9 @@ poetry run python -m direct_benchmark run --retry-failures \
|
||||
### CI/Scripting Mode
|
||||
```bash
|
||||
# JSON output (parseable)
|
||||
poetry run python -m direct_benchmark run --json
|
||||
poetry run direct-benchmark run --json
|
||||
|
||||
# CI mode - shows completion blocks without Live display
|
||||
# Auto-enabled when CI=true env var is set or stdout is not a TTY
|
||||
poetry run python -m direct_benchmark run --ci
|
||||
poetry run direct-benchmark run --ci
|
||||
```
|
||||
|
||||
@@ -13,8 +13,10 @@ High-performance benchmark harness for AutoGPT that directly instantiates agents
|
||||
|
||||
## Installation
|
||||
|
||||
All commands run from the `classic/` directory (parent of this directory):
|
||||
|
||||
```bash
|
||||
cd classic/direct_benchmark
|
||||
cd classic
|
||||
poetry install
|
||||
```
|
||||
|
||||
@@ -22,41 +24,41 @@ poetry install
|
||||
|
||||
```bash
|
||||
# Run benchmarks with default settings
|
||||
poetry run python -m direct_benchmark run
|
||||
poetry run direct-benchmark run
|
||||
|
||||
# Run specific strategies and models
|
||||
poetry run python -m direct_benchmark run \
|
||||
poetry run direct-benchmark run \
|
||||
--strategies one_shot,rewoo \
|
||||
--models claude,openai \
|
||||
--parallel 4
|
||||
|
||||
# Run a single test
|
||||
poetry run python -m direct_benchmark run \
|
||||
poetry run direct-benchmark run \
|
||||
--strategies one_shot \
|
||||
--tests ReadFile
|
||||
|
||||
# Run multiple attempts per challenge
|
||||
poetry run python -m direct_benchmark run \
|
||||
poetry run direct-benchmark run \
|
||||
--strategies one_shot \
|
||||
--attempts 3
|
||||
|
||||
# Run only regression tests (previously beaten)
|
||||
poetry run python -m direct_benchmark run --maintain
|
||||
poetry run direct-benchmark run --maintain
|
||||
|
||||
# Run only non-regression tests (not consistently beaten)
|
||||
poetry run python -m direct_benchmark run --improve
|
||||
poetry run direct-benchmark run --improve
|
||||
|
||||
# Run only never-beaten challenges
|
||||
poetry run python -m direct_benchmark run --explore
|
||||
poetry run direct-benchmark run --explore
|
||||
|
||||
# List available challenges
|
||||
poetry run python -m direct_benchmark list-challenges
|
||||
poetry run direct-benchmark list-challenges
|
||||
|
||||
# List model presets
|
||||
poetry run python -m direct_benchmark list-models
|
||||
poetry run direct-benchmark list-models
|
||||
|
||||
# List strategies
|
||||
poetry run python -m direct_benchmark list-strategies
|
||||
poetry run direct-benchmark list-strategies
|
||||
```
|
||||
|
||||
## CLI Options
|
||||
|
||||
7678
classic/direct_benchmark/poetry.lock
generated
7678
classic/direct_benchmark/poetry.lock
generated
File diff suppressed because it is too large
Load Diff
@@ -1,42 +0,0 @@
|
||||
[tool.poetry]
|
||||
name = "direct-benchmark"
|
||||
version = "0.1.0"
|
||||
authors = ["Significant Gravitas <support@agpt.co>"]
|
||||
description = "Direct benchmark harness for AutoGPT - parallel execution without HTTP server"
|
||||
readme = "README.md"
|
||||
packages = [{ include = "direct_benchmark" }]
|
||||
|
||||
[tool.poetry.scripts]
|
||||
direct-benchmark = "direct_benchmark.__main__:main"
|
||||
|
||||
[tool.poetry.dependencies]
|
||||
python = "^3.12"
|
||||
# Core dependencies
|
||||
autogpt-forge = { path = "../forge", develop = true }
|
||||
autogpt = { path = "../original_autogpt", develop = true }
|
||||
click = "^8.0"
|
||||
pydantic = "^2.7.2"
|
||||
rich = "^13.0"
|
||||
# Required for evaluating challenges that use pytest-based tests
|
||||
pytest = "^8.0"
|
||||
|
||||
[tool.poetry.group.dev.dependencies]
|
||||
black = "^23.12.1"
|
||||
isort = "^5.13.0"
|
||||
pyright = "^1.1.0"
|
||||
|
||||
[tool.black]
|
||||
line-length = 88
|
||||
target-version = ['py312']
|
||||
|
||||
[tool.isort]
|
||||
profile = "black"
|
||||
src_paths = ["direct_benchmark"]
|
||||
|
||||
[tool.pyright]
|
||||
pythonVersion = "3.12"
|
||||
typeCheckingMode = "basic"
|
||||
|
||||
[build-system]
|
||||
requires = ["poetry-core"]
|
||||
build-backend = "poetry.core.masonry.api"
|
||||
@@ -4,13 +4,15 @@ This file provides guidance to Claude Code (claude.ai/code) when working with co
|
||||
|
||||
## Quick Reference
|
||||
|
||||
All commands run from the `classic/` directory (parent of this directory):
|
||||
|
||||
```bash
|
||||
# Run forge agent server (port 8000)
|
||||
poetry run python -m forge
|
||||
|
||||
# Run tests
|
||||
poetry run pytest
|
||||
poetry run pytest --cov=forge
|
||||
# Run forge tests
|
||||
poetry run pytest forge/tests/
|
||||
poetry run pytest forge/tests/ --cov=forge
|
||||
poetry run pytest -k test_name
|
||||
```
|
||||
|
||||
@@ -317,9 +319,10 @@ GET /ap/v1/agent/tasks/{id}/artifacts # List artifacts
|
||||
**Fixtures** (`conftest.py`):
|
||||
- `storage` - Temporary LocalFileStorage
|
||||
|
||||
Run from the `classic/` directory:
|
||||
```bash
|
||||
poetry run pytest # All tests
|
||||
poetry run pytest --cov=forge # With coverage
|
||||
poetry run pytest forge/tests/ # All forge tests
|
||||
poetry run pytest forge/tests/ --cov=forge # With coverage
|
||||
```
|
||||
|
||||
**Note**: Tests requiring API keys (OPENAI_API_KEY, ANTHROPIC_API_KEY) will be skipped if not set.
|
||||
|
||||
@@ -4,8 +4,11 @@ Core autonomous agent framework for building AI agents.
|
||||
|
||||
## Quick Start
|
||||
|
||||
All commands run from the `classic/` directory (parent of this directory):
|
||||
|
||||
```bash
|
||||
# Install
|
||||
# Install (one-time setup)
|
||||
cd classic
|
||||
poetry install
|
||||
|
||||
# Configure
|
||||
|
||||
8126
classic/forge/poetry.lock
generated
8126
classic/forge/poetry.lock
generated
File diff suppressed because one or more lines are too long
@@ -4,6 +4,8 @@ This file provides guidance to Claude Code (claude.ai/code) when working with co
|
||||
|
||||
## Quick Reference
|
||||
|
||||
All commands run from the `classic/` directory (parent of this directory):
|
||||
|
||||
```bash
|
||||
# Run interactive CLI
|
||||
poetry run autogpt run
|
||||
@@ -12,8 +14,8 @@ poetry run autogpt run
|
||||
poetry run serve --debug
|
||||
|
||||
# Run tests
|
||||
poetry run pytest
|
||||
poetry run pytest tests/unit/ -v
|
||||
poetry run pytest original_autogpt/tests/
|
||||
poetry run pytest original_autogpt/tests/unit/ -v
|
||||
poetry run pytest -k test_name
|
||||
```
|
||||
|
||||
@@ -228,13 +230,13 @@ autogpt run [OPTIONS]
|
||||
- `llm_provider` - MultiProvider
|
||||
- `agent` - Fully initialized Agent
|
||||
|
||||
**Running**:
|
||||
**Running** (from `classic/` directory):
|
||||
```bash
|
||||
poetry run pytest # All tests
|
||||
poetry run pytest tests/unit/ -v # Unit tests
|
||||
poetry run pytest tests/integration/ # Integration tests
|
||||
poetry run pytest -k test_config # By name
|
||||
OPENAI_API_KEY=sk-dummy poetry run pytest # With dummy key
|
||||
poetry run pytest original_autogpt/tests/ # All tests
|
||||
poetry run pytest original_autogpt/tests/unit/ -v # Unit tests
|
||||
poetry run pytest original_autogpt/tests/integration/ # Integration tests
|
||||
poetry run pytest -k test_config # By name
|
||||
OPENAI_API_KEY=sk-dummy poetry run pytest original_autogpt/ # With dummy key
|
||||
```
|
||||
|
||||
## Common Tasks
|
||||
@@ -275,26 +277,23 @@ TEMPERATURE=0.7
|
||||
|
||||
## Benchmarking
|
||||
|
||||
Use `direct_benchmark` (sibling directory) to run performance benchmarks:
|
||||
Run performance benchmarks from the `classic/` directory:
|
||||
|
||||
```bash
|
||||
# From classic/direct_benchmark/
|
||||
cd ../direct_benchmark
|
||||
|
||||
# Run a single test
|
||||
poetry run python -m direct_benchmark run --tests ReadFile
|
||||
poetry run direct-benchmark run --tests ReadFile
|
||||
|
||||
# Run with specific strategies and models
|
||||
poetry run python -m direct_benchmark run \
|
||||
poetry run direct-benchmark run \
|
||||
--strategies one_shot,rewoo \
|
||||
--models claude \
|
||||
--parallel 4
|
||||
|
||||
# Run regression tests only
|
||||
poetry run python -m direct_benchmark run --maintain
|
||||
poetry run direct-benchmark run --maintain
|
||||
|
||||
# List available challenges
|
||||
poetry run python -m direct_benchmark list-challenges
|
||||
poetry run direct-benchmark list-challenges
|
||||
```
|
||||
|
||||
See `direct_benchmark/CLAUDE.md` for full documentation on strategies, model presets, and CLI options.
|
||||
|
||||
@@ -26,13 +26,16 @@ Demo made by <a href=https://twitter.com/BlakeWerlinger>Blake Werlinger</a>
|
||||
## Setting up AutoGPT
|
||||
|
||||
### Prerequisites
|
||||
- Python 3.10+
|
||||
- Python 3.12+
|
||||
- [Poetry](https://python-poetry.org/docs/#installation)
|
||||
- OpenAI [API Key](https://platform.openai.com/account/api-keys)
|
||||
|
||||
### Installation
|
||||
|
||||
All commands run from the `classic/` directory (parent of this directory):
|
||||
|
||||
```bash
|
||||
cd classic
|
||||
poetry install
|
||||
cp .env.template .env
|
||||
# Edit .env with your OPENAI_API_KEY
|
||||
|
||||
@@ -1,113 +0,0 @@
|
||||
[tool.poetry]
|
||||
name = "autogpt"
|
||||
version = "0.5.0"
|
||||
authors = ["Significant Gravitas <support@agpt.co>"]
|
||||
readme = "README.md"
|
||||
description = "An open-source attempt to make GPT-4 autonomous. Run 'autogpt' in any directory to start."
|
||||
homepage = "https://github.com/Significant-Gravitas/AutoGPT/tree/master/autogpt"
|
||||
classifiers = [
|
||||
"Programming Language :: Python :: 3",
|
||||
"Programming Language :: Python :: 3.12",
|
||||
"Programming Language :: Python :: 3.13",
|
||||
"Programming Language :: Python :: 3.14",
|
||||
"License :: OSI Approved :: MIT License",
|
||||
"Operating System :: OS Independent",
|
||||
"Development Status :: 4 - Beta",
|
||||
"Intended Audience :: Developers",
|
||||
"Topic :: Scientific/Engineering :: Artificial Intelligence",
|
||||
]
|
||||
packages = [{ include = "autogpt" }]
|
||||
keywords = ["autogpt", "ai", "agents", "autonomous", "llm", "gpt", "openai"]
|
||||
|
||||
[tool.poetry.urls]
|
||||
"Bug Tracker" = "https://github.com/Significant-Gravitas/AutoGPT/issues"
|
||||
"Documentation" = "https://docs.agpt.co"
|
||||
|
||||
[tool.poetry.scripts]
|
||||
autogpt = "autogpt.app.cli:cli"
|
||||
serve = "autogpt.app.cli:serve"
|
||||
|
||||
|
||||
[tool.poetry.dependencies]
|
||||
python = "^3.12"
|
||||
# For development, use local path; for publishing, use versioned package
|
||||
autogpt-forge = { path = "../forge", develop = true }
|
||||
# autogpt-forge = "^0.2.0" # Uncomment for PyPI release
|
||||
click = "*"
|
||||
colorama = "^0.4.6"
|
||||
distro = "^1.8.0"
|
||||
fastapi = "^0.109.1"
|
||||
gitpython = "^3.1.32"
|
||||
hypercorn = "^0.14.4"
|
||||
openai = "^1.50.0"
|
||||
orjson = "^3.8.10"
|
||||
pydantic = "^2.7.2"
|
||||
python-dotenv = "^1.0.0"
|
||||
requests = "*"
|
||||
sentry-sdk = "^1.40.4"
|
||||
# UI dependencies
|
||||
rich = "^13.0"
|
||||
prompt-toolkit = "^3.0.0"
|
||||
|
||||
# Benchmarking - use direct_benchmark instead (agbenchmark removed)
|
||||
|
||||
[tool.poetry.extras]
|
||||
# benchmark extra removed - use direct_benchmark instead
|
||||
|
||||
[tool.poetry.group.dev.dependencies]
|
||||
black = "^23.12.1"
|
||||
flake8 = "^7.0.0"
|
||||
isort = "^5.13.1"
|
||||
pre-commit = "*"
|
||||
pyright = "^1.1.364"
|
||||
|
||||
# Type stubs
|
||||
types-colorama = "*"
|
||||
|
||||
# Testing
|
||||
pytest = "*"
|
||||
pytest-asyncio = "*"
|
||||
pytest-cov = "*"
|
||||
pytest-mock = "*"
|
||||
pytest-recording = "*"
|
||||
pytest-xdist = "*"
|
||||
aiohttp-retry = "^2.9.1"
|
||||
|
||||
[tool.poetry.group.build]
|
||||
optional = true
|
||||
|
||||
[tool.poetry.group.build.dependencies]
|
||||
cx-freeze = { git = "https://github.com/ntindle/cx_Freeze.git", rev = "main" }
|
||||
# HACK: switch to cx-freeze release package after #2442 and #2472 are merged: https://github.com/marcelotduarte/cx_Freeze/pulls?q=is:pr+%232442+OR+%232472+
|
||||
# cx-freeze = { version = "^7.2.0", optional = true }
|
||||
|
||||
|
||||
[build-system]
|
||||
requires = ["poetry-core"]
|
||||
build-backend = "poetry.core.masonry.api"
|
||||
|
||||
|
||||
[tool.black]
|
||||
line-length = 88
|
||||
target-version = ['py312']
|
||||
include = '\.pyi?$'
|
||||
|
||||
|
||||
[tool.isort]
|
||||
profile = "black"
|
||||
skip_glob = ["data"]
|
||||
|
||||
|
||||
[tool.pytest.ini_options]
|
||||
markers = [
|
||||
"slow: marks tests as slow (deselect with '-m \"not slow\"')",
|
||||
"integration: marks tests as integration tests",
|
||||
"requires_agent: marks tests that require a running agent with API keys",
|
||||
]
|
||||
asyncio_mode = "auto"
|
||||
|
||||
|
||||
[tool.pyright]
|
||||
pythonVersion = "3.12"
|
||||
exclude = ["data/**", "**/node_modules", "**/__pycache__", "**/.*"]
|
||||
ignore = ["../classic/forge/**"]
|
||||
File diff suppressed because one or more lines are too long
@@ -1,12 +1,12 @@
|
||||
[tool.poetry]
|
||||
name = "autogpt-forge"
|
||||
version = "0.2.0"
|
||||
description = "Core library for building autonomous AI agents"
|
||||
authors = ["AutoGPT <support@agpt.co>"]
|
||||
name = "autogpt-classic"
|
||||
version = "0.5.0"
|
||||
description = "AutoGPT Classic - autonomous agent framework"
|
||||
authors = ["Significant Gravitas <support@agpt.co>"]
|
||||
license = "MIT"
|
||||
readme = "README.md"
|
||||
packages = [{ include = "forge" }]
|
||||
keywords = ["autogpt", "ai", "agents", "autonomous", "llm"]
|
||||
homepage = "https://github.com/Significant-Gravitas/AutoGPT"
|
||||
keywords = ["autogpt", "ai", "agents", "autonomous", "llm", "gpt", "openai"]
|
||||
classifiers = [
|
||||
"Development Status :: 4 - Beta",
|
||||
"Intended Audience :: Developers",
|
||||
@@ -16,83 +16,130 @@ classifiers = [
|
||||
"Programming Language :: Python :: 3.12",
|
||||
"Programming Language :: Python :: 3.13",
|
||||
"Programming Language :: Python :: 3.14",
|
||||
"Topic :: Scientific/Engineering :: Artificial Intelligence",
|
||||
]
|
||||
packages = [
|
||||
{ include = "forge", from = "forge" },
|
||||
{ include = "autogpt", from = "original_autogpt" },
|
||||
{ include = "direct_benchmark", from = "direct_benchmark" },
|
||||
]
|
||||
|
||||
[tool.poetry.urls]
|
||||
"Homepage" = "https://github.com/Significant-Gravitas/AutoGPT"
|
||||
"Bug Tracker" = "https://github.com/Significant-Gravitas/AutoGPT/issues"
|
||||
"Documentation" = "https://docs.agpt.co"
|
||||
|
||||
[tool.poetry.scripts]
|
||||
autogpt = "autogpt.app.cli:cli"
|
||||
serve = "autogpt.app.cli:serve"
|
||||
direct-benchmark = "direct_benchmark.__main__:main"
|
||||
|
||||
[tool.poetry.dependencies]
|
||||
python = "^3.12"
|
||||
# agbenchmark removed - use direct_benchmark instead
|
||||
|
||||
# Core networking & web
|
||||
aiohttp = "^3.8.5"
|
||||
anthropic = "^0.45.0"
|
||||
beautifulsoup4 = "^4.12.2"
|
||||
boto3 = "^1.33.6"
|
||||
charset-normalizer = "^3.1.0"
|
||||
chromadb = "^1.4.0"
|
||||
click = "*"
|
||||
colorama = "^0.4.6"
|
||||
demjson3 = "^3.0.0"
|
||||
docker = "*"
|
||||
ddgs = "^9.9"
|
||||
requests = "*"
|
||||
trafilatura = "^2.0"
|
||||
en-core-web-sm = { url = "https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl" }
|
||||
|
||||
# LLM providers
|
||||
anthropic = "^0.45.0"
|
||||
groq = "^0.8.0"
|
||||
litellm = "^1.17.9"
|
||||
openai = "^1.50.0"
|
||||
tiktoken = ">=0.7.0,<1.0.0"
|
||||
|
||||
# Web frameworks
|
||||
fastapi = "^0.109.1"
|
||||
gitpython = "^3.1.32"
|
||||
google-api-python-client = "^2.116"
|
||||
hypercorn = "^0.14.4"
|
||||
uvicorn = { extras = ["standard"], version = ">=0.23.2,<1" }
|
||||
python-multipart = "^0.0.7"
|
||||
|
||||
# Data processing
|
||||
charset-normalizer = "^3.1.0"
|
||||
demjson3 = "^3.0.0"
|
||||
jsonschema = "*"
|
||||
orjson = "^3.8.10"
|
||||
pydantic = "^2.7.2"
|
||||
pyyaml = "^6.0"
|
||||
toml = "^0.10.2"
|
||||
|
||||
# Database & storage
|
||||
boto3 = "^1.33.6"
|
||||
chromadb = "^1.4.0"
|
||||
google-cloud-logging = "^3.8.0"
|
||||
google-cloud-storage = "^2.13.0"
|
||||
groq = "^0.8.0"
|
||||
sqlalchemy = "^2.0.19"
|
||||
|
||||
# CLI & UI
|
||||
click = "^8.0"
|
||||
colorama = "^0.4.6"
|
||||
prompt-toolkit = "^3.0.0"
|
||||
rich = "^13.0"
|
||||
|
||||
# Document processing
|
||||
pypdf = "^3.1.0"
|
||||
python-docx = "*"
|
||||
pylatexenc = "*"
|
||||
Pillow = "*"
|
||||
|
||||
# NLP
|
||||
en-core-web-sm = { url = "https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl" }
|
||||
spacy = "^3.8.0"
|
||||
|
||||
# Web automation & search
|
||||
ddgs = "^9.9"
|
||||
google-api-python-client = "^2.116"
|
||||
playwright = "^1.50.0"
|
||||
|
||||
# Utilities
|
||||
distro = "^1.8.0"
|
||||
docker = "*"
|
||||
gitpython = "^3.1.32"
|
||||
gTTS = "^2.3.1"
|
||||
jinja2 = "^3.1.2"
|
||||
jsonschema = "*"
|
||||
litellm = "^1.17.9"
|
||||
numpy = ">=2.0.0"
|
||||
openai = "^1.50.0"
|
||||
Pillow = "*"
|
||||
playwright = "^1.50.0"
|
||||
playsound = "~1.2.2"
|
||||
pydantic = "^2.7.2"
|
||||
python-docx = "*"
|
||||
python-dotenv = "^1.0.0"
|
||||
python-multipart = "^0.0.7"
|
||||
pylatexenc = "*"
|
||||
pypdf = "^3.1.0"
|
||||
pyyaml = "^6.0"
|
||||
requests = "*"
|
||||
sqlalchemy = "^2.0.19"
|
||||
sentry-sdk = "^1.40.4"
|
||||
spacy = "^3.8.0"
|
||||
tenacity = "^8.2.2"
|
||||
tiktoken = ">=0.7.0,<1.0.0"
|
||||
toml = "^0.10.2"
|
||||
uvicorn = { extras = ["standard"], version = ">=0.23.2,<1" }
|
||||
watchdog = "^6.0.0"
|
||||
|
||||
[tool.poetry.extras]
|
||||
# benchmark extra removed - use direct_benchmark instead
|
||||
# Testing (needed for direct_benchmark challenge evaluation)
|
||||
pytest = "^8.0"
|
||||
|
||||
|
||||
[tool.poetry.group.dev.dependencies]
|
||||
# Formatting & linting
|
||||
black = "^23.12.1"
|
||||
flake8 = "^7.0.0"
|
||||
isort = "^5.13.1"
|
||||
pyright = "^1.1.364"
|
||||
pre-commit = "^3.3.3"
|
||||
pyright = "^1.1.364"
|
||||
|
||||
# Type stubs
|
||||
boto3-stubs = { extras = ["s3"], version = "^1.33.6" }
|
||||
google-api-python-client-stubs = "^1.24"
|
||||
types-beautifulsoup4 = "*"
|
||||
types-colorama = "*"
|
||||
types-Pillow = "*"
|
||||
types-requests = "^2.31.0.2"
|
||||
|
||||
# Testing
|
||||
pytest = "^7.4.0"
|
||||
pytest-asyncio = "^0.23.3"
|
||||
pytest-cov = "^5.0.0"
|
||||
pytest-mock = "*"
|
||||
pytest-recording = "*"
|
||||
pytest-xdist = "*"
|
||||
mock = "^5.1.0"
|
||||
aiohttp-retry = "^2.9.1"
|
||||
|
||||
|
||||
[tool.poetry.group.build]
|
||||
optional = true
|
||||
|
||||
[tool.poetry.group.build.dependencies]
|
||||
cx-freeze = { git = "https://github.com/ntindle/cx_Freeze.git", rev = "main" }
|
||||
|
||||
|
||||
[build-system]
|
||||
@@ -108,13 +155,20 @@ include = '\.pyi?$'
|
||||
|
||||
[tool.isort]
|
||||
profile = "black"
|
||||
skip_glob = ["data"]
|
||||
|
||||
|
||||
[tool.pyright]
|
||||
pythonVersion = "3.12"
|
||||
exclude = ["data/**", "**/node_modules", "**/__pycache__", "**/.*"]
|
||||
|
||||
|
||||
[tool.pytest.ini_options]
|
||||
pythonpath = ["forge"]
|
||||
testpaths = ["forge", "tests"]
|
||||
markers = ["slow"]
|
||||
pythonpath = ["."]
|
||||
testpaths = ["forge/forge", "forge/tests", "original_autogpt/tests"]
|
||||
asyncio_mode = "auto"
|
||||
markers = [
|
||||
"slow: marks tests as slow (deselect with '-m \"not slow\"')",
|
||||
"integration: marks tests as integration tests",
|
||||
"requires_agent: marks tests that require a running agent with API keys",
|
||||
]
|
||||
Reference in New Issue
Block a user