Compare commits

25 commits: `gitbook` ... `make-old-w`

| Author | SHA1 | Date |
|---|---|---|
|  | e0784f8f6b |  |
|  | 3040f39136 |  |
|  | 515504c604 |  |
|  | 18edeaeaf4 |  |
|  | 44182aff9c |  |
|  | 864c5a7846 |  |
|  | 699fffb1a8 |  |
|  | f0641c2d26 |  |
|  | 94b6f74c95 |  |
|  | 46aabab3ea |  |
|  | 0a65df5102 |  |
|  | 6fbd208fe3 |  |
|  | 8fc174ca87 |  |
|  | cacc89790f |  |
|  | b9113bee02 |  |
|  | 3f65da03e7 |  |
|  | 9e96d11b2d |  |
|  | 4c264b7ae9 |  |
|  | 0adbc0bd05 |  |
|  | 8f3291bc92 |  |
|  | 7a20de880d |  |
|  | ef8a6d2528 |  |
|  | fd66be2aaa |  |
|  | ae2cc97dc4 |  |
|  | ea521eed26 |  |

(Author and Date columns were not captured in this export.)
**.github/workflows/classic-autogpt-ci.yml** (vendored, 2 changes)

```diff
@@ -29,7 +29,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        python-version: ["3.10"]
+        python-version: ["3.12", "3.13", "3.14"]
         platform-os: [ubuntu, macos, macos-arm64, windows]
     runs-on: ${{ matrix.platform-os != 'macos-arm64' && format('{0}-latest', matrix.platform-os) || 'macos-14' }}

```
**.github/workflows/classic-autogpts-ci.yml** (vendored, 13 changes)

```diff
@@ -11,9 +11,6 @@ on:
       - 'classic/original_autogpt/**'
       - 'classic/forge/**'
      - 'classic/benchmark/**'
-      - 'classic/run'
-      - 'classic/cli.py'
-      - 'classic/setup.py'
       - '!**/*.md'
   pull_request:
     branches: [ master, dev, release-* ]
@@ -22,9 +19,6 @@ on:
       - 'classic/original_autogpt/**'
       - 'classic/forge/**'
       - 'classic/benchmark/**'
-      - 'classic/run'
-      - 'classic/cli.py'
-      - 'classic/setup.py'
       - '!**/*.md'

 defaults:
@@ -59,10 +53,15 @@ jobs:
       run: |
         curl -sSL https://install.python-poetry.org | python -

     - name: Install dependencies
       working-directory: ./classic/${{ matrix.agent-name }}/
       run: poetry install

     - name: Run regression tests
       run: |
         ./run agent start ${{ matrix.agent-name }}
         cd ${{ matrix.agent-name }}
         poetry run serve &
         sleep 10 # Wait for server to start
         poetry run agbenchmark --mock --test=BasicRetrieval --test=Battleship --test=WebArenaTask_0
         poetry run agbenchmark --test=WriteFile
       env:
```
**.github/workflows/classic-benchmark-ci.yml** (vendored, 11 changes)

```diff
@@ -23,7 +23,7 @@ defaults:
     shell: bash

 env:
-  min-python-version: '3.10'
+  min-python-version: '3.12'

 jobs:
   test:
@@ -33,7 +33,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        python-version: ["3.10"]
+        python-version: ["3.12", "3.13", "3.14"]
         platform-os: [ubuntu, macos, macos-arm64, windows]
     runs-on: ${{ matrix.platform-os != 'macos-arm64' && format('{0}-latest', matrix.platform-os) || 'macos-14' }}
     defaults:
@@ -128,11 +128,16 @@ jobs:
       run: |
         curl -sSL https://install.python-poetry.org | python -

     - name: Install agent dependencies
       working-directory: classic/${{ matrix.agent-name }}
       run: poetry install

     - name: Run regression tests
       working-directory: classic
       run: |
         ./run agent start ${{ matrix.agent-name }}
         cd ${{ matrix.agent-name }}
         poetry run python -m forge &
         sleep 10 # Wait for server to start

         set +e # Ignore non-zero exit codes and continue execution
         echo "Running the following command: poetry run agbenchmark --maintain --mock"
```
**.github/workflows/classic-forge-ci.yml** (vendored, 2 changes)

```diff
@@ -31,7 +31,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        python-version: ["3.10"]
+        python-version: ["3.12", "3.13", "3.14"]
         platform-os: [ubuntu, macos, macos-arm64, windows]
     runs-on: ${{ matrix.platform-os != 'macos-arm64' && format('{0}-latest', matrix.platform-os) || 'macos-14' }}

```
**.github/workflows/classic-frontend-ci.yml** (vendored, deleted, 60 lines)

```yaml
name: Classic - Frontend CI/CD

on:
  push:
    branches:
      - master
      - dev
      - 'ci-test*' # This will match any branch that starts with "ci-test"
    paths:
      - 'classic/frontend/**'
      - '.github/workflows/classic-frontend-ci.yml'
  pull_request:
    paths:
      - 'classic/frontend/**'
      - '.github/workflows/classic-frontend-ci.yml'

jobs:
  build:
    permissions:
      contents: write
      pull-requests: write
    runs-on: ubuntu-latest
    env:
      BUILD_BRANCH: ${{ format('classic-frontend-build/{0}', github.ref_name) }}

    steps:
      - name: Checkout Repo
        uses: actions/checkout@v4

      - name: Setup Flutter
        uses: subosito/flutter-action@v2
        with:
          flutter-version: '3.13.2'

      - name: Build Flutter to Web
        run: |
          cd classic/frontend
          flutter build web --base-href /app/

      # - name: Commit and Push to ${{ env.BUILD_BRANCH }}
      #   if: github.event_name == 'push'
      #   run: |
      #     git config --local user.email "action@github.com"
      #     git config --local user.name "GitHub Action"
      #     git add classic/frontend/build/web
      #     git checkout -B ${{ env.BUILD_BRANCH }}
      #     git commit -m "Update frontend build to ${GITHUB_SHA:0:7}" -a
      #     git push -f origin ${{ env.BUILD_BRANCH }}

      - name: Create PR ${{ env.BUILD_BRANCH }} -> ${{ github.ref_name }}
        if: github.event_name == 'push'
        uses: peter-evans/create-pull-request@v7
        with:
          add-paths: classic/frontend/build/web
          base: ${{ github.ref_name }}
          branch: ${{ env.BUILD_BRANCH }}
          delete-branch: true
          title: "Update frontend build in `${{ github.ref_name }}`"
          body: "This PR updates the frontend build based on commit ${{ github.sha }}."
          commit-message: "Update frontend build based on commit ${{ github.sha }}"
```
**.github/workflows/classic-python-checks.yml** (vendored, 4 changes)

```diff
@@ -59,7 +59,7 @@ jobs:
   needs: get-changed-parts
   runs-on: ubuntu-latest
   env:
-    min-python-version: "3.10"
+    min-python-version: "3.12"

   strategy:
     matrix:
@@ -111,7 +111,7 @@ jobs:
   needs: get-changed-parts
   runs-on: ubuntu-latest
   env:
-    min-python-version: "3.10"
+    min-python-version: "3.12"

   strategy:
     matrix:
```
**.gitignore** (vendored, 3 changes)

```diff
@@ -3,6 +3,7 @@
 classic/original_autogpt/keys.py
 classic/original_autogpt/*.json
 auto_gpt_workspace/*
+.autogpt/
 *.mpeg
 .env
 # Root .env files
@@ -177,5 +178,5 @@ autogpt_platform/backend/settings.py

 *.ign.*
 .test-contents
-.claude/settings.local.json
+**/.claude/settings.local.json
 /autogpt_platform/backend/logs
```
**classic/CLAUDE.md** (new file, 134 lines)

# CLAUDE.md

This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.

## Project Overview

AutoGPT Classic is an experimental, **unsupported** project demonstrating autonomous GPT-4 operation. Dependencies will not be updated, and the codebase contains known vulnerabilities. This is preserved for educational/historical purposes.

## Repository Structure

```
/forge            - Core autonomous agent framework (main library)
/original_autogpt - Original AutoGPT implementation (depends on forge)
/benchmark        - Performance testing/benchmarking tools
```

Each Python subproject has its own `pyproject.toml` and uses Poetry for dependency management.

## Common Commands

### Setup & Install
```bash
# Install forge (core library)
cd forge && poetry install

# Install original_autogpt (includes forge as dependency)
cd original_autogpt && poetry install

# Install benchmark
cd benchmark && poetry install

# Install with benchmark support (optional extra)
cd forge && poetry install --extras benchmark
cd original_autogpt && poetry install --extras benchmark
```

### Running Agents
```bash
# Run forge agent (from forge directory)
cd forge && poetry run python -m forge

# Run original autogpt (from original_autogpt directory)
cd original_autogpt && poetry run serve --debug

# Run autogpt CLI
cd original_autogpt && poetry run autogpt
```

Agents run on `http://localhost:8000` by default.

### Benchmarking
```bash
# Run benchmarks against an agent
cd benchmark && poetry run agbenchmark

# Or from forge/original_autogpt with benchmark extra installed
cd forge && poetry run agbenchmark
cd original_autogpt && poetry run agbenchmark
```

### Testing
```bash
cd forge && poetry run pytest                  # All tests
cd forge && poetry run pytest tests/           # Tests directory only
cd forge && poetry run pytest -k test_name     # Single test by name
cd forge && poetry run pytest path/to/test.py  # Specific test file
cd forge && poetry run pytest --cov            # With coverage
```

### Linting & Formatting

Run from forge/ or original_autogpt/ directory:

```bash
# Format everything (recommended to run together)
poetry run black . && poetry run isort .

# Check formatting (CI-style, no changes)
poetry run black --check . && poetry run isort --check-only .

# Lint
poetry run flake8  # Style linting

# Type check
poetry run pyright  # Type checking (some errors are expected in infrastructure code)
```

Note: Always run linters over the entire directory, not specific files, for best results.

## Architecture

### Forge (Core Framework)
The `forge` package is the foundation that other components depend on:
- `forge/agent/` - Agent implementation and protocols
- `forge/llm/` - Multi-provider LLM integrations (OpenAI, Anthropic, Groq, LiteLLM)
- `forge/components/` - Reusable agent components
- `forge/file_storage/` - File system abstraction
- `forge/config/` - Configuration management

### Original AutoGPT
Depends on forge via local path (`autogpt-forge = { path = "../forge" }`):
- `autogpt/app/` - CLI application entry points
- `autogpt/agents/` - Agent implementations
- `autogpt/agent_factory/` - Agent creation logic

### Benchmark
Independent testing framework for evaluating agent performance:
- `agbenchmark/challenges/` - Test cases organized by category (code, retrieval, memory, etc.)
- `agbenchmark/reports/` - Benchmark result reporting

### Dependency Chain
`original_autogpt` → `forge` ← `benchmark` (optional extra)

## Code Style

- Python 3.10 target
- Line length: 88 characters (Black default)
- Black for formatting, isort for imports (profile="black")
- Type hints with Pyright checking

## Testing Patterns

- VCR cassettes in `/forge/tests/vcr_cassettes/` for HTTP mocking
- Async support via pytest-asyncio
- Fixtures defined in `conftest.py` files provide: `tmp_project_root`, `storage`, `config`, `llm_provider`, `agent`
- Tests require `OPENAI_API_KEY` environment variable (defaults to "sk-dummy" for mocked tests)
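For illustration only (not part of the captured file): a minimal sketch of a test that leans on the fixtures listed above. It assumes pytest-asyncio and the documented `agent` and `storage` fixtures; the asserted behavior is hypothetical.

```python
# Hypothetical test sketch using the conftest.py fixtures named above.
# Assumes pytest-asyncio is installed and the `agent`/`storage` fixtures exist.
import pytest


@pytest.mark.asyncio
async def test_agent_proposes_and_executes(agent, storage):  # fixtures injected by pytest
    # propose_action/execute mirror the BaseAgent interface documented in
    # forge/CLAUDE.md below; the assertion is illustrative only.
    proposal = await agent.propose_action()
    result = await agent.execute(proposal, user_feedback="")
    assert result is not None
```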
## Environment Setup

Copy `.env.example` to `.env` in the relevant directory and add your API keys:
```bash
cp .env.example .env
# Edit .env with your OPENAI_API_KEY, etc.
```
**Deleted file** (182 lines; filename not captured in this export)

## CLI Documentation

This document describes how to interact with the project's CLI (Command Line Interface). It includes the types of outputs you can expect from each command. Note that the `agents stop` command will terminate any process running on port 8000.

### 1. Entry Point for the CLI

Running the `./run` command without any parameters will display the help message, which provides a list of available commands and options. Additionally, you can append `--help` to any command to view help information specific to that command.

```sh
./run
```

**Output**:

```
Usage: cli.py [OPTIONS] COMMAND [ARGS]...

Options:
  --help  Show this message and exit.

Commands:
  agent      Commands to create, start and stop agents
  benchmark  Commands to start the benchmark and list tests and categories
  setup      Installs dependencies needed for your system.
```

If you need assistance with any command, simply add the `--help` parameter to the end of your command, like so:

```sh
./run COMMAND --help
```

This will display a detailed help message regarding that specific command, including a list of any additional options and arguments it accepts.

### 2. Setup Command

```sh
./run setup
```

**Output**:

```
Setup initiated
Installation has been completed.
```

This command initializes the setup of the project.

### 3. Agents Commands

**a. List All Agents**

```sh
./run agent list
```

**Output**:

```
Available agents: 🤖
	🐙 forge
	🐙 autogpt
```

Lists all the available agents.

**b. Create a New Agent**

```sh
./run agent create my_agent
```

**Output**:

```
🎉 New agent 'my_agent' created and switched to the new directory in agents folder.
```

Creates a new agent named 'my_agent'.

**c. Start an Agent**

```sh
./run agent start my_agent
```

**Output**:

```
... (ASCII Art representing the agent startup)
[Date and Time] [forge.sdk.db] [DEBUG] 🐛 Initializing AgentDB with database_string: sqlite:///agent.db
[Date and Time] [forge.sdk.agent] [INFO] 📝 Agent server starting on http://0.0.0.0:8000
```

Starts the 'my_agent' and displays startup ASCII art and logs.

**d. Stop an Agent**

```sh
./run agent stop
```

**Output**:

```
Agent stopped
```

Stops the running agent.

### 4. Benchmark Commands

**a. List Benchmark Categories**

```sh
./run benchmark categories list
```

**Output**:

```
Available categories: 📚
	📖 code
	📖 safety
	📖 memory
	... (and so on)
```

Lists all available benchmark categories.

**b. List Benchmark Tests**

```sh
./run benchmark tests list
```

**Output**:

```
Available tests: 📚
	📖 interface
		🔬 Search - TestSearch
		🔬 Write File - TestWriteFile
	... (and so on)
```

Lists all available benchmark tests.

**c. Show Details of a Benchmark Test**

```sh
./run benchmark tests details TestWriteFile
```

**Output**:

```
TestWriteFile
-------------

	Category: interface
	Task: Write the word 'Washington' to a .txt file
	... (and other details)
```

Displays the details of the 'TestWriteFile' benchmark test.

**d. Start Benchmark for the Agent**

```sh
./run benchmark start my_agent
```

**Output**:

```
(more details about the testing process shown whilst the test are running)
============= 13 failed, 1 passed in 0.97s =============...
```

Displays the results of the benchmark tests on 'my_agent'.
**Dockerfile diff** (path not captured in this export)

```diff
@@ -2,7 +2,7 @@
 ARG BUILD_TYPE=dev

 # Use an official Python base image from the Docker Hub
-FROM python:3.10-slim AS autogpt-base
+FROM python:3.12-slim AS autogpt-base

 # Install browsers
 RUN apt-get update && apt-get install -y \
@@ -34,9 +34,6 @@ COPY original_autogpt/pyproject.toml original_autogpt/poetry.lock ./
 # Include forge so it can be used as a path dependency
 COPY forge/ ../forge

-# Include frontend
-COPY frontend/ ../frontend
-
 # Set the entrypoint
 ENTRYPOINT ["poetry", "run", "autogpt"]
 CMD []
```
**Deleted file** (173 lines; filename not captured in this export)

# Quickstart Guide

> For the complete getting started [tutorial series](https://aiedge.medium.com/autogpt-forge-e3de53cc58ec) <- click here

Welcome to the Quickstart Guide! This guide will walk you through setting up, building, and running your own AutoGPT agent. Whether you're a seasoned AI developer or just starting out, this guide will provide you with the steps to jumpstart your journey in AI development with AutoGPT.

## System Requirements

This project supports Linux (Debian-based), Mac, and Windows Subsystem for Linux (WSL). If you use a Windows system, you must install WSL. You can find the installation instructions for WSL [here](https://learn.microsoft.com/en-us/windows/wsl/).


## Getting Setup
1. **Fork the Repository**
   To fork the repository, follow these steps:
   - Navigate to the main page of the repository.

   
   - In the top-right corner of the page, click Fork.

   
   - On the next page, select your GitHub account to create the fork.
   - Wait for the forking process to complete. You now have a copy of the repository in your GitHub account.

2. **Clone the Repository**
   To clone the repository, you need to have Git installed on your system. If you don't have Git installed, download it from [here](https://git-scm.com/downloads). Once you have Git installed, follow these steps:
   - Open your terminal.
   - Navigate to the directory where you want to clone the repository.
   - Run the git clone command for the fork you just created

   

   - Then open your project in your ide

   

4. **Setup the Project**
   Next, we need to set up the required dependencies. We have a tool to help you perform all the tasks on the repo.
   It can be accessed by running the `run` command by typing `./run` in the terminal.

   The first command you need to use is `./run setup.` This will guide you through setting up your system.
   Initially, you will get instructions for installing Flutter and Chrome and setting up your GitHub access token like the following image:

   

### For Windows Users

If you're a Windows user and experience issues after installing WSL, follow the steps below to resolve them.

#### Update WSL
Run the following command in Powershell or Command Prompt:
1. Enable the optional WSL and Virtual Machine Platform components.
2. Download and install the latest Linux kernel.
3. Set WSL 2 as the default.
4. Download and install the Ubuntu Linux distribution (a reboot may be required).

```shell
wsl --install
```

For more detailed information and additional steps, refer to [Microsoft's WSL Setup Environment Documentation](https://learn.microsoft.com/en-us/windows/wsl/setup/environment).

#### Resolve FileNotFoundError or "No such file or directory" Errors
When you run `./run setup`, if you encounter errors like `No such file or directory` or `FileNotFoundError`, it might be because Windows-style line endings (CRLF - Carriage Return Line Feed) are not compatible with Unix/Linux style line endings (LF - Line Feed).

To resolve this, you can use the `dos2unix` utility to convert the line endings in your script from CRLF to LF. Here’s how to install and run `dos2unix` on the script:

```shell
sudo apt update
sudo apt install dos2unix
dos2unix ./run
```

After executing the above commands, running `./run setup` should work successfully.

#### Store Project Files within the WSL File System
If you continue to experience issues, consider storing your project files within the WSL file system instead of the Windows file system. This method avoids path translations and permissions issues and provides a more consistent development environment.

You can keep running the command to get feedback on where you are up to with your setup.
When setup has been completed, the command will return an output like this:



## Creating Your Agent

After completing the setup, the next step is to create your agent template.
Execute the command `./run agent create YOUR_AGENT_NAME`, where `YOUR_AGENT_NAME` should be replaced with your chosen name.

Tips for naming your agent:
* Give it its own unique name, or name it after yourself
* Include an important aspect of your agent in the name, such as its purpose

Examples: `SwiftyosAssistant`, `PwutsPRAgent`, `MySuperAgent`



## Running your Agent

Your agent can be started using the command: `./run agent start YOUR_AGENT_NAME`

This starts the agent on the URL: `http://localhost:8000/`



The front end can be accessed from `http://localhost:8000/`; first, you must log in using either a Google account or your GitHub account.



Upon logging in, you will get a page that looks something like this: your task history down the left-hand side of the page, and the 'chat' window to send tasks to your agent.



When you have finished with your agent or just need to restart it, use Ctl-C to end the session. Then, you can re-run the start command.

If you are having issues and want to ensure the agent has been stopped, there is a `./run agent stop` command, which will kill the process using port 8000, which should be the agent.

## Benchmarking your Agent

The benchmarking system can also be accessed using the CLI too:

```bash
agpt % ./run benchmark
Usage: cli.py benchmark [OPTIONS] COMMAND [ARGS]...

  Commands to start the benchmark and list tests and categories

Options:
  --help  Show this message and exit.

Commands:
  categories  Benchmark categories group command
  start       Starts the benchmark command
  tests       Benchmark tests group command
agpt % ./run benchmark categories
Usage: cli.py benchmark categories [OPTIONS] COMMAND [ARGS]...

  Benchmark categories group command

Options:
  --help  Show this message and exit.

Commands:
  list  List benchmark categories command
agpt % ./run benchmark tests
Usage: cli.py benchmark tests [OPTIONS] COMMAND [ARGS]...

  Benchmark tests group command

Options:
  --help  Show this message and exit.

Commands:
  details  Benchmark test details command
  list     List benchmark tests command
```

The benchmark has been split into different categories of skills you can test your agent on. You can see what categories are available with
```bash
./run benchmark categories list
# And what tests are available with
./run benchmark tests list
```




Finally, you can run the benchmark with

```bash
./run benchmark start YOUR_AGENT_NAME

```

>
**README diff** (path not captured in this export)

````diff
@@ -4,7 +4,7 @@ AutoGPT Classic was an experimental project to demonstrate autonomous GPT-4 oper

 ## Project Status

-⚠️ **This project is unsupported, and dependencies will not be updated. It was an experiment that has concluded its initial research phase. If you want to use AutoGPT, you should use the [AutoGPT Platform](/autogpt_platform)**
+**This project is unsupported, and dependencies will not be updated.** It was an experiment that has concluded its initial research phase. If you want to use AutoGPT, you should use the [AutoGPT Platform](/autogpt_platform).

 For those interested in autonomous AI agents, we recommend exploring more actively maintained alternatives or referring to this codebase for educational purposes only.

@@ -16,37 +16,76 @@ AutoGPT Classic was one of the first implementations of autonomous AI agents - A
 - Learn from the results and adjust its approach
 - Chain multiple actions together to achieve an objective

 ## Key Features

 - 🔄 Autonomous task chaining
 - 🛠 Tool and API integration capabilities
 - 💾 Memory management for context retention
 - 🔍 Web browsing and information gathering
 - 📝 File operations and content creation
 - 🔄 Self-prompting and task breakdown

 ## Structure

 The project is organized into several key components:
 - `/benchmark` - Performance testing tools
 - `/forge` - Core autonomous agent framework
 - `/frontend` - User interface components
 - `/original_autogpt` - Original implementation

 ## Getting Started

 While this project is no longer actively maintained, you can still explore the codebase:

 ### Prerequisites

 - Python 3.10+
 - [Poetry](https://python-poetry.org/docs/#installation)

 ### Installation

 1. Clone the repository:
 ```bash
 # Clone the repository
 git clone https://github.com/Significant-Gravitas/AutoGPT.git
 cd classic

 # Install forge (core library)
 cd forge && poetry install

 # Or install original_autogpt (includes forge as dependency)
 cd original_autogpt && poetry install

 # Install benchmark (optional)
 cd benchmark && poetry install
 ```

 2. Review the documentation:
    - For reference, see the [documentation](https://docs.agpt.co). You can browse at the same point in time as this commit so the docs don't change.
    - Check `CLI-USAGE.md` for command-line interface details
    - Refer to `TROUBLESHOOTING.md` for common issues

 ### Configuration

 Copy the example environment file and add your API keys:

 ```bash
 cp .env.example .env
 # Edit .env with your OPENAI_API_KEY, etc.
 ```

 ### Running

 ```bash
 # Run forge agent
 cd forge && poetry run python -m forge

 # Run original autogpt server
 cd original_autogpt && poetry run serve --debug

 # Run autogpt CLI
 cd original_autogpt && poetry run autogpt
 ```

 Agents run on `http://localhost:8000` by default.

 ### Benchmarking

 ```bash
 cd benchmark && poetry run agbenchmark
 ```

 ### Testing

 ```bash
 cd forge && poetry run pytest
 cd original_autogpt && poetry run pytest
 ```

 ## Security Notice

 This codebase has **known vulnerabilities** and issues with its dependencies. It will not be updated to new dependencies. Use for educational purposes only.

 ## License

@@ -55,27 +94,3 @@ This project segment is licensed under the MIT License - see the [LICENSE](LICEN
 ## Documentation

 Please refer to the [documentation](https://docs.agpt.co) for more detailed information about the project's architecture and concepts.
 You can browse at the same point in time as this commit so the docs don't change.

 ## Historical Impact

 AutoGPT Classic played a significant role in advancing the field of autonomous AI agents:
 - Demonstrated practical implementation of AI autonomy
 - Inspired numerous derivative projects and research
 - Contributed to the development of AI agent architectures
 - Helped identify key challenges in AI autonomy

 ## Security Notice

 If you're studying this codebase, please understand this has KNOWN vulnerabilities and issues with its dependencies. It will not be updated to new dependencies.

 ## Community & Support

 While active development has concluded:
 - The codebase remains available for study and reference
 - Historical discussions can be found in project issues
 - Related research and developments continue in the broader AI agent community

 ## Acknowledgments

 Thanks to all contributors who participated in this experimental project and helped advance the field of autonomous AI agents.
````
**classic/benchmark/poetry.lock** (generated, 275 changes; lockfile contents not captured in this export)

The hunks captured under this entry appear to come from the package's `pyproject.toml`:

```diff
@@ -8,14 +8,14 @@ readme = "README.md"
 packages = [{ include = "agbenchmark" }]

 [tool.poetry.dependencies]
-python = "^3.10"
+python = "^3.12"
 agent-protocol-client = {git = "https://github.com/Significant-Gravitas/agent-protocol.git", subdirectory = "packages/client/python"}
 click = "^8.1.3"
 click-default-group = "^1.2.4"
 colorama = "^0.4.6"
 fastapi = "^0.109.1"
 gitpython = "^3.1.32"
-httpx = "^0.24.0"
+httpx = ">=0.27.0"
 matplotlib = "^3.7.2"
 # Multidict 6.0.4 fails to install and is a dependency of aiohttp which is a depenedency of agent-protocol-client
 multidict = "^6.0.5"
@@ -62,7 +62,7 @@ build-backend = "poetry.core.masonry.api"

 [tool.black]
 line-length = 88
-target-version = ['py310']
+target-version = ['py312']
 include = '\.pyi?$'


@@ -72,7 +72,7 @@ skip_glob = ["reports"]


 [tool.pyright]
-pythonVersion = "3.10"
+pythonVersion = "3.12"
 exclude = [
   "notebooks/**",
   "reports/**",
```
**Deleted file** (18 lines; filename not captured in this export)

```
# poetry install
# poetry shell

# cp .env.example .env
# fill out OpenAI Key
# git submodule update --init --remote --recursive

# cd backend
# pip install -r requirement.txt
# uvicorn main:app --reload

# cd ..

# cd frontend
# npm install
# npm run dev

# localhost:3000
```
**classic/cli.py** (deleted, 511 lines)

```python
"""
This is a minimal file intended to be run by users to help them manage the autogpt projects.

If you want to contribute, please use only libraries that come as part of Python.
To ensure efficiency, add the imports to the functions so only what is needed is imported.
"""
try:
    import click
except ImportError:
    import os

    os.system("pip3 install click")
    import click


@click.group()
def cli():
    pass


@cli.command()
def setup():
    """Installs dependencies needed for your system. Works with Linux, MacOS and Windows WSL."""
    import os
    import subprocess

    click.echo(
        click.style(
            """
       d8888          888             .d8888b.  8888888b. 88888888888
      d88888          888            d88P  Y88b 888   Y88b    888
     d88P888          888            888    888 888    888    888
    d88P 888 888  888 888888 .d88b.  888        888   d88P    888
   d88P  888 888  888 888   d88""88b 888  88888 8888888P"     888
  d88P   888 888  888 888   888  888 888    888 888           888
 d8888888888 Y88b 888 Y88b. Y88..88P Y88b  d88P 888           888
d88P     888  "Y88888  "Y888 "Y88P"   "Y8888P88 888           888

""",
            fg="green",
        )
    )

    script_dir = os.path.dirname(os.path.realpath(__file__))
    setup_script = os.path.join(script_dir, "setup.sh")
    install_error = False
    if os.path.exists(setup_script):
        click.echo(click.style("🚀 Setup initiated...\n", fg="green"))
        try:
            subprocess.check_call([setup_script], cwd=script_dir)
        except subprocess.CalledProcessError:
            click.echo(
                click.style("❌ There was an issue with the installation.", fg="red")
            )
            install_error = True
    else:
        click.echo(
            click.style(
                "❌ Error: setup.sh does not exist in the current directory.", fg="red"
            )
        )
        install_error = True

    if install_error:
        click.echo(
            click.style(
                "\n\n🔴 If you need help, please raise a ticket on GitHub at https://github.com/Significant-Gravitas/AutoGPT/issues\n\n",
                fg="magenta",
                bold=True,
            )
        )
    else:
        click.echo(click.style("🎉 Setup completed!\n", fg="green"))


@cli.group()
def agent():
    """Commands to create, start and stop agents"""
    pass


@agent.command()
@click.argument("agent_name")
def create(agent_name: str):
    """Create's a new agent with the agent name provided"""
    import os
    import re
    import shutil

    if not re.match(r"\w*$", agent_name):
        click.echo(
            click.style(
                f"😞 Agent name '{agent_name}' is not valid. It should not contain spaces or special characters other than -_",
                fg="red",
            )
        )
        return
    try:
        new_agent_dir = f"./agents/{agent_name}"
        new_agent_name = f"{agent_name.lower()}.json"

        if not os.path.exists(new_agent_dir):
            shutil.copytree("./forge", new_agent_dir)
            click.echo(
                click.style(
                    f"🎉 New agent '{agent_name}' created. The code for your new agent is in: agents/{agent_name}",
                    fg="green",
                )
            )
        else:
            click.echo(
                click.style(
                    f"😞 Agent '{agent_name}' already exists. Enter a different name for your agent, the name needs to be unique regardless of case",
                    fg="red",
                )
            )
    except Exception as e:
        click.echo(click.style(f"😢 An error occurred: {e}", fg="red"))


@agent.command()
@click.argument("agent_name")
@click.option(
    "--no-setup",
    is_flag=True,
    help="Disables running the setup script before starting the agent",
)
def start(agent_name: str, no_setup: bool):
    """Start agent command"""
    import os
    import subprocess

    script_dir = os.path.dirname(os.path.realpath(__file__))
    agent_dir = os.path.join(
        script_dir,
        f"agents/{agent_name}"
        if agent_name not in ["original_autogpt", "forge"]
        else agent_name,
    )
    run_command = os.path.join(agent_dir, "run")
    run_bench_command = os.path.join(agent_dir, "run_benchmark")
    if (
        os.path.exists(agent_dir)
        and os.path.isfile(run_command)
        and os.path.isfile(run_bench_command)
    ):
        os.chdir(agent_dir)
        if not no_setup:
            click.echo(f"⌛ Running setup for agent '{agent_name}'...")
            setup_process = subprocess.Popen(["./setup"], cwd=agent_dir)
            setup_process.wait()
            click.echo()

        # FIXME: Doesn't work: Command not found: agbenchmark
        # subprocess.Popen(["./run_benchmark", "serve"], cwd=agent_dir)
        # click.echo("⌛ (Re)starting benchmark server...")
        # wait_until_conn_ready(8080)
        # click.echo()

        subprocess.Popen(["./run"], cwd=agent_dir)
        click.echo(f"⌛ (Re)starting agent '{agent_name}'...")
        wait_until_conn_ready(8000)
        click.echo("✅ Agent application started and available on port 8000")
    elif not os.path.exists(agent_dir):
        click.echo(
            click.style(
                f"😞 Agent '{agent_name}' does not exist. Please create the agent first.",
                fg="red",
            )
        )
    else:
        click.echo(
            click.style(
                f"😞 Run command does not exist in the agent '{agent_name}' directory.",
                fg="red",
            )
        )


@agent.command()
def stop():
    """Stop agent command"""
    import os
    import signal
    import subprocess

    try:
        pids = subprocess.check_output(["lsof", "-t", "-i", ":8000"]).split()
        if isinstance(pids, int):
            os.kill(int(pids), signal.SIGTERM)
        else:
            for pid in pids:
                os.kill(int(pid), signal.SIGTERM)
    except subprocess.CalledProcessError:
        click.echo("No process is running on port 8000")

    try:
        pids = int(subprocess.check_output(["lsof", "-t", "-i", ":8080"]))
        if isinstance(pids, int):
            os.kill(int(pids), signal.SIGTERM)
        else:
            for pid in pids:
                os.kill(int(pid), signal.SIGTERM)
    except subprocess.CalledProcessError:
        click.echo("No process is running on port 8080")


@agent.command()
def list():
    """List agents command"""
    import os

    try:
        agents_dir = "./agents"
        agents_list = [
            d
            for d in os.listdir(agents_dir)
            if os.path.isdir(os.path.join(agents_dir, d))
        ]
        if os.path.isdir("./original_autogpt"):
            agents_list.append("original_autogpt")
        if agents_list:
            click.echo(click.style("Available agents: 🤖", fg="green"))
            for agent in agents_list:
                click.echo(click.style(f"\t🐙 {agent}", fg="blue"))
        else:
            click.echo(click.style("No agents found 😞", fg="red"))
    except FileNotFoundError:
        click.echo(click.style("The agents directory does not exist 😢", fg="red"))
    except Exception as e:
        click.echo(click.style(f"An error occurred: {e} 😢", fg="red"))


@cli.group()
def benchmark():
    """Commands to start the benchmark and list tests and categories"""
    pass


@benchmark.command(
    context_settings=dict(
        ignore_unknown_options=True,
    )
)
@click.argument("agent_name")
@click.argument("subprocess_args", nargs=-1, type=click.UNPROCESSED)
def start(agent_name, subprocess_args):
    """Starts the benchmark command"""
    import os
    import subprocess

    script_dir = os.path.dirname(os.path.realpath(__file__))
    agent_dir = os.path.join(
        script_dir,
        f"agents/{agent_name}"
        if agent_name not in ["original_autogpt", "forge"]
        else agent_name,
    )
    benchmark_script = os.path.join(agent_dir, "run_benchmark")
    if os.path.exists(agent_dir) and os.path.isfile(benchmark_script):
        os.chdir(agent_dir)
        subprocess.Popen([benchmark_script, *subprocess_args], cwd=agent_dir)
        click.echo(
            click.style(
                f"🚀 Running benchmark for '{agent_name}' with subprocess arguments: {' '.join(subprocess_args)}",
                fg="green",
            )
        )
    else:
        click.echo(
            click.style(
                f"😞 Agent '{agent_name}' does not exist. Please create the agent first.",
                fg="red",
            )
        )


@benchmark.group(name="categories")
def benchmark_categories():
    """Benchmark categories group command"""
    pass


@benchmark_categories.command(name="list")
def benchmark_categories_list():
    """List benchmark categories command"""
    import glob
    import json
    import os

    categories = set()

    # Get the directory of this file
    this_dir = os.path.dirname(os.path.abspath(__file__))

    glob_path = os.path.join(
        this_dir,
        "./benchmark/agbenchmark/challenges/**/[!deprecated]*/data.json",
    )
    # Use it as the base for the glob pattern, excluding 'deprecated' directory
    for data_file in glob.glob(glob_path, recursive=True):
        if "deprecated" not in data_file:
            with open(data_file, "r") as f:
                try:
                    data = json.load(f)
                    categories.update(data.get("category", []))
                except json.JSONDecodeError:
                    print(f"Error: {data_file} is not a valid JSON file.")
                    continue
                except IOError:
                    print(f"IOError: file could not be read: {data_file}")
                    continue

    if categories:
        click.echo(click.style("Available categories: 📚", fg="green"))
        for category in categories:
            click.echo(click.style(f"\t📖 {category}", fg="blue"))
    else:
        click.echo(click.style("No categories found 😞", fg="red"))


@benchmark.group(name="tests")
def benchmark_tests():
    """Benchmark tests group command"""
    pass


@benchmark_tests.command(name="list")
def benchmark_tests_list():
    """List benchmark tests command"""
    import glob
    import json
    import os
    import re

    tests = {}

    # Get the directory of this file
    this_dir = os.path.dirname(os.path.abspath(__file__))

    glob_path = os.path.join(
        this_dir,
        "./benchmark/agbenchmark/challenges/**/[!deprecated]*/data.json",
    )
    # Use it as the base for the glob pattern, excluding 'deprecated' directory
    for data_file in glob.glob(glob_path, recursive=True):
        if "deprecated" not in data_file:
            with open(data_file, "r") as f:
                try:
                    data = json.load(f)
                    category = data.get("category", [])
                    test_name = data.get("name", "")
                    if category and test_name:
                        if category[0] not in tests:
                            tests[category[0]] = []
                        tests[category[0]].append(test_name)
                except json.JSONDecodeError:
                    print(f"Error: {data_file} is not a valid JSON file.")
                    continue
                except IOError:
                    print(f"IOError: file could not be read: {data_file}")
                    continue

    if tests:
        click.echo(click.style("Available tests: 📚", fg="green"))
        for category, test_list in tests.items():
            click.echo(click.style(f"\t📖 {category}", fg="blue"))
            for test in sorted(test_list):
                test_name = (
                    " ".join(word for word in re.split("([A-Z][a-z]*)", test) if word)
                    .replace("_", "")
                    .replace("C L I", "CLI")
                    .replace("  ", " ")
                )
                test_name_padded = f"{test_name:<40}"
                click.echo(click.style(f"\t\t🔬 {test_name_padded} - {test}", fg="cyan"))
    else:
        click.echo(click.style("No tests found 😞", fg="red"))


@benchmark_tests.command(name="details")
@click.argument("test_name")
def benchmark_tests_details(test_name):
    """Benchmark test details command"""
    import glob
    import json
    import os

    # Get the directory of this file
    this_dir = os.path.dirname(os.path.abspath(__file__))

    glob_path = os.path.join(
        this_dir,
        "./benchmark/agbenchmark/challenges/**/[!deprecated]*/data.json",
    )
    # Use it as the base for the glob pattern, excluding 'deprecated' directory
    for data_file in glob.glob(glob_path, recursive=True):
        with open(data_file, "r") as f:
            try:
                data = json.load(f)
                if data.get("name") == test_name:
                    click.echo(
                        click.style(
                            f"\n{data.get('name')}\n{'-'*len(data.get('name'))}\n",
                            fg="blue",
                        )
                    )
                    click.echo(
                        click.style(
                            f"\tCategory: {', '.join(data.get('category'))}",
                            fg="green",
                        )
                    )
                    click.echo(click.style(f"\tTask: {data.get('task')}", fg="green"))
                    click.echo(
                        click.style(
                            f"\tDependencies: {', '.join(data.get('dependencies')) if data.get('dependencies') else 'None'}",
                            fg="green",
                        )
                    )
                    click.echo(
                        click.style(f"\tCutoff: {data.get('cutoff')}\n", fg="green")
                    )
                    click.echo(
                        click.style("\tTest Conditions\n\t-------", fg="magenta")
                    )
                    click.echo(
                        click.style(
                            f"\t\tAnswer: {data.get('ground').get('answer')}",
                            fg="magenta",
                        )
                    )
                    click.echo(
                        click.style(
                            f"\t\tShould Contain: {', '.join(data.get('ground').get('should_contain'))}",
                            fg="magenta",
                        )
                    )
                    click.echo(
                        click.style(
                            f"\t\tShould Not Contain: {', '.join(data.get('ground').get('should_not_contain'))}",
                            fg="magenta",
                        )
                    )
                    click.echo(
                        click.style(
                            f"\t\tFiles: {', '.join(data.get('ground').get('files'))}",
                            fg="magenta",
                        )
                    )
                    click.echo(
                        click.style(
                            f"\t\tEval: {data.get('ground').get('eval').get('type')}\n",
                            fg="magenta",
                        )
                    )
                    click.echo(click.style("\tInfo\n\t-------", fg="yellow"))
                    click.echo(
                        click.style(
                            f"\t\tDifficulty: {data.get('info').get('difficulty')}",
                            fg="yellow",
                        )
                    )
                    click.echo(
                        click.style(
                            f"\t\tDescription: {data.get('info').get('description')}",
                            fg="yellow",
                        )
                    )
                    click.echo(
                        click.style(
                            f"\t\tSide Effects: {', '.join(data.get('info').get('side_effects'))}",
                            fg="yellow",
                        )
                    )
                    break

            except json.JSONDecodeError:
                print(f"Error: {data_file} is not a valid JSON file.")
                continue
            except IOError:
                print(f"IOError: file could not be read: {data_file}")
                continue


def wait_until_conn_ready(port: int = 8000, timeout: int = 30):
    """
    Polls localhost:{port} until it is available for connections

    Params:
        port: The port for which to wait until it opens
        timeout: Timeout in seconds; maximum amount of time to wait

    Raises:
        TimeoutError: If the timeout (seconds) expires before the port opens
    """
    import socket
    import time

    start = time.time()
    while True:
        time.sleep(0.5)
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
            if s.connect_ex(("localhost", port)) == 0:
                break
        if time.time() > start + timeout:
            raise TimeoutError(f"Port {port} did not open within {timeout} seconds")


if __name__ == "__main__":
    cli()
```
**Env file diff** (path not captured in this export)

```diff
@@ -1,6 +1,15 @@
 # Your OpenAI API Key. If GPT-4 is available it will use that, otherwise will use 3.5-turbo
 OPENAI_API_KEY=abc

+# Web Search API Keys (optional - will fall back to DuckDuckGo if not set)
+# Tavily - AI-optimized search with content extraction (recommended)
+# Get your key at https://tavily.com - 1000 free searches/month
+TAVILY_API_KEY=
+
+# Serper.dev - Fast, cheap Google SERP results
+# Get your key at https://serper.dev - 2500 free searches to start
+SERPER_API_KEY=
+
 # Control log level
 LOG_LEVEL=INFO
 DATABASE_STRING="sqlite:///agent.db"
```
**classic/forge/CLAUDE.md** (new file, 423 lines)

# CLAUDE.md

This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.

## Quick Reference

```bash
# Run forge agent server (port 8000)
poetry run python -m forge

# Run tests
poetry run pytest
poetry run pytest --cov=forge
poetry run pytest -k test_name
```

## Entry Point

`__main__.py` → loads `.env` → configures logging → starts Uvicorn with hot-reload on port 8000

The app is created in `app.py`:
```python
agent = ForgeAgent(database=database, workspace=workspace)
app = agent.get_agent_app()
```
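For illustration (not the actual `__main__.py`): a minimal sketch of the startup flow described above, assuming python-dotenv and uvicorn; the logging setup and import string are illustrative.

```python
# Illustrative sketch of the documented startup flow, not the real file.
# Assumes python-dotenv and uvicorn are installed.
import logging

import uvicorn
from dotenv import load_dotenv

if __name__ == "__main__":
    load_dotenv()  # load .env before the app module reads its config
    logging.basicConfig(level=logging.INFO)
    uvicorn.run(
        "forge.app:app",  # import string form is required for hot-reload
        host="localhost",
        port=8000,
        reload=True,
    )
```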
## Directory Structure

```
forge/
├── __main__.py            # Entry: uvicorn server startup
├── app.py                 # FastAPI app creation
├── agent/                 # Core agent framework
│   ├── base.py            # BaseAgent abstract class
│   ├── forge_agent.py     # Reference implementation
│   ├── components.py      # AgentComponent base classes
│   └── protocols.py       # Protocol interfaces
├── agent_protocol/        # Agent Protocol standard
│   ├── agent.py           # ProtocolAgent mixin
│   ├── api_router.py      # FastAPI routes
│   └── database/          # Task/step persistence
├── command/               # Command system
│   ├── command.py         # Command class
│   ├── decorator.py       # @command decorator
│   └── parameter.py       # CommandParameter
├── components/            # Built-in components
│   ├── action_history/    # Track & summarize actions
│   ├── code_executor/     # Python & shell execution
│   ├── context/           # File/folder context
│   ├── file_manager/      # File operations
│   ├── git_operations/    # Git commands
│   ├── image_gen/         # DALL-E & SD
│   ├── system/            # Core directives + finish
│   ├── user_interaction/  # User prompts
│   ├── watchdog/          # Loop detection
│   └── web/               # Search & Selenium
├── config/                # Configuration models
├── llm/                   # LLM integration
│   └── providers/         # OpenAI, Anthropic, Groq, etc.
├── file_storage/          # Storage abstraction
│   ├── base.py            # FileStorage ABC
│   ├── local.py           # LocalFileStorage
│   ├── s3.py              # S3FileStorage
│   └── gcs.py             # GCSFileStorage
├── models/                # Core data models
├── content_processing/    # Text/HTML utilities
├── logging/               # Structured logging
└── json/                  # JSON parsing utilities
```

## Core Abstractions

### BaseAgent (`agent/base.py`)

Abstract base for all agents. Generic over proposal type.

```python
class BaseAgent(Generic[AnyProposal], metaclass=AgentMeta):
    def __init__(self, settings: BaseAgentSettings)
```

**Must Override:**
```python
async def propose_action(self) -> AnyProposal
async def execute(self, proposal: AnyProposal, user_feedback: str) -> ActionResult
async def do_not_execute(self, denied_proposal: AnyProposal, user_feedback: str) -> ActionResult
```

**Key Methods:**
```python
async def run_pipeline(protocol_method, *args, retry_limit=3) -> list
# Executes protocol across all matching components with retry logic

def dump_component_configs(self) -> str      # Serialize configs to JSON
def load_component_configs(self, json: str)  # Restore configs
```

**Configuration (`BaseAgentConfiguration`):**
```python
fast_llm: ModelName = "gpt-3.5-turbo-16k"
smart_llm: ModelName = "gpt-4"
big_brain: bool = True           # Use smart_llm
cycle_budget: Optional[int] = 1  # Steps before approval needed
send_token_limit: Optional[int]  # Prompt token budget
```

### Component System (`agent/components.py`)

**AgentComponent** - Base for all components:
```python
class AgentComponent(ABC):
    _run_after: list[type[AgentComponent]] = []
    _enabled: bool | Callable[[], bool] = True
    _disabled_reason: str = ""

    def run_after(self, *components) -> Self  # Set execution order
    def enabled(self) -> bool                 # Check if active
```

**ConfigurableComponent** - Components with Pydantic config:
```python
class ConfigurableComponent(Generic[BM]):
    config_class: ClassVar[type[BM]]  # Set in subclass

    @property
    def config(self) -> BM  # Get/create config from env
```

**Component Discovery:**
1. Agent assigns components: `self.foo = FooComponent()`
2. `AgentMeta.__call__` triggers `_collect_components()`
3. Components are topologically sorted by `run_after` dependencies
4. Disabled components skipped during pipeline execution
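For illustration (not from the file): a minimal sketch of how the discovery and ordering rules above might play out; the component names are hypothetical, and the constructor pass-through follows the `ForgeAgent` call shown earlier.

```python
# Hypothetical sketch of the component discovery steps listed above.
# MemoryComponent/LoggingComponent are illustrative names only.
from forge.agent.components import AgentComponent
from forge.agent.forge_agent import ForgeAgent


class MemoryComponent(AgentComponent):
    pass


class LoggingComponent(AgentComponent):
    pass


class OrderedAgent(ForgeAgent):
    def __init__(self, database, workspace):
        # Step 1: plain attribute assignment registers the components;
        # step 2: AgentMeta collects them after construction.
        self.memory = MemoryComponent()
        # Step 3: run_after() makes logging sort after memory in the pipeline.
        self.logging = LoggingComponent().run_after(MemoryComponent)
        super().__init__(database, workspace)
```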
### Protocols (`agent/protocols.py`)

Protocols define what components CAN do:

```python
class DirectiveProvider(AgentComponent):
    def get_constraints(self) -> Iterator[str]
    def get_resources(self) -> Iterator[str]
    def get_best_practices(self) -> Iterator[str]

class CommandProvider(AgentComponent):
    def get_commands(self) -> Iterator[Command]

class MessageProvider(AgentComponent):
    def get_messages(self) -> Iterator[ChatMessage]

class AfterParse(AgentComponent, Generic[AnyProposal]):
    def after_parse(self, result: AnyProposal) -> None

class AfterExecute(AgentComponent):
    def after_execute(self, result: ActionResult) -> None

class ExecutionFailure(AgentComponent):
    def execution_failure(self, error: Exception) -> None
```

**Pipeline execution:**
```python
results = await self.run_pipeline(CommandProvider.get_commands)
# Iterates all components implementing CommandProvider
# Collects all yielded Commands
# Handles retries on ComponentEndpointError
```

## LLM Providers (`llm/providers/`)

### MultiProvider

Routes to correct provider based on model name:

```python
class MultiProvider:
    async def create_chat_completion(
        self,
        model_prompt: list[ChatMessage],
        model_name: ModelName,
        **kwargs
    ) -> ChatModelResponse

    async def get_available_chat_models(self) -> Sequence[ChatModelInfo]
```

### Supported Models

```python
# OpenAI
OpenAIModelName.GPT3, GPT3_16k, GPT4, GPT4_32k, GPT4_TURBO, GPT4_O

# Anthropic
AnthropicModelName.CLAUDE3_OPUS, CLAUDE3_SONNET, CLAUDE3_HAIKU
AnthropicModelName.CLAUDE3_5_SONNET, CLAUDE3_5_SONNET_v2, CLAUDE3_5_HAIKU
AnthropicModelName.CLAUDE4_SONNET, CLAUDE4_OPUS, CLAUDE4_5_OPUS

# Groq
GroqModelName.LLAMA3_8B, LLAMA3_70B, MIXTRAL_8X7B
```

### Key Types

```python
class ChatMessage(BaseModel):
    role: Role  # USER, SYSTEM, ASSISTANT, TOOL, FUNCTION
    content: str

class AssistantFunctionCall(BaseModel):
    name: str
    arguments: dict[str, Any]

class ChatModelResponse(BaseModel):
    completion_text: str
    function_calls: list[AssistantFunctionCall]
```

## File Storage (`file_storage/`)

Abstract interface for file operations:

```python
class FileStorage(ABC):
    def open_file(self, path, mode="r", binary=False) -> IO
    def read_file(self, path, binary=False) -> str | bytes
    async def write_file(self, path, content) -> None
    def list_files(self, path=".") -> list[Path]
    def list_folders(self, path=".", recursive=False) -> list[Path]
    def delete_file(self, path) -> None
    def exists(self, path) -> bool
    def clone_with_subroot(self, subroot) -> FileStorage
```

**Implementations:** `LocalFileStorage`, `S3FileStorage`, `GCSFileStorage`
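For illustration (not from the file): a short usage sketch against the `FileStorage` interface above. The implementation-specific constructor arguments are omitted because they are not documented here.

```python
# Hypothetical usage of the FileStorage interface described above; `storage`
# may be any implementation (LocalFileStorage, S3FileStorage, GCSFileStorage).
async def demo(storage) -> None:
    await storage.write_file("notes/todo.txt", "ship the release")  # async write
    assert storage.exists("notes/todo.txt")
    print(storage.read_file("notes/todo.txt"))

    # clone_with_subroot scopes subsequent operations under a sub-directory
    scoped = storage.clone_with_subroot("notes")
    print(scoped.list_files())

# Run with e.g. asyncio.run(demo(<your FileStorage instance>))
```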

## Command System (`command/`)

### @command Decorator

```python
@command(
    names=["greet", "hello"],
    description="Greet a user",
    parameters={
        "name": JSONSchema(type=JSONSchema.Type.STRING, required=True),
        "greeting": JSONSchema(type=JSONSchema.Type.STRING, required=False),
    },
)
def greet(self, name: str, greeting: str = "Hello") -> str:
    return f"{greeting}, {name}!"
```

### Providing Commands

```python
class MyComponent(CommandProvider):
    def get_commands(self) -> Iterator[Command]:
        yield self.greet  # Decorated method becomes a Command
```

## Built-in Components

| Component | Protocols | Purpose |
|-----------|-----------|---------|
| `SystemComponent` | DirectiveProvider, MessageProvider, CommandProvider | Core directives, `finish` command |
| `FileManagerComponent` | DirectiveProvider, CommandProvider | Read/write/list files |
| `CodeExecutorComponent` | CommandProvider | Python & shell execution (Docker) |
| `WebSearchComponent` | DirectiveProvider, CommandProvider | DuckDuckGo & Google search |
| `WebSeleniumComponent` | CommandProvider | Browser automation |
| `ActionHistoryComponent` | MessageProvider, AfterParse, AfterExecute | Track & summarize history |
| `WatchdogComponent` | AfterParse | Loop detection, LLM switching |
| `ContextComponent` | MessageProvider, CommandProvider | Keep files in prompt context |
| `ImageGeneratorComponent` | CommandProvider | DALL-E, Stable Diffusion |
| `GitOperationsComponent` | CommandProvider | Git commands |
| `UserInteractionComponent` | CommandProvider | `ask_user` command |

## Configuration

### BaseAgentSettings

```python
class BaseAgentSettings(SystemSettings):
    agent_id: str
    ai_profile: AIProfile  # name, role, goals
    directives: AIDirectives  # constraints, resources, best_practices
    task: str
    config: BaseAgentConfiguration
```

### UserConfigurable Fields

```python
class MyConfig(SystemConfiguration):
    api_key: SecretStr = UserConfigurable(from_env="API_KEY", exclude=True)
    max_retries: int = UserConfigurable(default=3, from_env="MAX_RETRIES")

config = MyConfig.from_env()  # Load from environment
```
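
A quick sketch of how this resolves at runtime, assuming `from_env` coerces env strings to the declared field types (the values are illustrative):

```python
import os

os.environ["API_KEY"] = "sk-example"  # illustrative only
os.environ["MAX_RETRIES"] = "5"

config = MyConfig.from_env()
assert config.max_retries == 5  # parsed from the env string
assert config.api_key.get_secret_value() == "sk-example"  # pydantic SecretStr
```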

## Agent Protocol (`agent_protocol/`)

REST API for task-based interaction:

```
POST /ap/v1/agent/tasks                  # Create task
GET  /ap/v1/agent/tasks                  # List tasks
GET  /ap/v1/agent/tasks/{id}             # Get task
POST /ap/v1/agent/tasks/{id}/steps       # Execute step
GET  /ap/v1/agent/tasks/{id}/steps       # List steps
GET  /ap/v1/agent/tasks/{id}/artifacts   # List artifacts
```

**ProtocolAgent mixin** provides these endpoints + database persistence.
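
For example, driving an agent over this API from Python (a sketch; the localhost:8000 address, the `input` field of `TaskRequestBody`, and the `task_id` key follow the Agent Protocol spec and are assumptions here):

```python
import requests

base = "http://localhost:8000/ap/v1/agent"

# Create a task, then run one step against it
task = requests.post(f"{base}/tasks", json={"input": "Write a haiku to haiku.txt"}).json()
step = requests.post(f"{base}/tasks/{task['task_id']}/steps", json={}).json()
print(step.get("output"))
```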

## Testing

**Fixtures** (`conftest.py`):
- `storage` - Temporary LocalFileStorage
- VCR cassettes in `tests/vcr_cassettes/`

```bash
poetry run pytest                     # All tests
poetry run pytest --cov=forge         # With coverage
poetry run pytest --record-mode=all   # Record HTTP cassettes
```

## Creating a Custom Component

```python
from typing import Iterator

from pydantic import BaseModel

from forge.agent.components import AgentComponent, ConfigurableComponent
from forge.agent.protocols import CommandProvider
from forge.command import Command, command
from forge.models.json_schema import JSONSchema

class MyConfig(BaseModel):
    setting: str = "default"

class MyComponent(CommandProvider, ConfigurableComponent[MyConfig]):
    config_class = MyConfig

    def get_commands(self) -> Iterator[Command]:
        yield self.my_command

    @command(
        names=["mycmd"],
        description="Do something",
        parameters={"arg": JSONSchema(type=JSONSchema.Type.STRING, required=True)},
    )
    def my_command(self, arg: str) -> str:
        return f"Result: {arg}"
```

## Creating a Custom Agent

```python
from forge.agent.forge_agent import ForgeAgent

class MyAgent(ForgeAgent):
    def __init__(self, database, workspace):
        super().__init__(database, workspace)
        self.my_component = MyComponent()

    async def propose_action(self) -> ActionProposal:
        # 1. Collect directives
        constraints = await self.run_pipeline(DirectiveProvider.get_constraints)
        resources = await self.run_pipeline(DirectiveProvider.get_resources)

        # 2. Collect commands
        commands = await self.run_pipeline(CommandProvider.get_commands)

        # 3. Collect messages
        messages = await self.run_pipeline(MessageProvider.get_messages)

        # 4. Build prompt and call LLM
        response = await self.llm_provider.create_chat_completion(
            model_prompt=messages,
            model_name=self.config.smart_llm,
            functions=function_specs_from_commands(commands),
        )

        # 5. Parse and return proposal
        return ActionProposal(
            thoughts=response.completion_text,
            use_tool=response.function_calls[0],
            raw_message=AssistantChatMessage(content=response.completion_text),
        )
```

## Key Patterns

### Component Ordering
```python
self.component_a = ComponentA()
self.component_b = ComponentB().run_after(self.component_a)
```

### Conditional Enabling
```python
self.search = WebSearchComponent()
self.search._enabled = bool(os.getenv("GOOGLE_API_KEY"))
self.search._disabled_reason = "No Google API key"
```

### Pipeline Retry Logic
- `ComponentEndpointError` → retry the same component endpoint (up to 3×)
- `EndpointPipelineError` → restart the pipeline across all components (up to 3×)
- `ComponentSystemError` → restart all pipelines
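
This layering can be pictured as nested retry loops. A simplified sketch of the behavior described above, not the actual implementation (the error constructors are assumed to take no arguments):

```python
async def run_pipeline(components, endpoint, retry_limit=3):
    for _pipeline_attempt in range(retry_limit):
        try:
            results = []
            for component in components:
                for _attempt in range(retry_limit):
                    try:
                        results.append(await endpoint(component))
                        break  # this component's endpoint succeeded
                    except ComponentEndpointError:
                        continue  # retry just this component
                else:
                    # component kept failing: escalate to a pipeline restart
                    raise EndpointPipelineError()
            return results
        except EndpointPipelineError:
            continue  # restart the whole pipeline from the first component
    raise ComponentSystemError()  # pipeline itself kept failing
```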

## Key Files Reference

| Purpose | Location |
|---------|----------|
| Entry point | `__main__.py` |
| FastAPI app | `app.py` |
| Base agent | `agent/base.py` |
| Reference agent | `agent/forge_agent.py` |
| Components base | `agent/components.py` |
| Protocols | `agent/protocols.py` |
| LLM providers | `llm/providers/` |
| File storage | `file_storage/` |
| Commands | `command/` |
| Built-in components | `components/` |
| Agent Protocol | `agent_protocol/` |
@@ -1,5 +1,5 @@
# Use an official Python runtime as a parent image
FROM python:3.11-slim-buster as base
FROM python:3.12-slim-bookworm as base

# Set work directory in the container
WORKDIR /app

@@ -18,7 +18,7 @@ from typing import (
)

from colorama import Fore
from pydantic import BaseModel, Field, ValidationInfo, field_validator
from pydantic import BaseModel, Field
from pydantic_core import from_json, to_json

from forge.agent import protocols
@@ -34,6 +34,7 @@ from forge.llm.providers import CHAT_MODELS, ModelName, OpenAIModelName
from forge.llm.providers.schema import ChatModelInfo
from forge.models.action import ActionResult, AnyProposal
from forge.models.config import SystemConfiguration, SystemSettings, UserConfigurable
from forge.permissions import CommandPermissionManager

logger = logging.getLogger(__name__)

@@ -52,7 +53,6 @@ class BaseAgentConfiguration(SystemConfiguration):

    fast_llm: ModelName = UserConfigurable(default=OpenAIModelName.GPT3_16k)
    smart_llm: ModelName = UserConfigurable(default=OpenAIModelName.GPT4)
    use_functions_api: bool = UserConfigurable(default=False)

    default_cycle_instruction: str = DEFAULT_TRIGGERING_PROMPT
    """The default instruction passed to the AI for a thinking cycle."""
@@ -84,22 +84,6 @@ class BaseAgentConfiguration(SystemConfiguration):
    defaults to 75% of `llm.max_tokens`.
    """

    @field_validator("use_functions_api")
    def validate_openai_functions(cls, value: bool, info: ValidationInfo):
        if value:
            smart_llm = info.data["smart_llm"]
            fast_llm = info.data["fast_llm"]
            assert all(
                [
                    not any(s in name for s in {"-0301", "-0314"})
                    for name in {smart_llm, fast_llm}
                ]
            ), (
                f"Model {smart_llm} does not support OpenAI Functions. "
                "Please disable OPENAI_FUNCTIONS or choose a suitable model."
            )
        return value


class BaseAgentSettings(SystemSettings):
    agent_id: str = ""
@@ -130,10 +114,12 @@ class BaseAgent(Generic[AnyProposal], metaclass=AgentMeta):
    def __init__(
        self,
        settings: BaseAgentSettings,
        permission_manager: Optional[CommandPermissionManager] = None,
    ):
        self.state = settings
        self.components: list[AgentComponent] = []
        self.config = settings.config
        self.permission_manager = permission_manager
        # Execution data for debugging
        self._trace: list[str] = []

@@ -156,24 +142,21 @@ class BaseAgent(Generic[AnyProposal], metaclass=AgentMeta):
        return self.config.send_token_limit or self.llm.max_tokens * 3 // 4

    @abstractmethod
    async def propose_action(self) -> AnyProposal:
        ...
    async def propose_action(self) -> AnyProposal: ...

    @abstractmethod
    async def execute(
        self,
        proposal: AnyProposal,
        user_feedback: str = "",
    ) -> ActionResult:
        ...
    ) -> ActionResult: ...

    @abstractmethod
    async def do_not_execute(
        self,
        denied_proposal: AnyProposal,
        user_feedback: str,
    ) -> ActionResult:
        ...
    ) -> ActionResult: ...

    def reset_trace(self):
        self._trace = []
@@ -181,8 +164,7 @@ class BaseAgent(Generic[AnyProposal], metaclass=AgentMeta):
    @overload
    async def run_pipeline(
        self, protocol_method: Callable[P, Iterator[T]], *args, retry_limit: int = 3
    ) -> list[T]:
        ...
    ) -> list[T]: ...

    @overload
    async def run_pipeline(
@@ -190,8 +172,7 @@ class BaseAgent(Generic[AnyProposal], metaclass=AgentMeta):
        protocol_method: Callable[P, None | Awaitable[None]],
        *args,
        retry_limit: int = 3,
    ) -> list[None]:
        ...
    ) -> list[None]: ...

    async def run_pipeline(
        self,

@@ -19,7 +19,14 @@ from forge.agent_protocol.models.task import (
    TaskRequestBody,
)
from forge.command.command import Command
from forge.components.archive_handler import ArchiveHandlerComponent
from forge.components.clipboard import ClipboardComponent
from forge.components.data_processor import DataProcessorComponent
from forge.components.http_client import HTTPClientComponent
from forge.components.math_utils import MathUtilsComponent
from forge.components.system.system import SystemComponent
from forge.components.text_utils import TextUtilsComponent
from forge.components.todo import TodoComponent
from forge.config.ai_profile import AIProfile
from forge.file_storage.base import FileStorage
from forge.llm.prompting.schema import ChatPrompt
@@ -82,6 +89,19 @@ class ForgeAgent(ProtocolAgent, BaseAgent):
        # System component provides "finish" command and adds some prompt information
        self.system = SystemComponent()

        # Todo component provides task management for tracking multi-step work
        # Note: llm_provider not available in ForgeAgent, so todo_decompose won't work
        # For full functionality, use original_autogpt's Agent which has LLM access
        self.todo = TodoComponent()

        # Utility components
        self.archive_handler = ArchiveHandlerComponent(workspace)
        self.clipboard = ClipboardComponent()
        self.data_processor = DataProcessorComponent()
        self.http_client = HTTPClientComponent()
        self.math_utils = MathUtilsComponent()
        self.text_utils = TextUtilsComponent()

    async def create_task(self, task_request: TaskRequestBody) -> Task:
        """
        The agent protocol, which is the core of the Forge,

@@ -91,6 +91,15 @@ class ActionHistoryComponent(

        yield from messages

        # Include any pending user feedback (from approval + feedback scenarios)
        # This feedback was provided when the user approved the command, so the
        # command was executed successfully. Make this explicit to the agent.
        pending_feedback = self.event_history.pop_pending_feedback()
        for feedback in pending_feedback:
            yield ChatMessage.user(
                f"Command executed successfully. User feedback: {feedback}"
            )

    def after_parse(self, result: AnyProposal) -> None:
        self.event_history.register_action(result)

@@ -133,7 +142,21 @@ class ActionHistoryComponent(
            )
        )
        else:
            return ChatMessage.user(result.feedback)
        # ActionInterruptedByHuman - user provided feedback instead of executing
        # Must return ToolResultMessage to satisfy API requirements (both Anthropic
        # and OpenAI require tool_use/function_call to be followed by tool_result)
        feedback_content = (
            f"Command not executed. User provided feedback: {result.feedback}"
        )
        return (
            ToolResultMessage(
                content=feedback_content,
                is_error=True,
                tool_call_id=episode.action.raw_message.tool_calls[0].id,
            )
            if episode.action.raw_message.tool_calls
            else ChatMessage.user(feedback_content)
        )

    def _compile_progress(
        self,

@@ -56,6 +56,8 @@ class EpisodicActionHistory(BaseModel, Generic[AnyProposal]):

    episodes: list[Episode[AnyProposal]] = Field(default_factory=list)
    cursor: int = 0
    pending_user_feedback: list[str] = Field(default_factory=list)
    """User feedback provided along with approval, for inclusion in next prompt"""
    _lock = asyncio.Lock()

    @property
@@ -89,6 +91,27 @@ class EpisodicActionHistory(BaseModel, Generic[AnyProposal]):
        self.current_episode.result = result
        self.cursor = len(self.episodes)

    def append_user_feedback(self, feedback: str) -> None:
        """Append user feedback to be included in the next prompt.

        This is used when a user approves a command but also provides feedback.
        The feedback will be sent to the agent in the next iteration.

        Args:
            feedback: The user's feedback text.
        """
        self.pending_user_feedback.append(feedback)

    def pop_pending_feedback(self) -> list[str]:
        """Get and clear all pending user feedback.

        Returns:
            List of feedback strings that were pending.
        """
        feedback = self.pending_user_feedback.copy()
        self.pending_user_feedback.clear()
        return feedback

    def rewind(self, number_of_episodes: int = 0) -> None:
        """Resets the history to an earlier state.

@@ -0,0 +1,6 @@
from forge.components.archive_handler.archive_handler import (
    ArchiveHandlerComponent,
    ArchiveHandlerConfiguration,
)

__all__ = ["ArchiveHandlerComponent", "ArchiveHandlerConfiguration"]
@@ -0,0 +1,384 @@
import json
import logging
import os
import tarfile
import zipfile
from pathlib import Path
from typing import Iterator, Optional

from pydantic import BaseModel, Field

from forge.agent.components import ConfigurableComponent
from forge.agent.protocols import CommandProvider, DirectiveProvider
from forge.command import Command, command
from forge.file_storage.base import FileStorage
from forge.models.json_schema import JSONSchema
from forge.utils.exceptions import CommandExecutionError

logger = logging.getLogger(__name__)


class ArchiveHandlerConfiguration(BaseModel):
    max_archive_size: int = Field(
        default=100 * 1024 * 1024,  # 100MB
        description="Maximum archive size in bytes",
    )
    max_extracted_size: int = Field(
        default=500 * 1024 * 1024,  # 500MB
        description="Maximum total size of extracted files",
    )
    max_files: int = Field(
        default=10000,
        description="Maximum number of files in archive",
    )


class ArchiveHandlerComponent(
    DirectiveProvider,
    CommandProvider,
    ConfigurableComponent[ArchiveHandlerConfiguration],
):
    """Provides commands to create, extract, and list archive files."""

    config_class = ArchiveHandlerConfiguration

    def __init__(
        self,
        workspace: FileStorage,
        config: Optional[ArchiveHandlerConfiguration] = None,
    ):
        ConfigurableComponent.__init__(self, config)
        self.workspace = workspace

    def get_resources(self) -> Iterator[str]:
        yield "Ability to create and extract zip/tar archives."

    def get_commands(self) -> Iterator[Command]:
        yield self.create_archive
        yield self.extract_archive
        yield self.list_archive

    def _get_archive_type(self, path: str) -> str:
        """Determine archive type from filename."""
        path_lower = path.lower()
        if path_lower.endswith(".zip"):
            return "zip"
        elif path_lower.endswith((".tar.gz", ".tgz")):
            return "tar.gz"
        elif path_lower.endswith((".tar.bz2", ".tbz2")):
            return "tar.bz2"
        elif path_lower.endswith(".tar"):
            return "tar"
        else:
            return "unknown"

    @command(
        ["create_archive", "zip_files", "compress"],
        "Create a zip or tar archive from files or directories.",
        {
            "output_path": JSONSchema(
                type=JSONSchema.Type.STRING,
                description="Output archive path (e.g. 'backup.zip')",
                required=True,
            ),
            "source_paths": JSONSchema(
                type=JSONSchema.Type.ARRAY,
                items=JSONSchema(type=JSONSchema.Type.STRING),
                description="List of files or directories to archive",
                required=True,
            ),
        },
    )
    def create_archive(self, output_path: str, source_paths: list[str]) -> str:
        """Create an archive from specified files/directories.

        Args:
            output_path: Path for the output archive
            source_paths: List of files/directories to include

        Returns:
            str: Success message with archive details
        """
        archive_type = self._get_archive_type(output_path)

        if archive_type == "unknown":
            raise CommandExecutionError(
                "Unsupported archive format. Use .zip, .tar, .tar.gz, or .tar.bz2"
            )

        # Validate source paths exist
        for path in source_paths:
            if not self.workspace.exists(path):
                raise CommandExecutionError(f"Source path '{path}' does not exist")

        full_output = self.workspace.get_path(output_path)

        # Create parent directory if needed
        if directory := os.path.dirname(output_path):
            self.workspace.make_dir(directory)

        file_count = 0
        total_size = 0

        try:
            if archive_type == "zip":
                with zipfile.ZipFile(full_output, "w", zipfile.ZIP_DEFLATED) as zf:
                    for source in source_paths:
                        source_path = self.workspace.get_path(source)
                        if source_path.is_file():
                            zf.write(source_path, source)
                            file_count += 1
                            total_size += source_path.stat().st_size
                        elif source_path.is_dir():
                            for file in source_path.rglob("*"):
                                if file.is_file():
                                    arcname = str(
                                        Path(source) / file.relative_to(source_path)
                                    )
                                    zf.write(file, arcname)
                                    file_count += 1
                                    total_size += file.stat().st_size
            else:
                # Tar formats
                mode = "w"
                if archive_type == "tar.gz":
                    mode = "w:gz"
                elif archive_type == "tar.bz2":
                    mode = "w:bz2"

                with tarfile.open(full_output, mode) as tf:
                    for source in source_paths:
                        source_path = self.workspace.get_path(source)
                        tf.add(source_path, arcname=source)
                        if source_path.is_file():
                            file_count += 1
                            total_size += source_path.stat().st_size
                        else:
                            for file in source_path.rglob("*"):
                                if file.is_file():
                                    file_count += 1
                                    total_size += file.stat().st_size

            archive_size = full_output.stat().st_size
            compression_ratio = (
                round((1 - archive_size / total_size) * 100, 1) if total_size > 0 else 0
            )

            return json.dumps(
                {
                    "archive": output_path,
                    "type": archive_type,
                    "files_added": file_count,
                    "original_size_bytes": total_size,
                    "archive_size_bytes": archive_size,
                    "compression_ratio": f"{compression_ratio}%",
                },
                indent=2,
            )

        except Exception as e:
            raise CommandExecutionError(f"Failed to create archive: {e}")

    @command(
        ["extract_archive", "unzip", "decompress"],
        "Extract files from a zip or tar archive.",
        {
            "archive_path": JSONSchema(
                type=JSONSchema.Type.STRING,
                description="Path to the archive file",
                required=True,
            ),
            "destination": JSONSchema(
                type=JSONSchema.Type.STRING,
                description="Destination directory (default: current directory)",
                required=False,
            ),
            "members": JSONSchema(
                type=JSONSchema.Type.ARRAY,
                items=JSONSchema(type=JSONSchema.Type.STRING),
                description="Specific files to extract (default: all)",
                required=False,
            ),
        },
    )
    def extract_archive(
        self,
        archive_path: str,
        destination: str = ".",
        members: list[str] | None = None,
    ) -> str:
        """Extract files from an archive.

        Args:
            archive_path: Path to the archive
            destination: Directory to extract to
            members: Specific files to extract

        Returns:
            str: Success message with extraction details
        """
        if not self.workspace.exists(archive_path):
            raise CommandExecutionError(f"Archive '{archive_path}' does not exist")

        archive_type = self._get_archive_type(archive_path)
        full_archive = self.workspace.get_path(archive_path)
        full_dest = self.workspace.get_path(destination)

        # Check archive size
        archive_size = full_archive.stat().st_size
        max_size = self.config.max_archive_size
        if archive_size > max_size:
            raise CommandExecutionError(
                f"Archive too large: {archive_size} bytes (max: {max_size})"
            )

        # Create destination directory
        self.workspace.make_dir(destination)

        extracted_count = 0

        try:
            if archive_type == "zip":
                with zipfile.ZipFile(full_archive, "r") as zf:
                    # Security check for zip slip attack
                    for name in zf.namelist():
                        member_path = (full_dest / name).resolve()
                        if not str(member_path).startswith(str(full_dest.resolve())):
                            raise CommandExecutionError(
                                f"Unsafe archive: '{name}' extracts outside dest"
                            )

                    # Check total uncompressed size
                    total_size = sum(info.file_size for info in zf.infolist())
                    if total_size > self.config.max_extracted_size:
                        raise CommandExecutionError(
                            f"Archive content too large: {total_size} bytes "
                            f"(max: {self.config.max_extracted_size})"
                        )

                    if members:
                        for member in members:
                            zf.extract(member, full_dest)
                            extracted_count += 1
                    else:
                        zf.extractall(full_dest)
                        extracted_count = len(zf.namelist())

            elif archive_type in ("tar", "tar.gz", "tar.bz2"):
                mode = "r"
                if archive_type == "tar.gz":
                    mode = "r:gz"
                elif archive_type == "tar.bz2":
                    mode = "r:bz2"

                with tarfile.open(full_archive, mode) as tf:
                    # Security check for path traversal
                    for member in tf.getmembers():
                        member_path = (full_dest / member.name).resolve()
                        if not str(member_path).startswith(str(full_dest.resolve())):
                            raise CommandExecutionError(
                                f"Unsafe archive: '{member.name}' extracts outside dest"
                            )

                    if members:
                        for member in members:
                            tf.extract(member, full_dest)
                            extracted_count += 1
                    else:
                        tf.extractall(full_dest)
                        extracted_count = len(tf.getmembers())
            else:
                raise CommandExecutionError(
                    f"Unsupported archive format: {archive_type}"
                )

            return json.dumps(
                {
                    "archive": archive_path,
                    "destination": destination,
                    "files_extracted": extracted_count,
                },
                indent=2,
            )

        except (zipfile.BadZipFile, tarfile.TarError) as e:
            raise CommandExecutionError(f"Invalid or corrupted archive: {e}")
        except Exception as e:
            raise CommandExecutionError(f"Extraction failed: {e}")

    @command(
        ["list_archive", "archive_contents"],
        "List the contents of an archive without extracting.",
        {
            "archive_path": JSONSchema(
                type=JSONSchema.Type.STRING,
                description="Path to the archive file",
                required=True,
            ),
        },
    )
    def list_archive(self, archive_path: str) -> str:
        """List contents of an archive.

        Args:
            archive_path: Path to the archive

        Returns:
            str: JSON with archive contents
        """
        if not self.workspace.exists(archive_path):
            raise CommandExecutionError(f"Archive '{archive_path}' does not exist")

        archive_type = self._get_archive_type(archive_path)
        full_archive = self.workspace.get_path(archive_path)

        contents = []

        try:
            if archive_type == "zip":
                with zipfile.ZipFile(full_archive, "r") as zf:
                    for info in zf.infolist():
                        contents.append(
                            {
                                "name": info.filename,
                                "size": info.file_size,
                                "compressed_size": info.compress_size,
                                "is_dir": info.is_dir(),
                            }
                        )
            elif archive_type in ("tar", "tar.gz", "tar.bz2"):
                mode = "r"
                if archive_type == "tar.gz":
                    mode = "r:gz"
                elif archive_type == "tar.bz2":
                    mode = "r:bz2"

                with tarfile.open(full_archive, mode) as tf:
                    for member in tf.getmembers():
                        contents.append(
                            {
                                "name": member.name,
                                "size": member.size,
                                "is_dir": member.isdir(),
                            }
                        )
            else:
                raise CommandExecutionError(
                    f"Unsupported archive format: {archive_type}"
                )

            total_size = sum(item.get("size", 0) for item in contents)

            return json.dumps(
                {
                    "archive": archive_path,
                    "type": archive_type,
                    "file_count": len(contents),
                    "total_size_bytes": total_size,
                    "contents": contents,
                },
                indent=2,
            )

        except (zipfile.BadZipFile, tarfile.TarError) as e:
            raise CommandExecutionError(f"Invalid or corrupted archive: {e}")
6
classic/forge/forge/components/clipboard/__init__.py
Normal file
@@ -0,0 +1,6 @@
from forge.components.clipboard.clipboard import (
    ClipboardComponent,
    ClipboardConfiguration,
)

__all__ = ["ClipboardComponent", "ClipboardConfiguration"]
198
classic/forge/forge/components/clipboard/clipboard.py
Normal file
@@ -0,0 +1,198 @@
import json
import logging
from typing import Any, Iterator, Optional

from pydantic import BaseModel, Field

from forge.agent.components import ConfigurableComponent
from forge.agent.protocols import CommandProvider, DirectiveProvider
from forge.command import Command, command
from forge.models.json_schema import JSONSchema
from forge.utils.exceptions import CommandExecutionError

logger = logging.getLogger(__name__)


class ClipboardConfiguration(BaseModel):
    max_items: int = Field(
        default=100, description="Maximum number of clipboard items to store"
    )
    max_value_size: int = Field(
        default=1024 * 1024,  # 1MB
        description="Maximum size of a single clipboard value in bytes",
    )


class ClipboardComponent(
    DirectiveProvider, CommandProvider, ConfigurableComponent[ClipboardConfiguration]
):
    """In-memory clipboard for storing and retrieving data between commands."""

    config_class = ClipboardConfiguration

    def __init__(self, config: Optional[ClipboardConfiguration] = None):
        ConfigurableComponent.__init__(self, config)
        self._storage: dict[str, Any] = {}

    def get_resources(self) -> Iterator[str]:
        yield "In-memory clipboard for storing temporary data."

    def get_commands(self) -> Iterator[Command]:
        yield self.clipboard_copy
        yield self.clipboard_paste
        yield self.clipboard_list
        yield self.clipboard_clear

    @command(
        ["clipboard_copy", "store", "remember"],
        "Store a value in the clipboard with a key for later retrieval.",
        {
            "key": JSONSchema(
                type=JSONSchema.Type.STRING,
                description="A unique key to identify this data",
                required=True,
            ),
            "value": JSONSchema(
                type=JSONSchema.Type.STRING,
                description="The value to store (can be any string, including JSON)",
                required=True,
            ),
        },
    )
    def clipboard_copy(self, key: str, value: str) -> str:
        """Store a value in the clipboard.

        Args:
            key: The key to store under
            value: The value to store

        Returns:
            str: Confirmation message
        """
        if not key:
            raise CommandExecutionError("Key cannot be empty")

        # Check value size
        value_size = len(value.encode("utf-8"))
        max_size = self.config.max_value_size
        if value_size > max_size:
            raise CommandExecutionError(
                f"Value too large: {value_size} bytes (max: {max_size})"
            )

        # Check item limit (excluding update of existing key)
        if key not in self._storage and len(self._storage) >= self.config.max_items:
            raise CommandExecutionError(
                f"Clipboard full: max {self.config.max_items} items. "
                "Use clipboard_clear to remove items."
            )

        is_update = key in self._storage
        self._storage[key] = value

        action = "Updated" if is_update else "Stored"
        return json.dumps(
            {
                "action": action.lower(),
                "key": key,
                "value_length": len(value),
                "message": f"{action} value under key '{key}'",
            }
        )

    @command(
        ["clipboard_paste", "retrieve", "recall"],
        "Retrieve a value from the clipboard by its key.",
        {
            "key": JSONSchema(
                type=JSONSchema.Type.STRING,
                description="The key of the value to retrieve",
                required=True,
            ),
        },
    )
    def clipboard_paste(self, key: str) -> str:
        """Retrieve a value from the clipboard.

        Args:
            key: The key to retrieve

        Returns:
            str: The stored value or error message
        """
        if key not in self._storage:
            available = list(self._storage.keys())[:10]
            raise CommandExecutionError(
                f"Key '{key}' not found in clipboard. "
                f"Available keys: {available if available else '(empty)'}"
            )

        value = self._storage[key]

        return json.dumps({"key": key, "value": value, "found": True})

    @command(
        ["clipboard_list", "list_stored"],
        "List all keys stored in the clipboard with their value lengths.",
        {},
    )
    def clipboard_list(self) -> str:
        """List all clipboard keys.

        Returns:
            str: JSON with all keys and metadata
        """
        items = []
        for key, value in self._storage.items():
            items.append(
                {
                    "key": key,
                    "value_length": len(str(value)),
                    "value_preview": str(value)[:50]
                    + ("..." if len(str(value)) > 50 else ""),
                }
            )

        return json.dumps(
            {"count": len(items), "items": items, "max_items": self.config.max_items},
            indent=2,
        )

    @command(
        ["clipboard_clear", "forget"],
        "Clear one or all items from the clipboard.",
        {
            "key": JSONSchema(
                type=JSONSchema.Type.STRING,
                description="Specific key to clear (omit to clear all)",
                required=False,
            ),
        },
    )
    def clipboard_clear(self, key: str | None = None) -> str:
        """Clear clipboard items.

        Args:
            key: Specific key to clear, or None to clear all

        Returns:
            str: Confirmation message
        """
        if key is not None:
            if key not in self._storage:
                raise CommandExecutionError(f"Key '{key}' not found in clipboard")

            del self._storage[key]
            return json.dumps(
                {"action": "cleared", "key": key, "message": f"Removed key '{key}'"}
            )
        else:
            count = len(self._storage)
            self._storage.clear()
            return json.dumps(
                {
                    "action": "cleared_all",
                    "items_removed": count,
                    "message": f"Cleared {count} item(s) from clipboard",
                }
            )

@@ -18,6 +18,7 @@ from forge.command import Command, command
from forge.file_storage import FileStorage
from forge.models.json_schema import JSONSchema
from forge.utils.exceptions import (
    CodeTimeoutError,
    CommandExecutionError,
    InvalidArgumentError,
    OperationNotAllowedError,
@@ -126,9 +127,26 @@ class CodeExecutorComponent(
                description="The Python code to run",
                required=True,
            ),
            "timeout": JSONSchema(
                type=JSONSchema.Type.INTEGER,
                description="Timeout in seconds (1-600, default: 120)",
                minimum=1,
                maximum=600,
                required=False,
            ),
            "env_vars": JSONSchema(
                type=JSONSchema.Type.OBJECT,
                description="Environment variables to set for the execution",
                required=False,
            ),
        },
    )
    async def execute_python_code(self, code: str) -> str:
    async def execute_python_code(
        self,
        code: str,
        timeout: int = 120,
        env_vars: dict[str, str] | None = None,
    ) -> str:
        """
        Create and execute a Python file in a Docker container
        and return the STDOUT of the executed code.
@@ -138,7 +156,8 @@ class CodeExecutorComponent(

        Args:
            code (str): The Python code to run.
            agent (Agent): The Agent executing the command.
            timeout (int): Timeout in seconds (default: 120).
            env_vars (dict): Environment variables to set.

        Returns:
            str: The STDOUT captured from the code when it ran.
@@ -152,7 +171,9 @@ class CodeExecutorComponent(
        await self.workspace.write_file(temp_path, code)

        try:
            return self.execute_python_file(temp_path)
            return self.execute_python_file(
                temp_path, timeout=timeout, env_vars=env_vars
            )
        except Exception as e:
            raise CommandExecutionError(*e.args)
        finally:
@@ -174,14 +195,34 @@ class CodeExecutorComponent(
                required=False,
                items=JSONSchema(type=JSONSchema.Type.STRING),
            ),
            "timeout": JSONSchema(
                type=JSONSchema.Type.INTEGER,
                description="Timeout in seconds (1-600, default: 120)",
                minimum=1,
                maximum=600,
                required=False,
            ),
            "env_vars": JSONSchema(
                type=JSONSchema.Type.OBJECT,
                description="Environment variables to set for the execution",
                required=False,
            ),
        },
    )
    def execute_python_file(self, filename: str | Path, args: list[str] = []) -> str:
    def execute_python_file(
        self,
        filename: str | Path,
        args: list[str] = [],
        timeout: int = 120,
        env_vars: dict[str, str] | None = None,
    ) -> str:
        """Execute a Python file in a Docker container and return the output

        Args:
            filename (Path): The name of the file to execute
            args (list, optional): The arguments with which to run the python script
            timeout (int): Timeout in seconds (default: 120)
            env_vars (dict): Environment variables to set

        Returns:
            str: The output of the file
@@ -200,26 +241,42 @@ class CodeExecutorComponent(
                f"[Errno 2] No such file or directory"
            )

        # Prepare environment variables
        exec_env = os.environ.copy()
        if env_vars:
            exec_env.update(env_vars)

        if we_are_running_in_a_docker_container():
            logger.debug(
                "App is running in a Docker container; "
                f"executing {file_path} directly..."
            )
            with self.workspace.mount() as local_path:
                result = subprocess.run(
                    ["python", "-B", str(file_path.relative_to(self.workspace.root))]
                    + args,
                    capture_output=True,
                    encoding="utf8",
                    cwd=str(local_path),
                )
                try:
                    result = subprocess.run(
                        [
                            "python",
                            "-B",
                            str(file_path.relative_to(self.workspace.root)),
                        ]
                        + args,
                        capture_output=True,
                        encoding="utf8",
                        cwd=str(local_path),
                        timeout=timeout,
                        env=exec_env,
                    )
                except subprocess.TimeoutExpired:
                    raise CodeTimeoutError(
                        f"Python execution timed out after {timeout} seconds"
                    )
            if result.returncode == 0:
                return result.stdout
            else:
                raise CodeExecutionError(result.stderr)

        logger.debug("App is not running in a Docker container")
        return self._run_python_code_in_docker(file_path, args)
        return self._run_python_code_in_docker(file_path, args, timeout, env_vars)

    def validate_command(self, command_line: str) -> tuple[bool, bool]:
        """Check whether a command is allowed and whether it may be executed in a shell.
@@ -255,14 +312,33 @@ class CodeExecutorComponent(
                type=JSONSchema.Type.STRING,
                description="The command line to execute",
                required=True,
            )
            ),
            "timeout": JSONSchema(
                type=JSONSchema.Type.INTEGER,
                description="Timeout in seconds (1-600, default: 120)",
                minimum=1,
                maximum=600,
                required=False,
            ),
            "working_dir": JSONSchema(
                type=JSONSchema.Type.STRING,
                description="Working directory (default: workspace root)",
                required=False,
            ),
        },
    )
    def execute_shell(self, command_line: str) -> str:
    def execute_shell(
        self,
        command_line: str,
        timeout: int = 120,
        working_dir: str | None = None,
    ) -> str:
        """Execute a shell command and return the output

        Args:
            command_line (str): The command line to execute
            timeout (int): Timeout in seconds (default: 120)
            working_dir (str): Working directory for command execution

        Returns:
            str: The output of the command
@@ -272,25 +348,32 @@ class CodeExecutorComponent(
            logger.info(f"Command '{command_line}' not allowed")
            raise OperationNotAllowedError("This shell command is not allowed.")

        current_dir = Path.cwd()
        # Change dir into workspace if necessary
        if not current_dir.is_relative_to(self.workspace.root):
            os.chdir(self.workspace.root)
        # Determine working directory
        if working_dir:
            exec_dir = self.workspace.get_path(working_dir)
            if not exec_dir.exists():
                raise InvalidArgumentError(
                    f"Working directory '{working_dir}' does not exist."
                )
        else:
            exec_dir = self.workspace.root

        logger.info(
            f"Executing command '{command_line}' in working directory '{os.getcwd()}'"
            f"Executing command '{command_line}' in working directory '{exec_dir}'"
        )

        result = subprocess.run(
            command_line if allow_shell else shlex.split(command_line),
            capture_output=True,
            shell=allow_shell,
        )
        try:
            result = subprocess.run(
                command_line if allow_shell else shlex.split(command_line),
                capture_output=True,
                shell=allow_shell,
                cwd=str(exec_dir),
                timeout=timeout,
            )
        except subprocess.TimeoutExpired:
            raise CodeTimeoutError(f"Shell command timed out after {timeout} seconds")

        output = f"STDOUT:\n{result.stdout.decode()}\nSTDERR:\n{result.stderr.decode()}"

        # Change back to whatever the prior working dir was
        os.chdir(current_dir)

        return output

    @command(
@@ -341,8 +424,24 @@ class CodeExecutorComponent(

        return f"Subprocess started with PID:'{str(process.pid)}'"

    def _run_python_code_in_docker(self, filename: str | Path, args: list[str]) -> str:
        """Run a Python script in a Docker container"""
    def _run_python_code_in_docker(
        self,
        filename: str | Path,
        args: list[str],
        timeout: int = 120,
        env_vars: dict[str, str] | None = None,
    ) -> str:
        """Run a Python script in a Docker container

        Args:
            filename: Path to the Python file
            args: Command line arguments for the script
            timeout: Timeout in seconds
            env_vars: Environment variables to set

        Returns:
            str: The output of the script
        """
        file_path = self.workspace.get_path(filename)
        try:
            client = docker.from_env()
@@ -376,10 +475,12 @@ class CodeExecutorComponent(
            elif status:
                logger.info(status)

            # Use timeout for container sleep duration
            sleep_duration = str(max(timeout, 60))
            logger.debug(f"Creating new {image_name} container...")
            container: DockerContainer = client.containers.run(
                image_name,
                ["sleep", "60"],  # Max 60 seconds to prevent permanent hangs
                ["sleep", sleep_duration],
                volumes={
                    str(local_path.resolve()): {
                        "bind": "/workspace",
@@ -391,6 +492,7 @@ class CodeExecutorComponent(
                stdout=True,
                detach=True,
                name=container_name,
                environment=env_vars or {},
            )  # type: ignore
            container_is_fresh = True

@@ -401,6 +503,9 @@ class CodeExecutorComponent(

        logger.debug(f"Running {file_path} in container {container.name}...")

        # Prepare environment for exec_run
        exec_env = env_vars or {}

        exec_result = container.exec_run(
            [
                "python",
@@ -410,6 +515,7 @@ class CodeExecutorComponent(
            + args,
            stderr=True,
            stdout=True,
            environment=exec_env,
        )

        if exec_result.exit_code != 0:

@@ -0,0 +1,6 @@
from forge.components.data_processor.data_processor import (
    DataProcessorComponent,
    DataProcessorConfiguration,
)

__all__ = ["DataProcessorComponent", "DataProcessorConfiguration"]
476
classic/forge/forge/components/data_processor/data_processor.py
Normal file
@@ -0,0 +1,476 @@
|
||||
import csv
|
||||
import io
|
||||
import json
|
||||
import logging
|
||||
from typing import Any, Iterator, Literal, Optional
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from forge.agent.components import ConfigurableComponent
|
||||
from forge.agent.protocols import CommandProvider, DirectiveProvider
|
||||
from forge.command import Command, command
|
||||
from forge.models.json_schema import JSONSchema
|
||||
from forge.utils.exceptions import DataProcessingError
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class DataProcessorConfiguration(BaseModel):
|
||||
max_json_depth: int = Field(
|
||||
default=10, description="Maximum nesting depth for JSON parsing"
|
||||
)
|
||||
max_csv_rows: int = Field(
|
||||
default=10000, description="Maximum rows to process in CSV operations"
|
||||
)
|
||||
|
||||
|
||||
class DataProcessorComponent(
|
||||
DirectiveProvider,
|
||||
CommandProvider,
|
||||
ConfigurableComponent[DataProcessorConfiguration],
|
||||
):
|
||||
"""Provides commands to parse, transform, and query structured data."""
|
||||
|
||||
config_class = DataProcessorConfiguration
|
||||
|
||||
def __init__(self, config: Optional[DataProcessorConfiguration] = None):
|
||||
ConfigurableComponent.__init__(self, config)
|
||||
|
||||
def get_resources(self) -> Iterator[str]:
|
||||
yield "Ability to parse and manipulate JSON and CSV data."
|
||||
|
||||
def get_commands(self) -> Iterator[Command]:
|
||||
yield self.parse_json
|
||||
yield self.format_json
|
||||
yield self.query_json
|
||||
yield self.parse_csv
|
||||
yield self.filter_csv
|
||||
yield self.aggregate_csv
|
||||
|
||||
@command(
|
||||
["parse_json", "validate_json"],
|
||||
"Parse and validate a JSON string, returning a structured representation.",
|
||||
{
|
||||
"json_string": JSONSchema(
|
||||
type=JSONSchema.Type.STRING,
|
||||
description="The JSON string to parse",
|
||||
required=True,
|
||||
),
|
||||
},
|
||||
)
|
||||
def parse_json(self, json_string: str) -> str:
|
||||
"""Parse and validate a JSON string.
|
||||
|
||||
Args:
|
||||
json_string: The JSON string to parse
|
||||
|
||||
Returns:
|
||||
str: Parsed JSON as formatted string with type information
|
||||
"""
|
||||
try:
|
||||
data = json.loads(json_string)
|
||||
|
||||
# Provide type information
|
||||
result = {
|
||||
"valid": True,
|
||||
"type": type(data).__name__,
|
||||
"data": data,
|
||||
}
|
||||
|
||||
if isinstance(data, list):
|
||||
result["length"] = len(data)
|
||||
elif isinstance(data, dict):
|
||||
result["keys"] = list(data.keys())
|
||||
|
||||
return json.dumps(result, indent=2)
|
||||
|
||||
except json.JSONDecodeError as e:
|
||||
return json.dumps(
|
||||
{
|
||||
"valid": False,
|
||||
"error": str(e),
|
||||
"line": e.lineno,
|
||||
"column": e.colno,
|
||||
},
|
||||
indent=2,
|
||||
)
|
||||
|
||||
@command(
|
||||
["format_json", "pretty_print_json"],
|
||||
"Format JSON with proper indentation for readability.",
|
||||
{
|
||||
"json_string": JSONSchema(
|
||||
type=JSONSchema.Type.STRING,
|
||||
description="The JSON string to format",
|
||||
required=True,
|
||||
),
|
||||
"indent": JSONSchema(
|
||||
type=JSONSchema.Type.INTEGER,
|
||||
description="Number of spaces for indentation (default: 2)",
|
||||
minimum=0,
|
||||
maximum=8,
|
||||
required=False,
|
||||
),
|
||||
},
|
||||
)
|
||||
def format_json(self, json_string: str, indent: int = 2) -> str:
|
||||
"""Format JSON with proper indentation.
|
||||
|
||||
Args:
|
||||
json_string: The JSON string to format
|
||||
indent: Number of spaces for indentation
|
||||
|
||||
Returns:
|
||||
str: Formatted JSON string
|
||||
"""
|
||||
try:
|
||||
data = json.loads(json_string)
|
||||
return json.dumps(data, indent=indent, ensure_ascii=False)
|
||||
except json.JSONDecodeError as e:
|
||||
raise DataProcessingError(f"Invalid JSON: {e}")
|
||||
|
||||
def _query_path(self, data: Any, path: str) -> Any:
|
||||
"""Query JSON data using a dot-notation path with array support.
|
||||
|
||||
Args:
|
||||
data: The data to query
|
||||
path: Path like "users[0].name" or "config.settings.enabled"
|
||||
|
||||
Returns:
|
||||
The value at the path
|
||||
"""
|
||||
import re
|
||||
|
||||
if not path:
|
||||
return data
|
||||
|
||||
# Split path into segments, handling array notation
|
||||
segments = []
|
||||
for part in path.split("."):
|
||||
# Handle array notation like "users[0]"
|
||||
array_match = re.match(r"^(\w+)\[(\d+)\]$", part)
|
||||
if array_match:
|
||||
segments.append(array_match.group(1))
|
||||
segments.append(int(array_match.group(2)))
|
||||
elif part.isdigit():
|
||||
segments.append(int(part))
|
||||
else:
|
||||
segments.append(part)
|
||||
|
||||
result = data
|
||||
for segment in segments:
|
||||
try:
|
||||
if isinstance(segment, int):
|
||||
result = result[segment]
|
||||
elif isinstance(result, dict):
|
||||
result = result[segment]
|
||||
elif isinstance(result, list) and segment.isdigit():
|
||||
result = result[int(segment)]
|
||||
else:
|
||||
raise DataProcessingError(
|
||||
f"Cannot access '{segment}' on {type(result).__name__}"
|
||||
)
|
||||
except (KeyError, IndexError, TypeError) as e:
|
||||
raise DataProcessingError(f"Path query failed at '{segment}': {e}")
|
||||
|
||||
return result
|
||||
|
||||
@command(
|
||||
["query_json", "json_path"],
|
||||
"Query JSON data using a dot-notation path (e.g., 'users[0].name').",
|
||||
{
|
||||
"json_string": JSONSchema(
|
||||
type=JSONSchema.Type.STRING,
|
||||
description="The JSON string to query",
|
||||
required=True,
|
||||
),
|
||||
"path": JSONSchema(
|
||||
type=JSONSchema.Type.STRING,
|
||||
description="Path to query (e.g., 'data.users[0].email')",
|
||||
required=True,
|
||||
),
|
||||
},
|
||||
)
|
||||
def query_json(self, json_string: str, path: str) -> str:
|
||||
"""Query JSON using dot-notation path.
|
||||
|
||||
Args:
|
||||
json_string: The JSON string to query
|
||||
path: The path to query
|
||||
|
||||
Returns:
|
||||
str: The value at the path as JSON
|
||||
"""
|
||||
try:
|
||||
data = json.loads(json_string)
|
||||
result = self._query_path(data, path)
|
||||
return json.dumps(result, indent=2)
|
||||
except json.JSONDecodeError as e:
|
||||
raise DataProcessingError(f"Invalid JSON: {e}")
|
||||
|
||||
@command(
|
||||
["parse_csv", "csv_to_json"],
|
||||
"Parse CSV string into JSON array of objects.",
|
||||
{
|
||||
"csv_string": JSONSchema(
|
||||
type=JSONSchema.Type.STRING,
|
||||
description="The CSV string to parse",
|
||||
required=True,
|
||||
),
|
||||
"has_header": JSONSchema(
|
||||
type=JSONSchema.Type.BOOLEAN,
|
||||
description="Whether the first row is a header (default: True)",
|
||||
required=False,
|
||||
),
|
||||
"delimiter": JSONSchema(
|
||||
type=JSONSchema.Type.STRING,
|
||||
description="Field delimiter (default: ',')",
|
||||
required=False,
|
||||
),
|
||||
},
|
||||
)
|
||||
def parse_csv(
|
||||
self, csv_string: str, has_header: bool = True, delimiter: str = ","
|
||||
) -> str:
|
||||
"""Parse CSV string into JSON.
|
||||
|
||||
Args:
|
||||
csv_string: The CSV string to parse
|
||||
has_header: Whether first row is header
|
||||
delimiter: Field delimiter
|
||||
|
||||
Returns:
|
||||
str: JSON array of objects or arrays
|
||||
"""
|
||||
try:
|
||||
reader = csv.reader(io.StringIO(csv_string), delimiter=delimiter)
|
||||
rows = list(reader)
|
||||
|
||||
if len(rows) > self.config.max_csv_rows:
|
||||
raise DataProcessingError(
|
||||
f"CSV exceeds maximum of {self.config.max_csv_rows} rows"
|
||||
)
|
||||
|
||||
if not rows:
|
||||
return json.dumps([])
|
||||
|
||||
if has_header:
|
||||
headers = rows[0]
|
||||
data = [dict(zip(headers, row)) for row in rows[1:]]
|
||||
else:
|
||||
data = rows
|
||||
|
||||
return json.dumps(data, indent=2)
|
||||
|
||||
except csv.Error as e:
|
||||
raise DataProcessingError(f"CSV parsing error: {e}")
|
||||
|
||||
@command(
|
||||
["filter_csv", "csv_filter"],
|
||||
"Filter CSV rows based on a column condition.",
|
||||
{
|
||||
"csv_string": JSONSchema(
|
||||
type=JSONSchema.Type.STRING,
|
||||
description="The CSV string to filter",
|
||||
required=True,
|
||||
),
|
||||
"column": JSONSchema(
|
||||
type=JSONSchema.Type.STRING,
|
||||
description="Column name or index to filter on",
|
||||
required=True,
|
||||
),
|
||||
"operator": JSONSchema(
|
||||
type=JSONSchema.Type.STRING,
|
||||
description="Comparison operator (eq, ne, gt, lt, gte, lte, contains)",
|
||||
required=True,
|
||||
),
|
||||
"value": JSONSchema(
|
||||
type=JSONSchema.Type.STRING,
|
||||
description="Value to compare against",
|
||||
required=True,
|
||||
),
|
||||
},
|
||||
)
|
||||
def filter_csv(
|
||||
self,
|
||||
csv_string: str,
|
||||
column: str,
|
||||
operator: Literal["eq", "ne", "gt", "lt", "gte", "lte", "contains"],
|
||||
value: str,
|
||||
) -> str:
|
||||
"""Filter CSV rows based on a column condition.
|
||||
|
||||
Args:
|
||||
csv_string: The CSV string to filter
|
||||
column: Column name or index
|
||||
operator: Comparison operator
|
||||
value: Value to compare against
|
||||
|
||||
Returns:
|
||||
str: Filtered CSV as JSON
|
||||
"""
|
||||
# Parse CSV
|
||||
data = json.loads(self.parse_csv(csv_string))
|
||||
|
||||
if not data:
|
||||
return json.dumps([])
|
||||
|
||||
def compare(row_value: Any, op: str, comp_value: str) -> bool:
|
||||
# Try numeric comparison
|
||||
try:
|
||||
row_num = float(row_value)
|
||||
comp_num = float(comp_value)
|
||||
if op == "eq":
|
||||
return row_num == comp_num
|
||||
elif op == "ne":
|
||||
return row_num != comp_num
|
||||
elif op == "gt":
|
||||
return row_num > comp_num
|
||||
elif op == "lt":
|
||||
return row_num < comp_num
|
||||
elif op == "gte":
|
||||
                    return row_num >= comp_num
                elif op == "lte":
                    return row_num <= comp_num
            except (ValueError, TypeError):
                pass

            # String comparison
            row_str = str(row_value).lower()
            comp_str = comp_value.lower()

            if op == "eq":
                return row_str == comp_str
            elif op == "ne":
                return row_str != comp_str
            elif op == "contains":
                return comp_str in row_str
            elif op in ("gt", "lt", "gte", "lte"):
                # String comparison for non-numeric
                if op == "gt":
                    return row_str > comp_str
                elif op == "lt":
                    return row_str < comp_str
                elif op == "gte":
                    return row_str >= comp_str
                elif op == "lte":
                    return row_str <= comp_str

            return False

        filtered = []
        for row in data:
            if isinstance(row, dict):
                if column in row:
                    if compare(row[column], operator, value):
                        filtered.append(row)
            elif isinstance(row, list):
                try:
                    col_idx = int(column)
                    if col_idx < len(row):
                        if compare(row[col_idx], operator, value):
                            filtered.append(row)
                except ValueError:
                    pass

        return json.dumps(filtered, indent=2)

    @command(
        ["aggregate_csv", "csv_aggregate"],
        "Aggregate data in a CSV column (sum, avg, min, max, count).",
        {
            "csv_string": JSONSchema(
                type=JSONSchema.Type.STRING,
                description="The CSV string to aggregate",
                required=True,
            ),
            "column": JSONSchema(
                type=JSONSchema.Type.STRING,
                description="Column name to aggregate",
                required=True,
            ),
            "operation": JSONSchema(
                type=JSONSchema.Type.STRING,
                description="Aggregation operation (sum, avg, min, max, count)",
                required=True,
            ),
            "group_by": JSONSchema(
                type=JSONSchema.Type.STRING,
                description="Optional column to group by",
                required=False,
            ),
        },
    )
    def aggregate_csv(
        self,
        csv_string: str,
        column: str,
        operation: Literal["sum", "avg", "min", "max", "count"],
        group_by: str | None = None,
    ) -> str:
        """Aggregate data in a CSV column.

        Args:
            csv_string: The CSV string to aggregate
            column: Column name to aggregate
            operation: Aggregation operation
            group_by: Optional grouping column

        Returns:
            str: Aggregation result as JSON
        """
        data = json.loads(self.parse_csv(csv_string))

        if not data:
            return json.dumps({"result": None, "error": "No data"})

        def aggregate(values: list) -> float | int | None:
            # Filter to numeric values
            numeric = []
            for v in values:
                try:
                    numeric.append(float(v))
                except (ValueError, TypeError):
                    continue

            if not numeric:
                if operation == "count":
                    return len(values)
                return None

            if operation == "sum":
                return sum(numeric)
            elif operation == "avg":
                return sum(numeric) / len(numeric)
            elif operation == "min":
                return min(numeric)
            elif operation == "max":
                return max(numeric)
            elif operation == "count":
                return len(values)
            return None

        if group_by:
            # Group by operation
            groups: dict[str, list] = {}
            for row in data:
                if isinstance(row, dict):
                    key = str(row.get(group_by, ""))
                    value = row.get(column)
                    if key not in groups:
                        groups[key] = []
                    groups[key].append(value)

            result = {key: aggregate(values) for key, values in groups.items()}
            return json.dumps({"grouped_by": group_by, "results": result}, indent=2)
        else:
            # Simple aggregation
            values = []
            for row in data:
                if isinstance(row, dict):
                    values.append(row.get(column))

            return json.dumps(
                {"column": column, "operation": operation, "result": aggregate(values)},
                indent=2,
            )
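For orientation, here is a minimal standalone sketch of the grouped-aggregation path above; the component wiring (parse_csv, the @command registration) is omitted and the sample rows are hypothetical:

# Hypothetical rows, mirroring what parse_csv would return for a small CSV.
data = [
    {"dept": "eng", "salary": "100"},
    {"dept": "eng", "salary": "110"},
    {"dept": "ops", "salary": "90"},
]

groups: dict[str, list] = {}
for row in data:
    groups.setdefault(str(row.get("dept", "")), []).append(row.get("salary"))

# Same shape as aggregate() with operation == "avg" (all values numeric here).
result = {key: sum(map(float, values)) / len(values) for key, values in groups.items()}
print(result)  # {'eng': 105.0, 'ops': 90.0}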
@@ -1,5 +1,8 @@
import fnmatch
import logging
import os
import re
from datetime import datetime
from pathlib import Path
from typing import Iterator, Optional

@@ -11,6 +14,7 @@ from forge.agent.protocols import CommandProvider, DirectiveProvider
from forge.command import Command, command
from forge.file_storage.base import FileStorage
from forge.models.json_schema import JSONSchema
from forge.utils.exceptions import CommandExecutionError
from forge.utils.file_operations import decode_textual_file

logger = logging.getLogger(__name__)

@@ -109,6 +113,12 @@ class FileManagerComponent(
        yield self.read_file
        yield self.write_to_file
        yield self.list_folder
        yield self.append_to_file
        yield self.copy_file
        yield self.move_file
        yield self.delete_file
        yield self.search_in_files
        yield self.get_file_info

    @command(
        parameters={
@@ -184,3 +194,271 @@ class FileManagerComponent(
            list[str]: A list of files found in the folder
        """
        return [str(p) for p in self.workspace.list_files(folder)]

    @command(
        ["append_to_file", "append_file"],
        "Append content to an existing file. Creates the file if it doesn't exist.",
        {
            "filename": JSONSchema(
                type=JSONSchema.Type.STRING,
                description="The path of the file to append to",
                required=True,
            ),
            "contents": JSONSchema(
                type=JSONSchema.Type.STRING,
                description="The content to append to the file",
                required=True,
            ),
        },
    )
    async def append_to_file(self, filename: str | Path, contents: str) -> str:
        """Append content to a file, creating it if necessary.

        Args:
            filename (str): The name of the file to append to
            contents (str): The content to append

        Returns:
            str: A message indicating success
        """
        if directory := os.path.dirname(filename):
            self.workspace.make_dir(directory)

        existing_content = ""
        if self.workspace.exists(filename):
            file = self.workspace.open_file(filename, binary=True)
            existing_content = decode_textual_file(
                file, os.path.splitext(filename)[1], logger
            )

        await self.workspace.write_file(filename, existing_content + contents)
        return f"Content appended to {filename} successfully."

    @command(
        parameters={
            "source": JSONSchema(
                type=JSONSchema.Type.STRING,
                description="The path of the file or directory to copy",
                required=True,
            ),
            "destination": JSONSchema(
                type=JSONSchema.Type.STRING,
                description="The destination path",
                required=True,
            ),
        },
    )
    def copy_file(self, source: str | Path, destination: str | Path) -> str:
        """Copy a file or directory to a new location.

        Args:
            source (str): The source path
            destination (str): The destination path

        Returns:
            str: A message indicating success
        """
        if not self.workspace.exists(source):
            raise CommandExecutionError(f"Source path '{source}' does not exist.")

        if directory := os.path.dirname(destination):
            self.workspace.make_dir(directory)

        self.workspace.copy(source, destination)
        return f"Copied '{source}' to '{destination}' successfully."

    @command(
        ["move_file", "rename_file"],
        "Move or rename a file or directory.",
        {
            "source": JSONSchema(
                type=JSONSchema.Type.STRING,
                description="The current path of the file or directory",
                required=True,
            ),
            "destination": JSONSchema(
                type=JSONSchema.Type.STRING,
                description="The new path",
                required=True,
            ),
        },
    )
    def move_file(self, source: str | Path, destination: str | Path) -> str:
        """Move or rename a file or directory.

        Args:
            source (str): The source path
            destination (str): The destination path

        Returns:
            str: A message indicating success
        """
        if not self.workspace.exists(source):
            raise CommandExecutionError(f"Source path '{source}' does not exist.")

        if directory := os.path.dirname(destination):
            self.workspace.make_dir(directory)

        self.workspace.rename(source, destination)
        return f"Moved '{source}' to '{destination}' successfully."

    @command(
        ["delete_file", "remove_file"],
        "Delete a file. Use with caution - this operation cannot be undone.",
        {
            "filename": JSONSchema(
                type=JSONSchema.Type.STRING,
                description="The path of the file to delete",
                required=True,
            ),
        },
    )
    def delete_file(self, filename: str | Path) -> str:
        """Delete a file.

        Args:
            filename (str): The name of the file to delete

        Returns:
            str: A message indicating success
        """
        if not self.workspace.exists(filename):
            raise CommandExecutionError(f"File '{filename}' does not exist.")

        self.workspace.delete_file(filename)
        return f"File '{filename}' deleted successfully."

    @command(
        ["search_in_files", "grep_files"],
        "Search for a pattern in files. Returns matches with filenames and lines.",
        {
            "pattern": JSONSchema(
                type=JSONSchema.Type.STRING,
                description="The regex pattern to search for",
                required=True,
            ),
            "directory": JSONSchema(
                type=JSONSchema.Type.STRING,
                description="The directory to search in (default: current directory)",
                required=False,
            ),
            "file_pattern": JSONSchema(
                type=JSONSchema.Type.STRING,
                description="Glob pattern to filter files (e.g., '*.py', '*.txt')",
                required=False,
            ),
            "max_results": JSONSchema(
                type=JSONSchema.Type.INTEGER,
                description="Maximum number of results to return (default: 100)",
                required=False,
            ),
        },
    )
    def search_in_files(
        self,
        pattern: str,
        directory: str | Path = ".",
        file_pattern: str = "*",
        max_results: int = 100,
    ) -> str:
        """Search for a pattern in files.

        Args:
            pattern (str): The regex pattern to search for
            directory (str): The directory to search in
            file_pattern (str): Glob pattern to filter files
            max_results (int): Maximum number of results

        Returns:
            str: Matching lines with file names and line numbers
        """
        try:
            regex = re.compile(pattern)
        except re.error as e:
            raise CommandExecutionError(f"Invalid regex pattern: {e}")

        results = []
        files = self.workspace.list_files(directory)

        for file_path in files:
            if not fnmatch.fnmatch(str(file_path), file_pattern):
                continue

            try:
                file = self.workspace.open_file(file_path, binary=True)
                content = decode_textual_file(
                    file, os.path.splitext(file_path)[1], logger
                )

                for line_num, line in enumerate(content.splitlines(), 1):
                    if regex.search(line):
                        results.append(f"{file_path}:{line_num}: {line.strip()}")
                        if len(results) >= max_results:
                            break

                if len(results) >= max_results:
                    break
            except Exception:
                # Skip files that can't be read as text
                continue

        if not results:
            return f"No matches found for pattern '{pattern}'"

        header = f"Found {len(results)} match(es)"
        if len(results) >= max_results:
            header += f" (limited to {max_results})"
        header += ":"

        return header + "\n" + "\n".join(results)

    @command(
        ["get_file_info", "file_info", "file_stats"],
        "Get information about a file including size, modification time, and type.",
        {
            "filename": JSONSchema(
                type=JSONSchema.Type.STRING,
                description="The path of the file to get info for",
                required=True,
            ),
        },
    )
    def get_file_info(self, filename: str | Path) -> str:
        """Get information about a file.

        Args:
            filename (str): The name of the file

        Returns:
            str: File information in a readable format
        """
        if not self.workspace.exists(filename):
            raise CommandExecutionError(f"File '{filename}' does not exist.")

        file_path = self.workspace.get_path(filename)
        stat_info = file_path.stat()

        size_bytes = stat_info.st_size
        if size_bytes < 1024:
            size_str = f"{size_bytes} bytes"
        elif size_bytes < 1024 * 1024:
            size_str = f"{size_bytes / 1024:.2f} KB"
        else:
            size_str = f"{size_bytes / (1024 * 1024):.2f} MB"

        modified_time = datetime.fromtimestamp(stat_info.st_mtime)
        created_time = datetime.fromtimestamp(stat_info.st_ctime)

        file_type = "directory" if file_path.is_dir() else "file"
        extension = file_path.suffix if file_path.suffix else "none"

        info = [
            f"File: {filename}",
            f"Type: {file_type}",
            f"Extension: {extension}",
            f"Size: {size_str}",
            f"Modified: {modified_time.strftime('%Y-%m-%d %H:%M:%S')}",
            f"Created: {created_time.strftime('%Y-%m-%d %H:%M:%S')}",
        ]

        return "\n".join(info)
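The size formatting in get_file_info is the usual stat-then-humanize pattern; a self-contained illustration using only the standard library (no FileStorage workspace, file name hypothetical):

from datetime import datetime
from pathlib import Path

path = Path("example.txt")  # hypothetical file
path.write_text("hello")
stat_info = path.stat()

size = stat_info.st_size
if size < 1024:
    size_str = f"{size} bytes"
elif size < 1024 * 1024:
    size_str = f"{size / 1024:.2f} KB"
else:
    size_str = f"{size / (1024 * 1024):.2f} MB"

print(size_str, datetime.fromtimestamp(stat_info.st_mtime))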
@@ -1,6 +1,7 @@
from pathlib import Path
from typing import Iterator, Optional

from git.exc import GitCommandError, InvalidGitRepositoryError
from git.repo import Repo
from pydantic import BaseModel, SecretStr

@@ -29,11 +30,43 @@ class GitOperationsComponent(

    def __init__(self, config: Optional[GitOperationsConfiguration] = None):
        ConfigurableComponent.__init__(self, config)
        self._enabled = bool(self.config.github_username and self.config.github_api_key)
        self._disabled_reason = "Configure github_username and github_api_key."
        # Clone repository needs credentials, but other git operations work without
        self._enabled = True

    def get_commands(self) -> Iterator[Command]:
        yield self.clone_repository
        # Only yield clone if credentials are configured
        if self.config.github_username and self.config.github_api_key:
            yield self.clone_repository
        # These commands work on any local git repository
        yield self.git_status
        yield self.git_add
        yield self.git_commit
        yield self.git_push
        yield self.git_pull
        yield self.git_diff
        yield self.git_branch
        yield self.git_checkout
        yield self.git_log

    def _get_repo(self, repo_path: str | Path | None = None) -> Repo:
        """Get a Repo object for the given path.

        Args:
            repo_path: Path to the repository, or None for current directory

        Returns:
            Repo: The git repository object

        Raises:
            CommandExecutionError: If the path is not a git repository
        """
        path = Path(repo_path) if repo_path else Path.cwd()
        try:
            return Repo(path, search_parent_directories=True)
        except InvalidGitRepositoryError:
            raise CommandExecutionError(
                f"'{path}' is not a git repository (or any parent up to mount point)"
            )
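search_parent_directories=True mirrors git's own repository discovery: start at the given path and walk upward until a .git directory is found. A minimal sketch (assumes GitPython is installed and the working directory is inside a repository):

from git.repo import Repo

repo = Repo(".", search_parent_directories=True)
print(repo.working_tree_dir)  # root of the enclosing repository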
    @command(
        parameters={
@@ -75,3 +108,477 @@ class GitOperationsComponent(
            raise CommandExecutionError(f"Could not clone repo: {e}")

        return f"""Cloned {url} to {clone_path}"""

    @command(
        ["git_status"],
        "Show the working tree status including staged, unstaged, and untracked files.",
        {
            "repo_path": JSONSchema(
                type=JSONSchema.Type.STRING,
                description="Path to the repository (default: current directory)",
                required=False,
            ),
        },
    )
    def git_status(self, repo_path: str | None = None) -> str:
        """Show the working tree status.

        Args:
            repo_path: Path to the repository

        Returns:
            str: Status information
        """
        repo = self._get_repo(repo_path)

        # Get the current branch
        try:
            branch = repo.active_branch.name
        except TypeError:
            branch = "HEAD detached"

        # Get status information
        staged = [item.a_path for item in repo.index.diff("HEAD")]
        unstaged = [item.a_path for item in repo.index.diff(None)]
        untracked = repo.untracked_files

        lines = [f"On branch {branch}", ""]

        if staged:
            lines.append("Changes to be committed:")
            for file in staged:
                lines.append(f"  modified: {file}")
            lines.append("")

        if unstaged:
            lines.append("Changes not staged for commit:")
            for file in unstaged:
                lines.append(f"  modified: {file}")
            lines.append("")

        if untracked:
            lines.append("Untracked files:")
            for file in untracked:
                lines.append(f"  {file}")
            lines.append("")

        if not staged and not unstaged and not untracked:
            lines.append("nothing to commit, working tree clean")

        return "\n".join(lines)
    @command(
        ["git_add", "stage_files"],
        "Stage files for commit.",
        {
            "files": JSONSchema(
                type=JSONSchema.Type.ARRAY,
                items=JSONSchema(type=JSONSchema.Type.STRING),
                description="Files to stage. Use ['.'] to stage all changes.",
                required=True,
            ),
            "repo_path": JSONSchema(
                type=JSONSchema.Type.STRING,
                description="Path to the repository (default: current directory)",
                required=False,
            ),
        },
    )
    def git_add(self, files: list[str], repo_path: str | None = None) -> str:
        """Stage files for commit.

        Args:
            files: List of files to stage
            repo_path: Path to the repository

        Returns:
            str: Confirmation message
        """
        repo = self._get_repo(repo_path)

        try:
            if files == ["."]:
                repo.git.add(A=True)
                return "Staged all changes"
            else:
                repo.index.add(files)
                return f"Staged files: {', '.join(files)}"
        except GitCommandError as e:
            raise CommandExecutionError(f"Failed to stage files: {e}")

    @command(
        ["git_commit"],
        "Commit staged changes with a message.",
        {
            "message": JSONSchema(
                type=JSONSchema.Type.STRING,
                description="The commit message",
                required=True,
            ),
            "repo_path": JSONSchema(
                type=JSONSchema.Type.STRING,
                description="Path to the repository (default: current directory)",
                required=False,
            ),
        },
    )
    def git_commit(self, message: str, repo_path: str | None = None) -> str:
        """Commit staged changes.

        Args:
            message: The commit message
            repo_path: Path to the repository

        Returns:
            str: Confirmation with commit hash
        """
        repo = self._get_repo(repo_path)

        # Check if there are staged changes
        if not repo.index.diff("HEAD"):
            raise CommandExecutionError("Nothing to commit (no staged changes)")

        try:
            commit = repo.index.commit(message)
            return f"Committed: {commit.hexsha[:8]} - {message}"
        except GitCommandError as e:
            raise CommandExecutionError(f"Failed to commit: {e}")
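Together, git_add and git_commit reproduce the usual stage-then-commit flow; a sketch of the same GitPython calls against a local repository (path and file name hypothetical):

from git.repo import Repo

repo = Repo("/tmp/demo-repo")  # hypothetical existing repository
repo.index.add(["notes.txt"])  # explicit file list, as in git_add
commit = repo.index.commit("Add notes")
print(commit.hexsha[:8])  # short hash, as reported by git_commit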
    @command(
        ["git_push"],
        "Push commits to a remote repository.",
        {
            "repo_path": JSONSchema(
                type=JSONSchema.Type.STRING,
                description="Path to the repository (default: current directory)",
                required=False,
            ),
            "remote": JSONSchema(
                type=JSONSchema.Type.STRING,
                description="Remote name (default: origin)",
                required=False,
            ),
            "branch": JSONSchema(
                type=JSONSchema.Type.STRING,
                description="Branch to push (default: current branch)",
                required=False,
            ),
        },
    )
    def git_push(
        self,
        repo_path: str | None = None,
        remote: str = "origin",
        branch: str | None = None,
    ) -> str:
        """Push commits to remote.

        Args:
            repo_path: Path to the repository
            remote: Remote name
            branch: Branch to push

        Returns:
            str: Confirmation message
        """
        repo = self._get_repo(repo_path)

        try:
            if branch is None:
                branch = repo.active_branch.name
        except TypeError:
            raise CommandExecutionError("Cannot push from detached HEAD state")

        try:
            repo.remote(remote).push(branch)
            return f"Pushed {branch} to {remote}"
        except GitCommandError as e:
            raise CommandExecutionError(f"Failed to push: {e}")

    @command(
        ["git_pull"],
        "Pull changes from a remote repository.",
        {
            "repo_path": JSONSchema(
                type=JSONSchema.Type.STRING,
                description="Path to the repository (default: current directory)",
                required=False,
            ),
            "remote": JSONSchema(
                type=JSONSchema.Type.STRING,
                description="Remote name (default: origin)",
                required=False,
            ),
            "branch": JSONSchema(
                type=JSONSchema.Type.STRING,
                description="Branch to pull (default: current branch)",
                required=False,
            ),
        },
    )
    def git_pull(
        self,
        repo_path: str | None = None,
        remote: str = "origin",
        branch: str | None = None,
    ) -> str:
        """Pull changes from remote.

        Args:
            repo_path: Path to the repository
            remote: Remote name
            branch: Branch to pull

        Returns:
            str: Result of the pull operation
        """
        repo = self._get_repo(repo_path)

        try:
            if branch is None:
                branch = repo.active_branch.name
        except TypeError:
            raise CommandExecutionError("Cannot pull in detached HEAD state")

        try:
            pull_info = repo.remote(remote).pull(branch)
            if pull_info:
                return f"Pulled {branch} from {remote}: {pull_info[0].note}"
            return f"Pulled {branch} from {remote}"
        except GitCommandError as e:
            raise CommandExecutionError(f"Failed to pull: {e}")

    @command(
        ["git_diff"],
        "Show changes between commits, working tree, etc.",
        {
            "repo_path": JSONSchema(
                type=JSONSchema.Type.STRING,
                description="Path to the repository (default: current directory)",
                required=False,
            ),
            "staged": JSONSchema(
                type=JSONSchema.Type.BOOLEAN,
                description="Show staged changes only (default: False)",
                required=False,
            ),
            "file": JSONSchema(
                type=JSONSchema.Type.STRING,
                description="Specific file to diff (default: all files)",
                required=False,
            ),
        },
    )
    def git_diff(
        self,
        repo_path: str | None = None,
        staged: bool = False,
        file: str | None = None,
    ) -> str:
        """Show changes in the repository.

        Args:
            repo_path: Path to the repository
            staged: Show only staged changes
            file: Specific file to diff

        Returns:
            str: The diff output
        """
        repo = self._get_repo(repo_path)

        try:
            if staged:
                diff = (
                    repo.git.diff("--cached", file)
                    if file
                    else repo.git.diff("--cached")
                )
            else:
                diff = repo.git.diff(file) if file else repo.git.diff()

            if not diff:
                return "No changes" + (" in staged files" if staged else "")

            return diff
        except GitCommandError as e:
            raise CommandExecutionError(f"Failed to get diff: {e}")

    @command(
        ["git_branch"],
        "List, create, or delete branches.",
        {
            "repo_path": JSONSchema(
                type=JSONSchema.Type.STRING,
                description="Path to the repository (default: current directory)",
                required=False,
            ),
            "name": JSONSchema(
                type=JSONSchema.Type.STRING,
                description="Name of the branch to create (omit to list branches)",
                required=False,
            ),
            "delete": JSONSchema(
                type=JSONSchema.Type.BOOLEAN,
                description="Delete the specified branch (default: False)",
                required=False,
            ),
        },
    )
    def git_branch(
        self,
        repo_path: str | None = None,
        name: str | None = None,
        delete: bool = False,
    ) -> str:
        """List, create, or delete branches.

        Args:
            repo_path: Path to the repository
            name: Branch name to create/delete
            delete: Whether to delete the branch

        Returns:
            str: Result of the operation
        """
        repo = self._get_repo(repo_path)

        try:
            if name is None:
                # List branches
                branches = []
                current = repo.active_branch.name if not repo.head.is_detached else None
                for branch in repo.branches:
                    prefix = "* " if branch.name == current else "  "
                    branches.append(f"{prefix}{branch.name}")
                return "\n".join(branches) if branches else "No branches found"

            if delete:
                # Delete branch
                repo.delete_head(name, force=True)
                return f"Deleted branch '{name}'"
            else:
                # Create branch
                repo.create_head(name)
                return f"Created branch '{name}'"

        except GitCommandError as e:
            raise CommandExecutionError(f"Branch operation failed: {e}")

    @command(
        ["git_checkout"],
        "Switch branches or restore working tree files.",
        {
            "target": JSONSchema(
                type=JSONSchema.Type.STRING,
                description="Branch name or commit to checkout",
                required=True,
            ),
            "repo_path": JSONSchema(
                type=JSONSchema.Type.STRING,
                description="Path to the repository (default: current directory)",
                required=False,
            ),
            "create_branch": JSONSchema(
                type=JSONSchema.Type.BOOLEAN,
                description="Create a new branch with the given name (default: False)",
                required=False,
            ),
        },
    )
    def git_checkout(
        self,
        target: str,
        repo_path: str | None = None,
        create_branch: bool = False,
    ) -> str:
        """Checkout a branch or commit.

        Args:
            target: Branch or commit to checkout
            repo_path: Path to the repository
            create_branch: Whether to create a new branch

        Returns:
            str: Confirmation message
        """
        repo = self._get_repo(repo_path)

        try:
            if create_branch:
                # Create and checkout new branch
                new_branch = repo.create_head(target)
                new_branch.checkout()
                return f"Switched to new branch '{target}'"
            else:
                # Checkout existing branch or commit
                repo.git.checkout(target)
                return f"Switched to '{target}'"

        except GitCommandError as e:
            raise CommandExecutionError(f"Checkout failed: {e}")

    @command(
        ["git_log"],
        "Show commit logs.",
        {
            "repo_path": JSONSchema(
                type=JSONSchema.Type.STRING,
                description="Path to the repository (default: current directory)",
                required=False,
            ),
            "max_count": JSONSchema(
                type=JSONSchema.Type.INTEGER,
                description="Maximum number of commits to show (default: 10)",
                required=False,
            ),
            "oneline": JSONSchema(
                type=JSONSchema.Type.BOOLEAN,
                description="Use single-line format (default: False)",
                required=False,
            ),
        },
    )
    def git_log(
        self,
        repo_path: str | None = None,
        max_count: int = 10,
        oneline: bool = False,
    ) -> str:
        """Show commit history.

        Args:
            repo_path: Path to the repository
            max_count: Maximum commits to show
            oneline: Use single-line format

        Returns:
            str: Commit log
        """
        repo = self._get_repo(repo_path)

        try:
            commits = list(repo.iter_commits(max_count=max_count))
            if not commits:
                return "No commits found"

            lines = []
            for commit in commits:
                if oneline:
                    lines.append(f"{commit.hexsha[:8]} {commit.summary}")
                else:
                    lines.append(f"commit {commit.hexsha}")
                    lines.append(
                        f"Author: {commit.author.name} <{commit.author.email}>"
                    )
                    lines.append(f"Date: {commit.committed_datetime}")
                    lines.append("")
                    lines.append(f"    {commit.message.strip()}")
                    lines.append("")

            return "\n".join(lines)

        except GitCommandError as e:
            raise CommandExecutionError(f"Failed to get log: {e}")
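The oneline branch of git_log corresponds to `git log --oneline`; the underlying iteration in isolation looks like this (assumes the working directory is inside a repository):

from git.repo import Repo

repo = Repo(".", search_parent_directories=True)
for commit in repo.iter_commits(max_count=5):
    print(f"{commit.hexsha[:8]} {commit.summary}")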
6
classic/forge/forge/components/http_client/__init__.py
Normal file
@@ -0,0 +1,6 @@
from forge.components.http_client.http_client import (
    HTTPClientComponent,
    HTTPClientConfiguration,
)

__all__ = ["HTTPClientComponent", "HTTPClientConfiguration"]
354
classic/forge/forge/components/http_client/http_client.py
Normal file
@@ -0,0 +1,354 @@
import json
import logging
from typing import Any, Iterator, Optional

import requests
from pydantic import BaseModel, Field

from forge.agent.components import ConfigurableComponent
from forge.agent.protocols import CommandProvider, DirectiveProvider
from forge.command import Command, command
from forge.models.json_schema import JSONSchema
from forge.utils.exceptions import HTTPError

logger = logging.getLogger(__name__)


class HTTPClientConfiguration(BaseModel):
    default_timeout: int = Field(
        default=30, description="Default timeout in seconds for HTTP requests"
    )
    max_retries: int = Field(
        default=3, description="Maximum number of retries for failed requests"
    )
    allowed_domains: list[str] = Field(
        default_factory=list,
        description="List of allowed domains (empty = all domains allowed)",
    )
    user_agent: str = Field(
        default="AutoGPT-HTTPClient/1.0",
        description="User agent string for requests",
    )
    max_response_size: int = Field(
        default=1024 * 1024,  # 1MB
        description="Maximum response size in bytes",
    )


class HTTPClientComponent(
    DirectiveProvider, CommandProvider, ConfigurableComponent[HTTPClientConfiguration]
):
    """Provides commands to make HTTP requests."""

    config_class = HTTPClientConfiguration

    def __init__(self, config: Optional[HTTPClientConfiguration] = None):
        ConfigurableComponent.__init__(self, config)
        self.session = requests.Session()
        self.session.headers.update({"User-Agent": self.config.user_agent})

    def get_resources(self) -> Iterator[str]:
        yield "Ability to make HTTP requests to external APIs."

    def get_commands(self) -> Iterator[Command]:
        yield self.http_get
        yield self.http_post
        yield self.http_put
        yield self.http_delete

    def _is_domain_allowed(self, url: str) -> bool:
        """Check if the URL's domain is in the allowed list."""
        if not self.config.allowed_domains:
            return True

        from urllib.parse import urlparse

        parsed = urlparse(url)
        domain = parsed.netloc.lower()

        for allowed in self.config.allowed_domains:
            if domain == allowed.lower() or domain.endswith("." + allowed.lower()):
                return True
        return False
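The suffix check admits subdomains of an allowed domain while rejecting look-alikes such as evil-example.com; a standalone sketch of the same logic:

from urllib.parse import urlparse

allowed_domains = ["example.com"]

def is_allowed(url: str) -> bool:
    domain = urlparse(url).netloc.lower()
    return not allowed_domains or any(
        domain == a.lower() or domain.endswith("." + a.lower())
        for a in allowed_domains
    )

print(is_allowed("https://api.example.com/v1"))  # True
print(is_allowed("https://evil-example.com/"))   # False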
    def _make_request(
        self,
        method: str,
        url: str,
        headers: dict[str, str] | None = None,
        params: dict[str, Any] | None = None,
        body: dict[str, Any] | str | None = None,
        timeout: int | None = None,
    ) -> dict[str, Any]:
        """Make an HTTP request and return a structured response.

        Args:
            method: HTTP method (GET, POST, PUT, DELETE)
            url: The URL to request
            headers: Optional headers
            params: Optional query parameters
            body: Optional request body
            timeout: Optional timeout override

        Returns:
            dict: Structured response with status, headers, and body
        """
        if not self._is_domain_allowed(url):
            raise HTTPError(
                f"Domain not in allowed list. Allowed: {self.config.allowed_domains}",
                url=url,
            )

        request_timeout = timeout or self.config.default_timeout
        request_headers = headers or {}

        try:
            if method == "GET":
                response = self.session.get(
                    url, headers=request_headers, params=params, timeout=request_timeout
                )
            elif method == "POST":
                response = self.session.post(
                    url,
                    headers=request_headers,
                    params=params,
                    json=body if isinstance(body, dict) else None,
                    data=body if isinstance(body, str) else None,
                    timeout=request_timeout,
                )
            elif method == "PUT":
                response = self.session.put(
                    url,
                    headers=request_headers,
                    params=params,
                    json=body if isinstance(body, dict) else None,
                    data=body if isinstance(body, str) else None,
                    timeout=request_timeout,
                )
            elif method == "DELETE":
                response = self.session.delete(
                    url, headers=request_headers, params=params, timeout=request_timeout
                )
            else:
                raise HTTPError(f"Unsupported HTTP method: {method}", url=url)

            # Check response size
            content_length = len(response.content)
            if content_length > self.config.max_response_size:
                raise HTTPError(
                    f"Response too large: {content_length} bytes "
                    f"(max: {self.config.max_response_size})",
                    status_code=response.status_code,
                    url=url,
                )

            # Try to parse as JSON, fall back to text
            try:
                response_body = response.json()
            except json.JSONDecodeError:
                response_body = response.text

            return {
                "status_code": response.status_code,
                "headers": dict(response.headers),
                "body": response_body,
                "url": response.url,
            }

        except requests.exceptions.Timeout:
            raise HTTPError(
                f"Request timed out after {request_timeout} seconds", url=url
            )
        except requests.exceptions.ConnectionError as e:
            raise HTTPError(f"Connection error: {e}", url=url)
        except requests.exceptions.RequestException as e:
            raise HTTPError(f"Request failed: {e}", url=url)
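Whatever the method, callers always receive the same four-key dict; a sketch of building that shape directly with requests (the URL is only an example):

import json
import requests

response = requests.get("https://httpbin.org/get", timeout=30)  # example URL
try:
    body = response.json()
except json.JSONDecodeError:
    body = response.text

result = {
    "status_code": response.status_code,
    "headers": dict(response.headers),
    "body": body,
    "url": response.url,
}
print(json.dumps(result, indent=2))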
    @command(
        ["http_get", "get_request"],
        "Make an HTTP GET request to retrieve data from a URL.",
        {
            "url": JSONSchema(
                type=JSONSchema.Type.STRING,
                description="The URL to fetch",
                required=True,
            ),
            "headers": JSONSchema(
                type=JSONSchema.Type.OBJECT,
                description="Optional HTTP headers as key-value pairs",
                required=False,
            ),
            "params": JSONSchema(
                type=JSONSchema.Type.OBJECT,
                description="Optional query parameters",
                required=False,
            ),
            "timeout": JSONSchema(
                type=JSONSchema.Type.INTEGER,
                description="Timeout in seconds (default: 30)",
                minimum=1,
                maximum=300,
                required=False,
            ),
        },
    )
    def http_get(
        self,
        url: str,
        headers: dict[str, str] | None = None,
        params: dict[str, Any] | None = None,
        timeout: int | None = None,
    ) -> str:
        """Make an HTTP GET request.

        Args:
            url: The URL to request
            headers: Optional headers
            params: Optional query parameters
            timeout: Optional timeout

        Returns:
            str: JSON-formatted response
        """
        result = self._make_request("GET", url, headers, params, timeout=timeout)
        return json.dumps(result, indent=2)

    @command(
        ["http_post", "post_request"],
        "Make an HTTP POST request to send data to a URL.",
        {
            "url": JSONSchema(
                type=JSONSchema.Type.STRING,
                description="The URL to post to",
                required=True,
            ),
            "body": JSONSchema(
                type=JSONSchema.Type.OBJECT,
                description="The request body (will be sent as JSON)",
                required=False,
            ),
            "headers": JSONSchema(
                type=JSONSchema.Type.OBJECT,
                description="Optional HTTP headers",
                required=False,
            ),
            "timeout": JSONSchema(
                type=JSONSchema.Type.INTEGER,
                description="Timeout in seconds (default: 30)",
                minimum=1,
                maximum=300,
                required=False,
            ),
        },
    )
    def http_post(
        self,
        url: str,
        body: dict[str, Any] | None = None,
        headers: dict[str, str] | None = None,
        timeout: int | None = None,
    ) -> str:
        """Make an HTTP POST request.

        Args:
            url: The URL to request
            body: Request body
            headers: Optional headers
            timeout: Optional timeout

        Returns:
            str: JSON-formatted response
        """
        result = self._make_request("POST", url, headers, body=body, timeout=timeout)
        return json.dumps(result, indent=2)

    @command(
        ["http_put", "put_request"],
        "Make an HTTP PUT request to update data at a URL.",
        {
            "url": JSONSchema(
                type=JSONSchema.Type.STRING,
                description="The URL to put to",
                required=True,
            ),
            "body": JSONSchema(
                type=JSONSchema.Type.OBJECT,
                description="The request body (will be sent as JSON)",
                required=True,
            ),
            "headers": JSONSchema(
                type=JSONSchema.Type.OBJECT,
                description="Optional HTTP headers",
                required=False,
            ),
            "timeout": JSONSchema(
                type=JSONSchema.Type.INTEGER,
                description="Timeout in seconds (default: 30)",
                minimum=1,
                maximum=300,
                required=False,
            ),
        },
    )
    def http_put(
        self,
        url: str,
        body: dict[str, Any],
        headers: dict[str, str] | None = None,
        timeout: int | None = None,
    ) -> str:
        """Make an HTTP PUT request.

        Args:
            url: The URL to request
            body: Request body
            headers: Optional headers
            timeout: Optional timeout

        Returns:
            str: JSON-formatted response
        """
        result = self._make_request("PUT", url, headers, body=body, timeout=timeout)
        return json.dumps(result, indent=2)

    @command(
        ["http_delete", "delete_request"],
        "Make an HTTP DELETE request to remove a resource.",
        {
            "url": JSONSchema(
                type=JSONSchema.Type.STRING,
                description="The URL to delete",
                required=True,
            ),
            "headers": JSONSchema(
                type=JSONSchema.Type.OBJECT,
                description="Optional HTTP headers",
                required=False,
            ),
            "timeout": JSONSchema(
                type=JSONSchema.Type.INTEGER,
                description="Timeout in seconds (default: 30)",
                minimum=1,
                maximum=300,
                required=False,
            ),
        },
    )
    def http_delete(
        self,
        url: str,
        headers: dict[str, str] | None = None,
        timeout: int | None = None,
    ) -> str:
        """Make an HTTP DELETE request.

        Args:
            url: The URL to request
            headers: Optional headers
            timeout: Optional timeout

        Returns:
            str: JSON-formatted response
        """
        result = self._make_request("DELETE", url, headers, timeout=timeout)
        return json.dumps(result, indent=2)
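A hedged usage sketch: assuming HTTPClientComponent can be constructed with its default configuration, an agent-side GET looks like this (URL is only an example):

from forge.components.http_client import HTTPClientComponent

client = HTTPClientComponent()  # assumes the defaults are acceptable
print(client.http_get("https://httpbin.org/get", params={"q": "test"}))
# -> JSON string with "status_code", "headers", "body", and "url" keys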
@@ -195,9 +195,11 @@ class ImageGeneratorComponent(
        # TODO: integrate in `forge.llm.providers`(?)
        response = OpenAI(
            api_key=self.openai_credentials.api_key.get_secret_value(),
            organization=self.openai_credentials.organization.get_secret_value()
            if self.openai_credentials.organization
            else None,
            organization=(
                self.openai_credentials.organization.get_secret_value()
                if self.openai_credentials.organization
                else None
            ),
        ).images.generate(
            prompt=prompt,
            n=1,
@@ -205,11 +207,13 @@ class ImageGeneratorComponent(
            size=f"{size}x{size}",  # type: ignore
            response_format="b64_json",
        )
        assert response.data[0].b64_json is not None  # response_format = "b64_json"
        # response_format="b64_json" guarantees b64_json is present
        image_b64 = response.data[0].b64_json  # type: ignore[index]
        assert image_b64 is not None

        logger.info(f"Image Generated for prompt: {prompt}")

        image_data = b64decode(response.data[0].b64_json)
        image_data = b64decode(image_b64)

        with open(output_file, mode="wb") as png:
            png.write(image_data)
6
classic/forge/forge/components/math_utils/__init__.py
Normal file
@@ -0,0 +1,6 @@
from forge.components.math_utils.math_utils import (
    MathUtilsComponent,
    MathUtilsConfiguration,
)

__all__ = ["MathUtilsComponent", "MathUtilsConfiguration"]
492
classic/forge/forge/components/math_utils/math_utils.py
Normal file
@@ -0,0 +1,492 @@
import ast
import json
import logging
import math
import operator
import statistics
from typing import Any, Iterator, Optional

from pydantic import BaseModel

from forge.agent.components import ConfigurableComponent
from forge.agent.protocols import CommandProvider, DirectiveProvider
from forge.command import Command, command
from forge.models.json_schema import JSONSchema
from forge.utils.exceptions import CommandExecutionError

logger = logging.getLogger(__name__)


class MathUtilsConfiguration(BaseModel):
    pass  # No configuration needed for now


class SafeEvaluator(ast.NodeVisitor):
    """Safe evaluator for mathematical expressions."""

    # Allowed operators
    OPERATORS = {
        ast.Add: operator.add,
        ast.Sub: operator.sub,
        ast.Mult: operator.mul,
        ast.Div: operator.truediv,
        ast.FloorDiv: operator.floordiv,
        ast.Mod: operator.mod,
        ast.Pow: operator.pow,
        ast.USub: operator.neg,
        ast.UAdd: operator.pos,
    }

    # Allowed functions
    FUNCTIONS = {
        "abs": abs,
        "round": round,
        "min": min,
        "max": max,
        "sum": sum,
        "sqrt": math.sqrt,
        "sin": math.sin,
        "cos": math.cos,
        "tan": math.tan,
        "log": math.log,
        "log10": math.log10,
        "log2": math.log2,
        "exp": math.exp,
        "floor": math.floor,
        "ceil": math.ceil,
        "pow": pow,
    }

    # Allowed constants
    CONSTANTS = {
        "pi": math.pi,
        "e": math.e,
        "inf": float("inf"),
    }

    def visit(self, node: ast.AST) -> float:
        return super().visit(node)

    def generic_visit(self, node: ast.AST) -> float:
        raise CommandExecutionError(
            f"Unsupported operation: {type(node).__name__}. "
            "Only basic arithmetic, math functions, and constants are allowed."
        )

    def visit_Expression(self, node: ast.Expression) -> float:
        return self.visit(node.body)

    def visit_Constant(self, node: ast.Constant) -> float:
        if isinstance(node.value, (int, float)):
            return node.value
        raise CommandExecutionError(f"Invalid constant: {node.value}")

    def visit_Num(self, node: ast.Num) -> float:  # Python 3.7 compatibility
        return float(node.n)  # type: ignore[attr-defined]

    def visit_Name(self, node: ast.Name) -> float:
        if node.id in self.CONSTANTS:
            return self.CONSTANTS[node.id]
        avail = list(self.CONSTANTS.keys())
        raise CommandExecutionError(f"Unknown variable: {node.id}. Available: {avail}")

    def visit_BinOp(self, node: ast.BinOp) -> float:
        if type(node.op) not in self.OPERATORS:
            raise CommandExecutionError(
                f"Unsupported operator: {type(node.op).__name__}"
            )
        left = self.visit(node.left)
        right = self.visit(node.right)
        return self.OPERATORS[type(node.op)](left, right)

    def visit_UnaryOp(self, node: ast.UnaryOp) -> float:
        if type(node.op) not in self.OPERATORS:
            raise CommandExecutionError(
                f"Unsupported unary operator: {type(node.op).__name__}"
            )
        operand = self.visit(node.operand)
        return self.OPERATORS[type(node.op)](operand)

    def visit_Call(self, node: ast.Call) -> float:
        if not isinstance(node.func, ast.Name):
            raise CommandExecutionError("Only direct function calls are allowed")

        func_name = node.func.id
        if func_name not in self.FUNCTIONS:
            avail = list(self.FUNCTIONS.keys())
            raise CommandExecutionError(
                f"Unknown function: {func_name}. Available: {avail}"
            )

        args = [self.visit(arg) for arg in node.args]
        return self.FUNCTIONS[func_name](*args)

    def visit_List(self, node: ast.List) -> list:
        return [self.visit(elt) for elt in node.elts]

    def visit_Tuple(self, node: ast.Tuple) -> tuple:
        return tuple(self.visit(elt) for elt in node.elts)
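calculate() below drives this class through ast.parse in eval mode; the call sequence in isolation (the import path is assumed from the file location above):

import ast

from forge.components.math_utils.math_utils import SafeEvaluator  # assumed path

tree = ast.parse("2 * pi + sqrt(16)", mode="eval")
print(SafeEvaluator().visit(tree))  # 10.283185307179586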
class MathUtilsComponent(
    DirectiveProvider, CommandProvider, ConfigurableComponent[MathUtilsConfiguration]
):
    """Provides commands for mathematical calculations and statistics."""

    config_class = MathUtilsConfiguration

    def __init__(self, config: Optional[MathUtilsConfiguration] = None):
        ConfigurableComponent.__init__(self, config)

    def get_resources(self) -> Iterator[str]:
        yield "Ability to perform mathematical calculations and statistical analysis."

    def get_commands(self) -> Iterator[Command]:
        yield self.calculate
        yield self.statistics_calc
        yield self.convert_units

    @command(
        ["calculate", "eval_math", "compute"],
        "Evaluate math expressions. Supports operators, sqrt, sin, cos, log, etc.",
        {
            "expression": JSONSchema(
                type=JSONSchema.Type.STRING,
                description="Expression to evaluate (e.g. '2 * pi + sqrt(16)')",
                required=True,
            ),
        },
    )
    def calculate(self, expression: str) -> str:
        """Safely evaluate a mathematical expression.

        Args:
            expression: The expression to evaluate

        Returns:
            str: The result as JSON
        """
        try:
            tree = ast.parse(expression, mode="eval")
            evaluator = SafeEvaluator()
            result = evaluator.visit(tree)

            return json.dumps({"expression": expression, "result": result}, indent=2)

        except SyntaxError as e:
            raise CommandExecutionError(f"Invalid expression syntax: {e}")
        except ZeroDivisionError:
            raise CommandExecutionError("Division by zero")
        except OverflowError:
            raise CommandExecutionError("Result too large")
        except Exception as e:
            raise CommandExecutionError(f"Calculation error: {e}")
    @command(
        ["statistics", "stats_calc"],
        "Calculate statistics on a list of numbers.",
        {
            "numbers": JSONSchema(
                type=JSONSchema.Type.ARRAY,
                items=JSONSchema(type=JSONSchema.Type.NUMBER),
                description="List of numbers to analyze",
                required=True,
            ),
            "operations": JSONSchema(
                type=JSONSchema.Type.ARRAY,
                items=JSONSchema(type=JSONSchema.Type.STRING),
                description="Stats to compute: mean, median, mode, etc. (default: all)",
                required=False,
            ),
        },
    )
    def statistics_calc(
        self,
        numbers: list[float],
        operations: list[str] | None = None,
    ) -> str:
        """Calculate statistics on a list of numbers.

        Args:
            numbers: List of numbers
            operations: Which statistics to compute

        Returns:
            str: JSON with requested statistics
        """
        if not numbers:
            raise CommandExecutionError("Empty list provided")

        all_ops = [
            "mean",
            "median",
            "mode",
            "stdev",
            "variance",
            "min",
            "max",
            "sum",
            "count",
        ]
        ops = operations if operations else all_ops

        result = {}
        errors = []

        for op in ops:
            try:
                if op == "mean":
                    result["mean"] = statistics.mean(numbers)
                elif op == "median":
                    result["median"] = statistics.median(numbers)
                elif op == "mode":
                    try:
                        result["mode"] = statistics.mode(numbers)
                    except statistics.StatisticsError:
                        result["mode"] = None
                        errors.append("No unique mode found")
                elif op == "stdev":
                    if len(numbers) > 1:
                        result["stdev"] = statistics.stdev(numbers)
                    else:
                        result["stdev"] = 0
                elif op == "variance":
                    if len(numbers) > 1:
                        result["variance"] = statistics.variance(numbers)
                    else:
                        result["variance"] = 0
                elif op == "min":
                    result["min"] = min(numbers)
                elif op == "max":
                    result["max"] = max(numbers)
                elif op == "sum":
                    result["sum"] = sum(numbers)
                elif op == "count":
                    result["count"] = len(numbers)
                else:
                    errors.append(f"Unknown operation: {op}")
            except Exception as e:
                errors.append(f"{op}: {e}")

        output: dict[str, Any] = {"statistics": result}
        if errors:
            output["errors"] = errors

        return json.dumps(output, indent=2)
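Note that statistics.stdev and statistics.variance use the sample (n - 1) form, which is why the code guards len(numbers) > 1; a quick stdlib illustration:

import statistics

numbers = [2.0, 4.0, 4.0, 5.0]
print(statistics.mean(numbers))    # 3.75
print(statistics.median(numbers))  # 4.0
print(statistics.stdev(numbers))   # ~1.258, sample standard deviation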
    @command(
        ["convert_units", "unit_conversion"],
        "Convert between units of measurement.",
        {
            "value": JSONSchema(
                type=JSONSchema.Type.NUMBER,
                description="The value to convert",
                required=True,
            ),
            "from_unit": JSONSchema(
                type=JSONSchema.Type.STRING,
                description="Source unit (e.g., 'km', 'miles', 'celsius', 'kg')",
                required=True,
            ),
            "to_unit": JSONSchema(
                type=JSONSchema.Type.STRING,
                description="Target unit (e.g., 'm', 'feet', 'fahrenheit', 'lbs')",
                required=True,
            ),
        },
    )
    def convert_units(
        self,
        value: float,
        from_unit: str,
        to_unit: str,
    ) -> str:
        """Convert between units of measurement.

        Args:
            value: The value to convert
            from_unit: Source unit
            to_unit: Target unit

        Returns:
            str: JSON with conversion result
        """
        # Normalize unit names
        from_unit = from_unit.lower().strip()
        to_unit = to_unit.lower().strip()

        # Unit conversions to base units
        # Length -> meters
        length_to_m = {
            "m": 1,
            "meter": 1,
            "meters": 1,
            "km": 1000,
            "kilometer": 1000,
            "kilometers": 1000,
            "cm": 0.01,
            "centimeter": 0.01,
            "centimeters": 0.01,
            "mm": 0.001,
            "millimeter": 0.001,
            "millimeters": 0.001,
            "mi": 1609.344,
            "mile": 1609.344,
            "miles": 1609.344,
            "yd": 0.9144,
            "yard": 0.9144,
            "yards": 0.9144,
            "ft": 0.3048,
            "foot": 0.3048,
            "feet": 0.3048,
            "in": 0.0254,
            "inch": 0.0254,
            "inches": 0.0254,
        }

        # Weight -> kilograms
        weight_to_kg = {
            "kg": 1,
            "kilogram": 1,
            "kilograms": 1,
            "g": 0.001,
            "gram": 0.001,
            "grams": 0.001,
            "mg": 0.000001,
            "milligram": 0.000001,
            "milligrams": 0.000001,
            "lb": 0.453592,
            "lbs": 0.453592,
            "pound": 0.453592,
            "pounds": 0.453592,
            "oz": 0.0283495,
            "ounce": 0.0283495,
            "ounces": 0.0283495,
        }

        # Temperature (special handling)
        temp_units = {"c", "celsius", "f", "fahrenheit", "k", "kelvin"}

        # Volume -> liters
        volume_to_l = {
            "l": 1,
            "liter": 1,
            "liters": 1,
            "litre": 1,
            "litres": 1,
            "ml": 0.001,
            "milliliter": 0.001,
            "milliliters": 0.001,
            "gal": 3.78541,
            "gallon": 3.78541,
            "gallons": 3.78541,
            "qt": 0.946353,
            "quart": 0.946353,
            "quarts": 0.946353,
            "pt": 0.473176,
            "pint": 0.473176,
            "pints": 0.473176,
            "cup": 0.236588,
            "cups": 0.236588,
            "fl oz": 0.0295735,
            "floz": 0.0295735,
        }

        # Time -> seconds
        time_to_s = {
            "s": 1,
            "sec": 1,
            "second": 1,
            "seconds": 1,
            "min": 60,
            "minute": 60,
            "minutes": 60,
            "h": 3600,
            "hr": 3600,
            "hour": 3600,
            "hours": 3600,
            "d": 86400,
            "day": 86400,
            "days": 86400,
            "week": 604800,
            "weeks": 604800,
        }

        # Data -> bytes
        data_to_bytes = {
            "b": 1,
            "byte": 1,
            "bytes": 1,
            "kb": 1024,
            "kilobyte": 1024,
            "kilobytes": 1024,
            "mb": 1024**2,
            "megabyte": 1024**2,
            "megabytes": 1024**2,
            "gb": 1024**3,
            "gigabyte": 1024**3,
            "gigabytes": 1024**3,
            "tb": 1024**4,
            "terabyte": 1024**4,
            "terabytes": 1024**4,
        }

        # Temperature conversions
        if from_unit in temp_units and to_unit in temp_units:
            # Convert to Celsius first
            if from_unit in ("c", "celsius"):
                celsius = value
            elif from_unit in ("f", "fahrenheit"):
                celsius = (value - 32) * 5 / 9
            elif from_unit in ("k", "kelvin"):
                celsius = value - 273.15
            else:
                raise CommandExecutionError(f"Unknown temperature unit: {from_unit}")

            # Convert from Celsius to target
            if to_unit in ("c", "celsius"):
                result = celsius
            elif to_unit in ("f", "fahrenheit"):
                result = celsius * 9 / 5 + 32
            elif to_unit in ("k", "kelvin"):
                result = celsius + 273.15
            else:
                raise CommandExecutionError(f"Unknown temperature unit: {to_unit}")

            return json.dumps(
                {
                    "value": value,
                    "from_unit": from_unit,
                    "to_unit": to_unit,
                    "result": round(result, 6),
                },
                indent=2,
            )

        # Find matching conversion table
        for conv_table in [
            length_to_m,
            weight_to_kg,
            volume_to_l,
            time_to_s,
            data_to_bytes,
        ]:
            if from_unit in conv_table and to_unit in conv_table:
                # Convert through base unit
                base_value = value * conv_table[from_unit]
                result = base_value / conv_table[to_unit]

                return json.dumps(
                    {
                        "value": value,
                        "from_unit": from_unit,
                        "to_unit": to_unit,
                        "result": round(result, 6),
                    },
                    indent=2,
                )

        raise CommandExecutionError(
            f"Cannot convert from '{from_unit}' to '{to_unit}'. "
            "Units must be in the same category."
        )
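Every non-temperature conversion is one multiply into the base unit and one divide out of it; for example, 5 miles to kilometers goes through meters:

length_to_m = {"miles": 1609.344, "km": 1000}

value = 5
base_value = value * length_to_m["miles"]  # 8046.72 meters
result = base_value / length_to_m["km"]    # 8.04672 kilometers
print(round(result, 6))                    # 8.04672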
6
classic/forge/forge/components/text_utils/__init__.py
Normal file
@@ -0,0 +1,6 @@
from forge.components.text_utils.text_utils import (
    TextUtilsComponent,
    TextUtilsConfiguration,
)

__all__ = ["TextUtilsComponent", "TextUtilsConfiguration"]
378
classic/forge/forge/components/text_utils/text_utils.py
Normal file
@@ -0,0 +1,378 @@
import base64
import html
import json
import logging
import re
import urllib.parse
from typing import Iterator, Literal, Optional

from pydantic import BaseModel, Field

from forge.agent.components import ConfigurableComponent
from forge.agent.protocols import CommandProvider, DirectiveProvider
from forge.command import Command, command
from forge.models.json_schema import JSONSchema
from forge.utils.exceptions import CommandExecutionError

logger = logging.getLogger(__name__)


class TextUtilsConfiguration(BaseModel):
    max_text_length: int = Field(
        default=100000, description="Maximum text length to process"
    )
    max_matches: int = Field(
        default=1000, description="Maximum number of regex matches to return"
    )


class TextUtilsComponent(
    DirectiveProvider, CommandProvider, ConfigurableComponent[TextUtilsConfiguration]
):
    """Provides commands for text manipulation, regex operations, and encoding."""

    config_class = TextUtilsConfiguration

    def __init__(self, config: Optional[TextUtilsConfiguration] = None):
        ConfigurableComponent.__init__(self, config)

    def get_resources(self) -> Iterator[str]:
        yield "Ability to manipulate text with regex and encoding operations."

    def get_commands(self) -> Iterator[Command]:
        yield self.regex_search
        yield self.regex_replace
        yield self.encode_text
        yield self.decode_text
        yield self.format_template

    def _parse_flags(self, flags: str | None) -> int:
        """Parse regex flag string into re flags.

        Args:
            flags: String of flags (i, m, s, x)

        Returns:
            int: Combined re flags
        """
        if not flags:
            return 0

        flag_map = {
            "i": re.IGNORECASE,
            "m": re.MULTILINE,
            "s": re.DOTALL,
            "x": re.VERBOSE,
        }

        result = 0
        for char in flags.lower():
            if char in flag_map:
                result |= flag_map[char]

        return result
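Flags combine by bitwise OR, so _parse_flags("im") yields re.IGNORECASE | re.MULTILINE; a quick check of the effect:

import re

flags = re.IGNORECASE | re.MULTILINE  # what _parse_flags("im") produces
pattern = re.compile(r"^error", flags)
print(pattern.findall("Error: disk\nerror: net"))  # ['Error', 'error']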
    @command(
        ["regex_search", "find_pattern"],
        "Search text for matches using a regular expression pattern.",
        {
            "text": JSONSchema(
                type=JSONSchema.Type.STRING,
                description="The text to search in",
                required=True,
            ),
            "pattern": JSONSchema(
                type=JSONSchema.Type.STRING,
                description="The regex pattern to search for",
                required=True,
            ),
            "flags": JSONSchema(
                type=JSONSchema.Type.STRING,
                description="Regex flags: i=ignorecase, m=multiline, s=dotall",
                required=False,
            ),
            "return_groups": JSONSchema(
                type=JSONSchema.Type.BOOLEAN,
                description="Return capture groups instead of matches (default: False)",
                required=False,
            ),
        },
    )
    def regex_search(
        self,
        text: str,
        pattern: str,
        flags: str | None = None,
        return_groups: bool = False,
    ) -> str:
        """Search text using regex pattern.

        Args:
            text: The text to search
            pattern: The regex pattern
            flags: Optional flags string
            return_groups: Whether to return capture groups

        Returns:
            str: JSON array of matches
        """
        if len(text) > self.config.max_text_length:
            raise CommandExecutionError(
                f"Text exceeds maximum length of {self.config.max_text_length}"
            )

        try:
            regex = re.compile(pattern, self._parse_flags(flags))
        except re.error as e:
            raise CommandExecutionError(f"Invalid regex pattern: {e}")

        matches = []
        for match in regex.finditer(text):
            if len(matches) >= self.config.max_matches:
                break

            if return_groups and match.groups():
                matches.append(
                    {
                        "match": match.group(0),
                        "groups": match.groups(),
                        "start": match.start(),
                        "end": match.end(),
                    }
                )
            else:
                matches.append(
                    {
                        "match": match.group(0),
                        "start": match.start(),
                        "end": match.end(),
                    }
                )

        result = {
            "count": len(matches),
            "matches": matches,
        }

        if len(matches) >= self.config.max_matches:
            result["truncated"] = True

        return json.dumps(result, indent=2)
@command(
|
||||
["regex_replace", "replace_pattern"],
|
||||
"Replace text matching a regex pattern with a replacement string.",
|
||||
{
|
||||
"text": JSONSchema(
|
||||
type=JSONSchema.Type.STRING,
|
||||
description="The text to search and replace in",
|
||||
required=True,
|
||||
),
|
||||
"pattern": JSONSchema(
|
||||
type=JSONSchema.Type.STRING,
|
||||
description="The regex pattern to match",
|
||||
required=True,
|
||||
),
|
||||
"replacement": JSONSchema(
|
||||
type=JSONSchema.Type.STRING,
|
||||
description="The replacement string (can use \\1, \\2 for groups)",
|
||||
required=True,
|
||||
),
|
||||
"flags": JSONSchema(
|
||||
type=JSONSchema.Type.STRING,
|
||||
description="Regex flags: i=ignorecase, m=multiline, s=dotall",
|
||||
required=False,
|
||||
),
|
||||
"count": JSONSchema(
|
||||
type=JSONSchema.Type.INTEGER,
|
||||
description="Maximum replacements (0 = all, default: 0)",
|
||||
required=False,
|
||||
),
|
||||
},
|
||||
)
|
||||
def regex_replace(
|
||||
self,
|
||||
text: str,
|
||||
pattern: str,
|
||||
replacement: str,
|
||||
flags: str | None = None,
|
||||
count: int = 0,
|
||||
) -> str:
|
||||
"""Replace text matching regex pattern.
|
||||
|
||||
Args:
|
||||
text: The text to modify
|
||||
pattern: The regex pattern
|
||||
replacement: The replacement string
|
||||
flags: Optional flags string
|
||||
count: Max replacements (0 = unlimited)
|
||||
|
||||
Returns:
|
||||
str: The modified text with replacement info
|
||||
"""
|
||||
if len(text) > self.config.max_text_length:
|
||||
raise CommandExecutionError(
|
||||
f"Text exceeds maximum length of {self.config.max_text_length}"
|
||||
)
|
||||
|
||||
try:
|
||||
regex = re.compile(pattern, self._parse_flags(flags))
|
||||
except re.error as e:
|
||||
raise CommandExecutionError(f"Invalid regex pattern: {e}")
|
||||
|
||||
# Count matches before replacement
|
||||
match_count = len(regex.findall(text))
|
||||
|
||||
# Perform replacement
|
||||
result = regex.sub(replacement, text, count=count if count > 0 else 0)
|
||||
|
||||
actual_replacements = min(match_count, count) if count > 0 else match_count
|
||||
|
||||
return json.dumps(
|
||||
{
|
||||
"result": result,
|
||||
"replacements_made": actual_replacements,
|
||||
"pattern": pattern,
|
||||
},
|
||||
indent=2,
|
||||
)
|
||||
|
||||
@command(
|
||||
["encode_text"],
|
||||
"Encode text using various encoding schemes.",
|
||||
{
|
||||
"text": JSONSchema(
|
||||
type=JSONSchema.Type.STRING,
|
||||
description="The text to encode",
|
||||
required=True,
|
||||
),
|
||||
"encoding": JSONSchema(
|
||||
type=JSONSchema.Type.STRING,
|
||||
description="Encoding type: base64, url, html, hex",
|
||||
required=True,
|
||||
),
|
||||
},
|
||||
)
|
||||
def encode_text(
|
||||
self, text: str, encoding: Literal["base64", "url", "html", "hex"]
|
||||
) -> str:
|
||||
"""Encode text using specified encoding.
|
||||
|
||||
Args:
|
||||
text: The text to encode
|
||||
encoding: The encoding type
|
||||
|
||||
Returns:
|
||||
str: The encoded text
|
||||
"""
|
||||
if encoding == "base64":
|
||||
result = base64.b64encode(text.encode("utf-8")).decode("ascii")
|
||||
elif encoding == "url":
|
||||
result = urllib.parse.quote(text, safe="")
|
||||
elif encoding == "html":
|
||||
result = html.escape(text)
|
||||
elif encoding == "hex":
|
||||
result = text.encode("utf-8").hex()
|
||||
else:
|
||||
raise CommandExecutionError(
|
||||
f"Unknown encoding: {encoding}. Supported: base64, url, html, hex"
|
||||
)
|
||||
|
||||
return json.dumps(
|
||||
{"original": text, "encoding": encoding, "result": result}, indent=2
|
||||
)
|
||||
|
||||
@command(
|
||||
["decode_text"],
|
||||
"Decode text from various encoding schemes.",
|
||||
{
|
||||
"text": JSONSchema(
|
||||
type=JSONSchema.Type.STRING,
|
||||
description="The text to decode",
|
||||
required=True,
|
||||
),
|
||||
"encoding": JSONSchema(
|
||||
type=JSONSchema.Type.STRING,
|
||||
description="Encoding type: base64, url, html, hex",
|
||||
required=True,
|
||||
),
|
||||
},
|
||||
)
|
||||
def decode_text(
|
||||
self, text: str, encoding: Literal["base64", "url", "html", "hex"]
|
||||
) -> str:
|
||||
"""Decode text from specified encoding.
|
||||
|
||||
Args:
|
||||
text: The text to decode
|
||||
encoding: The encoding type
|
||||
|
||||
Returns:
|
||||
str: The decoded text
|
||||
"""
|
||||
try:
|
||||
if encoding == "base64":
|
||||
result = base64.b64decode(text).decode("utf-8")
|
||||
elif encoding == "url":
|
||||
result = urllib.parse.unquote(text)
|
||||
elif encoding == "html":
|
||||
result = html.unescape(text)
|
||||
elif encoding == "hex":
|
||||
result = bytes.fromhex(text).decode("utf-8")
|
||||
else:
|
||||
raise CommandExecutionError(
|
||||
f"Unknown encoding: {encoding}. Supported: base64, url, html, hex"
|
||||
)
|
||||
|
||||
return json.dumps(
|
||||
{"original": text, "encoding": encoding, "result": result}, indent=2
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
raise CommandExecutionError(f"Decoding failed: {e}")
|
||||
|
||||
@command(
|
||||
["format_template", "template_substitute"],
|
||||
"Substitute variables in a template string using {variable} syntax.",
|
||||
{
|
||||
"template": JSONSchema(
|
||||
type=JSONSchema.Type.STRING,
|
||||
description="Template with {variable} placeholders",
|
||||
required=True,
|
||||
),
|
||||
"variables": JSONSchema(
|
||||
type=JSONSchema.Type.OBJECT,
|
||||
description="Dictionary of variable names to values",
|
||||
required=True,
|
||||
),
|
||||
},
|
||||
)
|
||||
def format_template(self, template: str, variables: dict[str, str]) -> str:
|
||||
"""Substitute variables in a template.
|
||||
|
||||
Args:
|
||||
template: The template string with {placeholders}
|
||||
variables: Dictionary of variable values
|
||||
|
||||
Returns:
|
||||
str: The formatted string
|
||||
"""
|
||||
try:
|
||||
# Use safe substitution that only replaces found keys
|
||||
result = template
|
||||
for key, value in variables.items():
|
||||
result = result.replace("{" + key + "}", str(value))
|
||||
|
||||
# Check for unfilled placeholders
|
||||
unfilled = re.findall(r"\{(\w+)\}", result)
|
||||
|
||||
return json.dumps(
|
||||
{
|
||||
"result": result,
|
||||
"unfilled_placeholders": unfilled if unfilled else None,
|
||||
},
|
||||
indent=2,
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
raise CommandExecutionError(f"Template formatting failed: {e}")
|
||||
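As a quick orientation to the commands above, a hedged, standalone usage sketch (not part of the diff; it assumes the module lives at forge.components.text.text and that the @command-decorated methods remain directly callable in-process, as forge's Command implements __call__):

# Illustrative sketch only -- not part of this diff.
import json

from forge.components.text.text import TextUtilsComponent  # assumed module path

utils = TextUtilsComponent()

# Case-insensitive search via the "i" flag.
found = json.loads(utils.regex_search("Due 2024-01-15, DUE 2024-02-20", r"due \S+", flags="i"))
print(found["count"])  # 2

# Reorder a date with group backreferences.
swapped = json.loads(utils.regex_replace("2024-01-15", r"(\d{4})-(\d{2})-(\d{2})", r"\3/\2/\1"))
print(swapped["result"])  # 15/01/2024

# Encode/decode round trip.
encoded = json.loads(utils.encode_text("a&b", "html"))["result"]  # a&amp;b
print(json.loads(utils.decode_text(encoded, "html"))["result"])  # a&b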
11
classic/forge/forge/components/todo/__init__.py
Normal file
@@ -0,0 +1,11 @@
"""Todo component for task management."""

from .todo import TodoComponent, TodoConfiguration, TodoItem, TodoList, TodoStatus

__all__ = [
    "TodoComponent",
    "TodoConfiguration",
    "TodoItem",
    "TodoList",
    "TodoStatus",
]
564
classic/forge/forge/components/todo/todo.py
Normal file
@@ -0,0 +1,564 @@
"""
Todo Component - Task management for autonomous agents.

A simple, effective task management system modeled after Claude Code's TodoWrite tool.
Agents use this to track multi-step tasks naturally and frequently.

Features:
- Hierarchical task structure with sub-items
- Smart LLM-based task decomposition
- Status tracking at all levels
"""

import json
import logging
from typing import TYPE_CHECKING, Iterator, Literal, Optional

from pydantic import BaseModel, ConfigDict, Field

from forge.agent.components import ConfigurableComponent
from forge.agent.protocols import CommandProvider, DirectiveProvider, MessageProvider
from forge.command import Command, command
from forge.llm.providers import ChatMessage
from forge.models.json_schema import JSONSchema

if TYPE_CHECKING:
    from forge.llm.providers import MultiProvider

logger = logging.getLogger(__name__)


# Status type
TodoStatus = Literal["pending", "in_progress", "completed"]

# System prompt for task decomposition
DECOMPOSE_SYSTEM_PROMPT = """\
You are a task decomposition specialist. Break down tasks into actionable sub-steps.

Current Plan Context:
{current_todos}

Task to Decompose:
{task_content}

Additional Context:
{context}

Instructions:
1. Analyze the task and break it into 3-7 concrete sub-steps
2. Each sub-step should be actionable and specific
3. Sub-steps should be in logical order
4. Keep sub-steps concise (1 line each)
5. Generate both imperative (content) and present continuous (active_form) versions

Respond with ONLY a JSON object (no markdown, no explanation):
{{"sub_items": [{{"content": "Do X", "active_form": "Doing X"}}], \
"summary": "Brief explanation"}}"""
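# Illustrative sketch only -- not part of this diff. The doubled braces at the
# end of the prompt survive str.format, so the literal JSON shape reaches the
# model; a round trip then looks roughly like this (sample values):
prompt = DECOMPOSE_SYSTEM_PROMPT.format(
    current_todos="- [ ] Ship v2",
    task_content="Ship v2",
    context="No additional context provided.",
)
# A well-formed model reply parses to this shape:
reply = {
    "sub_items": [
        {"content": "Write release notes", "active_form": "Writing release notes"},
        {"content": "Tag the release", "active_form": "Tagging the release"},
    ],
    "summary": "Split shipping into notes and tagging",
}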
class TodoItem(BaseModel):
    """A single todo item with optional nested sub-items."""

    content: str = Field(..., description="Imperative form: 'Fix the bug'")
    status: TodoStatus = Field(default="pending", description="Task status")
    active_form: str = Field(
        ..., description="Present continuous form: 'Fixing the bug'"
    )
    sub_items: list["TodoItem"] = Field(
        default_factory=list, description="Nested sub-tasks"
    )

    model_config = ConfigDict(frozen=False)


# Rebuild model to resolve forward reference
TodoItem.model_rebuild()


class TodoList(BaseModel):
    """The complete todo list."""

    items: list[TodoItem] = Field(default_factory=list)

    model_config = ConfigDict(frozen=False)


class TodoConfiguration(BaseModel):
    """Configuration for the Todo component."""

    max_items: int = Field(default=50, description="Maximum number of todos")
    show_in_prompt: bool = Field(
        default=True, description="Whether to include todos in LLM context"
    )
    decompose_model: Optional[str] = Field(
        default=None, description="Model for decomposition (defaults to smart_llm)"
    )

    model_config = ConfigDict(frozen=False)
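# Illustrative sketch only -- not part of this diff: the models above allow
# arbitrary nesting, e.g.
item = TodoItem(
    content="Fix the login bug",
    active_form="Fixing the login bug",
    status="in_progress",
    sub_items=[
        TodoItem(content="Write a failing test", active_form="Writing a failing test")
    ],
)
todos = TodoList(items=[item])
print(todos.model_dump_json(indent=2))  # pydantic v2 serialization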
class TodoComponent(
    DirectiveProvider,
    CommandProvider,
    MessageProvider,
    ConfigurableComponent[TodoConfiguration],
):
    """
    Task management component for tracking multi-step tasks.

    Features:
    - Hierarchical todo list with sub-items
    - Atomic updates (replace entire list)
    - Three statuses: pending, in_progress, completed
    - Dual descriptions (imperative + active form)
    - Smart LLM-based task decomposition
    - Visible in LLM context for awareness
    """

    config_class = TodoConfiguration

    def __init__(
        self,
        llm_provider: Optional["MultiProvider"] = None,
        smart_llm: Optional[str] = None,
        config: Optional[TodoConfiguration] = None,
    ):
        ConfigurableComponent.__init__(self, config)
        self._todos = TodoList()
        self._llm_provider = llm_provider
        self._smart_llm = smart_llm

    # -------------------------------------------------------------------------
    # DirectiveProvider Implementation
    # -------------------------------------------------------------------------

    def get_resources(self) -> Iterator[str]:
        yield "A todo list to track and manage multi-step tasks. Use frequently!"

    def get_best_practices(self) -> Iterator[str]:
        yield "Use todo_write when working on multi-step tasks to track progress"
        yield "Mark todos as in_progress before starting work on them"
        yield "Mark todos as completed immediately after finishing, not in batches"
        yield "Only have ONE todo as in_progress at a time"

    # -------------------------------------------------------------------------
    # MessageProvider Implementation
    # -------------------------------------------------------------------------

    def _format_todo_item(self, item: TodoItem, indent: int = 0) -> list[str]:
        """Format a todo item with its sub-items recursively."""
        lines = []
        prefix = " " * indent

        if item.status == "completed":
            lines.append(f"{prefix}- [x] {item.content}")
        elif item.status == "in_progress":
            lines.append(f"{prefix}- [~] {item.active_form}")
        else:
            lines.append(f"{prefix}- [ ] {item.content}")

        # Recursively format sub-items
        for sub in item.sub_items:
            lines.extend(self._format_todo_item(sub, indent + 1))

        return lines

    def _get_current_todos_text(self) -> str:
        """Get a text representation of current todos for the decomposition prompt."""
        if not self._todos.items:
            return "No current todos."

        lines = []
        for item in self._todos.items:
            lines.extend(self._format_todo_item(item))
        return "\n".join(lines)

    def get_messages(self) -> Iterator[ChatMessage]:
        if not self.config.show_in_prompt or not self._todos.items:
            return

        in_progress = [t for t in self._todos.items if t.status == "in_progress"]
        pending = [t for t in self._todos.items if t.status == "pending"]
        completed = [t for t in self._todos.items if t.status == "completed"]

        lines = ["## Your Todo List\n"]

        # Show in-progress first (most important) with sub-items
        if in_progress:
            lines.append("**Currently working on:**")
            for todo in in_progress:
                lines.extend(self._format_todo_item(todo))

        # Show pending with sub-items
        if pending:
            lines.append("\n**Pending:**")
            for todo in pending:
                lines.extend(self._format_todo_item(todo))

        # Show completed (brief summary)
        if completed:
            lines.append(f"\n**Completed:** {len(completed)} task(s)")

        yield ChatMessage.system("\n".join(lines))

    # -------------------------------------------------------------------------
    # Helper Methods
    # -------------------------------------------------------------------------
    def _parse_todo_item(
        self, item: dict, path: str = "Item"
    ) -> tuple[Optional[TodoItem], Optional[str]]:
        """
        Recursively parse a dict into a TodoItem with sub_items.

        Returns (TodoItem, None) on success or (None, error_message) on failure.
        """
        # Check required fields
        if not item.get("content"):
            return None, f"{path}: 'content' is required and must be non-empty"
        if not item.get("active_form"):
            return None, f"{path}: 'active_form' is required and must be non-empty"
        if item.get("status") not in ("pending", "in_progress", "completed"):
            return (
                None,
                f"{path}: 'status' must be one of: pending, in_progress, completed",
            )

        # Parse sub_items recursively
        sub_items = []
        raw_sub_items = item.get("sub_items", [])
        if raw_sub_items:
            for j, sub_item in enumerate(raw_sub_items):
                parsed, error = self._parse_todo_item(
                    sub_item, f"{path}.sub_items[{j}]"
                )
                if error:
                    return None, error
                if parsed:
                    sub_items.append(parsed)

        return (
            TodoItem(
                content=item["content"],
                status=item["status"],
                active_form=item["active_form"],
                sub_items=sub_items,
            ),
            None,
        )

    def _serialize_todo_item(self, item: TodoItem) -> dict:
        """
        Recursively serialize a TodoItem to a dict including sub_items.
        """
        result: dict[str, str | list] = {
            "content": item.content,
            "status": item.status,
            "active_form": item.active_form,
        }
        if item.sub_items:
            result["sub_items"] = [
                self._serialize_todo_item(sub) for sub in item.sub_items
            ]
        return result

    # -------------------------------------------------------------------------
    # CommandProvider Implementation
    # -------------------------------------------------------------------------

    def get_commands(self) -> Iterator[Command]:
        yield self.todo_write
        yield self.todo_read
        yield self.todo_clear
        yield self.todo_decompose
    @command(
        names=["todo_write"],
        parameters={
            "todos": JSONSchema(
                type=JSONSchema.Type.ARRAY,
                description=(
                    "The complete todo list. Each item must have: "
                    "'content' (imperative form like 'Fix bug'), "
                    "'status' (pending|in_progress|completed), "
                    "'active_form' (present continuous like 'Fixing bug'). "
                    "Optional: 'sub_items' (array of nested todo items)"
                ),
                items=JSONSchema(
                    type=JSONSchema.Type.OBJECT,
                    properties={
                        "content": JSONSchema(
                            type=JSONSchema.Type.STRING,
                            description="Imperative form of the task",
                            required=True,
                        ),
                        "status": JSONSchema(
                            type=JSONSchema.Type.STRING,
                            description="pending, in_progress, or completed",
                            enum=["pending", "in_progress", "completed"],
                            required=True,
                        ),
                        "active_form": JSONSchema(
                            type=JSONSchema.Type.STRING,
                            description="Present continuous form (e.g. 'Fixing')",
                            required=True,
                        ),
                        "sub_items": JSONSchema(
                            type=JSONSchema.Type.ARRAY,
                            description="Optional nested sub-tasks",
                            required=False,
                        ),
                    },
                ),
                required=True,
            ),
        },
    )
    def todo_write(self, todos: list[dict]) -> dict:
        """
        Replace the entire todo list with a new list.

        This is the primary command for managing todos. Use it to:
        - Create initial todos when starting a multi-step task
        - Mark tasks as in_progress when you start working on them
        - Mark tasks as completed when done
        - Add new tasks discovered during work
        - Remove tasks that are no longer relevant
        - Update sub-items created by todo_decompose

        The entire list is replaced atomically, ensuring consistency.
        Supports nested sub_items for hierarchical task tracking.
        """
        # Validate item count
        if len(todos) > self.config.max_items:
            return {
                "status": "error",
                "message": f"Too many items. Maximum is {self.config.max_items}.",
            }

        # Validate and convert items recursively
        validated_items = []
        for i, item in enumerate(todos):
            parsed, error = self._parse_todo_item(item, f"Item {i}")
            if error:
                return {
                    "status": "error",
                    "message": error,
                }
            if parsed:
                validated_items.append(parsed)

        # Count in_progress items and warn if more than one
        in_progress_count = sum(1 for t in validated_items if t.status == "in_progress")
        warning = None
        if in_progress_count > 1:
            warning = (
                f"Warning: {in_progress_count} tasks are in_progress. "
                "Best practice is to have only ONE task in_progress at a time."
            )
            logger.warning(warning)

        # Replace the list
        self._todos = TodoList(items=validated_items)

        # Build response
        pending = sum(1 for t in validated_items if t.status == "pending")
        completed = sum(1 for t in validated_items if t.status == "completed")

        response = {
            "status": "success",
            "item_count": len(validated_items),
            "pending": pending,
            "in_progress": in_progress_count,
            "completed": completed,
        }

        if warning:
            response["warning"] = warning

        return response
@command(names=["todo_read"])
|
||||
def todo_read(self) -> dict:
|
||||
"""
|
||||
Get the current todo list.
|
||||
|
||||
Returns all todos with their current statuses and sub-items.
|
||||
Useful for reviewing progress or understanding current state.
|
||||
"""
|
||||
return {
|
||||
"status": "success",
|
||||
"items": [self._serialize_todo_item(t) for t in self._todos.items],
|
||||
"summary": {
|
||||
"pending": sum(1 for t in self._todos.items if t.status == "pending"),
|
||||
"in_progress": sum(
|
||||
1 for t in self._todos.items if t.status == "in_progress"
|
||||
),
|
||||
"completed": sum(
|
||||
1 for t in self._todos.items if t.status == "completed"
|
||||
),
|
||||
},
|
||||
}
|
||||
|
||||
@command(names=["todo_clear"])
|
||||
def todo_clear(self) -> dict:
|
||||
"""
|
||||
Clear all todos.
|
||||
|
||||
Removes all items from the todo list.
|
||||
Use when starting fresh or when the current task list is no longer relevant.
|
||||
"""
|
||||
count = len(self._todos.items)
|
||||
self._todos = TodoList()
|
||||
|
||||
return {
|
||||
"status": "success",
|
||||
"message": f"Cleared {count} todo(s)",
|
||||
}
|
||||
|
||||
    @command(
        names=["todo_decompose"],
        parameters={
            "item_index": JSONSchema(
                type=JSONSchema.Type.INTEGER,
                description="Index of the todo item to decompose (0-based)",
                required=True,
            ),
            "context": JSONSchema(
                type=JSONSchema.Type.STRING,
                description="Additional context to help guide the decomposition",
                required=False,
            ),
        },
    )
    async def todo_decompose(self, item_index: int, context: str = "") -> dict:
        """
        Use the smart LLM to break down a todo item into actionable sub-steps.

        This spawns a focused decomposition call with the current plan context.
        The LLM analyzes the task and generates 3-7 concrete sub-steps.

        Requires an LLM provider to be configured for this component.
        """
        # Validate LLM availability
        if not self._llm_provider or not self._smart_llm:
            return {
                "status": "error",
                "message": "LLM provider not configured. Cannot decompose tasks.",
            }

        # Validate item index
        max_idx = len(self._todos.items) - 1
        if item_index < 0 or item_index > max_idx:
            return {
                "status": "error",
                "message": f"Invalid item_index {item_index}. Valid: 0-{max_idx}",
            }

        target_item = self._todos.items[item_index]

        # Check if already has sub-items
        if target_item.sub_items:
            count = len(target_item.sub_items)
            return {
                "status": "error",
                "message": (
                    f"Item '{target_item.content}' already has {count} sub-items. "
                    "Clear them first to re-decompose."
                ),
            }

        # Build the decomposition prompt
        prompt_content = DECOMPOSE_SYSTEM_PROMPT.format(
            current_todos=self._get_current_todos_text(),
            task_content=target_item.content,
            context=context or "No additional context provided.",
        )

        try:
            from forge.llm.providers import ChatMessage

            # Call the LLM for decomposition
            model = self.config.decompose_model or self._smart_llm
            response = await self._llm_provider.create_chat_completion(
                model_prompt=[ChatMessage.user(prompt_content)],
                model_name=model,  # type: ignore[arg-type]
            )

            # Parse the JSON response
            response_text = response.response.content
            if not response_text:
                return {
                    "status": "error",
                    "message": "LLM returned empty response",
                }

            # Try to extract JSON from response (handle potential markdown wrapping)
            json_text = response_text.strip()
            if json_text.startswith("```"):
                # Remove markdown code blocks
                lines = json_text.split("\n")
                json_lines = []
                in_code = False
                for line in lines:
                    if line.startswith("```"):
                        in_code = not in_code
                        continue
                    if in_code or not line.startswith("```"):
                        json_lines.append(line)
                json_text = "\n".join(json_lines)

            decomposition = json.loads(json_text)

            # Validate response structure
            if "sub_items" not in decomposition:
                return {
                    "status": "error",
                    "message": "LLM response missing 'sub_items' field",
                }

            # Create sub-items
            new_sub_items = []
            for sub in decomposition["sub_items"]:
                if not sub.get("content") or not sub.get("active_form"):
                    continue
                new_sub_items.append(
                    TodoItem(
                        content=sub["content"],
                        active_form=sub["active_form"],
                        status="pending",
                    )
                )

            if not new_sub_items:
                return {
                    "status": "error",
                    "message": "LLM generated no valid sub-items",
                }

            # Update the target item with sub-items
            target_item.sub_items = new_sub_items

            return {
                "status": "success",
                "item": target_item.content,
                "sub_items_count": len(new_sub_items),
                "sub_items": [
                    {"content": s.content, "active_form": s.active_form}
                    for s in new_sub_items
                ],
                "summary": decomposition.get("summary", "Task decomposed successfully"),
            }

        except json.JSONDecodeError as e:
            logger.error(f"Failed to parse LLM decomposition response: {e}")
            return {
                "status": "error",
                "message": f"Failed to parse LLM response as JSON: {e}",
            }
        except Exception as e:
            logger.error(f"Decomposition failed: {e}")
            return {
                "status": "error",
                "message": f"Decomposition failed: {e}",
            }
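For orientation, a hedged usage sketch covering the write path and the fence-stripping branch above (not part of the diff; it assumes the @command-decorated methods remain directly callable in-process):

# Illustrative sketch only -- not part of this diff.
component = TodoComponent()
result = component.todo_write(
    [
        {"content": "Fix bug", "status": "in_progress", "active_form": "Fixing bug"},
        {
            "content": "Add tests",
            "status": "pending",
            "active_form": "Adding tests",
            "sub_items": [
                {
                    "content": "Cover the edge case",
                    "status": "pending",
                    "active_form": "Covering the edge case",
                }
            ],
        },
    ]
)
assert result["status"] == "success" and result["in_progress"] == 1


# Standalone restatement of the fence stripping used in todo_decompose,
# handy for testing that branch in isolation (same behavior as above).
def strip_md_fences(text: str) -> str:
    stripped = text.strip()
    if not stripped.startswith("```"):
        return stripped
    return "\n".join(
        line for line in stripped.split("\n") if not line.startswith("```")
    )


assert strip_md_fences('```json\n{"sub_items": []}\n```') == '{"sub_items": []}'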
@@ -1,3 +1,4 @@
import json
from typing import Iterator

import click
@@ -13,6 +14,8 @@ class UserInteractionComponent(CommandProvider):

    def get_commands(self) -> Iterator[Command]:
        yield self.ask_user
        yield self.ask_yes_no
        yield self.ask_choice

    @command(
        names=[ASK_COMMAND],
@@ -30,3 +33,133 @@ class UserInteractionComponent(CommandProvider):
        print(f"\nQ: {question}")
        resp = click.prompt("A")
        return f"The user's answer: '{resp}'"

    @command(
        ["ask_yes_no", "confirm"],
        "Ask the user a yes/no confirmation question.",
        {
            "question": JSONSchema(
                type=JSONSchema.Type.STRING,
                description="The yes/no question to ask the user",
                required=True,
            ),
            "default": JSONSchema(
                type=JSONSchema.Type.BOOLEAN,
                description="Default if Enter pressed (None = require explicit)",
                required=False,
            ),
        },
    )
    def ask_yes_no(self, question: str, default: bool | None = None) -> str:
        """Ask the user a yes/no question.

        Args:
            question: The question to ask
            default: Optional default answer

        Returns:
            str: JSON with the user's answer (true/false)
        """
        if default is True:
            prompt_suffix = " [Y/n]"
        elif default is False:
            prompt_suffix = " [y/N]"
        else:
            prompt_suffix = " [y/n]"

        print(f"\nQ: {question}{prompt_suffix}")

        while True:
            resp = click.prompt("A", default="", show_default=False).strip().lower()

            if resp == "" and default is not None:
                answer = default
                break
            elif resp in ("y", "yes"):
                answer = True
                break
            elif resp in ("n", "no"):
                answer = False
                break
            else:
                print("Please enter 'y' or 'n'")

        return json.dumps(
            {
                "question": question,
                "answer": answer,
                "response": "yes" if answer else "no",
            }
        )

    @command(
        ["ask_choice", "select_option"],
        "Present multiple choices to the user and get their selection.",
        {
            "question": JSONSchema(
                type=JSONSchema.Type.STRING,
                description="The question to ask",
                required=True,
            ),
            "choices": JSONSchema(
                type=JSONSchema.Type.ARRAY,
                items=JSONSchema(type=JSONSchema.Type.STRING),
                description="List of choices to present",
                required=True,
            ),
            "allow_multiple": JSONSchema(
                type=JSONSchema.Type.BOOLEAN,
                description="Allow selecting multiple choices (default: False)",
                required=False,
            ),
        },
    )
    def ask_choice(
        self, question: str, choices: list[str], allow_multiple: bool = False
    ) -> str:
        """Present choices to the user.

        Args:
            question: The question to ask
            choices: List of choices
            allow_multiple: Whether multiple selections are allowed

        Returns:
            str: JSON with selected choice(s)
        """
        print(f"\nQ: {question}")
        for i, choice in enumerate(choices, 1):
            print(f"  {i}. {choice}")

        if allow_multiple:
            print("Enter choice numbers separated by commas (e.g., '1,3,4'):")
        else:
            print("Enter choice number:")

        while True:
            resp = click.prompt("A", default="", show_default=False).strip()

            try:
                if allow_multiple:
                    indices = [int(x.strip()) for x in resp.split(",")]
                    if all(1 <= i <= len(choices) for i in indices):
                        selected = [choices[i - 1] for i in indices]
                        return json.dumps(
                            {
                                "question": question,
                                "selected": selected,
                                "indices": indices,
                            }
                        )
                else:
                    index = int(resp)
                    if 1 <= index <= len(choices):
                        selected = choices[index - 1]
                        return json.dumps(
                            {"question": question, "selected": selected, "index": index}
                        )

                print(f"Please enter a valid number between 1 and {len(choices)}")

            except ValueError:
                print("Please enter a valid number")
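Both new commands return JSON strings, so downstream code parses them rather than scraping free text; a minimal sketch (not part of the diff, sample payload illustrative):

# Illustrative sketch only -- not part of this diff.
import json

raw = '{"question": "Proceed?", "answer": true, "response": "yes"}'  # ask_yes_no shape
if json.loads(raw)["answer"]:
    print("user confirmed, continuing")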
@@ -1,9 +1,19 @@
"""
Modern web search component with tiered provider support.

Provider hierarchy:
1. Tavily (primary) - AI-optimized results with content extraction
2. Serper (secondary) - Fast, cheap Google SERP results
3. DDGS (fallback) - Free multi-engine search (DuckDuckGo, Bing, Brave, Google, etc.)
"""

import json
import logging
import time
from enum import Enum
from typing import Iterator, Literal, Optional

from duckduckgo_search import DDGS
import requests
from ddgs import DDGS
from pydantic import BaseModel, SecretStr

from forge.agent.components import ConfigurableComponent
@@ -15,49 +25,333 @@ from forge.utils.exceptions import ConfigurationError
logger = logging.getLogger(__name__)

# Available backends for DDGS text search
# Ordered by reliability/quality for fallback chain
DDGS_BACKENDS = [
    "duckduckgo",
    "bing",
    "brave",
    "google",
    "mojeek",
    "yahoo",
    "yandex",
    "wikipedia",
]


class SearchProvider(str, Enum):
    """Available search providers."""

    TAVILY = "tavily"
    SERPER = "serper"
    DDGS = "ddgs"  # Multi-engine free search
    AUTO = "auto"  # Automatic provider selection based on availability


class SearchResult(BaseModel):
    """Standardized search result format."""

    title: str
    url: str
    content: str  # Snippet or extracted content
    score: Optional[float] = None  # Relevance score if available
    raw_content: Optional[str] = None  # Full page content if extracted


class WebSearchConfiguration(BaseModel):
    """Configuration for the web search component."""

    # Tavily settings (primary provider)
    tavily_api_key: Optional[SecretStr] = UserConfigurable(
        None, from_env="TAVILY_API_KEY", exclude=True
    )
    tavily_search_depth: Literal["basic", "advanced"] = "basic"
    tavily_include_answer: bool = True  # Get AI-generated answer
    tavily_include_raw_content: bool = False  # Extract full page content

    # Serper settings (secondary provider)
    serper_api_key: Optional[SecretStr] = UserConfigurable(
        None, from_env="SERPER_API_KEY", exclude=True
    )

    # DDGS settings (free fallback with multiple backends)
    ddgs_backend: Literal[
        "auto",
        "duckduckgo",
        "bing",
        "brave",
        "google",
        "mojeek",
        "yahoo",
        "yandex",
        "wikipedia",
    ] = "auto"
    ddgs_region: str = "us-en"  # Region for localized results
    ddgs_safesearch: Literal["on", "moderate", "off"] = "moderate"

    # General settings
    default_provider: SearchProvider = SearchProvider.AUTO
    max_results: int = 8

    # Legacy settings (deprecated)
    google_api_key: Optional[SecretStr] = UserConfigurable(
        None, from_env="GOOGLE_API_KEY", exclude=True
    )
    google_custom_search_engine_id: Optional[SecretStr] = UserConfigurable(
        None, from_env="GOOGLE_CUSTOM_SEARCH_ENGINE_ID", exclude=True
    )
    duckduckgo_max_attempts: int = 3
    duckduckgo_backend: Literal["api", "html", "lite"] = "api"
    # Legacy aliases for backwards compatibility
    duckduckgo_max_attempts: int = 3  # Now used as max backend attempts
    duckduckgo_backend: Literal["api", "html", "lite"] = (
        "api"  # Ignored, use ddgs_backend
    )


class WebSearchComponent(
    DirectiveProvider, CommandProvider, ConfigurableComponent[WebSearchConfiguration]
):
    """Provides commands to search the web."""
    """
    Modern web search component with tiered provider support.

    Provides intelligent web search with automatic provider selection:
    - Tavily: AI-optimized results with optional content extraction
    - Serper: Fast Google SERP results at low cost
    - DDGS: Free multi-engine fallback (DuckDuckGo, Bing, Brave, Google, etc.)
    """

    config_class = WebSearchConfiguration
    def __init__(self, config: Optional[WebSearchConfiguration] = None):
        ConfigurableComponent.__init__(self, config)
        self._ddgs_client: Optional["DDGS"] = None
        self._log_provider_status()

        if (
            not self.config.google_api_key
            or not self.config.google_custom_search_engine_id
        ):
    def _log_provider_status(self) -> None:
        """Log which providers are available."""
        providers = []
        if self.config.tavily_api_key:
            providers.append("Tavily (primary)")
        if self.config.serper_api_key:
            providers.append("Serper (secondary)")
        providers.append("DDGS multi-engine (fallback)")

        logger.info(f"Web search providers available: {', '.join(providers)}")

        if not self.config.tavily_api_key and not self.config.serper_api_key:
            logger.info(
                "Configure google_api_key and custom_search_engine_id "
                "to use Google API search."
                "No premium search API keys configured. "
                "Using DDGS multi-engine search (free). "
                "Set TAVILY_API_KEY or SERPER_API_KEY for enhanced results."
            )

    @property
    def ddgs_client(self) -> "DDGS":
        """Lazy-loaded DDGS client."""
        if self._ddgs_client is None:
            self._ddgs_client = DDGS()
        return self._ddgs_client

    def get_resources(self) -> Iterator[str]:
        yield "Internet access for searches and information gathering."

    def get_commands(self) -> Iterator[Command]:
        yield self.web_search
        if self.config.tavily_api_key:
            yield self.search_and_extract

        if self.config.google_api_key and self.config.google_custom_search_engine_id:
            yield self.google
    def _get_provider(self) -> SearchProvider:
        """Determine which provider to use based on configuration."""
        if self.config.default_provider != SearchProvider.AUTO:
            return self.config.default_provider

        # Auto-select: prefer Tavily > Serper > DDGS
        if self.config.tavily_api_key:
            return SearchProvider.TAVILY
        elif self.config.serper_api_key:
            return SearchProvider.SERPER
        else:
            return SearchProvider.DDGS

    def _search_tavily(
        self,
        query: str,
        num_results: int,
        include_answer: bool = True,
        include_raw_content: bool = False,
        search_depth: Optional[str] = None,
    ) -> tuple[list[SearchResult], Optional[str]]:
        """
        Search using Tavily API.

        Returns:
            Tuple of (results list, AI-generated answer or None)
        """
        if not self.config.tavily_api_key:
            raise ConfigurationError("Tavily API key not configured")

        url = "https://api.tavily.com/search"
        headers = {"Content-Type": "application/json"}

        payload = {
            "api_key": self.config.tavily_api_key.get_secret_value(),
            "query": query,
            "max_results": num_results,
            "search_depth": search_depth or self.config.tavily_search_depth,
            "include_answer": include_answer,
            "include_raw_content": include_raw_content,
        }

        try:
            response = requests.post(url, json=payload, headers=headers, timeout=30)
            response.raise_for_status()
            data = response.json()

            results = [
                SearchResult(
                    title=r.get("title", ""),
                    url=r.get("url", ""),
                    content=r.get("content", ""),
                    score=r.get("score"),
                    raw_content=r.get("raw_content") if include_raw_content else None,
                )
                for r in data.get("results", [])
            ]

            answer = data.get("answer") if include_answer else None
            return results, answer

        except requests.RequestException as e:
            logger.error(f"Tavily search failed: {e}")
            raise

    def _search_serper(self, query: str, num_results: int) -> list[SearchResult]:
        """Search using Serper.dev API (Google SERP)."""
        if not self.config.serper_api_key:
            raise ConfigurationError("Serper API key not configured")

        url = "https://google.serper.dev/search"
        headers = {
            "X-API-KEY": self.config.serper_api_key.get_secret_value(),
            "Content-Type": "application/json",
        }
        payload = {"q": query, "num": num_results}

        try:
            response = requests.post(url, json=payload, headers=headers, timeout=30)
            response.raise_for_status()
            data = response.json()

            results = []
            for r in data.get("organic", []):
                results.append(
                    SearchResult(
                        title=r.get("title", ""),
                        url=r.get("link", ""),
                        content=r.get("snippet", ""),
                        score=r.get("position"),  # Position as pseudo-score
                    )
                )

            return results

        except requests.RequestException as e:
            logger.error(f"Serper search failed: {e}")
            raise

    def _search_ddgs(self, query: str, num_results: int) -> list[SearchResult]:
        """
        Search using DDGS multi-engine search.

        Tries multiple backends in order until one succeeds:
        DuckDuckGo -> Bing -> Brave -> Google -> Mojeek -> Yahoo -> Yandex
        """
        if not query:
            return []

        # Determine which backends to try
        if self.config.ddgs_backend == "auto":
            backends_to_try = DDGS_BACKENDS.copy()
        else:
            # Put configured backend first, then others as fallback
            backends_to_try = [self.config.ddgs_backend] + [
                b for b in DDGS_BACKENDS if b != self.config.ddgs_backend
            ]

        max_attempts = min(self.config.duckduckgo_max_attempts, len(backends_to_try))
        last_error: Optional[Exception] = None

        for backend in backends_to_try[:max_attempts]:
            try:
                logger.debug(f"Trying DDGS backend: {backend}")
                raw_results = self.ddgs_client.text(
                    query,
                    max_results=num_results,
                    backend=backend,
                    region=self.config.ddgs_region,
                    safesearch=self.config.ddgs_safesearch,
                )

                if raw_results:
                    results = [
                        SearchResult(
                            title=r.get("title", ""),
                            url=r.get("href", r.get("url", "")),
                            content=r.get("body", r.get("description", "")),
                        )
                        for r in raw_results
                    ]
                    logger.info(
                        f"DDGS search succeeded with {backend}: {len(results)} results"
                    )
                    return results

            except Exception as e:
                last_error = e
                logger.warning(f"DDGS {backend} failed: {e}")
                continue

        if last_error:
            logger.error(f"All DDGS backends failed. Last error: {last_error}")

        return []
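    # Illustrative helper (not part of this diff): the backend ordering above,
    # extracted so the fallback chain is easy to unit-test in isolation.
    @staticmethod
    def _order_backends_sketch(configured: str, attempts: int) -> list[str]:
        if configured == "auto":
            chain = DDGS_BACKENDS.copy()
        else:
            chain = [configured] + [b for b in DDGS_BACKENDS if b != configured]
        # e.g. _order_backends_sketch("brave", 3) -> ["brave", "duckduckgo", "bing"]
        return chain[: min(attempts, len(chain))]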
    def _format_results(
        self,
        results: list[SearchResult],
        answer: Optional[str] = None,
        include_raw_content: bool = False,
    ) -> str:
        """Format search results for display."""
        output_parts = []

        # Include AI-generated answer if available
        if answer:
            output_parts.append(f"## AI Summary\n{answer}\n")

        output_parts.append("## Search Results")

        for i, r in enumerate(results, 1):
            result_text = (
                f"### {i}. {r.title}\n"
                f"**URL:** {r.url}\n"
                f"**Excerpt:** {r.content or 'N/A'}"
            )
            if r.score is not None:
                result_text += f"\n**Relevance:** {r.score:.2f}"
            if include_raw_content and r.raw_content:
                # Truncate raw content to avoid overwhelming output
                content_preview = r.raw_content[:2000]
                if len(r.raw_content) > 2000:
                    content_preview += "... [truncated]"
                result_text += f"\n**Full Content:**\n{content_preview}"

            output_parts.append(result_text)

        return "\n\n".join(output_parts)
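    # Illustrative (not part of this diff): one result plus an answer formats
    # to roughly the following markdown:
    #
    #   self._format_results(
    #       [SearchResult(title="Example", url="https://example.com", content="An excerpt.")],
    #       answer="A one-line summary.",
    #   )
    #   ->
    #   ## AI Summary
    #   A one-line summary.
    #
    #   ## Search Results
    #
    #   ### 1. Example
    #   **URL:** https://example.com
    #   **Excerpt:** An excerpt.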
    @command(
        ["web_search", "search"],
        "Searches the web",
        "Search the web for information. Uses the best available search provider.",
        {
            "query": JSONSchema(
                type=JSONSchema.Type.STRING,
@@ -66,60 +360,68 @@ class WebSearchComponent(
            ),
            "num_results": JSONSchema(
                type=JSONSchema.Type.INTEGER,
                description="The number of results to return",
                description="Number of results to return (1-20)",
                minimum=1,
                maximum=10,
                maximum=20,
                required=False,
            ),
        },
    )
    def web_search(self, query: str, num_results: int = 8) -> str:
        """Return the results of a Google search
        """
        Search the web using the best available provider.

        Automatically selects provider: Tavily > Serper > DDGS (multi-engine)

        Args:
            query (str): The search query.
            num_results (int): The number of results to return.
            query: The search query
            num_results: Number of results to return (default: 8)

        Returns:
            str: The results of the search.
            Formatted search results with optional AI summary
        """
        search_results = []
        attempts = 0
        provider = self._get_provider()
        results: list[SearchResult] = []
        answer: Optional[str] = None

        while attempts < self.config.duckduckgo_max_attempts:
            if not query:
                return json.dumps(search_results)
        # Try primary provider
        try:
            if provider == SearchProvider.TAVILY:
                results, answer = self._search_tavily(
                    query,
                    num_results,
                    include_answer=self.config.tavily_include_answer,
                )
            elif provider == SearchProvider.SERPER:
                results = self._search_serper(query, num_results)
            else:
                results = self._search_ddgs(query, num_results)

            search_results = DDGS().text(
                query, max_results=num_results, backend=self.config.duckduckgo_backend
            )
        except Exception as e:
            logger.warning(f"{provider.value} search failed: {e}, trying fallback...")

            if search_results:
                break
            # Fallback chain
            if provider == SearchProvider.TAVILY and self.config.serper_api_key:
                try:
                    results = self._search_serper(query, num_results)
                    provider = SearchProvider.SERPER
                except Exception as e2:
                    logger.warning(f"Serper fallback failed: {e2}")

            time.sleep(1)
            attempts += 1
            if not results:
                logger.info("Falling back to DDGS multi-engine search")
                results = self._search_ddgs(query, num_results)
                provider = SearchProvider.DDGS

        search_results = [
            {
                "title": r["title"],
                "url": r["href"],
                **({"exerpt": r["body"]} if r.get("body") else {}),
            }
            for r in search_results
        ]
        if not results:
            return "No search results found."

        results = ("## Search results\n") + "\n\n".join(
            f"### \"{r['title']}\"\n"
            f"**URL:** {r['url']} \n"
            "**Excerpt:** " + (f'"{exerpt}"' if (exerpt := r.get("exerpt")) else "N/A")
            for r in search_results
        )
        return self.safe_google_results(results)
        logger.info(f"Search completed using {provider.value}: {len(results)} results")
        return self._format_results(results, answer)
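    # Illustrative sketch (not part of this diff) of how selection falls out of
    # key presence, assuming neither TAVILY_API_KEY nor SERPER_API_KEY is set:
    #
    #     component = WebSearchComponent()
    #     assert component._get_provider() == SearchProvider.DDGS  # free fallback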
    @command(
        ["google"],
        "Google Search",
        ["search_and_extract"],
        "Search and extract full content from web pages. Best for research tasks.",
        {
            "query": JSONSchema(
                type=JSONSchema.Type.STRING,
@@ -128,85 +430,55 @@ class WebSearchComponent(
            ),
            "num_results": JSONSchema(
                type=JSONSchema.Type.INTEGER,
                description="The number of results to return",
                description="Number of results to return (1-10)",
                minimum=1,
                maximum=10,
                required=False,
            ),
        },
    )
    def google(self, query: str, num_results: int = 8) -> str | list[str]:
        """Return the results of a Google search using the official Google API
    def search_and_extract(self, query: str, num_results: int = 5) -> str:
        """
        Search and extract full page content using Tavily's advanced search.

        This command performs a deep search and extracts the full content
        from the most relevant pages. Best for research tasks that need
        comprehensive information.

        Args:
            query (str): The search query.
            num_results (int): The number of results to return.
            query: The search query
            num_results: Number of results with full content (default: 5)

        Returns:
            str: The results of the search.
            Search results with extracted page content
        """

        from googleapiclient.discovery import build
        from googleapiclient.errors import HttpError
        if not self.config.tavily_api_key:
            return (
                "Error: search_and_extract requires a Tavily API key. "
                "Set TAVILY_API_KEY environment variable."
            )

        try:
            # Should be the case if this command is enabled:
            assert self.config.google_api_key
            assert self.config.google_custom_search_engine_id

            # Initialize the Custom Search API service
            service = build(
                "customsearch",
                "v1",
                developerKey=self.config.google_api_key.get_secret_value(),
            results, answer = self._search_tavily(
                query,
                num_results,
                include_answer=True,
                include_raw_content=True,
                search_depth="advanced",
            )

            # Send the search query and retrieve the results
            result = (
                service.cse()
                .list(
                    q=query,
                    cx=self.config.google_custom_search_engine_id.get_secret_value(),
                    num=num_results,
                )
                .execute()
            )
            if not results:
                return "No search results found."

            # Extract the search result items from the response
            search_results = result.get("items", [])
            return self._format_results(results, answer, include_raw_content=True)

            # Create a list of only the URLs from the search results
            search_results_links = [item["link"] for item in search_results]  # type: ignore # noqa

        except HttpError as e:
            # Handle errors in the API call
            error_details = json.loads(e.content.decode())

            # Check if the error is related to an invalid or missing API key
            if error_details.get("error", {}).get(
                "code"
            ) == 403 and "invalid API key" in error_details.get("error", {}).get(
                "message", ""
            ):
                raise ConfigurationError(
                    "The provided Google API key is invalid or missing."
                )
            raise
        # google_result can be a list or a string depending on the search results

        # Return the list of search result URLs
        return self.safe_google_results(search_results_links)
        except Exception as e:
            logger.error(f"search_and_extract failed: {e}")
            return f"Search failed: {e}"

    # Legacy method for backwards compatibility
    def safe_google_results(self, results: str | list) -> str:
        """
        Return the results of a Google search in a safe format.

        Args:
            results (str | list): The search results.

        Returns:
            str: The results of the search.
        """
        """Return the results of a Google search in a safe format."""
        if isinstance(results, list):
            safe_message = json.dumps(
                [result.encode("utf-8", "ignore").decode("utf-8") for result in results]
@@ -94,6 +94,9 @@ class WebSeleniumComponent(
|
||||
|
||||
def get_commands(self) -> Iterator[Command]:
|
||||
yield self.read_webpage
|
||||
yield self.take_screenshot
|
||||
yield self.click_element
|
||||
yield self.fill_form
|
||||
|
||||
@command(
|
||||
["read_webpage"],
|
||||
@@ -400,3 +403,222 @@ class WebSeleniumComponent(
|
||||
spacy_model=self.config.browse_spacy_language_model,
|
||||
)
|
||||
return result
|
||||
|
||||
@command(
|
||||
["take_screenshot"],
|
||||
"Take a screenshot of a webpage and save it to a file.",
|
||||
{
|
||||
"url": JSONSchema(
|
||||
type=JSONSchema.Type.STRING,
|
||||
description="The URL of the webpage to screenshot",
|
||||
required=True,
|
||||
),
|
||||
"filename": JSONSchema(
|
||||
type=JSONSchema.Type.STRING,
|
||||
description="Filename for screenshot (e.g. 'screenshot.png')",
|
||||
required=True,
|
||||
),
|
||||
"full_page": JSONSchema(
|
||||
type=JSONSchema.Type.BOOLEAN,
|
||||
description="Capture full page including scrollable content",
|
||||
required=False,
|
||||
),
|
||||
},
|
||||
)
|
||||
@validate_url
|
||||
async def take_screenshot(
|
||||
self, url: str, filename: str, full_page: bool = False
|
||||
) -> str:
|
||||
"""Take a screenshot of a webpage.
|
||||
|
||||
Args:
|
||||
url: The URL to screenshot
|
||||
filename: The filename to save to
|
||||
full_page: Whether to capture full scrollable page
|
||||
|
||||
Returns:
|
||||
str: Success message with file path
|
||||
"""
|
||||
driver = None
|
||||
try:
|
||||
driver = await self.open_page_in_browser(url)
|
||||
|
||||
if full_page:
|
||||
# Get full page dimensions
|
||||
total_height = driver.execute_script(
|
||||
"return document.body.scrollHeight"
|
||||
)
|
||||
driver.set_window_size(1920, total_height)
|
||||
await asyncio.sleep(0.5) # Wait for resize
|
||||
|
||||
# Save screenshot
|
||||
screenshot_path = self.data_dir / filename
|
||||
screenshot_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
driver.save_screenshot(str(screenshot_path))
|
||||
|
||||
return f"Screenshot saved to {screenshot_path}"
|
||||
|
||||
except WebDriverException as e:
|
||||
msg = e.msg.split("\n")[0] if e.msg else str(e)
|
||||
raise CommandExecutionError(f"Screenshot failed: {msg}")
|
||||
finally:
|
||||
if driver:
|
||||
driver.close()
|
||||
|
||||
@command(
|
||||
["click_element"],
|
||||
"Click an element on a webpage identified by a CSS selector or XPath.",
|
||||
{
|
||||
"url": JSONSchema(
|
||||
type=JSONSchema.Type.STRING,
|
||||
description="The URL of the webpage",
|
||||
required=True,
|
||||
),
|
||||
"selector": JSONSchema(
|
||||
type=JSONSchema.Type.STRING,
|
||||
description="CSS selector or XPath expression to find the element",
|
||||
required=True,
|
||||
),
|
||||
"selector_type": JSONSchema(
|
||||
type=JSONSchema.Type.STRING,
|
||||
description="Type of selector: 'css' or 'xpath' (default: 'css')",
|
||||
required=False,
|
||||
),
|
||||
"timeout": JSONSchema(
|
||||
type=JSONSchema.Type.INTEGER,
|
||||
description="Timeout in seconds to wait for element (default: 10)",
|
||||
required=False,
|
||||
),
|
||||
},
|
||||
)
|
||||
@validate_url
|
||||
async def click_element(
|
||||
self,
|
||||
url: str,
|
||||
selector: str,
|
||||
selector_type: str = "css",
|
||||
timeout: int = 10,
|
||||
) -> str:
|
||||
"""Click an element on a webpage.
|
||||
|
||||
Args:
|
||||
url: The URL of the webpage
|
||||
selector: The CSS selector or XPath
|
||||
selector_type: Type of selector ('css' or 'xpath')
|
||||
timeout: Timeout to wait for element
|
||||
|
||||
Returns:
|
||||
str: Success message
|
||||
"""
|
||||
driver = None
|
||||
try:
|
||||
driver = await self.open_page_in_browser(url)
|
||||
|
||||
by_type = By.CSS_SELECTOR if selector_type == "css" else By.XPATH
|
||||
|
||||
# Wait for element to be clickable
|
||||
element = WebDriverWait(driver, timeout).until(
|
||||
EC.element_to_be_clickable((by_type, selector))
|
||||
)
|
||||
|
||||
element.click()
|
||||
|
||||
# Wait for any page changes
|
||||
await asyncio.sleep(1)
|
||||
|
||||
return f"Clicked element matching '{selector}'"
|
||||
|
||||
except WebDriverException as e:
|
||||
msg = e.msg.split("\n")[0] if e.msg else str(e)
|
||||
raise CommandExecutionError(f"Click failed: {msg}")
|
||||
finally:
|
||||
if driver:
|
||||
driver.close()
|
||||
|
||||
@command(
|
||||
["fill_form"],
|
||||
"Fill form fields on a webpage with provided values.",
|
||||
{
|
||||
"url": JSONSchema(
|
||||
type=JSONSchema.Type.STRING,
|
||||
description="The URL of the webpage with the form",
|
||||
required=True,
|
||||
),
|
||||
"fields": JSONSchema(
|
||||
type=JSONSchema.Type.OBJECT,
|
||||
description="Dictionary mapping CSS selectors to values to enter",
|
||||
required=True,
|
||||
),
|
||||
"submit": JSONSchema(
|
||||
type=JSONSchema.Type.BOOLEAN,
|
||||
description="Whether to submit the form after filling (default: False)",
|
||||
required=False,
|
||||
),
|
||||
},
|
||||
)
|
||||
@validate_url
|
||||
async def fill_form(
|
||||
self,
|
||||
url: str,
|
||||
fields: dict[str, str],
|
||||
submit: bool = False,
|
||||
) -> str:
|
||||
"""Fill form fields on a webpage.
|
||||
|
||||
Args:
|
||||
url: The URL of the webpage
|
||||
fields: Dict mapping selectors to values
|
||||
submit: Whether to submit the form
|
||||
|
||||
Returns:
|
||||
str: Success message with filled fields
|
||||
"""
|
||||
driver = None
|
||||
try:
|
||||
driver = await self.open_page_in_browser(url)
|
||||
|
||||
filled = []
|
||||
for selector, value in fields.items():
|
||||
try:
|
||||
element = WebDriverWait(driver, 10).until(
|
||||
EC.presence_of_element_located((By.CSS_SELECTOR, selector))
|
||||
)
|
||||
|
||||
# Clear and fill
|
||||
element.clear()
|
||||
element.send_keys(value)
|
||||
filled.append(selector)
|
||||
|
||||
except Exception as e:
|
||||
raise CommandExecutionError(
|
||||
f"Could not fill field '{selector}': {e}"
|
||||
)
|
||||
|
||||
if submit and filled:
|
||||
# Find and click submit button
|
||||
try:
|
||||
submit_btn = driver.find_element(
|
||||
By.CSS_SELECTOR, "button[type='submit'], input[type='submit']"
|
||||
)
|
||||
submit_btn.click()
|
||||
await asyncio.sleep(2) # Wait for submission
|
||||
except Exception:
|
||||
# Try submitting the form directly
|
||||
try:
|
||||
form = driver.find_element(By.CSS_SELECTOR, "form")
|
||||
form.submit()
|
||||
await asyncio.sleep(2)
|
||||
except Exception as e:
|
||||
raise CommandExecutionError(f"Could not submit form: {e}")
|
||||
|
||||
msg = f"Filled {len(filled)} field(s): {', '.join(filled)}"
|
||||
if submit:
|
||||
msg += " and submitted form"
|
||||
return msg
|
||||
|
||||
except WebDriverException as e:
|
||||
msg = e.msg.split("\n")[0] if e.msg else str(e)
|
||||
raise CommandExecutionError(f"Form fill failed: {msg}")
|
||||
finally:
|
||||
if driver:
|
||||
driver.close()
|
||||
|
||||
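For illustration only: a minimal sketch of driving the two commands above from async code, assuming an instantiated web component exposing them (the `component` object and the page selectors here are hypothetical, not taken from this diff):

import asyncio

async def demo(component):
    # Click a button located by CSS selector
    print(await component.click_element(
        "https://example.com/login", "button#submit", selector_type="css"
    ))
    # Fill a login form and submit it in one call
    print(await component.fill_form(
        "https://example.com/login",
        fields={"input[name='user']": "alice", "input[name='pass']": "secret"},
        submit=True,
    ))

# asyncio.run(demo(component)) would run the sketch given a real component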
@@ -1,152 +1,383 @@
import json
"""Tests for the modern web search component."""

from unittest.mock import MagicMock

import pytest
from googleapiclient.errors import HttpError
from httplib2 import Response
from pydantic import SecretStr

from forge.utils.exceptions import ConfigurationError

from . import WebSearchComponent
from .search import (
    SearchProvider,
    SearchResult,
    WebSearchComponent,
    WebSearchConfiguration,
)


@pytest.fixture
def web_search_component():
    component = WebSearchComponent()
    if component.config.google_api_key is None:
        component.config.google_api_key = SecretStr("test")
    if component.config.google_custom_search_engine_id is None:
        component.config.google_custom_search_engine_id = SecretStr("test")
    return component


@pytest.mark.parametrize(
    "query, expected_output",
    [("test", "test"), (["test1", "test2"], '["test1", "test2"]')],
)
@pytest.fixture
def test_safe_google_results(
    query, expected_output, web_search_component: WebSearchComponent
):
    result = web_search_component.safe_google_results(query)
    assert isinstance(result, str)
    assert result == expected_output
    """Create a WebSearchComponent with no API keys (DDGS multi-engine only)."""
    config = WebSearchConfiguration()
    return WebSearchComponent(config)


@pytest.fixture
def test_safe_google_results_invalid_input(web_search_component: WebSearchComponent):
    with pytest.raises(AttributeError):
        web_search_component.safe_google_results(123)  # type: ignore


@pytest.mark.parametrize(
    "query, num_results, expected_output_parts, return_value",
    [
        (
            "test",
            1,
            ("Result 1", "https://example.com/result1"),
            [{"title": "Result 1", "href": "https://example.com/result1"}],
        ),
        ("", 1, (), []),
        ("no results", 1, (), []),
    ],
)
def test_google_search(
    query,
    num_results,
    expected_output_parts,
    return_value,
    mocker,
    web_search_component: WebSearchComponent,
):
    mock_ddg = mocker.Mock()
    mock_ddg.return_value = return_value

    mocker.patch("forge.components.web.search.DDGS.text", mock_ddg)
    actual_output = web_search_component.web_search(query, num_results=num_results)
    for o in expected_output_parts:
        assert o in actual_output


@pytest.fixture
def mock_googleapiclient(mocker):
    mock_build = mocker.patch("googleapiclient.discovery.build")
    mock_service = mocker.Mock()
    mock_build.return_value = mock_service
    return mock_service.cse().list().execute().get


@pytest.mark.parametrize(
    "query, num_results, search_results, expected_output",
    [
        (
            "test",
            3,
            [
                {"link": "http://example.com/result1"},
                {"link": "http://example.com/result2"},
                {"link": "http://example.com/result3"},
            ],
            [
                "http://example.com/result1",
                "http://example.com/result2",
                "http://example.com/result3",
            ],
        ),
        ("", 3, [], []),
    ],
)
def test_google_official_search(
    query,
    num_results,
    expected_output,
    search_results,
    mock_googleapiclient,
    web_search_component: WebSearchComponent,
):
    mock_googleapiclient.return_value = search_results
    actual_output = web_search_component.google(query, num_results=num_results)
    assert actual_output == web_search_component.safe_google_results(expected_output)


@pytest.mark.parametrize(
    "query, num_results, expected_error_type, http_code, error_msg",
    [
        (
            "invalid query",
            3,
            HttpError,
            400,
            "Invalid Value",
        ),
        (
            "invalid API key",
            3,
            ConfigurationError,
            403,
            "invalid API key",
        ),
    ],
)
def test_google_official_search_errors(
    query,
    num_results,
    expected_error_type,
    mock_googleapiclient,
    http_code,
    error_msg,
    web_search_component: WebSearchComponent,
):
    response_content = {
        "error": {"code": http_code, "message": error_msg, "reason": "backendError"}
    }
    error = HttpError(
        resp=Response({"status": http_code, "reason": error_msg}),
        content=str.encode(json.dumps(response_content)),
        uri="https://www.googleapis.com/customsearch/v1?q=invalid+query&cx",
def web_search_component_tavily():
    """Create a WebSearchComponent with Tavily configured."""
    config = WebSearchConfiguration(
        tavily_api_key=SecretStr("test-tavily-key"),
    )
    return WebSearchComponent(config)

    mock_googleapiclient.side_effect = error
    with pytest.raises(expected_error_type):
        web_search_component.google(query, num_results=num_results)

@pytest.fixture
def web_search_component_serper():
    """Create a WebSearchComponent with Serper configured."""
    config = WebSearchConfiguration(
        serper_api_key=SecretStr("test-serper-key"),
    )
    return WebSearchComponent(config)


@pytest.fixture
def web_search_component_all():
    """Create a WebSearchComponent with all providers configured."""
    config = WebSearchConfiguration(
        tavily_api_key=SecretStr("test-tavily-key"),
        serper_api_key=SecretStr("test-serper-key"),
    )
    return WebSearchComponent(config)


class TestProviderSelection:
    """Test automatic provider selection logic."""

    def test_auto_selects_tavily_when_available(self, web_search_component_tavily):
        assert web_search_component_tavily._get_provider() == SearchProvider.TAVILY

    def test_auto_selects_serper_when_tavily_unavailable(
        self, web_search_component_serper
    ):
        assert web_search_component_serper._get_provider() == SearchProvider.SERPER

    def test_auto_selects_ddgs_when_no_keys(self, web_search_component):
        assert web_search_component._get_provider() == SearchProvider.DDGS

    def test_auto_prefers_tavily_over_serper(self, web_search_component_all):
        assert web_search_component_all._get_provider() == SearchProvider.TAVILY

    def test_explicit_provider_override(self):
        config = WebSearchConfiguration(
            tavily_api_key=SecretStr("test-key"),
            default_provider=SearchProvider.DDGS,
        )
        component = WebSearchComponent(config)
        assert component._get_provider() == SearchProvider.DDGS


class TestDDGSSearch:
    """Test DDGS multi-engine search functionality."""

    @pytest.mark.parametrize(
        "query, num_results, expected_output_parts, return_value",
        [
            (
                "test query",
                3,
                ("Test Result", "https://example.com/test"),
                [
                    {
                        "title": "Test Result",
                        "href": "https://example.com/test",
                        "body": "Test body content",
                    }
                ],
            ),
            ("", 1, (), []),
            ("no results", 1, (), []),
        ],
    )
    def test_ddgs_search(
        self,
        query,
        num_results,
        expected_output_parts,
        return_value,
        mocker,
        web_search_component,
    ):
        mock_ddgs = mocker.patch("forge.components.web.search.DDGS")
        mock_ddgs.return_value.text.return_value = return_value

        result = web_search_component.web_search(query, num_results=num_results)

        for expected in expected_output_parts:
            assert expected in result

    def test_ddgs_tries_multiple_backends_on_failure(
        self, mocker, web_search_component
    ):
        mock_ddgs = mocker.patch("forge.components.web.search.DDGS")
        # Fail twice, succeed on third attempt
        mock_ddgs.return_value.text.side_effect = [
            Exception("First failure"),
            Exception("Second failure"),
            [
                {
                    "title": "Success",
                    "href": "https://example.com",
                    "body": "Finally worked",
                }
            ],
        ]

        result = web_search_component.web_search("test", num_results=1)
        assert "Success" in result
        assert mock_ddgs.return_value.text.call_count == 3


class TestTavilySearch:
    """Test Tavily search functionality."""

    def test_tavily_search_success(self, mocker, web_search_component_tavily):
        mock_response = MagicMock()
        mock_response.json.return_value = {
            "answer": "This is the AI-generated answer.",
            "results": [
                {
                    "title": "Tavily Result",
                    "url": "https://example.com/tavily",
                    "content": "Tavily content snippet",
                    "score": 0.95,
                }
            ],
        }
        mock_response.raise_for_status = MagicMock()

        mocker.patch("requests.post", return_value=mock_response)

        result = web_search_component_tavily.web_search("test query", num_results=5)

        assert "AI Summary" in result
        assert "AI-generated answer" in result
        assert "Tavily Result" in result
        assert "https://example.com/tavily" in result

    def test_tavily_search_with_content_extraction(
        self, mocker, web_search_component_tavily
    ):
        mock_response = MagicMock()
        mock_response.json.return_value = {
            "answer": "Summary answer",
            "results": [
                {
                    "title": "Research Article",
                    "url": "https://example.com/article",
                    "content": "Brief snippet",
                    "score": 0.9,
                    "raw_content": "Full article content with lots of details...",
                }
            ],
        }
        mock_response.raise_for_status = MagicMock()

        mocker.patch("requests.post", return_value=mock_response)

        result = web_search_component_tavily.search_and_extract(
            "research topic", num_results=3
        )

        assert "Research Article" in result
        assert "Full Content:" in result

    def test_tavily_requires_api_key(self, web_search_component):
        # Component without Tavily key should not have search_and_extract command
        commands = list(web_search_component.get_commands())
        command_names = [cmd.names[0] for cmd in commands]
        assert "search_and_extract" not in command_names

    def test_tavily_fallback_to_serper(self, mocker, web_search_component_all):
        # Make Tavily fail
        mock_tavily = mocker.patch.object(
            web_search_component_all,
            "_search_tavily",
            side_effect=Exception("Tavily down"),
        )

        # Mock Serper to succeed
        mock_serper = mocker.patch.object(
            web_search_component_all,
            "_search_serper",
            return_value=[
                SearchResult(
                    title="Serper Result",
                    url="https://example.com/serper",
                    content="Serper fallback content",
                )
            ],
        )

        result = web_search_component_all.web_search("test", num_results=5)

        assert "Serper Result" in result
        mock_tavily.assert_called_once()
        mock_serper.assert_called_once()


class TestSerperSearch:
    """Test Serper search functionality."""

    def test_serper_search_success(self, mocker, web_search_component_serper):
        mock_response = MagicMock()
        mock_response.json.return_value = {
            "organic": [
                {
                    "title": "Google Result",
                    "link": "https://example.com/google",
                    "snippet": "Google search snippet",
                    "position": 1,
                }
            ]
        }
        mock_response.raise_for_status = MagicMock()

        mocker.patch("requests.post", return_value=mock_response)

        result = web_search_component_serper.web_search("test query", num_results=5)

        assert "Google Result" in result
        assert "https://example.com/google" in result


class TestFallbackBehavior:
    """Test fallback chain when providers fail."""

    def test_full_fallback_to_ddgs(self, mocker, web_search_component_all):
        # Make both Tavily and Serper fail
        mocker.patch.object(
            web_search_component_all,
            "_search_tavily",
            side_effect=Exception("Tavily down"),
        )
        mocker.patch.object(
            web_search_component_all,
            "_search_serper",
            side_effect=Exception("Serper down"),
        )

        # Mock DuckDuckGo to succeed
        mock_ddgs = mocker.patch("forge.components.web.search.DDGS")
        mock_ddgs.return_value.text.return_value = [
            {
                "title": "DDG Fallback",
                "href": "https://example.com/ddg",
                "body": "Fallback content",
            }
        ]

        result = web_search_component_all.web_search("test", num_results=5)

        assert "DDG Fallback" in result

    def test_returns_no_results_message(self, mocker, web_search_component):
        mock_ddgs = mocker.patch("forge.components.web.search.DDGS")
        mock_ddgs.return_value.text.return_value = []

        result = web_search_component.web_search("nonexistent query", num_results=5)

        assert "No search results found" in result


class TestResultFormatting:
    """Test search result formatting."""

    def test_format_results_with_answer(self, web_search_component):
        results = [
            SearchResult(
                title="Test Title",
                url="https://example.com",
                content="Test content",
                score=0.85,
            )
        ]
        formatted = web_search_component._format_results(
            results, answer="AI generated answer"
        )

        assert "## AI Summary" in formatted
        assert "AI generated answer" in formatted
        assert "Test Title" in formatted
        assert "0.85" in formatted

    def test_format_results_with_raw_content(self, web_search_component):
        results = [
            SearchResult(
                title="Article",
                url="https://example.com",
                content="Brief",
                raw_content="Full article text here",
            )
        ]
        formatted = web_search_component._format_results(
            results, include_raw_content=True
        )

        assert "Full Content:" in formatted
        assert "Full article text" in formatted

    def test_format_results_truncates_long_content(self, web_search_component):
        long_content = "x" * 3000
        results = [
            SearchResult(
                title="Long Article",
                url="https://example.com",
                content="Brief",
                raw_content=long_content,
            )
        ]
        formatted = web_search_component._format_results(
            results, include_raw_content=True
        )

        assert "[truncated]" in formatted
        assert len(formatted) < len(long_content) + 500  # Reasonable overhead


class TestLegacyCompatibility:
    """Test backwards compatibility with old API."""

    @pytest.mark.parametrize(
        "input_val, expected",
        [
            ("test string", "test string"),
            (["test1", "test2"], '["test1", "test2"]'),
        ],
    )
    def test_safe_google_results(self, input_val, expected, web_search_component):
        result = web_search_component.safe_google_results(input_val)
        assert result == expected


class TestConfiguration:
    """Test configuration handling."""

    def test_commands_available_based_on_config(self):
        # No keys - only web_search
        config = WebSearchConfiguration()
        component = WebSearchComponent(config)
        commands = list(component.get_commands())
        assert len(commands) == 1
        assert commands[0].names[0] == "web_search"

        # With Tavily key - web_search + search_and_extract
        config = WebSearchConfiguration(tavily_api_key=SecretStr("key"))
        component = WebSearchComponent(config)
        commands = list(component.get_commands())
        assert len(commands) == 2
        command_names = [cmd.names[0] for cmd in commands]
        assert "web_search" in command_names
        assert "search_and_extract" in command_names

    def test_resources_provided(self, web_search_component):
        resources = list(web_search_component.get_resources())
        assert len(resources) == 1
        assert "Internet" in resources[0]
137
classic/forge/forge/config/workspace_settings.py
Normal file
@@ -0,0 +1,137 @@
"""Workspace and agent permission settings for AutoGPT."""

from __future__ import annotations

from pathlib import Path

import yaml
from pydantic import BaseModel, Field


class PermissionsConfig(BaseModel):
    """Configuration for allow/deny permission patterns."""

    allow: list[str] = Field(default_factory=list)
    deny: list[str] = Field(default_factory=list)


class WorkspaceSettings(BaseModel):
    """Workspace-level permissions that apply to all agents."""

    permissions: PermissionsConfig = Field(
        default_factory=lambda: PermissionsConfig(
            allow=[
                "read_file({workspace}/**)",
                "write_file({workspace}/**)",
                "list_folder({workspace}/**)",
            ],
            deny=[
                "read_file(**.env)",
                "read_file(**.env.*)",
                "read_file(**.key)",
                "read_file(**.pem)",
                # Shell commands use format "executable:args"
                # Use ** to match paths containing /
                "execute_shell(rm:-rf **)",
                "execute_shell(rm:-r **)",
                "execute_shell(sudo:**)",
            ],
        )
    )

    @classmethod
    def load_or_create(cls, workspace: Path) -> "WorkspaceSettings":
        """Load settings from workspace or create default settings file.

        Args:
            workspace: Path to the workspace directory.

        Returns:
            WorkspaceSettings instance.
        """
        autogpt_dir = workspace / ".autogpt"
        settings_path = autogpt_dir / "autogpt.yaml"
        if settings_path.exists():
            with open(settings_path) as f:
                data = yaml.safe_load(f)
            return cls.model_validate(data or {})
        settings = cls()
        settings.save(workspace)
        return settings

    def save(self, workspace: Path) -> None:
        """Save settings to the workspace .autogpt/autogpt.yaml file.

        Args:
            workspace: Path to the workspace directory.
        """
        autogpt_dir = workspace / ".autogpt"
        autogpt_dir.mkdir(parents=True, exist_ok=True)
        settings_path = autogpt_dir / "autogpt.yaml"
        with open(settings_path, "w") as f:
            f.write("# autogpt.yaml - Workspace Permissions (all agents)\n")
            f.write("# Auto-generated and updated as you grant permissions\n\n")
            yaml.safe_dump(
                self.model_dump(), f, default_flow_style=False, sort_keys=False
            )

    def add_permission(self, pattern: str, workspace: Path) -> None:
        """Add a permission pattern to the allow list.

        Args:
            pattern: The permission pattern to add.
            workspace: Path to the workspace directory for saving.
        """
        if pattern not in self.permissions.allow:
            self.permissions.allow.append(pattern)
            self.save(workspace)


class AgentPermissions(BaseModel):
    """Agent-specific permissions that override workspace settings."""

    permissions: PermissionsConfig = Field(default_factory=PermissionsConfig)

    @classmethod
    def load_or_create(cls, agent_dir: Path) -> "AgentPermissions":
        """Load agent permissions or create empty permissions.

        Args:
            agent_dir: Path to the agent's data directory.

        Returns:
            AgentPermissions instance.
        """
        settings_path = agent_dir / "permissions.yaml"
        if settings_path.exists():
            with open(settings_path) as f:
                data = yaml.safe_load(f)
            return cls.model_validate(data or {})
        return cls()

    def save(self, agent_dir: Path) -> None:
        """Save agent permissions to permissions.yaml.

        Args:
            agent_dir: Path to the agent's data directory.
        """
        settings_path = agent_dir / "permissions.yaml"
        # Ensure directory exists
        agent_dir.mkdir(parents=True, exist_ok=True)
        with open(settings_path, "w") as f:
            f.write("# Agent-specific permissions\n")
            f.write("# These override workspace-level permissions\n\n")
            yaml.safe_dump(
                self.model_dump(), f, default_flow_style=False, sort_keys=False
            )

    def add_permission(self, pattern: str, agent_dir: Path) -> None:
        """Add a permission pattern to the agent's allow list.

        Args:
            pattern: The permission pattern to add.
            agent_dir: Path to the agent's data directory for saving.
        """
        if pattern not in self.permissions.allow:
            self.permissions.allow.append(pattern)
            self.save(agent_dir)
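For illustration only: a sketch of how the two settings classes compose at startup. The file locations (`.autogpt/autogpt.yaml`, `permissions.yaml`) come from the module above; the example directories are placeholders:

from pathlib import Path

workspace = Path("/home/user/my-workspace")  # placeholder path
agent_dir = workspace / ".autogpt" / "agents" / "agent-1"  # placeholder layout

# First call creates .autogpt/autogpt.yaml with the default allow/deny lists
ws_settings = WorkspaceSettings.load_or_create(workspace)
agent_perms = AgentPermissions.load_or_create(agent_dir)

# Granting a permission persists it to disk immediately
ws_settings.add_permission("execute_shell(git:**)", workspace)
assert "execute_shell(git:**)" in ws_settings.permissions.allow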
@@ -290,7 +290,19 @@ class BaseOpenAIChatProvider(
        kwargs = cast(CompletionCreateParams, kwargs)

        if max_output_tokens:
            kwargs["max_tokens"] = max_output_tokens
            # Newer models (o1, o3, o4, gpt-5, gpt-4.1, gpt-4o)
            # use max_completion_tokens instead of max_tokens
            if (
                model.startswith("o1")
                or model.startswith("o3")
                or model.startswith("o4")
                or model.startswith("gpt-5")
                or model.startswith("gpt-4.1")
                or model.startswith("gpt-4o")
            ):
                kwargs["max_completion_tokens"] = max_output_tokens  # type: ignore
            else:
                kwargs["max_tokens"] = max_output_tokens

        if functions:
            kwargs["tools"] = [  # pyright: ignore - it fails to infer the dict type
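For illustration only: the model-family dispatch above, extracted into a standalone sketch (the helper name is made up; only the prefix list and the two field names come from the diff):

def token_limit_param(model: str, max_output_tokens: int) -> dict[str, int]:
    # Newer OpenAI families reject max_tokens and require max_completion_tokens
    newer_prefixes = ("o1", "o3", "o4", "gpt-5", "gpt-4.1", "gpt-4o")
    if model.startswith(newer_prefixes):  # str.startswith accepts a tuple
        return {"max_completion_tokens": max_output_tokens}
    return {"max_tokens": max_output_tokens}

assert token_limit_param("gpt-4o-2024-08-06", 1024) == {"max_completion_tokens": 1024}
assert token_limit_param("gpt-4-turbo", 1024) == {"max_tokens": 1024}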
@@ -32,24 +32,39 @@ from .schema import (
from .utils import validate_tool_calls

if TYPE_CHECKING:
    from anthropic.types.beta.tools import MessageCreateParams
    from anthropic.types.beta.tools import ToolsBetaMessage as Message
    from anthropic.types.beta.tools import ToolsBetaMessageParam as MessageParam
    from anthropic.types import Message, MessageParam
    from anthropic.types.message_create_params import MessageCreateParams

_T = TypeVar("_T")
_P = ParamSpec("_P")


class AnthropicModelName(str, enum.Enum):
    # Claude 3 models (legacy)
    CLAUDE3_OPUS_v1 = "claude-3-opus-20240229"
    CLAUDE3_SONNET_v1 = "claude-3-sonnet-20240229"
    CLAUDE3_5_SONNET_v1 = "claude-3-5-sonnet-20240620"
    CLAUDE3_HAIKU_v1 = "claude-3-haiku-20240307"

    # Claude 3.5 models
    CLAUDE3_5_SONNET_v1 = "claude-3-5-sonnet-20240620"
    CLAUDE3_5_SONNET_v2 = "claude-3-5-sonnet-20241022"
    CLAUDE3_5_HAIKU_v1 = "claude-3-5-haiku-20241022"

    # Claude 4 models
    CLAUDE4_SONNET_v1 = "claude-sonnet-4-20250514"
    CLAUDE4_OPUS_v1 = "claude-opus-4-20250514"
    CLAUDE4_5_OPUS_v1 = "claude-opus-4-5-20251101"

    # Rolling aliases
    CLAUDE_SONNET = "claude-sonnet-4-20250514"
    CLAUDE_OPUS = "claude-opus-4-5-20251101"
    CLAUDE_HAIKU = "claude-3-5-haiku-20241022"


ANTHROPIC_CHAT_MODELS = {
    info.name: info
    for info in [
        # Claude 3 models (legacy)
        ChatModelInfo(
            name=AnthropicModelName.CLAUDE3_OPUS_v1,
            provider_name=ModelProviderName.ANTHROPIC,
@@ -66,6 +81,15 @@ ANTHROPIC_CHAT_MODELS = {
            max_tokens=200000,
            has_function_call_api=True,
        ),
        ChatModelInfo(
            name=AnthropicModelName.CLAUDE3_HAIKU_v1,
            provider_name=ModelProviderName.ANTHROPIC,
            prompt_token_cost=0.25 / 1e6,
            completion_token_cost=1.25 / 1e6,
            max_tokens=200000,
            has_function_call_api=True,
        ),
        # Claude 3.5 models
        ChatModelInfo(
            name=AnthropicModelName.CLAUDE3_5_SONNET_v1,
            provider_name=ModelProviderName.ANTHROPIC,
@@ -75,15 +99,60 @@ ANTHROPIC_CHAT_MODELS = {
            has_function_call_api=True,
        ),
        ChatModelInfo(
            name=AnthropicModelName.CLAUDE3_HAIKU_v1,
            name=AnthropicModelName.CLAUDE3_5_SONNET_v2,
            provider_name=ModelProviderName.ANTHROPIC,
            prompt_token_cost=0.25 / 1e6,
            completion_token_cost=1.25 / 1e6,
            prompt_token_cost=3 / 1e6,
            completion_token_cost=15 / 1e6,
            max_tokens=200000,
            has_function_call_api=True,
        ),
        ChatModelInfo(
            name=AnthropicModelName.CLAUDE3_5_HAIKU_v1,
            provider_name=ModelProviderName.ANTHROPIC,
            prompt_token_cost=0.80 / 1e6,
            completion_token_cost=4 / 1e6,
            max_tokens=200000,
            has_function_call_api=True,
        ),
        # Claude 4 models
        ChatModelInfo(
            name=AnthropicModelName.CLAUDE4_SONNET_v1,
            provider_name=ModelProviderName.ANTHROPIC,
            prompt_token_cost=3 / 1e6,
            completion_token_cost=15 / 1e6,
            max_tokens=200000,
            has_function_call_api=True,
        ),
        ChatModelInfo(
            name=AnthropicModelName.CLAUDE4_OPUS_v1,
            provider_name=ModelProviderName.ANTHROPIC,
            prompt_token_cost=15 / 1e6,
            completion_token_cost=75 / 1e6,
            max_tokens=200000,
            has_function_call_api=True,
        ),
        ChatModelInfo(
            name=AnthropicModelName.CLAUDE4_5_OPUS_v1,
            provider_name=ModelProviderName.ANTHROPIC,
            prompt_token_cost=15 / 1e6,
            completion_token_cost=75 / 1e6,
            max_tokens=200000,
            has_function_call_api=True,
        ),
    ]
}
# Copy entries for aliased models
chat_model_mapping = {
    AnthropicModelName.CLAUDE4_SONNET_v1: [AnthropicModelName.CLAUDE_SONNET],
    AnthropicModelName.CLAUDE4_5_OPUS_v1: [AnthropicModelName.CLAUDE_OPUS],
    AnthropicModelName.CLAUDE3_5_HAIKU_v1: [AnthropicModelName.CLAUDE_HAIKU],
}
for base, copies in chat_model_mapping.items():
    for copy in copies:
        copy_info = ANTHROPIC_CHAT_MODELS[base].model_copy()
        ANTHROPIC_CHAT_MODELS[copy] = copy_info.__class__(
            **{**copy_info.model_dump(), "name": copy}
        )
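# Illustration (editor's sketch, not part of the diff): the alias-copy loop above
# re-registers each rolling alias under its own name. The same trick with a toy
# pydantic model:
from pydantic import BaseModel

class ToyModelInfo(BaseModel):
    name: str
    max_tokens: int

registry = {"base": ToyModelInfo(name="base", max_tokens=200000)}
for base_name, alias_names in {"base": ["alias"]}.items():
    for alias in alias_names:
        info = registry[base_name].model_copy()
        # Re-instantiate so the alias entry carries its own name
        registry[alias] = info.__class__(**{**info.model_dump(), "name": alias})
assert registry["alias"].name == "alias" and registry["alias"].max_tokens == 200000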


class AnthropicCredentials(ModelProviderCredentials):
@@ -251,46 +320,30 @@ class AnthropicProvider(BaseChatModelProvider[AnthropicModelName, AnthropicSetti
                anthropic_messages.append(
                    _assistant_msg.model_dump(include={"role", "content"})  # type: ignore # noqa
                )

                # Build tool_result blocks for each tool call
                # (required if last assistant message had tool_use blocks)
                tool_results = []
                for tc in assistant_msg.tool_calls or []:
                    error_msg = self._get_tool_error_message(tc, tool_call_errors)
                    tool_results.append(
                        {
                            "type": "tool_result",
                            "tool_use_id": tc.id,
                            "is_error": True,
                            "content": [{"type": "text", "text": error_msg}],
                        }
                    )

                anthropic_messages.append(
                    {
                        "role": "user",
                        "content": [
                            *(
                                # tool_result is required if last assistant message
                                # had tool_use block(s)
                                {
                                    "type": "tool_result",
                                    "tool_use_id": tc.id,
                                    "is_error": True,
                                    "content": [
                                        {
                                            "type": "text",
                                            "text": "Not executed because parsing "
                                            "of your last message failed"
                                            if not tool_call_errors
                                            else str(e)
                                            if (
                                                e := next(
                                                    (
                                                        tce
                                                        for tce in tool_call_errors
                                                        if tce.name
                                                        == tc.function.name
                                                    ),
                                                    None,
                                                )
                                            )
                                            else "Not executed because validation "
                                            "of tool input failed",
                                        }
                                    ],
                                }
                                for tc in assistant_msg.tool_calls or []
                            ),
                            *tool_results,
                            {
                                "type": "text",
                                "text": (
                                    "ERROR PARSING YOUR RESPONSE:\n\n"
                                    f"ERROR PARSING YOUR RESPONSE:\n\n"
                                    f"{e.__class__.__name__}: {e}"
                                ),
                            },
@@ -450,7 +503,7 @@ class AnthropicProvider(BaseChatModelProvider[AnthropicModelName, AnthropicSetti

        @self._retry_api_request
        async def _create_chat_completion_with_retry() -> Message:
            return await self._client.beta.tools.messages.create(
            return await self._client.messages.create(
                model=model, **completion_kwargs  # type: ignore
            )

@@ -463,6 +516,32 @@ class AnthropicProvider(BaseChatModelProvider[AnthropicModelName, AnthropicSetti
        )
        return response, cost, response.usage.input_tokens, response.usage.output_tokens

    def _get_tool_error_message(
        self,
        tool_call: AssistantToolCall,
        tool_call_errors: list,
    ) -> str:
        """Get the error message for a failed tool call.

        Args:
            tool_call: The tool call that failed.
            tool_call_errors: List of validation errors for tool calls.

        Returns:
            An appropriate error message for the tool result.
        """
        if not tool_call_errors:
            return "Not executed because parsing of your last message failed"

        # Find matching error for this specific tool call
        matching_error = next(
            (err for err in tool_call_errors if err.name == tool_call.function.name),
            None,
        )
        if matching_error:
            return str(matching_error)
        return "Not executed: validation failed"

    def _parse_assistant_tool_calls(
        self, assistant_message: Message
    ) -> list[AssistantToolCall]:
@@ -48,6 +48,7 @@ class OpenAIModelName(str, enum.Enum):
    EMBEDDING_v3_S = "text-embedding-3-small"
    EMBEDDING_v3_L = "text-embedding-3-large"

    # Legacy GPT-3.5 models
    GPT3_v1 = "gpt-3.5-turbo-0301"
    GPT3_v2 = "gpt-3.5-turbo-0613"
    GPT3_v2_16k = "gpt-3.5-turbo-16k-0613"
@@ -58,6 +59,7 @@ class OpenAIModelName(str, enum.Enum):
    GPT3 = GPT3_ROLLING
    GPT3_16k = GPT3_ROLLING_16k

    # Legacy GPT-4 models
    GPT4_v1 = "gpt-4-0314"
    GPT4_v1_32k = "gpt-4-32k-0314"
    GPT4_v2 = "gpt-4-0613"
@@ -71,11 +73,65 @@ class OpenAIModelName(str, enum.Enum):
    GPT4_TURBO = "gpt-4-turbo"
    GPT4_TURBO_PREVIEW = "gpt-4-turbo-preview"
    GPT4_VISION = "gpt-4-vision-preview"
    GPT4_O_v1 = "gpt-4o-2024-05-13"
    GPT4_O_ROLLING = "gpt-4o"
    GPT4 = GPT4_ROLLING
    GPT4_32k = GPT4_ROLLING_32k

    # GPT-4o models (128K context)
    GPT4_O_v1 = "gpt-4o-2024-05-13"
    GPT4_O_v2 = "gpt-4o-2024-08-06"
    GPT4_O_ROLLING = "gpt-4o"
    GPT4_O = GPT4_O_ROLLING
    GPT4_O_MINI_v1 = "gpt-4o-mini-2024-07-18"
    GPT4_O_MINI_ROLLING = "gpt-4o-mini"
    GPT4_O_MINI = GPT4_O_MINI_ROLLING

    # GPT-4.1 models (1M context)
    GPT4_1_v1 = "gpt-4.1-2025-04-14"
    GPT4_1_ROLLING = "gpt-4.1"
    GPT4_1 = GPT4_1_ROLLING
    GPT4_1_MINI_v1 = "gpt-4.1-mini-2025-04-14"
    GPT4_1_MINI_ROLLING = "gpt-4.1-mini"
    GPT4_1_MINI = GPT4_1_MINI_ROLLING
    GPT4_1_NANO_v1 = "gpt-4.1-nano-2025-04-14"
    GPT4_1_NANO_ROLLING = "gpt-4.1-nano"
    GPT4_1_NANO = GPT4_1_NANO_ROLLING

    # O-series reasoning models (200K context)
    O1_v1 = "o1-2024-12-17"
    O1_ROLLING = "o1"
    O1 = O1_ROLLING
    O1_MINI_v1 = "o1-mini-2024-09-12"
    O1_MINI_ROLLING = "o1-mini"
    O1_MINI = O1_MINI_ROLLING
    O1_PRO_ROLLING = "o1-pro"
    O1_PRO = O1_PRO_ROLLING
    O3_v1 = "o3-2025-04-16"
    O3_ROLLING = "o3"
    O3 = O3_ROLLING
    O3_MINI_v1 = "o3-mini-2025-01-31"
    O3_MINI_ROLLING = "o3-mini"
    O3_MINI = O3_MINI_ROLLING
    O3_PRO_ROLLING = "o3-pro"
    O3_PRO = O3_PRO_ROLLING
    O4_MINI_v1 = "o4-mini-2025-04-16"
    O4_MINI_ROLLING = "o4-mini"
    O4_MINI = O4_MINI_ROLLING

    # GPT-5 models (~400K context)
    GPT5_ROLLING = "gpt-5"
    GPT5 = GPT5_ROLLING
    GPT5_1_ROLLING = "gpt-5.1"
    GPT5_1 = GPT5_1_ROLLING
    GPT5_2_ROLLING = "gpt-5.2"
    GPT5_2 = GPT5_2_ROLLING
    GPT5_MINI_ROLLING = "gpt-5-mini"
    GPT5_MINI = GPT5_MINI_ROLLING
    GPT5_NANO_ROLLING = "gpt-5-nano"
    GPT5_NANO = GPT5_NANO_ROLLING
    GPT5_PRO_ROLLING = "gpt-5-pro"
    GPT5_PRO = GPT5_PRO_ROLLING
    GPT5_2_PRO_ROLLING = "gpt-5.2-pro"
    GPT5_2_PRO = GPT5_2_PRO_ROLLING


OPEN_AI_EMBEDDING_MODELS = {
@@ -109,6 +165,7 @@ OPEN_AI_EMBEDDING_MODELS = {
OPEN_AI_CHAT_MODELS = {
    info.name: info
    for info in [
        # Legacy GPT-3.5 models
        ChatModelInfo(
            name=OpenAIModelName.GPT3_v1,
            provider_name=ModelProviderName.OPENAI,
@@ -141,6 +198,7 @@ OPEN_AI_CHAT_MODELS = {
            max_tokens=16384,
            has_function_call_api=True,
        ),
        # Legacy GPT-4 models
        ChatModelInfo(
            name=OpenAIModelName.GPT4_v1,
            provider_name=ModelProviderName.OPENAI,
@@ -165,21 +223,179 @@ OPEN_AI_CHAT_MODELS = {
            max_tokens=128000,
            has_function_call_api=True,
        ),
        # GPT-4o models (128K context)
        ChatModelInfo(
            name=OpenAIModelName.GPT4_O,
            provider_name=ModelProviderName.OPENAI,
            prompt_token_cost=5 / 1_000_000,
            completion_token_cost=15 / 1_000_000,
            prompt_token_cost=2.50 / 1_000_000,
            completion_token_cost=10.00 / 1_000_000,
            max_tokens=128_000,
            has_function_call_api=True,
        ),
        ChatModelInfo(
            name=OpenAIModelName.GPT4_O_v1,
            provider_name=ModelProviderName.OPENAI,
            prompt_token_cost=5.00 / 1_000_000,
            completion_token_cost=15.00 / 1_000_000,
            max_tokens=128_000,
            has_function_call_api=True,
        ),
        ChatModelInfo(
            name=OpenAIModelName.GPT4_O_MINI,
            provider_name=ModelProviderName.OPENAI,
            prompt_token_cost=0.15 / 1_000_000,
            completion_token_cost=0.60 / 1_000_000,
            max_tokens=128_000,
            has_function_call_api=True,
        ),
        # GPT-4.1 models (1M context)
        ChatModelInfo(
            name=OpenAIModelName.GPT4_1,
            provider_name=ModelProviderName.OPENAI,
            prompt_token_cost=2.00 / 1_000_000,
            completion_token_cost=8.00 / 1_000_000,
            max_tokens=1_047_576,
            has_function_call_api=True,
        ),
        ChatModelInfo(
            name=OpenAIModelName.GPT4_1_MINI,
            provider_name=ModelProviderName.OPENAI,
            prompt_token_cost=0.40 / 1_000_000,
            completion_token_cost=1.60 / 1_000_000,
            max_tokens=1_047_576,
            has_function_call_api=True,
        ),
        ChatModelInfo(
            name=OpenAIModelName.GPT4_1_NANO,
            provider_name=ModelProviderName.OPENAI,
            prompt_token_cost=0.10 / 1_000_000,
            completion_token_cost=0.40 / 1_000_000,
            max_tokens=1_047_576,
            has_function_call_api=True,
        ),
        # O-series reasoning models (200K context)
        ChatModelInfo(
            name=OpenAIModelName.O1,
            provider_name=ModelProviderName.OPENAI,
            prompt_token_cost=15.00 / 1_000_000,
            completion_token_cost=60.00 / 1_000_000,
            max_tokens=200_000,
            has_function_call_api=True,
        ),
        ChatModelInfo(
            name=OpenAIModelName.O1_MINI,
            provider_name=ModelProviderName.OPENAI,
            prompt_token_cost=1.10 / 1_000_000,
            completion_token_cost=4.40 / 1_000_000,
            max_tokens=200_000,
            has_function_call_api=True,
        ),
        ChatModelInfo(
            name=OpenAIModelName.O1_PRO,
            provider_name=ModelProviderName.OPENAI,
            prompt_token_cost=150.00 / 1_000_000,
            completion_token_cost=600.00 / 1_000_000,
            max_tokens=200_000,
            has_function_call_api=True,
        ),
        ChatModelInfo(
            name=OpenAIModelName.O3,
            provider_name=ModelProviderName.OPENAI,
            prompt_token_cost=2.00 / 1_000_000,
            completion_token_cost=8.00 / 1_000_000,
            max_tokens=200_000,
            has_function_call_api=True,
        ),
        ChatModelInfo(
            name=OpenAIModelName.O3_MINI,
            provider_name=ModelProviderName.OPENAI,
            prompt_token_cost=1.10 / 1_000_000,
            completion_token_cost=4.40 / 1_000_000,
            max_tokens=200_000,
            has_function_call_api=True,
        ),
        ChatModelInfo(
            name=OpenAIModelName.O3_PRO,
            provider_name=ModelProviderName.OPENAI,
            prompt_token_cost=20.00 / 1_000_000,
            completion_token_cost=80.00 / 1_000_000,
            max_tokens=200_000,
            has_function_call_api=True,
        ),
        ChatModelInfo(
            name=OpenAIModelName.O4_MINI,
            provider_name=ModelProviderName.OPENAI,
            prompt_token_cost=1.10 / 1_000_000,
            completion_token_cost=4.40 / 1_000_000,
            max_tokens=200_000,
            has_function_call_api=True,
        ),
        # GPT-5 models (~400K context)
        ChatModelInfo(
            name=OpenAIModelName.GPT5,
            provider_name=ModelProviderName.OPENAI,
            prompt_token_cost=1.25 / 1_000_000,
            completion_token_cost=10.00 / 1_000_000,
            max_tokens=400_000,
            has_function_call_api=True,
        ),
        ChatModelInfo(
            name=OpenAIModelName.GPT5_1,
            provider_name=ModelProviderName.OPENAI,
            prompt_token_cost=1.25 / 1_000_000,
            completion_token_cost=10.00 / 1_000_000,
            max_tokens=400_000,
            has_function_call_api=True,
        ),
        ChatModelInfo(
            name=OpenAIModelName.GPT5_2,
            provider_name=ModelProviderName.OPENAI,
            prompt_token_cost=1.75 / 1_000_000,
            completion_token_cost=14.00 / 1_000_000,
            max_tokens=400_000,
            has_function_call_api=True,
        ),
        ChatModelInfo(
            name=OpenAIModelName.GPT5_MINI,
            provider_name=ModelProviderName.OPENAI,
            prompt_token_cost=0.25 / 1_000_000,
            completion_token_cost=2.00 / 1_000_000,
            max_tokens=400_000,
            has_function_call_api=True,
        ),
        ChatModelInfo(
            name=OpenAIModelName.GPT5_NANO,
            provider_name=ModelProviderName.OPENAI,
            prompt_token_cost=0.05 / 1_000_000,
            completion_token_cost=0.40 / 1_000_000,
            max_tokens=400_000,
            has_function_call_api=True,
        ),
        ChatModelInfo(
            name=OpenAIModelName.GPT5_PRO,
            provider_name=ModelProviderName.OPENAI,
            prompt_token_cost=15.00 / 1_000_000,
            completion_token_cost=120.00 / 1_000_000,
            max_tokens=400_000,
            has_function_call_api=True,
        ),
        ChatModelInfo(
            name=OpenAIModelName.GPT5_2_PRO,
            provider_name=ModelProviderName.OPENAI,
            prompt_token_cost=21.00 / 1_000_000,
            completion_token_cost=168.00 / 1_000_000,
            max_tokens=400_000,
            has_function_call_api=True,
        ),
    ]
}
# Copy entries for models with equivalent specs
chat_model_mapping = {
    # Legacy GPT-3.5 mappings
    OpenAIModelName.GPT3_v1: [OpenAIModelName.GPT3_v2],
    OpenAIModelName.GPT3_v2_16k: [OpenAIModelName.GPT3_16k],
    OpenAIModelName.GPT3_v4: [OpenAIModelName.GPT3_ROLLING],
    # Legacy GPT-4 mappings
    OpenAIModelName.GPT4_v1: [OpenAIModelName.GPT4_v2, OpenAIModelName.GPT4_ROLLING],
    OpenAIModelName.GPT4_v1_32k: [
        OpenAIModelName.GPT4_v2_32k,
@@ -193,7 +409,59 @@ chat_model_mapping = {
        OpenAIModelName.GPT4_TURBO_PREVIEW,
        OpenAIModelName.GPT4_v5,
    ],
    OpenAIModelName.GPT4_O: [OpenAIModelName.GPT4_O_v1],
    # GPT-4o mappings
    OpenAIModelName.GPT4_O: [
        OpenAIModelName.GPT4_O_ROLLING,
        OpenAIModelName.GPT4_O_v2,
    ],
    OpenAIModelName.GPT4_O_MINI: [
        OpenAIModelName.GPT4_O_MINI_ROLLING,
        OpenAIModelName.GPT4_O_MINI_v1,
    ],
    # GPT-4.1 mappings
    OpenAIModelName.GPT4_1: [
        OpenAIModelName.GPT4_1_ROLLING,
        OpenAIModelName.GPT4_1_v1,
    ],
    OpenAIModelName.GPT4_1_MINI: [
        OpenAIModelName.GPT4_1_MINI_ROLLING,
        OpenAIModelName.GPT4_1_MINI_v1,
    ],
    OpenAIModelName.GPT4_1_NANO: [
        OpenAIModelName.GPT4_1_NANO_ROLLING,
        OpenAIModelName.GPT4_1_NANO_v1,
    ],
    # O-series mappings
    OpenAIModelName.O1: [
        OpenAIModelName.O1_ROLLING,
        OpenAIModelName.O1_v1,
    ],
    OpenAIModelName.O1_MINI: [
        OpenAIModelName.O1_MINI_ROLLING,
        OpenAIModelName.O1_MINI_v1,
    ],
    OpenAIModelName.O1_PRO: [OpenAIModelName.O1_PRO_ROLLING],
    OpenAIModelName.O3: [
        OpenAIModelName.O3_ROLLING,
        OpenAIModelName.O3_v1,
    ],
    OpenAIModelName.O3_MINI: [
        OpenAIModelName.O3_MINI_ROLLING,
        OpenAIModelName.O3_MINI_v1,
    ],
    OpenAIModelName.O3_PRO: [OpenAIModelName.O3_PRO_ROLLING],
    OpenAIModelName.O4_MINI: [
        OpenAIModelName.O4_MINI_ROLLING,
        OpenAIModelName.O4_MINI_v1,
    ],
    # GPT-5 mappings
    OpenAIModelName.GPT5: [OpenAIModelName.GPT5_ROLLING],
    OpenAIModelName.GPT5_1: [OpenAIModelName.GPT5_1_ROLLING],
    OpenAIModelName.GPT5_2: [OpenAIModelName.GPT5_2_ROLLING],
    OpenAIModelName.GPT5_MINI: [OpenAIModelName.GPT5_MINI_ROLLING],
    OpenAIModelName.GPT5_NANO: [OpenAIModelName.GPT5_NANO_ROLLING],
    OpenAIModelName.GPT5_PRO: [OpenAIModelName.GPT5_PRO_ROLLING],
    OpenAIModelName.GPT5_2_PRO: [OpenAIModelName.GPT5_2_PRO_ROLLING],
}
for base, copies in chat_model_mapping.items():
    for copy in copies:
@@ -341,7 +609,26 @@ class OpenAIProvider(
        )

    def get_tokenizer(self, model_name: OpenAIModelName) -> ModelTokenizer[int]:
        return tiktoken.encoding_for_model(model_name)
        try:
            return tiktoken.encoding_for_model(model_name)
        except KeyError:
            # Fallback for new models not yet in tiktoken's mapping.
            # GPT-4o, GPT-4.1, GPT-5, O-series use cl100k_base or o200k_base
            if (
                model_name.startswith("gpt-4o")
                or model_name.startswith("gpt-4.1")
                or model_name.startswith("gpt-5")
                or model_name.startswith("o1")
                or model_name.startswith("o3")
                or model_name.startswith("o4")
            ):
                # o200k_base is used by GPT-4o and newer models
                return tiktoken.get_encoding("o200k_base")
            elif model_name.startswith("gpt-4") or model_name.startswith("gpt-3.5"):
                return tiktoken.get_encoding("cl100k_base")
            else:
                # Default fallback
                return tiktoken.get_encoding("cl100k_base")

    def count_message_tokens(
        self,
@@ -356,8 +643,14 @@ class OpenAIProvider(
            4  # every message follows <|start|>{role/name}\n{content}<|end|>\n
        )
        tokens_per_name = -1  # if there's a name, the role is omitted
        # TODO: check if this is still valid for gpt-4o
        elif model_name.startswith("gpt-4"):
        elif (
            model_name.startswith("gpt-4")
            or model_name.startswith("gpt-5")
            or model_name.startswith("o1")
            or model_name.startswith("o3")
            or model_name.startswith("o4")
        ):
            # GPT-4, GPT-4o, GPT-4.1, GPT-5, and O-series models all use similar format
            tokens_per_message = 3
            tokens_per_name = 1
        else:
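For illustration only: the tokenizer fallback above guards against tiktoken raising KeyError for model names it does not know yet. A standalone mirror of that logic (the helper name is made up):

import tiktoken

def encoding_for(model_name: str) -> tiktoken.Encoding:
    try:
        return tiktoken.encoding_for_model(model_name)
    except KeyError:
        # Same fallback order as get_tokenizer above
        if model_name.startswith(("gpt-4o", "gpt-4.1", "gpt-5", "o1", "o3", "o4")):
            return tiktoken.get_encoding("o200k_base")
        return tiktoken.get_encoding("cl100k_base")

# Assuming tiktoken has no entry for this name, the o200k_base fallback applies:
print(encoding_for("gpt-5.2-pro").name)  # -> "o200k_base"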
@@ -24,10 +24,8 @@ LOG_FILE = "activity.log"
DEBUG_LOG_FILE = "debug.log"
ERROR_LOG_FILE = "error.log"

SIMPLE_LOG_FORMAT = "%(asctime)s %(levelname)s %(title)s%(message)s"
DEBUG_LOG_FORMAT = (
    "%(asctime)s %(levelname)s %(filename)s:%(lineno)d" " %(title)s%(message)s"
)
SIMPLE_LOG_FORMAT = "%(levelname)s %(title)s%(message)s"
DEBUG_LOG_FORMAT = "%(levelname)s %(filename)s:%(lineno)d %(title)s%(message)s"

SPEECH_OUTPUT_LOGGER = "VOICE"
USER_FRIENDLY_OUTPUT_LOGGER = "USER_FRIENDLY_OUTPUT"
@@ -65,9 +65,11 @@ class JSONSchema(BaseModel):
            type=schema["type"],
            enum=schema.get("enum"),
            items=JSONSchema.from_dict(schema["items"]) if "items" in schema else None,
            properties=JSONSchema.parse_properties(schema)
            if schema["type"] == "object"
            else None,
            properties=(
                JSONSchema.parse_properties(schema)
                if schema["type"] == "object"
                else None
            ),
            minimum=schema.get("minimum"),
            maximum=schema.get("maximum"),
            minItems=schema.get("minItems"),
@@ -86,7 +88,9 @@ class JSONSchema(BaseModel):
                v.required = k in schema_node["required"]
        return properties

    def validate_object(self, object: object) -> tuple[bool, list[ValidationError]]:
    def validate_object(
        self, object: object  # noqa: A002 - shadows builtin intentionally
    ) -> tuple[bool, list[ValidationError]]:
        """
        Validates an object or a value against the JSONSchema.

@@ -100,7 +104,10 @@ class JSONSchema(BaseModel):
        """
        validator = Draft7Validator(self.to_dict())

        if errors := sorted(validator.iter_errors(object), key=lambda e: e.path):
        if errors := sorted(
            validator.iter_errors(object),  # type: ignore[arg-type]
            key=lambda e: e.path,
        ):
            return False, errors

        return True, []
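For illustration only: the walrus-plus-sorted pattern in validate_object collects every Draft 7 violation at once rather than stopping at the first. A standalone sketch against the jsonschema library (output shown approximately):

from jsonschema import Draft7Validator

demo_schema = {
    "type": "object",
    "properties": {"url": {"type": "string"}, "timeout": {"type": "integer"}},
    "required": ["url"],
}
candidate = {"timeout": "ten"}  # missing "url", wrong type for "timeout"
if demo_errors := sorted(
    Draft7Validator(demo_schema).iter_errors(candidate), key=lambda e: e.path
):
    for err in demo_errors:
        print(list(err.path), err.message)
# prints (approximately):
#   [] 'url' is a required property
#   ['timeout'] 'ten' is not of type 'integer'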
@@ -148,13 +155,11 @@


@overload
def _resolve_type_refs_in_schema(schema: dict, definitions: dict) -> dict:
    ...
def _resolve_type_refs_in_schema(schema: dict, definitions: dict) -> dict: ...


@overload
def _resolve_type_refs_in_schema(schema: list, definitions: dict) -> list:
    ...
def _resolve_type_refs_in_schema(schema: list, definitions: dict) -> list: ...


def _resolve_type_refs_in_schema(schema: dict | list, definitions: dict) -> dict | list:
309
classic/forge/forge/permissions.py
Normal file
@@ -0,0 +1,309 @@
"""Permission management for agent command execution."""

from __future__ import annotations

import re
from enum import Enum
from pathlib import Path
from typing import Any, Callable

from forge.config.workspace_settings import AgentPermissions, WorkspaceSettings


class ApprovalScope(str, Enum):
    """Scope of permission approval."""

    ONCE = "once"  # Allow this one time only (not saved)
    AGENT = "agent"  # Always allow for this agent
    WORKSPACE = "workspace"  # Always allow for all agents
    DENY = "deny"  # Deny this command


class UserFeedbackProvided(Exception):
    """Raised when user provides feedback instead of approving/denying a command.

    This exception should be caught by the main loop to pass feedback to the agent
    via do_not_execute() instead of executing the command.
    """

    def __init__(self, feedback: str):
        self.feedback = feedback
        super().__init__(f"User provided feedback: {feedback}")


class PermissionCheckResult:
    """Result of a permission check.

    Attributes:
        allowed: Whether the command is allowed to execute.
        scope: The scope of the permission decision.
        feedback: Optional user feedback provided along with the decision.
    """

    __slots__ = ("allowed", "scope", "feedback")

    def __init__(
        self,
        allowed: bool,
        scope: ApprovalScope,
        feedback: str | None = None,
    ):
        self.allowed = allowed
        self.scope = scope
        self.feedback = feedback


class CommandPermissionManager:
    """Manages layered permissions for agent command execution.

    Check order (first match wins):
    1. Agent deny list → block
    2. Workspace deny list → block
    3. Agent allow list → allow
    4. Workspace allow list → allow
    5. No match → prompt user
    """

    def __init__(
        self,
        workspace: Path,
        agent_dir: Path,
        workspace_settings: WorkspaceSettings,
        agent_permissions: AgentPermissions,
        prompt_fn: (
            Callable[[str, str, dict], tuple[ApprovalScope, str | None]] | None
        ) = None,
        on_auto_approve: Callable[[str, str, dict, ApprovalScope], None] | None = None,
    ):
        """Initialize the permission manager.

        Args:
            workspace: Path to the workspace directory.
            agent_dir: Path to the agent's data directory.
            workspace_settings: Workspace-level permission settings.
            agent_permissions: Agent-specific permission settings.
            prompt_fn: Callback to prompt user for permission.
                Takes (command_name, args_str, arguments) and returns
                (ApprovalScope, feedback) tuple.
            on_auto_approve: Callback fired when a command is auto-approved
                from the allow lists (not prompted). Takes (command_name,
                args_str, arguments, scope).
        """
        self.workspace = workspace.resolve()
        self.agent_dir = agent_dir
        self.workspace_settings = workspace_settings
        self.agent_permissions = agent_permissions
        self.prompt_fn = prompt_fn
        self.on_auto_approve = on_auto_approve
        self._session_denied: set[str] = set()

    def check_command(
        self, command_name: str, arguments: dict[str, Any]
    ) -> PermissionCheckResult:
        """Check if command execution is allowed. Prompts if needed.

        Args:
            command_name: Name of the command to check.
            arguments: Command arguments.

        Returns:
            PermissionCheckResult with allowed status, scope, and optional feedback.
        """
        args_str = self._format_args(command_name, arguments)
        perm_string = f"{command_name}({args_str})"

        # 1. Check agent deny list
        if self._matches_patterns(
            command_name, args_str, self.agent_permissions.permissions.deny
        ):
            return PermissionCheckResult(False, ApprovalScope.DENY)

        # 2. Check workspace deny list
        if self._matches_patterns(
            command_name, args_str, self.workspace_settings.permissions.deny
        ):
            return PermissionCheckResult(False, ApprovalScope.DENY)

        # 3. Check agent allow list
        if self._matches_patterns(
            command_name, args_str, self.agent_permissions.permissions.allow
        ):
            if self.on_auto_approve:
                self.on_auto_approve(
                    command_name, args_str, arguments, ApprovalScope.AGENT
                )
            return PermissionCheckResult(True, ApprovalScope.AGENT)

        # 4. Check workspace allow list
        if self._matches_patterns(
            command_name, args_str, self.workspace_settings.permissions.allow
        ):
            if self.on_auto_approve:
                self.on_auto_approve(
                    command_name, args_str, arguments, ApprovalScope.WORKSPACE
                )
            return PermissionCheckResult(True, ApprovalScope.WORKSPACE)

        # 5. Check session denials
        if perm_string in self._session_denied:
            return PermissionCheckResult(False, ApprovalScope.DENY)

        # 6. Prompt user
        if self.prompt_fn is None:
            return PermissionCheckResult(False, ApprovalScope.DENY)

        scope, feedback = self.prompt_fn(command_name, args_str, arguments)
        pattern = self._generalize_pattern(command_name, args_str)

        if scope == ApprovalScope.ONCE:
            # Allow this one time only, don't save anywhere
            return PermissionCheckResult(True, ApprovalScope.ONCE, feedback)
        elif scope == ApprovalScope.WORKSPACE:
            self.workspace_settings.add_permission(pattern, self.workspace)
            return PermissionCheckResult(True, ApprovalScope.WORKSPACE, feedback)
        elif scope == ApprovalScope.AGENT:
            self.agent_permissions.add_permission(pattern, self.agent_dir)
            return PermissionCheckResult(True, ApprovalScope.AGENT, feedback)
        else:
            # Denied - feedback goes to agent instead of execution
            self._session_denied.add(perm_string)
            return PermissionCheckResult(False, ApprovalScope.DENY, feedback)

    def _format_args(self, command_name: str, arguments: dict[str, Any]) -> str:
        """Format command arguments for pattern matching.

        Args:
            command_name: Name of the command.
            arguments: Command arguments dict.

        Returns:
            Formatted arguments string.
        """
        # For file operations, use the resolved file path for symlink handling
        if command_name in ("read_file", "write_to_file", "list_folder"):
            path = arguments.get("filename") or arguments.get("path") or ""
            if path:
                return str(Path(path).resolve())
            return ""

        # For shell commands, format as "executable:args" (first word is executable)
        if command_name in ("execute_shell", "execute_python"):
            cmd = arguments.get("command_line") or arguments.get("code") or ""
            if not cmd:
                return ""
            parts = str(cmd).split(maxsplit=1)
            if len(parts) == 2:
                return f"{parts[0]}:{parts[1]}"
            return f"{parts[0]}:"

        # For web operations
        if command_name == "web_search":
            query = arguments.get("query", "")
            return str(query)
        if command_name == "read_webpage":
            url = arguments.get("url", "")
            return str(url)

        # Generic: join all argument values
        if arguments:
            return ":".join(str(v) for v in arguments.values())
        return "*"

    def _matches_patterns(self, cmd: str, args: str, patterns: list[str]) -> bool:
        """Check if command matches any pattern in the list.

        Args:
            cmd: Command name.
            args: Formatted arguments string.
            patterns: List of permission patterns.

        Returns:
            True if any pattern matches.
        """
        for pattern in patterns:
            if self._pattern_matches(pattern, cmd, args):
                return True
        return False

    def _pattern_matches(self, pattern: str, cmd: str, args: str) -> bool:
        """Check if a single pattern matches the command.

        Args:
            pattern: Permission pattern like "command_name(glob_pattern)".
            cmd: Command name.
            args: Formatted arguments string.

        Returns:
            True if pattern matches.
        """
        # Parse pattern: command_name(args_pattern)
        match = re.match(r"^(\w+)\((.+)\)$", pattern)
        if not match:
            return False

        pattern_cmd, args_pattern = match.groups()

        # Command name must match
        if pattern_cmd != cmd:
            return False

        # Expand {workspace} placeholder
        args_pattern = args_pattern.replace("{workspace}", str(self.workspace))

        # Convert glob pattern to regex
        # ** matches any path (including /)
        # * matches any characters except /
        regex_pattern = args_pattern
        regex_pattern = re.escape(regex_pattern)
        # Restore glob patterns
        regex_pattern = regex_pattern.replace(r"\*\*", ".*")
        regex_pattern = regex_pattern.replace(r"\*", "[^/]*")
        regex_pattern = f"^{regex_pattern}$"

        try:
            return bool(re.match(regex_pattern, args))
        except re.error:
            return False

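    # Worked example (editor's note, not part of the diff) of the
    # escape-then-restore conversion above, with {workspace} = "/ws":
    #   "{workspace}/**"  ->  escaped "/ws/\*\*"  ->  regex "^/ws/.*$"
    #       matches "/ws/notes/todo.txt" (** crosses path separators)
    #   "*.txt"           ->  regex "^[^/]*\.txt$"
    #       does not match "a/b.txt" (a single * stays within one segment)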
def _generalize_pattern(self, command_name: str, args_str: str) -> str:
|
||||
"""Create a generalized pattern from specific command args.
|
||||
|
||||
Args:
|
||||
command_name: Name of the command.
|
||||
args_str: Formatted arguments string.
|
||||
|
||||
Returns:
|
||||
Generalized permission pattern.
|
||||
"""
|
||||
# For file paths, generalize to parent directory
|
||||
if command_name in ("read_file", "write_to_file", "list_folder"):
|
||||
path = Path(args_str)
|
||||
# If within workspace, use {workspace} placeholder
|
||||
try:
|
||||
rel = path.resolve().relative_to(self.workspace)
|
||||
return f"{command_name}({{workspace}}/{rel.parent}/*)"
|
||||
except ValueError:
|
||||
# Outside workspace, use exact path
|
||||
return f"{command_name}({path})"
|
||||
|
||||
# For shell commands, use executable:** pattern
|
||||
if command_name in ("execute_shell", "execute_python"):
|
||||
# args_str is in format "executable:args", extract executable
|
||||
if ":" in args_str:
|
||||
executable = args_str.split(":", 1)[0]
|
||||
return f"{command_name}({executable}:**)"
|
||||
return f"{command_name}(*)"
|
||||
|
||||
# For web operations
|
||||
if command_name == "web_search":
|
||||
return "web_search(*)"
|
||||
if command_name == "read_webpage":
|
||||
# Extract domain
|
||||
match = re.match(r"https?://([^/]+)", args_str)
|
||||
if match:
|
||||
domain = match.group(1)
|
||||
return f"read_webpage(*{domain}*)"
|
||||
return "read_webpage(*)"
|
||||
|
||||
# Generic: use wildcard
|
||||
return f"{command_name}(*)"
|
||||
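The escape-then-restore trick in `_pattern_matches` is easy to get wrong, so here is a minimal, self-contained sketch of the same glob-to-regex translation, assuming only the two wildcard rules stated in the comments (this re-implements the idea outside the manager class; it is not repo code):

```python
import re

def glob_to_regex(args_pattern: str) -> str:
    """Anchored regex for a permission glob:
    '**' matches anything (including '/'), '*' matches anything except '/'."""
    escaped = re.escape(args_pattern)          # '**' becomes '\*\*'
    escaped = escaped.replace(r"\*\*", ".*")   # restore '**' first
    escaped = escaped.replace(r"\*", "[^/]*")  # then single '*'
    return f"^{escaped}$"

# e.g. the default deny pattern for recursive deletes:
assert re.match(glob_to_regex("rm:-rf **"), "rm:-rf /tmp/foo")
# a single '*' does not cross directory boundaries:
assert not re.match(glob_to_regex("/tmp/*.txt"), "/tmp/subdir/test.txt")
```

Replacing `\*\*` before `\*` is the load-bearing detail: doing it in the other order would turn `**` into two single-segment wildcards.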
@@ -93,3 +93,28 @@ class OperationNotAllowedError(CommandExecutionError):


class TooMuchOutputError(CommandExecutionError):
    """The operation generated more output than what the Agent can process"""


class CodeTimeoutError(CommandExecutionError):
    """The code execution timed out"""

    hint = (
        "Consider breaking the operation into smaller steps or increasing the timeout."
    )


class HTTPError(CommandExecutionError):
    """An error occurred during an HTTP request"""

    def __init__(
        self, message: str, status_code: Optional[int] = None, url: Optional[str] = None
    ):
        self.status_code = status_code
        self.url = url
        super().__init__(message)


class DataProcessingError(CommandExecutionError):
    """An error occurred while processing data (JSON, CSV, etc.)"""

    hint = "Check that the input data is in the correct format."
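For orientation, a hedged sketch of how a command implementation might surface these new error types. The `fetch_json` helper and its `requests` usage are illustrative, not part of the diff; only the exception classes and their constructors come from the hunk above:

```python
import json

import requests  # illustrative dependency, not one added by this diff

def fetch_json(url: str) -> dict:
    """Hypothetical command body showing the intended error contract."""
    try:
        response = requests.get(url, timeout=10)
    except requests.RequestException as e:
        # transport-level failure: no status code available
        raise HTTPError(str(e), url=url) from e
    if response.status_code >= 400:
        raise HTTPError("request failed", status_code=response.status_code, url=url)
    try:
        return json.loads(response.text)
    except json.JSONDecodeError as e:
        raise DataProcessingError(f"Response from {url} is not valid JSON") from e
```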
10366
classic/forge/poetry.lock
generated
classic/forge/pyproject.toml
@@ -1,28 +1,43 @@
 [tool.poetry]
-name = "AutoGPT-Forge"
+name = "autogpt-forge"
 version = "0.2.0"
-description = ""
+description = "Core library for building autonomous AI agents"
 authors = ["AutoGPT <support@agpt.co>"]
 license = "MIT"
 readme = "README.md"
 packages = [{ include = "forge" }]
+keywords = ["autogpt", "ai", "agents", "autonomous", "llm"]
+classifiers = [
+    "Development Status :: 4 - Beta",
+    "Intended Audience :: Developers",
+    "License :: OSI Approved :: MIT License",
+    "Operating System :: OS Independent",
+    "Programming Language :: Python :: 3",
+    "Programming Language :: Python :: 3.12",
+    "Programming Language :: Python :: 3.13",
+    "Programming Language :: Python :: 3.14",
+]
+
+[tool.poetry.urls]
+"Homepage" = "https://github.com/Significant-Gravitas/AutoGPT"
+"Bug Tracker" = "https://github.com/Significant-Gravitas/AutoGPT/issues"

 [tool.poetry.dependencies]
-python = "^3.10"
+python = "^3.12"
 agbenchmark = { path = "../benchmark", optional = true }
 # agbenchmark = {git = "https://github.com/Significant-Gravitas/AutoGPT.git", subdirectory = "benchmark", optional = true}
 aiohttp = "^3.8.5"
-anthropic = "^0.25.1"
+anthropic = "^0.45.0"
 beautifulsoup4 = "^4.12.2"
 boto3 = "^1.33.6"
 charset-normalizer = "^3.1.0"
-chromadb = "^0.4.10"
+chromadb = "^1.4.0"
 click = "*"
 colorama = "^0.4.6"
 demjson3 = "^3.0.0"
 docker = "*"
-duckduckgo-search = "^6.1.7"
-en-core-web-sm = { url = "https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1-py3-none-any.whl" }
+ddgs = "^9.9"
+en-core-web-sm = { url = "https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl" }
 fastapi = "^0.109.1"
 gitpython = "^3.1.32"
 google-api-python-client = "^2.116"
@@ -33,8 +48,8 @@ gTTS = "^2.3.1"
 jinja2 = "^3.1.2"
 jsonschema = "*"
 litellm = "^1.17.9"
-numpy = ">=1.26.0,<2.0.0"
-openai = "^1.7.2"
+numpy = ">=2.0.0"
+openai = "^1.50.0"
 Pillow = "*"
 playsound = "~1.2.2"
 pydantic = "^2.7.2"
@@ -48,12 +63,12 @@ requests = "*"
 selenium = "^4.13.0"
 sqlalchemy = "^2.0.19"
 sentry-sdk = "^1.40.4"
-spacy = "^3.0.0"
+spacy = "^3.8.0"
 tenacity = "^8.2.2"
 tiktoken = ">=0.7.0,<1.0.0"
 toml = "^0.10.2"
 uvicorn = { extras = ["standard"], version = ">=0.23.2,<1" }
-watchdog = "4.0.0"
+watchdog = "^6.0.0"
 webdriver-manager = "^4.0.2"

 [tool.poetry.extras]
@@ -90,7 +105,7 @@ build-backend = "poetry.core.masonry.api"

 [tool.black]
 line-length = 88
-target-version = ['py310']
+target-version = ['py312']
 include = '\.pyi?$'


@@ -99,7 +114,7 @@ profile = "black"


 [tool.pyright]
-pythonVersion = "3.10"
+pythonVersion = "3.12"


 [tool.pytest.ini_options]
@@ -1,9 +0,0 @@
#!/bin/bash

kill $(lsof -t -i :8000)

if [ ! -f .env ]; then
    cp .env.example .env
    echo "Please add your api keys to the .env file."
fi
poetry run python -m forge
@@ -1,9 +0,0 @@
#!/bin/bash

# Kill processes using port 8080 if any.
if lsof -t -i :8080; then
    kill $(lsof -t -i :8080)
fi
# This is the cli entry point for the benchmarking tool.
# To run this in server mode pass in `serve` as the first argument.
poetry run agbenchmark "$@"
@@ -1,17 +0,0 @@
#!/bin/bash

ENV_PATH=$(poetry env info --path)
if [ -d "$ENV_PATH" ]; then
    if [ -e delete ]; then
        rm -rf "$ENV_PATH" || { echo "Please manually remove $ENV_PATH"; exit 1; }
    else
        echo "Press ENTER to remove $ENV_PATH"
        read && { rm -r "$ENV_PATH" && echo "Removed the poetry environment at $ENV_PATH."; } || { echo "Please manually remove $ENV_PATH."; exit 1; }
    fi
else
    echo "No poetry environment found."
fi

poetry install --extras benchmark
echo "Setup completed successfully."
exit 0
1
classic/forge/tests/__init__.py
Normal file
@@ -0,0 +1 @@
# Tests package
1
classic/forge/tests/components/__init__.py
Normal file
@@ -0,0 +1 @@
# Component tests package
548
classic/forge/tests/components/test_todo.py
Normal file
@@ -0,0 +1,548 @@
"""Tests for TodoComponent."""
|
||||
|
||||
import pytest
|
||||
|
||||
from forge.components.todo import TodoComponent, TodoConfiguration
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def todo_component():
|
||||
"""Create a fresh TodoComponent for testing."""
|
||||
return TodoComponent()
|
||||
|
||||
|
||||
class TestTodoWrite:
|
||||
"""Tests for the todo_write command."""
|
||||
|
||||
def test_write_empty_list(self, todo_component):
|
||||
"""Writing an empty list should succeed."""
|
||||
result = todo_component.todo_write([])
|
||||
assert result["status"] == "success"
|
||||
assert result["item_count"] == 0
|
||||
assert result["pending"] == 0
|
||||
assert result["in_progress"] == 0
|
||||
assert result["completed"] == 0
|
||||
|
||||
def test_write_single_pending_todo(self, todo_component):
|
||||
"""Writing a single pending todo should succeed."""
|
||||
result = todo_component.todo_write(
|
||||
[
|
||||
{
|
||||
"content": "Fix the bug",
|
||||
"status": "pending",
|
||||
"active_form": "Fixing the bug",
|
||||
}
|
||||
]
|
||||
)
|
||||
assert result["status"] == "success"
|
||||
assert result["item_count"] == 1
|
||||
assert result["pending"] == 1
|
||||
assert result["in_progress"] == 0
|
||||
|
||||
def test_write_multiple_todos(self, todo_component):
|
||||
"""Writing multiple todos with different statuses should succeed."""
|
||||
result = todo_component.todo_write(
|
||||
[
|
||||
{
|
||||
"content": "Research patterns",
|
||||
"status": "completed",
|
||||
"active_form": "Researching patterns",
|
||||
},
|
||||
{
|
||||
"content": "Implement feature",
|
||||
"status": "in_progress",
|
||||
"active_form": "Implementing feature",
|
||||
},
|
||||
{
|
||||
"content": "Write tests",
|
||||
"status": "pending",
|
||||
"active_form": "Writing tests",
|
||||
},
|
||||
]
|
||||
)
|
||||
assert result["status"] == "success"
|
||||
assert result["item_count"] == 3
|
||||
assert result["pending"] == 1
|
||||
assert result["in_progress"] == 1
|
||||
assert result["completed"] == 1
|
||||
|
||||
def test_write_replaces_entire_list(self, todo_component):
|
||||
"""Writing should replace the entire list, not append."""
|
||||
# First write
|
||||
todo_component.todo_write(
|
||||
[
|
||||
{
|
||||
"content": "Task 1",
|
||||
"status": "pending",
|
||||
"active_form": "Doing task 1",
|
||||
}
|
||||
]
|
||||
)
|
||||
|
||||
# Second write should replace
|
||||
result = todo_component.todo_write(
|
||||
[
|
||||
{
|
||||
"content": "Task 2",
|
||||
"status": "pending",
|
||||
"active_form": "Doing task 2",
|
||||
}
|
||||
]
|
||||
)
|
||||
assert result["item_count"] == 1
|
||||
|
||||
# Verify only Task 2 exists
|
||||
read_result = todo_component.todo_read()
|
||||
assert len(read_result["items"]) == 1
|
||||
assert read_result["items"][0]["content"] == "Task 2"
|
||||
|
||||
def test_write_warns_on_multiple_in_progress(self, todo_component):
|
||||
"""Writing multiple in_progress items should include a warning."""
|
||||
result = todo_component.todo_write(
|
||||
[
|
||||
{
|
||||
"content": "Task 1",
|
||||
"status": "in_progress",
|
||||
"active_form": "Doing task 1",
|
||||
},
|
||||
{
|
||||
"content": "Task 2",
|
||||
"status": "in_progress",
|
||||
"active_form": "Doing task 2",
|
||||
},
|
||||
]
|
||||
)
|
||||
assert result["status"] == "success"
|
||||
assert "warning" in result
|
||||
assert "2 tasks are in_progress" in result["warning"]
|
||||
|
||||
def test_write_validates_required_content(self, todo_component):
|
||||
"""Writing without content should fail."""
|
||||
result = todo_component.todo_write(
|
||||
[
|
||||
{
|
||||
"content": "",
|
||||
"status": "pending",
|
||||
"active_form": "Doing something",
|
||||
}
|
||||
]
|
||||
)
|
||||
assert result["status"] == "error"
|
||||
assert "content" in result["message"]
|
||||
|
||||
def test_write_validates_required_active_form(self, todo_component):
|
||||
"""Writing without active_form should fail."""
|
||||
result = todo_component.todo_write(
|
||||
[
|
||||
{
|
||||
"content": "Fix bug",
|
||||
"status": "pending",
|
||||
"active_form": "",
|
||||
}
|
||||
]
|
||||
)
|
||||
assert result["status"] == "error"
|
||||
assert "active_form" in result["message"]
|
||||
|
||||
def test_write_validates_status(self, todo_component):
|
||||
"""Writing with invalid status should fail."""
|
||||
result = todo_component.todo_write(
|
||||
[
|
||||
{
|
||||
"content": "Fix bug",
|
||||
"status": "invalid_status",
|
||||
"active_form": "Fixing bug",
|
||||
}
|
||||
]
|
||||
)
|
||||
assert result["status"] == "error"
|
||||
assert "status" in result["message"]
|
||||
|
||||
def test_write_enforces_max_items(self, todo_component):
|
||||
"""Writing more items than max_items should fail."""
|
||||
component = TodoComponent(config=TodoConfiguration(max_items=2))
|
||||
result = component.todo_write(
|
||||
[
|
||||
{"content": "Task 1", "status": "pending", "active_form": "Task 1"},
|
||||
{"content": "Task 2", "status": "pending", "active_form": "Task 2"},
|
||||
{"content": "Task 3", "status": "pending", "active_form": "Task 3"},
|
||||
]
|
||||
)
|
||||
assert result["status"] == "error"
|
||||
assert "Too many items" in result["message"]
|
||||
|
||||
|
||||
class TestTodoRead:
|
||||
"""Tests for the todo_read command."""
|
||||
|
||||
def test_read_empty_list(self, todo_component):
|
||||
"""Reading an empty list should return empty items."""
|
||||
result = todo_component.todo_read()
|
||||
assert result["status"] == "success"
|
||||
assert result["items"] == []
|
||||
assert result["summary"]["pending"] == 0
|
||||
|
||||
def test_read_after_write(self, todo_component):
|
||||
"""Reading after writing should return the written items."""
|
||||
todo_component.todo_write(
|
||||
[
|
||||
{
|
||||
"content": "Fix bug",
|
||||
"status": "pending",
|
||||
"active_form": "Fixing bug",
|
||||
}
|
||||
]
|
||||
)
|
||||
|
||||
result = todo_component.todo_read()
|
||||
assert result["status"] == "success"
|
||||
assert len(result["items"]) == 1
|
||||
assert result["items"][0]["content"] == "Fix bug"
|
||||
assert result["items"][0]["status"] == "pending"
|
||||
assert result["items"][0]["active_form"] == "Fixing bug"
|
||||
|
||||
|
||||
class TestTodoClear:
|
||||
"""Tests for the todo_clear command."""
|
||||
|
||||
def test_clear_empty_list(self, todo_component):
|
||||
"""Clearing an empty list should succeed."""
|
||||
result = todo_component.todo_clear()
|
||||
assert result["status"] == "success"
|
||||
assert "Cleared 0 todo(s)" in result["message"]
|
||||
|
||||
def test_clear_populated_list(self, todo_component):
|
||||
"""Clearing a populated list should remove all items."""
|
||||
todo_component.todo_write(
|
||||
[
|
||||
{"content": "Task 1", "status": "pending", "active_form": "Task 1"},
|
||||
{"content": "Task 2", "status": "pending", "active_form": "Task 2"},
|
||||
]
|
||||
)
|
||||
|
||||
result = todo_component.todo_clear()
|
||||
assert result["status"] == "success"
|
||||
assert "Cleared 2 todo(s)" in result["message"]
|
||||
|
||||
# Verify list is empty
|
||||
read_result = todo_component.todo_read()
|
||||
assert len(read_result["items"]) == 0
|
||||
|
||||
|
||||
class TestProtocols:
|
||||
"""Tests for protocol implementations."""
|
||||
|
||||
def test_get_resources(self, todo_component):
|
||||
"""DirectiveProvider.get_resources should yield a resource."""
|
||||
resources = list(todo_component.get_resources())
|
||||
assert len(resources) == 1
|
||||
assert "todo list" in resources[0].lower()
|
||||
|
||||
def test_get_best_practices(self, todo_component):
|
||||
"""DirectiveProvider.get_best_practices should yield practices."""
|
||||
practices = list(todo_component.get_best_practices())
|
||||
assert len(practices) == 4
|
||||
assert any("todo_write" in p for p in practices)
|
||||
|
||||
def test_get_commands(self, todo_component):
|
||||
"""CommandProvider.get_commands should yield commands."""
|
||||
commands = list(todo_component.get_commands())
|
||||
command_names = [c.names[0] for c in commands]
|
||||
assert "todo_write" in command_names
|
||||
assert "todo_read" in command_names
|
||||
assert "todo_clear" in command_names
|
||||
|
||||
def test_get_messages_empty_list(self, todo_component):
|
||||
"""MessageProvider should not yield messages for empty list."""
|
||||
messages = list(todo_component.get_messages())
|
||||
assert len(messages) == 0
|
||||
|
||||
def test_get_messages_with_todos(self, todo_component):
|
||||
"""MessageProvider should include todos in LLM context."""
|
||||
todo_component.todo_write(
|
||||
[
|
||||
{
|
||||
"content": "Implement feature",
|
||||
"status": "in_progress",
|
||||
"active_form": "Implementing feature",
|
||||
},
|
||||
{
|
||||
"content": "Write tests",
|
||||
"status": "pending",
|
||||
"active_form": "Writing tests",
|
||||
},
|
||||
]
|
||||
)
|
||||
|
||||
messages = list(todo_component.get_messages())
|
||||
assert len(messages) == 1
|
||||
|
||||
content = messages[0].content
|
||||
assert "Your Todo List" in content
|
||||
assert "Currently working on" in content
|
||||
assert "Implementing feature" in content
|
||||
assert "Pending" in content
|
||||
assert "Write tests" in content
|
||||
|
||||
def test_get_messages_respects_show_in_prompt_config(self):
|
||||
"""MessageProvider should respect show_in_prompt config."""
|
||||
component = TodoComponent(config=TodoConfiguration(show_in_prompt=False))
|
||||
component.todo_write(
|
||||
[{"content": "Task", "status": "pending", "active_form": "Task"}]
|
||||
)
|
||||
|
||||
messages = list(component.get_messages())
|
||||
assert len(messages) == 0
|
||||
|
||||
|
||||
class TestConfiguration:
|
||||
"""Tests for TodoConfiguration."""
|
||||
|
||||
def test_default_configuration(self):
|
||||
"""Default configuration should have expected values."""
|
||||
config = TodoConfiguration()
|
||||
assert config.max_items == 50
|
||||
assert config.show_in_prompt is True
|
||||
|
||||
def test_custom_configuration(self):
|
||||
"""Custom configuration should be respected."""
|
||||
cfg = TodoConfiguration(max_items=10, show_in_prompt=False)
|
||||
component = TodoComponent(config=cfg)
|
||||
assert component.config.max_items == 10
|
||||
assert component.config.show_in_prompt is False
|
||||
|
||||
|
||||
class TestSubItems:
|
||||
"""Tests for hierarchical sub-items support."""
|
||||
|
||||
def test_write_with_sub_items(self, todo_component):
|
||||
"""Writing todos with sub_items should succeed."""
|
||||
result = todo_component.todo_write(
|
||||
[
|
||||
{
|
||||
"content": "Implement feature",
|
||||
"status": "in_progress",
|
||||
"active_form": "Implementing feature",
|
||||
"sub_items": [
|
||||
{
|
||||
"content": "Design API",
|
||||
"status": "completed",
|
||||
"active_form": "Designing API",
|
||||
},
|
||||
{
|
||||
"content": "Write code",
|
||||
"status": "in_progress",
|
||||
"active_form": "Writing code",
|
||||
},
|
||||
{
|
||||
"content": "Add tests",
|
||||
"status": "pending",
|
||||
"active_form": "Adding tests",
|
||||
},
|
||||
],
|
||||
}
|
||||
]
|
||||
)
|
||||
assert result["status"] == "success"
|
||||
assert result["item_count"] == 1
|
||||
|
||||
def test_read_returns_sub_items(self, todo_component):
|
||||
"""Reading should return sub_items."""
|
||||
todo_component.todo_write(
|
||||
[
|
||||
{
|
||||
"content": "Main task",
|
||||
"status": "in_progress",
|
||||
"active_form": "Working on main task",
|
||||
"sub_items": [
|
||||
{
|
||||
"content": "Sub task 1",
|
||||
"status": "completed",
|
||||
"active_form": "Doing sub task 1",
|
||||
},
|
||||
{
|
||||
"content": "Sub task 2",
|
||||
"status": "pending",
|
||||
"active_form": "Doing sub task 2",
|
||||
},
|
||||
],
|
||||
}
|
||||
]
|
||||
)
|
||||
|
||||
result = todo_component.todo_read()
|
||||
assert result["status"] == "success"
|
||||
assert len(result["items"]) == 1
|
||||
assert "sub_items" in result["items"][0]
|
||||
assert len(result["items"][0]["sub_items"]) == 2
|
||||
assert result["items"][0]["sub_items"][0]["content"] == "Sub task 1"
|
||||
assert result["items"][0]["sub_items"][0]["status"] == "completed"
|
||||
|
||||
def test_nested_sub_items(self, todo_component):
|
||||
"""Writing deeply nested sub_items should succeed."""
|
||||
result = todo_component.todo_write(
|
||||
[
|
||||
{
|
||||
"content": "Level 1",
|
||||
"status": "in_progress",
|
||||
"active_form": "Level 1",
|
||||
"sub_items": [
|
||||
{
|
||||
"content": "Level 2",
|
||||
"status": "pending",
|
||||
"active_form": "Level 2",
|
||||
"sub_items": [
|
||||
{
|
||||
"content": "Level 3",
|
||||
"status": "pending",
|
||||
"active_form": "Level 3",
|
||||
}
|
||||
],
|
||||
}
|
||||
],
|
||||
}
|
||||
]
|
||||
)
|
||||
assert result["status"] == "success"
|
||||
|
||||
# Verify nested structure
|
||||
read_result = todo_component.todo_read()
|
||||
level1 = read_result["items"][0]
|
||||
level2 = level1["sub_items"][0]
|
||||
level3 = level2["sub_items"][0]
|
||||
assert level3["content"] == "Level 3"
|
||||
|
||||
def test_sub_items_validation_error(self, todo_component):
|
||||
"""Sub-items with invalid fields should fail validation."""
|
||||
result = todo_component.todo_write(
|
||||
[
|
||||
{
|
||||
"content": "Main task",
|
||||
"status": "pending",
|
||||
"active_form": "Main task",
|
||||
"sub_items": [
|
||||
{
|
||||
"content": "", # Invalid: empty content
|
||||
"status": "pending",
|
||||
"active_form": "Sub task",
|
||||
}
|
||||
],
|
||||
}
|
||||
]
|
||||
)
|
||||
assert result["status"] == "error"
|
||||
assert "sub_items" in result["message"]
|
||||
|
||||
def test_messages_include_sub_items(self, todo_component):
|
||||
"""MessageProvider should format sub-items with indentation."""
|
||||
todo_component.todo_write(
|
||||
[
|
||||
{
|
||||
"content": "Main task",
|
||||
"status": "in_progress",
|
||||
"active_form": "Working on main task",
|
||||
"sub_items": [
|
||||
{
|
||||
"content": "Sub completed",
|
||||
"status": "completed",
|
||||
"active_form": "Sub completed",
|
||||
},
|
||||
{
|
||||
"content": "Sub pending",
|
||||
"status": "pending",
|
||||
"active_form": "Sub pending",
|
||||
},
|
||||
],
|
||||
}
|
||||
]
|
||||
)
|
||||
|
||||
messages = list(todo_component.get_messages())
|
||||
assert len(messages) == 1
|
||||
content = messages[0].content
|
||||
|
||||
# Check parent is shown
|
||||
assert "Working on main task" in content
|
||||
# Check sub-items are shown (with their status indicators)
|
||||
assert "[x] Sub completed" in content
|
||||
assert "[ ] Sub pending" in content
|
||||
|
||||
|
||||
class TestTodoDecompose:
|
||||
"""Tests for the todo_decompose command."""
|
||||
|
||||
def test_decompose_without_llm_provider(self, todo_component):
|
||||
"""Decompose should fail gracefully without LLM provider."""
|
||||
todo_component.todo_write(
|
||||
[
|
||||
{
|
||||
"content": "Complex task",
|
||||
"status": "pending",
|
||||
"active_form": "Complex task",
|
||||
}
|
||||
]
|
||||
)
|
||||
|
||||
import asyncio
|
||||
|
||||
result = asyncio.get_event_loop().run_until_complete(
|
||||
todo_component.todo_decompose(item_index=0)
|
||||
)
|
||||
assert result["status"] == "error"
|
||||
assert "LLM provider not configured" in result["message"]
|
||||
|
||||
def test_decompose_invalid_index(self, todo_component):
|
||||
"""Decompose with invalid index should fail."""
|
||||
todo_component.todo_write(
|
||||
[{"content": "Task", "status": "pending", "active_form": "Task"}]
|
||||
)
|
||||
|
||||
import asyncio
|
||||
|
||||
result = asyncio.get_event_loop().run_until_complete(
|
||||
todo_component.todo_decompose(item_index=5)
|
||||
)
|
||||
assert result["status"] == "error"
|
||||
assert "Invalid item_index" in result["message"]
|
||||
|
||||
def test_decompose_empty_list(self, todo_component):
|
||||
"""Decompose on empty list should fail."""
|
||||
import asyncio
|
||||
|
||||
result = asyncio.get_event_loop().run_until_complete(
|
||||
todo_component.todo_decompose(item_index=0)
|
||||
)
|
||||
assert result["status"] == "error"
|
||||
|
||||
def test_decompose_already_has_sub_items(self, todo_component):
|
||||
"""Decompose should fail if item already has sub-items."""
|
||||
todo_component.todo_write(
|
||||
[
|
||||
{
|
||||
"content": "Task with subs",
|
||||
"status": "pending",
|
||||
"active_form": "Task with subs",
|
||||
"sub_items": [
|
||||
{
|
||||
"content": "Existing sub",
|
||||
"status": "pending",
|
||||
"active_form": "Existing sub",
|
||||
}
|
||||
],
|
||||
}
|
||||
]
|
||||
)
|
||||
|
||||
import asyncio
|
||||
|
||||
result = asyncio.get_event_loop().run_until_complete(
|
||||
todo_component.todo_decompose(item_index=0)
|
||||
)
|
||||
assert result["status"] == "error"
|
||||
assert "already has" in result["message"]
|
||||
|
||||
def test_get_commands_includes_decompose(self, todo_component):
|
||||
"""CommandProvider should include todo_decompose command."""
|
||||
commands = list(todo_component.get_commands())
|
||||
command_names = [c.names[0] for c in commands]
|
||||
assert "todo_decompose" in command_names
|
||||
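Taken together, these tests sketch the component's public surface. A minimal usage example distilled from them, using only calls the tests themselves exercise:

```python
from forge.components.todo import TodoComponent, TodoConfiguration

component = TodoComponent(config=TodoConfiguration(max_items=10))
component.todo_write(
    [
        {
            "content": "Implement feature",
            "status": "in_progress",
            "active_form": "Implementing feature",
        }
    ]
)
# get_messages() renders the list for the LLM prompt when show_in_prompt is on
for message in component.get_messages():
    print(message.content)
```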
820
classic/forge/tests/test_permissions.py
Normal file
@@ -0,0 +1,820 @@
"""Tests for the permission management system."""

from pathlib import Path

import pytest

from forge.config.workspace_settings import (
    AgentPermissions,
    PermissionsConfig,
    WorkspaceSettings,
)
from forge.permissions import ApprovalScope, CommandPermissionManager


@pytest.fixture
def workspace(tmp_path: Path) -> Path:
    """Create a temporary workspace directory."""
    return tmp_path / "workspace"


@pytest.fixture
def agent_dir(tmp_path: Path) -> Path:
    """Create a temporary agent directory."""
    agent_dir = tmp_path / "agents" / "test-agent"
    agent_dir.mkdir(parents=True)
    return agent_dir


@pytest.fixture
def workspace_settings() -> WorkspaceSettings:
    """Create default workspace settings."""
    return WorkspaceSettings()


@pytest.fixture
def agent_permissions() -> AgentPermissions:
    """Create empty agent permissions."""
    return AgentPermissions()


@pytest.fixture
def permission_manager(
    workspace: Path,
    agent_dir: Path,
    workspace_settings: WorkspaceSettings,
    agent_permissions: AgentPermissions,
) -> CommandPermissionManager:
    """Create a permission manager for testing."""
    workspace.mkdir(parents=True, exist_ok=True)
    return CommandPermissionManager(
        workspace=workspace,
        agent_dir=agent_dir,
        workspace_settings=workspace_settings,
        agent_permissions=agent_permissions,
        prompt_fn=None,  # No prompting in tests
    )


class TestFormatArgs:
    """Tests for _format_args() method."""

    def test_format_args_read_file(self, permission_manager: CommandPermissionManager):
        """File operations should return resolved absolute path."""
        args = {"filename": "/tmp/test.txt"}
        result = permission_manager._format_args("read_file", args)
        assert result == str(Path("/tmp/test.txt").resolve())

    def test_format_args_read_file_with_path_key(
        self, permission_manager: CommandPermissionManager
    ):
        """File operations should also check 'path' key."""
        args = {"path": "/tmp/test.txt"}
        result = permission_manager._format_args("read_file", args)
        assert result == str(Path("/tmp/test.txt").resolve())

    def test_format_args_write_file(self, permission_manager: CommandPermissionManager):
        """write_to_file should format like read_file."""
        args = {"filename": "/tmp/output.txt"}
        result = permission_manager._format_args("write_to_file", args)
        assert result == str(Path("/tmp/output.txt").resolve())

    def test_format_args_list_folder(
        self, permission_manager: CommandPermissionManager
    ):
        """list_folder should format like read_file."""
        args = {"path": "/tmp"}
        result = permission_manager._format_args("list_folder", args)
        assert result == str(Path("/tmp").resolve())

    def test_format_args_shell_command_with_args(
        self, permission_manager: CommandPermissionManager
    ):
        """Shell commands should use executable:args format."""
        args = {"command_line": "rm -rf /tmp/foo"}
        result = permission_manager._format_args("execute_shell", args)
        assert result == "rm:-rf /tmp/foo"

    def test_format_args_shell_command_no_args(
        self, permission_manager: CommandPermissionManager
    ):
        """Shell commands without args should end with colon."""
        args = {"command_line": "ls"}
        result = permission_manager._format_args("execute_shell", args)
        assert result == "ls:"

    def test_format_args_shell_command_single_arg(
        self, permission_manager: CommandPermissionManager
    ):
        """Shell commands with single arg should format correctly."""
        args = {"command_line": "cat file.txt"}
        result = permission_manager._format_args("execute_shell", args)
        assert result == "cat:file.txt"

    def test_format_args_execute_python(
        self, permission_manager: CommandPermissionManager
    ):
        """execute_python should use same format as execute_shell."""
        args = {"code": "python script.py"}
        result = permission_manager._format_args("execute_python", args)
        assert result == "python:script.py"

    def test_format_args_shell_empty(
        self, permission_manager: CommandPermissionManager
    ):
        """Empty shell command should return empty string."""
        args = {"command_line": ""}
        result = permission_manager._format_args("execute_shell", args)
        assert result == ""

    def test_format_args_web_search(self, permission_manager: CommandPermissionManager):
        """Web search should return the query."""
        args = {"query": "python tutorial"}
        result = permission_manager._format_args("web_search", args)
        assert result == "python tutorial"

    def test_format_args_read_webpage(
        self, permission_manager: CommandPermissionManager
    ):
        """Read webpage should return the URL."""
        args = {"url": "https://example.com"}
        result = permission_manager._format_args("read_webpage", args)
        assert result == "https://example.com"

    def test_format_args_generic_command(
        self, permission_manager: CommandPermissionManager
    ):
        """Unknown commands should join values with colon."""
        args = {"arg1": "value1", "arg2": "value2"}
        result = permission_manager._format_args("unknown_cmd", args)
        assert result == "value1:value2"

    def test_format_args_generic_empty(
        self, permission_manager: CommandPermissionManager
    ):
        """Empty args for unknown commands should return wildcard."""
        result = permission_manager._format_args("unknown_cmd", {})
        assert result == "*"


class TestPatternMatches:
    """Tests for _pattern_matches() method."""

    def test_pattern_matches_exact(self, permission_manager: CommandPermissionManager):
        """Exact pattern should match."""
        assert permission_manager._pattern_matches(
            "read_file(/tmp/test.txt)", "read_file", "/tmp/test.txt"
        )

    def test_pattern_matches_single_wildcard(
        self, permission_manager: CommandPermissionManager
    ):
        """Single wildcard should match non-slash characters."""
        assert permission_manager._pattern_matches(
            "read_file(/tmp/*.txt)", "read_file", "/tmp/test.txt"
        )
        assert not permission_manager._pattern_matches(
            "read_file(/tmp/*.txt)", "read_file", "/tmp/subdir/test.txt"
        )

    def test_pattern_matches_double_wildcard(
        self, permission_manager: CommandPermissionManager
    ):
        """Double wildcard should match any path including slashes."""
        assert permission_manager._pattern_matches(
            "read_file(/tmp/**)", "read_file", "/tmp/test.txt"
        )
        assert permission_manager._pattern_matches(
            "read_file(/tmp/**)", "read_file", "/tmp/subdir/test.txt"
        )
        assert permission_manager._pattern_matches(
            "read_file(/tmp/**)", "read_file", "/tmp/a/b/c/test.txt"
        )

    def test_pattern_matches_workspace_placeholder(
        self, permission_manager: CommandPermissionManager
    ):
        """Workspace placeholder should expand to workspace path."""
        workspace_path = str(permission_manager.workspace)
        assert permission_manager._pattern_matches(
            "read_file({workspace}/**)",
            "read_file",
            f"{workspace_path}/test.txt",
        )

    def test_pattern_matches_wrong_command(
        self, permission_manager: CommandPermissionManager
    ):
        """Pattern should not match different command."""
        assert not permission_manager._pattern_matches(
            "read_file(/tmp/test.txt)", "write_to_file", "/tmp/test.txt"
        )

    def test_pattern_matches_shell_command(
        self, permission_manager: CommandPermissionManager
    ):
        """Shell command patterns should match executable:args format."""
        assert permission_manager._pattern_matches(
            "execute_shell(rm:**)", "execute_shell", "rm:-rf /tmp/foo"
        )
        assert permission_manager._pattern_matches(
            "execute_shell(rm:-rf **)", "execute_shell", "rm:-rf /tmp/foo"
        )

    def test_pattern_matches_shell_sudo(
        self, permission_manager: CommandPermissionManager
    ):
        """Sudo pattern should match any sudo command."""
        assert permission_manager._pattern_matches(
            "execute_shell(sudo:**)", "execute_shell", "sudo:rm -rf /"
        )
        assert permission_manager._pattern_matches(
            "execute_shell(sudo:**)", "execute_shell", "sudo:apt install foo"
        )

    def test_pattern_matches_env_file(
        self, permission_manager: CommandPermissionManager
    ):
        """Pattern should match .env files."""
        assert permission_manager._pattern_matches(
            "read_file(**.env)", "read_file", "/path/to/.env"
        )
        assert permission_manager._pattern_matches(
            "read_file(**.env)", "read_file", "/project/config/.env"
        )

    def test_pattern_matches_invalid_pattern(
        self, permission_manager: CommandPermissionManager
    ):
        """Invalid pattern format should not match."""
        assert not permission_manager._pattern_matches(
            "invalid_pattern", "read_file", "/tmp/test.txt"
        )
        assert not permission_manager._pattern_matches(
            "read_file", "read_file", "/tmp/test.txt"
        )

    def test_pattern_matches_wildcard_only(
        self, permission_manager: CommandPermissionManager
    ):
        """Wildcard-only pattern should match anything."""
        assert permission_manager._pattern_matches("finish(*)", "finish", "any_value")


class TestGeneralizePattern:
    """Tests for _generalize_pattern() method."""

    def test_generalize_file_in_workspace(
        self, permission_manager: CommandPermissionManager
    ):
        """File in workspace should use {workspace} placeholder."""
        workspace_path = permission_manager.workspace
        file_path = str(workspace_path / "subdir" / "test.txt")
        result = permission_manager._generalize_pattern("read_file", file_path)
        assert result == "read_file({workspace}/subdir/*)"

    def test_generalize_file_outside_workspace(
        self, permission_manager: CommandPermissionManager
    ):
        """File outside workspace should use exact path."""
        result = permission_manager._generalize_pattern(
            "read_file", "/outside/path/test.txt"
        )
        assert result == "read_file(/outside/path/test.txt)"

    def test_generalize_shell_command(
        self, permission_manager: CommandPermissionManager
    ):
        """Shell command should extract executable."""
        result = permission_manager._generalize_pattern(
            "execute_shell", "rm:-rf /tmp/foo"
        )
        assert result == "execute_shell(rm:**)"

    def test_generalize_shell_no_colon(
        self, permission_manager: CommandPermissionManager
    ):
        """Shell command without colon should return wildcard."""
        result = permission_manager._generalize_pattern("execute_shell", "invalid")
        assert result == "execute_shell(*)"

    def test_generalize_web_search(self, permission_manager: CommandPermissionManager):
        """Web search should generalize to wildcard."""
        result = permission_manager._generalize_pattern("web_search", "python tutorial")
        assert result == "web_search(*)"

    def test_generalize_read_webpage(
        self, permission_manager: CommandPermissionManager
    ):
        """Read webpage should extract domain."""
        result = permission_manager._generalize_pattern(
            "read_webpage", "https://example.com/page"
        )
        assert result == "read_webpage(*example.com*)"

    def test_generalize_unknown_command(
        self, permission_manager: CommandPermissionManager
    ):
        """Unknown command should use wildcard."""
        result = permission_manager._generalize_pattern("unknown_cmd", "some:args")
        assert result == "unknown_cmd(*)"


class TestCheckCommand:
    """Tests for check_command() method."""

    def test_check_command_allowed_by_workspace(
        self, permission_manager: CommandPermissionManager
    ):
        """Commands matching workspace allow list should be allowed."""
        workspace_path = str(permission_manager.workspace)
        # Create the file path that would be resolved
        file_path = f"{workspace_path}/test.txt"
        result = permission_manager.check_command("read_file", {"filename": file_path})
        assert result.allowed

    def test_check_command_denied_by_workspace(
        self, permission_manager: CommandPermissionManager
    ):
        """Commands matching workspace deny list should be denied."""
        # .env files are denied by default
        result = permission_manager.check_command(
            "read_file", {"filename": "/project/.env"}
        )
        assert not result.allowed

    def test_check_command_denied_shell_rm_rf(
        self, permission_manager: CommandPermissionManager
    ):
        """rm -rf should be denied by default."""
        result = permission_manager.check_command(
            "execute_shell", {"command_line": "rm -rf /tmp/foo"}
        )
        assert not result.allowed

    def test_check_command_denied_shell_rm_r(
        self, permission_manager: CommandPermissionManager
    ):
        """rm -r should be denied by default."""
        result = permission_manager.check_command(
            "execute_shell", {"command_line": "rm -r /tmp/foo"}
        )
        assert not result.allowed

    def test_check_command_denied_sudo(
        self, permission_manager: CommandPermissionManager
    ):
        """sudo commands should be denied by default."""
        result = permission_manager.check_command(
            "execute_shell", {"command_line": "sudo apt install foo"}
        )
        assert not result.allowed

    def test_check_command_allowed_safe_shell(self, workspace: Path, agent_dir: Path):
        """Safe shell commands should not match deny patterns."""
        workspace.mkdir(parents=True, exist_ok=True)
        # Create manager with custom settings that allow shell commands
        settings = WorkspaceSettings(
            permissions=PermissionsConfig(
                allow=["execute_shell(ls:**)", "execute_shell(cat:**)"],
                deny=[
                    "execute_shell(rm:-rf **)",
                    "execute_shell(sudo:**)",
                ],
            )
        )
        manager = CommandPermissionManager(
            workspace=workspace,
            agent_dir=agent_dir,
            workspace_settings=settings,
            agent_permissions=AgentPermissions(),
            prompt_fn=None,
        )
        assert manager.check_command(
            "execute_shell", {"command_line": "ls -la"}
        ).allowed
        assert manager.check_command(
            "execute_shell", {"command_line": "cat /tmp/file.txt"}
        ).allowed

    def test_check_command_agent_deny_overrides_workspace_allow(
        self, workspace: Path, agent_dir: Path
    ):
        """Agent deny list should override workspace allow list."""
        workspace.mkdir(parents=True, exist_ok=True)
        workspace_settings = WorkspaceSettings(
            permissions=PermissionsConfig(
                allow=["execute_shell(ls:**)"],
                deny=[],
            )
        )
        agent_permissions = AgentPermissions(
            permissions=PermissionsConfig(
                allow=[],
                deny=["execute_shell(ls:**)"],  # Agent denies ls
            )
        )
        manager = CommandPermissionManager(
            workspace=workspace,
            agent_dir=agent_dir,
            workspace_settings=workspace_settings,
            agent_permissions=agent_permissions,
            prompt_fn=None,
        )
        # Agent deny should block even though workspace allows
        result = manager.check_command("execute_shell", {"command_line": "ls -la"})
        assert not result.allowed

    def test_check_command_agent_allow_overrides_no_workspace(
        self, workspace: Path, agent_dir: Path
    ):
        """Agent allow list should work when workspace has no match."""
        workspace.mkdir(parents=True, exist_ok=True)
        workspace_settings = WorkspaceSettings(
            permissions=PermissionsConfig(allow=[], deny=[])
        )
        agent_permissions = AgentPermissions(
            permissions=PermissionsConfig(
                allow=["execute_shell(echo:**)"],
                deny=[],
            )
        )
        manager = CommandPermissionManager(
            workspace=workspace,
            agent_dir=agent_dir,
            workspace_settings=workspace_settings,
            agent_permissions=agent_permissions,
            prompt_fn=None,
        )
        result = manager.check_command("execute_shell", {"command_line": "echo hello"})
        assert result.allowed

    def test_check_command_no_prompt_fn_denies(self, workspace: Path, agent_dir: Path):
        """Without prompt_fn, unmatched commands should be denied."""
        workspace.mkdir(parents=True, exist_ok=True)
        settings = WorkspaceSettings(permissions=PermissionsConfig(allow=[], deny=[]))
        manager = CommandPermissionManager(
            workspace=workspace,
            agent_dir=agent_dir,
            workspace_settings=settings,
            agent_permissions=AgentPermissions(),
            prompt_fn=None,
        )
        # No allow patterns, no prompt, should deny
        result = manager.check_command(
            "execute_shell", {"command_line": "some_command"}
        )
        assert not result.allowed

    def test_check_command_session_denial(self, workspace: Path, agent_dir: Path):
        """Session denials should persist for the session."""
        workspace.mkdir(parents=True, exist_ok=True)
        denied_commands = []

        def mock_prompt(
            cmd: str, args_str: str, _args: dict
        ) -> tuple[ApprovalScope, str | None]:
            denied_commands.append((cmd, args_str))
            return (ApprovalScope.DENY, None)

        settings = WorkspaceSettings(permissions=PermissionsConfig(allow=[], deny=[]))
        manager = CommandPermissionManager(
            workspace=workspace,
            agent_dir=agent_dir,
            workspace_settings=settings,
            agent_permissions=AgentPermissions(),
            prompt_fn=mock_prompt,
        )

        # First call should prompt and deny
        result = manager.check_command("execute_shell", {"command_line": "bad_cmd"})
        assert not result.allowed
        assert len(denied_commands) == 1

        # Second call with same command should not prompt (session denial)
        result = manager.check_command("execute_shell", {"command_line": "bad_cmd"})
        assert not result.allowed
        assert len(denied_commands) == 1  # Still 1, no new prompt


class TestApprovalScopes:
    """Tests for different approval scopes."""

    def test_approval_once(self, workspace: Path, agent_dir: Path):
        """ONCE approval should allow but not persist."""
        workspace.mkdir(parents=True, exist_ok=True)
        prompt_count = [0]

        def mock_prompt(
            _cmd: str, _args_str: str, _args: dict
        ) -> tuple[ApprovalScope, str | None]:
            prompt_count[0] += 1
            return (ApprovalScope.ONCE, None)

        settings = WorkspaceSettings(permissions=PermissionsConfig(allow=[], deny=[]))
        manager = CommandPermissionManager(
            workspace=workspace,
            agent_dir=agent_dir,
            workspace_settings=settings,
            agent_permissions=AgentPermissions(),
            prompt_fn=mock_prompt,
        )

        # First call should prompt and allow
        result = manager.check_command("execute_shell", {"command_line": "cmd1"})
        assert result.allowed
        assert prompt_count[0] == 1

        # Second call should prompt again (ONCE doesn't persist)
        result = manager.check_command("execute_shell", {"command_line": "cmd1"})
        assert result.allowed
        assert prompt_count[0] == 2

    def test_approval_agent_persists_to_file(self, workspace: Path, agent_dir: Path):
        """AGENT approval should save to agent permissions file."""
        workspace.mkdir(parents=True, exist_ok=True)

        def mock_prompt(
            _cmd: str, _args_str: str, _args: dict
        ) -> tuple[ApprovalScope, str | None]:
            return (ApprovalScope.AGENT, None)

        agent_permissions = AgentPermissions()
        settings = WorkspaceSettings(permissions=PermissionsConfig(allow=[], deny=[]))
        manager = CommandPermissionManager(
            workspace=workspace,
            agent_dir=agent_dir,
            workspace_settings=settings,
            agent_permissions=agent_permissions,
            prompt_fn=mock_prompt,
        )

        result = manager.check_command("execute_shell", {"command_line": "mycmd args"})
        assert result.allowed

        # Check that permission was added to agent permissions
        assert "execute_shell(mycmd:**)" in agent_permissions.permissions.allow

        # Check that file was created
        perm_file = agent_dir / "permissions.yaml"
        assert perm_file.exists()

    def test_approval_workspace_persists_to_file(
        self, workspace: Path, agent_dir: Path
    ):
        """WORKSPACE approval should save to workspace settings file."""
        workspace.mkdir(parents=True, exist_ok=True)

        def mock_prompt(
            _cmd: str, _args_str: str, _args: dict
        ) -> tuple[ApprovalScope, str | None]:
            return (ApprovalScope.WORKSPACE, None)

        workspace_settings = WorkspaceSettings(
            permissions=PermissionsConfig(allow=[], deny=[])
        )
        manager = CommandPermissionManager(
            workspace=workspace,
            agent_dir=agent_dir,
            workspace_settings=workspace_settings,
            agent_permissions=AgentPermissions(),
            prompt_fn=mock_prompt,
        )

        result = manager.check_command("execute_shell", {"command_line": "mycmd args"})
        assert result.allowed

        # Check that permission was added to workspace settings
        assert "execute_shell(mycmd:**)" in workspace_settings.permissions.allow

        # Check that file was created
        settings_file = workspace / ".autogpt" / "autogpt.yaml"
        assert settings_file.exists()

    def test_approval_with_feedback(self, workspace: Path, agent_dir: Path):
        """Approval with feedback should return the feedback."""
        workspace.mkdir(parents=True, exist_ok=True)

        def mock_prompt(
            _cmd: str, _args_str: str, _args: dict
        ) -> tuple[ApprovalScope, str | None]:
            return (ApprovalScope.ONCE, "Be careful with this command")

        settings = WorkspaceSettings(permissions=PermissionsConfig(allow=[], deny=[]))
        manager = CommandPermissionManager(
            workspace=workspace,
            agent_dir=agent_dir,
            workspace_settings=settings,
            agent_permissions=AgentPermissions(),
            prompt_fn=mock_prompt,
        )

        result = manager.check_command("execute_shell", {"command_line": "cmd1"})
        assert result.allowed
        assert result.scope == ApprovalScope.ONCE
        assert result.feedback == "Be careful with this command"

    def test_denial_with_feedback(self, workspace: Path, agent_dir: Path):
        """Denial with feedback should return the feedback."""
        workspace.mkdir(parents=True, exist_ok=True)

        def mock_prompt(
            _cmd: str, _args_str: str, _args: dict
        ) -> tuple[ApprovalScope, str | None]:
            return (ApprovalScope.DENY, "Don't run this, try X instead")

        settings = WorkspaceSettings(permissions=PermissionsConfig(allow=[], deny=[]))
        manager = CommandPermissionManager(
            workspace=workspace,
            agent_dir=agent_dir,
            workspace_settings=settings,
            agent_permissions=AgentPermissions(),
            prompt_fn=mock_prompt,
        )

        result = manager.check_command("execute_shell", {"command_line": "bad_cmd"})
        assert not result.allowed
        assert result.scope == ApprovalScope.DENY
        assert result.feedback == "Don't run this, try X instead"


class TestDefaultDenyPatterns:
    """Tests to verify default deny patterns work correctly."""

    def test_deny_rm_rf_variations(self, permission_manager: CommandPermissionManager):
        """Various rm -rf commands should all be denied."""
        dangerous_commands = [
            "rm -rf /",
            "rm -rf /tmp",
            "rm -rf ~/",
            "rm -rf /home/user",
            "rm -rf .",
            "rm -rf ./*",
        ]
        for cmd in dangerous_commands:
            result = permission_manager.check_command(
                "execute_shell", {"command_line": cmd}
            )
            assert not result.allowed, f"Command '{cmd}' should be denied"

    def test_deny_rm_r_variations(self, permission_manager: CommandPermissionManager):
        """Various rm -r commands should all be denied."""
        dangerous_commands = [
            "rm -r /tmp",
            "rm -r /home/user",
        ]
        for cmd in dangerous_commands:
            result = permission_manager.check_command(
                "execute_shell", {"command_line": cmd}
            )
            assert not result.allowed, f"Command '{cmd}' should be denied"

    def test_deny_sudo_variations(self, permission_manager: CommandPermissionManager):
        """Various sudo commands should all be denied."""
        dangerous_commands = [
            "sudo rm -rf /",
            "sudo apt install something",
            "sudo chmod 777 /",
            "sudo su",
        ]
        for cmd in dangerous_commands:
            result = permission_manager.check_command(
                "execute_shell", {"command_line": cmd}
            )
            assert not result.allowed, f"Command '{cmd}' should be denied"

    def test_deny_env_files(self, permission_manager: CommandPermissionManager):
        """Reading .env files should be denied."""
        env_files = [
            "/project/.env",
            "/home/user/app/.env",
            "/var/www/.env.local",
            "/app/.env.production",
        ]
        for f in env_files:
            result = permission_manager.check_command("read_file", {"filename": f})
            assert not result.allowed, f"Reading '{f}' should be denied"

    def test_deny_key_files(self, permission_manager: CommandPermissionManager):
        """Reading .key files should be denied."""
        result = permission_manager.check_command(
            "read_file", {"filename": "/home/user/.ssh/id_rsa.key"}
        )
        assert not result.allowed

    def test_deny_pem_files(self, permission_manager: CommandPermissionManager):
        """Reading .pem files should be denied."""
        result = permission_manager.check_command(
            "read_file", {"filename": "/certs/server.pem"}
        )
        assert not result.allowed


class TestWorkspaceSettings:
    """Tests for WorkspaceSettings class."""

    def test_load_or_create_creates_default(self, tmp_path: Path):
        """load_or_create should create default settings file."""
        workspace = tmp_path / "workspace"
        workspace.mkdir()

        settings = WorkspaceSettings.load_or_create(workspace)

        # Check defaults are set
        assert "read_file({workspace}/**)" in settings.permissions.allow
        assert "execute_shell(rm:-rf **)" in settings.permissions.deny

        # Check file was created
        settings_file = workspace / ".autogpt" / "autogpt.yaml"
        assert settings_file.exists()

    def test_load_or_create_loads_existing(self, tmp_path: Path):
        """load_or_create should load existing settings file."""
        workspace = tmp_path / "workspace"
        autogpt_dir = workspace / ".autogpt"
        autogpt_dir.mkdir(parents=True)

        # Create custom settings file
        settings_file = autogpt_dir / "autogpt.yaml"
        settings_file.write_text(
            """
permissions:
  allow:
    - custom_command(*)
  deny: []
"""
        )

        settings = WorkspaceSettings.load_or_create(workspace)

        assert settings.permissions.allow == ["custom_command(*)"]
        assert settings.permissions.deny == []

    def test_add_permission(self, tmp_path: Path):
        """add_permission should add and save permission."""
        workspace = tmp_path / "workspace"
        workspace.mkdir()

        settings = WorkspaceSettings(permissions=PermissionsConfig(allow=[], deny=[]))
        settings.add_permission("new_pattern(*)", workspace)

        assert "new_pattern(*)" in settings.permissions.allow

        # Reload and verify persisted
        loaded = WorkspaceSettings.load_or_create(workspace)
        assert "new_pattern(*)" in loaded.permissions.allow


class TestAgentPermissions:
    """Tests for AgentPermissions class."""

    def test_load_or_create_returns_empty(self, tmp_path: Path):
        """load_or_create should return empty permissions if no file."""
        agent_dir = tmp_path / "agent"
        agent_dir.mkdir()

        permissions = AgentPermissions.load_or_create(agent_dir)

        assert permissions.permissions.allow == []
        assert permissions.permissions.deny == []
        # Should NOT create file if empty
        assert not (agent_dir / "permissions.yaml").exists()

    def test_load_or_create_loads_existing(self, tmp_path: Path):
        """load_or_create should load existing permissions file."""
        agent_dir = tmp_path / "agent"
        agent_dir.mkdir()

        # Create custom permissions file
        perm_file = agent_dir / "permissions.yaml"
        perm_file.write_text(
            """
permissions:
  allow:
    - agent_specific(*)
  deny:
    - agent_denied(*)
"""
        )

        permissions = AgentPermissions.load_or_create(agent_dir)

        assert permissions.permissions.allow == ["agent_specific(*)"]
        assert permissions.permissions.deny == ["agent_denied(*)"]

    def test_add_permission(self, tmp_path: Path):
        """add_permission should add and save permission."""
        agent_dir = tmp_path / "agent"
        agent_dir.mkdir()

        permissions = AgentPermissions()
        permissions.add_permission("new_agent_pattern(*)", agent_dir)

        assert "new_agent_pattern(*)" in permissions.permissions.allow

        # Verify file was created
        assert (agent_dir / "permissions.yaml").exists()

        # Reload and verify persisted
        loaded = AgentPermissions.load_or_create(agent_dir)
        assert "new_agent_pattern(*)" in loaded.permissions.allow
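Read as a whole, the fixtures above describe the intended wiring. A condensed sketch of plugging an interactive approver into the manager: the constructor arguments are exactly the ones the fixtures use, while the `input()`-based prompt and the file paths are illustrative assumptions, not repo code:

```python
from pathlib import Path

from forge.config.workspace_settings import AgentPermissions, WorkspaceSettings
from forge.permissions import ApprovalScope, CommandPermissionManager

def prompt_user(cmd: str, args_str: str, _args: dict) -> tuple[ApprovalScope, str | None]:
    # Illustrative terminal prompt; a real frontend would substitute its own UI.
    answer = input(f"Allow {cmd}({args_str})? [y/N/a(gent)/w(orkspace)] ").lower()
    scope = {
        "y": ApprovalScope.ONCE,       # allow this call only
        "a": ApprovalScope.AGENT,      # persist to <agent_dir>/permissions.yaml
        "w": ApprovalScope.WORKSPACE,  # persist to .autogpt/autogpt.yaml
    }.get(answer, ApprovalScope.DENY)
    return (scope, None)

workspace = Path("./workspace")        # hypothetical paths for the example
agent_dir = Path("./agents/my-agent")
manager = CommandPermissionManager(
    workspace=workspace,
    agent_dir=agent_dir,
    workspace_settings=WorkspaceSettings.load_or_create(workspace),
    agent_permissions=AgentPermissions.load_or_create(agent_dir),
    prompt_fn=prompt_user,
)
if manager.check_command("execute_shell", {"command_line": "ls -la"}).allowed:
    ...  # run the command
```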
@@ -2,7 +2,7 @@ import contextlib
 import json
 import re
 from io import BytesIO
-from typing import Any
+from typing import Any, cast

 from vcr.request import Request

@@ -66,13 +66,11 @@ def freeze_request(request: Request) -> Request:
         return request

     with contextlib.suppress(ValueError):
-        request.body = freeze_request_body(
-            json.loads(
-                request.body.getvalue()
-                if isinstance(request.body, BytesIO)
-                else request.body
-            )
-        )
+        if isinstance(request.body, BytesIO):
+            body_data: bytes | str = request.body.getvalue()
+        else:
+            body_data = cast(bytes, request.body)
+        request.body = freeze_request_body(json.loads(body_data))

     return request
@@ -1,120 +0,0 @@
## [AutoGPT Forge Part 1: A Comprehensive Guide to Your First Steps](https://aiedge.medium.com/autogpt-forge-a-comprehensive-guide-to-your-first-steps-a1dfdf46e3b4)



**Written by Craig Swift & [Ryan Brandt](https://github.com/paperMoose)**


Welcome to the getting started tutorial! This tutorial is designed to walk you through the process of setting up and running your own AutoGPT agent in the Forge environment. Whether you are a seasoned AI developer or just starting out, this guide will equip you with the necessary steps to jumpstart your journey in the world of AI development with AutoGPT.

## Section 1: Understanding the Forge

The Forge serves as a comprehensive template for building your own AutoGPT agent. It not only provides the scaffolding for setting up, creating, and running your agent, but also includes the benchmarking system and the frontend for testing it. We'll touch more on those later! For now, just think of the Forge as a way to easily generate your boilerplate in a standardized way.

## Section 2: Setting up the Forge Environment

To begin, you need to fork the [repository](https://github.com/Significant-Gravitas/AutoGPT) by navigating to the main page of the repository and clicking **Fork** in the top-right corner.



Follow the on-screen instructions to complete the process.



### Cloning the Repository
Next, clone your newly forked repository to your local system. Ensure you have Git installed to proceed with this step. You can download Git from [here](https://git-scm.com/downloads). Then clone the repo using the following command and the URL for your repo. You can find the correct URL by clicking the green Code button on your repo's main page.


```bash
# replace the URL with the one for your forked repo
git clone https://github.com/<YOUR REPO PATH HERE>
```



### Setting up the Project

Once you have cloned the project, change your directory to the newly cloned project:
```bash
# The name of the directory will match the name you gave your fork. The default is AutoGPT
cd AutoGPT
```
To set up the project, run the `./run setup` command in the terminal. Follow the instructions to install the necessary dependencies and set up your GitHub access token.




## Section 3: Creating Your Agent

Choose a suitable name for your agent. It should be unique and descriptive. Examples of valid names include swiftyosgpt, SwiftyosAgent, or swiftyos_agent.

Create your agent template using the command:

```bash
./run agent create YOUR_AGENT_NAME
```
Replace YOUR_AGENT_NAME with the name you chose in the previous step.



## Section 4: Running Your Agent

Begin by starting your agent using the command:

```bash
./run agent start YOUR_AGENT_NAME
```
This will start the agent on `http://localhost:8000/`.



### Logging in and Sending Tasks to Your Agent
Access the frontend at `http://localhost:8000/` and log in using a Google or GitHub account. Once you're logged in, you'll see the agent tasking interface! However... the agent won't do anything yet. We'll implement the logic for our agent to run tasks in the upcoming tutorial chapters.




### Stopping and Restarting Your Agent
When needed, use Ctrl+C to end the session, or use the stop command:
```bash
./run agent stop
```
This command forcefully stops the agent. You can restart it later using the start command.

## To Recap
- We've forked the AutoGPT repo and cloned it locally on your machine.
- We've connected the project with your personal GitHub access token as part of the setup.
- We've run the agent and its tasking server successfully without an error.
- We've logged into the server site at localhost:8000 using a GitHub account.

Make sure you've completed every step successfully before moving on :).
### Next Steps: Building and Enhancing Your Agent
With our foundation set, you are now ready to build and enhance your agent! The next tutorial will look into the anatomy of an agent and how to add basic functionality.

## Additional Resources

### Links to Documentation and Community Forums
- [Windows Subsystem for Linux (WSL) Installation](https://learn.microsoft.com/en-us/windows/wsl/)
- [Git Download](https://git-scm.com/downloads)

## Appendix

### Troubleshooting Common Issues
- Ensure Git is correctly installed before cloning the repository.
- Follow the setup instructions carefully to avoid issues during project setup.
- If you encounter issues during agent creation, refer to the guide for naming conventions.
- Make sure your GitHub token has the `repo` scope enabled.

### Glossary of Terms
- **Repository**: A storage space where your project resides.
- **Forking**: Creating a copy of a repository under your GitHub account.
- **Cloning**: Making a local copy of a repository on your system.
- **Agent**: The AutoGPT you will be creating and developing in this project.
- **Benchmarking**: The process of testing your agent's skills in various categories using the Forge's integrated benchmarking system.
- **Forge**: The comprehensive template for building your AutoGPT agent, covering setup, creation, running, and benchmarking.
- **Frontend**: The user interface where you can log in, send tasks to your agent, and view the task history.


### System Requirements

This project supports Linux (Debian-based), Mac, and Windows Subsystem for Linux (WSL). If you are using a Windows system, you will need to install WSL. You can find the installation instructions for WSL [here](https://learn.microsoft.com/en-us/windows/wsl/).
@@ -1,147 +0,0 @@
# AutoGPT Forge Part 2: The Blueprint of an AI Agent

**Written by Craig Swift & [Ryan Brandt](https://github.com/paperMoose)**

*8 min read*


---





## What are LLM-Based AI Agents?

Before we add logic to our new agent, we have to understand what an agent actually IS.

Large Language Models (LLMs) are state-of-the-art machine learning models that harness vast amounts of web knowledge. But what happens when you give an LLM the ability to use tools based on its output? You get LLM-based AI agents: a new breed of artificial intelligence that promises more human-like decision-making in the real world.

Traditional autonomous agents operated with limited knowledge, often confined to specific tasks or environments. They were like calculators: efficient but limited to predefined functions. LLM-based agents, on the other hand, don't just compute; they understand, reason, and then act, drawing from a vast reservoir of information.



## The Anatomy of an LLM-Based AI Agent

Diving deep into the core of an LLM-based AI agent, we find it's structured much like a human, with distinct components akin to personality, memory, thought process, and abilities. Let's break these down:


Anatomy of an Agent, from the Agent Landscape Survey

### **Profile**
Humans naturally adapt our mindset based on the tasks we're tackling, whether it's writing, cooking, or playing sports. Similarly, agents can be conditioned or "profiled" to specialize in specific tasks.

The profile of an agent is its personality, mindset, and high-level instructions. Research indicates that merely informing an agent that it's an expert in a certain domain can boost its performance. A sketch of turning such a profile into a system prompt follows the example below.

| **Potential Applications of Profiling** | **Description** |
|-----------------------------------------|-----------------|
| **Prompt Engineering** | Tailoring agent prompts for better results. |
| **Memory Adjustments** | Modifying how an agent recalls or prioritizes information. |
| **Action Selection** | Influencing the set of actions an agent might consider. |
| **Driving Mechanism** | Potentially tweaking the underlying large language model (LLM) that powers the agent. |

#### Example Agent Profile: Weather Expert

- **Profile Name:** Weather Specialist
- **Purpose:** Provide detailed and accurate weather information.
- **Preferred Memory Sources:** Meteorological databases, recent weather news, and scientific journals.
- **Action Set:** Fetching weather data, analyzing weather patterns, and providing forecasts.
- **Base Model Tweaks:** Prioritize meteorological terminology and understanding.
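In practice, a profile often boils down to a carefully worded system prompt. Here is a minimal sketch of rendering the profile above into one; the helper and its fields are illustrative, not part of the Forge SDK:

```python
def profile_to_system_prompt(name: str, purpose: str, action_set: list[str]) -> str:
    """Render an agent profile into a system prompt (illustrative helper)."""
    actions = ", ".join(action_set)
    return (
        f"You are {name}, an expert agent.\n"
        f"Your purpose: {purpose}\n"
        f"Prefer these actions when acting: {actions}."
    )


print(profile_to_system_prompt(
    "Weather Specialist",
    "Provide detailed and accurate weather information.",
    ["fetch weather data", "analyze weather patterns", "provide forecasts"],
))
```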

### **Memory**
Just as our memories shape our decisions, reactions, and identities, an agent's memory is the cornerstone of its identity and capabilities. Memory is fundamental for an agent to learn and adapt. At a high level, agents possess two core types of memory: long-term and short-term. A minimal sketch follows the table below.

| | **Long-Term Memory** | **Short-Term (Working) Memory** |
|-------------------|----------------------|---------------------------------|
| **Purpose** | Serves as the agent's foundational knowledge base. | Handles recent or transient memories, much like our recollection of events from the past few days. |
| **What it Stores** | Historical data and interactions that have taken place over extended periods. | Immediate experiences and interactions. |
| **Role** | Guides the agent's core behaviors and understanding, acting as a vast reservoir of accumulated knowledge. | Essential for real-time tasks and decision-making. Not all these memories transition into long-term storage. |
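To make the two tiers concrete, here is a minimal Python sketch. The class and method names are illustrative assumptions, not Forge SDK APIs, and the keyword recall stands in for real retrieval:

```python
from collections import deque


class AgentMemory:
    """Two-tier memory: illustrative only, not a Forge SDK class."""

    def __init__(self, short_term_capacity: int = 10):
        self.long_term: list[str] = []  # foundational, accumulated knowledge
        # Working memory: recent events, oldest silently dropped at capacity
        self.short_term: deque[str] = deque(maxlen=short_term_capacity)

    def remember(self, event: str, important: bool = False) -> None:
        self.short_term.append(event)  # everything passes through working memory
        if important:
            self.long_term.append(event)  # only some memories are consolidated

    def recall(self, query: str) -> list[str]:
        # Naive keyword match; a real agent would use embeddings or a database
        pool = list(self.short_term) + self.long_term
        return [m for m in pool if query.lower() in m.lower()]
```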


### **Planning**
Planning is essential for agents to systematically tackle challenges, mirroring how humans break down complex problems into smaller tasks.

#### **1. What is Planning?**

- **Concept:** It's the agent's strategy for problem-solving, ensuring solutions are both comprehensive and systematic.
- **Human Analogy:** Just like humans split challenges into smaller, more manageable tasks, agents adopt a similar methodical approach.

#### **2. Key Planning Strategies**

The table below contrasts the two strategies; a loop sketch of the feedback variant follows it.

| **Strategy** | **Description** |
|----------------------------|-----------------|
| **Planning with Feedback** | An adaptive approach where agents refine their strategy based on outcomes, similar to iterative design processes. |
| **Planning without Feedback** | The agent acts as a strategist, using only its existing knowledge. It's like playing chess, anticipating challenges and planning several moves ahead. |
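As a rough sketch, planning with feedback reduces to a propose-act-refine loop. The callables below are placeholders for whatever planner, executor, and evaluator an agent actually wires in; none of them are Forge APIs:

```python
def solve_with_feedback(task, plan, execute, succeeded, max_rounds: int = 3):
    """Planning with feedback as a propose-act-refine loop (illustrative)."""
    history = []
    for _ in range(max_rounds):
        steps = plan(task, history)  # propose a plan, informed by past outcomes
        outcome = execute(steps)     # act on the plan
        history.append(outcome)      # feed the outcome back to the planner
        if succeeded(outcome):
            return outcome
    return None  # unresolved after max_rounds refinements
```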

### **Action**
After the introspection of memory and the strategizing of planning comes the finale: Action. This is where the agent's cognitive processes manifest into tangible outcomes using the agent's Abilities. Every decision, every thought, culminates in the action phase, translating abstract concepts into definitive results.
Whether it's penning a response, saving a file, or initiating a new process, the action component is the culmination of the agent's decision-making journey. It's the bridge between digital cognition and real-world impact, turning the agent's electronic impulses into meaningful and purposeful outcomes.


An example of how a basic agent works
## The Agent Protocol: The Linguistics of AI Communication

After diving deep into the anatomy of an agent and understanding its core components, a pivotal question emerges: how do we effectively communicate with these diverse, intricately designed agents? The answer lies in the Agent Protocol.

### Understanding the Agent Protocol

At its essence, the Agent Protocol is a standardized communication interface, a universal "language" that every AI agent, regardless of its underlying structure or design, can comprehend. Think of it as the diplomatic envoy that ensures smooth conversations between agents and their developers, tools, or even other agents.

In an ecosystem where every developer might have their unique approach to crafting agents, the Agent Protocol acts as a unifying bridge. It's akin to a standardized plug fitting into any socket, or a universal translator decoding myriad languages.
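To make that concrete, here is a sketch of driving a protocol-compliant agent over HTTP. It assumes an agent listening on localhost:8000 and the v1 route layout; treat the exact paths and fields as assumptions and consult the protocol spec for the authoritative schema:

```python
import requests

BASE = "http://localhost:8000/ap/v1/agent"  # assumed host and route prefix

# Create a task: the standardized entry point a protocol-compliant agent exposes
task = requests.post(
    f"{BASE}/tasks",
    json={"input": "Write 'Washington' to a .txt file"},
).json()

# Drive the task forward by one step; an empty body asks for the next step
step = requests.post(f"{BASE}/tasks/{task['task_id']}/steps", json={}).json()

print(step.get("output"), step.get("is_last"))
```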

## AutoGPT Forge: A Peek Inside the LLM Agent Template

Now that we understand the architecture of an agent, let's look inside the Forge. It's a well-organized template, meticulously architected to cater to the needs of agent developers.

#### Forge's Project Structure: A Bird's-Eye View


The Forge's agent directory structure consists of three parts:
- **agent.py**: The heart of the Forge, where the agent's actual business logic lives.
- **prompts**: A directory of prompts used in agent.py's LLM logic.
- **sdk**: The boilerplate code and the lower-level APIs of the Forge.

Let's break them down.

#### Understanding the SDK

The SDK is the main directory for the Forge. Here's a breakdown:

- **Core Components**: These are key parts of the Forge, including Memory, Abilities, and Planning. They help the agent think and act.
- **Agent Protocol Routes**: In the routes sub-directory, you'll find the Agent Protocol. This is how the agent communicates.
- **Database (db.py)**: This is where the agent stores its data, like experiences and learnings.
- **Prompting Engine (prompting.py)**: This tool uses templates to ask questions to the LLM for consistent interactions.
- **Agent Class**: This connects the agent's actions with the Agent Protocol routes.

#### Configurations and Environment

Configuration is key to ensuring our agent runs seamlessly. The .env.example file provides a template for setting up the necessary environment variables. Before diving into the Forge, developers need to copy this to a new .env file and adjust the settings; a short sketch of reading them follows the list below:
- **API Key**: `OPENAI_API_KEY` is where you plug in your OpenAI API key.
- **Log Level**: With `LOG_LEVEL`, control the verbosity of the logs.
- **Database Connection**: `DATABASE_STRING` determines where and how the agent's data gets stored.
- **Port**: `PORT` specifies the listening port for the agent's server.
- **Workspace**: `AGENT_WORKSPACE` points to the agent's working directory.
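For instance, a process might read these variables as in the following sketch; the defaults shown are illustrative assumptions, not values from .env.example:

```python
import os

# Only OPENAI_API_KEY is treated as required in this sketch
openai_api_key = os.environ["OPENAI_API_KEY"]
log_level = os.getenv("LOG_LEVEL", "INFO")
database_string = os.getenv("DATABASE_STRING", "sqlite:///agent.db")
port = int(os.getenv("PORT", "8000"))
agent_workspace = os.getenv("AGENT_WORKSPACE", "workspace")
```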

## To Recap

- **LLM-Based AI Agents**:
  - LLMs are machine learning models with vast knowledge. When equipped with tools to utilize their outputs, they evolve into LLM-based AI agents, enabling human-like decision-making.

- **Anatomy of an Agent**:
  - **Profile**: Sets an agent's personality and specialization.
  - **Memory**: Encompasses the agent's long-term and short-term memory, storing both historical data and recent interactions.
  - **Planning**: The strategy the agent employs to tackle problems.
  - **Action**: The stage where the agent's decisions translate to tangible results.

- **Agent Protocol**:
  - A uniform communication interface ensuring smooth interactions between agents and their developers.

- **AutoGPT Forge**:
  - A foundational template for creating agents. Components include:
    - **agent.py**: Houses the agent's core logic.
    - **prompts**: Directory of templates aiding LLM logic.
    - **sdk**: Boilerplate code and essential APIs.

Let's put this blueprint into practice in part 3!
@@ -1,513 +0,0 @@
# AutoGPT Forge: Crafting Intelligent Agent Logic


**By Craig Swift & [Ryan Brandt](https://github.com/paperMoose)**

Hey there! Ready for part 3 of our AutoGPT Forge tutorial series? If you missed the earlier parts, catch up here:

- [Getting Started](001_getting_started.md)
- [Blueprint of an Agent](002_blueprint_of_an_agent.md)

Now, let's get hands-on! We'll use an LLM to power our agent and complete a task. The challenge? Making the agent write "Washington" to a .txt file. We won't give it step-by-step instructions, just the task. Let's see our agent in action and watch it figure out the steps on its own!


## Get Your Smart Agent Project Ready

Make sure you've set up your project and created an agent as described in our initial guide. If you skipped that part, [click here](#) to get started. Once you're done, come back, and we'll move forward.

In the image below, you'll see my "SmartAgent" and the agent.py file inside the 'forge' folder. That's where we'll be adding our LLM-based logic. If you're unsure about the project structure or agent functions from our last guide, don't worry. We'll cover the basics as we go!


---

## The Task Lifecycle

The lifecycle of a task, from its creation to execution, is outlined in the agent protocol. In simple terms: a task is initiated, its steps are systematically executed, and it concludes once completed.

Want your agent to perform an action? Start by dispatching a `create_task` request. This crucial step involves specifying the task details, much like how you'd send a prompt to ChatGPT, using the input field. If you're giving this a shot on your own, the UI is your best friend; it effortlessly handles all the API calls on your behalf.

When the agent receives this request, it runs the create_task function. The call `super().create_task(task_request)` takes care of the protocol steps. It then logs the task's start. For this guide, you don't need to change this function.

```python
async def create_task(self, task_request: TaskRequestBody) -> Task:
    """
    The agent protocol, which is the core of the Forge, works by creating a task and then
    executing steps for that task. This method is called when the agent is asked to create
    a task.

    We are hooking into this function to add a custom log message. Though you can do
    anything you want here.
    """
    task = await super().create_task(task_request)
    LOG.info(
        f"📦 Task created: {task.task_id} input: {task.input[:40]}{'...' if len(task.input) > 40 else ''}"
    )
    return task
```

After starting a task, the `execute_step` function runs until all steps are done. Here's a basic view of `execute_step`. I've left out the detailed comments for simplicity, but you'll find them in your project.

```python
async def execute_step(self, task_id: str, step_request: StepRequestBody) -> Step:
    # An example that creates a single step, writes a file, and records it as an artifact
    step = await self.db.create_step(
        task_id=task_id, input=step_request, is_last=True
    )

    self.workspace.write(task_id=task_id, path="output.txt", data=b"Washington D.C")

    await self.db.create_artifact(
        task_id=task_id,
        step_id=step.step_id,
        file_name="output.txt",
        relative_path="",
        agent_created=True,
    )

    step.output = "Washington D.C"

    LOG.info(f"\t✅ Final Step completed: {step.step_id}")

    return step
```

Here's the breakdown of the 'write file' process in four steps:

1. **Database Step Creation**: The first stage is all about creating a step within the database, an essential aspect of the agent protocol. You'll observe that while setting up this step, we've flagged it with `is_last=True`. This signals to the agent protocol that no more steps are pending. For the purpose of this guide, let's work under the assumption that our agent will only tackle single-step tasks. However, hang tight for future tutorials, where we'll level up and let the agent determine its completion point.

2. **File Writing**: Next, we write "Washington D.C" to a file using the `workspace.write` function.

3. **Artifact Database Update**: After writing, we record the file in the agent's artifact database.

4. **Step Output & Logging**: Finally, we set the step output to match the file content, log the executed step, and return the step object.

With the 'write file' process clear, let's make our agent smarter and more autonomous. Ready to dive in?

---

## Building the Foundations For Our Smart Agent

First, we need to update the `execute_step()` function. Instead of a fixed solution, it should use the given request.

To do this, we'll fetch the task details using the provided `task_id`:

```python
task = await self.db.get_task(task_id)
```

Next, remember to create a database record and mark it as a single-step task with `is_last=True`:

```python
step = await self.db.create_step(
    task_id=task_id, input=step_request, is_last=True
)
```

Your updated `execute_step` function will look like this:

```python
async def execute_step(self, task_id: str, step_request: StepRequestBody) -> Step:
    # Get the task details
    task = await self.db.get_task(task_id)

    # Add a new step to the database
    step = await self.db.create_step(
        task_id=task_id, input=step_request, is_last=True
    )
    return step
```

Now that we've set this up, let's move to the next exciting part: the PromptEngine.

---


**The Art of Prompting**


Prompting is the craft of shaping the messages you send to powerful language models like ChatGPT. Since these models respond to input details, creating the right prompt can be a challenge. That's where the **PromptEngine** comes in.

The PromptEngine lets you store prompts in text files, specifically in Jinja2 templates. This means you can change the prompts without changing the code. It also lets you adjust prompts for different LLMs. Here's how to use it:

First, import the PromptEngine from the SDK:

```python
from .sdk import PromptEngine
```

In your `execute_step` function, set up the engine for the `gpt-3.5-turbo` LLM:

```python
prompt_engine = PromptEngine("gpt-3.5-turbo")
```

Loading a prompt is straightforward. For instance, loading the `system-format` prompt, which dictates the response format from the LLM, is as easy as:

```python
system_prompt = prompt_engine.load_prompt("system-format")
```

For intricate use cases, like the `task-step` prompt which requires parameters, employ the following method:

```python
# Define the task parameters
task_kwargs = {
    "task": task.input,
    "abilities": self.abilities.list_abilities_for_prompt(),
}

# Load the task prompt with those parameters
task_prompt = prompt_engine.load_prompt("task-step", **task_kwargs)
```



Delving deeper, let's look at the `task-step` prompt template in `prompts/gpt-3.5-turbo/task-step.j2`:

```jinja
{% extends "techniques/expert.j2" %}
{% block expert %}Planner{% endblock %}
{% block prompt %}
Your task is:

{{ task }}

Ensure to respond in the given format. Always make autonomous decisions, devoid of user guidance. Harness the power of your LLM, opting for straightforward tactics sans any legal entanglements.
{% if constraints %}
## Constraints
Operate under these confines:
{% for constraint in constraints %}
- {{ constraint }}
{% endfor %}
{% endif %}
{% if resources %}
## Resources
Utilize these resources:
{% for resource in resources %}
- {{ resource }}
{% endfor %}
{% endif %}
{% if abilities %}
## Abilities
Summon these abilities:
{% for ability in abilities %}
- {{ ability }}
{% endfor %}
{% endif %}

{% if best_practices %}
## Best Practices
{% for best_practice in best_practices %}
- {{ best_practice }}
{% endfor %}
{% endif %}
{% endblock %}
```

This template is modular. It uses the `extends` directive to build on the `expert.j2` template. The different sections, like constraints, resources, abilities, and best practices, make the prompt dynamic. They guide the LLM in understanding the task and using resources and abilities.

The PromptEngine equips us with a potent tool to converse seamlessly with large language models. By externalizing prompts and using templates, we can ensure that our agent remains agile, adapting to new challenges without a code overhaul. As we march forward, keep this foundation in mind; it's the bedrock of our agent's intelligence.

---

## Engaging with your LLM

To make the most of the LLM, you'll send a series of organized instructions, not just one prompt. Structure your prompts as a list of messages for the LLM. Using the `system_prompt` and `task_prompt` from before, create the `messages` list:

```python
messages = [
    {"role": "system", "content": system_prompt},
    {"role": "user", "content": task_prompt}
]
```

With the prompt set, send it to the LLM. This step involves foundational code, focusing on the `chat_completion_request`. This function sends the LLM your prompt and then gets the LLM's output. The other code sets up our request and interprets the feedback:

```python
try:
    # Set the parameters for the chat completion
    chat_completion_kwargs = {
        "messages": messages,
        "model": "gpt-3.5-turbo",
    }
    # Get the LLM's response and interpret it
    chat_response = await chat_completion_request(**chat_completion_kwargs)
    answer = json.loads(chat_response.choices[0].message.content)

    # Log the answer for reference
    LOG.info(pprint.pformat(answer))

except json.JSONDecodeError as e:
    # Handle JSON decoding errors
    LOG.error(f"Can't decode chat response: {chat_response}")
except Exception as e:
    # Handle other errors
    LOG.error(f"Can't get chat response: {e}")
```
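For orientation before we use `answer` below: given the `system-format` template, the decoded response is assumed to look roughly like the following Python value. Treat the exact field names as a sketch; the template itself is the source of truth:

```python
# Shape sketch of a decoded response; the exact fields come from system-format
answer = {
    "thoughts": {
        "reasoning": "The task asks for a file to be written.",
        "speak": "I will write 'Washington' to output.txt.",
    },
    "ability": {
        "name": "write_file",
        "args": {"file_path": "output.txt", "data": "Washington"},
    },
}
```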

Extracting clear messages from LLM outputs can be complex. Our method is simple and works with GPT-3.5 and GPT-4. Future guides will show more ways to interpret LLM outputs. The goal? To go beyond JSON, as some LLMs work best with other response types. Stay tuned!

---


## Using and Creating Abilities

Abilities are the gears and levers that enable the agent to interact with the tasks at hand. Let's unpack the mechanisms behind these abilities and how you can harness, and even extend, them.

In the Forge folder, there's an `actions` folder containing `registry.py`, `finish.py`, and a `file_system` subfolder. You can also add your own abilities here. `registry.py` is the main file for abilities. It contains the `@action` decorator and the `ActionRegister` class. This class actively tracks abilities and outlines their function. The base Agent class includes a default action register, available via `self.abilities`. It looks like this:

```python
self.abilities = ActionRegister(self)
```

The `ActionRegister` has two key methods: `list_abilities_for_prompt` prepares abilities for inclusion in prompts, and `run_action` executes an ability. An ability is a function with the `@action` decorator. It must accept specific parameters, including the agent and `task_id`.

```python
@action(
    name="write_file",
    description="Write data to a file",
    parameters=[
        {
            "name": "file_path",
            "description": "Path to the file",
            "type": "string",
            "required": True,
        },
        {
            "name": "data",
            "description": "Data to write to the file",
            "type": "bytes",
            "required": True,
        },
    ],
    output_type="None",
)
async def write_file(agent, task_id: str, file_path: str, data: bytes) -> None:
    pass
```

The `@action` decorator defines the ability's details, like its identity (name), functionality (description), and operational parameters.

## Example of a Custom Ability: Webpage Fetcher

```python
import requests

@action(
    name="fetch_webpage",
    description="Retrieve the content of a webpage",
    parameters=[
        {
            "name": "url",
            "description": "Webpage URL",
            "type": "string",
            "required": True,
        }
    ],
    output_type="string",
)
async def fetch_webpage(agent, task_id: str, url: str) -> str:
    response = requests.get(url)
    return response.text
```

This ability, `fetch_webpage`, accepts a URL as input and returns the HTML content of the webpage as a string. Custom abilities let you add more features to your agent. They can integrate other tools and libraries to enhance its functions. To make a custom ability, follow the `@action` decorator structure above and fill in the implementation. With abilities like `fetch_webpage`, your agent can handle complex tasks efficiently.

## Running an Ability

Now that you understand abilities and how to create them, let's use them. The last piece is the `execute_step` function. Our goal is to understand the agent's response, find the ability, and run it.

First, we get the ability details from the agent's answer:

```python
# Extract the ability from the answer
ability = answer["ability"]
```

With the ability details in hand, we call the `run_action` function:

```python
# Run the ability and get the output
# We don't actually use the output in this example
output = await self.abilities.run_action(
    task_id, ability["name"], **ability["args"]
)
```

Here, we're invoking the specified ability. The `task_id` ensures continuity, `ability["name"]` pinpoints the exact function, and the arguments (`ability["args"]`) provide the necessary context.

Finally, we set the step's output to reflect the agent's thinking:

```python
# Set the step output to the "speak" part of the answer
step.output = answer["thoughts"]["speak"]

# Return the completed step
return step
```

And there you have it! Your first Smart Agent, sculpted with precision and purpose, stands ready to take on challenges. The stage is set. It's showtime!

Here is what your function should look like:

```python
async def execute_step(self, task_id: str, step_request: StepRequestBody) -> Step:
    # First we get the task this step is for, so we can access the task input
    task = await self.db.get_task(task_id)

    # Create a new step in the database
    step = await self.db.create_step(
        task_id=task_id, input=step_request, is_last=True
    )

    # Log the message
    LOG.info(f"\t✅ Final Step completed: {step.step_id} input: {step.input[:19]}")

    # Initialize the PromptEngine with the "gpt-3.5-turbo" model
    prompt_engine = PromptEngine("gpt-3.5-turbo")

    # Load the system and task prompts
    system_prompt = prompt_engine.load_prompt("system-format")

    # Initialize the messages list with the system prompt
    messages = [
        {"role": "system", "content": system_prompt},
    ]
    # Define the task parameters
    task_kwargs = {
        "task": task.input,
        "abilities": self.abilities.list_abilities_for_prompt(),
    }

    # Load the task prompt with the defined task parameters
    task_prompt = prompt_engine.load_prompt("task-step", **task_kwargs)

    # Append the task prompt to the messages list
    messages.append({"role": "user", "content": task_prompt})

    try:
        # Define the parameters for the chat completion request
        chat_completion_kwargs = {
            "messages": messages,
            "model": "gpt-3.5-turbo",
        }
        # Make the chat completion request and parse the response
        chat_response = await chat_completion_request(**chat_completion_kwargs)
        answer = json.loads(chat_response.choices[0].message.content)

        # Log the answer for debugging purposes
        LOG.info(pprint.pformat(answer))

    except json.JSONDecodeError as e:
        # Handle JSON decoding errors
        LOG.error(f"Unable to decode chat response: {chat_response}")
    except Exception as e:
        # Handle other exceptions
        LOG.error(f"Unable to generate chat response: {e}")

    # Extract the ability from the answer
    # (NOTE: if an exception was logged above, `answer` is undefined at this
    # point; in production code you would return early or re-raise instead)
    ability = answer["ability"]

    # Run the ability and get the output
    # We don't actually use the output in this example
    output = await self.abilities.run_action(
        task_id, ability["name"], **ability["args"]
    )

    # Set the step output to the "speak" part of the answer
    step.output = answer["thoughts"]["speak"]

    # Return the completed step
    return step
```

## Interacting with your Agent
> ⚠️ Heads up: the UI and benchmark are still in the oven, so they might be a tad glitchy.

With the heavy lifting of crafting our Smart Agent behind us, it's high time to see it in action. Kick things off by firing up the agent with this command:
```bash
./run agent start SmartAgent
```

Once your digital playground is all set, your terminal should light up with:
```bash
# ... ASCII-art "AutoGPT Forge" banner, ending with the version tag v0.2.0 ...

[2023-09-27 15:39:07,832] [forge.sdk.agent] [INFO] 📝 Agent server starting on http://localhost:8000
```
1. **Get Started**
   - Click the link to access the AutoGPT Agent UI.

2. **Login**
   - Log in using your Google or GitHub credentials.

3. **Navigate to Benchmarking**
   - Look to the left, and you'll spot a trophy icon. Click it to enter the benchmarking arena.



4. **Select the 'WriteFile' Test**
   - Choose the 'WriteFile' test from the available options.

5. **Initiate the Test Suite**
   - Hit 'Initiate test suite' to start the benchmarking process.

6. **Monitor in Real-Time**
   - Keep your eyes on the right panel as it displays real-time output.

7. **Check the Console**
   - For additional information, you can also monitor your console for progress updates and messages.
```bash
📝  📦 Task created: 70518b75-0104-49b0-923e-f607719d042b input: Write the word 'Washington' to a .txt fi...
📝  ✅ Final Step completed: a736c45f-65a5-4c44-a697-f1d6dcd94d5c input: y
```
If you see this, you've done it!

8. **Troubleshooting**
   - If you encounter any issues or see cryptic error messages, don't worry. Just hit the retry button. Remember, LLMs are powerful but may occasionally need some guidance.

## Wrap Up
In our next tutorial, we'll further refine this process, enhancing the agent's capabilities through the addition of memory!

Until then, keep experimenting and pushing the boundaries of AI. Happy coding! 🚀
@@ -1,75 +0,0 @@
# Memory Integration: Enabling Your Agent to Remember and Learn

## Introduction
- Importance of Memory Integration in AI Agents
- Overview of Memory Mechanisms in AutoGPT

## Section 1: Understanding Memory Integration
- Concept of Memory in AI Agents
- Types of Memory: Short-term vs. Long-term

## Section 2: Implementing Memory in Your Agent
- Setting up Memory Structures in the Forge Environment
- Utilizing Agent Protocol for Memory Integration

## Section 3: Developing Learning Mechanisms
- Creating Learning Algorithms for Your Agent
- Implementing Learning Mechanisms using Task and Artifact Schemas

## Section 4: Testing and Optimizing Memory Integration
- Employing AGBenchmark for Memory Testing
- Optimizing Memory for Enhanced Performance and Efficiency

## Section 5: Best Practices in Memory Integration
- Tips and Strategies for Effective Memory Integration
- Avoiding Common Pitfalls in Memory Development

## Conclusion
- Recap of the Tutorial
- Future Directions in Memory Integration

## Additional Resources

From **The Rise and Potential of Large Language Model Based Agents: A Survey**, *Zhiheng Xi (Fudan University) et al. arXiv.* [[paper](https://arxiv.org/abs/2305.14497)] [[code](https://github.com/woooodyy/llm-agent-paper-list)]

##### Memory capability

###### Raising the length limit of Transformers

- [2023/05] **Randomized Positional Encodings Boost Length Generalization of Transformers.** *Anian Ruoss (DeepMind) et al. arXiv.* [[paper](https://arxiv.org/abs/2305.16843)] [[code](https://github.com/google-deepmind/randomized_positional_encodings)]
- [2023/03] **CoLT5: Faster Long-Range Transformers with Conditional Computation.** *Joshua Ainslie (Google Research) et al. arXiv.* [[paper](https://arxiv.org/abs/2303.09752)]
- [2022/03] **Efficient Classification of Long Documents Using Transformers.** *Hyunji Hayley Park (Illinois University) et al. arXiv.* [[paper](https://arxiv.org/abs/2203.11258)] [[code](https://github.com/amazon-science/efficient-longdoc-classification)]
- [2021/12] **LongT5: Efficient Text-To-Text Transformer for Long Sequences.** *Mandy Guo (Google Research) et al. arXiv.* [[paper](https://arxiv.org/abs/2112.07916)] [[code](https://github.com/google-research/longt5)]
- [2019/10] **BART: Denoising Sequence-to-Sequence Pre-training for Natural Language Generation, Translation, and Comprehension.** *Michael Lewis (Facebook AI) et al. arXiv.* [[paper](https://arxiv.org/abs/1910.13461)] [[code](https://github.com/huggingface/transformers/tree/main/src/transformers/models/bart)]

###### Summarizing memory

- [2023/08] **ExpeL: LLM Agents Are Experiential Learners.** *Andrew Zhao (Tsinghua University) et al. arXiv.* [[paper](https://arxiv.org/abs/2308.10144)] [[code](https://github.com/Andrewzh112/ExpeL)]
- [2023/08] **ChatEval: Towards Better LLM-based Evaluators through Multi-Agent Debate.** *Chi-Min Chan (Tsinghua University) et al. arXiv.* [[paper](https://arxiv.org/abs/2308.07201)] [[code](https://github.com/thunlp/ChatEval)]
- [2023/05] **MemoryBank: Enhancing Large Language Models with Long-Term Memory.** *Wanjun Zhong (Harbin Institute of Technology) et al. arXiv.* [[paper](https://arxiv.org/abs/2305.10250)] [[code](https://github.com/zhongwanjun/memorybank-siliconfriend)]
- [2023/04] **Generative Agents: Interactive Simulacra of Human Behavior.** *Joon Sung Park (Stanford University) et al. arXiv.* [[paper](https://arxiv.org/abs/2304.03442)] [[code](https://github.com/joonspk-research/generative_agents)]
- [2023/04] **Unleashing Infinite-Length Input Capacity for Large-scale Language Models with Self-Controlled Memory System.** *Xinnian Liang (Beihang University) et al. arXiv.* [[paper](https://arxiv.org/abs/2304.13343)] [[code](https://github.com/wbbeyourself/scm4llms)]
- [2023/03] **Reflexion: Language Agents with Verbal Reinforcement Learning.** *Noah Shinn (Northeastern University) et al. arXiv.* [[paper](https://arxiv.org/abs/2303.11366)] [[code](https://github.com/noahshinn024/reflexion)]
- [2023/05] **RecurrentGPT: Interactive Generation of (Arbitrarily) Long Text.** *Wangchunshu Zhou (AIWaves) et al. arXiv.* [[paper](https://arxiv.org/pdf/2305.13304.pdf)] [[code](https://github.com/aiwaves-cn/RecurrentGPT)]


###### Compressing memories with vectors or data structures

- [2023/07] **Communicative Agents for Software Development.** *Chen Qian (Tsinghua University) et al. arXiv.* [[paper](https://arxiv.org/abs/2307.07924)] [[code](https://github.com/openbmb/chatdev)]
- [2023/06] **ChatDB: Augmenting LLMs with Databases as Their Symbolic Memory.** *Chenxu Hu (Tsinghua University) et al. arXiv.* [[paper](https://arxiv.org/abs/2306.03901)] [[code](https://github.com/huchenxucs/ChatDB)]
- [2023/05] **Ghost in the Minecraft: Generally Capable Agents for Open-World Environments via Large Language Models with Text-based Knowledge and Memory.** *Xizhou Zhu (Tsinghua University) et al. arXiv.* [[paper](https://arxiv.org/abs/2305.17144)] [[code](https://github.com/OpenGVLab/GITM)]
- [2023/05] **RET-LLM: Towards a General Read-Write Memory for Large Language Models.** *Ali Modarressi (LMU Munich) et al. arXiv.* [[paper](https://arxiv.org/abs/2305.14322)] [[code](https://github.com/tloen/alpaca-lora)]
- [2023/05] **RecurrentGPT: Interactive Generation of (Arbitrarily) Long Text.** *Wangchunshu Zhou (AIWaves) et al. arXiv.* [[paper](https://arxiv.org/pdf/2305.13304.pdf)] [[code](https://github.com/aiwaves-cn/RecurrentGPT)]

##### Memory retrieval

- [2023/08] **Memory Sandbox: Transparent and Interactive Memory Management for Conversational Agents.** *Ziheng Huang (University of California, San Diego) et al. arXiv.* [[paper](https://arxiv.org/abs/2308.01542)]
- [2023/08] **AgentSims: An Open-Source Sandbox for Large Language Model Evaluation.** *Jiaju Lin (PTA Studio) et al. arXiv.* [[paper](https://arxiv.org/abs/2308.04026)] [[project page](https://www.agentsims.com/)] [[code](https://github.com/py499372727/AgentSims/)]
- [2023/06] **ChatDB: Augmenting LLMs with Databases as Their Symbolic Memory.** *Chenxu Hu (Tsinghua University) et al. arXiv.* [[paper](https://arxiv.org/abs/2306.03901)] [[code](https://github.com/huchenxucs/ChatDB)]
- [2023/05] **MemoryBank: Enhancing Large Language Models with Long-Term Memory.** *Wanjun Zhong (Harbin Institute of Technology) et al. arXiv.* [[paper](https://arxiv.org/abs/2305.10250)] [[code](https://github.com/zhongwanjun/memorybank-siliconfriend)]
- [2023/04] **Generative Agents: Interactive Simulacra of Human Behavior.** *Joon Sung Park (Stanford University) et al. arXiv.* [[paper](https://arxiv.org/abs/2304.03442)] [[code](https://github.com/joonspk-research/generative_agents)]
- [2023/05] **RecurrentGPT: Interactive Generation of (Arbitrarily) Long Text.** *Wangchunshu Zhou (AIWaves) et al. arXiv.* [[paper](https://arxiv.org/pdf/2305.13304.pdf)] [[code](https://github.com/aiwaves-cn/RecurrentGPT)]

## Appendix
- Examples of Memory Integration Implementations
- Glossary of Memory-Related Terms
45
classic/frontend/.gitignore
vendored
@@ -1,45 +0,0 @@
# Miscellaneous
*.class
*.log
*.pyc
*.swp
.DS_Store
.atom/
.buildlog/
.history
.svn/
migrate_working_dir/

# IntelliJ related
*.iml
*.ipr
*.iws
.idea/

# The .vscode folder contains launch configuration and tasks you configure in
# VS Code which you may wish to be included in version control, so this line
# is commented out by default.
#.vscode/

# Flutter/Dart/Pub related
**/doc/api/
**/ios/Flutter/.last_build_id
.dart_tool/
.flutter-plugins
.flutter-plugins-dependencies
.packages
.pub-cache/
.pub/
/build/*
!/build/web/

# Symbolication related
app.*.symbols

# Obfuscation related
app.*.map.json

# Android Studio will place build artifacts here
/android/app/debug
/android/app/profile
/android/app/release
@@ -1,45 +0,0 @@
# This file tracks properties of this Flutter project.
# Used by Flutter tool to assess capabilities and perform upgrades etc.
#
# This file should be version controlled.

version:
  revision: d11aff97d2df15a076d285f6ad18da75c0d75ddd
  channel: beta

project_type: app

# Tracks metadata for the flutter migrate command
migration:
  platforms:
    - platform: root
      create_revision: d11aff97d2df15a076d285f6ad18da75c0d75ddd
      base_revision: d11aff97d2df15a076d285f6ad18da75c0d75ddd
    - platform: android
      create_revision: d11aff97d2df15a076d285f6ad18da75c0d75ddd
      base_revision: d11aff97d2df15a076d285f6ad18da75c0d75ddd
    - platform: ios
      create_revision: d11aff97d2df15a076d285f6ad18da75c0d75ddd
      base_revision: d11aff97d2df15a076d285f6ad18da75c0d75ddd
    - platform: linux
      create_revision: d11aff97d2df15a076d285f6ad18da75c0d75ddd
      base_revision: d11aff97d2df15a076d285f6ad18da75c0d75ddd
    - platform: macos
      create_revision: d11aff97d2df15a076d285f6ad18da75c0d75ddd
      base_revision: d11aff97d2df15a076d285f6ad18da75c0d75ddd
    - platform: web
      create_revision: d11aff97d2df15a076d285f6ad18da75c0d75ddd
      base_revision: d11aff97d2df15a076d285f6ad18da75c0d75ddd
    - platform: windows
      create_revision: d11aff97d2df15a076d285f6ad18da75c0d75ddd
      base_revision: d11aff97d2df15a076d285f6ad18da75c0d75ddd

  # User provided section

  # List of Local paths (relative to this file) that should be
  # ignored by the migrate tool.
  #
  # Files that are not part of the templates will be ignored by default.
  unmanaged_files:
    - 'lib/main.dart'
    - 'ios/Runner.xcodeproj/project.pbxproj'
@@ -1,64 +0,0 @@
# AutoGPT Flutter Client

## Description

This repository contains the Flutter client for the AutoGPT project. The application lets users discuss various tasks with a single agent. The app is built to be cross-platform and runs on Web, Android, iOS, Windows, and Mac.

## Features

- List and manage multiple tasks.
- Engage in chat conversations related to selected tasks.

## Design document

The design document for this project provides a detailed outline of the architecture, components, and other important aspects of this application. Please note that this is a living, growing document and it is subject to change as the project evolves.

You can access the design document [here](https://docs.google.com/document/d/1S-o2np1gq5JwFq40wPHDUVLi-mylz4WMvCB8psOUjc8/).

## Requirements

- Flutter 3.x
- Dart 3.x

Flutter comes with Dart; to install Flutter, follow the instructions here: https://docs.flutter.dev/get-started/install

## Installation

1. **Clone the repo:**
   ```
   git clone https://github.com/Significant-Gravitas/AutoGPT.git
   ```

2. **Navigate to the project directory:**
   ```
   cd AutoGPT/frontend
   ```

3. **Get Flutter packages:**
   ```
   flutter pub get
   ```

4. **Run the app:**
   ```
   # For Chromium users on Linux:
   # export CHROME_EXECUTABLE=/usr/bin/chromium
   flutter run -d chrome --web-port 5000
   ```

## Project Structure

- `lib/`: Contains the main source code for the application.
  - `models/`: Data models that define the structure of the objects used in the app.
  - `views/`: The UI components of the application.
  - `viewmodels/`: The business logic and data handling for the views.
  - `services/`: Contains the service classes that handle communication with backend APIs and other external data sources. These services are used to fetch and update the data the app uses, and they are consumed by the ViewModels.
- `test/`: Contains the test files for unit and widget tests.

## Responsive Design

The app features a responsive design that adapts to different screen sizes and orientations. On larger screens (Web, Windows, Mac), views are displayed side by side horizontally. On smaller screens (Android, iOS), views are displayed in a tab-bar controller layout.

## License

This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
@@ -1,29 +0,0 @@
# This file configures the analyzer, which statically analyzes Dart code to
# check for errors, warnings, and lints.
#
# The issues identified by the analyzer are surfaced in the UI of Dart-enabled
# IDEs (https://dart.dev/tools#ides-and-editors). The analyzer can also be
# invoked from the command line by running `flutter analyze`.

# The following line activates a set of recommended lints for Flutter apps,
# packages, and plugins designed to encourage good coding practices.
include: package:flutter_lints/flutter.yaml

linter:
  # The lint rules applied to this project can be customized in the
  # section below to disable rules from the `package:flutter_lints/flutter.yaml`
  # included above or to enable additional rules. A list of all available lints
  # and their documentation is published at
  # https://dart-lang.github.io/linter/lints/index.html.
  #
  # Instead of disabling a lint rule for the entire project in the
  # section below, it can also be suppressed for a single line of code
  # or a specific dart file by using the `// ignore: name_of_lint` and
  # `// ignore_for_file: name_of_lint` syntax on the line or in the file
  # producing the lint.
  rules:
    # avoid_print: false  # Uncomment to disable the `avoid_print` rule
    # prefer_single_quotes: true  # Uncomment to enable the `prefer_single_quotes` rule

# Additional information about this file can be found at
# https://dart.dev/guides/language/analysis-options
13
classic/frontend/android/.gitignore
vendored
@@ -1,13 +0,0 @@
gradle-wrapper.jar
/.gradle
/captures/
/gradlew
/gradlew.bat
/local.properties
GeneratedPluginRegistrant.java

# Remember to never publicly share your keystore.
# See https://flutter.dev/docs/deployment/android#reference-the-keystore-from-the-app
key.properties
**/*.keystore
**/*.jks
@@ -1,72 +0,0 @@
def localProperties = new Properties()
def localPropertiesFile = rootProject.file('local.properties')
if (localPropertiesFile.exists()) {
    localPropertiesFile.withReader('UTF-8') { reader ->
        localProperties.load(reader)
    }
}

def flutterRoot = localProperties.getProperty('flutter.sdk')
if (flutterRoot == null) {
    throw new GradleException("Flutter SDK not found. Define location with flutter.sdk in the local.properties file.")
}

def flutterVersionCode = localProperties.getProperty('flutter.versionCode')
if (flutterVersionCode == null) {
    flutterVersionCode = '1'
}

def flutterVersionName = localProperties.getProperty('flutter.versionName')
if (flutterVersionName == null) {
    flutterVersionName = '1.0'
}

apply plugin: 'com.android.application'
apply plugin: 'kotlin-android'
apply from: "$flutterRoot/packages/flutter_tools/gradle/flutter.gradle"

android {
    namespace "com.example.auto_gpt_flutter_client"
    compileSdkVersion flutter.compileSdkVersion
    ndkVersion flutter.ndkVersion

    compileOptions {
        sourceCompatibility JavaVersion.VERSION_1_8
        targetCompatibility JavaVersion.VERSION_1_8
    }

    kotlinOptions {
        jvmTarget = '1.8'
    }

    sourceSets {
        main.java.srcDirs += 'src/main/kotlin'
    }

    defaultConfig {
        // TODO: Specify your own unique Application ID (https://developer.android.com/studio/build/application-id.html).
        applicationId "com.example.auto_gpt_flutter_client"
        // You can update the following values to match your application needs.
        // For more information, see: https://docs.flutter.dev/deployment/android#reviewing-the-gradle-build-configuration.
        minSdkVersion flutter.minSdkVersion
        targetSdkVersion flutter.targetSdkVersion
        versionCode flutterVersionCode.toInteger()
        versionName flutterVersionName
    }

    buildTypes {
        release {
            // TODO: Add your own signing config for the release build.
            // Signing with the debug keys for now, so `flutter run --release` works.
            signingConfig signingConfigs.debug
        }
    }
}

flutter {
    source '../..'
}

dependencies {
    implementation "org.jetbrains.kotlin:kotlin-stdlib-jdk7:$kotlin_version"
}
@@ -1,39 +0,0 @@
{
  "project_info": {
    "project_number": "387936576242",
    "project_id": "prod-auto-gpt",
    "storage_bucket": "prod-auto-gpt.appspot.com"
  },
  "client": [
    {
      "client_info": {
        "mobilesdk_app_id": "1:387936576242:android:dad0614943c3242ad7a66b",
        "android_client_info": {
          "package_name": "com.example.auto_gpt_flutter_client"
        }
      },
      "oauth_client": [
        {
          "client_id": "387936576242-iejdacrjljds7hf99q0p6eqna8rju3sb.apps.googleusercontent.com",
          "client_type": 3
        }
      ],
      "api_key": [
        {
          "current_key": "AIzaSyBvDJ9m38ZgRGquV3ZoTaldQTFCxFHdkiI"
        }
      ],
      "services": {
        "appinvite_service": {
          "other_platform_oauth_client": [
            {
              "client_id": "387936576242-9a68qea5415i71e4mk545pdee92k9kfo.apps.googleusercontent.com",
              "client_type": 3
            }
          ]
        }
      }
    }
  ],
  "configuration_version": "1"
}
||||
@@ -1,7 +0,0 @@
|
||||
<manifest xmlns:android="http://schemas.android.com/apk/res/android">
|
||||
<!-- The INTERNET permission is required for development. Specifically,
|
||||
the Flutter tool needs it to communicate with the running application
|
||||
to allow setting breakpoints, to provide hot reload, etc.
|
||||
-->
|
||||
<uses-permission android:name="android.permission.INTERNET"/>
|
||||
</manifest>
|
||||
@@ -1,33 +0,0 @@
<manifest xmlns:android="http://schemas.android.com/apk/res/android">
    <application
        android:label="auto_gpt_flutter_client"
        android:name="${applicationName}"
        android:icon="@mipmap/ic_launcher">
        <activity
            android:name=".MainActivity"
            android:exported="true"
            android:launchMode="singleTop"
            android:theme="@style/LaunchTheme"
            android:configChanges="orientation|keyboardHidden|keyboard|screenSize|smallestScreenSize|locale|layoutDirection|fontScale|screenLayout|density|uiMode"
            android:hardwareAccelerated="true"
            android:windowSoftInputMode="adjustResize">
            <!-- Specifies an Android theme to apply to this Activity as soon as
                 the Android process has started. This theme is visible to the user
                 while the Flutter UI initializes. After that, this theme continues
                 to determine the Window background behind the Flutter UI. -->
            <meta-data
              android:name="io.flutter.embedding.android.NormalTheme"
              android:resource="@style/NormalTheme"
              />
            <intent-filter>
                <action android:name="android.intent.action.MAIN"/>
                <category android:name="android.intent.category.LAUNCHER"/>
            </intent-filter>
        </activity>
        <!-- Don't delete the meta-data below.
             This is used by the Flutter tool to generate GeneratedPluginRegistrant.java -->
        <meta-data
            android:name="flutterEmbedding"
            android:value="2" />
    </application>
</manifest>
@@ -1,6 +0,0 @@
package com.example.auto_gpt_flutter_client

import io.flutter.embedding.android.FlutterActivity

class MainActivity: FlutterActivity() {
}
@@ -1,12 +0,0 @@
<?xml version="1.0" encoding="utf-8"?>
<!-- Modify this file to customize your launch splash screen -->
<layer-list xmlns:android="http://schemas.android.com/apk/res/android">
    <item android:drawable="?android:colorBackground" />

    <!-- You can insert your own image assets here -->
    <!-- <item>
        <bitmap
            android:gravity="center"
            android:src="@mipmap/launch_image" />
    </item> -->
</layer-list>
@@ -1,12 +0,0 @@
<?xml version="1.0" encoding="utf-8"?>
<!-- Modify this file to customize your launch splash screen -->
<layer-list xmlns:android="http://schemas.android.com/apk/res/android">
    <item android:drawable="@android:color/white" />

    <!-- You can insert your own image assets here -->
    <!-- <item>
        <bitmap
            android:gravity="center"
            android:src="@mipmap/launch_image" />
    </item> -->
</layer-list>
[Deleted binary image files (launcher icons). Before: 544 B, 442 B, 721 B, 1.0 KiB, 1.4 KiB]
@@ -1,18 +0,0 @@
<?xml version="1.0" encoding="utf-8"?>
<resources>
    <!-- Theme applied to the Android Window while the process is starting when the OS's Dark Mode setting is on -->
    <style name="LaunchTheme" parent="@android:style/Theme.Black.NoTitleBar">
        <!-- Show a splash screen on the activity. Automatically removed when
             the Flutter engine draws its first frame -->
        <item name="android:windowBackground">@drawable/launch_background</item>
    </style>
    <!-- Theme applied to the Android Window as soon as the process has started.
         This theme determines the color of the Android Window while your
         Flutter UI initializes, as well as behind your Flutter UI while its
         running.

         This Theme is only used starting with V2 of Flutter's Android embedding. -->
    <style name="NormalTheme" parent="@android:style/Theme.Black.NoTitleBar">
        <item name="android:windowBackground">?android:colorBackground</item>
    </style>
</resources>
@@ -1,18 +0,0 @@
<?xml version="1.0" encoding="utf-8"?>
<resources>
    <!-- Theme applied to the Android Window while the process is starting when the OS's Dark Mode setting is off -->
    <style name="LaunchTheme" parent="@android:style/Theme.Light.NoTitleBar">
        <!-- Show a splash screen on the activity. Automatically removed when
             the Flutter engine draws its first frame -->
        <item name="android:windowBackground">@drawable/launch_background</item>
    </style>
    <!-- Theme applied to the Android Window as soon as the process has started.
         This theme determines the color of the Android Window while your
         Flutter UI initializes, as well as behind your Flutter UI while its
         running.

         This Theme is only used starting with V2 of Flutter's Android embedding. -->
    <style name="NormalTheme" parent="@android:style/Theme.Light.NoTitleBar">
        <item name="android:windowBackground">?android:colorBackground</item>
    </style>
</resources>
@@ -1,7 +0,0 @@
<manifest xmlns:android="http://schemas.android.com/apk/res/android">
    <!-- The INTERNET permission is required for development. Specifically,
         the Flutter tool needs it to communicate with the running application
         to allow setting breakpoints, to provide hot reload, etc.
    -->
    <uses-permission android:name="android.permission.INTERNET"/>
</manifest>
@@ -1,31 +0,0 @@
buildscript {
    ext.kotlin_version = '1.7.10'
    repositories {
        google()
        mavenCentral()
    }

    dependencies {
        classpath 'com.android.tools.build:gradle:7.3.0'
        classpath "org.jetbrains.kotlin:kotlin-gradle-plugin:$kotlin_version"
    }
}

allprojects {
    repositories {
        google()
        mavenCentral()
    }
}

rootProject.buildDir = '../build'
subprojects {
    project.buildDir = "${rootProject.buildDir}/${project.name}"
}
subprojects {
    project.evaluationDependsOn(':app')
}

tasks.register("clean", Delete) {
    delete rootProject.buildDir
}
@@ -1,3 +0,0 @@
org.gradle.jvmargs=-Xmx1536M
android.useAndroidX=true
android.enableJetifier=true
@@ -1,5 +0,0 @@
distributionBase=GRADLE_USER_HOME
distributionPath=wrapper/dists
zipStoreBase=GRADLE_USER_HOME
zipStorePath=wrapper/dists
distributionUrl=https\://services.gradle.org/distributions/gradle-7.5-all.zip
@@ -1,11 +0,0 @@
include ':app'

def localPropertiesFile = new File(rootProject.projectDir, "local.properties")
def properties = new Properties()

assert localPropertiesFile.exists()
localPropertiesFile.withReader("UTF-8") { reader -> properties.load(reader) }

def flutterSdkPath = properties.getProperty("flutter.sdk")
assert flutterSdkPath != null, "flutter.sdk not set in local.properties"
apply from: "$flutterSdkPath/packages/flutter_tools/gradle/app_plugin_loader.gradle"
@@ -1,360 +0,0 @@
{
  "edges": [
    {
      "arrows": "to",
      "from": "agbenchmark/generate_test.py::TestWriteFile::test_method[challenge_data0]",
      "id": "agbenchmark/generate_test.py::TestWriteFile::test_method[challenge_data0]_to_agbenchmark/generate_test.py::TestReadFile::test_method[challenge_data0]",
      "to": "agbenchmark/generate_test.py::TestReadFile::test_method[challenge_data0]"
    },
    {
      "arrows": "to",
      "from": "agbenchmark/generate_test.py::TestAnswerQuestionSmallCsv::test_method[challenge_data0]",
      "id": "agbenchmark/generate_test.py::TestAnswerQuestionSmallCsv::test_method[challenge_data0]_to_agbenchmark/generate_test.py::TestAnswerQuestionCsv::test_method[challenge_data0]",
      "to": "agbenchmark/generate_test.py::TestAnswerQuestionCsv::test_method[challenge_data0]"
    },
    {
      "arrows": "to",
      "from": "agbenchmark/generate_test.py::TestReadFile::test_method[challenge_data0]",
      "id": "agbenchmark/generate_test.py::TestReadFile::test_method[challenge_data0]_to_agbenchmark/generate_test.py::TestAnswerQuestionSmallCsv::test_method[challenge_data0]",
      "to": "agbenchmark/generate_test.py::TestAnswerQuestionSmallCsv::test_method[challenge_data0]"
    },
    {
      "arrows": "to",
      "from": "agbenchmark/generate_test.py::TestAnswerQuestionCsv::test_method[challenge_data0]",
      "id": "agbenchmark/generate_test.py::TestAnswerQuestionCsv::test_method[challenge_data0]_to_agbenchmark/generate_test.py::TestAnswerQuestionCombineCsv::test_method[challenge_data0]",
      "to": "agbenchmark/generate_test.py::TestAnswerQuestionCombineCsv::test_method[challenge_data0]"
    },
    {
      "arrows": "to",
      "from": "agbenchmark/generate_test.py::TestCombineCsv::test_method[challenge_data0]",
      "id": "agbenchmark/generate_test.py::TestCombineCsv::test_method[challenge_data0]_to_agbenchmark/generate_test.py::TestAnswerQuestionCombineCsv::test_method[challenge_data0]",
      "to": "agbenchmark/generate_test.py::TestAnswerQuestionCombineCsv::test_method[challenge_data0]"
    },
    {
      "arrows": "to",
      "from": "agbenchmark/generate_test.py::TestLabelCsv::test_method[challenge_data0]",
      "id": "agbenchmark/generate_test.py::TestLabelCsv::test_method[challenge_data0]_to_agbenchmark/generate_test.py::TestCombineCsv::test_method[challenge_data0]",
      "to": "agbenchmark/generate_test.py::TestCombineCsv::test_method[challenge_data0]"
    },
    {
      "arrows": "to",
      "from": "agbenchmark/generate_test.py::TestSortCsv::test_method[challenge_data0]",
      "id": "agbenchmark/generate_test.py::TestSortCsv::test_method[challenge_data0]_to_agbenchmark/generate_test.py::TestLabelCsv::test_method[challenge_data0]",
      "to": "agbenchmark/generate_test.py::TestLabelCsv::test_method[challenge_data0]"
    },
    {
      "arrows": "to",
      "from": "agbenchmark/generate_test.py::TestReadFile::test_method[challenge_data0]",
      "id": "agbenchmark/generate_test.py::TestReadFile::test_method[challenge_data0]_to_agbenchmark/generate_test.py::TestSortCsv::test_method[challenge_data0]",
      "to": "agbenchmark/generate_test.py::TestSortCsv::test_method[challenge_data0]"
    }
  ],
  "nodes": [
    {
      "color": "grey",
      "data": {
        "category": [
          "general",
          "coding",
          "scrape_synthesize",
          "data"
        ],
        "cutoff": 60,
        "dependencies": [
          "TestWriteFile"
        ],
        "eval_id": "f219f3d3-a41b-45a9-a3d0-389832086ee8",
        "ground": {
          "answer": "The content of output.txt should be 'Hello World!'",
          "eval": {
            "type": "file"
          },
          "files": [
            "output.txt"
          ],
          "should_contain": [
            "Hello World!"
          ]
        },
        "info": {
          "description": "Tests if the agent can read a file.",
          "difficulty": "interface",
          "side_effects": [
            ""
          ]
        },
        "name": "TestReadFile",
        "task": "Read the file called file_to_read.txt and write its content to a file called output.txt"
      },
      "id": "agbenchmark/generate_test.py::TestReadFile::test_method[challenge_data0]",
      "label": "ReadFile",
      "shape": "dot"
    },
    {
      "color": "grey",
      "data": {
        "category": [
          "general",
          "coding",
          "scrape_synthesize",
          "data"
        ],
        "cutoff": 60,
        "dependencies": [],
        "eval_id": "021c695a-6cc4-46c2-b93a-f3a9b0f4d123",
        "ground": {
          "answer": "The word 'Washington', printed to a .txt file named anything",
          "eval": {
            "type": "file"
          },
          "files": [
            ".txt"
          ],
          "should_contain": [
            "Washington"
          ],
          "should_not_contain": []
        },
        "info": {
          "description": "Tests if the agent can write a file",
          "difficulty": "interface",
          "side_effects": [
            ""
          ]
        },
        "name": "TestWriteFile",
        "task": "Write the word 'Washington' to a .txt file"
      },
      "id": "agbenchmark/generate_test.py::TestWriteFile::test_method[challenge_data0]",
      "label": "WriteFile",
      "shape": "dot"
    },
    {
      "color": "grey",
      "data": {
        "category": [
          "data"
        ],
        "cutoff": 90,
        "dependencies": [
          "TestAnswerQuestionSmallCsv"
        ],
        "eval_id": "bb6e0a4b-7faf-4aa6-a524-548cddbc2732",
        "ground": {
          "answer": "The correct amount spent on utilities.",
          "eval": {
            "type": "file"
          },
          "files": [
            "output.txt"
          ],
          "should_contain": [
            "1861"
          ]
        },
        "info": {
          "description": "Tests if the agent can answer a question from a csv",
          "difficulty": "intermediate",
          "side_effects": [
            ""
          ]
        },
        "name": "TestAnswerQuestionCsv",
        "task": "How much was spent on utilities in total ? Write the answer in an output.txt file."
      },
      "id": "agbenchmark/generate_test.py::TestAnswerQuestionCsv::test_method[challenge_data0]",
      "label": "AnswerQuestionCsv",
      "shape": "dot"
    },
    {
      "color": "grey",
      "data": {
        "category": [
          "data",
          "general"
        ],
        "cutoff": 60,
        "dependencies": [
          "TestReadFile"
        ],
        "eval_id": "9df3f07a-5047-488f-b788-1e1f57eba970",
        "ground": {
          "answer": "The correct amount spent on utilities.",
          "eval": {
            "type": "file"
          },
          "files": [
            "output.txt"
          ],
          "should_contain": [
            "84"
          ]
        },
        "info": {
          "description": "Tests if the agent can answer a question from a small csv",
          "difficulty": "intermediate",
          "side_effects": [
            ""
          ]
        },
        "name": "TestAnswerQuestionSmallCsv",
        "task": "How much was spent on utilities in total ? Write the answer in an output.txt file."
      },
      "id": "agbenchmark/generate_test.py::TestAnswerQuestionSmallCsv::test_method[challenge_data0]",
      "label": "AnswerQuestionSmallCsv",
      "shape": "dot"
    },
    {
      "color": "grey",
      "data": {
        "category": [
          "data",
          "general"
        ],
        "cutoff": 120,
        "dependencies": [
          "TestAnswerQuestionCsv",
          "TestCombineCsv"
        ],
        "eval_id": "b1bb61cd-3d09-4a69-bb2a-9dbb3c477589",
        "ground": {
          "answer": "The correct amount spent on utilities.",
          "eval": {
            "type": "file"
          },
          "files": [
            "output.txt"
          ],
          "should_contain": [
            "1861"
          ]
        },
        "info": {
          "description": "Tests if the agent can answer a question from a csv",
          "difficulty": "intermediate",
          "side_effects": [
            ""
          ]
        },
        "name": "TestAnswerQuestionCombineCsv",
        "task": "How much was spent on utilities in total ? Write the answer in an output.txt file."
      },
      "id": "agbenchmark/generate_test.py::TestAnswerQuestionCombineCsv::test_method[challenge_data0]",
      "label": "AnswerQuestionCombineCsv",
      "shape": "dot"
    },
    {
      "color": "grey",
      "data": {
        "category": [
          "data",
          "general"
        ],
        "cutoff": 60,
        "dependencies": [
          "TestLabelCsv"
        ],
        "eval_id": "52467beb-b951-4356-9776-9a0ae46bb33b",
        "ground": {
          "answer": "The csv data is combined",
          "eval": {
            "type": "file"
          },
          "files": [
            "output.csv"
          ],
          "should_contain": [
            "Age,ID,Name,Occupation,Salary\n28,101,John,Engineer,80000\n34,102,Alice,Doctor,120000\n45,103,Bob,Lawyer,95000"
          ]
        },
        "info": {
          "description": "Tests if the agent can combine data from a csv",
          "difficulty": "intermediate",
          "side_effects": [
            ""
          ]
        },
        "name": "TestCombineCsv",
        "task": "The csvs 'file1.csv' and 'file2.csv' both have a column 'ID'. Combine these 2 csvs using the 'ID' column. Sort the rows by ID in ascending order and the columns alphabetically. Write the output in output.csv"
      },
      "id": "agbenchmark/generate_test.py::TestCombineCsv::test_method[challenge_data0]",
      "label": "CombineCsv",
      "shape": "dot"
    },
    {
      "color": "grey",
      "data": {
        "category": [
          "data"
        ],
        "cutoff": 60,
        "dependencies": [
          "TestSortCsv"
        ],
        "eval_id": "6e2bf1f0-6842-4704-8ed1-b17c2065bbac",
        "ground": {
          "answer": "The csv labelled",
          "eval": {
            "type": "file"
          },
          "files": [
            "output.csv"
          ],
          "should_contain": [
            "Item,Color\nBanana,yellow\nLeaf,green\nSky,blue\nSunflower,yellow\nGrass,green\nJeans,blue\nLemon,yellow\nTree,green\nOcean,blue\nDaisy,yellow\nFern,green"
          ]
        },
        "info": {
          "description": "Tests if the agent can label data in a csv",
          "difficulty": "basic",
          "side_effects": [
            ""
          ]
        },
        "name": "TestLabelCsv",
        "task": "The csv 'input.csv' has many items. create a 'Color' column for these items and classify them as either 'blue', 'green', or 'yellow' depending on what the most likely color is. Preserve the order of the rows. The color column should be the second column. Write the output in output.csv"
      },
      "id": "agbenchmark/generate_test.py::TestLabelCsv::test_method[challenge_data0]",
      "label": "LabelCsv",
      "shape": "dot"
    },
    {
      "color": "grey",
      "data": {
        "category": [
          "data",
          "general"
        ],
        "cutoff": 60,
        "dependencies": [
          "TestReadFile"
        ],
        "eval_id": "d59ec964-6f67-4b3d-a4de-c4436fc76f95",
        "ground": {
          "answer": "The csv sorted by date",
          "eval": {
            "type": "file"
          },
          "files": [
            "output.csv"
          ],
          "should_contain": [
            "id,name,timestamp\n1,Bob,2023-09-24 12:05:00\n2,Charlie,2023-09-24 12:10:00\n3,Alice,2023-09-25 14:10:00\n4,David,2023-09-26 16:20:00"
          ]
        },
        "info": {
          "description": "Tests if the agent can sort a csv",
          "difficulty": "basic",
          "side_effects": [
            ""
          ]
        },
        "name": "TestSortCsv",
        "task": "Sort the input.csv by the 'timestamp' column and write the new csv in the output.csv file. The order of the columns should be preserved."
      },
      "id": "agbenchmark/generate_test.py::TestSortCsv::test_method[challenge_data0]",
      "label": "SortCsv",
      "shape": "dot"
    }
  ]
}
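This deleted report is a vis.js-style dependency graph of agbenchmark challenges: each edge says the "from" challenge must pass before the "to" challenge, mirroring each node's "data.dependencies" list. A minimal sketch (assuming the file is saved as graph.json; this is a plain reading of the schema, not agbenchmark's own loader) that recovers a valid run order from the edges:

# Sketch: derive a runnable order of challenges from the graph's edges.
import json
from graphlib import TopologicalSorter  # Python 3.9+

with open("graph.json") as f:
    graph = json.load(f)

# Map each node id to the set of ids it depends on ("to" depends on "from").
deps: dict[str, set[str]] = {node["id"]: set() for node in graph["nodes"]}
for edge in graph["edges"]:
    deps[edge["to"]].add(edge["from"])

for test_id in TopologicalSorter(deps).static_order():
    print(test_id.split("::")[1])  # e.g. TestWriteFile, TestReadFile, ...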
[Deleted binary image files. Before: 17 KiB, 12 KiB, 32 KiB, 37 KiB, 16 KiB]
@@ -1,375 +0,0 @@
{
  "edges": [
    {
      "arrows": "to",
      "from": "agbenchmark/generate_test.py::TestWriteFile::test_method[challenge_data0]",
      "id": "agbenchmark/generate_test.py::TestWriteFile::test_method[challenge_data0]_to_agbenchmark/generate_test.py::TestReadFile::test_method[challenge_data0]",
      "to": "agbenchmark/generate_test.py::TestReadFile::test_method[challenge_data0]"
    },
    {
      "arrows": "to",
      "from": "agbenchmark/generate_test.py::TestSearch::test_method[challenge_data0]",
      "id": "agbenchmark/generate_test.py::TestSearch::test_method[challenge_data0]_to_agbenchmark/generate_test.py::TestBasicRetrieval::test_method[challenge_data0]",
      "to": "agbenchmark/generate_test.py::TestBasicRetrieval::test_method[challenge_data0]"
    },
    {
      "arrows": "to",
      "from": "agbenchmark/generate_test.py::TestWriteFile::test_method[challenge_data0]",
      "id": "agbenchmark/generate_test.py::TestWriteFile::test_method[challenge_data0]_to_agbenchmark/generate_test.py::TestSearch::test_method[challenge_data0]",
      "to": "agbenchmark/generate_test.py::TestSearch::test_method[challenge_data0]"
    },
    {
      "arrows": "to",
      "from": "agbenchmark/generate_test.py::TestRevenueRetrieval2::test_method[challenge_data0]",
      "id": "agbenchmark/generate_test.py::TestRevenueRetrieval2::test_method[challenge_data0]_to_agbenchmark/generate_test.py::TestTestGetInformation::test_method[challenge_data0]",
      "to": "agbenchmark/generate_test.py::TestTestGetInformation::test_method[challenge_data0]"
    },
    {
      "arrows": "to",
      "from": "agbenchmark/generate_test.py::TestRevenueRetrieval::test_method[challenge_data0]",
      "id": "agbenchmark/generate_test.py::TestRevenueRetrieval::test_method[challenge_data0]_to_agbenchmark/generate_test.py::TestRevenueRetrieval2::test_method[challenge_data0]",
      "to": "agbenchmark/generate_test.py::TestRevenueRetrieval2::test_method[challenge_data0]"
    },
    {
      "arrows": "to",
      "from": "agbenchmark/generate_test.py::TestBasicRetrieval::test_method[challenge_data0]",
      "id": "agbenchmark/generate_test.py::TestBasicRetrieval::test_method[challenge_data0]_to_agbenchmark/generate_test.py::TestRevenueRetrieval::test_method[challenge_data0]",
      "to": "agbenchmark/generate_test.py::TestRevenueRetrieval::test_method[challenge_data0]"
    },
    {
      "arrows": "to",
      "from": "agbenchmark/generate_test.py::TestReadFile::test_method[challenge_data0]",
      "id": "agbenchmark/generate_test.py::TestReadFile::test_method[challenge_data0]_to_agbenchmark/generate_test.py::TestSynthesizeInfo::test_method[challenge_data0]",
      "to": "agbenchmark/generate_test.py::TestSynthesizeInfo::test_method[challenge_data0]"
    }
  ],
  "nodes": [
    {
      "color": "grey",
      "data": {
        "category": [
          "general",
          "coding",
          "scrape_synthesize",
          "data"
        ],
        "cutoff": 60,
        "dependencies": [
          "TestWriteFile"
        ],
        "eval_id": "f219f3d3-a41b-45a9-a3d0-389832086ee8",
        "ground": {
          "answer": "The content of output.txt should be 'Hello World!'",
          "eval": {
            "type": "file"
          },
          "files": [
            "output.txt"
          ],
          "should_contain": [
            "Hello World!"
          ]
        },
        "info": {
          "description": "Tests if the agent can read a file.",
          "difficulty": "interface",
          "side_effects": [
            ""
          ]
        },
        "name": "TestReadFile",
        "task": "Read the file called file_to_read.txt and write its content to a file called output.txt"
      },
      "id": "agbenchmark/generate_test.py::TestReadFile::test_method[challenge_data0]",
      "label": "ReadFile",
      "shape": "dot"
    },
    {
      "color": "grey",
      "data": {
        "category": [
          "general",
          "coding",
          "scrape_synthesize",
          "data"
        ],
        "cutoff": 60,
        "dependencies": [],
        "eval_id": "021c695a-6cc4-46c2-b93a-f3a9b0f4d123",
        "ground": {
          "answer": "The word 'Washington', printed to a .txt file named anything",
          "eval": {
            "type": "file"
          },
          "files": [
            ".txt"
          ],
          "should_contain": [
            "Washington"
          ],
          "should_not_contain": []
        },
        "info": {
          "description": "Tests if the agent can write a file",
          "difficulty": "interface",
          "side_effects": [
            ""
          ]
        },
        "name": "TestWriteFile",
        "task": "Write the word 'Washington' to a .txt file"
      },
      "id": "agbenchmark/generate_test.py::TestWriteFile::test_method[challenge_data0]",
      "label": "WriteFile",
      "shape": "dot"
    },
    {
      "color": "grey",
      "data": {
        "category": [
          "scrape_synthesize",
          "general"
        ],
        "cutoff": 60,
        "dependencies": [
          "TestSearch"
        ],
        "eval_id": "cd96e6b2-779d-4a4a-8367-d520023e27ae",
        "ground": {
          "answer": "\u00a325.89",
          "eval": {
            "type": "file"
          },
          "files": [
            ".txt"
          ],
          "should_contain": [
            "25.89"
          ],
          "should_not_contain": []
        },
        "info": {
          "description": "Tests if the agent can retrieve a specific information from a website.",
          "difficulty": "basic",
          "side_effects": []
        },
        "name": "TestBasicRetrieval",
        "task": "Write the price of the book in this url 'http://books.toscrape.com/catalogue/meditations_33/index.html' to a .txt file."
      },
      "id": "agbenchmark/generate_test.py::TestBasicRetrieval::test_method[challenge_data0]",
      "label": "BasicRetrieval",
      "shape": "dot"
    },
    {
      "color": "grey",
      "data": {
        "category": [
          "general",
          "scrape_synthesize"
        ],
        "cutoff": 120,
        "dependencies": [
          "TestWriteFile"
        ],
        "eval_id": "0bb23182-b434-402b-a73e-9c226469b959",
        "ground": {
          "answer": "This is a Heading\nThis is a paragraph.",
          "eval": {
            "type": "file"
          },
          "files": [
            ".txt"
          ],
          "should_contain": [
            "Heading",
            "paragraph"
          ],
          "should_not_contain": [
            "The",
            "the"
          ]
        },
        "info": {
          "description": "Tests if the agent can search.",
          "difficulty": "interface",
          "side_effects": [
            ""
          ]
        },
        "name": "TestSearch",
        "task": "Open 'https://silennaihin.com/random/plain.html' and paste all of the text on the page in a .txt file"
      },
      "id": "agbenchmark/generate_test.py::TestSearch::test_method[challenge_data0]",
      "label": "Search",
      "shape": "dot"
    },
    {
      "color": "grey",
      "data": {
        "category": [
          "scrape_synthesize",
          "general"
        ],
        "cutoff": 60,
        "dependencies": [
          "TestRevenueRetrieval2"
        ],
        "eval_id": "1758058c-f726-484f-96fa-f05e278e5ff5",
        "ground": {
          "answer": "The twitter handles of the two hosts of Latent Space.",
          "eval": {
            "type": "file"
          },
          "files": [
            "output.txt"
          ],
          "should_contain": [
            "swyx",
            "FanaHOVA"
          ],
          "should_not_contain": []
        },
        "info": {
          "description": "Tests if the agent can retrieve twitter handles given a vague description.",
          "difficulty": "intermediate",
          "side_effects": [
            ""
          ]
        },
        "name": "TestTestGetInformation",
        "task": "Write the twitter handle of the two hosts of Latent Space to a file called output.txt"
      },
      "id": "agbenchmark/generate_test.py::TestTestGetInformation::test_method[challenge_data0]",
      "label": "TestGetInformation",
      "shape": "dot"
    },
    {
      "color": "grey",
      "data": {
        "category": [
          "scrape_synthesize"
        ],
        "cutoff": 60,
        "dependencies": [
          "TestRevenueRetrieval"
        ],
        "eval_id": "552bdf23-db40-4bd1-b123-4ed820886cc1",
        "ground": {
          "answer": "15 Millions\n112 Millions\n117 Millions\n204 Millions\n413 Millions\n2,014 Millions\n3,198 Millions\n4,046 Millions\n7,000 Millions\n11,759 Millions\n21,461 Millions\n24,578 Millions\n31,536 Millions\n53,823 Millions\n81,462 Millions",
          "eval": {
            "type": "file"
          },
          "files": [
            ".txt"
          ],
          "should_contain": [
            "15",
            "112",
            "117",
            "204",
            "413",
            "2,014",
            "3,198",
            "4,046",
            "7,000",
            "11,759",
            "21,461",
            "24,578",
            "31,536",
            "53,823",
            "81,462"
          ],
          "should_not_contain": []
        },
        "info": {
          "description": "Tests if the agent can retrieve all the revenues of Tesla since its creation.",
          "difficulty": "intermediate",
          "side_effects": [
            "tests if there is in fact an LLM attached"
          ]
        },
        "name": "TestRevenueRetrieval2",
        "task": "Write tesla's revenue every year since its creation into a .txt file. Use the US notation, with a precision rounded to the nearest million dollars (for instance, $31,578 million)."
      },
      "id": "agbenchmark/generate_test.py::TestRevenueRetrieval2::test_method[challenge_data0]",
      "label": "RevenueRetrieval2",
      "shape": "dot"
    },
    {
      "color": "grey",
      "data": {
        "category": [
          "scrape_synthesize",
          "general"
        ],
        "cutoff": 60,
        "dependencies": [
          "TestBasicRetrieval"
        ],
        "eval_id": "dc2114d7-1597-4c9b-bed0-a97937ad977f",
        "ground": {
          "answer": "It was $81.462 billion in 2022. In millions the answer is 81,462.",
          "eval": {
            "type": "file"
          },
          "files": [
            ".txt"
          ],
          "should_contain": [
            "81,462"
          ],
          "should_not_contain": []
        },
        "info": {
          "description": "Tests if the agent can retrieve Tesla's revenue in 2022.",
          "difficulty": "intermediate",
          "side_effects": []
        },
        "name": "TestRevenueRetrieval",
        "task": "Write tesla's exact revenue in 2022 into a .txt file. Use the US notation, with a precision rounded to the nearest million dollars (for instance, $31,578 million)."
      },
      "id": "agbenchmark/generate_test.py::TestRevenueRetrieval::test_method[challenge_data0]",
      "label": "RevenueRetrieval",
      "shape": "dot"
    },
    {
      "color": "grey",
      "data": {
        "category": [
          "scrape_synthesize",
          "general"
        ],
        "cutoff": 240,
        "dependencies": [
          "TestReadFile"
        ],
        "eval_id": "895ae28a-4513-44ea-a872-0164771d1597",
        "ground": {
          "answer": "A report highlighting elements from the 2 files.",
          "eval": {
            "scoring": "binary",
            "template": "question",
            "type": "llm"
          },
          "files": [
            "output.txt"
          ],
          "should_contain": [
            "Is the company mentioned in the output actively addressing or capitalizing on the challenges or trends listed?"
          ],
          "should_not_contain": []
        },
        "info": {
          "description": "Tests if the agent can generate content based on the content of 2 files.",
          "difficulty": "basic",
          "side_effects": []
        },
        "name": "TestSynthesizeInfo",
        "task": "Create a brief report or summary highlighting how one or more companies from companies.txt are addressing or capitalizing on challenges or trends from challenges.txt. Write a file called output.txt."
      },
      "id": "agbenchmark/generate_test.py::TestSynthesizeInfo::test_method[challenge_data0]",
      "label": "SynthesizeInfo",
      "shape": "dot"
    }
  ]
}
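Both deleted graphs share the same "ground" schema for file-based checks: "files" lists either exact names (output.txt) or bare suffixes (.txt), and the matched files must contain every "should_contain" string and none of the "should_not_contain" strings. A simplified sketch of that check (a plain reading of the schema, not agbenchmark's actual evaluator; the workspace directory is hypothetical):

# Sketch: evaluate a "file"-type ground block against an agent's workspace.
from pathlib import Path

def passes_file_eval(workspace: Path, ground: dict) -> bool:
    texts: list[str] = []
    for pattern in ground["files"]:
        # A leading dot means "any file with this suffix"; otherwise exact name.
        matches = (
            list(workspace.glob(f"*{pattern}")) if pattern.startswith(".")
            else [workspace / pattern]
        )
        texts += [p.read_text() for p in matches if p.exists()]
    if not texts:
        return False  # no output file produced at all
    required_ok = all(
        any(s in t for t in texts) for s in ground.get("should_contain", [])
    )
    forbidden_hit = any(
        s in t for t in texts for s in ground.get("should_not_contain", [])
    )
    return required_ok and not forbidden_hit

# Example: the TestWriteFile node above passes once some .txt file in the
# workspace contains the string "Washington".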