refactor(forge): remove VCR cassettes, use real API calls with skip for forks

- Remove vcrpy and pytest-recording dependencies
- Remove tests/vcr/ directory and vcr_cassettes submodule
- Remove .gitmodules (only had cassette submodule)
- Simplify CI workflow - no more cassette checkout/push/PAT_REVIEW
- Tests requiring API keys now skip if not set (fork PRs)
- Update CLAUDE.md files to remove cassette references
- Fix broken agbenchmark path in pyproject.toml

Security improvement: removes need for PAT with cross-repo write access.
Fork PRs will have API-dependent tests skipped (GitHub protects secrets).

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-04-30 03:00:41 -04:00 · 2026-01-19 22:51:57 -06:00
parent e477150979
commit ab95077e5b
28 changed files with 63 additions and 78430 deletions
--- a/.github/workflows/classic-autogpt-ci.yml
+++ b/.github/workflows/classic-autogpt-ci.yml
@@ -6,11 +6,15 @@ on:
    paths:
      - '.github/workflows/classic-autogpt-ci.yml'
      - 'classic/original_autogpt/**'
+      - 'classic/direct_benchmark/**'
+      - 'classic/forge/**'
  pull_request:
    branches: [ master, dev, release-* ]
    paths:
      - '.github/workflows/classic-autogpt-ci.yml'
      - 'classic/original_autogpt/**'
+      - 'classic/direct_benchmark/**'
+      - 'classic/forge/**'

 concurrency:
  group: ${{ format('classic-autogpt-ci-{0}', github.head_ref && format('{0}-{1}', github.event_name, github.event.pull_request.number) || github.sha) }}
@@ -110,6 +114,10 @@ jobs:
      - name: Install Python dependencies
        run: poetry install

+      - name: Install direct_benchmark dependencies
+        working-directory: classic/direct_benchmark
+        run: poetry install
+
      - name: Run pytest with coverage
        run: |
          poetry run pytest -vv \
--- a/.github/workflows/classic-forge-ci.yml
+++ b/.github/workflows/classic-forge-ci.yml
@@ -6,13 +6,11 @@ on:
    paths:
      - '.github/workflows/classic-forge-ci.yml'
      - 'classic/forge/**'
-      - '!classic/forge/tests/vcr_cassettes'
  pull_request:
    branches: [ master, dev, release-* ]
    paths:
      - '.github/workflows/classic-forge-ci.yml'
      - 'classic/forge/**'
-      - '!classic/forge/tests/vcr_cassettes'

 concurrency:
  group: ${{ format('forge-ci-{0}', github.head_ref && format('{0}-{1}', github.event_name, github.event.pull_request.number) || github.sha) }}
@@ -64,40 +62,6 @@ jobs:

      - name: Checkout repository
        uses: actions/checkout@v4
-        with:
-          fetch-depth: 0
-          submodules: true
-
-      - name: Checkout cassettes
-        if: ${{ startsWith(github.event_name, 'pull_request') }}
-        env:
-          PR_BASE: ${{ github.event.pull_request.base.ref }}
-          PR_BRANCH: ${{ github.event.pull_request.head.ref }}
-          PR_AUTHOR: ${{ github.event.pull_request.user.login }}
-        run: |
-          cassette_branch="${PR_AUTHOR}-${PR_BRANCH}"
-          cassette_base_branch="${PR_BASE}"
-          cd tests/vcr_cassettes
-
-          if ! git ls-remote --exit-code --heads origin $cassette_base_branch ; then
-            cassette_base_branch="master"
-          fi
-
-          if git ls-remote --exit-code --heads origin $cassette_branch ; then
-            git fetch origin $cassette_branch
-            git fetch origin $cassette_base_branch
-
-            git checkout $cassette_branch
-
-            # Pick non-conflicting cassette updates from the base branch
-            git merge --no-commit --strategy-option=ours origin/$cassette_base_branch
-            echo "Using cassettes from mirror branch '$cassette_branch'," \
-              "synced to upstream branch '$cassette_base_branch'."
-          else
-            git checkout -b $cassette_branch
-            echo "Branch '$cassette_branch' does not exist in cassette submodule." \
-              "Using cassettes from '$cassette_base_branch'."
-          fi

      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
@@ -144,7 +108,10 @@ jobs:
        env:
          CI: true
          PLAIN_OUTPUT: True
+          # API keys - tests that need these will skip if not available
+          # Secrets are not available to fork PRs (GitHub security feature)
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+          ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
          S3_ENDPOINT_URL: ${{ runner.os != 'Windows' && 'http://127.0.0.1:9000' || '' }}
          AWS_ACCESS_KEY_ID: minioadmin
          AWS_SECRET_ACCESS_KEY: minioadmin
@@ -161,83 +128,9 @@ jobs:
          token: ${{ secrets.CODECOV_TOKEN }}
          flags: forge,${{ runner.os }}

-      - id: setup_git_auth
-        name: Set up git token authentication
-        # Cassettes may be pushed even when tests fail
-        if: success() || failure()
-        run: |
-          config_key="http.${{ github.server_url }}/.extraheader"
-          if [ "${{ runner.os }}" = 'macOS' ]; then
-            base64_pat=$(echo -n "pat:${{ secrets.PAT_REVIEW }}" | base64)
-          else
-            base64_pat=$(echo -n "pat:${{ secrets.PAT_REVIEW }}" | base64 -w0)
-          fi
-
-          git config "$config_key" \
-            "Authorization: Basic $base64_pat"
-
-          cd tests/vcr_cassettes
-          git config "$config_key" \
-            "Authorization: Basic $base64_pat"
-
-          echo "config_key=$config_key" >> $GITHUB_OUTPUT
-
-      - id: push_cassettes
-        name: Push updated cassettes
-        # For pull requests, push updated cassettes even when tests fail
-        if: github.event_name == 'push' || (! github.event.pull_request.head.repo.fork && (success() || failure()))
-        env:
-          PR_BRANCH: ${{ github.event.pull_request.head.ref }}
-          PR_AUTHOR: ${{ github.event.pull_request.user.login }}
-        run: |
-          if [ "${{ startsWith(github.event_name, 'pull_request') }}" = "true" ]; then
-            is_pull_request=true
-            cassette_branch="${PR_AUTHOR}-${PR_BRANCH}"
-          else
-            cassette_branch="${{ github.ref_name }}"
-          fi
-
-          cd tests/vcr_cassettes
-          # Commit & push changes to cassettes if any
-          if ! git diff --quiet; then
-            git add .
-            git commit -m "Auto-update cassettes"
-            git push origin HEAD:$cassette_branch
-            if [ ! $is_pull_request ]; then
-              cd ../..
-              git add tests/vcr_cassettes
-              git commit -m "Update cassette submodule"
-              git push origin HEAD:$cassette_branch
-            fi
-            echo "updated=true" >> $GITHUB_OUTPUT
-          else
-            echo "updated=false" >> $GITHUB_OUTPUT
-            echo "No cassette changes to commit"
-          fi
-
-      - name: Post Set up git token auth
-        if: steps.setup_git_auth.outcome == 'success'
-        run: |
-          git config --unset-all '${{ steps.setup_git_auth.outputs.config_key }}'
-          git submodule foreach git config --unset-all '${{ steps.setup_git_auth.outputs.config_key }}'
-
-      - name: Apply "behaviour change" label and comment on PR
-        if: ${{ startsWith(github.event_name, 'pull_request') }}
-        run: |
-          PR_NUMBER="${{ github.event.pull_request.number }}"
-          TOKEN="${{ secrets.PAT_REVIEW }}"
-          REPO="${{ github.repository }}"
-
-          if [[ "${{ steps.push_cassettes.outputs.updated }}" == "true" ]]; then
-            echo "Adding label and comment..."
-            echo $TOKEN | gh auth login --with-token
-            gh issue edit $PR_NUMBER --add-label "behaviour change"
-            gh issue comment $PR_NUMBER --body "You changed AutoGPT's behaviour on ${{ runner.os }}. The cassettes have been updated and will be merged to the submodule when this Pull Request gets merged."
-          fi
-
      - name: Upload logs to artifact
        if: always()
        uses: actions/upload-artifact@v4
        with:
-          name: test-logs
+          name: test-logs-${{ matrix.platform-os }}-${{ matrix.python-version }}
          path: classic/forge/logs/
--- a/.gitmodules
+++ b/.gitmodules
@@ -1,3 +0,0 @@
-[submodule "classic/forge/tests/vcr_cassettes"]
-	path = classic/forge/tests/vcr_cassettes
-	url = https://github.com/Significant-Gravitas/Auto-GPT-test-cassettes
--- a/classic/CLAUDE.md
+++ b/classic/CLAUDE.md
@@ -130,10 +130,9 @@ Benchmark harness for testing agent performance:

 ## Testing Patterns

- VCR cassettes in `/forge/tests/vcr_cassettes/` for HTTP mocking
 - Async support via pytest-asyncio
 - Fixtures defined in `conftest.py` files provide: `tmp_project_root`, `storage`, `config`, `llm_provider`, `agent`
- Tests require `OPENAI_API_KEY` environment variable (defaults to "sk-dummy" for mocked tests)
+- Tests requiring API keys (OPENAI_API_KEY, ANTHROPIC_API_KEY) will skip if not set

 ## Environment Setup

--- a/classic/forge/CLAUDE.md
+++ b/classic/forge/CLAUDE.md
@@ -316,14 +316,14 @@ GET  /ap/v1/agent/tasks/{id}/artifacts  # List artifacts

 **Fixtures** (`conftest.py`):
 - `storage` - Temporary LocalFileStorage
- VCR cassettes in `tests/vcr_cassettes/`

 ```bash
 poetry run pytest                    # All tests
 poetry run pytest --cov=forge        # With coverage
-poetry run pytest --record-mode=all  # Record HTTP cassettes
 ```

+**Note**: Tests requiring API keys (OPENAI_API_KEY, ANTHROPIC_API_KEY) will be skipped if not set.
+
 ## Creating a Custom Component

 ```python
--- a/classic/forge/conftest.py
+++ b/classic/forge/conftest.py
@@ -6,9 +6,7 @@ import pytest
 from forge.file_storage.base import FileStorage, FileStorageConfiguration
 from forge.file_storage.local import LocalFileStorage

-pytest_plugins = [
-    "tests.vcr",
-]
+pytest_plugins = []


@pytest.fixture(scope="session", autouse=True)
--- a/classic/forge/forge/components/image_gen/test_image_gen.py
+++ b/classic/forge/forge/components/image_gen/test_image_gen.py
@@ -1,5 +1,6 @@
 import functools
 import hashlib
+import os
 from pathlib import Path
 from unittest.mock import patch

@@ -12,6 +13,8 @@ from forge.components.image_gen.image_gen import ImageGeneratorConfiguration
 from forge.file_storage.base import FileStorage
 from forge.llm.providers.openai import OpenAICredentials

+OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
+

@pytest.fixture
 def image_gen_component(storage: FileStorage):
@@ -39,7 +42,7 @@ def image_size(request):
    return request.param


-@pytest.mark.vcr
+@pytest.mark.skipif(not OPENAI_API_KEY, reason="OPENAI_API_KEY not set")
 def test_dalle(
    image_gen_component: ImageGeneratorComponent,
    image_size,
@@ -53,8 +56,7 @@ def test_dalle(


@pytest.mark.xfail(
-    reason="The image is too big to be put in a cassette for a CI pipeline. "
-    "We're looking into a solution."
+    reason="HuggingFace image generation is unreliable in CI."
 )
@pytest.mark.parametrize(
    "image_model",
--- a/classic/forge/poetry.lock
+++ b/classic/forge/poetry.lock
--- a/classic/forge/pyproject.toml
+++ b/classic/forge/pyproject.toml
@@ -24,8 +24,7 @@ classifiers = [

 [tool.poetry.dependencies]
 python = "^3.12"
-agbenchmark = { path = "../benchmark", optional = true }
-# agbenchmark = {git = "https://github.com/Significant-Gravitas/AutoGPT.git", subdirectory = "benchmark", optional = true}
+# agbenchmark removed - use direct_benchmark instead
 aiohttp = "^3.8.5"
 anthropic = "^0.45.0"
 beautifulsoup4 = "^4.12.2"
@@ -73,7 +72,7 @@ watchdog = "^6.0.0"
 webdriver-manager = "^4.0.2"

 [tool.poetry.extras]
-benchmark = ["agbenchmark"]
+# benchmark extra removed - use direct_benchmark instead

 [tool.poetry.group.dev.dependencies]
 black = "^23.12.1"
@@ -94,9 +93,7 @@ pytest = "^7.4.0"
 pytest-asyncio = "^0.23.3"
 pytest-cov = "^5.0.0"
 pytest-mock = "*"
-pytest-recording = "*"
 mock = "^5.1.0"
-vcrpy = { git = "https://github.com/Significant-Gravitas/vcrpy.git", rev = "master" }


 [build-system]
--- a/classic/forge/tests/vcr/init.py
+++ b/classic/forge/tests/vcr/init.py
@@ -1,81 +0,0 @@
-import logging
-import os
-from hashlib import sha256
-from typing import cast
-
-import pytest
-from openai import OpenAI
-from openai._models import FinalRequestOptions
-from openai._types import Omit
-from openai._utils import is_given
-from pytest_mock import MockerFixture
-
-from .vcr_filter import (
-    before_record_request,
-    before_record_response,
-    freeze_request_body,
-)
-
-DEFAULT_RECORD_MODE = "new_episodes"
-BASE_VCR_CONFIG = {
-    "before_record_request": before_record_request,
-    "before_record_response": before_record_response,
-    "match_on": ["method", "headers"],
-}
-
-
-@pytest.fixture(scope="session")
-def vcr_config(get_base_vcr_config):
-    return get_base_vcr_config
-
-
-@pytest.fixture(scope="session")
-def get_base_vcr_config(request):
-    record_mode = request.config.getoption("--record-mode", default="new_episodes")
-    config = BASE_VCR_CONFIG
-
-    if record_mode is None:
-        config["record_mode"] = DEFAULT_RECORD_MODE
-
-    return config
-
-
-@pytest.fixture()
-def vcr_cassette_dir(request):
-    test_name = os.path.splitext(request.node.name)[0]
-    return os.path.join("tests/vcr_cassettes", test_name)
-
-
-@pytest.fixture
-def cached_openai_client(mocker: MockerFixture) -> OpenAI:
-    client = OpenAI()
-    _prepare_options = client._prepare_options
-
-    def _patched_prepare_options(self, options: FinalRequestOptions):
-        _prepare_options(options)
-
-        if not options.json_data:
-            return
-
-        headers: dict[str, str | Omit] = (
-            {**options.headers} if is_given(options.headers) else {}
-        )
-        options.headers = headers
-        data = cast(dict, options.json_data)
-
-        logging.getLogger("cached_openai_client").debug(
-            f"Outgoing API request: {headers}\n{data if data else None}"
-        )
-
-        # Add hash header for cheap & fast matching on cassette playback
-        headers["X-Content-Hash"] = sha256(
-            freeze_request_body(data), usedforsecurity=False
-        ).hexdigest()
-
-    mocker.patch.object(
-        client,
-        "_prepare_options",
-        new=_patched_prepare_options,
-    )
-
-    return client
--- a/classic/forge/tests/vcr/vcr_filter.py
+++ b/classic/forge/tests/vcr/vcr_filter.py
@@ -1,108 +0,0 @@
-import contextlib
-import json
-import re
-from io import BytesIO
-from typing import Any, cast
-
-from vcr.request import Request
-
-HOSTNAMES_TO_CACHE: list[str] = [
-    "api.openai.com",
-    "localhost:50337",
-    "duckduckgo.com",
-]
-
-IGNORE_REQUEST_HEADERS: set[str | re.Pattern] = {
-    "Authorization",
-    "Cookie",
-    "OpenAI-Organization",
-    "X-OpenAI-Client-User-Agent",
-    "User-Agent",
-    re.compile(r"X-Stainless-[\w\-]+", re.IGNORECASE),
-}
-
-LLM_MESSAGE_REPLACEMENTS: list[dict[str, str]] = [
-    {
-        "regex": r"\w{3} \w{3} {1,2}\d{1,2} \d{2}:\d{2}:\d{2} \d{4}",
-        "replacement": "Tue Jan  1 00:00:00 2000",
-    },
-    {
-        "regex": r"<selenium.webdriver.chrome.webdriver.WebDriver[^>]*>",
-        "replacement": "",
-    },
-]
-
-OPENAI_URL = "api.openai.com"
-
-
-def before_record_request(request: Request) -> Request | None:
-    if not should_cache_request(request):
-        return None
-
-    request = filter_request_headers(request)
-    request = freeze_request(request)
-    return request
-
-
-def should_cache_request(request: Request) -> bool:
-    return any(hostname in request.url for hostname in HOSTNAMES_TO_CACHE)
-
-
-def filter_request_headers(request: Request) -> Request:
-    for header_name in list(request.headers):
-        if any(
-            (
-                (type(ignore) is str and ignore.lower() == header_name.lower())
-                or (isinstance(ignore, re.Pattern) and ignore.match(header_name))
-            )
-            for ignore in IGNORE_REQUEST_HEADERS
-        ):
-            del request.headers[header_name]
-    return request
-
-
-def freeze_request(request: Request) -> Request:
-    if not request or not request.body:
-        return request
-
-    with contextlib.suppress(ValueError):
-        if isinstance(request.body, BytesIO):
-            body_data: bytes | str = request.body.getvalue()
-        else:
-            body_data = cast(bytes, request.body)
-        request.body = freeze_request_body(json.loads(body_data))
-
-    return request
-
-
-def freeze_request_body(body: dict) -> bytes:
-    """Remove any dynamic items from the request body"""
-
-    if "messages" not in body:
-        return json.dumps(body, sort_keys=True).encode()
-
-    if "max_tokens" in body:
-        del body["max_tokens"]
-
-    for message in body["messages"]:
-        if "content" in message and "role" in message:
-            if message["role"] == "system":
-                message["content"] = replace_message_content(
-                    message["content"], LLM_MESSAGE_REPLACEMENTS
-                )
-
-    return json.dumps(body, sort_keys=True).encode()
-
-
-def replace_message_content(content: str, replacements: list[dict[str, str]]) -> str:
-    for replacement in replacements:
-        pattern = re.compile(replacement["regex"])
-        content = pattern.sub(replacement["replacement"], content)
-
-    return content
-
-
-def before_record_response(response: dict[str, Any]) -> dict[str, Any]:
-    if "Transfer-Encoding" in response["headers"]:
-        del response["headers"]["Transfer-Encoding"]
-    return response
--- a/classic/forge/tests/vcr_cassettes/LICENSE
+++ b/classic/forge/tests/vcr_cassettes/LICENSE
@@ -1,21 +0,0 @@
-MIT License
-
-Copyright (c) 2023 Significant Gravitas
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
--- a/classic/forge/tests/vcr_cassettes/format_cassettes.py
+++ b/classic/forge/tests/vcr_cassettes/format_cassettes.py
@@ -1,63 +0,0 @@
-import contextlib
-import json
-import os
-from hashlib import sha256
-from pathlib import Path
-
-import yaml
-from yaml import CDumper as Dumper
-from yaml import CLoader as Loader
-
-
-def convert_cassette_file(filename: str | Path):
-    print(f"{filename} STARTING")
-
-    with open(filename) as c:
-        cassette_content = yaml.load(c, Loader)
-
-    # Iterate over all request+response pairs
-    for interaction in cassette_content["interactions"]:
-        request_body: str = interaction["request"]["body"]
-        if request_body is None:
-            continue
-
-        with contextlib.suppress(json.decoder.JSONDecodeError):
-            request_obj = json.loads(request_body)
-
-            # Strip `max_tokens`, since its value doesn't matter
-            #  as long as the request succeeds
-            if "max_tokens" in request_obj:
-                del request_obj["max_tokens"]
-
-            # Sort the keys of the request body
-            request_body = json.dumps(request_obj, sort_keys=True)
-
-        headers = interaction["request"]["headers"]
-
-        # Calculate hash for the request body, used for VCR lookup
-        headers["X-Content-Hash"] = [
-            sha256(request_body.encode(), usedforsecurity=False).hexdigest()
-        ]
-
-        # Strip auth headers
-        if "AGENT-MODE" in headers:
-            del headers["AGENT-MODE"]
-        if "AGENT-TYPE" in headers:
-            del headers["AGENT-TYPE"]
-        if "OpenAI-Organization" in headers:
-            del headers["OpenAI-Organization"]
-
-        interaction["request"]["body"] = request_body
-
-    with open(filename, "w") as c:
-        c.write(yaml.dump(cassette_content, Dumper=Dumper))
-
-    print(f"{filename} DONE")
-
-
-# Iterate over all .yaml files in the current folder and its subdirectories
-for dirpath, _, files in os.walk("."):
-    for file in files:
-        if not file.endswith(".yaml"):
-            continue
-        convert_cassette_file(os.path.join(dirpath, file))
--- a/classic/forge/tests/vcr_cassettes/test_browse_website/test_browse_website.yaml
+++ b/classic/forge/tests/vcr_cassettes/test_browse_website/test_browse_website.yaml
--- a/classic/forge/tests/vcr_cassettes/test_dalle[1024]/test_dalle[1024].yaml
+++ b/classic/forge/tests/vcr_cassettes/test_dalle[1024]/test_dalle[1024].yaml
--- a/classic/forge/tests/vcr_cassettes/test_dalle[256]/test_dalle[256].yaml
+++ b/classic/forge/tests/vcr_cassettes/test_dalle[256]/test_dalle[256].yaml
--- a/classic/forge/tests/vcr_cassettes/test_dalle[512]/test_dalle[512].yaml
+++ b/classic/forge/tests/vcr_cassettes/test_dalle[512]/test_dalle[512].yaml
--- a/classic/forge/tests/vcr_cassettes/test_debug_code_challenge_a/test_debug_code_challenge_a.yaml
+++ b/classic/forge/tests/vcr_cassettes/test_debug_code_challenge_a/test_debug_code_challenge_a.yaml
--- a/classic/forge/tests/vcr_cassettes/test_generate_aiconfig_automatic_default/test_generate_aiconfig_automatic_default.yaml
+++ b/classic/forge/tests/vcr_cassettes/test_generate_aiconfig_automatic_default/test_generate_aiconfig_automatic_default.yaml
@@ -1,214 +0,0 @@
-interactions:
- request:
-    body: '{"messages": [{"content": "\nYour task is to devise up to 5 highly effective
-      goals and an appropriate role-based name (_GPT) for an autonomous agent, ensuring
-      that the goals are optimally aligned with the successful completion of its assigned
-      task.\n\nThe user will provide the task, you will provide only the output in
-      the exact format specified below with no explanation or conversation.\n\nExample
-      input:\nHelp me with marketing my business\n\nExample output:\nName: CMOGPT\nDescription:
-      a professional digital marketer AI that assists Solopreneurs in growing their
-      businesses by providing world-class expertise in solving marketing problems
-      for SaaS, content products, agencies, and more.\nGoals:\n- Engage in effective
-      problem-solving, prioritization, planning, and supporting execution to address
-      your marketing needs as your virtual Chief Marketing Officer.\n\n- Provide specific,
-      actionable, and concise advice to help you make informed decisions without the
-      use of platitudes or overly wordy explanations.\n\n- Identify and prioritize
-      quick wins and cost-effective campaigns that maximize results with minimal time
-      and budget investment.\n\n- Proactively take the lead in guiding you and offering
-      suggestions when faced with unclear information or uncertainty to ensure your
-      marketing strategy remains on track.\n", "role": "system"}, {"content": "Task:
-      ''Write a wikipedia style article about the project: https://github.com/significant-gravitas/Auto-GPT''\nRespond
-      only with the output in the exact format specified in the system prompt, with
-      no explanation or conversation.", "role": "user"}], "model": "gpt-3.5-turbo",
-      "temperature": 0.0}'
-    headers:
-      Accept:
-      - '*/*'
-      Accept-Encoding:
-      - gzip, deflate
-      Connection:
-      - keep-alive
-      Content-Length:
-      - '1669'
-      Content-Type:
-      - application/json
-      X-Content-Hash:
-      - cc4940b13e25117216dfcd3c7dcdc665cf7ef70a10b5633f2f67f1a832c3e89a
-    method: POST
-    uri: https://api.openai.com/v1/chat/completions
-  response:
-    body:
-      string: !!binary |
-        H4sIAAAAAAAAA3RU227cNhB971cM+KxdrOO73oIEMNAirVGkCdC6CMbkSJqaGsqc0dqy4X8PKO16
-        myB55eXMuZHPjoOrne/QfD/E1flvny7+np4+2eXJ0dmH+w/05+kf7yf6fG6PJ7+6yqXb/8jb7sba
-        p36IZJzEVc5nQqPg6qOzi9OjN5uzzXnl+hQoutq1g62O16crG/NtWm2ON0eucqNiS65+dkNO/WBf
-        LN2RqKvfXB5X7oB9WN+cVs6SYXxdOrm8eKmc7xJ7Ulf/8+x60j1sTpFc7VCV1VCskExiJEXA79hT
-        DW9HS1fXHz/zHV9df7yR96Q+81DG1oACOFqS1KdRAVsSg0DKrVAAS7AAAwtYRzDr5ySQGih4AwXG
-        ldoUCTAb+0gKTcqwxcwFcMipmKkVsPg4BpYWbkcDSQaRe7ZliqbGHjATBNpSTENPYhWoZxLjhj1k
-        UsLsuwpQAnSsljJ7jEBbEtP1dyJhNI78RAoYtiieAgjamDFCRGlHbKlQ86RaGBXMHn3HQhAJs8yL
-        sU2Zreu1MGxJKKMRdNx2q/sRI9tUVDUp92i8pYUaej/O53YpgHVo0BOZzg6WkALmoN9YuL6RG7lK
-        GLW+kRW8FYzTE80Xdg7O4GwKmWKpIPRolBnjwg5ZAKH0KVNHorwlGCVQnucVOamZ4SwN7F/BSs7F
-        32LRuky+2st8oBhXann0NmYKy42D2G/l7UXHCTINJSvZyf2e/R1N0FCJgrSCh45LcUJHuVC0NN9p
-        Rw4UWUp6En7u2Ar+WlI+hPyjcI18J3w/0uwUiY6ZFtpzo3c6WOEhsxkJLFaWHlRl27PusiVpsS2Q
-        PYpQXkBYgVCnufWZsFhe5hzMn5m+S2IsYxo1TkvFlrYEHKwcF3o42JuWzYYo3KK/K/vcDzltl0bs
-        yve/uvlpH+9ezr6uy/TrnLYcCBCUsI+ki7PUNDw/MRiVMtDjQJlJPMHtNP8KhUxxsHs1tMxZPgFp
-        f/oDVKC4LQcKrIJxT/t5KdvavVSuYWHtvmRCTeJqp5YGVzmWQI+u3rz8+/LLVwAAAP//AwDZq0Fl
-        twUAAA==
-    headers:
-      Access-Control-Allow-Origin:
-      - '*'
-      Alt-Svc:
-      - h3=":443"; ma=86400
-      - h3=":443"; ma=2592000,h3-29=":443"; ma=2592000
-      Cache-Control:
-      - no-cache, must-revalidate
-      Cf-Cache-Status:
-      - DYNAMIC
-      Cf-Ray:
-      - 7cd79772daa6e157-ORD
-      Content-Encoding:
-      - gzip
-      Content-Type:
-      - application/json
-      Date:
-      - Fri, 26 May 2023 17:03:47 GMT
-      Function-Execution-Id:
-      - pegl0551txnb
-      Openai-Model:
-      - gpt-3.5-turbo-0301
-      Openai-Organization:
-      - significant-gravitas
-      Openai-Processing-Ms:
-      - '20547'
-      Openai-Version:
-      - '2020-10-01'
-      Server:
-      - Google Frontend
-      Strict-Transport-Security:
-      - max-age=15724800; includeSubDomains
-      Vary:
-      - Accept-Encoding
-      X-Cloud-Trace-Context:
-      - ec4c1070b2d54378e4ddcf13cd8366b7
-      X-Powered-By:
-      - Express
-      X-Ratelimit-Limit-Requests:
-      - '3500'
-      X-Ratelimit-Limit-Tokens:
-      - '90000'
-      X-Ratelimit-Remaining-Requests:
-      - '3499'
-      X-Ratelimit-Remaining-Tokens:
-      - '89605'
-      X-Ratelimit-Reset-Requests:
-      - 17ms
-      X-Ratelimit-Reset-Tokens:
-      - 262ms
-      X-Request-Id:
-      - bb863118d90782f50f13ec29157a7a7f
-    status:
-      code: 200
-      message: OK
- request:
-    body: '{"messages": [{"content": "\nYour task is to devise up to 5 highly effective
-      goals and an appropriate role-based name (_GPT) for an autonomous agent, ensuring
-      that the goals are optimally aligned with the successful completion of its assigned
-      task.\n\nThe user will provide the task, you will provide only the output in
-      the exact format specified below with no explanation or conversation.\n\nExample
-      input:\nHelp me with marketing my business\n\nExample output:\nName: CMOGPT\nDescription:
-      a professional digital marketer AI that assists Solopreneurs in growing their
-      businesses by providing world-class expertise in solving marketing problems
-      for SaaS, content products, agencies, and more.\nGoals:\n- Engage in effective
-      problem-solving, prioritization, planning, and supporting execution to address
-      your marketing needs as your virtual Chief Marketing Officer.\n\n- Provide specific,
-      actionable, and concise advice to help you make informed decisions without the
-      use of platitudes or overly wordy explanations.\n\n- Identify and prioritize
-      quick wins and cost-effective campaigns that maximize results with minimal time
-      and budget investment.\n\n- Proactively take the lead in guiding you and offering
-      suggestions when faced with unclear information or uncertainty to ensure your
-      marketing strategy remains on track.\n", "role": "system"}, {"content": "Task:
-      ''Write a wikipedia style article about the project: https://github.com/significant-gravitas/Auto-GPT''\nRespond
-      only with the output in the exact format specified in the system prompt, with
-      no explanation or conversation.", "role": "user"}], "model": "gpt-4-0314", "temperature":
-      0.0}'
-    headers:
-      Accept:
-      - '*/*'
-      Accept-Encoding:
-      - gzip, deflate
-      Connection:
-      - keep-alive
-      Content-Length:
-      - '1666'
-      Content-Type:
-      - application/json
-      X-Content-Hash:
-      - ac6044440efb25bc33deca1c1be80cfef40e6a3eeb43febe36cc11c8f17b869c
-    method: POST
-    uri: https://api.openai.com/v1/chat/completions
-  response:
-    body:
-      string: !!binary |
-        H4sIAAAAAAAAA1yUTW8bRwyG7/oVxF5ykQzbVZ1KN6NFg6JtkhYuWqAuAmqWu8tklpySM1KUwP+9
-        mNWHrV4W2OHXy4cz/DoDaLht1tCEAXMYU1y83rU/vue/hu6Xd6uPq58f3vZ7wzx+++/9b933zbxG
-        6OYjhXyKugo6pkiZVQ7mYISZatabu9Xy9vq72+XryTBqS7GG9Skvlovrb26Wx4hBOZA3a/h7BgDw
-        dfpWbdLS52YN1/PTyUju2FOzPjsBNKaxnjTozp5RcjN/NgaVTDLJfYsjreFP/sRv3j88yg/kwThV
-        5Wu4F7j/aZF0R0YtHINg6kUNPFFgjPyFpQcW6EnIMNe/HcW48Gwl5GLUzoGlUxsx85bmgNJCkQ2j
-        UztVTtQyLjzvIwFa5hDJATdaMmzRWItDMq2AvaYKsbS1iiaShWuxQODa5R0agVFS56zG5FeP8kYx
-        +vpRFvAwqGnph7gHIye0MByVtGSVUAt5oFOhVw6pWFKnOXSEtQ0/KK8YjDelInLIWiO23BKgAIZQ
-        DDMdHcdkNJA4bwl0S7Zl2l09SlXzK7JkZAEEoZINIyRlyaAdVLcpA7YDGdUaZ0qv/DyHvnBLkYUm
-        FSRequ9wRgjsl9xPzA99GEXGTaSjoHfWo/CX/2WQrIApmSbj2pdTmPqeg5fKr1bIpm2Zjl+iamlL
-        UdNYlQ7sWW1/4jeORTjvgWWrcUvVZX4BEkIktMk7as8BI5wv01HuH04Hr3nlEdiP9yqZduTOKhgh
-        ovQF+4kgdV3VvqW4P0kItaPLmTv3wh0HlHAYIo8JQ64JEHaTuNIySTjp+J36EtHiHkpqT/lO+LKC
-        URcpZEDZQxhQenJQe0mnInypYn4YZb3gecA8mc5jVAGjEVn88q6VtMi6qAKumuMzfzrvh46Fffhg
-        hK5S37xnTQe3pxnAP9O+KRcrpEmmY8ofsn4iqUvodnVzyNc8r7Zn683d6mjNmjE+G5Z317Na5Wn2
-        HwAAAP//AwDDjO4mWAUAAA==
-    headers:
-      CF-Cache-Status:
-      - DYNAMIC
-      CF-RAY:
-      - 803a41a718048023-IAD
-      Cache-Control:
-      - no-cache, must-revalidate
-      Connection:
-      - keep-alive
-      Content-Encoding:
-      - gzip
-      Content-Type:
-      - application/json
-      Date:
-      - Fri, 08 Sep 2023 21:24:24 GMT
-      Server:
-      - cloudflare
-      access-control-allow-origin:
-      - '*'
-      alt-svc:
-      - h3=":443"; ma=86400
-      openai-model:
-      - gpt-4-0314
-      openai-organization:
-      - significant-gravitas
-      openai-processing-ms:
-      - '17436'
-      openai-version:
-      - '2020-10-01'
-      strict-transport-security:
-      - max-age=15724800; includeSubDomains
-      x-ratelimit-limit-requests:
-      - '200'
-      x-ratelimit-limit-tokens:
-      - '40000'
-      x-ratelimit-remaining-requests:
-      - '199'
-      x-ratelimit-remaining-tokens:
-      - '31828'
-      x-ratelimit-reset-requests:
-      - 300ms
-      x-ratelimit-reset-tokens:
-      - 12.258s
-      x-request-id:
-      - 1c0d220cb7a5d43e8e407a3576681ad7
-    status:
-      code: 200
-      message: OK
-version: 1
--- a/classic/forge/tests/vcr_cassettes/test_generate_aiconfig_automatic_fallback/test_generate_aiconfig_automatic_fallback.yaml
+++ b/classic/forge/tests/vcr_cassettes/test_generate_aiconfig_automatic_fallback/test_generate_aiconfig_automatic_fallback.yaml
@@ -1,202 +0,0 @@
-interactions:
- request:
-    body: '{"messages": [{"content": "\nYour task is to devise up to 5 highly effective
-      goals and an appropriate role-based name (_GPT) for an autonomous agent, ensuring
-      that the goals are optimally aligned with the successful completion of its assigned
-      task.\n\nThe user will provide the task, you will provide only the output in
-      the exact format specified below with no explanation or conversation.\n\nExample
-      input:\nHelp me with marketing my business\n\nExample output:\nName: CMOGPT\nDescription:
-      a professional digital marketer AI that assists Solopreneurs in growing their
-      businesses by providing world-class expertise in solving marketing problems
-      for SaaS, content products, agencies, and more.\nGoals:\n- Engage in effective
-      problem-solving, prioritization, planning, and supporting execution to address
-      your marketing needs as your virtual Chief Marketing Officer.\n\n- Provide specific,
-      actionable, and concise advice to help you make informed decisions without the
-      use of platitudes or overly wordy explanations.\n\n- Identify and prioritize
-      quick wins and cost-effective campaigns that maximize results with minimal time
-      and budget investment.\n\n- Proactively take the lead in guiding you and offering
-      suggestions when faced with unclear information or uncertainty to ensure your
-      marketing strategy remains on track.\n", "role": "system"}, {"content": "Task:
-      ''T&GF\u00a3OIBECC()!*''\nRespond only with the output in the exact format specified
-      in the system prompt, with no explanation or conversation.", "role": "user"}],
-      "model": "gpt-3.5-turbo", "temperature": 0.0}'
-    headers:
-      Accept:
-      - '*/*'
-      Accept-Encoding:
-      - gzip, deflate
-      Connection:
-      - keep-alive
-      Content-Length:
-      - '1590'
-      Content-Type:
-      - application/json
-      X-Content-Hash:
-      - 0731f601a012ca7c6e2e389a9dc0ebe08f6a7e3bfb004b72d5c24da757085b68
-    method: POST
-    uri: https://api.openai.com/v1/chat/completions
-  response:
-    body:
-      string: !!binary |
-        H4sIAAAAAAAAA0zOPW4CMRCG4T6nGE2TxqBlEWTxDVAo0iQFUYSMPbAG27OyZyEIcfcI8kPabzSP
-        3jN6hxpta8TGLgyent+a2PbbaJt6GV75cFwcFrt2Ppst1xYV8npHVn4+hpZjF0g8J1RoMxkhh3o0
-        bSajuprWlcLIjgJq3HYyGA8nA+nzmgfVuBqhwr6YLaE+Y5c5drIS3lMqqOumUni3/+0KhcWEv2Vc
-        NReFtmVvqaB+P2Ok8qtmDoQaTSm+iElybeQklK7988cIhXM+KZiDNSmxQJf54B2BScC9dL3A0UvL
-        vYABG8hkMMlBnxzlK+jMOhCIKfshvAQyhe4CHEzw7naEDWeIBMLwnQIn7m/yEC8KNz750q4ymcIJ
-        NRbhDhX65OgTdXX5uDx8AQAA//8DANQ76GClAQAA
-    headers:
-      Access-Control-Allow-Origin:
-      - '*'
-      Alt-Svc:
-      - h3=":443"; ma=86400
-      - h3=":443"; ma=2592000,h3-29=":443"; ma=2592000
-      Cache-Control:
-      - no-cache, must-revalidate
-      Cf-Cache-Status:
-      - DYNAMIC
-      Cf-Ray:
-      - 7cd797c7b8172d19-ORD
-      Content-Encoding:
-      - gzip
-      Content-Type:
-      - application/json
-      Date:
-      - Fri, 26 May 2023 17:03:45 GMT
-      Function-Execution-Id:
-      - u75a1w60qkmd
-      Openai-Model:
-      - gpt-3.5-turbo-0301
-      Openai-Organization:
-      - significant-gravitas
-      Openai-Processing-Ms:
-      - '4635'
-      Openai-Version:
-      - '2020-10-01'
-      Server:
-      - Google Frontend
-      Strict-Transport-Security:
-      - max-age=15724800; includeSubDomains
-      Vary:
-      - Accept-Encoding
-      X-Cloud-Trace-Context:
-      - 6444596b1ff59b706e529742bac3a855;o=1
-      X-Powered-By:
-      - Express
-      X-Ratelimit-Limit-Requests:
-      - '3500'
-      X-Ratelimit-Limit-Tokens:
-      - '90000'
-      X-Ratelimit-Remaining-Requests:
-      - '3499'
-      X-Ratelimit-Remaining-Tokens:
-      - '89626'
-      X-Ratelimit-Reset-Requests:
-      - 17ms
-      X-Ratelimit-Reset-Tokens:
-      - 248ms
-      X-Request-Id:
-      - 8328aeaa91a9fdfa98e92438444f1258
-    status:
-      code: 200
-      message: OK
- request:
-    body: '{"messages": [{"content": "\nYour task is to devise up to 5 highly effective
-      goals and an appropriate role-based name (_GPT) for an autonomous agent, ensuring
-      that the goals are optimally aligned with the successful completion of its assigned
-      task.\n\nThe user will provide the task, you will provide only the output in
-      the exact format specified below with no explanation or conversation.\n\nExample
-      input:\nHelp me with marketing my business\n\nExample output:\nName: CMOGPT\nDescription:
-      a professional digital marketer AI that assists Solopreneurs in growing their
-      businesses by providing world-class expertise in solving marketing problems
-      for SaaS, content products, agencies, and more.\nGoals:\n- Engage in effective
-      problem-solving, prioritization, planning, and supporting execution to address
-      your marketing needs as your virtual Chief Marketing Officer.\n\n- Provide specific,
-      actionable, and concise advice to help you make informed decisions without the
-      use of platitudes or overly wordy explanations.\n\n- Identify and prioritize
-      quick wins and cost-effective campaigns that maximize results with minimal time
-      and budget investment.\n\n- Proactively take the lead in guiding you and offering
-      suggestions when faced with unclear information or uncertainty to ensure your
-      marketing strategy remains on track.\n", "role": "system"}, {"content": "Task:
-      ''T&GF\u00a3OIBECC()!*''\nRespond only with the output in the exact format specified
-      in the system prompt, with no explanation or conversation.", "role": "user"}],
-      "model": "gpt-4-0314", "temperature": 0.0}'
-    headers:
-      Accept:
-      - '*/*'
-      Accept-Encoding:
-      - gzip, deflate
-      Connection:
-      - keep-alive
-      Content-Length:
-      - '1587'
-      Content-Type:
-      - application/json
-      X-Content-Hash:
-      - 4cadf80f0732cd8436f50227b74845f28c012766a5c6c6e832045b5a064e3a1c
-    method: POST
-    uri: https://api.openai.com/v1/chat/completions
-  response:
-    body:
-      string: !!binary |
-        H4sIAAAAAAAAA1xUzW7cRgy++ylYHZq2WBtrx02cvblOasRA2gDxoWhdBPQMV2I9IqdDal058NP0
-        TfpkwUi73qQXYTQkv+/j33w6AGg4NitoQoce+pwOX97Hy+VVoauTv9vf9c2PmdOr4bfL8+Xy6qFt
-        FjVCb/+i4Luoo6B9TuSsMptDIXSqqMcvXp2eLM9Ozk4nQ6+RUg1rsx+eHi6fH59uIzrlQNas4I8D
-        AIBP07dqk0j/NCtYLnY3PZlhS83qyQmgKZrqTYNmbI7izWJvDCpOMsn9BXtawUUZs3O4Rrv7oGlD
-        5fL99Y28JguFc01jBSiAcYMSKML5W4gUOHdUWFrAlsTBO3SwTIEx8QMZsMAgkUqlj5OfRDBNm3oO
-        MyM42p0tIGgkW0weeXh4SGRwz95BLhTYWGUy0XrNgUnCeHQjl4rJVjdyCOchDAWd0lhlaaTJmcWp
-        5EJVGX3Ft4Bn199e/vzfv7++/enNxcV333/zw7MFuEIkp9KzELDbLD6NVW1PKLsM8lCyGh3dSOV+
-        TRtKmgGh9rxQR2K8oZ1cCl7/zKu+dqwkGGMhs0nVrIbEhrmQKUGhRBsUB6y1dAMsBEHFOFKhOAFv
-        IShuRVyoOMugg6URMGL2ya3QuuZSiTDnohg6uEWjCFM9RxC6B5a1lh5rk0ELsBi3ndvczh5HwMJG
-        EGeFFSsXDWS25X5fdMORICTCUhspgY3mVmKosHibCAoF7XuSODEZrLWADaECrYeURtitzJbkf5XR
-        7NxjAkzcSl/HbRqP6hnJuBZGBw/a7/ryDlkcWQCh47aDVPsEugachiWMiy+mabEtV2K85cQ+gndF
-        h7bTYZ4eEufylPkXuqpxnwbs975S7fI4arar9/i0s2sWtu5jITSVuofmmme3xwOAP6c3YPhqrZtc
-        tM/+0fWOpD4MJy/PZrxmT7u3Hr/YvhCNq2PaG06fnx1UlseDzwAAAP//AwAeumMI7AQAAA==
-    headers:
-      CF-Cache-Status:
-      - DYNAMIC
-      CF-RAY:
-      - 803a4292be81064d-IAD
-      Cache-Control:
-      - no-cache, must-revalidate
-      Connection:
-      - keep-alive
-      Content-Encoding:
-      - gzip
-      Content-Type:
-      - application/json
-      Date:
-      - Fri, 08 Sep 2023 21:25:03 GMT
-      Server:
-      - cloudflare
-      access-control-allow-origin:
-      - '*'
-      alt-svc:
-      - h3=":443"; ma=86400
-      openai-model:
-      - gpt-4-0314
-      openai-organization:
-      - significant-gravitas
-      openai-processing-ms:
-      - '18455'
-      openai-version:
-      - '2020-10-01'
-      strict-transport-security:
-      - max-age=15724800; includeSubDomains
-      x-ratelimit-limit-requests:
-      - '200'
-      x-ratelimit-limit-tokens:
-      - '40000'
-      x-ratelimit-remaining-requests:
-      - '199'
-      x-ratelimit-remaining-tokens:
-      - '24247'
-      x-ratelimit-reset-requests:
-      - 300ms
-      x-ratelimit-reset-tokens:
-      - 23.629s
-      x-request-id:
-      - aa451d1c6c5532d93e60c287fd091162
-    status:
-      code: 200
-      message: OK
-version: 1
--- a/classic/forge/tests/vcr_cassettes/test_generate_aiconfig_automatic_typical/test_generate_aiconfig_automatic_typical.yaml
+++ b/classic/forge/tests/vcr_cassettes/test_generate_aiconfig_automatic_typical/test_generate_aiconfig_automatic_typical.yaml
@@ -1,212 +0,0 @@
-interactions:
- request:
-    body: '{"messages": [{"content": "\nYour task is to devise up to 5 highly effective
-      goals and an appropriate role-based name (_GPT) for an autonomous agent, ensuring
-      that the goals are optimally aligned with the successful completion of its assigned
-      task.\n\nThe user will provide the task, you will provide only the output in
-      the exact format specified below with no explanation or conversation.\n\nExample
-      input:\nHelp me with marketing my business\n\nExample output:\nName: CMOGPT\nDescription:
-      a professional digital marketer AI that assists Solopreneurs in growing their
-      businesses by providing world-class expertise in solving marketing problems
-      for SaaS, content products, agencies, and more.\nGoals:\n- Engage in effective
-      problem-solving, prioritization, planning, and supporting execution to address
-      your marketing needs as your virtual Chief Marketing Officer.\n\n- Provide specific,
-      actionable, and concise advice to help you make informed decisions without the
-      use of platitudes or overly wordy explanations.\n\n- Identify and prioritize
-      quick wins and cost-effective campaigns that maximize results with minimal time
-      and budget investment.\n\n- Proactively take the lead in guiding you and offering
-      suggestions when faced with unclear information or uncertainty to ensure your
-      marketing strategy remains on track.\n", "role": "system"}, {"content": "Task:
-      ''Help me create a rock opera about cybernetic giraffes''\nRespond only with
-      the output in the exact format specified in the system prompt, with no explanation
-      or conversation.", "role": "user"}], "model": "gpt-3.5-turbo", "temperature":
-      0.0}'
-    headers:
-      Accept:
-      - '*/*'
-      Accept-Encoding:
-      - gzip, deflate
-      Connection:
-      - keep-alive
-      Content-Length:
-      - '1623'
-      Content-Type:
-      - application/json
-      X-Content-Hash:
-      - 85e7cf25528e4015877bc3db5bbf9674c92e7d77d5476788f64bbf697481c3c0
-    method: POST
-    uri: https://api.openai.com/v1/chat/completions
-  response:
-    body:
-      string: !!binary |
-        H4sIAAAAAAAAA3SUQW/cRgyF7/0VxJy1C69j165uToIkRZHGaIoeEhcBd0RJzI6GkyFn7Y3h/16M
-        tI59SK8jDvm99zi6d9y51vkRzU8prC7++Ofy+tXh9cj7D3R1dv79/fXu9v2pvPz05tsn1zjZfiVv
-        xxtrL1MKZCzRNc5nQqPOtZtfL883pyfnv100bpKOgmvdkGz1Yn2+spK3sjp5cbJxjSuKA7n23qUs
-        U7IvJjuK6trTy7PGPfX+cb65vGiciWH4cXR2sXlonB+FPalrP9+7ifSxbZZArnWoymoYrUJKNIpV
-        wJ84UQt/id99SG+v/76Jr0l95lQntoARrn6HWRLvCbxELaH2ABvRQBN5xsDfSYEjjBQSxwEwG6sp
-        YOxgKsqeMSpsc/1mI3GGEvlbIdizskQFEwjc0/qJA1iX7j17DOEAHSkPkbpauyipE22kI51EkB6y
-        +B1IoozaQMqy564OpbtE2WAo3GH0NINpSUlyFZKlDKMUm7tRNM70JDll8aS6volvBYO2N3EFryQE
-        3EpGI7hlG+EgpXJ1tKcgCRBqahRCna0m+RA40mKZx2Qlky7DVKnySA/+sKUcydjDwBn7nhb/LGPU
-        gFYdrpKr/GcyoZc8oa0r1tX/2jLRJBm3gcCPmNEbZW3Ao/nxACpx0GYeRnHAoUJ3jEGGcmS+5RBg
-        R5QAS8eVWGcdvK+bDn2WCdSwminQc2QdZ6DrOQCCnqjbot8dfR8GUptzlwVV9pQxBFDLxVd35sKE
-        vqJIP9c8k2wCFHUuAy8jac2p3uApobe+hCXwmbQaNDd4RJ/J3lFIc2oR9zzUHGuJkR9j3TfAunum
-        dXjK0pWZ5LnvDXD0oXRLwjjQcUEbCDyMxnFYHFUp1de+r+2air78HwDBcgkH4GmiPCt4Yp4RPx73
-        s1JyrBiT2IxRXxXmHdnxPf3cmwnveCpTbSv66Kmx35GBYiBdu4fGLWl9yYQq0bVOTZJrHMeO7lx7
-        8vDvwy//AQAA//8DAJMll+IcBQAA
-    headers:
-      Access-Control-Allow-Origin:
-      - '*'
-      Alt-Svc:
-      - h3=":443"; ma=86400
-      - h3=":443"; ma=2592000,h3-29=":443"; ma=2592000
-      Cache-Control:
-      - no-cache, must-revalidate
-      Cf-Cache-Status:
-      - DYNAMIC
-      Cf-Ray:
-      - 7cd79738dd8022d0-ORD
-      Content-Encoding:
-      - gzip
-      Content-Type:
-      - application/json
-      Date:
-      - Fri, 26 May 2023 17:03:38 GMT
-      Function-Execution-Id:
-      - u75anm04ybm4
-      Openai-Model:
-      - gpt-3.5-turbo-0301
-      Openai-Organization:
-      - significant-gravitas
-      Openai-Processing-Ms:
-      - '20744'
-      Openai-Version:
-      - '2020-10-01'
-      Server:
-      - Google Frontend
-      Strict-Transport-Security:
-      - max-age=15724800; includeSubDomains
-      Vary:
-      - Accept-Encoding
-      X-Cloud-Trace-Context:
-      - c402bf3036960902e1be5293361e5c65;o=1
-      X-Powered-By:
-      - Express
-      X-Ratelimit-Limit-Requests:
-      - '3500'
-      X-Ratelimit-Limit-Tokens:
-      - '90000'
-      X-Ratelimit-Remaining-Requests:
-      - '3499'
-      X-Ratelimit-Remaining-Tokens:
-      - '89617'
-      X-Ratelimit-Reset-Requests:
-      - 17ms
-      X-Ratelimit-Reset-Tokens:
-      - 254ms
-      X-Request-Id:
-      - b44e6c90eb755e0bef60a3f2b20c1e10
-    status:
-      code: 200
-      message: OK
- request:
-    body: '{"messages": [{"content": "\nYour task is to devise up to 5 highly effective
-      goals and an appropriate role-based name (_GPT) for an autonomous agent, ensuring
-      that the goals are optimally aligned with the successful completion of its assigned
-      task.\n\nThe user will provide the task, you will provide only the output in
-      the exact format specified below with no explanation or conversation.\n\nExample
-      input:\nHelp me with marketing my business\n\nExample output:\nName: CMOGPT\nDescription:
-      a professional digital marketer AI that assists Solopreneurs in growing their
-      businesses by providing world-class expertise in solving marketing problems
-      for SaaS, content products, agencies, and more.\nGoals:\n- Engage in effective
-      problem-solving, prioritization, planning, and supporting execution to address
-      your marketing needs as your virtual Chief Marketing Officer.\n\n- Provide specific,
-      actionable, and concise advice to help you make informed decisions without the
-      use of platitudes or overly wordy explanations.\n\n- Identify and prioritize
-      quick wins and cost-effective campaigns that maximize results with minimal time
-      and budget investment.\n\n- Proactively take the lead in guiding you and offering
-      suggestions when faced with unclear information or uncertainty to ensure your
-      marketing strategy remains on track.\n", "role": "system"}, {"content": "Task:
-      ''Help me create a rock opera about cybernetic giraffes''\nRespond only with
-      the output in the exact format specified in the system prompt, with no explanation
-      or conversation.", "role": "user"}], "model": "gpt-4-0314", "temperature": 0.0}'
-    headers:
-      Accept:
-      - '*/*'
-      Accept-Encoding:
-      - gzip, deflate
-      Connection:
-      - keep-alive
-      Content-Length:
-      - '1620'
-      Content-Type:
-      - application/json
-      X-Content-Hash:
-      - 37e220315181f2969a9b7eb6fa5df305799392e544329f68fdae60fe01200a18
-    method: POST
-    uri: https://api.openai.com/v1/chat/completions
-  response:
-    body:
-      string: !!binary |
-        H4sIAAAAAAAAA2xUwW4bRwy96yuIPcuG7SqxrVtSJ0EuTtH2YtRFMJrl7jKaJaccrmw18L8HnJUt
-        t8hFwA4f+R4fKX5fADTUNmto4hAsjjmdXD60H/sP57fDH61+vrq7K+++fHz/5tvdba9y3Sw9Qzbf
-        MNpz1mmUMSc0Ep7DUTEYetXzt9eri7Ori7erGhilxeRpfbaT1cnZL+erQ8YgFLE0a/hrAQDwvf66
-        Nm7xsVnD2fL5ZcRSQo/N+gUE0Kgkf2lCKVQssDXLYzAKG3KVextGXMPvErdfMmr49Nuf93yDJSpl
-        l7+GAFU87RACt0DMsps/330G71MKKtgQDErGSCHRv1iAGKKGzoh7mJj+meZ05D70/qYStyBOWZbw
-        QDZAgE7iVEAYNgm5dRSNjp7pbMARS60SQzZyFdxDMdG9YUrE/ek9f5KQyvqeT+AGd5gkewMy5hkA
-        HFTnchHZULGFoDJ5zf0GldEoQk8aug7L3NUzmXNPLSHHgwp8zEm0wl7USQeGcWBJ0u+XwMEmxWWF
-        24CkQM5aMLq5p/fsQn+dTYQRR9GwSbNT7Z7DSBHGqVA8KJFxQ3wgREbt905YrawWHoQEU4ohkdVw
-        NXkJyEPg6B44qro2e1mnyz0EBhpH1OLu4GNGrb2+iEwpbESD4cy1lwlMALlMiv6lx0XZUSFhoALd
-        lNIeFOtWtEsgjqLZyzhlzaIWw2xoh9huQtyCDSpTP8hkVWxWiVjKQckNFurZOabgxYspbecGfD13
-        yFVDsdAjYMIR2Q6j3Ohz/w+iqXV3fjp2gUQdLl9587/hHI3KqJ3oGI5OfZgdcZaOOKRXq+6dtFOd
-        vZuTJVEZ3JYoA3q5eVOySofFLawN4iPGyRymWKZUBRH7Xr/6G3jexJ1oj2ZV53GE0Iked/e0ORyC
-        p5cL0hFTGb4qhiLsV6GY5Bn2tAD4u16k6T9HpskqY7avJltkP1MXVxdzveZ4/I7R88vLQ9TEQjoG
-        Vm+uF87ytPgBAAD//wMAUi35pHoFAAA=
-    headers:
-      CF-Cache-Status:
-      - DYNAMIC
-      CF-RAY:
-      - 803a42163cee084c-IAD
-      Cache-Control:
-      - no-cache, must-revalidate
-      Connection:
-      - keep-alive
-      Content-Encoding:
-      - gzip
-      Content-Type:
-      - application/json
-      Date:
-      - Fri, 08 Sep 2023 21:24:44 GMT
-      Server:
-      - cloudflare
-      access-control-allow-origin:
-      - '*'
-      alt-svc:
-      - h3=":443"; ma=86400
-      openai-model:
-      - gpt-4-0314
-      openai-organization:
-      - significant-gravitas
-      openai-processing-ms:
-      - '19598'
-      openai-version:
-      - '2020-10-01'
-      strict-transport-security:
-      - max-age=15724800; includeSubDomains
-      x-ratelimit-limit-requests:
-      - '200'
-      x-ratelimit-limit-tokens:
-      - '40000'
-      x-ratelimit-remaining-requests:
-      - '199'
-      x-ratelimit-remaining-tokens:
-      - '27304'
-      x-ratelimit-reset-requests:
-      - 300ms
-      x-ratelimit-reset-tokens:
-      - 19.043s
-      x-request-id:
-      - 8dba4e8ded88e2d7797cf85ea3dae37d
-    status:
-      code: 200
-      message: OK
-version: 1
--- a/classic/forge/tests/vcr_cassettes/test_information_retrieval_challenge_a/test_information_retrieval_challenge_a.yaml
+++ b/classic/forge/tests/vcr_cassettes/test_information_retrieval_challenge_a/test_information_retrieval_challenge_a.yaml
--- a/classic/forge/tests/vcr_cassettes/test_information_retrieval_challenge_b/test_information_retrieval_challenge_b.yaml
+++ b/classic/forge/tests/vcr_cassettes/test_information_retrieval_challenge_b/test_information_retrieval_challenge_b.yaml
--- a/classic/forge/tests/vcr_cassettes/test_json_memory_get_relevant/test_json_memory_get_relevant.yaml
+++ b/classic/forge/tests/vcr_cassettes/test_json_memory_get_relevant/test_json_memory_get_relevant.yaml
--- a/classic/forge/tests/vcr_cassettes/test_memory_challenge_a/test_memory_challenge_a.yaml
+++ b/classic/forge/tests/vcr_cassettes/test_memory_challenge_a/test_memory_challenge_a.yaml
--- a/classic/forge/tests/vcr_cassettes/test_memory_challenge_c/test_memory_challenge_c.yaml
+++ b/classic/forge/tests/vcr_cassettes/test_memory_challenge_c/test_memory_challenge_c.yaml
--- a/classic/forge/tests/vcr_cassettes/test_write_file/test_write_file.yaml
+++ b/classic/forge/tests/vcr_cassettes/test_write_file/test_write_file.yaml
--- a/classic/original_autogpt/tests/integration/test_strategy_benchmark.py
+++ b/classic/original_autogpt/tests/integration/test_strategy_benchmark.py
@@ -1,12 +1,12 @@
-"""Pytest wrapper for strategy benchmark test harness.
+"""Pytest wrapper for direct_benchmark harness.

-This provides CI-friendly integration of the strategy benchmark,
+This provides CI-friendly integration of the direct_benchmark harness,
 allowing it to be run as part of the pytest suite.

 Usage:
-    # Run tests that don't need an agent (--help, --compare-only, etc.)
+    # Run tests that don't need an agent (--help, invalid args, etc.)
    poetry run pytest tests/integration/test_strategy_benchmark.py \
-        -v -k "help or invalid or compare"
+        -v -k "help or invalid"

    # Run full tests (requires API keys and agent to be configured)
    poetry run pytest tests/integration/test_strategy_benchmark.py -v
@@ -49,25 +49,25 @@ requires_agent = pytest.mark.skipif(
 )


-def get_project_root() -> Path:
-    """Get the original_autogpt project root directory."""
-    return Path(__file__).parent.parent.parent
+def get_direct_benchmark_dir() -> Path:
+    """Get the direct_benchmark directory."""
+    return Path(__file__).parent.parent.parent.parent / "direct_benchmark"


 def run_harness(*args: str, timeout: int = 600) -> subprocess.CompletedProcess:
-    """Run the test harness with given arguments.
+    """Run the direct_benchmark harness with given arguments.

    Args:
-        *args: Arguments to pass to test_prompt_strategies.py
+        *args: Arguments to pass to direct_benchmark run command
        timeout: Timeout in seconds (default: 10 minutes)

    Returns:
        CompletedProcess with stdout/stderr captured
    """
-    cmd = [sys.executable, "agbenchmark_config/test_prompt_strategies.py", *args]
+    cmd = [sys.executable, "-m", "direct_benchmark", "run", *args]
    return subprocess.run(
        cmd,
-        cwd=get_project_root(),
+        cwd=get_direct_benchmark_dir(),
        capture_output=True,
        text=True,
        timeout=timeout,
@@ -79,13 +79,18 @@ def test_strategy_comparison_quick():
    """Run quick strategy comparison as CI smoke test.

    This test:
-    1. Starts the agent with each strategy
-    2. Runs interface tests (fastest category)
-    3. Verifies at least one strategy produces passing results
+    1. Starts the agent with one_shot strategy
+    2. Runs general category tests
+    3. Verifies at least one test produces passing results

    Note: Requires API keys to be configured in environment.
    """
-    result = run_harness("--quick")
+    result = run_harness(
+        "--strategies", "one_shot",
+        "--categories", "general",
+        "-N", "1",
+        "--tests", "ReadFile",  # Single fast test for smoke testing
+    )

    # Print output for debugging
    print(result.stdout)
@@ -99,41 +104,17 @@ def test_strategy_comparison_quick():
    )


-def test_harness_compare_only():
-    """Test that compare-only mode works with existing reports.
-
-    This test doesn't run any benchmarks, just verifies the report
-    comparison logic works correctly.
-    """
-    result = run_harness("--compare-only")
-
-    # Print output for debugging
-    print(result.stdout)
-    if result.stderr:
-        print("STDERR:", result.stderr)
-
-    # compare-only returns 0 if it can read reports (even if empty)
-    # It returns 1 only if there's an actual error
-    # We check that it ran without crashing
-    assert "PROMPT STRATEGY" in result.stdout or result.returncode in (0, 1), (
-        f"Harness crashed unexpectedly\n"
-        f"stdout: {result.stdout[-2000:]}\n"
-        f"stderr: {result.stderr[-500:]}"
-    )
-
-
@requires_agent
 def test_single_strategy():
-    """Test running a single strategy with interface tests.
+    """Test running a single strategy with coding tests.

    This is a more focused test that only runs one_shot strategy
    to verify basic functionality without testing all strategies.
    """
    result = run_harness(
-        "--strategies",
-        "one_shot",
-        "--categories",
-        "interface",
+        "--strategies", "one_shot",
+        "--categories", "coding",
+        "--tests", "ReadFile,WriteFile",
    )

    # Print output for debugging
@@ -154,7 +135,7 @@ def test_harness_help():

    assert result.returncode == 0, "Harness --help should return 0"
    assert "strategies" in result.stdout.lower(), "Help should mention strategies"
-    assert "quick" in result.stdout.lower(), "Help should mention quick mode"
+    assert "categories" in result.stdout.lower(), "Help should mention categories"


 def test_harness_invalid_strategy():
@@ -162,4 +143,6 @@ def test_harness_invalid_strategy():
    result = run_harness("--strategies", "invalid_strategy", timeout=30)

    assert result.returncode != 0, "Invalid strategy should return non-zero"
-    assert "invalid" in result.stdout.lower(), "Should mention invalid strategy"
+    # Error message may be in stdout or stderr depending on the CLI framework
+    combined_output = (result.stdout + result.stderr).lower()
+    assert "invalid" in combined_output, "Should mention invalid strategy"