Files
AutoGPT/classic/direct_benchmark/challenges/__init__.py
Nicholas Tindle 804430e243 refactor(classic): migrate from agbenchmark to direct_benchmark harness
- Remove old benchmark/ folder with agbenchmark framework
- Move challenges to direct_benchmark/challenges/
- Move analysis tools (analyze_reports.py, analyze_failures.py) to direct_benchmark/
- Move challenges_already_beaten.json to direct_benchmark/
- Update CI workflow to use direct_benchmark
- Update CLAUDE.md files with new benchmarking instructions
- Add benchmarking section to original_autogpt/CLAUDE.md

The direct_benchmark harness directly instantiates agents without HTTP
server overhead, enabling parallel execution with asyncio semaphore.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-19 22:29:51 -06:00

57 lines
1.6 KiB
Python

import glob
import json
import logging
from pathlib import Path
from .base import BaseChallenge, ChallengeInfo
from .builtin import OPTIONAL_CATEGORIES
logger = logging.getLogger(__name__)
def get_challenge_from_source_uri(source_uri: str) -> type[BaseChallenge]:
from .builtin import BuiltinChallenge
from .webarena import WebArenaChallenge
provider_prefix = source_uri.split("/", 1)[0]
if provider_prefix == BuiltinChallenge.SOURCE_URI_PREFIX:
return BuiltinChallenge.from_source_uri(source_uri)
if provider_prefix == WebArenaChallenge.SOURCE_URI_PREFIX:
return WebArenaChallenge.from_source_uri(source_uri)
raise ValueError(f"Cannot resolve source_uri '{source_uri}'")
def get_unique_categories() -> set[str]:
"""
Reads all challenge spec files and returns a set of all their categories.
"""
categories = set()
challenges_dir = Path(__file__).parent
glob_path = f"{challenges_dir}/**/data.json"
for data_file in glob.glob(glob_path, recursive=True):
with open(data_file, "r") as f:
try:
challenge_data = json.load(f)
categories.update(challenge_data.get("category", []))
except json.JSONDecodeError:
logger.error(f"Error: {data_file} is not a valid JSON file.")
continue
except IOError:
logger.error(f"IOError: file could not be read: {data_file}")
continue
return categories
__all__ = [
"BaseChallenge",
"ChallengeInfo",
"get_unique_categories",
"OPTIONAL_CATEGORIES",
]