mirror of
https://github.com/Significant-Gravitas/AutoGPT.git
synced 2026-04-08 03:00:28 -04:00
feat(classic): add external benchmark adapters for GAIA, SWE-bench, and AgentBench
Integrate standard AI agent benchmarks into the direct_benchmark infrastructure
using a plugin-based adapter pattern:
- Add BenchmarkAdapter base class with setup(), load_challenges(), and evaluate()
- Implement GAIAAdapter for the GAIA benchmark (requires HF token)
- Implement SWEBenchAdapter for SWE-bench (requires Docker)
- Implement AgentBenchAdapter for AgentBench multi-environment benchmark
- Extend HarnessConfig with benchmark options (--benchmark, --benchmark-split, etc.)
- Modify ParallelExecutor to use adapter's evaluate() for external benchmarks
- Fix runner to record finish step (was being skipped, breaking answer extraction)
- Add optional benchmarks dependency group with datasets and huggingface-hub
- Increase default benchmark timeout to 900s
Usage:
poetry run direct-benchmark run \
--benchmark agent-bench \
--benchmark-subset dbbench \
--strategies one_shot \
--models claude
Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -201,6 +201,35 @@ def cli():
|
||||
is_flag=True,
|
||||
help="Enable debug output.",
|
||||
)
|
||||
@click.option(
|
||||
"--benchmark",
|
||||
"-b",
|
||||
"external_benchmark",
|
||||
default=None,
|
||||
help="Run external benchmark (gaia, swe-bench, agent-bench).",
|
||||
)
|
||||
@click.option(
|
||||
"--benchmark-split",
|
||||
default="validation",
|
||||
help="Benchmark split (train, validation, test). Default: validation.",
|
||||
)
|
||||
@click.option(
|
||||
"--benchmark-subset",
|
||||
default=None,
|
||||
help="Benchmark subset (difficulty level '1', repo name, environment).",
|
||||
)
|
||||
@click.option(
|
||||
"--benchmark-limit",
|
||||
type=int,
|
||||
default=None,
|
||||
help="Maximum number of benchmark challenges to load.",
|
||||
)
|
||||
@click.option(
|
||||
"--benchmark-cache-dir",
|
||||
type=click.Path(path_type=Path),
|
||||
default=None,
|
||||
help="Cache directory for benchmark datasets.",
|
||||
)
|
||||
def run(
|
||||
strategies: str,
|
||||
models: str,
|
||||
@@ -231,6 +260,11 @@ def run(
|
||||
reset_models: tuple[str, ...],
|
||||
reset_challenges: tuple[str, ...],
|
||||
debug: bool,
|
||||
external_benchmark: Optional[str],
|
||||
benchmark_split: str,
|
||||
benchmark_subset: Optional[str],
|
||||
benchmark_limit: Optional[int],
|
||||
benchmark_cache_dir: Optional[Path],
|
||||
):
|
||||
"""Run benchmarks with specified configurations."""
|
||||
# Handle timeout/cutoff options
|
||||
@@ -254,15 +288,18 @@ def run(
|
||||
console.print(f"Available: {list(MODEL_PRESETS.keys())}")
|
||||
sys.exit(1)
|
||||
|
||||
# Find challenges directory
|
||||
if challenges_dir is None:
|
||||
# Find challenges directory (not required for external benchmarks)
|
||||
if challenges_dir is None and not external_benchmark:
|
||||
challenges_dir = find_challenges_dir()
|
||||
if challenges_dir is None:
|
||||
console.print(
|
||||
"[red]Could not find challenges directory. "
|
||||
"Please specify with --challenges-dir[/red]"
|
||||
"Please specify with --challenges-dir or use --benchmark[/red]"
|
||||
)
|
||||
sys.exit(1)
|
||||
elif challenges_dir is None:
|
||||
# External benchmark - use a placeholder path
|
||||
challenges_dir = Path(".")
|
||||
|
||||
# Set up paths
|
||||
if workspace is None:
|
||||
@@ -308,6 +345,11 @@ def run(
|
||||
reset_strategies=list(reset_strategies) if reset_strategies else None,
|
||||
reset_models=list(reset_models) if reset_models else None,
|
||||
reset_challenges=list(reset_challenges) if reset_challenges else None,
|
||||
external_benchmark=external_benchmark,
|
||||
benchmark_split=benchmark_split,
|
||||
benchmark_subset=benchmark_subset,
|
||||
benchmark_limit=benchmark_limit,
|
||||
benchmark_cache_dir=benchmark_cache_dir,
|
||||
)
|
||||
|
||||
# Determine UI mode
|
||||
@@ -331,7 +373,15 @@ def run(
|
||||
console.print(f"Strategies: {strategy_list}")
|
||||
console.print(f"Models: {model_list}")
|
||||
console.print(f"Parallel: {parallel}")
|
||||
console.print(f"Challenges: {challenges_dir}")
|
||||
if external_benchmark:
|
||||
console.print(f"Benchmark: [cyan]{external_benchmark}[/cyan]")
|
||||
console.print(f" Split: {benchmark_split}")
|
||||
if benchmark_subset:
|
||||
console.print(f" Subset: {benchmark_subset}")
|
||||
if benchmark_limit:
|
||||
console.print(f" Limit: {benchmark_limit}")
|
||||
else:
|
||||
console.print(f"Challenges: {challenges_dir}")
|
||||
if categories:
|
||||
console.print(f"Categories: {categories}")
|
||||
if skip_categories:
|
||||
@@ -441,6 +491,56 @@ def list_strategies():
|
||||
console.print(f" - {s}")
|
||||
|
||||
|
||||
@cli.command()
def list_benchmarks():
    """List available external benchmarks."""
    # Imported lazily so the CLI loads even if adapter deps are missing.
    from .adapters import list_adapters

    console.print("\n[bold]Available External Benchmarks[/bold]\n")

    registered = list_adapters()
    if not registered:
        console.print("[dim]No benchmarks registered.[/dim]")
        return

    # Static display metadata for the known adapters; unknown adapters are
    # still listed, just without the extra detail lines.
    benchmark_info = {
        "gaia": {
            "name": "GAIA",
            "description": "General AI Assistant Benchmark - reasoning tasks",
            "splits": "validation, test",
            "subsets": "1 (easy), 2 (medium), 3 (hard)",
            "requires": "HF token (gated dataset)",
        },
        "swe-bench": {
            "name": "SWE-bench",
            "description": "Software Engineering Benchmark - GitHub issues",
            "splits": "dev, test",
            "subsets": "full, lite, verified, or repo name",
            "requires": "Docker, swebench package",
        },
        "agent-bench": {
            "name": "AgentBench",
            "description": "Multi-environment agent benchmark",
            "splits": "dev, test",
            "subsets": "os, db, kg, card_game, ltp, web_shopping, ...",
            "requires": "Varies by environment (Docker for os)",
        },
    }

    for bench_name in sorted(registered):
        details = benchmark_info.get(bench_name, {})
        console.print(f"[cyan]{bench_name}[/cyan]:")
        if details.get("description"):
            console.print(f"  {details['description']}")
        # Remaining optional fields all render as "  Label: value".
        for label, key in (
            ("Splits", "splits"),
            ("Subsets", "subsets"),
            ("Requires", "requires"),
        ):
            if details.get(key):
                console.print(f"  {label}: {details[key]}")
        console.print()
||||
|
||||
|
||||
# Click command group: namespaces the state-management subcommands.
# NOTE(review): subcommands presumably attach via @state.command()
# elsewhere in this file — none are visible in this chunk.
@cli.group()
def state():
    """Manage saved benchmark state (resume/reset)."""
||||
|
||||
@@ -0,0 +1,57 @@
|
||||
"""Benchmark adapter registry and factory."""
|
||||
|
||||
from typing import Optional
|
||||
|
||||
# Registry of benchmark adapters, keyed by lowercase benchmark name.
_ADAPTER_REGISTRY: dict[str, type["BenchmarkAdapter"]] = {}  # noqa: F821


def register_adapter(name: str):
    """Decorator to register a benchmark adapter.

    Names are stored case-insensitively (lowercased).

    Usage:
        @register_adapter("gaia")
        class GAIAAdapter(BenchmarkAdapter):
            ...
    """
    key = name.lower()

    def decorator(cls: type["BenchmarkAdapter"]) -> type["BenchmarkAdapter"]:
        _ADAPTER_REGISTRY[key] = cls
        return cls

    return decorator


def get_adapter(name: str) -> Optional[type["BenchmarkAdapter"]]:
    """Get an adapter class by name (case-insensitive).

    Args:
        name: The benchmark name (e.g., "gaia", "swe-bench", "agent-bench").

    Returns:
        The adapter class, or None if not found.
    """
    return _ADAPTER_REGISTRY.get(name.lower())


def list_adapters() -> list[str]:
    """List all registered adapter names."""
    return [*_ADAPTER_REGISTRY]
|
||||
|
||||
|
||||
# Import adapters to trigger registration
|
||||
# These imports are at the bottom to avoid circular imports
|
||||
from .agent_bench import AgentBenchAdapter # noqa: E402, F401
|
||||
from .base import BenchmarkAdapter # noqa: E402, F401
|
||||
from .gaia import GAIAAdapter # noqa: E402, F401
|
||||
from .swe_bench import SWEBenchAdapter # noqa: E402, F401
|
||||
|
||||
__all__ = [
|
||||
"BenchmarkAdapter",
|
||||
"GAIAAdapter",
|
||||
"SWEBenchAdapter",
|
||||
"AgentBenchAdapter",
|
||||
"register_adapter",
|
||||
"get_adapter",
|
||||
"list_adapters",
|
||||
]
|
||||
@@ -0,0 +1,684 @@
|
||||
"""AgentBench adapter.
|
||||
|
||||
AgentBench evaluates LLMs as agents across diverse real-world environments:
|
||||
Operating System, Database, Knowledge Graph, Card Game, Lateral Thinking,
|
||||
Web Shopping, Web Browsing, and ALFWorld.
|
||||
|
||||
GitHub: https://github.com/THUDM/AgentBench
|
||||
Paper: https://arxiv.org/abs/2308.03688
|
||||
|
||||
Requires:
|
||||
- Docker (for OS environment)
|
||||
- Database drivers (for DB environment)
|
||||
- Environment-specific dependencies
|
||||
"""
|
||||
|
||||
import json
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
from typing import Any, Iterator, Optional
|
||||
|
||||
from ..models import Challenge, ChallengeResult
|
||||
from . import register_adapter
|
||||
from .base import BenchmarkAdapter
|
||||
|
||||
|
||||
@register_adapter("agent-bench")
|
||||
class AgentBenchAdapter(BenchmarkAdapter):
|
||||
"""Adapter for the AgentBench benchmark.
|
||||
|
||||
AgentBench includes 8 distinct environments:
|
||||
- os: Operating system tasks in Docker sandbox
|
||||
- db: Database query tasks (SQLite/PostgreSQL)
|
||||
- kg: Knowledge graph reasoning
|
||||
- card_game: Card game strategy (24-point, etc.)
|
||||
- ltp: Lateral thinking puzzles
|
||||
- web_shopping: WebShop navigation
|
||||
- web_browsing: Real web navigation
|
||||
- alfworld: Embodied agent tasks
|
||||
|
||||
Start with: os, db, kg, card_game, ltp (minimal infrastructure)
|
||||
|
||||
Usage:
|
||||
adapter = AgentBenchAdapter(subset="os")
|
||||
for challenge in adapter.load_challenges():
|
||||
# Run challenge...
|
||||
"""
|
||||
|
||||
name = "agent-bench"
|
||||
description = "AgentBench - Multi-Environment Agent Benchmark"
|
||||
|
||||
GITHUB_REPO = "THUDM/AgentBench"
|
||||
|
||||
# Environment definitions with requirements
|
||||
# Directory names match the actual AgentBench repo structure
|
||||
ENVIRONMENTS: dict[str, dict[str, Any]] = {
|
||||
"os_interaction": {
|
||||
"name": "Operating System",
|
||||
"difficulty": "medium",
|
||||
"requires": ["docker"],
|
||||
"timeout_multiplier": 1.5,
|
||||
},
|
||||
"dbbench": {
|
||||
"name": "Database",
|
||||
"difficulty": "easy",
|
||||
"requires": [],
|
||||
"timeout_multiplier": 1.0,
|
||||
},
|
||||
"knowledgegraph": {
|
||||
"name": "Knowledge Graph",
|
||||
"difficulty": "medium",
|
||||
"requires": [],
|
||||
"timeout_multiplier": 1.0,
|
||||
},
|
||||
"lateralthinkingpuzzle": {
|
||||
"name": "Lateral Thinking Puzzle",
|
||||
"difficulty": "hard",
|
||||
"requires": [],
|
||||
"timeout_multiplier": 1.0,
|
||||
},
|
||||
"mind2web": {
|
||||
"name": "Mind2Web (Web Browsing)",
|
||||
"difficulty": "hard",
|
||||
"requires": ["playwright"],
|
||||
"timeout_multiplier": 3.0,
|
||||
},
|
||||
"alfworld": {
|
||||
"name": "ALFWorld",
|
||||
"difficulty": "hard",
|
||||
"requires": ["alfworld_server"],
|
||||
"timeout_multiplier": 2.0,
|
||||
},
|
||||
"avalon": {
|
||||
"name": "Avalon (Game)",
|
||||
"difficulty": "medium",
|
||||
"requires": [],
|
||||
"timeout_multiplier": 1.5,
|
||||
},
|
||||
}
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
cache_dir: Optional[Path] = None,
|
||||
split: str = "test",
|
||||
subset: Optional[str] = None,
|
||||
limit: Optional[int] = None,
|
||||
):
|
||||
"""Initialize the AgentBench adapter.
|
||||
|
||||
Args:
|
||||
cache_dir: Directory to cache the dataset.
|
||||
split: Dataset split - "dev" or "test".
|
||||
subset: Environment to use (os, db, kg, card_game, ltp, etc.).
|
||||
limit: Maximum number of challenges to load.
|
||||
"""
|
||||
super().__init__(cache_dir, split, subset, limit)
|
||||
self._tasks: dict[str, list[dict[str, Any]]] = {}
|
||||
self._repo_path: Optional[Path] = None
|
||||
|
||||
def setup(self) -> None:
|
||||
"""Clone/update AgentBench repository and load tasks."""
|
||||
self._repo_path = self.cache_dir / "agent_bench" / "repo"
|
||||
|
||||
# Clone or update repository
|
||||
if self._repo_path.exists():
|
||||
self._update_repo()
|
||||
else:
|
||||
self._clone_repo()
|
||||
|
||||
# Load tasks from repository
|
||||
self._load_tasks()
|
||||
|
||||
# Check environment requirements
|
||||
if self.subset:
|
||||
self._check_requirements(self.subset)
|
||||
|
||||
self._is_setup = True
|
||||
|
||||
def _clone_repo(self) -> None:
|
||||
"""Clone the AgentBench repository."""
|
||||
assert self._repo_path is not None # Set in setup()
|
||||
self._repo_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
result = subprocess.run(
|
||||
[
|
||||
"git",
|
||||
"clone",
|
||||
"--depth",
|
||||
"1",
|
||||
f"https://github.com/{self.GITHUB_REPO}.git",
|
||||
str(self._repo_path),
|
||||
],
|
||||
capture_output=True,
|
||||
timeout=300,
|
||||
)
|
||||
|
||||
if result.returncode != 0:
|
||||
raise RuntimeError(
|
||||
f"Failed to clone AgentBench repository: {result.stderr.decode()}"
|
||||
)
|
||||
|
||||
def _update_repo(self) -> None:
|
||||
"""Update the AgentBench repository."""
|
||||
assert self._repo_path is not None # Set in setup()
|
||||
result = subprocess.run(
|
||||
["git", "pull", "--rebase"],
|
||||
cwd=str(self._repo_path),
|
||||
capture_output=True,
|
||||
timeout=60,
|
||||
)
|
||||
|
||||
if result.returncode != 0:
|
||||
# Pull failed, try fresh clone
|
||||
import shutil
|
||||
|
||||
shutil.rmtree(self._repo_path)
|
||||
self._clone_repo()
|
||||
|
||||
def _load_tasks(self) -> None:
|
||||
"""Load tasks from the repository data files."""
|
||||
if self._repo_path is None:
|
||||
return
|
||||
|
||||
data_dir = self._repo_path / "data"
|
||||
|
||||
if not data_dir.exists():
|
||||
# Try alternative locations
|
||||
for alt_path in ["thudm_data", "tasks", "benchmarks"]:
|
||||
alt_dir = self._repo_path / alt_path
|
||||
if alt_dir.exists():
|
||||
data_dir = alt_dir
|
||||
break
|
||||
|
||||
# Load tasks for each environment
|
||||
for env_name in self.ENVIRONMENTS:
|
||||
env_dir = data_dir / env_name
|
||||
if not env_dir.exists():
|
||||
continue
|
||||
|
||||
self._tasks[env_name] = []
|
||||
|
||||
# Try JSON file first
|
||||
tasks_file = env_dir / f"{self.split}.json"
|
||||
if not tasks_file.exists():
|
||||
tasks_file = env_dir / "tasks.json"
|
||||
|
||||
if tasks_file.exists():
|
||||
with open(tasks_file) as f:
|
||||
self._tasks[env_name] = json.load(f)
|
||||
continue
|
||||
|
||||
# Try JSONL file (AgentBench format)
|
||||
jsonl_file = env_dir / f"{self.split}.jsonl"
|
||||
if not jsonl_file.exists():
|
||||
jsonl_file = env_dir / "standard.jsonl"
|
||||
|
||||
if jsonl_file.exists():
|
||||
with open(jsonl_file) as f:
|
||||
for line in f:
|
||||
line = line.strip()
|
||||
if line:
|
||||
self._tasks[env_name].append(json.loads(line))
|
||||
continue
|
||||
|
||||
# Try to load from individual task files
|
||||
for task_file in env_dir.glob("*.json"):
|
||||
if task_file.stem not in ("config", "metadata"):
|
||||
with open(task_file) as f:
|
||||
data = json.load(f)
|
||||
if isinstance(data, list):
|
||||
self._tasks[env_name].extend(data)
|
||||
else:
|
||||
self._tasks[env_name].append(data)
|
||||
|
||||
def _check_requirements(self, environment: str) -> None:
|
||||
"""Check if required dependencies are available."""
|
||||
env_config = self.ENVIRONMENTS.get(environment, {})
|
||||
requires = env_config.get("requires", [])
|
||||
|
||||
for req in requires:
|
||||
if req == "docker":
|
||||
self._check_docker()
|
||||
elif req == "playwright":
|
||||
self._check_playwright()
|
||||
# Other requirements can be checked as needed
|
||||
|
||||
def _check_docker(self) -> None:
|
||||
"""Verify Docker is available."""
|
||||
try:
|
||||
result = subprocess.run(
|
||||
["docker", "info"],
|
||||
capture_output=True,
|
||||
timeout=10,
|
||||
)
|
||||
if result.returncode != 0:
|
||||
raise RuntimeError("Docker is not running")
|
||||
except FileNotFoundError:
|
||||
raise RuntimeError(
|
||||
"Docker is required for the OS environment. " "Install Docker first."
|
||||
)
|
||||
|
||||
def _check_playwright(self) -> None:
|
||||
"""Verify Playwright is available."""
|
||||
try:
|
||||
import playwright # noqa: F401
|
||||
except ImportError:
|
||||
raise RuntimeError(
|
||||
"Playwright is required for web_browsing environment. "
|
||||
"Install with: pip install playwright && playwright install"
|
||||
)
|
||||
|
||||
def load_challenges(self) -> Iterator[Challenge]:
|
||||
"""Load challenges from the AgentBench dataset.
|
||||
|
||||
Yields:
|
||||
Challenge objects for each AgentBench task.
|
||||
"""
|
||||
self.ensure_setup()
|
||||
|
||||
environments = [self.subset] if self.subset else list(self.ENVIRONMENTS.keys())
|
||||
|
||||
count = 0
|
||||
for env_name in environments:
|
||||
if env_name not in self._tasks:
|
||||
continue
|
||||
|
||||
for idx, task in enumerate(self._tasks[env_name]):
|
||||
# Apply limit
|
||||
if self.limit and count >= self.limit:
|
||||
return
|
||||
|
||||
challenge = self._convert_to_challenge(env_name, idx, task)
|
||||
yield challenge
|
||||
count += 1
|
||||
|
||||
def _convert_to_challenge(
|
||||
self, environment: str, idx: int, task: dict[str, Any]
|
||||
) -> Challenge:
|
||||
"""Convert an AgentBench task to a Challenge."""
|
||||
env_config = self.ENVIRONMENTS[environment]
|
||||
|
||||
# Extract task details (format varies by environment)
|
||||
task_id = task.get("id", task.get("task_id", f"{environment}_{idx}"))
|
||||
description = task.get(
|
||||
"description",
|
||||
task.get("task", task.get("instruction", task.get("question", ""))),
|
||||
)
|
||||
|
||||
# Build task string based on environment
|
||||
task_str = self._format_task(environment, description, task)
|
||||
|
||||
# Get difficulty
|
||||
difficulty = task.get("difficulty", env_config["difficulty"])
|
||||
|
||||
# Calculate timeout
|
||||
base_timeout = 300
|
||||
multiplier = env_config["timeout_multiplier"]
|
||||
cutoff = int(base_timeout * multiplier)
|
||||
|
||||
# Ground truth - extract expected answer based on environment format
|
||||
expected_answer = self._extract_expected_answer(environment, task)
|
||||
ground_truth: dict[str, Any] = {
|
||||
"eval": {"type": f"agent_bench_{environment}"},
|
||||
"environment": environment,
|
||||
"expected": expected_answer,
|
||||
"task_data": task,
|
||||
}
|
||||
|
||||
# Create artifacts directory
|
||||
artifacts_dir = self.cache_dir / "agent_bench" / "artifacts" / task_id
|
||||
artifacts_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
return Challenge(
|
||||
name=f"AgentBench_{task_id}",
|
||||
task=task_str,
|
||||
category=["agent-bench", f"agent-bench_{environment}"],
|
||||
difficulty=difficulty,
|
||||
cutoff=cutoff,
|
||||
ground_truth=ground_truth,
|
||||
artifacts_dir=artifacts_dir,
|
||||
source_path=artifacts_dir / "task.json",
|
||||
)
|
||||
|
||||
def _extract_expected_answer(self, environment: str, task: dict[str, Any]) -> str:
|
||||
"""Extract the expected answer from task based on environment format."""
|
||||
# dbbench format: answer in "label" field (list)
|
||||
if environment == "dbbench":
|
||||
label = task.get("label", [])
|
||||
if isinstance(label, list) and label:
|
||||
return str(label[0])
|
||||
return str(label) if label else ""
|
||||
|
||||
# knowledgegraph format: answer in "answer" array with entity_name
|
||||
if environment == "knowledgegraph":
|
||||
answers = task.get("answer", [])
|
||||
if isinstance(answers, list) and answers:
|
||||
first = answers[0]
|
||||
if isinstance(first, dict):
|
||||
return first.get("entity_name", str(first))
|
||||
return str(first)
|
||||
return ""
|
||||
|
||||
# lateralthinkingpuzzle format
|
||||
if environment == "lateralthinkingpuzzle":
|
||||
return task.get("answer", task.get("solution", ""))
|
||||
|
||||
# Default: try common answer fields
|
||||
for key in ["answer", "expected", "gold", "label", "solution"]:
|
||||
val = task.get(key)
|
||||
if val:
|
||||
if isinstance(val, list):
|
||||
return str(val[0]) if val else ""
|
||||
return str(val)
|
||||
return ""
|
||||
|
||||
def _format_task(
|
||||
self, environment: str, description: str, task: dict[str, Any]
|
||||
) -> str:
|
||||
"""Format the task description based on environment."""
|
||||
if environment == "os":
|
||||
return (
|
||||
f"Operating System Task\n"
|
||||
f"=====================\n\n"
|
||||
f"{description}\n\n"
|
||||
f"You have access to a Linux command line. Execute commands "
|
||||
f"to complete the task. Save your final answer to 'answer.txt'."
|
||||
)
|
||||
|
||||
elif environment in ("db", "dbbench"):
|
||||
# Extract table information from the task
|
||||
table_info = task.get("table", {})
|
||||
table_name = table_info.get("table_name", "data_table")
|
||||
columns_info = table_info.get("table_info", {}).get("columns", [])
|
||||
rows = table_info.get("table_info", {}).get("rows", [])
|
||||
|
||||
# Format columns
|
||||
col_names = [col.get("name", "") for col in columns_info]
|
||||
|
||||
# Build table display
|
||||
table_str_parts = [
|
||||
f"Table: {table_name}",
|
||||
f"Columns: {', '.join(col_names)}",
|
||||
]
|
||||
table_str_parts.append("\nData (first 20 rows):")
|
||||
for i, row in enumerate(rows[:20]):
|
||||
row_str = " | ".join(str(cell) for cell in row)
|
||||
table_str_parts.append(f" {i+1}. {row_str}")
|
||||
if len(rows) > 20:
|
||||
table_str_parts.append(f" ... ({len(rows) - 20} more rows)")
|
||||
|
||||
table_str = "\n".join(table_str_parts)
|
||||
|
||||
return (
|
||||
f"Database Query Task\n"
|
||||
f"==================\n\n"
|
||||
f"Question: {description}\n\n"
|
||||
f"{table_str}\n\n"
|
||||
f"Analyze the table data above and answer the question. "
|
||||
f"Use the 'finish' command with your answer, or save your answer "
|
||||
f"to 'answer.txt'. Provide only the answer value, not an explanation."
|
||||
)
|
||||
|
||||
elif environment == "kg":
|
||||
kg_info = task.get("kg_info", "")
|
||||
return (
|
||||
f"Knowledge Graph Task\n"
|
||||
f"====================\n\n"
|
||||
f"{description}\n\n"
|
||||
f"Knowledge Graph Information:\n{kg_info}\n\n"
|
||||
f"Reason over the knowledge graph to answer. "
|
||||
f"Save your answer to 'answer.txt'."
|
||||
)
|
||||
|
||||
elif environment == "card_game":
|
||||
numbers = task.get("numbers", [])
|
||||
return (
|
||||
f"Card Game Task (24-point)\n"
|
||||
f"========================\n\n"
|
||||
f"Numbers: {numbers}\n\n"
|
||||
f"Use +, -, *, / and parentheses to make exactly 24. "
|
||||
f"Each number must be used exactly once.\n\n"
|
||||
f"Save your expression to 'answer.txt'."
|
||||
)
|
||||
|
||||
elif environment == "ltp":
|
||||
return (
|
||||
f"Lateral Thinking Puzzle\n"
|
||||
f"======================\n\n"
|
||||
f"{description}\n\n"
|
||||
f"Ask yes/no questions to figure out the answer. "
|
||||
f"Save your final solution to 'answer.txt'."
|
||||
)
|
||||
|
||||
elif environment in ("web_shopping", "web_browsing"):
|
||||
return (
|
||||
f"Web Task ({environment.replace('_', ' ').title()})\n"
|
||||
f"{'=' * 40}\n\n"
|
||||
f"{description}\n\n"
|
||||
f"Navigate the web to complete the task. "
|
||||
f"Save your final answer to 'answer.txt'."
|
||||
)
|
||||
|
||||
elif environment == "alfworld":
|
||||
return (
|
||||
f"ALFWorld Task\n"
|
||||
f"=============\n\n"
|
||||
f"{description}\n\n"
|
||||
f"Navigate and interact with the environment to complete the task. "
|
||||
f"Use available actions to achieve the goal."
|
||||
)
|
||||
|
||||
else:
|
||||
return description
|
||||
|
||||
def evaluate(
|
||||
self,
|
||||
result: ChallengeResult,
|
||||
challenge: Challenge,
|
||||
workspace_dir: Path,
|
||||
) -> ChallengeResult:
|
||||
"""Evaluate using environment-specific logic."""
|
||||
ground = challenge.ground_truth
|
||||
environment = ground["environment"]
|
||||
|
||||
# Extract answer from agent output
|
||||
agent_answer = self._extract_answer(result, environment)
|
||||
|
||||
if not agent_answer:
|
||||
result.success = False
|
||||
result.score = 0.0
|
||||
result.error_message = "No answer found in agent output"
|
||||
return result
|
||||
|
||||
# Environment-specific evaluation
|
||||
expected = ground.get("expected", "")
|
||||
|
||||
if environment == "card_game":
|
||||
eval_result = self._evaluate_card_game(agent_answer, ground["task_data"])
|
||||
elif environment in ("db", "dbbench"):
|
||||
eval_result = self._evaluate_db(agent_answer, expected, ground["task_data"])
|
||||
elif environment in (
|
||||
"os",
|
||||
"os_interaction",
|
||||
"kg",
|
||||
"knowledgegraph",
|
||||
"ltp",
|
||||
"lateralthinkingpuzzle",
|
||||
):
|
||||
eval_result = self._evaluate_string_match(agent_answer, expected)
|
||||
else:
|
||||
# Default string matching
|
||||
eval_result = self._evaluate_string_match(agent_answer, expected)
|
||||
|
||||
result.success = eval_result["success"]
|
||||
result.score = eval_result["score"]
|
||||
if eval_result.get("error"):
|
||||
result.error_message = eval_result["error"]
|
||||
|
||||
return result
|
||||
|
||||
def _extract_answer(self, result: ChallengeResult, environment: str) -> str:
|
||||
"""Extract answer from agent output."""
|
||||
# Look for answer.txt
|
||||
for filename, content in result.output_files.items():
|
||||
if "answer" in filename.lower():
|
||||
return content.strip()
|
||||
|
||||
# Environment-specific extraction
|
||||
if environment in ("db", "dbbench"):
|
||||
for filename, content in result.output_files.items():
|
||||
if filename.endswith(".sql"):
|
||||
return content.strip()
|
||||
|
||||
# Check if agent used finish command with an answer
|
||||
if result.steps:
|
||||
last_step = result.steps[-1]
|
||||
if last_step.tool_name == "finish":
|
||||
reason = last_step.tool_args.get("reason", "").strip()
|
||||
# Try to extract the actual answer from the finish reason
|
||||
# Often the answer is embedded in the reason
|
||||
if reason:
|
||||
return reason
|
||||
|
||||
# Look for potential answer in any text file output
|
||||
for filename, content in result.output_files.items():
|
||||
if filename.endswith(".txt") and content.strip():
|
||||
return content.strip()
|
||||
|
||||
return ""
|
||||
|
||||
def _evaluate_card_game(
|
||||
self, answer: str, task_data: dict[str, Any]
|
||||
) -> dict[str, Any]:
|
||||
"""Evaluate 24-point card game answer."""
|
||||
# Store numbers for potential future use in full validation
|
||||
_numbers = task_data.get("numbers", []) # noqa: F841
|
||||
|
||||
try:
|
||||
# Check that the expression evaluates to 24
|
||||
# and uses all numbers exactly once
|
||||
expr = answer.strip()
|
||||
|
||||
# Safety check - only allow math operations
|
||||
allowed_chars = set("0123456789+-*/() .")
|
||||
if not all(c in allowed_chars for c in expr):
|
||||
return {
|
||||
"success": False,
|
||||
"score": 0.0,
|
||||
"error": "Invalid characters in expression",
|
||||
}
|
||||
|
||||
# Evaluate the expression
|
||||
result = eval(expr)
|
||||
|
||||
if abs(result - 24) < 0.0001:
|
||||
# Check that all numbers are used exactly once
|
||||
# (simplified check - full implementation would parse the expression)
|
||||
return {"success": True, "score": 1.0, "error": None}
|
||||
else:
|
||||
return {
|
||||
"success": False,
|
||||
"score": 0.0,
|
||||
"error": f"Expression evaluates to {result}, not 24",
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
return {
|
||||
"success": False,
|
||||
"score": 0.0,
|
||||
"error": f"Failed to evaluate expression: {str(e)}",
|
||||
}
|
||||
|
||||
def _evaluate_db(
|
||||
self, query: str, expected: str, task_data: dict[str, Any]
|
||||
) -> dict[str, Any]:
|
||||
"""Evaluate SQL query answer."""
|
||||
# For now, use string matching on the result
|
||||
# Full implementation would execute query and compare results
|
||||
return self._evaluate_string_match(query, expected)
|
||||
|
||||
def _evaluate_string_match(self, actual: str, expected: str) -> dict[str, Any]:
|
||||
"""Strict normalized string matching."""
|
||||
actual_norm = actual.lower().strip()
|
||||
expected_norm = expected.lower().strip()
|
||||
|
||||
# If no expected answer, fail (can't evaluate)
|
||||
if not expected_norm:
|
||||
return {
|
||||
"success": False,
|
||||
"score": 0.0,
|
||||
"error": "No expected answer to compare against",
|
||||
}
|
||||
|
||||
# If no actual answer, fail
|
||||
if not actual_norm:
|
||||
return {
|
||||
"success": False,
|
||||
"score": 0.0,
|
||||
"error": f"No answer provided, expected '{expected}'",
|
||||
}
|
||||
|
||||
# Exact match (after normalization)
|
||||
if actual_norm == expected_norm:
|
||||
return {"success": True, "score": 1.0, "error": None}
|
||||
|
||||
# Check if expected is contained in actual (for verbose answers)
|
||||
if expected_norm in actual_norm:
|
||||
return {"success": True, "score": 0.9, "error": None}
|
||||
|
||||
return {
|
||||
"success": False,
|
||||
"score": 0.0,
|
||||
"error": f"Expected '{expected}', got '{actual}'",
|
||||
}
|
||||
|
||||
def provision_environment(self, challenge: Challenge) -> dict[str, Any]:
|
||||
"""Set up environment-specific resources."""
|
||||
ground = challenge.ground_truth
|
||||
environment = ground["environment"]
|
||||
|
||||
env_config: dict[str, Any] = {
|
||||
"environment": environment,
|
||||
}
|
||||
|
||||
if environment == "os":
|
||||
# Would spin up Docker container here
|
||||
env_config["docker_image"] = "ubuntu:22.04"
|
||||
|
||||
elif environment == "db":
|
||||
# Set up SQLite database
|
||||
task_data = ground["task_data"]
|
||||
db_setup = task_data.get("db_setup", "")
|
||||
env_config["db_type"] = "sqlite"
|
||||
env_config["db_setup"] = db_setup
|
||||
|
||||
return env_config
|
||||
|
||||
def get_challenge_count(self) -> Optional[int]:
|
||||
"""Get the number of challenges."""
|
||||
self.ensure_setup()
|
||||
|
||||
if self.subset:
|
||||
tasks = self._tasks.get(self.subset, [])
|
||||
count = len(tasks)
|
||||
else:
|
||||
count = sum(len(tasks) for tasks in self._tasks.values())
|
||||
|
||||
if self.limit:
|
||||
count = min(count, self.limit)
|
||||
|
||||
return count
|
||||
|
||||
def get_metadata(self) -> dict[str, Any]:
|
||||
"""Get AgentBench metadata."""
|
||||
metadata = super().get_metadata()
|
||||
metadata.update(
|
||||
{
|
||||
"repository": f"https://github.com/{self.GITHUB_REPO}",
|
||||
"environments": list(self.ENVIRONMENTS.keys()),
|
||||
"easy_environments": ["db", "card_game", "ltp"],
|
||||
"medium_environments": ["os", "kg", "web_shopping"],
|
||||
"hard_environments": ["web_browsing", "alfworld"],
|
||||
}
|
||||
)
|
||||
return metadata
|
||||
135
classic/direct_benchmark/direct_benchmark/adapters/base.py
Normal file
135
classic/direct_benchmark/direct_benchmark/adapters/base.py
Normal file
@@ -0,0 +1,135 @@
|
||||
"""Base class for benchmark adapters."""
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from pathlib import Path
|
||||
from typing import Any, Iterator, Optional
|
||||
|
||||
from ..models import Challenge, ChallengeResult
|
||||
|
||||
|
||||
class BenchmarkAdapter(ABC):
    """Abstract base class for external benchmark adapters.

    An adapter translates an external benchmark's native format into the
    Challenge model consumed by the direct_benchmark harness.

    Subclasses must implement:
        - setup(): one-time initialization (download datasets, etc.)
        - load_challenges(): yield Challenge objects from the benchmark
        - evaluate(): benchmark-specific evaluation logic

    Subclasses may also override:
        - provision_environment(): prepare a runtime environment per challenge
        - cleanup(): release resources once the benchmark run is finished
    """

    # Identifiers overridden by concrete adapters.
    name: str = "base"
    description: str = "Base benchmark adapter"

    def __init__(
        self,
        cache_dir: Optional[Path] = None,
        split: str = "validation",
        subset: Optional[str] = None,
        limit: Optional[int] = None,
    ):
        """Initialize the adapter.

        Args:
            cache_dir: Directory to cache downloaded datasets; defaults to
                ~/.cache/autogpt_benchmarks when not given.
            split: Dataset split to use (train/validation/test).
            subset: Optional subset filter (e.g. difficulty level, repo name).
            limit: Maximum number of challenges to load.
        """
        default_cache = Path.home() / ".cache" / "autogpt_benchmarks"
        self.cache_dir = cache_dir if cache_dir is not None else default_cache
        self.split = split
        self.subset = subset
        self.limit = limit
        # Flipped by ensure_setup() so setup() runs at most once per instance.
        self._is_setup = False

    @abstractmethod
    def setup(self) -> None:
        """Perform one-time setup (download datasets, authenticate, etc.).

        Called automatically before load_challenges() when the adapter has
        not been set up yet; implementations should be idempotent.
        """

    @abstractmethod
    def load_challenges(self) -> Iterator[Challenge]:
        """Load and yield Challenge objects translated from the benchmark."""

    @abstractmethod
    def evaluate(
        self,
        result: ChallengeResult,
        challenge: Challenge,
        workspace_dir: Path,
    ) -> ChallengeResult:
        """Score a challenge run with benchmark-specific logic.

        Args:
            result: The result from running the challenge.
            challenge: The challenge that was run.
            workspace_dir: Directory containing the agent's output.

        Returns:
            Updated ChallengeResult with success/score populated.
        """

    def provision_environment(self, challenge: Challenge) -> dict[str, Any]:
        """Prepare the runtime environment for a single challenge.

        The default implementation provisions nothing. Override for
        benchmarks that need Docker containers, database setup, etc.

        Args:
            challenge: The challenge to provision for.

        Returns:
            Environment configuration dict (passed to the runner).
        """
        return {}

    def cleanup(self) -> None:
        """Release resources after the benchmark run (no-op by default).

        Override to stop containers, close connections, etc.
        """

    def ensure_setup(self) -> None:
        """Lazily run setup() exactly once."""
        if self._is_setup:
            return
        self.setup()
        self._is_setup = True

    def get_challenge_count(self) -> Optional[int]:
        """Return the total challenge count without loading.

        Returns:
            Number of challenges, or None if unknown without loading.
        """
        return None

    def get_metadata(self) -> dict[str, Any]:
        """Return basic metadata describing this benchmark configuration."""
        return {
            "name": self.name,
            "description": self.description,
            "split": self.split,
            "subset": self.subset,
            "limit": self.limit,
        }
|
||||
318
classic/direct_benchmark/direct_benchmark/adapters/gaia.py
Normal file
318
classic/direct_benchmark/direct_benchmark/adapters/gaia.py
Normal file
@@ -0,0 +1,318 @@
|
||||
"""GAIA benchmark adapter.
|
||||
|
||||
GAIA (General AI Assistant Benchmark) evaluates AI assistants on real-world tasks
|
||||
requiring reasoning, tool use, and web browsing.
|
||||
|
||||
Dataset: https://huggingface.co/datasets/gaia-benchmark/GAIA
|
||||
Leaderboard: https://huggingface.co/spaces/gaia-benchmark/leaderboard
|
||||
|
||||
Requires:
|
||||
- Hugging Face account with access to the gated dataset
|
||||
- HUGGING_FACE_HUB_TOKEN environment variable set
|
||||
- datasets and huggingface-hub packages
|
||||
"""
|
||||
|
||||
import os
|
||||
import re
|
||||
import string
|
||||
from pathlib import Path
|
||||
from typing import Any, Iterator, Optional
|
||||
|
||||
from ..models import Challenge, ChallengeResult
|
||||
from . import register_adapter
|
||||
from .base import BenchmarkAdapter
|
||||
|
||||
|
||||
def _normalize_answer(answer: str) -> str:
|
||||
"""Normalize answer for comparison (GAIA-style normalization).
|
||||
|
||||
- Lowercase
|
||||
- Remove articles (a, an, the)
|
||||
- Remove punctuation
|
||||
- Collapse whitespace
|
||||
"""
|
||||
# Lowercase
|
||||
answer = answer.lower()
|
||||
|
||||
# Remove articles
|
||||
answer = re.sub(r"\b(a|an|the)\b", " ", answer)
|
||||
|
||||
# Remove punctuation
|
||||
answer = answer.translate(str.maketrans("", "", string.punctuation))
|
||||
|
||||
# Collapse whitespace
|
||||
answer = " ".join(answer.split())
|
||||
|
||||
return answer.strip()
|
||||
|
||||
|
||||
@register_adapter("gaia")
class GAIAAdapter(BenchmarkAdapter):
    """Adapter for the GAIA benchmark.

    GAIA provides real-world tasks at three difficulty levels:
    - Level 1: Simple tasks (single tool, straightforward reasoning)
    - Level 2: Moderate tasks (multiple tools, multi-step reasoning)
    - Level 3: Complex tasks (complex reasoning, tool chaining)

    Usage:
        adapter = GAIAAdapter(split="validation", subset="1")
        for challenge in adapter.load_challenges():
            # Run challenge...
    """

    name = "gaia"
    description = "GAIA - General AI Assistant Benchmark"

    HF_DATASET = "gaia-benchmark/GAIA"

    def __init__(
        self,
        cache_dir: Optional[Path] = None,
        split: str = "validation",
        subset: Optional[str] = None,
        limit: Optional[int] = None,
    ):
        """Initialize the GAIA adapter.

        Args:
            cache_dir: Directory to cache the dataset.
            split: Dataset split - "validation" (has answers) or "test" (leaderboard).
            subset: Difficulty level filter - "1", "2", or "3".
            limit: Maximum number of challenges to load.
        """
        super().__init__(cache_dir, split, subset, limit)
        self._dataset = None
        # task_id -> expected local path of the task's file attachment.
        self._file_cache: dict[str, Path] = {}

    @staticmethod
    def _coerce_level(raw: Any) -> int:
        """Coerce the dataset's "Level" field to an int.

        The subset filter in load_challenges() compares Level as a string,
        which implies the raw field may be str or int; the difficulty and
        cutoff maps below use int keys, so normalize here. Returns 0 for
        missing or unparseable values (falls through to the map defaults).
        """
        try:
            return int(raw)
        except (TypeError, ValueError):
            return 0

    def setup(self) -> None:
        """Download and cache the GAIA dataset from Hugging Face.

        Raises:
            ImportError: If the 'datasets' package is not installed.
            ValueError: If no Hugging Face token is configured (GAIA is gated).
        """
        try:
            from datasets import load_dataset
        except ImportError:
            raise ImportError(
                "GAIA adapter requires the 'datasets' package. "
                "Install with: pip install datasets huggingface-hub"
            )

        # GAIA is a gated dataset; a HF token is mandatory.
        token = os.environ.get("HUGGING_FACE_HUB_TOKEN") or os.environ.get("HF_TOKEN")
        if not token:
            raise ValueError(
                "GAIA dataset requires authentication. "
                "Set HUGGING_FACE_HUB_TOKEN or HF_TOKEN environment variable. "
                "Get your token at https://huggingface.co/settings/tokens"
            )

        self._dataset = load_dataset(
            self.HF_DATASET,
            split=self.split,
            token=token,
            cache_dir=str(self.cache_dir / "gaia"),
        )

        # Index any per-task file attachments.
        self._setup_file_cache()

        self._is_setup = True

    def _setup_file_cache(self) -> None:
        """Index file attachments referenced by the dataset.

        NOTE(review): this only records the expected local path per task_id;
        nothing here writes the attachment bytes to disk. Downstream code
        guards with src.exists(), so missing files are skipped silently —
        confirm the datasets cache actually materializes attachments.
        """
        if self._dataset is None:
            return

        file_dir = self.cache_dir / "gaia" / "files"
        file_dir.mkdir(parents=True, exist_ok=True)

        for item in self._dataset:
            if item.get("file_name") and item.get("file_path"):
                file_name = item["file_name"]
                self._file_cache[item["task_id"]] = file_dir / file_name

    def load_challenges(self) -> Iterator[Challenge]:
        """Load challenges from the GAIA dataset.

        Honors the subset (difficulty level) filter and the load limit.

        Yields:
            Challenge objects for each GAIA task.
        """
        self.ensure_setup()

        if self._dataset is None:
            return

        count = 0
        for item in self._dataset:
            # Subset filter compares the level as a string ("1"/"2"/"3").
            if self.subset and str(item.get("Level")) != self.subset:
                continue

            # Apply limit
            if self.limit and count >= self.limit:
                break

            challenge = self._convert_to_challenge(item)
            yield challenge
            count += 1

    def _convert_to_challenge(self, item: dict[str, Any]) -> Challenge:
        """Convert a GAIA dataset item to a Challenge."""
        task_id = item["task_id"]
        question = item["Question"]
        # Robust to both int and str "Level" encodings in the dataset.
        level = self._coerce_level(item.get("Level", 1))
        final_answer = item.get("Final answer", "")
        file_name = item.get("file_name", "")

        # Build task description
        task = question
        if file_name:
            task = f"{question}\n\nA file has been provided: {file_name}"

        # Map GAIA levels to difficulty
        difficulty_map = {1: "easy", 2: "medium", 3: "hard"}
        difficulty = difficulty_map.get(level, "unknown")

        # Cutoff (seconds) scales with level
        cutoff_map = {1: 180, 2: 300, 3: 600}
        cutoff = cutoff_map.get(level, 300)

        # Ground truth for evaluation
        ground_truth: dict[str, Any] = {
            "answer": final_answer,
            "eval": {"type": "gaia_match"},
        }

        # Create artifacts directory for any files
        artifacts_dir = self.cache_dir / "gaia" / "artifacts" / task_id
        artifacts_dir.mkdir(parents=True, exist_ok=True)

        # Copy the attachment next to the challenge if it was cached.
        if task_id in self._file_cache:
            src = self._file_cache[task_id]
            if src.exists():
                import shutil

                shutil.copy2(src, artifacts_dir / src.name)

        return Challenge(
            name=f"GAIA_{task_id}",
            task=task,
            category=["gaia", f"gaia_level_{level}"],
            difficulty=difficulty,
            cutoff=cutoff,
            ground_truth=ground_truth,
            artifacts_dir=artifacts_dir,
            source_path=artifacts_dir / "data.json",
        )

    def evaluate(
        self,
        result: ChallengeResult,
        challenge: Challenge,
        workspace_dir: Path,
    ) -> ChallengeResult:
        """Evaluate using GAIA-style normalized string matching.

        GAIA uses exact string matching after normalization:
        lowercase, remove articles (a, an, the), remove punctuation,
        collapse whitespace.

        Args:
            result: The result from running the challenge.
            challenge: The challenge that was run.
            workspace_dir: Directory containing the agent's output (unused;
                answers are read from result.output_files/steps).

        Returns:
            The result with success/score (and error_message on failure) set.
        """
        ground = challenge.ground_truth
        expected = ground.get("answer", "")

        if not expected:
            # Test split has no answers - can't evaluate locally
            result.success = False
            result.score = 0.0
            result.error_message = (
                "No ground truth (test split - submit to leaderboard)"
            )
            return result

        # Get the agent's answer from output
        agent_answer = self._extract_answer(result)

        if not agent_answer:
            result.success = False
            result.score = 0.0
            result.error_message = "No answer found in agent output"
            return result

        # Normalize both answers
        normalized_expected = _normalize_answer(expected)
        normalized_actual = _normalize_answer(agent_answer)

        # Exact match after normalization
        if normalized_expected == normalized_actual:
            result.success = True
            result.score = 1.0
        else:
            result.success = False
            result.score = 0.0
            result.error_message = (
                f"Answer mismatch: expected '{expected}', got '{agent_answer}'"
            )

        return result

    def _extract_answer(self, result: ChallengeResult) -> str:
        """Extract the final answer from the agent's output.

        Search order:
        1. Any output file with "answer" in its name.
        2. The final step, if it is a "finish" tool call (its reason field).
        """
        # Check for answer.txt (or any *answer* file)
        for filename, content in result.output_files.items():
            if "answer" in filename.lower():
                return content.strip()

        # Check final step result
        if result.steps:
            last_step = result.steps[-1]
            if last_step.tool_name == "finish":
                # Try to extract answer from finish arguments
                reason = last_step.tool_args.get("reason", "")
                return reason.strip()

        return ""

    def get_challenge_count(self) -> Optional[int]:
        """Get the number of challenges after subset/limit filtering."""
        self.ensure_setup()
        if self._dataset is None:
            return None

        count = len(self._dataset)

        # Apply subset filter (difficulty level, compared as a string)
        if self.subset:
            count = sum(
                1 for item in self._dataset if str(item.get("Level")) == self.subset
            )

        # Apply limit
        if self.limit:
            count = min(count, self.limit)

        return count

    def get_metadata(self) -> dict[str, Any]:
        """Get GAIA benchmark metadata."""
        metadata = super().get_metadata()
        metadata.update(
            {
                "dataset": self.HF_DATASET,
                "levels": ["1", "2", "3"],
                "splits": ["validation", "test"],
                "requires_auth": True,
                "leaderboard": (
                    "https://huggingface.co/spaces/gaia-benchmark/leaderboard"
                ),
            }
        )
        return metadata
|
||||
458
classic/direct_benchmark/direct_benchmark/adapters/swe_bench.py
Normal file
458
classic/direct_benchmark/direct_benchmark/adapters/swe_bench.py
Normal file
@@ -0,0 +1,458 @@
|
||||
"""SWE-bench adapter.
|
||||
|
||||
SWE-bench evaluates AI models on real-world GitHub issues from popular Python
|
||||
repositories, requiring models to generate patches that fix the issues.
|
||||
|
||||
GitHub: https://github.com/SWE-bench/SWE-bench
|
||||
Dataset: https://huggingface.co/datasets/princeton-nlp/SWE-bench
|
||||
|
||||
Requires:
|
||||
- Docker Engine (for containerized evaluation)
|
||||
- swebench package (pip install swebench)
|
||||
- ~120GB disk space for full dataset
|
||||
- OR Modal for cloud-based evaluation
|
||||
"""
|
||||
|
||||
import json
|
||||
import subprocess
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
from typing import Any, Iterator, Optional
|
||||
|
||||
from ..models import Challenge, ChallengeResult
|
||||
from . import register_adapter
|
||||
from .base import BenchmarkAdapter
|
||||
|
||||
|
||||
@register_adapter("swe-bench")
class SWEBenchAdapter(BenchmarkAdapter):
    """Adapter for the SWE-bench benchmark.

    SWE-bench provides 2,294 real GitHub issues from 12 Python repositories.
    Models must generate patches that fix the issues, evaluated by running
    the repository's test suite.

    Subsets:
    - "full": All 2,294 instances
    - "lite": 300 curated instances
    - "verified": 500 human-validated solvable instances

    Usage:
        adapter = SWEBenchAdapter(subset="lite")
        for challenge in adapter.load_challenges():
            # Run challenge...
    """

    name = "swe-bench"
    description = "SWE-bench - Software Engineering Benchmark"

    # Hugging Face dataset identifiers for each subset variant.
    HF_DATASET = "princeton-nlp/SWE-bench"
    HF_LITE = "princeton-nlp/SWE-bench_Lite"
    HF_VERIFIED = "princeton-nlp/SWE-bench_Verified"

    # Repository-specific timeout multipliers (applied to the 600s base cutoff
    # in _convert_to_challenge; repos not listed get 1.0).
    REPO_TIMEOUTS: dict[str, float] = {
        "django/django": 1.5,
        "matplotlib/matplotlib": 2.0,
        "scikit-learn/scikit-learn": 1.5,
        "sympy/sympy": 1.2,
    }

    def __init__(
        self,
        cache_dir: Optional[Path] = None,
        split: str = "test",
        subset: Optional[str] = None,
        limit: Optional[int] = None,
        use_modal: bool = False,
    ):
        """Initialize the SWE-bench adapter.

        Args:
            cache_dir: Directory to cache the dataset.
            split: Dataset split - "dev" or "test".
            subset: Subset to use - "full", "lite", "verified", or a repo name.
            limit: Maximum number of challenges to load.
            use_modal: Use Modal for cloud-based evaluation instead of local Docker.
        """
        super().__init__(cache_dir, split, subset, limit)
        self._dataset = None
        self._use_modal = use_modal

    def setup(self) -> None:
        """Download and cache the SWE-bench dataset.

        Raises:
            ImportError: If the 'datasets' package is not installed.
            RuntimeError: If local Docker is required but unavailable.
        """
        try:
            from datasets import load_dataset
        except ImportError:
            raise ImportError(
                "SWE-bench adapter requires the 'datasets' package. "
                "Install with: pip install datasets"
            )

        # Select dataset based on subset ("lite"/"verified" map to dedicated
        # HF datasets; anything else, including repo names, uses the full set).
        if self.subset == "lite":
            dataset_name = self.HF_LITE
        elif self.subset == "verified":
            dataset_name = self.HF_VERIFIED
        else:
            dataset_name = self.HF_DATASET

        # Load dataset
        self._dataset = load_dataset(
            dataset_name,
            split=self.split,
            cache_dir=str(self.cache_dir / "swe_bench"),
        )

        # Check for Docker if not using Modal (fail fast before running agents)
        if not self._use_modal:
            self._check_docker()

        self._is_setup = True

    def _check_docker(self) -> None:
        """Verify Docker is available for evaluation.

        Raises:
            RuntimeError: If Docker is missing, not running, or unresponsive.
        """
        try:
            result = subprocess.run(
                ["docker", "info"],
                capture_output=True,
                timeout=10,
            )
            if result.returncode != 0:
                raise RuntimeError("Docker is not running")
        except FileNotFoundError:
            raise RuntimeError(
                "Docker is required for SWE-bench evaluation. "
                "Install Docker or use use_modal=True for cloud evaluation."
            )
        except subprocess.TimeoutExpired:
            raise RuntimeError("Docker is not responding")

    def load_challenges(self) -> Iterator[Challenge]:
        """Load challenges from the SWE-bench dataset.

        Yields:
            Challenge objects for each SWE-bench instance, honoring the
            repo-name subset filter and the load limit.
        """
        self.ensure_setup()

        if self._dataset is None:
            return

        count = 0
        for item in self._dataset:
            # Apply repo filter (if subset is a repo name rather than a variant)
            if self.subset and self.subset not in ("full", "lite", "verified"):
                if item.get("repo") != self.subset:
                    continue

            # Apply limit
            if self.limit and count >= self.limit:
                break

            challenge = self._convert_to_challenge(item)
            yield challenge
            count += 1

    def _convert_to_challenge(self, item: dict[str, Any]) -> Challenge:
        """Convert a SWE-bench dataset item to a Challenge."""
        instance_id = item["instance_id"]
        repo = item.get("repo", "unknown")
        problem_statement = item.get("problem_statement", "")
        base_commit = item.get("base_commit", "")
        hints_text = item.get("hints_text", "")

        # Build comprehensive task description
        task_parts = [
            f"Repository: {repo}",
            f"Base commit: {base_commit}",
            "",
            "Problem Statement:",
            problem_statement,
        ]

        if hints_text:
            task_parts.extend(["", "Hints:", hints_text])

        task_parts.extend(
            [
                "",
                "Your task: Generate a patch file (in unified diff format) that "
                "fixes the issue described above. The patch should be saved to "
                "'patch.diff' in your workspace.",
            ]
        )

        task = "\n".join(task_parts)

        # Determine difficulty based on repo complexity (hand-assigned).
        difficulty_map = {
            "astropy/astropy": "hard",
            "django/django": "medium",
            "flask/flask": "easy",
            "matplotlib/matplotlib": "hard",
            "pallets/flask": "easy",
            "psf/requests": "easy",
            "pydata/xarray": "medium",
            "pylint-dev/pylint": "medium",
            "pytest-dev/pytest": "medium",
            "scikit-learn/scikit-learn": "hard",
            "sphinx-doc/sphinx": "medium",
            "sympy/sympy": "hard",
        }
        difficulty = difficulty_map.get(repo, "medium")

        # Calculate timeout with repo-specific multiplier
        base_timeout = 600
        multiplier = self.REPO_TIMEOUTS.get(repo, 1.0)
        cutoff = int(base_timeout * multiplier)

        # Ground truth includes the gold patch for reference
        gold_patch = item.get("patch", "")
        test_patch = item.get("test_patch", "")

        ground_truth: dict[str, Any] = {
            "eval": {"type": "swe_bench"},
            "instance_id": instance_id,
            "repo": repo,
            "base_commit": base_commit,
            "gold_patch": gold_patch,
            "test_patch": test_patch,
            "pass_to_pass": item.get("PASS_TO_PASS", ""),
            "fail_to_pass": item.get("FAIL_TO_PASS", ""),
        }

        # Create artifacts directory
        artifacts_dir = self.cache_dir / "swe_bench" / "artifacts" / instance_id
        artifacts_dir.mkdir(parents=True, exist_ok=True)

        # Save problem context for reference
        context_file = artifacts_dir / "context.json"
        with open(context_file, "w") as f:
            json.dump(
                {
                    "instance_id": instance_id,
                    "repo": repo,
                    "base_commit": base_commit,
                    "problem_statement": problem_statement,
                },
                f,
                indent=2,
            )

        return Challenge(
            name=f"SWE_{instance_id}",
            task=task,
            category=["swe-bench", f"swe-bench_{repo.replace('/', '_')}"],
            difficulty=difficulty,
            cutoff=cutoff,
            ground_truth=ground_truth,
            artifacts_dir=artifacts_dir,
            source_path=context_file,
        )

    def evaluate(
        self,
        result: ChallengeResult,
        challenge: Challenge,
        workspace_dir: Path,
    ) -> ChallengeResult:
        """Evaluate using SWE-bench's Docker-based test harness.

        The agent's patch is applied to the repository in a Docker container,
        and the test suite is run to verify the fix.

        Args:
            result: The result from running the challenge.
            challenge: The challenge that was run.
            workspace_dir: Directory containing the agent's output (unused;
                patches are read from result.output_files).

        Returns:
            The result with success/score (and error_message on failure) set.
        """
        ground = challenge.ground_truth

        # Get the generated patch
        patch_content = self._extract_patch(result)

        if not patch_content:
            result.success = False
            result.score = 0.0
            result.error_message = "No patch.diff found in agent output"
            return result

        # Run evaluation via the configured backend
        if self._use_modal:
            eval_result = self._evaluate_with_modal(ground, patch_content)
        else:
            eval_result = self._evaluate_with_docker(ground, patch_content)

        result.success = eval_result["success"]
        result.score = eval_result["score"]
        if eval_result.get("error"):
            result.error_message = eval_result["error"]

        return result

    def _extract_patch(self, result: ChallengeResult) -> str:
        """Extract the patch from the agent's output.

        Prefers an explicit patch.diff/*.patch file; otherwise falls back to
        the first output file whose content looks like a unified diff.
        """
        # Look for patch.diff file
        for filename, content in result.output_files.items():
            if filename.endswith("patch.diff") or filename.endswith(".patch"):
                return content

        # Look for diff content in any output file
        for filename, content in result.output_files.items():
            if content.strip().startswith("diff --git") or content.strip().startswith(
                "---"
            ):
                return content

        return ""

    def _evaluate_with_docker(
        self, ground: dict[str, Any], patch: str
    ) -> dict[str, Any]:
        """Run evaluation using local Docker.

        Returns a dict with "success" (bool), "score" (float) and optional
        "error" (str) keys; never raises.

        NOTE(review): imports `swebench.harness.run_evaluation.run_evaluation`
        with keyword args matching an older swebench API — verify against the
        installed swebench version.
        """
        try:
            # Try to import swebench harness
            from swebench.harness.run_evaluation import run_evaluation
        except ImportError:
            return {
                "success": False,
                "score": 0.0,
                "error": (
                    "swebench package not installed. "
                    "Install with: pip install swebench"
                ),
            }

        instance_id = ground["instance_id"]
        # These are available for future use in more sophisticated evaluation
        _repo = ground["repo"]  # noqa: F841
        _base_commit = ground["base_commit"]  # noqa: F841

        # Write patch to temp file (delete=False so the path outlives the block;
        # cleaned up in the finally clause below)
        with tempfile.NamedTemporaryFile(mode="w", suffix=".patch", delete=False) as f:
            f.write(patch)
            patch_file = f.name

        # Initialize predictions_file path before try block
        predictions_file = Path(patch_file).with_suffix(".json")

        try:
            # Create predictions file for swebench
            predictions = [
                {
                    "instance_id": instance_id,
                    "model_name_or_path": "autogpt",
                    "model_patch": patch,
                }
            ]

            with open(predictions_file, "w") as f:
                json.dump(predictions, f)

            # Run evaluation
            results = run_evaluation(
                predictions_path=str(predictions_file),
                swe_bench_tasks=self.HF_DATASET,
                log_dir=str(self.cache_dir / "swe_bench" / "logs"),
                testbed=str(self.cache_dir / "swe_bench" / "testbed"),
                skip_existing=False,
                timeout=1800,
                verbose=False,
            )

            # Check results
            if instance_id in results:
                instance_result = results[instance_id]
                resolved = instance_result.get("resolved", False)
                return {
                    "success": resolved,
                    "score": 1.0 if resolved else 0.0,
                    "error": None if resolved else "Tests did not pass",
                }
            else:
                return {
                    "success": False,
                    "score": 0.0,
                    "error": "Evaluation did not produce results",
                }

        except Exception as e:
            # Deliberately broad: any harness failure becomes a scored failure
            # rather than crashing the whole benchmark run.
            return {
                "success": False,
                "score": 0.0,
                "error": f"Evaluation failed: {str(e)}",
            }

        finally:
            # Cleanup temp files
            Path(patch_file).unlink(missing_ok=True)
            predictions_file.unlink(missing_ok=True)

    def _evaluate_with_modal(
        self, ground: dict[str, Any], patch: str
    ) -> dict[str, Any]:
        """Run evaluation using Modal cloud infrastructure.

        Currently a stub: reports an error result unless/until the Modal
        harness integration is implemented.
        """
        try:
            import modal  # noqa: F401
        except ImportError:
            return {
                "success": False,
                "score": 0.0,
                "error": (
                    "Modal package not installed. " "Install with: pip install modal"
                ),
            }

        # Modal evaluation requires environment setup
        # This is a simplified interface - full implementation would use
        # modal's SWE-bench harness
        return {
            "success": False,
            "score": 0.0,
            "error": (
                "Modal evaluation not yet implemented. "
                "Use local Docker evaluation or submit to SWE-bench leaderboard."
            ),
        }

    def provision_environment(self, challenge: Challenge) -> dict[str, Any]:
        """Provide repository context (repo, commit, clone URL) for the challenge."""
        ground = challenge.ground_truth
        return {
            "repo": ground.get("repo"),
            "base_commit": ground.get("base_commit"),
            "clone_url": f"https://github.com/{ground.get('repo')}.git",
        }

    def get_challenge_count(self) -> Optional[int]:
        """Get the number of challenges after repo-filter/limit are applied."""
        self.ensure_setup()
        if self._dataset is None:
            return None

        count = len(self._dataset)

        # Apply repo filter (only when subset is a repo name, not a variant)
        if self.subset and self.subset not in ("full", "lite", "verified"):
            count = sum(1 for item in self._dataset if item.get("repo") == self.subset)

        # Apply limit
        if self.limit:
            count = min(count, self.limit)

        return count

    def get_metadata(self) -> dict[str, Any]:
        """Get SWE-bench metadata."""
        metadata = super().get_metadata()
        metadata.update(
            {
                "datasets": {
                    "full": self.HF_DATASET,
                    "lite": self.HF_LITE,
                    "verified": self.HF_VERIFIED,
                },
                "subsets": ["full", "lite", "verified"]
                + list(self.REPO_TIMEOUTS.keys()),
                "splits": ["dev", "test"],
                "requires_docker": not self._use_modal,
                "leaderboard": "https://www.swebench.com/",
            }
        )
        return metadata
|
||||
@@ -3,7 +3,8 @@
|
||||
import asyncio
|
||||
import re
|
||||
from datetime import datetime
|
||||
from typing import Union
|
||||
from pathlib import Path
|
||||
from typing import TYPE_CHECKING, Optional, Union
|
||||
|
||||
from rich.live import Live
|
||||
|
||||
@@ -14,16 +15,55 @@ from .report import ReportGenerator
|
||||
from .state import StateManager
|
||||
from .ui import BenchmarkUI, JsonUI, QuietUI, console
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from .adapters.base import BenchmarkAdapter
|
||||
|
||||
|
||||
class BenchmarkHarness:
|
||||
"""Main benchmark harness orchestrator."""
|
||||
|
||||
def __init__(self, config: HarnessConfig):
|
||||
self.config = config
|
||||
self.loader = ChallengeLoader(config.challenges_dir)
|
||||
self.reporter = ReportGenerator(config.reports_dir)
|
||||
self.state_manager = StateManager(config.reports_dir)
|
||||
|
||||
# Initialize challenge source (adapter or loader)
|
||||
self.adapter: Optional["BenchmarkAdapter"] = None
|
||||
self.loader: Optional[ChallengeLoader] = None
|
||||
|
||||
if config.external_benchmark:
|
||||
self._init_adapter()
|
||||
else:
|
||||
self.loader = ChallengeLoader(config.challenges_dir)
|
||||
|
||||
def _init_adapter(self) -> None:
    """Resolve and instantiate the configured external benchmark adapter.

    Raises:
        ValueError: If the configured benchmark name is not registered.
    """
    from .adapters import get_adapter

    assert self.config.external_benchmark is not None
    adapter_cls = get_adapter(self.config.external_benchmark)
    if adapter_cls is None:
        from .adapters import list_adapters

        raise ValueError(
            f"Unknown benchmark: {self.config.external_benchmark}. "
            f"Available: {list_adapters()}"
        )

    # Fall back to the shared default cache location when none was configured.
    cache_dir = self.config.benchmark_cache_dir
    if cache_dir is None:
        cache_dir = Path.home() / ".cache" / "autogpt_benchmarks"

    self.adapter = adapter_cls(
        cache_dir=cache_dir,
        split=self.config.benchmark_split,
        subset=self.config.benchmark_subset,
        limit=self.config.benchmark_limit,
    )
|
||||
|
||||
async def run(
|
||||
self,
|
||||
ui_mode: str = "default",
|
||||
@@ -115,17 +155,40 @@ class BenchmarkHarness:
|
||||
strategy_names, model_names, self.config.attempts
|
||||
)
|
||||
|
||||
# Load challenges
|
||||
challenges = list(
|
||||
self.loader.load_all(
|
||||
categories=self.config.categories,
|
||||
skip_categories=self.config.skip_categories,
|
||||
names=self.config.test_names,
|
||||
maintain=self.config.maintain,
|
||||
improve=self.config.improve,
|
||||
explore=self.config.explore,
|
||||
# Load challenges (from adapter or local loader)
|
||||
if self.adapter:
|
||||
# External benchmark - load via adapter
|
||||
if ui_mode != "json":
|
||||
subset_str = (
|
||||
f", subset={self.config.benchmark_subset}"
|
||||
if self.config.benchmark_subset
|
||||
else ""
|
||||
)
|
||||
limit_str = (
|
||||
f", limit={self.config.benchmark_limit}"
|
||||
if self.config.benchmark_limit
|
||||
else ""
|
||||
)
|
||||
console.print(
|
||||
f"[cyan]Loading {self.config.external_benchmark} benchmark "
|
||||
f"(split={self.config.benchmark_split}{subset_str}{limit_str})"
|
||||
f"...[/cyan]"
|
||||
)
|
||||
assert self.adapter is not None
|
||||
challenges = list(self.adapter.load_challenges())
|
||||
else:
|
||||
# Local challenges - load via ChallengeLoader
|
||||
assert self.loader is not None
|
||||
challenges = list(
|
||||
self.loader.load_all(
|
||||
categories=self.config.categories,
|
||||
skip_categories=self.config.skip_categories,
|
||||
names=self.config.test_names,
|
||||
maintain=self.config.maintain,
|
||||
improve=self.config.improve,
|
||||
explore=self.config.explore,
|
||||
)
|
||||
)
|
||||
)
|
||||
|
||||
if not challenges:
|
||||
console.print("[red]No challenges found matching filters[/red]")
|
||||
@@ -194,6 +257,7 @@ class BenchmarkHarness:
|
||||
attempts=self.config.attempts,
|
||||
no_cutoff=self.config.no_cutoff,
|
||||
skip_fn=should_skip,
|
||||
adapter=self.adapter,
|
||||
)
|
||||
|
||||
# Ensure workspace exists
|
||||
|
||||
@@ -205,7 +205,7 @@ class BenchmarkConfig(BaseModel):
|
||||
strategy: StrategyName
|
||||
model: ModelConfig
|
||||
max_steps: int = 50
|
||||
timeout_seconds: int = 300
|
||||
timeout_seconds: int = 900
|
||||
|
||||
@property
|
||||
def config_name(self) -> str:
|
||||
@@ -246,6 +246,13 @@ class HarnessConfig(BaseModel):
|
||||
reset_models: Optional[list[str]] = None # Reset specific models
|
||||
reset_challenges: Optional[list[str]] = None # Reset specific challenges
|
||||
|
||||
# External benchmark options
|
||||
external_benchmark: Optional[str] = None # gaia, swe-bench, agent-bench
|
||||
benchmark_split: str = "validation" # train, validation, test
|
||||
benchmark_subset: Optional[str] = None # Difficulty level, repo name, etc.
|
||||
benchmark_limit: Optional[int] = None # Max challenges to load
|
||||
benchmark_cache_dir: Optional[Path] = None # Cache directory for downloads
|
||||
|
||||
model_config = {"arbitrary_types_allowed": True}
|
||||
|
||||
|
||||
|
||||
@@ -2,12 +2,15 @@
|
||||
|
||||
import asyncio
|
||||
from pathlib import Path
|
||||
from typing import AsyncIterator, Callable, Optional
|
||||
from typing import TYPE_CHECKING, AsyncIterator, Callable, Optional
|
||||
|
||||
from .evaluator import Evaluator
|
||||
from .models import BenchmarkConfig, Challenge, ChallengeResult, ExecutionProgress
|
||||
from .runner import AgentRunner, StepCallback
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from .adapters.base import BenchmarkAdapter
|
||||
|
||||
# Type for skip predicate: (config_name, challenge_name, attempt) -> bool
|
||||
SkipPredicate = Callable[[str, str, int], bool]
|
||||
|
||||
@@ -23,6 +26,7 @@ class ParallelExecutor:
|
||||
attempts: int = 1,
|
||||
no_cutoff: bool = False,
|
||||
skip_fn: Optional[SkipPredicate] = None,
|
||||
adapter: Optional["BenchmarkAdapter"] = None,
|
||||
):
|
||||
self.max_parallel = max_parallel
|
||||
self.on_progress = on_progress
|
||||
@@ -30,6 +34,7 @@ class ParallelExecutor:
|
||||
self.attempts = attempts
|
||||
self.no_cutoff = no_cutoff
|
||||
self.skip_fn = skip_fn
|
||||
self.adapter = adapter
|
||||
self._semaphore = asyncio.Semaphore(max_parallel)
|
||||
self._evaluator = Evaluator()
|
||||
|
||||
@@ -121,8 +126,11 @@ class ParallelExecutor:
|
||||
)
|
||||
result = await runner.run_challenge(challenge, attempt=attempt)
|
||||
|
||||
# Evaluate result
|
||||
result = self._evaluator.evaluate(result, challenge)
|
||||
# Evaluate result - use adapter if available, otherwise standard evaluator
|
||||
if self.adapter is not None:
|
||||
result = self.adapter.evaluate(result, challenge, workspace_root)
|
||||
else:
|
||||
result = self._evaluator.evaluate(result, challenge)
|
||||
|
||||
# Notify completion
|
||||
if self.on_progress:
|
||||
|
||||
@@ -220,6 +220,14 @@ class AgentRunner:
|
||||
"service", # Block service commands
|
||||
]
|
||||
|
||||
# Disable clipboard commands for benchmarks - they add overhead without value
|
||||
app_config.disabled_commands = [
|
||||
"clipboard_copy",
|
||||
"clipboard_paste",
|
||||
"clipboard_list",
|
||||
"clipboard_clear",
|
||||
]
|
||||
|
||||
self._agent = agent
|
||||
self._llm_provider = llm_provider
|
||||
return agent
|
||||
@@ -244,14 +252,28 @@ class AgentRunner:
|
||||
# Propose next action
|
||||
proposal = await agent.propose_action()
|
||||
|
||||
# Check for finish command
|
||||
# Get cumulative cost from LLM provider
|
||||
if self._llm_provider:
|
||||
cumulative_cost = self._llm_provider.get_incurred_cost()
|
||||
|
||||
# Check for finish command - record it and return
|
||||
if proposal.use_tool.name == "finish":
|
||||
steps.append(
|
||||
StepResult(
|
||||
step_num=step_num + 1,
|
||||
tool_name=proposal.use_tool.name,
|
||||
tool_args=proposal.use_tool.arguments,
|
||||
result="Agent finished",
|
||||
is_error=False,
|
||||
cumulative_cost=cumulative_cost,
|
||||
)
|
||||
)
|
||||
return True
|
||||
|
||||
# Execute the action
|
||||
result = await agent.execute(proposal)
|
||||
|
||||
# Get cumulative cost from LLM provider
|
||||
# Update cost after execution
|
||||
if self._llm_provider:
|
||||
cumulative_cost = self._llm_provider.get_incurred_cost()
|
||||
|
||||
|
||||
552
classic/poetry.lock
generated
552
classic/poetry.lock
generated
@@ -6,7 +6,7 @@ version = "2.6.1"
|
||||
description = "Happy Eyeballs for asyncio"
|
||||
optional = false
|
||||
python-versions = ">=3.9"
|
||||
groups = ["main", "dev"]
|
||||
groups = ["main", "benchmarks", "dev"]
|
||||
files = [
|
||||
{file = "aiohappyeyeballs-2.6.1-py3-none-any.whl", hash = "sha256:f349ba8f4b75cb25c99c5c2d84e997e485204d2902a9597802b0371f09331fb8"},
|
||||
{file = "aiohappyeyeballs-2.6.1.tar.gz", hash = "sha256:c3f9d0113123803ccadfdf3f0faa505bc78e6a72d1cc4806cbd719826e943558"},
|
||||
@@ -18,7 +18,7 @@ version = "3.13.3"
|
||||
description = "Async http client/server framework (asyncio)"
|
||||
optional = false
|
||||
python-versions = ">=3.9"
|
||||
groups = ["main", "dev"]
|
||||
groups = ["main", "benchmarks", "dev"]
|
||||
files = [
|
||||
{file = "aiohttp-3.13.3-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:d5a372fd5afd301b3a89582817fdcdb6c34124787c70dbcc616f259013e7eef7"},
|
||||
{file = "aiohttp-3.13.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:147e422fd1223005c22b4fe080f5d93ced44460f5f9c105406b753612b587821"},
|
||||
@@ -175,7 +175,7 @@ version = "1.4.0"
|
||||
description = "aiosignal: a list of registered asynchronous callbacks"
|
||||
optional = false
|
||||
python-versions = ">=3.9"
|
||||
groups = ["main", "dev"]
|
||||
groups = ["main", "benchmarks", "dev"]
|
||||
files = [
|
||||
{file = "aiosignal-1.4.0-py3-none-any.whl", hash = "sha256:053243f8b92b990551949e63930a839ff0cf0b0ebbe0597b0f3fb19e1a0fe82e"},
|
||||
{file = "aiosignal-1.4.0.tar.gz", hash = "sha256:f47eecd9468083c2029cc99945502cb7708b082c232f9aca65da147157b251c7"},
|
||||
@@ -247,7 +247,7 @@ version = "25.4.0"
|
||||
description = "Classes Without Boilerplate"
|
||||
optional = false
|
||||
python-versions = ">=3.9"
|
||||
groups = ["main", "dev"]
|
||||
groups = ["main", "benchmarks", "dev"]
|
||||
files = [
|
||||
{file = "attrs-25.4.0-py3-none-any.whl", hash = "sha256:adcf7e2a1fb3b36ac48d97835bb6d8ade15b8dcce26aba8bf1d14847b57a3373"},
|
||||
{file = "attrs-25.4.0.tar.gz", hash = "sha256:16d5969b87f0859ef33a48b35d55ac1be6e42ae49d5e853b597db70c35c57e11"},
|
||||
@@ -1197,7 +1197,7 @@ version = "2026.1.4"
|
||||
description = "Python package for providing Mozilla's CA Bundle."
|
||||
optional = false
|
||||
python-versions = ">=3.7"
|
||||
groups = ["main", "dev"]
|
||||
groups = ["main", "benchmarks", "dev"]
|
||||
files = [
|
||||
{file = "certifi-2026.1.4-py3-none-any.whl", hash = "sha256:9943707519e4add1115f44c2bc244f782c0249876bf51b6599fee1ffbedd685c"},
|
||||
{file = "certifi-2026.1.4.tar.gz", hash = "sha256:ac726dd470482006e014ad384921ed6438c457018f4b3d204aea4281258b2120"},
|
||||
@@ -1319,7 +1319,7 @@ version = "3.4.4"
|
||||
description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet."
|
||||
optional = false
|
||||
python-versions = ">=3.7"
|
||||
groups = ["main", "dev"]
|
||||
groups = ["main", "benchmarks", "dev"]
|
||||
files = [
|
||||
{file = "charset_normalizer-3.4.4-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:e824f1492727fa856dd6eda4f7cee25f8518a12f3c4a56a74e8095695089cf6d"},
|
||||
{file = "charset_normalizer-3.4.4-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4bd5d4137d500351a30687c2d3971758aac9a19208fc110ccb9d7188fbe709e8"},
|
||||
@@ -1523,12 +1523,12 @@ version = "0.4.6"
|
||||
description = "Cross-platform colored terminal text."
|
||||
optional = false
|
||||
python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7"
|
||||
groups = ["main", "dev"]
|
||||
groups = ["main", "benchmarks", "dev"]
|
||||
files = [
|
||||
{file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"},
|
||||
{file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"},
|
||||
]
|
||||
markers = {dev = "platform_system == \"Windows\" or sys_platform == \"win32\""}
|
||||
markers = {benchmarks = "platform_system == \"Windows\"", dev = "platform_system == \"Windows\" or sys_platform == \"win32\""}
|
||||
|
||||
[[package]]
|
||||
name = "coloredlogs"
|
||||
@@ -1874,6 +1874,51 @@ files = [
|
||||
{file = "cymem-2.0.13.tar.gz", hash = "sha256:1c91a92ae8c7104275ac26bd4d29b08ccd3e7faff5893d3858cb6fadf1bc1588"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "datasets"
|
||||
version = "2.18.0"
|
||||
description = "HuggingFace community-driven open-source library of datasets"
|
||||
optional = false
|
||||
python-versions = ">=3.8.0"
|
||||
groups = ["benchmarks"]
|
||||
files = [
|
||||
{file = "datasets-2.18.0-py3-none-any.whl", hash = "sha256:f1bbf0e2896917a914de01cbd37075b14deea3837af87ad0d9f697388ccaeb50"},
|
||||
{file = "datasets-2.18.0.tar.gz", hash = "sha256:cdf8b8c6abf7316377ba4f49f9589a4c74556d6b481afd0abd2284f3d69185cb"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
aiohttp = "*"
|
||||
dill = ">=0.3.0,<0.3.9"
|
||||
filelock = "*"
|
||||
fsspec = {version = ">=2023.1.0,<=2024.2.0", extras = ["http"]}
|
||||
huggingface-hub = ">=0.19.4"
|
||||
multiprocess = "*"
|
||||
numpy = ">=1.17"
|
||||
packaging = "*"
|
||||
pandas = "*"
|
||||
pyarrow = ">=12.0.0"
|
||||
pyarrow-hotfix = "*"
|
||||
pyyaml = ">=5.1"
|
||||
requests = ">=2.19.0"
|
||||
tqdm = ">=4.62.1"
|
||||
xxhash = "*"
|
||||
|
||||
[package.extras]
|
||||
apache-beam = ["apache-beam (>=2.26.0)"]
|
||||
audio = ["librosa", "soundfile (>=0.12.1)"]
|
||||
benchmarks = ["tensorflow (==2.12.0)", "torch (==2.0.1)", "transformers (==4.30.1)"]
|
||||
dev = ["Pillow (>=6.2.1)", "absl-py", "apache-beam (>=2.26.0) ; sys_platform != \"win32\" and python_version < \"3.10\"", "elasticsearch (<8.0.0)", "faiss-cpu (>=1.6.4)", "jax (>=0.3.14) ; sys_platform != \"win32\"", "jaxlib (>=0.3.14) ; sys_platform != \"win32\"", "joblib (<1.3.0)", "joblibspark", "librosa", "lz4", "py7zr", "pyspark (>=3.4)", "pytest", "pytest-datadir", "pytest-xdist", "rarfile (>=4.0)", "ruff (>=0.3.0)", "s3fs", "s3fs (>=2021.11.1)", "soundfile (>=0.12.1)", "sqlalchemy", "tensorflow (>=2.2.0,!=2.6.0,!=2.6.1) ; sys_platform != \"darwin\" or platform_machine != \"arm64\"", "tensorflow (>=2.3,!=2.6.0,!=2.6.1) ; sys_platform != \"darwin\" or platform_machine != \"arm64\"", "tensorflow-macos ; sys_platform == \"darwin\" and platform_machine == \"arm64\"", "tiktoken", "torch", "torch (>=2.0.0)", "transformers", "typing-extensions (>=4.6.1)", "zstandard"]
|
||||
docs = ["s3fs", "tensorflow (>=2.2.0,!=2.6.0,!=2.6.1) ; sys_platform != \"darwin\" or platform_machine != \"arm64\"", "tensorflow-macos ; sys_platform == \"darwin\" and platform_machine == \"arm64\"", "torch", "transformers"]
|
||||
jax = ["jax (>=0.3.14)", "jaxlib (>=0.3.14)"]
|
||||
metrics-tests = ["Werkzeug (>=1.0.1)", "accelerate", "bert-score (>=0.3.6)", "jiwer", "langdetect", "mauve-text", "nltk", "requests-file (>=1.5.1)", "rouge-score", "sacrebleu", "sacremoses", "scikit-learn", "scipy", "sentencepiece", "seqeval", "six (>=1.15.0,<1.16.0)", "spacy (>=3.0.0)", "texttable (>=1.6.3)", "tldextract", "tldextract (>=3.1.0)", "toml (>=0.10.1)", "typer (<0.5.0)"]
|
||||
quality = ["ruff (>=0.3.0)"]
|
||||
s3 = ["s3fs"]
|
||||
tensorflow = ["tensorflow (>=2.2.0,!=2.6.0,!=2.6.1) ; sys_platform != \"darwin\" or platform_machine != \"arm64\"", "tensorflow-macos ; sys_platform == \"darwin\" and platform_machine == \"arm64\""]
|
||||
tensorflow-gpu = ["tensorflow-gpu (>=2.2.0,!=2.6.0,!=2.6.1)"]
|
||||
tests = ["Pillow (>=6.2.1)", "absl-py", "apache-beam (>=2.26.0) ; sys_platform != \"win32\" and python_version < \"3.10\"", "elasticsearch (<8.0.0)", "faiss-cpu (>=1.6.4)", "jax (>=0.3.14) ; sys_platform != \"win32\"", "jaxlib (>=0.3.14) ; sys_platform != \"win32\"", "joblib (<1.3.0)", "joblibspark", "librosa", "lz4", "py7zr", "pyspark (>=3.4)", "pytest", "pytest-datadir", "pytest-xdist", "rarfile (>=4.0)", "s3fs (>=2021.11.1)", "soundfile (>=0.12.1)", "sqlalchemy", "tensorflow (>=2.3,!=2.6.0,!=2.6.1) ; sys_platform != \"darwin\" or platform_machine != \"arm64\"", "tensorflow-macos ; sys_platform == \"darwin\" and platform_machine == \"arm64\"", "tiktoken", "torch (>=2.0.0)", "transformers", "typing-extensions (>=4.6.1)", "zstandard"]
|
||||
torch = ["torch"]
|
||||
vision = ["Pillow (>=6.2.1)"]
|
||||
|
||||
[[package]]
|
||||
name = "dateparser"
|
||||
version = "1.2.2"
|
||||
@@ -1930,6 +1975,22 @@ files = [
|
||||
{file = "demjson3-3.0.6.tar.gz", hash = "sha256:37c83b0c6eb08d25defc88df0a2a4875d58a7809a9650bd6eee7afd8053cdbac"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "dill"
|
||||
version = "0.3.8"
|
||||
description = "serialize all of Python"
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
groups = ["benchmarks"]
|
||||
files = [
|
||||
{file = "dill-0.3.8-py3-none-any.whl", hash = "sha256:c36ca9ffb54365bdd2f8eb3eff7d2a21237f8452b57ace88b1ac615b7e815bd7"},
|
||||
{file = "dill-0.3.8.tar.gz", hash = "sha256:3ebe3c479ad625c4553aca177444d89b486b1d84982eeacded644afc0cf797ca"},
|
||||
]
|
||||
|
||||
[package.extras]
|
||||
graph = ["objgraph (>=1.7.2)"]
|
||||
profile = ["gprof2dot (>=2022.7.29)"]
|
||||
|
||||
[[package]]
|
||||
name = "distlib"
|
||||
version = "0.4.0"
|
||||
@@ -2181,7 +2242,7 @@ version = "3.20.3"
|
||||
description = "A platform independent file lock."
|
||||
optional = false
|
||||
python-versions = ">=3.10"
|
||||
groups = ["main", "build", "dev"]
|
||||
groups = ["main", "benchmarks", "build", "dev"]
|
||||
files = [
|
||||
{file = "filelock-3.20.3-py3-none-any.whl", hash = "sha256:4b0dda527ee31078689fc205ec4f1c1bf7d56cf88b6dc9426c4f230e46c2dce1"},
|
||||
{file = "filelock-3.20.3.tar.gz", hash = "sha256:18c57ee915c7ec61cff0ecf7f0f869936c7c30191bb0cf406f1341778d0834e1"},
|
||||
@@ -2222,7 +2283,7 @@ version = "1.8.0"
|
||||
description = "A list-like structure which implements collections.abc.MutableSequence"
|
||||
optional = false
|
||||
python-versions = ">=3.9"
|
||||
groups = ["main", "dev"]
|
||||
groups = ["main", "benchmarks", "dev"]
|
||||
files = [
|
||||
{file = "frozenlist-1.8.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:b37f6d31b3dcea7deb5e9696e529a6aa4a898adc33db82da12e4c60a7c4d2011"},
|
||||
{file = "frozenlist-1.8.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:ef2b7b394f208233e471abc541cc6991f907ffd47dc72584acee3147899d6565"},
|
||||
@@ -2358,25 +2419,27 @@ files = [
|
||||
|
||||
[[package]]
|
||||
name = "fsspec"
|
||||
version = "2026.1.0"
|
||||
version = "2024.2.0"
|
||||
description = "File-system specification"
|
||||
optional = false
|
||||
python-versions = ">=3.10"
|
||||
groups = ["main"]
|
||||
python-versions = ">=3.8"
|
||||
groups = ["main", "benchmarks"]
|
||||
files = [
|
||||
{file = "fsspec-2026.1.0-py3-none-any.whl", hash = "sha256:cb76aa913c2285a3b49bdd5fc55b1d7c708d7208126b60f2eb8194fe1b4cbdcc"},
|
||||
{file = "fsspec-2026.1.0.tar.gz", hash = "sha256:e987cb0496a0d81bba3a9d1cee62922fb395e7d4c3b575e57f547953334fe07b"},
|
||||
{file = "fsspec-2024.2.0-py3-none-any.whl", hash = "sha256:817f969556fa5916bc682e02ca2045f96ff7f586d45110fcb76022063ad2c7d8"},
|
||||
{file = "fsspec-2024.2.0.tar.gz", hash = "sha256:b6ad1a679f760dda52b1168c859d01b7b80648ea6f7f7c7f5a8a91dc3f3ecb84"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
aiohttp = {version = "<4.0.0a0 || >4.0.0a0,<4.0.0a1 || >4.0.0a1", optional = true, markers = "extra == \"http\""}
|
||||
|
||||
[package.extras]
|
||||
abfs = ["adlfs"]
|
||||
adl = ["adlfs"]
|
||||
arrow = ["pyarrow (>=1)"]
|
||||
dask = ["dask", "distributed"]
|
||||
dev = ["pre-commit", "ruff (>=0.5)"]
|
||||
doc = ["numpydoc", "sphinx", "sphinx-design", "sphinx-rtd-theme", "yarl"]
|
||||
devel = ["pytest", "pytest-cov"]
|
||||
dropbox = ["dropbox", "dropboxdrivefs", "requests"]
|
||||
full = ["adlfs", "aiohttp (!=4.0.0a0,!=4.0.0a1)", "dask", "distributed", "dropbox", "dropboxdrivefs", "fusepy", "gcsfs (>2024.2.0)", "libarchive-c", "ocifs", "panel", "paramiko", "pyarrow (>=1)", "pygit2", "requests", "s3fs (>2024.2.0)", "smbprotocol", "tqdm"]
|
||||
full = ["adlfs", "aiohttp (!=4.0.0a0,!=4.0.0a1)", "dask", "distributed", "dropbox", "dropboxdrivefs", "fusepy", "gcsfs", "libarchive-c", "ocifs", "panel", "paramiko", "pyarrow (>=1)", "pygit2", "requests", "s3fs", "smbprotocol", "tqdm"]
|
||||
fuse = ["fusepy"]
|
||||
gcs = ["gcsfs"]
|
||||
git = ["pygit2"]
|
||||
@@ -2391,9 +2454,6 @@ s3 = ["s3fs"]
|
||||
sftp = ["paramiko"]
|
||||
smb = ["smbprotocol"]
|
||||
ssh = ["paramiko"]
|
||||
test = ["aiohttp (!=4.0.0a0,!=4.0.0a1)", "numpy", "pytest", "pytest-asyncio (!=0.22.0)", "pytest-benchmark", "pytest-cov", "pytest-mock", "pytest-recording", "pytest-rerunfailures", "requests"]
|
||||
test-downstream = ["aiobotocore (>=2.5.4,<3.0.0)", "dask[dataframe,test]", "moto[server] (>4,<5)", "pytest-timeout", "xarray"]
|
||||
test-full = ["adlfs", "aiohttp (!=4.0.0a0,!=4.0.0a1)", "backports-zstd ; python_version < \"3.14\"", "cloudpickle", "dask", "distributed", "dropbox", "dropboxdrivefs", "fastparquet", "fusepy", "gcsfs", "jinja2", "kerchunk", "libarchive-c", "lz4", "notebook", "numpy", "ocifs", "pandas", "panel", "paramiko", "pyarrow", "pyarrow (>=1)", "pyftpdlib", "pygit2", "pytest", "pytest-asyncio (!=0.22.0)", "pytest-benchmark", "pytest-cov", "pytest-mock", "pytest-recording", "pytest-rerunfailures", "python-snappy", "requests", "smbprotocol", "tqdm", "urllib3", "zarr"]
|
||||
tqdm = ["tqdm"]
|
||||
|
||||
[[package]]
|
||||
@@ -2977,42 +3037,6 @@ files = [
|
||||
hpack = ">=4.1,<5"
|
||||
hyperframe = ">=6.1,<7"
|
||||
|
||||
[[package]]
|
||||
name = "hf-xet"
|
||||
version = "1.2.0"
|
||||
description = "Fast transfer of large files with the Hugging Face Hub."
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
groups = ["main"]
|
||||
markers = "platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == \"AMD64\" or platform_machine == \"arm64\" or platform_machine == \"aarch64\""
|
||||
files = [
|
||||
{file = "hf_xet-1.2.0-cp313-cp313t-macosx_10_12_x86_64.whl", hash = "sha256:ceeefcd1b7aed4956ae8499e2199607765fbd1c60510752003b6cc0b8413b649"},
|
||||
{file = "hf_xet-1.2.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:b70218dd548e9840224df5638fdc94bd033552963cfa97f9170829381179c813"},
|
||||
{file = "hf_xet-1.2.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7d40b18769bb9a8bc82a9ede575ce1a44c75eb80e7375a01d76259089529b5dc"},
|
||||
{file = "hf_xet-1.2.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:cd3a6027d59cfb60177c12d6424e31f4b5ff13d8e3a1247b3a584bf8977e6df5"},
|
||||
{file = "hf_xet-1.2.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:6de1fc44f58f6dd937956c8d304d8c2dea264c80680bcfa61ca4a15e7b76780f"},
|
||||
{file = "hf_xet-1.2.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:f182f264ed2acd566c514e45da9f2119110e48a87a327ca271027904c70c5832"},
|
||||
{file = "hf_xet-1.2.0-cp313-cp313t-win_amd64.whl", hash = "sha256:293a7a3787e5c95d7be1857358a9130694a9c6021de3f27fa233f37267174382"},
|
||||
{file = "hf_xet-1.2.0-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:10bfab528b968c70e062607f663e21e34e2bba349e8038db546646875495179e"},
|
||||
{file = "hf_xet-1.2.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:2a212e842647b02eb6a911187dc878e79c4aa0aa397e88dd3b26761676e8c1f8"},
|
||||
{file = "hf_xet-1.2.0-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:30e06daccb3a7d4c065f34fc26c14c74f4653069bb2b194e7f18f17cbe9939c0"},
|
||||
{file = "hf_xet-1.2.0-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:29c8fc913a529ec0a91867ce3d119ac1aac966e098cf49501800c870328cc090"},
|
||||
{file = "hf_xet-1.2.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:66e159cbfcfbb29f920db2c09ed8b660eb894640d284f102ada929b6e3dc410a"},
|
||||
{file = "hf_xet-1.2.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:9c91d5ae931510107f148874e9e2de8a16052b6f1b3ca3c1b12f15ccb491390f"},
|
||||
{file = "hf_xet-1.2.0-cp314-cp314t-win_amd64.whl", hash = "sha256:210d577732b519ac6ede149d2f2f34049d44e8622bf14eb3d63bbcd2d4b332dc"},
|
||||
{file = "hf_xet-1.2.0-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:46740d4ac024a7ca9b22bebf77460ff43332868b661186a8e46c227fdae01848"},
|
||||
{file = "hf_xet-1.2.0-cp37-abi3-macosx_11_0_arm64.whl", hash = "sha256:27df617a076420d8845bea087f59303da8be17ed7ec0cd7ee3b9b9f579dff0e4"},
|
||||
{file = "hf_xet-1.2.0-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3651fd5bfe0281951b988c0facbe726aa5e347b103a675f49a3fa8144c7968fd"},
|
||||
{file = "hf_xet-1.2.0-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:d06fa97c8562fb3ee7a378dd9b51e343bc5bc8190254202c9771029152f5e08c"},
|
||||
{file = "hf_xet-1.2.0-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:4c1428c9ae73ec0939410ec73023c4f842927f39db09b063b9482dac5a3bb737"},
|
||||
{file = "hf_xet-1.2.0-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:a55558084c16b09b5ed32ab9ed38421e2d87cf3f1f89815764d1177081b99865"},
|
||||
{file = "hf_xet-1.2.0-cp37-abi3-win_amd64.whl", hash = "sha256:e6584a52253f72c9f52f9e549d5895ca7a471608495c4ecaa6cc73dba2b24d69"},
|
||||
{file = "hf_xet-1.2.0.tar.gz", hash = "sha256:a8c27070ca547293b6890c4bf389f713f80e8c478631432962bb7f4bc0bd7d7f"},
|
||||
]
|
||||
|
||||
[package.extras]
|
||||
tests = ["pytest"]
|
||||
|
||||
[[package]]
|
||||
name = "hpack"
|
||||
version = "4.1.0"
|
||||
@@ -3170,39 +3194,36 @@ zstd = ["zstandard (>=0.18.0)"]
|
||||
|
||||
[[package]]
|
||||
name = "huggingface-hub"
|
||||
version = "1.3.2"
|
||||
version = "0.20.3"
|
||||
description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub"
|
||||
optional = false
|
||||
python-versions = ">=3.9.0"
|
||||
groups = ["main"]
|
||||
python-versions = ">=3.8.0"
|
||||
groups = ["main", "benchmarks"]
|
||||
files = [
|
||||
{file = "huggingface_hub-1.3.2-py3-none-any.whl", hash = "sha256:b552b9562a5532102a041fa31a6966bb9de95138fc7aa578bb3703198c25d1b6"},
|
||||
{file = "huggingface_hub-1.3.2.tar.gz", hash = "sha256:15d7902e154f04174a0816d1e9594adcf15cdad57596920a5dc70fadb5d896c7"},
|
||||
{file = "huggingface_hub-0.20.3-py3-none-any.whl", hash = "sha256:d988ae4f00d3e307b0c80c6a05ca6dbb7edba8bba3079f74cda7d9c2e562a7b6"},
|
||||
{file = "huggingface_hub-0.20.3.tar.gz", hash = "sha256:94e7f8e074475fbc67d6a71957b678e1b4a74ff1b64a644fd6cbb83da962d05d"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
filelock = "*"
|
||||
fsspec = ">=2023.5.0"
|
||||
hf-xet = {version = ">=1.2.0,<2.0.0", markers = "platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == \"AMD64\" or platform_machine == \"arm64\" or platform_machine == \"aarch64\""}
|
||||
httpx = ">=0.23.0,<1"
|
||||
packaging = ">=20.9"
|
||||
pyyaml = ">=5.1"
|
||||
shellingham = "*"
|
||||
requests = "*"
|
||||
tqdm = ">=4.42.1"
|
||||
typer-slim = "*"
|
||||
typing-extensions = ">=4.1.0"
|
||||
typing-extensions = ">=3.7.4.3"
|
||||
|
||||
[package.extras]
|
||||
all = ["Jinja2", "Pillow", "authlib (>=1.3.2)", "fastapi", "fastapi", "httpx", "itsdangerous", "jedi", "libcst (>=1.4.0)", "mypy (==1.15.0)", "numpy", "pytest (>=8.4.2)", "pytest-asyncio", "pytest-cov", "pytest-env", "pytest-mock", "pytest-rerunfailures (<16.0)", "pytest-vcr", "pytest-xdist", "ruff (>=0.9.0)", "soundfile", "ty", "types-PyYAML", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3", "typing-extensions (>=4.8.0)", "urllib3 (<2.0)"]
|
||||
dev = ["Jinja2", "Pillow", "authlib (>=1.3.2)", "fastapi", "fastapi", "httpx", "itsdangerous", "jedi", "libcst (>=1.4.0)", "mypy (==1.15.0)", "numpy", "pytest (>=8.4.2)", "pytest-asyncio", "pytest-cov", "pytest-env", "pytest-mock", "pytest-rerunfailures (<16.0)", "pytest-vcr", "pytest-xdist", "ruff (>=0.9.0)", "soundfile", "ty", "types-PyYAML", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3", "typing-extensions (>=4.8.0)", "urllib3 (<2.0)"]
|
||||
all = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "gradio", "jedi", "mypy (==1.5.1)", "numpy", "pydantic (>1.1,<2.0) ; python_version == \"3.8\"", "pydantic (>1.1,<3.0) ; python_version > \"3.8\"", "pytest", "pytest-asyncio", "pytest-cov", "pytest-env", "pytest-rerunfailures", "pytest-vcr", "pytest-xdist", "ruff (>=0.1.3)", "soundfile", "types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3", "typing-extensions (>=4.8.0)", "urllib3 (<2.0)"]
|
||||
cli = ["InquirerPy (==0.3.4)"]
|
||||
dev = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "gradio", "jedi", "mypy (==1.5.1)", "numpy", "pydantic (>1.1,<2.0) ; python_version == \"3.8\"", "pydantic (>1.1,<3.0) ; python_version > \"3.8\"", "pytest", "pytest-asyncio", "pytest-cov", "pytest-env", "pytest-rerunfailures", "pytest-vcr", "pytest-xdist", "ruff (>=0.1.3)", "soundfile", "types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3", "typing-extensions (>=4.8.0)", "urllib3 (<2.0)"]
|
||||
fastai = ["fastai (>=2.4)", "fastcore (>=1.3.27)", "toml"]
|
||||
hf-xet = ["hf-xet (>=1.2.0,<2.0.0)"]
|
||||
mcp = ["mcp (>=1.8.0)"]
|
||||
oauth = ["authlib (>=1.3.2)", "fastapi", "httpx", "itsdangerous"]
|
||||
quality = ["libcst (>=1.4.0)", "mypy (==1.15.0)", "ruff (>=0.9.0)", "ty"]
|
||||
testing = ["Jinja2", "Pillow", "authlib (>=1.3.2)", "fastapi", "fastapi", "httpx", "itsdangerous", "jedi", "numpy", "pytest (>=8.4.2)", "pytest-asyncio", "pytest-cov", "pytest-env", "pytest-mock", "pytest-rerunfailures (<16.0)", "pytest-vcr", "pytest-xdist", "soundfile", "urllib3 (<2.0)"]
|
||||
torch = ["safetensors[torch]", "torch"]
|
||||
typing = ["types-PyYAML", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3", "typing-extensions (>=4.8.0)"]
|
||||
inference = ["aiohttp", "pydantic (>1.1,<2.0) ; python_version == \"3.8\"", "pydantic (>1.1,<3.0) ; python_version > \"3.8\""]
|
||||
quality = ["mypy (==1.5.1)", "ruff (>=0.1.3)"]
|
||||
tensorflow = ["graphviz", "pydot", "tensorflow"]
|
||||
testing = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "gradio", "jedi", "numpy", "pydantic (>1.1,<2.0) ; python_version == \"3.8\"", "pydantic (>1.1,<3.0) ; python_version > \"3.8\"", "pytest", "pytest-asyncio", "pytest-cov", "pytest-env", "pytest-rerunfailures", "pytest-vcr", "pytest-xdist", "soundfile", "urllib3 (<2.0)"]
|
||||
torch = ["torch"]
|
||||
typing = ["types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3", "typing-extensions (>=4.8.0)"]
|
||||
|
||||
[[package]]
|
||||
name = "humanfriendly"
|
||||
@@ -3276,7 +3297,7 @@ version = "3.11"
|
||||
description = "Internationalized Domain Names in Applications (IDNA)"
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
groups = ["main", "dev"]
|
||||
groups = ["main", "benchmarks", "dev"]
|
||||
files = [
|
||||
{file = "idna-3.11-py3-none-any.whl", hash = "sha256:771a87f49d9defaf64091e6e6fe9c18d4833f140bd19464795bc32d966ca37ea"},
|
||||
{file = "idna-3.11.tar.gz", hash = "sha256:795dafcc9c04ed0c1fb032c2aa73654d8e8c5023a7df64a53f39190ada629902"},
|
||||
@@ -4187,7 +4208,7 @@ version = "6.7.0"
|
||||
description = "multidict implementation"
|
||||
optional = false
|
||||
python-versions = ">=3.9"
|
||||
groups = ["main", "dev"]
|
||||
groups = ["main", "benchmarks", "dev"]
|
||||
files = [
|
||||
{file = "multidict-6.7.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:9f474ad5acda359c8758c8accc22032c6abe6dc87a8be2440d097785e27a9349"},
|
||||
{file = "multidict-6.7.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:4b7a9db5a870f780220e931d0002bbfd88fb53aceb6293251e2c839415c1b20e"},
|
||||
@@ -4337,6 +4358,31 @@ files = [
|
||||
{file = "multidict-6.7.0.tar.gz", hash = "sha256:c6e99d9a65ca282e578dfea819cfa9c0a62b2499d8677392e09feaf305e9e6f5"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "multiprocess"
|
||||
version = "0.70.16"
|
||||
description = "better multiprocessing and multithreading in Python"
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
groups = ["benchmarks"]
|
||||
files = [
|
||||
{file = "multiprocess-0.70.16-pp310-pypy310_pp73-macosx_10_13_x86_64.whl", hash = "sha256:476887be10e2f59ff183c006af746cb6f1fd0eadcfd4ef49e605cbe2659920ee"},
|
||||
{file = "multiprocess-0.70.16-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:d951bed82c8f73929ac82c61f01a7b5ce8f3e5ef40f5b52553b4f547ce2b08ec"},
|
||||
{file = "multiprocess-0.70.16-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:37b55f71c07e2d741374998c043b9520b626a8dddc8b3129222ca4f1a06ef67a"},
|
||||
{file = "multiprocess-0.70.16-pp38-pypy38_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:ba8c31889abf4511c7308a8c52bb4a30b9d590e7f58523302ba00237702ca054"},
|
||||
{file = "multiprocess-0.70.16-pp39-pypy39_pp73-macosx_10_13_x86_64.whl", hash = "sha256:0dfd078c306e08d46d7a8d06fb120313d87aa43af60d66da43ffff40b44d2f41"},
|
||||
{file = "multiprocess-0.70.16-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:e7b9d0f307cd9bd50851afaac0dba2cb6c44449efff697df7c7645f7d3f2be3a"},
|
||||
{file = "multiprocess-0.70.16-py310-none-any.whl", hash = "sha256:c4a9944c67bd49f823687463660a2d6daae94c289adff97e0f9d696ba6371d02"},
|
||||
{file = "multiprocess-0.70.16-py311-none-any.whl", hash = "sha256:af4cabb0dac72abfb1e794fa7855c325fd2b55a10a44628a3c1ad3311c04127a"},
|
||||
{file = "multiprocess-0.70.16-py312-none-any.whl", hash = "sha256:fc0544c531920dde3b00c29863377f87e1632601092ea2daca74e4beb40faa2e"},
|
||||
{file = "multiprocess-0.70.16-py38-none-any.whl", hash = "sha256:a71d82033454891091a226dfc319d0cfa8019a4e888ef9ca910372a446de4435"},
|
||||
{file = "multiprocess-0.70.16-py39-none-any.whl", hash = "sha256:a0bafd3ae1b732eac64be2e72038231c1ba97724b60b09400d68f229fcc2fbf3"},
|
||||
{file = "multiprocess-0.70.16.tar.gz", hash = "sha256:161af703d4652a0e1410be6abccecde4a7ddffd19341be0a7011b94aeb171ac1"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
dill = ">=0.3.8"
|
||||
|
||||
[[package]]
|
||||
name = "murmurhash"
|
||||
version = "1.0.12"
|
||||
@@ -4502,7 +4548,7 @@ version = "2.0.2"
|
||||
description = "Fundamental package for array computing in Python"
|
||||
optional = false
|
||||
python-versions = ">=3.9"
|
||||
groups = ["main"]
|
||||
groups = ["main", "benchmarks"]
|
||||
markers = "python_version >= \"3.14\""
|
||||
files = [
|
||||
{file = "numpy-2.0.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:51129a29dbe56f9ca83438b706e2e69a39892b5eda6cedcb6b0c9fdc9b0d3ece"},
|
||||
@@ -4558,7 +4604,7 @@ version = "2.4.1"
|
||||
description = "Fundamental package for array computing in Python"
|
||||
optional = false
|
||||
python-versions = ">=3.11"
|
||||
groups = ["main"]
|
||||
groups = ["main", "benchmarks"]
|
||||
markers = "python_version < \"3.14\""
|
||||
files = [
|
||||
{file = "numpy-2.4.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:0cce2a669e3c8ba02ee563c7835f92c153cf02edff1ae05e1823f1dde21b16a5"},
|
||||
@@ -4941,12 +4987,108 @@ version = "25.0"
|
||||
description = "Core utilities for Python packages"
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
groups = ["main", "dev"]
|
||||
groups = ["main", "benchmarks", "dev"]
|
||||
files = [
|
||||
{file = "packaging-25.0-py3-none-any.whl", hash = "sha256:29572ef2b1f17581046b3a2227d5c611fb25ec70ca1ba8554b24b0e69331a484"},
|
||||
{file = "packaging-25.0.tar.gz", hash = "sha256:d443872c98d677bf60f6a1f2f8c1cb748e8fe762d2bf9d3148b5599295b0fc4f"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pandas"
|
||||
version = "2.3.3"
|
||||
description = "Powerful data structures for data analysis, time series, and statistics"
|
||||
optional = false
|
||||
python-versions = ">=3.9"
|
||||
groups = ["benchmarks"]
|
||||
files = [
|
||||
{file = "pandas-2.3.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:376c6446ae31770764215a6c937f72d917f214b43560603cd60da6408f183b6c"},
|
||||
{file = "pandas-2.3.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e19d192383eab2f4ceb30b412b22ea30690c9e618f78870357ae1d682912015a"},
|
||||
{file = "pandas-2.3.3-cp310-cp310-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5caf26f64126b6c7aec964f74266f435afef1c1b13da3b0636c7518a1fa3e2b1"},
|
||||
{file = "pandas-2.3.3-cp310-cp310-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:dd7478f1463441ae4ca7308a70e90b33470fa593429f9d4c578dd00d1fa78838"},
|
||||
{file = "pandas-2.3.3-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:4793891684806ae50d1288c9bae9330293ab4e083ccd1c5e383c34549c6e4250"},
|
||||
{file = "pandas-2.3.3-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:28083c648d9a99a5dd035ec125d42439c6c1c525098c58af0fc38dd1a7a1b3d4"},
|
||||
{file = "pandas-2.3.3-cp310-cp310-win_amd64.whl", hash = "sha256:503cf027cf9940d2ceaa1a93cfb5f8c8c7e6e90720a2850378f0b3f3b1e06826"},
|
||||
{file = "pandas-2.3.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:602b8615ebcc4a0c1751e71840428ddebeb142ec02c786e8ad6b1ce3c8dec523"},
|
||||
{file = "pandas-2.3.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:8fe25fc7b623b0ef6b5009149627e34d2a4657e880948ec3c840e9402e5c1b45"},
|
||||
{file = "pandas-2.3.3-cp311-cp311-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b468d3dad6ff947df92dcb32ede5b7bd41a9b3cceef0a30ed925f6d01fb8fa66"},
|
||||
{file = "pandas-2.3.3-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b98560e98cb334799c0b07ca7967ac361a47326e9b4e5a7dfb5ab2b1c9d35a1b"},
|
||||
{file = "pandas-2.3.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:1d37b5848ba49824e5c30bedb9c830ab9b7751fd049bc7914533e01c65f79791"},
|
||||
{file = "pandas-2.3.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:db4301b2d1f926ae677a751eb2bd0e8c5f5319c9cb3f88b0becbbb0b07b34151"},
|
||||
{file = "pandas-2.3.3-cp311-cp311-win_amd64.whl", hash = "sha256:f086f6fe114e19d92014a1966f43a3e62285109afe874f067f5abbdcbb10e59c"},
|
||||
{file = "pandas-2.3.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:6d21f6d74eb1725c2efaa71a2bfc661a0689579b58e9c0ca58a739ff0b002b53"},
|
||||
{file = "pandas-2.3.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:3fd2f887589c7aa868e02632612ba39acb0b8948faf5cc58f0850e165bd46f35"},
|
||||
{file = "pandas-2.3.3-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ecaf1e12bdc03c86ad4a7ea848d66c685cb6851d807a26aa245ca3d2017a1908"},
|
||||
{file = "pandas-2.3.3-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b3d11d2fda7eb164ef27ffc14b4fcab16a80e1ce67e9f57e19ec0afaf715ba89"},
|
||||
{file = "pandas-2.3.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:a68e15f780eddf2b07d242e17a04aa187a7ee12b40b930bfdd78070556550e98"},
|
||||
{file = "pandas-2.3.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:371a4ab48e950033bcf52b6527eccb564f52dc826c02afd9a1bc0ab731bba084"},
|
||||
{file = "pandas-2.3.3-cp312-cp312-win_amd64.whl", hash = "sha256:a16dcec078a01eeef8ee61bf64074b4e524a2a3f4b3be9326420cabe59c4778b"},
|
||||
{file = "pandas-2.3.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:56851a737e3470de7fa88e6131f41281ed440d29a9268dcbf0002da5ac366713"},
|
||||
{file = "pandas-2.3.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:bdcd9d1167f4885211e401b3036c0c8d9e274eee67ea8d0758a256d60704cfe8"},
|
||||
{file = "pandas-2.3.3-cp313-cp313-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e32e7cc9af0f1cc15548288a51a3b681cc2a219faa838e995f7dc53dbab1062d"},
|
||||
{file = "pandas-2.3.3-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:318d77e0e42a628c04dc56bcef4b40de67918f7041c2b061af1da41dcff670ac"},
|
||||
{file = "pandas-2.3.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:4e0a175408804d566144e170d0476b15d78458795bb18f1304fb94160cabf40c"},
|
||||
{file = "pandas-2.3.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:93c2d9ab0fc11822b5eece72ec9587e172f63cff87c00b062f6e37448ced4493"},
|
||||
{file = "pandas-2.3.3-cp313-cp313-win_amd64.whl", hash = "sha256:f8bfc0e12dc78f777f323f55c58649591b2cd0c43534e8355c51d3fede5f4dee"},
|
||||
{file = "pandas-2.3.3-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:75ea25f9529fdec2d2e93a42c523962261e567d250b0013b16210e1d40d7c2e5"},
|
||||
{file = "pandas-2.3.3-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:74ecdf1d301e812db96a465a525952f4dde225fdb6d8e5a521d47e1f42041e21"},
|
||||
{file = "pandas-2.3.3-cp313-cp313t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6435cb949cb34ec11cc9860246ccb2fdc9ecd742c12d3304989017d53f039a78"},
|
||||
{file = "pandas-2.3.3-cp313-cp313t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:900f47d8f20860de523a1ac881c4c36d65efcb2eb850e6948140fa781736e110"},
|
||||
{file = "pandas-2.3.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:a45c765238e2ed7d7c608fc5bc4a6f88b642f2f01e70c0c23d2224dd21829d86"},
|
||||
{file = "pandas-2.3.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:c4fc4c21971a1a9f4bdb4c73978c7f7256caa3e62b323f70d6cb80db583350bc"},
|
||||
{file = "pandas-2.3.3-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:ee15f284898e7b246df8087fc82b87b01686f98ee67d85a17b7ab44143a3a9a0"},
|
||||
{file = "pandas-2.3.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:1611aedd912e1ff81ff41c745822980c49ce4a7907537be8692c8dbc31924593"},
|
||||
{file = "pandas-2.3.3-cp314-cp314-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6d2cefc361461662ac48810cb14365a365ce864afe85ef1f447ff5a1e99ea81c"},
|
||||
{file = "pandas-2.3.3-cp314-cp314-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ee67acbbf05014ea6c763beb097e03cd629961c8a632075eeb34247120abcb4b"},
|
||||
{file = "pandas-2.3.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:c46467899aaa4da076d5abc11084634e2d197e9460643dd455ac3db5856b24d6"},
|
||||
{file = "pandas-2.3.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:6253c72c6a1d990a410bc7de641d34053364ef8bcd3126f7e7450125887dffe3"},
|
||||
{file = "pandas-2.3.3-cp314-cp314-win_amd64.whl", hash = "sha256:1b07204a219b3b7350abaae088f451860223a52cfb8a6c53358e7948735158e5"},
|
||||
{file = "pandas-2.3.3-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:2462b1a365b6109d275250baaae7b760fd25c726aaca0054649286bcfbb3e8ec"},
|
||||
{file = "pandas-2.3.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:0242fe9a49aa8b4d78a4fa03acb397a58833ef6199e9aa40a95f027bb3a1b6e7"},
|
||||
{file = "pandas-2.3.3-cp314-cp314t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a21d830e78df0a515db2b3d2f5570610f5e6bd2e27749770e8bb7b524b89b450"},
|
||||
{file = "pandas-2.3.3-cp314-cp314t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2e3ebdb170b5ef78f19bfb71b0dc5dc58775032361fa188e814959b74d726dd5"},
|
||||
{file = "pandas-2.3.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:d051c0e065b94b7a3cea50eb1ec32e912cd96dba41647eb24104b6c6c14c5788"},
|
||||
{file = "pandas-2.3.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:3869faf4bd07b3b66a9f462417d0ca3a9df29a9f6abd5d0d0dbab15dac7abe87"},
|
||||
{file = "pandas-2.3.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:c503ba5216814e295f40711470446bc3fd00f0faea8a086cbc688808e26f92a2"},
|
||||
{file = "pandas-2.3.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:a637c5cdfa04b6d6e2ecedcb81fc52ffb0fd78ce2ebccc9ea964df9f658de8c8"},
|
||||
{file = "pandas-2.3.3-cp39-cp39-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:854d00d556406bffe66a4c0802f334c9ad5a96b4f1f868adf036a21b11ef13ff"},
|
||||
{file = "pandas-2.3.3-cp39-cp39-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bf1f8a81d04ca90e32a0aceb819d34dbd378a98bf923b6398b9a3ec0bf44de29"},
|
||||
{file = "pandas-2.3.3-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:23ebd657a4d38268c7dfbdf089fbc31ea709d82e4923c5ffd4fbd5747133ce73"},
|
||||
{file = "pandas-2.3.3-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:5554c929ccc317d41a5e3d1234f3be588248e61f08a74dd17c9eabb535777dc9"},
|
||||
{file = "pandas-2.3.3-cp39-cp39-win_amd64.whl", hash = "sha256:d3e28b3e83862ccf4d85ff19cf8c20b2ae7e503881711ff2d534dc8f761131aa"},
|
||||
{file = "pandas-2.3.3.tar.gz", hash = "sha256:e05e1af93b977f7eafa636d043f9f94c7ee3ac81af99c13508215942e64c993b"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
numpy = {version = ">=1.26.0", markers = "python_version >= \"3.12\""}
|
||||
python-dateutil = ">=2.8.2"
|
||||
pytz = ">=2020.1"
|
||||
tzdata = ">=2022.7"
|
||||
|
||||
[package.extras]
|
||||
all = ["PyQt5 (>=5.15.9)", "SQLAlchemy (>=2.0.0)", "adbc-driver-postgresql (>=0.8.0)", "adbc-driver-sqlite (>=0.8.0)", "beautifulsoup4 (>=4.11.2)", "bottleneck (>=1.3.6)", "dataframe-api-compat (>=0.1.7)", "fastparquet (>=2022.12.0)", "fsspec (>=2022.11.0)", "gcsfs (>=2022.11.0)", "html5lib (>=1.1)", "hypothesis (>=6.46.1)", "jinja2 (>=3.1.2)", "lxml (>=4.9.2)", "matplotlib (>=3.6.3)", "numba (>=0.56.4)", "numexpr (>=2.8.4)", "odfpy (>=1.4.1)", "openpyxl (>=3.1.0)", "pandas-gbq (>=0.19.0)", "psycopg2 (>=2.9.6)", "pyarrow (>=10.0.1)", "pymysql (>=1.0.2)", "pyreadstat (>=1.2.0)", "pytest (>=7.3.2)", "pytest-xdist (>=2.2.0)", "python-calamine (>=0.1.7)", "pyxlsb (>=1.0.10)", "qtpy (>=2.3.0)", "s3fs (>=2022.11.0)", "scipy (>=1.10.0)", "tables (>=3.8.0)", "tabulate (>=0.9.0)", "xarray (>=2022.12.0)", "xlrd (>=2.0.1)", "xlsxwriter (>=3.0.5)", "zstandard (>=0.19.0)"]
|
||||
aws = ["s3fs (>=2022.11.0)"]
|
||||
clipboard = ["PyQt5 (>=5.15.9)", "qtpy (>=2.3.0)"]
|
||||
compression = ["zstandard (>=0.19.0)"]
|
||||
computation = ["scipy (>=1.10.0)", "xarray (>=2022.12.0)"]
|
||||
consortium-standard = ["dataframe-api-compat (>=0.1.7)"]
|
||||
excel = ["odfpy (>=1.4.1)", "openpyxl (>=3.1.0)", "python-calamine (>=0.1.7)", "pyxlsb (>=1.0.10)", "xlrd (>=2.0.1)", "xlsxwriter (>=3.0.5)"]
|
||||
feather = ["pyarrow (>=10.0.1)"]
|
||||
fss = ["fsspec (>=2022.11.0)"]
|
||||
gcp = ["gcsfs (>=2022.11.0)", "pandas-gbq (>=0.19.0)"]
|
||||
hdf5 = ["tables (>=3.8.0)"]
|
||||
html = ["beautifulsoup4 (>=4.11.2)", "html5lib (>=1.1)", "lxml (>=4.9.2)"]
|
||||
mysql = ["SQLAlchemy (>=2.0.0)", "pymysql (>=1.0.2)"]
|
||||
output-formatting = ["jinja2 (>=3.1.2)", "tabulate (>=0.9.0)"]
|
||||
parquet = ["pyarrow (>=10.0.1)"]
|
||||
performance = ["bottleneck (>=1.3.6)", "numba (>=0.56.4)", "numexpr (>=2.8.4)"]
|
||||
plot = ["matplotlib (>=3.6.3)"]
|
||||
postgresql = ["SQLAlchemy (>=2.0.0)", "adbc-driver-postgresql (>=0.8.0)", "psycopg2 (>=2.9.6)"]
|
||||
pyarrow = ["pyarrow (>=10.0.1)"]
|
||||
spss = ["pyreadstat (>=1.2.0)"]
|
||||
sql-other = ["SQLAlchemy (>=2.0.0)", "adbc-driver-postgresql (>=0.8.0)", "adbc-driver-sqlite (>=0.8.0)"]
|
||||
test = ["hypothesis (>=6.46.1)", "pytest (>=7.3.2)", "pytest-xdist (>=2.2.0)"]
|
||||
xml = ["lxml (>=4.9.2)"]
|
||||
|
||||
[[package]]
|
||||
name = "patchelf"
|
||||
version = "0.17.2.4"
|
||||
@@ -5381,7 +5523,7 @@ version = "0.4.1"
|
||||
description = "Accelerated property cache"
|
||||
optional = false
|
||||
python-versions = ">=3.9"
|
||||
groups = ["main", "dev"]
|
||||
groups = ["main", "benchmarks", "dev"]
|
||||
files = [
|
||||
{file = "propcache-0.4.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:7c2d1fa3201efaf55d730400d945b5b3ab6e672e100ba0f9a409d950ab25d7db"},
|
||||
{file = "propcache-0.4.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:1eb2994229cc8ce7fe9b3db88f5465f5fd8651672840b2e426b88cdb1a30aac8"},
|
||||
@@ -5545,6 +5687,78 @@ files = [
|
||||
{file = "protobuf-6.33.4.tar.gz", hash = "sha256:dc2e61bca3b10470c1912d166fe0af67bfc20eb55971dcef8dfa48ce14f0ed91"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pyarrow"
|
||||
version = "23.0.0"
|
||||
description = "Python library for Apache Arrow"
|
||||
optional = false
|
||||
python-versions = ">=3.10"
|
||||
groups = ["benchmarks"]
|
||||
files = [
|
||||
{file = "pyarrow-23.0.0-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:cbdc2bf5947aa4d462adcf8453cf04aee2f7932653cb67a27acd96e5e8528a67"},
|
||||
{file = "pyarrow-23.0.0-cp310-cp310-macosx_12_0_x86_64.whl", hash = "sha256:4d38c836930ce15cd31dce20114b21ba082da231c884bdc0a7b53e1477fe7f07"},
|
||||
{file = "pyarrow-23.0.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:4222ff8f76919ecf6c716175a0e5fddb5599faeed4c56d9ea41a2c42be4998b2"},
|
||||
{file = "pyarrow-23.0.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:87f06159cbe38125852657716889296c83c37b4d09a5e58f3d10245fd1f69795"},
|
||||
{file = "pyarrow-23.0.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:1675c374570d8b91ea6d4edd4608fa55951acd44e0c31bd146e091b4005de24f"},
|
||||
{file = "pyarrow-23.0.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:247374428fde4f668f138b04031a7e7077ba5fa0b5b1722fdf89a017bf0b7ee0"},
|
||||
{file = "pyarrow-23.0.0-cp310-cp310-win_amd64.whl", hash = "sha256:de53b1bd3b88a2ee93c9af412c903e57e738c083be4f6392288294513cd8b2c1"},
|
||||
{file = "pyarrow-23.0.0-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:5574d541923efcbfdf1294a2746ae3b8c2498a2dc6cd477882f6f4e7b1ac08d3"},
|
||||
{file = "pyarrow-23.0.0-cp311-cp311-macosx_12_0_x86_64.whl", hash = "sha256:2ef0075c2488932e9d3c2eb3482f9459c4be629aa673b725d5e3cf18f777f8e4"},
|
||||
{file = "pyarrow-23.0.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:65666fc269669af1ef1c14478c52222a2aa5c907f28b68fb50a203c777e4f60c"},
|
||||
{file = "pyarrow-23.0.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:4d85cb6177198f3812db4788e394b757223f60d9a9f5ad6634b3e32be1525803"},
|
||||
{file = "pyarrow-23.0.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:1a9ff6fa4141c24a03a1a434c63c8fa97ce70f8f36bccabc18ebba905ddf0f17"},
|
||||
{file = "pyarrow-23.0.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:84839d060a54ae734eb60a756aeacb62885244aaa282f3c968f5972ecc7b1ecc"},
|
||||
{file = "pyarrow-23.0.0-cp311-cp311-win_amd64.whl", hash = "sha256:a149a647dbfe928ce8830a713612aa0b16e22c64feac9d1761529778e4d4eaa5"},
|
||||
{file = "pyarrow-23.0.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:5961a9f646c232697c24f54d3419e69b4261ba8a8b66b0ac54a1851faffcbab8"},
|
||||
{file = "pyarrow-23.0.0-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:632b3e7c3d232f41d64e1a4a043fb82d44f8a349f339a1188c6a0dd9d2d47d8a"},
|
||||
{file = "pyarrow-23.0.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:76242c846db1411f1d6c2cc3823be6b86b40567ee24493344f8226ba34a81333"},
|
||||
{file = "pyarrow-23.0.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:b73519f8b52ae28127000986bf228fda781e81d3095cd2d3ece76eb5cf760e1b"},
|
||||
{file = "pyarrow-23.0.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:068701f6823449b1b6469120f399a1239766b117d211c5d2519d4ed5861f75de"},
|
||||
{file = "pyarrow-23.0.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:1801ba947015d10e23bca9dd6ef5d0e9064a81569a89b6e9a63b59224fd060df"},
|
||||
{file = "pyarrow-23.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:52265266201ec25b6839bf6bd4ea918ca6d50f31d13e1cf200b4261cd11dc25c"},
|
||||
{file = "pyarrow-23.0.0-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:ad96a597547af7827342ffb3c503c8316e5043bb09b47a84885ce39394c96e00"},
|
||||
{file = "pyarrow-23.0.0-cp313-cp313-macosx_12_0_x86_64.whl", hash = "sha256:b9edf990df77c2901e79608f08c13fbde60202334a4fcadb15c1f57bf7afee43"},
|
||||
{file = "pyarrow-23.0.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:36d1b5bc6ddcaff0083ceec7e2561ed61a51f49cce8be079ee8ed406acb6fdef"},
|
||||
{file = "pyarrow-23.0.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:4292b889cd224f403304ddda8b63a36e60f92911f89927ec8d98021845ea21be"},
|
||||
{file = "pyarrow-23.0.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:dfd9e133e60eaa847fd80530a1b89a052f09f695d0b9c34c235ea6b2e0924cf7"},
|
||||
{file = "pyarrow-23.0.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:832141cc09fac6aab1cd3719951d23301396968de87080c57c9a7634e0ecd068"},
|
||||
{file = "pyarrow-23.0.0-cp313-cp313-win_amd64.whl", hash = "sha256:7a7d067c9a88faca655c71bcc30ee2782038d59c802d57950826a07f60d83c4c"},
|
||||
{file = "pyarrow-23.0.0-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:ce9486e0535a843cf85d990e2ec5820a47918235183a5c7b8b97ed7e92c2d47d"},
|
||||
{file = "pyarrow-23.0.0-cp313-cp313t-macosx_12_0_x86_64.whl", hash = "sha256:075c29aeaa685fd1182992a9ed2499c66f084ee54eea47da3eb76e125e06064c"},
|
||||
{file = "pyarrow-23.0.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:799965a5379589510d888be3094c2296efd186a17ca1cef5b77703d4d5121f53"},
|
||||
{file = "pyarrow-23.0.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:ef7cac8fe6fccd8b9e7617bfac785b0371a7fe26af59463074e4882747145d40"},
|
||||
{file = "pyarrow-23.0.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:15a414f710dc927132dd67c361f78c194447479555af57317066ee5116b90e9e"},
|
||||
{file = "pyarrow-23.0.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:3e0d2e6915eca7d786be6a77bf227fbc06d825a75b5b5fe9bcbef121dec32685"},
|
||||
{file = "pyarrow-23.0.0-cp313-cp313t-win_amd64.whl", hash = "sha256:4b317ea6e800b5704e5e5929acb6e2dc13e9276b708ea97a39eb8b345aa2658b"},
|
||||
{file = "pyarrow-23.0.0-cp314-cp314-macosx_12_0_arm64.whl", hash = "sha256:20b187ed9550d233a872074159f765f52f9d92973191cd4b93f293a19efbe377"},
|
||||
{file = "pyarrow-23.0.0-cp314-cp314-macosx_12_0_x86_64.whl", hash = "sha256:18ec84e839b493c3886b9b5e06861962ab4adfaeb79b81c76afbd8d84c7d5fda"},
|
||||
{file = "pyarrow-23.0.0-cp314-cp314-manylinux_2_28_aarch64.whl", hash = "sha256:e438dd3f33894e34fd02b26bd12a32d30d006f5852315f611aa4add6c7fab4bc"},
|
||||
{file = "pyarrow-23.0.0-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:a244279f240c81f135631be91146d7fa0e9e840e1dfed2aba8483eba25cd98e6"},
|
||||
{file = "pyarrow-23.0.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:c4692e83e42438dba512a570c6eaa42be2f8b6c0f492aea27dec54bdc495103a"},
|
||||
{file = "pyarrow-23.0.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:ae7f30f898dfe44ea69654a35c93e8da4cef6606dc4c72394068fd95f8e9f54a"},
|
||||
{file = "pyarrow-23.0.0-cp314-cp314-win_amd64.whl", hash = "sha256:5b86bb649e4112fb0614294b7d0a175c7513738876b89655605ebb87c804f861"},
|
||||
{file = "pyarrow-23.0.0-cp314-cp314t-macosx_12_0_arm64.whl", hash = "sha256:ebc017d765d71d80a3f8584ca0566b53e40464586585ac64176115baa0ada7d3"},
|
||||
{file = "pyarrow-23.0.0-cp314-cp314t-macosx_12_0_x86_64.whl", hash = "sha256:0800cc58a6d17d159df823f87ad66cefebf105b982493d4bad03ee7fab84b993"},
|
||||
{file = "pyarrow-23.0.0-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:3a7c68c722da9bb5b0f8c10e3eae71d9825a4b429b40b32709df5d1fa55beb3d"},
|
||||
{file = "pyarrow-23.0.0-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:bd5556c24622df90551063ea41f559b714aa63ca953db884cfb958559087a14e"},
|
||||
{file = "pyarrow-23.0.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:54810f6e6afc4ffee7c2e0051b61722fbea9a4961b46192dcfae8ea12fa09059"},
|
||||
{file = "pyarrow-23.0.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:14de7d48052cf4b0ed174533eafa3cfe0711b8076ad70bede32cf59f744f0d7c"},
|
||||
{file = "pyarrow-23.0.0-cp314-cp314t-win_amd64.whl", hash = "sha256:427deac1f535830a744a4f04a6ac183a64fcac4341b3f618e693c41b7b98d2b0"},
|
||||
{file = "pyarrow-23.0.0.tar.gz", hash = "sha256:180e3150e7edfcd182d3d9afba72f7cf19839a497cc76555a8dce998a8f67615"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pyarrow-hotfix"
|
||||
version = "0.7"
|
||||
description = ""
|
||||
optional = false
|
||||
python-versions = ">=3.5"
|
||||
groups = ["benchmarks"]
|
||||
files = [
|
||||
{file = "pyarrow_hotfix-0.7-py3-none-any.whl", hash = "sha256:3236f3b5f1260f0e2ac070a55c1a7b339c4bb7267839bd2015e283234e758100"},
|
||||
{file = "pyarrow_hotfix-0.7.tar.gz", hash = "sha256:59399cd58bdd978b2e42816a4183a55c6472d4e33d183351b6069f11ed42661d"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pyasn1"
|
||||
version = "0.6.2"
|
||||
@@ -6254,7 +6468,7 @@ version = "2.9.0.post0"
|
||||
description = "Extensions to the standard Python datetime module"
|
||||
optional = false
|
||||
python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7"
|
||||
groups = ["main"]
|
||||
groups = ["main", "benchmarks"]
|
||||
files = [
|
||||
{file = "python-dateutil-2.9.0.post0.tar.gz", hash = "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3"},
|
||||
{file = "python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427"},
|
||||
@@ -6315,7 +6529,7 @@ version = "2025.2"
|
||||
description = "World timezone definitions, modern and historical"
|
||||
optional = false
|
||||
python-versions = "*"
|
||||
groups = ["main"]
|
||||
groups = ["main", "benchmarks"]
|
||||
files = [
|
||||
{file = "pytz-2025.2-py2.py3-none-any.whl", hash = "sha256:5ddf76296dd8c44c26eb8f4b6f35488f3ccbf6fbbd7adee0b7262d43f0ec2f00"},
|
||||
{file = "pytz-2025.2.tar.gz", hash = "sha256:360b9e3dbb49a209c21ad61809c7fb453643e048b38924c765813546746e81c3"},
|
||||
@@ -6358,7 +6572,7 @@ version = "6.0.3"
|
||||
description = "YAML parser and emitter for Python"
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
groups = ["main", "dev"]
|
||||
groups = ["main", "benchmarks", "dev"]
|
||||
files = [
|
||||
{file = "PyYAML-6.0.3-cp38-cp38-macosx_10_13_x86_64.whl", hash = "sha256:c2514fceb77bc5e7a2f7adfaa1feb2fb311607c9cb518dbc378688ec73d8292f"},
|
||||
{file = "PyYAML-6.0.3-cp38-cp38-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9c57bb8c96f6d1808c030b1687b9b5fb476abaa47f0db9c0101f5e9f394e97f4"},
|
||||
@@ -6599,7 +6813,7 @@ version = "2.32.5"
|
||||
description = "Python HTTP for Humans."
|
||||
optional = false
|
||||
python-versions = ">=3.9"
|
||||
groups = ["main", "dev"]
|
||||
groups = ["main", "benchmarks", "dev"]
|
||||
files = [
|
||||
{file = "requests-2.32.5-py3-none-any.whl", hash = "sha256:2462f94637a34fd532264295e186976db0f5d453d1cdd31473c85a6a161affb6"},
|
||||
{file = "requests-2.32.5.tar.gz", hash = "sha256:dbba0bac56e100853db0ea71b82b4dfd5fe2bf6d3754a8893c3af500cec7d7cf"},
|
||||
@@ -6893,7 +7107,7 @@ version = "1.17.0"
|
||||
description = "Python 2 and 3 compatibility utilities"
|
||||
optional = false
|
||||
python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7"
|
||||
groups = ["main"]
|
||||
groups = ["main", "benchmarks"]
|
||||
files = [
|
||||
{file = "six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274"},
|
||||
{file = "six-1.17.0.tar.gz", hash = "sha256:ff70335d468e7eb6ec65b95b99d3a2836546063f63acc5171de367e834932a81"},
|
||||
@@ -7739,7 +7953,7 @@ version = "4.67.1"
|
||||
description = "Fast, Extensible Progress Meter"
|
||||
optional = false
|
||||
python-versions = ">=3.7"
|
||||
groups = ["main"]
|
||||
groups = ["main", "benchmarks"]
|
||||
files = [
|
||||
{file = "tqdm-4.67.1-py3-none-any.whl", hash = "sha256:26445eca388f82e72884e0d580d5464cd801a3ea01e63e5601bdff9ba6a48de2"},
|
||||
{file = "tqdm-4.67.1.tar.gz", hash = "sha256:f8aef9c52c08c13a65f30ea34f4e5aac3fd1a34959879d7e59e63027286627f2"},
|
||||
@@ -7940,7 +8154,7 @@ version = "4.15.0"
|
||||
description = "Backported and Experimental Type Hints for Python 3.9+"
|
||||
optional = false
|
||||
python-versions = ">=3.9"
|
||||
groups = ["main", "dev"]
|
||||
groups = ["main", "benchmarks", "dev"]
|
||||
files = [
|
||||
{file = "typing_extensions-4.15.0-py3-none-any.whl", hash = "sha256:f0fa19c6845758ab08074a0cfa8b7aecb71c999ca73d62883bc25cc018c4e548"},
|
||||
{file = "typing_extensions-4.15.0.tar.gz", hash = "sha256:0cea48d173cc12fa28ecabc3b837ea3cf6f38c6d1136f85cbaaf598984861466"},
|
||||
@@ -7967,12 +8181,12 @@ version = "2025.3"
|
||||
description = "Provider of IANA time zone data"
|
||||
optional = false
|
||||
python-versions = ">=2"
|
||||
groups = ["main"]
|
||||
markers = "platform_system == \"Windows\""
|
||||
groups = ["main", "benchmarks"]
|
||||
files = [
|
||||
{file = "tzdata-2025.3-py2.py3-none-any.whl", hash = "sha256:06a47e5700f3081aab02b2e513160914ff0694bce9947d6b76ebd6bf57cfc5d1"},
|
||||
{file = "tzdata-2025.3.tar.gz", hash = "sha256:de39c2ca5dc7b0344f2eba86f49d614019d29f060fc4ebc8a417896a620b56a7"},
|
||||
]
|
||||
markers = {main = "platform_system == \"Windows\""}
|
||||
|
||||
[[package]]
|
||||
name = "tzlocal"
|
||||
@@ -8010,7 +8224,7 @@ version = "2.6.3"
|
||||
description = "HTTP library with thread-safe connection pooling, file post, and more."
|
||||
optional = false
|
||||
python-versions = ">=3.9"
|
||||
groups = ["main", "dev"]
|
||||
groups = ["main", "benchmarks", "dev"]
|
||||
files = [
|
||||
{file = "urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4"},
|
||||
{file = "urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed"},
|
||||
@@ -8606,13 +8820,163 @@ files = [
|
||||
[package.dependencies]
|
||||
h11 = ">=0.16.0,<1"
|
||||
|
||||
[[package]]
|
||||
name = "xxhash"
|
||||
version = "3.6.0"
|
||||
description = "Python binding for xxHash"
|
||||
optional = false
|
||||
python-versions = ">=3.7"
|
||||
groups = ["benchmarks"]
|
||||
files = [
|
||||
{file = "xxhash-3.6.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:87ff03d7e35c61435976554477a7f4cd1704c3596a89a8300d5ce7fc83874a71"},
|
||||
{file = "xxhash-3.6.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:f572dfd3d0e2eb1a57511831cf6341242f5a9f8298a45862d085f5b93394a27d"},
|
||||
{file = "xxhash-3.6.0-cp310-cp310-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:89952ea539566b9fed2bbd94e589672794b4286f342254fad28b149f9615fef8"},
|
||||
{file = "xxhash-3.6.0-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:48e6f2ffb07a50b52465a1032c3cf1f4a5683f944acaca8a134a2f23674c2058"},
|
||||
{file = "xxhash-3.6.0-cp310-cp310-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:b5b848ad6c16d308c3ac7ad4ba6bede80ed5df2ba8ed382f8932df63158dd4b2"},
|
||||
{file = "xxhash-3.6.0-cp310-cp310-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:a034590a727b44dd8ac5914236a7b8504144447a9682586c3327e935f33ec8cc"},
|
||||
{file = "xxhash-3.6.0-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8a8f1972e75ebdd161d7896743122834fe87378160c20e97f8b09166213bf8cc"},
|
||||
{file = "xxhash-3.6.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:ee34327b187f002a596d7b167ebc59a1b729e963ce645964bbc050d2f1b73d07"},
|
||||
{file = "xxhash-3.6.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:339f518c3c7a850dd033ab416ea25a692759dc7478a71131fe8869010d2b75e4"},
|
||||
{file = "xxhash-3.6.0-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:bf48889c9630542d4709192578aebbd836177c9f7a4a2778a7d6340107c65f06"},
|
||||
{file = "xxhash-3.6.0-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:5576b002a56207f640636056b4160a378fe36a58db73ae5c27a7ec8db35f71d4"},
|
||||
{file = "xxhash-3.6.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:af1f3278bd02814d6dedc5dec397993b549d6f16c19379721e5a1d31e132c49b"},
|
||||
{file = "xxhash-3.6.0-cp310-cp310-win32.whl", hash = "sha256:aed058764db109dc9052720da65fafe84873b05eb8b07e5e653597951af57c3b"},
|
||||
{file = "xxhash-3.6.0-cp310-cp310-win_amd64.whl", hash = "sha256:e82da5670f2d0d98950317f82a0e4a0197150ff19a6df2ba40399c2a3b9ae5fb"},
|
||||
{file = "xxhash-3.6.0-cp310-cp310-win_arm64.whl", hash = "sha256:4a082ffff8c6ac07707fb6b671caf7c6e020c75226c561830b73d862060f281d"},
|
||||
{file = "xxhash-3.6.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b47bbd8cf2d72797f3c2772eaaac0ded3d3af26481a26d7d7d41dc2d3c46b04a"},
|
||||
{file = "xxhash-3.6.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:2b6821e94346f96db75abaa6e255706fb06ebd530899ed76d32cd99f20dc52fa"},
|
||||
{file = "xxhash-3.6.0-cp311-cp311-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:d0a9751f71a1a65ce3584e9cae4467651c7e70c9d31017fa57574583a4540248"},
|
||||
{file = "xxhash-3.6.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8b29ee68625ab37b04c0b40c3fafdf24d2f75ccd778333cfb698f65f6c463f62"},
|
||||
{file = "xxhash-3.6.0-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:6812c25fe0d6c36a46ccb002f40f27ac903bf18af9f6dd8f9669cb4d176ab18f"},
|
||||
{file = "xxhash-3.6.0-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:4ccbff013972390b51a18ef1255ef5ac125c92dc9143b2d1909f59abc765540e"},
|
||||
{file = "xxhash-3.6.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:297b7fbf86c82c550e12e8fb71968b3f033d27b874276ba3624ea868c11165a8"},
|
||||
{file = "xxhash-3.6.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:dea26ae1eb293db089798d3973a5fc928a18fdd97cc8801226fae705b02b14b0"},
|
||||
{file = "xxhash-3.6.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:7a0b169aafb98f4284f73635a8e93f0735f9cbde17bd5ec332480484241aaa77"},
|
||||
{file = "xxhash-3.6.0-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:08d45aef063a4531b785cd72de4887766d01dc8f362a515693df349fdb825e0c"},
|
||||
{file = "xxhash-3.6.0-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:929142361a48ee07f09121fe9e96a84950e8d4df3bb298ca5d88061969f34d7b"},
|
||||
{file = "xxhash-3.6.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:51312c768403d8540487dbbfb557454cfc55589bbde6424456951f7fcd4facb3"},
|
||||
{file = "xxhash-3.6.0-cp311-cp311-win32.whl", hash = "sha256:d1927a69feddc24c987b337ce81ac15c4720955b667fe9b588e02254b80446fd"},
|
||||
{file = "xxhash-3.6.0-cp311-cp311-win_amd64.whl", hash = "sha256:26734cdc2d4ffe449b41d186bbeac416f704a482ed835d375a5c0cb02bc63fef"},
|
||||
{file = "xxhash-3.6.0-cp311-cp311-win_arm64.whl", hash = "sha256:d72f67ef8bf36e05f5b6c65e8524f265bd61071471cd4cf1d36743ebeeeb06b7"},
|
||||
{file = "xxhash-3.6.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:01362c4331775398e7bb34e3ab403bc9ee9f7c497bc7dee6272114055277dd3c"},
|
||||
{file = "xxhash-3.6.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:b7b2df81a23f8cb99656378e72501b2cb41b1827c0f5a86f87d6b06b69f9f204"},
|
||||
{file = "xxhash-3.6.0-cp312-cp312-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:dc94790144e66b14f67b10ac8ed75b39ca47536bf8800eb7c24b50271ea0c490"},
|
||||
{file = "xxhash-3.6.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:93f107c673bccf0d592cdba077dedaf52fe7f42dcd7676eba1f6d6f0c3efffd2"},
|
||||
{file = "xxhash-3.6.0-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:2aa5ee3444c25b69813663c9f8067dcfaa2e126dc55e8dddf40f4d1c25d7effa"},
|
||||
{file = "xxhash-3.6.0-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:f7f99123f0e1194fa59cc69ad46dbae2e07becec5df50a0509a808f90a0f03f0"},
|
||||
{file = "xxhash-3.6.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:49e03e6fe2cac4a1bc64952dd250cf0dbc5ef4ebb7b8d96bce82e2de163c82a2"},
|
||||
{file = "xxhash-3.6.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:bd17fede52a17a4f9a7bc4472a5867cb0b160deeb431795c0e4abe158bc784e9"},
|
||||
{file = "xxhash-3.6.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:6fb5f5476bef678f69db04f2bd1efbed3030d2aba305b0fc1773645f187d6a4e"},
|
||||
{file = "xxhash-3.6.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:843b52f6d88071f87eba1631b684fcb4b2068cd2180a0224122fe4ef011a9374"},
|
||||
{file = "xxhash-3.6.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:7d14a6cfaf03b1b6f5f9790f76880601ccc7896aff7ab9cd8978a939c1eb7e0d"},
|
||||
{file = "xxhash-3.6.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:418daf3db71e1413cfe211c2f9a528456936645c17f46b5204705581a45390ae"},
|
||||
{file = "xxhash-3.6.0-cp312-cp312-win32.whl", hash = "sha256:50fc255f39428a27299c20e280d6193d8b63b8ef8028995323bf834a026b4fbb"},
|
||||
{file = "xxhash-3.6.0-cp312-cp312-win_amd64.whl", hash = "sha256:c0f2ab8c715630565ab8991b536ecded9416d615538be8ecddce43ccf26cbc7c"},
|
||||
{file = "xxhash-3.6.0-cp312-cp312-win_arm64.whl", hash = "sha256:eae5c13f3bc455a3bbb68bdc513912dc7356de7e2280363ea235f71f54064829"},
|
||||
{file = "xxhash-3.6.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:599e64ba7f67472481ceb6ee80fa3bd828fd61ba59fb11475572cc5ee52b89ec"},
|
||||
{file = "xxhash-3.6.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:7d8b8aaa30fca4f16f0c84a5c8d7ddee0e25250ec2796c973775373257dde8f1"},
|
||||
{file = "xxhash-3.6.0-cp313-cp313-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:d597acf8506d6e7101a4a44a5e428977a51c0fadbbfd3c39650cca9253f6e5a6"},
|
||||
{file = "xxhash-3.6.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:858dc935963a33bc33490128edc1c12b0c14d9c7ebaa4e387a7869ecc4f3e263"},
|
||||
{file = "xxhash-3.6.0-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:ba284920194615cb8edf73bf52236ce2e1664ccd4a38fdb543506413529cc546"},
|
||||
{file = "xxhash-3.6.0-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:4b54219177f6c6674d5378bd862c6aedf64725f70dd29c472eaae154df1a2e89"},
|
||||
{file = "xxhash-3.6.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:42c36dd7dbad2f5238950c377fcbf6811b1cdb1c444fab447960030cea60504d"},
|
||||
{file = "xxhash-3.6.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:f22927652cba98c44639ffdc7aaf35828dccf679b10b31c4ad72a5b530a18eb7"},
|
||||
{file = "xxhash-3.6.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:b45fad44d9c5c119e9c6fbf2e1c656a46dc68e280275007bbfd3d572b21426db"},
|
||||
{file = "xxhash-3.6.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:6f2580ffab1a8b68ef2b901cde7e55fa8da5e4be0977c68f78fc80f3c143de42"},
|
||||
{file = "xxhash-3.6.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:40c391dd3cd041ebc3ffe6f2c862f402e306eb571422e0aa918d8070ba31da11"},
|
||||
{file = "xxhash-3.6.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:f205badabde7aafd1a31e8ca2a3e5a763107a71c397c4481d6a804eb5063d8bd"},
|
||||
{file = "xxhash-3.6.0-cp313-cp313-win32.whl", hash = "sha256:2577b276e060b73b73a53042ea5bd5203d3e6347ce0d09f98500f418a9fcf799"},
|
||||
{file = "xxhash-3.6.0-cp313-cp313-win_amd64.whl", hash = "sha256:757320d45d2fbcce8f30c42a6b2f47862967aea7bf458b9625b4bbe7ee390392"},
|
||||
{file = "xxhash-3.6.0-cp313-cp313-win_arm64.whl", hash = "sha256:457b8f85dec5825eed7b69c11ae86834a018b8e3df5e77783c999663da2f96d6"},
|
||||
{file = "xxhash-3.6.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:a42e633d75cdad6d625434e3468126c73f13f7584545a9cf34e883aa1710e702"},
|
||||
{file = "xxhash-3.6.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:568a6d743219e717b07b4e03b0a828ce593833e498c3b64752e0f5df6bfe84db"},
|
||||
{file = "xxhash-3.6.0-cp313-cp313t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:bec91b562d8012dae276af8025a55811b875baace6af510412a5e58e3121bc54"},
|
||||
{file = "xxhash-3.6.0-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:78e7f2f4c521c30ad5e786fdd6bae89d47a32672a80195467b5de0480aa97b1f"},
|
||||
{file = "xxhash-3.6.0-cp313-cp313t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:3ed0df1b11a79856df5ffcab572cbd6b9627034c1c748c5566fa79df9048a7c5"},
|
||||
{file = "xxhash-3.6.0-cp313-cp313t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:0e4edbfc7d420925b0dd5e792478ed393d6e75ff8fc219a6546fb446b6a417b1"},
|
||||
{file = "xxhash-3.6.0-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:fba27a198363a7ef87f8c0f6b171ec36b674fe9053742c58dd7e3201c1ab30ee"},
|
||||
{file = "xxhash-3.6.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:794fe9145fe60191c6532fa95063765529770edcdd67b3d537793e8004cabbfd"},
|
||||
{file = "xxhash-3.6.0-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:6105ef7e62b5ac73a837778efc331a591d8442f8ef5c7e102376506cb4ae2729"},
|
||||
{file = "xxhash-3.6.0-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:f01375c0e55395b814a679b3eea205db7919ac2af213f4a6682e01220e5fe292"},
|
||||
{file = "xxhash-3.6.0-cp313-cp313t-musllinux_1_2_s390x.whl", hash = "sha256:d706dca2d24d834a4661619dcacf51a75c16d65985718d6a7d73c1eeeb903ddf"},
|
||||
{file = "xxhash-3.6.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:5f059d9faeacd49c0215d66f4056e1326c80503f51a1532ca336a385edadd033"},
|
||||
{file = "xxhash-3.6.0-cp313-cp313t-win32.whl", hash = "sha256:1244460adc3a9be84731d72b8e80625788e5815b68da3da8b83f78115a40a7ec"},
|
||||
{file = "xxhash-3.6.0-cp313-cp313t-win_amd64.whl", hash = "sha256:b1e420ef35c503869c4064f4a2f2b08ad6431ab7b229a05cce39d74268bca6b8"},
|
||||
{file = "xxhash-3.6.0-cp313-cp313t-win_arm64.whl", hash = "sha256:ec44b73a4220623235f67a996c862049f375df3b1052d9899f40a6382c32d746"},
|
||||
{file = "xxhash-3.6.0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:a40a3d35b204b7cc7643cbcf8c9976d818cb47befcfac8bbefec8038ac363f3e"},
|
||||
{file = "xxhash-3.6.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:a54844be970d3fc22630b32d515e79a90d0a3ddb2644d8d7402e3c4c8da61405"},
|
||||
{file = "xxhash-3.6.0-cp314-cp314-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:016e9190af8f0a4e3741343777710e3d5717427f175adfdc3e72508f59e2a7f3"},
|
||||
{file = "xxhash-3.6.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4f6f72232f849eb9d0141e2ebe2677ece15adfd0fa599bc058aad83c714bb2c6"},
|
||||
{file = "xxhash-3.6.0-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:63275a8aba7865e44b1813d2177e0f5ea7eadad3dd063a21f7cf9afdc7054063"},
|
||||
{file = "xxhash-3.6.0-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:3cd01fa2aa00d8b017c97eb46b9a794fbdca53fc14f845f5a328c71254b0abb7"},
|
||||
{file = "xxhash-3.6.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0226aa89035b62b6a86d3c68df4d7c1f47a342b8683da2b60cedcddb46c4d95b"},
|
||||
{file = "xxhash-3.6.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:c6e193e9f56e4ca4923c61238cdaced324f0feac782544eb4c6d55ad5cc99ddd"},
|
||||
{file = "xxhash-3.6.0-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:9176dcaddf4ca963d4deb93866d739a343c01c969231dbe21680e13a5d1a5bf0"},
|
||||
{file = "xxhash-3.6.0-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:c1ce4009c97a752e682b897aa99aef84191077a9433eb237774689f14f8ec152"},
|
||||
{file = "xxhash-3.6.0-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:8cb2f4f679b01513b7adbb9b1b2f0f9cdc31b70007eaf9d59d0878809f385b11"},
|
||||
{file = "xxhash-3.6.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:653a91d7c2ab54a92c19ccf43508b6a555440b9be1bc8be553376778be7f20b5"},
|
||||
{file = "xxhash-3.6.0-cp314-cp314-win32.whl", hash = "sha256:a756fe893389483ee8c394d06b5ab765d96e68fbbfe6fde7aa17e11f5720559f"},
|
||||
{file = "xxhash-3.6.0-cp314-cp314-win_amd64.whl", hash = "sha256:39be8e4e142550ef69629c9cd71b88c90e9a5db703fecbcf265546d9536ca4ad"},
|
||||
{file = "xxhash-3.6.0-cp314-cp314-win_arm64.whl", hash = "sha256:25915e6000338999236f1eb68a02a32c3275ac338628a7eaa5a269c401995679"},
|
||||
{file = "xxhash-3.6.0-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:c5294f596a9017ca5a3e3f8884c00b91ab2ad2933cf288f4923c3fd4346cf3d4"},
|
||||
{file = "xxhash-3.6.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:1cf9dcc4ab9cff01dfbba78544297a3a01dafd60f3bde4e2bfd016cf7e4ddc67"},
|
||||
{file = "xxhash-3.6.0-cp314-cp314t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:01262da8798422d0685f7cef03b2bd3f4f46511b02830861df548d7def4402ad"},
|
||||
{file = "xxhash-3.6.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:51a73fb7cb3a3ead9f7a8b583ffd9b8038e277cdb8cb87cf890e88b3456afa0b"},
|
||||
{file = "xxhash-3.6.0-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:b9c6df83594f7df8f7f708ce5ebeacfc69f72c9fbaaababf6cf4758eaada0c9b"},
|
||||
{file = "xxhash-3.6.0-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:627f0af069b0ea56f312fd5189001c24578868643203bca1abbc2c52d3a6f3ca"},
|
||||
{file = "xxhash-3.6.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:aa912c62f842dfd013c5f21a642c9c10cd9f4c4e943e0af83618b4a404d9091a"},
|
||||
{file = "xxhash-3.6.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:b465afd7909db30168ab62afe40b2fcf79eedc0b89a6c0ab3123515dc0df8b99"},
|
||||
{file = "xxhash-3.6.0-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:a881851cf38b0a70e7c4d3ce81fc7afd86fbc2a024f4cfb2a97cf49ce04b75d3"},
|
||||
{file = "xxhash-3.6.0-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:9b3222c686a919a0f3253cfc12bb118b8b103506612253b5baeaac10d8027cf6"},
|
||||
{file = "xxhash-3.6.0-cp314-cp314t-musllinux_1_2_s390x.whl", hash = "sha256:c5aa639bc113e9286137cec8fadc20e9cd732b2cc385c0b7fa673b84fc1f2a93"},
|
||||
{file = "xxhash-3.6.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:5c1343d49ac102799905e115aee590183c3921d475356cb24b4de29a4bc56518"},
|
||||
{file = "xxhash-3.6.0-cp314-cp314t-win32.whl", hash = "sha256:5851f033c3030dd95c086b4a36a2683c2ff4a799b23af60977188b057e467119"},
|
||||
{file = "xxhash-3.6.0-cp314-cp314t-win_amd64.whl", hash = "sha256:0444e7967dac37569052d2409b00a8860c2135cff05502df4da80267d384849f"},
|
||||
{file = "xxhash-3.6.0-cp314-cp314t-win_arm64.whl", hash = "sha256:bb79b1e63f6fd84ec778a4b1916dfe0a7c3fdb986c06addd5db3a0d413819d95"},
|
||||
{file = "xxhash-3.6.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:7dac94fad14a3d1c92affb661021e1d5cbcf3876be5f5b4d90730775ccb7ac41"},
|
||||
{file = "xxhash-3.6.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:6965e0e90f1f0e6cb78da568c13d4a348eeb7f40acfd6d43690a666a459458b8"},
|
||||
{file = "xxhash-3.6.0-cp38-cp38-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:2ab89a6b80f22214b43d98693c30da66af910c04f9858dd39c8e570749593d7e"},
|
||||
{file = "xxhash-3.6.0-cp38-cp38-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4903530e866b7a9c1eadfd3fa2fbe1b97d3aed4739a80abf506eb9318561c850"},
|
||||
{file = "xxhash-3.6.0-cp38-cp38-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:4da8168ae52c01ac64c511d6f4a709479da8b7a4a1d7621ed51652f93747dffa"},
|
||||
{file = "xxhash-3.6.0-cp38-cp38-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:97460eec202017f719e839a0d3551fbc0b2fcc9c6c6ffaa5af85bbd5de432788"},
|
||||
{file = "xxhash-3.6.0-cp38-cp38-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:45aae0c9df92e7fa46fbb738737324a563c727990755ec1965a6a339ea10a1df"},
|
||||
{file = "xxhash-3.6.0-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:0d50101e57aad86f4344ca9b32d091a2135a9d0a4396f19133426c88025b09f1"},
|
||||
{file = "xxhash-3.6.0-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:9085e798c163ce310d91f8aa6b325dda3c2944c93c6ce1edb314030d4167cc65"},
|
||||
{file = "xxhash-3.6.0-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:a87f271a33fad0e5bf3be282be55d78df3a45ae457950deb5241998790326f87"},
|
||||
{file = "xxhash-3.6.0-cp38-cp38-musllinux_1_2_s390x.whl", hash = "sha256:9e040d3e762f84500961791fa3709ffa4784d4dcd7690afc655c095e02fff05f"},
|
||||
{file = "xxhash-3.6.0-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:b0359391c3dad6de872fefb0cf5b69d55b0655c55ee78b1bb7a568979b2ce96b"},
|
||||
{file = "xxhash-3.6.0-cp38-cp38-win32.whl", hash = "sha256:e4ff728a2894e7f436b9e94c667b0f426b9c74b71f900cf37d5468c6b5da0536"},
|
||||
{file = "xxhash-3.6.0-cp38-cp38-win_amd64.whl", hash = "sha256:01be0c5b500c5362871fc9cfdf58c69b3e5c4f531a82229ddb9eb1eb14138004"},
|
||||
{file = "xxhash-3.6.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:cc604dc06027dbeb8281aeac5899c35fcfe7c77b25212833709f0bff4ce74d2a"},
|
||||
{file = "xxhash-3.6.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:277175a73900ad43a8caeb8b99b9604f21fe8d7c842f2f9061a364a7e220ddb7"},
|
||||
{file = "xxhash-3.6.0-cp39-cp39-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:cfbc5b91397c8c2972fdac13fb3e4ed2f7f8ccac85cd2c644887557780a9b6e2"},
|
||||
{file = "xxhash-3.6.0-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:2762bfff264c4e73c0e507274b40634ff465e025f0eaf050897e88ec8367575d"},
|
||||
{file = "xxhash-3.6.0-cp39-cp39-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:2f171a900d59d51511209f7476933c34a0c2c711078d3c80e74e0fe4f38680ec"},
|
||||
{file = "xxhash-3.6.0-cp39-cp39-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:780b90c313348f030b811efc37b0fa1431163cb8db8064cf88a7936b6ce5f222"},
|
||||
{file = "xxhash-3.6.0-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:18b242455eccdfcd1fa4134c431a30737d2b4f045770f8fe84356b3469d4b919"},
|
||||
{file = "xxhash-3.6.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:a75ffc1bd5def584129774c158e108e5d768e10b75813f2b32650bb041066ed6"},
|
||||
{file = "xxhash-3.6.0-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:1fc1ed882d1e8df932a66e2999429ba6cc4d5172914c904ab193381fba825360"},
|
||||
{file = "xxhash-3.6.0-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:44e342e8cc11b4e79dae5c57f2fb6360c3c20cc57d32049af8f567f5b4bcb5f4"},
|
||||
{file = "xxhash-3.6.0-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:c2f9ccd5c4be370939a2e17602fbc49995299203da72a3429db013d44d590e86"},
|
||||
{file = "xxhash-3.6.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:02ea4cb627c76f48cd9fb37cf7ab22bd51e57e1b519807234b473faebe526796"},
|
||||
{file = "xxhash-3.6.0-cp39-cp39-win32.whl", hash = "sha256:6551880383f0e6971dc23e512c9ccc986147ce7bfa1cd2e4b520b876c53e9f3d"},
|
||||
{file = "xxhash-3.6.0-cp39-cp39-win_amd64.whl", hash = "sha256:7c35c4cdc65f2a29f34425c446f2f5cdcd0e3c34158931e1cc927ece925ab802"},
|
||||
{file = "xxhash-3.6.0-cp39-cp39-win_arm64.whl", hash = "sha256:ffc578717a347baf25be8397cb10d2528802d24f94cfc005c0e44fef44b5cdd6"},
|
||||
{file = "xxhash-3.6.0-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:0f7b7e2ec26c1666ad5fc9dbfa426a6a3367ceaf79db5dd76264659d509d73b0"},
|
||||
{file = "xxhash-3.6.0-pp311-pypy311_pp73-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:5dc1e14d14fa0f5789ec29a7062004b5933964bb9b02aae6622b8f530dc40296"},
|
||||
{file = "xxhash-3.6.0-pp311-pypy311_pp73-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:881b47fc47e051b37d94d13e7455131054b56749b91b508b0907eb07900d1c13"},
|
||||
{file = "xxhash-3.6.0-pp311-pypy311_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c6dc31591899f5e5666f04cc2e529e69b4072827085c1ef15294d91a004bc1bd"},
|
||||
{file = "xxhash-3.6.0-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:15e0dac10eb9309508bfc41f7f9deaa7755c69e35af835db9cb10751adebc35d"},
|
||||
{file = "xxhash-3.6.0.tar.gz", hash = "sha256:f0162a78b13a0d7617b2845b90c763339d1f1d82bb04a4b07f4ab535cc5e05d6"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "yarl"
|
||||
version = "1.22.0"
|
||||
description = "Yet another URL library"
|
||||
optional = false
|
||||
python-versions = ">=3.9"
|
||||
groups = ["main", "dev"]
|
||||
groups = ["main", "benchmarks", "dev"]
|
||||
files = [
|
||||
{file = "yarl-1.22.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:c7bd6683587567e5a49ee6e336e0612bec8329be1b7d4c8af5687dcdeb67ee1e"},
|
||||
{file = "yarl-1.22.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:5cdac20da754f3a723cceea5b3448e1a2074866406adeb4ef35b469d089adb8f"},
|
||||
@@ -8774,4 +9138,4 @@ type = ["pytest-mypy"]
|
||||
[metadata]
|
||||
lock-version = "2.1"
|
||||
python-versions = "^3.12"
|
||||
content-hash = "9d632f4341ef2e49aa81978032caf9605fd258aff8176263ccb1766b0a1ef4b1"
|
||||
content-hash = "f8040ae4c1cc04a87df47a0176c09cc4d9da81e2a51e3428d07d93d5c2e9724f"
|
||||
|
||||
@@ -142,6 +142,19 @@ optional = true
|
||||
cx-freeze = { git = "https://github.com/ntindle/cx_Freeze.git", rev = "main" }
|
||||
|
||||
|
||||
[tool.poetry.group.benchmarks]
|
||||
optional = true
|
||||
|
||||
[tool.poetry.group.benchmarks.dependencies]
|
||||
# External benchmark adapters
|
||||
datasets = "^2.14"
|
||||
huggingface-hub = "^0.20"
|
||||
# SWE-bench evaluation (optional - requires Docker)
|
||||
# swebench = "^2.0" # Install separately if needed
|
||||
# Modal for cloud evaluation (optional)
|
||||
# modal = "^0.70" # Install separately if needed
|
||||
|
||||
|
||||
[build-system]
|
||||
requires = ["poetry-core"]
|
||||
build-backend = "poetry.core.masonry.api"
|
||||
|
||||
Reference in New Issue
Block a user