OpenHands/evaluation/benchmarks/testgeneval/utils.py

import json
from pathlib import Path
from typing import cast

from datasets import Dataset, load_dataset

from evaluation.benchmarks.testgeneval.constants import (
    KEY_INSTANCE_ID,
    TestGenEvalInstance,
)


def get_test_directives(instance: TestGenEvalInstance) -> list:
    """Build the list of test targets to run for a task instance.

    Args:
        instance (dict): task instance; its 'repo' entry is always read and
            its 'test_file' entry is read for every repo except
            'swe-bench/humaneval'.
    Returns:
        directives (list): single-element list holding the test target:
            * 'test.py' for 'swe-bench/humaneval',
            * a dotted module path for 'django/django',
            * '/testbed/<test_file>' for any other repo.
    """
    repo = instance['repo']

    # seq2seq code repos always use the same fixed test file
    if repo == 'swe-bench/humaneval':
        return ['test.py']

    test_file = instance['test_file']

    # Everything except Django is addressed by its path under /testbed
    if repo != 'django/django':
        return [f'/testbed/{test_file}']

    # Django tests are referenced as modules: strip the '.py' extension and
    # the leading 'tests/' directory, then turn path separators into dots.
    module = test_file.removesuffix('.py').removeprefix('tests/')
    return [module.replace('/', '.')]


def _read_local_instances(path: str) -> list:
    """Parse task instances from a local ``.json`` or ``.jsonl`` file.

    A ``.json`` file is parsed as one JSON document. A ``.jsonl`` file is
    parsed as one JSON value per non-blank line; a ``.jsonl`` file whose whole
    content is a single JSON array is still accepted and returned as that
    array.
    """
    text = Path(path).read_text()
    if not path.endswith('.jsonl'):
        return json.loads(text)

    try:
        whole_document = json.loads(text)
    except json.JSONDecodeError:
        # Several records on separate lines do not form one JSON document.
        whole_document = None
    if isinstance(whole_document, list):
        return whole_document

    # Split on '\n' only: str.splitlines() would also break on characters such
    # as U+2028 that may legally appear unescaped inside a JSON string.
    return [json.loads(line) for line in text.split('\n') if line.strip()]


def load_testgeneval_dataset(
    name='kjain14/testgeneval', split='test', ids=None
) -> list[TestGenEvalInstance]:
    """Load a TestGenEval dataset from Hugging Face Datasets or a local .json/.jsonl file

    Args:
        name (str): path ending in '.json' or '.jsonl', or a Hugging Face
            dataset name. The aliases 'testgeneval' and 'testgeneval-lite' /
            'testgenevallite' / 'lite' (case-insensitive) are mapped to
            'kjain14/testgeneval' and 'kjain14/testgenevallite'.
        split (str): split passed to ``load_dataset``; ignored for local files.
        ids (iterable | None): if non-empty, only instances whose 'id' is in
            this collection are returned.
    Returns:
        instances (list): the (optionally filtered) task instances
    Raises:
        ValueError: if ``ids`` contains identifiers not present in the dataset
    """
    wanted_ids = set(ids) if ids else None

    is_local_file = name.endswith(('.json', '.jsonl'))
    if is_local_file:
        # Load from local .json/.jsonl file
        dataset = _read_local_instances(name)
    else:
        # Load from Hugging Face Datasets
        if name.lower() in {'testgeneval'}:
            name = 'kjain14/testgeneval'
        elif name.lower() in {'testgeneval-lite', 'testgenevallite', 'lite'}:
            name = 'kjain14/testgenevallite'
        dataset = cast(Dataset, load_dataset(name, split=split))

    if wanted_ids:
        # Check that all requested IDs are in the dataset. The set of known
        # IDs is only built here so that unfiltered loads skip the extra pass.
        # NOTE(review): local files are validated against KEY_INSTANCE_ID while
        # the filter below (and the Hugging Face branch) use 'id'; kept as-is,
        # confirm which key is intended.
        validation_key = KEY_INSTANCE_ID if is_local_file else 'id'
        dataset_ids = {instance[validation_key] for instance in dataset}
        missing_ids = wanted_ids - dataset_ids
        if missing_ids:
            raise ValueError(
                (
                    'Some instance IDs not found in dataset!'
                    f'\nMissing IDs:\n{" ".join(sorted(missing_ids))}'
                )
            )
        dataset = [instance for instance in dataset if instance['id'] in wanted_ids]

    # The quoted type keeps the cast free of any runtime evaluation.
    return cast('list[TestGenEvalInstance]', list(dataset))