"""
Utilities for handling binary files and patch generation in SWE-bench evaluation.
"""

# Lines git emits at column 0 of a per-file diff section to signal binary
# content: the summary line of a plain `git diff` and the header of a
# `git diff --binary` payload. Hunk content lines always start with ' ', '+',
# '-' or '\', so a column-0 match cannot be confused with file content.
_BINARY_MARKERS = ('Binary files ', 'GIT binary patch')


def remove_binary_diffs(patch_text: str) -> str:
    """
    Remove binary file diffs from a git patch.

    The patch is cut into sections at every line starting with 'diff --git '.
    A section is dropped when it contains a binary marker line at column 0;
    all other sections (and any text before the first section) are kept
    unchanged and in order.

    Args:
        patch_text (str): The git patch text

    Returns:
        str: The cleaned patch text with binary diffs removed. Lines are
            joined with a newline and a single trailing newline of the input
            is not reproduced.
    """
    # Split on '\n' only: str.splitlines() would also break lines on form
    # feeds and other separators that may legitimately occur inside hunk
    # content, and re-joining with '\n' would then corrupt the patch.
    lines = patch_text.split('\n')
    if patch_text.endswith('\n'):
        # Drop the empty tail produced by the final newline.
        lines.pop()

    cleaned_lines = []
    block = []
    is_binary_block = False

    for line in lines:
        if line.startswith('diff --git '):
            # A new file section starts: flush the previous one if it was text.
            if not is_binary_block:
                cleaned_lines.extend(block)
            block = [line]
            is_binary_block = False
            continue
        if line.startswith(_BINARY_MARKERS):
            is_binary_block = True
        block.append(line)

    # Flush the last section.
    if not is_binary_block:
        cleaned_lines.extend(block)
    return '\n'.join(cleaned_lines)


def remove_binary_files_from_git() -> str:
    """
    Generate a bash command that deletes binary files among the changed files.

    The command iterates over paths reported by `git status --porcelain` as
    modified, added or untracked. A path is removed (via `git rm -f`, falling
    back to `rm -f`) when it is a regular file and either `file` describes it
    with the word "executable" or git's `binary` attribute is set for it.
    Each removed path is echoed as `Removed: <path>`.

    NOTE(review): the `for ... in $(...)` loop word-splits its input, so paths
    containing whitespace are not handled as a single file.
    NOTE(review): `file` may also describe text scripts with a shebang as
    "... text executable" -- confirm that removing those is intended.

    Returns:
        str: A bash command that removes binary files from git staging
    """
    return """
    for file in $(git status --porcelain | grep -E "^(M| M|\\?\\?|A| A)" | cut -c4-); do
        if [ -f "$file" ] && (file "$file" | grep -q "executable" || git check-attr binary "$file" | grep -q "binary: set"); then
            git rm -f "$file" 2>/dev/null || rm -f "$file"
            echo "Removed: $file"
        fi
    done
    """.strip()
SWE_INSTANCE_ID={instance['instance_id']}' >> ~/.bashrc && echo 'export PIP_CACHE_DIR=~/.cache/pip' >> ~/.bashrc && echo "alias git='git --no-pager'" >> ~/.bashrc""" + command=f"""echo 'export SWE_INSTANCE_ID={instance['instance_id']}' >> ~/.bashrc && echo 'export PIP_CACHE_DIR=~/.cache/pip' >> ~/.bashrc && echo "alias git='git --no-pager'" >> ~/.bashrc && git config --global core.pager "" && git config --global diff.binary false""" ) action.set_hard_timeout(600) logger.info(action, extra={'msg_type': 'ACTION'}) obs = runtime.run_action(action) logger.info(obs, extra={'msg_type': 'OBSERVATION'}) assert_and_raise( - obs.exit_code == 0, f'Failed to export SWE_INSTANCE_ID: {str(obs)}' + obs.exit_code == 0, + f'Failed to export SWE_INSTANCE_ID and configure git: {str(obs)}', ) action = CmdRunAction(command="""export USER=$(whoami); echo USER=${USER} """) @@ -452,11 +461,22 @@ def complete_runtime( f'Failed to git add -A: {str(obs)}', ) + # Remove binary files from git staging + action = CmdRunAction(command=remove_binary_files_from_git()) + action.set_hard_timeout(600) + logger.info(action, extra={'msg_type': 'ACTION'}) + obs = runtime.run_action(action) + logger.info(obs, extra={'msg_type': 'OBSERVATION'}) + assert_and_raise( + isinstance(obs, CmdOutputObservation) and obs.exit_code == 0, + f'Failed to remove binary files: {str(obs)}', + ) + n_retries = 0 git_patch = None while n_retries < 5: action = CmdRunAction( - command=f'git diff --no-color --cached {instance["base_commit"]}' + command=f'git diff --no-color --cached {instance["base_commit"]} > patch.diff' ) action.set_hard_timeout(max(300 + 100 * n_retries, 600)) logger.info(action, extra={'msg_type': 'ACTION'}) @@ -465,8 +485,28 @@ def complete_runtime( n_retries += 1 if isinstance(obs, CmdOutputObservation): if obs.exit_code == 0: - git_patch = obs.content.strip() - break + # Read the patch file + action = FileReadAction(path='patch.diff') + action.set_hard_timeout(max(300 + 100 * n_retries, 600)) + 
logger.info(action, extra={'msg_type': 'ACTION'}) + obs = runtime.run_action(action) + logger.info(obs, extra={'msg_type': 'OBSERVATION'}) + if isinstance(obs, FileReadObservation): + git_patch = obs.content + break + elif isinstance(obs, ErrorObservation): + # Fall back to cat "patch.diff" to get the patch + assert 'File could not be decoded as utf-8' in obs.content + action = CmdRunAction(command='cat patch.diff') + action.set_hard_timeout(max(300 + 100 * n_retries, 600)) + logger.info(action, extra={'msg_type': 'ACTION'}) + obs = runtime.run_action(action) + assert isinstance(obs, CmdOutputObservation) and obs.exit_code == 0 + logger.info(obs, extra={'msg_type': 'OBSERVATION'}) + git_patch = obs.content + break + else: + assert_and_raise(False, f'Unexpected observation type: {str(obs)}') else: logger.info('Failed to get git diff, retrying...') sleep_if_should_continue(10) @@ -478,6 +518,9 @@ def complete_runtime( assert_and_raise(git_patch is not None, 'Failed to get git diff (None)') + # Remove binary diffs from the patch + git_patch = remove_binary_diffs(git_patch) + logger.info('-' * 30) logger.info('END Runtime Completion Fn') logger.info('-' * 30) @@ -802,7 +845,11 @@ if __name__ == '__main__': with open(cur_output_file, 'r') as f: for line in f: instance = json.loads(line) - if instance['instance_id'] not in added_instance_ids: + # Also make sure git_patch is not empty - otherwise we fall back to previous attempt (empty patch is worse than anything else) + if ( + instance['instance_id'] not in added_instance_ids + and instance['test_result']['git_patch'].strip() + ): fout.write(line) added_instance_ids.add(instance['instance_id']) logger.info(