Update pre-commit hooks to the most recent versions (#8343)

Co-authored-by: openhands <openhands@all-hands.dev>
Graham Neubig
2025-05-07 23:59:13 -04:00
committed by GitHub
parent d5a8d4251c
commit 689d3c9046
296 changed files with 882 additions and 847 deletions

View File

@@ -2,10 +2,9 @@
import os
import re
import sys
from typing import Set, Tuple
def find_version_references(directory: str) -> Tuple[Set[str], Set[str]]:
def find_version_references(directory: str) -> tuple[set[str], set[str]]:
openhands_versions = set()
runtime_versions = set()
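The signature rewrite above is the PEP 585 style the updated tooling enforces: builtin tuple/set generics replace typing.Tuple/typing.Set, so the typing import can be dropped. A minimal sketch of the same pattern (the helper below is illustrative, not taken from the repository):

def split_versions(tags: list[str]) -> tuple[set[str], set[str]]:
    # Builtin generics need no `from typing import Set, Tuple` on Python 3.9+.
    openhands_versions = {t for t in tags if t.startswith('openhands-')}
    runtime_versions = {t for t in tags if t.startswith('runtime-')}
    return openhands_versions, runtime_versions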

.openhands/pre-commit.sh Normal file → Executable file
View File

View File

@@ -1,6 +1,6 @@
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.5.0
rev: v5.0.0
hooks:
- id: trailing-whitespace
exclude: docs/modules/python
@@ -10,17 +10,17 @@ repos:
- id: debug-statements
- repo: https://github.com/tox-dev/pyproject-fmt
rev: 1.7.0
rev: v2.5.1
hooks:
- id: pyproject-fmt
- repo: https://github.com/abravalheri/validate-pyproject
rev: v0.16
rev: v0.24.1
hooks:
- id: validate-pyproject
- repo: https://github.com/astral-sh/ruff-pre-commit
# Ruff version.
rev: v0.4.1
rev: v0.11.8
hooks:
# Run the linter.
- id: ruff
@@ -33,7 +33,7 @@ repos:
types_or: [python, pyi, jupyter]
- repo: https://github.com/pre-commit/mirrors-mypy
rev: v1.9.0
rev: v1.15.0
hooks:
- id: mypy
additional_dependencies:
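The rev bumps above are the kind of refresh that `pre-commit autoupdate` produces for each pinned repo. A hedged sketch for listing the resulting pins, assuming PyYAML is installed and the config lives at the repository root:

import yaml  # PyYAML

with open('.pre-commit-config.yaml') as f:
    config = yaml.safe_load(f)

for repo in config['repos']:
    # e.g. https://github.com/astral-sh/ruff-pre-commit  v0.11.8
    print(repo['repo'], repo.get('rev', '<unpinned>'))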

View File

@@ -20,6 +20,12 @@ ignore = [
"B010",
"B904",
"B018",
# Temporarily ignore ASYNC rules until they can be properly fixed in a separate PR
"ASYNC110",
"ASYNC220",
"ASYNC221",
"ASYNC230",
"ASYNC251",
]
[lint.flake8-quotes]
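The newly ignored ASYNC rules come from ruff's flake8-async checks, which the bumped ruff release now reports in this codebase. A hedged illustration of the pattern such rules target (the exact rule-to-code mapping is assumed here, e.g. a blocking sleep inside a coroutine for ASYNC251):

import asyncio
import time

async def poll_status() -> None:
    # Blocking call inside an async function: it stalls the whole event loop
    # (the kind of pattern flake8-async flags).
    time.sleep(1)
    # Non-blocking alternative that yields control back to the loop.
    await asyncio.sleep(1)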

View File

@@ -73,7 +73,7 @@ class Q20Game:
usr_msg = self.answerer(guesser_question)
self.guesser_messages.append(
{'role': 'user', 'content': f"{usr_msg['content'].strip()}"}
{'role': 'user', 'content': f'{usr_msg["content"].strip()}'}
)
if 'bingo' in usr_msg['content'].lower():
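Most of the remaining hunks in this commit are the same mechanical change: the newer ruff formatter normalizes f-strings to the project's single-quote preference (quoting nested subscripts with double quotes) and spaces out expressions inside the braces, which earlier releases left untouched. A small self-contained illustration with placeholder data:

usr_msg = {'content': ' bingo '}

# Previously the outer double quotes were kept so the inner single quotes survived:
old_style = f"{usr_msg['content'].strip()}"

# The updated formatter flips the outer quotes and double-quotes the subscript:
new_style = f'{usr_msg["content"].strip()}'

assert old_style == new_style == 'bingo'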

View File

@@ -67,7 +67,7 @@ def initialize_runtime(
This function is called before the runtime is used to run the agent.
"""
logger.info(f"{'-' * 50} BEGIN Runtime Initialization Fn {'-' * 50}")
logger.info(f'{"-" * 50} BEGIN Runtime Initialization Fn {"-" * 50}')
obs: CmdOutputObservation
# Set instance id
@@ -100,7 +100,7 @@ def initialize_runtime(
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert obs.exit_code == 0
logger.info(f"{'-' * 50} END Runtime Initialization Fn {'-' * 50}")
logger.info(f'{"-" * 50} END Runtime Initialization Fn {"-" * 50}')
def complete_runtime(
@@ -113,7 +113,7 @@ def complete_runtime(
If you need to do something in the sandbox to get the correctness metric after
the agent has run, modify this function.
"""
logger.info(f"{'-' * 50} BEGIN Runtime Completion Fn {'-' * 50}")
logger.info(f'{"-" * 50} BEGIN Runtime Completion Fn {"-" * 50}')
obs: CmdOutputObservation
agent_answer = None
@@ -165,7 +165,7 @@ def complete_runtime(
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
final_ans = obs.content
logger.info(f"{'-' * 50} END Runtime Completion Fn {'-' * 50}")
logger.info(f'{"-" * 50} END Runtime Completion Fn {"-" * 50}')
return {
'final_ans': final_ans,
'agent_answer': agent_answer,

View File

@@ -80,7 +80,7 @@ def initialize_runtime(
This function is called before the runtime is used to run the agent.
"""
logger.info(f"\n{'-' * 50} BEGIN Runtime Initialization Fn {'-' * 50}\n")
logger.info(f'\n{"-" * 50} BEGIN Runtime Initialization Fn {"-" * 50}\n')
obs: CmdOutputObservation
# Set instance id
@@ -110,7 +110,7 @@ def initialize_runtime(
file_path,
'/workspace',
)
logger.info(f"\n{'-' * 50} END Runtime Initialization Fn {'-' * 50}\n")
logger.info(f'\n{"-" * 50} END Runtime Initialization Fn {"-" * 50}\n')
def complete_runtime(
@@ -123,7 +123,7 @@ def complete_runtime(
If you need to do something in the sandbox to get the correctness metric after
the agent has run, modify this function.
"""
logger.info(f"\n{'-' * 50} BEGIN Runtime Completion Fn {'-' * 50}\n")
logger.info(f'\n{"-" * 50} BEGIN Runtime Completion Fn {"-" * 50}\n')
obs: CmdOutputObservation
# Rewriting the test file to ignore any changes Agent may have made.
@@ -147,7 +147,7 @@ def complete_runtime(
if isinstance(obs, CmdOutputObservation):
exit_code = obs.exit_code
logger.info(f"\n{'-' * 50} END Runtime Completion Fn {'-' * 50}\n")
logger.info(f'\n{"-" * 50} END Runtime Completion Fn {"-" * 50}\n')
runtime.close()

View File

@@ -84,7 +84,7 @@ def initialize_runtime(
This function is called before the runtime is used to run the agent.
"""
logger.info(f"{'-' * 50} BEGIN Runtime Initialization Fn {'-' * 50}")
logger.info(f'{"-" * 50} BEGIN Runtime Initialization Fn {"-" * 50}')
obs: CmdOutputObservation
file_ext = FILE_EXT_MAP[instance.language.lower()]
@@ -128,7 +128,7 @@ def initialize_runtime(
assert obs.exit_code == 0
# download repository archive
repository_url = f"https://biocoder.lilbillbiscuit.com/repos/{instance.repository.split('/')[1]}.zip"
repository_url = f'https://biocoder.lilbillbiscuit.com/repos/{instance.repository.split("/")[1]}.zip'
action = CmdRunAction(command='wget -O repo.zip ' + repository_url)
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
@@ -160,7 +160,7 @@ def initialize_runtime(
obs = runtime.run_action(action)
assert obs.exit_code == 0, f'Failed to remove the code: {obs.content}'
logger.info(f"{'-' * 50} END Runtime Initialization Fn {'-' * 50}")
logger.info(f'{"-" * 50} END Runtime Initialization Fn {"-" * 50}')
def complete_runtime(
@@ -173,7 +173,7 @@ def complete_runtime(
If you need to do something in the sandbox to get the correctness metric after
the agent has run, modify this function.
"""
logger.info(f"{'-' * 50} BEGIN Runtime Completion Fn {'-' * 50}")
logger.info(f'{"-" * 50} BEGIN Runtime Completion Fn {"-" * 50}')
obs: CmdOutputObservation
test_result = {'result': {}, 'metadata': {}}
@@ -233,7 +233,7 @@ def complete_runtime(
test_result['metadata']['2_run_test_success'] = False
test_result['metadata']['2_run_test_result'] = str(obs.content)
logger.info(f"{'-' * 50} END Runtime Completion Fn {'-' * 50}")
logger.info(f'{"-" * 50} END Runtime Completion Fn {"-" * 50}')
return test_result
@@ -258,7 +258,7 @@ def process_instance(
instruction = (
f'Please complete the function "{instance.signature}" in the file /workspace/{instance.repository.split("/")[1]}/{instance.filePath}.\n'
f'The environment has been set up for you to start working. You may assume all necessary tools are installed.\n'
f'To complete the task, you must directly modify the file and fill in the function, keeping in mind that the function signature is on line {instance.lineStart-1}\n\n'
f'To complete the task, you must directly modify the file and fill in the function, keeping in mind that the function signature is on line {instance.lineStart - 1}\n\n'
f'The function should do the following:\n'
f'{instance.promptSummaryOnly}\n\n'
)

View File

@@ -44,7 +44,7 @@ def remove_code(target_filepath: str, line_start: int, line_end: int, language:
lines = (
lines[:line_start]
+ [
f"{' '*comment_indent_size+comment_prefix[language.lower()]}TODO: replace with your code here"
f'{" " * comment_indent_size + comment_prefix[language.lower()]}TODO: replace with your code here'
]
+ ([''] * 2)
+ lines[line_end:]

View File

@@ -184,7 +184,7 @@ def load_bird():
.fetchall()
)
prompt += (
f"/*\n3 example rows:\n{top_k_row_query}\n{' '.join(headers)}\n"
f'/*\n3 example rows:\n{top_k_row_query}\n{" ".join(headers)}\n'
)
for row in top_k_rows:
row = [str(x) for x in row]
@@ -201,10 +201,10 @@ def load_bird():
# Extract the CREATE TABLE statements and sample data from the database
prompt = _extract_create_table_prompt(db_path)
prompt += f"-- External Knowledge: {e['evidence']}\n\n"
prompt += f'-- External Knowledge: {e["evidence"]}\n\n'
prompt += '-- Using valid SQLite and understanding External Knowledge, answer the following questions for the tables provided above.\n\n'
prompt += '-- Using valid SQLite, answer the following questions for the tables provided above.\n'
prompt += f"Question: {e['question']}\n"
prompt += f'Question: {e["question"]}\n'
return prompt
@@ -224,7 +224,7 @@ def load_bird():
item = {
'instance_id': f'{len(processed_data)}',
'db_path': os.path.join(
database_path, e['db_id'], f"{e['db_id']}.sqlite"
database_path, e['db_id'], f'{e["db_id"]}.sqlite'
),
'db_id': e['db_id'],
'instruction': _create_prompt(e, database_path),
@@ -253,7 +253,7 @@ def initialize_runtime(
This function is called before the runtime is used to run the agent.
"""
logger.info(f"{'-' * 50} BEGIN Runtime Initialization Fn {'-' * 50}")
logger.info(f'{"-" * 50} BEGIN Runtime Initialization Fn {"-" * 50}')
obs: CmdOutputObservation
# Copy the database to the workspace
@@ -273,7 +273,7 @@ def initialize_runtime(
assert obs.exit_code == 0
assert f'{instance.db_id}.sqlite' in obs.content
logger.info(f"{'-' * 50} END Runtime Initialization Fn {'-' * 50}")
logger.info(f'{"-" * 50} END Runtime Initialization Fn {"-" * 50}')
def complete_runtime(
@@ -286,7 +286,7 @@ def complete_runtime(
If you need to do something in the sandbox to get the correctness metric after
the agent has run, modify this function.
"""
logger.info(f"{'-' * 50} BEGIN Runtime Completion Fn {'-' * 50}")
logger.info(f'{"-" * 50} BEGIN Runtime Completion Fn {"-" * 50}')
obs: CmdOutputObservation
timeout = 30
@@ -343,7 +343,7 @@ def complete_runtime(
'gen_sql': gen_sql,
'gold_sql': gold_sql,
}
logger.info(f"{'-' * 50} END Runtime Completion Fn {'-' * 50}")
logger.info(f'{"-" * 50} END Runtime Completion Fn {"-" * 50}')
return test_result

View File

@@ -34,9 +34,9 @@ SUPPORTED_AGENT_CLS = {'CodeActAgent'}
def get_config(
metadata: EvalMetadata,
) -> AppConfig:
assert (
metadata.max_iterations == 1
), 'max_iterations must be 1 for browsing delegation evaluation.'
assert metadata.max_iterations == 1, (
'max_iterations must be 1 for browsing delegation evaluation.'
)
sandbox_config = get_default_sandbox_config_for_eval()
sandbox_config.base_container_image = 'python:3.12-bookworm'
config = AppConfig(
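The assert rewrite in this hunk is another effect of the formatter bump: when an assert with a message no longer fits on one line, the newer style keeps the condition on the assert line and parenthesizes only the message instead of wrapping the condition. A hedged, standalone illustration:

max_iterations = 1

# Older wrapping:
# assert (
#     max_iterations == 1
# ), 'max_iterations must be 1 for browsing delegation evaluation.'

# Newer wrapping produced by the updated formatter:
assert max_iterations == 1, (
    'max_iterations must be 1 for browsing delegation evaluation.'
)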

View File

@@ -82,9 +82,7 @@ def get_instruction(instance: pd.Series, metadata: EvalMetadata):
if RUN_WITH_BROWSING:
instruction += (
'<IMPORTANT!>\n'
'You SHOULD NEVER attempt to browse the web. '
'</IMPORTANT!>\n'
'<IMPORTANT!>\nYou SHOULD NEVER attempt to browse the web. </IMPORTANT!>\n'
)
return instruction
@@ -265,7 +263,7 @@ def complete_runtime(
test_dir = instance['test']['test_dir']
action = CmdRunAction(
command=f"{instance['test']['test_cmd']} --json-report --json-report-file=report.json --continue-on-collection-errors {test_dir} > test_output.txt 2>&1"
command=f'{instance["test"]["test_cmd"]} --json-report --json-report-file=report.json --continue-on-collection-errors {test_dir} > test_output.txt 2>&1'
)
action.set_hard_timeout(600)
logger.info(action, extra={'msg_type': 'ACTION'})

View File

@@ -489,7 +489,7 @@ def run_eval_gold_vs_gen_NL_hypo_workflow(
gen_subh_to_gold_subh[p_id] = g_id
gold_subh_covered.append(g_id)
gen_gold_subh_to_context[f'P{p_id}||G{g_id}'] = {
'question': f"""Comapring: GoldH: {gold_subh["text"]}, GoldC: {gold_subh['context']}\nGenH: {gen_subh['text']}, GenC: {gen_subh['context']}""",
'question': f"""Comapring: GoldH: {gold_subh['text']}, GoldC: {gold_subh['context']}\nGenH: {gen_subh['text']}, GenC: {gen_subh['context']}""",
'answer': context_bool,
'score': context_score,
}

View File

@@ -145,7 +145,7 @@ def initialize_runtime(runtime: Runtime, data_files: list[str]):
This function is called before the runtime is used to run the agent.
"""
logger.info(f"{'-' * 50} BEGIN Runtime Initialization Fn {'-' * 50}")
logger.info(f'{"-" * 50} BEGIN Runtime Initialization Fn {"-" * 50}')
obs: CmdOutputObservation
action = CmdRunAction(command='mkdir -p /workspace')
@@ -170,7 +170,7 @@ def initialize_runtime(runtime: Runtime, data_files: list[str]):
obs = runtime.run_action(action)
assert obs.exit_code == 0
logger.info(f"{'-' * 50} END Runtime Initialization Fn {'-' * 50}")
logger.info(f'{"-" * 50} END Runtime Initialization Fn {"-" * 50}')
def get_last_agent_finish_action(state: State) -> AgentFinishAction:

View File

@@ -21,7 +21,7 @@ def main():
total += 1
if out['test_result']['score']:
success += 1
print(f'Success rate: {success}/{total} = {success/total}')
print(f'Success rate: {success}/{total} = {success / total}')
if __name__ == '__main__':

View File

@@ -78,7 +78,7 @@ def initialize_runtime(
This function is called before the runtime is used to run the agent.
"""
logger.info(f"{'-' * 50} BEGIN Runtime Initialization Fn {'-' * 50}")
logger.info(f'{"-" * 50} BEGIN Runtime Initialization Fn {"-" * 50}')
obs: CmdOutputObservation
action = CmdRunAction(command='mkdir -p /workspace')
@@ -110,7 +110,7 @@ def initialize_runtime(
obs = runtime.run_action(action)
assert obs.exit_code == 0
logger.info(f"{'-' * 50} END Runtime Initialization Fn {'-' * 50}")
logger.info(f'{"-" * 50} END Runtime Initialization Fn {"-" * 50}')
def process_instance(
@@ -134,10 +134,10 @@ def process_instance(
dest_file = None
# Prepare instruction
instruction = f"{instance['Question']}\n"
instruction = f'{instance["Question"]}\n'
logger.info(f'Instruction: {instruction}')
if dest_file:
instruction += f"\n\nThe mentioned file is provided in the workspace at: {dest_file.split('/')[-1]}"
instruction += f'\n\nThe mentioned file is provided in the workspace at: {dest_file.split("/")[-1]}'
instruction += 'IMPORTANT: You should ONLY interact with the environment provided to you AND NEVER ASK FOR HUMAN HELP.\n'
instruction += 'Please encapsulate your final answer (answer ONLY) within <solution> and </solution>.\n'

View File

@@ -21,7 +21,7 @@ def split_string(
) -> list[str]:
if char_list is None:
char_list = [',', ';']
pattern = f"[{''.join(char_list)}]"
pattern = f'[{"".join(char_list)}]'
return re.split(pattern, s)

View File

@@ -112,7 +112,7 @@ def initialize_runtime(
This function is called before the runtime is used to run the agent.
"""
logger.info(f"{'-' * 50} BEGIN Runtime Initialization Fn {'-' * 50}")
logger.info(f'{"-" * 50} BEGIN Runtime Initialization Fn {"-" * 50}')
obs: CmdOutputObservation
action = CmdRunAction(command='mkdir -p /workspace')
@@ -143,7 +143,7 @@ def initialize_runtime(
obs = runtime.run_action(action)
assert obs.exit_code == 0
logger.info(f"{'-' * 50} END Runtime Initialization Fn {'-' * 50}")
logger.info(f'{"-" * 50} END Runtime Initialization Fn {"-" * 50}')
def complete_runtime(
@@ -156,7 +156,7 @@ def complete_runtime(
If you need to do something in the sandbox to get the correctness metric after
the agent has run, modify this function.
"""
logger.info(f"{'-' * 50} BEGIN Runtime Completion Fn {'-' * 50}")
logger.info(f'{"-" * 50} BEGIN Runtime Completion Fn {"-" * 50}')
obs: CmdOutputObservation
# default value
@@ -190,7 +190,7 @@ def complete_runtime(
'timeout': timeout,
'num_workers': num_workers,
}
logger.info(f"{'-' * 50} END Runtime Completion Fn {'-' * 50}")
logger.info(f'{"-" * 50} END Runtime Completion Fn {"-" * 50}')
return test_result

View File

@@ -73,7 +73,7 @@ def run_eval(
runtime: Runtime,
):
"""Run the evaluation and create report"""
logger.info(f"{'-' * 50} BEGIN Runtime Initialization Fn {'-' * 50}")
logger.info(f'{"-" * 50} BEGIN Runtime Initialization Fn {"-" * 50}')
obs: CmdOutputObservation
lca_path = bench_config['LCA_PATH']
@@ -146,7 +146,7 @@ def run_eval(
obs = runtime.run_action(action)
report_str = obs.content
logger.info(f"{'-' * 50} END Runtime Initialization Fn {'-' * 50}")
logger.info(f'{"-" * 50} END Runtime Initialization Fn {"-" * 50}')
return report_str

View File

@@ -95,7 +95,7 @@ def initialize_runtime(
This function is called before the runtime is used to run the agent.
"""
logger.info(f"{'-' * 50} BEGIN Runtime Initialization Fn {'-' * 50}")
logger.info(f'{"-" * 50} BEGIN Runtime Initialization Fn {"-" * 50}')
obs: CmdOutputObservation
lca_path = bench_config['LCA_PATH']
@@ -177,7 +177,7 @@ def initialize_runtime(
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
logger.info(f"{'-' * 50} END Runtime Initialization Fn {'-' * 50}")
logger.info(f'{"-" * 50} END Runtime Initialization Fn {"-" * 50}')
def complete_runtime(
@@ -190,7 +190,7 @@ def complete_runtime(
If you need to do something in the sandbox to get the correctness metric after
the agent has run, modify this function.
"""
logger.info(f"{'-' * 50} BEGIN Runtime Completion Fn {'-' * 50}")
logger.info(f'{"-" * 50} BEGIN Runtime Completion Fn {"-" * 50}')
obs: CmdOutputObservation
model_name = bench_config['model_name']
@@ -227,7 +227,7 @@ def complete_runtime(
obs = runtime.run_action(action)
result = json.loads(obs.content)
logger.info(f"{'-' * 50} END Runtime Completion Fn {'-' * 50}")
logger.info(f'{"-" * 50} END Runtime Completion Fn {"-" * 50}')
return result
@@ -313,7 +313,7 @@ Phase 7. VERIFICATION: Test your implementation thoroughly.
7.2.3 The functions you changed
7.4 If any tests fail, revise your implementation until all tests pass
Phase 8. REVIEW: Carefully re-read the problem description and compare your changes with the base commit {instance["sha_fail"]}.
Phase 8. REVIEW: Carefully re-read the problem description and compare your changes with the base commit {instance['sha_fail']}.
8.1 Ensure you've fully addressed all requirements.
Once all phases are done, announce: 'Agent Task Complete'.

View File

@@ -141,7 +141,7 @@ def initialize_runtime(
This function is called before the runtime is used to run the agent.
"""
logger.info(f"{'-' * 50} BEGIN Runtime Initialization Fn {'-' * 50}")
logger.info(f'{"-" * 50} BEGIN Runtime Initialization Fn {"-" * 50}')
obs: CmdOutputObservation
# Set instance id
@@ -174,7 +174,7 @@ def initialize_runtime(
ipynb_obs = runtime.run_action(action)
logger.info(ipynb_obs, extra={'msg_type': 'OBSERVATION'})
logger.info(f"{'-' * 50} END Runtime Initialization Fn {'-' * 50}")
logger.info(f'{"-" * 50} END Runtime Initialization Fn {"-" * 50}')
# Prepare instruction

View File

@@ -82,7 +82,7 @@ def initialize_runtime(
This function is called before the runtime is used to run the agent.
"""
logger.info(f"{'-' * 50} BEGIN Runtime Initialization Fn {'-' * 50}")
logger.info(f'{"-" * 50} BEGIN Runtime Initialization Fn {"-" * 50}')
obs: CmdOutputObservation
# Set instance id
@@ -103,7 +103,7 @@ def initialize_runtime(
obs = runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
logger.info(f"{'-' * 50} END Runtime Initialization Fn {'-' * 50}")
logger.info(f'{"-" * 50} END Runtime Initialization Fn {"-" * 50}')
return goal, obs
@@ -116,7 +116,7 @@ def complete_runtime(
If you need to do something in the sandbox to get the correctness metric after
the agent has run, modify this function.
"""
logger.info(f"{'-' * 50} BEGIN Runtime Completion Fn {'-' * 50}")
logger.info(f'{"-" * 50} BEGIN Runtime Completion Fn {"-" * 50}')
obs: CmdOutputObservation
action = BrowseInteractiveAction(browser_actions=BROWSER_EVAL_GET_REWARDS_ACTION)
@@ -124,7 +124,7 @@ def complete_runtime(
obs = runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
logger.info(f"{'-' * 50} END Runtime Completion Fn {'-' * 50}")
logger.info(f'{"-" * 50} END Runtime Completion Fn {"-" * 50}')
return {
'rewards': json.loads(obs.content),
}

View File

@@ -130,7 +130,7 @@ def initialize_runtime(runtime: Runtime):
This function is called before the runtime is used to run the agent.
"""
logger.info(f"{'-' * 50} BEGIN Runtime Initialization Fn {'-' * 50}")
logger.info(f'{"-" * 50} BEGIN Runtime Initialization Fn {"-" * 50}')
obs: CmdOutputObservation
# Set instance id
@@ -144,7 +144,7 @@ def initialize_runtime(runtime: Runtime):
obs = runtime.run_action(action)
assert obs.exit_code == 0
logger.info(f"{'-' * 50} END Runtime Initialization Fn {'-' * 50}")
logger.info(f'{"-" * 50} END Runtime Initialization Fn {"-" * 50}')
def process_instance(

View File

@@ -93,7 +93,7 @@ def classify_error(llm: LLM, failed_case: dict) -> str:
error_category = response.choices[0].message['content']
except Exception as e:
logger.error(
f"Failed to classify the error for the failed case: {failed_case['instance_id']}"
f'Failed to classify the error for the failed case: {failed_case["instance_id"]}'
)
logger.error(e)
error_category = input(

View File

@@ -103,7 +103,7 @@ def initialize_runtime(
This function is called before the runtime is used to run the agent.
"""
logger.info(f"{'-' * 50} BEGIN Runtime Initialization Fn {'-' * 50}")
logger.info(f'{"-" * 50} BEGIN Runtime Initialization Fn {"-" * 50}')
obs: CmdOutputObservation
# Set instance id
@@ -137,7 +137,7 @@ def initialize_runtime(
obs = runtime.run_action(action)
assert obs.exit_code == 0
logger.info(f"{'-' * 50} END Runtime Initialization Fn {'-' * 50}")
logger.info(f'{"-" * 50} END Runtime Initialization Fn {"-" * 50}')
def complete_runtime(
@@ -150,7 +150,7 @@ def complete_runtime(
If you need to do something in the sandbox to get the correctness metric after
the agent has run, modify this function.
"""
logger.info(f"{'-' * 50} BEGIN Runtime Completion Fn {'-' * 50}")
logger.info(f'{"-" * 50} BEGIN Runtime Completion Fn {"-" * 50}')
obs: CmdOutputObservation
repo_url = instance['github']
@@ -199,7 +199,7 @@ def complete_runtime(
outputs['success'] = 1
outputs['eval_exit_code'] = obs.exit_code
logger.info(f"{'-' * 50} END Runtime Completion Fn {'-' * 50}")
logger.info(f'{"-" * 50} END Runtime Completion Fn {"-" * 50}')
return outputs

View File

@@ -120,9 +120,9 @@ def process_instance(
"""
# Setup the logger properly, so you can run multi-processing to parallelize the evaluation
if reset_logger:
assert (
log_dir is not None
), "Can't reset logger without a provided log directory."
assert log_dir is not None, (
"Can't reset logger without a provided log directory."
)
os.makedirs(log_dir, exist_ok=True)
reset_logger_for_multiprocessing(logger, instance.instance_id, log_dir)
else:
@@ -289,7 +289,7 @@ def process_instance(
)
report = _report[instance_id]
logger.info(
f"[{instance_id}] report: {report}\nResult for {instance_id}: resolved: {report['resolved']}"
f'[{instance_id}] report: {report}\nResult for {instance_id}: resolved: {report["resolved"]}'
)
instance['test_result']['report']['resolved'] = report[
'resolved'
@@ -365,9 +365,9 @@ if __name__ == '__main__':
for line in tqdm(f, desc='Loading predictions')
]
)
assert (
'instance_id' in predictions.columns
), 'Input file must contain instance_id column.'
assert 'instance_id' in predictions.columns, (
'Input file must contain instance_id column.'
)
if 'model_patch' not in predictions.columns and (
'test_result' in predictions.columns
@@ -376,17 +376,17 @@ if __name__ == '__main__':
raise ValueError(
'Input file must contain model_patch column OR test_result column with model_patch field.'
)
assert len(predictions['instance_id'].unique()) == len(
predictions
), 'instance_id column must be unique.'
assert len(predictions['instance_id'].unique()) == len(predictions), (
'instance_id column must be unique.'
)
if 'model_patch' not in predictions.columns:
predictions['model_patch'] = predictions['test_result'].apply(
lambda x: x.get('git_patch', '')
)
assert {'instance_id', 'model_patch'}.issubset(
set(predictions.columns)
), 'Input file must contain instance_id and model_patch columns.'
assert {'instance_id', 'model_patch'}.issubset(set(predictions.columns)), (
'Input file must contain instance_id and model_patch columns.'
)
# Process model_patch
predictions['model_patch'] = predictions['model_patch'].apply(process_git_patch)

View File

@@ -103,21 +103,21 @@ def get_instruction(instance: pd.Series, metadata: EvalMetadata):
f'<issue_description>\n'
f'{instance.problem_statement}\n'
'</issue_description>\n\n'
"Can you help me implement the necessary changes to the repository so that the requirements specified in the <issue_description> are met?\n"
'Can you help me implement the necessary changes to the repository so that the requirements specified in the <issue_description> are met?\n'
"I've already taken care of all changes to any of the test files described in the <issue_description>. This means you DON'T have to modify the testing logic or any of the tests in any way!\n"
"Also the development Java environment is already set up for you (i.e., all dependencies already installed), so you don't need to install other packages.\n"
"Your task is to make the minimal changes to non-test files in the /workspace directory to ensure the <issue_description> is satisfied.\n"
"Follow these steps to resolve the issue:\n"
"1. As a first step, it might be a good idea to explore the repo to familiarize yourself with its structure.\n"
'Your task is to make the minimal changes to non-test files in the /workspace directory to ensure the <issue_description> is satisfied.\n'
'Follow these steps to resolve the issue:\n'
'1. As a first step, it might be a good idea to explore the repo to familiarize yourself with its structure.\n'
'2. Create a Java class to reproduce the error and execute it by first compiling with `javac <classname>.java` and then running with `java <classname>` using the BashTool, to confirm the error\n'
"3. Edit the sourcecode of the repo to resolve the issue.\n"
"4. Rerun your reproduce script or class and confirm that the error is fixed!\n"
"5. Think about edgecases, add comprehensive tests for them in your reproduce class or script, and run them to make sure your fix handles these cases as well.\n"
f"6. Once you are done with the initial implementation, please carefully re-read the problem description and check the difference between the current code and the base commit {instance['base_commit']}. Do you think that the issue has been completely and comprehensively solved? Write tests to check the correctness of the solution, specifically focusing on tests that may point out any remaining problems that are not yet solved. Run all of the tests in the repo and check if any of them fail, and if they do fix the code. Repeat this process of carefully reading the problem description and current implementation, testing, and fixing any problems until you are confident that the current implementation is correct. Find and run any tests in the repo that are related to:\n"
" - The issue you are fixing\n"
" - The files you modified\n"
" - The functions or classes you changed\n"
" Make sure all these tests pass with your changes.\n"
'3. Edit the sourcecode of the repo to resolve the issue.\n'
'4. Rerun your reproduce script or class and confirm that the error is fixed!\n'
'5. Think about edgecases, add comprehensive tests for them in your reproduce class or script, and run them to make sure your fix handles these cases as well.\n'
f'6. Once you are done with the initial implementation, please carefully re-read the problem description and check the difference between the current code and the base commit {instance["base_commit"]}. Do you think that the issue has been completely and comprehensively solved? Write tests to check the correctness of the solution, specifically focusing on tests that may point out any remaining problems that are not yet solved. Run all of the tests in the repo and check if any of them fail, and if they do fix the code. Repeat this process of carefully reading the problem description and current implementation, testing, and fixing any problems until you are confident that the current implementation is correct. Find and run any tests in the repo that are related to:\n'
' - The issue you are fixing\n'
' - The files you modified\n'
' - The functions or classes you changed\n'
' Make sure all these tests pass with your changes.\n'
"Your thinking should be thorough and so it's fine if it's very long.\n"
),
'go': (
@@ -275,9 +275,7 @@ def get_instruction(instance: pd.Series, metadata: EvalMetadata):
if instruction and RUN_WITH_BROWSING:
instruction += (
'<IMPORTANT!>\n'
'You SHOULD NEVER attempt to browse the web. '
'</IMPORTANT!>\n'
'<IMPORTANT!>\nYou SHOULD NEVER attempt to browse the web. </IMPORTANT!>\n'
)
return instruction

View File

@@ -3,9 +3,10 @@ import json
input_file = 'XXX.jsonl'
output_file = 'YYY.jsonl'
with open(input_file, 'r', encoding='utf-8') as fin, open(
output_file, 'w', encoding='utf-8'
) as fout:
with (
open(input_file, 'r', encoding='utf-8') as fin,
open(output_file, 'w', encoding='utf-8') as fout,
):
for line in fin:
line = line.strip()
if not line:
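The `with` rewrite above moves to the parenthesized multi-context form that Python 3.10+ accepts, which the updated formatter prefers over splitting a single `open(...) as ...` call across lines. A hedged sketch with throwaway temporary files:

import os
import tempfile

tmp_dir = tempfile.mkdtemp()
src = os.path.join(tmp_dir, 'in.jsonl')
dst = os.path.join(tmp_dir, 'out.jsonl')
open(src, 'w', encoding='utf-8').close()

# Parenthesized context managers keep each `open(...)` on its own line.
with (
    open(src, 'r', encoding='utf-8') as fin,
    open(dst, 'w', encoding='utf-8') as fout,
):
    for line in fin:
        fout.write(line)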

View File

@@ -92,7 +92,7 @@ def initialize_runtime(
This function is called before the runtime is used to run the agent.
"""
logger.info(f"{'-' * 50} BEGIN Runtime Initialization Fn {'-' * 50}")
logger.info(f'{"-" * 50} BEGIN Runtime Initialization Fn {"-" * 50}')
obs: CmdOutputObservation
# Set up workspace directories
@@ -123,7 +123,7 @@ def initialize_runtime(
assert obs.exit_code == 0
assert dataset_name in obs.content
logger.info(f"{'-' * 50} END Runtime Initialization Fn {'-' * 50}")
logger.info(f'{"-" * 50} END Runtime Initialization Fn {"-" * 50}')
def complete_runtime(
@@ -136,7 +136,7 @@ def complete_runtime(
If you need to do something in the sandbox to get the correctness metric after
the agent has run, modify this function.
"""
logger.info(f"{'-' * 50} BEGIN Runtime Completion Fn {'-' * 50}")
logger.info(f'{"-" * 50} BEGIN Runtime Completion Fn {"-" * 50}')
obs: CmdOutputObservation
test_result = {}
@@ -156,7 +156,7 @@ def complete_runtime(
else:
test_result = {'program': 'ERROR'}
logger.info(f"{'-' * 50} END Runtime Completion Fn {'-' * 50}")
logger.info(f'{"-" * 50} END Runtime Completion Fn {"-" * 50}')
return test_result

View File

@@ -129,15 +129,15 @@ def process_instance(
AssertionError: if `conditional_imports` is not provided.
"""
assert (
conditional_imports is not None
), 'conditional_imports must be provided to run process_instance using multiprocessing'
assert conditional_imports is not None, (
'conditional_imports must be provided to run process_instance using multiprocessing'
)
# Setup the logger properly, so you can run multi-processing to parallelize the evaluation
if reset_logger:
assert (
log_dir is not None
), "Can't reset logger without a provided log directory."
assert log_dir is not None, (
"Can't reset logger without a provided log directory."
)
os.makedirs(log_dir, exist_ok=True)
reset_logger_for_multiprocessing(logger, instance.instance_id, log_dir)
else:
@@ -319,7 +319,7 @@ def process_instance(
)
report = _report[instance_id]
logger.info(
f"[{instance_id}] report: {report}\nResult for {instance_id}: resolved: {report['resolved']}"
f'[{instance_id}] report: {report}\nResult for {instance_id}: resolved: {report["resolved"]}'
)
instance['test_result']['report']['resolved'] = report[
'resolved'
@@ -418,9 +418,9 @@ if __name__ == '__main__':
for line in tqdm(f, desc='Loading predictions')
]
)
assert (
'instance_id' in predictions.columns
), 'Input file must contain instance_id column.'
assert 'instance_id' in predictions.columns, (
'Input file must contain instance_id column.'
)
if 'model_patch' not in predictions.columns and (
'test_result' in predictions.columns
@@ -429,17 +429,17 @@ if __name__ == '__main__':
raise ValueError(
'Input file must contain model_patch column OR test_result column with model_patch field.'
)
assert len(predictions['instance_id'].unique()) == len(
predictions
), 'instance_id column must be unique.'
assert len(predictions['instance_id'].unique()) == len(predictions), (
'instance_id column must be unique.'
)
if 'model_patch' not in predictions.columns:
predictions['model_patch'] = predictions['test_result'].apply(
lambda x: x.get('git_patch', '')
)
assert {'instance_id', 'model_patch'}.issubset(
set(predictions.columns)
), 'Input file must contain instance_id and model_patch columns.'
assert {'instance_id', 'model_patch'}.issubset(set(predictions.columns)), (
'Input file must contain instance_id and model_patch columns.'
)
# Process model_patch
predictions['model_patch'] = predictions['model_patch'].apply(process_git_patch)

View File

@@ -160,7 +160,7 @@ Phase 7. VERIFICATION: Test your implementation thoroughly.
7.2 Add edge cases to your test script to ensure comprehensive coverage.
7.3 Run existing tests related to the modified code to ensure you haven't broken anything.
8. FINAL REVIEW: Carefully re-read the problem description and compare your changes with the base commit {instance["base_commit"]}.
8. FINAL REVIEW: Carefully re-read the problem description and compare your changes with the base commit {instance['base_commit']}.
8.1 Ensure you've fully addressed all requirements.
8.2 Run any tests in the repository related to:
8.2.1 The issue you are fixing
@@ -173,16 +173,14 @@ Be thorough in your exploration, testing, and reasoning. It's fine if your think
if RUN_WITH_BROWSING:
instruction += (
'<IMPORTANT!>\n'
'You SHOULD NEVER attempt to browse the web. '
'</IMPORTANT!>\n'
'<IMPORTANT!>\nYou SHOULD NEVER attempt to browse the web. </IMPORTANT!>\n'
)
if 'image_assets' in instance:
assets = json.loads(instance['image_assets'])
assert (
'problem_statement' in assets
), 'problem_statement is required in image_assets'
assert 'problem_statement' in assets, (
'problem_statement is required in image_assets'
)
image_urls = assets['problem_statement']
return MessageAction(content=instruction, image_urls=image_urls)
return MessageAction(content=instruction)

View File

@@ -137,7 +137,7 @@ for repo, diff in repo_diffs:
is_significant = diff >= threshold
repo_color = 'red' if is_significant else 'yellow'
print(f"\n{colored(repo, repo_color, attrs=['bold'])}:")
print(f'\n{colored(repo, repo_color, attrs=["bold"])}:')
print(
colored(
f'Difference: {diff} instances! (Larger diff = Y better)',

View File

@@ -44,17 +44,17 @@ def convert_history_to_str(history):
if isinstance(event, list):
# "event" is a legacy pair of (action, observation)
event_obj = event_from_dict(event[0])
ret += f'## {i+1}| {event_obj.__class__.__name__}\n\n'
ret += f'## {i + 1}| {event_obj.__class__.__name__}\n\n'
ret += str(event_obj)
ret += separator
event_obj = event_from_dict(event[1])
ret += f'## {i+1}| {event_obj.__class__.__name__}\n\n'
ret += f'## {i + 1}| {event_obj.__class__.__name__}\n\n'
ret += str(event_obj)
else:
# "event" is a single event
event_obj = event_from_dict(event)
ret += f'## {i+1}| {event_obj.__class__.__name__}\n\n'
ret += f'## {i + 1}| {event_obj.__class__.__name__}\n\n'
ret += str(event_obj)
return ret
@@ -105,12 +105,12 @@ def convert_tool_call_to_string(tool_call: dict) -> str:
if tool_call['type'] != 'function':
raise ValueError("Tool call type must be 'function'.")
ret = f"<function={tool_call['function']['name']}>\n"
ret = f'<function={tool_call["function"]["name"]}>\n'
try:
args = json.loads(tool_call['function']['arguments'])
except json.JSONDecodeError as e:
raise ValueError(
f"Failed to parse arguments as JSON. Arguments: {tool_call['function']['arguments']}"
f'Failed to parse arguments as JSON. Arguments: {tool_call["function"]["arguments"]}'
) from e
for param_name, param_value in args.items():
is_multiline = isinstance(param_value, str) and '\n' in param_value

View File

@@ -263,38 +263,38 @@ if __name__ == '__main__':
# Print detailed results for single file
print(f'\nResults for {args.input_path}:')
print(
f"Number of resolved: {result['resolved']['count']} / {result['total_instances']} ({result['resolved']['percentage']:.2f}% [{result['resolved']['ci'][0]:.2f}%, {result['resolved']['ci'][1]:.2f}%])"
f'Number of resolved: {result["resolved"]["count"]} / {result["total_instances"]} ({result["resolved"]["percentage"]:.2f}% [{result["resolved"]["ci"][0]:.2f}%, {result["resolved"]["ci"][1]:.2f}%])'
)
print(
f"Number of empty patch: {result['empty_patches']['count']} / {result['total_instances']} ({result['empty_patches']['percentage']:.2f}%)"
f'Number of empty patch: {result["empty_patches"]["count"]} / {result["total_instances"]} ({result["empty_patches"]["percentage"]:.2f}%)'
)
print(
f"Number of error lines: {result['errors']['total']} / {result['total_instances']} ({result['errors']['percentage']:.2f}%)"
f'Number of error lines: {result["errors"]["total"]} / {result["total_instances"]} ({result["errors"]["percentage"]:.2f}%)'
)
print(
f"Number of agent stuck in loop: {result['errors']['stuck_in_loop']['count']} / {result['total_instances']} ({result['errors']['stuck_in_loop']['percentage']:.2f}%)"
f'Number of agent stuck in loop: {result["errors"]["stuck_in_loop"]["count"]} / {result["total_instances"]} ({result["errors"]["stuck_in_loop"]["percentage"]:.2f}%)'
)
print(
f"Number of unfinished runs: {result['unfinished_runs']['count']} / {result['total_instances']} ({result['unfinished_runs']['percentage']:.2f}%)"
f'Number of unfinished runs: {result["unfinished_runs"]["count"]} / {result["total_instances"]} ({result["unfinished_runs"]["percentage"]:.2f}%)'
)
print(f"Total cost: {result['costs']['total']:.2f} USD")
print(f'Total cost: {result["costs"]["total"]:.2f} USD')
print('## Statistics')
print(
f"Avg. num of turns per instance: {result['statistics']['avg_turns']:.2f}"
f'Avg. num of turns per instance: {result["statistics"]["avg_turns"]:.2f}'
)
print(
f"Avg. agent cost per instance: {result['statistics']['costs']['main_agent']:.2f} USD"
f'Avg. agent cost per instance: {result["statistics"]["costs"]["main_agent"]:.2f} USD'
)
print(
f"Avg. editor cost per instance: {result['statistics']['costs']['editor']:.2f} USD"
f'Avg. editor cost per instance: {result["statistics"]["costs"]["editor"]:.2f} USD'
)
print(
f"Avg. total cost per instance: {result['statistics']['costs']['total']:.2f} USD"
f'Avg. total cost per instance: {result["statistics"]["costs"]["total"]:.2f} USD'
)
print('## Detailed error breakdown:')
for error, data in result['errors']['breakdown'].items():
print(f"{error}: {data['count']} ({data['percentage']:.2f}%)")
print(f'{error}: {data["count"]} ({data["percentage"]:.2f}%)')
except Exception as e:
print(f'Error processing {args.input_path}: {str(e)}')

View File

@@ -34,16 +34,16 @@ if os.path.exists(swebench_official_report_json):
report = json.load(f)
output_md = (
"# SWE-bench Report\n"
"This folder contains the evaluation results of the SWE-bench using the [official evaluation docker containerization](https://github.com/princeton-nlp/SWE-bench/blob/main/docs/20240627_docker/README.md#choosing-the-right-cache_level).\n\n"
"## Summary\n"
f"- total instances: {report['total_instances']}\n"
f"- submitted instances: {report['submitted_instances']}\n"
f"- completed instances: {report['completed_instances']}\n"
f"- empty patch instances: {report['empty_patch_instances']}\n"
f"- resolved instances: {report['resolved_instances']}\n"
f"- unresolved instances: {report['unresolved_instances']}\n"
f"- error instances: {report['error_instances']}\n"
'# SWE-bench Report\n'
'This folder contains the evaluation results of the SWE-bench using the [official evaluation docker containerization](https://github.com/princeton-nlp/SWE-bench/blob/main/docs/20240627_docker/README.md#choosing-the-right-cache_level).\n\n'
'## Summary\n'
f'- total instances: {report["total_instances"]}\n'
f'- submitted instances: {report["submitted_instances"]}\n'
f'- completed instances: {report["completed_instances"]}\n'
f'- empty patch instances: {report["empty_patch_instances"]}\n'
f'- resolved instances: {report["resolved_instances"]}\n'
f'- unresolved instances: {report["unresolved_instances"]}\n'
f'- error instances: {report["error_instances"]}\n'
)
output_md += '\n## Resolved Instances\n'
@@ -111,12 +111,12 @@ elif os.path.exists(openhands_remote_report_jsonl):
print(f'Total instances in eval report: {n_eval_instances}')
# Verify no duplicates
assert (
len(instance_ids) == n_instances
), 'Duplicate instance ids found in original output'
assert (
len(eval_instance_ids) == n_eval_instances
), 'Duplicate instance ids found in eval report'
assert len(instance_ids) == n_instances, (
'Duplicate instance ids found in original output'
)
assert len(eval_instance_ids) == n_eval_instances, (
'Duplicate instance ids found in eval report'
)
# Initialize counters
stats = {'total': len(instance_ids), 'resolved': 0, 'empty_patch': 0, 'error': 0}
@@ -152,7 +152,7 @@ elif os.path.exists(openhands_remote_report_jsonl):
# Generate markdown report
def _instance_id_to_log_path(instance_id):
path = f"{args.input_file.replace('.jsonl', '.swebench_eval.logs')}/instance_{instance_id}.log"
path = f'{args.input_file.replace(".jsonl", ".swebench_eval.logs")}/instance_{instance_id}.log'
return os.path.relpath(path, start=dirname)
# ... rest of markdown generation code remains the same ...
@@ -228,9 +228,10 @@ if os.path.exists(args.input_file + '.bak'):
os.rename(args.input_file, args.input_file + '.bak')
# Process and write file row by row
with open(args.input_file + '.bak', 'r') as infile, open(
args.input_file, 'w'
) as outfile:
with (
open(args.input_file + '.bak', 'r') as infile,
open(args.input_file, 'w') as outfile,
):
for line in tqdm(infile, desc='Updating output file'):
data = json.loads(line)
instance_id = data['instance_id']

View File

@@ -20,7 +20,7 @@ def verify_instance_costs(row: pd.Series) -> float:
try:
metrics = row.get('metrics')
if not metrics:
logger.warning(f"Instance {row['instance_id']}: No metrics found")
logger.warning(f'Instance {row["instance_id"]}: No metrics found')
return 0.0
accumulated = metrics.get('accumulated_cost')
@@ -28,7 +28,7 @@ def verify_instance_costs(row: pd.Series) -> float:
if accumulated is None:
logger.warning(
f"Instance {row['instance_id']}: No accumulated_cost in metrics"
f'Instance {row["instance_id"]}: No accumulated_cost in metrics'
)
return 0.0
@@ -41,8 +41,8 @@ def verify_instance_costs(row: pd.Series) -> float:
if abs(costs[i]['cost'] - costs[i + 1]['cost']) < 1e-6:
has_duplicate = True
logger.debug(
f"Instance {row['instance_id']}: Possible buggy double-counting detected! "
f"Steps {i} and {i+1} have identical costs: {costs[i]['cost']:.2f}"
f'Instance {row["instance_id"]}: Possible buggy double-counting detected! '
f'Steps {i} and {i + 1} have identical costs: {costs[i]["cost"]:.2f}'
)
else:
all_pairs_match = False
@@ -64,15 +64,15 @@ def verify_instance_costs(row: pd.Series) -> float:
if not abs(total_cost - accumulated) < 1e-6:
logger.warning(
f"Instance {row['instance_id']}: Cost mismatch: "
f"accumulated: {accumulated:.2f}, sum of costs: {total_cost:.2f}, "
f'Instance {row["instance_id"]}: Cost mismatch: '
f'accumulated: {accumulated:.2f}, sum of costs: {total_cost:.2f}, '
)
return total_cost
except Exception as e:
logger.error(
f"Error verifying costs for instance {row.get('instance_id', 'UNKNOWN')}: {e}"
f'Error verifying costs for instance {row.get("instance_id", "UNKNOWN")}: {e}'
)
return 0.0

View File

@@ -46,7 +46,7 @@
"for FILE_PATH in FILE_PATHS:\n",
" with gzip.open(FILE_PATH, 'rb') as f: # Use 'rb' for gzipped files\n",
" for i, line in tqdm(\n",
" enumerate(f), desc=f\"Processing {FILE_PATH.split('/')[-1]}\"\n",
" enumerate(f), desc=f'Processing {FILE_PATH.split(\"/\")[-1]}'\n",
" ):\n",
" # Parse only the fields we need\n",
" raw_data = json.loads(line)\n",

View File

@@ -54,9 +54,9 @@ logger.info(f'Using docker image prefix: {DOCKER_IMAGE_PREFIX}')
def get_config(instance: pd.Series) -> AppConfig:
base_container_image = get_instance_docker_image(instance['instance_id_swebench'])
assert (
base_container_image
), f"Invalid container image for instance {instance['instance_id_swebench']}."
assert base_container_image, (
f'Invalid container image for instance {instance["instance_id_swebench"]}.'
)
logger.info(f'Using instance container image: {base_container_image}.')
return AppConfig(
run_as_openhands=False,
@@ -183,9 +183,9 @@ def run_mutation_testing(
mutation_action = CmdRunAction(command=f'cat {log_file}')
mutation_action.set_hard_timeout(300)
mutation_obs = runtime.run_action(mutation_action)
assert isinstance(
mutation_obs, CmdOutputObservation
), 'Failed to retrieve mutation output.'
assert isinstance(mutation_obs, CmdOutputObservation), (
'Failed to retrieve mutation output.'
)
return mutation_obs.exit_code, mutation_obs.content
@@ -294,9 +294,9 @@ def process_instance(
AssertionError: if the `reset_logger` flag is set without a provided log directory.
"""
if reset_logger:
assert (
log_dir is not None
), "Can't reset logger without a provided log directory."
assert log_dir is not None, (
"Can't reset logger without a provided log directory."
)
os.makedirs(log_dir, exist_ok=True)
reset_logger_for_multiprocessing(logger, instance.instance_id, log_dir)
else:
@@ -528,9 +528,9 @@ if __name__ == '__main__':
# Load predictions
assert args.input_file.endswith('.jsonl'), 'Input file must be a jsonl file.'
predictions = pd.read_json(args.input_file, lines=True)
assert (
'instance_id' in predictions.columns
), 'Input file must contain instance_id column.'
assert 'instance_id' in predictions.columns, (
'Input file must contain instance_id column.'
)
if 'test_suite' not in predictions.columns and (
'test_result' in predictions.columns
@@ -562,9 +562,9 @@ if __name__ == '__main__':
lambda x: x['test_suite']
)
assert len(predictions['instance_id'].unique()) == len(
predictions
), 'instance_id column must be unique.'
assert len(predictions['instance_id'].unique()) == len(predictions), (
'instance_id column must be unique.'
)
assert {'instance_id_swebench', 'test_suite', 'instance_id'}.issubset(
set(predictions.columns)

View File

@@ -1,5 +1,5 @@
import sys
from typing import Callable, Dict, List, Optional, Sequence, TypeVar, Union
from typing import Callable, Optional, Sequence, TypeVar, Union
import nltk
import numpy as np
@@ -11,7 +11,7 @@ if sys.getrecursionlimit() < 10_000:
sys.setrecursionlimit(10_000)
def bleu(gold: List[str], pred: List[str]) -> float:
def bleu(gold: list[str], pred: list[str]) -> float:
"""
Calculate BLEU score, using smoothing method 2 with auto reweighting, in the range of 0~100.
@@ -29,7 +29,7 @@ def bleu(gold: List[str], pred: List[str]) -> float:
)
def batch_bleu(golds: List[List[str]], preds: List[List[str]]) -> List[float]:
def batch_bleu(golds: list[list[str]], preds: list[list[str]]) -> list[float]:
"""
Calculate BLEU score for a batch of sentences.
@@ -42,7 +42,7 @@ def batch_bleu(golds: List[List[str]], preds: List[List[str]]) -> List[float]:
return [bleu(gold, pred) for gold, pred in zip(golds, preds)]
def corpus_bleu(golds: List[List[str]], preds: List[List[str]]) -> float:
def corpus_bleu(golds: list[list[str]], preds: list[list[str]]) -> float:
"""
Calculate corpus-level BLEU score for a batch of sentences.
@@ -61,7 +61,7 @@ def corpus_bleu(golds: List[List[str]], preds: List[List[str]]) -> float:
def edit_sim(
gold: Union[str, List[str]], pred: Union[str, List[str]], sep: str = ' '
gold: Union[str, list[str]], pred: Union[str, list[str]], sep: str = ' '
) -> float:
"""
Calculate char-level edit similarity, in the range of 0~100.
@@ -81,10 +81,10 @@ def edit_sim(
def batch_edit_sim(
golds: List[Union[str, List[str]]],
preds: List[Union[str, List[str]]],
golds: list[Union[str, list[str]]],
preds: list[Union[str, list[str]]],
sep: str = ' ',
) -> List[float]:
) -> list[float]:
"""
Calculate char-level edit similarity for a batch of sentences.
@@ -114,7 +114,7 @@ def exact_match(gold: T, pred: T) -> float:
return 100.0 if gold == pred else 0.0
def batch_exact_match(golds: List[T], preds: List[T]) -> List[float]:
def batch_exact_match(golds: list[T], preds: list[T]) -> list[float]:
"""
Calculate exact match accuracy for a batch of sentences.
@@ -128,8 +128,8 @@ def batch_exact_match(golds: List[T], preds: List[T]) -> List[float]:
def rouge_l(
gold: Union[str, List[str]], pred: Union[str, List[str]], sep: str = ' '
) -> Dict[str, float]:
gold: Union[str, list[str]], pred: Union[str, list[str]], sep: str = ' '
) -> dict[str, float]:
"""
Calculate ROUGE-L F1, precision, and recall scores, in the range of 0~100.
@@ -152,10 +152,10 @@ def rouge_l(
def batch_rouge_l(
golds: List[Union[str, List[str]]],
preds: List[Union[str, List[str]]],
golds: list[Union[str, list[str]]],
preds: list[Union[str, list[str]]],
sep: str = ' ',
) -> Dict[str, List[float]]:
) -> dict[str, list[float]]:
"""
Calculate ROUGE-L F1, precision, and recall scores for a batch of sentences.
@@ -171,8 +171,8 @@ def batch_rouge_l(
def accuracy(
gold: List[str],
pred: List[str],
gold: list[str],
pred: list[str],
ignore: Optional[Sequence[str]] = None,
) -> float:
"""
@@ -206,10 +206,10 @@ def accuracy(
def batch_accuracy(
golds: List[List[str]],
preds: List[List[str]],
golds: list[list[str]],
preds: list[list[str]],
ignore: Optional[Sequence[str]] = None,
) -> List[float]:
) -> list[float]:
"""
Calculate token-level accuracy for a batch of sentences.
@@ -224,8 +224,8 @@ def batch_accuracy(
def first_match_to_topk(
first_match_list: List[int], k_values: List[int]
) -> Dict[int, List[float]]:
first_match_list: list[int], k_values: list[int]
) -> dict[int, list[float]]:
"""
Calculate top-k accuracy with the first match ranks (1-indexed).
@@ -250,7 +250,7 @@ def pass_at_k(n: int, c: int, k: int) -> float:
return (1.0 - np.prod(1.0 - k / np.arange(n - c + 1, n + 1)).item()) * 100
def self_bleu(samples: List[List[str]]) -> float:
def self_bleu(samples: list[list[str]]) -> float:
"""
Calculate self-BLEU among the samples.
:param samples: the chosen m samples
@@ -273,7 +273,7 @@ def self_bleu(samples: List[List[str]]) -> float:
return np.mean(scores).item()
def self_edit_distance(samples: List[Union[str, List[str]]], sep=' ') -> float:
def self_edit_distance(samples: list[Union[str, list[str]]], sep=' ') -> float:
"""
Calculate self-edit-distance among the samples.
:param samples: the chosen m samples
@@ -299,7 +299,7 @@ def self_edit_distance(samples: List[Union[str, List[str]]], sep=' ') -> float:
return np.mean(scores).item()
QUALITY_METRICS: Dict[str, Callable[[List[str], List[str]], float]] = {
QUALITY_METRICS: dict[str, Callable[[list[str], list[str]], float]] = {
'bleu': bleu,
'xmatch': exact_match,
'edit-sim': edit_sim,

View File

@@ -95,9 +95,7 @@ def get_instruction(instance: pd.Series, metadata: EvalMetadata):
if RUN_WITH_BROWSING:
instruction += (
'<IMPORTANT!>\n'
'You SHOULD NEVER attempt to browse the web. '
'</IMPORTANT!>\n'
'<IMPORTANT!>\nYou SHOULD NEVER attempt to browse the web. </IMPORTANT!>\n'
)
return instruction
@@ -243,7 +241,7 @@ def initialize_runtime(
# Copy the file to the desired location
action = CmdRunAction(
command=f"cp /tmp/test_suite.py /testbed/{instance['test_file']}"
command=f'cp /tmp/test_suite.py /testbed/{instance["test_file"]}'
)
action.set_hard_timeout(600)
logger.info(action, extra={'msg_type': 'ACTION'})

View File

@@ -71,9 +71,10 @@ def process_images(dataset, original_namespace, new_namespace, start_instance_id
patch_file_path = 'patch.diff'
test_patch_file_path = 'test_patch.diff'
with open(patch_file_path, 'w') as patch_file, open(
test_patch_file_path, 'w'
) as test_patch_file:
with (
open(patch_file_path, 'w') as patch_file,
open(test_patch_file_path, 'w') as test_patch_file,
):
patch_file.write(datum['patch'])
test_patch_file.write(datum['test_patch'])

View File

@@ -34,17 +34,17 @@ def convert_history_to_str(history):
if isinstance(event, list):
# "event" is a legacy pair of (action, observation)
event_obj = event_from_dict(event[0])
ret += f'## {i+1}| {event_obj.__class__.__name__}\n\n'
ret += f'## {i + 1}| {event_obj.__class__.__name__}\n\n'
ret += str(event_obj)
ret += separator
event_obj = event_from_dict(event[1])
ret += f'## {i+1}| {event_obj.__class__.__name__}\n\n'
ret += f'## {i + 1}| {event_obj.__class__.__name__}\n\n'
ret += str(event_obj)
else:
# "event" is a single event
event_obj = event_from_dict(event)
ret += f'## {i+1}| {event_obj.__class__.__name__}\n\n'
ret += f'## {i + 1}| {event_obj.__class__.__name__}\n\n'
ret += str(event_obj)
return ret

View File

@@ -1,6 +1,5 @@
import ast
import re
from typing import List, Tuple
from evaluation.benchmarks.testgeneval.constants import TestStatus
from evaluation.benchmarks.testgeneval.log_parsers import (
@@ -37,7 +36,7 @@ def extract_preamble_classes_and_functions(code):
current_position = 0
def extract_class_body(code: str, start_index: int) -> Tuple[str, int]:
def extract_class_body(code: str, start_index: int) -> tuple[str, int]:
"""
Extracts the body of a class from the given code starting from the specified index.
Returns the class body and the end index of the class body.
@@ -168,7 +167,7 @@ def extract_preamble_classes_and_functions(code):
def filter_passing_tests(
test_content: str, test_output: str, repo: str
) -> Tuple[str, List[str], List[str]]:
) -> tuple[str, list[str], list[str]]:
"""
Filter tests based on their execution results.
Returns:
@@ -246,7 +245,7 @@ def filter_passing_tests(
def filter_tests(
test_content: str, test_output: str, repo: str
) -> Tuple[str, List[str], List[str]]:
) -> tuple[str, list[str], list[str]]:
"""
Filter tests using AST parsing to remove failing test functions from the test file.
Non-test functions (e.g. setup or helper methods) and classes (even if all test methods are failing)

View File

@@ -24,7 +24,7 @@ def get_test_directives(instance: TestGenEvalInstance) -> list:
return ['test.py']
# Get test directives from test patch and remove non-test files
directives = [f"/testbed/{instance['test_file']}"]
directives = [f'/testbed/{instance["test_file"]}']
# For Django tests, remove extension + "tests/" prefix and convert slashes to dots (module referencing)
if instance['repo'] == 'django/django':
@@ -65,8 +65,8 @@ def load_testgeneval_dataset(
if ids - dataset_ids:
raise ValueError(
(
"Some instance IDs not found in dataset!"
f"\nMissing IDs:\n{' '.join(ids - dataset_ids)}"
'Some instance IDs not found in dataset!'
f'\nMissing IDs:\n{" ".join(ids - dataset_ids)}'
)
)
dataset = [instance for instance in dataset if instance['id'] in ids]

View File

@@ -7,7 +7,7 @@ import os
import re
from dataclasses import dataclass
from enum import Enum, auto
from typing import Dict, List, Union
from typing import Union
from openhands.core.logger import openhands_logger as logger
from openhands.events.action import BrowseInteractiveAction
@@ -100,7 +100,7 @@ class ClickAction(BrowserAction):
return f'click("{self.selector}")'
def parse_content_to_elements(content: str) -> Dict[str, str]:
def parse_content_to_elements(content: str) -> dict[str, str]:
"""Parse the observation content into a dictionary mapping anchors to their descriptions"""
elements = {}
current_anchor = None
@@ -170,7 +170,7 @@ def resolve_action(action: BrowserAction, content: str) -> BrowserAction:
def pre_login(
runtime: Runtime,
services: List[str],
services: list[str],
save_screenshots=True,
screenshots_dir='screenshots',
):

View File

@@ -8,7 +8,6 @@ import json
import os
import shutil
import tempfile
from typing import List
import yaml
from browsing import pre_login
@@ -68,7 +67,7 @@ def get_config(
return config
def load_dependencies(runtime: Runtime) -> List[str]:
def load_dependencies(runtime: Runtime) -> list[str]:
"""
Every task has a dependencies.yml file, which lists all the services that the
task depends on. This function loads the file and returns all dependent service names.
@@ -128,7 +127,7 @@ def run_solver(
runtime: Runtime,
task_name: str,
config: AppConfig,
dependencies: List[str],
dependencies: list[str],
save_final_state: bool,
state_dir: str,
save_screenshots: bool,

View File

@@ -8,7 +8,6 @@ import json
import os
import re
import sys
from typing import Dict, Tuple
def calculate_cost(model: str, prompt_tokens: int, completion_tokens: int) -> float:
@@ -60,7 +59,7 @@ def calculate_cost(model: str, prompt_tokens: int, completion_tokens: int) -> fl
raise ValueError(f'Unknown model: {model}')
def analyze_eval_json_file(filepath: str) -> Tuple[int, int]:
def analyze_eval_json_file(filepath: str) -> tuple[int, int]:
"""
Analyze a single eval JSON file and extract the total and result from final_score.
@@ -84,7 +83,7 @@ def analyze_eval_json_file(filepath: str) -> Tuple[int, int]:
return (0, 0)
def analyze_traj_json_file(filepath: str) -> Tuple[int, float]:
def analyze_traj_json_file(filepath: str) -> tuple[int, float]:
"""
Analyze a single trajectory JSON file and extract the steps and tokens
for each step. Then estimate the cost based on the tokens and the model type.
@@ -115,7 +114,7 @@ def analyze_traj_json_file(filepath: str) -> Tuple[int, float]:
def analyze_folder(
folder_path: str,
) -> Tuple[Dict[str, Tuple[int, int]], Dict[str, Tuple[int, float]]]:
) -> tuple[dict[str, tuple[int, int]], dict[str, tuple[int, float]]]:
"""
Analyze all eval_*.json & traj_*.json files in the specified folder.
@@ -252,7 +251,7 @@ def main():
print('\n## Summary\n')
print(f'**Tasks Evaluated:** {len(eval_results)}\n')
print(
f'**Perfect Completions:** {perfect_completions}/{len(eval_results)} ({(perfect_completions/len(eval_results)*100):.2f}%)\n'
f'**Perfect Completions:** {perfect_completions}/{len(eval_results)} ({(perfect_completions / len(eval_results) * 100):.2f}%)\n'
)
overall_score = (
@@ -278,10 +277,10 @@ def main():
print('\n## Statistics\n')
print('| Metric | Value |')
print('|---------|--------|')
print(f'| Highest Task Score | {highest_score*100:.2f}% |')
print(f'| Lowest Task Score | {lowest_score*100:.2f}% |')
print(f'| Median Task Score | {median_score*100:.2f}% |')
print(f'| Average Task Score | {avg_score*100:.2f}% |')
print(f'| Highest Task Score | {highest_score * 100:.2f}% |')
print(f'| Lowest Task Score | {lowest_score * 100:.2f}% |')
print(f'| Median Task Score | {median_score * 100:.2f}% |')
print(f'| Average Task Score | {avg_score * 100:.2f}% |')
# compute avg score per nature category
print('\n## Statistics per Nature Category\n')
@@ -307,9 +306,11 @@ def main():
if nature_category == task_nature and is_perfect
)
print(
f'| Perfect Completions for {task_nature} | {perfect_completions}/{num_of_tasks} ({perfect_completions/num_of_tasks*100:.2f}%) |'
f'| Perfect Completions for {task_nature} | {perfect_completions}/{num_of_tasks} ({perfect_completions / num_of_tasks * 100:.2f}%) |'
)
print(
f'| Average Score for {task_nature} | {task_nature_score * 100:.2f}% |'
)
print(f'| Average Score for {task_nature} | {task_nature_score*100:.2f}% |')
if __name__ == '__main__':

View File

@@ -64,7 +64,7 @@ def initialize_runtime(runtime: Runtime):
This function is called before the runtime is used to run the agent.
"""
logger.info(f"{'-' * 50} BEGIN Runtime Initialization Fn {'-' * 50}")
logger.info(f'{"-" * 50} BEGIN Runtime Initialization Fn {"-" * 50}')
obs: CmdOutputObservation
# Set instance id
@@ -80,7 +80,7 @@ def initialize_runtime(runtime: Runtime):
runtime.add_env_vars({'WOLFRAM_ALPHA_APPID': args.wolfram_alpha_appid})
logger.info(f"{'-' * 50} END Runtime Initialization Fn {'-' * 50}")
logger.info(f'{"-" * 50} END Runtime Initialization Fn {"-" * 50}')
def process_instance(instance: Any, metadata: EvalMetadata, reset_logger: bool = True):

View File

@@ -100,7 +100,7 @@ def initialize_runtime(
This function is called before the runtime is used to run the agent.
"""
logger.info(f"{'-' * 50} BEGIN Runtime Initialization Fn {'-' * 50}")
logger.info(f'{"-" * 50} BEGIN Runtime Initialization Fn {"-" * 50}')
obs: CmdOutputObservation
# Set instance id
@@ -116,7 +116,7 @@ def initialize_runtime(
goal_image_urls = []
if hasattr(obs, 'goal_image_urls'):
goal_image_urls = obs.goal_image_urls
logger.info(f"{'-' * 50} END Runtime Initialization Fn {'-' * 50}")
logger.info(f'{"-" * 50} END Runtime Initialization Fn {"-" * 50}')
return goal, goal_image_urls
@@ -129,7 +129,7 @@ def complete_runtime(
If you need to do something in the sandbox to get the correctness metric after
the agent has run, modify this function.
"""
logger.info(f"{'-' * 50} BEGIN Runtime Completion Fn {'-' * 50}")
logger.info(f'{"-" * 50} BEGIN Runtime Completion Fn {"-" * 50}')
obs: CmdOutputObservation
action = BrowseInteractiveAction(browser_actions=BROWSER_EVAL_GET_REWARDS_ACTION)
@@ -137,7 +137,7 @@ def complete_runtime(
obs = runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
logger.info(f"{'-' * 50} END Runtime Completion Fn {'-' * 50}")
logger.info(f'{"-" * 50} END Runtime Completion Fn {"-" * 50}')
return {
'rewards': json.loads(obs.content),
}

View File

@@ -87,7 +87,7 @@ def initialize_runtime(
This function is called before the runtime is used to run the agent.
"""
logger.info(f"{'-' * 50} BEGIN Runtime Initialization Fn {'-' * 50}")
logger.info(f'{"-" * 50} BEGIN Runtime Initialization Fn {"-" * 50}')
obs: CmdOutputObservation
# Set instance id
@@ -102,7 +102,7 @@ def initialize_runtime(
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
goal = obs.content
logger.info(f"{'-' * 50} END Runtime Initialization Fn {'-' * 50}")
logger.info(f'{"-" * 50} END Runtime Initialization Fn {"-" * 50}')
return goal
@@ -115,7 +115,7 @@ def complete_runtime(
If you need to do something in the sandbox to get the correctness metric after
the agent has run, modify this function.
"""
logger.info(f"{'-' * 50} BEGIN Runtime Completion Fn {'-' * 50}")
logger.info(f'{"-" * 50} BEGIN Runtime Completion Fn {"-" * 50}')
obs: CmdOutputObservation
action = BrowseInteractiveAction(browser_actions=BROWSER_EVAL_GET_REWARDS_ACTION)
@@ -123,7 +123,7 @@ def complete_runtime(
obs = runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
logger.info(f"{'-' * 50} END Runtime Completion Fn {'-' * 50}")
logger.info(f'{"-" * 50} END Runtime Completion Fn {"-" * 50}')
return {
'rewards': json.loads(obs.content),
}

View File

@@ -93,14 +93,14 @@ def process_instance(
spec = importlib.util.spec_from_file_location(instance_id, instance.file_path)
test_module = importlib.util.module_from_spec(spec)
spec.loader.exec_module(test_module)
assert hasattr(
test_module, 'Test'
), f'Test module {instance_id} does not have a Test class'
assert hasattr(test_module, 'Test'), (
f'Test module {instance_id} does not have a Test class'
)
test_class: type[BaseIntegrationTest] = test_module.Test
assert issubclass(
test_class, BaseIntegrationTest
), f'Test class {instance_id} does not inherit from BaseIntegrationTest'
assert issubclass(test_class, BaseIntegrationTest), (
f'Test class {instance_id} does not inherit from BaseIntegrationTest'
)
instruction = test_class.INSTRUCTION

View File

@@ -132,7 +132,7 @@ def run_test_case(test_cases_dir, workspace_dir, request):
'python3',
f'{SCRIPT_DIR}/../../openhands/main.py',
'-d',
f"{os.path.join(agent_dir, 'workspace')}",
f'{os.path.join(agent_dir, "workspace")}',
'-c',
f'{agents_ref[agent]}',
'-t',
@@ -165,7 +165,7 @@ def pytest_configure(config):
level=logging.INFO,
format='%(asctime)s [%(levelname)s] %(message)s',
handlers=[
logging.FileHandler(f"test_results_{now.strftime('%Y%m%d_%H%M%S')}.log"),
logging.FileHandler(f'test_results_{now.strftime("%Y%m%d_%H%M%S")}.log'),
logging.StreamHandler(),
],
)

View File

@@ -221,9 +221,9 @@ def prepare_dataset(
eval_ids: list[str] | None = None,
skip_num: int | None = None,
):
assert (
'instance_id' in dataset.columns
), "Expected 'instance_id' column in the dataset. You should define your own unique identifier for each instance and use it as the 'instance_id' column."
assert 'instance_id' in dataset.columns, (
"Expected 'instance_id' column in the dataset. You should define your own unique identifier for each instance and use it as the 'instance_id' column."
)
id_column = 'instance_id'
logger.info(f'Writing evaluation output to {output_file}')
finished_ids: set[str] = set()
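
Note: the hunk above shows a wrapping style that recurs throughout this commit: when an assert with a message is too long for one line, the condition now stays on the assert line and the message is parenthesized, instead of parenthesizing the condition. Both layouts are equivalent at runtime; a minimal sketch of the pattern, assuming the new layout comes from the updated formatter:

rows = ['a', 'b']

# Old layout: the condition is wrapped in parentheses
assert (
    len(rows) > 0
), 'Expected at least one row in the dataset.'

# New layout: the condition stays inline and the message is parenthesized
assert len(rows) > 0, (
    'Expected at least one row in the dataset.'
)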

View File

@@ -39,7 +39,9 @@ def refine_prompt(prompt: str):
def create_cmd_run_tool(
use_short_description: bool = False,
) -> ChatCompletionToolParam:
description = _SHORT_BASH_DESCRIPTION if use_short_description else _DETAILED_BASH_DESCRIPTION
description = (
_SHORT_BASH_DESCRIPTION if use_short_description else _DETAILED_BASH_DESCRIPTION
)
return ChatCompletionToolParam(
type='function',
function=ChatCompletionToolParamFunctionChunk(

View File

@@ -131,12 +131,12 @@ upload_file(bid: str, file: str | list[str])
for _, action in _browser_action_space.action_set.items():
assert (
action.signature in _BROWSER_TOOL_DESCRIPTION
), f'Browser description mismatch. Please double check if the BrowserGym updated their action space.\n\nAction: {action.signature}'
assert (
action.description in _BROWSER_TOOL_DESCRIPTION
), f'Browser description mismatch. Please double check if the BrowserGym updated their action space.\n\nAction: {action.description}'
assert action.signature in _BROWSER_TOOL_DESCRIPTION, (
f'Browser description mismatch. Please double check if the BrowserGym updated their action space.\n\nAction: {action.signature}'
)
assert action.description in _BROWSER_TOOL_DESCRIPTION, (
f'Browser description mismatch. Please double check if the BrowserGym updated their action space.\n\nAction: {action.description}'
)
BrowserTool = ChatCompletionToolParam(
type='function',

View File

@@ -52,7 +52,7 @@ class ReadOnlyAgent(CodeActAgent):
super().__init__(llm, config)
logger.debug(
f"TOOLS loaded for ReadOnlyAgent: {', '.join([tool.get('function').get('name') for tool in self.tools])}"
f'TOOLS loaded for ReadOnlyAgent: {", ".join([tool.get("function").get("name") for tool in self.tools])}'
)
@property

View File

@@ -42,7 +42,7 @@ Review the current state of the page and all other information to find the best
goal_image_urls = []
if image_urls is not None:
for idx, url in enumerate(image_urls):
goal_txt = goal_txt + f'Images: Goal input image ({idx+1})\n'
goal_txt = goal_txt + f'Images: Goal input image ({idx + 1})\n'
goal_image_urls.append(url)
goal_txt += '\n'
return goal_txt, goal_image_urls
@@ -111,7 +111,7 @@ Note: This action set allows you to interact with your environment. Most of them
def get_history_prompt(prev_actions: list[BrowseInteractiveAction]) -> str:
history_prompt = ['# History of all previous interactions with the task:\n']
for i in range(len(prev_actions)):
history_prompt.append(f'## step {i+1}')
history_prompt.append(f'## step {i + 1}')
history_prompt.append(
            f'\nOutput thought and action: {prev_actions[i].thought} ```{prev_actions[i].browser_actions}```\n'
)
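
Note: these hunks show two cosmetic f-string changes that appear across many files of this diff: the outer quotes are normalized to single quotes (flipping any quotes used inside replacement fields to double quotes), and expressions inside replacement fields gain standard operator spacing, e.g. {i+1} becomes {i + 1}. The rendered output is identical; a small sketch with hypothetical values:

value = {'name': 'demo'}
i = 0

# Before: double-quoted f-string, no spaces around + inside the braces
print(f"step {i+1}: {value['name']}")

# After: single-quoted f-string, inner quotes flipped, expression spaced out
print(f'step {i + 1}: {value["name"]}')

# Both lines print: step 1: demo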

View File

@@ -288,7 +288,12 @@ async def main(loop: asyncio.AbstractEventLoop):
# Use settings from settings store if available and override with command line arguments
if settings:
config.default_agent = args.agent_cls if args.agent_cls else settings.agent
if args.agent_cls:
config.default_agent = str(args.agent_cls)
else:
# settings.agent is not None because we check for it in setup_config_from_args
assert settings.agent is not None
config.default_agent = settings.agent
if not args.llm_config and settings.llm_model and settings.llm_api_key:
llm_config = config.get_llm_config()
llm_config.model = settings.llm_model

View File

@@ -84,7 +84,7 @@ def display_settings(config: AppConfig):
# Construct the summary text with aligned columns
settings_lines = [
f'{label+":":<{max_label_width+1}} {value:<}' # Changed value alignment to left (<)
f'{label + ":":<{max_label_width + 1}} {value:<}' # Changed value alignment to left (<)
for label, value in str_labels_and_values
]
settings_text = '\n'.join(settings_lines)

View File

@@ -549,7 +549,7 @@ def cli_confirm(
] + [
(
'class:selected' if i == selected[0] else 'class:unselected',
f"{'> ' if i == selected[0] else ' '}{choice}\n",
f'{"> " if i == selected[0] else " "}{choice}\n',
)
for i, choice in enumerate(choices)
]

View File

@@ -167,17 +167,17 @@ class Agent(ABC):
- mcp_tools (list[dict]): The list of MCP tools.
"""
logger.info(
f"Setting {len(mcp_tools)} MCP tools for agent {self.name}: {[tool['function']['name'] for tool in mcp_tools]}"
f'Setting {len(mcp_tools)} MCP tools for agent {self.name}: {[tool["function"]["name"] for tool in mcp_tools]}'
)
for tool in mcp_tools:
_tool = ChatCompletionToolParam(**tool)
if _tool['function']['name'] in self.mcp_tools:
logger.warning(
f"Tool {_tool['function']['name']} already exists, skipping"
f'Tool {_tool["function"]["name"]} already exists, skipping'
)
continue
self.mcp_tools[_tool['function']['name']] = _tool
self.tools.append(_tool)
logger.info(
f"Tools updated for agent {self.name}, total {len(self.tools)}: {[tool['function']['name'] for tool in self.tools]}"
f'Tools updated for agent {self.name}, total {len(self.tools)}: {[tool["function"]["name"] for tool in self.tools]}'
)

View File

@@ -220,7 +220,7 @@ class State:
'trace_version': openhands.__version__,
'tags': [
f'agent:{agent_name}',
f"web_host:{os.environ.get('WEB_HOST', 'unspecified')}",
f'web_host:{os.environ.get("WEB_HOST", "unspecified")}',
f'openhands_version:{openhands.__version__}',
],
}

View File

@@ -142,9 +142,9 @@ async def run_controller(
agent, runtime, config, replay_events=replay_events
)
assert isinstance(
initial_user_action, Action
), f'initial user actions must be an Action, got {type(initial_user_action)}'
assert isinstance(initial_user_action, Action), (
f'initial user actions must be an Action, got {type(initial_user_action)}'
)
logger.debug(
f'Agent Controller Initialized: Running agent {agent.name}, model '
f'{agent.llm.config.model}, with actions: {initial_user_action}'

View File

@@ -149,9 +149,9 @@ class Message(BaseModel):
# an observation message with tool response
if self.tool_call_id is not None:
assert (
self.name is not None
), 'name is required when tool_call_id is not None'
assert self.name is not None, (
'name is required when tool_call_id is not None'
)
message_dict['tool_call_id'] = self.tool_call_id
message_dict['name'] = self.name

View File

@@ -36,9 +36,7 @@ class BrowseInteractiveAction(Action):
@property
def message(self) -> str:
return (
f'I am interacting with the browser:\n' f'```\n{self.browser_actions}\n```'
)
return f'I am interacting with the browser:\n```\n{self.browser_actions}\n```'
def __str__(self) -> str:
ret = '**BrowseInteractiveAction**\n'

View File

@@ -115,13 +115,13 @@ class FileEditObservation(Observation):
for idx, line in enumerate(old_lines[i1:i2]):
line_num = i1 + idx + 1
cur_group['before_edits'].append(
f'-{line_num:>{_indent_pad_size-1}}|{line}'
f'-{line_num:>{_indent_pad_size - 1}}|{line}'
)
if tag in {'replace', 'insert'}:
for idx, line in enumerate(new_lines[j1:j2]):
line_num = j1 + idx + 1
cur_group['after_edits'].append(
f'+{line_num:>{_indent_pad_size-1}}|{line}'
f'+{line_num:>{_indent_pad_size - 1}}|{line}'
)
edit_groups.append(cur_group)
return edit_groups
@@ -169,12 +169,12 @@ class FileEditObservation(Observation):
for i, cur_edit_group in enumerate(edit_groups):
if i != 0:
result.append('-------------------------')
result.append(f'[begin of {op_type} {i+1} / {len(edit_groups)}]')
result.append(f'[begin of {op_type} {i + 1} / {len(edit_groups)}]')
result.append(f'(content before {op_type})')
result.extend(cur_edit_group['before_edits'])
result.append(f'(content after {op_type})')
result.extend(cur_edit_group['after_edits'])
result.append(f'[end of {op_type} {i+1} / {len(edit_groups)}]')
result.append(f'[end of {op_type} {i + 1} / {len(edit_groups)}]')
# Cache the result
self._diff_cache = '\n'.join(result)
@@ -186,9 +186,9 @@ class FileEditObservation(Observation):
return self.content
if not self.prev_exist:
assert (
self.old_content == ''
), 'old_content should be empty if the file is new (prev_exist=False).'
assert self.old_content == '', (
'old_content should be empty if the file is new (prev_exist=False).'
)
return f'[New file {self.path} is created with the provided content.]\n'
# Use cached diff if available, otherwise compute it

View File

@@ -277,7 +277,7 @@ class GitHubService(BaseGitService, GitService):
result = response.json()
if 'errors' in result:
raise UnknownException(
f"GraphQL query error: {json.dumps(result['errors'])}"
f'GraphQL query error: {json.dumps(result["errors"])}'
)
return dict(result)

View File

@@ -253,12 +253,12 @@ def convert_tool_call_to_string(tool_call: dict) -> str:
if tool_call['type'] != 'function':
raise FunctionCallConversionError("Tool call type must be 'function'.")
ret = f"<function={tool_call['function']['name']}>\n"
ret = f'<function={tool_call["function"]["name"]}>\n'
try:
args = json.loads(tool_call['function']['arguments'])
except json.JSONDecodeError as e:
raise FunctionCallConversionError(
f"Failed to parse arguments as JSON. Arguments: {tool_call['function']['arguments']}"
f'Failed to parse arguments as JSON. Arguments: {tool_call["function"]["arguments"]}'
) from e
for param_name, param_value in args.items():
is_multiline = isinstance(param_value, str) and '\n' in param_value
@@ -280,8 +280,8 @@ def convert_tools_to_description(tools: list[dict]) -> str:
fn = tool['function']
if i > 0:
ret += '\n'
ret += f"---- BEGIN FUNCTION #{i+1}: {fn['name']} ----\n"
ret += f"Description: {fn['description']}\n"
ret += f'---- BEGIN FUNCTION #{i + 1}: {fn["name"]} ----\n'
ret += f'Description: {fn["description"]}\n'
if 'parameters' in fn:
ret += 'Parameters:\n'
@@ -303,12 +303,12 @@ def convert_tools_to_description(tools: list[dict]) -> str:
desc += f'\nAllowed values: [{enum_values}]'
ret += (
f' ({j+1}) {param_name} ({param_type}, {param_status}): {desc}\n'
f' ({j + 1}) {param_name} ({param_type}, {param_status}): {desc}\n'
)
else:
ret += 'No parameters are required for this function.\n'
ret += f'---- END FUNCTION #{i+1} ----\n'
ret += f'---- END FUNCTION #{i + 1} ----\n'
return ret
@@ -667,7 +667,7 @@ def convert_non_fncall_messages_to_fncall_messages(
'content': [{'type': 'text', 'text': tool_result}]
if isinstance(content, list)
else tool_result,
'tool_call_id': f'toolu_{tool_call_counter-1:02d}', # Use last generated ID
'tool_call_id': f'toolu_{tool_call_counter - 1:02d}', # Use last generated ID
}
)
else:
@@ -790,14 +790,14 @@ def convert_from_multiple_tool_calls_to_single_tool_call_messages(
# add the tool result
converted_messages.append(message)
else:
assert (
len(pending_tool_calls) == 0
), f'Found pending tool calls but not found in pending list: {pending_tool_calls=}'
assert len(pending_tool_calls) == 0, (
f'Found pending tool calls but not found in pending list: {pending_tool_calls=}'
)
converted_messages.append(message)
else:
assert (
len(pending_tool_calls) == 0
), f'Found pending tool calls but not expect to handle it with role {role}: {pending_tool_calls=}, {message=}'
assert len(pending_tool_calls) == 0, (
f'Found pending tool calls but not expect to handle it with role {role}: {pending_tool_calls=}, {message=}'
)
converted_messages.append(message)
if not ignore_final_tool_result and len(pending_tool_calls) > 0:

View File

@@ -158,12 +158,12 @@ async def add_mcp_tools_to_agent(
ActionExecutionClient, # inline import to avoid circular import
)
assert isinstance(
runtime, ActionExecutionClient
), 'Runtime must be an instance of ActionExecutionClient'
assert (
runtime.runtime_initialized
), 'Runtime must be initialized before adding MCP tools'
assert isinstance(runtime, ActionExecutionClient), (
'Runtime must be an instance of ActionExecutionClient'
)
assert runtime.runtime_initialized, (
'Runtime must be initialized before adding MCP tools'
)
# Add the runtime as another MCP server
updated_mcp_config = runtime.get_updated_mcp_config()
@@ -171,7 +171,7 @@ async def add_mcp_tools_to_agent(
mcp_tools = await fetch_mcp_tools_from_config(updated_mcp_config)
logger.info(
f"Loaded {len(mcp_tools)} MCP tools: {[tool['function']['name'] for tool in mcp_tools]}"
f'Loaded {len(mcp_tools)} MCP tools: {[tool["function"]["name"] for tool in mcp_tools]}'
)
# Set the MCP tools on the agent

View File

@@ -214,7 +214,7 @@ class GitlabIssueHandler(IssueHandlerInterface):
def reply_to_comment(self, pr_number: int, comment_id: str, reply: str) -> None:
response = httpx.get(
f'{self.base_url}/merge_requests/{pr_number}/discussions/{comment_id.split('/')[-1]}',
f'{self.base_url}/merge_requests/{pr_number}/discussions/{comment_id.split("/")[-1]}',
headers=self.headers,
)
response.raise_for_status()
@@ -225,7 +225,7 @@ class GitlabIssueHandler(IssueHandlerInterface):
'note_id': discussions.get('notes', [])[-1]['id'],
}
response = httpx.post(
f'{self.base_url}/merge_requests/{pr_number}/discussions/{comment_id.split('/')[-1]}/notes',
f'{self.base_url}/merge_requests/{pr_number}/discussions/{comment_id.split("/")[-1]}/notes',
headers=self.headers,
json=data,
)

View File

@@ -99,7 +99,7 @@ class RemoteRuntimeBuilder(RuntimeBuilder):
logger.info(f'Build status: {status}')
if status == 'SUCCESS':
logger.debug(f"Successfully built {status_data['image']}")
logger.debug(f'Successfully built {status_data["image"]}')
return str(status_data['image'])
elif status in [
'FAILURE',
@@ -139,9 +139,9 @@ class RemoteRuntimeBuilder(RuntimeBuilder):
if result['exists']:
logger.debug(
f"Image {image_name} exists. "
f"Uploaded at: {result['image']['upload_time']}, "
f"Size: {result['image']['image_size_bytes'] / 1024 / 1024:.2f} MB"
f'Image {image_name} exists. '
f'Uploaded at: {result["image"]["upload_time"]}, '
f'Size: {result["image"]["image_size_bytes"] / 1024 / 1024:.2f} MB'
)
else:
logger.debug(f'Image {image_name} does not exist.')

View File

@@ -115,12 +115,12 @@ class DaytonaRuntime(ActionExecutionClient):
def _construct_api_url(self, port: int) -> str:
assert self.workspace is not None, 'Workspace is not initialized'
assert (
self.workspace.instance.info is not None
), 'Workspace info is not available'
assert (
self.workspace.instance.info.provider_metadata is not None
), 'Provider metadata is not available'
assert self.workspace.instance.info is not None, (
'Workspace info is not available'
)
assert self.workspace.instance.info.provider_metadata is not None, (
'Provider metadata is not available'
)
node_domain = json.loads(self.workspace.instance.info.provider_metadata)[
'nodeDomain'

View File

@@ -40,9 +40,9 @@ class E2BBox:
def _archive(self, host_src: str, recursive: bool = False):
if recursive:
assert os.path.isdir(
host_src
), 'Source must be a directory when recursive is True'
assert os.path.isdir(host_src), (
'Source must be a directory when recursive is True'
)
files = glob(host_src + '/**/*', recursive=True)
srcname = os.path.basename(host_src)
tar_filename = os.path.join(os.path.dirname(host_src), srcname + '.tar')
@@ -52,9 +52,9 @@ class E2BBox:
file, arcname=os.path.relpath(file, os.path.dirname(host_src))
)
else:
assert os.path.isfile(
host_src
), 'Source must be a file when recursive is False'
assert os.path.isfile(host_src), (
'Source must be a file when recursive is False'
)
srcname = os.path.basename(host_src)
tar_filename = os.path.join(os.path.dirname(host_src), srcname + '.tar')
with tarfile.open(tar_filename, mode='w') as tar:

View File

@@ -130,12 +130,12 @@ class RemoteRuntime(ActionExecutionClient):
)
self.container_image = self.config.sandbox.runtime_container_image
self._start_runtime()
assert (
self.runtime_id is not None
), 'Runtime ID is not set. This should never happen.'
assert (
self.runtime_url is not None
), 'Runtime URL is not set. This should never happen.'
assert self.runtime_id is not None, (
'Runtime ID is not set. This should never happen.'
)
assert self.runtime_url is not None, (
'Runtime URL is not set. This should never happen.'
)
self.send_status_message('STATUS$WAITING_FOR_CLIENT')
if not self.attach_to_existing:
self.log('info', 'Waiting for runtime to be alive...')

View File

@@ -157,7 +157,7 @@ def _print_window(
else:
output += '(this is the beginning of the file)\n'
for i in range(start, end + 1):
_new_line = f'{i}|{lines[i-1]}'
_new_line = f'{i}|{lines[i - 1]}'
if not _new_line.endswith('\n'):
_new_line += '\n'
output += _new_line

View File

@@ -189,7 +189,7 @@ class JupyterKernel:
if os.environ.get('DEBUG'):
logging.info(
f"MSG TYPE: {msg_type.upper()} DONE:{execution_done}\nCONTENT: {msg_dict['content']}"
f'MSG TYPE: {msg_type.upper()} DONE:{execution_done}\nCONTENT: {msg_dict["content"]}'
)
if msg_type == 'error':
@@ -203,7 +203,7 @@ class JupyterKernel:
if 'image/png' in msg_dict['content']['data']:
                        # use markdown to display image (in case of large image)
outputs.append(
f"\n![image](data:image/png;base64,{msg_dict['content']['data']['image/png']})\n"
f'\n![image](data:image/png;base64,{msg_dict["content"]["data"]["image/png"]})\n'
)
elif msg_type == 'execute_reply':
@@ -272,7 +272,7 @@ class ExecuteHandler(tornado.web.RequestHandler):
def make_app() -> tornado.web.Application:
jupyter_kernel = JupyterKernel(
f"localhost:{os.environ.get('JUPYTER_GATEWAY_PORT', '8888')}",
f'localhost:{os.environ.get("JUPYTER_GATEWAY_PORT", "8888")}',
os.environ.get('JUPYTER_GATEWAY_KERNEL_ID', 'default'),
)
asyncio.get_event_loop().run_until_complete(jupyter_kernel.initialize())

View File

@@ -501,9 +501,9 @@ class BashSession:
if len(splited_commands) > 1:
return ErrorObservation(
content=(
f"ERROR: Cannot execute multiple commands at once.\n"
f"Please run each command separately OR chain them into a single command via && or ;\n"
f"Provided commands:\n{'\n'.join(f'({i + 1}) {cmd}' for i, cmd in enumerate(splited_commands))}"
f'ERROR: Cannot execute multiple commands at once.\n'
f'Please run each command separately OR chain them into a single command via && or ;\n'
f'Provided commands:\n{"\n".join(f"({i + 1}) {cmd}" for i, cmd in enumerate(splited_commands))}'
)
)
@@ -591,8 +591,8 @@ class BashSession:
logger.debug(
f'PANE CONTENT GOT after {time.time() - _start_time:.2f} seconds'
)
logger.debug(f"BEGIN OF PANE CONTENT: {cur_pane_output.split('\n')[:10]}")
logger.debug(f"END OF PANE CONTENT: {cur_pane_output.split('\n')[-10:]}")
logger.debug(f'BEGIN OF PANE CONTENT: {cur_pane_output.split("\n")[:10]}')
logger.debug(f'END OF PANE CONTENT: {cur_pane_output.split("\n")[-10:]}')
ps1_matches = CmdOutputMetadata.matches_ps1_metadata(cur_pane_output)
current_ps1_count = len(ps1_matches)

View File

@@ -35,8 +35,8 @@ def generate_file_viewer_html(file_path: str) -> str:
# Check if the file extension is supported
if file_extension not in supported_extensions:
raise ValueError(
f"Unsupported file extension: {file_extension}. "
f"Supported extensions are: {', '.join(supported_extensions)}"
f'Unsupported file extension: {file_extension}. '
f'Supported extensions are: {", ".join(supported_extensions)}'
)
# Check if the file exists

View File

@@ -385,9 +385,9 @@ if __name__ == '__main__':
# and create a Dockerfile dynamically and place it in the build_folder only. This allows the Docker image to
# then be created using the Dockerfile (most likely using the containers/build.sh script)
build_folder = args.build_folder
assert os.path.exists(
build_folder
), f'Build folder {build_folder} does not exist'
assert os.path.exists(build_folder), (
f'Build folder {build_folder} does not exist'
)
logger.debug(
f'Copying the source code and generating the Dockerfile in the build folder: {build_folder}'
)

View File

@@ -926,7 +926,7 @@ class WindowsPowershellSession:
content=(
f'ERROR: Cannot execute multiple commands at once.\n'
f'Please run each command separately OR chain them into a single command via PowerShell operators (e.g., ; or |).\n'
f'Detected commands:\n{"\n".join(f"({i+1}) {cmd}" for i, cmd in enumerate(splited_cmds))}'
f'Detected commands:\n{"\n".join(f"({i + 1}) {cmd}" for i, cmd in enumerate(splited_cmds))}'
)
)
elif statements.Count == 0 and not command.strip().startswith('#'):

View File

@@ -176,9 +176,9 @@ class InvariantAnalyzer(SecurityAnalyzer):
],
)
)
assert (
self.guardrail_llm is not None
), 'InvariantAnalyzer.guardrail_llm should be initialized before calling check_usertask'
assert self.guardrail_llm is not None, (
'InvariantAnalyzer.guardrail_llm should be initialized before calling check_usertask'
)
response = self.guardrail_llm.completion(
messages=self.guardrail_llm.format_messages_for_llm(messages),
stop=['.'],
@@ -261,9 +261,9 @@ class InvariantAnalyzer(SecurityAnalyzer):
],
)
)
assert (
self.guardrail_llm is not None
), 'InvariantAnalyzer.guardrail_llm should be initialized before calling check_fillaction'
assert self.guardrail_llm is not None, (
'InvariantAnalyzer.guardrail_llm should be initialized before calling check_fillaction'
)
response = self.guardrail_llm.completion(
messages=self.guardrail_llm.format_messages_for_llm(messages),
stop=['.'],

View File

@@ -20,7 +20,7 @@ TraceElement = Message | ToolCall | ToolOutput | Function
def get_next_id(trace: list[TraceElement]) -> str:
used_ids = [el.id for el in trace if type(el) == ToolCall]
used_ids = [el.id for el in trace if isinstance(el, ToolCall)]
for i in range(1, len(used_ids) + 2):
if str(i) not in used_ids:
return str(i)
@@ -31,7 +31,7 @@ def get_last_id(
trace: list[TraceElement],
) -> str | None:
for el in reversed(trace):
if type(el) == ToolCall:
if isinstance(el, ToolCall):
return el.id
return None
@@ -39,12 +39,12 @@ def get_last_id(
def parse_action(trace: list[TraceElement], action: Action) -> list[TraceElement]:
next_id = get_next_id(trace)
inv_trace: list[TraceElement] = []
if type(action) == MessageAction:
if isinstance(action, MessageAction):
if action.source == EventSource.USER:
inv_trace.append(Message(role='user', content=action.content))
else:
inv_trace.append(Message(role='assistant', content=action.content))
elif type(action) in [NullAction, ChangeAgentStateAction]:
elif isinstance(action, (NullAction, ChangeAgentStateAction)):
pass
elif hasattr(action, 'action') and action.action is not None:
event_dict = event_to_dict(action)
@@ -63,7 +63,7 @@ def parse_observation(
trace: list[TraceElement], obs: Observation
) -> list[TraceElement]:
last_id = get_last_id(trace)
if type(obs) in [NullObservation, AgentStateChangedObservation]:
if isinstance(obs, (NullObservation, AgentStateChangedObservation)):
return []
elif hasattr(obs, 'content') and obs.content is not None:
return [ToolOutput(role='tool', content=obs.content, tool_call_id=last_id)]
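
Note: the hunk above replaces exact type comparisons (type(el) == ToolCall) with isinstance checks, which satisfies the common E721 lint rule and lets a tuple cover several types at once, as in the NullAction/ChangeAgentStateAction branch. This is not purely cosmetic: isinstance also accepts subclasses, while the == comparison does not. A minimal sketch with hypothetical classes:

class ToolCall:
    pass

class TimedToolCall(ToolCall):
    pass

el = TimedToolCall()

print(type(el) == ToolCall)             # False: exact type check rejects subclasses
print(isinstance(el, ToolCall))         # True: isinstance accepts subclasses
print(isinstance(el, (ToolCall, str)))  # True: a tuple checks several types at once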

View File

@@ -7,14 +7,12 @@ from typing import Callable, Iterable
import socketio
from openhands.core.config.app_config import AppConfig
from openhands.core.config.llm_config import LLMConfig
from openhands.core.exceptions import AgentRuntimeUnavailableError
from openhands.core.logger import openhands_logger as logger
from openhands.core.schema.agent import AgentState
from openhands.events.action import MessageAction
from openhands.events.event import EventSource
from openhands.events.event_store import EventStore
from openhands.events.stream import EventStream, EventStreamSubscriber, session_exists
from openhands.events.stream import EventStreamSubscriber, session_exists
from openhands.server.config.server_config import ServerConfig
from openhands.server.monitoring import MonitoringListener
from openhands.server.session.agent_session import WAIT_TIME_BEFORE_CLOSE
@@ -25,7 +23,10 @@ from openhands.storage.data_models.conversation_metadata import ConversationMeta
from openhands.storage.data_models.settings import Settings
from openhands.storage.files import FileStore
from openhands.utils.async_utils import GENERAL_TIMEOUT, call_async_from_sync, wait_all
from openhands.utils.conversation_summary import get_default_conversation_title, auto_generate_title
from openhands.utils.conversation_summary import (
auto_generate_title,
get_default_conversation_title,
)
from openhands.utils.import_utils import get_impl
from openhands.utils.shutdown_listener import should_continue
@@ -208,7 +209,6 @@ class StandaloneConversationManager(ConversationManager):
store = await conversation_store_class.get_instance(self.config, user_id)
return store
async def get_running_agent_loops(
self, user_id: str | None = None, filter_to_sids: set[str] | None = None
) -> set[str]:
@@ -287,7 +287,7 @@ class StandaloneConversationManager(ConversationManager):
response_ids = await self.get_running_agent_loops(user_id)
if len(response_ids) >= self.config.max_concurrent_conversations:
logger.info(
f'too_many_sessions_for:{user_id or ''}',
f'too_many_sessions_for:{user_id or ""}',
extra={'session_id': sid, 'user_id': user_id},
)
# Get the conversations sorted (oldest first)
@@ -300,7 +300,7 @@ class StandaloneConversationManager(ConversationManager):
while len(conversations) >= self.config.max_concurrent_conversations:
oldest_conversation_id = conversations.pop().conversation_id
logger.debug(
f'closing_from_too_many_sessions:{user_id or ''}:{oldest_conversation_id}',
f'closing_from_too_many_sessions:{user_id or ""}:{oldest_conversation_id}',
extra={'session_id': oldest_conversation_id, 'user_id': user_id},
)
# Send status message to client and close session.
@@ -332,7 +332,9 @@ class StandaloneConversationManager(ConversationManager):
try:
session.agent_session.event_stream.subscribe(
EventStreamSubscriber.SERVER,
self._create_conversation_update_callback(user_id, github_user_id, sid, settings),
self._create_conversation_update_callback(
user_id, github_user_id, sid, settings
),
UPDATED_AT_CALLBACK_ID,
)
except ValueError:
@@ -429,7 +431,11 @@ class StandaloneConversationManager(ConversationManager):
)
def _create_conversation_update_callback(
self, user_id: str | None, github_user_id: str | None, conversation_id: str, settings: Settings
self,
user_id: str | None,
github_user_id: str | None,
conversation_id: str,
settings: Settings,
) -> Callable:
def callback(event, *args, **kwargs):
call_async_from_sync(
@@ -444,9 +450,13 @@ class StandaloneConversationManager(ConversationManager):
return callback
async def _update_conversation_for_event(
self, user_id: str, github_user_id: str, conversation_id: str, settings: Settings, event=None
self,
user_id: str,
github_user_id: str,
conversation_id: str,
settings: Settings,
event=None,
):
conversation_store = await self._get_conversation_store(user_id, github_user_id)
conversation = await conversation_store.get_metadata(conversation_id)
@@ -469,8 +479,12 @@ class StandaloneConversationManager(ConversationManager):
token_usage.prompt_tokens + token_usage.completion_tokens
)
default_title = get_default_conversation_title(conversation_id)
if conversation.title == default_title: # attempt to autogenerate if default title is in use
title = await auto_generate_title(conversation_id, user_id, self.file_store, settings)
if (
conversation.title == default_title
): # attempt to autogenerate if default title is in use
title = await auto_generate_title(
conversation_id, user_id, self.file_store, settings
)
if title and not title.isspace():
conversation.title = title
try:

View File

@@ -27,7 +27,7 @@ def store_feedback(feedback: FeedbackDataModel) -> dict[str, str]:
display_feedback = feedback.model_dump()
if 'trajectory' in display_feedback:
display_feedback['trajectory'] = (
f"elided [length: {len(display_feedback['trajectory'])}"
f'elided [length: {len(display_feedback["trajectory"])}'
)
if 'token' in display_feedback:
display_feedback['token'] = 'elided'

View File

@@ -1,4 +1,3 @@
import asyncio
import uuid
from datetime import datetime, timezone
from typing import Any

View File

@@ -1,15 +1,21 @@
[build-system]
build-backend = "poetry.core.masonry.api"
requires = [
"poetry-core",
]
[tool.poetry]
name = "openhands-ai"
version = "0.37.0"
description = "OpenHands: Code Less, Make More"
authors = ["OpenHands"]
authors = [ "OpenHands" ]
license = "MIT"
readme = "README.md"
repository = "https://github.com/All-Hands-AI/OpenHands"
packages = [
{ include = "openhands/**/*" },
{ include = "pyproject.toml", to = "openhands" },
{ include = "poetry.lock", to = "openhands" }
{ include = "poetry.lock", to = "openhands" },
]
[tool.poetry.dependencies]
@@ -40,7 +46,7 @@ tenacity = ">=8.5,<10.0"
zope-interface = "7.2"
pathspec = "^0.12.1"
google-cloud-aiplatform = "*"
anthropic = {extras = ["vertex"], version = "*"}
anthropic = { extras = [ "vertex" ], version = "*" }
tree-sitter = "^0.24.0"
bashlex = "^0.18"
pyjwt = "^2.9.0"
@@ -97,39 +103,12 @@ pandas = "*"
reportlab = "*"
gevent = ">=24.2.1,<26.0.0"
[tool.coverage.run]
concurrency = ["gevent"]
[tool.poetry.group.runtime.dependencies]
jupyterlab = "*"
notebook = "*"
jupyter_kernel_gateway = "*"
flake8 = "*"
[build-system]
build-backend = "poetry.core.masonry.api"
requires = [
"poetry-core",
]
[tool.autopep8]
# autopep8 fights with mypy on line length issue
ignore = [ "E501" ]
[tool.black]
# prevent black (if installed) from changing single quotes to double quotes
skip-string-normalization = true
[tool.ruff.lint]
select = ["D"]
# ignore warnings for missing docstrings
ignore = ["D1"]
[tool.ruff.lint.pydocstyle]
convention = "google"
[tool.poetry.group.evaluation.dependencies]
streamlit = "*"
whatthepatch = "*"
@@ -148,14 +127,10 @@ browsergym = "0.13.3"
browsergym-webarena = "0.13.3"
browsergym-miniwob = "0.13.3"
browsergym-visualwebarena = "0.13.3"
boto3-stubs = {extras = ["s3"], version = "^1.37.19"}
boto3-stubs = { extras = [ "s3" ], version = "^1.37.19" }
pyarrow = "20.0.0" # transitive dependency, pinned here to avoid conflicts
datasets = "*"
[tool.poetry-dynamic-versioning]
enable = true
style = "semver"
[tool.poetry.scripts]
openhands = "openhands.core.cli:main"
@@ -164,3 +139,24 @@ fuzzywuzzy = "^0.18.0"
rouge = "^1.0.1"
python-levenshtein = ">=0.26.1,<0.28.0"
tree-sitter-python = "^0.23.6"
[tool.poetry-dynamic-versioning]
enable = true
style = "semver"
[tool.autopep8]
# autopep8 fights with mypy on line length issue
ignore = [ "E501" ]
[tool.black]
# prevent black (if installed) from changing single quotes to double quotes
skip-string-normalization = true
[tool.ruff]
lint.select = [ "D" ]
# ignore warnings for missing docstrings
lint.ignore = [ "D1" ]
lint.pydocstyle.convention = "google"
[tool.coverage.run]
concurrency = [ "gevent" ]

View File

@@ -760,9 +760,9 @@ def test_python_version(temp_dir, runtime_cls, run_as_openhands):
try:
obs = runtime.run_action(CmdRunAction(command='python --version'))
assert isinstance(
obs, CmdOutputObservation
), 'The observation should be a CmdOutputObservation.'
assert isinstance(obs, CmdOutputObservation), (
'The observation should be a CmdOutputObservation.'
)
assert obs.exit_code == 0, 'The exit code should be 0.'
assert 'Python 3' in obs.content, 'The output should contain "Python 3".'
finally:

View File

@@ -25,9 +25,9 @@ def test_env_vars_os_environ(temp_dir, runtime_cls, run_as_openhands):
)
print(obs)
assert obs.exit_code == 0, 'The exit code should be 0.'
assert (
obs.content.strip().split('\n\r')[0].strip() == 'BAZ'
), f'Output: [{obs.content}] for {runtime_cls}'
assert obs.content.strip().split('\n\r')[0].strip() == 'BAZ', (
f'Output: [{obs.content}] for {runtime_cls}'
)
_close_test_runtime(runtime)

View File

@@ -168,9 +168,9 @@ def test_grep_to_cmdrun_paths_with_spaces(runtime_cls, run_as_openhands, temp_di
obs = _run_cmd_action(runtime, cmd)
assert obs.exit_code == 0, f'Grep command failed for path: {path}'
assert (
'function' in obs.content
), f'Expected pattern not found in output for path: {path}'
assert 'function' in obs.content, (
f'Expected pattern not found in output for path: {path}'
)
# Verify the actual file was found
if path == 'src/my project':

View File

@@ -77,9 +77,9 @@ def test_simple_cmd_ipython_and_fileop(temp_dir, runtime_cls, run_as_openhands):
action_read = FileReadAction(path='hello.sh')
logger.info(action_read, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action_read)
assert isinstance(
obs, FileReadObservation
), 'The observation should be a FileReadObservation.'
assert isinstance(obs, FileReadObservation), (
'The observation should be a FileReadObservation.'
)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert obs.content == 'echo "Hello, World!"\n'

View File

@@ -39,9 +39,9 @@ def test_edit_from_scratch(temp_dir, runtime_cls, run_as_openhands):
obs = runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert isinstance(
obs, FileEditObservation
), 'The observation should be a FileEditObservation.'
assert isinstance(obs, FileEditObservation), (
'The observation should be a FileEditObservation.'
)
action = FileReadAction(
path=os.path.join('/workspace', 'app.py'),
@@ -78,9 +78,9 @@ def test_edit(temp_dir, runtime_cls, run_as_openhands):
obs = runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert isinstance(
obs, FileEditObservation
), 'The observation should be a FileEditObservation.'
assert isinstance(obs, FileEditObservation), (
'The observation should be a FileEditObservation.'
)
action = FileReadAction(
path=os.path.join('/workspace', 'app.py'),
@@ -138,9 +138,9 @@ def test_edit_long_file(temp_dir, runtime_cls, run_as_openhands):
obs = runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert isinstance(
obs, FileEditObservation
), 'The observation should be a FileEditObservation.'
assert isinstance(obs, FileEditObservation), (
'The observation should be a FileEditObservation.'
)
action = FileReadAction(
path=os.path.join('/workspace', 'app.py'),

View File

@@ -23,9 +23,9 @@ from openhands.events.observation import CmdOutputObservation, MCPObservation
def test_default_activated_tools():
project_root = os.path.dirname(openhands.__file__)
mcp_config_path = os.path.join(project_root, 'runtime', 'mcp', 'config.json')
assert os.path.exists(
mcp_config_path
), f'MCP config file not found at {mcp_config_path}'
assert os.path.exists(mcp_config_path), (
f'MCP config file not found at {mcp_config_path}'
)
with open(mcp_config_path, 'r') as f:
mcp_config = json.load(f)
assert 'default' in mcp_config
@@ -63,9 +63,9 @@ async def test_fetch_mcp_via_stdio(temp_dir, runtime_cls, run_as_openhands):
mcp_action = MCPAction(name='fetch', arguments={'url': 'http://localhost:8000'})
obs = await runtime.call_tool_mcp(mcp_action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert isinstance(
obs, MCPObservation
), 'The observation should be a MCPObservation.'
assert isinstance(obs, MCPObservation), (
'The observation should be a MCPObservation.'
)
result_json = json.loads(obs.content)
assert not result_json['isError']

View File

@@ -468,9 +468,9 @@ def test_stress_runtime_memory_limits_with_repeated_file_edit():
new_str=f'-content_{i:03d}',
)
obs = runtime.run_action(edit_action)
assert (
f'The file {test_file} has been edited' in obs.content
), f'Edit failed at iteration {i}'
assert f'The file {test_file} has been edited' in obs.content, (
f'Edit failed at iteration {i}'
)
logger.info(f'finished iteration {i}')
# Verify final file state using FileEditAction view command

View File

@@ -240,7 +240,9 @@ def test_guess_success_rate_limit_wait_time(mock_litellm_completion, default_con
wait_time = mock_sleep.call_args[0][0]
assert (
default_config.retry_min_wait <= wait_time <= default_config.retry_max_wait
), f'Expected wait time between {default_config.retry_min_wait} and {default_config.retry_max_wait} seconds, but got {wait_time}'
), (
f'Expected wait time between {default_config.retry_min_wait} and {default_config.retry_max_wait} seconds, but got {wait_time}'
)
@patch('openhands.llm.llm.litellm_completion')

View File

@@ -71,9 +71,9 @@ def test_pr_title_with_quotes(monkeypatch):
data = kwargs.get('json', {})
title = data.get('title', '')
expected = "Fix issue #123: Issue with 'quotes' and \"double quotes\" and <class 'ValueError'>"
assert (
title == expected
), f'PR title was incorrectly escaped.\nExpected: {expected}\nGot: {title}'
assert title == expected, (
f'PR title was incorrectly escaped.\nExpected: {expected}\nGot: {title}'
)
return MockResponse()
class MockGetResponse:
@@ -98,7 +98,7 @@ def test_pr_title_with_quotes(monkeypatch):
original_run = subprocess.run
def mock_run(*args, **kwargs):
print(f"Running command: {args[0] if args else kwargs.get('args', [])}")
print(f'Running command: {args[0] if args else kwargs.get("args", [])}')
if isinstance(args[0], list) and args[0][0] == 'git':
if 'push' in args[0]:
return subprocess.CompletedProcess(

View File

@@ -478,13 +478,14 @@ async def test_process_issue(
mock_run_controller.return_value = test_case['run_controller_return']
# Patch the necessary functions and methods
with patch(
'openhands.resolver.resolve_issue.create_runtime', mock_create_runtime
), patch(
'openhands.resolver.resolve_issue.run_controller', mock_run_controller
), patch.object(
with (
patch('openhands.resolver.resolve_issue.create_runtime', mock_create_runtime),
patch('openhands.resolver.resolve_issue.run_controller', mock_run_controller),
patch.object(
resolver, 'complete_runtime', return_value={'git_patch': 'test patch'}
), patch.object(resolver, 'initialize_runtime') as mock_initialize_runtime:
),
patch.object(resolver, 'initialize_runtime') as mock_initialize_runtime,
):
# Call the process_issue method
result = await resolver.process_issue(issue, base_commit, handler_instance)
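
Note: the hunk above regroups a chain of patch(...) context managers into a single parenthesized with statement, which Python officially supports from 3.10 onwards; the formatter can then place one context manager per line instead of relying on nested parentheses. A minimal sketch using contextlib.nullcontext as a stand-in for the mock patches:

from contextlib import nullcontext

# Before: all context managers on one logical line
with nullcontext() as first, nullcontext() as second:
    pass

# After: the whole group is parenthesized, one manager per line (Python 3.10+)
with (
    nullcontext() as first,
    nullcontext() as second,
):
    pass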

View File

@@ -142,9 +142,9 @@ index 9daeafb..b02def2 100644
with open(dos_file, 'rb') as f:
dos_content = f.read()
assert (
b'\r\n' not in unix_content
), 'Unix-style line endings were changed to DOS-style'
assert b'\r\n' not in unix_content, (
'Unix-style line endings were changed to DOS-style'
)
assert b'\r\n' in dos_content, 'DOS-style line endings were changed to Unix-style'
# Check if content was updated correctly

View File

@@ -242,7 +242,9 @@ def test_guess_success_rate_limit_wait_time(mock_litellm_completion, default_con
wait_time = mock_sleep.call_args[0][0]
assert (
default_config.retry_min_wait <= wait_time <= default_config.retry_max_wait
), f'Expected wait time between {default_config.retry_min_wait} and {default_config.retry_max_wait} seconds, but got {wait_time}'
), (
f'Expected wait time between {default_config.retry_min_wait} and {default_config.retry_max_wait} seconds, but got {wait_time}'
)
@patch('openhands.llm.llm.litellm_completion')

View File

@@ -72,9 +72,9 @@ def test_pr_title_with_quotes(monkeypatch):
data = kwargs.get('json', {})
title = data.get('title', '')
expected = "Fix issue #123: Issue with 'quotes' and \"double quotes\" and <class 'ValueError'>"
assert (
title == expected
), f'PR title was incorrectly escaped.\nExpected: {expected}\nGot: {title}'
assert title == expected, (
f'PR title was incorrectly escaped.\nExpected: {expected}\nGot: {title}'
)
return MockResponse()
class MockGetResponse:
@@ -99,7 +99,7 @@ def test_pr_title_with_quotes(monkeypatch):
original_run = subprocess.run
def mock_run(*args, **kwargs):
logger.info(f"Running command: {args[0] if args else kwargs.get('args', [])}")
logger.info(f'Running command: {args[0] if args else kwargs.get("args", [])}')
if isinstance(args[0], list) and args[0][0] == 'git':
if 'push' in args[0]:
return subprocess.CompletedProcess(

View File

@@ -506,15 +506,18 @@ async def test_process_issue(
mock_run_controller.return_value = test_case['run_controller_return']
# Patch the necessary functions and methods
with patch(
'openhands.resolver.resolve_issue.create_runtime', mock_create_runtime
), patch(
'openhands.resolver.resolve_issue.run_controller', mock_run_controller
), patch.object(
with (
patch('openhands.resolver.resolve_issue.create_runtime', mock_create_runtime),
patch('openhands.resolver.resolve_issue.run_controller', mock_run_controller),
patch.object(
resolver, 'complete_runtime', return_value={'git_patch': 'test patch'}
), patch.object(resolver, 'initialize_runtime') as mock_initialize_runtime, patch(
),
patch.object(resolver, 'initialize_runtime') as mock_initialize_runtime,
patch(
'openhands.resolver.resolve_issue.SandboxConfig', return_value=MagicMock()
), patch('openhands.resolver.resolve_issue.AppConfig', return_value=MagicMock()):
),
patch('openhands.resolver.resolve_issue.AppConfig', return_value=MagicMock()),
):
# Call the process_issue method
result = await resolver.process_issue(issue, base_commit, handler_instance)

Some files were not shown because too many files have changed in this diff.