Fix style issues with pre-commit (#7318)

Co-authored-by: openhands <openhands@all-hands.dev>
Author: Engel Nyst
Date: 2025-03-18 02:34:27 +01:00
Committed by: GitHub
Parent: f1149defc9
Commit: 83458f5146

8 changed files with 55 additions and 50 deletions


@@ -1,8 +1,4 @@
 import math
-import os
-from pathlib import Path
-from tree_sitter import Language, Parser
 def total_byte_entropy_stats(python_code):
@@ -324,8 +320,8 @@ def compute_regression(results):
 def compute_readability(python_code):
     # Create parser and set up language
     import tree_sitter_python
-    from tree_sitter import Parser, Language
+    from tree_sitter import Language, Parser
     parser = Parser(Language(tree_sitter_python.language()))
     results = code_stats(python_code)
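As a side note, a minimal sketch of how a parser constructed this way is used; this is illustrative only and assumes the modern py-tree-sitter bindings that the Parser(Language(...)) call above implies:

    import tree_sitter_python
    from tree_sitter import Language, Parser

    # Build a Python parser the same way compute_readability does above.
    parser = Parser(Language(tree_sitter_python.language()))

    # parse() takes a bytes object and returns a syntax tree.
    tree = parser.parse(b'def f():\n    return 1\n')
    print(tree.root_node.type)  # 'module' is the root node of the Python grammar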


@@ -6,12 +6,11 @@ import numpy as np
 from fuzzywuzzy import fuzz
 from rouge import Rouge
 # increase recursion depth to ensure ROUGE can be calculated for long sentences
 if sys.getrecursionlimit() < 10_000:
     sys.setrecursionlimit(10_000)
 def bleu(gold: List[str], pred: List[str]) -> float:
     """
     Calculate BLEU score, using smoothing method 2 with auto reweighting, in the range of 0~100.
@@ -39,7 +38,7 @@ def batch_bleu(golds: List[List[str]], preds: List[List[str]]) -> List[float]:
     :return: list of BLEU scores
     """
     if len(golds) != len(preds):
-        raise ValueError("golds and preds must have the same length")
+        raise ValueError('golds and preds must have the same length')
     return [bleu(gold, pred) for gold, pred in zip(golds, preds)]
@@ -52,7 +51,7 @@ def corpus_bleu(golds: List[List[str]], preds: List[List[str]]) -> float:
     :return: corpus-level BLEU score
     """
     if len(golds) != len(preds):
-        raise ValueError("golds and preds must have the same length")
+        raise ValueError('golds and preds must have the same length')
     return 100.0 * nltk.translate.bleu_score.corpus_bleu(
         [[gold] for gold in golds],
         preds,
@@ -62,7 +61,7 @@ def corpus_bleu(golds: List[List[str]], preds: List[List[str]]) -> float:
 def edit_sim(
-    gold: Union[str, List[str]], pred: Union[str, List[str]], sep: str = " "
+    gold: Union[str, List[str]], pred: Union[str, List[str]], sep: str = ' '
 ) -> float:
     """
     Calculate char-level edit similarity, in the range of 0~100.
@@ -84,7 +83,7 @@ def edit_sim(
 def batch_edit_sim(
     golds: List[Union[str, List[str]]],
     preds: List[Union[str, List[str]]],
-    sep: str = " ",
+    sep: str = ' ',
 ) -> List[float]:
     """
     Calculate char-level edit similarity for a batch of sentences.
@@ -95,11 +94,11 @@ def batch_edit_sim(
     :return: list of char-level edit similarity
     """
     if len(golds) != len(preds):
-        raise ValueError("golds and preds must have the same length")
+        raise ValueError('golds and preds must have the same length')
     return [edit_sim(gold, pred, sep) for gold, pred in zip(golds, preds)]
-T = TypeVar("T")
+T = TypeVar('T')
 def exact_match(gold: T, pred: T) -> float:
@@ -124,12 +123,12 @@ def batch_exact_match(golds: List[T], preds: List[T]) -> List[float]:
     :return: list of exact match accuracy
     """
     if len(golds) != len(preds):
-        raise ValueError("golds and preds must have the same length")
+        raise ValueError('golds and preds must have the same length')
     return [exact_match(gold, pred) for gold, pred in zip(golds, preds)]
 def rouge_l(
-    gold: Union[str, List[str]], pred: Union[str, List[str]], sep: str = " "
+    gold: Union[str, List[str]], pred: Union[str, List[str]], sep: str = ' '
 ) -> Dict[str, float]:
     """
     Calculate ROUGE-L F1, precision, and recall scores, in the range of 0~100.
@@ -139,7 +138,7 @@ def rouge_l(
     :return: {"p": precision, "r": recall, "f": F1}
     """
     if len(pred) == 0 or len(gold) == 0:
-        return {"p": 0.0, "r": 0.0, "f": 0.0}
+        return {'p': 0.0, 'r': 0.0, 'f': 0.0}
     if isinstance(gold, list):
         gold = sep.join(gold)
     if isinstance(pred, list):
@@ -147,15 +146,15 @@ def rouge_l(
     try:
         rouge = Rouge()
         scores = rouge.get_scores(hyps=pred, refs=gold, avg=True)
-        return {x: scores["rouge-l"][x] * 100.0 for x in ["p", "r", "f"]}
+        return {x: scores['rouge-l'][x] * 100.0 for x in ['p', 'r', 'f']}
     except ValueError:
-        return {"p": 0.0, "r": 0.0, "f": 0.0}
+        return {'p': 0.0, 'r': 0.0, 'f': 0.0}
 def batch_rouge_l(
     golds: List[Union[str, List[str]]],
     preds: List[Union[str, List[str]]],
-    sep: str = " ",
+    sep: str = ' ',
 ) -> Dict[str, List[float]]:
     """
     Calculate ROUGE-L F1, precision, and recall scores for a batch of sentences.
@@ -166,9 +165,9 @@ def batch_rouge_l(
     :return: list of {"p": precision, "r": recall, "f": F1}
     """
     if len(golds) != len(preds):
-        raise ValueError("golds and preds must have the same length")
+        raise ValueError('golds and preds must have the same length')
     scores = [rouge_l(gold, pred, sep) for gold, pred in zip(golds, preds)]
-    return {x: [score[x] for score in scores] for x in ["p", "r", "f"]}
+    return {x: [score[x] for score in scores] for x in ['p', 'r', 'f']}
 def accuracy(
@@ -220,7 +219,7 @@ def batch_accuracy(
     :return: list of accuracy
     """
     if len(golds) != len(preds):
-        raise ValueError("golds and preds must have the same length")
+        raise ValueError('golds and preds must have the same length')
     return [accuracy(gold, pred, ignore) for gold, pred in zip(golds, preds)]
@@ -274,7 +273,7 @@ def self_bleu(samples: List[List[str]]) -> float:
     return np.mean(scores).item()
-def self_edit_distance(samples: List[Union[str, List[str]]], sep=" ") -> float:
+def self_edit_distance(samples: List[Union[str, List[str]]], sep=' ') -> float:
     """
     Calculate self-edit-distance among the samples.
     :param samples: the chosen m samples
@@ -300,12 +299,11 @@ def self_edit_distance(samples: List[Union[str, List[str]]], sep=" ") -> float:
     return np.mean(scores).item()
 QUALITY_METRICS: Dict[str, Callable[[List[str], List[str]], float]] = {
-    "bleu": bleu,
-    "xmatch": exact_match,
-    "edit-sim": edit_sim,
-    "rouge-f": lambda g, p: rouge_l(g, p)["f"],
-    "rouge-p": lambda g, p: rouge_l(g, p)["p"],
-    "rouge-r": lambda g, p: rouge_l(g, p)["r"],
+    'bleu': bleu,
+    'xmatch': exact_match,
+    'edit-sim': edit_sim,
+    'rouge-f': lambda g, p: rouge_l(g, p)['f'],
+    'rouge-p': lambda g, p: rouge_l(g, p)['p'],
+    'rouge-r': lambda g, p: rouge_l(g, p)['r'],
 }
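As a quick, illustrative usage sketch (not part of the commit): the QUALITY_METRICS mapping above takes tokenized gold/prediction pairs, and every metric in this module returns a float in the range 0~100.

    gold = ['return', 'x', '+', '1']
    pred = ['return', 'x', '+', '1']

    # Each entry maps a metric name to a callable over (gold, pred) token lists.
    for name, metric in QUALITY_METRICS.items():
        print(f'{name}: {metric(gold, pred):.1f}')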


@@ -1,30 +1,41 @@
 import re
 from pygments.lexers.python import PythonLexer
 def tokenize_code(code):
     lexer = PythonLexer()
     tokens = process_pygments_tokens(lexer.get_tokens(code))
     return tokens
 def process_pygments_tokens(tokens):
     new_tokens = []
     for token in tokens:
-        if str(token[0]) == "Token.Text" and re.match(r'\s+', token[1]) or str(token[0]) == "Token.Text.Whitespace":
+        if (
+            str(token[0]) == 'Token.Text'
+            and re.match(r'\s+', token[1])
+            or str(token[0]) == 'Token.Text.Whitespace'
+        ):
             continue
         new_tokens.append(token[1])
     new_tokens_final = []
     i = 0
-    while i < len(new_tokens)-2:
-        if new_tokens[i] == '"' and new_tokens[i+1]=='STR' and new_tokens[i+2] == '"':
-            new_tokens_final.append("\"STR\"")
+    while i < len(new_tokens) - 2:
+        if (
+            new_tokens[i] == '"'
+            and new_tokens[i + 1] == 'STR'
+            and new_tokens[i + 2] == '"'
+        ):
+            new_tokens_final.append('"STR"')
             i = i + 3
         else:
             new_tokens_final.append(new_tokens[i])
             i = i + 1
-    for i in range(len(new_tokens)-2, len(new_tokens)):
+    for i in range(len(new_tokens) - 2, len(new_tokens)):
         if i >= 0:
             new_tokens_final.append(new_tokens[i])
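A hypothetical round trip through this tokenizer, for illustration only; it assumes process_pygments_tokens returns new_tokens_final (the truncated hunk does not show the return) and that 'STR' is the placeholder this code expects inside string literals:

    tokens = tokenize_code('x = "STR" + y')
    # Whitespace tokens are dropped and the '"', 'STR', '"' triple is merged,
    # so this should print ['x', '=', '"STR"', '+', 'y'].
    print(tokens)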


@@ -8,7 +8,6 @@ import os
 import pandas as pd
 from tqdm import tqdm
-from evaluation.testgeneval.eval_infer import process_test_suite
 from openhands.events.serialization import event_from_dict
 tqdm.pandas()


@@ -20,7 +20,8 @@ print(
     f'Downloading gold test suites from {args.dataset_name} (split: {args.split}) to {output_filepath}'
 )
 test_suites = [
-    {'instance_id': row['instance_id'], 'test_suite': row['test_src']} for row in dataset
+    {'instance_id': row['instance_id'], 'test_suite': row['test_src']}
+    for row in dataset
 ]
 print(f'{len(test_suites)} test suites loaded')
 pd.DataFrame(test_suites).to_json(output_filepath, lines=True, orient='records')
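For reference, to_json with lines=True and orient='records' writes one JSON object per line (JSONL); a sketch of reading the file back, illustrative only and reusing output_filepath from the script above:

    import pandas as pd

    # One {'instance_id': ..., 'test_suite': ...} record per line.
    df = pd.read_json(output_filepath, lines=True, orient='records')
    print(df.columns.tolist())  # ['instance_id', 'test_suite']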


@@ -90,9 +90,7 @@ if __name__ == '__main__':
             break
     # print the error counter (with percentage)
-    print(
-        f'Average coverage for {num_lines} ({coverage / num_lines * 100:.2f}%)'
-    )
+    print(f'Average coverage for {num_lines} ({coverage / num_lines * 100:.2f}%)')
     print(
         f'Average mutation score for {num_lines} ({mutation_score / num_lines * 100:.2f}%)'
     )


@@ -79,7 +79,7 @@ describe("Actions Service", () => {
     // Mock implementation to capture the message
     let capturedPartialMessage = "";
     (store.dispatch as any).mockImplementation((action: any) => {
-      if (action.type === "chat/addAssistantMessage" && 
+      if (action.type === "chat/addAssistantMessage" &&
          action.payload.includes("believe that the task was **completed partially**")) {
        capturedPartialMessage = action.payload;
      }
@@ -87,7 +87,7 @@ describe("Actions Service", () => {
     handleActionMessage(messagePartial);
     expect(capturedPartialMessage).toContain("I believe that the task was **completed partially**");
-    // Test not completed 
+    // Test not completed
     const messageNotCompleted: ActionMessage = {
       id: 2,
@@ -106,7 +106,7 @@ describe("Actions Service", () => {
     // Mock implementation to capture the message
     let capturedNotCompletedMessage = "";
     (store.dispatch as any).mockImplementation((action: any) => {
-      if (action.type === "chat/addAssistantMessage" && 
+      if (action.type === "chat/addAssistantMessage" &&
          action.payload.includes("believe that the task was **not completed**")) {
        capturedNotCompletedMessage = action.payload;
      }
@@ -114,7 +114,7 @@ describe("Actions Service", () => {
     handleActionMessage(messageNotCompleted);
     expect(capturedNotCompletedMessage).toContain("I believe that the task was **not completed**");
-
+
     // Test completed successfully
     const messageCompleted: ActionMessage = {
       id: 3,
@@ -133,7 +133,7 @@ describe("Actions Service", () => {
     // Mock implementation to capture the message
     let capturedCompletedMessage = "";
     (store.dispatch as any).mockImplementation((action: any) => {
-      if (action.type === "chat/addAssistantMessage" && 
+      if (action.type === "chat/addAssistantMessage" &&
          action.payload.includes("believe that the task was **completed successfully**")) {
        capturedCompletedMessage = action.payload;
      }


@@ -65,7 +65,9 @@ async def get_github_user(
     access_token: SecretStr | None = Depends(get_access_token),
 ):
     if provider_tokens:
-        client = ProviderHandler(provider_tokens=provider_tokens, external_auth_token=access_token)
+        client = ProviderHandler(
+            provider_tokens=provider_tokens, external_auth_token=access_token
+        )
         try:
             user: User = await client.get_user()
@@ -164,7 +166,7 @@ async def search_github_repositories(
 @app.get('/suggested-tasks', response_model=list[SuggestedTask])
 async def get_suggested_tasks(
     provider_tokens: PROVIDER_TOKEN_TYPE | None = Depends(get_provider_tokens),
-    access_token: SecretStr | None = Depends(get_access_token)
+    access_token: SecretStr | None = Depends(get_access_token),
 ):
     """Get suggested tasks for the authenticated user across their most recently pushed repositories.