Mirror of https://github.com/All-Hands-AI/OpenHands.git
Fix style issues with pre-commit (#7318)
Co-authored-by: openhands <openhands@all-hands.dev>
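The hunks below are mechanical style fixes of the kind produced by the repository's pre-commit hooks: double quotes normalized to single quotes, imports sorted, over-long lines wrapped, and unused imports and redundant blank lines removed. As a minimal sketch (assuming the standard pre-commit CLI is installed and configured for this repo), fixes like these can be reproduced locally with:

import subprocess

# Run every configured hook against the entire tree. Formatter hooks exit
# nonzero when they rewrite files, so a nonzero return code is expected on
# the first pass and is not treated as fatal here.
subprocess.run(['pre-commit', 'run', '--all-files'], check=False)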
@@ -1,8 +1,4 @@
 import math
-import os
-from pathlib import Path
-
-from tree_sitter import Language, Parser
 
 
 def total_byte_entropy_stats(python_code):
@@ -324,8 +320,8 @@ def compute_regression(results):
 def compute_readability(python_code):
     # Create parser and set up language
     import tree_sitter_python
-    from tree_sitter import Parser, Language
+    from tree_sitter import Language, Parser
 
     parser = Parser(Language(tree_sitter_python.language()))
 
     results = code_stats(python_code)
@@ -6,12 +6,11 @@ import numpy as np
 from fuzzywuzzy import fuzz
 from rouge import Rouge
 
-
 # increase recursion depth to ensure ROUGE can be calculated for long sentences
 if sys.getrecursionlimit() < 10_000:
     sys.setrecursionlimit(10_000)
 
 
 def bleu(gold: List[str], pred: List[str]) -> float:
     """
     Calculate BLEU score, using smoothing method 2 with auto reweighting, in the range of 0~100.
@@ -39,7 +38,7 @@ def batch_bleu(golds: List[List[str]], preds: List[List[str]]) -> List[float]:
     :return: list of BLEU scores
     """
     if len(golds) != len(preds):
-        raise ValueError("golds and preds must have the same length")
+        raise ValueError('golds and preds must have the same length')
     return [bleu(gold, pred) for gold, pred in zip(golds, preds)]
 
 
@@ -52,7 +51,7 @@ def corpus_bleu(golds: List[List[str]], preds: List[List[str]]) -> float:
     :return: corpus-level BLEU score
     """
     if len(golds) != len(preds):
-        raise ValueError("golds and preds must have the same length")
+        raise ValueError('golds and preds must have the same length')
     return 100.0 * nltk.translate.bleu_score.corpus_bleu(
         [[gold] for gold in golds],
         preds,
@@ -62,7 +61,7 @@ def corpus_bleu(golds: List[List[str]], preds: List[List[str]]) -> float:
 
 
 def edit_sim(
-    gold: Union[str, List[str]], pred: Union[str, List[str]], sep: str = " "
+    gold: Union[str, List[str]], pred: Union[str, List[str]], sep: str = ' '
 ) -> float:
     """
     Calculate char-level edit similarity, in the range of 0~100.
@@ -84,7 +83,7 @@ def edit_sim(
 def batch_edit_sim(
     golds: List[Union[str, List[str]]],
     preds: List[Union[str, List[str]]],
-    sep: str = " ",
+    sep: str = ' ',
 ) -> List[float]:
     """
     Calculate char-level edit similarity for a batch of sentences.
@@ -95,11 +94,11 @@ def batch_edit_sim(
     :return: list of char-level edit similarity
     """
     if len(golds) != len(preds):
-        raise ValueError("golds and preds must have the same length")
+        raise ValueError('golds and preds must have the same length')
     return [edit_sim(gold, pred, sep) for gold, pred in zip(golds, preds)]
 
 
-T = TypeVar("T")
+T = TypeVar('T')
 
 
 def exact_match(gold: T, pred: T) -> float:
@@ -124,12 +123,12 @@ def batch_exact_match(golds: List[T], preds: List[T]) -> List[float]:
     :return: list of exact match accuracy
     """
     if len(golds) != len(preds):
-        raise ValueError("golds and preds must have the same length")
+        raise ValueError('golds and preds must have the same length')
     return [exact_match(gold, pred) for gold, pred in zip(golds, preds)]
 
 
 def rouge_l(
-    gold: Union[str, List[str]], pred: Union[str, List[str]], sep: str = " "
+    gold: Union[str, List[str]], pred: Union[str, List[str]], sep: str = ' '
 ) -> Dict[str, float]:
     """
     Calculate ROUGE-L F1, precision, and recall scores, in the range of 0~100.
@@ -139,7 +138,7 @@ def rouge_l(
     :return: {"p": precision, "r": recall, "f": F1}
     """
     if len(pred) == 0 or len(gold) == 0:
-        return {"p": 0.0, "r": 0.0, "f": 0.0}
+        return {'p': 0.0, 'r': 0.0, 'f': 0.0}
     if isinstance(gold, list):
         gold = sep.join(gold)
     if isinstance(pred, list):
@@ -147,15 +146,15 @@ def rouge_l(
     try:
         rouge = Rouge()
         scores = rouge.get_scores(hyps=pred, refs=gold, avg=True)
-        return {x: scores["rouge-l"][x] * 100.0 for x in ["p", "r", "f"]}
+        return {x: scores['rouge-l'][x] * 100.0 for x in ['p', 'r', 'f']}
     except ValueError:
-        return {"p": 0.0, "r": 0.0, "f": 0.0}
+        return {'p': 0.0, 'r': 0.0, 'f': 0.0}
 
 
 def batch_rouge_l(
     golds: List[Union[str, List[str]]],
     preds: List[Union[str, List[str]]],
-    sep: str = " ",
+    sep: str = ' ',
 ) -> Dict[str, List[float]]:
     """
     Calculate ROUGE-L F1, precision, and recall scores for a batch of sentences.
@@ -166,9 +165,9 @@ def batch_rouge_l(
     :return: list of {"p": precision, "r": recall, "f": F1}
     """
     if len(golds) != len(preds):
-        raise ValueError("golds and preds must have the same length")
+        raise ValueError('golds and preds must have the same length')
     scores = [rouge_l(gold, pred, sep) for gold, pred in zip(golds, preds)]
-    return {x: [score[x] for score in scores] for x in ["p", "r", "f"]}
+    return {x: [score[x] for score in scores] for x in ['p', 'r', 'f']}
 
 
 def accuracy(
@@ -220,7 +219,7 @@ def batch_accuracy(
     :return: list of accuracy
     """
     if len(golds) != len(preds):
-        raise ValueError("golds and preds must have the same length")
+        raise ValueError('golds and preds must have the same length')
     return [accuracy(gold, pred, ignore) for gold, pred in zip(golds, preds)]
 
 
@@ -274,7 +273,7 @@ def self_bleu(samples: List[List[str]]) -> float:
     return np.mean(scores).item()
 
 
-def self_edit_distance(samples: List[Union[str, List[str]]], sep=" ") -> float:
+def self_edit_distance(samples: List[Union[str, List[str]]], sep=' ') -> float:
     """
     Calculate self-edit-distance among the samples.
     :param samples: the chosen m samples
@@ -300,12 +299,11 @@ def self_edit_distance(samples: List[Union[str, List[str]]], sep=" ") -> float:
     return np.mean(scores).item()
 
 
-
 QUALITY_METRICS: Dict[str, Callable[[List[str], List[str]], float]] = {
-    "bleu": bleu,
-    "xmatch": exact_match,
-    "edit-sim": edit_sim,
-    "rouge-f": lambda g, p: rouge_l(g, p)["f"],
-    "rouge-p": lambda g, p: rouge_l(g, p)["p"],
-    "rouge-r": lambda g, p: rouge_l(g, p)["r"],
+    'bleu': bleu,
+    'xmatch': exact_match,
+    'edit-sim': edit_sim,
+    'rouge-f': lambda g, p: rouge_l(g, p)['f'],
+    'rouge-p': lambda g, p: rouge_l(g, p)['p'],
+    'rouge-r': lambda g, p: rouge_l(g, p)['r'],
 }
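For context on the metric helpers touched above: rouge_l delegates to the rouge package exactly as the hunk shows (rouge.get_scores(hyps=pred, refs=gold, avg=True), then scaling to the 0~100 range). A minimal self-contained sketch of that call, with made-up sample strings:

from rouge import Rouge

gold = 'the cat sat on the mat'
pred = 'the cat sat'

# avg=True returns a dict keyed by metric name; the module keeps only the
# ROUGE-L precision/recall/F1 entries and scales them to 0~100.
scores = Rouge().get_scores(hyps=pred, refs=gold, avg=True)
print({x: scores['rouge-l'][x] * 100.0 for x in ['p', 'r', 'f']})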
@@ -1,30 +1,41 @@
 import re
+
 from pygments.lexers.python import PythonLexer
 
+
 def tokenize_code(code):
     lexer = PythonLexer()
     tokens = process_pygments_tokens(lexer.get_tokens(code))
     return tokens
 
+
 def process_pygments_tokens(tokens):
     new_tokens = []
 
     for token in tokens:
-        if str(token[0]) == "Token.Text" and re.match(r'\s+', token[1]) or str(token[0]) == "Token.Text.Whitespace":
+        if (
+            str(token[0]) == 'Token.Text'
+            and re.match(r'\s+', token[1])
+            or str(token[0]) == 'Token.Text.Whitespace'
+        ):
             continue
         new_tokens.append(token[1])
 
     new_tokens_final = []
     i = 0
-    while i < len(new_tokens)-2:
-        if new_tokens[i] == '"' and new_tokens[i+1]=='STR' and new_tokens[i+2] == '"':
-            new_tokens_final.append("\"STR\"")
+    while i < len(new_tokens) - 2:
+        if (
+            new_tokens[i] == '"'
+            and new_tokens[i + 1] == 'STR'
+            and new_tokens[i + 2] == '"'
+        ):
+            new_tokens_final.append('"STR"')
             i = i + 3
         else:
             new_tokens_final.append(new_tokens[i])
             i = i + 1
 
-    for i in range(len(new_tokens)-2, len(new_tokens)):
+    for i in range(len(new_tokens) - 2, len(new_tokens)):
         if i >= 0:
             new_tokens_final.append(new_tokens[i])
 
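The hunk above only reshapes the whitespace filter in process_pygments_tokens; the behavior is unchanged. A minimal runnable sketch of that same filtering step, with a made-up code sample:

import re

from pygments.lexers.python import PythonLexer

code = 'x = 1  # a comment'
tokens = []
for tok_type, tok_text in PythonLexer().get_tokens(code):
    # Skip pure-whitespace tokens, mirroring the condition in the diff.
    if str(tok_type) == 'Token.Text.Whitespace' or (
        str(tok_type) == 'Token.Text' and re.match(r'\s+', tok_text)
    ):
        continue
    tokens.append(tok_text)
print(tokens)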
@@ -8,7 +8,6 @@ import os
 import pandas as pd
 from tqdm import tqdm
 
-from evaluation.testgeneval.eval_infer import process_test_suite
 from openhands.events.serialization import event_from_dict
 
 tqdm.pandas()
@@ -20,7 +20,8 @@ print(
     f'Downloading gold test suites from {args.dataset_name} (split: {args.split}) to {output_filepath}'
 )
 test_suites = [
-    {'instance_id': row['instance_id'], 'test_suite': row['test_src']} for row in dataset
+    {'instance_id': row['instance_id'], 'test_suite': row['test_src']}
+    for row in dataset
 ]
 print(f'{len(test_suites)} test suites loaded')
 pd.DataFrame(test_suites).to_json(output_filepath, lines=True, orient='records')
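The reflowed list comprehension feeds to_json(..., lines=True, orient='records'), which writes one JSON object per line (JSONL), one per test suite. A small sketch with a hypothetical row (the instance_id value is made up):

import pandas as pd

rows = [{'instance_id': 'example__repo-1', 'test_suite': 'def test_ok(): pass'}]
# With no path argument, to_json returns the JSONL string instead of
# writing it to a file; lines=True requires orient='records'.
print(pd.DataFrame(rows).to_json(lines=True, orient='records'))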
@@ -90,9 +90,7 @@ if __name__ == '__main__':
         break
 
     # print the error counter (with percentage)
-    print(
-        f'Average coverage for {num_lines} ({coverage / num_lines * 100:.2f}%)'
-    )
+    print(f'Average coverage for {num_lines} ({coverage / num_lines * 100:.2f}%)')
     print(
         f'Average mutation score for {num_lines} ({mutation_score / num_lines * 100:.2f}%)'
     )
@@ -79,7 +79,7 @@ describe("Actions Service", () => {
     // Mock implementation to capture the message
     let capturedPartialMessage = "";
     (store.dispatch as any).mockImplementation((action: any) => {
       if (action.type === "chat/addAssistantMessage" &&
           action.payload.includes("believe that the task was **completed partially**")) {
         capturedPartialMessage = action.payload;
       }
@@ -87,7 +87,7 @@ describe("Actions Service", () => {
 
     handleActionMessage(messagePartial);
     expect(capturedPartialMessage).toContain("I believe that the task was **completed partially**");
 
     // Test not completed
     const messageNotCompleted: ActionMessage = {
       id: 2,
@@ -106,7 +106,7 @@ describe("Actions Service", () => {
     // Mock implementation to capture the message
     let capturedNotCompletedMessage = "";
     (store.dispatch as any).mockImplementation((action: any) => {
       if (action.type === "chat/addAssistantMessage" &&
           action.payload.includes("believe that the task was **not completed**")) {
         capturedNotCompletedMessage = action.payload;
       }
@@ -114,7 +114,7 @@ describe("Actions Service", () => {
 
     handleActionMessage(messageNotCompleted);
     expect(capturedNotCompletedMessage).toContain("I believe that the task was **not completed**");
 
     // Test completed successfully
     const messageCompleted: ActionMessage = {
       id: 3,
@@ -133,7 +133,7 @@ describe("Actions Service", () => {
     // Mock implementation to capture the message
     let capturedCompletedMessage = "";
     (store.dispatch as any).mockImplementation((action: any) => {
       if (action.type === "chat/addAssistantMessage" &&
           action.payload.includes("believe that the task was **completed successfully**")) {
         capturedCompletedMessage = action.payload;
       }
@@ -65,7 +65,9 @@ async def get_github_user(
     access_token: SecretStr | None = Depends(get_access_token),
 ):
     if provider_tokens:
-        client = ProviderHandler(provider_tokens=provider_tokens, external_auth_token=access_token)
+        client = ProviderHandler(
+            provider_tokens=provider_tokens, external_auth_token=access_token
+        )
 
         try:
             user: User = await client.get_user()
@@ -164,7 +166,7 @@ async def search_github_repositories(
 @app.get('/suggested-tasks', response_model=list[SuggestedTask])
 async def get_suggested_tasks(
     provider_tokens: PROVIDER_TOKEN_TYPE | None = Depends(get_provider_tokens),
-    access_token: SecretStr | None = Depends(get_access_token)
+    access_token: SecretStr | None = Depends(get_access_token),
 ):
     """Get suggested tasks for the authenticated user across their most recently pushed repositories.
 