fix: Handle empty lines in patch parser (#6208)

Co-authored-by: openhands <openhands@all-hands.dev>
This commit is contained in:
Graham Neubig
2025-01-12 06:43:08 +09:00
committed by GitHub
parent f31ccad48b
commit 40c52feb5b
2 changed files with 74 additions and 26 deletions

View File

@@ -24,7 +24,7 @@ unified_header_index = re.compile('^Index: (.+)$')
unified_header_old_line = re.compile(r'^--- ' + file_timestamp_str + '$')
unified_header_new_line = re.compile(r'^\+\+\+ ' + file_timestamp_str + '$')
unified_hunk_start = re.compile(r'^@@ -(\d+),?(\d*) \+(\d+),?(\d*) @@(.*)$')
unified_change = re.compile('^([-+ ])(.*)$')
unified_change = re.compile('^([-+ ])(.*)$', re.MULTILINE)
context_header_old_line = re.compile(r'^\*\*\* ' + file_timestamp_str + '$')
context_header_new_line = re.compile('^--- ' + file_timestamp_str + '$')
@@ -606,38 +606,39 @@ def parse_unified_diff(text):
h = unified_hunk_start.match(hunk[0])
del hunk[0]
if h:
old = int(h.group(1))
if len(h.group(2)) > 0:
old_len = int(h.group(2))
else:
old_len = 0
# The hunk header @@ -1,6 +1,6 @@ means:
# - The hunk covers 6 lines of the old file, starting at line 1
# - The hunk covers 6 lines of the new file, starting at line 1
# When a count is omitted (e.g. @@ -3 +3 @@), it defaults to 1 line.
old = int(h.group(1)) # Starting line in old file
old_len = int(h.group(2)) if len(h.group(2)) > 0 else 1 # Number of lines in old file
new = int(h.group(3))
if len(h.group(4)) > 0:
new_len = int(h.group(4))
else:
new_len = 0
new = int(h.group(3)) # Starting line in new file
new_len = int(h.group(4)) if len(h.group(4)) > 0 else 1 # Number of lines in new file
h = None
break
# Process each line in the hunk
for n in hunk:
c = unified_change.match(n)
if c:
kind = c.group(1)
line = c.group(2)
# Each line in a unified diff starts with a space (context), + (addition), or - (deletion)
# The first character is the kind, the rest is the line content
kind = n[0] if len(n) > 0 else ' ' # Empty lines in the hunk are treated as context lines
line = n[1:] if len(n) > 1 else ''
if kind == '-' and (r != old_len or r == 0):
changes.append(Change(old + r, None, line, hunk_n))
r += 1
elif kind == '+' and (i != new_len or i == 0):
changes.append(Change(None, new + i, line, hunk_n))
i += 1
elif kind == ' ':
if r != old_len and i != new_len:
changes.append(Change(old + r, new + i, line, hunk_n))
r += 1
i += 1
# Process the line based on its kind
if kind == '-' and (r != old_len or r == 0):
# Line was removed from the old file
changes.append(Change(old + r, None, line, hunk_n))
r += 1
elif kind == '+' and (i != new_len or i == 0):
# Line was added in the new file
changes.append(Change(None, new + i, line, hunk_n))
i += 1
elif kind == ' ':
# Context line - exists in both old and new file
changes.append(Change(old + r, new + i, line, hunk_n))
r += 1
i += 1
if len(changes) > 0:
return changes