Fix issue #5059: [Bug]: Github resolver looking for wrong PR number (#5062)

Co-authored-by: Graham Neubig <neubig@gmail.com>
This commit is contained in:
OpenHands
2024-11-15 19:41:48 -05:00
committed by GitHub
parent a679fcc3b5
commit 7074e45ec3
3 changed files with 157 additions and 12 deletions

View File

@@ -83,7 +83,21 @@ class IssueHandler(IssueHandlerInterface):
return re.findall(image_pattern, issue_body)
def _extract_issue_references(self, body: str) -> list[int]:
pattern = r'#(\d+)'
# First, remove code blocks as they may contain false positives
body = re.sub(r'```.*?```', '', body, flags=re.DOTALL)
# Remove inline code
body = re.sub(r'`[^`]*`', '', body)
# Remove URLs that contain hash symbols
body = re.sub(r'https?://[^\s)]*#\d+[^\s)]*', '', body)
# Now extract issue numbers, making sure they're not part of other text
# The pattern matches #number that:
# 1. Is at the start of text or after whitespace/punctuation
# 2. Is followed by whitespace, punctuation, or end of text
# 3. Is not part of a URL
pattern = r'(?:^|[\s\[({]|[^\w#])#(\d+)(?=[\s,.\])}]|$)'
return [int(match) for match in re.findall(pattern, body)]
def _get_issue_comments(
@@ -455,17 +469,20 @@ class PRHandler(IssueHandler):
)
for issue_number in unique_issue_references:
url = f'https://api.github.com/repos/{self.owner}/{self.repo}/issues/{issue_number}'
headers = {
'Authorization': f'Bearer {self.token}',
'Accept': 'application/vnd.github.v3+json',
}
response = requests.get(url, headers=headers)
response.raise_for_status()
issue_data = response.json()
issue_body = issue_data.get('body', '')
if issue_body:
closing_issues.append(issue_body)
try:
url = f'https://api.github.com/repos/{self.owner}/{self.repo}/issues/{issue_number}'
headers = {
'Authorization': f'Bearer {self.token}',
'Accept': 'application/vnd.github.v3+json',
}
response = requests.get(url, headers=headers)
response.raise_for_status()
issue_data = response.json()
issue_body = issue_data.get('body', '')
if issue_body:
closing_issues.append(issue_body)
except requests.exceptions.RequestException as e:
logger.warning(f'Failed to fetch issue {issue_number}: {str(e)}')
return closing_issues

View File

@@ -0,0 +1,94 @@
import pytest
import requests
from unittest.mock import patch, MagicMock
from openhands.resolver.issue_definitions import PRHandler
from openhands.resolver.github_issue import ReviewThread
def test_handle_nonexistent_issue_reference():
handler = PRHandler("test-owner", "test-repo", "test-token")
# Mock the requests.get to simulate a 404 error
mock_response = MagicMock()
mock_response.raise_for_status.side_effect = requests.exceptions.HTTPError("404 Client Error: Not Found")
with patch('requests.get', return_value=mock_response):
# Call the method with a non-existent issue reference
result = handler._PRHandler__get_context_from_external_issues_references(
closing_issues=[],
closing_issue_numbers=[],
issue_body="This references #999999", # Non-existent issue
review_comments=[],
review_threads=[],
thread_comments=None
)
# The method should return an empty list since the referenced issue couldn't be fetched
assert result == []
def test_handle_rate_limit_error():
handler = PRHandler("test-owner", "test-repo", "test-token")
# Mock the requests.get to simulate a rate limit error
mock_response = MagicMock()
mock_response.raise_for_status.side_effect = requests.exceptions.HTTPError(
"403 Client Error: Rate Limit Exceeded"
)
with patch('requests.get', return_value=mock_response):
# Call the method with an issue reference
result = handler._PRHandler__get_context_from_external_issues_references(
closing_issues=[],
closing_issue_numbers=[],
issue_body="This references #123",
review_comments=[],
review_threads=[],
thread_comments=None
)
# The method should return an empty list since the request was rate limited
assert result == []
def test_handle_network_error():
handler = PRHandler("test-owner", "test-repo", "test-token")
# Mock the requests.get to simulate a network error
with patch('requests.get', side_effect=requests.exceptions.ConnectionError("Network Error")):
# Call the method with an issue reference
result = handler._PRHandler__get_context_from_external_issues_references(
closing_issues=[],
closing_issue_numbers=[],
issue_body="This references #123",
review_comments=[],
review_threads=[],
thread_comments=None
)
# The method should return an empty list since the network request failed
assert result == []
def test_successful_issue_reference():
handler = PRHandler("test-owner", "test-repo", "test-token")
# Mock a successful response
mock_response = MagicMock()
mock_response.raise_for_status.return_value = None
mock_response.json.return_value = {"body": "This is the referenced issue body"}
with patch('requests.get', return_value=mock_response):
# Call the method with an issue reference
result = handler._PRHandler__get_context_from_external_issues_references(
closing_issues=[],
closing_issue_numbers=[],
issue_body="This references #123",
review_comments=[],
review_threads=[],
thread_comments=None
)
# The method should return a list with the referenced issue body
assert result == ["This is the referenced issue body"]

View File

@@ -0,0 +1,34 @@
from openhands.resolver.issue_definitions import IssueHandler
def test_extract_issue_references():
handler = IssueHandler("test-owner", "test-repo", "test-token")
# Test basic issue reference
assert handler._extract_issue_references("Fixes #123") == [123]
# Test multiple issue references
assert handler._extract_issue_references("Fixes #123, #456") == [123, 456]
# Test issue references in code blocks should be ignored
assert handler._extract_issue_references("""
Here's a code block:
```python
# This is a comment with #123
def func():
pass # Another #456
```
But this #789 should be extracted
""") == [789]
# Test issue references in inline code should be ignored
assert handler._extract_issue_references("This `#123` should be ignored but #456 should be extracted") == [456]
# Test issue references in URLs should be ignored
assert handler._extract_issue_references("Check http://example.com/#123 but #456 should be extracted") == [456]
# Test issue references in markdown links should be extracted
assert handler._extract_issue_references("[Link to #123](http://example.com) and #456") == [123, 456]
# Test issue references with text around them
assert handler._extract_issue_references("Issue #123 is fixed and #456 is pending") == [123, 456]