Compare commits

...

10 Commits

5 changed files with 363 additions and 167 deletions

View File

@@ -72,11 +72,13 @@ class IssueHandler(IssueHandlerInterface):
if not issues:
break
# Sanity check - the response is a list of dictionaries
if not isinstance(issues, list) or any(
# Handle both list and single-object responses
if isinstance(issues, dict):
issues = [issues]
elif not isinstance(issues, list) or any(
[not isinstance(issue, dict) for issue in issues]
):
raise ValueError('Expected list of dictionaries from Github API.')
raise ValueError('Expected list or dictionary from Github API.')
# Add the issues to the final list
all_issues.extend(issues)
@@ -91,6 +93,9 @@ class IssueHandler(IssueHandlerInterface):
return re.findall(image_pattern, issue_body)
def _extract_issue_references(self, body: str) -> list[int]:
if not body:
return []
# First, remove code blocks as they may contain false positives
body = re.sub(r'```.*?```', '', body, flags=re.DOTALL)
@@ -100,12 +105,13 @@ class IssueHandler(IssueHandlerInterface):
# Remove URLs that contain hash symbols
body = re.sub(r'https?://[^\s)]*#\d+[^\s)]*', '', body)
# Now extract issue numbers, making sure they're not part of other text
# Extract issue numbers that are explicitly referenced
# The pattern matches #number that:
# 1. Is at the start of text or after whitespace/punctuation
# 2. Is followed by whitespace, punctuation, or end of text
# 3. Is not part of a URL
pattern = r'(?:^|[\s\[({]|[^\w#])#(\d+)(?=[\s,.\])}]|$)'
# 1. May optionally be preceded by a keyword such as "fix", "close", "resolve", "see issue", etc. (the keyword group is optional, so a bare #number also matches)
# 2. Is at the start of text or after whitespace/punctuation
# 3. Is followed by whitespace, punctuation, or end of text
keywords = r'(?:fix(?:e[ds])?|close[ds]?|resolve[ds]?|see issue|related to|references?|addresses?)'
pattern = fr'(?i)(?:{keywords}\s+)?(?:^|[\s\[\(\{{]|[^\w#])#(\d+)(?=[\s,.\]\)\}}]|$)'
return [int(match) for match in re.findall(pattern, body)]
def _get_issue_comments(
@@ -126,33 +132,37 @@ class IssueHandler(IssueHandlerInterface):
all_comments = []
# Get comments, page by page
while True:
response = requests.get(url, headers=headers, params=params)
response.raise_for_status()
comments = response.json()
try:
while True:
response = requests.get(url, headers=headers, params=params)
response.raise_for_status()
comments = response.json()
if not comments:
break
if not comments:
break
# If a single comment ID is provided, return only that comment
if comment_id:
matching_comment = next(
(
comment['body']
for comment in comments
if comment['id'] == comment_id
),
None,
)
if matching_comment:
return [matching_comment]
else:
# Otherwise, return all comments
all_comments.extend([comment['body'] for comment in comments])
# If a single comment ID is provided, return only that comment
if comment_id:
matching_comment = next(
(
comment['body']
for comment in comments
if comment['id'] == comment_id
),
None,
)
if matching_comment:
return [matching_comment]
else:
# Otherwise, return all comments
all_comments.extend([comment['body'] for comment in comments])
params['page'] += 1
params['page'] += 1
return all_comments if all_comments else None
return all_comments if all_comments else None
except (requests.exceptions.RequestException, StopIteration):
# Return None if we can't get any comments
return None
def get_converted_issues(
self, issue_numbers: list[int] | None = None, comment_id: int | None = None
@@ -166,17 +176,12 @@ class IssueHandler(IssueHandlerInterface):
Returns:
List of Github issues.
"""
if not issue_numbers:
raise ValueError('Unspecified issue number')
all_issues = self._download_issues_from_github()
logger.info(f'Limiting resolving to issues {issue_numbers}.')
all_issues = [
issue
for issue in all_issues
if issue['number'] in issue_numbers and 'pull_request' not in issue
]
all_issues = [issue for issue in all_issues if issue['number'] in issue_numbers]
if len(issue_numbers) == 1 and not all_issues:
raise ValueError(f'Issue {issue_numbers[0]} not found')
@@ -530,6 +535,23 @@ class PRHandler(IssueHandler):
response = requests.get(url, headers=headers)
response.raise_for_status()
issue_data = response.json()
# Handle both list and single-object responses
if isinstance(issue_data, list):
# Find the matching issue in the list
matching_issues = [
i for i in issue_data if i.get('number') == issue_number
]
if not matching_issues:
logger.warning(f'Issue {issue_number} not found in response')
continue
issue_data = matching_issues[0]
elif not isinstance(issue_data, dict):
logger.warning(
f'Unexpected response type for issue {issue_number}: {type(issue_data)}'
)
continue
issue_body = issue_data.get('body', '')
if issue_body:
closing_issues.append(issue_body)
@@ -544,57 +566,86 @@ class PRHandler(IssueHandler):
if not issue_numbers:
raise ValueError('Unspecified issue numbers')
all_issues = self._download_issues_from_github()
logger.info(f'Limiting resolving to issues {issue_numbers}.')
all_issues = [issue for issue in all_issues if issue['number'] in issue_numbers]
logger.info(f'Fetching issues {issue_numbers}.')
converted_issues = []
for issue in all_issues:
# For PRs, body can be None
if any([issue.get(key) is None for key in ['number', 'title']]):
logger.warning(f'Skipping #{issue} as it is missing number or title.')
continue
headers = {
'Authorization': f'token {self.token}',
'Accept': 'application/vnd.github.v3+json',
}
# Handle None body for PRs
body = issue.get('body') if issue.get('body') is not None else ''
(
closing_issues,
closing_issues_numbers,
review_comments,
review_threads,
thread_ids,
) = self.__download_pr_metadata(issue['number'], comment_id=comment_id)
head_branch = issue['head']['ref']
for issue_number in issue_numbers:
try:
url = f'https://api.github.com/repos/{self.owner}/{self.repo}/issues/{issue_number}'
response = requests.get(url, headers=headers)
response.raise_for_status()
issue = response.json()
# Get PR thread comments
thread_comments = self._get_pr_comments(
issue['number'], comment_id=comment_id
)
# Handle both list and single-object responses
if isinstance(issue, list):
# Find the matching issue in the list
matching_issues = [
i for i in issue if i.get('number') == issue_number
]
if not matching_issues:
logger.warning(f'Issue {issue_number} not found in response')
continue
issue = matching_issues[0]
elif not isinstance(issue, dict):
logger.warning(
f'Unexpected response type for issue {issue_number}: {type(issue)}'
)
continue
closing_issues = self.__get_context_from_external_issues_references(
closing_issues,
closing_issues_numbers,
body,
review_comments,
review_threads,
thread_comments,
)
# For PRs, body can be None
if any([issue.get(key) is None for key in ['number', 'title']]):
logger.warning(
f'Skipping #{issue} as it is missing number or title.'
)
continue
issue_details = GithubIssue(
owner=self.owner,
repo=self.repo,
number=issue['number'],
title=issue['title'],
body=body,
closing_issues=closing_issues,
review_comments=review_comments,
review_threads=review_threads,
thread_ids=thread_ids,
head_branch=head_branch,
thread_comments=thread_comments,
)
# Handle None body for PRs
body = issue.get('body') if issue.get('body') is not None else ''
(
closing_issues,
closing_issues_numbers,
review_comments,
review_threads,
thread_ids,
) = self.__download_pr_metadata(issue['number'], comment_id=comment_id)
head_branch = issue['head']['ref']
converted_issues.append(issue_details)
# Get PR thread comments
thread_comments = self._get_pr_comments(
issue['number'], comment_id=comment_id
)
# Extract issue references from PR body and review comments
closing_issues = self.__get_context_from_external_issues_references(
closing_issues,
closing_issues_numbers,
body,
review_comments,
review_threads,
thread_comments,
)
issue_details = GithubIssue(
owner=self.owner,
repo=self.repo,
number=issue['number'],
title=issue['title'],
body=body,
closing_issues=closing_issues,
review_comments=review_comments,
review_threads=review_threads,
thread_ids=thread_ids,
head_branch=head_branch,
thread_comments=thread_comments,
)
converted_issues.append(issue_details)
except requests.exceptions.RequestException as e:
logger.warning(f'Failed to fetch issue {issue_number}: {str(e)}')
return converted_issues

View File

@@ -0,0 +1,55 @@
import pytest
from unittest.mock import MagicMock, patch
from openhands.resolver.issue_definitions import IssueHandler
@patch('openhands.resolver.issue_definitions.LLM')
def test_extract_issue_references(mock_llm):
    """Exercise ``IssueHandler._extract_issue_references`` on three samples.

    The samples cover text that must yield no references, text with
    keyword-style references, and a set of punctuation/casing edge cases.
    """
    # The LLM class is patched out; reference extraction never touches it.
    handler = IssueHandler("owner", "repo", "token", MagicMock())
    extract = handler._extract_issue_references

    # --- Sample 1: nothing here should be reported as an issue reference ---
    no_reference_text = """
This is a regular text with no issue references.
Here's a URL: https://github.com/org/repo/issues/123
Here's a code block:
```
Issue #456 should be ignored
```
Here's inline code: `Issue #789`
Here's a URL with hash: https://example.com/page#1234
Here's a version number: v1.2.3
"""
    found = extract(no_reference_text)
    assert found == []

    # --- Sample 2: every number below is expected to be picked up ---
    keyword_reference_text = """
This PR fixes #123
Related to #456 and closes #789
See issue #101 for details
This PR addresses #202
References #303
Fixes: #404
Fixed #505
Closes: #606
Closed #707
Resolves: #808
Resolved #909
"""
    expected_keyword_refs = [
        101, 123, 202, 303, 404, 456, 505, 606, 707, 789, 808, 909,
    ]
    found = sorted(extract(keyword_reference_text))
    assert found == expected_keyword_refs

    # --- Sample 3: punctuation, casing and spacing variations ---
    # "#3" is glued to the surrounding words and is the only number
    # that is expected to be left out.
    edge_case_text = """
Fixes #1 at start of line
Text fixes #2 in middle
fixes#3without-space
FIXES #4 uppercase
FiXeD #5 mixed case
fixes: #6 with colon
fixes,#7 with comma
fixes;#8 with semicolon
fixes (#9) with parens
fixes [#10] with brackets
fixes{#11}with braces
fixes #12, #13 and #14
"""
    expected_edge_refs = [1, 2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]
    found = sorted(extract(edge_case_text))
    assert found == expected_edge_refs

View File

@@ -10,20 +10,20 @@ def test_get_converted_issues_initializes_review_comments():
with patch('requests.get') as mock_get:
# Mock the response for issues
mock_issues_response = MagicMock()
mock_issues_response.json.return_value = [
{'number': 1, 'title': 'Test Issue', 'body': 'Test Body'}
]
mock_issues_response.json.return_value = {
'number': 1,
'title': 'Test Issue',
'body': 'Test Body',
}
# Mock the response for comments
mock_comments_response = MagicMock()
mock_comments_response.json.return_value = []
# Set up the mock to return different responses for different calls
# First call is for issues, second call is for comments
mock_get.side_effect = [
mock_issues_response,
mock_comments_response,
mock_comments_response,
] # Need two comment responses because we make two API calls
]
# Create an instance of IssueHandler
llm_config = LLMConfig(model='test', api_key='test')
@@ -51,9 +51,11 @@ def test_get_converted_issues_handles_empty_body():
with patch('requests.get') as mock_get:
# Mock the response for issues
mock_issues_response = MagicMock()
mock_issues_response.json.return_value = [
{'number': 1, 'title': 'Test Issue', 'body': None}
]
mock_issues_response.json.return_value = {
'number': 1,
'title': 'Test Issue',
'body': None,
}
# Mock the response for comments
mock_comments_response = MagicMock()
mock_comments_response.json.return_value = []
@@ -62,7 +64,6 @@ def test_get_converted_issues_handles_empty_body():
mock_get.side_effect = [
mock_issues_response,
mock_comments_response,
mock_comments_response,
]
# Create an instance of IssueHandler
@@ -93,14 +94,12 @@ def test_pr_handler_get_converted_issues_with_comments():
with patch('requests.get') as mock_get:
# Mock the response for PRs
mock_prs_response = MagicMock()
mock_prs_response.json.return_value = [
{
'number': 1,
'title': 'Test PR',
'body': 'Test Body fixes #1',
'head': {'ref': 'test-branch'},
}
]
mock_prs_response.json.return_value = {
'number': 1,
'title': 'Test PR',
'body': 'Test Body fixes #1',
'head': {'ref': 'test-branch'},
}
# Mock the response for PR comments
mock_comments_response = MagicMock()
@@ -136,9 +135,8 @@ def test_pr_handler_get_converted_issues_with_comments():
mock_get.side_effect = [
mock_prs_response, # First call for PRs
mock_empty_response, # Second call for PRs (empty page)
mock_comments_response, # Third call for PR comments
mock_empty_response, # Fourth call for PR comments (empty page)
mock_comments_response, # Second call for PR comments
mock_empty_response, # Third call for PR comments (empty page)
mock_external_issue_response, # Mock response for the external issue reference #1
]
@@ -202,14 +200,12 @@ def test_pr_handler_get_converted_issues_with_specific_thread_comment():
with patch('requests.get') as mock_get:
# Mock the response for PRs
mock_prs_response = MagicMock()
mock_prs_response.json.return_value = [
{
'number': 1,
'title': 'Test PR',
'body': 'Test Body',
'head': {'ref': 'test-branch'},
}
]
mock_prs_response.json.return_value = {
'number': 1,
'title': 'Test PR',
'body': 'Test Body',
'head': {'ref': 'test-branch'},
}
# Mock the response for PR comments
mock_comments_response = MagicMock()
@@ -262,9 +258,8 @@ def test_pr_handler_get_converted_issues_with_specific_thread_comment():
mock_get.side_effect = [
mock_prs_response, # First call for PRs
mock_empty_response, # Second call for PRs (empty page)
mock_comments_response, # Third call for PR comments
mock_empty_response, # Fourth call for PR comments (empty page)
mock_comments_response, # Second call for PR comments
mock_empty_response, # Third call for PR comments (empty page)
]
# Mock the post request for GraphQL
@@ -305,14 +300,12 @@ def test_pr_handler_get_converted_issues_with_specific_review_thread_comment():
with patch('requests.get') as mock_get:
# Mock the response for PRs
mock_prs_response = MagicMock()
mock_prs_response.json.return_value = [
{
'number': 1,
'title': 'Test PR',
'body': 'Test Body',
'head': {'ref': 'test-branch'},
}
]
mock_prs_response.json.return_value = {
'number': 1,
'title': 'Test PR',
'body': 'Test Body',
'head': {'ref': 'test-branch'},
}
# Mock the response for PR comments
mock_comments_response = MagicMock()
@@ -365,9 +358,8 @@ def test_pr_handler_get_converted_issues_with_specific_review_thread_comment():
mock_get.side_effect = [
mock_prs_response, # First call for PRs
mock_empty_response, # Second call for PRs (empty page)
mock_comments_response, # Third call for PR comments
mock_empty_response, # Fourth call for PR comments (empty page)
mock_comments_response, # Second call for PR comments
mock_empty_response, # Third call for PR comments (empty page)
]
# Mock the post request for GraphQL
@@ -414,14 +406,12 @@ def test_pr_handler_get_converted_issues_with_specific_comment_and_issue_refs():
with patch('requests.get') as mock_get:
# Mock the response for PRs
mock_prs_response = MagicMock()
mock_prs_response.json.return_value = [
{
'number': 1,
'title': 'Test PR fixes #3',
'body': 'Test Body',
'head': {'ref': 'test-branch'},
}
]
mock_prs_response.json.return_value = {
'number': 1,
'title': 'Test PR fixes #3',
'body': 'Test Body',
'head': {'ref': 'test-branch'},
}
# Mock the response for PR comments
mock_comments_response = MagicMock()
@@ -486,11 +476,10 @@ def test_pr_handler_get_converted_issues_with_specific_comment_and_issue_refs():
mock_get.side_effect = [
mock_prs_response, # First call for PRs
mock_empty_response, # Second call for PRs (empty page)
mock_comments_response, # Third call for PR comments
mock_empty_response, # Fourth call for PR comments (empty page)
mock_external_issue_response_in_body,
mock_external_issue_response_review_thread,
mock_comments_response, # Second call for PR comments
mock_empty_response, # Third call for PR comments (empty page)
mock_external_issue_response_in_body, # Mock response for the external issue reference #1
mock_external_issue_response_review_thread, # Mock response for the external issue reference #2
]
# Mock the post request for GraphQL
@@ -537,14 +526,12 @@ def test_pr_handler_get_converted_issues_with_duplicate_issue_refs():
with patch('requests.get') as mock_get:
# Mock the response for PRs
mock_prs_response = MagicMock()
mock_prs_response.json.return_value = [
{
'number': 1,
'title': 'Test PR',
'body': 'Test Body fixes #1',
'head': {'ref': 'test-branch'},
}
]
mock_prs_response.json.return_value = {
'number': 1,
'title': 'Test PR',
'body': 'Test Body fixes #1',
'head': {'ref': 'test-branch'},
}
# Mock the response for PR comments
mock_comments_response = MagicMock()
@@ -586,11 +573,10 @@ def test_pr_handler_get_converted_issues_with_duplicate_issue_refs():
mock_get.side_effect = [
mock_prs_response, # First call for PRs
mock_empty_response, # Second call for PRs (empty page)
mock_comments_response, # Third call for PR comments
mock_empty_response, # Fourth call for PR comments (empty page)
mock_comments_response, # Second call for PR comments
mock_empty_response, # Third call for PR comments (empty page)
mock_external_issue_response_in_body, # Mock response for the external issue reference #1
mock_external_issue_response_in_comment,
mock_external_issue_response_in_comment, # Mock response for the external issue reference #2
]
# Mock the post request for GraphQL

View File

@@ -165,24 +165,27 @@ def test_download_pr_from_github():
llm_config = LLMConfig(model='test', api_key='test')
handler = PRHandler('owner', 'repo', 'token', llm_config)
mock_pr_response = MagicMock()
mock_pr_response.json.side_effect = [
[
{
'number': 1,
'title': 'PR 1',
'body': 'This is a pull request',
'head': {'ref': 'b1'},
},
{
'number': 2,
'title': 'My PR',
'body': 'This is another pull request',
'head': {'ref': 'b2'},
},
{'number': 3, 'title': 'PR 3', 'body': 'Final PR', 'head': {'ref': 'b3'}},
],
None,
# Update the side effect to return the PR list for each PR number requested
pr_list = [
{
'number': 1,
'title': 'PR 1',
'body': 'This is a pull request',
'head': {'ref': 'b1'},
},
{
'number': 2,
'title': 'My PR',
'body': 'This is another pull request',
'head': {'ref': 'b2'},
},
{'number': 3, 'title': 'PR 3', 'body': 'Final PR', 'head': {'ref': 'b3'}},
]
mock_pr_response.json.side_effect = [
pr_list
] * 3 # Return the same list for each PR
mock_pr_response.raise_for_status = MagicMock()
# Mock for PR comments response

View File

@@ -0,0 +1,101 @@
from unittest.mock import Mock, patch
import pytest
from openhands.core.config import LLMConfig
from openhands.resolver.issue_definitions import PRHandler
@pytest.fixture
def pr_handler():
    """Build a ``PRHandler`` with placeholder owner, repo, token and LLM config."""
    llm_config = LLMConfig(model='test-model')
    handler = PRHandler(
        owner='test-owner',
        repo='test-repo',
        token='test-token',
        llm_config=llm_config,
    )
    return handler
def test_get_converted_issues_fetches_specific_issues(pr_handler):
    """Check that a single requested number is fetched by its own URL.

    With ``requests.get`` and ``requests.post`` mocked, asking for issue 123
    must produce one converted issue, two GET calls (issue and comments),
    and one GraphQL POST, each with the expected URL and headers.
    """

    def json_response(payload):
        # Stand-in for a ``requests`` response whose ``.json()`` yields payload.
        fake = Mock()
        fake.json.return_value = payload
        return fake

    # Endpoints and headers the handler is expected to use.
    issue_url = 'https://api.github.com/repos/test-owner/test-repo/issues/123'
    comments_url = (
        'https://api.github.com/repos/test-owner/test-repo/issues/123/comments'
    )
    graphql_url = 'https://api.github.com/graphql'
    rest_headers = {
        'Authorization': 'token test-token',
        'Accept': 'application/vnd.github.v3+json',
    }
    graphql_headers = {
        'Authorization': 'Bearer test-token',
        'Content-Type': 'application/json',
    }

    # Single-object issue payload; per the original fixture comment, the
    # 'pull_request' key is what marks it as a PR.
    issue_response = json_response(
        {
            'number': 123,
            'title': 'Test Issue',
            'body': 'Test body',
            'state': 'open',
            'head': {'ref': 'test-branch'},
            'pull_request': {
                'url': 'https://github.com/test-owner/test-repo/pull/123'
            },
        }
    )

    # GraphQL metadata with no closing issues, reviews or review threads.
    graphql_response = json_response(
        {
            'data': {
                'repository': {
                    'pullRequest': {
                        'closingIssuesReferences': {'edges': []},
                        'url': 'https://github.com/test-owner/test-repo/pull/123',
                        'reviews': {'nodes': []},
                        'reviewThreads': {'edges': []},
                    }
                }
            }
        }
    )

    # An empty comments page.
    comments_response = json_response([])

    with patch('requests.get') as mock_get, patch(
        'requests.post', return_value=graphql_response
    ) as mock_post:
        # GET responses are consumed in order: the issue, then its comments.
        mock_get.side_effect = [issue_response, comments_response]

        issues = pr_handler.get_converted_issues(issue_numbers=[123])

        # Exactly one converted issue carrying the mocked number and title.
        assert len(issues) == 1
        converted = issues[0]
        assert converted.number == 123
        assert converted.title == 'Test Issue'

        # One GET for the issue, one for comments; one POST for GraphQL.
        assert mock_get.call_count == 2
        assert mock_post.call_count == 1

        mock_get.assert_any_call(issue_url, headers=rest_headers)
        mock_get.assert_any_call(
            comments_url,
            headers=rest_headers,
            params={'per_page': 100, 'page': 1},
        )

        # The query text is too long to spell out, so it is echoed back from
        # the recorded call; only the variables and headers are really pinned.
        sent_query = mock_post.call_args.kwargs['json']['query']
        mock_post.assert_called_once_with(
            graphql_url,
            json={
                'query': sent_query,
                'variables': {'owner': 'test-owner', 'repo': 'test-repo', 'pr': 123},
            },
            headers=graphql_headers,
        )