Compare commits

...

1 Commits

2 changed files with 96 additions and 3 deletions

View File

@@ -3,11 +3,17 @@ Given the following issue description and the last message from an AI agent atte
Issue description:
{{ issue_context }}
Last message from AI agent:
Last message from AI agent (including any patch content):
{{ last_message }}
(1) has the issue been successfully resolved?
(2) If the issue has been resolved, please provide an explanation of what was done in the PR that can be sent to a human reviewer on github. If the issue has not been resolved, please provide an explanation of why.
Please analyze:
1. Has the issue been successfully resolved? Look for concrete evidence such as:
- Patch content showing actual code changes
- Clear description of what was fixed
- Specific files that were modified
2. If patch content is present, carefully examine the changes to verify they address the issue
3. If the issue has been resolved, provide an explanation of what was done in the PR that can be sent to a human reviewer on GitHub
4. If the issue has not been resolved, explain why
Answer in exactly the format below, with only true or false for success, and an explanation of the result.

View File

@@ -0,0 +1,87 @@
import os
import pytest
from openhands.resolver.issue_definitions import IssueHandler
from openhands.resolver.github_issue import GithubIssue
from openhands.events.event import Event
from openhands.core.config import LLMConfig
@pytest.fixture
def issue_handler():
    """Provide an IssueHandler wired with placeholder repo credentials.

    The handler is built for the "test-owner/test-repo" repository with a
    dummy token and an LLMConfig naming the "gpt-4" model and a dummy API key.
    """
    placeholder_llm_config = LLMConfig(model="gpt-4", api_key="test-key")
    handler = IssueHandler(
        owner="test-owner",
        repo="test-repo",
        token="test-token",
        llm_config=placeholder_llm_config,
    )
    return handler
def test_guess_success_with_patch_content(issue_handler, mocker):
    """guess_success reports success when the last message carries a patch.

    The LLM call is replaced with a canned "true" answer, so this checks that
    the handler parses that answer into a truthy verdict and passes the
    explanation text through.
    """
    # Issue under evaluation (no thread or review comments attached).
    target_issue = GithubIssue(
        owner="test-owner",
        repo="test-repo",
        number=1,
        title="Test Issue",
        body="Fix the bug in the code",
        thread_comments=None,
        review_comments=None,
    )

    # Single-event history whose message embeds a diff-style patch.
    final_event = Event()
    final_event._message = (
        "All done! I've fixed the issue by making the following changes:\n\n"
        "Patch content:\n"
        "```diff\n"
        "--- a/src/file.py\n"
        "+++ b/src/file.py\n"
        "@@ -10,7 +10,7 @@\n"
        "- buggy_code()\n"
        "+ fixed_code()\n"
        "```"
    )
    agent_history = [final_event]

    # Canned LLM reply in the "--- success / --- explanation" answer format.
    canned_answer = (
        "--- success\n"
        "true\n"
        "--- explanation\n"
        "The issue has been resolved. The patch shows that the buggy code "
        "was replaced with fixed code."
    )
    reply_message = mocker.MagicMock(content=canned_answer)
    reply_choice = mocker.MagicMock(message=reply_message)
    llm_reply = mocker.MagicMock()
    llm_reply.choices = [reply_choice]
    mocker.patch.object(issue_handler.llm, '_completion', return_value=llm_reply)

    # Exercise the handler and verify the parsed verdict and explanation.
    resolved, _, reason = issue_handler.guess_success(target_issue, agent_history)

    assert resolved is True
    assert "patch shows" in reason.lower()
def test_guess_success_without_patch_content(issue_handler, mocker):
    """guess_success reports failure when the last message has no patch.

    The LLM call is replaced with a canned "false" answer, so this checks that
    the handler parses that answer into a falsy verdict and passes the
    explanation text through.
    """
    # Issue under evaluation (no thread or review comments attached).
    target_issue = GithubIssue(
        owner="test-owner",
        repo="test-repo",
        number=1,
        title="Test Issue",
        body="Fix the bug in the code",
        thread_comments=None,
        review_comments=None,
    )

    # Single-event history whose message contains no patch at all.
    final_event = Event()
    final_event._message = "All done!"
    agent_history = [final_event]

    # Canned LLM reply in the "--- success / --- explanation" answer format.
    canned_answer = (
        "--- success\n"
        "false\n"
        "--- explanation\n"
        "Cannot verify the resolution as no patch content is provided."
    )
    reply_message = mocker.MagicMock(content=canned_answer)
    reply_choice = mocker.MagicMock(message=reply_message)
    llm_reply = mocker.MagicMock()
    llm_reply.choices = [reply_choice]
    mocker.patch.object(issue_handler.llm, '_completion', return_value=llm_reply)

    # Exercise the handler and verify the parsed verdict and explanation.
    resolved, _, reason = issue_handler.guess_success(target_issue, agent_history)

    assert resolved is False
    assert "no patch content" in reason.lower()