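"""Tests for PRHandler.guess_success() and its feedback-checking helpers.

All tests mock litellm.completion() and verify that the prompts sent to the
LLM include the issue descriptions, the PR feedback, the relevant file
locations, and the agent's last message, and that the handler parses the
"--- success / --- explanation" response format correctly.
"""
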
import json
from unittest.mock import patch, MagicMock
from openhands.resolver.issue_definitions import PRHandler
from openhands.resolver.github_issue import GithubIssue, ReviewThread
from openhands.events.action.message import MessageAction
from openhands.core.config import LLMConfig


def test_guess_success_review_threads_litellm_call():
    """Test that the litellm.completion() call for review threads contains the expected content."""
    # Create a PR handler instance
    handler = PRHandler("test-owner", "test-repo", "test-token")

    # Create a mock issue with review threads
    issue = GithubIssue(
        owner="test-owner",
        repo="test-repo",
        number=1,
        title="Test PR",
        body="Test Body",
        thread_comments=None,
        closing_issues=["Issue 1 description", "Issue 2 description"],
        review_comments=None,
        review_threads=[
            ReviewThread(
                comment="Please fix the formatting\n---\nlatest feedback:\nAdd docstrings",
                files=["/src/file1.py", "/src/file2.py"],
            ),
            ReviewThread(
                comment="Add more tests\n---\nlatest feedback:\nAdd test cases",
                files=["/tests/test_file.py"],
            ),
        ],
        thread_ids=["1", "2"],
        head_branch="test-branch",
    )
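
    # The two review threads above should fan out into one
    # litellm.completion() call per thread (asserted below).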

    # Create mock history with a detailed response
    history = [
        MessageAction(
            content="""I have made the following changes:
1. Fixed formatting in file1.py and file2.py
2. Added docstrings to all functions
3. Added test cases in test_file.py"""
        )
    ]

    # Create mock LLM config
    llm_config = LLMConfig(model="test-model", api_key="test-key")

    # Mock the LLM response
    mock_response = MagicMock()
    mock_response.choices = [
        MagicMock(
            message=MagicMock(
                content="""--- success
true
--- explanation
The changes successfully address the feedback."""
            )
        )
    ]
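
    # The same mocked response is returned for both completion calls; the
    # handler is expected to parse this "--- success / --- explanation"
    # delimited format (exercised directly in test_check_feedback_with_llm).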

    # Test the guess_success method
    with patch("litellm.completion") as mock_completion:
        mock_completion.return_value = mock_response
        success, success_list, explanation = handler.guess_success(
            issue, history, llm_config
        )

        # Verify the litellm.completion() calls
        assert mock_completion.call_count == 2  # One call per review thread

        # Check first call
        first_call = mock_completion.call_args_list[0]
        first_prompt = first_call[1]["messages"][0]["content"]
        assert (
            "Issue descriptions:\n"
            + json.dumps(["Issue 1 description", "Issue 2 description"], indent=4)
            in first_prompt
        )
        assert (
            "Feedback:\nPlease fix the formatting\n---\nlatest feedback:\nAdd docstrings"
            in first_prompt
        )
        assert (
            "Files locations:\n"
            + json.dumps(["/src/file1.py", "/src/file2.py"], indent=4)
            in first_prompt
        )
        assert "Last message from AI agent:\n" + history[0].content in first_prompt

        # Check second call
        second_call = mock_completion.call_args_list[1]
        second_prompt = second_call[1]["messages"][0]["content"]
        assert (
            "Issue descriptions:\n"
            + json.dumps(["Issue 1 description", "Issue 2 description"], indent=4)
            in second_prompt
        )
        assert (
            "Feedback:\nAdd more tests\n---\nlatest feedback:\nAdd test cases"
            in second_prompt
        )
        assert (
            "Files locations:\n" + json.dumps(["/tests/test_file.py"], indent=4)
            in second_prompt
        )
        assert "Last message from AI agent:\n" + history[0].content in second_prompt


def test_guess_success_thread_comments_litellm_call():
    """Test that the litellm.completion() call for thread comments contains the expected content."""
    # Create a PR handler instance
    handler = PRHandler("test-owner", "test-repo", "test-token")

    # Create a mock issue with thread comments
    issue = GithubIssue(
        owner="test-owner",
        repo="test-repo",
        number=1,
        title="Test PR",
        body="Test Body",
        thread_comments=[
            "Please improve error handling",
            "Add input validation",
            "latest feedback:\nHandle edge cases",
        ],
        closing_issues=["Issue 1 description", "Issue 2 description"],
        review_comments=None,
        thread_ids=None,
        head_branch="test-branch",
    )
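
    # Unlike review threads, all thread comments are evaluated together in a
    # single completion call (assert_called_once below).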

    # Create mock history with a detailed response
    history = [
        MessageAction(
            content="""I have made the following changes:
1. Added try/catch blocks for error handling
2. Added input validation checks
3. Added handling for edge cases"""
        )
    ]

    # Create mock LLM config
    llm_config = LLMConfig(model="test-model", api_key="test-key")

    # Mock the LLM response
    mock_response = MagicMock()
    mock_response.choices = [
        MagicMock(
            message=MagicMock(
                content="""--- success
true
--- explanation
The changes successfully address the feedback."""
            )
        )
    ]

    # Test the guess_success method
    with patch("litellm.completion") as mock_completion:
        mock_completion.return_value = mock_response
        success, success_list, explanation = handler.guess_success(
            issue, history, llm_config
        )

        # Verify the litellm.completion() call
        mock_completion.assert_called_once()
        call_args = mock_completion.call_args
        prompt = call_args[1]["messages"][0]["content"]

        # Check prompt content
        assert (
            "Issue descriptions:\n"
            + json.dumps(["Issue 1 description", "Issue 2 description"], indent=4)
            in prompt
        )
        assert "PR Thread Comments:\n" + "\n---\n".join(issue.thread_comments) in prompt
        assert "Last message from AI agent:\n" + history[0].content in prompt


def test_check_feedback_with_llm():
    """Test the _check_feedback_with_llm helper function."""
    # Create a PR handler instance
    handler = PRHandler("test-owner", "test-repo", "test-token")

    # Create mock LLM config
    llm_config = LLMConfig(model="test-model", api_key="test-key")

    # Test cases for different LLM responses
    test_cases = [
        {
            "response": "--- success\ntrue\n--- explanation\nChanges look good",
            "expected": (True, "Changes look good"),
        },
        {
            "response": "--- success\nfalse\n--- explanation\nNot all issues fixed",
            "expected": (False, "Not all issues fixed"),
        },
        {
            "response": "Invalid response format",
            "expected": (
                False,
                "Failed to decode answer from LLM response: Invalid response format",
            ),
        },
        {
            "response": "--- success\ntrue\n--- explanation\nMultiline\nexplanation\nhere",
            "expected": (True, "Multiline\nexplanation\nhere"),
        },
    ]
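
    # The cases above cover a passing result, a failing result, a malformed
    # response (which should fall back to a decode-failure explanation), and
    # a multiline explanation.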

    for case in test_cases:
        # Mock the LLM response
        mock_response = MagicMock()
        mock_response.choices = [MagicMock(message=MagicMock(content=case["response"]))]

        # Test the function
        with patch("litellm.completion", return_value=mock_response):
            success, explanation = handler._check_feedback_with_llm(
                "test prompt", llm_config
            )
            assert (success, explanation) == case["expected"]


def test_check_review_thread():
    """Test the _check_review_thread helper function."""
    # Create a PR handler instance
    handler = PRHandler("test-owner", "test-repo", "test-token")

    # Create test data
    review_thread = ReviewThread(
        comment="Please fix the formatting\n---\nlatest feedback:\nAdd docstrings",
        files=["/src/file1.py", "/src/file2.py"],
    )
    issues_context = json.dumps(
        ["Issue 1 description", "Issue 2 description"], indent=4
    )
    last_message = "I have fixed the formatting and added docstrings"
    llm_config = LLMConfig(model="test-model", api_key="test-key")

    # Mock the LLM response
    mock_response = MagicMock()
    mock_response.choices = [
        MagicMock(
            message=MagicMock(
                content="""--- success
true
--- explanation
Changes look good"""
            )
        )
    ]

    # Test the function
    with patch("litellm.completion") as mock_completion:
        mock_completion.return_value = mock_response
        success, explanation = handler._check_review_thread(
            review_thread, issues_context, last_message, llm_config
        )

        # Verify the litellm.completion() call
        mock_completion.assert_called_once()
        call_args = mock_completion.call_args
        prompt = call_args[1]["messages"][0]["content"]

        # Check prompt content
        assert "Issue descriptions:\n" + issues_context in prompt
        assert "Feedback:\n" + review_thread.comment in prompt
        assert (
            "Files locations:\n" + json.dumps(review_thread.files, indent=4) in prompt
        )
        assert "Last message from AI agent:\n" + last_message in prompt

        # Check result
        assert success is True
        assert explanation == "Changes look good"


def test_check_thread_comments():
    """Test the _check_thread_comments helper function."""
    # Create a PR handler instance
    handler = PRHandler("test-owner", "test-repo", "test-token")

    # Create test data
    thread_comments = [
        "Please improve error handling",
        "Add input validation",
        "latest feedback:\nHandle edge cases",
    ]
    issues_context = json.dumps(
        ["Issue 1 description", "Issue 2 description"], indent=4
    )
    last_message = "I have added error handling and input validation"
    llm_config = LLMConfig(model="test-model", api_key="test-key")

    # Mock the LLM response
    mock_response = MagicMock()
    mock_response.choices = [
        MagicMock(
            message=MagicMock(
                content="""--- success
true
--- explanation
Changes look good"""
            )
        )
    ]

    # Test the function
    with patch("litellm.completion") as mock_completion:
        mock_completion.return_value = mock_response
        success, explanation = handler._check_thread_comments(
            thread_comments, issues_context, last_message, llm_config
        )

        # Verify the litellm.completion() call
        mock_completion.assert_called_once()
        call_args = mock_completion.call_args
        prompt = call_args[1]["messages"][0]["content"]

        # Check prompt content
        assert "Issue descriptions:\n" + issues_context in prompt
        assert "PR Thread Comments:\n" + "\n---\n".join(thread_comments) in prompt
        assert "Last message from AI agent:\n" + last_message in prompt

        # Check result
        assert success is True
        assert explanation == "Changes look good"


def test_check_review_comments():
    """Test the _check_review_comments helper function."""
    # Create a PR handler instance
    handler = PRHandler("test-owner", "test-repo", "test-token")

    # Create test data
    review_comments = [
        "Please improve code readability",
        "Add comments to complex functions",
        "Follow PEP 8 style guide",
    ]
    issues_context = json.dumps(
        ["Issue 1 description", "Issue 2 description"], indent=4
    )
    last_message = "I have improved code readability and added comments"
    llm_config = LLMConfig(model="test-model", api_key="test-key")

    # Mock the LLM response
    mock_response = MagicMock()
    mock_response.choices = [
        MagicMock(
            message=MagicMock(
                content="""--- success
true
--- explanation
Changes look good"""
            )
        )
    ]

    # Test the function
    with patch("litellm.completion") as mock_completion:
        mock_completion.return_value = mock_response
        success, explanation = handler._check_review_comments(
            review_comments, issues_context, last_message, llm_config
        )

        # Verify the litellm.completion() call
        mock_completion.assert_called_once()
        call_args = mock_completion.call_args
        prompt = call_args[1]["messages"][0]["content"]

        # Check prompt content
        assert "Issue descriptions:\n" + issues_context in prompt
        assert "PR Review Comments:\n" + "\n---\n".join(review_comments) in prompt
        assert "Last message from AI agent:\n" + last_message in prompt

        # Check result
        assert success is True
        assert explanation == "Changes look good"


def test_guess_success_review_comments_litellm_call():
    """Test that the litellm.completion() call for review comments contains the expected content."""
    # Create a PR handler instance
    handler = PRHandler("test-owner", "test-repo", "test-token")

    # Create a mock issue with review comments
    issue = GithubIssue(
        owner="test-owner",
        repo="test-repo",
        number=1,
        title="Test PR",
        body="Test Body",
        thread_comments=None,
        closing_issues=["Issue 1 description", "Issue 2 description"],
        review_comments=[
            "Please improve code readability",
            "Add comments to complex functions",
            "Follow PEP 8 style guide",
        ],
        thread_ids=None,
        head_branch="test-branch",
    )
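
    # As with thread comments, all review comments are evaluated together in a
    # single completion call (assert_called_once below).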

    # Create mock history with a detailed response
    history = [
        MessageAction(
            content="""I have made the following changes:
1. Improved code readability by breaking down complex functions
2. Added detailed comments to all complex functions
3. Fixed code style to follow PEP 8"""
        )
    ]

    # Create mock LLM config
    llm_config = LLMConfig(model="test-model", api_key="test-key")

    # Mock the LLM response
    mock_response = MagicMock()
    mock_response.choices = [
        MagicMock(
            message=MagicMock(
                content="""--- success
true
--- explanation
The changes successfully address the feedback."""
            )
        )
    ]

    # Test the guess_success method
    with patch("litellm.completion") as mock_completion:
        mock_completion.return_value = mock_response
        success, success_list, explanation = handler.guess_success(
            issue, history, llm_config
        )

        # Verify the litellm.completion() call
        mock_completion.assert_called_once()
        call_args = mock_completion.call_args
        prompt = call_args[1]["messages"][0]["content"]

        # Check prompt content
        assert (
            "Issue descriptions:\n"
            + json.dumps(["Issue 1 description", "Issue 2 description"], indent=4)
            in prompt
        )
        assert "PR Review Comments:\n" + "\n---\n".join(issue.review_comments) in prompt
        assert "Last message from AI agent:\n" + history[0].content in prompt