Files
gpt-pilot/pilot/helpers/agents/test_Developer.py
Senko Rasic bed86fa755 fix tests
2023-11-03 21:55:54 +01:00

258 lines
11 KiB
Python

import builtins
import json
import os
import pytest
from unittest.mock import patch, MagicMock
import requests
from helpers.AgentConvo import AgentConvo
from dotenv import load_dotenv
load_dotenv()
from main import get_custom_print
from .Developer import Developer, ENVIRONMENT_SETUP_STEP
from test.mock_questionary import MockQuestionary
from helpers.test_Project import create_project
class TestDeveloper:
    """Tests for ``Developer`` with the LLM call, persistence helpers and CLI patched out.

    Stacked ``@patch`` decorators pass their mocks to the test bottom-up: the
    decorator closest to the ``def`` supplies the first mock argument after
    ``self``. Every test below names its mock parameters in that order.
    """

    def setup_method(self):
        """Create a project in the environment-setup step and a ``Developer`` bound to it."""
        # The second value returned by get_custom_print() is not needed by these tests.
        # NOTE(review): builtins.print stays replaced after the test ends - confirm this is intended.
        builtins.print, _ = get_custom_print({})

        self.project = create_project()
        self.project.app_id = 'test-developer'
        self.project.name = 'TestDeveloper'
        workspace = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                                 '../../../workspace/TestDeveloper')
        self.project.set_root_path(os.path.abspath(workspace))
        self.project.technologies = []
        self.project.current_step = ENVIRONMENT_SETUP_STEP
        self.developer = Developer(self.project)

    @pytest.mark.uses_tokens
    @patch('helpers.AgentConvo.get_saved_development_step')
    @patch('helpers.AgentConvo.save_development_step')
    @patch('helpers.AgentConvo.create_gpt_chat_completion',
           return_value={'text': '{"command": "python --version", "timeout": 10}'})
    @patch('helpers.cli.execute_command', return_value=('', 'DONE', None))
    def test_install_technology(self, mock_execute_command,
                                mock_completion, mock_save, mock_get_saved_step):
        """The command proposed by the (mocked) LLM is executed and its result is returned."""
        # Given
        self.developer.convo_os_specific_tech = AgentConvo(self.developer)

        # When
        llm_response = self.developer.install_technology('python')

        # Then
        assert llm_response == 'DONE'
        mock_execute_command.assert_called_once_with(self.project, 'python --version', timeout=10, command_id=None)

    @patch('helpers.AgentConvo.get_saved_development_step')
    @patch('helpers.AgentConvo.save_development_step')
    @patch('helpers.AgentConvo.create_gpt_chat_completion',
           return_value={'text': '{"tasks": [{"command": "ls -al"}]}'})
    def test_implement_task(self, mock_completion, mock_save, mock_get_saved_step):
        """The task list in the LLM response is parsed and handed to ``execute_task()``."""
        # Given any project
        project = create_project()
        project.project_description = 'Test Project'
        project.development_plan = [{
            'description': 'Do stuff',
            'user_review_goal': 'Do stuff',
        }]
        project.get_all_coded_files = lambda: []
        project.current_step = 'test'

        # and a developer who will execute any task
        developer = Developer(project)
        developer.execute_task = MagicMock()
        developer.execute_task.return_value = {'success': True}

        # When
        developer.implement_task(0, {'description': 'Do stuff'})

        # Then we parse the response correctly and send list of steps to execute_task()
        assert developer.execute_task.call_count == 1
        assert developer.execute_task.call_args[0][1] == [{'command': 'ls -al'}]

    @patch('helpers.AgentConvo.get_saved_development_step')
    @patch('helpers.AgentConvo.save_development_step')
    @patch('helpers.AgentConvo.create_gpt_chat_completion',
           return_value={'text': '{"tasks": [{"command": "ls -al"}, {"command": "ls -al src"}, {"command": "ls -al test"}, {"command": "ls -al build"}]}'})
    def test_implement_task_reject_with_user_input(self, mock_completion, mock_save, mock_get_saved_step):
        """A step rejected with user input is fed back to the LLM and the task is executed again."""
        # Given any project
        project = create_project()
        project.project_description = 'Test Project'
        project.development_plan = [{
            'description': 'Do stuff',
            'user_review_goal': 'Do stuff',
        }]
        project.get_all_coded_files = lambda: []
        project.current_step = 'test'

        # and a developer who will execute any task except for `ls -al test`
        developer = Developer(project)
        developer.execute_task = MagicMock()
        developer.execute_task.side_effect = [
            {'success': False, 'step_index': 2, 'user_input': 'no, use a better command'},
            {'success': True}
        ]

        # When
        developer.implement_task(0, {'description': 'Do stuff'})

        # Then we include the user input in the conversation to update the task list
        assert mock_completion.call_count == 3
        prompt = mock_completion.call_args_list[2].args[0][2]['content']
        # The literal below is intentionally flush-left: its lines are compared verbatim with the prompt.
        assert prompt.startswith('''
# Completed Task Steps:
```
[{'command': 'ls -al'}, {'command': 'ls -al src'}]
```
# Current Step:
This step will not be executed. no, use a better command
```
{'command': 'ls -al test'}
```
# Next Task Steps:
```
[{'command': 'ls -al build'}]
```'''.lstrip())
        assert 'no, use a better command' in prompt

        # and call `execute_task()` again
        assert developer.execute_task.call_count == 2

    @patch('helpers.AgentConvo.get_saved_development_step')
    @patch('helpers.AgentConvo.save_development_step')
    # GET_TEST_TYPE has optional properties, so we need to be able to handle missing args.
    @patch('helpers.AgentConvo.create_gpt_chat_completion',
           return_value={'text': '{"type": "command_test", "command": {"command": "npm run test", "timeout": 3000}}'})
    # 2nd arg of return_value: `None` to debug, 'DONE' if successful
    @patch('helpers.cli.execute_command', return_value=('stdout:\n```\n\n```', 'DONE', None))
    # @patch('helpers.cli.ask_user', return_value='yes')
    # @patch('helpers.cli.get_saved_command_run')
    def test_code_changes_command_test(self,
                                       # Note: switching to `None` above will use the LLM to debug. In that case
                                       # uncomment the two @patches above and accept their mocks first
                                       # (mocks are passed bottom-up):
                                       # mock_get_saved_command_run, mock_ask_user,
                                       mock_execute_command, mock_chat_completion, mock_save, mock_get_saved_step):
        """A ``command_test`` response whose command reports 'DONE' yields a successful result."""
        # Given
        monkey = None
        convo = AgentConvo(self.developer)
        convo.save_branch = lambda branch_name=None: branch_name

        # When
        # "Now, we need to verify if this change was successfully implemented...
        result = self.developer.test_code_changes(monkey, convo)

        # Then
        assert result == {'success': True, 'cli_response': 'stdout:\n```\n\n```'}

    @patch('helpers.AgentConvo.get_saved_development_step')
    @patch('helpers.AgentConvo.save_development_step')
    # GET_TEST_TYPE has optional properties, so we need to be able to handle missing args.
    @patch('helpers.AgentConvo.create_gpt_chat_completion',
           return_value={'text': '{"type": "manual_test", "manual_test_description": "Does it look good?"}'})
    @patch('helpers.Project.ask_user', return_value='continue')
    def test_code_changes_manual_test_continue(self, mock_ask_user, mock_chat_completion, mock_save,
                                               mock_get_saved_step):
        """A ``manual_test`` response answered with 'continue' yields a successful result."""
        # Given
        monkey = None
        convo = AgentConvo(self.developer)
        convo.save_branch = lambda branch_name=None: branch_name

        # When
        result = self.developer.test_code_changes(monkey, convo)

        # Then
        assert result == {'success': True, 'user_input': 'continue'}

    @pytest.mark.skip(reason="endless loop in questionary")
    @patch('helpers.AgentConvo.get_saved_development_step')
    @patch('helpers.AgentConvo.save_development_step')
    @patch('helpers.AgentConvo.create_gpt_chat_completion')
    @patch('utils.questionary.get_saved_user_input')
    # https://github.com/Pythagora-io/gpt-pilot/issues/35
    def test_code_changes_manual_test_no(self, mock_get_saved_user_input, mock_chat_completion, mock_save,
                                         mock_get_saved_step):
        """A ``manual_test`` response answered with 'no' (currently skipped, see the skip reason)."""
        # Given
        monkey = None
        convo = AgentConvo(self.developer)
        convo.save_branch = lambda branch_name=None: branch_name
        convo.load_branch = lambda function_uuid=None: function_uuid
        self.project.developer = self.developer

        # Successive LLM responses, consumed one per call
        mock_chat_completion.side_effect = [
            {'text': '{"type": "manual_test", "manual_test_description": "Does it look good?"}'},
            {'text': '{"thoughts": "hmmm...", "reasoning": "testing", "steps": [{"type": "command", "command": {"command": "something scary", "timeout": 3000}, "check_if_fixed": true}]}'},
            {'text': 'do something else scary'},
        ]

        mock_questionary = MockQuestionary(['no', 'no'])

        with patch('utils.questionary.questionary', mock_questionary):
            # When
            result = self.developer.test_code_changes(monkey, convo)

            # Then
            assert result == {'success': True, 'user_input': 'no'}

    @patch('helpers.cli.execute_command', return_value=('stdout:\n```\n\n```', 'DONE', None))
    @patch('helpers.AgentConvo.get_saved_development_step')
    @patch('helpers.AgentConvo.save_development_step')
    @patch('utils.llm_connection.requests.post')
    @patch('utils.questionary.get_saved_user_input')
    def test_test_code_changes_invalid_json(self, mock_get_saved_user_input,
                                            mock_requests_post,
                                            mock_save,
                                            mock_get_saved_step,
                                            mock_execute,
                                            monkeypatch):
        """Responses with an invalid ``type`` are re-requested until a valid one arrives."""
        # Given
        monkey = None
        convo = AgentConvo(self.developer)
        convo.save_branch = lambda branch_name=None: branch_name
        convo.load_branch = lambda function_uuid=None: function_uuid
        self.project.developer = self.developer

        # we send a GET_TEST_TYPE spec, but the 1st response is invalid
        types_in_response = ['command', 'wrong_again', 'command_test']
        json_received = []

        def generate_response(*args, **kwargs):
            """Record the request's message contents and return the next canned streamed response."""
            # Copy messages, including the validation errors from the request
            content = [msg['content'] for msg in kwargs['json']['messages']]
            json_received.append(content)

            gpt_response = json.dumps({
                'type': types_in_response.pop(0),
                'command': {
                    'command': 'node server.js',
                    'timeout': 3000
                }
            })
            # A single streamed line carrying the whole response as one delta
            line = json.dumps({'choices': [{'delta': {'content': gpt_response}}]}).encode('utf-8')

            response = requests.Response()
            response.status_code = 200
            response.iter_lines = lambda: [line]
            print(f'##### mock response: {response}')
            return response

        mock_requests_post.side_effect = generate_response
        monkeypatch.setenv('OPENAI_API_KEY', 'secret')

        # mock_questionary = MockQuestionary([''])

        # with patch('utils.questionary.questionary', mock_questionary):
        # When
        result = self.developer.test_code_changes(monkey, convo)

        # Then
        assert result == {'success': True, 'cli_response': 'stdout:\n```\n\n```'}
        assert mock_requests_post.call_count == 3
        # The 2nd request must carry the validation error produced for the 1st response
        expected_error = ("The JSON is invalid at $.type - 'command' is not one of "
                          "['automated_test', 'command_test', 'manual_test', 'no_test']")
        assert expected_error in json_received[1][3]
        assert mock_execute.call_count == 1