Implemented testing of code changes

Zvonimir Sabljic
2023-08-03 11:20:50 +02:00
parent 37bf5b1fc4
commit 38a50f39f5
6 changed files with 80 additions and 20 deletions

View File

@@ -351,4 +351,47 @@ IMPLEMENT_CHANGES = {
     'functions': {
         'save_files': lambda files: files
     }
 }
+
+GET_TEST_TYPE = {
+    'definitions': [{
+        'name': 'test_changes',
+        'description': 'Tests the changes based on the test type.',
+        'parameters': {
+            'type': 'object',
+            'properties': {
+                'type': {
+                    'type': 'string',
+                    'description': 'Type of a test that needs to be run. It can be "automated_test", "command_test" or "manual_test".',
+                    'enum': ['automated_test', 'command_test', 'manual_test']
+                },
+                'command': {
+                    'type': 'object',
+                    'description': 'Command that needs to be run to test the changes. This should be used only if the test type is "command_test". Remember, if you need to run a command that doesn\'t finish by itself (e.g. a command to run an app), set the timeout to 3 seconds.',
+                    'properties': {
+                        'command': {
+                            'type': 'string',
+                            'description': 'Command that needs to be run to test the changes.',
+                        },
+                        'timeout': {
+                            'type': 'number',
+                            'description': 'Timeout in seconds that represents the approximate time this command takes to finish. If you need to run a command that doesn\'t finish by itself (e.g. a command to run an app), set the timeout to 3 seconds.',
+                        }
+                    },
+                },
+                'automated_test_description': {
+                    'type': 'string',
+                    'description': 'Description of an automated test that needs to be run to test the changes. This should be used only if the test type is "automated_test".',
+                },
+                'manual_test_description': {
+                    'type': 'string',
+                    'description': 'Description of a manual test that needs to be run to test the changes. This should be used only if the test type is "manual_test".',
+                }
+            },
+            'required': ['type'],
+        },
+    }],
+    'functions': {
+        'test_changes': lambda type, command=None, automated_test_description=None, manual_test_description=None: (type, command, automated_test_description, manual_test_description)
+    }
+}
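Note: the `'functions'` entry is what flattens the model's `test_changes` function call into plain Python values; `send_message` presumably looks up this handler and invokes it with the call's arguments. A minimal sketch of that flattening, with a made-up payload (the argument values are illustrative only, and `GET_TEST_TYPE` from above is assumed to be in scope):

```python
# Hypothetical function-call arguments, shaped per the schema above.
args = {
    'type': 'command_test',
    'command': {'command': 'npm test', 'timeout': 3},
}

# The registered handler flattens the call into a 4-tuple; optional
# arguments the model omitted default to None.
handler = GET_TEST_TYPE['functions']['test_changes']
test_type, command, automated_desc, manual_desc = handler(**args)

assert test_type == 'command_test'
assert command == {'command': 'npm test', 'timeout': 3}
assert automated_desc is None and manual_desc is None
```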

View File

@@ -40,4 +40,19 @@ class CodeMonkey(Agent):
         for file_data in changes:
             update_file(self.project.get_full_file_path(file_data['name']), file_data['content'])
 
-        self.developer.test_changes()
-        self.developer.run_test_and_debug()
+        self.developer.test_code_changes(self, convo)
+
+    def implement_test(self, convo, automated_test_description):
+        files_needed = convo.send_message('development/task/request_test_files.prompt', {
+            "testing_files_tree": self.project.get_directory_tree(),
+        }, GET_FILES)
+
+        changes = convo.send_message('development/write_automated_test.prompt', {
+            "files": self.project.get_files(files_needed),
+        }, IMPLEMENT_CHANGES)
+
+        for file_data in changes:
+            update_file(self.project.get_full_file_path(file_data['name']), file_data['content'])
+
+        self.developer.run_all_tests_and_debug()
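The shapes of the two LLM responses that `implement_test` consumes are not shown in this diff; judging by the write-out loop, `GET_FILES` yields a list of project-relative paths and `IMPLEMENT_CHANGES` a list of dicts with `name` and `content` keys. A dependency-free sketch of the write-out step under those assumptions (paths and file contents are invented):

```python
import os

# Assumed response shapes, inferred from the loop above; values invented.
files_needed = ['tests/user.test.js']  # GET_FILES result
changes = [                            # IMPLEMENT_CHANGES result
    {'name': 'tests/user.test.js',
     'content': "test('creates a user', () => { /* ... */ });"},
]

def update_file(full_path, content):
    # Stand-in for the helper used above: write the file, creating
    # any missing parent directories first.
    os.makedirs(os.path.dirname(full_path), exist_ok=True)
    with open(full_path, 'w', encoding='utf-8') as f:
        f.write(content)

project_root = '/tmp/example-project'  # hypothetical project path
for file_data in changes:
    update_file(os.path.join(project_root, file_data['name']),
                file_data['content'])
```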

View File

@@ -7,7 +7,7 @@ from helpers.Agent import Agent
 from helpers.AgentConvo import AgentConvo
 from utils.utils import execute_step, array_of_objects_to_string, generate_app_data
 from helpers.cli import build_directory_tree, run_command_until_success, execute_command_and_check_cli_response
-from const.function_calls import FILTER_OS_TECHNOLOGIES, DEVELOPMENT_PLAN, EXECUTE_COMMANDS, DEV_STEPS
+from const.function_calls import FILTER_OS_TECHNOLOGIES, DEVELOPMENT_PLAN, EXECUTE_COMMANDS, DEV_STEPS, GET_TEST_TYPE
 from database.database import save_progress, get_progress_steps
 from utils.utils import get_os_info
 from helpers.cli import execute_command
@@ -163,19 +163,19 @@ class Developer(Agent):
         code_monkey = CodeMonkey(self.project, self)
         code_monkey.implement_code_changes(code_changes_description)
 
-    def test_code_changes(self, code_changes_description):
-        verification_type = convo.send_message('development/step_check.prompt', {
-            "instructions": code_changes_description,
-            "directory_tree": self.project.get_directory_tree(),
-            "files": self.project.get_files(files_needed),
-        }, CHANGE_VERIFICATION)
-
-        if verification_type == 'command':
-            pass
-        elif verification_type == 'automated_test':
-            pass
-        elif verification_type == 'manual_test':
-            pass
+    def test_code_changes(self, code_monkey, convo):
+        (test_type, command, automated_test_description, manual_test_description) = convo.send_message('development/task/step_check.prompt', {}, GET_TEST_TYPE)
+
+        if test_type == 'command_test':
+            run_command_until_success(command['command'], command['timeout'], convo)
+        elif test_type == 'automated_test':
+            code_monkey.implement_test(convo, automated_test_description)
+        elif test_type == 'manual_test':
+            # TODO make the message better
+            self.project.ask_for_human_verification(
+                'Message from Euclid: I need your help. Can you please test if this was successful?',
+                manual_test_description
+            )
 
     def implement_step(self, convo, step_index, type, description):
         # TODO remove hardcoded folder path
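The three-way dispatch above is the heart of this commit. A dependency-free mirror of it, for illustration only: the helper names in the comments are this commit's, while `dispatch_test` itself is hypothetical and returns a description of the action instead of performing it.

```python
def dispatch_test(test_type, command=None, automated_test_description=None,
                  manual_test_description=None):
    # Hypothetical stand-in for Developer.test_code_changes.
    if test_type == 'command_test':
        # real code: run_command_until_success(command['command'], command['timeout'], convo)
        return ('run', command['command'], command['timeout'])
    elif test_type == 'automated_test':
        # real code: code_monkey.implement_test(convo, automated_test_description)
        return ('write_test', automated_test_description)
    elif test_type == 'manual_test':
        # real code: self.project.ask_for_human_verification(...)
        return ('ask_human', manual_test_description)
    raise ValueError(f'unknown test type: {test_type!r}')

print(dispatch_test('command_test', command={'command': 'npm test', 'timeout': 3}))
# -> ('run', 'npm test', 3)
```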

View File

@@ -1,2 +1,4 @@
 Ok, now, I will show you the list of all files with automated tests that are written so far, and I want you to tell me which automated tests you want to see so that you can appropriately modify tests or create new ones.
 
 {{ testing_files_tree }}
+
+Remember, ask for files relative to the project root. For example, if you need a file with path `{project_root}/models/model.py`, you need to request the file `models/model.py`.
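A hypothetical reply that would satisfy this prompt, assuming `GET_FILES` resolves to a plain list of paths (the paths are invented; the actual list depends on the project):

```python
# A well-formed GET_FILES answer to the prompt above: paths relative
# to the project root, per the instruction it adds.
files_needed = ['models/model.py', 'tests/models/model.test.js']
```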

View File

@@ -2,7 +2,7 @@
 Here are the requested files:
 
 {% for file in files %}
 **{{ file.name }}**
-```{{ file.language }}
+```{# file.language #}
 {{ file.content }}
 ```
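The change on the fence line swaps `{{ file.language }}` (print the value) for `{# file.language #}` (a Jinja comment), so rendered code fences now open without a language tag. A quick check with jinja2, using an invented file entry; the template is reproduced inline with an added `{% endfor %}`, since the diff cuts off before the loop closes:

```python
from jinja2 import Template

# {# ... #} renders to nothing, so the opening fence carries no
# language tag and the 'language' key is no longer needed.
template_source = (
    'Here are the requested files:\n'
    '{% for file in files %}\n'
    '**{{ file.name }}**\n'
    '```{# file.language #}\n'
    '{{ file.content }}\n'
    '```\n'
    '{% endfor %}\n'
)

print(Template(template_source).render(
    files=[{'name': 'models/model.py', 'content': 'class Model: ...'}]
))
```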

View File

@@ -1,8 +1,8 @@
 Now, we need to verify if this change was successfully implemented. We can do that in 3 ways:
 
-1. By writing an automated test or by running a previously written test - this is the preferred way since we can then test if this functionality works in the future. You write automated tests in Jest and you always try to find a way to test a functionality with an automated test. Even if changes seem visual or UI-based, try to find a way to validate them using an automated test, such as verifying HTTP responses or elements rendered on the page. If you think we can write an automated test, start the response with `AUTOMATED_TEST`.
-2. By running a command - this is good for when an automated test is overkill. For example, if we installed a new package or changed some configuration. If you just want to run a command (or multiple commands), respond with `COMMAND_TEST: {explanation on how to test this with a command}`. Keep in mind that in this case, there shouldn't be any human intervention needed - I will run the commands you give me and show you the CLI output, and from that, you should be able to determine if the test passed or failed.
-3. By requesting that a human checks if everything works as expected - this is the last option that we want to avoid, but if we can't test the functionality programmatically, we should ask a human to check if it works as expected. For example, if something was visually changed in the UI. If you need a human to check the functionality, start the response with `MANUAL_TEST`.
+1. By writing an automated test or by running a previously written test - this is the preferred way since we can then test if this functionality works in the future. You write automated tests in Jest and you always try to find a way to test a functionality with an automated test. Even if changes seem visual or UI-based, try to find a way to validate them using an automated test, such as verifying HTTP responses or elements rendered on the page.
+2. By running a command (or multiple commands) - this is good for when an automated test is overkill. For example, if we installed a new package or changed some configuration. Keep in mind that in this case, there shouldn't be any human intervention needed - I will run the commands you give me and show you the CLI output, and from that, you should be able to determine if the test passed or failed.
+3. By requesting that a human checks if everything works as expected - this is the last option that we want to avoid, but if we can't test the functionality programmatically, we should ask a human to check if it works as expected. For example, if something was visually changed in the UI.
 
 Ok, now, tell me how we can verify if this change was successful and respond only with a keyword for the type of test.