LeonOstrez
2023-08-03 17:27:34 +02:00
12 changed files with 194 additions and 75 deletions

View File

@@ -162,7 +162,7 @@ DEV_STEPS = {
'description': f'List of files that need to be analyzed to implement the required changes.',
'items': {
'type': 'string',
'description': f'A single file name that needs to be analyzed to implement the required changes.',
'description': f'A single file name that needs to be analyzed to implement the required changes. Remember, this is a file name with a path relative to the project root. For example, if a file path is `{{project_root}}/models/model.py`, this value needs to be `models/model.py`.',
}
}
},
@@ -178,6 +178,52 @@ DEV_STEPS = {
},
}
CODE_CHANGES = {
'definitions': [
{
'name': 'break_down_development_task',
'description': 'Implements all the smaller tasks that need to be done to complete the entire development task.',
'parameters': {
'type': 'object',
"properties": {
"tasks": {
'type': 'array',
'description': 'List of smaller development steps that need to be done to complete the entire task.',
'items': {
'type': 'object',
'description': 'A smaller development step that needs to be done to complete the entire task. Remember, if you need to run a command that doesn\'t finish by itself (e.g. a command to run an app), set the timeout to 3 seconds.',
'properties': {
'type': {
'type': 'string',
'enum': ['command', 'code_change'],
'description': 'Type of the development step that needs to be done to complete the entire task - it can be "command" or "code_change".',
},
'command': {
'type': 'string',
'description': 'Command that needs to be run to complete the current task. This should be used only if the task is of type "command".',
},
'command_timeout': {
'type': 'number',
'description': 'Timeout in seconds that represents the approximate time the command takes to finish. This should be used only if the task is of type "command". If you need to run a command that doesn\'t finish by itself (e.g. a command to run an app), set the timeout to 3 seconds.',
},
'code_change_description': {
'type': 'string',
'description': 'Description of the development step that needs to be done. This should be used only if the task is of type "code_change" and it should thoroughly describe what needs to be done to implement the code change.',
},
},
'required': ['type'],
}
}
},
"required": ['tasks'],
},
}
],
'functions': {
'break_down_development_task': lambda tasks: tasks,
},
}
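For context, the `definitions` list above is the JSON Schema handed to OpenAI's function-calling API, while the `functions` map post-processes the parsed arguments. A minimal, purely illustrative sketch of what the model's arguments for `break_down_development_task` could look like, and how the mapper is applied (assuming the `CODE_CHANGES` dict above is in scope; the task values are made up):
```python
# Hypothetical arguments the model might return for break_down_development_task,
# shaped by the CODE_CHANGES schema above (values are illustrative only).
example_arguments = {
    'tasks': [
        {
            'type': 'command',
            'command': 'npm install express',
            'command_timeout': 60,
        },
        {
            'type': 'code_change',
            'code_change_description': 'Add a /health route to app.js that returns HTTP 200.',
        },
    ]
}

# The 'functions' mapper simply passes the parsed task list through to the caller.
steps = CODE_CHANGES['functions']['break_down_development_task'](**example_arguments)
assert steps == example_arguments['tasks']
```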
DEVELOPMENT_PLAN = {
'definitions': [{
'name': 'implement_development_plan',
@@ -226,7 +272,7 @@ EXECUTE_COMMANDS = {
'properties': {
'commands': {
'type': 'array',
'description': f'List of commands that need to be executed.',
'description': f'List of commands that need to be executed. Remember, if you need to run a command that doesn\'t finish by itself (e.g. a command to run an app), set the timeout to 3 seconds.',
'items': {
'type': 'object',
'properties': {
@@ -236,7 +282,7 @@ EXECUTE_COMMANDS = {
},
'timeout': {
'type': 'number',
'description': f'Timeout in seconds that represents the approximate time this command takes to finish.',
'description': f'Timeout in seconds that represents the approximate time this command takes to finish. If you need to run a command that doesn\'t finish by itself (e.g. a command to run an app), set the timeout to 3 seconds.',
}
}
}
@@ -262,7 +308,7 @@ GET_FILES = {
'description': f'List of files that need to be analyzed to implement the required changes.',
'items': {
'type': 'string',
'description': f'A single file name that needs to be analyzed to implement the required changes.',
'description': f'A single file name that needs to be analyzed to implement the required changes. Remember, this is a file name with a path relative to the project root. For example, if a file path is `{{project_root}}/models/model.py`, this value needs to be `models/model.py`.',
}
}
},
@@ -305,4 +351,47 @@ IMPLEMENT_CHANGES = {
'functions': {
'save_files': lambda files: files
}
}
GET_TEST_TYPE = {
'definitions': [{
'name': 'test_changes',
'description': f'Tests the changes based on the test type.',
'parameters': {
'type': 'object',
'properties': {
'type': {
'type': 'string',
'description': f'Type of test that needs to be run. It can be "automated_test", "command_test" or "manual_test".',
'enum': ['automated_test', 'command_test', 'manual_test']
},
'command': {
'type': 'object',
'description': 'Command that needs to be run to test the changes. This should be used only if the test type is "command_test". Remember, if you need to run a command that doesn\'t finish by itself (e.g. a command to run an app), set the timeout to 3 seconds.',
'properties': {
'command': {
'type': 'string',
'description': 'Command that needs to be run to test the changes.',
},
'timeout': {
'type': 'number',
'description': 'Timeout in seconds that represents the approximate time this command takes to finish. If you need to run a command that doesn\'t finish by itself (e.g. a command to run an app), set the timeout to 3 seconds.',
}
},
},
'automated_test_description': {
'type': 'string',
'description': 'Description of an automated test that needs to be run to test the changes. This should be used only if the test type is "automated_test".',
},
'manual_test_description': {
'type': 'string',
'description': 'Description of a manual test that needs to be run to test the changes. This should be used only if the test type is "manual_test".',
}
},
'required': ['type'],
},
}],
'functions': {
'test_changes': lambda type, command=None, automated_test_description=None, manual_test_description=None: (type, command, automated_test_description, manual_test_description)
}
}
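These schema/mapper pairs are all consumed the same way: the model's `function_call` is parsed and dispatched to the matching lambda. A hedged sketch of that glue (the helper name is hypothetical; the real parsing lives in `llm_connection`):
```python
import json

def dispatch_function_call(function_calls, function_call_message):
    """Hypothetical helper: route an OpenAI function_call response to the
    matching lambda in a structure like GET_TEST_TYPE or CODE_CHANGES."""
    name = function_call_message['name']
    arguments = json.loads(function_call_message['arguments'])  # arguments arrive as a JSON string
    return function_calls['functions'][name](**arguments)

# Illustrative usage with GET_TEST_TYPE, assuming the model chose a command test:
# dispatch_function_call(GET_TEST_TYPE, {
#     'name': 'test_changes',
#     'arguments': '{"type": "command_test", "command": {"command": "npm test", "timeout": 10}}',
# })
# -> ('command_test', {'command': 'npm test', 'timeout': 10}, None, None)
```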

View File

@@ -29,16 +29,18 @@ class AgentConvo:
self.messages.append({"role": "user", "content": prompt})
# check if we already have the LLM response saved
saved_checkpoint = get_development_step_from_messages(self.agent.project.args['app_id'], self.messages)
if saved_checkpoint is not None:
development_step = get_development_step_from_messages(self.agent.project.args['app_id'], self.messages)
if development_step is not None:
# if we do, use it
response = saved_checkpoint.llm_response
self.messages = saved_checkpoint.messages
self.agent.project.restore_files(development_step.id)
response = development_step.llm_response
self.messages = development_step.messages
else:
# if we don't, get the response from LLM
response = create_gpt_chat_completion(self.messages, self.high_level_step, function_calls=function_calls)
save_development_step(self.agent.project.args['app_id'], self.messages, response)
development_step = save_development_step(self.agent.project.args['app_id'], self.messages, response)
self.agent.project.save_files_snapshot(development_step.id)
# TODO handle errors from OpenAI
if response == {}:
raise Exception("OpenAI API error happened.")
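The change above keys saved development steps on the full message history, so an identical conversation prefix replays the stored LLM response and restores the matching files snapshot instead of calling the API again. A hedged sketch of that checkpointing idea (`get_development_step_from_messages` is not shown in this diff, so the key derivation and storage below are assumptions):
```python
import hashlib
import json

def messages_key(messages):
    """Hypothetical illustration: derive a stable key from the message history so
    the same conversation prefix always maps to the same saved development step."""
    canonical = json.dumps(messages, sort_keys=True)
    return hashlib.sha256(canonical.encode('utf-8')).hexdigest()

# Sketch of the caching pattern in send_message (names mirror the diff, storage is assumed):
# step = steps_by_key.get(messages_key(self.messages))
# if step is not None:
#     self.agent.project.restore_files(step.id)        # roll project files back to that checkpoint
#     response, self.messages = step.llm_response, step.messages
# else:
#     response = create_gpt_chat_completion(self.messages, self.high_level_step, function_calls=function_calls)
#     step = save_development_step(self.agent.project.args['app_id'], self.messages, response)
#     self.agent.project.save_files_snapshot(step.id)   # snapshot files alongside the saved response
```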

View File

@@ -1,5 +1,8 @@
import os
from termcolor import colored
from const.common import IGNORE_FOLDERS
from utils.questionary import styled_text
from helpers.files import get_files_content
from helpers.cli import build_directory_tree
from helpers.agents.TechLead import TechLead
@@ -49,7 +52,11 @@ class Project:
self.developer.start_coding()
def get_directory_tree(self):
return build_directory_tree(self.root_path, ignore=IGNORE_FOLDERS)
return build_directory_tree(self.root_path + '/', ignore=IGNORE_FOLDERS)
def get_test_directory_tree(self):
# TODO remove hardcoded path
return build_directory_tree(self.root_path + '/tests', ignore=IGNORE_FOLDERS)
def get_files(self, files):
files_with_content = []
@@ -87,4 +94,13 @@ class Project:
# Write/overwrite the file with its content
with open(full_path, 'w', encoding='utf-8') as f:
f.write(file_snapshot.content)
f.write(file_snapshot.content)
def ask_for_human_verification(self, message, description):
print(colored(message, "yellow"))  # termcolor has no "orange" color; "orange" would raise a KeyError
print(description)
answer = ''
while answer != 'continue':
answer = styled_text(
'Once you are ready, type "continue" to continue.',
)
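`build_directory_tree` itself is not part of this diff; purely as an assumption about its shape, a minimal stand-in that walks the given path and skips anything listed in `IGNORE_FOLDERS` might look like this:
```python
import os

def build_directory_tree_sketch(path, ignore=None, prefix=''):
    """Hypothetical stand-in for helpers.cli.build_directory_tree: render a simple
    indented tree of `path`, skipping any folder named in `ignore`."""
    ignore = ignore or []
    lines = []
    for entry in sorted(os.listdir(path)):
        if entry in ignore:
            continue
        full = os.path.join(path, entry)
        if os.path.isdir(full):
            lines.append(f'{prefix}{entry}/')
            subtree = build_directory_tree_sketch(full, ignore, prefix + '  ')
            if subtree:
                lines.append(subtree)
        else:
            lines.append(f'{prefix}{entry}')
    return '\n'.join(lines)
```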

View File

@@ -1,4 +1,4 @@
from const.function_calls import GET_FILES, DEV_STEPS, IMPLEMENT_CHANGES
from const.function_calls import GET_FILES, DEV_STEPS, IMPLEMENT_CHANGES, CODE_CHANGES
from helpers.files import update_file
from helpers.cli import run_command_until_success
from helpers.cli import build_directory_tree
@@ -12,21 +12,22 @@ class CodeMonkey(Agent):
def implement_code_changes(self, code_changes_description):
convo = AgentConvo(self)
steps, type = convo.send_message('development/task/break_down_code_changes.prompt', {
steps = convo.send_message('development/task/break_down_code_changes.prompt', {
"instructions": code_changes_description,
"directory_tree": self.project.get_directory_tree(),
}, DEV_STEPS)
"technologies": self.project.architecture
}, CODE_CHANGES)
convo.save_branch('after_code_changes_breakdown')
for i, step in enumerate(steps):
convo.load_branch('after_code_changes_breakdown')
if step['type'] == 'run_command':
run_command_until_success(step['command'], step['description'], convo)
if step['type'] == 'command':
run_command_until_success(step['command'], step['command_timeout'], convo)
elif step['type'] == 'code_change':
files_needed = convo.send_message('development/task/request_files_for_code_changes.prompt', {
"instructions": code_changes_description,
"step_description": step['description'],
"step_description": step['code_change_description'],
"directory_tree": self.project.get_directory_tree(),
}, GET_FILES)
@@ -39,4 +40,19 @@ class CodeMonkey(Agent):
for file_data in changes:
update_file(self.project.get_full_file_path(file_data['name']), file_data['content'])
self.developer.test_changes()
self.developer.test_code_changes(self, convo)
def implement_test(self, convo, automated_test_description):
files_needed = convo.send_message('development/task/request_test_files.prompt', {
"testing_files_tree": self.project.get_directory_tree(),
}, GET_FILES)
changes = convo.send_message('development/write_automated_test.prompt', {
"files": self.project.get_files(files_needed),
}, IMPLEMENT_CHANGES)
for file_data in changes:
update_file(self.project.get_full_file_path(file_data['name']), file_data['content'])
self.developer.run_test_and_debug()
self.developer.run_all_tests_and_debug()
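The loop above relies on `save_branch`/`load_branch` to reset the conversation to the post-breakdown state before each step, so one step's back-and-forth does not leak into the next step's context. Those AgentConvo methods are not shown in this diff; a hedged sketch of what branching a message list could amount to:
```python
import copy

class ConvoBranchingSketch:
    """Hypothetical illustration of save_branch/load_branch: keep named snapshots
    of the message list and restore one before starting each step."""
    def __init__(self):
        self.messages = []
        self.branches = {}

    def save_branch(self, name):
        self.branches[name] = copy.deepcopy(self.messages)

    def load_branch(self, name):
        self.messages = copy.deepcopy(self.branches[name])
```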

File diff suppressed because one or more lines are too long

View File

@@ -15,15 +15,22 @@ def enqueue_output(out, q):
q.put(line)
out.close()
def run_command(command, q, pid_container):
process = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
def run_command(command, directory, q, pid_container):
process = subprocess.Popen(
command,
shell=True,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
text=True,
cwd=directory
)
pid_container[0] = process.pid
t = threading.Thread(target=enqueue_output, args=(process.stdout, q))
t.daemon = True
t.start()
return process
def execute_command(command, timeout=5):
def execute_command(root_path, command, timeout=5):
answer = styled_text(
f'Can I execute the command: `{command}`?\n' +
'If yes, just press ENTER. If not, please paste the output of running this command here and press ENTER.'
@@ -33,7 +40,7 @@ def execute_command(command, timeout=5):
q = queue.Queue()
pid_container = [None]
process = run_command(command, q, pid_container)
process = run_command(command, root_path, q, pid_container)
output = ''
start_time = time.time()
@@ -114,7 +121,7 @@ def execute_command_and_check_cli_response(command, timeout, convo):
def run_command_until_success(command, timeout, convo):
command_executed = False
for _ in range(MAX_COMMAND_DEBUG_TRIES):
cli_response = execute_command(command, timeout)
cli_response = execute_command(convo.agent.project.root_path, command, timeout)
response = convo.send_message('dev_ops/ran_command.prompt',
{'cli_response': cli_response, 'command': command})
@@ -125,5 +132,7 @@ def run_command_until_success(command, timeout, convo):
command = response
if not command_executed:
# TODO ask user to debug and press enter to continue
pass
convo.agent.project.ask_for_human_verification(
'It seems like I cannot debug this problem by myself. Can you please help me by debugging it yourself?',
command
)
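`run_command` pairs a daemon reader thread with a `queue.Queue` so `execute_command` can poll output without blocking past the timeout. A minimal sketch of the consuming side of that pattern, assuming the producer enqueues stdout lines as above:
```python
import queue
import time

def drain_output(q, timeout):
    """Hypothetical consumer loop: collect queued stdout lines until the timeout
    elapses, without blocking indefinitely on a silent process."""
    output = ''
    deadline = time.time() + timeout
    while time.time() < deadline:
        try:
            output += q.get(timeout=0.1)  # short poll so the deadline is still honoured
        except queue.Empty:
            continue
    return output
```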

View File

@@ -8,6 +8,13 @@ Here is the current folder tree:
{{ directory_tree }}
```
Here are technologies that you can use:
```
{% for technology in technologies %}
- {{ technology }}
{% endfor %}
```
First, you need to break down these instructions into actionable steps. There are 2 types of steps: if a step requires a change to a file's content, that step is of type `code_change`, and if it requires a command to be run (e.g. to create a file or a folder), that step is of type `command`.
For a step to be actionable, it cannot have a vague description; it needs a clear explanation of what has to be done to finish that step. Here are a couple of examples of good and bad steps:
BAD STEP: `Set up mongo database`
@@ -17,4 +24,6 @@ When thinking about steps, first think about what files need to changed to finis
So, each step of type `code_change` can contain ALL changes that need to be made to a single file. If changes need to be made to multiple different files, they need to be split across multiple steps where each step contains all changes that need to be made to a single file.
Remember, all commands will be run from the project root folder.
Now, think step by step and return a list of steps that need to be run.

View File

@@ -14,3 +14,5 @@ You are currently working on this step from the instructions above:
```
I will give you each file that needs to be changed and you will implement changes from the instructions. To do this, you will need to see the currently implemented files, so first, filter the files outlined above that are relevant for the instructions. Then, tell me which files you need to see so that you can make appropriate changes to the code. If no files are needed (e.g. if you need to create a new file), just return an empty array.
Remember, ask for files relative to the project root. For example, if you need a file with path `{project_root}/models/model.py`, you need to request the file `models/model.py`.
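The relative-path convention this prompt asks for is the standard one; a small illustrative snippet (the root path below is made up):
```python
import os

project_root = '/home/user/my_app'                   # hypothetical project root
absolute_path = os.path.join(project_root, 'models', 'model.py')

print(os.path.relpath(absolute_path, project_root))  # -> models/model.py (on POSIX)
```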

View File

@@ -1,2 +1,4 @@
Ok, now I will show you the list of all files with automated tests that have been written so far, and I want you to tell me which automated tests you want to see so that you can appropriately modify tests or create new ones.
{{ testing_files_tree }}
{{ testing_files_tree }}
Remember, ask for files relative to the project root. For example, if you need a file with path `{project_root}/models/model.py`, you need to request the file `models/model.py`.

View File

@@ -2,7 +2,7 @@
Here are the requested files:
{% for file in files %}
**{{ file.name }}**
```{{ file.language }}
```{# file.language #}
{{ file.content }}
```
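The change swaps the Jinja expression `{{ file.language }}` for a Jinja comment `{# file.language #}`, so the rendered prompt opens a plain code fence with no language tag. A small check of that rendering difference (the template strings below are illustrative, not the full prompt):
```python
from jinja2 import Template

file = {'name': 'app.js', 'language': 'javascript', 'content': 'console.log("hi");'}

old_tpl = Template('```{{ file.language }}\n{{ file.content }}\n```')
new_tpl = Template('```{# file.language #}\n{{ file.content }}\n```')

print(old_tpl.render(file=file).splitlines()[0])  # -> ```javascript (fence tagged with the language)
print(new_tpl.render(file=file).splitlines()[0])  # -> ``` (the Jinja comment renders to nothing)
```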

View File

@@ -1,8 +1,8 @@
Now, we need to verify if this change was successfully implemented. We can do that in 3 ways:
1. By writing an automated test or by running a previously written test - this is the preferred way since we can then test if this functionality works in the future. You write automated tests in Jest and you always try finding a way to test a functionality with an automated test. Even if changes seem visual or UI-based, try to find a way to validate them using an automated test, such as verifying HTTP responses or elements rendered on the page. If you think we can write an automated test, start the response with `AUTOMATED_TEST`
1. By writing an automated test or by running a previously written test - this is the preferred way since we can then test if this functionality works in the future. You write automated tests in Jest and you always try finding a way to test a functionality with an automated test. Even if changes seem visual or UI-based, try to find a way to validate them using an automated test, such as verifying HTTP responses or elements rendered on the page.
2. By running a command - this is good for when an automated test is overkill. For example, if we installed a new package or changed some configuration. If you just want to run a command (or multiple commands), respond with `COMMAND_TEST: {explanation on how to test this with a command}`. Keep in mind that in this case, there shouldn't be any human intervention needed - I will run the commands you give me and show you the CLI output, and from that, you should be able to determine if the test passed or failed.
2. By running a command (or multiple commands) - this is good for when an automated test is overkill. For example, if we installed a new package or changed some configuration. Keep in mind that in this case, there shouldn't be any human intervention needed - I will run the commands you give me and show you the CLI output, and from that, you should be able to determine if the test passed or failed.
3. By requesting that a human checks if everything works as expected - this is the last option that we want to avoid but if we can't test the functionality programmatically, we should ask a human to check if it works as expected. For example, if something was visually changed in the UI. If you need a human to check the functionality, start the response with `MANUAL_TEST`.
3. By requesting that a human checks if everything works as expected - this is the last option that we want to avoid but if we can't test the functionality programmatically, we should ask a human to check if it works as expected. For example, if something was visually changed in the UI.
Ok, now, tell me how we can verify if this change was successful and respond only with a keyword for the type of test.
Ok, now, tell me how we can verify if this change was successful and respond only with a keyword for the type of test.

View File

@@ -61,7 +61,10 @@ def create_gpt_chat_completion(messages: List[dict], req_type, min_tokens=MIN_TO
if function_calls is not None:
gpt_data['functions'] = function_calls['definitions']
gpt_data['function_call'] = { 'name': function_calls['definitions'][0]['name'] }
if len(function_calls['definitions']) > 1:
gpt_data['function_call'] = 'auto'
else:
gpt_data['function_call'] = { 'name': function_calls['definitions'][0]['name'] }
try:
response = stream_gpt_completion(gpt_data, req_type)
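With a single definition the request pins `function_call` to that function's name; with several, `'auto'` lets the model choose among them. A hedged sketch of the two resulting payload shapes (the model name, messages and schemas below are placeholders):
```python
# Illustrative payload shapes only; field values are placeholders.
single_schema_request = {
    'model': 'gpt-4',
    'messages': [{'role': 'user', 'content': '...'}],
    'functions': [{'name': 'save_files', 'parameters': {'type': 'object', 'properties': {}}}],
    'function_call': {'name': 'save_files'},   # force this exact function
}

multi_schema_request = {
    'model': 'gpt-4',
    'messages': [{'role': 'user', 'content': '...'}],
    'functions': [
        {'name': 'run_command', 'parameters': {'type': 'object', 'properties': {}}},
        {'name': 'save_files', 'parameters': {'type': 'object', 'properties': {}}},
    ],
    'function_call': 'auto',                    # let the model pick among them
}
```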