diff --git a/euclid/const/function_calls.py b/euclid/const/function_calls.py
index 63b1dffa..6f565b9c 100644
--- a/euclid/const/function_calls.py
+++ b/euclid/const/function_calls.py
@@ -162,7 +162,7 @@ DEV_STEPS = {
                 'description': f'List of files that need to be analized to implement the reqired changes.',
                 'items': {
                     'type': 'string',
-                    'description': f'A single file name that needs to be analized to implement the reqired changes.',
+                    'description': f'A single file name that needs to be analyzed to implement the required changes. Remember, this is a file name with path relative to the project root. For example, if a file path is `{{project_root}}/models/model.py`, this value needs to be `models/model.py`.',
                 }
             }
         },
@@ -178,6 +178,52 @@ DEV_STEPS = {
     },
 }

+CODE_CHANGES = {
+    'definitions': [
+        {
+            'name': 'break_down_development_task',
+            'description': 'Implements all the smaller tasks that need to be done to complete the entire development task.',
+            'parameters': {
+                'type': 'object',
+                "properties": {
+                    "tasks": {
+                        'type': 'array',
+                        'description': 'List of smaller development steps that need to be done to complete the entire task.',
+                        'items': {
+                            'type': 'object',
+                            'description': 'A smaller development step that needs to be done to complete the entire task. Remember, if you need to run a command that doesn\'t finish by itself (eg. a command to run an app), put the timeout to 3 seconds.',
+                            'properties': {
+                                'type': {
+                                    'type': 'string',
+                                    'enum': ['command', 'code_change'],
+                                    'description': 'Type of the development step that needs to be done to complete the entire task - it can be "command" or "code_change".',
+                                },
+                                'command': {
+                                    'type': 'string',
+                                    'description': 'Command that needs to be run to complete the current task. This should be used only if the task is of a type "command".',
+                                },
+                                'command_timeout': {
+                                    'type': 'number',
+                                    'description': 'Timeout in seconds that represents the approximate time the command takes to finish. This should be used only if the task is of a type "command". If you need to run a command that doesn\'t finish by itself (eg. a command to run an app), put the timeout to 3 seconds.',
+                                },
+                                'code_change_description': {
+                                    'type': 'string',
+                                    'description': 'Description of the development step that needs to be done. This should be used only if the task is of a type "code_change" and it should thoroughly describe what needs to be done to implement the code change.',
+                                },
+                            },
+                            'required': ['type'],
+                        }
+                    }
+                },
+                "required": ['tasks'],
+            },
+        }
+    ],
+    'functions': {
+        'break_down_development_task': lambda tasks: tasks,
+    },
+}
+
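The new CODE_CHANGES entry follows the same schema/handler shape as the other entries in this file: an OpenAI-style function definition plus a lambda that unpacks the arguments. As a rough illustration of how such a pair is consumed, here is a sketch; the `llm_response` literal and the `dispatch` helper are made up for the example and are not code from this repo:

```python
import json

# Hypothetical example of what a function-calling model might return for the
# 'break_down_development_task' schema defined above.
llm_response = {
    "function_call": {
        "name": "break_down_development_task",
        "arguments": json.dumps({
            "tasks": [
                {"type": "command", "command": "npm install express", "command_timeout": 60},
                {"type": "code_change", "code_change_description": "Create server.js with a basic Express app."},
            ]
        }),
    }
}

def dispatch(function_calls, response):
    """Route the model's function call to the matching handler lambda."""
    call = response["function_call"]
    handler = function_calls["functions"][call["name"]]
    arguments = json.loads(call["arguments"])
    return handler(**arguments)

# With CODE_CHANGES imported from const.function_calls, this would return the task list:
# tasks = dispatch(CODE_CHANGES, llm_response)
```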
 DEVELOPMENT_PLAN = {
     'definitions': [{
         'name': 'implement_development_plan',
@@ -226,7 +272,7 @@ EXECUTE_COMMANDS = {
             'properties': {
                 'commands': {
                     'type': 'array',
-                    'description': f'List of commands that need to be executed.',
+                    'description': f'List of commands that need to be executed. Remember, if you need to run a command that doesn\'t finish by itself (eg. a command to run an app), put the timeout to 3 seconds.',
                     'items': {
                         'type': 'object',
                         'properties': {
@@ -236,7 +282,7 @@ EXECUTE_COMMANDS = {
                             },
                             'timeout': {
                                 'type': 'number',
-                                'description': f'Timeout in seconds that represent the approximate time this command takes to finish.',
+                                'description': f'Timeout in seconds that represents the approximate time this command takes to finish. If you need to run a command that doesn\'t finish by itself (eg. a command to run an app), put the timeout to 3 seconds.',
                             }
                         }
                     }
@@ -262,7 +308,7 @@ GET_FILES = {
                 'description': f'List of files that need to be analized to implement the reqired changes.',
                 'items': {
                     'type': 'string',
-                    'description': f'A single file name that needs to be analized to implement the reqired changes.',
+                    'description': f'A single file name that needs to be analyzed to implement the required changes. Remember, this is a file name with path relative to the project root. For example, if a file path is `{{project_root}}/models/model.py`, this value needs to be `models/model.py`.',
                 }
             }
         },
@@ -305,4 +351,47 @@ IMPLEMENT_CHANGES = {
     'functions': {
         'save_files': lambda files: files
     }
+}
+
+GET_TEST_TYPE = {
+    'definitions': [{
+        'name': 'test_changes',
+        'description': f'Tests the changes based on the test type.',
+        'parameters': {
+            'type': 'object',
+            'properties': {
+                'type': {
+                    'type': 'string',
+                    'description': f'Type of a test that needs to be run. It can be "automated_test", "command_test" or "manual_test".',
+                    'enum': ['automated_test', 'command_test', 'manual_test']
+                },
+                'command': {
+                    'type': 'object',
+                    'description': 'Command that needs to be run to test the changes. This should be used only if the test type is "command_test". Remember, if you need to run a command that doesn\'t finish by itself (eg. a command to run an app), put the timeout to 3 seconds.',
+                    'properties': {
+                        'command': {
+                            'type': 'string',
+                            'description': 'Command that needs to be run to test the changes.',
+                        },
+                        'timeout': {
+                            'type': 'number',
+                            'description': 'Timeout in seconds that represents the approximate time this command takes to finish. If you need to run a command that doesn\'t finish by itself (eg. a command to run an app), put the timeout to 3 seconds.',
+                        }
+                    },
+                },
+                'automated_test_description': {
+                    'type': 'string',
+                    'description': 'Description of an automated test that needs to be run to test the changes. This should be used only if the test type is "automated_test".',
+                },
+                'manual_test_description': {
+                    'type': 'string',
+                    'description': 'Description of a manual test that needs to be run to test the changes.
This should be used only if the test type is "manual_test".', + } + }, + 'required': ['type'], + }, + }], + 'functions': { + 'test_changes': lambda type, command=None, automated_test_description=None, manual_test_description=None: (type, command, automated_test_description, manual_test_description) + } } \ No newline at end of file diff --git a/euclid/helpers/AgentConvo.py b/euclid/helpers/AgentConvo.py index 4ea4c74c..eb396596 100644 --- a/euclid/helpers/AgentConvo.py +++ b/euclid/helpers/AgentConvo.py @@ -29,16 +29,18 @@ class AgentConvo: self.messages.append({"role": "user", "content": prompt}) # check if we already have the LLM response saved - saved_checkpoint = get_development_step_from_messages(self.agent.project.args['app_id'], self.messages) - if saved_checkpoint is not None: + development_step = get_development_step_from_messages(self.agent.project.args['app_id'], self.messages) + if development_step is not None: # if we do, use it - response = saved_checkpoint.llm_response - self.messages = saved_checkpoint.messages + self.agent.project.restore_files(development_step.id) + response = development_step.llm_response + self.messages = development_step.messages else: # if we don't, get the response from LLM response = create_gpt_chat_completion(self.messages, self.high_level_step, function_calls=function_calls) - save_development_step(self.agent.project.args['app_id'], self.messages, response) - + development_step = save_development_step(self.agent.project.args['app_id'], self.messages, response) + self.agent.project.save_files_snapshot(development_step.id) + # TODO handle errors from OpenAI if response == {}: raise Exception("OpenAI API error happened.") diff --git a/euclid/helpers/Project.py b/euclid/helpers/Project.py index 8dedd0ac..d96b1f19 100644 --- a/euclid/helpers/Project.py +++ b/euclid/helpers/Project.py @@ -1,5 +1,8 @@ import os + +from termcolor import colored from const.common import IGNORE_FOLDERS +from utils.questionary import styled_text from helpers.files import get_files_content from helpers.cli import build_directory_tree from helpers.agents.TechLead import TechLead @@ -49,7 +52,11 @@ class Project: self.developer.start_coding() def get_directory_tree(self): - return build_directory_tree(self.root_path, ignore=IGNORE_FOLDERS) + return build_directory_tree(self.root_path + '/', ignore=IGNORE_FOLDERS) + + def get_test_directory_tree(self): + # TODO remove hardcoded path + return build_directory_tree(self.root_path + '/tests', ignore=IGNORE_FOLDERS) def get_files(self, files): files_with_content = [] @@ -87,4 +94,13 @@ class Project: # Write/overwrite the file with its content with open(full_path, 'w', encoding='utf-8') as f: - f.write(file_snapshot.content) \ No newline at end of file + f.write(file_snapshot.content) + + def ask_for_human_verification(self, message, description): + print(colored(message, "orange")) + print(description) + answer = '' + while answer != 'continue': + answer = styled_text( + 'Once you are ready, type "continue" to continue.', + ) \ No newline at end of file diff --git a/euclid/helpers/agents/CodeMonkey.py b/euclid/helpers/agents/CodeMonkey.py index dddb943b..dd8ad2e4 100644 --- a/euclid/helpers/agents/CodeMonkey.py +++ b/euclid/helpers/agents/CodeMonkey.py @@ -1,4 +1,4 @@ -from const.function_calls import GET_FILES, DEV_STEPS, IMPLEMENT_CHANGES +from const.function_calls import GET_FILES, DEV_STEPS, IMPLEMENT_CHANGES, CODE_CHANGES from helpers.files import update_file from helpers.cli import run_command_until_success from helpers.cli 
import build_directory_tree @@ -12,21 +12,22 @@ class CodeMonkey(Agent): def implement_code_changes(self, code_changes_description): convo = AgentConvo(self) - steps, type = convo.send_message('development/task/break_down_code_changes.prompt', { + steps = convo.send_message('development/task/break_down_code_changes.prompt', { "instructions": code_changes_description, "directory_tree": self.project.get_directory_tree(), - }, DEV_STEPS) + "technologies": self.project.architecture + }, CODE_CHANGES) convo.save_branch('after_code_changes_breakdown') for i, step in enumerate(steps): convo.load_branch('after_code_changes_breakdown') - if step['type'] == 'run_command': - run_command_until_success(step['command'], step['description'], convo) + if step['type'] == 'command': + run_command_until_success(step['command'], step['command_timeout'], convo) elif step['type'] == 'code_change': files_needed = convo.send_message('development/task/request_files_for_code_changes.prompt', { "instructions": code_changes_description, - "step_description": step['description'], + "step_description": step['code_change_description'], "directory_tree": self.project.get_directory_tree(), }, GET_FILES) @@ -39,4 +40,19 @@ class CodeMonkey(Agent): for file_data in changes: update_file(self.project.get_full_file_path(file_data['name']), file_data['content']) - self.developer.test_changes() \ No newline at end of file + self.developer.test_code_changes(self, convo) + + def implement_test(self, convo, automated_test_description): + files_needed = convo.send_message('development/task/request_test_files.prompt', { + "testing_files_tree": self.project.get_directory_tree(), + }, GET_FILES) + + changes = convo.send_message('development/write_automated_test.prompt', { + "files": self.project.get_files(files_needed), + }, IMPLEMENT_CHANGES) + + for file_data in changes: + update_file(self.project.get_full_file_path(file_data['name']), file_data['content']) + + self.developer.run_test_and_debug() + self.developer.run_all_tests_and_debug() diff --git a/euclid/helpers/agents/Developer.py b/euclid/helpers/agents/Developer.py index 6be47576..2afa9ed9 100644 --- a/euclid/helpers/agents/Developer.py +++ b/euclid/helpers/agents/Developer.py @@ -7,7 +7,7 @@ from helpers.Agent import Agent from helpers.AgentConvo import AgentConvo from utils.utils import execute_step, array_of_objects_to_string, generate_app_data from helpers.cli import build_directory_tree, run_command_until_success, execute_command_and_check_cli_response -from const.function_calls import FILTER_OS_TECHNOLOGIES, DEVELOPMENT_PLAN, EXECUTE_COMMANDS, DEV_STEPS +from const.function_calls import FILTER_OS_TECHNOLOGIES, DEVELOPMENT_PLAN, EXECUTE_COMMANDS, DEV_STEPS, GET_TEST_TYPE from database.database import save_progress, get_progress_steps from utils.utils import get_os_info from helpers.cli import execute_command @@ -31,14 +31,6 @@ class Developer(Agent): logger.info('The app is DONE!!! 
Yay...you can use it now.') def implement_task(self, sibling_tasks, current_task_index, parent_task=None): - - - # TODO REMOVE - #sibling_tasks = [{'task_description': 'Set up the Node.js project', 'programmatic_goal': 'A valid package.json file is created once `npm init` command is run', 'user_review_goal': 'Project can be initialized using `npm init` command'}, {'task_description': 'Install necessary packages', 'programmatic_goal': 'Package.json file should include socket.io, mongoose, jest, and cypress after running `npm install socket.io mongoose jest cypress bootstrap` command', 'user_review_goal': 'Program dependencies are successfully installed via `npm install`'}, {'task_description': 'Set up an express server', 'programmatic_goal': 'Express server needs to be able to start running on a port 3000 responding all requests with status code 200', 'user_review_goal': 'User needs to be able to run the server by running a command `npm run start` and access the URL `http://localhost:3000` in a browser'}, {'task_description': 'Setup front-end serving static HTML, CSS and JavaScript', 'programmatic_goal': 'On accessing `http://localhost:3000`, express server should serve an index.html file with related styles.css and app.js', 'user_review_goal': 'User should see a basic front-end of the web app on accessing `http://localhost:3000`'}, {'task_description': 'Create chat room functionality', 'programmatic_goal': "On client socket emitting 'create', server socket should emit unique room id back to the client", 'user_review_goal': 'User should be able to create a room and see a unique room id shown on the screen'}, {'task_description': 'Join chat room functionality', 'programmatic_goal': "On client socket emitting 'join' with room id, server socket should emit 'joined' event back to the client", 'user_review_goal': 'User should be able to enter a room id and join the room'}, {'task_description': 'Send and receive messages', 'programmatic_goal': "On client socket emitting 'message', server should broadcast this to all clients in the same room", 'user_review_goal': 'User should be able to type a message, send it, and see it appear in the chat'}, {'task_description': 'Store messages in MongoDB through Mongoose', 'programmatic_goal': "On receiving a 'message' event, server should store the message in MongoDB with proper fields like room id, user, and timestamp", 'user_review_goal': 'User messages are stored and retrieved in chat history when rejoining the room'}, {'task_description': 'Write functional tests with Jest', 'programmatic_goal': 'Functional tests written with Jest validate the message storing and broadcasting process and all tests should pass', 'user_review_goal': 'All functional tests run successfully validating user functionalities'}, {'task_description': 'Write end-to-end tests with Cypress', 'programmatic_goal': 'End-to-end tests validate the chat system as a whole, including sending and receiving messages in a chat room, and all tests should pass', 'user_review_goal': 'All end-to-end tests run successfully validating chat room system as a whole'}] - # parent_task = {'task_description': 'Set up the Node.js project', 'programmatic_goal': 'A valid package.json file is created once `npm init` command is run', 'user_review_goal': 'Project can be initialized using `npm init` command'} - # sibling_tasks = [{'type': 'COMMAND', 'description': 'Run `mkdir euclid` to create a new directory for the project'}, {'type': 'COMMAND', 'description': 'Navigate to the newly created directory using `cd 
euclid`'}, {'type': 'COMMAND', 'description': 'Initialize npm using `npm init -y` to create a new `package.json` file with default values'}, {'type': 'CODE_CHANGE', 'description': 'Verify that the `package.json` file is created in the root directory of the project'}, {'type': 'CODE_CHANGE', 'description': "Write a test in Jest to verify that the `package.json` file is a valid JSON file and contains needed fields such as 'name', 'version', 'main' and 'scripts'"}] - #current_task_index = 2 - # TODO END convo_dev_task = AgentConvo(self) task_steps, type = convo_dev_task.send_message('development/task/breakdown.prompt', { "app_summary": self.project.high_level_summary, @@ -54,25 +46,9 @@ class Developer(Agent): "parent_task": parent_task, }, DEV_STEPS) - - - self.execute_task(task_steps) - - - - - - - # TODO REMOVE - # convo_dev_task.messages = [{'role': 'system', 'content': 'You are a full stack software developer who works in a software development agency. You write very modular code and you practice TDD (test driven development) whenever is suitable to use it. Your job is to implement tasks that your tech lead assigns you. Each task has a description of what needs to be implemented, a programmatic goal that will determine if a task can be marked as done from a programmatic perspective (this is basically a blueprint for an automated test that is run before you send the task for a review to your tech lead) and user-review goal that will determine if a task is done or not but from a user perspective since it will be reviewed by a human.'}, {'role': 'user', 'content': 'You are working on a web app called Euclid and you need to write code for the entire application based on the tasks that the tech lead gives you. So that you understand better what you\'re working on, you\'re given other specs for Euclid as well.\n\nHere is a high level description of Euclid:\n```\nThe client wants to create a simple chat application named "Euclid". This application would not include any authentication and operates solely on localhost. Key features include the ability to create chat rooms and provide users with access to a room via a specific room id. Clarifications have been made regarding the storage, notification mechanism, and chat room design. Messages exchanged in the chat rooms will be stored in a database. However, no notification mechanism is required if a user receives a message while being in another chat room. 
Lastly, the application is specifically designed for one-on-one chats.\n```\n\nHere are user stories that specify how users use Euclid:\n```\n- As a user, I can create a new chat room using the \'Create\' functionality provided in the Euclid application.\n- As a user, I can share my chat room\'s unique id with another user to allow them to join the chat room.\n- As a user, I can join a chat room by entering a specific room id.\n- As a user, I can send messages to another user within a chat room, and this conversational data will be stored in a database.\n- As a user, I will not receive notifications if I receive a message while being in another chat room.\n- As a user, I can exchange messages in a One-on-One chat format.\n```\n\nHere are user tasks that specify what users need to do to interact with Euclid:\n```\n- User opens the Euclid application on localhost\n- User clicks on the \'Create\' button to create a new chat room\n- User shares the unique chat room id to allow another user to join\n- User enters a specific room id in the \'Join Room\' input to join a pre-existing chat room\n- User writes a message in the chat box and clicks \'Send\' to communicate within a chat room\n- User check and see messages stored in the chat room database\n- User can switch between chat rooms, understanding messages will not be received from other rooms simultaneously\n- User can engage in one-on-one conversation with another user in a chat room\n```\n\nHere are the technologies that you need to use for this project:\n```\n- Node.js\n- Socket.io\n- MongoDB\n- Mongoose\n- Jest\n- Cypress\n- Bootstrap\n- Vanilla Javascript\n- Cronjob\n```\n\n\n\n\nSo far, tasks \n\n \n\n \n\n#{\'task_description\': \'Set up the Node.js project\', \'programmatic_goal\': \'A valid package.json file is created once `npm init` command is run\', \'user_review_goal\': \'Project can be initialized using `npm init` command\'}, #{\'task_description\': \'Install necessary packages\', \'programmatic_goal\': \'Package.json file should include socket.io, mongoose, jest, and cypress after running `npm install socket.io mongoose jest cypress bootstrap` command\', \'user_review_goal\': \'Program dependencies are successfully installed via `npm install`\'} are finished so let\'s do\n\n```\ntask_description: Set up an express server\nprogrammatic_goal: Express server needs to be able to start running on a port 3000 responding all requests with status code 200\nuser_review_goal: User needs to be able to run the server by running a command `npm run start` and access the URL `http://localhost:3000` in a browser\n```\n\nThink step by step about what needs to be done to complete this task. \n\n\n\nFirst, just make a list of steps we need to do to fulfill this task. It should be in a JSON array. Every step must NOT contain both a command that needs to be run and the code that needs to be changed. It can be either command (or multiple commands) that need to be run or a change in the code. Each step must start with a keyword `COMMAND` in case the step consists of commands that need to be run or `CODE_CHANGE` in case it consists of changes in the code. After the keyword, write a description of what will be done in that step. Do not write what needs to be done for each step but only list them in an array. 
Also, keep in mind that you also need to write test (or tests) that will programmatically verify that your task is complete.\n\n'}, {'role': 'assistant', 'content': ('more_tasks', [{'type': 'COMMAND', 'description': 'Install express by running `npm install express`'}, {'type': 'CODE_CHANGE', 'description': 'Create a new file called server.js'}, {'type': 'CODE_CHANGE', 'description': 'In server.js, require express module and initialize a new express application'}, {'type': 'CODE_CHANGE', 'description': 'Set the application to listen on port 3000'}, {'type': 'COMMAND', 'description': 'Test the server by running `node server.js`'}, {'type': 'CODE_CHANGE', 'description': "Add a default route '/' that will respond with status code 200 to all requests"}, {'type': 'CODE_CHANGE', 'description': 'Update package.json to include a `start` script which would run the server.js file'}, {'type': 'CODE_CHANGE', 'description': 'Create a test that will request to URL `http://localhost:3000` and assert that the response status code is 200'}])}] - # task_steps = [{'type': 'COMMAND', 'description': 'Install express by running `npm install express`'}, {'type': 'CODE_CHANGE', 'description': 'Create a new file called server.js'}, {'type': 'CODE_CHANGE', 'description': 'In server.js, require express module and initialize a new express application'}, {'type': 'CODE_CHANGE', 'description': 'Set the application to listen on port 3000'}, {'type': 'COMMAND', 'description': 'Test the server by running `node server.js`'}, {'type': 'CODE_CHANGE', 'description': "Add a default route '/' that will respond with status code 200 to all requests"}, {'type': 'CODE_CHANGE', 'description': 'Update package.json to include a `start` script which would run the server.js file'}, {'type': 'CODE_CHANGE', 'description': 'Create a test that will request to URL `http://localhost:3000` and assert that the response status code is 200'}] - # type = 'code_change' - # TODO REMOVE - if type == 'run_commands': - return for cmd in task_steps: run_command_until_success(cmd['command'], cmd['timeout'], convo_dev_task) elif type == 'code_change': @@ -88,7 +64,6 @@ class Developer(Agent): def execute_task(self, task): for step in task: if step['type'] == 'command': - continue run_command_until_success(cmd['command'], cmd['timeout'], convo_dev_task) elif step['type'] == 'code_change': self.implement_code_changes(step['description']) @@ -109,10 +84,6 @@ class Developer(Agent): print(colored(f"Setting up the environment...\n", "green")) logger.info(f"Setting up the environment...") - # TODO: remove this once the database is set up properly - # previous_messages[2]['content'] = '\n'.join(previous_messages[2]['content']) - # TODO END - os_info = get_os_info() os_specific_techologies = self.convo_os_specific_tech.send_message('development/env_setup/specs.prompt', { "os_info": os_info, "technologies": self.project.architecture }, FILTER_OS_TECHNOLOGIES) @@ -163,19 +134,19 @@ class Developer(Agent): code_monkey = CodeMonkey(self.project, self) code_monkey.implement_code_changes(code_changes_description) - def test_code_changes(self, code_changes_description): - verification_type = convo.send_message('development/step_check.prompt', { - "instructions": code_changes_description, - "directory_tree": self.project.get_directory_tree(), - "files": self.project.get_files(files_needed), - }, CHANGE_VERIFICATION) + def test_code_changes(self, code_monkey, convo): + (test_type, command, automated_test_description, manual_test_description) = 
convo.send_message('development/task/step_check.prompt', {}, GET_TEST_TYPE)

-        if verification_type == 'command':
-            pass
-        elif verification_type == 'automated_test':
-            pass
-        elif verification_type == 'manual_test':
-            pass
+        if test_type == 'command_test':
+            run_command_until_success(command['command'], command['timeout'], convo)
+        elif test_type == 'automated_test':
+            code_monkey.implement_test(convo, automated_test_description)
+        elif test_type == 'manual_test':
+            # TODO make the message better
+            self.project.ask_for_human_verification(
+                'Message from Euclid: I need your help. Can you please test if this was successful?',
+                manual_test_description
+            )

     def implement_step(self, convo, step_index, type, description):
         # TODO remove hardcoded folder path
diff --git a/euclid/helpers/cli.py b/euclid/helpers/cli.py
index 8501cf50..d2ef3b24 100644
--- a/euclid/helpers/cli.py
+++ b/euclid/helpers/cli.py
@@ -15,15 +15,22 @@ def enqueue_output(out, q):
         q.put(line)
     out.close()

-def run_command(command, q, pid_container):
-    process = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
+def run_command(command, directory, q, pid_container):
+    process = subprocess.Popen(
+        command,
+        shell=True,
+        stdout=subprocess.PIPE,
+        stderr=subprocess.PIPE,
+        text=True,
+        cwd=directory
+    )
     pid_container[0] = process.pid
     t = threading.Thread(target=enqueue_output, args=(process.stdout, q))
     t.daemon = True
     t.start()
     return process

-def execute_command(command, timeout=5):
+def execute_command(root_path, command, timeout=5):
     answer = styled_text(
         f'Can i execute the command: `{command}`?\n' +
         'If yes, just press ENTER and if not, please paste the output of running this command here and press ENTER'
@@ -33,7 +40,7 @@
     q = queue.Queue()
     pid_container = [None]

-    process = run_command(command, q, pid_container)
+    process = run_command(command, root_path, q, pid_container)
     output = ''
     start_time = time.time()

@@ -114,7 +121,7 @@ def execute_command_and_check_cli_response(command, timeout, convo):

 def run_command_until_success(command, timeout, convo):
     command_executed = False
     for _ in range(MAX_COMMAND_DEBUG_TRIES):
-        cli_response = execute_command(command, timeout)
+        cli_response = execute_command(convo.agent.project.root_path, command, timeout)

         response = convo.send_message('dev_ops/ran_command.prompt', {'cli_response': cli_response, 'command': command})
@@ -125,5 +132,7 @@
             command = response

     if not command_executed:
-        # TODO ask user to debug and press enter to continue
-        pass
+        convo.agent.project.ask_for_human_verification(
+            'It seems like I cannot debug this problem by myself. Can you please help me and try debugging it yourself?',
+            command
+        )
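The cli.py changes above thread the project root into `subprocess.Popen` via `cwd` and cap the retry loop before falling back to a human. A minimal sketch of that behaviour in isolation, assuming illustrative helper names and a hard-coded retry count (this is not the module's exact code):

```python
import subprocess

MAX_COMMAND_DEBUG_TRIES = 3  # same idea as the constant used in cli.py

def run_in_project_root(command, root_path, timeout=5):
    """Run a shell command with the project root as its working directory."""
    try:
        result = subprocess.run(
            command,
            shell=True,
            cwd=root_path,        # mirrors the new cwd=directory argument
            capture_output=True,
            text=True,
            timeout=timeout,
        )
        return result.returncode, result.stdout + result.stderr
    except subprocess.TimeoutExpired:
        # Long-running commands (e.g. starting a server) are cut off after
        # `timeout` seconds; treat that as "still running" rather than a failure.
        return None, ''

def run_until_success(command, root_path, timeout=5):
    for _ in range(MAX_COMMAND_DEBUG_TRIES):
        returncode, output = run_in_project_root(command, root_path, timeout)
        if returncode == 0:
            return output
        # In the real flow the CLI output is sent back to the LLM, which
        # proposes a corrected command for the next attempt.
    return None  # caller asks a human to debug, as run_command_until_success now does
```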
diff --git a/euclid/prompts/development/task/break_down_code_changes.prompt b/euclid/prompts/development/task/break_down_code_changes.prompt
index 2c2d1f27..fbda8dc9 100644
--- a/euclid/prompts/development/task/break_down_code_changes.prompt
+++ b/euclid/prompts/development/task/break_down_code_changes.prompt
@@ -8,6 +8,13 @@ Here is the current folder tree:
 {{ directory_tree }}
 ```

+Here are technologies that you can use:
+```
+{% for technology in technologies %}
+- {{ technology }}
+{% endfor %}
+```
+
 First, you need to break down these instructions into actionable steps that can be made. There are 2 types of steps. If a step requires a change in a file content, that step is of a type `code_change` and if a change requires a command to be run (eg. to create a file or a folder), that step is of a type `run_command`. For a step to be actionable, it cannot have a vague description but a clear explanation of what needs to be done to finish that step. Here are a couple of examples of good and bad steps:

 BAD STEP: `Set up mongo database`
@@ -17,4 +24,6 @@ When thinking about steps, first think about what files need to changed to finis

 So, each step of type `code_change` can contain ALL changes that need to be made to a single file. If changes need to be made to multiple different files, they need to be split across multiple steps where each step contains all changes that need ot be made to a single file.

+Remember, all commands will be run from the project root folder.
+
 Now, think step by step and return a list of steps that need to be run.
\ No newline at end of file
diff --git a/euclid/prompts/development/task/request_files_for_code_changes.prompt b/euclid/prompts/development/task/request_files_for_code_changes.prompt
index 1d7f2d0c..784264da 100644
--- a/euclid/prompts/development/task/request_files_for_code_changes.prompt
+++ b/euclid/prompts/development/task/request_files_for_code_changes.prompt
@@ -14,3 +14,5 @@ You are currently working on this step from the instructions above:
 ```

 I will give you each file that needs to be changed and you will implement changes from the instructions. To do this, you will need to see the currently implemented files so first, filter the files outlined above that are relevant for the instructions. Then, tell me files that you need to see so that you can make appropriate changes to the code. If no files are needed (eg. if you need to create a file), just return an empty array.
+
+Remember, ask for files relative to the project root. For example, if you need a file with path `{project_root}/models/model.py`, you need to request the file `models/model.py`.
\ No newline at end of file
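Both prompts above ask the model for file paths relative to the project root. A small sketch of how such paths might be resolved and sanity-checked before reading or writing files; the helper name is hypothetical and not taken from this diff:

```python
import os

def resolve_project_file(root_path, relative_path):
    """Join a model-supplied relative path onto the project root.

    Rejects paths that would escape the root (e.g. '../../etc/passwd') so a
    badly formed LLM answer cannot touch files outside the workspace.
    """
    full_path = os.path.normpath(os.path.join(root_path, relative_path))
    if not full_path.startswith(os.path.normpath(root_path) + os.sep):
        raise ValueError(f"Path escapes project root: {relative_path}")
    return full_path

# resolve_project_file('/home/user/euclid_app', 'models/model.py')
# -> '/home/user/euclid_app/models/model.py'
```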
diff --git a/euclid/prompts/development/task/request_test_files.prompt b/euclid/prompts/development/task/request_test_files.prompt
index 905ce754..8c5ffe8c 100644
--- a/euclid/prompts/development/task/request_test_files.prompt
+++ b/euclid/prompts/development/task/request_test_files.prompt
@@ -1,2 +1,4 @@
 Ok, now, I will show you the list of all files with automated tests that are written so far and I want you to tell me which automated tests do you want to see so that you can propriatelly modify tests or create new ones.
-{{ testing_files_tree }}
\ No newline at end of file
+{{ testing_files_tree }}
+
+Remember, ask for files relative to the project root. For example, if you need a file with path `{project_root}/models/model.py`, you need to request the file `models/model.py`.
\ No newline at end of file
diff --git a/euclid/prompts/development/task/step/write_automated_test.prompt b/euclid/prompts/development/task/step/write_automated_test.prompt
index 0fb6b873..e895396c 100644
--- a/euclid/prompts/development/task/step/write_automated_test.prompt
+++ b/euclid/prompts/development/task/step/write_automated_test.prompt
@@ -2,7 +2,7 @@ Here are the requested files:

 {% for file in files %}
 **{{ file.name }}**
-```{{ file.language }}
+```{# file.language #}
 {{ file.content }}
 ```

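For context on the write_automated_test.prompt change: swapping `{{ file.language }}` for `{# file.language #}` turns the expression into a Jinja2 comment, so the language tag is no longer rendered after the opening code fence. A quick illustration with made-up file data:

```python
from jinja2 import Template

# One file entry of the template, as changed by this diff.
snippet = "**{{ file.name }}**\n```{# file.language #}\n{{ file.content }}\n```"
rendered = Template(snippet).render(file={
    "name": "server.js",
    "language": "javascript",
    "content": "console.log('hello');",
})
print(rendered)
# **server.js**
# ```
# console.log('hello');
# ```
# With ```{{ file.language }} the opening fence would render as ```javascript instead.
```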
diff --git a/euclid/prompts/development/task/step_check.prompt b/euclid/prompts/development/task/step_check.prompt
index e106194d..81c5a23c 100644
--- a/euclid/prompts/development/task/step_check.prompt
+++ b/euclid/prompts/development/task/step_check.prompt
@@ -1,8 +1,8 @@
 Now, we need to verify if this change was successfully implemented. We can do that in 3 ways:
-1. By writing an automated test or by running a previously written test - this is the preferred way since we can then test if this functionality works in the future. You write automated tests in Jest and you always try finding a way to test a functionality with an automated test. Even if changes seem visual or UI-based, try to find a way to validate them using an automated test, such as verifying HTTP responses or elements rendered on the page. If you think we can write an automated test, start the response with `AUTOMATED_TEST`
+1. By writing an automated test or by running a previously written test - this is the preferred way since we can then test if this functionality works in the future. You write automated tests in Jest and you always try finding a way to test a functionality with an automated test. Even if changes seem visual or UI-based, try to find a way to validate them using an automated test, such as verifying HTTP responses or elements rendered on the page.

-2. By running a command - this is good for when an automated test is an overkill. For example, if we installed a new package or changed some configuration. If you just want to run a command (or multiple commands), respond with `COMMAND_TEST: {explanation on how to test this with a command}`. Keep in mind that in this case, there shouldn't be any human intervention needed - I will run the commands you will give me and show you the CLI output and from that, you should be able to determine if the test passed or failed.
+2. By running a command (or multiple commands) - this is good for when an automated test is an overkill. For example, if we installed a new package or changed some configuration. Keep in mind that in this case, there shouldn't be any human intervention needed - I will run the commands you will give me and show you the CLI output and from that, you should be able to determine if the test passed or failed.

-3. By requesting that a human checks if everything works as expected - this is the last option that we want to avoid but if we can't test the functionality programmatically, we should ask a human to check if it works as expected. For example, if something was visually changed in the UI. If you need a human to check the functionality, start the response with `MANUAL_TEST`.
+3. By requesting that a human checks if everything works as expected - this is the last option that we want to avoid but if we can't test the functionality programmatically, we should ask a human to check if it works as expected. For example, if something was visually changed in the UI.

-Ok, now, tell me how can we verify if this change was successful and respond only with a keyword for a type of test.
\ No newline at end of file
+Ok, now, tell me how can we verify if this change was successful and respond only with a keyword for a type of test.
\ No newline at end of file
diff --git a/euclid/utils/llm_connection.py b/euclid/utils/llm_connection.py
index d28bb841..2193b952 100644
--- a/euclid/utils/llm_connection.py
+++ b/euclid/utils/llm_connection.py
@@ -61,7 +61,10 @@ def create_gpt_chat_completion(messages: List[dict], req_type, min_tokens=MIN_TO

     if function_calls is not None:
         gpt_data['functions'] = function_calls['definitions']
-        gpt_data['function_call'] = { 'name': function_calls['definitions'][0]['name'] }
+        if len(function_calls['definitions']) > 1:
+            gpt_data['function_call'] = 'auto'
+        else:
+            gpt_data['function_call'] = { 'name': function_calls['definitions'][0]['name'] }

     try:
         response = stream_gpt_completion(gpt_data, req_type)
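The llm_connection.py change forces the single defined function when there is only one schema and lets the model choose with `'auto'` when there are several. A standalone sketch of that selection logic with placeholder schemas (not the module's exact code):

```python
# Sketch of the request fields produced by the new branch in
# create_gpt_chat_completion: one schema -> force it by name,
# several schemas -> let the model pick ('auto').
def build_function_call_fields(function_calls):
    fields = {'functions': function_calls['definitions']}
    if len(function_calls['definitions']) > 1:
        fields['function_call'] = 'auto'
    else:
        fields['function_call'] = {'name': function_calls['definitions'][0]['name']}
    return fields

single = {'definitions': [{'name': 'save_files', 'parameters': {}}]}
several = {'definitions': [{'name': 'save_files', 'parameters': {}},
                           {'name': 'test_changes', 'parameters': {}}]}

assert build_function_call_fields(single)['function_call'] == {'name': 'save_files'}
assert build_function_call_fields(several)['function_call'] == 'auto'
```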