Merge commit '804520c14ac27838685668548c45918b53dd01f1' into feature/no-color

2026-01-09 13:17:55 -05:00 · 2023-10-17 09:15:17 +08:00
parent e79a72601b 4475704441
commit d0563771d1
31 changed files with 485 additions and 209 deletions
--- a/README.md
+++ b/README.md
@@ -82,7 +82,7 @@ All generated code will be stored in the folder `workspace` inside the folder na

 ## 🐳 How to start gpt-pilot in docker?
 1. `git clone https://github.com/Pythagora-io/gpt-pilot.git` (clone the repo)
-2. Update the `docker-compose.yml` environment variables, which can be done via `docker compose config`
+2. Update the `docker-compose.yml` environment variables, which can be done via `docker compose config`. If you use a local model, please go to [https://localai.io/basics/getting_started/](https://localai.io/basics/getting_started/) to get started.
 3. run `docker compose build`. this will build a gpt-pilot container for you.
 4. run `docker compose up`.
 5. access the web terminal on `port 7681`
--- a/pilot/const/function_calls.py
+++ b/pilot/const/function_calls.py
@@ -44,7 +44,7 @@ def command_definition(description_command='A single command that needs to be ex
                       description_timeout=
                       'Timeout in milliseconds that represent the approximate time this command takes to finish. '
                       'If you need to run a command that doesnt\'t finish by itself (eg. a command to run an app), '
-                       'set the timeout to to a value long enough to determine that it has started successfully and provide a process_name. '
+                       'set the timeout to to a value long enough to determine that it has started successfully and provide a command_id. '
                       'If you need to create a directory that doesn\'t exist and is not the root project directory, '
                       'always create it by running a command `mkdir`'):
    return {
@@ -63,10 +63,10 @@ def command_definition(description_command='A single command that needs to be ex
                'type': 'string',
                'description': 'A message to look for in the output of the command to determine if successful or not.',
            },
-            'process_name': {
+            'command_id': {
                'type': 'string',
                'description': 'If the process needs to continue running after the command is executed provide '
-                               'a name which you can use to kill the process later.',
+                               'a unique command identifier which you can use to kill the process later.',
            }
        },
        'required': ['command', 'timeout'],
@@ -193,7 +193,7 @@ IMPLEMENT_TASK = {
                                'command': command_definition(),
                                'kill_process': {
                                    'type': 'string',
-                                    'description': 'To kill a process that was left running by a previous `command` step provide the `process_name` in this field and set `type` to "kill_process".',
+                                    'description': 'To kill a process that was left running by a previous `command` step provide the `command_id` in this field and set `type` to "kill_process".',
                                },
                                'code_change': {
                                    'type': 'object',
@@ -529,8 +529,15 @@ DEBUG_STEPS_BREAKDOWN = {
            'description': 'Starts the debugging process based on the list of steps that need to be done to debug the problem.',
            'parameters': {
                'type': 'object',
-                "properties": {
-                    "steps": {
+                'properties': {
+                    'thoughts': {
+                        'type': 'string',
+                        'description': 'Thoughts that you have about the problem that you are trying to debug.'
+                    },
+                    'reasoning': {
+                        'type': 'string',
+                    },
+                    'steps': {
                        'type': 'array',
                        'description': 'List of steps that need to be done to debug the problem.',
                        'items': {
@@ -560,7 +567,7 @@ DEBUG_STEPS_BREAKDOWN = {
                        }
                    }
                },
-                "required": ['steps'],
+                "required": ['thoughts', 'reasoning', 'steps'],
            },
        },
    ],
--- a/pilot/const/ipc.py
+++ b/pilot/const/ipc.py
@@ -5,4 +5,5 @@ MESSAGE_TYPE = {
    'hint': 'hint',                        # Hint text, eg "Do you want to add anything else? If not, just press ENTER."
    'info': 'info',                        # JSON data can be sent to progress `progress_stage`
    'local': 'local',
+    'run_command': 'run_command',
 }
--- a/pilot/database/database.py
+++ b/pilot/database/database.py
@@ -27,11 +27,6 @@ from database.models.user_apps import UserApps
 from database.models.user_inputs import UserInputs
 from database.models.files import File

-# DB_NAME = os.getenv("DB_NAME")
-# DB_HOST = os.getenv("DB_HOST")
-# DB_PORT = os.getenv("DB_PORT")
-# DB_USER = os.getenv("DB_USER")
-# DB_PASSWORD = os.getenv("DB_PASSWORD")
 TABLES = [
            User,
            App,
--- a/pilot/helpers/AgentConvo.py
+++ b/pilot/helpers/AgentConvo.py
@@ -1,3 +1,4 @@
+import json
 import re
 import subprocess
 import uuid
@@ -11,6 +12,7 @@ from utils.utils import array_of_objects_to_string, get_prompt, get_sys_message,
 from logger.logger import logger
 from prompts.prompts import ask_user
 from const.llm import END_RESPONSE
+from helpers.cli import running_processes


 class AgentConvo:
@@ -49,6 +51,7 @@ class AgentConvo:
        # craft message
        self.construct_and_add_message_from_prompt(prompt_path, prompt_data)

+        # TODO: move this if block (and the other below) to Developer agent - https://github.com/Pythagora-io/gpt-pilot/issues/91#issuecomment-1751964079
        # check if we already have the LLM response saved
        if self.agent.__class__.__name__ == 'Developer':
            self.agent.project.llm_req_num += 1
@@ -78,7 +81,8 @@ class AgentConvo:
                save_development_step(self.agent.project, prompt_path, prompt_data, self.messages, '', str(e))
                raise e

-            if self.agent.__class__.__name__ == 'Developer':
+            # TODO: move this code to Developer agent - https://github.com/Pythagora-io/gpt-pilot/issues/91#issuecomment-1751964079
+            if response != {} and self.agent.__class__.__name__ == 'Developer':
                development_step = save_development_step(self.agent.project, prompt_path, prompt_data, self.messages, response)

        # TODO handle errors from OpenAI
@@ -89,22 +93,7 @@ class AgentConvo:
            raise Exception("OpenAI API error happened.")

        response = parse_agent_response(response, function_calls)
-
-        # TODO remove this once the database is set up properly
-        message_content = response[0] if type(response) == tuple else response
-        if isinstance(message_content, list):
-            if 'to_message' in function_calls:
-                string_response = function_calls['to_message'](message_content)
-            elif len(message_content) > 0 and isinstance(message_content[0], dict):
-                string_response = [
-                    f'#{i}\n' + array_of_objects_to_string(d)
-                    for i, d in enumerate(message_content)
-                ]
-            else:
-                string_response = ['- ' + r for r in message_content]
-
-            message_content = '\n'.join(string_response)
-        # TODO END
+        message_content = self.format_message_content(response, function_calls)

        # TODO we need to specify the response when there is a function called
        # TODO maybe we can have a specific function that creates the GPT response from the function call
@@ -114,6 +103,33 @@ class AgentConvo:

        return response

+    def format_message_content(self, response, function_calls):
+        # TODO remove this once the database is set up properly
+        if isinstance(response, str):
+            return response
+        else:
+            # string_response = []
+            # for key, value in response.items():
+            #     string_response.append(f'# {key}')
+            #
+            #     if isinstance(value, list):
+            #         if 'to_message' in function_calls:
+            #             string_response.append(function_calls['to_message'](value))
+            #         elif len(value) > 0 and isinstance(value[0], dict):
+            #             string_response.extend([
+            #                 f'##{i}\n' + array_of_objects_to_string(d)
+            #                 for i, d in enumerate(value)
+            #             ])
+            #         else:
+            #             string_response.extend(['- ' + r for r in value])
+            #     else:
+            #         string_response.append(str(value))
+            #
+            # return '\n'.join(string_response)
+            return json.dumps(response)
+        # TODO END
+
+
    def continuous_conversation(self, prompt_path, prompt_data, function_calls=None):
        """
        Conducts a continuous conversation with the agent.
@@ -197,6 +213,18 @@ class AgentConvo:
            print(f"\n{content}\n", type='local')
        logger.info(f"{print_msg}: {content}\n")

+    def to_context_prompt(self):
+        logger.info(f'to_context_prompt({self.agent.project.current_step})')
+
+        # TODO: get dependencies & versions from the project (package.json, requirements.txt, pom.xml, etc.)
+        # Ideally, the LLM could do this, and we update it on load & whenever the file changes
+        # ...or LLM generates a script for `.gpt-pilot/get_dependencies` that we run
+        # https://github.com/Pythagora-io/gpt-pilot/issues/189
+        return get_prompt('development/context.prompt', {
+            'directory_tree': self.agent.project.get_directory_tree(),
+            'running_processes': running_processes,
+        })
+
    def to_playground(self):
        with open('const/convert_to_playground_convo.js', 'r', encoding='utf-8') as file:
            content = file.read()
--- a/pilot/helpers/Debugger.py
+++ b/pilot/helpers/Debugger.py
@@ -5,6 +5,7 @@ from const.code_execution import MAX_COMMAND_DEBUG_TRIES, MAX_RECUSION_LAYER
 from const.function_calls import DEBUG_STEPS_BREAKDOWN
 from helpers.exceptions.TokenLimitError import TokenLimitError
 from helpers.exceptions.TooDeepRecursionError import TooDeepRecursionError
+from logger.logger import logger


 class Debugger:
@@ -41,21 +42,25 @@ class Debugger:

            convo.load_branch(function_uuid)

-            debugging_plan = convo.send_message('dev_ops/debug.prompt',
+            llm_response = convo.send_message('dev_ops/debug.prompt',
                {
                    'command': command['command'] if command is not None else None,
                    'user_input': user_input,
                    'issue_description': issue_description,
-                    'os': platform.system()
+                    'os': platform.system(),
+                    'context': convo.to_context_prompt()
                },
                DEBUG_STEPS_BREAKDOWN)

+            logger.info('Thoughts: ' + llm_response['thoughts'])
+            logger.info('Reasoning: ' + llm_response['reasoning'])
+
            try:
                # TODO refactor to nicely get the developer agent
                response = self.agent.project.developer.execute_task(
                    convo,
-                    debugging_plan,
-                    command,
+                    llm_response['steps'],
+                    test_command=command,
                    test_after_code_changes=True,
                    continue_development=False,
                    is_root_task=is_root_task)
--- a/pilot/helpers/Project.py
+++ b/pilot/helpers/Project.py
@@ -23,8 +23,9 @@ from utils.dot_gpt_pilot import DotGptPilot


 class Project:
-    def __init__(self, args, name=None, description=None, user_stories=None, user_tasks=None, architecture=None,
-                 development_plan=None, current_step=None, ipc_client_instance=None, enable_dot_pilot_gpt=True):
+    def __init__(self, args, name=None, project_description=None, clarifications=None, user_stories=None,
+                 user_tasks=None, architecture=None, development_plan=None, current_step=None, ipc_client_instance=None,
+                 enable_dot_pilot_gpt=True):
        """
        Initialize a project.

@@ -56,20 +57,14 @@ class Project:

        # self.restore_files({dev_step_id_to_start_from})

-        if current_step is not None:
-            self.current_step = current_step
-        if name is not None:
-            self.name = name
-        if description is not None:
-            self.description = description
-        if user_stories is not None:
-            self.user_stories = user_stories
-        if user_tasks is not None:
-            self.user_tasks = user_tasks
-        if architecture is not None:
-            self.architecture = architecture
-        # if development_plan is not None:
-        #     self.development_plan = development_plan
+        self.current_step = current_step
+        self.name = name
+        self.project_description = project_description
+        self.clarifications = clarifications
+        self.user_stories = user_stories
+        self.user_tasks = user_tasks
+        self.architecture = architecture
+        self.development_plan = development_plan
        self.dot_pilot_gpt = DotGptPilot(log_chat_completions=enable_dot_pilot_gpt)

    def set_root_path(self, root_path: str):
@@ -81,27 +76,19 @@ class Project:
        Start the project.
        """
        self.project_manager = ProductOwner(self)
-        print(json.dumps({
-            "project_stage": "project_description"
-        }), type='info')
        self.project_manager.get_project_description()
-        print(json.dumps({
-            "project_stage": "user_stories"
-        }), type='info')
-        self.user_stories = self.project_manager.get_user_stories()
+
+        self.project_manager.get_user_stories()
        # self.user_tasks = self.project_manager.get_user_tasks()

-        print(json.dumps({
-            "project_stage": "architecture"
-        }), type='info')
        self.architect = Architect(self)
-        self.architecture = self.architect.get_architecture()
+        self.architect.get_architecture()

        self.developer = Developer(self)
        self.developer.set_up_environment()

        self.tech_lead = TechLead(self)
-        self.development_plan = self.tech_lead.create_development_plan()
+        self.tech_lead.create_development_plan()

        # TODO move to constructor eventually
        if self.args['step'] is not None and STEPS.index(self.args['step']) < STEPS.index('coding'):
@@ -141,6 +128,9 @@ class Project:
        self.developer.start_coding()

    def finish(self):
+        """
+        Finish the project.
+        """
        update_app_status(self.args['app_id'], STEPS[-1])
        # TODO say that project is finished and ask user for additional features, fixes,...
        return
@@ -155,11 +145,12 @@ class Project:
        Returns:
            dict: The directory tree.
        """
-        files = {}
-        if with_descriptions and False:
-            files = File.select().where(File.app_id == self.args['app_id'])
-            files = {snapshot.name: snapshot for snapshot in files}
-        return build_directory_tree(self.root_path + '/', ignore=IGNORE_FOLDERS, files=files, add_descriptions=False)
+        # files = {}
+        # if with_descriptions and False:
+        #     files = File.select().where(File.app_id == self.args['app_id'])
+        #     files = {snapshot.name: snapshot for snapshot in files}
+        # return build_directory_tree_with_descriptions(self.root_path, ignore=IGNORE_FOLDERS, files=files, add_descriptions=False)
+        return build_directory_tree(self.root_path, ignore=IGNORE_FOLDERS)

    def get_test_directory_tree(self):
        """
--- a/pilot/helpers/agents/Architect.py
+++ b/pilot/helpers/agents/Architect.py
@@ -19,37 +19,44 @@ class Architect(Agent):
        self.convo_architecture = None

    def get_architecture(self):
+        print(json.dumps({
+            "project_stage": "architecture"
+        }), type='info')
+
        self.project.current_step = ARCHITECTURE_STEP

        # If this app_id already did this step, just get all data from DB and don't ask user again
        step = get_progress_steps(self.project.args['app_id'], ARCHITECTURE_STEP)
        if step and not should_execute_step(self.project.args['step'], ARCHITECTURE_STEP):
            step_already_finished(self.project.args, step)
-            return step['architecture']
+            self.project.architecture = step['architecture']
+            return

        # ARCHITECTURE
        print(color_green_bold("Planning project architecture...\n"))
        logger.info("Planning project architecture...")

        self.convo_architecture = AgentConvo(self)
-        architecture = self.convo_architecture.send_message('architecture/technologies.prompt',
+        llm_response = self.convo_architecture.send_message('architecture/technologies.prompt',
            {'name': self.project.args['name'],
             'prompt': self.project.project_description,
+             'clarifications': self.project.clarifications,
             'user_stories': self.project.user_stories,
-            #  'user_tasks': self.project.user_tasks,
+             'user_tasks': self.project.user_tasks,
             'app_type': self.project.args['app_type']}, ARCHITECTURE)
+        self.project.architecture = llm_response['technologies']

        # TODO: Project.args should be a defined class so that all of the possible args are more obvious
        if self.project.args.get('advanced', False):
-            architecture = get_additional_info_from_user(self.project, architecture, 'architect')
+            self.project.architecture = get_additional_info_from_user(self.project, self.project.architecture, 'architect')

-        logger.info(f"Final architecture: {architecture}")
+        logger.info(f"Final architecture: {self.project.architecture}")

        save_progress(self.project.args['app_id'], self.project.current_step, {
            "messages": self.convo_architecture.messages,
-            "architecture": architecture,
+            "architecture": self.project.architecture,
            "app_data": generate_app_data(self.project.args)
        })

-        return architecture
+        return
        # ARCHITECTURE END
--- a/pilot/helpers/agents/CodeMonkey.py
+++ b/pilot/helpers/agents/CodeMonkey.py
@@ -19,7 +19,7 @@ class CodeMonkey(Agent):
        #     "finished_steps": ', '.join(f"#{j}" for j in range(step_index))
        # }, GET_FILES)

-        changes = convo.send_message('development/implement_changes.prompt', {
+        llm_response = convo.send_message('development/implement_changes.prompt', {
            "step_description": code_changes_description,
            "step_index": step_index,
            "directory_tree": self.project.get_directory_tree(True),
@@ -27,6 +27,8 @@ class CodeMonkey(Agent):
        }, IMPLEMENT_CHANGES)
        convo.remove_last_x_messages(1)

+        changes = llm_response['files']
+
        if self.project.skip_until_dev_step != str(self.project.checkpoints['last_development_step'].id):
            for file_data in changes:
                self.project.save_file(file_data)
--- a/pilot/helpers/agents/Developer.py
+++ b/pilot/helpers/agents/Developer.py
@@ -62,18 +62,22 @@ class Developer(Agent):
            "name": self.project.args['name'],
            "app_type": self.project.args['app_type'],
            "app_summary": self.project.project_description,
-            "clarification": [],
+            "clarifications": self.project.clarifications,
            "user_stories": self.project.user_stories,
-            # "user_tasks": self.project.user_tasks,
+            "user_tasks": self.project.user_tasks,
            "technologies": self.project.architecture,
-            "array_of_objects_to_string": array_of_objects_to_string,
+            "array_of_objects_to_string": array_of_objects_to_string,  # TODO check why is this here
            "directory_tree": self.project.get_directory_tree(True),
            "current_task_index": i,
            "development_tasks": self.project.development_plan,
            "files": self.project.get_all_coded_files(),
        })

-        task_steps = convo_dev_task.send_message('development/parse_task.prompt', {}, IMPLEMENT_TASK)
+        response = convo_dev_task.send_message('development/parse_task.prompt', {
+            'running_processes': running_processes,
+            'os': platform.system(),
+        }, IMPLEMENT_TASK)
+        task_steps = response['tasks']
        convo_dev_task.remove_last_x_messages(2)
        return self.execute_task(convo_dev_task, task_steps, development_task=development_task, continue_development=True, is_root_task=True)

@@ -107,12 +111,12 @@ class Developer(Agent):
        # TODO END
        additional_message = 'Let\'s start with the step #0:\n\n' if i == 0 else f'So far, steps { ", ".join(f"#{j}" for j in range(i)) } are finished so let\'s do step #{i + 1} now.\n\n'

-        process_name = data['process_name'] if 'process_name' in data else None
+        command_id = data['command_id'] if 'command_id' in data else None
        success_message = data['success_message'] if 'success_message' in data else None

        return run_command_until_success(convo, data['command'],
                                         timeout=data['timeout'],
-                                         process_name=process_name,
+                                         command_id=command_id,
                                         success_message=success_message,
                                         additional_message=additional_message)

@@ -140,7 +144,7 @@ class Developer(Agent):
                cbs={
                    'r': lambda conv: run_command_until_success(conv,
                                                                self.run_command,
-                                                                process_name='app',
+                                                                command_id='app',
                                                                timeout=None,
                                                                force=True,
                                                                return_cli_response=True)
@@ -151,19 +155,22 @@ class Developer(Agent):
                continue

            if response['user_input'] != 'continue':
-                return_value = self.debugger.debug(convo, user_input=response['user_input'], issue_description=step['human_intervention_description'])
+                return_value = self.debugger.debug(convo,
+                                                   user_input=response['user_input'],
+                                                   issue_description=step['human_intervention_description'])
                return_value['user_input'] = response['user_input']
                return return_value
            else:
                return response

    def step_test(self, convo, test_command):
-        should_rerun_command = convo.send_message('dev_ops/should_rerun_command.prompt',
-            test_command)
+        should_rerun_command = convo.send_message('dev_ops/should_rerun_command.prompt', test_command)
        if should_rerun_command == 'NO':
            return { "success": True }
        elif should_rerun_command == 'YES':
-            cli_response, llm_response = execute_command_and_check_cli_response(test_command['command'], test_command['timeout'], convo)
+            cli_response, llm_response = execute_command_and_check_cli_response(test_command['command'],
+                                                                                test_command['timeout'],
+                                                                                convo)
            logger.info('After running command llm_response: ' + llm_response)
            if llm_response == 'NEEDS_DEBUGGING':
                print(color_red('Got incorrect CLI response:'))
@@ -261,6 +268,7 @@ class Developer(Agent):
        convo.save_branch(function_uuid)

        for (i, step) in enumerate(task_steps):
+            logger.info('---------- execute_task() step #%d: %s', i, step)

            result = None
            step_implementation_try = 0
@@ -279,7 +287,10 @@ class Developer(Agent):
                    elif step['type'] == 'human_intervention':
                        result = self.step_human_intervention(convo, step)

+                    logger.info('  result: %s', result)
+
                    if test_command is not None and ('check_if_fixed' not in step or step['check_if_fixed']):
+                        logger.info('check_if_fixed: %s', test_command)
                        is_fixed = self.step_test(convo, test_command)
                        return is_fixed

@@ -325,7 +336,7 @@ class Developer(Agent):
            response = self.project.ask_for_human_intervention(
                user_description,
                cbs={'r': lambda convo: run_command_until_success(convo, self.run_command,
-                                                                  process_name='app',
+                                                                  command_id='app',
                                                                  timeout=None,
                                                                  force=True,
                                                                  return_cli_response=True, is_root_task=True)},
@@ -343,11 +354,11 @@ class Developer(Agent):
                    "name": self.project.args['name'],
                    "app_type": self.project.args['app_type'],
                    "app_summary": self.project.project_description,
-                    "clarification": [],
+                    "clarifications": self.project.clarifications,
                    "user_stories": self.project.user_stories,
-                    # "user_tasks": self.project.user_tasks,
+                    "user_tasks": self.project.user_tasks,
                    "technologies": self.project.architecture,
-                    "array_of_objects_to_string": array_of_objects_to_string,
+                    "array_of_objects_to_string": array_of_objects_to_string,  # TODO check why is this here
                    "directory_tree": self.project.get_directory_tree(True),
                    "files": self.project.get_all_coded_files(),
                    "user_input": user_feedback,
@@ -355,11 +366,13 @@ class Developer(Agent):

                # self.debugger.debug(iteration_convo, user_input=user_feedback)

-                task_steps = iteration_convo.send_message('development/parse_task.prompt', {
-                    'running_processes': running_processes
+                llm_response = iteration_convo.send_message('development/parse_task.prompt', {
+                    'running_processes': running_processes,
+                    'os': platform.system(),
                }, IMPLEMENT_TASK)
                iteration_convo.remove_last_x_messages(2)

+                task_steps = llm_response['tasks']
                return self.execute_task(iteration_convo, task_steps, is_root_task=True)


@@ -385,7 +398,7 @@ class Developer(Agent):
        logger.info("Setting up the environment...")

        os_info = get_os_info()
-        os_specific_technologies = self.convo_os_specific_tech.send_message('development/env_setup/specs.prompt',
+        llm_response = self.convo_os_specific_tech.send_message('development/env_setup/specs.prompt',
            {
                "name": self.project.args['name'],
                "app_type": self.project.args['app_type'],
@@ -393,16 +406,18 @@ class Developer(Agent):
                "technologies": self.project.architecture
            }, FILTER_OS_TECHNOLOGIES)

+        os_specific_technologies = llm_response['technologies']
        for technology in os_specific_technologies:
            logger.info('Installing %s', technology)
            llm_response = self.install_technology(technology)

            # TODO: I don't think llm_response would ever be 'DONE'?
            if llm_response != 'DONE':
-                installation_commands = self.convo_os_specific_tech.send_message(
+                llm_response = self.convo_os_specific_tech.send_message(
                    'development/env_setup/unsuccessful_installation.prompt',
                    {'technology': technology},
                    EXECUTE_COMMANDS)
+                installation_commands = llm_response['commands']

                if installation_commands is not None:
                    for cmd in installation_commands:
@@ -421,7 +436,7 @@ class Developer(Agent):
    # TODO: This is only called from the unreachable section of set_up_environment()
    def install_technology(self, technology):
        # TODO move the functions definitions to function_calls.py
-        cmd, timeout_val = self.convo_os_specific_tech.send_message(
+        llm_response = self.convo_os_specific_tech.send_message(
            'development/env_setup/install_next_technology.prompt',
            {'technology': technology}, {
                'definitions': [{
@@ -447,21 +462,26 @@ class Developer(Agent):
                }
            })

+        cmd = llm_response['command']
+        timeout_val = llm_response['timeout']
        cli_response, llm_response = execute_command_and_check_cli_response(cmd, timeout_val, self.convo_os_specific_tech)

        return llm_response

    def test_code_changes(self, code_monkey, convo):
        logger.info('Testing code changes...')
-        test_type, description = convo.send_message('development/task/step_check.prompt', {}, GET_TEST_TYPE)
+        llm_response = convo.send_message('development/task/step_check.prompt', {}, GET_TEST_TYPE)
+        test_type = llm_response['type']

        if test_type == 'command_test':
-            return run_command_until_success(convo, description['command'], timeout=description['timeout'])
+            command = llm_response['command']
+            return run_command_until_success(convo, command['command'], timeout=command['timeout'])
        elif test_type == 'automated_test':
            # TODO get code monkey to implement the automated test
            pass
        elif test_type == 'manual_test':
            # TODO make the message better
+            description = llm_response['manual_test_description']
            response = self.project.ask_for_human_intervention(
                'I need your help. Can you please test if this was successful?',
                description,
@@ -479,7 +499,7 @@ class Developer(Agent):
        logger.info('Implementing %s step #%d: %s', type, step_index, description)
        # TODO remove hardcoded folder path
        directory_tree = self.project.get_directory_tree(True)
-        step_details = convo.send_message('development/task/next_step.prompt', {
+        llm_response = convo.send_message('development/task/next_step.prompt', {
            'finished_steps': [],
            'step_description': description,
            'step_type': type,
@@ -487,6 +507,8 @@ class Developer(Agent):
            'step_index': step_index
        }, EXECUTE_COMMANDS)

+        step_details = llm_response['commands']
+
        if type == 'COMMAND':
            for cmd in step_details:
                run_command_until_success(convo, cmd['command'], timeout=cmd['timeout'])
--- a/pilot/helpers/agents/ProductOwner.py
+++ b/pilot/helpers/agents/ProductOwner.py
@@ -20,6 +20,10 @@ class ProductOwner(Agent):
        super().__init__('product_owner', project)

    def get_project_description(self):
+        print(json.dumps({
+            "project_stage": "project_description"
+        }), type='info')
+
        self.project.app = get_app(self.project.args['app_id'], error_if_not_found=False)

        # If this app_id already did this step, just get all data from DB and don't ask user again
@@ -75,6 +79,13 @@ class ProductOwner(Agent):
        # PROJECT DESCRIPTION END

    def get_user_stories(self):
+        if not self.project.args.get('advanced', False):
+            return
+
+        print(json.dumps({
+            "project_stage": "user_stories"
+        }), type='info')
+
        self.project.current_step = USER_STORIES_STEP
        self.convo_user_stories = AgentConvo(self)

@@ -83,7 +94,8 @@ class ProductOwner(Agent):
        if step and not should_execute_step(self.project.args['step'], USER_STORIES_STEP):
            step_already_finished(self.project.args, step)
            self.convo_user_stories.messages = step['messages']
-            return step['user_stories']
+            self.project.user_stories = step['user_stories']
+            return

        # USER STORIES
        msg = "User Stories:\n"
@@ -106,7 +118,7 @@ class ProductOwner(Agent):
            "app_data": generate_app_data(self.project.args)
        })

-        return self.project.user_stories
+        return
        # USER STORIES END

    def get_user_tasks(self):
--- a/pilot/helpers/agents/TechLead.py
+++ b/pilot/helpers/agents/TechLead.py
@@ -29,28 +29,30 @@ class TechLead(Agent):
        step = get_progress_steps(self.project.args['app_id'], DEVELOPMENT_PLANNING_STEP)
        if step and not should_execute_step(self.project.args['step'], DEVELOPMENT_PLANNING_STEP):
            step_already_finished(self.project.args, step)
-            return step['development_plan']
+            self.project.development_plan = step['development_plan']
+            return
        
        # DEVELOPMENT PLANNING
        print(color_green_bold("Starting to create the action plan for development...\n"))
        logger.info("Starting to create the action plan for development...")

        # TODO add clarifications
-        self.development_plan = self.convo_development_plan.send_message('development/plan.prompt',
+        llm_response = self.convo_development_plan.send_message('development/plan.prompt',
            {
                "name": self.project.args['name'],
                "app_type": self.project.args['app_type'],
                "app_summary": self.project.project_description,
-                "clarification": [],
+                "clarifications": self.project.clarifications,
                "user_stories": self.project.user_stories,
-                # "user_tasks": self.project.user_tasks,
+                "user_tasks": self.project.user_tasks,
                "technologies": self.project.architecture
            }, DEVELOPMENT_PLAN)
+        self.project.development_plan = llm_response['plan']

        logger.info('Plan for development is created.')

        save_progress(self.project.args['app_id'], self.project.current_step, {
-            "development_plan": self.development_plan, "app_data": generate_app_data(self.project.args)
+            "development_plan": self.project.development_plan, "app_data": generate_app_data(self.project.args)
        })

-        return self.development_plan
+        return
--- a/pilot/helpers/agents/test_CodeMonkey.py
+++ b/pilot/helpers/agents/test_CodeMonkey.py
@@ -54,12 +54,12 @@ class TestCodeMonkey:
            convo = MagicMock()
            mock_responses = [
                # [],
-                [{
+                {'files': [{
                    'content': 'Washington',
                    'description': "A new .txt file with the word 'Washington' in it.",
                    'name': 'washington.txt',
                    'path': 'washington.txt'
-                }]
+                }]}
            ]
            convo.send_message.side_effect = mock_responses

@@ -94,12 +94,12 @@ class TestCodeMonkey:
            convo = MagicMock()
            mock_responses = [
                # ['file_to_read.txt', 'output.txt'],
-                [{
+                {'files': [{
                    'content': 'Hello World!\n',
                    'description': 'This file is the output file. The content of file_to_read.txt is copied into this file.',
                    'name': 'output.txt',
                    'path': 'output.txt'
-                }]
+                }]}
            ]
            convo.send_message.side_effect = mock_responses

--- a/pilot/helpers/agents/test_Developer.py
+++ b/pilot/helpers/agents/test_Developer.py
@@ -2,7 +2,7 @@ import builtins
 import json
 import os
 import pytest
-from unittest.mock import patch
+from unittest.mock import patch, MagicMock

 import requests

@@ -12,8 +12,8 @@ load_dotenv()

 from main import get_custom_print
 from .Developer import Developer, ENVIRONMENT_SETUP_STEP
-from helpers.Project import Project
 from test.mock_questionary import MockQuestionary
+from helpers.test_Project import create_project


 class TestDeveloper:
@@ -21,18 +21,12 @@ class TestDeveloper:
        builtins.print, ipc_client_instance = get_custom_print({})

        name = 'TestDeveloper'
-        self.project = Project({
-                'app_id': 'test-developer',
-                'name': name,
-                'app_type': ''
-            },
-            name=name,
-            architecture=[],
-            user_stories=[]
-        )
-
+        self.project = create_project()
+        self.project.app_id = 'test-developer'
+        self.project.name = name
        self.project.set_root_path(os.path.abspath(os.path.join(os.path.dirname(os.path.abspath(__file__)),
                                                              '../../../workspace/TestDeveloper')))
+
        self.project.technologies = []
        self.project.current_step = ENVIRONMENT_SETUP_STEP
        self.developer = Developer(self.project)
@@ -53,7 +47,35 @@ class TestDeveloper:

        # Then
        assert llm_response == 'DONE'
-        mock_execute_command.assert_called_once_with(self.project, 'python --version', 10)
+        mock_execute_command.assert_called_once_with(self.project, 'python --version', timeout=10)
+
+    @patch('helpers.AgentConvo.get_saved_development_step')
+    @patch('helpers.AgentConvo.save_development_step')
+    @patch('helpers.AgentConvo.create_gpt_chat_completion',
+           return_value={'text': '{"tasks": [{"command": "ls -al"}]}'})
+    def test_implement_task(self, mock_completion, mock_save, mock_get_saved_step):
+        # Given any project
+        project = create_project()
+        project.project_description = 'Test Project'
+        project.development_plan = [{
+            'description': 'Do stuff',
+            'user_review_goal': 'Do stuff',
+        }]
+        project.get_all_coded_files = lambda: []
+        project.current_step = 'test'
+
+        # and a developer who will execute any task
+        developer = Developer(project)
+        developer.execute_task = MagicMock()
+        developer.execute_task.return_value = 'DONE'
+
+        # When
+        llm_response = developer.implement_task(0, {'description': 'Do stuff'})
+
+        # Then we parse the response correctly and send list of steps to execute_task()
+        assert llm_response == 'DONE'
+        assert developer.execute_task.call_count == 1
+        assert developer.execute_task.call_args[0][1] == [{'command': 'ls -al'}]

    @patch('helpers.AgentConvo.get_saved_development_step')
    @patch('helpers.AgentConvo.save_development_step')
@@ -113,7 +135,7 @@ class TestDeveloper:

        mock_chat_completion.side_effect = [
            {'text': '{"type": "manual_test", "manual_test_description": "Does it look good?"}'},
-            {'text': '{"steps": [{"type": "command", "command": {"command": "something scary", "timeout": 3000}, "check_if_fixed": true}]}'},
+            {'text': '{"thoughts": "hmmm...", "reasoning": "testing", "steps": [{"type": "command", "command": {"command": "something scary", "timeout": 3000}, "check_if_fixed": true}]}'},
            {'text': 'do something else scary'},
        ]

--- a/pilot/helpers/cli.py
+++ b/pilot/helpers/cli.py
@@ -18,7 +18,7 @@ from const.code_execution import MIN_COMMAND_RUN_TIME, MAX_COMMAND_RUN_TIME, MAX
 interrupted = False

 running_processes: Dict[str, tuple[str, int]] = {}
-"""Holds a list of (command, process ID)s, mapped to the `process_name` provided in the call to `execute_command()`."""
+"""Holds a list of (command, process ID)s, mapped to the `command_id` provided in the call to `execute_command()`."""


 def enqueue_output(out, q):
@@ -72,14 +72,14 @@ def run_command(command, root_path, q_stdout, q_stderr) -> subprocess.Popen:
    return process


-def terminate_named_process(process_name: str) -> None:
-    if process_name in running_processes:
-        terminate_process(running_processes[process_name][1], process_name)
+def terminate_named_process(command_id: str) -> None:
+    if command_id in running_processes:
+        terminate_process(running_processes[command_id][1], command_id)


 def terminate_running_processes():
-    for process_name in list(running_processes.keys()):
-        terminate_process(running_processes[process_name][1], process_name)
+    for command_id in list(running_processes.keys()):
+        terminate_process(running_processes[command_id][1], command_id)


 def terminate_process(pid: int, name=None) -> None:
@@ -99,12 +99,12 @@ def terminate_process(pid: int, name=None) -> None:
        except OSError as e:
            logger.error(f'Error while terminating process: {e}')

-    for process_name in list(running_processes.keys()):
-        if running_processes[process_name][1] == pid:
-            del running_processes[process_name]
+    for command_id in list(running_processes.keys()):
+        if running_processes[command_id][1] == pid:
+            del running_processes[command_id]


-def execute_command(project, command, timeout=None, success_message=None, process_name: str = None, force=False) \
+def execute_command(project, command, timeout=None, success_message=None, command_id: str = None, force=False) \
        -> (str, str, int):
    """
    Execute a command and capture its output.
@@ -114,8 +114,7 @@ def execute_command(project, command, timeout=None, success_message=None, proces
        command (str): The command to run.
        timeout (int, optional): The maximum execution time in milliseconds. Default is None.
        success_message: A message to look for in the output of the command to determine if successful or not.
-        process_name (str, optional): A name for the process.
-                            If `timeout` is not provided, can be used to terminate the process.
+        command_id (str, optional): A unique identifier assigned by the LLM, can be used to terminate the process.
        force (bool, optional): Whether to execute the command without confirmation. Default is False.

    Returns:
@@ -178,9 +177,9 @@ def execute_command(project, command, timeout=None, success_message=None, proces
    q = queue.Queue()
    process = run_command(command, project.root_path, q, q_stderr)

-    if process_name is not None:
-        terminate_named_process(process_name)
-        running_processes[process_name] = (command, process.pid)
+    if command_id is not None:
+        terminate_named_process(command_id)
+        running_processes[command_id] = (command, process.pid)

    output = ''
    stderr_output = ''
@@ -200,6 +199,7 @@ def execute_command(project, command, timeout=None, success_message=None, proces

            # Check if process has finished
            if process.poll() is not None:
+                logger.info('process exited with return code: %d', process.returncode)
                # Get remaining lines from the queue
                time.sleep(0.1)  # TODO this shouldn't be used
                while not q.empty():
@@ -212,8 +212,8 @@ def execute_command(project, command, timeout=None, success_message=None, proces

            # If timeout is reached, kill the process
            if timeout is not None and elapsed_time * 1000 > timeout:
-                if process_name is not None:
-                    logger.info(f'Process "{process_name}" running after timeout as pid: {process.pid}')
+                if command_id is not None:
+                    logger.info(f'Process "{command_id}" running after timeout as pid: {process.pid}')
                    break

                raise TimeoutError("Command exceeded the specified timeout.")
@@ -274,7 +274,53 @@ def execute_command(project, command, timeout=None, success_message=None, proces
    return return_value, 'DONE' if was_success else None, process.returncode


-def build_directory_tree(path, prefix="", ignore=None, is_last=False, files=None, add_descriptions=False):
+def build_directory_tree(path, prefix='', is_root=True, ignore=None):
+    """Build the directory tree structure in a simplified format.
+
+    Args:
+    - path: The starting directory path; anything that is not a directory yields ''.
+    - prefix: Prefix for the current item, used for recursion.
+    - is_root: Flag to indicate if the current item is the root directory.
+    - ignore: directory names to skip; None (the default) skips nothing.
+
+    Returns:
+    - A string representation of the directory tree.
+    """
+    output = ""
+    indent = '  '
+
+    if os.path.isdir(path):
+        dir_name = os.path.basename(path)
+        if is_root:
+            output += '/'
+        else:
+            output += f'{prefix}/{dir_name}'
+
+        # `ignore` may be None, so fall back to an empty tuple for the membership test
+        items = os.listdir(path)
+        dirs = [item for item in items if os.path.isdir(os.path.join(path, item)) and item not in (ignore or ())]
+        files = [item for item in items if os.path.isfile(os.path.join(path, item))]
+        dirs.sort()
+        files.sort()
+
+        if dirs:
+            output += '\n'
+            for dir_item in dirs:
+                item_path = os.path.join(path, dir_item)
+                output += build_directory_tree(item_path, prefix + indent, is_root=False, ignore=ignore)
+            # Files of a directory that has sub-directories are listed on their own line
+            if files:
+                output += f"{prefix}  {', '.join(files)}\n"
+        # A directory holding only files is rendered on a single line: `/dir: a, b`
+        elif files:
+            output += f": {', '.join(files)}\n"
+        else:
+            output += '\n'
+
+    return output
+
+
+def build_directory_tree_with_descriptions(path, prefix="", ignore=None, is_last=False, files=None):
    """Build the directory tree structure in tree-like format.

    Args:
@@ -297,17 +343,19 @@ def build_directory_tree(path, prefix="", ignore=None, is_last=False, files=None

    if os.path.isdir(path):
        # It's a directory, add its name to the output and then recurse into it
-        output += prefix + "|-- " + os.path.basename(path) + ((' - ' + files[os.path.basename(path)].description + ' ' if files and os.path.basename(path) in files and add_descriptions else '')) + "/\n"
+        output += prefix + "|-- " + os.path.basename(path) + \
+                  ((' - ' + files[os.path.basename(path)].description + ' ' if files and os.path.basename(path) in files else '')) + "/\n"

        # List items in the directory
        items = os.listdir(path)
        for index, item in enumerate(items):
            item_path = os.path.join(path, item)
-            output += build_directory_tree(item_path, prefix + indent, ignore, index == len(items) - 1, files, add_descriptions)
+            output += build_directory_tree_with_descriptions(item_path, prefix + indent, ignore, index == len(items) - 1, files)

    else:
        # It's a file, add its name to the output
-        output += prefix + "|-- " + os.path.basename(path) + ((' - ' + files[os.path.basename(path)].description + ' ' if files and os.path.basename(path) in files and add_descriptions else '')) + "\n"
+        output += prefix + "|-- " + os.path.basename(path) + \
+                  ((' - ' + files[os.path.basename(path)].description + ' ' if files and os.path.basename(path) in files else '')) + "\n"

    return output

@@ -339,7 +387,7 @@ def execute_command_and_check_cli_response(command, timeout, convo):

 def run_command_until_success(convo, command,
                              timeout: Union[int, None],
-                              process_name: Union[str, None] = None,
+                              command_id: Union[str, None] = None,
                              success_message=None,
                              additional_message=None,
                              force=False,
@@ -352,7 +400,7 @@ def run_command_until_success(convo, command,
        convo (AgentConvo): The conversation object.
        command (str): The command to run.
        timeout (int): The maximum execution time in milliseconds.
-        process_name: A name for the process.
+        command_id: A unique identifier for the process, assigned by the LLM.
                      If `timeout` is not provided, can be used to terminate the process.
        success_message: A message to look for in the output of the command to determine if successful or not.
        additional_message (str, optional): Additional message to include in the response.
@@ -364,7 +412,7 @@ def run_command_until_success(convo, command,
                                                        command,
                                                        timeout=timeout,
                                                        success_message=success_message,
-                                                        process_name=process_name,
+                                                        command_id=command_id,
                                                        force=force)

    if response is None:
@@ -394,7 +442,12 @@ def run_command_until_success(convo, command,
                # This catch is necessary to return the correct value (cli_response) to continue development function so
                # the developer can debug the appropriate issue
                # this snippet represents the first entry point into debugging recursion because of return_cli_response
-                return convo.agent.debugger.debug(convo, {'command': command, 'timeout': timeout})
+                return convo.agent.debugger.debug(convo, {
+                    'command': command,
+                    'timeout': timeout,
+                    'command_id': command_id,
+                    'success_message': success_message,
+                })
            except TooDeepRecursionError as e:
                # this is only to put appropriate message in the response after TooDeepRecursionError is raised
                raise TooDeepRecursionError(cli_response) if return_cli_response else e
--- a/pilot/helpers/files.py
+++ b/pilot/helpers/files.py
@@ -21,6 +21,7 @@ def get_files_content(directory, ignore=[]):
        dirs[:] = [d for d in dirs if d not in ignore]

        for file in files:
+            # TODO: avoid sharing `.env` etc
            if file in ignore:
                continue

--- a/pilot/helpers/test_AgentConvo.py
+++ b/pilot/helpers/test_AgentConvo.py
@@ -0,0 +1,66 @@
+import builtins
+import os.path
+from dotenv import load_dotenv
+from database.database import database
+from const.function_calls import IMPLEMENT_TASK
+from helpers.agents.Developer import Developer
+from helpers.AgentConvo import AgentConvo
+from utils.custom_print import get_custom_print
+from .test_Project import create_project
+
+load_dotenv()
+
+builtins.print, ipc_client_instance = get_custom_print({})
+
+
+# def test_format_message_content_json_response():
+#     # Given
+#     project = create_project()
+#     project.current_step = 'test'
+#     developer = Developer(project)
+#     convo = AgentConvo(developer)
+#
+#     response = {
+#         'files': [
+#             {
+#                 'name': 'package.json',
+#                 'path': '/package.json',
+#                 'content': '{\n  "name": "complex_app",\n  "version": "1.0.0",\n  "description": "",\n  "main": "index.js",\n  "directories": {\n    "test": "tests"\n  },\n  "scripts": {\n    "test": "echo \\"Error: no test specified\\" && exit 1",\n    "start": "node index.js"\n  },\n  "keywords": [],\n  "author": "",\n  "license": "ISC",\n  "dependencies": {\n    "axios": "^1.5.1",\n    "express": "^4.18.2",\n    "mongoose": "^7.6.1",\n    "socket.io": "^4.7.2"\n  },\n  "devDependencies": {\n    "nodemon": "^3.0.1"\n  }\n}'
+#             }
+#         ]
+#     }
+#
+#     # When
+#     message_content = convo.format_message_content(response, IMPLEMENT_TASK)
+#
+#     # Then
+#     assert message_content == '''
+# # files
+# ##0
+# name: package.json
+# path: /package.json
+# content: {
+#   "name": "complex_app",
+#   "version": "1.0.0",
+#   "description": "",
+#   "main": "index.js",
+#   "directories": {
+#     "test": "tests"
+#   },
+#   "scripts": {
+#     "test": "echo \\"Error: no test specified\\" && exit 1",
+#     "start": "node index.js"
+#   },
+#   "keywords": [],
+#   "author": "",
+#   "license": "ISC",
+#   "dependencies": {
+#     "axios": "^1.5.1",
+#     "express": "^4.18.2",
+#     "mongoose": "^7.6.1",
+#     "socket.io": "^4.7.2"
+#   },
+#   "devDependencies": {
+#     "nodemon": "^3.0.1"
+#   }
+# }'''.lstrip()
--- a/pilot/helpers/test_Project.py
+++ b/pilot/helpers/test_Project.py
@@ -132,15 +132,30 @@ class TestProjectFileLists:

        # with directories including common.IGNORE_FOLDERS
        src = os.path.join(project.root_path, 'src')
+        foo = os.path.join(project.root_path, 'src/foo')
+        files_no_folders = os.path.join(foo, 'files_no_folders')
        os.makedirs(src, exist_ok=True)
+        os.makedirs(foo, exist_ok=True)
+        os.makedirs(foo + '/empty1', exist_ok=True)
+        os.makedirs(foo + '/empty2', exist_ok=True)
+        os.makedirs(files_no_folders, exist_ok=True)
        for dir in ['.git', '.idea', '.vscode', '__pycache__', 'node_modules', 'venv', 'dist', 'build']:
            os.makedirs(os.path.join(project.root_path, dir), exist_ok=True)

        # ...and files
+
        with open(os.path.join(project.root_path, 'package.json'), 'w') as file:
            json.dump({'name': 'test app'}, file, indent=2)
-        with open(os.path.join(src, 'main.js'), 'w') as file:
-            file.write('console.log("Hello World!");')
+        for path in [
+            os.path.join(src, 'main.js'),
+            os.path.join(src, 'other.js'),
+            os.path.join(foo, 'bar.js'),
+            os.path.join(foo, 'fighters.js'),
+            os.path.join(files_no_folders, 'file1.js'),
+            os.path.join(files_no_folders, 'file2.js'),
+        ]:
+            with open(path, 'w') as file:
+                file.write('console.log("Hello World!");')

        # and a non-empty .gpt-pilot directory
        project.dot_pilot_gpt.write_project(project)
@@ -150,11 +165,17 @@ class TestProjectFileLists:
        tree = self.project.get_directory_tree()

        # Then we should not be including the .gpt-pilot directory or other ignored directories
+        # print('\n' + tree)
        assert tree == '''
-|-- /
-|   |-- package.json
-|   |-- src/
-|   |   |-- main.js
+/
+  /src
+    /foo
+      /empty1
+      /empty2
+      /files_no_folders: file1.js, file2.js
+      bar.js, fighters.js
+    main.js, other.js
+  package.json
 '''.lstrip()

    @patch('helpers.Project.DevelopmentSteps.get_or_create', return_value=('test', True))
@@ -167,6 +188,7 @@ class TestProjectFileLists:
        self.project.save_files_snapshot('test')

        # Then the files should be saved to the project, but nothing from `.gpt-pilot/`
-        assert mock_file.call_count == 2
-        assert mock_file.call_args_list[0][1]['name'] == 'package.json'
-        assert mock_file.call_args_list[1][1]['name'] == 'main.js'
+        assert mock_file.call_count == 7
+        files = ['package.json', 'main.js', 'file1.js', 'file2.js', 'bar.js', 'fighters.js', 'other.js']
+        for i in range(7):
+            assert mock_file.call_args_list[i][1]['name'] in files
--- a/pilot/main.py
+++ b/pilot/main.py
@@ -43,7 +43,15 @@ if __name__ == "__main__":
        if '--api-key' in args:
            os.environ["OPENAI_API_KEY"] = args['--api-key']
        if '--get-created-apps-with-steps' in args:
-            print({ 'db_data': get_created_apps_with_steps() }, type='info')
+            if ipc_client_instance is not None:
+                print({ 'db_data': get_created_apps_with_steps() }, type='info')
+            else:
+                print('----------------------------------------------------------------------------------------')
+                print('app_id                                step                 dev_step  name')
+                print('----------------------------------------------------------------------------------------')
+                print('\n'.join(f"{app['id']}: {app['status']:20}      "
+                                f"{'' if len(app['development_steps']) == 0 else app['development_steps'][-1]['id']:3}"
+                                f"  {app['name']}" for app in get_created_apps_with_steps()))
        elif '--ux-test' in args:
            from test.ux_tests import run_test
            run_test(args['--ux-test'], args)
--- a/pilot/prompts/architecture/technologies.prompt
+++ b/pilot/prompts/architecture/technologies.prompt
@@ -5,6 +5,7 @@ Here is a high level description of "{{ name }}":
 {{ prompt }}
 ```

+{% if clarifications %}
 Here are some additional questions and answers to clarify the apps description:
 ```
 {% for clarification in clarifications %}
@@ -13,20 +14,25 @@ A: {{ clarification.answer }}
 {% endfor %}
 ```

+{% endif %}
+{% if user_stories %}
 Here are user stories that specify how users use "{{ name }}":
 ```
 {% for story in user_stories %}
 - {{ story }}
 {% endfor %}
-```{#
+```

+{% endif %}
+{% if user_tasks %}
 Here are user tasks that specify what users need to do to interact with "{{ name }}":
 ```
 {% for task in user_tasks %}
 - {{ task }}
 {% endfor %}
-```#}
+```

+{% endif %}
 Now, based on the app's description, user stories and user tasks, think step by step and list the names of the technologies that will be used by your development team to create the app "{{ name }}". Do not write any explanations behind your choices but only a list of technologies that will be used.

 You do not need to list any technologies related to automated tests like Jest, Cypress, Mocha, Selenium, etc.
--- a/pilot/prompts/dev_ops/debug.prompt
+++ b/pilot/prompts/dev_ops/debug.prompt
@@ -3,7 +3,9 @@ You wanted me to check this - `{{ issue_description }}` but there was a problem{
 ```
 {{ user_input }}
 ```
-{% endif %}I want you to debug this issue by yourself and I will give you 2 functions that you can use - `run_command` and `implement_code_changes`.
+{% endif -%}
+{{ context }}
+I want you to debug this issue by yourself and I will give you 2 functions that you can use - `run_command` and `implement_code_changes`.

 `run_command` function will run a command on the machine and will return the CLI output to you so you can see what to do next. Note that the command will run on a {{ os }} machine.

--- a/pilot/prompts/development/context.prompt
+++ b/pilot/prompts/development/context.prompt
@@ -0,0 +1,14 @@
+{%- if directory_tree %}
+The project directory tree looks like:
+
+{{ directory_tree }}
+{% endif -%}
+{% if running_processes -%}
+Note that the following processes are already running:
+
+{%- for key, data in running_processes.items() %}
+
+command_id: {{ key }}
+command: {{ data[0] }}
+{%- endfor -%}
+{%- endif -%}
--- a/pilot/prompts/development/parse_task.prompt
+++ b/pilot/prompts/development/parse_task.prompt
@@ -1,8 +1,10 @@
-Ok, now, take your previous message and convert it to actionable items. An item might be a code change or a command run. When you need to change code, make sure that you put the entire content of the file in the value of `content` key even though you will likely copy and paste the most of the previous message.
-{%- if running_processes %}
+Ok, now, take your previous message and convert it to actionable items. An item might be a code change or a command run. When you need to change code, make sure that you put the entire content of the file in the value of `content` key even though you will likely copy and paste the most of the previous message. Note that the commands will run on a {{ os }} machine.
+{% if running_processes -%}
 Note that the following processes are already running:

-{% for key, data in running_processes.items() -%}
- "{{ key }}" (`{{ data[0] }}`)
-{% endfor -%}
-{% endif -%}
+{%- for key, data in running_processes.items() %}
+
+command_id: {{ key }}
+command: {{ data[0] }}
+{%- endfor -%}
+{%- endif -%}
--- a/pilot/prompts/development/plan.prompt
+++ b/pilot/prompts/development/plan.prompt
@@ -5,6 +5,7 @@ Here is a high level description of "{{ name }}":
 {{ app_summary }}
 ```

+{% if clarifications %}
 Here are some additional questions and answers to clarify the apps description:
 ```
 {% for clarification in clarifications %}
@@ -13,6 +14,8 @@ A: {{ clarification.answer }}
 {% endfor %}
 ```

+{% endif %}
+{% if user_stories %}
 Here are user stories that specify how users use "{{ name }}":
 ```
 {% for story in user_stories %}
@@ -20,6 +23,8 @@ Here are user stories that specify how users use "{{ name }}":
 {% endfor %}
 ```

+{% endif %}
+{% if user_tasks %}
 Here are user tasks that specify what users need to do to interact with "{{ name }}":
 ```
 {% for task in user_tasks %}
@@ -27,6 +32,7 @@ Here are user tasks that specify what users need to do to interact with "{{ name
 {% endfor %}
 ```

+{% endif %}
 Here are the technologies that you need to use for this project:
 ```
 {% for tech in technologies %}
@@ -34,10 +40,10 @@ Here are the technologies that you need to use for this project:
 {% endfor %}
 ```

-OK, now, you need to create code to have this app fully working but before we go into the coding part, I want you to split the development process of creating this app into smaller tasks so that it is easier to debug and make the app work. Each smaller task of this project has to be a whole that can be reviewed by a developer to make sure we're on a right track to create this app completely. However, it cannot be split into tasks that are too small as well.
+OK, now, you need to create code to have this app fully working but before we go into the coding part, I want you to split the development process of creating this app into smaller tasks so that it is easier to debug and make the app work. Each smaller task of this project has to be a whole that can be reviewed by a developer to make sure we're on a right track to create this app completely. Also, make sure it is not split into tasks that are too small for no reason.

 Each task needs to be related only to the development of this app and nothing else - once the app is fully working, that is it. There shouldn't be a task for deployment, writing documentation, or anything that is not writing the actual code. Think task by task and create the least number of tasks that are relevant for this specific app.

 For each task, there must be a way for human developer to check if the task is done or not. Write how should the developer check if the task is done.

-Now, based on the app's description, user stories and user tasks, and the technologies that you need to use, think task by task and create the entire development plan. Start from the project setup and specify each task until the moment when the entire app should be fully working. For each task, write a description and a user-review goal.
+Now, based on the app's description,{% if user_stories %} user stories,{% endif %}{% if user_tasks %} user tasks,{% endif %} and the technologies that you need to use, think task by task and create the entire development plan. Start from the project setup and specify each task until the moment when the entire app should be fully working. For each task, write a description and a user-review goal.
--- a/pilot/prompts/development/task/breakdown.prompt
+++ b/pilot/prompts/development/task/breakdown.prompt
@@ -5,16 +5,20 @@ Here is a high level description of "{{ name }}":
 {{ app_summary }}
 ```

+{% if user_stories %}
 Here are user stories that specify how users use "{{ name }}":
 ```{% for story in user_stories %}
 - {{ story }}{% endfor %}
-```{#
+```

+{% endif %}
+{% if user_tasks %}
 Here are user tasks that specify what users need to do to interact with "{{ name }}":
 ```{% for task in user_tasks %}
 - {{ task }}{% endfor %}
-```#}
+```

+{% endif %}
 Here are the technologies that you need to use for this project:
 ```{% for tech in technologies %}
 - {{ tech }}{% endfor %}
@@ -33,19 +37,23 @@ So far, this code has been implemented

 We've broken the development of this app down to these tasks:
 ```{% for task in development_tasks %}
- {{ task['description'] }}{% endfor %}
+- {{ loop.index }}. {{ task['description'] }}
+{% endfor %}
 ```

-You are currently working on this task with the following description: {{ development_tasks[current_task_index]['description'] }}
+You are currently working on task {{ current_task_index + 1 }} with the following description:
+```
+{{ development_tasks[current_task_index]['description'] }}
+```
 After all the code is finished, a human developer will check it works this way - {{ development_tasks[current_task_index]['user_review_goal'] }}

-Now, tell me all the code that needs to be written to implement this app and have it fully working and all commands that need to be run to implement this app.
+Now, tell me all the code that needs to be written to implement ONLY this task and have it fully working and all commands that need to be run to implement this task.

 {{no_microservices}}

 **IMPORTANT**
-Remember, I'm currently in an empty folder where I will start writing files that you tell me.
-Tell me how can I test the app to see if it's working or not.
+Remember, I created an empty folder where I will start writing files that you tell me and that are needed for this app.
+Tell me how I can test this task to see if it's working or not.
 You do not need to make any automated tests work.
 DO NOT specify commands to create any folders or files, they will be created automatically - just specify the relative path to each file that needs to be written.
 Never use the port 5000 to run the app, it's reserved.
--- a/pilot/prompts/system_messages/full_stack_developer.prompt
+++ b/pilot/prompts/system_messages/full_stack_developer.prompt
@@ -1 +1 @@
-{#You are a full stack software developer who works in a software development agency. You write very modular code and you practice TDD (test driven development) whenever is suitable to use it. Your job is to implement tasks that your tech lead assigns you. Each task has a description of what needs to be implemented, a programmatic goal that will determine if a task can be marked as done from a programmatic perspective (this is basically a blueprint for an automated test that is run before you send the task for a review to your tech lead) and user-review goal that will determine if a task is done or not but from a user perspective since it will be reviewed by a human.#}
+You are a full stack software developer who works in a software development agency. You write very modular and clean code. Your job is to implement tasks that your tech lead assigns you. Each task has a description of what needs to be implemented, a programmatic goal that will determine if a task can be marked as done, and a user-review goal that will determine if a task is done or not from a user perspective, since it will be reviewed by a human.
--- a/pilot/prompts/test_prompts.py
+++ b/pilot/prompts/test_prompts.py
@@ -66,4 +66,4 @@ def test_parse_task_with_processes():

    # Then
    assert 'the following processes are already running:' in prompt
-    assert '- "app" (`npm start`)\n- "mongo" (`mongod`)' in prompt
+    assert 'command_id: app\ncommand: npm start\n\ncommand_id: mongo\ncommand: mongod' in prompt
--- a/pilot/utils/arguments.py
+++ b/pilot/utils/arguments.py
@@ -34,29 +34,27 @@ def get_arguments():

    app = None
    if 'workspace' in arguments:
+        arguments['workspace'] = os.path.abspath(arguments['workspace'])
        app = get_app_by_user_workspace(arguments['user_id'], arguments['workspace'])
        if app is not None:
-            arguments['app_id'] = app.id
+            arguments['app_id'] = str(app.id)
    else:
        arguments['workspace'] = None

    if 'app_id' in arguments:
-        try:
-            if app is None:
-                app = get_app(arguments['app_id'])
+        if app is None:
+            app = get_app(arguments['app_id'])

-            arguments['app_type'] = app.app_type
-            arguments['name'] = app.name
-            if 'step' not in arguments or ('step' in arguments and not should_execute_step(arguments['step'], app.status)):
-                arguments['step'] = 'finished' if app.status == 'finished' else STEPS[STEPS.index(app.status) + 1]
+        arguments['app_type'] = app.app_type
+        arguments['name'] = app.name
+        if 'step' not in arguments or ('step' in arguments and not should_execute_step(arguments['step'], app.status)):
+            arguments['step'] = 'finished' if app.status == 'finished' else STEPS[STEPS.index(app.status) + 1]

-            print(color_green_bold('\n------------------ LOADING PROJECT ----------------------'))
-            print(color_green_bold(f'{app.name} (app_id={arguments["app_id"]})'))
-            print(color_green_bold('--------------------------------------------------------------\n'))
-        except ValueError as e:
-            print(e)
-            exit(1)
-    else:
+        print(color_green_bold('\n------------------ LOADING PROJECT ----------------------'))
+        print(color_green_bold(f'{app.name} (app_id={arguments["app_id"]})'))
+        print(color_green_bold('--------------------------------------------------------------\n'))
+
+    elif '--get-created-apps-with-steps' not in args:
        arguments['app_id'] = str(uuid.uuid4())
        print(color_green_bold('\n------------------ STARTING NEW PROJECT ----------------------'))
        print("If you wish to continue with this project in future run:")
--- a/pilot/utils/function_calling.py
+++ b/pilot/utils/function_calling.py
@@ -70,11 +70,7 @@ def parse_agent_response(response, function_calls: Union[FunctionCallSet, None])
    """
    if function_calls:
        text = response['text']
-        values = list(json.loads(text).values())
-        if len(values) == 1:
-            return values[0]
-        else:
-            return tuple(values)
+        return json.loads(text)

    return response['text']

--- a/pilot/utils/test_function_calling.py
+++ b/pilot/utils/test_function_calling.py
@@ -23,7 +23,7 @@ class TestFunctionCalling:
        response = parse_agent_response(response, function_calls)

        # Then
-        assert response == 'Hello world!'
+        assert response == {'greeting': 'Hello world!'}

    def test_parse_agent_response_json_markdown(self):
        # Given
@@ -35,7 +35,7 @@ class TestFunctionCalling:
        response = parse_agent_response(response, function_calls)

        # Then
-        assert response == 'Hello world!'
+        assert response == {'greeting': 'Hello world!'}

    def test_parse_agent_response_markdown(self):
        # Given
@@ -47,7 +47,7 @@ class TestFunctionCalling:
        response = parse_agent_response(response, function_calls)

        # Then
-        assert response == 'Hello world!'
+        assert response == {'greeting': 'Hello world!'}

    def test_parse_agent_response_multiple_args(self):
        # Given
@@ -55,11 +55,11 @@ class TestFunctionCalling:
        function_calls = {'definitions': [], 'functions': {}}

        # When
-        greeting, name = parse_agent_response(response, function_calls)
+        response = parse_agent_response(response, function_calls)

        # Then
-        assert greeting == 'Hello'
-        assert name == 'John'
+        assert response['greeting'] == 'Hello'
+        assert response['name'] == 'John'


 def test_json_prompter():
--- a/pilot/utils/test_llm_connection.py
+++ b/pilot/utils/test_llm_connection.py
@@ -493,7 +493,7 @@ solution-oriented decision-making in areas where precise instructions were not p

        assert response is not None
        response = parse_agent_response(response, function_calls)
-        assert 'Node.js' in response
+        assert 'Node.js' in response['technologies']

    @pytest.mark.uses_tokens
    @pytest.mark.parametrize('endpoint, model', [
@@ -548,9 +548,9 @@ The development process will include the creation of user stories and tasks, bas

        assert response is not None
        response = parse_agent_response(response, function_calls)
-        assert_non_empty_string(response[0]['description'])
-        assert_non_empty_string(response[0]['programmatic_goal'])
-        assert_non_empty_string(response[0]['user_review_goal'])
+        assert_non_empty_string(response['plan'][0]['description'])
+        assert_non_empty_string(response['plan'][0]['programmatic_goal'])
+        assert_non_empty_string(response['plan'][0]['user_review_goal'])


    # def test_break_down_development_task(self):