Merge branch 'main' into feature/no-color

This commit is contained in:
Arno.Edwards
2023-10-14 15:48:43 +08:00
committed by GitHub
53 changed files with 658 additions and 218 deletions

View File

@@ -18,7 +18,7 @@ diverse, inclusive, and healthy community.
Examples of behavior that contributes to a positive environment for our
community include:
* Demonstrating empathy and kindness toward other people
* Demonstrating empathy and kindness towards other people
* Being respectful of differing opinions, viewpoints, and experiences
* Giving and gracefully accepting constructive feedback
* Accepting responsibility and apologizing to those affected by our mistakes,

View File

@@ -7,16 +7,34 @@ on:
pull_request:
branches:
- main
- debugging_ipc
jobs:
build:
Docker:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- name: Build the Docker image
run: docker compose build
- name: Run the Docker image
run: docker compose up gpt-pilot -d
- name: Wait for the Docker image to start
run: docker ps
- name: Stop the Docker image
run: docker compose down
Test:
runs-on: ${{ matrix.os }}
strategy:
matrix:
# 3.10 - 04 Oct 2021
# 3.11 - 24 Oct 2022
python-version: ['3.9', '3.10', '3.11', '3.12']
os: [ubuntu-latest, macos-latest, windows-latest]
exclude:
# LINK : fatal error LNK1181: cannot open input file 'libpq.lib'
# Maybe related: https://github.com/psycopg/psycopg2/issues/1628
- os: windows-latest
python-version: '3.12'
steps:
- uses: actions/checkout@v4
@@ -37,12 +55,14 @@ jobs:
pip install flake8 ruff
flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
# stop the build if there are Python syntax errors or undefined names
ruff --format=github --select=E9,F63,F7,F82 --target-version=py37 .
ruff --output-format=github --select=E9,F63,F7,F82 --target-version=py37 ./pilot
# default set of ruff rules with GitHub Annotations
#ruff --format=github --target-version=py37 --ignore=F401,E501 .
ruff --output-format=github --target-version=py37 --ignore=F401,E402,E501 ./pilot
- name: Run tests
env:
PYTHONPATH: .
run: |
pip install pytest
cd pilot
PYTHONPATH=. pytest -m "not slow and not uses_tokens and not ux_test"
pytest -m "not slow and not uses_tokens and not ux_test"

View File

@@ -1,4 +1,7 @@
# 🧑‍✈️ GPT PILOT
<a href="https://trendshift.io/repositories/466" target="_blank"><img src="https://trendshift.io/api/badge/repositories/466" alt="Pythagora-io%2Fgpt-pilot | Trendshift" style="width: 250px; height: 55px;" width="250" height="55"/></a>
### GPT Pilot helps developers build apps 20x faster
You specify what kind of app you want to build. Then, GPT Pilot asks clarifying questions, creates the product and technical requirements, sets up the environment, and **starts coding the app step by step, like in real life, while you oversee the development process**. It asks you to review each task it finishes or to help when it gets stuck. This way, GPT Pilot acts as a coder while you are a lead dev who reviews code and helps when needed.
@@ -9,7 +12,7 @@ You specify what kind of app you want to build. Then, GPT Pilot asks clarifying
* [🔌 Requirements](#-requirements)
* [🚦How to start using gpt-pilot?](#how-to-start-using-gpt-pilot)
* [🐳 How to start gpt-pilot in docker?](#-how-to-start-gpt-pilot-in-docker)
* [🧑‍💻️ Other arguments](#-other-arguments)
* [🧑‍💻️ CLI arguments](#%EF%B8%8F-cli-arguments)
* [🔎 Examples](#-examples)
* [Real-time chat app](#-real-time-chat-app)
* [Markdown editor](#-markdown-editor)
@@ -19,6 +22,7 @@ You specify what kind of app you want to build. Then, GPT Pilot asks clarifying
* [🕴How's GPT Pilot different from _Smol developer_ and _GPT engineer_?](#hows-gpt-pilot-different-from-smol-developer-and-gpt-engineer)
* [🍻 Contributing](#-contributing)
* [🔗 Connect with us](#-connect-with-us)
* [🌟 Star history](#-star-history)
<!-- TOC -->
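The re-encoded `CLI arguments` anchor in the TOC above works because the 🧑‍💻️ emoji in the heading ends in U+FE0F (a variation selector), which GitHub percent-encodes in its heading slugs. A minimal sketch of that encoding (the slug rule itself is an assumption about GitHub's behavior, not documented code):

```python
from urllib.parse import quote

# U+FE0F (VARIATION SELECTOR-16) is three bytes in UTF-8: EF B8 8F.
# Percent-encoding those bytes yields exactly the %EF%B8%8F seen in the TOC link.
encoded = quote('\ufe0f')
```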
---
@@ -78,7 +82,7 @@ All generated code will be stored in the folder `workspace` inside the folder na
## 🐳 How to start gpt-pilot in docker?
1. `git clone https://github.com/Pythagora-io/gpt-pilot.git` (clone the repo)
2. Update the `docker-compose.yml` environment variables
2. Update the `docker-compose.yml` environment variables, which can be checked with `docker compose config`
3. Run `docker compose build`. This will build a gpt-pilot container for you.
4. Run `docker compose up`.
5. Access the web terminal on `port 7681`
@@ -251,3 +255,8 @@ Other than the research, GPT Pilot needs to be debugged to work in different sce
🌟 As an open-source tool, it would mean the world to us if you starred the GPT-pilot repo 🌟
💬 Join [the Discord server](https://discord.gg/HaqXugmxr9) to get in touch.
# 🌟 Star History
[![Star History Chart](https://api.star-history.com/svg?repos=Pythagora-io/gpt-pilot&type=Date)](https://star-history.com/#Pythagora-io/gpt-pilot&Date)

View File

@@ -3,8 +3,9 @@ ROLES = {
'product_owner': ['project_description', 'user_stories', 'user_tasks'],
'architect': ['architecture'],
'tech_lead': ['development_planning'],
'full_stack_developer': ['create_scripts', 'coding'],
'full_stack_developer': ['coding'],
'dev_ops': ['environment_setup'],
'code_monkey': ['coding']
}
STEPS = [
'project_description',
@@ -13,11 +14,13 @@ STEPS = [
'architecture',
'environment_setup',
'development_planning',
'coding'
'coding',
'finished'
]
IGNORE_FOLDERS = [
'.git',
'.gpt-pilot',
'.idea',
'.vscode',
'__pycache__',
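The new `'finished'` entry in `STEPS` pairs with the `Project.finish()` change later in this diff, which stamps the app with the terminal step via `update_app_status(self.args['app_id'], STEPS[-1])`. A hypothetical sketch (the in-memory dict stands in for `update_app_status` and the database):

```python
STEPS = [
    'project_description', 'user_stories', 'user_tasks', 'architecture',
    'environment_setup', 'development_planning', 'coding', 'finished',
]

def finish(app_id, app_statuses):
    # Mirrors Project.finish(): mark the app with the last step in STEPS.
    app_statuses[app_id] = STEPS[-1]

statuses = {}
finish('app-1', statuses)
```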

View File

@@ -40,11 +40,11 @@ def return_array_from_prompt(name_plural, name_singular, return_var_name):
}
def command_definition(description_command=f'A single command that needs to be executed.',
def command_definition(description_command='A single command that needs to be executed.',
description_timeout=
'Timeout in milliseconds that represent the approximate time this command takes to finish. '
'If you need to run a command that doesnt\'t finish by itself (eg. a command to run an app), '
'set the timeout to -1 and provide a process_name. '
'set the timeout to a value long enough to determine that it has started successfully, and provide a process_name. '
'If you need to create a directory that doesn\'t exist and is not the root project directory, '
'always create it by running a command `mkdir`'):
return {
@@ -59,6 +59,10 @@ def command_definition(description_command=f'A single command that needs to be e
'type': 'number',
'description': description_timeout,
},
'success_message': {
'type': 'string',
'description': 'A message to look for in the output of the command to determine if successful or not.',
},
'process_name': {
'type': 'string',
'description': 'If the process needs to continue running after the command is executed provide '
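Taken together, the schema returned by `command_definition()` now carries the new `success_message` property alongside `timeout` and `process_name`. A trimmed sketch of the resulting dict (descriptions shortened; only the property names are taken from the diff):

```python
def command_definition(description_command='A single command that needs to be executed.',
                       description_timeout='Timeout in milliseconds.'):
    # Condensed version of the function in the hunk above.
    return {
        'type': 'object',
        'properties': {
            'command': {'type': 'string', 'description': description_command},
            'timeout': {'type': 'number', 'description': description_timeout},
            'success_message': {
                'type': 'string',
                'description': 'A message to look for in the output of the command '
                               'to determine if successful or not.',
            },
            'process_name': {
                'type': 'string',
                'description': 'Name for a process that keeps running after the command.',
            },
        },
    }

schema = command_definition()
```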
@@ -136,14 +140,14 @@ DEV_TASKS_BREAKDOWN = {
'description': 'List of smaller development steps that need to be done to complete the entire task.',
'items': {
'type': 'object',
'description': 'A smaller development step that needs to be done to complete the entire task. Remember, if you need to run a command that doesnt\'t finish by itself (eg. a command to run an app), put the timeout to 3000 milliseconds. If you need to create a directory that doesn\'t exist and is not the root project directory, always create it by running a command `mkdir`',
'description': 'A smaller development step that needs to be done to complete the entire task. Remember, if you need to run a command that doesn\'t finish by itself (eg. a command to run an app), put the timeout to 3000 milliseconds. If you need to create a directory that doesn\'t exist and is not the root project directory, always create it by running a command `mkdir`',
'properties': {
'type': {
'type': 'string',
'enum': ['command', 'code_change', 'human_intervention'],
'description': 'Type of the development step that needs to be done to complete the entire task.',
},
'command': command_definition(f'A single command that needs to be executed.', 'Timeout in milliseconds that represent the approximate time the command takes to finish. This should be used only if the task is of a type "command". If you need to run a command that doesnt\'t finish by itself (eg. a command to run an app), put the timeout to 3000 milliseconds. Remember, this is not in seconds but in milliseconds so likely it always needs to be greater than 1000.'),
'command': command_definition('A single command that needs to be executed.', 'Timeout in milliseconds that represent the approximate time the command takes to finish. This should be used only if the task is of a type "command". If you need to run a command that doesn\'t finish by itself (eg. a command to run an app), put the timeout to 3000 milliseconds. Remember, this is not in seconds but in milliseconds so likely it always needs to be greater than 1000.'),
'code_change_description': {
'type': 'string',
'description': 'Description of the development step that needs to be done. This should be used only if the task is of a type "code_change" and it should thoroughly describe what needs to be done to implement the code change for a single file - it cannot include changes for multiple files.',
@@ -179,14 +183,18 @@ IMPLEMENT_TASK = {
'description': 'List of smaller development steps that need to be done to complete the entire task.',
'items': {
'type': 'object',
'description': 'A smaller development step that needs to be done to complete the entire task. Remember, if you need to run a command that doesnt\'t finish by itself (eg. a command to run an If you need to create a directory that doesn\'t exist and is not the root project directory, always create it by running a command `mkdir`',
'description': 'A smaller development step that needs to be done to complete the entire task. Remember, if you need to run a command that doesn\'t finish by itself (eg. a command to run an If you need to create a directory that doesn\'t exist and is not the root project directory, always create it by running a command `mkdir`',
'properties': {
'type': {
'type': 'string',
'enum': ['command', 'code_change', 'human_intervention'],
'enum': ['command', 'kill_process', 'code_change', 'human_intervention'],
'description': 'Type of the development step that needs to be done to complete the entire task.',
},
'command': command_definition(),
'kill_process': {
'type': 'string',
'description': 'To kill a process that was left running by a previous `command` step provide the `process_name` in this field and set `type` to "kill_process".',
},
'code_change': {
'type': 'object',
'description': 'A code change that needs to be implemented. This should be used only if the task is of a type "code_change".',
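With `kill_process` added to the `type` enum, a step executor has four branches to dispatch on, and the `kill_process` field names a process registered by an earlier `command` step. A hypothetical sketch of that dispatch (function and dict names are illustrative, not from the codebase):

```python
running_processes = {'app_server': 4242}  # process_name -> pid, illustrative

def execute_step(step):
    # Dispatch on the IMPLEMENT_TASK 'type' enum from the hunk above.
    kind = step['type']
    if kind == 'kill_process':
        # The step names a process left running by a previous 'command' step.
        pid = running_processes.pop(step['kill_process'], None)
        return ('killed', pid)
    if kind == 'command':
        return ('run', step['command']['command'])
    if kind == 'code_change':
        return ('edit', step.get('code_change'))
    return ('ask_human', step.get('human_intervention_description'))

result = execute_step({'type': 'kill_process', 'kill_process': 'app_server'})
```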
@@ -289,16 +297,16 @@ DEV_STEPS = {
},
{
'name': 'get_files',
'description': f'Returns development files that are currently implemented so that they can be analized and so that changes can be appropriatelly made.',
'description': 'Returns development files that are currently implemented so that they can be analyzed and so that changes can be appropriately made.',
'parameters': {
'type': 'object',
'properties': {
'files': {
'type': 'array',
'description': f'List of files that need to be analyzed to implement the required changes.',
'description': 'List of files that need to be analyzed to implement the required changes.',
'items': {
'type': 'string',
'description': f'A single file name that needs to be analized to implement the reqired changes. Remember, this is a file name with path relative to the project root. For example, if a file path is `{{project_root}}/models/model.py`, this value needs to be `models/model.py`.',
'description': 'A single file name that needs to be analyzed to implement the required changes. Remember, this is a file name with path relative to the project root. For example, if a file path is `{{project_root}}/models/model.py`, this value needs to be `models/model.py`.',
}
}
},
@@ -395,14 +403,15 @@ DEVELOPMENT_PLAN = {
EXECUTE_COMMANDS = {
'definitions': [{
'name': 'execute_commands',
'description': f'Executes a list of commands. ',
'description': 'Executes a list of commands. ',
'parameters': {
'type': 'object',
'properties': {
'commands': {
'type': 'array',
'description': f'List of commands that need to be executed. Remember, if you need to run a command that doesnt\'t finish by itself (eg. a command to run an app), put the timeout to 3000 milliseconds. If you need to create a directory that doesn\'t exist and is not the root project directory, always create it by running a command `mkdir`',
'items': command_definition(f'A single command that needs to be executed.', f'Timeout in milliseconds that represent the approximate time this command takes to finish. If you need to run a command that doesnt\'t finish by itself (eg. a command to run an app), put the timeout to 3000 milliseconds.')
'description': 'List of commands that need to be executed. Remember, if you need to run a command that doesn\'t finish by itself (eg. a command to run an app), put the timeout to 3000 milliseconds. If you need to create a directory that doesn\'t exist and is not the root project directory, always create it by running a command `mkdir`',
'items': command_definition('A single command that needs to be executed.',
'Timeout in milliseconds that represent the approximate time this command takes to finish. If you need to run a command that doesn\'t finish by itself (eg. a command to run an app), put the timeout to 3000 milliseconds.')
}
},
'required': ['commands'],
@@ -416,16 +425,16 @@ EXECUTE_COMMANDS = {
GET_FILES = {
'definitions': [{
'name': 'get_files',
'description': f'Returns development files that are currently implemented so that they can be analized and so that changes can be appropriatelly made.',
'description': 'Returns development files that are currently implemented so that they can be analyzed and so that changes can be appropriately made.',
'parameters': {
'type': 'object',
'properties': {
'files': {
'type': 'array',
'description': f'List of files that need to be analized to implement the reqired changes. Any file name in this array MUST be from the directory tree listed in the previous message.',
'description': 'List of files that need to be analyzed to implement the required changes. Any file name in this array MUST be from the directory tree listed in the previous message.',
'items': {
'type': 'string',
'description': f'A single file name that needs to be analized to implement the reqired changes. Remember, this is a file name with path relative to the project root. For example, if a file path is `{{project_root}}/models/model.py`, this value needs to be `models/model.py`. This file name MUST be listed in the directory from the previous message.',
'description': 'A single file name that needs to be analyzed to implement the required changes. Remember, this is a file name with path relative to the project root. For example, if a file path is `{{project_root}}/models/model.py`, this value needs to be `models/model.py`. This file name MUST be listed in the directory from the previous message.',
}
}
},
@@ -456,7 +465,7 @@ IMPLEMENT_CHANGES = {
},
'path': {
'type': 'string',
'description': 'Path of the file that needs to be saved on the disk.',
'description': 'Full path, including the file name, of the file that needs to be saved.',
},
'content': {
'type': 'string',
@@ -485,13 +494,13 @@ IMPLEMENT_CHANGES = {
GET_TEST_TYPE = {
'definitions': [{
'name': 'test_changes',
'description': f'Tests the changes based on the test type.',
'description': 'Tests the changes based on the test type.',
'parameters': {
'type': 'object',
'properties': {
'type': {
'type': 'string',
'description': f'Type of a test that needs to be run. If this is just an intermediate step in getting a task done, put `no_test` as the type and we\'ll just go onto the next task without testing.',
'description': 'Type of a test that needs to be run. If this is just an intermediate step in getting a task done, put `no_test` as the type and we\'ll just go onto the next task without testing.',
'enum': ['automated_test', 'command_test', 'manual_test', 'no_test']
},
'command': command_definition('Command that needs to be run to test the changes.', 'Timeout in milliseconds that represent the approximate time this command takes to finish. If you need to run a command that doesn\'t finish by itself (eg. a command to run an app), put the timeout to 3000 milliseconds. If you need to create a directory that doesn\'t exist and is not the root project directory, always create it by running a command `mkdir`'),

View File

@@ -1,6 +1,6 @@
from playhouse.shortcuts import model_to_dict
from peewee import *
from utils.style import color_yellow, color_red
from peewee import DoesNotExist, IntegrityError
from functools import reduce
import operator
import psycopg2
@@ -27,11 +27,11 @@ from database.models.user_apps import UserApps
from database.models.user_inputs import UserInputs
from database.models.files import File
DB_NAME = os.getenv("DB_NAME")
DB_HOST = os.getenv("DB_HOST")
DB_PORT = os.getenv("DB_PORT")
DB_USER = os.getenv("DB_USER")
DB_PASSWORD = os.getenv("DB_PASSWORD")
# DB_NAME = os.getenv("DB_NAME")
# DB_HOST = os.getenv("DB_HOST")
# DB_PORT = os.getenv("DB_PORT")
# DB_USER = os.getenv("DB_USER")
# DB_PASSWORD = os.getenv("DB_PASSWORD")
TABLES = [
User,
App,
@@ -251,24 +251,24 @@ def get_db_model_from_hash_id(model, app_id, previous_step, high_level_step):
def hash_and_save_step(Model, app_id, unique_data_fields, data_fields, message):
app = get_app(app_id)
# app = get_app(app_id)
fields_to_preserve = [getattr(Model, field) for field in list(unique_data_fields.keys())]
# fields_to_preserve = [getattr(Model, field) for field in list(unique_data_fields.keys())]
for field, value in data_fields.items():
unique_data_fields[field] = value
try:
existing_record = Model.get_or_none(
(Model.app == app) & (Model.previous_step == unique_data_fields['previous_step']) & (
Model.high_level_step == unique_data_fields['high_level_step']))
# existing_record = Model.get_or_none(
# (Model.app == app) & (Model.previous_step == unique_data_fields['previous_step']) & (
# Model.high_level_step == unique_data_fields['high_level_step']))
inserted_id = (Model
.insert(**unique_data_fields)
.execute())
record = Model.get_by_id(inserted_id)
logger.debug(color_yellow(f"{message} with id {record.id}"))
except IntegrityError as e:
except IntegrityError:
logger.warn(f"A record with data {unique_data_fields} already exists for {Model.__name__}.")
return None
return record
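The rewritten `hash_and_save_step` drops the pre-insert duplicate lookup and instead inserts unconditionally, treating `IntegrityError` from the database's unique constraint as "already saved". A self-contained sketch of that pattern using stdlib `sqlite3` (peewee raises its own `IntegrityError` analogously):

```python
import sqlite3

conn = sqlite3.connect(':memory:')
conn.execute(
    'CREATE TABLE steps (app_id INTEGER, previous_step TEXT, high_level_step TEXT,'
    ' UNIQUE(app_id, previous_step, high_level_step))'
)

def hash_and_save_step(app_id, previous_step, high_level_step):
    # Insert unconditionally; a duplicate violates the UNIQUE constraint
    # and is reported instead of raised, matching the hunk above.
    try:
        cur = conn.execute('INSERT INTO steps VALUES (?, ?, ?)',
                           (app_id, previous_step, high_level_step))
        return cur.lastrowid
    except sqlite3.IntegrityError:
        return None  # record already exists; the caller just logs a warning

first = hash_and_save_step(1, 'architecture', 'coding')
duplicate = hash_and_save_step(1, 'architecture', 'coding')
```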
@@ -328,10 +328,10 @@ def save_command_run(project, command, cli_response):
def get_saved_command_run(project, command):
data_to_hash = {
'command': command,
'command_runs_count': project.command_runs_count
}
# data_to_hash = {
# 'command': command,
# 'command_runs_count': project.command_runs_count
# }
command_run = get_db_model_from_hash_id(CommandRuns, project.args['app_id'],
project.checkpoints['last_command_run'], project.current_step)
return command_run
@@ -356,10 +356,10 @@ def save_user_input(project, query, user_input):
def get_saved_user_input(project, query):
data_to_hash = {
'query': query,
'user_inputs_count': project.user_inputs_count
}
# data_to_hash = {
# 'query': query,
# 'user_inputs_count': project.user_inputs_count
# }
user_input = get_db_model_from_hash_id(UserInputs, project.args['app_id'], project.checkpoints['last_user_input'],
project.current_step)
return user_input

View File

@@ -1,4 +1,4 @@
from peewee import *
from peewee import ForeignKeyField, CharField
from database.models.components.base_models import BaseModel
from database.models.user import User

View File

@@ -1,4 +1,4 @@
from peewee import *
# from peewee import
from database.config import DATABASE_TYPE
from database.models.components.progress_step import ProgressStep
from database.models.components.sqlite_middlewares import JSONField

View File

@@ -1,4 +1,4 @@
from peewee import *
from peewee import AutoField, ForeignKeyField, TextField, CharField
from database.models.components.base_models import BaseModel
from database.models.app import App

View File

@@ -1,4 +1,4 @@
from peewee import *
from peewee import Model, UUIDField, DateTimeField
from datetime import datetime
from uuid import uuid4

View File

@@ -1,4 +1,4 @@
from peewee import *
from peewee import ForeignKeyField, CharField, BooleanField, DateTimeField
from database.config import DATABASE_TYPE
from database.models.components.base_models import BaseModel
from database.models.app import App

View File

@@ -1,5 +1,3 @@
from peewee import *
from database.models.components.progress_step import ProgressStep

View File

@@ -1,4 +1,3 @@
from peewee import *
from database.config import DATABASE_TYPE
from database.models.components.progress_step import ProgressStep
from database.models.components.sqlite_middlewares import JSONField

View File

@@ -1,10 +1,11 @@
from peewee import *
from peewee import ForeignKeyField, AutoField, TextField, IntegerField, CharField
from database.config import DATABASE_TYPE
from database.models.components.base_models import BaseModel
from database.models.app import App
from database.models.components.sqlite_middlewares import JSONField
from playhouse.postgres_ext import BinaryJSONField
class DevelopmentSteps(BaseModel):
id = AutoField() # This will serve as the primary key
app = ForeignKeyField(App, on_delete='CASCADE')

View File

@@ -1,4 +1,4 @@
from peewee import *
from peewee import ForeignKeyField, TextField
from database.models.components.base_models import BaseModel
from database.models.development_steps import DevelopmentSteps
@@ -15,4 +15,4 @@ class FileSnapshot(BaseModel):
table_name = 'file_snapshot'
indexes = (
(('development_step', 'file'), True),
)
)

View File

@@ -1,9 +1,9 @@
from peewee import *
from peewee import AutoField, CharField, TextField, ForeignKeyField
from database.models.components.base_models import BaseModel
from database.models.development_steps import DevelopmentSteps
from database.models.app import App
class File(BaseModel):
id = AutoField()
app = ForeignKeyField(App, on_delete='CASCADE')
@@ -15,4 +15,4 @@ class File(BaseModel):
class Meta:
indexes = (
(('app', 'name', 'path'), True),
)
)

View File

@@ -1,4 +1,4 @@
from peewee import *
from peewee import TextField
from database.models.components.progress_step import ProgressStep

View File

@@ -1,4 +1,4 @@
from peewee import *
from peewee import CharField
from database.models.components.base_models import BaseModel

View File

@@ -1,4 +1,4 @@
from peewee import *
from peewee import AutoField, CharField, ForeignKeyField
from database.models.components.base_models import BaseModel
from database.models.app import App

View File

@@ -1,4 +1,4 @@
from peewee import *
from peewee import AutoField, ForeignKeyField, TextField, CharField
from database.models.components.base_models import BaseModel
from database.models.app import App

View File

@@ -1,4 +1,3 @@
from peewee import *
from database.config import DATABASE_TYPE
from database.models.components.progress_step import ProgressStep
from database.models.components.sqlite_middlewares import JSONField

View File

@@ -1,4 +1,3 @@
from peewee import *
from database.config import DATABASE_TYPE
from database.models.components.progress_step import ProgressStep
from database.models.components.sqlite_middlewares import JSONField

View File

@@ -29,7 +29,7 @@ class AgentConvo:
self.high_level_step = self.agent.project.current_step
# add system message
system_message = get_sys_message(self.agent.role)
system_message = get_sys_message(self.agent.role, self.agent.project.args)
logger.info('\n>>>>>>>>>> System Prompt >>>>>>>>>>\n%s\n>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>',
system_message['content'])
self.messages.append(system_message)
@@ -82,8 +82,10 @@ class AgentConvo:
development_step = save_development_step(self.agent.project, prompt_path, prompt_data, self.messages, response)
# TODO handle errors from OpenAI
# It's complicated because calling functions are expecting different types of responses - string or tuple
# https://github.com/Pythagora-io/gpt-pilot/issues/165 & #91
if response == {}:
logger.error(f'Aborting with "OpenAI API error happened"')
logger.error('Aborting with "OpenAI API error happened"')
raise Exception("OpenAI API error happened.")
response = parse_agent_response(response, function_calls)

View File

@@ -3,7 +3,7 @@ import os
from typing import Tuple
from utils.style import color_yellow_bold, color_cyan, color_white_bold
from const.common import IGNORE_FOLDERS, STEPS
from database.database import delete_unconnected_steps_from, delete_all_app_development_data
from database.database import delete_unconnected_steps_from, delete_all_app_development_data, update_app_status
from const.ipc import MESSAGE_TYPE
from prompts.prompts import ask_user
from helpers.exceptions.TokenLimitError import TokenLimitError
@@ -19,11 +19,12 @@ from database.models.development_steps import DevelopmentSteps
from database.models.file_snapshot import FileSnapshot
from database.models.files import File
from logger.logger import logger
from utils.dot_gpt_pilot import DotGptPilot
class Project:
def __init__(self, args, name=None, description=None, user_stories=None, user_tasks=None, architecture=None,
development_plan=None, current_step=None, ipc_client_instance=None):
development_plan=None, current_step=None, ipc_client_instance=None, enable_dot_pilot_gpt=True):
"""
Initialize a project.
@@ -69,6 +70,11 @@ class Project:
self.architecture = architecture
# if development_plan is not None:
# self.development_plan = development_plan
self.dot_pilot_gpt = DotGptPilot(log_chat_completions=enable_dot_pilot_gpt)
def set_root_path(self, root_path: str):
self.root_path = root_path
self.dot_pilot_gpt.with_root_path(root_path)
def start(self):
"""
@@ -128,11 +134,17 @@ class Project:
break
# TODO END
self.dot_pilot_gpt.write_project(self)
print(json.dumps({
"project_stage": "coding"
}), type='info')
self.developer.start_coding()
def finish(self):
update_app_status(self.args['app_id'], STEPS[-1])
# TODO say that project is finished and ask user for additional features, fixes,...
return
def get_directory_tree(self, with_descriptions=False):
"""
Get the directory tree of the project.
@@ -196,7 +208,7 @@ class Project:
try:
relative_path, full_path = self.get_full_file_path('', file)
file_content = open(full_path, 'r').read()
except:
except OSError:
file_content = ''
files_with_content.append({
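Narrowing the bare `except:` to `except OSError:` keeps the "missing or unreadable file becomes empty content" behavior while no longer swallowing unrelated exceptions such as `KeyboardInterrupt`. A minimal sketch of the same pattern:

```python
def read_file_or_empty(full_path):
    # Only OS-level failures (missing file, bad permissions) fall back to '';
    # anything else propagates, unlike the old bare except.
    try:
        with open(full_path, 'r') as f:
            return f.read()
    except OSError:
        return ''

content = read_file_or_empty('/no/such/dir/definitely-missing.txt')
```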
@@ -315,7 +327,7 @@ class Project:
file=file_in_db,
defaults={'content': file.get('content', '')}
)
file_snapshot.content = content = file['content']
file_snapshot.content = file['content']
file_snapshot.save()
def restore_files(self, development_step_id):
@@ -324,7 +336,7 @@ class Project:
clear_directory(self.root_path, IGNORE_FOLDERS)
for file_snapshot in file_snapshots:
update_file(file_snapshot.file.full_path, file_snapshot.content);
update_file(file_snapshot.file.full_path, file_snapshot.content)
def delete_all_steps_except_current_branch(self):
delete_unconnected_steps_from(self.checkpoints['last_development_step'], 'previous_step')

View File

@@ -43,11 +43,11 @@ TODO:
**TODO: no prompt**
`debug` functions: `run_command`, `implement_code_changes`
`debug` functions: `run_command`, `implement_changes`
## Developer (full_stack_developer)
`create_scripts`, `coding` **(TODO: No entry in `STEPS` for `create_scripts`)**
`create_scripts`, `coding`
- Implement tasks assigned by tech lead
- Modular code, TDD
@@ -55,9 +55,9 @@ TODO:
## Code Monkey
**TODO: not listed in `ROLES`**
`create_scripts`, `coding`, `implement_changes`
`development/implement_changes` functions: `save_files`
`implement_changes` functions: `save_files`
- Implement tasks assigned by tech lead
- Modular code, TDD

View File

@@ -28,8 +28,8 @@ class Architect(Agent):
return step['architecture']
# ARCHITECTURE
print(color_green_bold(f"Planning project architecture...\n"))
logger.info(f"Planning project architecture...")
print(color_green_bold("Planning project architecture...\n"))
logger.info("Planning project architecture...")
self.convo_architecture = AgentConvo(self)
architecture = self.convo_architecture.send_message('architecture/technologies.prompt',

View File

@@ -1,3 +1,4 @@
import platform
import uuid
from utils.style import color_green, color_red, color_green_bold, color_yellow_bold, color_red_bold, color_blue_bold, color_white_bold
from helpers.exceptions.TokenLimitError import TokenLimitError
@@ -11,7 +12,7 @@ from logger.logger import logger
from helpers.Agent import Agent
from helpers.AgentConvo import AgentConvo
from utils.utils import should_execute_step, array_of_objects_to_string, generate_app_data
from helpers.cli import run_command_until_success, execute_command_and_check_cli_response
from helpers.cli import run_command_until_success, execute_command_and_check_cli_response, running_processes
from const.function_calls import FILTER_OS_TECHNOLOGIES, EXECUTE_COMMANDS, GET_TEST_TYPE, IMPLEMENT_TASK
from database.database import save_progress, get_progress_steps, update_app_status
from utils.utils import get_os_info
@@ -33,21 +34,23 @@ class Developer(Agent):
self.project.skip_steps = False if ('skip_until_dev_step' in self.project.args and self.project.args['skip_until_dev_step'] == '0') else True
# DEVELOPMENT
print(color_green_bold(f"🚀 Now for the actual development...\n"))
logger.info(f"Starting to create the actual code...")
print(color_green_bold("🚀 Now for the actual development...\n"))
logger.info("Starting to create the actual code...")
for i, dev_task in enumerate(self.project.development_plan):
self.implement_task(i, dev_task)
# DEVELOPMENT END
self.project.dot_pilot_gpt.chat_log_folder(None)
logger.info('The app is DONE!!! Yay...you can use it now.')
print(color_green_bold("The app is DONE!!! Yay...you can use it now.\n"))
def implement_task(self, i, development_task=None):
print(color_green_bold(f'Implementing task #{i + 1}: ') + color_green(f' {development_task["description"]}\n'))
print(color_green_bold(f'Implementing task #{i + 1}: ') + color_green(f' {development_task["description"]}\n'))
self.project.dot_pilot_gpt.chat_log_folder(i + 1)
convo_dev_task = AgentConvo(self)
task_description = convo_dev_task.send_message('development/task/breakdown.prompt', {
convo_dev_task.send_message('development/task/breakdown.prompt', {
"name": self.project.args['name'],
"app_type": self.project.args['app_type'],
"app_summary": self.project.project_description,
@@ -97,10 +100,12 @@ class Developer(Agent):
additional_message = 'Let\'s start with the step #0:\n\n' if i == 0 else f'So far, steps { ", ".join(f"#{j}" for j in range(i)) } are finished so let\'s do step #{i + 1} now.\n\n'
process_name = data['process_name'] if 'process_name' in data else None
success_message = data['success_message'] if 'success_message' in data else None
return run_command_until_success(convo, data['command'],
timeout=data['timeout'],
process_name=process_name,
success_message=success_message,
additional_message=additional_message)
def step_human_intervention(self, convo, step: dict):
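The `success_message` plumbing above pulls optional keys out of the step data and forwards them to `run_command_until_success`; `dict.get` expresses the same extraction more idiomatically. A hypothetical sketch of the success check this enables (names and semantics assumed from the call site, not the library):

```python
def extract_optional(data):
    # Equivalent to: data['process_name'] if 'process_name' in data else None
    return data.get('process_name'), data.get('success_message')

def command_succeeded(output_lines, success_message):
    # With a success_message, success means the marker appears in the output
    # before the timeout, rather than the command exiting on its own.
    if success_message is None:
        return True  # fall back to exit-status / timeout handling
    return any(success_message in line for line in output_lines)

ok = command_succeeded(['Compiled.', 'Server listening on port 3000'],
                       'listening on port 3000')
```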
@@ -114,9 +119,14 @@ class Developer(Agent):
step['human_intervention_description'])
while True:
human_intervention_description = step['human_intervention_description'] + \
color_yellow_bold('\n\nIf you want to run the app, just type "r" and press ENTER and that will run `' + self.run_command + '`') \
if self.run_command is not None else step['human_intervention_description']
human_intervention_description = step['human_intervention_description']
if self.run_command is not None:
if (self.project.ipc_client_instance is None or self.project.ipc_client_instance.client is None):
human_intervention_description += color_yellow_bold('\n\nIf you want to run the app, just type "r" and press ENTER and that will run `' + self.run_command + '`')
else:
print(self.run_command, type="run_command")
response = self.project.ask_for_human_intervention('I need human intervention:',
human_intervention_description,
cbs={
@@ -148,7 +158,7 @@ class Developer(Agent):
cli_response, llm_response = execute_command_and_check_cli_response(test_command['command'], test_command['timeout'], convo)
logger.info('After running command llm_response: ' + llm_response)
if llm_response == 'NEEDS_DEBUGGING':
print(color_red(f'Got incorrect CLI response:'))
print(color_red('Got incorrect CLI response:'))
print(cli_response)
print(color_red('-------------------'))
@@ -166,7 +176,9 @@ class Developer(Agent):
if development_task is not None:
convo.remove_last_x_messages(2)
detailed_user_review_goal = convo.send_message('development/define_user_review_goal.prompt', {})
detailed_user_review_goal = convo.send_message('development/define_user_review_goal.prompt', {
'os': platform.system()
})
convo.remove_last_x_messages(2)
try:
@@ -183,8 +195,8 @@ class Developer(Agent):
if step_implementation_try >= MAX_COMMAND_DEBUG_TRIES:
self.dev_help_needed(step)
print(color_red_bold(f'\n--------- LLM Reached Token Limit ----------'))
print(color_red_bold(f'Can I retry implementing the entire development step?'))
print(color_red_bold('\n--------- LLM Reached Token Limit ----------'))
print(color_red_bold('Can I retry implementing the entire development step?'))
answer = ''
while answer != 'y':
@@ -193,7 +205,7 @@ class Developer(Agent):
'Type y/n'
)
logger.info(f"Retry step implementation? %s", answer)
logger.info("Retry step implementation? %s", answer)
if answer == 'n':
return self.dev_help_needed(step)
@@ -202,9 +214,9 @@ class Developer(Agent):
def dev_help_needed(self, step):
if step['type'] == 'command':
help_description = (color_red_bold(f'I tried running the following command but it doesn\'t seem to work:\n\n') +
color_white_bold(step['command']['command']) +
color_red_bold(f'\n\nCan you please make it work?'))
help_description = (color_red_bold('I tried running the following command but it doesn\'t seem to work:\n\n') +
color_white_bold(step['command']['command']) +
color_red_bold('\n\nCan you please make it work?'))
elif step['type'] == 'code_change':
help_description = step['code_change_description']
elif step['type'] == 'human_intervention':
@@ -223,14 +235,14 @@ class Developer(Agent):
answer = ''
while answer != 'continue':
print(color_red_bold(f'\n----------------------------- I need your help ------------------------------'))
print(color_red_bold('\n----------------------------- I need your help ------------------------------'))
print(extract_substring(str(help_description)))
print(color_red_bold(f'\n-----------------------------------------------------------------------------'))
print(color_red_bold('\n-----------------------------------------------------------------------------'))
answer = styled_text(
self.project,
'Once you\'re done, type "continue"?'
)
logger.info(f"help needed: %s", answer)
logger.info("help needed: %s", answer)
return { "success": True, "user_input": answer }
@@ -293,9 +305,13 @@ class Developer(Agent):
iteration_convo.load_branch(last_branch_name)
user_description = ('Here is a description of what should be working: \n\n' + color_blue_bold(continue_description) + '\n') \
if continue_description != '' else ''
user_description = 'Can you check if the app works please? ' + user_description + \
'\nIf you want to run the app, ' + \
color_yellow_bold('just type "r" and press ENTER and that will run `' + self.run_command + '`')
user_description = 'Can you check if the app works please? ' + user_description
if self.project.ipc_client_instance is None or self.project.ipc_client_instance.client is None:
user_description += color_yellow_bold('\n\nIf you want to run the app, just type "r" and press ENTER and that will run `' + self.run_command + '`')
else:
print(self.run_command, type="run_command")
# continue_description = ''
# TODO: Wait for a specific string in the output or timeout?
response = self.project.ask_for_human_intervention(
@@ -331,7 +347,9 @@ class Developer(Agent):
# self.debugger.debug(iteration_convo, user_input=user_feedback)
task_steps = iteration_convo.send_message('development/parse_task.prompt', {}, IMPLEMENT_TASK)
task_steps = iteration_convo.send_message('development/parse_task.prompt', {
'running_processes': running_processes
}, IMPLEMENT_TASK)
iteration_convo.remove_last_x_messages(2)
return self.execute_task(iteration_convo, task_steps, is_root_task=True)
@@ -355,8 +373,8 @@ class Developer(Agent):
})
return
# ENVIRONMENT SETUP
print(green(f"Setting up the environment...\n"))
logger.info(f"Setting up the environment...")
print(green("Setting up the environment...\n"))
logger.info("Setting up the environment...")
os_info = get_os_info()
os_specific_technologies = self.convo_os_specific_tech.send_message('development/env_setup/specs.prompt',


@@ -27,7 +27,7 @@ class ProductOwner(Agent):
step = get_progress_steps(self.project.args['app_id'], PROJECT_DESCRIPTION_STEP)
if step and not should_execute_step(self.project.args['step'], PROJECT_DESCRIPTION_STEP):
step_already_finished(self.project.args, step)
self.project.root_path = setup_workspace(self.project.args)
self.project.set_root_path(setup_workspace(self.project.args))
self.project.project_description = step['summary']
self.project.project_description_messages = step['messages']
return
@@ -39,7 +39,7 @@ class ProductOwner(Agent):
if 'name' not in self.project.args:
self.project.args['name'] = clean_filename(ask_user(self.project, 'What is the project name?'))
self.project.root_path = setup_workspace(self.project.args)
self.project.set_root_path(setup_workspace(self.project.args))
self.project.app = save_app(self.project)
@@ -86,7 +86,7 @@ class ProductOwner(Agent):
return step['user_stories']
# USER STORIES
msg = f"User Stories:\n"
msg = "User Stories:\n"
print(color_green_bold(msg))
logger.info(msg)
@@ -120,7 +120,7 @@ class ProductOwner(Agent):
return step['user_tasks']
# USER TASKS
msg = f"User Tasks:\n"
msg = "User Tasks:\n"
print(color_green_bold(msg))
logger.info(msg)


@@ -32,8 +32,8 @@ class TechLead(Agent):
return step['development_plan']
# DEVELOPMENT PLANNING
print(color_green_bold(f"Starting to create the action plan for development...\n"))
logger.info(f"Starting to create the action plan for development...")
print(color_green_bold("Starting to create the action plan for development...\n"))
logger.info("Starting to create the action plan for development...")
# TODO add clarifications
self.development_plan = self.convo_development_plan.send_message('development/plan.prompt',


@@ -30,8 +30,8 @@ class TestCodeMonkey:
current_step='coding',
)
self.project.root_path = os.path.abspath(os.path.join(os.path.dirname(os.path.abspath(__file__)),
'../../../workspace/TestDeveloper'))
self.project.set_root_path(os.path.abspath(os.path.join(os.path.dirname(os.path.abspath(__file__)),
'../../../workspace/TestDeveloper')))
self.project.technologies = []
last_step = DevelopmentSteps()
last_step.id = 1


@@ -31,8 +31,8 @@ class TestDeveloper:
user_stories=[]
)
self.project.root_path = os.path.abspath(os.path.join(os.path.dirname(os.path.abspath(__file__)),
'../../../workspace/TestDeveloper'))
self.project.set_root_path(os.path.abspath(os.path.join(os.path.dirname(os.path.abspath(__file__)),
'../../../workspace/TestDeveloper')))
self.project.technologies = []
self.project.current_step = ENVIRONMENT_SETUP_STEP
self.developer = Developer(self.project)
@@ -170,7 +170,7 @@ class TestDeveloper:
mock_requests_post.side_effect = generate_response
monkeypatch.setenv('OPENAI_API_KEY', 'secret')
mock_questionary = MockQuestionary([''])
# mock_questionary = MockQuestionary([''])
# with patch('utils.questionary.questionary', mock_questionary):
# When


@@ -27,8 +27,8 @@ class TestTechLead:
user_stories=[]
)
self.project.root_path = os.path.abspath(os.path.join(os.path.dirname(os.path.abspath(__file__)),
'../../../workspace/TestTechLead'))
self.project.set_root_path(os.path.abspath(os.path.join(os.path.dirname(os.path.abspath(__file__)),
'../../../workspace/TestTechLead')))
self.project.technologies = []
self.project.project_description = '''
The project entails creating a web-based chat application, tentatively named "chat_app."


@@ -17,8 +17,8 @@ from const.code_execution import MIN_COMMAND_RUN_TIME, MAX_COMMAND_RUN_TIME, MAX
interrupted = False
running_processes: Dict[str, int] = {}
"""Holds a list of process IDs, mapped to the `process_name` provided in the call to `execute_command()`."""
running_processes: Dict[str, tuple[str, int]] = {}
"""Holds a list of (command, process ID)s, mapped to the `process_name` provided in the call to `execute_command()`."""
def enqueue_output(out, q):
@@ -74,12 +74,12 @@ def run_command(command, root_path, q_stdout, q_stderr) -> subprocess.Popen:
def terminate_named_process(process_name: str) -> None:
if process_name in running_processes:
terminate_process(running_processes[process_name], process_name)
terminate_process(running_processes[process_name][1], process_name)
def terminate_running_processes():
for process_name in list(running_processes.keys()):
terminate_process(running_processes[process_name], process_name)
terminate_process(running_processes[process_name][1], process_name)
def terminate_process(pid: int, name=None) -> None:
@@ -100,11 +100,12 @@ def terminate_process(pid: int, name=None) -> None:
logger.error(f'Error while terminating process: {e}')
for process_name in list(running_processes.keys()):
if running_processes[process_name] == pid:
if running_processes[process_name][1] == pid:
del running_processes[process_name]
def execute_command(project, command, timeout=None, process_name: str = None, force=False):
def execute_command(project, command, timeout=None, success_message=None, process_name: str = None, force=False) \
-> (str, str, int):
"""
Execute a command and capture its output.
@@ -112,6 +113,7 @@ def execute_command(project, command, timeout=None, process_name: str = None, fo
project: The project associated with the command.
command (str): The command to run.
timeout (int, optional): The maximum execution time in milliseconds. Default is None.
success_message: A message to look for in the output of the command to determine if successful or not.
process_name (str, optional): A name for the process.
If `timeout` is not provided, can be used to terminate the process.
force (bool, optional): Whether to execute the command without confirmation. Default is False.
@@ -119,8 +121,8 @@ def execute_command(project, command, timeout=None, process_name: str = None, fo
Returns:
cli_response (str): The command output
or: '', 'DONE' if user answered 'no' or 'skip'
llm_response (str): The response from the agent.
TODO: this seems to be 'DONE' (no or skip) or None
llm_response (str): 'DONE' if 'no', 'skip' or `success_message` matched.
Otherwise `None` - caller should send `cli_response` to LLM
exit_code (int): The exit code of the process.
"""
if timeout is not None:
@@ -133,7 +135,7 @@ def execute_command(project, command, timeout=None, process_name: str = None, fo
timeout = min(max(timeout, MIN_COMMAND_RUN_TIME), MAX_COMMAND_RUN_TIME)
if not force:
print(color_yellow_bold(f'\n--------- EXECUTE COMMAND ----------'))
print(color_yellow_bold('\n--------- EXECUTE COMMAND ----------'))
question = f'Can I execute the command: `{color_yellow_bold(command)}`'
if timeout is not None:
question += f' with {timeout}ms timeout?'
@@ -146,8 +148,12 @@ def execute_command(project, command, timeout=None, process_name: str = None, fo
# "That's not going to work, let's do X instead"
# We don't explicitly make "no" or "skip" options to the user
# see https://github.com/Pythagora-io/gpt-pilot/issues/122
# https://github.com/Pythagora-io/gpt-pilot/issues/198
# https://github.com/Pythagora-io/gpt-pilot/issues/43#issuecomment-1756352056
# This may require exiting the list of steps early.
# ...or .confirm(question, default='yes').ask() https://questionary.readthedocs.io/en/stable/pages/types.html#confirmation
print('answer: ' + answer)
if answer == 'no':
if answer.lower().startswith('no'):
return '', 'DONE', None
elif answer == 'skip':
return '', 'DONE', None
@@ -166,20 +172,19 @@ def execute_command(project, command, timeout=None, process_name: str = None, fo
return command_run.cli_response, None, None
return_value = None
was_success = None
q_stderr = queue.Queue()
q = queue.Queue()
pid_container = [None]
process = run_command(command, project.root_path, q, q_stderr)
if process_name is not None:
terminate_named_process(process_name)
running_processes[process_name] = process.pid
running_processes[process_name] = (command, process.pid)
output = ''
stderr_output = ''
start_time = time.time()
interrupted = False
# Note: If we don't need to log the output in real-time, we can remove q, q_stderr, the threads and this while loop.
# if timeout is not None:
@@ -189,9 +194,9 @@ def execute_command(project, command, timeout=None, process_name: str = None, fo
try:
while True:
elapsed_time = time.time() - start_time
if timeout is not None:
# TODO: print to IPC using a different message type so VS Code can ignore it or update the previous value
print(color_white_bold(f'\rt: {round(elapsed_time * 1000)}ms : '), end='', flush=True)
# if timeout is not None:
# # TODO: print to IPC using a different message type so VS Code can ignore it or update the previous value
# print(color_white_bold(f'\rt: {round(elapsed_time * 1000)}ms : '), end='', flush=True)
# Check if process has finished
if process.poll() is not None:
@@ -207,6 +212,10 @@ def execute_command(project, command, timeout=None, process_name: str = None, fo
# If timeout is reached, kill the process
if timeout is not None and elapsed_time * 1000 > timeout:
if process_name is not None:
logger.info(f'Process "{process_name}" running after timeout as pid: {process.pid}')
break
raise TimeoutError("Command exceeded the specified timeout.")
# os.killpg(process.pid, signal.SIGKILL)
# break
@@ -220,6 +229,10 @@ def execute_command(project, command, timeout=None, process_name: str = None, fo
output += line
print(color_green('CLI OUTPUT:') + line, end='')
logger.info('CLI OUTPUT: ' + line)
if success_message is not None and success_message in line:
logger.info('Success message found: %s', success_message)
was_success = True
break
# Read stderr
try:
@@ -231,13 +244,8 @@ def execute_command(project, command, timeout=None, process_name: str = None, fo
stderr_output += stderr_line
print(color_red('CLI ERROR:') + stderr_line, end='') # Print with different color for distinction
logger.error('CLI ERROR: ' + stderr_line)
if process_name is not None:
logger.info(f'Process {process_name} running as pid: {process.pid}')
break
except (KeyboardInterrupt, TimeoutError) as e:
interrupted = True
if isinstance(e, KeyboardInterrupt):
print('\nCTRL+C detected. Stopping command execution...')
logger.info('CTRL+C detected. Stopping command execution...')
@@ -245,11 +253,11 @@ def execute_command(project, command, timeout=None, process_name: str = None, fo
print('\nTimeout detected. Stopping command execution...')
logger.warn('Timeout detected. Stopping command execution...')
was_success = False
terminate_process(process.pid)
elapsed_time = time.time() - start_time
print(f'{command} took {round(elapsed_time * 1000)}ms to execute.')
logger.info(f'{command} took {round(elapsed_time * 1000)}ms to execute.')
logger.info(f'`{command}` took {round(elapsed_time * 1000)}ms to execute.')
# stderr_output = ''
# while not q_stderr.empty():
@@ -263,7 +271,7 @@ def execute_command(project, command, timeout=None, process_name: str = None, fo
save_command_run(project, command, return_value)
return return_value, None, process.returncode
return return_value, 'DONE' if was_success else None, process.returncode
def build_directory_tree(path, prefix="", ignore=None, is_last=False, files=None, add_descriptions=False):
@@ -332,6 +340,7 @@ def execute_command_and_check_cli_response(command, timeout, convo):
def run_command_until_success(convo, command,
timeout: Union[int, None],
process_name: Union[str, None] = None,
success_message=None,
additional_message=None,
force=False,
return_cli_response=False,
@@ -345,6 +354,7 @@ def run_command_until_success(convo, command,
timeout (int): The maximum execution time in milliseconds.
process_name: A name for the process.
If `timeout` is not provided, can be used to terminate the process.
success_message: A message to look for in the output of the command to determine if successful or not.
additional_message (str, optional): Additional message to include in the response.
force (bool, optional): Whether to execute the command without confirmation. Default is False.
return_cli_response (bool, optional): If True, may raise TooDeepRecursionError(cli_response)
@@ -353,11 +363,12 @@ def run_command_until_success(convo, command,
cli_response, response, exit_code = execute_command(convo.agent.project,
command,
timeout=timeout,
success_message=success_message,
process_name=process_name,
force=force)
if response is None:
logger.info(f'{command} exit code: {exit_code}')
logger.info(f'`{command}` exit code: {exit_code}')
if exit_code is None:
response = 'DONE'
else:
@@ -373,7 +384,7 @@ def run_command_until_success(convo, command,
if response != 'DONE':
# 'NEEDS_DEBUGGING'
print(color_red(f'Got incorrect CLI response:'))
print(color_red('Got incorrect CLI response:'))
print(cli_response)
print(color_red('-------------------'))
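Reviewer note: the new `success_message` handling in `execute_command()` above can be sketched in isolation — a minimal reader loop that scans each output line and breaks as soon as the marker appears, so the caller treats the run as `'DONE'` instead of forwarding the output to the LLM. Names below are illustrative, not the repo's API:

```python
def wait_for_success(lines, success_message=None):
    """Scan command output line by line; stop early once success_message is seen.

    Returns (was_success, accumulated_output). Mirrors the early-exit
    behaviour added to execute_command(), in a simplified, hypothetical form.
    """
    output = []
    for line in lines:
        output.append(line)
        if success_message is not None and success_message in line:
            return True, ''.join(output)
    return False, ''.join(output)

# Example: a dev server announcing readiness on stdout
ok, out = wait_for_success(['compiling...\n', 'Listening on port 3000\n'],
                           success_message='Listening on port')
```

This matches the diff's contract: `was_success` drives the `'DONE' if was_success else None` return value, leaving `None` (send output to the LLM) when the marker never appears.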


@@ -1,5 +1,8 @@
from const.llm import MAX_GPT_MODEL_TOKENS
class TokenLimitError(Exception):
def __init__(self, tokens_in_messages, max_tokens):
def __init__(self, tokens_in_messages, max_tokens=MAX_GPT_MODEL_TOKENS):
self.tokens_in_messages = tokens_in_messages
self.max_tokens = max_tokens
super().__init__(f"Token limit error happened with {tokens_in_messages}/{max_tokens} tokens in messages!")
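With the default added above, callers can raise `TokenLimitError` without knowing the model's limit. A self-contained sketch (the constant's value here is an illustrative stand-in for `const.llm.MAX_GPT_MODEL_TOKENS`):

```python
MAX_GPT_MODEL_TOKENS = 8192  # illustrative stand-in for const.llm.MAX_GPT_MODEL_TOKENS

class TokenLimitError(Exception):
    def __init__(self, tokens_in_messages, max_tokens=MAX_GPT_MODEL_TOKENS):
        self.tokens_in_messages = tokens_in_messages
        self.max_tokens = max_tokens
        super().__init__(f"Token limit error happened with {tokens_in_messages}/{max_tokens} tokens in messages!")

# Callers no longer need to pass the limit explicitly:
try:
    raise TokenLimitError(9000)
except TokenLimitError as e:
    err, msg = e, str(e)
```

Note the default is bound at definition time, which is fine for a module-level constant like this.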


@@ -0,0 +1,84 @@
import builtins
import pytest
from unittest.mock import patch
from dotenv import load_dotenv
load_dotenv()
from pilot.utils.custom_print import get_custom_print
from pilot.helpers.agents.Developer import Developer
from pilot.helpers.AgentConvo import AgentConvo
from pilot.helpers.Debugger import Debugger
from pilot.helpers.test_Project import create_project
from pilot.test.mock_questionary import MockQuestionary
################## NOTE: this test needs to be run in debug with breakpoints ##################
@pytest.mark.uses_tokens
@patch('pilot.helpers.AgentConvo.get_saved_development_step')
@patch('pilot.helpers.AgentConvo.save_development_step')
@patch('utils.questionary.get_saved_user_input')
@patch('utils.questionary.save_user_input')
@patch('helpers.cli.get_saved_command_run')
@patch('helpers.cli.run_command')
@patch('helpers.cli.save_command_run')
# @patch('pilot.helpers.cli.execute_command', return_value=('', 'DONE', 0))
def test_debug(
# mock_execute_command,
mock_save_command, mock_run_command, mock_get_saved_command,
mock_save_input, mock_user_input, mock_save_step, mock_get_saved_step):
# Given
builtins.print, ipc_client_instance = get_custom_print({})
project = create_project()
project.current_step = 'coding'
developer = Developer(project)
project.developer = developer
convo = AgentConvo(developer)
convo.load_branch = lambda x: None
debugger = Debugger(developer)
# TODO: mock agent.project.developer.execute_task
# convo.messages.append()
convo.construct_and_add_message_from_prompt('dev_ops/ran_command.prompt', {
'cli_response': '''
stderr:
```
node:internal/modules/cjs/loader:1080
throw err;
^
Error: Cannot find module 'mime'
Require stack:
- /workspace/chat_app/node_modules/send/index.js
- /workspace/chat_app/node_modules/express/lib/utils.js
- /workspace/chat_app/node_modules/express/lib/application.js
- /workspace/chat_app/node_modules/express/lib/express.js
- /workspace/chat_app/node_modules/express/index.js
- /workspace/chat_app/server.js
at Module._resolveFilename (node:internal/modules/cjs/loader:1077:15)
at Module._load (node:internal/modules/cjs/loader:922:27)
at Module.require (node:internal/modules/cjs/loader:1143:19)
at require (node:internal/modules/cjs/helpers:121:18)
at Object.<anonymous> (/workspace/chat_app/node_modules/send/index.js:24:12)
at Module._compile (node:internal/modules/cjs/loader:1256:14)
at Module._extensions..js (node:internal/modules/cjs/loader:1310:10)
at Module.load (node:internal/modules/cjs/loader:1119:32)
at Module._load (node:internal/modules/cjs/loader:960:12)
```
stdout:
```
> chat_app@1.0.0 start
> node server.js
```
'''
})
mock_questionary = MockQuestionary(['', ''])
with patch('utils.questionary.questionary', mock_questionary):
# When
result = debugger.debug(convo, command={'command': 'npm run start'}, is_root_task=True)
# Then
assert result == {'success': True}


@@ -1,8 +1,12 @@
import os
import json
import pytest
from unittest.mock import patch
from unittest.mock import patch, MagicMock
from helpers.Project import Project
from database.models.files import File
test_root = os.path.join(os.path.dirname(__file__), '../../workspace/gpt-pilot-test').replace('\\', '/')
def create_project():
project = Project({
@@ -14,17 +18,17 @@ def create_project():
architecture=[],
user_stories=[]
)
project.root_path = "/temp/gpt-pilot-test"
project.set_root_path(test_root)
project.app = 'test'
return project
@pytest.mark.parametrize('test_data', [
{'name': 'package.json', 'path': 'package.json', 'saved_to': '/temp/gpt-pilot-test/package.json'},
{'name': 'package.json', 'path': '', 'saved_to': '/temp/gpt-pilot-test/package.json'},
# {'name': 'Dockerfile', 'path': None, 'saved_to': '/temp/gpt-pilot-test/Dockerfile'},
{'name': None, 'path': 'public/index.html', 'saved_to': '/temp/gpt-pilot-test/public/index.html'},
{'name': '', 'path': 'public/index.html', 'saved_to': '/temp/gpt-pilot-test/public/index.html'},
{'name': 'package.json', 'path': 'package.json', 'saved_to': f'{test_root}/package.json'},
{'name': 'package.json', 'path': '', 'saved_to': f'{test_root}/package.json'},
# {'name': 'Dockerfile', 'path': None, 'saved_to': f'{test_root}/Dockerfile'},
{'name': None, 'path': 'public/index.html', 'saved_to': f'{test_root}/public/index.html'},
{'name': '', 'path': 'public/index.html', 'saved_to': f'{test_root}/public/index.html'},
# TODO: Treatment of paths outside of the project workspace - https://github.com/Pythagora-io/gpt-pilot/issues/129
# {'name': '/etc/hosts', 'path': None, 'saved_to': '/etc/hosts'},
@@ -65,12 +69,12 @@ def test_save_file(mock_file_insert, mock_update_file, test_data):
@pytest.mark.parametrize('file_path, file_name, expected', [
('file.txt', 'file.txt', '/temp/gpt-pilot-test/file.txt'),
('', 'file.txt', '/temp/gpt-pilot-test/file.txt'),
('path/', 'file.txt', '/temp/gpt-pilot-test/path/file.txt'),
('path/to/', 'file.txt', '/temp/gpt-pilot-test/path/to/file.txt'),
('path/to/file.txt', 'file.txt', '/temp/gpt-pilot-test/path/to/file.txt'),
('./path/to/file.txt', 'file.txt', '/temp/gpt-pilot-test/./path/to/file.txt'), # ideally result would not have `./`
('file.txt', 'file.txt', f'{test_root}/file.txt'),
('', 'file.txt', f'{test_root}/file.txt'),
('path/', 'file.txt', f'{test_root}/path/file.txt'),
('path/to/', 'file.txt', f'{test_root}/path/to/file.txt'),
('path/to/file.txt', 'file.txt', f'{test_root}/path/to/file.txt'),
('./path/to/file.txt', 'file.txt', f'{test_root}/./path/to/file.txt'), # ideally result would not have `./`
])
def test_get_full_path(file_path, file_name, expected):
# Given
@@ -100,7 +104,6 @@ def test_get_full_path_absolute(file_path, file_name, expected):
# Then
assert absolute_path == expected
# This is known to fail and should be avoided
# def test_get_full_file_path_error():
# # Given
@@ -111,4 +114,59 @@ def test_get_full_path_absolute(file_path, file_name, expected):
# full_path = project.get_full_file_path(file_path, file_name)
#
# # Then
# assert full_path == '/temp/gpt-pilot-test/path/to/file/'
# assert full_path == f'{test_root}/path/to/file/'
class TestProjectFileLists:
def setup_method(self):
# Given a project
project = create_project()
self.project = project
project.set_root_path(os.path.join(os.path.dirname(__file__), '../../workspace/directory_tree'))
project.project_description = 'Test Project'
project.development_plan = [{
'description': 'Test User Story',
'programmatic_goal': 'Test Programmatic Goal',
'user_review_goal': 'Test User Review Goal',
}]
# with directories including common.IGNORE_FOLDERS
src = os.path.join(project.root_path, 'src')
os.makedirs(src, exist_ok=True)
for dir in ['.git', '.idea', '.vscode', '__pycache__', 'node_modules', 'venv', 'dist', 'build']:
os.makedirs(os.path.join(project.root_path, dir), exist_ok=True)
# ...and files
with open(os.path.join(project.root_path, 'package.json'), 'w') as file:
json.dump({'name': 'test app'}, file, indent=2)
with open(os.path.join(src, 'main.js'), 'w') as file:
file.write('console.log("Hello World!");')
# and a non-empty .gpt-pilot directory
project.dot_pilot_gpt.write_project(project)
def test_get_directory_tree(self):
# When
tree = self.project.get_directory_tree()
# Then we should not be including the .gpt-pilot directory or other ignored directories
assert tree == '''
|-- /
| |-- package.json
| |-- src/
| | |-- main.js
'''.lstrip()
@patch('helpers.Project.DevelopmentSteps.get_or_create', return_value=('test', True))
@patch('helpers.Project.File.get_or_create', return_value=('test', True))
@patch('helpers.Project.FileSnapshot.get_or_create', return_value=(MagicMock(), True))
def test_save_files_snapshot(self, mock_snap, mock_file, mock_step):
# Given a snapshot of the files in the project
# When we save the file snapshot
self.project.save_files_snapshot('test')
# Then the files should be saved to the project, but nothing from `.gpt-pilot/`
assert mock_file.call_count == 2
assert mock_file.call_args_list[0][1]['name'] == 'package.json'
assert mock_file.call_args_list[1][1]['name'] == 'main.js'


@@ -0,0 +1,6 @@
from pilot.helpers.cli import terminate_process
def test_terminate_process_not_running():
terminate_process(999999999, 'not running')
assert True


@@ -34,6 +34,7 @@ def init():
if __name__ == "__main__":
ask_feedback = True
try:
# sys.argv.append('--ux-test=' + 'continue_development')
args = init()
@@ -50,12 +51,12 @@ if __name__ == "__main__":
# TODO get checkpoint from database and fill the project with it
project = Project(args, ipc_client_instance=ipc_client_instance)
project.start()
except KeyboardInterrupt:
exit_gpt_pilot()
except Exception as e:
project.finish()
except Exception:
print(color_red('---------- GPT PILOT EXITING WITH ERROR ----------'))
traceback.print_exc()
print(color_red('--------------------------------------------------'))
exit_gpt_pilot(False)
ask_feedback = False
finally:
exit_gpt_pilot(ask_feedback)
sys.exit(0)


@@ -1,4 +1,4 @@
How can a human user test if this task was completed successfully? If you specify a command that needs to be run or give example, be very specific. You don't want the user to have to think anything through but rather that they just follow your instructions.
How can a human user test if this task was completed successfully? If you specify a command that needs to be run or give an example, be very specific. You don't want the user to have to think anything through but rather just follow your instructions. Note that the command will run on a {{ os }} machine.
!IMPORTANT!
In case the task can be tested by making an API request, do not suggest how can a request be made with Postman but rather write a full cURL command that the user can just run.


@@ -1 +1,8 @@
Ok, now, take your previous message and convert it to actionable items. An item might be a code change or a command run. When you need to change code, make sure that you put the entire content of the file in the value of `content` key even though you will likely copy and paste the most of the previous message.
Ok, now, take your previous message and convert it to actionable items. An item might be a code change or a command run. When you need to change code, make sure that you put the entire content of the file in the value of the `content` key even though you will likely copy and paste most of the previous message.
{%- if running_processes %}
Note that the following processes are already running:
{% for key, data in running_processes.items() -%}
- "{{ key }}" (`{{ data[0] }}`)
{% endfor -%}
{% endif -%}


@@ -126,7 +126,7 @@ def get_additional_info_from_user(project, messages, role):
while True:
if isinstance(message, dict) and 'text' in message:
message = message['text']
print(color_yellow(f"Please check this message and say what needs to be changed. If everything is ok just press ENTER",))
print(color_yellow("Please check this message and say what needs to be changed. If everything is ok just press ENTER",))
answer = ask_user(project, message, require_some_input=False)
if answer.lower() == '':
break


@@ -43,3 +43,27 @@ success
If the command was successfully executed, respond with `DONE`. If it wasn't, respond with `NEEDS_DEBUGGING`.
'''.strip()
def test_parse_task_no_processes():
# When
prompt = get_prompt('development/parse_task.prompt', {
'running_processes': {}
})
# Then
assert 'the following processes' not in prompt
def test_parse_task_with_processes():
# When
prompt = get_prompt('development/parse_task.prompt', {
'running_processes': {
'app': ('npm start', 123),
'mongo': ('mongod', 456)
}
})
# Then
assert 'the following processes are already running:' in prompt
assert '- "app" (`npm start`)\n- "mongo" (`mongod`)' in prompt
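For clarity, the Jinja block added to `parse_task.prompt` can be mirrored as a plain-Python sketch — a hypothetical helper for illustration only; the real template is rendered by the prompt engine, not by this function:

```python
def render_running_processes(running_processes):
    """Plain-Python stand-in for the Jinja block in parse_task.prompt.

    running_processes maps process_name -> (command, pid), matching the new
    shape of helpers.cli.running_processes. Returns '' when nothing is running,
    just as the {%- if running_processes %} guard emits nothing.
    """
    if not running_processes:
        return ''
    lines = ['Note that the following processes are already running:']
    for name, (command, _pid) in running_processes.items():
        lines.append(f'- "{name}" (`{command}`)')
    return '\n'.join(lines)

rendered = render_running_processes({'app': ('npm start', 123), 'mongo': ('mongod', 456)})
```

The two tests above assert exactly this shape: a header line followed by one `- "name" (`command`)` bullet per process.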


@@ -1,6 +1,6 @@
[INST]I received an incomplete JSON response. Please provide the remainder of the JSON object. I will append your entire response to the incomplete JSON data below so it is important that you must not include any of the data already received or any text that does not complete the JSON data.
A response which starts with "Here is the remainder of the JSON object" would be an example of an invalid response, a preamble must NOT be included.
Note that because the JSON data I have already received is an incomplete JSON object, you will need to include the opening and closing curly braces in your response, but rather continue off from EXACTLY where the received JSON ends.
Note that because the JSON data I have already received is an incomplete JSON object, you will not need to include the opening and closing curly braces in your response, but rather continue off from EXACTLY where the received JSON ends.
JSON received:
[/INST]


@@ -20,8 +20,8 @@ def run_command_until_success():
user_stories=[]
)
project.root_path = os.path.abspath(os.path.join(os.path.dirname(os.path.abspath(__file__)),
'../../../workspace/TestDeveloper'))
project.set_root_path(os.path.abspath(os.path.join(os.path.dirname(os.path.abspath(__file__)),
'../../../workspace/TestDeveloper')))
project.technologies = []
project.current_step = ENVIRONMENT_SETUP_STEP
project.app = save_app(project)
@@ -38,4 +38,4 @@ def run_command_until_success():
'human_intervention_description': 'I want you to test that this process works from the CLI _and_ from the UI.',
}
result = developer.step_human_intervention(convo, step)
developer.step_human_intervention(convo, step)


@@ -7,6 +7,7 @@ from getpass import getuser
from database.database import get_app, get_app_by_user_workspace
from utils.style import color_green_bold, disable_color_output
from utils.utils import should_execute_step
from const.common import STEPS
def get_arguments():
@@ -47,7 +48,7 @@ def get_arguments():
arguments['app_type'] = app.app_type
arguments['name'] = app.name
if 'step' not in arguments or ('step' in arguments and not should_execute_step(arguments['step'], app.status)):
arguments['step'] = app.status
arguments['step'] = 'finished' if app.status == 'finished' else STEPS[STEPS.index(app.status) + 1]
print(color_green_bold('\n------------------ LOADING PROJECT ----------------------'))
print(color_green_bold(f'{app.name} (app_id={arguments["app_id"]})'))
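The resume logic above advances to the step *after* the last recorded status instead of re-running it. A sketch with an illustrative `STEPS` ordering (the real list lives in `const.common` and may differ):

```python
# Illustrative ordering only; the authoritative list is const.common.STEPS
STEPS = ['project_description', 'user_stories', 'user_tasks',
         'architecture', 'environment_setup', 'coding', 'finished']

def next_step(status):
    # Resume from the step after the last completed one; 'finished' stays put.
    return 'finished' if status == 'finished' else STEPS[STEPS.index(status) + 1]
```

So a project whose saved status is `'user_stories'` resumes at `'user_tasks'`, rather than repeating the stories step as the old `arguments['step'] = app.status` did.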


@@ -0,0 +1,91 @@
import json
import os
import yaml
from datetime import datetime
from dotenv import load_dotenv
load_dotenv()
USE_GPTPILOT_FOLDER = os.getenv('USE_GPTPILOT_FOLDER') == 'true'
# TODO: Parse files from the `.gpt-pilot` directory to resume a project - `user_stories` may have changed - include checksums for sections which may need to be reprocessed.
# TODO: Save a summary at the end of each task/sprint.
class DotGptPilot:
"""
Manages the `.gpt-pilot` directory.
"""
def __init__(self, log_chat_completions: bool = True):
if not USE_GPTPILOT_FOLDER:
return
self.log_chat_completions = log_chat_completions
self.dot_gpt_pilot_path = self.with_root_path('~', create=False)
self.chat_log_path = self.chat_log_folder(None)
def with_root_path(self, root_path: str, create=True):
if not USE_GPTPILOT_FOLDER:
return
dot_gpt_pilot_path = os.path.join(root_path, '.gpt-pilot')
self.dot_gpt_pilot_path = dot_gpt_pilot_path
# Create the `.gpt-pilot` directory if required.
if create and self.log_chat_completions: # (... or ...):
self.chat_log_folder(None)
return dot_gpt_pilot_path
def chat_log_folder(self, task):
if not USE_GPTPILOT_FOLDER:
return
chat_log_path = os.path.join(self.dot_gpt_pilot_path, 'chat_log')
if task is not None:
chat_log_path = os.path.join(chat_log_path, 'task_' + str(task))
os.makedirs(chat_log_path, exist_ok=True)
self.chat_log_path = chat_log_path
return chat_log_path
def log_chat_completion(self, endpoint: str, model: str, req_type: str, messages: list[dict], response: str):
if not USE_GPTPILOT_FOLDER:
return
if self.log_chat_completions:
time = datetime.now().strftime('%Y-%m-%d_%H_%M_%S')
with open(os.path.join(self.chat_log_path, f'{time}-{req_type}.yaml'), 'w') as file:
data = {
'endpoint': endpoint,
'model': model,
'messages': messages,
'response': response,
}
yaml.safe_dump(data, file, width=120, indent=2, default_flow_style=False, sort_keys=False)
def log_chat_completion_json(self, endpoint: str, model: str, req_type: str, functions: dict, json_response: str):
if not USE_GPTPILOT_FOLDER:
return
if self.log_chat_completions:
time = datetime.now().strftime('%Y-%m-%d_%H_%M_%S')
with open(os.path.join(self.chat_log_path, f'{time}-{req_type}.json'), 'w') as file:
data = {
'endpoint': endpoint,
'model': model,
'functions': functions,
'response': json.loads(json_response),
}
json.dump(data, file, indent=2)
def write_project(self, project):
if not USE_GPTPILOT_FOLDER:
return
data = {
'name': project.args['name'],
'description': project.project_description,
'user_stories': project.user_stories,
'architecture': project.architecture,
'development_plan': project.development_plan,
}
with open(os.path.join(self.dot_gpt_pilot_path, 'project.yaml'), 'w') as file:
yaml.safe_dump(data, file, width=120, indent=2, default_flow_style=False, sort_keys=False)

View File

@@ -16,23 +16,22 @@ def setup_workspace(args) -> str:
"""
Creates & returns the path to the project workspace.
Also creates a 'tests' folder inside the workspace.
:param args: may contain 'workspace' or 'root' keys
:param args: may contain 'root' key
"""
# `args['workspace']` can be used to work with an existing workspace at the specified path.
# `args['root']` is used by VS Code for (nearly) the same purpose, but `args['name']` is appended to it.
workspace = args.get('workspace')
if workspace:
try:
save_user_app(args['user_id'], args['app_id'], workspace)
return workspace
except Exception as e:
print(str(e))
project_path = workspace
else:
root = args.get('root') or get_parent_folder('pilot')
name = args.get('name', 'default_project_name')
project_path = create_directory(os.path.join(root, 'workspace'), name)
return args['workspace']
root = args.get('root') or get_parent_folder('pilot')
project_path = create_directory(os.path.join(root, 'workspace'), args.get('name', 'default_project_name'))
create_directory(project_path, 'tests')
try:
save_user_app(args.get('user_id'), args.get('app_id'), project_path)
except Exception as e:
print(str(e))
return project_path

View File

@@ -17,7 +17,6 @@ from utils.utils import fix_json, get_prompt
from utils.function_calling import add_function_calls_to_request, FunctionCallSet, FunctionType
from utils.questionary import styled_text
def get_tokens_in_messages(messages: List[str]) -> int:
tokenizer = tiktoken.get_encoding("cl100k_base") # GPT-4 tokenizer
tokenized_messages = [tokenizer.encode(message['content']) for message in messages]
@@ -157,6 +156,8 @@ def retry_on_exception(func):
del args[0]['function_buffer']
def wrapper(*args, **kwargs):
wait_duration_ms = None
while True:
try:
# spinner_stop(spinner)
@@ -190,6 +191,7 @@ def retry_on_exception(func):
# or `Expecting value` with `pos` before the end of `e.doc`
function_error_count = update_error_count(args)
logger.warning('Received invalid character in JSON response from LLM. Asking to retry...')
logger.info(f' received: {e.doc}')
set_function_error(args, err_str)
if function_error_count < 3:
continue
@@ -212,12 +214,16 @@ def retry_on_exception(func):
match = re.search(r"Please try again in (\d+)ms.", err_str)
if match:
# spinner = spinner_start(colored("Rate limited. Waiting...", 'yellow'))
logger.debug('Rate limited. Waiting...')
wait_duration = int(match.group(1)) / 1000
time.sleep(wait_duration)
if wait_duration_ms is None:
wait_duration_ms = int(match.group(1))
elif wait_duration_ms < 6000:
# waiting 6ms isn't usually long enough - exponential back-off until about 6 seconds
wait_duration_ms *= 2
logger.debug(f'Rate limited. Waiting {wait_duration_ms}ms...')
time.sleep(wait_duration_ms / 1000)
continue
print(color_red(f'There was a problem with request to openai API:'))
print(color_red('There was a problem with request to openai API:'))
# spinner_stop(spinner)
print(err_str)
logger.error(f'There was a problem with request to openai API: {err_str}')
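The retry change above replaces a single fixed wait with doubling back-off capped near six seconds, because the server's "try again in 6ms" hint is usually too short. A minimal sketch of that policy pulled out of the wrapper (the function name is illustrative, not part of the codebase):

```python
import re
from typing import Optional

def next_backoff_ms(err_str: str, wait_ms: Optional[int]) -> Optional[int]:
    """Return the next wait in milliseconds, or None if this is not a rate-limit error."""
    match = re.search(r"Please try again in (\d+)ms\.", err_str)
    if match is None:
        return None                 # not a rate-limit message; caller handles it elsewhere
    if wait_ms is None:
        return int(match.group(1))  # first retry: start from the server's hint
    if wait_ms < 6000:
        return wait_ms * 2          # exponential back-off
    return wait_ms                  # hold steady once past ~6 seconds
```

Starting from the 6ms hint this yields 6, 12, 24, ..., 3072, 6144, 6144, which is the `time.sleep` sequence asserted in `test_rate_limit_error` below.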
@@ -249,7 +255,6 @@ def stream_gpt_completion(data, req_type, project):
:param project: NEEDED FOR WRAPPER FUNCTION retry_on_exception
:return: {'text': str} or {'function_calls': {'name': str, arguments: '{...}'}}
"""
# TODO add type dynamically - this isn't working when connected to the external process
try:
terminal_width = os.get_terminal_size().columns
@@ -308,9 +313,10 @@ def stream_gpt_completion(data, req_type, project):
headers = {
'Content-Type': 'application/json',
'Authorization': 'Bearer ' + get_api_key_or_throw('OPENROUTER_API_KEY'),
'HTTP-Referer': 'http://localhost:3000',
'X-Title': 'GPT Pilot (LOCAL)'
'HTTP-Referer': 'https://github.com/Pythagora-io/gpt-pilot',
'X-Title': 'GPT Pilot'
}
data['max_tokens'] = MAX_GPT_MODEL_TOKENS
data['model'] = model
else:
# If not, send the request to the OpenAI endpoint
@@ -328,11 +334,9 @@ def stream_gpt_completion(data, req_type, project):
stream=True
)
# Log the response status code and message
logger.debug(f'Response status code: {response.status_code}')
if response.status_code != 200:
logger.info(f'problem with request: {response.text}')
project.dot_pilot_gpt.log_chat_completion(endpoint, model, req_type, data['messages'], response.text)
logger.info(f'problem with request (status {response.status_code}): {response.text}')
raise Exception(f"API responded with status code: {response.status_code}. Response text: {response.text}")
# function_calls = {'name': '', 'arguments': ''}
@@ -405,10 +409,13 @@ def stream_gpt_completion(data, req_type, project):
# function_calls['arguments'] = load_data_to_json(function_calls['arguments'])
# return return_result({'function_calls': function_calls}, lines_printed)
logger.info('<<<<<<<<<< LLM Response <<<<<<<<<<\n%s\n<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<', gpt_response)
project.dot_pilot_gpt.log_chat_completion(endpoint, model, req_type, data['messages'], gpt_response)
if expecting_json:
gpt_response = clean_json_response(gpt_response)
assert_json_schema(gpt_response, expecting_json)
# Note: we log JSON separately from the YAML log above in case the JSON is invalid and an error is raised
project.dot_pilot_gpt.log_chat_completion_json(endpoint, model, req_type, expecting_json, gpt_response)
new_code = postprocessing(gpt_response, req_type) # TODO add type dynamically
return return_result({'text': new_code}, lines_printed)

View File

@@ -1,7 +1,11 @@
from prompt_toolkit.styles import Style
import platform
import questionary
from utils.style import color_yellow_bold
import re
import sys
from prompt_toolkit.styles import Style
from utils.style import yellow_bold
from database.database import save_user_input, get_saved_user_input
custom_style = Style.from_dict({
@@ -19,7 +23,7 @@ def remove_ansi_codes(s: str) -> str:
def styled_select(*args, **kwargs):
kwargs["style"] = custom_style # Set style here
kwargs["style"] = custom_style
return questionary.select(*args, **kwargs).unsafe_ask() # .ask() is included here
@@ -38,7 +42,8 @@ def styled_text(project, question, ignore_user_input_count=False, style=None):
config = {
'style': style if style is not None else custom_style,
}
question = remove_ansi_codes(question) # Colorama and questionary are not compatible and styling doesn't work
question = remove_ansi_codes(question) # Colorama and questionary are not compatible and styling doesn't work
flush_input()
response = questionary.text(question, **config).unsafe_ask() # .ask() is included here
else:
response = print(question, type='user_input_request')
@@ -55,4 +60,19 @@ def get_user_feedback():
config = {
'style': custom_style,
}
return questionary.text("How did GPT Pilot do? Were you able to create any app that works? Please write any feedback you have or just press ENTER to exit: ", **config).unsafe_ask()
return questionary.text('How did GPT Pilot do? Were you able to create any app that works? '
'Please write any feedback you have or just press ENTER to exit: ', **config).unsafe_ask()
def flush_input():
"""Flush the input buffer, discarding all that's in the buffer."""
try:
if platform.system() == 'Windows':
import msvcrt
while msvcrt.kbhit():
msvcrt.getch()
else:
import termios
termios.tcflush(sys.stdin, termios.TCIOFLUSH)
except (ImportError, OSError):
pass

View File

@@ -1,10 +1,6 @@
from .files import setup_workspace
def test_setup_workspace_with_existing_workspace():
args = {'workspace': 'some_directory', 'name': 'sample'}
result = setup_workspace(args)
assert result == 'some_directory'
import os
from unittest.mock import patch
from utils.files import setup_workspace
def mocked_create_directory(path, exist_ok=True):
@@ -15,11 +11,27 @@ def mocked_abspath(file):
return "/root_path/pilot/helpers"
def test_setup_workspace_without_existing_workspace(monkeypatch):
args = {'workspace': None, 'name': 'project_name'}
@patch('utils.files.os.makedirs', side_effect=mocked_create_directory)
def test_setup_workspace_with_existing_workspace(mock_makedirs):
args = {'workspace': '/some/directory', 'name': 'sample'}
result = setup_workspace(args)
assert result == '/some/directory'
def test_setup_workspace_with_root_arg(monkeypatch):
args = {'root': '/my/root', 'name': 'project_name'}
monkeypatch.setattr('os.path.abspath', mocked_abspath)
monkeypatch.setattr('os.makedirs', mocked_create_directory)
result = setup_workspace(args)
assert result.replace('\\', '/') == "/my/root/workspace/project_name"
@patch('utils.files.os.path.abspath', return_value='/root_path/pilot/helpers')
@patch('utils.files.os.makedirs', side_effect=mocked_create_directory)
def test_setup_workspace_without_existing_workspace(mock_makedirs, mock_abs_path):
args = {'workspace': None, 'name': 'project_name'}
result = setup_workspace(args)
assert result.replace('\\', '/') == "/root_path/workspace/project_name"

View File

@@ -2,7 +2,7 @@ import builtins
from json import JSONDecodeError
import pytest
from unittest.mock import patch, Mock
from unittest.mock import call, patch, Mock
from dotenv import load_dotenv
from jsonschema import ValidationError
from const.function_calls import ARCHITECTURE, DEVELOPMENT_PLAN
@@ -19,7 +19,7 @@ from main import get_custom_print
load_dotenv()
project = Project({'app_id': 'test-app'}, current_step='test')
project = Project({'app_id': 'test-app'}, current_step='test', enable_dot_pilot_gpt=False)
def test_clean_json_response_True_False():
@@ -364,6 +364,50 @@ class TestLlmConnection:
def setup_method(self):
builtins.print, ipc_client_instance = get_custom_print({})
@patch('utils.llm_connection.requests.post')
@patch('utils.llm_connection.time.sleep')
def test_rate_limit_error(self, mock_sleep, mock_post, monkeypatch):
monkeypatch.setenv('OPENAI_API_KEY', 'secret')
error_text = '''{
"error": {
"message": "Rate limit reached for 10KTPM-200RPM in organization org-OASFC7k1Ff5IzueeLArhQtnT on tokens per min. Limit: 10000 / min. Please try again in 6ms. Contact us through our help center at help.openai.com if you continue to have issues.",
"type": "tokens",
"param": null,
"code": "rate_limit_exceeded"
}
}'''
content = 'DONE'
success_text = '{"id": "gen-123", "choices": [{"index": 0, "delta": {"role": "assistant", "content": "' + content + '"}}]}'
error_response = Mock()
error_response.status_code = 429
error_response.text = error_text
mock_response = Mock()
mock_response.status_code = 200
mock_response.iter_lines.return_value = [success_text.encode('utf-8')]
mock_post.side_effect = [error_response, error_response, error_response, error_response, error_response,
error_response, error_response, error_response, error_response, error_response,
error_response, error_response, mock_response]
wrapper = retry_on_exception(stream_gpt_completion)
data = {
'model': 'gpt-4',
'messages': [{'role': 'user', 'content': 'testing'}]
}
# When
response = wrapper(data, 'test', project)
# Then
assert response == {'text': 'DONE'}
# assert mock_sleep.call_count == 9
assert mock_sleep.call_args_list == [call(0.006), call(0.012), call(0.024), call(0.048), call(0.096),
call(0.192), call(0.384), call(0.768), call(1.536), call(3.072),
call(6.144), call(6.144)]
# mock_sleep.call
@patch('utils.llm_connection.requests.post')
def test_stream_gpt_completion(self, mock_post, monkeypatch):
# Given streaming JSON response
@@ -456,9 +500,11 @@ solution-oriented decision-making in areas where precise instructions were not p
('OPENAI', 'gpt-4'),
('OPENROUTER', 'openai/gpt-3.5-turbo'),
('OPENROUTER', 'meta-llama/codellama-34b-instruct'),
('OPENROUTER', 'phind/phind-codellama-34b-v2'),
('OPENROUTER', 'google/palm-2-chat-bison'),
('OPENROUTER', 'google/palm-2-codechat-bison'),
('OPENROUTER', 'anthropic/claude-2'),
('OPENROUTER', 'mistralai/mistral-7b-instruct')
])
def test_chat_completion_TechLead(self, endpoint, model, monkeypatch):
# Given
@@ -490,7 +536,7 @@ The development process will include the creation of user stories and tasks, bas
function_calls = DEVELOPMENT_PLAN
# Retry on bad LLM responses
mock_questionary = MockQuestionary(['', '', 'no'])
# mock_questionary = MockQuestionary(['', '', 'no'])
# with patch('utils.llm_connection.questionary', mock_questionary):
# When
@@ -530,5 +576,5 @@ The development process will include the creation of user stories and tasks, bas
# # assert len(convo.messages) == 2
# assert response == ([{'type': 'command', 'description': 'Run the app'}], 'more_tasks')
def _create_convo(self, agent):
convo = AgentConvo(agent)
# def _create_convo(self, agent):
# convo = AgentConvo(agent)

View File

@@ -84,12 +84,12 @@ def get_prompt_components():
return prompts_components
def get_sys_message(role):
def get_sys_message(role, args=None):
"""
:param role: 'product_owner', 'architect', 'dev_ops', 'tech_lead', 'full_stack_developer', 'code_monkey'
:return: { "role": "system", "content": "You are a {role}... You do..." }
"""
content = get_prompt(f'system_messages/{role}.prompt')
content = get_prompt(f'system_messages/{role}.prompt', args)
return {
"role": "system",

View File

@@ -13,6 +13,7 @@ psycopg2-binary==2.9.6
python-dotenv==1.0.0
python-editor==1.0.4
pytest==7.4.2
pyyaml==6.0.1
questionary==1.10.0
readchar==4.0.5
regex==2023.6.3