Merge branch 'development' into alternative_solutions

This commit is contained in:
Zvonimir Sabljic
2024-02-03 11:19:22 -08:00
21 changed files with 520 additions and 1087 deletions

View File

@@ -29,3 +29,6 @@ DB_PASSWORD=
# Load database imported from another location/system - EXPERIMENTAL
# AUTOFIX_FILE_PATHS=false
# Set extra buffer to wait on top of detected retry time when rate limit is hit. Defaults to 6
# RATE_LIMIT_EXTRA_BUFFER=

View File

@@ -87,7 +87,7 @@ def step_save_file_definition():
},
"save_file": {
"type": "object",
"description": "A file that needs to be created or file that needs to be completely replaced. This should be used for new files.",
"description": "A file that needs to be created or file that needs to be completely replaced. This should only be used for new files.",
"properties": {
"name": {
"type": "string",
@@ -95,7 +95,7 @@ def step_save_file_definition():
},
"path": {
"type": "string",
"description": "Full path of the file (with the file name) that needs to be created or replaced."
"description": "Full path of the file (with the file name) that needs to be created."
},
"content": {
"type": "string",
@@ -147,11 +147,11 @@ def step_human_intervention_definition():
"properties": {
"type": {
"const": "human_intervention",
"description": dev_step_type_description()
"description": 'Development step that will be executed by a human. You should avoid using this step if possible, task does NOT need to have "human_intervention" step.'
},
"human_intervention_description": {
"type": "string",
"description": "Description of step where human intervention is needed."
"description": "Very clear description of step where human intervention is needed."
}
},
"required": ["type", "human_intervention_description"]
@@ -717,3 +717,43 @@ GET_DOCUMENTATION_FILE = {
},
}],
}
# Function-calling schema for the change-review step: the LLM acts as a PR
# reviewer over a unified diff and decides, hunk by hunk, whether each hunk
# should be applied or ignored.
REVIEW_CHANGES = {
    'definitions': [{
        'name': 'review_diff',
        'description': 'Review a unified diff and select hunks to apply.',
        'parameters': {
            "type": "object",
            "properties": {
                "hunks": {
                    "type": "array",
                    "items": {
                        "type": "object",
                        "properties": {
                            "number": {
                                "type": "integer",
                                "description": "Index of the hunk in the diff. Starts from 1."
                            },
                            "decision": {
                                "type": "string",
                                "enum": ["apply", "ignore"],
                                "description": "Whether to apply this hunk (if it's a valid change) or ignore it."
                            },
                            "reason": {
                                "type": "string",
                                # FIX: the key was misspelled "desciprion", so the schema
                                # carried a junk key and no description for this field.
                                "description": "Reason for allowing or ignoring this hunk."
                            }
                        },
                        "required": ["number", "decision", "reason"],
                        "additionalProperties": False
                    },
                },
                "review_notes": {
                    "type": "string"
                }
            },
            "required": ["hunks", "review_notes"],
            "additionalProperties": False
        }
    }],
}

View File

@@ -4,3 +4,4 @@ AFFIRMATIVE_ANSWERS = ['', 'y', 'yes', 'ok', 'okay', 'sure', 'absolutely', 'inde
NEGATIVE_ANSWERS = ['n', 'no', 'skip', 'negative', 'not now', 'cancel', 'decline', 'stop', 'Keep my changes']
STUCK_IN_LOOP = 'I\'m stuck in loop'
NONE_OF_THESE = 'none of these'
MAX_PROJECT_NAME_LENGTH = 50

View File

@@ -1,6 +1,7 @@
import platform
import uuid
import re
import traceback
from const.code_execution import MAX_COMMAND_DEBUG_TRIES, MAX_RECURSION_LAYER
from const.function_calls import DEBUG_STEPS_BREAKDOWN
@@ -10,6 +11,7 @@ from helpers.exceptions import TokenLimitError
from helpers.exceptions import TooDeepRecursionError
from logger.logger import logger
from prompts.prompts import ask_user
from utils.exit import trace_code_event
class Debugger:
@@ -40,6 +42,8 @@ class Debugger:
self.agent.project.current_task.add_debugging_task(self.recursion_layer, command, user_input, issue_description)
if self.recursion_layer > MAX_RECURSION_LAYER:
self.recursion_layer = 0
# TooDeepRecursionError kills all debugging loops and goes back to the point where first debug was called
# it does not retry initial step but instead calls dev_help_needed()
raise TooDeepRecursionError()
function_uuid = str(uuid.uuid4())
@@ -54,6 +58,8 @@ class Debugger:
print('yes/no', type='button')
answer = ask_user(self.agent.project, 'Can I start debugging this issue [Y/n/error details]?', require_some_input=False)
if answer.lower() in NEGATIVE_ANSWERS:
self.recursion_layer -= 1
convo.load_branch(function_uuid)
return True
if answer and answer.lower() not in AFFIRMATIVE_ANSWERS:
user_input = answer
@@ -117,24 +123,22 @@ class Debugger:
break
except TokenLimitError as e:
# initial TokenLimitError is triggered by OpenAI API
# TokenLimitError kills recursion loops 1 by 1 and reloads convo, so it can retry the same initial step
if self.recursion_layer > 0:
convo.load_branch(function_uuid)
self.recursion_layer -= 1
raise e
else:
trace_code_event('token-limit-error', {'error': traceback.format_exc()})
if not success:
convo.load_branch(function_uuid)
continue
# if not success:
# # TODO explain better how should the user approach debugging
# # we can copy the entire convo to clipboard so they can paste it in the playground
# user_input = convo.agent.project.ask_for_human_intervention(
# 'It seems like I cannot debug this problem by myself. Can you please help me and try debugging it yourself?' if user_input is None else f'Can you check this again:\n{issue_description}?',
# response['data']
# )
# if user_input == 'continue':
# success = True
except TooDeepRecursionError as e:
convo.load_branch(function_uuid)
raise e
convo.load_branch(function_uuid)
self.recursion_layer -= 1
return success

View File

@@ -1,25 +1,30 @@
import os.path
import re
from typing import Optional
from traceback import format_exc
from difflib import unified_diff
from helpers.AgentConvo import AgentConvo
from helpers.Agent import Agent
from helpers.files import get_file_contents
from const.function_calls import GET_FILE_TO_MODIFY
from const.function_calls import GET_FILE_TO_MODIFY, REVIEW_CHANGES
from utils.exit import trace_code_event
from utils.telemetry import telemetry
# Constant for indicating missing new line at the end of a file in a unified diff
NO_EOL = "\ No newline at end of file"
# Regular expression pattern for matching hunk headers
PATCH_HEADER_PATTERN = re.compile(r"^@@ -(\d+),?(\d+)? \+(\d+),?(\d+)? @@")
MAX_REVIEW_RETRIES = 3
class CodeMonkey(Agent):
save_dev_steps = True
# Only attempt block-by-block replace if the file is larger than this many lines
SMART_REPLACE_THRESHOLD = 200
def __init__(self, project, developer):
def __init__(self, project):
super().__init__('code_monkey', project)
self.developer = developer
def get_original_file(
self,
@@ -66,53 +71,41 @@ class CodeMonkey(Agent):
def implement_code_changes(
self,
convo: Optional[AgentConvo],
code_changes_description: str,
step: dict[str, str],
) -> AgentConvo:
"""
Implement code changes described in `code_changes_description`.
:param convo: AgentConvo instance (optional)
:param task_description: description of the task
:param code_changes_description: description of the code changes
:param convo: conversation to continue)
:param step: information about the step being implemented
:param step_index: index of the step to implement
"""
code_change_description = step['code_change_description']
standalone = False
if not convo:
standalone = True
convo = AgentConvo(self)
files = self.project.get_all_coded_files()
file_name, file_content = self.get_original_file(code_changes_description, step, files)
content = file_content
file_name, file_content = self.get_original_file(code_change_description, step, files)
# If the file is non-empty and larger than the threshold, attempt to replace individual code blocks
if file_content and len(file_content.splitlines()) > self.SMART_REPLACE_THRESHOLD:
replace_complete_file, content = self.replace_code_blocks(
step,
convo,
standalone,
code_changes_description,
file_content,
file_name,
files,
)
else:
# Just replace the entire file
replace_complete_file = True
# Get the new version of the file
content = self.replace_complete_file(
convo,
standalone,
code_change_description,
file_content,
file_name,
files,
)
# If this is a new file or replacing individual code blocks failed,
# replace the complete file.
if replace_complete_file:
content = self.replace_complete_file(
convo,
standalone,
code_changes_description,
file_content,
file_name, files
)
# Review the changes and only apply changes that are useful/approved
if content and content != file_content:
content = self.review_change(convo, code_change_description, file_name, file_content, content)
# If we have changes, update the file
# TODO: if we *don't* have changes, we might want to retry the whole process (eg. have the reviewer
# explicitly reject the whole PR and use that as feedback in implement_changes.prompt)
if content and content != file_content:
if not self.project.skip_steps:
delta_lines = len(content.splitlines()) - len(file_content.splitlines())
@@ -125,123 +118,6 @@ class CodeMonkey(Agent):
return convo
def replace_code_blocks(
self,
step: dict[str, str],
convo: AgentConvo,
standalone: bool,
code_changes_description: str,
file_content: str,
file_name: str,
files: list[dict]
):
llm_response = convo.send_message('development/implement_changes.prompt', {
"full_output": False,
"standalone": standalone,
"code_changes_description": code_changes_description,
"file_content": file_content,
"file_name": file_name,
"files": files,
})
replace_complete_file = False
exchanged_messages = 2
content = file_content
# Allow for up to 2 retries
while exchanged_messages < 7:
if re.findall('(old|existing).+code', llm_response, re.IGNORECASE):
trace_code_event("codemonkey-file-update-error", {
"error": "old-code-comment",
"llm_response": llm_response,
})
llm_response = convo.send_message('utils/llm_response_error.prompt', {
"error": (
"You must not omit any code from NEW_CODE. "
"Please don't use coments like `// .. existing code goes here`."
)
})
exchanged_messages += 2
continue
# Split the response into pairs of old and new code blocks
block_pairs = self.get_code_blocks(llm_response)
if len(block_pairs) == 0:
if "```" in llm_response:
# We know some code blocks were outputted but we couldn't find them
print("Unable to parse code blocks from LLM response, asking to retry")
trace_code_event("codemonkey-file-update-error", {
"error": "error-parsing-blocks",
"llm_response": llm_response,
})
# If updating is more complicated than just replacing the complete file, don't bother.
if len(llm_response) > len(file_content):
replace_complete_file = True
break
llm_response = convo.send_message('utils/llm_response_error.prompt', {
"error": "I can't find CURRENT_CODE and NEW_CODE blocks in your response, please try again."
})
exchanged_messages += 2
continue
else:
print(f"No changes required for {step['name']}")
break
# Replace old code blocks with new code blocks
errors = []
for i, (old_code, new_code) in enumerate(block_pairs):
try:
old_code, new_code = self.dedent(old_code, new_code)
content = self.replace(content, old_code, new_code)
except ValueError as err:
errors.append((i + 1, str(err)))
if not errors:
break
trace_code_event("codemonkey-file-update-error", {
"error": "replace-errors",
"llm_response": llm_response,
"details": errors,
})
print(f"{len(errors)} error(s) while trying to update file, asking LLM to retry")
if len(llm_response) > len(file_content):
# If updating is more complicated than just replacing the complete file, don't bother.
replace_complete_file = True
break
# Otherwise, identify the problem block(s) and ask the LLM to retry
if content != file_content:
error_text = (
"Some changes were applied, but these failed:\n" +
"\n".join(f"Error in change {i}:\n{err}" for i, err in errors) +
"\nPlease fix the errors and try again (only output the blocks that failed to update, not all of them)."
)
else:
error_text = "\n".join(f"Error in change {i}:\n{err}" for i, err in errors)
llm_response = convo.send_message('utils/llm_response_error.prompt', {
"error": error_text,
})
exchanged_messages += 2
else:
# We failed after a few retries, so let's just replace the complete file
print("Unable to modify file, asking LLM to output the complete new file")
replace_complete_file = True
if replace_complete_file:
trace_code_event("codemonkey-file-update-error", {
"error": "fallback-complete-replace",
"llm_response": llm_response,
})
convo.remove_last_x_messages(exchanged_messages)
return replace_complete_file, content
def replace_complete_file(
self,
convo: AgentConvo,
@@ -278,9 +154,9 @@ class CodeMonkey(Agent):
end_pattern = re.compile(r"\n```\s*$")
llm_response = start_pattern.sub("", llm_response)
llm_response = end_pattern.sub("", llm_response)
convo.remove_last_x_messages(2)
return llm_response
def identify_file_to_change(self, code_changes_description: str, files: list[dict]) -> str:
"""
Identify file to change based on the code changes description
@@ -296,118 +172,222 @@ class CodeMonkey(Agent):
}, GET_FILE_TO_MODIFY)
return llm_response["file"]
@staticmethod
def get_code_blocks(llm_response: str) -> list[tuple[str, str]]:
def review_change(
self,
convo: AgentConvo,
instructions: str,
file_name: str,
old_content: str,
new_content: str
) -> str:
"""
Split the response into code block(s).
Review changes that were applied to the file.
Ignores any content outside of code blocks.
This asks the LLM to act as a PR reviewer and for each part (hunk) of the
diff, decide if it should be applied (kept) or ignored (removed from the PR).
:param llm_response: response from the LLM
:return: list of pairs of current and new blocks
"""
pattern = re.compile(
r"CURRENT_CODE:\n```([a-z0-9]+)?\n(.*?)\n```\nNEW_CODE:\n```([a-z0-9]+)?\n(.*?)\n?```\nEND\s*",
re.DOTALL
)
pairs = []
for block in pattern.findall(llm_response):
pairs.append((block[1], block[3]))
return pairs
:param convo: AgentConvo instance
:param instructions: instructions for the reviewer
:param file_name: name of the file being modified
:param old_content: old file content
:param new_content: new file content (with proposed changes)
:return: file content update with approved changes
@staticmethod
def dedent(old_code: str, new_code: str) -> tuple[str, str]:
"""
Remove common indentation from `old_code` and `new_code`.
This is useful because the LLM will sometimes indent the code blocks MORE
than in the original file, leading to no matches. Since we have indent
compensation, we can just remove any extra indent as long as we do it
consistently for both old and new code block.
:param old_code: old code block
:param new_code: new code block
:return: tuple of (old_code, new_code) with common indentation removed
"""
old_lines = old_code.splitlines()
new_lines = new_code.splitlines()
indent = 0
while all(ol.startswith(" ") for ol in old_lines) and all(ol.startswith(" ") for ol in new_lines):
indent -= 1
old_lines = [ol[1:] for ol in old_lines]
new_lines = [nl[1:] for nl in new_lines]
return "\n".join(old_lines), "\n".join(new_lines)
@staticmethod
def replace(haystack: str, needle: str, replacement: str) -> str:
"""
Replace `needle` text in `haystack`, allowing that `needle` is not
indented the same as the matching part of `haystack` and
compensating for it.
:param haystack: text to search in
:param needle: text to search for
:param replacement: text to replace `needle` with
:return: `haystack` with `needle` replaced with `replacement`
Example:
>>> haystack = "def foo():\n pass"
>>> needle = "pass"
>>> replacement = "return 42"
>>> replace(haystack, needle, replacement)
"def foo():\n return 42"
If `needle` is not found in `haystack` even with indent compensation,
or if it's found multiple times, raise a ValueError.
Diff hunk explanation: https://www.gnu.org/software/diffutils/manual/html_node/Hunks.html
"""
def indent_text(text: str, indent: int) -> str:
return "\n".join((" " * indent + line) for line in text.splitlines())
hunks = self.get_diff_hunks(file_name, old_content, new_content)
def indent_sensitive_match(haystack: str, needle: str) -> int:
"""
Check if 'needle' is in 'haystack' but compare full lines.
"""
# This is required so we don't match text "foo" (no indentation) with line " foo"
# (2 spaces indentation). We want exact matches so we know exact indentation needed.
haystack_with_line_start_stop_markers = "\n".join(f"\x00{line}\x00" for line in haystack.splitlines())
needle_with_line_start_stop_markers = "\n".join(f"\x00{line}\x00" for line in needle.splitlines())
return haystack_with_line_start_stop_markers.count(needle_with_line_start_stop_markers)
llm_response = convo.send_message('development/review_changes.prompt', {
"instructions": instructions,
"file_name": file_name,
"old_content": old_content,
"hunks": hunks,
}, REVIEW_CHANGES)
messages_to_remove = 2
# Try from the largest indents to the smallest so that we know the correct indentation of
# single-line old blocks that would otherwise match with 0 indent as well. If these single-line
# old blocks were then replaced with multi-line blocks and indentation wasn't correctly re-applied,
# the new multiline block would only have the first line correctly indented. We want to avoid that.
matching_old_blocks = []
for i in range(MAX_REVIEW_RETRIES):
ids_to_apply = set()
ids_to_ignore = set()
for hunk in llm_response.get("hunks", []):
if hunk.get("decision", "").lower() == "apply":
ids_to_apply.add(hunk["number"] - 1)
elif hunk.get("decision", "").lower() == "ignore":
ids_to_ignore.add(hunk["number"] - 1)
for indent in range(128, -1, -1):
text = indent_text(needle, indent)
if text not in haystack:
# If there are empty lines in the old code, `indent_text` will indent them as well. The original
# file might not have them indented as they're empty, so it is useful to try without indenting
# those empty lines.
text = "\n".join(
(line if line.strip() else "")
for line
in text.splitlines()
)
n_matches = indent_sensitive_match(haystack, text)
for i in range(n_matches):
matching_old_blocks.append((indent, text))
n_hunks = len(hunks)
n_review_hunks = len(ids_to_apply | ids_to_ignore)
if n_review_hunks == n_hunks:
break
elif n_review_hunks < n_hunks:
error = "Not all hunks have been reviewed. Please review all hunks and add 'apply' or 'ignore' decision for each."
elif n_review_hunks > n_hunks:
error = f"Your review contains more hunks ({n_review_hunks}) than in the original diff ({n_hunks}). Note that one hunk may have multiple changed lines."
if len(matching_old_blocks) == 0:
raise ValueError(
f"Old code block not found in the original file:\n```\n{needle}\n```\n"
"Old block *MUST* contain the exact same text (including indentation, empty lines, etc.) as the original file "
"in order to match."
# Up to MAX_REVIEW_RETRIES attempts; if the reviewer still hasn't reviewed all hunks, we'll just use the entire new content
llm_response = convo.send_message(
'utils/llm_response_error.prompt', {
"error": error
},
REVIEW_CHANGES,
)
messages_to_remove += 2
else:
# The reviewer failed to review all the hunks in 3 attempts, let's just use all the new content
convo.remove_last_x_messages(messages_to_remove)
return new_content
if len(matching_old_blocks) > 1:
raise ValueError(
f"Old code block found more than once ({len(matching_old_blocks)} matches) in the original file:\n```\n{needle}\n```\n\n"
"Please provide larger blocks (more context) to uniquely identify the code that needs to be changed."
convo.remove_last_x_messages(messages_to_remove)
hunks_to_apply = [ h for i, h in enumerate(hunks) if i in ids_to_apply ]
diff_log = f"--- {file_name}\n+++ {file_name}\n" + "\n".join(hunks_to_apply)
if len(hunks_to_apply) == len(hunks):
print("Applying entire change")
return new_content
elif len(hunks_to_apply) == 0:
print("Reviewer has doubts, but applying all proposed changes anyway")
trace_code_event(
"modify-file-review-reject-all",
{
"file": file_name,
"original": old_content,
"diff": f"--- {file_name}\n+++ {file_name}\n" + "\n".join(hunks),
}
)
return new_content
else:
print("Applying code change:\n" + diff_log)
return self.apply_diff(file_name, old_content, hunks_to_apply, new_content)
indent, text = matching_old_blocks[0]
indented_replacement = indent_text(replacement, indent)
return haystack.replace(text, indented_replacement)
@staticmethod
def get_diff_hunks(file_name: str, old_content: str, new_content: str) -> list[str]:
"""
Get the diff between two files.
This uses Python difflib to produce an unified diff, then splits
it into hunks that will be separately reviewed by the reviewer.
:param file_name: name of the file being modified
:param old_content: old file content
:param new_content: new file content
:return: change hunks from the unified diff
"""
from_name = "old_" + file_name
to_name = "to_" + file_name
from_lines = old_content.splitlines(keepends=True)
to_lines = new_content.splitlines(keepends=True)
diff_gen = unified_diff(from_lines, to_lines, fromfile=from_name, tofile=to_name)
diff_txt = "".join(diff_gen)
hunks = re.split(r'\n@@', diff_txt, re.MULTILINE)
result = []
for i, h in enumerate(hunks):
# Skip the prologue (file names)
if i == 0:
continue
txt = h.splitlines()
txt[0] = "@@" + txt[0]
result.append("\n".join(txt))
return result
def apply_diff(
    self,
    file_name: str,
    old_content: str,
    hunks: list[str],
    fallback: str
):
    """
    Apply the diff to the original file content.

    This uses the internal `_apply_patch` method to apply the
    approved diff hunks to the original file content.

    If patch apply fails, the fallback is the full new file content
    with all the changes applied (as if the reviewer approved everything).

    :param file_name: name of the file being modified
    :param old_content: old file content
    :param hunks: change hunks from the unified diff
    :param fallback: proposed new file content (with all the changes applied)
    :return: patched file content, or `fallback` if patching failed
    """
    # Reassemble a full unified diff: the "---"/"+++" header followed by the
    # approved hunks, terminated with a newline so the last line parses cleanly.
    diff = "\n".join(
        [
            "--- " + file_name,
            "+++ " + file_name,
        ] + hunks
    ) + "\n"
    try:
        fixed_content = self._apply_patch(old_content, diff)
    except Exception as e:
        # This should never happen but if it does, just use the new version from
        # the LLM and hope for the best
        print(f"Error applying diff: {e}; hoping all changes are valid")
        # Report the failure (with full context) via the project's telemetry hook
        trace_code_event(
            "patch-apply-error",
            {
                "file": file_name,
                "error": str(e),
                "traceback": format_exc(),
                "original": old_content,
                "diff": diff
            }
        )
        return fallback
    return fixed_content
# Adapted from https://gist.github.com/noporpoise/16e731849eb1231e86d78f9dfeca3abc (Public Domain)
@staticmethod
def _apply_patch(original: str, patch: str, revert: bool = False):
    """
    Apply a patch to a string to recover a newer version of the string.

    Walks the patch hunk by hunk (hunk headers matched with
    PATCH_HEADER_PATTERN), copying unchanged regions from `original` and
    taking added/context lines from the patch.

    :param original: The original string.
    :param patch: The patch to apply.
    :param revert: If True, treat the original string as the newer version and recover the older string.
    :return: The updated string after applying the patch.
    :raises Exception: if a hunk header can't be parsed or references
        an invalid/overlapping line range.
    """
    original_lines = original.splitlines(True)
    patch_lines = patch.splitlines(True)
    updated_text = ''
    # index_original: cursor into patch_lines; start_line: cursor into original_lines
    index_original = start_line = 0

    # Choose which group of the regex to use based on the revert flag
    # (group 1 = old-file line number / '+' lines kept when applying;
    #  group 3 = new-file line number / '-' lines kept when reverting)
    match_index, line_sign = (1, '+') if not revert else (3, '-')

    # Skip header lines of the patch
    while index_original < len(patch_lines) and patch_lines[index_original].startswith(("---", "+++")):
        index_original += 1

    while index_original < len(patch_lines):
        # Each iteration consumes one "@@ -a,b +c,d @@" hunk header plus its body
        match = PATCH_HEADER_PATTERN.match(patch_lines[index_original])
        if not match:
            raise Exception("Bad patch -- regex mismatch [line " + str(index_original) + "]")
        # Convert the 1-based header line number to a 0-based index; the
        # `== '0'` term presumably compensates for zero-length ranges
        # ("@@ -0,0 ...") that point *before* the first line -- TODO confirm
        line_number = int(match.group(match_index)) - 1 + (match.group(match_index + 1) == '0')
        if start_line > line_number or line_number > len(original_lines):
            raise Exception("Bad patch -- bad line number [line " + str(index_original) + "]")
        # Copy the untouched region between the previous hunk and this one
        updated_text += ''.join(original_lines[start_line:line_number])
        start_line = line_number
        index_original += 1

        # Consume hunk body lines until the next "@@" header (or end of patch)
        while index_original < len(patch_lines) and patch_lines[index_original][0] != '@':
            if index_original + 1 < len(patch_lines) and patch_lines[index_original + 1][0] == '\\':
                # Next line is a "\ No newline at end of file" marker:
                # drop this line's trailing newline and skip the marker line
                line_content = patch_lines[index_original][:-1]
                index_original += 2
            else:
                line_content = patch_lines[index_original]
                index_original += 1
            if line_content:
                # Keep context (' ') lines and lines matching `line_sign`;
                # advance the original-file cursor for every line that
                # existed in the source version (i.e. everything except
                # the lines being added by this patch direction)
                if line_content[0] == line_sign or line_content[0] == ' ':
                    updated_text += line_content[1:]
                start_line += (line_content[0] != line_sign)

    # Copy the tail of the original after the last hunk
    updated_text += ''.join(original_lines[start_line:])
    return updated_text

View File

@@ -56,12 +56,13 @@ class Developer(Agent):
for i, dev_task in enumerate(self.project.development_plan):
# don't create documentation for features
if not self.project.finished and not self.project.skip_steps:
if not self.project.finished:
current_progress_percent = round((i / total_tasks) * 100, 2)
for threshold in progress_thresholds:
if current_progress_percent > threshold and threshold not in documented_thresholds:
self.project.technical_writer.document_project(current_progress_percent)
if not self.project.skip_steps:
self.project.technical_writer.document_project(current_progress_percent)
documented_thresholds.add(threshold)
if self.project.tasks_to_load:
@@ -234,8 +235,8 @@ class Developer(Agent):
step = task_steps[i]
if 'code_change_description' in step:
print(f'Implementing code changes for `{step["code_change_description"]}`')
code_monkey = CodeMonkey(self.project, self)
updated_convo = code_monkey.implement_code_changes(convo, step['code_change_description'], step)
code_monkey = CodeMonkey(self.project)
updated_convo = code_monkey.implement_code_changes(convo, step)
if test_after_code_changes:
return self.test_code_changes(updated_convo, task_steps, i)
else:
@@ -252,8 +253,8 @@ class Developer(Agent):
def step_modify_file(self, convo, step, i, test_after_code_changes):
data = step['modify_file']
print(f'Updating existing file {data["name"]}: {data["code_change_description"].splitlines()[0]}')
code_monkey = CodeMonkey(self.project, self)
code_monkey.implement_code_changes(convo, data['code_change_description'], data)
code_monkey = CodeMonkey(self.project)
code_monkey.implement_code_changes(convo, data)
return {"success": True}
def step_command_run(self, convo, task_steps, i, success_with_cli_response=False):
@@ -404,13 +405,13 @@ class Developer(Agent):
def should_retry_step_implementation(self, step, step_implementation_try):
if step_implementation_try >= MAX_COMMAND_DEBUG_TRIES:
self.dev_help_needed(step)
return self.dev_help_needed(step)
print(color_red_bold('\n--------- LLM Reached Token Limit ----------'))
print(color_red_bold('Can I retry implementing the entire development step?'))
answer = None
while answer.lower() not in AFFIRMATIVE_ANSWERS:
while answer is None or answer.lower() not in AFFIRMATIVE_ANSWERS:
print('yes/no', type='buttons-only')
answer = styled_text(
self.project,
@@ -432,6 +433,8 @@ class Developer(Agent):
color_red_bold('\n\nCan you please make it work?'))
elif step['type'] == 'code_change':
help_description = step['code_change_description']
elif step['type'] == 'modify_file':
help_description = step['modify_file']['code_change_description']
elif step['type'] == 'human_intervention':
help_description = step['human_intervention_description']
@@ -450,6 +453,7 @@ class Developer(Agent):
answer = ''
while answer.lower() != 'continue':
print(color_red_bold('\n----------------------------- I need your help ------------------------------'))
print(color_red('\nHere are instructions for the issue I did not manage to solve:'))
print(extract_substring(str(help_description)))
print(color_red_bold('\n-----------------------------------------------------------------------------'))
print('continue', type='buttons-only')
@@ -529,6 +533,7 @@ class Developer(Agent):
response = self.should_retry_step_implementation(step, step_implementation_try)
if 'retry' in response:
# TODO we can rewind this convo even more
step_implementation_try += 1
convo.load_branch(function_uuid)
continue
elif 'success' in response:
@@ -543,7 +548,7 @@ class Developer(Agent):
else:
raise e
result = {"success": True} # if all steps are finished, the task has been successfully implemented
result = {"success": True} # if all steps are finished, the task has been successfully implemented... NOT!
convo.load_branch(function_uuid)
return self.task_postprocessing(convo, development_task, continue_development, result, function_uuid)
@@ -590,7 +595,7 @@ class Developer(Agent):
logger.info('response: %s', response)
user_feedback = response['user_input'] if 'user_input' in response else None
if user_feedback == 'continue':
self.project.remove_debugging_logs_from_all_files()
# self.project.remove_debugging_logs_from_all_files()
return {"success": True, "user_input": user_feedback}
if user_feedback is not None:

View File

@@ -9,6 +9,7 @@ from utils.files import setup_workspace
from prompts.prompts import ask_for_app_type, ask_for_main_app_definition, get_additional_info_from_openai, \
generate_messages_from_description, ask_user, get_prompt
from const.llm import END_RESPONSE
from const.messages import MAX_PROJECT_NAME_LENGTH
PROJECT_DESCRIPTION_STEP = 'project_description'
USER_STORIES_STEP = 'user_stories'
@@ -42,9 +43,16 @@ class ProductOwner(Agent):
if 'app_type' not in self.project.args:
self.project.args['app_type'] = ask_for_app_type()
if 'name' not in self.project.args:
question = 'What is the project name?'
print(question, type='ipc')
self.project.args['name'] = clean_filename(ask_user(self.project, question))
while True:
question = 'What is the project name?'
print(question, type='ipc')
project_name = ask_user(self.project, question)
if len(project_name) <= MAX_PROJECT_NAME_LENGTH:
break
else:
print(f"Hold your horses cowboy! Please, give project NAME with max {MAX_PROJECT_NAME_LENGTH} characters.")
self.project.args['name'] = clean_filename(project_name)
self.project.app = save_app(self.project)

View File

@@ -1,575 +0,0 @@
from unittest.mock import patch, MagicMock, call
from os.path import normpath, sep
import pytest
from helpers.agents.CodeMonkey import CodeMonkey
from const.function_calls import GET_FILE_TO_MODIFY
@pytest.mark.parametrize(
("content", "expected_blocks"),
[
("", []),
("no code blocks here", []),
("one\n```\ncode block\n```\nwithout CURRENT/NEW tags", []),
(
"Change\nCURRENT_CODE:\n```python\nold\n```\nNEW_CODE:\n```\nnew\n```\nEND\n",
[("old", "new")]
),
(
"\n".join([
"Change 1",
"CURRENT_CODE:",
"```python",
"old",
"```",
"NEW_CODE:",
"```javascript",
"```",
"END",
"Change 2",
"CURRENT_CODE:",
"```python",
"old",
"```",
"NEW_CODE:",
"```python",
"new",
"```",
"END",
]),
[("old", ""), ("old", "new")]
),
(
"\n".join([
"Code with markdown blocks in it",
"CURRENT_CODE:",
"```markdown",
"# Title",
"",
"```python",
"print('hello world')",
"```",
"Rest of markdown",
"```",
"NEW_CODE:",
"```markdown",
"# Title",
"",
"```python",
"print('goodbye world')",
"```",
"New markdown text here",
"```",
"END"
]),
[
(
"# Title\n\n```python\nprint('hello world')\n```\nRest of markdown",
"# Title\n\n```python\nprint('goodbye world')\n```\nNew markdown text here",
)
]
)
]
)
def test_get_code_blocks(content, expected_blocks):
code_monkey = CodeMonkey(None, None)
assert code_monkey.get_code_blocks(content) == expected_blocks
@pytest.mark.parametrize(
("haystack", "needle", "result", "error"),
[
### Oneliner old blocks ###
# Simple match
("first\nsecond\nthird", "second", "first\n@@NEW@@\nthird", None),
# No match
("first\nsecond\nthird", "fourth", None, "not found"),
# Too many matches on the same indentation level
("line\nline", "line", None, "found more than once"),
# Match, replacement should be indented
("first\n second\nthird", "second", "first\n @@NEW@@\nthird", None),
# Too many matches, on different indentation levels
("line\n line", "line", None, "found more than once"),
### Multiline old blocks ###
# Simple match
("first\nsecond\nthird", "second\nthird", "first\n@@NEW@@", None),
# No match
("first\nsecond\nthird", "second\n third", None, "not found"),
# Too many matches on the same indentation level
("a\nb\nc\nd\na\nb", "a\nb", None, "found more than once"),
# Too many matches on different indentation levels
("a\nb\nc\nd\n a\n b", "a\nb", None, "found more than once"),
# Match, replacement should be indented
("first\n second\n third", "second\nthird", "first\n @@NEW@@", None),
### Multiline with empty lines ###
# Simple match
("first\nsecond\n\nthird", "second\n\nthird", "first\n@@NEW@@", None),
# Indented match with empty lines also indented
("first\n second\n \n third", "second\n\nthird", "first\n @@NEW@@", None),
# Indented match with empty lines not indented
("first\n second\n\n third", "second\n\nthird", "first\n @@NEW@@", None),
]
)
def test_replace(haystack, needle, result, error):
code_monkey = CodeMonkey(None, None)
if error:
with pytest.raises(ValueError, match=error):
code_monkey.replace(haystack, needle, "@@NEW@@")
else:
assert code_monkey.replace(haystack, needle, "@@NEW@@") == result
@patch("helpers.agents.CodeMonkey.AgentConvo")
def test_identify_file_to_change(convo_cls):
    """identify_file_to_change() asks the LLM for the file and returns its name."""
    convo = convo_cls.return_value
    convo.send_message.return_value = {"file": "file.py"}

    result = CodeMonkey(None, None).identify_file_to_change("some description", [])

    assert result == "file.py"
    # The prompt must receive the change description and the known files list.
    expected_payload = {
        "code_changes_description": "some description",
        "files": []
    }
    convo.send_message.assert_called_once_with(
        "development/identify_files_to_change.prompt",
        expected_payload,
        GET_FILE_TO_MODIFY
    )
def test_dedent():
    """dedent() strips the common leading whitespace from the new block only.

    The old block is returned unchanged; the new block loses the old block's
    indentation level so it can be re-indented at the insertion point.
    """
    old_code = "    def foo():\n        print('bar')"
    new_code = "    def bar():\n        print('foo')"

    result_old, result_new = CodeMonkey.dedent(old_code, new_code)

    # Old code keeps its original 4-space indent.
    assert result_old == "    def foo():\n        print('bar')"
    # New code is shifted left by the old block's indentation.
    assert result_new == "def bar():\n    print('foo')"
def test_codemonkey_simple():
    """Happy path for the smart (diff-based) replace flow: one change, applied and saved."""
    original = "one to the\nfoo\nto the three to the four"
    project = MagicMock()
    project.get_all_coded_files.return_value = [
        {"path": "", "name": "main.py", "content": original},
    ]
    project.get_full_file_path.return_value = ("", normpath("/path/to/main.py"))

    convo = MagicMock()
    convo.send_message.return_value = (
        "## Change\nCURRENT_CODE:\n```\nfoo\n```\nNEW_CODE:\n```\nbar\n```\nEND"
    )

    monkey = CodeMonkey(project, None)
    # Force the diff-based path even though the fixture file is tiny.
    with patch.object(monkey, "SMART_REPLACE_THRESHOLD", 1):
        monkey.implement_code_changes(
            convo,
            "Modify all references from `foo` to `bar`",
            {"path": sep, "name": "main.py"},
        )

    project.get_all_coded_files.assert_called_once()
    project.get_full_file_path.assert_called_once_with(sep, "main.py")
    convo.send_message.assert_called_once_with(
        "development/implement_changes.prompt",
        {
            "full_output": False,
            "standalone": False,
            "code_changes_description": "Modify all references from `foo` to `bar`",
            "file_content": original,
            "file_name": "main.py",
            "files": project.get_all_coded_files.return_value,
        },
    )
    project.save_file.assert_called_once_with({
        "path": sep,
        "name": "main.py",
        "content": "one to the\nbar\nto the three to the four",
    })
def test_codemonkey_simple_replace():
    """When the LLM returns the entire new file, CodeMonkey saves it verbatim (full_output path)."""
    original = "one to the\nfoo\nto the three to the four"
    project = MagicMock()
    project.get_all_coded_files.return_value = [
        {"path": "", "name": "main.py", "content": original},
    ]
    project.get_full_file_path.return_value = ("", normpath("/path/to/main.py"))

    convo = MagicMock()
    convo.send_message.return_value = "```\none to the\nbar\nto the three to the four\n```"

    monkey = CodeMonkey(project, None)
    monkey.implement_code_changes(
        convo,
        "Modify all references from `foo` to `bar`",
        {"path": sep, "name": "main.py"},
    )

    project.get_all_coded_files.assert_called_once()
    project.get_full_file_path.assert_called_once_with(sep, "main.py")
    # Small file and no threshold patch: the prompt asks for the full file.
    convo.send_message.assert_called_once_with(
        "development/implement_changes.prompt",
        {
            "full_output": True,
            "standalone": False,
            "code_changes_description": "Modify all references from `foo` to `bar`",
            "file_content": original,
            "file_name": "main.py",
            "files": project.get_all_coded_files.return_value,
        },
    )
    project.save_file.assert_called_once_with({
        "path": sep,
        "name": "main.py",
        "content": "one to the\nbar\nto the three to the four",
    })
@patch("helpers.agents.CodeMonkey.trace_code_event")
def test_codemonkey_retry(trace_code_event):
    """
    When the first diff response references code that isn't in the file,
    CodeMonkey should re-prompt with an error description, apply the corrected
    diff from the retry, and report the failure via trace_code_event.
    """
    # Fixture is long enough that one failed block doesn't immediately trigger
    # the full-file replace fallback. NOTE: the "enought" typo is deliberate
    # filler text and must stay as-is (it's just file content).
    file_content = (
        "one to the\nfoo\nto the three to the four\n"
        "the rest of this file is filler so it's big enought not to "
        "trigger the full replace fallback immediately upon the first failure"
    )
    mock_project = MagicMock()
    mock_project.get_all_coded_files.return_value = [
        {
            "path": "",
            "name": "main.py",
            "content": file_content,
        },
    ]
    mock_project.get_full_file_path.return_value = ("", normpath("/path/to/main.py"))
    mock_convo = MagicMock()
    mock_convo.send_message.side_effect = [
        # Incorrect match
        "## Change\nCURRENT_CODE:\n```\ntwo\n```\nNEW_CODE:\n```\nbar\n```\nEND\n",
        # Corrected match on retry
        "Apologies, here is the corrected version. ## Change\nCURRENT_CODE:\n```\n foo\n```\nNEW_CODE:\n```\n bar\n```\nEND\n",
    ]
    cm = CodeMonkey(mock_project, None)
    # Force the smart (diff-based) replace path regardless of file size.
    with patch.object(cm, "SMART_REPLACE_THRESHOLD", 1):
        cm.implement_code_changes(
            mock_convo,
            "Modify all references from `foo` to `bar`",
            {
                "path": sep,
                "name": "main.py",
            }
        )
    mock_project.get_all_coded_files.assert_called_once()
    mock_project.get_full_file_path.assert_called_once_with(sep, "main.py")
    # First call is the initial change request; the second is the retry prompt
    # explaining exactly why the first diff failed to apply.
    mock_convo.send_message.assert_has_calls([
        call(
            "development/implement_changes.prompt", {
                "full_output": False,
                "standalone": False,
                "code_changes_description": "Modify all references from `foo` to `bar`",
                "file_content": file_content,
                "file_name": "main.py",
                "files": mock_project.get_all_coded_files.return_value,
            }
        ),
        call(
            "utils/llm_response_error.prompt", {
                "error": (
                    "Error in change 1:\n"
                    "Old code block not found in the original file:\n```\ntwo\n```\n"
                    "Old block *MUST* contain the exact same text (including indentation, empty lines, etc.) "
                    "as the original file in order to match."
                ),
            }
        )
    ])
    # The retry's corrected diff is what ends up saved.
    mock_project.save_file.assert_called_once_with({
        "path": sep,
        "name": "main.py",
        "content": file_content.replace("foo", "bar"),
    })
    # The initial failure is still reported for telemetry.
    trace_code_event.assert_called_once_with(
        "codemonkey-file-update-error",
        {
            "error": "replace-errors",
            "llm_response": "## Change\nCURRENT_CODE:\n```\ntwo\n```\nNEW_CODE:\n```\nbar\n```\nEND\n",
            "details": [(1, (
                'Old code block not found in the original file:\n```\ntwo\n```\n'
                'Old block *MUST* contain the exact same text (including indentation, empty lines, etc.) '
                'as the original file in order to match.'
            ))]
        }
    )
@patch("helpers.agents.CodeMonkey.trace_code_event")
def test_codemonkey_partial_retry(trace_code_event):
    """
    When only some of the diff blocks apply, CodeMonkey keeps the successful
    ones, re-prompts listing only the failed blocks, and merges the corrected
    retry on top of the partially-updated file.
    """
    # Long enough that one failed block doesn't trigger the full-file replace
    # fallback. NOTE: the "enought" typo is deliberate filler text.
    file_content = (
        "one to the\nfoo\nto the three to the four\n"
        "the rest of this file is filler so it's big enought not to\n"
        "trigger\nthe full replace fallback immediately upon the first failure"
    )
    mock_project = MagicMock()
    mock_project.get_all_coded_files.return_value = [
        {
            "path": "",
            "name": "main.py",
            "content": file_content,
        },
    ]
    mock_project.get_full_file_path.return_value = ("", normpath("/path/to/main.py"))
    mock_convo = MagicMock()
    mock_convo.send_message.side_effect = [
        # Incorrect match
        (
            "## Change 1\nCURRENT_CODE:\n```\ntwo\n```\nNEW_CODE:\n```\nbar\n```\nEND\n"
            "## Change 2\nCURRENT_CODE:\n```\ntrigger\n```\nNEW_CODE:\n```\ncause\n```\nEND\n"
        ),
        "Apologies, here is the corrected version. ## Change 1\nCURRENT_CODE:\n```\n foo\n```\nNEW_CODE:\n```\n bar\n```\nEND\n",
    ]
    cm = CodeMonkey(mock_project, None)
    # Force the smart (diff-based) replace path regardless of file size.
    with patch.object(cm, "SMART_REPLACE_THRESHOLD", 1):
        cm.implement_code_changes(
            mock_convo,
            "Modify all references from `foo` to `bar` and `trigger` to `cause`",
            {
                "path": sep,
                "name": "main.py",
            }
        )
    mock_project.get_all_coded_files.assert_called_once()
    mock_project.get_full_file_path.assert_called_once_with(sep, "main.py")
    # The retry prompt must say some changes applied and list only the failures.
    mock_convo.send_message.assert_has_calls([
        call(
            "development/implement_changes.prompt", {
                "full_output": False,
                "standalone": False,
                "code_changes_description": "Modify all references from `foo` to `bar` and `trigger` to `cause`",
                "file_content": file_content,
                "file_name": "main.py",
                "files": mock_project.get_all_coded_files.return_value,
            }
        ),
        call(
            "utils/llm_response_error.prompt", {
                "error": (
                    "Some changes were applied, but these failed:\n"
                    "Error in change 1:\n"
                    "Old code block not found in the original file:\n```\ntwo\n```\n"
                    "Old block *MUST* contain the exact same text (including indentation, empty lines, etc.) "
                    "as the original file in order to match.\n"
                    "Please fix the errors and try again (only output the blocks that failed to update, not all of them)."
                ),
            }
        )
    ])
    # Both the successful change from the first response ("trigger" -> "cause")
    # and the corrected retry ("foo" -> "bar") must land in the saved file.
    mock_project.save_file.assert_called_once_with({
        "path": sep,
        "name": "main.py",
        "content": file_content.replace("foo", "bar").replace("trigger", "cause")
    })
    # The partial failure is still reported for telemetry.
    trace_code_event.assert_called_once_with(
        "codemonkey-file-update-error",
        {
            "error": "replace-errors",
            "llm_response": (
                "## Change 1\nCURRENT_CODE:\n```\ntwo\n```\nNEW_CODE:\n```\nbar\n```\nEND\n"
                "## Change 2\nCURRENT_CODE:\n```\ntrigger\n```\nNEW_CODE:\n```\ncause\n```\nEND\n"
            ),
            "details": [(1, (
                'Old code block not found in the original file:\n```\ntwo\n```\n'
                'Old block *MUST* contain the exact same text (including indentation, empty lines, etc.) '
                'as the original file in order to match.'
            ))]
        }
    )
@patch("helpers.agents.CodeMonkey.trace_code_event")
def test_codemonkey_fallback(trace_code_event):
    """
    If the diff response can't be parsed at all (here: END nested inside the
    code fence) and the file is short, CodeMonkey falls back to requesting the
    complete new file content and records both errors for telemetry.
    """
    mock_project = MagicMock()
    mock_project.get_all_coded_files.return_value = [
        {
            "path": "",
            "name": "main.py",
            "content": "one to the\nfoo\nto the three to the four"
        },
    ]
    mock_project.get_full_file_path.return_value = ("", normpath("/path/to/main.py"))
    mock_convo = MagicMock()
    mock_convo.send_message.side_effect = [
        # Incorrect match (END within block), will cause immediate fallback because of short file
        "1 ## Change\nCURRENT_CODE:\n```\nfoo\n```\nNEW_CODE:\n```\nbar\nEND\n```\n",
        # Fallback returns entire new file
        "```\none to the\nbar\nto the three to the four\n```\n",
    ]
    cm = CodeMonkey(mock_project, None)
    # Force the diff-based path first, so the failure exercises the fallback.
    with patch.object(cm, "SMART_REPLACE_THRESHOLD", 1):
        cm.implement_code_changes(
            mock_convo,
            "Modify all references from `foo` to `bar`",
            {
                "path": sep,
                "name": "main.py",
            }
        )
    mock_project.get_all_coded_files.assert_called_once()
    mock_project.get_full_file_path.assert_called_once_with(sep, "main.py")
    # First call uses the diff prompt; the fallback re-sends the same prompt
    # with full_output=True to request the whole file.
    mock_convo.send_message.assert_has_calls([
        call(
            "development/implement_changes.prompt", {
                "full_output": False,
                "standalone": False,
                "code_changes_description": "Modify all references from `foo` to `bar`",
                "file_content": "one to the\nfoo\nto the three to the four",
                "file_name": "main.py",
                "files": mock_project.get_all_coded_files.return_value,
            }
        ),
        call(
            'development/implement_changes.prompt', {
                "full_output": True,
                "standalone": False,
                "code_changes_description": "Modify all references from `foo` to `bar`",
                "file_content": "one to the\nfoo\nto the three to the four",
                "file_name": "main.py",
                "files": mock_project.get_all_coded_files.return_value,
            }
        )
    ])
    mock_project.save_file.assert_called_once_with({
        "path": sep,
        "name": "main.py",
        "content": "one to the\nbar\nto the three to the four"
    })
    # Both the parse failure and the fallback itself are traced.
    trace_code_event.assert_has_calls([
        call(
            'codemonkey-file-update-error',
            {
                'error': 'error-parsing-blocks',
                'llm_response': '1 ## Change\nCURRENT_CODE:\n```\nfoo\n```\nNEW_CODE:\n```\nbar\nEND\n```\n'
            }
        ),
        call(
            'codemonkey-file-update-error',
            {
                'error': 'fallback-complete-replace',
                'llm_response': '1 ## Change\nCURRENT_CODE:\n```\nfoo\n```\nNEW_CODE:\n```\nbar\nEND\n```\n'
            }
        )
    ])
@patch("helpers.agents.CodeMonkey.trace_code_event")
@patch("helpers.agents.CodeMonkey.get_file_contents")
@patch("helpers.agents.CodeMonkey.AgentConvo")
def test_codemonkey_implement_changes_after_debugging(MockAgentConvo, mock_get_file_contents, trace_code_event):
    """
    Test the flow that figures out which file needs to change, used after
    debugging when we only have a description of the changes (no file name).
    Also tests the standalone conversation path (a new AgentConvo is created
    when none is passed in), even though that's not what happens after
    debugging.
    """
    mock_project = MagicMock()
    mock_project.get_all_coded_files.return_value = []
    mock_project.get_full_file_path.return_value = ("", "/path/to/main.py")
    mock_convo = MockAgentConvo.return_value
    mock_convo.send_message.return_value = "## Change\nCURRENT_CODE:\n```\nfoo\n```\nNEW_CODE:\n```\nbar\n```\nEND"
    mock_get_file_contents.return_value = {
        "name": "main.py",
        "path": "",
        "content": "one to the\nfoo\nto the three to the four",
        "full_path": "/path/to/main.py",
    }
    cm = CodeMonkey(mock_project, None)
    # No file info passed in: CodeMonkey must ask identify_file_to_change.
    with patch.object(cm, "identify_file_to_change") as mock_identify_file_to_change:
        with patch.object(cm, "SMART_REPLACE_THRESHOLD", 1):
            mock_identify_file_to_change.return_value = "/main.py"
            cm.implement_code_changes(
                None,
                "Modify all references from `foo` to `bar`",
                {},
            )
    # With convo=None, a fresh standalone conversation is created.
    MockAgentConvo.assert_called_once_with(cm)
    mock_project.get_all_coded_files.assert_called_once()
    mock_project.get_full_file_path.assert_called_once_with("/", "main.py")
    mock_convo.send_message.assert_called_once_with(
        "development/implement_changes.prompt", {
            "full_output": False,
            "standalone": True,
            "code_changes_description": "Modify all references from `foo` to `bar`",
            "file_content": "one to the\nfoo\nto the three to the four",
            "file_name": "main.py",
            "files": mock_project.get_all_coded_files.return_value,
        })
    mock_project.save_file.assert_called_once_with({
        "path": "/",
        "name": "main.py",
        "content": "one to the\nbar\nto the three to the four"
    })
    # Clean run: nothing should have been traced as an error.
    trace_code_event.assert_not_called()
@patch("helpers.agents.CodeMonkey.trace_code_event")
@patch("helpers.agents.CodeMonkey.get_file_contents")
def test_codemonkey_original_file_not_found(get_file_contents_mock, _trace_code_event):
    """If the original file can't be read, CodeMonkey requests the complete file content and saves it."""
    project = MagicMock()
    project.get_all_coded_files.return_value = []
    project.get_full_file_path.return_value = ("", normpath("/path/to/main.py"))

    convo = MagicMock()
    convo.send_message.return_value = "```\none to the\nbar\nto the three to the four\n```\n"
    # Simulate a missing original file.
    get_file_contents_mock.side_effect = ValueError("File not found: /path/to/main.py")

    monkey = CodeMonkey(project, None)
    with patch.object(monkey, "SMART_REPLACE_THRESHOLD", 1):
        monkey.implement_code_changes(
            convo,
            "Modify all references from `foo` to `bar`",
            {"path": sep, "name": "main.py"},
        )

    project.get_all_coded_files.assert_called_once()
    project.get_full_file_path.assert_called_once_with(sep, "main.py")
    # With no readable original, the prompt asks for the full file content.
    convo.send_message.assert_called_once_with(
        'development/implement_changes.prompt',
        {
            "full_output": True,
            "standalone": False,
            "code_changes_description": "Modify all references from `foo` to `bar`",
            "file_content": "",
            "file_name": "main.py",
            "files": project.get_all_coded_files.return_value,
        }
    )
    project.save_file.assert_called_once_with({
        "path": sep,
        "name": "main.py",
        "content": "one to the\nbar\nto the three to the four",
    })

View File

@@ -336,6 +336,8 @@ def check_if_command_successful(convo, command, cli_response, response, exit_cod
'step_index': step_index,
})
logger.debug(f'LLM response to ran_command.prompt: {response}')
if response == 'DONE':
convo.remove_last_x_messages(2)
return response

View File

@@ -1,3 +1,5 @@
import json
from const.llm import MAX_GPT_MODEL_TOKENS
@@ -27,8 +29,16 @@ class TooDeepRecursionError(Exception):
class ApiError(Exception):
def __init__(self, message):
def __init__(self, message, response=None):
self.message = message
self.response = response
self.response_json = None
if response and hasattr(response, "text"):
try:
self.response_json = json.loads(response.text)
except Exception: # noqa
pass
super().__init__(message)

View File

@@ -0,0 +1,27 @@
**IMPORTANT**
You must not tell me to run a command in the database or anything OS related - only if some dependencies need to be installed. If there is a need to run an OS related command, specifically tell me that this should be labeled as "Human Intervention" and explain what the human needs to do.
Avoid using "Human Intervention" if possible. You should NOT use "Human Intervention" for anything other than steps that you can't execute. Here are a few examples of when and how to use "Human Intervention":
------------------------start_of_example_1---------------------------
Here is an example of good response for the situation where it seems like 3rd party API, in this case Facebook, is not working:
* "Human Intervention"
"1. Check latest Facebook API documentation for updates on endpoints, parameters, or authentication.
2. Verify Facebook API key/authentication and request format to ensure they are current and correctly implemented.
3. Use REST client tools like Postman or cURL to directly test the Facebook API endpoints.
4. Check the Facebook API's status page for any reported downtime or service issues.
5. Try calling the Facebook API from a different environment to isolate the issue."
------------------------end_of_example_1---------------------------
------------------------start_of_example_2---------------------------
Here is an example of good response for the situation where the user needs to enable some settings in their Gmail account:
* "Human Intervention"
"To enable sending emails from your Node.js app via your Gmail account, you need to do the following:
1. Log in to your Gmail account.
2. Go to 'Manage your Google Account' > Security.
3. Scroll down to 'Less secure app access' and turn it on.
4. Under 'Signing in to Google', select 'App Passwords'. (You may need to sign in again)
5. At the bottom, click 'Select app' and choose the app youre using.
6. Click 'Generate'.
Then, use your gmail address and the password generated in the step #6 and put it into the .env file."
------------------------end_of_example_2---------------------------

View File

@@ -1,8 +1,8 @@
{% if task_steps and step_index is not none -%}
The current task has been split into multiple steps, and each step is one of the following:
* `command` - command to run
* `save_file` or `code_change` - create new or update existing file
* `modify_file` - update large existing file
* `save_file` - create a NEW file
* `modify_file` or `code_change` - update ONE EXISTING file
* `human_intervention` - if the human needs to do something
{% if step_index > 0 %}Here is the list of steps that have been executed:

View File

@@ -1,7 +1,7 @@
How can a human user test if this task was completed successfully? If you specify a command that needs to be run or give example, be very specific. You don't want the user to have to think anything through but rather that they just follow your instructions. Note that any commands will be run on a {{ os }} machine.
**IMPORTANT**
In case the task can be tested by making an API request, do not suggest how can a request be made with Postman but rather write a full cURL command that the user can just run.
In case the task can be tested by making an API request, you should always prefer to test functionality in the browser. In case you can't do that, do not suggest how can a request be made with Postman but rather write a full cURL command that the user can just run.
**IMPORTANT**
Do not require any code writing from the user for testing this task.

View File

@@ -6,159 +6,14 @@ This file needs to be modified by these instructions:
---------------------start_of_instructions------------------------------
{{ code_changes_description }}
----------------------end_of_instructions-----------------------------
{% if full_output %}
I want you to implement the instructions and show me the COMPLETE NEW VERSION of this file in this format:
-----------------------format----------------------------
```
the full contents of the updated file, without skipping over any content
```
{{ logs_and_error_handling }}
------------------------end_of_format---------------------------
**IMPORTANT**: Your reply should not omit any code in the new implementation or substitute anything with comments like `// .. rest of the code goes here ..`, because I will overwrite the existing file with the content you provide. Output ONLY the content for this file, without additional explanation.
{% else %}
I want you to implement the instructions and show me the exact changes (`diff`) in the file `{{ file_name }}`. Reply only with the modifications (`diff`) in the following format:
-----------------------start_of_format----------------------------
CURRENT_CODE:
```
(All lines of code from specific code block in the current file that will be replaced by the code under NEW_CODE.)
```
NEW_CODE:
```
(All lines of code that will replace the code under CURRENT_CODE. That includes new lines of code and old lines of code that are not being changed but are part of that code block.)
```
END
------------------------end_of_format---------------------------
Once you respond in this format, I will find all occurrences of CURRENT_CODE in the file `{{ file_name }}` and replace them with the code under NEW_CODE.
**IMPORTANT**: Your reply MUST NOT omit any code in the new implementation or substitute anything with comments like `// .. rest of the code goes here ..` or `# insert existing code here`, because I will overwrite the existing file with the content you provide. Output ONLY the content for this file, without additional explanation. Your output MUST start with ``` and MUST end with ``` and include only the complete file contents.
{{ logs_and_error_handling }}
**IMPORTANT**
Here are rules how to give good response. You have to strictly follow all rules at all times:
Rule #1:
This is most important rule and there must never be reason to break this rule!
When the instructions contain hints such as `# .. insert existing code here ...`, it is imperative to interpret and insert the relevant code from the original. Never omit any code that belongs in the new block, and never replace any code with comments such as `// the rest of the code goes here`, '# existing code from another file', or similar, even if the instructions explicitly request it!
If the instruction examples reference existing code to be pasted in place, always use the specified code from the previous messages in this conversation instead of copying the comment, as illustrated in the following example:
------------------------start_of_example_1---------------------------
Instructions: "Rename function increase() { // ... existing code } to function inc() { // ... existing code } and increase counter by 10 instead of 1."
------------------------BAD response for example_1:---------------------------
CURRENT_CODE:
```
function increase() {
// ... existing code
}
```
NEW_CODE:
```
function inc() {
// ... existing code
return value + 10;
}
```
------------------------GOOD response for example_1:---------------------------
CURRENT_CODE:
```
function increase(value) {
if (typeof value !== 'number') {
throw new Error('Argument must be number');
}
return value + 1;
}
```
NEW_CODE:
```
function inc(value) {
if (typeof value !== 'number') {
throw new Error('Argument must be number');
}
return value + 10;
}
```
END
------------------------end_of_example_1---------------------------
Rule #2:
For each change that needs to be done, you must show exactly one CURRENT_CODE code block and one NEW_CODE code block. You can think of this as difference (`diff`) between the current implementation and the new implementation.
If there are no lines of code that need to be replaced by the NEW_CODE (if the NEW_CODE needs to be added into the CURRENT_CODE), show a couple of lines of code in the CURRENT_CODE before the place where NEW_CODE needs to be added.
Here is an example of how to add one line `i--;` in the for loop:
------------------------start_of_example_2---------------------------
CURRENT_CODE:
```
let i = 0;
i++;
for (let j = 0; j < 100; j++) {
```
NEW_CODE:
```
let i = 0;
i++;
for (let j = 0; j < 100; j++) {
i--;
```
END
------------------------end_of_example_2---------------------------
Here's an example how to add code to the beginning of the file:
------------------------start_of_example_3---------------------------
CURRENT_CODE:
```
const app = express();
const bodyParser = require('body-parser');
```
NEW_CODE:
```
const express = require('express');
const app = express();
const bodyParser = require('body-parser');
```
END
------------------------end_of_example_3---------------------------
Rule #3:
Do not show the entire file under CURRENT_CODE and NEW_CODE but only the lines that need to be replaced. If any lines should be left as they are in CURRENT_CODE, do not write them.
Rule #4:
You must output the CURRENT_CODE exactly as it is in the original file, including the indentation from the original code, as it will be used for search-replace, and it should only match the original file in ONE place.
In the NEW_CODE, remember to follow the same coding style that is used in the rest of the file. Pay special attention to the indentation of the new code and make sure to include all the required old and new code, without omitting anything.
Pay very close attention to parenthesis and make sure that when CURRENT_CODE is replaced with NEW_CODE there are no extra parenthesis or any parenthesis missing.
Rule #5:
Pay attention to the lines of code that you select in the CURRENT_CODE and the lines that you add to NEW_CODE because all lines under CURRENT_CODE will be replaced with the lines under NEW_CODE. Sometimes you might duplicate the currently implemented lines by not putting them under CURRENT_CODE but repeating them under NEW_CODE - you must **NOT** do that.
Here is an example. Let's say that you have a code like this:
```
let someClass = new SomeClass();
let methodValue = someClass.someMethod();
```
And you want to add a line `methodValue += 22;` below the line `let methodValue = someClass.someMethod();`. Here is an example of what you must **NOT** do:
------------------------start_of_incorrect_example---------------------------
CURRENT_CODE:
```
let methodValue = someClass.someMethod()
```
NEW_CODE:
```
let someClass = new SomeClass();
let methodValue = someClass.someMethod();
methodValue += 22;
```
END
------------------------end_of_incorrect_example---------------------------
See how the line `let someClass = new SomeClass();` was added under NEW_CODE but it wasn't mentioned under CURRENT_CODE so the result will be that this line will be duplicated in the end result once the line `let methodValue = someClass.someMethod()` gets replaced with lines under NEW_CODE. Instead, this would be a correct way to do it:
------------------------start_of_correct_example---------------------------
CURRENT_CODE:
```
let methodValue = someClass.someMethod()
```
NEW_CODE:
```
let methodValue = someClass.someMethod();
methodValue += 22;
```
END
------------------------end_of_correct_example---------------------------
{% endif %}

View File

@@ -47,9 +47,6 @@ When there is an error in the code, the best way to debug the issue is to unders
**IMPORTANT**
When you think about in which file should the new code go to, always try to make files as small as possible and put code in more smaller files rather than in one big file. Whenever a file becomes too large (more than 50 lines of code) split it into smaller files.
{# Do not leave any parts of the code to be written afterwards. Make sure that all the code you provide is working and does as outlined in the description area above.
#}
You do not need to make any automated tests work.
**IMPORTANT**
You must not tell me to run a command in the database or anything OS related - only if some dependencies need to be installed. If there is a need to run an OS related command, specifically tell me that this should be labeled as "Human Intervention" and explain what the human needs to do.
{{ human_intervention_explanation }}

View File

@@ -3,8 +3,8 @@ Ok, now, take your previous message that starts with `{{ instructions_prefix }}`
Each step can be either:
* `command` - command to run (must be able to run on a {{ os }} machine, assume current working directory is project root folder)
* `save_file` - create new or update ONE existing file; use this if the existing file is smaller than 20 lines or if many lines need to be changed
* `modify_file` - update ONE existing file; use this if the existing file is larger than 20 lines and only a few lines need to be updated
* `save_file` - create a NEW file
* `modify_file` - update ONE EXISTING file
* `human_intervention` - if you need the human to do something, use this type of step and explain in details what you want the human to do.
If the step is of type `save_file` or `modify_file`, it needs to contain instructions on how to change only ONE file.

View File

@@ -0,0 +1,55 @@
A developer on your team has been working on this task:
{{ instructions }}
Based on this instructions, the developer has made changes to file `{{ file_name }}`.
Here is the original content of this file:
```
{{ old_content }}
```
Here is the diff of the changes:
{% for hunk in hunks %}## Hunk {{ loop.index }}
```diff
{{ hunk }}
```
{% endfor %}
As you can see, there {% if hunks|length == 1 %}is only one hunk in this diff, and it{% else %}are {{ hunks|length }} hunks in this diff, and each{% endif %} starts with the `@@` header line.
Think carefully about the instructions and review the proposed changes. For each hunk of change, decide whether it should be applied or should be ignored (for example if it is a code deletion or change that wasn't asked for). Finally, if the changes miss something that was in the instructions, mention that.
Note that the developer may add logging (including `gpt_pilot_debugging_log`) or error handling that's not explicitly asked for, but is a part of good development practice. Unless these logging and error handling additions break something, you should not ignore those changes.
Here is an example output if 3 of 4 hunks in the change should be applied and one of them should be ignored, and no other changes are needed:
```
{
"hunks": [
{
"number": 1,
"decision": "apply",
"reason": "Some explanation why this part of the change is important",
},
{
"number": 2,
"decision": "apply",
"reason": "Another explanation, for the 2nd hunk",
},
{
"number": 3,
"decision": "ignore",
"reason": "This hunk accidentally deletes important code",
},
{
"number": 4,
"decision": "apply",
"reason": "Explanation why the fourth hunk should be included in the change",
},
],
"review_notes": "General review notes, if something is missing from the change you can comment about it here"
}
```
IMPORTANT: The developer that wrote this is sloppy and has probably deleted some parts of the code that should not be deleted. Pay special attention to that in your review!

View File

@@ -28,7 +28,6 @@ You do not need to make any automated tests work.
{% endif %}
DO NOT specify commands to create any folders or files, they will be created automatically - just specify the relative path to each file that needs to be written.
**IMPORTANT**
You must not tell me to run a command in the database or anything OS related - only if some dependencies need to be installed. If there is a need to run an OS related command, specifically tell me that this should be labeled as "Human Intervention" and explain what the human needs to do.
{{ human_intervention_explanation }}
Never use the port 5000 to run the app, it's reserved.

View File

@@ -1 +1 @@
You are a full stack software developer who works in a software development agency. You write very modular code. Your job is to implement tasks that your tech lead assigns you.
You are a full stack software developer that works in a software development agency. You write modular, clean, maintainable, production-ready code. Your job is to implement tasks that your tech lead assigns you.

View File

@@ -9,7 +9,7 @@ from traceback import format_exc
from prompt_toolkit.styles import Style
from jsonschema import validate, ValidationError
from utils.style import color_red
from utils.style import color_red, color_yellow
from typing import List
from const.llm import MAX_GPT_MODEL_TOKENS, API_CONNECT_TIMEOUT, API_READ_TIMEOUT
from const.messages import AFFIRMATIVE_ANSWERS
@@ -151,7 +151,7 @@ def create_gpt_chat_completion(messages: List[dict], req_type, project,
if isinstance(e, ApiError):
raise e
else:
raise ApiError("Error making LLM API request: {e}") from e
raise ApiError(f"Error making LLM API request: {e}") from e
def delete_last_n_lines(n):
for _ in range(n):
@@ -199,8 +199,6 @@ def retry_on_exception(func):
del args[0]['function_buffer']
def wrapper(*args, **kwargs):
wait_duration_ms = None
while True:
try:
# spinner_stop(spinner)
@@ -255,17 +253,7 @@ def retry_on_exception(func):
print(color_red(f"Error calling LLM API: The request exceeded the maximum token limit (request size: {n_tokens}) tokens."))
raise TokenLimitError(n_tokens, MAX_GPT_MODEL_TOKENS)
if "rate_limit_exceeded" in err_str:
# Extracting the duration from the error string
match = re.search(r"Please try again in (\d+)ms.", err_str)
if match:
# spinner = spinner_start(colored("Rate limited. Waiting...", 'yellow'))
if wait_duration_ms is None:
wait_duration_ms = int(match.group(1))
elif wait_duration_ms < 6000:
# waiting 6ms isn't usually long enough - exponential back-off until about 6 seconds
wait_duration_ms *= 2
logger.debug(f'Rate limited. Waiting {wait_duration_ms}ms...')
time.sleep(wait_duration_ms / 1000)
rate_limit_exceeded_sleep(e, err_str)
continue
print(color_red('There was a problem with request to openai API:'))
@@ -295,6 +283,39 @@ def retry_on_exception(func):
return wrapper
def rate_limit_exceeded_sleep(e, err_str):
    """Wait out an API rate limit before the caller retries the request.

    Parses the suggested retry interval from the error text — the API reports
    it as ``<n>ms``, ``<m>m<s>s`` or ``<s>s`` — adds a configurable safety
    buffer (``RATE_LIMIT_EXTRA_BUFFER`` env var, default 6 seconds), tells the
    user what is happening, and blocks for the computed duration.

    :param e: exception raised by the API call; if it is an ApiError carrying a
        ``response_json`` with an ``error`` entry, that message is shown to the
        user instead of a generic one.
    :param err_str: raw error text to parse the suggested retry interval from.
    """
    # Extra buffer on top of the API-suggested wait; defaults to 6 seconds.
    extra_buffer_time = float(os.getenv('RATE_LIMIT_EXTRA_BUFFER', 6))
    wait_duration_sec = extra_buffer_time  # default wait if no interval can be parsed

    # Try the three formats from most to least specific. The trailing period
    # is escaped (\.) so it matches only the literal '.' in the API message,
    # not any character as the unescaped '.' would.
    match = re.search(r'Please try again in (\d+)ms\.', err_str)
    if match:
        milliseconds = int(match.group(1))
        wait_duration_sec += milliseconds / 1000
    else:
        # Minutes + seconds, e.g. "2m5.5s"
        match = re.search(r'Please try again in (\d+)m(\d+\.\d+)s\.', err_str)
        if match:
            minutes = int(match.group(1))
            seconds = float(match.group(2))
            wait_duration_sec += minutes * 60 + seconds
        else:
            # Seconds only, e.g. "1.2s"
            match = re.search(r'(\d+\.\d+)s\.', err_str)
            if match:
                seconds = float(match.group(1))
                wait_duration_sec += seconds

    logger.debug(f'Rate limited. Waiting {wait_duration_sec} seconds...')
    # Prefer the API's own error message when the exception carries one.
    if isinstance(e, ApiError) and hasattr(e, "response_json") and e.response_json is not None and "error" in e.response_json:
        message = e.response_json["error"]["message"]
    else:
        message = "Rate limited by the API (we're over 'tokens per minute' or 'requests per minute' limit)"
    print(color_yellow(message))
    print(color_yellow(f"Retrying in {wait_duration_sec} second(s)... with extra buffer of: {extra_buffer_time} second(s)"))
    time.sleep(wait_duration_sec)
@retry_on_exception
def stream_gpt_completion(data, req_type, project):
"""
@@ -392,7 +413,7 @@ def stream_gpt_completion(data, req_type, project):
project.dot_pilot_gpt.log_chat_completion(endpoint, model, req_type, data['messages'], response.text)
logger.info(f'problem with request (status {response.status_code}): {response.text}')
telemetry.record_llm_request(token_count, time.time() - request_start_time, is_error=True)
raise ApiError(f"API responded with status code: {response.status_code}. Request token size: {token_count} tokens. Response text: {response.text}")
raise ApiError(f"API responded with status code: {response.status_code}. Request token size: {token_count} tokens. Response text: {response.text}", response=response)
# function_calls = {'name': '', 'arguments': ''}

View File

@@ -309,7 +309,7 @@ class TestSchemaValidation:
def test_assert_json_schema(self):
# When assert_json_schema is called with valid JSON
# Then no errors
assert(assert_json_schema('{"foo": "bar"}', [self.function]))
assert (assert_json_schema('{"foo": "bar"}', [self.function]))
def test_assert_json_schema_incomplete(self):
# When assert_json_schema is called with incomplete JSON
@@ -333,7 +333,7 @@ class TestSchemaValidation:
assert_json_schema('{"foo": "bar"}', [self.function])
def test_DEVELOPMENT_PLAN(self):
assert(assert_json_schema('''
assert (assert_json_schema('''
{
"plan": [
{
@@ -368,28 +368,33 @@ class TestLlmConnection:
monkeypatch.setenv('OPENAI_API_KEY', 'secret')
error_text = '''{
"error": {
"message": "Rate limit reached for 10KTPM-200RPM in organization org-OASFC7k1Ff5IzueeLArhQtnT on tokens per min. Limit: 10000 / min. Please try again in 6ms. Contact us through our help center at help.openai.com if you continue to have issues.",
"type": "tokens",
"param": null,
"code": "rate_limit_exceeded"
}
}'''
error_texts = [
"Please try again in 6ms.",
"Please try again in 1.2s.",
"Please try again in 2m5.5s.",
]
mock_responses = [Mock(status_code=429, text='''{
"error": {
"message": "Rate limit reached for 10KTPM-200RPM in organization org-OASFC7k1Ff5IzueeLArhQtnT on tokens per min. Limit: 10000 / min. ''' + error_text + '''",
"type": "tokens",
"param": null,
"code": "rate_limit_exceeded"
}
}''') for error_text in error_texts]
content = 'DONE'
success_text = '{"id": "gen-123", "choices": [{"index": 0, "delta": {"role": "assistant", "content": "' + content + '"}}]}'
error_response = Mock()
error_response.status_code = 429
error_response.text = error_text
mock_success_response = Mock()
mock_success_response.status_code = 200
mock_success_response.iter_lines.return_value = [success_text.encode('utf-8')]
mock_response = Mock()
mock_response.status_code = 200
mock_response.iter_lines.return_value = [success_text.encode('utf-8')]
# add the success at the end of the error requests
mock_responses.append(mock_success_response)
mock_post.side_effect = mock_responses
mock_post.side_effect = [error_response, error_response, error_response, error_response, error_response,
error_response, error_response, error_response, error_response, error_response,
error_response, error_response, mock_response]
wrapper = retry_on_exception(stream_gpt_completion)
data = {
'model': 'gpt-4',
@@ -401,11 +406,7 @@ class TestLlmConnection:
# Then
assert response == {'text': 'DONE'}
# assert mock_sleep.call_count == 9
assert mock_sleep.call_args_list == [call(0.006), call(0.012), call(0.024), call(0.048), call(0.096),
call(0.192), call(0.384), call(0.768), call(1.536), call(3.072),
call(6.144), call(6.144)]
# mock_sleep.call
assert mock_sleep.call_args_list == [call(6.006), call(7.2), call(131.5)]
@patch('utils.llm_connection.requests.post')
def test_stream_gpt_completion(self, mock_post, monkeypatch):
@@ -440,15 +441,14 @@ class TestLlmConnection:
# Then
assert response == {'text': '{\n "foo": "bar",\n "prompt": "Hello",\n "choices": []\n}'}
@pytest.mark.uses_tokens
@pytest.mark.parametrize('endpoint, model', [
('OPENAI', 'gpt-4'), # role: system
('OPENROUTER', 'openai/gpt-3.5-turbo'), # role: user
('OPENROUTER', 'meta-llama/codellama-34b-instruct'), # rule: user, is_llama
('OPENROUTER', 'google/palm-2-chat-bison'), # role: user/system
('OPENAI', 'gpt-4'), # role: system
('OPENROUTER', 'openai/gpt-3.5-turbo'), # role: user
('OPENROUTER', 'meta-llama/codellama-34b-instruct'), # rule: user, is_llama
('OPENROUTER', 'google/palm-2-chat-bison'), # role: user/system
('OPENROUTER', 'google/palm-2-codechat-bison'),
('OPENROUTER', 'anthropic/claude-2'), # role: user, is_llama
('OPENROUTER', 'anthropic/claude-2'), # role: user, is_llama
])
def test_chat_completion_Architect(self, endpoint, model, monkeypatch):
# Given
@@ -459,9 +459,9 @@ class TestLlmConnection:
agent = Architect(project)
convo = AgentConvo(agent)
convo.construct_and_add_message_from_prompt('architecture/technologies.prompt',
{
'name': 'Test App',
'app_summary': '''
{
'name': 'Test App',
'app_summary': '''
The project involves the development of a web-based chat application named "Test_App".
In this application, users can send direct messages to each other.
However, it does not include a group chat functionality.
@@ -471,19 +471,19 @@ picture and status updates, as well as a feature for chat history. The project m
as a monolithic application, regardless of any other suggested methods.
The project's specifications are subject to the project manager's discretion, implying a need for
solution-oriented decision-making in areas where precise instructions were not provided.''',
'app_type': 'web app',
'user_stories': [
'User will be able to send direct messages to another user.',
'User will receive direct messages from other users.',
'User will view the sent and received messages in a conversation view.',
'User will select a user to send a direct message.',
'User will be able to search for users to send direct messages to.',
'Users can view the online status of other users.',
'User will be able to log into the application using their credentials.',
'User will be able to logout from the Test_App.',
'User will be able to register a new account on Test_App.',
]
})
'app_type': 'web app',
'user_stories': [
'User will be able to send direct messages to another user.',
'User will receive direct messages from other users.',
'User will view the sent and received messages in a conversation view.',
'User will select a user to send a direct message.',
'User will be able to search for users to send direct messages to.',
'Users can view the online status of other users.',
'User will be able to log into the application using their credentials.',
'User will be able to logout from the Test_App.',
'User will be able to register a new account on Test_App.',
]
})
function_calls = ARCHITECTURE
# When
@@ -528,12 +528,12 @@ The development of this application will strictly follow a monolithic structure,
The development process will include the creation of user stories and tasks, based on detailed discussions with the client.''',
'app_type': 'web app',
'user_stories': [
'User Story 1: As a user, I can access the web-based "chat_app" directly without needing to authenticate or log in. Do you want to add anything else? If not, just press ENTER.',
'User Story 2: As a user, I can start one-on-one conversations with another user on the "chat_app". Do you want to add anything else? If not, just press ENTER.',
'User Story 3: As a user, I can send and receive messages in real-time within my one-on-one conversation on the "chat_app". Do you want to add anything else? If not, just press ENTER.',
'User Story 4: As a user, I do not need to worry about deleting or storing my chats because the "chat_app" does not store chat histories. Do you want to add anything else? If not, just press ENTER.',
'User Story 5: As a user, I will only be able to send text messages, as the "chat_app" does not support any kind of multimedia sharing like photos, videos, or files. Do you want to add anything else? If not, just press ENTER.',
'User Story 6: As a user, I will not see any live typing indicators or read receipts since the "chat_app" does not provide any additional real-time functionality beyond message exchange. Do you want to add anything else? If not, just press ENTER.',
'User Story 1: As a user, I can access the web-based "chat_app" directly without needing to authenticate or log in. Do you want to add anything else? If not, just press ENTER.',
'User Story 2: As a user, I can start one-on-one conversations with another user on the "chat_app". Do you want to add anything else? If not, just press ENTER.',
'User Story 3: As a user, I can send and receive messages in real-time within my one-on-one conversation on the "chat_app". Do you want to add anything else? If not, just press ENTER.',
'User Story 4: As a user, I do not need to worry about deleting or storing my chats because the "chat_app" does not store chat histories. Do you want to add anything else? If not, just press ENTER.',
'User Story 5: As a user, I will only be able to send text messages, as the "chat_app" does not support any kind of multimedia sharing like photos, videos, or files. Do you want to add anything else? If not, just press ENTER.',
'User Story 6: As a user, I will not see any live typing indicators or read receipts since the "chat_app" does not provide any additional real-time functionality beyond message exchange. Do you want to add anything else? If not, just press ENTER.',
]
})
function_calls = DEVELOPMENT_PLAN
@@ -547,7 +547,8 @@ The development process will include the creation of user stories and tasks, bas
# Then
assert convo.messages[0]['content'].startswith('You are a tech lead in a software development agency')
assert convo.messages[1]['content'].startswith('You are working in a software development agency and a project manager and software architect approach you')
assert convo.messages[1]['content'].startswith(
'You are working in a software development agency and a project manager and software architect approach you')
assert response is not None
response = parse_agent_response(response, function_calls)