Merge pull request #834 from Pythagora-io/development

Development
This commit is contained in:
LeonOstrez
2024-04-01 13:40:06 -07:00
committed by GitHub
9 changed files with 152 additions and 37 deletions

View File

@@ -17,6 +17,7 @@ MESSAGE_TYPE = {
'progress': 'progress', # Progress bar for extension only
'projectStats': 'projectStats', # Project stats for extension only
'keyExpired': 'keyExpired', # (Free trial) key expired message - for extension only
'inputPrefill': 'inputPrefill', # Prefill input field with text in extension
}
LOCAL_IGNORE_MESSAGE_TYPES = [
@@ -33,4 +34,5 @@ LOCAL_IGNORE_MESSAGE_TYPES = [
'progress',
'projectStats',
'keyExpired',
'inputPrefill',
]
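
For context, a minimal sketch of how these extension-only types are presumably filtered when gpt-pilot runs in a plain terminal; the helper name local_print and its logic are assumptions for illustration, not code from this PR.

# Assumed helper, for illustration only: messages whose type is in
# LOCAL_IGNORE_MESSAGE_TYPES (including the new 'inputPrefill') are dropped
# when no extension IPC client is attached.
def local_print(message, message_type=None):
    if message_type in LOCAL_IGNORE_MESSAGE_TYPES:
        return
    print(message)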

View File

@@ -254,6 +254,37 @@ def save_progress(app_id, step, data):
return progress
def edit_development_plan(app_id, update_data):
try:
dev_plan = DevelopmentPlanning.get(app=app_id)
except DevelopmentPlanning.DoesNotExist:
print(color_red(f"No development plan found for app {app_id}"), category='error')
return None
for key, value in update_data.items():
setattr(dev_plan, key, value)
dev_plan.save()
return dev_plan
def edit_feature_plan(app_id, update_data):
try:
dev_plan = (DevelopmentSteps.select()
.where((DevelopmentSteps.app == app_id) & (DevelopmentSteps.prompt_path.contains('feature_plan')))
.order_by(DevelopmentSteps.created_at.desc())
.get())
except DevelopmentSteps.DoesNotExist:
print(color_red(f"No feature plan found for app {app_id}"), category='error')
return None
for key, value in update_data.items():
setattr(dev_plan, key, value)
dev_plan.save()
return dev_plan
def get_app(app_id, error_if_not_found=True):
try:
app = App.get(App.id == app_id)
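
A hedged usage sketch of the new database helper (the app id and plan contents below are hypothetical): edit_development_plan returns the updated row, or None when no DevelopmentPlanning record exists for the app.

# Hypothetical values, mirroring how Developer.edit_task (further down in this PR)
# persists a plan after the user skips or edits a task.
updated_plan = [{'description': 'Set up the Express server'}]
dev_plan = edit_development_plan('my-app-id', {'development_plan': updated_plan})
if dev_plan is None:
    pass  # no development plan row exists for this app; nothing was saved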

View File

@@ -241,7 +241,13 @@ class AgentConvo:
if not self.agent.project.check_ipc():
print(color_yellow_bold(dev_step_msg), end='')
logger.info(dev_step_msg)
print(f"\n{content}\n", type='local')
try:
print(f"\n{content}\n", type='local')
except Exception: # noqa
# Workaround for Windows encoding crash: https://github.com/Pythagora-io/gpt-pilot/issues/509
safe_content = content.encode('ascii', 'ignore').decode('ascii')
print(f"\n{safe_content}\n", type='local')
logger.info(f"{print_msg}: {content}\n")
def to_context_prompt(self):
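
To illustrate the fallback above (the example string is made up): stripping non-ASCII characters sidesteps the Windows console encoding crash tracked in issue #509.

content = "Task finished ✅"  # example content with a character cp1252 cannot encode
safe_content = content.encode('ascii', 'ignore').decode('ascii')
# safe_content == "Task finished " and printing it no longer raises UnicodeEncodeError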

View File

@@ -364,8 +364,8 @@ class CodeMonkey(Agent):
"""
diff = "\n".join(
[
"--- " + file_name,
"+++ " + file_name,
f"--- {file_name}",
f"+++ {file_name}",
] + hunks
) + "\n"
try:
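
For reference, a small sketch of the unified-diff text that the f-string version builds (the file name and hunk lines are hypothetical):

file_name = "src/app.py"
hunks = ["@@ -1,1 +1,1 @@", "-print('old')", "+print('new')"]
diff = "\n".join([f"--- {file_name}", f"+++ {file_name}"] + hunks) + "\n"
# diff is:
# --- src/app.py
# +++ src/app.py
# @@ -1,1 +1,1 @@
# -print('old')
# +print('new')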

View File

@@ -16,7 +16,7 @@ from utils.style import (
color_white_bold
)
from helpers.exceptions import TokenLimitError
from const.code_execution import MAX_COMMAND_DEBUG_TRIES, MAX_QUESTIONS_FOR_BUG_REPORT
from const.code_execution import MAX_COMMAND_DEBUG_TRIES
from helpers.exceptions import TooDeepRecursionError
from helpers.Debugger import Debugger
from utils.questionary import styled_text
@@ -29,7 +29,7 @@ from utils.utils import should_execute_step, array_of_objects_to_string, generat
from helpers.cli import run_command_until_success, execute_command_and_check_cli_response
from const.function_calls import (EXECUTE_COMMANDS, GET_TEST_TYPE, IMPLEMENT_TASK, COMMAND_TO_RUN,
ALTERNATIVE_SOLUTIONS, GET_BUG_REPORT_MISSING_DATA)
from database.database import save_progress, get_progress_steps, update_app_status
from database.database import save_progress, edit_development_plan, edit_feature_plan, get_progress_steps, update_app_status
from utils.telemetry import telemetry
from prompts.prompts import ask_user
from utils.print import print_task_progress, print_step_progress
@@ -55,14 +55,17 @@ class Developer(Agent):
if not self.project.skip_steps:
logger.info("Starting to create the actual code...")
total_tasks = len(self.project.development_plan)
progress_thresholds = [50] # Percentages of progress when documentation is created
documented_thresholds = set()
for i, dev_task in enumerate(self.project.development_plan):
finished_tasks = []
while len(finished_tasks) < len(self.project.development_plan):
i = len(finished_tasks)
dev_task = self.project.development_plan[i]
num_of_tasks = len(self.project.development_plan)
# don't create documentation for features
if not self.project.finished:
current_progress_percent = round((i / total_tasks) * 100, 2)
current_progress_percent = round(((i + 1) / num_of_tasks) * 100, 2)
for threshold in progress_thresholds:
if current_progress_percent > threshold and threshold not in documented_thresholds:
@@ -76,6 +79,7 @@ class Developer(Agent):
self.project.cleanup_list('dev_steps_to_load', task['id'])
if len(self.project.tasks_to_load):
finished_tasks.append(dev_task)
continue
# if it is the last task to load, execute it to check if it's finished
else:
@@ -83,15 +87,18 @@ class Developer(Agent):
readme_dev_step = next((el for el in self.project.dev_steps_to_load if
'create_readme.prompt' in el.get('prompt_path', '')), None)
if len(self.project.development_plan) - 1 == i and readme_dev_step is not None:
if num_of_tasks - 1 == i and readme_dev_step is not None:
self.project.cleanup_list('dev_steps_to_load', readme_dev_step['id'])
finished_tasks.append(dev_task)
continue
self.project.current_task.start_new_task(dev_task['description'], i + 1)
print_task_progress(i+1, len(self.project.development_plan), dev_task['description'], task_source, 'in_progress')
self.implement_task(i, task_source, dev_task)
print_task_progress(i+1, len(self.project.development_plan), dev_task['description'], task_source, 'done')
telemetry.inc("num_tasks")
print_task_progress(i+1, num_of_tasks, dev_task['description'], task_source, 'in_progress')
task_finished = self.implement_task(i, task_source, dev_task)
if task_finished:
finished_tasks.append(dev_task)
telemetry.inc("num_tasks")
print_task_progress(i+1, num_of_tasks, dev_task['description'], task_source, 'done')
# DEVELOPMENT END
if not self.project.skip_steps:
@@ -123,6 +130,10 @@ class Developer(Agent):
:param task_source: The source of the task, one of: 'app', 'feature', 'debugger', 'iteration'.
:param development_task: The task to implement.
"""
should_execute_task = self.edit_task(task_source, development_task)
if not should_execute_task:
return False
print(color_green_bold(f'Implementing task #{i + 1}: ') + color_green(f' {development_task["description"]}\n'), category='pythagora')
print(f'Starting task #{i + 1} implementation...', type='verbose', category='agent:developer')
self.project.dot_pilot_gpt.chat_log_folder(i + 1)
@@ -242,6 +253,64 @@ class Developer(Agent):
logger.warning('Testing at end of task failed')
break
return True
def edit_task(self, task_source, task):
"""
Allow the user to edit a task before executing it.
:param task_source: The source of the task; editing is only offered for 'app' and 'feature' tasks.
:param task: The task to edit.
:return: True if the task should be executed, False if it should be skipped.
"""
if self.project.skip_steps or task_source not in ['app', 'feature']:
return True
execute_question = 'Do you want to execute this task?'
if self.project.check_ipc():
print(execute_question, category='pythagora')
else:
execute_question += ' [yes/edit task/skip task]'
print('')
print(task['description'])
print('yes/edit task/skip task', type='buttons-only')
response = ask_user(self.project, execute_question)
if response.lower() in NEGATIVE_ANSWERS + ['skip task']:
# remove task from development plan if it is being skipped
self.project.development_plan = [
element for element in self.project.development_plan
if element['description'] != task['description']
]
if task_source == 'app':
db_task_skip = edit_development_plan(self.project.args['app_id'], {'development_plan': self.project.development_plan})
else:
db_task_skip = edit_feature_plan(self.project.args['app_id'], {'llm_response': {'text': json.dumps({'plan': self.project.development_plan})}})
if db_task_skip:
print('Successfully skipped task.', category='pythagora')
return False
elif response.lower() == 'edit task':
edit_question = 'Write full edited description of the task here:'
if self.project.check_ipc():
print('continue/cancel', type='button')
print(edit_question, type='ipc')
print(task['description'], type='inputPrefill')
edited_task = ask_user(self.project, edit_question)
if edited_task.lower() in NEGATIVE_ANSWERS + ['', 'continue']:
return True
task['description'] = edited_task
if task_source == 'app':
db_task_edit = edit_development_plan(self.project.args['app_id'], {'development_plan': self.project.development_plan})
else:
db_task_edit = edit_feature_plan(self.project.args['app_id'], {'llm_response': {'text': json.dumps({'plan': self.project.development_plan})}})
if db_task_edit:
print('Successfully edited task.', category='pythagora')
return True
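
For clarity, a sketch of the payload shape that edit_feature_plan persists when a feature task is skipped or edited; the plan content is hypothetical.

import json
plan = [{'description': 'Add a logout endpoint'}]  # hypothetical remaining tasks
update_data = {'llm_response': {'text': json.dumps({'plan': plan})}}
# update_data['llm_response']['text'] == '{"plan": [{"description": "Add a logout endpoint"}]}'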
def step_delete_file(self, convo, step, i, test_after_code_changes):
"""
Delete a file from the project.
@@ -606,8 +675,7 @@ class Developer(Agent):
next_solution_to_try = None
iteration_count = self.project.last_iteration['prompt_data']['iteration_count'] if (self.project.last_iteration and 'iteration_count' in self.project.last_iteration['prompt_data']) else 0
while True:
self.user_feedback = llm_solutions[-1]['user_feedback'] if len(llm_solutions) > 0 else None
review_successful = self.project.skip_steps or self.review_task()
review_successful = self.project.skip_steps or self.review_task(llm_solutions)
if not review_successful and self.review_count < 3:
continue
iteration_count += 1
@@ -765,24 +833,29 @@ class Developer(Agent):
return user_feedback, questions_and_answers
def review_task(self):
def review_task(self, llm_solutions):
"""
Review all task changes and refactor big files.
:param llm_solutions: List of all user feedback and the LLM solutions to that feedback for the current task.
:return: bool - True if the task changes passed review, False if not
"""
print('Starting review of all changes made in this task...', type='verbose', category='agent:reviewer')
self.review_count += 1
review_result = self.review_code_changes()
review_result = self.review_code_changes(llm_solutions)
refactoring_done = self.refactor_code()
if refactoring_done or review_result['implementation_needed']:
review_result = self.review_code_changes()
review_result = self.review_code_changes(llm_solutions)
return review_result['success']
def review_code_changes(self):
def review_code_changes(self, llm_solutions):
"""
Review the code changes and ask for human intervention if needed
:param llm_solutions: List of all user feedback and the LLM solutions to that feedback for the current task.
:return: dict - {
'success': bool,
'implementation_needed': bool
@@ -791,11 +864,13 @@ class Developer(Agent):
review_convo = AgentConvo(self)
files = [
file_dict for file_dict in self.project.get_all_coded_files()
if any(file_dict['full_path'].endswith(modified_file.lstrip('.')) for modified_file in self.modified_files)
if any(os.path.normpath(file_dict['full_path']).endswith(os.path.normpath(modified_file.lstrip('.'))) for
modified_file in self.modified_files)
]
files_at_start_of_task = [
file_dict for file_dict in self.files_at_start_of_task
if any(file_dict['full_path'].endswith(modified_file.lstrip('.')) for modified_file in self.modified_files)
if any(os.path.normpath(file_dict['full_path']).endswith(os.path.normpath(modified_file.lstrip('.'))) for
modified_file in self.modified_files)
]
# TODO instead of sending files before and after maybe add nice way to show diff for multiple files
review = review_convo.send_message('development/review_task.prompt', {
@@ -804,7 +879,7 @@ class Developer(Agent):
"tasks": self.project.development_plan,
"current_task": self.project.current_task.data.get('task_description'),
"files": files,
"user_input": self.user_feedback,
"all_feedbacks": [solution["user_feedback"].replace("```", "") for solution in llm_solutions],
"modified_files": self.modified_files,
"files_at_start_of_task": files_at_start_of_task,
"previous_features": self.project.previous_features,

View File

@@ -32,8 +32,8 @@ def update_file(path: str, new_content: Union[str, bytes], project=None):
if project is not None: # project can be None only in tests
if not project.skip_steps:
print({"path": path, "line": None}, type='openFile')
if not project.check_ipc():
print(color_green(f"Updated file {path}"))
if not project.check_ipc():
print(color_green(f"Updated file {path}"))
def get_file_contents(

View File

@@ -14,7 +14,7 @@ I want you to create a list of steps that are needed to debug this issue.
Each step can be either:
* `command` - command to run (must be able to run on a {{ os }} machine, assume the current working directory is the project root folder)
* `code_change` - step will change the code and you need to thoroughly describe what needs to be implemented. I will implement the requested changes and let you know.
* `save_file` - step will create or update a file, and you need to thoroughly describe what needs to be implemented. I will implement the requested changes and let you know.
* `human_intervention` - if you need the human to do something, use this type of step and explain in detail what you want the human to do. NEVER use `human_intervention` for testing, as testing will be done separately by a dedicated QA after all the steps are done.
{{ execution_order }}
@@ -22,5 +22,3 @@ Each step can be either:
Also, make sure that at least the last step has `check_if_fixed` set to TRUE.
{{ file_size_limit }}
{# After this, you need to decide what to do next. You can rerun the command `{{ command }}` to check if the problem is fixed or run another command with `run_command` or change more code with `implement_code_changes`. #}
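
For illustration, a hypothetical example of the step list this prompt asks the LLM to produce; the field names below are assumptions based on the step types and the check_if_fixed requirement described above, not the project's actual function-call schema.

steps = [
    {"type": "command", "command": "npm test", "check_if_fixed": False},  # hypothetical
    {"type": "save_file", "description": "Guard against an undefined user in the auth middleware",
     "check_if_fixed": True},  # at least the last step checks whether the issue is fixed
]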

View File

@@ -24,27 +24,26 @@ Here are files that were modified during this task implementation:
---end_of_current_files---
{% endif -%}
{% if user_input -%}While working on this task, your colleague who is testing the app "{{ name }}" sent you some additional info. Here it is:
```
{{ user_input }}
{% if all_feedbacks -%}While working on this task, your colleague who is testing the app "{{ name }}" sent you some additional information on what doesn't work as intended or what should be added. Here are all the inputs he sent you:
```{% for feedback in all_feedbacks %}
{{ loop.index }}. {{ feedback }}
{% endfor %}
```
After this info, you tried to fix it. {% endif %}Files that were modified during implementation of the task are:
After you got each of these additional inputs, you tried to fix it as part of this task. {% endif %}Files that were modified during implementation of the task are:
```
{{ modified_files }}
```
Now I will show you how those files looked before this task implementation started. If a file is listed as changed but is not in this list, that means it was created during this task. Here are the files before implementation of this task:
{% if files_at_start_of_task|length > 0 %}---start_of_files_at_start_of_task---
---start_of_files_at_start_of_task---{% if files_at_start_of_task|length > 0 %}
{% for file in files_at_start_of_task %}
**{{ file.path }}/{{ file.name }}** ({{ file.lines_of_code }} lines of code):
```
{{ file.content }}
```
{% endfor %}
---end_of_files_at_start_of_task---
{% endif -%}
{% endfor %}{% endif -%}---end_of_files_at_start_of_task---
**IMPORTANT**
You have to review this task implementation. You are known to be very strict with your reviews and very good at noticing bugs, but you don't mind minor changes like refactoring, adding or removing logs and so on. You think carefully through all of the information given before drawing any conclusions.
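
A quick sketch of how the new feedback block renders, using jinja2 directly with two made-up feedback strings (gpt-pilot's own prompt loader may trim whitespace differently).

from jinja2 import Template

snippet = ("```{% for feedback in all_feedbacks %}\n"
           "{{ loop.index }}. {{ feedback }}\n"
           "{% endfor %}```")
rendered = Template(snippet).render(all_feedbacks=["Login button does nothing", "Add a logout link"])
# 'rendered' contains the numbered list "1. Login button does nothing" and
# "2. Add a logout link" wrapped in triple backticks.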

View File

@@ -264,7 +264,12 @@ def retry_on_exception(func):
print(color_red(f"Error calling LLM API: The request exceeded the maximum token limit (request size: {n_tokens}) tokens."))
trace_token_limit_error(n_tokens, args[0]['messages'], err_str)
raise TokenLimitError(n_tokens, MAX_GPT_MODEL_TOKENS)
if "rate_limit_exceeded" in err_str:
if "rate_limit_exceeded" in err_str or "rate_limit_error" in err_str:
# Retry the attempt if the current account's tier reaches the API limits
rate_limit_exceeded_sleep(e, err_str)
continue
if "overloaded_error" in err_str:
# Retry the attempt if the Anthropic servers are overloaded
rate_limit_exceeded_sleep(e, err_str)
continue
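
To summarize the retry condition (the exception text below is made up for illustration):

# Any of these substrings in the stringified exception now leads to
# rate_limit_exceeded_sleep() and another attempt instead of raising.
RETRYABLE_MARKERS = ("rate_limit_exceeded", "rate_limit_error", "overloaded_error")
err_str = "anthropic.InternalServerError: overloaded_error: try again later"  # made-up example
should_retry = any(marker in err_str for marker in RETRYABLE_MARKERS)         # True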
@@ -601,7 +606,6 @@ def load_data_to_json(string):
return json.loads(fix_json(string))
def stream_anthropic(messages, function_call_message, gpt_data, model_name="claude-3-sonnet-20240229"):
try:
import anthropic