Merge branch 'development' into dynamic-tasks

LeonOstrez authored 2024-04-06 10:29:36 -07:00 · committed by GitHub
8 changed files with 39 additions and 24 deletions

@@ -214,7 +214,7 @@ Here are the steps GPT Pilot takes to create an app:
 - **Works at scale** - GPT Pilot isn't meant to create simple apps but rather so it can work at any scale. It has mechanisms that filter out the code, so in each LLM conversation, it doesn't need to store the entire codebase in context, but it shows the LLM only the relevant code for the current task it's working on. Once an app is finished, you can continue working on it by writing instructions on what feature you want to add.
 
 # 🍻 Contributing
-If you are interested in contributing to GPT Pilot, I would be more than happy to have you on board and also help you get started. Feel free to ping [zvonimir@pythagora.ai](mailto:zvonimir@pythagora.ai), and I'll help you get started.
+If you are interested in contributing to GPT Pilot, join [our Discord server](https://discord.gg/HaqXugmxr9), check out open [GitHub issues](https://github.com/Pythagora-io/gpt-pilot/issues), and see if anything interests you. We would be happy to get help in resolving any of those. The best place to start is by reviewing blog posts mentioned above to understand how the architecture works before diving into the codebase.
 
 ## 🖥 Development
 Other than the research, GPT Pilot needs to be debugged to work in different scenarios. For example, we realized that the quality of the code generated is very sensitive to the size of the development task. When the task is too broad, the code has too many bugs that are hard to fix, but when the development task is too narrow, GPT also seems to struggle in getting the task implemented into the existing code.
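Note: the "relevant code only" mechanism mentioned in the *Works at scale* bullet above can be illustrated with a minimal sketch. Everything here (`Task`, `relevant_paths`, `select_relevant_files`) is a hypothetical illustration of the idea, not GPT Pilot's actual API:

```python
# Hypothetical sketch of per-task context filtering, as described in the README.
from dataclasses import dataclass, field


@dataclass
class Task:
    description: str
    # Paths the planner decided are relevant to this task (hypothetical field).
    relevant_paths: list = field(default_factory=list)


def select_relevant_files(all_files: dict, task: Task) -> dict:
    """Keep only files matching the task's relevant paths, so the LLM prompt
    carries a small slice of the codebase instead of all of it."""
    return {
        path: content
        for path, content in all_files.items()
        if any(path.endswith(rel) for rel in task.relevant_paths)
    }


codebase = {"app/server.py": "...", "app/routes/users.py": "...", "README.md": "..."}
task = Task("Add a /users endpoint", relevant_paths=["routes/users.py", "server.py"])
print(select_relevant_files(codebase, task))  # only the two relevant files remain
```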

@@ -241,7 +241,13 @@ class AgentConvo:
         if not self.agent.project.check_ipc():
             print(color_yellow_bold(dev_step_msg), end='')
             logger.info(dev_step_msg)
-        print(f"\n{content}\n", type='local')
+        try:
+            print(f"\n{content}\n", type='local')
+        except Exception:  # noqa
+            # Workaround for Windows encoding crash: https://github.com/Pythagora-io/gpt-pilot/issues/509
+            safe_content = content.encode('ascii', 'ignore').decode('ascii')
+            print(f"\n{safe_content}\n", type='local')
         logger.info(f"{print_msg}: {content}\n")
 
     def to_context_prompt(self):
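Note: the added `try`/`except` works around Windows consoles whose code page can't encode certain Unicode characters, which makes `print()` raise `UnicodeEncodeError`. The same fallback pattern in isolation (the hunk catches the broader `Exception` because its `print` is a project-specific wrapper; this sketch narrows it to the specific error):

```python
def safe_print(content: str) -> None:
    """Print content, dropping any characters the console can't encode."""
    try:
        print(content)
    except UnicodeEncodeError:
        # 'ignore' silently discards every character without an ASCII equivalent.
        safe_content = content.encode('ascii', 'ignore').decode('ascii')
        print(safe_content)


safe_print("Build finished ✅")  # prints "Build finished " on a console stuck on e.g. cp1252
```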

@@ -364,8 +364,8 @@ class CodeMonkey(Agent):
         """
         diff = "\n".join(
             [
-                "--- " + file_name,
-                "+++ " + file_name,
+                f"--- {file_name}",
+                f"+++ {file_name}",
             ] + hunks
         ) + "\n"
         try:
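Note: the change itself just swaps string concatenation for f-strings, but the surrounding code shows the shape being assembled: a `---`/`+++` header pair naming the file, followed by the hunks, which together form a unified diff. For comparison, the stdlib produces the same structure:

```python
# The "--- file" / "+++ file" header pair built above is the standard
# unified-diff preamble; difflib emits the same shape from two file versions.
import difflib

old = ["def greet():\n", "    print('hi')\n"]
new = ["def greet():\n", "    print('hello')\n"]

diff = "".join(difflib.unified_diff(old, new, fromfile="greet.py", tofile="greet.py"))
print(diff)
# --- greet.py
# +++ greet.py
# @@ -1,2 +1,2 @@
#  def greet():
# -    print('hi')
# +    print('hello')
```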

@@ -689,7 +689,7 @@ class Developer(Agent):
         should_review = bool(self.modified_files)
         while True:
             self.user_feedback = llm_solutions[-1]['user_feedback'] if len(llm_solutions) > 0 else None
-            review_successful = self.project.skip_steps or not should_review or (should_review and self.review_task())
+            review_successful = self.project.skip_steps or not should_review or (should_review and self.review_task(llm_solutions))
             if not review_successful and self.review_count < 3:
                 continue
             iteration_count += 1
@@ -849,24 +849,29 @@ class Developer(Agent):
         return user_feedback, questions_and_answers
 
-    def review_task(self):
+    def review_task(self, llm_solutions):
         """
         Review all task changes and refactor big files.
 
+        :param llm_solutions: List of all user feedbacks and LLM solutions (to those feedbacks) for current task.
         :return: bool - True if the task changes passed review, False if not
         """
         print('Starting review of all changes made in this task...', type='verbose', category='agent:reviewer')
         self.review_count += 1
-        review_result = self.review_code_changes()
+        review_result = self.review_code_changes(llm_solutions)
         refactoring_done = self.refactor_code()
 
         if refactoring_done or review_result['implementation_needed']:
-            review_result = self.review_code_changes()
+            review_result = self.review_code_changes(llm_solutions)
 
         return review_result['success']
 
-    def review_code_changes(self):
+    def review_code_changes(self, llm_solutions):
         """
         Review all the code changes during current task.
 
+        :param llm_solutions: List of all user feedbacks and LLM solutions (to those feedbacks) for current task.
         :return: dict - {
             'success': bool,
             'implementation_needed': bool
@@ -875,11 +880,13 @@ class Developer(Agent):
         review_convo = AgentConvo(self)
         files = [
             file_dict for file_dict in self.project.get_all_coded_files()
-            if any(file_dict['full_path'].endswith(modified_file.lstrip('.')) for modified_file in self.modified_files)
+            if any(os.path.normpath(file_dict['full_path']).endswith(os.path.normpath(modified_file.lstrip('.'))) for
+                   modified_file in self.modified_files)
         ]
         files_at_start_of_task = [
             file_dict for file_dict in self.files_at_start_of_task
-            if any(file_dict['full_path'].endswith(modified_file.lstrip('.')) for modified_file in self.modified_files)
+            if any(os.path.normpath(file_dict['full_path']).endswith(os.path.normpath(modified_file.lstrip('.'))) for
+                   modified_file in self.modified_files)
         ]
         # TODO instead of sending files before and after maybe add nice way to show diff for multiple files
         review = review_convo.send_message('development/review_task.prompt', {
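Note: the `os.path.normpath` wrapping matters on Windows, where `full_path` contains backslashes while `modified_files` entries use forward slashes, so the old suffix check never matched. A standalone demonstration (using `ntpath`, the Windows flavor of `os.path`, so it behaves the same on any OS; the paths are made up):

```python
# Why the normpath change matters: with mixed path separators,
# a plain endswith() suffix check silently fails on Windows.
import ntpath  # Windows implementation of os.path, importable everywhere

full_path = r"C:\workspace\app\pilot\helpers\Project.py"
modified_file = "./helpers/Project.py"

# Old comparison: False, because the separators don't match.
print(full_path.endswith(modified_file.lstrip('.')))  # False

# Normalizing both sides first makes the separators consistent.
print(ntpath.normpath(full_path).endswith(ntpath.normpath(modified_file.lstrip('.'))))  # True
```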

@@ -888,7 +895,7 @@ class Developer(Agent):
             "tasks": self.project.development_plan,
             "current_task": self.project.current_task.data.get('task_description'),
             "files": files,
-            "user_input": self.user_feedback,
+            "all_feedbacks": [solution["user_feedback"].replace("```", "") for solution in llm_solutions],
             "modified_files": self.modified_files,
             "files_at_start_of_task": files_at_start_of_task,
             "previous_features": self.project.previous_features,

@@ -14,7 +14,7 @@ I want you to create a list of steps that are needed to debug this issue.
 Each step can be either:
 
 * `command` - command to run (must be able to run on a {{ os }} machine, assume current working directory is project root folder)
-* `code_change` - step will change the code and you need to thoroughly describe what needs to be implemented. I will implement the requested changes and let you know.
+* `save_file` - step will create or update a file, and you need to thoroughly describe what needs to be implemented. I will implement the requested changes and let you know.
 * `human_intervention` - if you need the human to do something, use this type of step and explain in details what you want the human to do. NEVER use `human_intervention` for testing, as testing will be done separately by a dedicated QA after all the steps are done.
 
 {{ execution_order }}
@@ -22,5 +22,3 @@ Each step can be either:
 Also, make sure that at least the last step has `check_if_fixed` set to TRUE.
 
 {{ file_size_limit }}
-
-{# After this, you need to decide what to do next. You can rerun the command `{{ command }}` to check if the problem is fixed or run another command with `run_command` or change more code with `implement_code_changes`. #}
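Note: for orientation, a debugging plan satisfying this prompt might look like the sketch below. Only the step types (`command`, `save_file`, `human_intervention`) and the `check_if_fixed` flag come from the prompt itself; every other field name is an illustrative guess at the schema, not the actual response format:

```python
# Hypothetical shape of a step list the LLM could return for this prompt.
steps = [
    {
        "type": "command",
        "command": "npm test",
        "check_if_fixed": False,
    },
    {
        "type": "save_file",
        "description": "Update routes/users.js to validate the `id` query parameter before querying the database.",
        "check_if_fixed": False,
    },
    {
        "type": "command",
        "command": "npm test",
        # Per the prompt, at least the last step must set this to TRUE.
        "check_if_fixed": True,
    },
]
```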

@@ -15,12 +15,13 @@ You are currently working on task "{{ current_task }}" and you have to focus onl
 A part of the app is already finished.
 {{ custom_files_list }}
 
-{% if user_input -%}While working on this task, your colleague who is testing the app "{{ name }}" sent you some additional info. Here it is:
-```
-{{ user_input }}
+{% if all_feedbacks -%}While working on this task, your colleague who is testing the app "{{ name }}" sent you some additional information on what doesn't work as intended or what should be added. Here are all the inputs he sent you:
+```{% for feedback in all_feedbacks %}
+{{ loop.index }}. {{ feedback }}
+{% endfor %}
 ```
-After this info, you tried to fix it. {% endif %}Files that were modified during implementation of the task are:
+After you got each of these additional inputs, you tried to fix it as part of this task. {% endif %}Files that were modified during implementation of the task are:
 ```
 {{ modified_files }}
 ```
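Note: the new `{% for %}` block turns the accumulated feedback into a numbered list inside the fenced block. Rendering just that loop with plain Jinja2 shows roughly what the reviewer model receives (GPT Pilot's own template environment may configure whitespace handling differently; the feedback strings are made up):

```python
# Rendering the numbered-feedback loop from the prompt with plain Jinja2.
from jinja2 import Template

snippet = Template(
    "{% for feedback in all_feedbacks %}"
    "{{ loop.index }}. {{ feedback }}\n"
    "{% endfor %}"
)
print(snippet.render(all_feedbacks=["Login button does nothing", "Add a logout link"]), end="")
# 1. Login button does nothing
# 2. Add a logout link
```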

@@ -33,8 +34,7 @@ Now I will show you how those files looked before this task implementation start
 ```
 {{ file.content }}
 ```
-{% endfor %}
-{% endif -%}---end_of_files_at_start_of_task---
+{% endfor %}{% endif -%}---end_of_files_at_start_of_task---
 
 **IMPORTANT**
 You have to review this task implementation. You are known to be very strict with your reviews and very good at noticing bugs but you don't mind minor changes like refactoring, adding or removing logs and so on. You think twice through all information given before giving any conclusions.

@@ -264,7 +264,12 @@ def retry_on_exception(func):
                 print(color_red(f"Error calling LLM API: The request exceeded the maximum token limit (request size: {n_tokens}) tokens."))
                 trace_token_limit_error(n_tokens, args[0]['messages'], err_str)
                 raise TokenLimitError(n_tokens, MAX_GPT_MODEL_TOKENS)
-            if "rate_limit_exceeded" in err_str:
+            if "rate_limit_exceeded" in err_str or "rate_limit_error" in err_str:
                 # Retry the attempt if the current account's tier reaches the API limits
                 rate_limit_exceeded_sleep(e, err_str)
                 continue
+
+            if "overloaded_error" in err_str:
+                # Retry the attempt if the Anthropic servers are overloaded
+                rate_limit_exceeded_sleep(e, err_str)
+                continue
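Note: this hunk extends the retry branch to also match Anthropic's `rate_limit_error` and `overloaded_error` strings. The overall pattern is sleep-and-retry on transient errors. A generic standalone sketch (in the real code, `rate_limit_exceeded_sleep` derives the wait time from the provider's error details; this sketch substitutes plain exponential backoff):

```python
# Generic sketch of the retry pattern used in retry_on_exception:
# transient errors trigger a sleep and another attempt, everything else propagates.
import time

TRANSIENT_MARKERS = ("rate_limit_exceeded", "rate_limit_error", "overloaded_error")


def with_retries(call, max_attempts=5):
    for attempt in range(max_attempts):
        try:
            return call()
        except Exception as e:
            err_str = str(e)
            if any(marker in err_str for marker in TRANSIENT_MARKERS) and attempt < max_attempts - 1:
                time.sleep(2 ** attempt)  # 1s, 2s, 4s, ... instead of parsing the API's hint
                continue
            raise
```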

@@ -601,7 +606,6 @@
     return json.loads(fix_json(string))
 
 
-
 def stream_anthropic(messages, function_call_message, gpt_data, model_name = "claude-3-sonnet-20240229"):
     try:
         import anthropic

@@ -1,6 +1,6 @@
 from setuptools import setup
 
-VERSION = "0.1.9"
+VERSION = "0.1.10"
 
 requirements = open("requirements.txt").readlines()