fix(platform): remove hardcoded 3-iteration cap from dry-run loop

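Instead of capping the loop at 3 iterations, let the copilot repeat the dry-run -> fix cycle until the simulation passes or the problems are clearly unfixable. This lets the copilot keep going while it is making progress and stop early when the issues cannot be resolved; when it stops making progress, it reports the outstanding problems to the user and asks for guidance. The system prompt, shared tool notes, agent generation guide, `run_agent` tool description, and the corresponding tests are updated to match.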
2026-04-08 03:00:28 -04:00 · 2026-03-27 11:00:27 +07:00
parent 50b635da6d
commit c778cc9849
5 changed files with 20 additions and 16 deletions
--- a/autogpt_platform/backend/backend/copilot/dry_run_loop_test.py
+++ b/autogpt_platform/backend/backend/copilot/dry_run_loop_test.py
@@ -31,8 +31,8 @@ class TestSystemPromptDryRunLoop:
    def test_system_prompt_references_tool_notes(self):
        assert "tool notes" in DEFAULT_SYSTEM_PROMPT.lower()

-    def test_system_prompt_mentions_iterations(self):
-        assert "3 iteration" in DEFAULT_SYSTEM_PROMPT.lower()
+    def test_system_prompt_mentions_repeat_until_pass(self):
+        assert "repeat until" in DEFAULT_SYSTEM_PROMPT.lower()


 class TestToolDescriptionsDryRunLoop:
@@ -66,7 +66,7 @@ class TestToolDescriptionsDryRunLoop:
        dry_run_desc = params["properties"]["dry_run"]["description"]
        assert "create_agent" in dry_run_desc or "edit_agent" in dry_run_desc
        assert "wait_for_result" in dry_run_desc
-        assert "3 iterations" in dry_run_desc
+        assert "repeat" in dry_run_desc.lower()

    def test_get_agent_building_guide_mentions_workflow(self):
        tool = TOOL_REGISTRY["get_agent_building_guide"]
@@ -97,8 +97,8 @@ class TestPromptingSupplementDryRunLoop:
        assert "null / empty outputs" in notes_lower
        assert "nodes that never executed" in notes_lower

-    def test_shared_tool_notes_include_max_iterations(self):
-        assert "3 iterations" in _SHARED_TOOL_NOTES
+    def test_shared_tool_notes_include_repeat_until_pass(self):
+        assert "repeat until" in _SHARED_TOOL_NOTES.lower()

    def test_sdk_supplement_includes_dry_run_section(self):
        supplement = get_sdk_supplement(use_e2b=False, cwd="/tmp/test")
@@ -126,8 +126,9 @@ class TestAgentBuildingGuideDryRunLoop:
        assert "**Good output**" in guide_content
        assert "**Bad output**" in guide_content

-    def test_guide_mentions_max_iterations(self, guide_content):
-        assert "**3 iterations**" in guide_content
+    def test_guide_mentions_repeat_until_pass(self, guide_content):
+        assert "repeat" in guide_content.lower()
+        assert "clearly unfixable" in guide_content.lower()

    def test_guide_mentions_wait_for_result(self, guide_content):
        assert "wait_for_result=120" in guide_content
--- a/autogpt_platform/backend/backend/copilot/prompting.py
+++ b/autogpt_platform/backend/backend/copilot/prompting.py
@@ -132,8 +132,9 @@ to the user as ready:
     with wrong content. Check type compatibility between linked ports.
 3. If any issues are found, fix the agent JSON and call `edit_agent`, then
   dry-run again.
-4. Repeat up to 3 iterations. If issues persist, report the outstanding
-   problems to the user and ask for guidance.
+4. Repeat until the simulation passes or the problems are clearly unfixable.
+   If you stop making progress, report the outstanding problems to the user
+   and ask for guidance.

 This loop ensures the agent actually works before the user invests real
 credentials and API credits in a live run.
--- a/autogpt_platform/backend/backend/copilot/sdk/agent_generation_guide.md
+++ b/autogpt_platform/backend/backend/copilot/sdk/agent_generation_guide.md
@@ -41,7 +41,8 @@ or trigger. If so:
 8. **Dry-run**: ALWAYS call `run_agent` with `dry_run=True` and
   `wait_for_result=120` to verify the agent works end-to-end.
 9. **Inspect & fix**: Check the dry-run output for errors. If issues are
-   found, call `edit_agent` to fix and dry-run again (max 3 iterations).
+   found, call `edit_agent` to fix and dry-run again. Repeat until the
+   simulation passes or the problems are clearly unfixable.
   See "REQUIRED: Dry-Run Verification Loop" section below for details.

 ### Agent JSON Structure
@@ -264,9 +265,9 @@ user the agent is ready. NEVER skip this step.
     structure. Check type compatibility between linked ports.
 4. **Fix**: If any issues are found, call `edit_agent` with the corrected
   agent JSON, then go back to step 2.
-5. **Max iterations**: Repeat the dry-run -> fix cycle up to **3 iterations**.
-   If problems persist after 3 attempts, report the remaining issues to
-   the user and ask for guidance rather than looping indefinitely.
+5. **Repeat**: Continue the dry-run -> fix cycle until the simulation passes
+   or the problems are clearly unfixable. If you stop making progress,
+   report the remaining issues to the user and ask for guidance.

 #### Good vs bad dry-run output

--- a/autogpt_platform/backend/backend/copilot/service.py
+++ b/autogpt_platform/backend/backend/copilot/service.py
@@ -63,7 +63,7 @@ Your goal is to help users automate tasks by:
 - Building and running working automations
 - Delivering tangible value through action, not just explanation

-After creating or editing an agent, ALWAYS dry-run it with `run_agent(dry_run=True, wait_for_result=120)` to verify it works, then fix and re-test (up to 3 iterations). NEVER skip the dry-run step. See tool notes for the full workflow.
+After creating or editing an agent, ALWAYS dry-run it with `run_agent(dry_run=True, wait_for_result=120)` to verify it works. If issues are found, fix them and re-simulate. Repeat until the simulation passes or the problems are clearly unfixable. NEVER skip the dry-run step. See tool notes for the full workflow.

 Be concise, proactive, and action-oriented. Bias toward showing working solutions over lengthy explanations."""

--- a/autogpt_platform/backend/backend/copilot/tools/run_agent.py
+++ b/autogpt_platform/backend/backend/copilot/tools/run_agent.py
@@ -164,8 +164,9 @@ class RunAgentTool(BaseTool):
                        "edit_agent to verify the agent works correctly before telling "
                        "the user it is ready. Set wait_for_result=120 to get the "
                        "output inline. Inspect the results for errors or unexpected "
-                        "values, then fix with edit_agent and re-test if needed (max "
-                        "3 iterations)."
+                        "values, then fix with edit_agent and re-simulate. Repeat "
+                        "until the simulation passes or the problems are clearly "
+                        "unfixable."
                    ),
                },
            },