mirror of
https://github.com/Significant-Gravitas/AutoGPT.git
synced 2026-04-08 03:00:28 -04:00
fix(copilot): prevent dry-run metadata from leaking into LLM context
When dry_run=True, the tool output message contained "[DRY RUN] Block
'X' simulated successfully — no real execution occurred." This text was
fed directly into the LLM conversation context, causing the autopilot to
realize it was in simulation mode and change its evaluation behavior —
defeating the purpose of dry-run testing.
Changes:
- Remove [DRY RUN] prefix from execute_block() dry-run message — now
identical to real execution ("Block 'X' executed successfully")
- Shorten dry_run tool schema descriptions to "Execute in preview mode"
instead of detailed simulation language that steered LLM behavior
- Update tests to assert dry-run output matches real execution format
The is_dry_run field is preserved in the response model so the frontend
can still show simulation badges via the SSE tool output event.
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -115,10 +115,7 @@ async def execute_block(
         session_id=session_id,
     )
     return BlockOutputResponse(
-        message=(
-            f"[DRY RUN] Block '{block.name}' simulated successfully "
-            "— no real execution occurred."
-        ),
+        message=f"Block '{block.name}' executed successfully",
         block_id=block_id,
         block_name=block.name,
         outputs=dict(outputs),
@@ -153,11 +153,7 @@ class RunAgentTool(BaseTool):
                 },
                 "dry_run": {
                     "type": "boolean",
-                    "description": (
-                        "When true, simulates the entire agent execution using an LLM "
-                        "for each block — no real API calls, no credentials needed, "
-                        "no credits charged. Useful for testing agent wiring end-to-end."
-                    ),
+                    "description": "Execute in preview mode.",
                 },
             },
             "required": ["dry_run"],
@@ -51,11 +51,7 @@ class RunBlockTool(BaseTool):
                 },
                 "dry_run": {
                     "type": "boolean",
-                    "description": (
-                        "When true, simulates block execution using an LLM without making any "
-                        "real API calls or producing side effects. Useful for testing agent "
-                        "wiring and previewing outputs."
-                    ),
+                    "description": "Execute in preview mode.",
                 },
             },
             "required": ["block_id", "input_data", "dry_run"],
@@ -238,7 +238,7 @@ async def test_execute_block_dry_run_skips_real_execution():

 @pytest.mark.asyncio
 async def test_execute_block_dry_run_response_format():
-    """Dry-run response should contain [DRY RUN] in message and success=True."""
+    """Dry-run response should match real execution message format and have success=True."""
     mock_block = make_mock_block()

     async def fake_simulate(block, input_data):
@@ -259,7 +259,8 @@ async def test_execute_block_dry_run_response_format():
     )

     assert isinstance(response, BlockOutputResponse)
-    assert "[DRY RUN]" in response.message
+    assert "executed successfully" in response.message
+    assert "[DRY RUN]" not in response.message  # must not leak to LLM context
     assert response.success is True
     assert response.outputs == {"result": ["simulated"]}
@@ -91,7 +91,7 @@ class TestRunBlockToolSessionDryRun:

         # Set up execute_block to return a success
         mock_exec.return_value = MagicMock(
-            message="[DRY RUN] Block executed",
+            message="Block 'TestBlock' executed successfully",
             success=True,
         )
Reference in New Issue
Block a user