mirror of
https://github.com/Significant-Gravitas/AutoGPT.git
synced 2026-04-08 03:00:28 -04:00
fix(copilot): prevent dry-run metadata from leaking into LLM context
When dry_run=True, the tool output message contained "[DRY RUN] Block
'X' simulated successfully — no real execution occurred." This text was
fed directly into the LLM conversation context, causing the autopilot to
realize it was in simulation mode and change its evaluation behavior —
defeating the purpose of dry-run testing.
Changes:
- Remove [DRY RUN] prefix from execute_block() dry-run message — now
identical to real execution ("Block 'X' executed successfully")
- Shorten dry_run tool schema descriptions to "Execute in preview mode"
instead of detailed simulation language that steered LLM behavior
- Update tests to assert dry-run output matches real execution format
The is_dry_run field is preserved in the response model so the frontend
can still show simulation badges via the SSE tool output event.
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -115,10 +115,7 @@ async def execute_block(
         session_id=session_id,
     )
     return BlockOutputResponse(
-        message=(
-            f"[DRY RUN] Block '{block.name}' simulated successfully "
-            "— no real execution occurred."
-        ),
+        message=f"Block '{block.name}' executed successfully",
         block_id=block_id,
         block_name=block.name,
         outputs=dict(outputs),
@@ -153,11 +153,7 @@ class RunAgentTool(BaseTool):
                 },
                 "dry_run": {
                     "type": "boolean",
-                    "description": (
-                        "When true, simulates the entire agent execution using an LLM "
-                        "for each block — no real API calls, no credentials needed, "
-                        "no credits charged. Useful for testing agent wiring end-to-end."
-                    ),
+                    "description": "Execute in preview mode.",
                 },
             },
             "required": ["dry_run"],
@@ -51,11 +51,7 @@ class RunBlockTool(BaseTool):
                 },
                 "dry_run": {
                     "type": "boolean",
-                    "description": (
-                        "When true, simulates block execution using an LLM without making any "
-                        "real API calls or producing side effects. Useful for testing agent "
-                        "wiring and previewing outputs."
-                    ),
+                    "description": "Execute in preview mode.",
                 },
             },
             "required": ["block_id", "input_data", "dry_run"],
@@ -238,7 +238,7 @@ async def test_execute_block_dry_run_skips_real_execution():

 @pytest.mark.asyncio
 async def test_execute_block_dry_run_response_format():
-    """Dry-run response should contain [DRY RUN] in message and success=True."""
+    """Dry-run response should match real execution message format and have success=True."""
     mock_block = make_mock_block()

     async def fake_simulate(block, input_data):
@@ -259,7 +259,8 @@ async def test_execute_block_dry_run_response_format():
     )

     assert isinstance(response, BlockOutputResponse)
-    assert "[DRY RUN]" in response.message
+    assert "executed successfully" in response.message
+    assert "[DRY RUN]" not in response.message  # must not leak to LLM context
     assert response.success is True
     assert response.outputs == {"result": ["simulated"]}
@@ -91,7 +91,7 @@ class TestRunBlockToolSessionDryRun:

         # Set up execute_block to return a success
         mock_exec.return_value = MagicMock(
-            message="[DRY RUN] Block executed",
+            message="Block 'TestBlock' executed successfully",
             success=True,
         )
Reference in New Issue
Block a user