Files
AutoGPT/autogpt_platform/backend/backend/copilot/tools/run_block.py
Zamil Majdy a880d73481 feat(platform): dry-run execution mode with LLM block simulation (#12483)
## Why

Agent generation and building need a way to test-run agents without
requiring real credentials or producing side effects. Currently, every
execution hits real APIs, consumes credits, and requires valid
credentials — making it impossible to debug or validate agent graphs
during the build phase without real consequences.

## Summary

Adds a `dry_run` execution mode to the copilot's `run_block` and
`run_agent` tools. When `dry_run=True`, every block execution is
simulated by an LLM instead of calling the real service — no real API
calls, no credentials required, no credits consumed, and no side effects.

Inspired by
[Significant-Gravitas/agent-simulator](https://github.com/Significant-Gravitas/agent-simulator).

### How it works

- **`backend/executor/simulator.py`** (new): `simulate_block()` builds a
prompt from the block's name, description, input/output schemas, and
actual input values, then calls `gpt-4o-mini` via the existing
OpenRouter client with JSON mode. Retries up to 5 times on JSON parse
failures. Missing output pins are filled with `None` (or `""` for the
`error` pin). Long inputs (>20k chars) are truncated before sending to
the LLM.
- **`ExecutionContext`**: Added `dry_run: bool = False` field; threaded
through `add_graph_execution()` so graph-level dry runs propagate to
every block execution.
- **`execute_block()` helper**: When `dry_run=True`, the function
short-circuits before any credential injection or credit checks, calls
`simulate_block()`, and returns a `[DRY RUN]`-prefixed
`BlockOutputResponse`.
- **`RunBlockTool`**: New `dry_run` boolean parameter.
- **`RunAgentTool`**: New `dry_run` boolean parameter; passes
`ExecutionContext(dry_run=True)` to graph execution.

### Tests

11 tests in `backend/copilot/tools/test_dry_run.py`:
- Correct output tuples from LLM response
- JSON retry logic (3 total calls when first 2 fail)
- All-retries-exhausted yields `SIMULATOR ERROR`
- Missing output pins filled with `None`/`""`
- No-client case
- Input truncation at 20k chars
- `execute_block(dry_run=True)` skips real `block.execute()`
- Response format: `[DRY RUN]` message, `success=True`
- `dry_run=False` unchanged (real path)
- `RunBlockTool` parameter presence
- `dry_run` kwarg forwarding

## Test plan
- [x] Run `pytest backend/copilot/tools/test_dry_run.py -v` — all 11
pass
- [x] Call `run_block` with `dry_run=true` in copilot; verify no real
API calls occur and output contains `[DRY RUN]`
- [x] Call `run_agent` with `dry_run=true`; verify execution is created
with `dry_run=True` in context
- [x] E2E: Simulate button (flask icon) present in builder alongside
play button
- [x] E2E: Simulated run labeled with "(Simulated)" suffix and badge in
Library
- [x] E2E: No credits consumed during dry-run
2026-03-24 22:36:47 +00:00

215 lines
7.5 KiB
Python

"""Tool for executing blocks directly."""
import logging
import uuid
from typing import Any
from backend.copilot.constants import COPILOT_NODE_EXEC_ID_SEPARATOR
from backend.copilot.context import get_current_permissions
from backend.copilot.model import ChatSession
from .base import BaseTool
from .helpers import (
BlockPreparation,
check_hitl_review,
execute_block,
prepare_block_for_execution,
)
from .models import BlockDetails, BlockDetailsResponse, ErrorResponse, ToolResponseBase
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
class RunBlockTool(BaseTool):
    """Tool for executing a block and returning its outputs.

    The tool validates its arguments, prepares the block through
    ``prepare_block_for_execution``, enforces the current block-level
    permissions, and then either simulates the block (``dry_run``), returns
    the block's schema when required inputs are missing, or runs the block
    after the human-in-the-loop review check.
    """

    @property
    def name(self) -> str:
        """Return the tool identifier, ``run_block``."""
        return "run_block"

    @property
    def description(self) -> str:
        """Return the usage instructions shown for this tool."""
        return (
            "Execute a block. IMPORTANT: Always get block_id from find_block first "
            "— do NOT guess or fabricate IDs. "
            "Call with empty input_data to see schema, then with data to execute. "
            "If review_required, use continue_run_block."
        )

    @property
    def parameters(self) -> dict[str, Any]:
        """Return the JSON-schema description of the tool's arguments.

        ``block_id`` and ``input_data`` are required; ``dry_run`` is optional.
        """
        return {
            "type": "object",
            "properties": {
                "block_id": {
                    "type": "string",
                    "description": "Block ID from find_block results.",
                },
                "input_data": {
                    "type": "object",
                    "description": "Input values. Use {} first to see schema.",
                },
                "dry_run": {
                    "type": "boolean",
                    "description": (
                        "When true, simulates block execution using an LLM without making any "
                        "real API calls or producing side effects. Useful for testing agent "
                        "wiring and previewing outputs. Default: false."
                    ),
                },
            },
            "required": ["block_id", "input_data"],
        }

    @property
    def requires_auth(self) -> bool:
        """Return ``True``: this tool is declared as requiring authentication."""
        return True

    async def _execute(
        self,
        user_id: str | None,
        session: ChatSession,
        **kwargs,
    ) -> ToolResponseBase:
        """Execute a block with the given input data.

        Args:
            user_id: User ID (required; a falsy value yields an ErrorResponse).
            session: Chat session; its ``session_id`` is echoed in responses.
            block_id: Block UUID to execute (keyword argument). A missing,
                blank, or non-string value yields an ErrorResponse.
            input_data: Input values for the block (keyword argument). Must
                be a dict.
            dry_run: Optional keyword argument, coerced with ``bool()``. When
                truthy, the block is passed to ``execute_block`` with
                ``dry_run=True`` and the schema-preview and HITL review steps
                below are skipped.

        Returns:
            ErrorResponse: Invalid arguments, missing authentication, a block
                rejected by the current permissions, or an output schema that
                cannot be generated.
            BlockDetailsResponse: Required non-credential inputs are missing,
                so the block's schema is returned instead of executing.
            ToolResponseBase: Otherwise, whatever ``prepare_block_for_execution``,
                ``check_hitl_review`` or ``execute_block`` return (e.g. block
                execution outputs or a missing-credentials response).
        """
        # dict.get only applies its default when the key is absent, so an
        # explicit null (or any non-string) must be handled here; treat it as
        # "not provided" rather than crashing on .strip().
        raw_block_id = kwargs.get("block_id")
        block_id = raw_block_id.strip() if isinstance(raw_block_id, str) else ""
        input_data = kwargs.get("input_data", {})
        dry_run = bool(kwargs.get("dry_run", False))
        session_id = session.session_id

        if not block_id:
            return ErrorResponse(
                message="Please provide a block_id",
                session_id=session_id,
            )

        if not isinstance(input_data, dict):
            return ErrorResponse(
                message="input_data must be an object",
                session_id=session_id,
            )

        if not user_id:
            return ErrorResponse(
                message="Authentication required",
                session_id=session_id,
            )

        logger.info("Preparing block %s for user %s", block_id, user_id)

        prep_or_err = await prepare_block_for_execution(
            block_id=block_id,
            input_data=input_data,
            user_id=user_id,
            session=session,
            session_id=session_id,
            dry_run=dry_run,
        )
        # The helper returns a ready-made tool response when preparation
        # cannot proceed; pass it straight back to the caller.
        if isinstance(prep_or_err, ToolResponseBase):
            return prep_or_err
        prep: BlockPreparation = prep_or_err

        # Check block-level permissions before execution.
        perms = get_current_permissions()
        if perms is not None and not perms.is_block_allowed(block_id, prep.block.name):
            # Only mention the identifier list when there is one; its meaning
            # (allow-list vs. block-list) depends on blocks_exclude.
            if not perms.blocks:
                available_hint = ""
            elif perms.blocks_exclude:
                available_hint = f"Blocked identifiers: {perms.blocks!r}. "
            else:
                available_hint = f"Allowed identifiers: {perms.blocks!r}. "
            return ErrorResponse(
                message=(
                    f"Block '{prep.block.name}' ({block_id}) is not permitted "
                    f"by the current execution permissions. {available_hint}"
                    "Use find_block to discover blocks that are allowed."
                ),
                session_id=session_id,
            )

        # Dry-run fast-path: skip credential/HITL checks — simulation never calls
        # the real service so credentials and review gates are not needed.
        # Input field validation (unrecognized fields) is already handled by
        # prepare_block_for_execution above.
        # Because this returns before the schema-preview step below, a dry run
        # with incomplete inputs is simulated rather than answered with the
        # block details.
        if dry_run:
            synthetic_node_exec_id = (
                f"{prep.synthetic_node_id}"
                f"{COPILOT_NODE_EXEC_ID_SEPARATOR}"
                f"{uuid.uuid4().hex[:8]}"
            )
            return await execute_block(
                block=prep.block,
                block_id=block_id,
                input_data=prep.input_data,
                user_id=user_id,
                session_id=session_id,
                node_exec_id=synthetic_node_exec_id,
                matched_credentials=prep.matched_credentials,
                dry_run=True,
            )

        # Show block details when required inputs are not yet provided.
        # This is run_block's two-step UX: first call returns the schema,
        # second call (with inputs) actually executes.
        if not (prep.required_non_credential_keys <= prep.provided_input_keys):
            try:
                output_schema: dict[str, Any] = prep.block.output_schema.jsonschema()
            except Exception as e:
                logger.warning(
                    "Failed to generate output schema for block %s: %s", block_id, e
                )
                return ErrorResponse(
                    message=f"Block '{prep.block.name}' has an invalid output schema",
                    error=str(e),
                    session_id=session_id,
                )

            credentials_meta = list(prep.matched_credentials.values())
            return BlockDetailsResponse(
                message=(
                    f"Block '{prep.block.name}' details. "
                    "Provide input_data matching the inputs schema to execute the block."
                ),
                session_id=session_id,
                block=BlockDetails(
                    id=block_id,
                    name=prep.block.name,
                    description=prep.block.description or "",
                    inputs=prep.input_schema,
                    outputs=output_schema,
                    credentials=credentials_meta,
                ),
                user_authenticated=True,
            )

        # The review helper either returns a tool response to hand back as-is
        # or a (node_exec_id, input_data) pair to execute with.
        hitl_or_err = await check_hitl_review(prep, user_id, session_id)
        if isinstance(hitl_or_err, ToolResponseBase):
            return hitl_or_err
        synthetic_node_exec_id, input_data = hitl_or_err

        # dry_run is always False here: the truthy case returned above.
        return await execute_block(
            block=prep.block,
            block_id=block_id,
            input_data=input_data,
            user_id=user_id,
            session_id=session_id,
            node_exec_id=synthetic_node_exec_id,
            matched_credentials=prep.matched_credentials,
            dry_run=dry_run,
        )