mirror of
https://github.com/Significant-Gravitas/AutoGPT.git
synced 2026-04-08 03:00:28 -04:00
## Why Agent generation and building needs a way to test-run agents without requiring real credentials or producing side effects. Currently, every execution hits real APIs, consumes credits, and requires valid credentials — making it impossible to debug or validate agent graphs during the build phase without real consequences. ## Summary Adds a `dry_run` execution mode to the copilot's `run_block` and `run_agent` tools. When `dry_run=True`, every block execution is simulated by an LLM instead of calling the real service — no real API calls, no credentials consumed, no side effects. Inspired by [Significant-Gravitas/agent-simulator](https://github.com/Significant-Gravitas/agent-simulator). ### How it works - **`backend/executor/simulator.py`** (new): `simulate_block()` builds a prompt from the block's name, description, input/output schemas, and actual input values, then calls `gpt-4o-mini` via the existing OpenRouter client with JSON mode. Retries up to 5 times on JSON parse failures. Missing output pins are filled with `None` (or `""` for the `error` pin). Long inputs (>20k chars) are truncated before sending to the LLM. - **`ExecutionContext`**: Added `dry_run: bool = False` field; threaded through `add_graph_execution()` so graph-level dry runs propagate to every block execution. - **`execute_block()` helper**: When `dry_run=True`, the function short-circuits before any credential injection or credit checks, calls `simulate_block()`, and returns a `[DRY RUN]`-prefixed `BlockOutputResponse`. - **`RunBlockTool`**: New `dry_run` boolean parameter. - **`RunAgentTool`**: New `dry_run` boolean parameter; passes `ExecutionContext(dry_run=True)` to graph execution. 
### Tests 11 tests in `backend/copilot/tools/test_dry_run.py`: - Correct output tuples from LLM response - JSON retry logic (3 total calls when first 2 fail) - All-retries-exhausted yields `SIMULATOR ERROR` - Missing output pins filled with `None`/`""` - No-client case - Input truncation at 20k chars - `execute_block(dry_run=True)` skips real `block.execute()` - Response format: `[DRY RUN]` message, `success=True` - `dry_run=False` unchanged (real path) - `RunBlockTool` parameter presence - `dry_run` kwarg forwarding ## Test plan - [x] Run `pytest backend/copilot/tools/test_dry_run.py -v` — all 11 pass - [x] Call `run_block` with `dry_run=true` in copilot; verify no real API calls occur and output contains `[DRY RUN]` - [x] Call `run_agent` with `dry_run=true`; verify execution is created with `dry_run=True` in context - [x] E2E: Simulate button (flask icon) present in builder alongside play button - [x] E2E: Simulated run labeled with "(Simulated)" suffix and badge in Library - [x] E2E: No credits consumed during dry-run
215 lines · 7.5 KiB · Python
"""Tool for executing blocks directly."""
|
|
|
|
import logging
|
|
import uuid
|
|
from typing import Any
|
|
|
|
from backend.copilot.constants import COPILOT_NODE_EXEC_ID_SEPARATOR
|
|
from backend.copilot.context import get_current_permissions
|
|
from backend.copilot.model import ChatSession
|
|
|
|
from .base import BaseTool
|
|
from .helpers import (
|
|
BlockPreparation,
|
|
check_hitl_review,
|
|
execute_block,
|
|
prepare_block_for_execution,
|
|
)
|
|
from .models import BlockDetails, BlockDetailsResponse, ErrorResponse, ToolResponseBase
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
class RunBlockTool(BaseTool):
    """Tool for executing a block and returning its outputs."""

    @property
    def name(self) -> str:
        return "run_block"

    @property
    def description(self) -> str:
        return (
            "Execute a block. IMPORTANT: Always get block_id from find_block first "
            "— do NOT guess or fabricate IDs. "
            "Call with empty input_data to see schema, then with data to execute. "
            "If review_required, use continue_run_block."
        )

    @property
    def parameters(self) -> dict[str, Any]:
        # JSON Schema describing the tool-call arguments the LLM may supply.
        return {
            "type": "object",
            "properties": {
                "block_id": {
                    "type": "string",
                    "description": "Block ID from find_block results.",
                },
                "input_data": {
                    "type": "object",
                    "description": "Input values. Use {} first to see schema.",
                },
                "dry_run": {
                    "type": "boolean",
                    "description": (
                        "When true, simulates block execution using an LLM without making any "
                        "real API calls or producing side effects. Useful for testing agent "
                        "wiring and previewing outputs. Default: false."
                    ),
                },
            },
            "required": ["block_id", "input_data"],
        }

    @property
    def requires_auth(self) -> bool:
        return True

    async def _execute(
        self,
        user_id: str | None,
        session: ChatSession,
        **kwargs,
    ) -> ToolResponseBase:
        """Execute a block with the given input data.

        Args:
            user_id: User ID (required)
            session: Chat session
            block_id: Block UUID to execute
            input_data: Input values for the block
            dry_run: When True, simulate execution with an LLM instead of
                calling the real service (skips credential/HITL checks)

        Returns:
            BlockOutputResponse: Block execution outputs
            SetupRequirementsResponse: Missing credentials
            ErrorResponse: Error message
        """
        # `or ""` also covers an explicit `block_id: null` from the model;
        # a bare .get(..., "") default would let None through and crash on
        # .strip() instead of returning the friendly error below.
        block_id = (kwargs.get("block_id") or "").strip()
        input_data = kwargs.get("input_data", {})
        dry_run = bool(kwargs.get("dry_run", False))
        session_id = session.session_id

        if not block_id:
            return ErrorResponse(
                message="Please provide a block_id",
                session_id=session_id,
            )

        if not isinstance(input_data, dict):
            return ErrorResponse(
                message="input_data must be an object",
                session_id=session_id,
            )

        if not user_id:
            return ErrorResponse(
                message="Authentication required",
                session_id=session_id,
            )

        logger.info("Preparing block %s for user %s", block_id, user_id)

        # Resolve the block, validate input fields, and match credentials.
        # Returns a ToolResponseBase directly on any preparation failure.
        prep_or_err = await prepare_block_for_execution(
            block_id=block_id,
            input_data=input_data,
            user_id=user_id,
            session=session,
            session_id=session_id,
            dry_run=dry_run,
        )
        if isinstance(prep_or_err, ToolResponseBase):
            return prep_or_err
        prep: BlockPreparation = prep_or_err

        # Check block-level permissions before execution.
        perms = get_current_permissions()
        if perms is not None and not perms.is_block_allowed(block_id, prep.block.name):
            # Surface the allow/deny list so the model can self-correct.
            available_hint = (
                f"Allowed identifiers: {perms.blocks!r}. "
                if not perms.blocks_exclude and perms.blocks
                else (
                    f"Blocked identifiers: {perms.blocks!r}. "
                    if perms.blocks_exclude and perms.blocks
                    else ""
                )
            )
            return ErrorResponse(
                message=(
                    f"Block '{prep.block.name}' ({block_id}) is not permitted "
                    f"by the current execution permissions. {available_hint}"
                    "Use find_block to discover blocks that are allowed."
                ),
                session_id=session_id,
            )

        # Dry-run fast-path: skip credential/HITL checks — simulation never calls
        # the real service so credentials and review gates are not needed.
        # Input field validation (unrecognized fields) is already handled by
        # prepare_block_for_execution above.
        if dry_run:
            synthetic_node_exec_id = (
                f"{prep.synthetic_node_id}"
                f"{COPILOT_NODE_EXEC_ID_SEPARATOR}"
                f"{uuid.uuid4().hex[:8]}"
            )
            return await execute_block(
                block=prep.block,
                block_id=block_id,
                input_data=prep.input_data,
                user_id=user_id,
                session_id=session_id,
                node_exec_id=synthetic_node_exec_id,
                matched_credentials=prep.matched_credentials,
                dry_run=True,
            )

        # Show block details when required inputs are not yet provided.
        # This is run_block's two-step UX: first call returns the schema,
        # second call (with inputs) actually executes.
        if not (prep.required_non_credential_keys <= prep.provided_input_keys):
            try:
                output_schema: dict[str, Any] = prep.block.output_schema.jsonschema()
            except Exception as e:
                logger.warning(
                    "Failed to generate output schema for block %s: %s", block_id, e
                )
                return ErrorResponse(
                    message=f"Block '{prep.block.name}' has an invalid output schema",
                    error=str(e),
                    session_id=session_id,
                )

            credentials_meta = list(prep.matched_credentials.values())
            return BlockDetailsResponse(
                message=(
                    f"Block '{prep.block.name}' details. "
                    "Provide input_data matching the inputs schema to execute the block."
                ),
                session_id=session_id,
                block=BlockDetails(
                    id=block_id,
                    name=prep.block.name,
                    description=prep.block.description or "",
                    inputs=prep.input_schema,
                    outputs=output_schema,
                    credentials=credentials_meta,
                ),
                user_authenticated=True,
            )

        # Human-in-the-loop gate: may return a review-required response, or
        # the (node_exec_id, possibly-amended input_data) pair on approval.
        hitl_or_err = await check_hitl_review(prep, user_id, session_id)
        if isinstance(hitl_or_err, ToolResponseBase):
            return hitl_or_err
        synthetic_node_exec_id, input_data = hitl_or_err

        return await execute_block(
            block=prep.block,
            block_id=block_id,
            input_data=input_data,
            user_id=user_id,
            session_id=session_id,
            node_exec_id=synthetic_node_exec_id,
            matched_credentials=prep.matched_credentials,
            dry_run=dry_run,
        )