fix(backend/copilot): address round-5 review comments

- Add "do NOT redirect to the Builder for credential setup" guardrail to
  run_agent description, making it symmetric with create_agent/edit_agent
- Scrub error message text from race-path warning logs; log only node IDs
  and field names to avoid leaking credential IDs/provider details
- Add code comment explaining the None-vs-filtering trade-off in
  _build_setup_requirements_from_validation_error
- Add E2E tests for structural-error fallback on both run and schedule
  paths (verifies ErrorResponse returned, not setup_requirements card)
Author: majdyz
Date: 2026-04-13 03:54:31 +00:00
parent c4d8293fad
commit 4ea5cd5f7f
2 changed files with 112 additions and 6 deletions

View File

@@ -108,7 +108,8 @@ class RunAgentTool(BaseTool):
     def description(self) -> str:
         return (
             "Run or schedule an agent. Automatically checks inputs and credentials "
-            "and surfaces the inline credentials-setup card if anything is missing. "
+            "and surfaces the inline credentials-setup card if anything is missing; "
+            "do NOT redirect to the Builder for credential setup. "
             "Identify by username_agent_slug ('user/agent') or library_agent_id. "
             "For scheduling, provide schedule_name + cron."
         )
@@ -396,10 +397,17 @@ class RunAgentTool(BaseTool):
         ):
             return None
-        # Show all credential fields as missing — in the race case the
+        # Show ALL credential fields as missing — in the race case the
         # previously-matched credentials have since become invalid, so
         # the user needs to reconnect all of them. Passing ``None``
         # means no field is treated as "already connected".
+        #
+        # Trade-off: we could narrow to only the failing nodes in
+        # ``error.node_errors``, but we cannot trust the old credential
+        # mapping (those creds were valid at prereq time but are now
+        # gone/invalid), so showing all is safer than showing a partial
+        # list that might still contain broken entries. The user sees
+        # every account that may need attention in a single card.
         credentials_dict = build_missing_credentials_from_graph(graph, None)
         return SetupRequirementsResponse(
             message=(
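A note for readers of the hunk above: the ``None`` second argument is the whole point of the race path. Below is a minimal sketch of the intended semantics, with a hypothetical helper name and data shapes assumed for illustration; the real build_missing_credentials_from_graph is not shown in this diff.

from typing import Optional

def missing_credentials(
    required: dict[str, set[str]],             # node_id -> credential field names
    connected: Optional[dict[str, set[str]]],  # None => nothing counts as connected
) -> dict[str, set[str]]:
    if connected is None:
        # Race path: report every credential field as missing.
        return {node: set(fields) for node, fields in required.items()}
    # Normal path: subtract the fields that already have valid credentials.
    return {
        node: fields - connected.get(node, set())
        for node, fields in required.items()
        if fields - connected.get(node, set())
    }

Under these assumed shapes, the None branch is exactly the "show all" behavior the comment describes, and the trade-off is visible: the filtered branch would need a trustworthy connected-mapping, which the race has just invalidated.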
@@ -578,12 +586,15 @@ class RunAgentTool(BaseTool):
             # Reaching here means ``_check_prerequisites`` passed but the
             # executor's validator re-raised milliseconds later — surface
             # the race/drift so oncall can monitor how often this fires.
+            # Log only node IDs and field names — omit the error message
+            # text, which may contain credential IDs or provider details
+            # from the credential store (e.g. CredentialNotFoundError).
             logger.warning(
                 "Race: GraphValidationError from add_graph_execution after "
-                "prereq check passed (user_id=%s graph_id=%s node_errors=%s)",
+                "prereq check passed (user_id=%s graph_id=%s failing_fields=%s)",
                 user_id,
                 graph.id,
-                dict(e.node_errors),
+                {node_id: list(fields) for node_id, fields in e.node_errors.items()},
             )
             creds_setup = self._build_setup_requirements_from_validation_error(
                 graph=graph,
@@ -783,12 +794,15 @@ class RunAgentTool(BaseTool):
             # Reaching here means ``_check_prerequisites`` passed but the
             # scheduler's re-validation raised — surface the race/drift so
             # oncall can monitor how often this fires.
+            # Log only node IDs and field names — omit the error message
+            # text, which may contain credential IDs or provider details
+            # from the credential store (e.g. CredentialNotFoundError).
             logger.warning(
                 "Race: GraphValidationError from add_execution_schedule after "
-                "prereq check passed (user_id=%s graph_id=%s node_errors=%s)",
+                "prereq check passed (user_id=%s graph_id=%s failing_fields=%s)",
                 user_id,
                 graph.id,
-                dict(e.node_errors),
+                {node_id: list(fields) for node_id, fields in e.node_errors.items()},
             )
             creds_setup = self._build_setup_requirements_from_validation_error(
                 graph=graph,
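Both warning hunks apply the same scrubbing. A quick before/after on a sample mapping, using the node_errors shape the new tests below construct (node ID -> field -> message); the sample message text is made up for illustration:

node_errors = {
    "node-1": {"credentials": "CredentialNotFoundError: cred_abc123 (github)"},
}

# Before: dict(e.node_errors) carried the message text into the log,
# leaking the credential ID and provider (values fabricated here).
print(dict(node_errors))
# {'node-1': {'credentials': 'CredentialNotFoundError: cred_abc123 (github)'}}

# After: iterating the inner dict keeps only the field names.
print({node_id: list(fields) for node_id, fields in node_errors.items()})
# {'node-1': ['credentials']}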

View File

@@ -652,6 +652,53 @@ async def test_run_agent_schedule_credential_race_returns_setup_card(
     assert result_data["setup_info"]["user_readiness"]["ready_to_run"] is False
+
+
+@pytest.mark.asyncio(loop_scope="session")
+async def test_run_agent_schedule_structural_error_returns_error_response(
+    setup_test_data,
+):
+    """End-to-end: if the scheduler raises a GraphValidationError with purely
+    structural (non-credential) errors after _check_prerequisites passed, the
+    tool must return an ErrorResponse with error='graph_validation_failed',
+    not a setup_requirements card."""
+    user = setup_test_data["user"]
+    store_submission = setup_test_data["store_submission"]
+    tool = RunAgentTool()
+    agent_marketplace_id = f"{user.email.split('@')[0]}/{store_submission.slug}"
+    session = make_session(user_id=user.id)
+
+    fake_scheduler = AsyncMock()
+    fake_scheduler.add_execution_schedule.side_effect = GraphValidationError(
+        message="Graph is invalid",
+        node_errors={"some-node-id": {"url": "Input field 'url' is required"}},
+    )
+
+    with patch(
+        "backend.copilot.tools.run_agent.get_scheduler_client",
+        return_value=fake_scheduler,
+    ):
+        response = await tool.execute(
+            user_id=user.id,
+            session_id=str(uuid.uuid4()),
+            tool_call_id=str(uuid.uuid4()),
+            username_agent_slug=agent_marketplace_id,
+            inputs={"test_input": "value"},
+            schedule_name="My Schedule",
+            cron="0 9 * * *",
+            dry_run=False,
+            session=session,
+        )
+
+    assert response is not None
+    assert isinstance(response.output, str)
+    result_data = orjson.loads(response.output)
+    # Structural errors must fall through to the plain error path — the
+    # user should see the validation error, not the credential setup card.
+    assert result_data.get("error") == "graph_validation_failed"
+    assert result_data.get("type") != "setup_requirements"
+
+
 @pytest.mark.asyncio(loop_scope="session")
 async def test_run_agent_execution_credential_race_returns_setup_card(
     setup_test_data,
@@ -695,3 +742,48 @@ async def test_run_agent_execution_credential_race_returns_setup_card(
     assert result_data.get("type") == "setup_requirements"
     assert "setup_info" in result_data
     assert result_data["setup_info"]["user_readiness"]["ready_to_run"] is False
+
+
+@pytest.mark.asyncio(loop_scope="session")
+async def test_run_agent_execution_structural_error_returns_error_response(
+    setup_test_data,
+):
+    """End-to-end: if the executor raises a GraphValidationError with purely
+    structural (non-credential) errors after _check_prerequisites passed, the
+    tool must return an ErrorResponse with error='graph_validation_failed',
+    not a setup_requirements card, and must not swallow the error silently."""
+    user = setup_test_data["user"]
+    store_submission = setup_test_data["store_submission"]
+    tool = RunAgentTool()
+    agent_marketplace_id = f"{user.email.split('@')[0]}/{store_submission.slug}"
+    session = make_session(user_id=user.id)
+
+    with patch(
+        "backend.copilot.tools.run_agent.execution_utils.add_graph_execution",
+        new_callable=AsyncMock,
+        side_effect=GraphValidationError(
+            message="Graph is invalid",
+            node_errors={
+                "some-node-id": {"url": "Input field 'url' is required"},
+            },
+        ),
+    ):
+        response = await tool.execute(
+            user_id=user.id,
+            session_id=str(uuid.uuid4()),
+            tool_call_id=str(uuid.uuid4()),
+            username_agent_slug=agent_marketplace_id,
+            inputs={"test_input": "value"},
+            dry_run=False,
+            session=session,
+        )
+
+    assert response is not None
+    assert isinstance(response.output, str)
+    result_data = orjson.loads(response.output)
+    # Structural errors must fall through to the plain error path — the
+    # user should see the validation error, not the credential setup card.
+    assert result_data.get("error") == "graph_validation_failed"
+    assert result_data.get("type") != "setup_requirements"
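For context on what these two tests pin down, here is a runnable toy model of the credential-vs-structural dispatch; every name below, including the "credentials" field marker, is an assumption reconstructed from the diff, not the actual implementation:

class GraphValidationError(Exception):
    """Toy stand-in for the real exception; shape matches the tests above."""

    def __init__(self, message: str, node_errors: dict[str, dict[str, str]]):
        super().__init__(message)
        self.node_errors = node_errors


CREDENTIAL_FIELDS = {"credentials"}  # assumed marker for credential inputs


def dispatch(error: GraphValidationError) -> dict:
    # Mirrors the diff's flow: _build_setup_requirements_from_validation_error
    # returns None for structural-only errors, and the caller then falls
    # through to a plain ErrorResponse instead of a setup card.
    has_credential_error = any(
        field in CREDENTIAL_FIELDS
        for fields in error.node_errors.values()
        for field in fields
    )
    if has_credential_error:
        return {"type": "setup_requirements"}  # inline credentials-setup card
    return {"error": "graph_validation_failed"}  # plain error path


# Structural-only errors (like the 'url' input error above) take the error path:
assert dispatch(
    GraphValidationError("Graph is invalid", {"n1": {"url": "required"}})
) == {"error": "graph_validation_failed"}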