From 260bbb28bd715c09fb1a05f0fca0c8f01ef4e78c Mon Sep 17 00:00:00 2001
From: majdyz <zamil.majdy@agpt.co>
Date: Sat, 11 Apr 2026 12:06:47 +0000
Subject: [PATCH] fix(backend/copilot): log GraphValidationError race path for
 observability
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Both _run_agent and _schedule_agent silently swallowed the
GraphValidationError that triggers the credential-race fallback and
returned the inline setup card without logging it. That left the race
invisible to on-call — every recovered request looked identical to a
cold-cache first attempt, and credential-drift rates could not be
monitored.

Add a logger.warning at both race catch sites that captures user_id,
graph_id, and the raw node_errors so SRE can track how often the
prereq check drifts from the executor/scheduler re-validation. This
keeps the user-facing behaviour unchanged — the inline card is still
returned — but makes the race observable.
---
 .../backend/copilot/tools/run_agent.py        | 20 +++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/autogpt_platform/backend/backend/copilot/tools/run_agent.py b/autogpt_platform/backend/backend/copilot/tools/run_agent.py
index ff1f1902d1..91f24ed302 100644
--- a/autogpt_platform/backend/backend/copilot/tools/run_agent.py
+++ b/autogpt_platform/backend/backend/copilot/tools/run_agent.py
@@ -575,6 +575,16 @@ class RunAgentTool(BaseTool):
                 dry_run=dry_run,
             )
         except GraphValidationError as e:
+            # Reaching here means ``_check_prerequisites`` passed but the
+            # executor's validator re-raised milliseconds later — surface
+            # the race/drift so oncall can monitor how often this fires.
+            logger.warning(
+                "Race: GraphValidationError from add_graph_execution after "
+                "prereq check passed (user_id=%s graph_id=%s node_errors=%s)",
+                user_id,
+                graph.id,
+                dict(e.node_errors),
+            )
             creds_setup = self._build_setup_requirements_from_validation_error(
                 graph=graph,
                 error=e,
@@ -770,6 +780,16 @@ class RunAgentTool(BaseTool):
                 user_timezone=user_timezone,
             )
         except GraphValidationError as e:
+            # Reaching here means ``_check_prerequisites`` passed but the
+            # scheduler's re-validation raised — surface the race/drift so
+            # oncall can monitor how often this fires.
+            logger.warning(
+                "Race: GraphValidationError from add_execution_schedule after "
+                "prereq check passed (user_id=%s graph_id=%s node_errors=%s)",
+                user_id,
+                graph.id,
+                dict(e.node_errors),
+            )
             creds_setup = self._build_setup_requirements_from_validation_error(
                 graph=graph,
                 error=e,