fix(copilot): require fresh decompose_goal+approval per build request

Previous gate (has_pending_decomposition) only blocked when decompose_goal was called but not yet approved. It missed the case where the LLM skipped decompose_goal entirely on follow-up requests — if a session already had a prior completed decomposition, the gate returned False and let a fresh create_agent through without any new plan/approval. Tighter rule (needs_build_plan_approval): the most recent user message must be an approval (contains 'approved', case-insensitive) AND a decompose_goal tool_call must exist in the session before that approval. This forces the LLM to call decompose_goal for every new build request, end its turn, and only resume building after the user responds.
2026-04-30 03:00:41 -04:00 · 2026-04-17 20:11:51 +07:00
parent 162c3f09a4
commit d0cb1e981e
3 changed files with 125 additions and 82 deletions
--- a/autogpt_platform/backend/backend/copilot/tools/create_agent.py
+++ b/autogpt_platform/backend/backend/copilot/tools/create_agent.py
@@ -8,7 +8,7 @@ from backend.copilot.model import ChatSession

 from .agent_generator.pipeline import fetch_library_agents, fix_validate_and_save
 from .base import BaseTool
-from .decompose_goal import has_pending_decomposition
+from .decompose_goal import needs_build_plan_approval
 from .models import ErrorResponse, ToolResponseBase

 logger = logging.getLogger(__name__)
@@ -71,22 +71,22 @@ class CreateAgentTool(BaseTool):
    ) -> ToolResponseBase:
        session_id = session.session_id if session else None

-        # Enforce the decompose_goal approval gate. The LLM is instructed via
-        # ``agent_generation_guide.md`` to STOP after decompose_goal until the
-        # user approves, but natural-language instructions alone are not
-        # reliable — without this gate, the LLM has been observed calling
-        # ``decompose_goal`` and ``create_agent`` in the same turn, building
-        # the agent while the user is still mid-countdown.
-        if session and has_pending_decomposition(session):
+        # Enforce the decompose_goal approval gate at the code level.
+        # Prompt-only "STOP" is unreliable: the LLM has been observed
+        # (a) calling decompose_goal + create_agent in the same turn and
+        # (b) skipping decompose_goal entirely on follow-up build requests.
+        # Require that the most recent user message is an approval AND a
+        # decompose_goal call exists before it in the session.
+        if session and needs_build_plan_approval(session):
            return ErrorResponse(
                message=(
-                    "A build plan is awaiting user approval. Do not call "
-                    "create_agent until the user responds with Approved "
-                    "(or Approved with modifications). End your turn now — "
-                    "the platform will resume the conversation once the user "
-                    "responds."
+                    "You must call decompose_goal first and wait for user "
+                    "approval before calling create_agent. Call decompose_goal "
+                    "now with the build steps, then end your turn — the "
+                    "platform will resume the conversation after the user "
+                    "responds with Approved (or Approved with modifications)."
                ),
-                error="decomposition_pending_approval",
+                error="build_plan_approval_required",
                session_id=session_id,
            )

--- a/autogpt_platform/backend/backend/copilot/tools/decompose_goal.py
+++ b/autogpt_platform/backend/backend/copilot/tools/decompose_goal.py
@@ -45,34 +45,52 @@ _CANCEL_KEY_TTL_SECONDS = AUTO_APPROVE_SERVER_SECONDS + 30
 _pending_auto_approvals: dict[str, asyncio.Task] = {}


-def has_pending_decomposition(session: ChatSession) -> bool:
-    """Return True if the most recent ``decompose_goal`` tool call in the
-    session has not yet been followed by a user message.
+_APPROVAL_MARKERS = ("approved",)

-    Used by build tools (create_agent, edit_agent, fix_agent_graph, …) to
-    enforce the "STOP — do not proceed until the user approves" gate from
-    ``agent_generation_guide.md`` at the *code* level. The natural-language
-    instruction alone is not enough — the LLM has been observed calling
-    ``decompose_goal`` and ``create_agent`` in the same turn, building the
-    agent while the user is still mid-countdown. This predicate lets build
-    tools refuse when approval is still pending.

-    Scans messages in reverse, returning:
-    - True  → a ``decompose_goal`` tool call exists with no user message after it
-    - False → no ``decompose_goal`` call, or the most recent one has already
-              been answered by a user message (Approve / Modify / other)
+def needs_build_plan_approval(session: ChatSession) -> bool:
+    """Return True if the current build must be blocked pending decomposition
+    approval.
+
+    Enforces the "STOP — do not proceed until the user approves" gate from
+    ``agent_generation_guide.md`` at the *code* level. Natural-language
+    instruction alone is not enough — the LLM has been observed:
+    1. Calling ``decompose_goal`` and ``create_agent`` in the same turn
+       (building while the user is mid-countdown), AND
+    2. Skipping ``decompose_goal`` entirely and jumping straight to
+       ``create_agent`` on follow-up build requests when the session
+       already contains a prior resolved decomposition.
+
+    Rule: the most recent user message must be an approval message
+    (contains "approved", case-insensitive) AND a ``decompose_goal`` tool
+    call must exist somewhere in the session before that approval.
+
+    - Fresh "build me X" request without decomposition → block.
+    - New build request after a previous completed build → block until
+      a new decompose_goal + approval cycle runs.
+    - decompose_goal called but user hasn't responded yet → block.
+    - User said "Approved" after decompose_goal → allow.
    """
+    last_user_idx = -1
    for i in range(len(session.messages) - 1, -1, -1):
+        if session.messages[i].role == "user":
+            last_user_idx = i
+            break
+    if last_user_idx < 0:
+        return True
+
+    last_user_content = (session.messages[last_user_idx].content or "").lower()
+    if not any(marker in last_user_content for marker in _APPROVAL_MARKERS):
+        return True
+
+    for i in range(last_user_idx - 1, -1, -1):
        msg = session.messages[i]
-        if msg.role == "user":
-            # A user message without a prior decompose_goal = nothing pending.
-            return False
        if msg.role == "assistant" and msg.tool_calls:
            for tc in msg.tool_calls:
                name = (tc.get("function") or {}).get("name") or tc.get("name")
                if name == "decompose_goal":
-                    return True
-    return False
+                    return False
+    return True


 def _no_user_action_since(baseline_index: int):
--- a/autogpt_platform/backend/backend/copilot/tools/decompose_goal_test.py
+++ b/autogpt_platform/backend/backend/copilot/tools/decompose_goal_test.py
@@ -16,7 +16,7 @@ from .decompose_goal import (
    DecomposeGoalTool,
    _no_user_action_since,
    cancel_auto_approve,
-    has_pending_decomposition,
+    needs_build_plan_approval,
 )
 from .models import ErrorResponse, TaskDecompositionResponse

@@ -347,7 +347,7 @@ def test_predicate_handles_messages_with_none_sequence():


 # ---------------------------------------------------------------------------
-# has_pending_decomposition — build-tool approval gate
+# needs_build_plan_approval — build-tool approval gate
 # ---------------------------------------------------------------------------


@@ -359,67 +359,92 @@ def _decompose_tool_call() -> dict:
    }


-def test_pending_decomposition_true_when_no_user_after_call():
-    """LLM called decompose_goal but user hasn't responded yet."""
+def test_needs_approval_blocks_when_no_decompose_in_session():
+    """LLM tries to build without calling decompose_goal at all."""
    session = make_session(_USER_ID)
    session.messages.append(ChatMessage(role="user", content="Build me an agent"))
-    session.messages.append(
-        ChatMessage(role="assistant", content="", tool_calls=[_decompose_tool_call()])
-    )
-    session.messages.append(ChatMessage(role="tool", content="{...plan...}"))
-    assert has_pending_decomposition(session) is True
+    assert needs_build_plan_approval(session) is True


-def test_pending_decomposition_false_after_user_approves():
-    """User approved — downstream tools should be unblocked."""
+def test_needs_approval_blocks_when_last_user_is_not_approval():
+    """Even with a decompose_goal earlier, a fresh non-approval user message
+    starts a new build flow that requires its own decomposition."""
    session = make_session(_USER_ID)
-    session.messages.append(ChatMessage(role="user", content="Build me an agent"))
-    session.messages.append(
-        ChatMessage(role="assistant", content="", tool_calls=[_decompose_tool_call()])
-    )
-    session.messages.append(ChatMessage(role="tool", content="{...plan...}"))
-    session.messages.append(
-        ChatMessage(role="user", content="Approved. Please build the agent.")
-    )
-    assert has_pending_decomposition(session) is False
-
-
-def test_pending_decomposition_false_when_no_decompose_call():
-    """Sessions without decompose_goal are never blocked."""
-    session = make_session(_USER_ID)
-    session.messages.append(ChatMessage(role="user", content="Change the prompt"))
-    session.messages.append(ChatMessage(role="assistant", content="Sure, will do."))
-    assert has_pending_decomposition(session) is False
-
-
-def test_pending_decomposition_ignores_assistant_text_without_tool_calls():
-    """Assistant's text after decompose_goal doesn't count as user action."""
-    session = make_session(_USER_ID)
-    session.messages.append(ChatMessage(role="user", content="Build me an agent"))
-    session.messages.append(
-        ChatMessage(role="assistant", content="", tool_calls=[_decompose_tool_call()])
-    )
-    session.messages.append(ChatMessage(role="tool", content="{...plan...}"))
-    session.messages.append(ChatMessage(role="assistant", content="Plan is ready."))
-    assert has_pending_decomposition(session) is True
-
-
-def test_pending_decomposition_scans_most_recent_call():
-    """Only the most recent decompose_goal matters; older ones are irrelevant."""
-    session = make_session(_USER_ID)
-    # Old cycle: decomposed, approved, built.
    session.messages.append(ChatMessage(role="user", content="Build v1"))
    session.messages.append(
        ChatMessage(role="assistant", content="", tool_calls=[_decompose_tool_call()])
    )
    session.messages.append(ChatMessage(role="tool", content="{plan v1}"))
    session.messages.append(ChatMessage(role="user", content="Approved"))
-    # New cycle: decomposed, still awaiting approval.
+    session.messages.append(ChatMessage(role="assistant", content="agent built."))
+    # User asks for a second build — LLM must call decompose_goal again.
+    session.messages.append(ChatMessage(role="user", content="Now build v2"))
+    assert needs_build_plan_approval(session) is True
+
+
+def test_needs_approval_allows_when_user_approved_after_decompose():
+    """User said "Approved" after a decompose_goal → build may proceed."""
+    session = make_session(_USER_ID)
+    session.messages.append(ChatMessage(role="user", content="Build me an agent"))
    session.messages.append(
        ChatMessage(role="assistant", content="", tool_calls=[_decompose_tool_call()])
    )
-    session.messages.append(ChatMessage(role="tool", content="{plan v2}"))
-    assert has_pending_decomposition(session) is True
+    session.messages.append(ChatMessage(role="tool", content="{plan}"))
+    session.messages.append(
+        ChatMessage(role="user", content="Approved. Please build the agent.")
+    )
+    assert needs_build_plan_approval(session) is False
+
+
+def test_needs_approval_allows_modified_approval():
+    """Approved with modifications also counts as approval."""
+    session = make_session(_USER_ID)
+    session.messages.append(ChatMessage(role="user", content="Build me an agent"))
+    session.messages.append(
+        ChatMessage(role="assistant", content="", tool_calls=[_decompose_tool_call()])
+    )
+    session.messages.append(ChatMessage(role="tool", content="{plan}"))
+    session.messages.append(
+        ChatMessage(
+            role="user",
+            content="Approved with modifications. Please build the agent following these steps: ...",
+        )
+    )
+    assert needs_build_plan_approval(session) is False
+
+
+def test_needs_approval_blocks_same_turn_decompose_and_build():
+    """LLM calls decompose_goal then immediately tries create_agent in the
+    same turn — the last user message is still the original build request,
+    not an approval."""
+    session = make_session(_USER_ID)
+    session.messages.append(ChatMessage(role="user", content="Build me an agent"))
+    session.messages.append(
+        ChatMessage(role="assistant", content="", tool_calls=[_decompose_tool_call()])
+    )
+    session.messages.append(ChatMessage(role="tool", content="{plan}"))
+    # No user message yet — still mid-countdown.
+    assert needs_build_plan_approval(session) is True
+
+
+def test_needs_approval_blocks_approval_without_prior_decompose():
+    """User spontaneously says "Approved" but no decompose_goal was ever
+    called — the LLM did not show a plan, so the gate stays closed."""
+    session = make_session(_USER_ID)
+    session.messages.append(ChatMessage(role="user", content="Approved"))
+    assert needs_build_plan_approval(session) is True
+
+
+def test_needs_approval_case_insensitive():
+    """Approval detection is case-insensitive."""
+    session = make_session(_USER_ID)
+    session.messages.append(ChatMessage(role="user", content="Build me an agent"))
+    session.messages.append(
+        ChatMessage(role="assistant", content="", tool_calls=[_decompose_tool_call()])
+    )
+    session.messages.append(ChatMessage(role="tool", content="{plan}"))
+    session.messages.append(ChatMessage(role="user", content="APPROVED, go."))
+    assert needs_build_plan_approval(session) is False


 # ---------------------------------------------------------------------------