fix(copilot): require fresh decompose_goal+approval per build request

Previous gate (has_pending_decomposition) only blocked when decompose_goal
was called but not yet approved. It missed the case where the LLM skipped
decompose_goal entirely on follow-up requests — if a session already had
a prior completed decomposition, the gate returned False and let a fresh
create_agent through without any new plan/approval.

Tighter rule (needs_build_plan_approval): the most recent user message
must be an approval (contains 'approved', case-insensitive) AND a
decompose_goal tool_call must exist in the session before that approval.
This forces the LLM to call decompose_goal for every new build request,
end its turn, and only resume building after the user responds.
This commit is contained in:
Zamil Majdy
2026-04-17 20:11:51 +07:00
parent 162c3f09a4
commit d0cb1e981e
3 changed files with 125 additions and 82 deletions

View File

@@ -8,7 +8,7 @@ from backend.copilot.model import ChatSession
from .agent_generator.pipeline import fetch_library_agents, fix_validate_and_save
from .base import BaseTool
from .decompose_goal import has_pending_decomposition
from .decompose_goal import needs_build_plan_approval
from .models import ErrorResponse, ToolResponseBase
logger = logging.getLogger(__name__)
@@ -71,22 +71,22 @@ class CreateAgentTool(BaseTool):
) -> ToolResponseBase:
session_id = session.session_id if session else None
# Enforce the decompose_goal approval gate. The LLM is instructed via
# ``agent_generation_guide.md`` to STOP after decompose_goal until the
# user approves, but natural-language instructions alone are not
# reliable — without this gate, the LLM has been observed calling
# ``decompose_goal`` and ``create_agent`` in the same turn, building
# the agent while the user is still mid-countdown.
if session and has_pending_decomposition(session):
# Enforce the decompose_goal approval gate at the code level.
# Prompt-only "STOP" is unreliable: the LLM has been observed
# (a) calling decompose_goal + create_agent in the same turn and
# (b) skipping decompose_goal entirely on follow-up build requests.
# Require that the most recent user message is an approval AND a
# decompose_goal call exists before it in the session.
if session and needs_build_plan_approval(session):
return ErrorResponse(
message=(
"A build plan is awaiting user approval. Do not call "
"create_agent until the user responds with Approved "
"(or Approved with modifications). End your turn now "
"the platform will resume the conversation once the user "
"responds."
"You must call decompose_goal first and wait for user "
"approval before calling create_agent. Call decompose_goal "
"now with the build steps, then end your turn — the "
"platform will resume the conversation after the user "
"responds with Approved (or Approved with modifications)."
),
error="decomposition_pending_approval",
error="build_plan_approval_required",
session_id=session_id,
)

View File

@@ -45,34 +45,52 @@ _CANCEL_KEY_TTL_SECONDS = AUTO_APPROVE_SERVER_SECONDS + 30
_pending_auto_approvals: dict[str, asyncio.Task] = {}
def has_pending_decomposition(session: ChatSession) -> bool:
"""Return True if the most recent ``decompose_goal`` tool call in the
session has not yet been followed by a user message.
_APPROVAL_MARKERS = ("approved",)
Used by build tools (create_agent, edit_agent, fix_agent_graph, …) to
enforce the "STOP — do not proceed until the user approves" gate from
``agent_generation_guide.md`` at the *code* level. The natural-language
instruction alone is not enough — the LLM has been observed calling
``decompose_goal`` and ``create_agent`` in the same turn, building the
agent while the user is still mid-countdown. This predicate lets build
tools refuse when approval is still pending.
Scans messages in reverse, returning:
- True → a ``decompose_goal`` tool call exists with no user message after it
- False → no ``decompose_goal`` call, or the most recent one has already
been answered by a user message (Approve / Modify / other)
def needs_build_plan_approval(session: ChatSession) -> bool:
"""Return True if the current build must be blocked pending decomposition
approval.
Enforces the "STOP — do not proceed until the user approves" gate from
``agent_generation_guide.md`` at the *code* level. Natural-language
instruction alone is not enough — the LLM has been observed:
1. Calling ``decompose_goal`` and ``create_agent`` in the same turn
(building while the user is mid-countdown), AND
2. Skipping ``decompose_goal`` entirely and jumping straight to
``create_agent`` on follow-up build requests when the session
already contains a prior resolved decomposition.
Rule: the most recent user message must be an approval message
(contains "approved", case-insensitive) AND a ``decompose_goal`` tool
call must exist somewhere in the session before that approval.
- Fresh "build me X" request without decomposition → block.
- New build request after a previous completed build → block until
a new decompose_goal + approval cycle runs.
- decompose_goal called but user hasn't responded yet → block.
- User said "Approved" after decompose_goal → allow.
"""
last_user_idx = -1
for i in range(len(session.messages) - 1, -1, -1):
if session.messages[i].role == "user":
last_user_idx = i
break
if last_user_idx < 0:
return True
last_user_content = (session.messages[last_user_idx].content or "").lower()
if not any(marker in last_user_content for marker in _APPROVAL_MARKERS):
return True
for i in range(last_user_idx - 1, -1, -1):
msg = session.messages[i]
if msg.role == "user":
# A user message without a prior decompose_goal = nothing pending.
return False
if msg.role == "assistant" and msg.tool_calls:
for tc in msg.tool_calls:
name = (tc.get("function") or {}).get("name") or tc.get("name")
if name == "decompose_goal":
return True
return False
return False
return True
def _no_user_action_since(baseline_index: int):

View File

@@ -16,7 +16,7 @@ from .decompose_goal import (
DecomposeGoalTool,
_no_user_action_since,
cancel_auto_approve,
has_pending_decomposition,
needs_build_plan_approval,
)
from .models import ErrorResponse, TaskDecompositionResponse
@@ -347,7 +347,7 @@ def test_predicate_handles_messages_with_none_sequence():
# ---------------------------------------------------------------------------
# has_pending_decomposition — build-tool approval gate
# needs_build_plan_approval — build-tool approval gate
# ---------------------------------------------------------------------------
@@ -359,67 +359,92 @@ def _decompose_tool_call() -> dict:
}
def test_pending_decomposition_true_when_no_user_after_call():
"""LLM called decompose_goal but user hasn't responded yet."""
def test_needs_approval_blocks_when_no_decompose_in_session():
"""LLM tries to build without calling decompose_goal at all."""
session = make_session(_USER_ID)
session.messages.append(ChatMessage(role="user", content="Build me an agent"))
session.messages.append(
ChatMessage(role="assistant", content="", tool_calls=[_decompose_tool_call()])
)
session.messages.append(ChatMessage(role="tool", content="{...plan...}"))
assert has_pending_decomposition(session) is True
assert needs_build_plan_approval(session) is True
def test_pending_decomposition_false_after_user_approves():
"""User approved — downstream tools should be unblocked."""
def test_needs_approval_blocks_when_last_user_is_not_approval():
"""Even with a decompose_goal earlier, a fresh non-approval user message
starts a new build flow that requires its own decomposition."""
session = make_session(_USER_ID)
session.messages.append(ChatMessage(role="user", content="Build me an agent"))
session.messages.append(
ChatMessage(role="assistant", content="", tool_calls=[_decompose_tool_call()])
)
session.messages.append(ChatMessage(role="tool", content="{...plan...}"))
session.messages.append(
ChatMessage(role="user", content="Approved. Please build the agent.")
)
assert has_pending_decomposition(session) is False
def test_pending_decomposition_false_when_no_decompose_call():
"""Sessions without decompose_goal are never blocked."""
session = make_session(_USER_ID)
session.messages.append(ChatMessage(role="user", content="Change the prompt"))
session.messages.append(ChatMessage(role="assistant", content="Sure, will do."))
assert has_pending_decomposition(session) is False
def test_pending_decomposition_ignores_assistant_text_without_tool_calls():
"""Assistant's text after decompose_goal doesn't count as user action."""
session = make_session(_USER_ID)
session.messages.append(ChatMessage(role="user", content="Build me an agent"))
session.messages.append(
ChatMessage(role="assistant", content="", tool_calls=[_decompose_tool_call()])
)
session.messages.append(ChatMessage(role="tool", content="{...plan...}"))
session.messages.append(ChatMessage(role="assistant", content="Plan is ready."))
assert has_pending_decomposition(session) is True
def test_pending_decomposition_scans_most_recent_call():
"""Only the most recent decompose_goal matters; older ones are irrelevant."""
session = make_session(_USER_ID)
# Old cycle: decomposed, approved, built.
session.messages.append(ChatMessage(role="user", content="Build v1"))
session.messages.append(
ChatMessage(role="assistant", content="", tool_calls=[_decompose_tool_call()])
)
session.messages.append(ChatMessage(role="tool", content="{plan v1}"))
session.messages.append(ChatMessage(role="user", content="Approved"))
# New cycle: decomposed, still awaiting approval.
session.messages.append(ChatMessage(role="assistant", content="agent built."))
# User asks for a second build — LLM must call decompose_goal again.
session.messages.append(ChatMessage(role="user", content="Now build v2"))
assert needs_build_plan_approval(session) is True
def test_needs_approval_allows_when_user_approved_after_decompose():
"""User said "Approved" after a decompose_goal → build may proceed."""
session = make_session(_USER_ID)
session.messages.append(ChatMessage(role="user", content="Build me an agent"))
session.messages.append(
ChatMessage(role="assistant", content="", tool_calls=[_decompose_tool_call()])
)
session.messages.append(ChatMessage(role="tool", content="{plan v2}"))
assert has_pending_decomposition(session) is True
session.messages.append(ChatMessage(role="tool", content="{plan}"))
session.messages.append(
ChatMessage(role="user", content="Approved. Please build the agent.")
)
assert needs_build_plan_approval(session) is False
def test_needs_approval_allows_modified_approval():
"""Approved with modifications also counts as approval."""
session = make_session(_USER_ID)
session.messages.append(ChatMessage(role="user", content="Build me an agent"))
session.messages.append(
ChatMessage(role="assistant", content="", tool_calls=[_decompose_tool_call()])
)
session.messages.append(ChatMessage(role="tool", content="{plan}"))
session.messages.append(
ChatMessage(
role="user",
content="Approved with modifications. Please build the agent following these steps: ...",
)
)
assert needs_build_plan_approval(session) is False
def test_needs_approval_blocks_same_turn_decompose_and_build():
"""LLM calls decompose_goal then immediately tries create_agent in the
same turn — the last user message is still the original build request,
not an approval."""
session = make_session(_USER_ID)
session.messages.append(ChatMessage(role="user", content="Build me an agent"))
session.messages.append(
ChatMessage(role="assistant", content="", tool_calls=[_decompose_tool_call()])
)
session.messages.append(ChatMessage(role="tool", content="{plan}"))
# No user message yet — still mid-countdown.
assert needs_build_plan_approval(session) is True
def test_needs_approval_blocks_approval_without_prior_decompose():
"""User spontaneously says "Approved" but no decompose_goal was ever
called — the LLM did not show a plan, so the gate stays closed."""
session = make_session(_USER_ID)
session.messages.append(ChatMessage(role="user", content="Approved"))
assert needs_build_plan_approval(session) is True
def test_needs_approval_case_insensitive():
"""Approval detection is case-insensitive."""
session = make_session(_USER_ID)
session.messages.append(ChatMessage(role="user", content="Build me an agent"))
session.messages.append(
ChatMessage(role="assistant", content="", tool_calls=[_decompose_tool_call()])
)
session.messages.append(ChatMessage(role="tool", content="{plan}"))
session.messages.append(ChatMessage(role="user", content="APPROVED, go."))
assert needs_build_plan_approval(session) is False
# ---------------------------------------------------------------------------