feat(copilot): add server-side auto-approve fallback for decompose_goal

The decompose_goal countdown was purely client-side: if the user closed the
tab before the timer ran out, the agent never got built. Add a server-side
timer that fires the same approval message even when no client is connected.

- backend/copilot/model.py: add append_message_if helper that appends a
  message inside the session lock only if a predicate is satisfied. Used
  by the auto-approve task to no-op when the user has already acted.
- backend/copilot/tools/decompose_goal.py: when the tool returns, schedule
  a fire-and-forget asyncio task (same _background_tasks pattern as
  agent_browser.py) that sleeps 90s, re-checks the session, and if no user
  message has appeared since, appends "Approved. Please build the agent."
  and enqueues a new copilot turn. The timer lives in the worker process,
  so a restart during the wait drops the pending approval; restart-resilience
  is a documented follow-up.
- backend/copilot/tools/models.py: expose auto_approve_seconds on
  TaskDecompositionResponse so the frontend countdown is sourced from the
  backend instead of a hard-coded constant.
- frontend DecomposeGoal.tsx: seed secondsLeft from output.auto_approve_seconds
  with a 60s fallback for older sessions.
- Regenerate openapi.json with the new field.
- Tests: 9 new unit tests covering the predicate, the auto-approve flow
  (idle / user-acted / errors swallowed) and _schedule_auto_approve.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
anvyle
2026-04-10 16:34:46 +02:00
parent 94f065a7e0
commit fb86fcb67d
7 changed files with 424 additions and 16 deletions

View File

@@ -2,7 +2,7 @@ import asyncio
import logging
import uuid
from datetime import UTC, datetime
from typing import Any, Self, cast
from typing import Any, Callable, Self, cast
from weakref import WeakValueDictionary
from openai.types.chat import (
@@ -677,6 +677,49 @@ async def append_and_save_message(session_id: str, message: ChatMessage) -> Chat
return session
async def append_message_if(
    session_id: str,
    message: ChatMessage,
    predicate: Callable[["ChatSession"], bool],
) -> "ChatSession | None":
    """Atomically append a message iff ``predicate(session)`` returns True.

    Used by fire-and-forget tasks that need to no-op if the session state
    has moved on while they were waiting (e.g. the decompose_goal server-side
    auto-approve timer: skip the approval if the user has already sent a
    message). The predicate runs while the per-session lock is held, so the
    check and the append form one atomic step with respect to every other
    writer that takes the same lock.

    Args:
        session_id: ID of the chat session to append to.
        message: The message to append when the predicate accepts.
        predicate: Synchronous callable evaluated against the freshly loaded
            session; the append only happens when it returns True.

    Returns:
        The updated session on append, or ``None`` if the predicate
        rejected, the session no longer exists, or persisting the message
        failed. A cache-write failure after a successful DB save is logged
        and does not turn the result into ``None``.
    """
    lock = await _get_session_lock(session_id)
    async with lock:
        session = await get_chat_session(session_id)
        if session is None or not predicate(session):
            return None

        session.messages.append(message)
        try:
            # The sequence lookup is part of the persistence step: keep it
            # inside the ``try`` so a failure here is reported as ``None``
            # (as documented) instead of escaping to the caller.
            existing_message_count = await chat_db().get_next_sequence(session_id)
            await _save_session_to_db(session, existing_message_count)
        except Exception as e:
            logger.error(
                f"append_message_if: failed to persist message to "
                f"session {session_id}: {e}",
                exc_info=True,
            )
            return None

        # The DB is the source of truth at this point; a stale cache is only
        # worth a warning.
        try:
            await cache_chat_session(session)
        except Exception as e:
            logger.warning(f"Cache write failed for session {session_id}: {e}")
        return session
async def create_chat_session(user_id: str, *, dry_run: bool) -> ChatSession:
"""Create a new chat session and persist it.

View File

@@ -1,9 +1,11 @@
"""DecomposeGoalTool - Breaks agent-building goals into sub-instructions."""
import asyncio
import logging
from typing import Any
from uuid import uuid4
from backend.copilot.model import ChatSession
from backend.copilot.model import ChatMessage, ChatSession, append_message_if
from .base import BaseTool
from .models import (
@@ -20,6 +22,111 @@ MAX_STEPS = 8
DEFAULT_ACTION = "add_block"
VALID_ACTIONS = {"add_block", "connect_blocks", "configure", "add_input", "add_output"}
# Auto-approve timing, defined once here for both the client and the server.
# The tool response exposes the client value as ``auto_approve_seconds`` and
# the frontend renders the visible countdown from it. The server-side fallback
# waits an additional grace period so that an "Approved" sent by the client
# has time to arrive (network latency / SSE round-trip) before it fires.
AUTO_APPROVE_CLIENT_SECONDS = 60
AUTO_APPROVE_SERVER_GRACE_SECONDS = 30
AUTO_APPROVE_SERVER_SECONDS = (
    AUTO_APPROVE_CLIENT_SECONDS + AUTO_APPROVE_SERVER_GRACE_SECONDS
)
AUTO_APPROVE_MESSAGE = "Approved. Please build the agent."

# Registry of in-flight fire-and-forget tasks: holding them here keeps them
# alive, and each task removes itself once it is done.
# Same pattern as ``backend/copilot/tools/agent_browser.py``.
_auto_approve_tasks: set[asyncio.Task] = set()
def _no_user_action_since(baseline_sequence: int):
    """Build a predicate that holds while the user has stayed idle.

    The returned callable takes a session and returns True iff none of its
    ``user``-role messages has a sequence greater than ``baseline_sequence``.
    A message whose sequence is unset (falsy) is treated as sequence 0.
    """

    def _check(session: ChatSession) -> bool:
        user_acted = any(
            msg.role == "user" and (msg.sequence or 0) > baseline_sequence
            for msg in session.messages
        )
        return not user_acted

    return _check
async def _run_auto_approve(
    session_id: str,
    user_id: str | None,
    baseline_sequence: int,
) -> None:
    """Sleep for the server-side timeout, then approve on the user's behalf
    if no user message has appeared after ``baseline_sequence``.

    On approval the synthetic user message is appended to the session, a
    stream-registry session is created for a fresh turn id, and a copilot
    turn is enqueued. Any exception other than cancellation is logged and
    swallowed so the fire-and-forget task never raises.

    Limitation: this lives in the executor process; if the worker restarts
    during the wait, the pending approval is lost (the user falls back to
    manual approve). Restart-resilience would need a Redis-backed scheduler.

    Modify-mode caveat: clicking "Modify" stops the *client* timer, not this
    one. Users have ``AUTO_APPROVE_SERVER_SECONDS`` total to finish editing
    and click Approve, otherwise the server fires the default approval. A
    follow-up should add an explicit cancel endpoint.
    """
    try:
        await asyncio.sleep(AUTO_APPROVE_SERVER_SECONDS)

        appended = await append_message_if(
            session_id=session_id,
            message=ChatMessage(role="user", content=AUTO_APPROVE_MESSAGE),
            predicate=_no_user_action_since(baseline_sequence),
        )
        # ``None`` means the user already acted (or the session is gone):
        # there is nothing to enqueue in that case.
        if appended is not None:
            # Imported here rather than at module level to avoid a circular
            # dependency with the executor / API stream registry packages.
            from backend.copilot import stream_registry
            from backend.copilot.executor.utils import enqueue_copilot_turn

            new_turn_id = str(uuid4())
            await stream_registry.create_session(
                session_id=session_id,
                user_id=user_id or "",
                tool_call_id="chat_stream",
                tool_name="chat",
                turn_id=new_turn_id,
            )
            await enqueue_copilot_turn(
                session_id=session_id,
                user_id=user_id,
                message=AUTO_APPROVE_MESSAGE,
                turn_id=new_turn_id,
                is_user_message=True,
            )
            logger.info("decompose_goal auto-approve fired for session %s", session_id)
    except asyncio.CancelledError:
        # Cancellation must keep propagating so the task ends as cancelled.
        raise
    except Exception:
        logger.exception(
            "decompose_goal auto-approve task failed for session %s",
            session_id,
        )
def _schedule_auto_approve(
    session_id: str | None, user_id: str | None, session: ChatSession
) -> None:
    """Start the background auto-approve timer for ``session``.

    Does nothing when ``session_id`` is empty. Otherwise the highest message
    sequence currently in the session (0 when there are no messages or none
    has a sequence) becomes the baseline the timer later compares against,
    and the spawned task is tracked in ``_auto_approve_tasks`` until done.
    """
    if not session_id:
        return

    sequences = [msg.sequence or 0 for msg in session.messages]
    baseline_sequence = max(sequences, default=0)

    pending = asyncio.create_task(
        _run_auto_approve(session_id, user_id, baseline_sequence)
    )
    # Keep a strong reference until completion, then drop it automatically.
    _auto_approve_tasks.add(pending)
    pending.add_done_callback(_auto_approve_tasks.discard)
class DecomposeGoalTool(BaseTool):
"""Tool for decomposing an agent goal into sub-instructions."""
@@ -135,11 +242,14 @@ class DecomposeGoalTool(BaseTool):
)
)
_schedule_auto_approve(session_id, user_id, session)
return TaskDecompositionResponse(
message=f"Here's the plan to build your agent ({len(decomposition_steps)} steps):",
goal=goal,
steps=decomposition_steps,
step_count=len(decomposition_steps),
requires_approval=True,
auto_approve_seconds=AUTO_APPROVE_CLIENT_SECONDS,
session_id=session_id,
)

View File

@@ -1,11 +1,26 @@
"""Unit tests for DecomposeGoalTool."""
import asyncio
from unittest.mock import AsyncMock, patch
import pytest
from backend.copilot.model import ChatMessage
from . import decompose_goal as decompose_goal_module
from ._test_data import make_session
from .decompose_goal import DEFAULT_ACTION, MAX_STEPS, DecomposeGoalTool
from .decompose_goal import (
AUTO_APPROVE_CLIENT_SECONDS,
DEFAULT_ACTION,
MAX_STEPS,
DecomposeGoalTool,
_no_user_action_since,
)
from .models import ErrorResponse, TaskDecompositionResponse
# Captured before the autouse fixture stubs the real scheduler.
_REAL_SCHEDULE_AUTO_APPROVE = decompose_goal_module._schedule_auto_approve
_USER_ID = "test-user-decompose-goal"
_VALID_STEPS = [
@@ -19,6 +34,18 @@ _VALID_STEPS = [
]
@pytest.fixture(autouse=True)
def _stub_auto_approve_scheduler():
    """Replace the fire-and-forget scheduler with a no-op for every test.

    The existing happy-path tests run without a database, so a real timer
    would end up trying to hit Redis/Postgres. Tests that exercise
    auto-approve apply their own patches inside the test body.
    """

    def _noop(*args, **kwargs):
        return None

    stub = patch.object(decompose_goal_module, "_schedule_auto_approve", _noop)
    with stub:
        yield
@pytest.fixture()
def tool() -> DecomposeGoalTool:
    """Provide a fresh ``DecomposeGoalTool`` instance to each test."""
    instance = DecomposeGoalTool()
    return instance
@@ -253,3 +280,210 @@ async def test_step_ids_are_sequential(tool: DecomposeGoalTool, session):
assert isinstance(result, TaskDecompositionResponse)
for i, step in enumerate(result.steps):
assert step.step_id == f"step_{i + 1}"
# ---------------------------------------------------------------------------
# auto_approve_seconds field
# ---------------------------------------------------------------------------
@pytest.mark.asyncio
async def test_response_includes_auto_approve_seconds(tool: DecomposeGoalTool, session):
    """The tool response must carry the countdown length so the frontend
    reads it from the backend rather than from a hard-coded constant."""
    call_kwargs = {
        "user_id": _USER_ID,
        "session": session,
        "goal": "Build agent",
        "steps": _VALID_STEPS,
    }
    result = await tool._execute(**call_kwargs)

    assert isinstance(result, TaskDecompositionResponse)
    assert result.auto_approve_seconds == AUTO_APPROVE_CLIENT_SECONDS
# ---------------------------------------------------------------------------
# Predicate: _no_user_action_since
# ---------------------------------------------------------------------------
def test_predicate_passes_when_no_user_messages_after_baseline():
    """An assistant message at the baseline sequence leaves the predicate True."""
    session = make_session(_USER_ID)
    assistant_msg = ChatMessage(role="assistant", content="tool call", sequence=5)
    session.messages.append(assistant_msg)

    predicate = _no_user_action_since(5)
    assert predicate(session) is True
def test_predicate_rejects_when_user_message_after_baseline():
    """A user message sequenced after the baseline makes the predicate False."""
    session = make_session(_USER_ID)
    for new_message in (
        ChatMessage(role="assistant", content="tool call", sequence=5),
        ChatMessage(role="user", content="user replied", sequence=6),
    ):
        session.messages.append(new_message)

    predicate = _no_user_action_since(5)
    assert predicate(session) is False
def test_predicate_ignores_assistant_messages_after_baseline():
    """Assistant messages past the baseline are not 'user action' — they are
    just the LLM continuing on its own, so the predicate stays True."""
    session = make_session(_USER_ID)
    later_assistant_msg = ChatMessage(
        role="assistant", content="more stuff", sequence=6
    )
    session.messages.append(later_assistant_msg)

    predicate = _no_user_action_since(5)
    assert predicate(session) is True
# ---------------------------------------------------------------------------
# Server-side auto-approve task — full flow
# ---------------------------------------------------------------------------
@pytest.mark.asyncio
async def test_auto_approve_fires_when_user_idle():
    """With the append accepted (user idle), the task must append the
    synthetic approval, register a stream session and enqueue a new turn."""
    session_id = "session-auto-approve-idle"
    appended: list = []

    async def fake_append_message_if(session_id, message, predicate):
        # Record what the task tried to append and pretend it was accepted.
        appended.append(message)
        return make_session(_USER_ID)

    enqueue_mock = AsyncMock()
    create_session_mock = AsyncMock()

    with (
        patch(
            "backend.copilot.tools.decompose_goal.append_message_if",
            new=fake_append_message_if,
        ),
        patch(
            "backend.copilot.tools.decompose_goal.AUTO_APPROVE_SERVER_SECONDS",
            0,
        ),
        patch(
            "backend.copilot.executor.utils.enqueue_copilot_turn",
            new=enqueue_mock,
        ),
        patch(
            "backend.copilot.stream_registry.create_session",
            new=create_session_mock,
        ),
    ):
        await decompose_goal_module._run_auto_approve(
            session_id=session_id,
            user_id=_USER_ID,
            baseline_sequence=5,
        )

    approval = appended[-1]
    assert approval.role == "user"
    assert approval.content == "Approved. Please build the agent."

    create_session_mock.assert_awaited_once()
    enqueue_mock.assert_awaited_once()
    assert enqueue_mock.await_args is not None
    sent = enqueue_mock.await_args.kwargs
    assert sent["session_id"] == session_id
    assert sent["message"] == "Approved. Please build the agent."
    assert sent["is_user_message"] is True
@pytest.mark.asyncio
async def test_auto_approve_skips_when_user_already_acted():
    """When append_message_if yields None (the predicate rejected because the
    user already sent a message), neither a stream session nor a turn may be
    created."""
    append_mock = AsyncMock(return_value=None)
    enqueue_mock = AsyncMock()
    create_session_mock = AsyncMock()

    with (
        patch(
            "backend.copilot.tools.decompose_goal.append_message_if",
            new=append_mock,
        ),
        patch(
            "backend.copilot.tools.decompose_goal.AUTO_APPROVE_SERVER_SECONDS",
            0,
        ),
        patch(
            "backend.copilot.executor.utils.enqueue_copilot_turn",
            new=enqueue_mock,
        ),
        patch(
            "backend.copilot.stream_registry.create_session",
            new=create_session_mock,
        ),
    ):
        await decompose_goal_module._run_auto_approve(
            session_id="session-acted",
            user_id=_USER_ID,
            baseline_sequence=5,
        )

    append_mock.assert_awaited_once()
    enqueue_mock.assert_not_awaited()
    create_session_mock.assert_not_awaited()
@pytest.mark.asyncio
async def test_auto_approve_swallows_unexpected_errors():
    """An error raised inside the task must not escape it — the worker has
    to keep running."""

    async def boom(*args, **kwargs):
        raise RuntimeError("kaboom")

    failing_append = patch(
        "backend.copilot.tools.decompose_goal.append_message_if",
        new=boom,
    )
    zero_timeout = patch(
        "backend.copilot.tools.decompose_goal.AUTO_APPROVE_SERVER_SECONDS",
        0,
    )
    with failing_append, zero_timeout:
        # Completing without an exception is the assertion.
        await decompose_goal_module._run_auto_approve(
            session_id="session-error",
            user_id=_USER_ID,
            baseline_sequence=0,
        )
@pytest.mark.asyncio
async def test_schedule_auto_approve_creates_task(monkeypatch):
    """_schedule_auto_approve should add a task to the tracking set and
    auto-remove it on completion.

    Uses the real scheduler captured at import time, because the autouse
    fixture replaces the module attribute with a no-op.
    """
    monkeypatch.setattr(decompose_goal_module, "AUTO_APPROVE_SERVER_SECONDS", 0)
    fake_run = AsyncMock()
    monkeypatch.setattr(decompose_goal_module, "_run_auto_approve", fake_run)

    session = make_session(_USER_ID)
    session.messages.append(
        ChatMessage(role="assistant", content="tool call", sequence=3)
    )

    tracked = decompose_goal_module._auto_approve_tasks
    already_tracked = set(tracked)

    _REAL_SCHEDULE_AUTO_APPROVE(
        session_id="session-schedule",
        user_id=_USER_ID,
        session=session,
    )

    # Nothing has been awaited since scheduling, so the new task cannot have
    # finished yet: it must be present in the tracking set right now.
    scheduled = tracked - already_tracked
    assert len(scheduled) == 1

    # Yield to the loop until the task has run and its done-callback removed
    # it. The wait is bounded so a regression fails the test instead of
    # hanging the whole suite.
    for _ in range(100):
        if not (scheduled & tracked):
            break
        await asyncio.sleep(0)

    assert not (scheduled & tracked)
    fake_run.assert_awaited_once_with("session-schedule", _USER_ID, 3)
def test_schedule_auto_approve_no_op_without_session_id():
    """Empty session id should be a no-op (defensive).

    Calls the real scheduler captured at import time: the module attribute
    is replaced by a no-op stub through the autouse fixture, so going through
    ``decompose_goal_module._schedule_auto_approve`` would only test the stub.
    """
    session = make_session(_USER_ID)
    tracked_before = set(decompose_goal_module._auto_approve_tasks)

    _REAL_SCHEDULE_AUTO_APPROVE(
        session_id=None, user_id=_USER_ID, session=session
    )

    # Compare against a snapshot rather than asserting emptiness so the test
    # does not depend on what other tests left in the set.
    assert decompose_goal_module._auto_approve_tasks == tracked_before

View File

@@ -729,6 +729,14 @@ class TaskDecompositionResponse(ToolResponseBase):
default=0, description="Number of steps (auto-derived from steps list)"
)
requires_approval: bool = True
auto_approve_seconds: int = Field(
default=60,
description=(
"Seconds the client should count down before auto-approving. "
"Kept in sync with the server-side fallback timer, which runs a "
"grace period longer to absorb network latency."
),
)
@model_validator(mode="after")
def sync_step_count(self) -> "TaskDecompositionResponse":

View File

@@ -27,7 +27,9 @@ import {
ToolIcon,
} from "./helpers";
const COUNTDOWN_SECONDS = 60;
// Fallback used only if the backend response omits auto_approve_seconds
// (older sessions). The authoritative value comes from the tool output.
const FALLBACK_COUNTDOWN_SECONDS = 60;
const RADIUS = 15;
const CIRCUMFERENCE = 2 * Math.PI * RADIUS;
@@ -62,7 +64,13 @@ export function DecomposeGoalTool({ part, isLastMessage }: Props) {
isDecompositionOutput(output) &&
output.requires_approval;
const [secondsLeft, setSecondsLeft] = useState(COUNTDOWN_SECONDS);
// Authoritative countdown comes from the backend tool response, so the
// client countdown and the server-side fallback timer derive from the same
// value (the server waits an extra grace period on top of it).
const countdownSeconds =
(output && isDecompositionOutput(output) && output.auto_approve_seconds) ||
FALLBACK_COUNTDOWN_SECONDS;
const [secondsLeft, setSecondsLeft] = useState(countdownSeconds);
// timerActive becomes false when the user clicks Modify — stops countdown and auto-approve.
const [timerActive, setTimerActive] = useState(true);
const [isEditing, setIsEditing] = useState(false);
@@ -153,7 +161,7 @@ export function DecomposeGoalTool({ part, isLastMessage }: Props) {
}
}, [secondsLeft, timerActive, showActions]); // approve reads refs only — safe to omit
const progress = secondsLeft / COUNTDOWN_SECONDS;
const progress = secondsLeft / countdownSeconds;
const dashOffset = CIRCUMFERENCE * (1 - progress);
const stepCount = isEditing
? editableSteps.length
@@ -263,10 +271,10 @@ export function DecomposeGoalTool({ part, isLastMessage }: Props) {
</Button>
) : (
<>
{/* Timer button — same ghost style as Modify, ring wraps the number inline */}
<Button variant="ghost" onClick={approve}>
<span className="group/label inline-flex items-center gap-1">
<span className="inline-flex items-center gap-1 group-hover/label:hidden">
{/* Primary CTA — encourages user to run the agent */}
<Button variant="primary" size="small" onClick={approve}>
<span className="group/label inline-flex items-center gap-2">
<span className="inline-flex items-center gap-1.5 group-hover/label:hidden">
Starting in
<span className="relative inline-flex h-6 w-6 items-center justify-center">
<svg
@@ -282,7 +290,7 @@ export function DecomposeGoalTool({ part, isLastMessage }: Props) {
fill="none"
stroke="currentColor"
strokeWidth="2"
className="text-neutral-300"
className="text-white/30"
/>
<circle
cx="17"
@@ -294,22 +302,20 @@ export function DecomposeGoalTool({ part, isLastMessage }: Props) {
strokeLinecap="round"
strokeDasharray={CIRCUMFERENCE}
strokeDashoffset={dashOffset}
className="text-neutral-600 transition-[stroke-dashoffset] duration-1000 ease-linear"
className="text-white transition-[stroke-dashoffset] duration-1000 ease-linear"
/>
</svg>
<span className="relative z-10 text-[11px] font-semibold tabular-nums text-foreground">
<span className="relative z-10 text-[11px] font-semibold tabular-nums text-white">
{secondsLeft}
</span>
</span>
s
</span>
<span className="hidden group-hover/label:inline">
Start now
</span>
</span>
</Button>
<span className="text-neutral-300">|</span>
<Button variant="ghost" onClick={handleModify}>
<Button variant="ghost" size="small" onClick={handleModify}>
<span className="inline-flex items-center gap-1.5">
<PencilSimpleIcon size={14} weight="bold" />
Modify

View File

@@ -26,6 +26,7 @@ export interface TaskDecompositionOutput {
steps: DecompositionStep[];
step_count: number;
requires_approval: boolean;
auto_approve_seconds?: number;
}
export interface DecomposeErrorOutput {

View File

@@ -13788,6 +13788,12 @@
"type": "boolean",
"title": "Requires Approval",
"default": true
},
"auto_approve_seconds": {
"type": "integer",
"title": "Auto Approve Seconds",
"description": "Seconds the client should count down before auto-approving. Kept in sync with the server-side fallback timer, which runs a grace period longer to absorb network latency.",
"default": 60
}
},
"type": "object",