diff --git a/autogpt_platform/backend/backend/api/features/chat/routes.py b/autogpt_platform/backend/backend/api/features/chat/routes.py index dfad86a36c..bb05912764 100644 --- a/autogpt_platform/backend/backend/api/features/chat/routes.py +++ b/autogpt_platform/backend/backend/api/features/chat/routes.py @@ -817,28 +817,6 @@ async def cancel_session_task( return CancelSessionResponse(cancelled=True) -@router.post( - "/sessions/{session_id}/cancel-auto-approve", - status_code=200, -) -async def cancel_auto_approve_task( - session_id: str, - user_id: Annotated[str, Security(auth.get_user_id)], -) -> CancelSessionResponse: - """Cancel the pending auto-approve timer for a decompose_goal plan. - - Called by the frontend when the user clicks "Modify" on the build-plan - box. Without this, the server-side timer would fire the default - "Approved" message while the user is still editing. - """ - await _validate_and_get_session(session_id, user_id) - - from backend.copilot.tools.decompose_goal import cancel_auto_approve - - await cancel_auto_approve(session_id) - return CancelSessionResponse(cancelled=True) - - @router.post( "/sessions/{session_id}/stream", responses={ diff --git a/autogpt_platform/backend/backend/copilot/sdk/agent_generation_guide.md b/autogpt_platform/backend/backend/copilot/sdk/agent_generation_guide.md index 0a0cbad482..b0f7ca3403 100644 --- a/autogpt_platform/backend/backend/copilot/sdk/agent_generation_guide.md +++ b/autogpt_platform/backend/backend/copilot/sdk/agent_generation_guide.md @@ -26,24 +26,19 @@ Steps: **Skip this** when the goal already specifies all dimensions (e.g. "scrape prices from Amazon and email me daily"). -### Before Building: Goal Decomposition (REQUIRED) +### Before Building: Show the Plan -Before running the workflow below, ALWAYS decompose the goal first: +Start agent generation by calling `decompose_goal` once to display your +build plan to the user as a step-by-step UI card. 1. Analyze the user's request and break it into logical build steps (e.g. "add input block", "add AI summarizer", "wire blocks together"). -2. Call `decompose_goal` with those steps. **Do not write any text before - or after the tool call** — the platform renders a rich UI card for the - plan automatically. Any text you write will duplicate the plan display. -3. **STOP your turn immediately after `decompose_goal` returns.** Do not - call any other tools. Do not generate any text. End the turn so the - user can review the plan and respond. -4. Only after the user responds, continue with "Workflow for Creating/ - Editing Agents". - -`decompose_goal` MUST be the only tool call in the turn, with no -accompanying text. Never combine it with `find_block`, `create_agent`, -or any other tool in the same turn. +2. Call `decompose_goal` with those steps. Do not write any text before + or after the tool call — the platform renders the plan UI card + automatically, so any extra text duplicates the display. +3. Continue immediately with the workflow below in the same turn. The + plan card is informational only — there is no approval step, no + countdown, and no need to wait for the user. For simple goals (1-2 blocks), keep steps brief (2-3 steps). For complex goals, use as many steps as needed. diff --git a/autogpt_platform/backend/backend/copilot/tools/create_agent.py b/autogpt_platform/backend/backend/copilot/tools/create_agent.py index 39ae11e858..a27392fc8e 100644 --- a/autogpt_platform/backend/backend/copilot/tools/create_agent.py +++ b/autogpt_platform/backend/backend/copilot/tools/create_agent.py @@ -8,7 +8,6 @@ from backend.copilot.model import ChatSession from .agent_generator.pipeline import fetch_library_agents, fix_validate_and_save from .base import BaseTool -from .decompose_goal import needs_build_plan_approval from .helpers import require_guide_read from .models import ErrorResponse, ToolResponseBase @@ -77,25 +76,6 @@ class CreateAgentTool(BaseTool): if guide_gate is not None: return guide_gate - # Enforce the decompose_goal approval gate at the code level. - # Prompt-only "STOP" is unreliable: the LLM has been observed - # (a) calling decompose_goal + create_agent in the same turn and - # (b) skipping decompose_goal entirely on follow-up build requests. - # Require that the most recent user message is an approval AND a - # decompose_goal call exists before it in the session. - if session and needs_build_plan_approval(session): - return ErrorResponse( - message=( - "You must call decompose_goal first and wait for user " - "approval before calling create_agent. Call decompose_goal " - "now with the build steps, then end your turn — the " - "platform will resume the conversation after the user " - "responds with Approved (or Approved with modifications)." - ), - error="build_plan_approval_required", - session_id=session_id, - ) - if not agent_json: return ErrorResponse( message=( diff --git a/autogpt_platform/backend/backend/copilot/tools/create_agent_test.py b/autogpt_platform/backend/backend/copilot/tools/create_agent_test.py index 19c318d52f..5bf32342a4 100644 --- a/autogpt_platform/backend/backend/copilot/tools/create_agent_test.py +++ b/autogpt_platform/backend/backend/copilot/tools/create_agent_test.py @@ -4,7 +4,6 @@ from unittest.mock import MagicMock, patch import pytest -from backend.copilot.model import ChatMessage from backend.copilot.tools.create_agent import CreateAgentTool from backend.copilot.tools.models import AgentPreviewResponse, ErrorResponse @@ -14,28 +13,6 @@ _TEST_USER_ID = "test-user-create-agent" _PIPELINE = "backend.copilot.tools.agent_generator.pipeline" -def _add_approval_history(session): - """Add decompose_goal + user approval to the session so the - needs_build_plan_approval gate passes.""" - session.messages.append( - ChatMessage( - role="assistant", - content="", - tool_calls=[ - { - "id": "call_decompose", - "type": "function", - "function": {"name": "decompose_goal", "arguments": "{}"}, - } - ], - ) - ) - session.messages.append(ChatMessage(role="tool", content="{plan}")) - session.messages.append( - ChatMessage(role="user", content="Approved. Please build the agent.") - ) - - @pytest.fixture def tool(): return CreateAgentTool() @@ -43,9 +20,7 @@ def tool(): @pytest.fixture def session(): - s = make_session(_TEST_USER_ID) - _add_approval_history(s) - return s + return make_session(_TEST_USER_ID) # ── Input validation tests ────────────────────────────────────────────── diff --git a/autogpt_platform/backend/backend/copilot/tools/decompose_goal.py b/autogpt_platform/backend/backend/copilot/tools/decompose_goal.py index 17137d32de..91b49806aa 100644 --- a/autogpt_platform/backend/backend/copilot/tools/decompose_goal.py +++ b/autogpt_platform/backend/backend/copilot/tools/decompose_goal.py @@ -1,11 +1,9 @@ """DecomposeGoalTool - Breaks agent-building goals into sub-instructions.""" -import asyncio import logging from typing import Any from backend.copilot.model import ChatSession -from backend.data.redis_client import get_redis_async from .base import BaseTool from .models import ( @@ -20,185 +18,6 @@ logger = logging.getLogger(__name__) DEFAULT_ACTION = "add_block" VALID_ACTIONS = {"add_block", "connect_blocks", "configure", "add_input", "add_output"} -# Auto-approve countdown — the frontend reads ``auto_approve_seconds`` from the -# tool response and runs the visible countdown (60s). The server fires 5s later -# as a fallback for the "user closed the tab" case. The 5s gap ensures the -# client always fires first when present, creating the SSE subscription that -# lets the user see the build in real-time. When the server wakes at 65s, it -# checks the predicate and skips (the client's message is already there). -AUTO_APPROVE_CLIENT_SECONDS = 60 -AUTO_APPROVE_SERVER_GRACE_SECONDS = 5 -AUTO_APPROVE_SERVER_SECONDS = ( - AUTO_APPROVE_CLIENT_SECONDS + AUTO_APPROVE_SERVER_GRACE_SECONDS -) -AUTO_APPROVE_MESSAGE = "Approved. Please build the agent." - -# Redis key prefix for cross-process cancel signalling. The cancel -# endpoint (AgentServer process) SETs the key; _run_auto_approve -# (CoPilotExecutor process) checks it before firing. -_CANCEL_KEY_PREFIX = "copilot:cancel_auto_approve:" -_CANCEL_KEY_TTL_SECONDS = AUTO_APPROVE_SERVER_SECONDS + 30 - -# In-process dict for best-effort cancel when both the cancel call and -# the asyncio task happen to live in the same process (single-worker). -_pending_auto_approvals: dict[str, asyncio.Task] = {} - - -def needs_build_plan_approval(session: ChatSession) -> bool: - """Return True if the current build must be blocked pending user response. - - Enforces the "STOP — do not proceed until the user responds" gate from - ``agent_generation_guide.md`` at the *code* level. Natural-language - instruction alone is not enough — the LLM has been observed calling - ``decompose_goal`` and ``create_agent`` in the same turn. - - Rule: a ``decompose_goal`` tool call must exist in the session AND at - least one user message must appear after it. The gate does NOT check - *what* the user said — the LLM interprets the intent (build, modify, - or reject). The gate only blocks same-turn builds where the user hasn't - responded at all. - - - No decompose_goal in session → block (must decompose first). - - decompose_goal called but no user response yet → block. - - Any user message after decompose_goal → allow (LLM decides). - """ - # Walk backward to find the latest decompose_goal tool call. - decompose_idx = -1 - for i in range(len(session.messages) - 1, -1, -1): - msg = session.messages[i] - if msg.role == "assistant" and msg.tool_calls: - for tc in msg.tool_calls: - name = (tc.get("function") or {}).get("name") or tc.get("name") - if name == "decompose_goal": - decompose_idx = i - break - if decompose_idx >= 0: - break - - if decompose_idx < 0: - return True - - # Any user message after the decompose_goal call unblocks the gate. - for msg in session.messages[decompose_idx + 1 :]: - if msg.role == "user": - return False - - return True - - -async def _run_auto_approve(session_id: str, user_id: str | None) -> None: - """Wait the server-side timeout and dispatch the approval via - ``run_copilot_turn_via_queue`` — the canonical helper that queues the - message if a turn is already in flight, or starts a new turn if idle. - - Cancelled when the user clicks "Modify" (via ``cancel_auto_approve``). - """ - try: - await asyncio.sleep(AUTO_APPROVE_SERVER_SECONDS) - - # Check the cross-process cancel flag set by cancel_auto_approve(). - redis = await get_redis_async() - if await redis.get(f"{_CANCEL_KEY_PREFIX}{session_id}"): - logger.info( - "decompose_goal auto-approve skipped (cancelled) for session %s", - session_id, - ) - return - - # Skip if a turn is already in flight — the client already sent - # "Approved" and started the build. Only fire when the session is - # idle (client closed the tab). - from backend.copilot.pending_message_helpers import is_turn_in_flight - - if await is_turn_in_flight(session_id): - logger.info( - "decompose_goal auto-approve skipped (turn in flight) for session %s", - session_id, - ) - return - - from backend.copilot.sdk.session_waiter import run_copilot_turn_via_queue - - outcome, result = await run_copilot_turn_via_queue( - session_id=session_id, - user_id=user_id or "", - message=AUTO_APPROVE_MESSAGE, - timeout=0, - tool_call_id="auto_approve", - tool_name="decompose_goal_auto_approve", - ) - logger.info( - "decompose_goal auto-approve fired for session %s (outcome=%s)", - session_id, - outcome, - ) - except asyncio.CancelledError: - raise - except Exception: - logger.exception( - "decompose_goal auto-approve task failed for session %s", - session_id, - ) - - -async def cancel_auto_approve(session_id: str) -> bool: - """Cancel the pending auto-approve task for a session. - - Called by the ``/sessions/{session_id}/cancel-auto-approve`` endpoint - when the user clicks "Modify" in the build-plan UI. - - Uses **two** cancellation channels: - 1. **Redis flag** (cross-process) — the executor checks this before - firing. Works even when the cancel endpoint runs in the AgentServer - process and the asyncio task lives in the CoPilotExecutor process. - 2. **In-process task cancel** (best-effort) — if both happen to share - the same process, cancels the asyncio task directly. - """ - redis = await get_redis_async() - await redis.set( - f"{_CANCEL_KEY_PREFIX}{session_id}", - "1", - ex=_CANCEL_KEY_TTL_SECONDS, - ) - logger.info( - "decompose_goal auto-approve cancel flag set for session %s", session_id - ) - - # Best-effort in-process cancel (no-op if the task is in another process). - task = _pending_auto_approvals.pop(session_id, None) - if task is not None and not task.done(): - task.cancel() - - return True - - -async def _schedule_auto_approve( - session_id: str | None, user_id: str | None, session: ChatSession -) -> None: - """Schedule the fire-and-forget auto-approve task for this session.""" - if not session_id: - return - # Cancel any existing pending approval for this session (e.g. if the - # LLM called decompose_goal twice in one turn). - old_task = _pending_auto_approvals.pop(session_id, None) - if old_task is not None and not old_task.done(): - old_task.cancel() - # Clear any stale Redis cancel flag from a previous Modify click so - # the new auto-approve task isn't incorrectly suppressed. - redis = await get_redis_async() - await redis.delete(f"{_CANCEL_KEY_PREFIX}{session_id}") - task = asyncio.create_task(_run_auto_approve(session_id, user_id)) - _pending_auto_approvals[session_id] = task - # Only remove from dict if this task is still the current one — a - # cancelled old task's callback must not clobber a newly-scheduled one. - task.add_done_callback( - lambda t: ( - _pending_auto_approvals.pop(session_id, None) - if _pending_auto_approvals.get(session_id) is t - else None - ) - ) - class DecomposeGoalTool(BaseTool): """Tool for decomposing an agent goal into sub-instructions.""" @@ -210,10 +29,11 @@ class DecomposeGoalTool(BaseTool): @property def description(self) -> str: return ( - "Break down an agent-building goal into logical sub-instructions. " - "Each step maps to one task (e.g. add a block, wire connections, " - "configure settings). ALWAYS call this before create_agent to show " - "the user your plan and get approval." + "Show the user your build plan as a step-by-step card before " + "constructing the agent. Each step maps to one task (e.g. add a " + "block, wire connections, configure settings). Display-only — " + "the build continues in the same turn without pausing for user " + "input." ) @property @@ -307,14 +127,10 @@ class DecomposeGoalTool(BaseTool): ) ) - await _schedule_auto_approve(session_id, user_id, session) - return TaskDecompositionResponse( message=f"Here's the plan to build your agent ({len(decomposition_steps)} steps):", goal=goal, steps=decomposition_steps, step_count=len(decomposition_steps), - requires_approval=True, - auto_approve_seconds=AUTO_APPROVE_CLIENT_SECONDS, session_id=session_id, ) diff --git a/autogpt_platform/backend/backend/copilot/tools/decompose_goal_test.py b/autogpt_platform/backend/backend/copilot/tools/decompose_goal_test.py index 8cfecbc24c..b8421b4007 100644 --- a/autogpt_platform/backend/backend/copilot/tools/decompose_goal_test.py +++ b/autogpt_platform/backend/backend/copilot/tools/decompose_goal_test.py @@ -1,27 +1,11 @@ """Unit tests for DecomposeGoalTool.""" -import asyncio -from datetime import UTC, datetime -from unittest.mock import AsyncMock, patch - import pytest -from backend.copilot.model import ChatMessage - -from . import decompose_goal as decompose_goal_module from ._test_data import make_session -from .decompose_goal import ( - AUTO_APPROVE_CLIENT_SECONDS, - DEFAULT_ACTION, - DecomposeGoalTool, - cancel_auto_approve, - needs_build_plan_approval, -) +from .decompose_goal import DEFAULT_ACTION, DecomposeGoalTool from .models import ErrorResponse, TaskDecompositionResponse -# Captured before the autouse fixture stubs the real scheduler. -_REAL_SCHEDULE_AUTO_APPROVE = decompose_goal_module._schedule_auto_approve - _USER_ID = "test-user-decompose-goal" _VALID_STEPS = [ @@ -35,20 +19,6 @@ _VALID_STEPS = [ ] -@pytest.fixture(autouse=True) -def _stub_auto_approve_scheduler(): - """The existing happy-path tests don't have a database; stub the - fire-and-forget scheduler so they don't kick off real timers that try to - hit Redis/Postgres. Tests that exercise auto-approve override this with - their own patches inside the test body.""" - - async def _noop(*a, **kw): - pass - - with patch.object(decompose_goal_module, "_schedule_auto_approve", _noop): - yield - - @pytest.fixture() def tool() -> DecomposeGoalTool: return DecomposeGoalTool() @@ -77,7 +47,6 @@ async def test_happy_path(tool: DecomposeGoalTool, session): assert result.goal == "Build a news summarizer agent" assert len(result.steps) == 3 assert result.step_count == 3 - assert result.requires_approval is True assert result.steps[0].step_id == "step_1" assert result.steps[0].description == "Add input block" assert result.steps[1].block_name == "AI Text Generator" @@ -96,20 +65,6 @@ async def test_step_count_matches_steps(tool: DecomposeGoalTool, session): assert result.step_count == len(result.steps) -@pytest.mark.asyncio -async def test_requires_approval_always_true(tool: DecomposeGoalTool, session): - """requires_approval must always be True regardless of kwargs.""" - result = await tool._execute( - user_id=_USER_ID, - session=session, - goal="Build agent", - steps=_VALID_STEPS, - require_approval=False, # should be ignored - ) - assert isinstance(result, TaskDecompositionResponse) - assert result.requires_approval is True - - @pytest.mark.asyncio async def test_invalid_action_defaults_to_add_block(tool: DecomposeGoalTool, session): """Unknown action values are coerced to DEFAULT_ACTION.""" @@ -251,293 +206,3 @@ async def test_step_ids_are_sequential(tool: DecomposeGoalTool, session): assert isinstance(result, TaskDecompositionResponse) for i, step in enumerate(result.steps): assert step.step_id == f"step_{i + 1}" - - -# --------------------------------------------------------------------------- -# auto_approve_seconds field -# --------------------------------------------------------------------------- - - -@pytest.mark.asyncio -async def test_response_includes_auto_approve_seconds(tool: DecomposeGoalTool, session): - """The response carries the countdown so the frontend has a single - source of truth instead of a hard-coded constant.""" - result = await tool._execute( - user_id=_USER_ID, - session=session, - goal="Build agent", - steps=_VALID_STEPS, - ) - assert isinstance(result, TaskDecompositionResponse) - assert result.auto_approve_seconds == AUTO_APPROVE_CLIENT_SECONDS - - -@pytest.mark.asyncio -async def test_response_includes_created_at(tool: DecomposeGoalTool, session): - """created_at must be stamped at execution time so the client can - compute remaining countdown when the user reopens the session.""" - before = datetime.now(UTC) - result = await tool._execute( - user_id=_USER_ID, - session=session, - goal="Build agent", - steps=_VALID_STEPS, - ) - after = datetime.now(UTC) - - assert isinstance(result, TaskDecompositionResponse) - assert isinstance(result.created_at, datetime) - # Stamped during the call. - assert before <= result.created_at <= after - - -# --------------------------------------------------------------------------- -# needs_build_plan_approval — build-tool approval gate -# --------------------------------------------------------------------------- - - -def _decompose_tool_call() -> dict: - return { - "id": "call_1", - "type": "function", - "function": {"name": "decompose_goal", "arguments": "{}"}, - } - - -def test_needs_approval_blocks_when_no_decompose_in_session(): - """LLM tries to build without calling decompose_goal at all.""" - session = make_session(_USER_ID) - session.messages.append(ChatMessage(role="user", content="Build me an agent")) - assert needs_build_plan_approval(session) is True - - -def test_needs_approval_allows_any_user_response(): - """Any user message after decompose_goal unblocks the gate.""" - session = make_session(_USER_ID) - session.messages.append(ChatMessage(role="user", content="Build me an agent")) - session.messages.append( - ChatMessage(role="assistant", content="", tool_calls=[_decompose_tool_call()]) - ) - session.messages.append(ChatMessage(role="tool", content="{plan}")) - session.messages.append(ChatMessage(role="user", content="Sure")) - assert needs_build_plan_approval(session) is False - - -def test_needs_approval_allows_explicit_approval(): - """Explicit 'Approved' also passes (common from button/auto-approve).""" - session = make_session(_USER_ID) - session.messages.append(ChatMessage(role="user", content="Build me an agent")) - session.messages.append( - ChatMessage(role="assistant", content="", tool_calls=[_decompose_tool_call()]) - ) - session.messages.append(ChatMessage(role="tool", content="{plan}")) - session.messages.append( - ChatMessage(role="user", content="Approved. Please build the agent.") - ) - assert needs_build_plan_approval(session) is False - - -def test_needs_approval_allows_modification_request(): - """User asking to modify the plan also passes — LLM decides what to do.""" - session = make_session(_USER_ID) - session.messages.append(ChatMessage(role="user", content="Build me an agent")) - session.messages.append( - ChatMessage(role="assistant", content="", tool_calls=[_decompose_tool_call()]) - ) - session.messages.append(ChatMessage(role="tool", content="{plan}")) - session.messages.append( - ChatMessage(role="user", content="Change step 3 to use Gmail instead") - ) - assert needs_build_plan_approval(session) is False - - -def test_needs_approval_blocks_same_turn_decompose_and_build(): - """LLM calls decompose_goal then immediately tries create_agent in the - same turn — no user message after decompose_goal yet.""" - session = make_session(_USER_ID) - session.messages.append(ChatMessage(role="user", content="Build me an agent")) - session.messages.append( - ChatMessage(role="assistant", content="", tool_calls=[_decompose_tool_call()]) - ) - session.messages.append(ChatMessage(role="tool", content="{plan}")) - assert needs_build_plan_approval(session) is True - - -def test_needs_approval_blocks_without_prior_decompose(): - """No decompose_goal in session → must decompose first.""" - session = make_session(_USER_ID) - session.messages.append(ChatMessage(role="user", content="Build me an agent")) - assert needs_build_plan_approval(session) is True - - -# --------------------------------------------------------------------------- -# Server-side auto-approve task — uses run_copilot_turn_via_queue -# --------------------------------------------------------------------------- - - -class _FakeRedisNoCancelFlag: - """Stub Redis that reports no cancel flag and ignores writes.""" - - async def get(self, key): - return None - - async def set(self, key, value, ex=None): - pass - - async def delete(self, key): - pass - - -def _stub_redis(): - """Patch get_redis_async to return a fake Redis (no real connection).""" - return patch( - "backend.copilot.tools.decompose_goal.get_redis_async", - new=AsyncMock(return_value=_FakeRedisNoCancelFlag()), - ) - - -@pytest.mark.asyncio -async def test_auto_approve_dispatches_via_queue_helper(): - """_run_auto_approve should delegate to run_copilot_turn_via_queue.""" - fake_dispatch = AsyncMock(return_value=("completed", None)) - - with ( - _stub_redis(), - patch( - "backend.copilot.sdk.session_waiter.run_copilot_turn_via_queue", - new=fake_dispatch, - ), - patch( - "backend.copilot.tools.decompose_goal.AUTO_APPROVE_SERVER_SECONDS", - 0, - ), - ): - await decompose_goal_module._run_auto_approve("session-idle", _USER_ID) - - fake_dispatch.assert_awaited_once() - call_kwargs = fake_dispatch.await_args.kwargs - assert call_kwargs["session_id"] == "session-idle" - assert call_kwargs["message"] == "Approved. Please build the agent." - assert call_kwargs["timeout"] == 0 - assert call_kwargs["tool_name"] == "decompose_goal_auto_approve" - - -@pytest.mark.asyncio -async def test_auto_approve_swallows_unexpected_errors(): - """A failure inside the task must never propagate.""" - - async def boom(*args, **kwargs): - raise RuntimeError("kaboom") - - with ( - _stub_redis(), - patch( - "backend.copilot.sdk.session_waiter.run_copilot_turn_via_queue", - new=boom, - ), - patch( - "backend.copilot.tools.decompose_goal.AUTO_APPROVE_SERVER_SECONDS", - 0, - ), - ): - await decompose_goal_module._run_auto_approve("session-error", None) - - -@pytest.mark.asyncio -async def test_schedule_auto_approve_creates_task(monkeypatch): - """_schedule_auto_approve should add a task to the tracking dict.""" - monkeypatch.setattr(decompose_goal_module, "AUTO_APPROVE_SERVER_SECONDS", 0) - fake_run = AsyncMock() - monkeypatch.setattr(decompose_goal_module, "_run_auto_approve", fake_run) - monkeypatch.setattr( - decompose_goal_module, - "get_redis_async", - AsyncMock(return_value=_FakeRedisNoCancelFlag()), - ) - - session = make_session(_USER_ID) - - await _REAL_SCHEDULE_AUTO_APPROVE( - session_id="session-schedule", - user_id=_USER_ID, - session=session, - ) - - await asyncio.sleep(0) - while decompose_goal_module._pending_auto_approvals: - await asyncio.sleep(0) - - fake_run.assert_awaited_once_with("session-schedule", _USER_ID) - - -@pytest.mark.asyncio -async def test_schedule_auto_approve_no_op_without_session_id(): - """Empty session id should be a no-op (defensive).""" - session = make_session(_USER_ID) - await decompose_goal_module._schedule_auto_approve( - session_id=None, user_id=_USER_ID, session=session - ) - assert len(decompose_goal_module._pending_auto_approvals) == 0 - - -# --------------------------------------------------------------------------- -# cancel_auto_approve -# --------------------------------------------------------------------------- - - -@pytest.mark.asyncio -async def test_cancel_auto_approve_sets_redis_flag_and_cancels_task(monkeypatch): - """cancel_auto_approve should set a Redis cancel flag AND cancel the - in-process task. Returns True always (Redis flag is authoritative).""" - monkeypatch.setattr(decompose_goal_module, "AUTO_APPROVE_SERVER_SECONDS", 999) - fake_run = AsyncMock() - monkeypatch.setattr(decompose_goal_module, "_run_auto_approve", fake_run) - - captured_redis_calls: list[tuple] = [] - - class FakeRedis: - async def set(self, key, value, ex=None): - captured_redis_calls.append(("set", key, value, ex)) - - async def get(self, key): - return None - - async def delete(self, key): - pass - - monkeypatch.setattr( - decompose_goal_module, "get_redis_async", AsyncMock(return_value=FakeRedis()) - ) - - session = make_session(_USER_ID) - await _REAL_SCHEDULE_AUTO_APPROVE( - session_id="session-cancel-test", - user_id=_USER_ID, - session=session, - ) - - assert "session-cancel-test" in decompose_goal_module._pending_auto_approvals - result = await cancel_auto_approve("session-cancel-test") - assert result is True - assert "session-cancel-test" not in decompose_goal_module._pending_auto_approvals - assert len(captured_redis_calls) == 1 - assert captured_redis_calls[0][0] == "set" - assert "session-cancel-test" in captured_redis_calls[0][1] - - -@pytest.mark.asyncio -async def test_cancel_auto_approve_returns_true_even_without_in_process_task( - monkeypatch, -): - """Even if no in-process task exists (e.g. task is in another process), - cancel_auto_approve should still set the Redis flag and return True.""" - - class FakeRedis: - async def set(self, key, value, ex=None): - pass - - monkeypatch.setattr( - decompose_goal_module, "get_redis_async", AsyncMock(return_value=FakeRedis()) - ) - result = await cancel_auto_approve("nonexistent-session") - assert result is True diff --git a/autogpt_platform/backend/backend/copilot/tools/models.py b/autogpt_platform/backend/backend/copilot/tools/models.py index 51172288dd..078ef9ab77 100644 --- a/autogpt_platform/backend/backend/copilot/tools/models.py +++ b/autogpt_platform/backend/backend/copilot/tools/models.py @@ -1,6 +1,6 @@ """Pydantic models for tool responses.""" -from datetime import UTC, datetime +from datetime import datetime from enum import Enum from typing import Any, Literal @@ -845,24 +845,6 @@ class TaskDecompositionResponse(ToolResponseBase): step_count: int = Field( default=0, description="Number of steps (auto-derived from steps list)" ) - requires_approval: bool = True - auto_approve_seconds: int = Field( - default=60, - description=( - "Seconds the client should count down before auto-approving. " - "Kept in sync with the server-side fallback timer, which runs a " - "grace period longer to absorb network latency." - ), - ) - created_at: datetime = Field( - default_factory=lambda: datetime.now(UTC), - description=( - "UTC timestamp when the tool returned. The client uses this with " - "auto_approve_seconds to compute the correct remaining countdown " - "when the user reopens the session — so the timer reflects real " - "elapsed time instead of restarting from zero." - ), - ) @model_validator(mode="after") def sync_step_count(self) -> "TaskDecompositionResponse": diff --git a/autogpt_platform/frontend/src/app/(platform)/copilot/components/ChatMessagesContainer/ChatMessagesContainer.tsx b/autogpt_platform/frontend/src/app/(platform)/copilot/components/ChatMessagesContainer/ChatMessagesContainer.tsx index 4d769ba9a4..2fe7be8ee5 100644 --- a/autogpt_platform/frontend/src/app/(platform)/copilot/components/ChatMessagesContainer/ChatMessagesContainer.tsx +++ b/autogpt_platform/frontend/src/app/(platform)/copilot/components/ChatMessagesContainer/ChatMessagesContainer.tsx @@ -55,8 +55,6 @@ function renderSegments( segments: RenderSegment[], messageID: string, onRetry?: () => void, - isLastMessage?: boolean, - isMessageStreaming?: boolean, ): React.ReactNode[] { return segments.map((seg, segIdx) => { if (seg.kind === "collapsed-group") { @@ -69,8 +67,6 @@ function renderSegments( messageID={messageID} partIndex={seg.index} onRetry={onRetry} - isLastMessage={isLastMessage} - isMessageStreaming={isMessageStreaming} /> ); }); @@ -448,8 +444,6 @@ export function ChatMessagesContainer({ responseSegments, message.id, isLastAssistant ? onRetry : undefined, - isLastAssistant, - isCurrentlyStreaming, ) : message.parts.map((part, i) => ( ))} {isLastInTurn && !isCurrentlyStreaming && ( diff --git a/autogpt_platform/frontend/src/app/(platform)/copilot/components/ChatMessagesContainer/__tests__/helpers.test.ts b/autogpt_platform/frontend/src/app/(platform)/copilot/components/ChatMessagesContainer/__tests__/helpers.test.ts index 552b83f198..97bd19cc92 100644 --- a/autogpt_platform/frontend/src/app/(platform)/copilot/components/ChatMessagesContainer/__tests__/helpers.test.ts +++ b/autogpt_platform/frontend/src/app/(platform)/copilot/components/ChatMessagesContainer/__tests__/helpers.test.ts @@ -56,7 +56,6 @@ describe("isInteractiveToolPart", () => { goal: "Build agent", steps: [], step_count: 0, - requires_approval: true, }); expect(isInteractiveToolPart(part)).toBe(true); }); diff --git a/autogpt_platform/frontend/src/app/(platform)/copilot/components/ChatMessagesContainer/components/MessagePartRenderer.tsx b/autogpt_platform/frontend/src/app/(platform)/copilot/components/ChatMessagesContainer/components/MessagePartRenderer.tsx index 576c925871..15061d616b 100644 --- a/autogpt_platform/frontend/src/app/(platform)/copilot/components/ChatMessagesContainer/components/MessagePartRenderer.tsx +++ b/autogpt_platform/frontend/src/app/(platform)/copilot/components/ChatMessagesContainer/components/MessagePartRenderer.tsx @@ -95,8 +95,6 @@ interface Props { messageID: string; partIndex: number; onRetry?: () => void; - isLastMessage?: boolean; - isMessageStreaming?: boolean; } export function MessagePartRenderer({ @@ -104,8 +102,6 @@ export function MessagePartRenderer({ messageID, partIndex, onRetry, - isLastMessage, - isMessageStreaming, }: Props) { const key = `${messageID}-${partIndex}`; @@ -186,14 +182,7 @@ export function MessagePartRenderer({ case "tool-schedule_agent": return ; case "tool-decompose_goal": - return ( - - ); + return ; case "tool-create_agent": return ; case "tool-edit_agent": diff --git a/autogpt_platform/frontend/src/app/(platform)/copilot/tools/DecomposeGoal/DecomposeGoal.tsx b/autogpt_platform/frontend/src/app/(platform)/copilot/tools/DecomposeGoal/DecomposeGoal.tsx index 6f14c8f420..7f89ea7ebc 100644 --- a/autogpt_platform/frontend/src/app/(platform)/copilot/tools/DecomposeGoal/DecomposeGoal.tsx +++ b/autogpt_platform/frontend/src/app/(platform)/copilot/tools/DecomposeGoal/DecomposeGoal.tsx @@ -1,15 +1,6 @@ "use client"; -import { postV2CancelAutoApproveTask } from "@/app/api/__generated__/endpoints/chat/chat"; -import { Button } from "@/components/atoms/Button/Button"; -import { - CheckIcon, - PencilSimpleIcon, - PlusIcon, - TrashIcon, -} from "@phosphor-icons/react"; import type { ToolUIPart } from "ai"; -import { useEffect, useRef, useState } from "react"; import { useCopilotChatActions } from "../../components/CopilotChatActionsProvider/useCopilotChatActions"; import { MorphingTextAnimation } from "../../components/MorphingTextAnimation/MorphingTextAnimation"; import { @@ -21,40 +12,18 @@ import { ToolErrorCard } from "../../components/ToolErrorCard/ToolErrorCard"; import { StepItem } from "./components/StepItem"; import { AccordionIcon, - computeRemainingSeconds, getAnimationText, getDecomposeGoalOutput, isDecompositionOutput, isErrorOutput, - FALLBACK_COUNTDOWN_SECONDS, ToolIcon, } from "./helpers"; -const RADIUS = 15; -const CIRCUMFERENCE = 2 * Math.PI * RADIUS; - -interface EditableStep { - step_id: string; - description: string; - action: string; - block_name?: string | null; - status: string; -} - interface Props { part: ToolUIPart; - isLastMessage?: boolean; - // True while the parent assistant message is still streaming. We disable - // Approve/Modify in this window because the chat session is locked to - // the in-flight turn — sending a new user message would fail. - isMessageStreaming?: boolean; } -export function DecomposeGoalTool({ - part, - isLastMessage, - isMessageStreaming, -}: Props) { +export function DecomposeGoalTool({ part }: Props) { const text = getAnimationText(part); const { onSend } = useCopilotChatActions(); @@ -66,165 +35,6 @@ export function DecomposeGoalTool({ part.state === "output-error" || (!!output && isErrorOutput(output)); const isPending = !output && !isError; - const showActions = - !!isLastMessage && - !!output && - isDecompositionOutput(output) && - output.requires_approval; - - // The Approve/Modify buttons are visible (so the user knows what's - // coming) but click-disabled while the assistant is still streaming - // its summary text after the tool call. The countdown ring keeps - // ticking so it stays in sync with the server-side timer. - const actionsEnabled = showActions && !isMessageStreaming; - - // Authoritative countdown comes from the backend tool response so the - // server-side fallback timer and the client are guaranteed to agree. - const countdownSeconds = - (output && isDecompositionOutput(output) && output.auto_approve_seconds) || - FALLBACK_COUNTDOWN_SECONDS; - - // Lazy initializer: runs once on mount and seeds remaining time from the - // backend ``created_at`` so reopening a session resumes the countdown - // instead of restarting it. - const [secondsLeft, setSecondsLeft] = useState(() => - computeRemainingSeconds(output, FALLBACK_COUNTDOWN_SECONDS), - ); - // timerActive becomes false when the user clicks Modify — stops countdown and auto-approve. - const [timerActive, setTimerActive] = useState(true); - const [isEditing, setIsEditing] = useState(false); - const [editableSteps, setEditableSteps] = useState([]); - - // True when secondsLeft started at 0 because the server-stamped deadline - // had already elapsed before this mount. Prevents the auto-approve effect - // from firing on remount with stale cache — the server handles that case. - // Without this, re-entering after the deadline sends a duplicate "Approved". - const wasInitiallyPastDeadlineRef = useRef( - secondsLeft === 0 && - !!output && - isDecompositionOutput(output) && - !!output.created_at, - ); - - const approvedRef = useRef(false); - const onSendRef = useRef(onSend); - const isEditingRef = useRef(isEditing); - const editableStepsRef = useRef(editableSteps); - onSendRef.current = onSend; - isEditingRef.current = isEditing; - editableStepsRef.current = editableSteps; - - function buildMessage() { - if (isEditingRef.current && editableStepsRef.current.length > 0) { - const filledSteps = editableStepsRef.current.filter((s) => - s.description.trim(), - ); - if (filledSteps.length > 0) { - const list = filledSteps - .map((s, i) => `${i + 1}. ${s.description}`) - .join("; "); - return `Approved with modifications. Please build the agent following these steps: ${list}`; - } - } - return "Approved. Please build the agent."; - } - - function approve() { - if (approvedRef.current) return; - approvedRef.current = true; - setIsEditing(false); - onSendRef.current(buildMessage()); - } - - function handleModify() { - if (approvedRef.current) return; - if (!output || !isDecompositionOutput(output)) return; - setTimerActive(false); - setIsEditing(true); - setEditableSteps( - output.steps.map((s) => ({ ...s, status: s.status ?? "pending" })), - ); - - // Cancel the server-side auto-approve timer so it doesn't fire the - // default "Approved" message while the user is editing steps. - if (output.session_id) { - postV2CancelAutoApproveTask(output.session_id).catch(() => { - // Best-effort — if the timer already fired or the request fails, - // the predicate check will still prevent a duplicate. - }); - } - } - - function handleStepChange(index: number, description: string) { - setEditableSteps((prev) => - prev.map((s, i) => (i === index ? { ...s, description } : s)), - ); - } - - function handleStepDelete(index: number) { - setEditableSteps((prev) => prev.filter((_, i) => i !== index)); - } - - // Insert a blank step after the given index (-1 = prepend). - function handleStepInsert(afterIndex: number) { - setEditableSteps((prev) => { - const next = [...prev]; - next.splice(afterIndex + 1, 0, { - step_id: `step_new_${crypto.randomUUID()}`, - description: "", - action: "add_block", - status: "pending", - }); - return next; - }); - } - - // If a new message arrives while editing, exit edit mode so the user is not stuck. - useEffect(() => { - if (!showActions && isEditing) { - setIsEditing(false); - } - }, [showActions, isEditing]); - - // The timer only ticks once the turn is fully finished (actionsEnabled - // includes !isMessageStreaming). This gives the user the full countdown - // duration to review the plan after all streaming completes — not from - // when the tool returned (which would eat streaming time into the review - // window). For session re-entry, the lazy initializer already seeds - // secondsLeft from created_at, so the timer resumes correctly. - useEffect(() => { - if (!actionsEnabled || !timerActive) return; - const interval = setInterval(() => { - setSecondsLeft((s) => Math.max(0, s - 1)); - }, 1000); - return () => clearInterval(interval); - }, [actionsEnabled, timerActive, part.toolCallId]); - - // Auto-approve when countdown reaches 0. The client fires at 60s; the - // server fires 5s later as a fallback for the "user closed the tab" case. - // When the client IS present, its approve() creates the SSE subscription - // so the user sees the build in real-time. The server's predicate then - // sees the user's message and skips — no duplicate. - // approve() is stable via approvedRef — safe to omit from deps. - useEffect(() => { - if ( - secondsLeft === 0 && - timerActive && - actionsEnabled && - !wasInitiallyPastDeadlineRef.current - ) { - approve(); - } - }, [secondsLeft, timerActive, actionsEnabled]); - - const progress = secondsLeft / countdownSeconds; - const dashOffset = CIRCUMFERENCE * (1 - progress); - const stepCount = isEditing - ? editableSteps.length - : output && isDecompositionOutput(output) - ? output.step_count - : 0; - return (
{isPending && ( @@ -255,7 +65,7 @@ export function DecomposeGoalTool({ {output && isDecompositionOutput(output) && ( } - title={`Build Plan — ${stepCount} steps`} + title={`Build Plan — ${output.step_count} steps`} description={output.goal} defaultExpanded > @@ -263,164 +73,21 @@ export function DecomposeGoalTool({ {output.message}
- {isEditing ? ( -
- {/* Insert before the first step */} - handleStepInsert(-1)} /> - - {editableSteps.map((step, i) => ( -
-
- - {i + 1}. - -