diff --git a/autogpt_platform/backend/backend/api/features/chat/routes.py b/autogpt_platform/backend/backend/api/features/chat/routes.py index cbde6a40fe..ae505acbfe 100644 --- a/autogpt_platform/backend/backend/api/features/chat/routes.py +++ b/autogpt_platform/backend/backend/api/features/chat/routes.py @@ -50,6 +50,7 @@ from backend.copilot.tools.models import ( AgentPreviewResponse, AgentSavedResponse, AgentsFoundResponse, + BackgroundToolStatus, BlockDetailsResponse, BlockListResponse, BlockOutputResponse, @@ -1323,6 +1324,7 @@ ToolResponseUnion = ( | MemorySearchResponse | MemoryForgetCandidatesResponse | MemoryForgetConfirmResponse + | BackgroundToolStatus ) diff --git a/autogpt_platform/backend/backend/copilot/sdk/tool_adapter.py b/autogpt_platform/backend/backend/copilot/sdk/tool_adapter.py index 88822f37ef..7ad27737b7 100644 --- a/autogpt_platform/backend/backend/copilot/sdk/tool_adapter.py +++ b/autogpt_platform/backend/backend/copilot/sdk/tool_adapter.py @@ -276,24 +276,34 @@ async def _execute_tool_sync( ) timeout = base_tool.timeout_seconds - if timeout is None: - result = await task - else: - # asyncio.wait (unlike wait_for) does NOT cancel on timeout — the - # task keeps running in the background. - await asyncio.wait({task}, timeout=timeout) + try: + if timeout is None: + result = await task + else: + # asyncio.wait (unlike wait_for) does NOT cancel on timeout — the + # task keeps running in the background. + await asyncio.wait({task}, timeout=timeout) + if not task.done(): + bg_id = _register_background_task(task, base_tool.name) + logger.warning( + "Tool %s exceeded %ss budget — parked as " + "background_id=%s (args=%s)", + base_tool.name, + timeout, + bg_id, + _redact_args_for_log(args), + ) + return _tool_background_result(base_tool.name, timeout, bg_id) + # Completed within budget — .result() re-raises any exception. + result = task.result() + except asyncio.CancelledError: + # The handler itself was cancelled (e.g. stream teardown) mid-wait. + # Cancel the child so it doesn't keep running untracked — the + # registry hasn't seen it yet, so cancel_all_background_tasks + # couldn't clean it up. if not task.done(): - bg_id = _register_background_task(task, base_tool.name) - logger.warning( - "Tool %s exceeded %ss budget — parked as background_id=%s " "(args=%s)", - base_tool.name, - timeout, - bg_id, - _redact_args_for_log(args), - ) - return _tool_background_result(base_tool.name, timeout, bg_id) - # Completed within budget — .result() re-raises any exception. - result = task.result() + task.cancel() + raise text = ( result.output if isinstance(result.output, str) else json.dumps(result.output) diff --git a/autogpt_platform/backend/backend/copilot/sdk/tool_adapter_test.py b/autogpt_platform/backend/backend/copilot/sdk/tool_adapter_test.py index 92a396d3ad..99a91bc54f 100644 --- a/autogpt_platform/backend/backend/copilot/sdk/tool_adapter_test.py +++ b/autogpt_platform/backend/backend/copilot/sdk/tool_adapter_test.py @@ -466,6 +466,38 @@ class TestToolTimeout: assert result["isError"] is False assert "completed" in result["content"][0]["text"] + @pytest.mark.asyncio + async def test_handler_cancellation_cancels_child_task(self): + """If the handler itself is cancelled before the tool completes, + the child task is cancelled too (no leak into the background + registry, since it wasn't parked yet).""" + import contextlib + + mock_tool = _make_mock_tool("slow_tool", timeout_seconds=60) + child_cancelled = asyncio.Event() + + async def hang_until_cancelled(*_args, **_kwargs): + try: + await asyncio.sleep(60) + except asyncio.CancelledError: + child_cancelled.set() + raise + + mock_tool.execute = AsyncMock(side_effect=hang_until_cancelled) + + from backend.copilot.sdk.tool_adapter import _execute_tool_sync + + outer_task = asyncio.create_task( + _execute_tool_sync(mock_tool, "u", _make_mock_session(), {}) + ) + # Let the handler start waiting on the child. + await asyncio.sleep(0.05) + outer_task.cancel() + with contextlib.suppress(asyncio.CancelledError): + await outer_task + await asyncio.sleep(0) + assert child_cancelled.is_set() + @pytest.mark.asyncio async def test_fast_tool_within_timeout_succeeds(self): """Tools that complete well under the timeout are unaffected.""" diff --git a/autogpt_platform/backend/backend/copilot/tools/check_background_tool.py b/autogpt_platform/backend/backend/copilot/tools/check_background_tool.py index 80c220d54f..fc22584394 100644 --- a/autogpt_platform/backend/backend/copilot/tools/check_background_tool.py +++ b/autogpt_platform/backend/backend/copilot/tools/check_background_tool.py @@ -8,9 +8,7 @@ stays in control rather than the handler making an irreversible choice. import asyncio import logging -from typing import Any, Literal - -from pydantic import Field +from typing import Any from backend.copilot.model import ChatSession from backend.copilot.sdk.background_registry import ( @@ -22,26 +20,11 @@ from backend.copilot.sdk.background_registry import ( ) from .base import BaseTool -from .models import ErrorResponse, ResponseType, ToolResponseBase +from .models import BackgroundToolStatus, ErrorResponse, ToolResponseBase logger = logging.getLogger(__name__) -class BackgroundToolStatus(ToolResponseBase): - """Status of a backgrounded tool call.""" - - type: ResponseType = ResponseType.MCP_TOOL_OUTPUT - status: Literal["completed", "still_running", "cancelled", "error"] = Field( - description="Current state of the background task." - ) - tool: str = Field(description="The name of the originally-backgrounded tool.") - background_id: str - output: Any | None = Field( - default=None, description="Tool output when status=completed." - ) - waited_seconds: int | None = Field(default=None) - - class CheckBackgroundToolTool(BaseTool): """Inspect, wait on, or cancel a backgrounded tool call.""" @@ -207,6 +190,17 @@ def _status_from_finished_task( ) result = task.result() + # A tool can complete with success=False without raising — preserve + # that as status="error" so the agent doesn't treat it as a win. + if not result.success: + return BackgroundToolStatus( + message=f"'{tool_name}' completed with an error.", + session_id=session.session_id, + status="error", + tool=tool_name, + background_id=background_id, + output=result.output, + ) return BackgroundToolStatus( message=f"'{tool_name}' completed.", session_id=session.session_id, diff --git a/autogpt_platform/backend/backend/copilot/tools/check_background_tool_test.py b/autogpt_platform/backend/backend/copilot/tools/check_background_tool_test.py index 4bc1c2dd83..2defaca6b3 100644 --- a/autogpt_platform/backend/backend/copilot/tools/check_background_tool_test.py +++ b/autogpt_platform/backend/backend/copilot/tools/check_background_tool_test.py @@ -1,6 +1,7 @@ """Tests for CheckBackgroundToolTool.""" import asyncio +import contextlib from unittest.mock import MagicMock import pytest @@ -11,7 +12,8 @@ from backend.copilot.sdk.background_registry import ( register_background_task, ) -from .check_background_tool import BackgroundToolStatus, CheckBackgroundToolTool +from .check_background_tool import CheckBackgroundToolTool +from .models import BackgroundToolStatus def _make_session() -> MagicMock: @@ -78,10 +80,8 @@ class TestCheckBackgroundTool: assert response.background_id == bg_id task.cancel() - try: + with contextlib.suppress(asyncio.CancelledError): await task - except (asyncio.CancelledError, BaseException): - pass @pytest.mark.asyncio async def test_wait_returns_completed_when_task_finishes(self): @@ -124,15 +124,11 @@ class TestCheckBackgroundTool: assert response.waited_seconds == 1 task.cancel() - try: + with contextlib.suppress(asyncio.CancelledError): await task - except (asyncio.CancelledError, BaseException): - pass @pytest.mark.asyncio async def test_cancel_true_cancels_and_removes_from_registry(self): - import contextlib - observed_cancel = asyncio.Event() async def stays_until_cancelled(): @@ -189,3 +185,31 @@ class TestCheckBackgroundTool: assert isinstance(response, BackgroundToolStatus) assert response.status == "error" assert "boom" in response.message + + @pytest.mark.asyncio + async def test_finished_task_with_success_false_reports_error(self): + """A tool that completes with success=False (without raising) is + reported as status='error', not 'completed', so the agent doesn't + treat it as a win.""" + + async def finish_with_failure(): + return StreamToolOutputAvailable( + toolCallId="tc-1", + output="partial", + toolName="broken_tool", + success=False, + ) + + task = asyncio.create_task(finish_with_failure()) + await task + bg_id = register_background_task(task, "broken_tool") + + tool = CheckBackgroundToolTool() + response = await tool._execute( + user_id="u", + session=_make_session(), + background_id=bg_id, + ) + assert isinstance(response, BackgroundToolStatus) + assert response.status == "error" + assert response.output == "partial" diff --git a/autogpt_platform/backend/backend/copilot/tools/models.py b/autogpt_platform/backend/backend/copilot/tools/models.py index 90aa3d51db..d3d4b2e652 100644 --- a/autogpt_platform/backend/backend/copilot/tools/models.py +++ b/autogpt_platform/backend/backend/copilot/tools/models.py @@ -259,6 +259,22 @@ class ErrorResponse(ToolResponseBase): details: dict[str, Any] | None = None +class BackgroundToolStatus(ToolResponseBase): + """Status of a backgrounded tool call, returned by ``check_background_tool``.""" + + type: ResponseType = ResponseType.MCP_TOOL_OUTPUT + status: Literal["completed", "still_running", "cancelled", "error"] = Field( + description="Current state of the background task." + ) + tool: str = Field(description="The name of the originally-backgrounded tool.") + background_id: str + output: Any | None = Field( + default=None, + description="Tool output when status=completed or status=error.", + ) + waited_seconds: int | None = Field(default=None) + + class InputValidationErrorResponse(ToolResponseBase): """Response when run_agent receives unknown input fields."""