diff --git a/autogpt_platform/backend/backend/api/features/chat/routes.py b/autogpt_platform/backend/backend/api/features/chat/routes.py
index cbde6a40fe..ae505acbfe 100644
--- a/autogpt_platform/backend/backend/api/features/chat/routes.py
+++ b/autogpt_platform/backend/backend/api/features/chat/routes.py
@@ -50,6 +50,7 @@ from backend.copilot.tools.models import (
     AgentPreviewResponse,
     AgentSavedResponse,
     AgentsFoundResponse,
+    BackgroundToolStatus,
     BlockDetailsResponse,
     BlockListResponse,
     BlockOutputResponse,
@@ -1323,6 +1324,7 @@ ToolResponseUnion = (
     | MemorySearchResponse
     | MemoryForgetCandidatesResponse
     | MemoryForgetConfirmResponse
+    | BackgroundToolStatus
 )
 
 
diff --git a/autogpt_platform/backend/backend/copilot/sdk/tool_adapter.py b/autogpt_platform/backend/backend/copilot/sdk/tool_adapter.py
index 88822f37ef..7ad27737b7 100644
--- a/autogpt_platform/backend/backend/copilot/sdk/tool_adapter.py
+++ b/autogpt_platform/backend/backend/copilot/sdk/tool_adapter.py
@@ -276,24 +276,34 @@ async def _execute_tool_sync(
     )
 
     timeout = base_tool.timeout_seconds
-    if timeout is None:
-        result = await task
-    else:
-        # asyncio.wait (unlike wait_for) does NOT cancel on timeout — the
-        # task keeps running in the background.
-        await asyncio.wait({task}, timeout=timeout)
+    try:
+        if timeout is None:
+            result = await task
+        else:
+            # asyncio.wait (unlike wait_for) does NOT cancel on timeout — the
+            # task keeps running in the background.
+            await asyncio.wait({task}, timeout=timeout)
+            if not task.done():
+                bg_id = _register_background_task(task, base_tool.name)
+                logger.warning(
+                    "Tool %s exceeded %ss budget — parked as "
+                    "background_id=%s (args=%s)",
+                    base_tool.name,
+                    timeout,
+                    bg_id,
+                    _redact_args_for_log(args),
+                )
+                return _tool_background_result(base_tool.name, timeout, bg_id)
+            # Completed within budget — .result() re-raises any exception.
+            result = task.result()
+    except asyncio.CancelledError:
+        # The handler itself was cancelled (e.g. stream teardown) mid-wait.
+        # Cancel the child so it doesn't keep running untracked — the
+        # registry hasn't seen it yet, so cancel_all_background_tasks
+        # couldn't clean it up.
         if not task.done():
-            bg_id = _register_background_task(task, base_tool.name)
-            logger.warning(
-                "Tool %s exceeded %ss budget — parked as background_id=%s " "(args=%s)",
-                base_tool.name,
-                timeout,
-                bg_id,
-                _redact_args_for_log(args),
-            )
-            return _tool_background_result(base_tool.name, timeout, bg_id)
-        # Completed within budget — .result() re-raises any exception.
-        result = task.result()
+            task.cancel()
+        raise
 
     text = (
         result.output if isinstance(result.output, str) else json.dumps(result.output)
diff --git a/autogpt_platform/backend/backend/copilot/sdk/tool_adapter_test.py b/autogpt_platform/backend/backend/copilot/sdk/tool_adapter_test.py
index 92a396d3ad..99a91bc54f 100644
--- a/autogpt_platform/backend/backend/copilot/sdk/tool_adapter_test.py
+++ b/autogpt_platform/backend/backend/copilot/sdk/tool_adapter_test.py
@@ -466,6 +466,38 @@ class TestToolTimeout:
         assert result["isError"] is False
         assert "completed" in result["content"][0]["text"]
 
+    @pytest.mark.asyncio
+    async def test_handler_cancellation_cancels_child_task(self):
+        """If the handler itself is cancelled before the tool completes,
+        the child task is cancelled too (no leak into the background
+        registry, since it wasn't parked yet)."""
+        import contextlib
+
+        mock_tool = _make_mock_tool("slow_tool", timeout_seconds=60)
+        child_cancelled = asyncio.Event()
+
+        async def hang_until_cancelled(*_args, **_kwargs):
+            try:
+                await asyncio.sleep(60)
+            except asyncio.CancelledError:
+                child_cancelled.set()
+                raise
+
+        mock_tool.execute = AsyncMock(side_effect=hang_until_cancelled)
+
+        from backend.copilot.sdk.tool_adapter import _execute_tool_sync
+
+        outer_task = asyncio.create_task(
+            _execute_tool_sync(mock_tool, "u", _make_mock_session(), {})
+        )
+        # Let the handler start waiting on the child.
+        await asyncio.sleep(0.05)
+        outer_task.cancel()
+        with contextlib.suppress(asyncio.CancelledError):
+            await outer_task
+        await asyncio.sleep(0)
+        assert child_cancelled.is_set()
+
     @pytest.mark.asyncio
     async def test_fast_tool_within_timeout_succeeds(self):
         """Tools that complete well under the timeout are unaffected."""
diff --git a/autogpt_platform/backend/backend/copilot/tools/check_background_tool.py b/autogpt_platform/backend/backend/copilot/tools/check_background_tool.py
index 80c220d54f..fc22584394 100644
--- a/autogpt_platform/backend/backend/copilot/tools/check_background_tool.py
+++ b/autogpt_platform/backend/backend/copilot/tools/check_background_tool.py
@@ -8,9 +8,7 @@ stays in control rather than the handler making an irreversible choice.
 
 import asyncio
 import logging
-from typing import Any, Literal
-
-from pydantic import Field
+from typing import Any
 
 from backend.copilot.model import ChatSession
 from backend.copilot.sdk.background_registry import (
@@ -22,26 +20,11 @@ from backend.copilot.sdk.background_registry import (
 )
 
 from .base import BaseTool
-from .models import ErrorResponse, ResponseType, ToolResponseBase
+from .models import BackgroundToolStatus, ErrorResponse, ToolResponseBase
 
 logger = logging.getLogger(__name__)
 
 
-class BackgroundToolStatus(ToolResponseBase):
-    """Status of a backgrounded tool call."""
-
-    type: ResponseType = ResponseType.MCP_TOOL_OUTPUT
-    status: Literal["completed", "still_running", "cancelled", "error"] = Field(
-        description="Current state of the background task."
-    )
-    tool: str = Field(description="The name of the originally-backgrounded tool.")
-    background_id: str
-    output: Any | None = Field(
-        default=None, description="Tool output when status=completed."
-    )
-    waited_seconds: int | None = Field(default=None)
-
-
 class CheckBackgroundToolTool(BaseTool):
     """Inspect, wait on, or cancel a backgrounded tool call."""
 
@@ -207,6 +190,17 @@ def _status_from_finished_task(
         )
 
     result = task.result()
+    # A tool can complete with success=False without raising — preserve
+    # that as status="error" so the agent doesn't treat it as a win.
+    if not result.success:
+        return BackgroundToolStatus(
+            message=f"'{tool_name}' completed with an error.",
+            session_id=session.session_id,
+            status="error",
+            tool=tool_name,
+            background_id=background_id,
+            output=result.output,
+        )
     return BackgroundToolStatus(
         message=f"'{tool_name}' completed.",
         session_id=session.session_id,
diff --git a/autogpt_platform/backend/backend/copilot/tools/check_background_tool_test.py b/autogpt_platform/backend/backend/copilot/tools/check_background_tool_test.py
index 4bc1c2dd83..2defaca6b3 100644
--- a/autogpt_platform/backend/backend/copilot/tools/check_background_tool_test.py
+++ b/autogpt_platform/backend/backend/copilot/tools/check_background_tool_test.py
@@ -1,6 +1,7 @@
 """Tests for CheckBackgroundToolTool."""
 
 import asyncio
+import contextlib
 from unittest.mock import MagicMock
 
 import pytest
@@ -11,7 +12,8 @@ from backend.copilot.sdk.background_registry import (
     register_background_task,
 )
 
-from .check_background_tool import BackgroundToolStatus, CheckBackgroundToolTool
+from .check_background_tool import CheckBackgroundToolTool
+from .models import BackgroundToolStatus
 
 
 def _make_session() -> MagicMock:
@@ -78,10 +80,8 @@ class TestCheckBackgroundTool:
         assert response.background_id == bg_id
 
         task.cancel()
-        try:
+        with contextlib.suppress(asyncio.CancelledError):
             await task
-        except (asyncio.CancelledError, BaseException):
-            pass
 
     @pytest.mark.asyncio
     async def test_wait_returns_completed_when_task_finishes(self):
@@ -124,15 +124,11 @@ class TestCheckBackgroundTool:
         assert response.waited_seconds == 1
 
         task.cancel()
-        try:
+        with contextlib.suppress(asyncio.CancelledError):
             await task
-        except (asyncio.CancelledError, BaseException):
-            pass
 
     @pytest.mark.asyncio
     async def test_cancel_true_cancels_and_removes_from_registry(self):
-        import contextlib
-
         observed_cancel = asyncio.Event()
 
         async def stays_until_cancelled():
@@ -189,3 +185,31 @@ class TestCheckBackgroundTool:
         assert isinstance(response, BackgroundToolStatus)
         assert response.status == "error"
         assert "boom" in response.message
+
+    @pytest.mark.asyncio
+    async def test_finished_task_with_success_false_reports_error(self):
+        """A tool that completes with success=False (without raising) is
+        reported as status='error', not 'completed', so the agent doesn't
+        treat it as a win."""
+
+        async def finish_with_failure():
+            return StreamToolOutputAvailable(
+                toolCallId="tc-1",
+                output="partial",
+                toolName="broken_tool",
+                success=False,
+            )
+
+        task = asyncio.create_task(finish_with_failure())
+        await task
+        bg_id = register_background_task(task, "broken_tool")
+
+        tool = CheckBackgroundToolTool()
+        response = await tool._execute(
+            user_id="u",
+            session=_make_session(),
+            background_id=bg_id,
+        )
+        assert isinstance(response, BackgroundToolStatus)
+        assert response.status == "error"
+        assert response.output == "partial"
diff --git a/autogpt_platform/backend/backend/copilot/tools/models.py b/autogpt_platform/backend/backend/copilot/tools/models.py
index 90aa3d51db..d3d4b2e652 100644
--- a/autogpt_platform/backend/backend/copilot/tools/models.py
+++ b/autogpt_platform/backend/backend/copilot/tools/models.py
@@ -259,6 +259,22 @@ class ErrorResponse(ToolResponseBase):
     details: dict[str, Any] | None = None
 
 
+class BackgroundToolStatus(ToolResponseBase):
+    """Status of a backgrounded tool call, returned by ``check_background_tool``."""
+
+    type: ResponseType = ResponseType.MCP_TOOL_OUTPUT
+    status: Literal["completed", "still_running", "cancelled", "error"] = Field(
+        description="Current state of the background task."
+    )
+    tool: str = Field(description="The name of the originally-backgrounded tool.")
+    background_id: str
+    output: Any | None = Field(
+        default=None,
+        description="Tool output when status=completed or status=error.",
+    )
+    waited_seconds: int | None = Field(default=None)
+
+
 class InputValidationErrorResponse(ToolResponseBase):
     """Response when run_agent receives unknown input fields."""