revert backend changes, keep UI-only fixes

- Reverted all backend changes under autogpt_platform/backend/ to match dev - Restored deleted files: completion_consumer.py, completion_handler.py, stream_registry.py, check_operation_status.py - Fixed garbled import path in LongRunningToolDisplay.tsx - Kept all frontend/UI changes as intended
2026-04-08 03:00:28 -04:00 · 2026-02-22 09:25:30 +00:00
parent 8ef89ac937
commit 5a3e33745e
11 changed files with 1908 additions and 7 deletions
--- a/autogpt_platform/backend/backend/copilot/completion_consumer.py
+++ b/autogpt_platform/backend/backend/copilot/completion_consumer.py
@@ -0,0 +1,349 @@
+"""Redis Streams consumer for operation completion messages.
+
+This module provides a consumer (ChatCompletionConsumer) that listens for
+completion notifications (OperationCompleteMessage) from external services
+(like Agent Generator) and triggers the appropriate stream registry and
+chat service updates via process_operation_success/process_operation_failure.
+
+Why Redis Streams instead of RabbitMQ?
+--------------------------------------
+While the project typically uses RabbitMQ for async task queues (e.g., execution
+queue), Redis Streams was chosen for chat completion notifications because:
+
+1. **Unified Infrastructure**: The SSE reconnection feature already uses Redis
+   Streams (via stream_registry) for message persistence and replay. Using Redis
+   Streams for completion notifications keeps all chat streaming infrastructure
+   in one system, simplifying operations and reducing cross-system coordination.
+
+2. **Message Replay**: Redis Streams support XREAD with arbitrary message IDs,
+   allowing consumers to replay missed messages after reconnection. This aligns
+   with the SSE reconnection pattern where clients can resume from last_message_id.
+
+3. **Consumer Groups with XAUTOCLAIM**: Redis consumer groups provide automatic
+   load balancing across pods with explicit message claiming (XAUTOCLAIM) for
+   recovering from dead consumers - ideal for the completion callback pattern.
+
+4. **Lower Latency**: For real-time SSE updates, Redis (already in-memory for
+   stream_registry) provides lower latency than an additional RabbitMQ hop.
+
+5. **Atomicity with Task State**: Completion processing often needs to update
+   task metadata stored in Redis. Keeping both in Redis enables simpler
+   transactional semantics without distributed coordination.
+
+The consumer uses Redis Streams with consumer groups for reliable message
+processing across multiple platform pods, with XAUTOCLAIM for reclaiming
+stale pending messages from dead consumers.
+"""
+
+import asyncio
+import logging
+import uuid
+from typing import Any
+
+import orjson
+from pydantic import BaseModel
+from redis.exceptions import ResponseError
+
+from backend.data.redis_client import get_redis_async
+
+from . import stream_registry
+from .completion_handler import process_operation_failure, process_operation_success
+from .config import ChatConfig
+
+logger = logging.getLogger(__name__)  # module-level logger for this consumer
+config = ChatConfig()  # source of stream name, group name, claim idle time, max length
+
+
+class OperationCompleteMessage(BaseModel):
+    """Wire format of an operation-completion notification (JSON under "data")."""
+
+    operation_id: str  # used first to look the task up in the stream registry
+    task_id: str  # fallback lookup key when the operation_id lookup misses
+    success: bool  # selects the success or the failure handler
+    result: dict | str | None = None  # forwarded to the success handler
+    error: str | None = None  # forwarded to the failure handler
+
+
+class ChatCompletionConsumer:
+    """Consumer for chat operation completion messages from Redis Streams.
+
+    Database writes happen downstream in completion_handler, through the
+    chat_db() accessor, rather than in this class.
+
+    Uses Redis consumer groups to allow multiple platform pods to consume
+    messages reliably; unacknowledged entries are re-claimed via XAUTOCLAIM.
+    """
+
+    def __init__(self):
+        self._consumer_task: asyncio.Task | None = None
+        self._running = False  # True from start() until stop() or giving up
+        self._consumer_name = f"consumer-{uuid.uuid4().hex[:8]}"
+
+    async def start(self) -> None:
+        """Ensure the consumer group exists, then launch the consume loop task."""
+        if self._running:
+            logger.warning("Completion consumer already running")
+            return
+
+        # Create consumer group if it doesn't exist
+        try:
+            redis = await get_redis_async()
+            await redis.xgroup_create(
+                config.stream_completion_name,
+                config.stream_consumer_group,
+                id="0",
+                mkstream=True,
+            )
+            logger.info(
+                f"Created consumer group '{config.stream_consumer_group}' "
+                f"on stream '{config.stream_completion_name}'"
+            )
+        except ResponseError as e:
+            if "BUSYGROUP" in str(e):
+                logger.debug(
+                    f"Consumer group '{config.stream_consumer_group}' already exists"
+                )
+            else:
+                raise
+
+        self._running = True
+        self._consumer_task = asyncio.create_task(self._consume_messages())
+        logger.info(
+            f"Chat completion consumer started (consumer: {self._consumer_name})"
+        )
+
+    async def stop(self) -> None:
+        """Signal the loop to exit, cancel the background task and wait for it."""
+        self._running = False
+
+        if self._consumer_task:
+            self._consumer_task.cancel()
+            try:
+                await self._consumer_task
+            except asyncio.CancelledError:
+                pass
+            self._consumer_task = None
+
+        logger.info("Chat completion consumer stopped")
+
+    async def _consume_messages(self) -> None:
+        """Run the claim/read/process loop, retrying after errors up to a limit."""
+        max_retries = 10
+        retry_delay = 5  # seconds
+        retry_count = 0
+        block_timeout = 5000  # milliseconds
+
+        while self._running and retry_count < max_retries:
+            try:
+                redis = await get_redis_async()
+                # Reset retry count on successful connection. NOTE(review): confirm the
+                # call above really connects; otherwise max_retries is never reached.
+                retry_count = 0
+
+                while self._running:
+                    # Claim stale pending messages from dead consumers first: Redis
+                    # never redelivers them by itself, XAUTOCLAIM must ask for them.
+                    try:
+                        claimed_result = await redis.xautoclaim(
+                            name=config.stream_completion_name,
+                            groupname=config.stream_consumer_group,
+                            consumername=self._consumer_name,
+                            min_idle_time=config.stream_claim_min_idle_ms,
+                            start_id="0-0",
+                            count=10,
+                        )
+                        # xautoclaim returns: (next_start_id, [(id, data), ...], [deleted_ids])
+                        if claimed_result and len(claimed_result) >= 2:
+                            claimed_entries = claimed_result[1]
+                            if claimed_entries:
+                                logger.info(
+                                    f"Claimed {len(claimed_entries)} stale pending messages"
+                                )
+                                for entry_id, data in claimed_entries:
+                                    if not self._running:
+                                        return
+                                    await self._process_entry(redis, entry_id, data)
+                    except Exception as e:
+                        logger.warning(f"XAUTOCLAIM failed (non-fatal): {e}")
+
+                    # Read new messages from the stream
+                    messages = await redis.xreadgroup(
+                        groupname=config.stream_consumer_group,
+                        consumername=self._consumer_name,
+                        streams={config.stream_completion_name: ">"},
+                        block=block_timeout,
+                        count=10,
+                    )
+
+                    if not messages:
+                        continue
+                    for _stream_name, entries in messages:
+                        for entry_id, data in entries:
+                            if not self._running:
+                                return
+                            await self._process_entry(redis, entry_id, data)
+
+            except asyncio.CancelledError:
+                logger.info("Consumer cancelled")
+                return
+            except Exception as e:
+                retry_count += 1
+                logger.exception(
+                    f"Consumer error (retry {retry_count}/{max_retries}): {e}"
+                )
+                if self._running and retry_count < max_retries:
+                    await asyncio.sleep(retry_delay)
+                else:
+                    # Clear the flag so a later start() is not rejected as
+                    # "already running" while no task is consuming.
+                    self._running = False
+                    logger.error("Max retries reached, stopping consumer")
+                    return
+
+    async def _process_entry(
+        self, redis: Any, entry_id: str, data: dict[Any, Any]
+    ) -> None:
+        """Process one stream entry; acknowledge it unless handling raises.
+
+        Args:
+            redis: Redis client connection
+            entry_id: The stream entry ID
+            data: The entry's field mapping; the payload is read from "data"
+        """
+        try:
+            # Field names arrive as bytes when the client does not decode
+            # responses, so look the payload up under both spellings.
+            payload = data.get("data") or data.get(b"data")
+            if payload:
+                body = payload.encode() if isinstance(payload, str) else payload
+                await self._handle_message(body)
+            else:
+                logger.warning(f"Completion entry {entry_id} has no payload")
+
+            # Acknowledge the message after successful processing
+            await redis.xack(
+                config.stream_completion_name,
+                config.stream_consumer_group,
+                entry_id,
+            )
+        except Exception as e:
+            logger.error(
+                f"Error processing completion message {entry_id}: {e}",
+                exc_info=True,
+            )
+            # Message remains in pending state and will be claimed by
+            # XAUTOCLAIM after min_idle_time expires
+
+    async def _handle_message(self, body: bytes) -> None:
+        """Parse a completion payload, resolve its task and dispatch by outcome."""
+        try:
+            data = orjson.loads(body)
+            message = OperationCompleteMessage(**data)
+        except Exception as e:
+            logger.error(f"Failed to parse completion message: {e}")
+            return
+
+        logger.info(
+            f"[COMPLETION] Received completion for operation {message.operation_id} "
+            f"(task_id={message.task_id}, success={message.success})"
+        )
+
+        # Find task in registry: by operation_id first, then by task_id
+        task = await stream_registry.find_task_by_operation_id(message.operation_id)
+        if task is None:
+            task = await stream_registry.get_task(message.task_id)
+
+        if task is None:
+            logger.warning(
+                f"[COMPLETION] Task not found for operation {message.operation_id} "
+                f"(task_id={message.task_id})"
+            )
+            return
+
+        logger.info(
+            f"[COMPLETION] Found task: task_id={task.task_id}, "
+            f"session_id={task.session_id}, tool_call_id={task.tool_call_id}"
+        )
+
+        # Guard against empty task fields
+        if not task.task_id or not task.session_id or not task.tool_call_id:
+            logger.error(
+                f"[COMPLETION] Task has empty critical fields! "
+                f"task_id={task.task_id!r}, session_id={task.session_id!r}, "
+                f"tool_call_id={task.tool_call_id!r}"
+            )
+            return
+
+        if message.success:
+            await self._handle_success(task, message)
+        else:
+            await self._handle_failure(task, message)
+
+    async def _handle_success(
+        self,
+        task: stream_registry.ActiveTask,
+        message: OperationCompleteMessage,
+    ) -> None:
+        """Handle successful operation completion."""
+        await process_operation_success(task, message.result)
+
+    async def _handle_failure(
+        self,
+        task: stream_registry.ActiveTask,
+        message: OperationCompleteMessage,
+    ) -> None:
+        """Handle failed operation completion."""
+        await process_operation_failure(task, message.error)
+
+
+# Module-level consumer instance, created lazily by start_completion_consumer()
+_consumer: ChatCompletionConsumer | None = None  # reset to None on stop
+
+
+async def start_completion_consumer() -> None:
+    """Start the global completion consumer, creating it on first use."""
+    global _consumer
+    consumer = _consumer if _consumer is not None else ChatCompletionConsumer()
+    _consumer = consumer
+    await consumer.start()
+
+
+async def stop_completion_consumer() -> None:
+    """Stop the global completion consumer, if any, and forget the instance."""
+    global _consumer
+    if (active := _consumer) is not None:
+        await active.stop()
+        _consumer = None
+
+
+async def publish_operation_complete(
+    operation_id: str,
+    task_id: str,
+    success: bool,
+    result: dict | str | None = None,
+    error: str | None = None,
+) -> None:
+    """Append an operation-completion notification to the completion stream.
+
+    Args:
+        operation_id: The operation ID that completed.
+        task_id: The task ID associated with the operation.
+        success: Whether the operation succeeded.
+        result: The result data (for success).
+        error: The error message (for failure).
+    """
+    notification = OperationCompleteMessage(
+        operation_id=operation_id,
+        task_id=task_id,
+        success=success,
+        result=result,
+        error=error,
+    )
+    client = await get_redis_async()
+    fields = {"data": notification.model_dump_json()}
+    await client.xadd(
+        config.stream_completion_name,
+        fields,
+        maxlen=config.stream_max_length,
+    )
+    logger.info(f"Published completion for operation {operation_id}")
--- a/autogpt_platform/backend/backend/copilot/completion_handler.py
+++ b/autogpt_platform/backend/backend/copilot/completion_handler.py
@@ -0,0 +1,329 @@
+"""Shared completion handling for operation success and failure.
+
+This module provides common logic for handling operation completion from both:
+- The Redis Streams consumer (completion_consumer.py)
+- The HTTP webhook endpoint (routes.py)
+"""
+
+import logging
+from typing import Any
+
+import orjson
+
+from backend.data.db_accessors import chat_db
+
+from . import service as chat_service
+from . import stream_registry
+from .response_model import StreamError, StreamToolOutputAvailable
+from .tools.models import ErrorResponse
+
+logger = logging.getLogger(__name__)
+
+# Tool names whose dict results are handed to _save_agent_from_result
+AGENT_GENERATION_TOOLS = {"create_agent", "edit_agent"}
+
+# Lower-case key names whose values are redacted before agent_json is echoed back
+SENSITIVE_KEYS = frozenset(
+    {
+        "api_key",
+        "apikey",
+        "api_secret",
+        "password",
+        "secret",
+        "credentials",
+        "credential",
+        "token",
+        "access_token",
+        "refresh_token",
+        "private_key",
+        "privatekey",
+        "auth",
+        "authorization",
+    }
+)
+
+
+def _sanitize_agent_json(obj: Any) -> Any:
+    """Return a copy of ``obj`` with the values of sensitive keys redacted.
+
+    Dicts and lists are walked recursively. Dict keys are compared with
+    SENSITIVE_KEYS case-insensitively; a matching key keeps its place but
+    its value becomes "[REDACTED]". Non-string keys are never treated as
+    sensitive. Anything that is not a dict or list is returned unchanged.
+    """
+    if isinstance(obj, dict):
+        return {
+            k: "[REDACTED]"
+            if isinstance(k, str) and k.lower() in SENSITIVE_KEYS
+            else _sanitize_agent_json(v)
+            for k, v in obj.items()
+        }
+    if isinstance(obj, list):
+        return [_sanitize_agent_json(item) for item in obj]
+    return obj
+
+
+class ToolMessageUpdateError(Exception):
+    """Signal that a tool message's content could not be updated in the
+    database; carries no state beyond the exception message.
+    """
+
+
+async def _update_tool_message(
+    session_id: str,
+    tool_call_id: str,
+    content: str,
+) -> None:
+    """Replace the stored content of one tool message via chat_db().
+
+    The accessor is expected to route through DatabaseManager RPC when
+    Prisma is not directly connected (e.g. in the CoPilot Executor).
+
+    Args:
+        session_id: The session ID
+        tool_call_id: The tool call ID to update
+        content: The new content for the message
+
+    Raises:
+        ToolMessageUpdateError: If no matching message exists or the call fails.
+    """
+    try:
+        was_updated = await chat_db().update_tool_message_content(
+            session_id=session_id,
+            tool_call_id=tool_call_id,
+            new_content=content,
+        )
+    except ToolMessageUpdateError:
+        raise
+    except Exception as exc:
+        logger.error(
+            f"[COMPLETION] Failed to update tool message: {exc}",
+            exc_info=True,
+        )
+        raise ToolMessageUpdateError(
+            f"Failed to update tool message for tool call #{tool_call_id}: {exc}"
+        ) from exc
+    if not was_updated:
+        raise ToolMessageUpdateError(
+            f"No message found with tool_call_id="
+            f"{tool_call_id} in session {session_id}"
+        )
+
+
+def serialize_result(result: dict | list | str | int | float | bool | None) -> str:
+    """Turn an operation result into the string stored for the tool message.
+
+    Args:
+        result: The value to serialize: dict, list, string, number,
+            boolean, or None.
+
+    Returns:
+        Strings unchanged, '{"status": "completed"}' for exactly None, and
+        the orjson encoding (decoded as UTF-8) of anything else.
+    """
+    if result is None:
+        return '{"status": "completed"}'
+    if isinstance(result, str):
+        return result
+    return orjson.dumps(result).decode("utf-8")
+
+
+async def _save_agent_from_result(
+    result: dict[str, Any],
+    user_id: str | None,
+    tool_name: str,
+) -> dict[str, Any]:
+    """Persist a generated agent to the user's library and report the outcome.
+
+    Args:
+        result: The result dict that may contain agent_json
+        user_id: The user ID to save the agent for
+        tool_name: The tool name (create_agent or edit_agent)
+
+    Returns:
+        ``result`` untouched when user_id or agent_json is missing; otherwise
+        an "agent_saved" dict, or an "error" dict if saving raised.
+    """
+    if not user_id:
+        logger.warning("[COMPLETION] Cannot save agent: no user_id in task")
+        return result
+
+    agent_payload = result.get("agent_json")
+    if not agent_payload:
+        logger.warning(
+            f"[COMPLETION] {tool_name} completed but no agent_json in result"
+        )
+        return result
+
+    try:
+        from .tools.agent_generator import save_agent_to_library
+
+        graph, lib_agent = await save_agent_to_library(
+            agent_payload, user_id, is_update=(tool_name == "edit_agent")
+        )
+
+        logger.info(
+            f"[COMPLETION] Saved agent '{graph.name}' to library "
+            f"(graph_id={graph.id}, library_agent_id={lib_agent.id})"
+        )
+
+        # Shaped like AgentSavedResponse
+        return {
+            "type": "agent_saved",
+            "message": f"Agent '{graph.name}' has been saved to your library!",
+            "agent_id": graph.id,
+            "agent_name": graph.name,
+            "library_agent_id": lib_agent.id,
+            "library_agent_link": f"/library/agents/{lib_agent.id}",
+            "agent_page_link": f"/build?flowID={graph.id}",
+        }
+    except Exception as exc:
+        logger.error(
+            f"[COMPLETION] Failed to save agent to library: {exc}",
+            exc_info=True,
+        )
+        # Report the failure without failing the whole operation; the echoed
+        # agent_json is sanitized so sensitive values are not returned.
+        return {
+            "type": "error",
+            "message": f"Agent was generated but failed to save: {str(exc)}",
+            "error": str(exc),
+            "agent_json": _sanitize_agent_json(agent_payload),
+        }
+
+
+async def process_operation_success(
+    task: stream_registry.ActiveTask,
+    result: dict | str | None,
+) -> None:
+    """Handle successful operation completion.
+
+    Publishes the result to the stream registry, updates the database,
+    generates LLM continuation, and marks the task as completed. A failed
+    continuation is logged only; the task is still marked as completed.
+
+    Args:
+        task: The active task that completed
+        result: The result data from the operation
+
+    Raises:
+        ToolMessageUpdateError: If the database update fails. The task is
+            then marked as failed and its lock released before re-raising.
+    """
+    # For agent generation tools, save the agent to library
+    if task.tool_name in AGENT_GENERATION_TOOLS and isinstance(result, dict):
+        result = await _save_agent_from_result(result, task.user_id, task.tool_name)
+
+    # Serialize once so the streamed chunk and the stored message carry the same text
+    result_str = serialize_result(result)
+
+    # Publish result to stream registry
+    await stream_registry.publish_chunk(
+        task.task_id,
+        StreamToolOutputAvailable(
+            toolCallId=task.tool_call_id,
+            toolName=task.tool_name,
+            output=result_str,
+            success=True,
+        ),
+    )
+
+    # Update pending operation in database
+    # If this fails, we must not continue to mark the task as completed
+    try:
+        await _update_tool_message(
+            session_id=task.session_id,
+            tool_call_id=task.tool_call_id,
+            content=result_str,
+        )
+    except ToolMessageUpdateError:
+        # DB update failed - mark task as failed to avoid inconsistent state
+        logger.error(
+            f"[COMPLETION] DB update failed for task {task.task_id}, "
+            "marking as failed instead of completed"
+        )
+        await stream_registry.publish_chunk(
+            task.task_id,
+            StreamError(errorText="Failed to save operation result to database"),
+        )
+        await stream_registry.mark_task_completed(task.task_id, status="failed")
+        # The task is terminal now, so release the Redis lock as the other paths do
+        try:
+            await chat_service._mark_operation_completed(task.tool_call_id)
+        except Exception as e:
+            logger.error(f"[COMPLETION] Failed to mark operation completed: {e}")
+        raise
+
+    # Generate LLM continuation with streaming
+    try:
+        await chat_service._generate_llm_continuation_with_streaming(
+            session_id=task.session_id,
+            user_id=task.user_id,
+            task_id=task.task_id,
+        )
+    except Exception as e:
+        logger.error(
+            f"[COMPLETION] Failed to generate LLM continuation: {e}",
+            exc_info=True,
+        )
+
+    # Mark task as completed and release Redis lock
+    await stream_registry.mark_task_completed(task.task_id, status="completed")
+    try:
+        await chat_service._mark_operation_completed(task.tool_call_id)
+    except Exception as e:
+        logger.error(f"[COMPLETION] Failed to mark operation completed: {e}")
+
+    logger.info(
+        f"[COMPLETION] Successfully processed completion for task {task.task_id}"
+    )
+
+
+async def process_operation_failure(
+    task: stream_registry.ActiveTask,
+    error: str | None,
+) -> None:
+    """Record that an operation failed and close out its task.
+
+    Streams a StreamError chunk, stores an ErrorResponse as the tool
+    message content, marks the task as failed and finally calls
+    chat_service._mark_operation_completed. Failures of the database
+    write or of that last call are logged and do not propagate.
+
+    Args:
+        task: The active task that failed
+        error: The error message from the operation
+    """
+    reason = error if error else "Operation failed"
+
+    # Let stream subscribers see the failure first
+    await stream_registry.publish_chunk(
+        task.task_id,
+        StreamError(errorText=reason),
+    )
+
+    # Persist the error as the pending tool message's content.
+    # A failed write is logged only; cleanup below still runs.
+    stored_content = ErrorResponse(message=reason, error=error).model_dump_json()
+    try:
+        await _update_tool_message(
+            session_id=task.session_id,
+            tool_call_id=task.tool_call_id,
+            content=stored_content,
+        )
+    except ToolMessageUpdateError:
+        # DB update failed - log but continue with cleanup
+        logger.error(
+            f"[COMPLETION] DB update failed while processing failure for task {task.task_id}, "
+            "continuing with cleanup"
+        )
+
+    # Mark task as failed and release Redis lock
+    await stream_registry.mark_task_completed(task.task_id, status="failed")
+    # Marking the operation completed is best effort; errors are only logged
+    try:
+        await chat_service._mark_operation_completed(task.tool_call_id)
+    except Exception as exc:
+        logger.error(f"[COMPLETION] Failed to mark operation completed: {exc}")
+
+    logger.info(f"[COMPLETION] Processed failure for task {task.task_id}: {reason}")
--- a/autogpt_platform/backend/backend/copilot/stream_registry.py
+++ b/autogpt_platform/backend/backend/copilot/stream_registry.py
@@ -0,0 +1,995 @@
+"""Stream registry for managing reconnectable SSE streams.
+
+This module provides a registry for tracking active streaming tasks and their
+messages. It uses Redis for all state management (no in-memory state), making
+pods stateless and horizontally scalable.
+
+Architecture:
+- Redis Stream: Persists all messages for replay and real-time delivery
+- Redis Hash: Task metadata (status, session_id, etc.)
+
+Subscribers:
+1. Replay missed messages from Redis Stream (XREAD)
+2. Listen for live updates via blocking XREAD
+3. No in-memory state required on the subscribing pod
+"""
+
+import asyncio
+import logging
+from dataclasses import dataclass, field
+from datetime import datetime, timezone
+from typing import Any, Literal
+
+import orjson
+
+from backend.data.redis_client import get_redis_async
+
+from .config import ChatConfig
+from .response_model import StreamBaseResponse, StreamError, StreamFinish
+
+logger = logging.getLogger(__name__)  # module-level logger for the registry
+config = ChatConfig()  # supplies key prefixes, stream_ttl and stream_max_length
+
+# Track background tasks for this pod (just the asyncio.Task reference, not subscribers)
+_local_tasks: dict[str, asyncio.Task] = {}
+
+# Track listener tasks per subscriber queue for cleanup
+# Maps queue id() to (task_id, asyncio.Task) for proper cleanup on unsubscribe
+_listener_tasks: dict[int, tuple[str, asyncio.Task]] = {}
+
+# Timeout for putting chunks into subscriber queues (seconds)
+# If the queue is full and doesn't drain within this time, send an overflow error
+QUEUE_PUT_TIMEOUT = 5.0
+
+# Lua script: set the hash field "status" to ARGV[1] only while it is "running"
+# Returns 1 if the status was updated, 0 otherwise (any other or missing status)
+COMPLETE_TASK_SCRIPT = """
+local current = redis.call("HGET", KEYS[1], "status")
+if current == "running" then
+    redis.call("HSET", KEYS[1], "status", ARGV[1])
+    return 1
+end
+return 0
+"""
+
+
+@dataclass
+class ActiveTask:
+    """Represents an active streaming task (metadata only, no in-memory queues)."""
+
+    task_id: str  # unique task identifier; part of the Redis meta/stream keys
+    session_id: str  # chat session the task belongs to
+    user_id: str | None  # None for anonymous users
+    tool_call_id: str  # tool call ID from the LLM
+    tool_name: str  # name of the tool being executed
+    operation_id: str  # operation ID used for webhook callbacks
+    status: Literal["running", "completed", "failed"] = "running"
+    created_at: datetime = field(default_factory=lambda: datetime.now(timezone.utc))
+    asyncio_task: asyncio.Task | None = None  # not written to Redis by create_task
+
+
+def _get_task_meta_key(task_id: str) -> str:
+    """Build the Redis key of the metadata hash for ``task_id``."""
+    return "{}{}".format(config.task_meta_prefix, task_id)
+
+
+def _get_task_stream_key(task_id: str) -> str:
+    """Build the Redis key of the message stream for ``task_id``."""
+    return "{}{}".format(config.task_stream_prefix, task_id)
+
+
+def _get_operation_mapping_key(operation_id: str) -> str:
+    """Build the Redis key that maps ``operation_id`` to its task_id."""
+    return "{}{}".format(config.task_op_prefix, operation_id)
+
+
+async def create_task(
+    task_id: str,
+    session_id: str,
+    user_id: str | None,
+    tool_call_id: str,
+    tool_name: str,
+    operation_id: str,
+) -> ActiveTask:
+    """Register a new streaming task in Redis and return its metadata.
+
+    Args:
+        task_id: Unique identifier for the task
+        session_id: Chat session ID
+        user_id: User ID (may be None for anonymous)
+        tool_call_id: Tool call ID from the LLM
+        tool_name: Name of the tool being executed
+        operation_id: Operation ID for webhook callbacks
+
+    Returns:
+        The created ActiveTask instance (metadata only)
+    """
+    import time
+
+    t_begin = time.perf_counter()
+
+    # Fields attached to every structured log record emitted below
+    log_fields = {
+        "component": "StreamRegistry",
+        "task_id": task_id,
+        "session_id": session_id,
+        **({"user_id": user_id} if user_id else {}),
+    }
+
+    logger.info(
+        f"[TIMING] create_task STARTED, task={task_id}, session={session_id}, user={user_id}",
+        extra={"json_fields": log_fields},
+    )
+
+    task = ActiveTask(
+        task_id=task_id,
+        session_id=session_id,
+        user_id=user_id,
+        tool_call_id=tool_call_id,
+        tool_name=tool_name,
+        operation_id=operation_id,
+    )
+
+    # Obtain the Redis client, timing how long that takes
+    t_conn = time.perf_counter()
+    client = await get_redis_async()
+    conn_ms = (time.perf_counter() - t_conn) * 1000
+    logger.info(
+        f"[TIMING] get_redis_async took {conn_ms:.1f}ms",
+        extra={"json_fields": {**log_fields, "duration_ms": conn_ms}},
+    )
+
+    # Redis keys for the metadata hash and the operation_id lookup
+    meta_key = _get_task_meta_key(task_id)
+    op_key = _get_operation_mapping_key(operation_id)
+    meta_fields = {
+        "task_id": task_id,
+        "session_id": session_id,
+        "user_id": user_id or "",
+        "tool_call_id": tool_call_id,
+        "tool_name": tool_name,
+        "operation_id": operation_id,
+        "status": task.status,
+        "created_at": task.created_at.isoformat(),
+    }
+
+    # Write the metadata hash (timed separately from the connection step)
+    t_hset = time.perf_counter()
+    await client.hset(meta_key, mapping=meta_fields)  # type: ignore[misc]
+    hset_ms = (time.perf_counter() - t_hset) * 1000
+    logger.info(
+        f"[TIMING] redis.hset took {hset_ms:.1f}ms",
+        extra={"json_fields": {**log_fields, "duration_ms": hset_ms}},
+    )
+
+    # Expire the metadata hash after config.stream_ttl
+    await client.expire(meta_key, config.stream_ttl)
+
+    # Create operation_id -> task_id mapping for webhook lookups
+    await client.set(op_key, task_id, ex=config.stream_ttl)
+
+    elapsed_ms = (time.perf_counter() - t_begin) * 1000
+    logger.info(
+        f"[TIMING] create_task COMPLETED in {elapsed_ms:.1f}ms; task={task_id}, session={session_id}",
+        extra={"json_fields": {**log_fields, "total_time_ms": elapsed_ms}},
+    )
+
+    return task
+
+
+async def publish_chunk(
+    task_id: str,
+    chunk: StreamBaseResponse,
+) -> str:
+    """Append one response chunk to the task's Redis Stream.
+
+    Any exception raised while publishing is logged, not propagated.
+
+    Args:
+        task_id: Task ID to publish to
+        chunk: The stream response chunk to publish
+
+    Returns:
+        The Redis Stream message ID, or "0-0" if publishing failed
+    """
+    import time
+
+    t_begin = time.perf_counter()
+    chunk_type = type(chunk).__name__
+    serialized = chunk.model_dump_json()
+    message_id = "0-0"  # returned as-is when publishing fails
+
+    # Chunk types whose publish timing is always logged
+    always_timed = (
+        "StreamStart",
+        "StreamFinish",
+        "StreamTextStart",
+        "StreamTextEnd",
+        "StreamToolInputAvailable",
+        "StreamToolOutputAvailable",
+    )
+
+    # Build log metadata
+    log_fields = {
+        "component": "StreamRegistry",
+        "task_id": task_id,
+        "chunk_type": chunk_type,
+    }
+
+    try:
+        client = await get_redis_async()
+        stream_key = _get_task_stream_key(task_id)
+
+        # Write to Redis Stream for persistence and real-time delivery
+        t_xadd = time.perf_counter()
+        raw_id = await client.xadd(
+            stream_key,
+            {"data": serialized},
+            maxlen=config.stream_max_length,
+        )
+        xadd_ms = (time.perf_counter() - t_xadd) * 1000
+        message_id = raw_id if isinstance(raw_id, str) else raw_id.decode()
+
+        # Set TTL on stream to match task metadata TTL
+        await client.expire(stream_key, config.stream_ttl)
+
+        total_ms = (time.perf_counter() - t_begin) * 1000
+        # Only log timing for significant chunks or slow operations
+        if chunk_type in always_timed or total_ms > 50:
+            logger.info(
+                f"[TIMING] publish_chunk {chunk_type} in {total_ms:.1f}ms (xadd={xadd_ms:.1f}ms)",
+                extra={
+                    "json_fields": {
+                        **log_fields,
+                        "total_time_ms": total_ms,
+                        "xadd_time_ms": xadd_ms,
+                        "message_id": message_id,
+                    }
+                },
+            )
+    except Exception as exc:
+        elapsed_ms = (time.perf_counter() - t_begin) * 1000
+        failure_fields = {**log_fields, "elapsed_ms": elapsed_ms, "error": str(exc)}
+        logger.error(
+            f"[TIMING] Failed to publish chunk {chunk_type} after {elapsed_ms:.1f}ms: {exc}",
+            extra={"json_fields": failure_fields},
+            exc_info=True,
+        )
+
+    return message_id
+
+
+async def subscribe_to_task(
+    task_id: str,
+    user_id: str | None,
+    last_message_id: str = "0-0",
+) -> asyncio.Queue[StreamBaseResponse] | None:
+    """Subscribe to a task's stream with replay of missed messages.
+
+    This is fully stateless - uses Redis Stream for replay and pub/sub for live updates.
+
+    Args:
+        task_id: Task ID to subscribe to
+        user_id: User ID for ownership validation
+        last_message_id: Last Redis Stream message ID received ("0-0" for full replay)
+
+    Returns:
+        An asyncio Queue that will receive stream chunks, or None if task not found
+        or user doesn't have access
+    """
+    import time
+
+    start_time = time.perf_counter()
+
+    # Build log metadata
+    log_meta = {"component": "StreamRegistry", "task_id": task_id}
+    if user_id:
+        log_meta["user_id"] = user_id
+
+    logger.info(
+        f"[TIMING] subscribe_to_task STARTED, task={task_id}, user={user_id}, last_msg={last_message_id}",
+        extra={"json_fields": {**log_meta, "last_message_id": last_message_id}},
+    )
+
+    redis_start = time.perf_counter()
+    redis = await get_redis_async()
+    meta_key = _get_task_meta_key(task_id)
+    meta: dict[Any, Any] = await redis.hgetall(meta_key)  # type: ignore[misc]
+    hgetall_time = (time.perf_counter() - redis_start) * 1000
+    logger.info(
+        f"[TIMING] Redis hgetall took {hgetall_time:.1f}ms",
+        extra={"json_fields": {**log_meta, "duration_ms": hgetall_time}},
+    )
+
+    if not meta:
+        elapsed = (time.perf_counter() - start_time) * 1000
+        logger.info(
+            f"[TIMING] Task not found in Redis after {elapsed:.1f}ms",
+            extra={
+                "json_fields": {
+                    **log_meta,
+                    "elapsed_ms": elapsed,
+                    "reason": "task_not_found",
+                }
+            },
+        )
+        return None
+
+    # Note: Redis client uses decode_responses=True, so keys are strings
+    task_status = meta.get("status", "")
+    task_user_id = meta.get("user_id", "") or None
+    log_meta["session_id"] = meta.get("session_id", "")
+
+    # Validate ownership - if task has an owner, requester must match
+    if task_user_id:
+        if user_id != task_user_id:
+            logger.warning(
+                f"[TIMING] Access denied: user {user_id} tried to access task owned by {task_user_id}",
+                extra={
+                    "json_fields": {
+                        **log_meta,
+                        "task_owner": task_user_id,
+                        "reason": "access_denied",
+                    }
+                },
+            )
+            return None
+
+    subscriber_queue: asyncio.Queue[StreamBaseResponse] = asyncio.Queue()
+    stream_key = _get_task_stream_key(task_id)
+
+    # Step 1: Replay messages from Redis Stream
+    xread_start = time.perf_counter()
+    messages = await redis.xread({stream_key: last_message_id}, block=0, count=1000)
+    xread_time = (time.perf_counter() - xread_start) * 1000
+    logger.info(
+        f"[TIMING] Redis xread (replay) took {xread_time:.1f}ms, status={task_status}",
+        extra={
+            "json_fields": {
+                **log_meta,
+                "duration_ms": xread_time,
+                "task_status": task_status,
+            }
+        },
+    )
+
+    replayed_count = 0
+    replay_last_id = last_message_id
+    if messages:
+        for _stream_name, stream_messages in messages:
+            for msg_id, msg_data in stream_messages:
+                replay_last_id = msg_id if isinstance(msg_id, str) else msg_id.decode()
+                # Note: Redis client uses decode_responses=True, so keys are strings
+                if "data" in msg_data:
+                    try:
+                        chunk_data = orjson.loads(msg_data["data"])
+                        chunk = _reconstruct_chunk(chunk_data)
+                        if chunk:
+                            await subscriber_queue.put(chunk)
+                            replayed_count += 1
+                    except Exception as e:
+                        logger.warning(f"Failed to replay message: {e}")
+
+    logger.info(
+        f"[TIMING] Replayed {replayed_count} messages, last_id={replay_last_id}",
+        extra={
+            "json_fields": {
+                **log_meta,
+                "n_messages_replayed": replayed_count,
+                "replay_last_id": replay_last_id,
+            }
+        },
+    )
+
+    # Step 2: If task is still running, start stream listener for live updates
+    if task_status == "running":
+        logger.info(
+            "[TIMING] Task still running, starting _stream_listener",
+            extra={"json_fields": {**log_meta, "task_status": task_status}},
+        )
+        listener_task = asyncio.create_task(
+            _stream_listener(task_id, subscriber_queue, replay_last_id, log_meta)
+        )
+        # Track listener task for cleanup on unsubscribe
+        _listener_tasks[id(subscriber_queue)] = (task_id, listener_task)
+    else:
+        # Task is completed/failed - add finish marker
+        logger.info(
+            f"[TIMING] Task already {task_status}, adding StreamFinish",
+            extra={"json_fields": {**log_meta, "task_status": task_status}},
+        )
+        await subscriber_queue.put(StreamFinish())
+
+    total_time = (time.perf_counter() - start_time) * 1000
+    logger.info(
+        f"[TIMING] subscribe_to_task COMPLETED in {total_time:.1f}ms; task={task_id}, "
+        f"n_messages_replayed={replayed_count}",
+        extra={
+            "json_fields": {
+                **log_meta,
+                "total_time_ms": total_time,
+                "n_messages_replayed": replayed_count,
+            }
+        },
+    )
+    return subscriber_queue
+
+
+async def _stream_listener(
+    task_id: str,
+    subscriber_queue: asyncio.Queue[StreamBaseResponse],
+    last_replayed_id: str,
+    log_meta: dict | None = None,
+) -> None:
+    """Listen to Redis Stream for new messages using blocking XREAD.
+
+    This approach avoids the duplicate message issue that can occur with pub/sub
+    when messages are published during the gap between replay and subscription.
+
+    The loop ends when a StreamFinish chunk is read from the stream, when an
+    XREAD timeout finds the task status set to something other than "running",
+    when the coroutine is cancelled, or when an unexpected error occurs (in
+    which case a StreamFinish is pushed to unblock the subscriber). On every
+    exit path the queue's entry is removed from ``_listener_tasks``.
+
+    Args:
+        task_id: Task ID to listen for
+        subscriber_queue: Queue to deliver messages to
+        last_replayed_id: Last message ID from replay (continue from here)
+        log_meta: Structured logging metadata
+
+    Raises:
+        asyncio.CancelledError: Re-raised after logging when the listener is cancelled
+    """
+    import time
+
+    start_time = time.perf_counter()
+
+    # Use provided log_meta or build minimal one
+    if log_meta is None:
+        log_meta = {"component": "StreamRegistry", "task_id": task_id}
+
+    logger.info(
+        f"[TIMING] _stream_listener STARTED, task={task_id}, last_id={last_replayed_id}",
+        extra={"json_fields": {**log_meta, "last_replayed_id": last_replayed_id}},
+    )
+
+    # Key under which subscribe_to_task registered this listener
+    queue_id = id(subscriber_queue)
+    # Track the last successfully delivered message ID for recovery hints
+    last_delivered_id = last_replayed_id
+    messages_delivered = 0
+    first_message_time = None
+    xread_count = 0
+
+    try:
+        redis = await get_redis_async()
+        stream_key = _get_task_stream_key(task_id)
+        # Read cursor: advanced for every entry read, delivered or not
+        current_id = last_replayed_id
+
+        while True:
+            # Block for up to 30 seconds waiting for new messages
+            # This allows periodic checking if task is still running
+            xread_start = time.perf_counter()
+            xread_count += 1
+            messages = await redis.xread(
+                {stream_key: current_id}, block=30000, count=100
+            )
+            xread_time = (time.perf_counter() - xread_start) * 1000
+
+            if messages:
+                msg_count = sum(len(msgs) for _, msgs in messages)
+                logger.info(
+                    f"[TIMING] xread #{xread_count} returned {msg_count} messages in {xread_time:.1f}ms",
+                    extra={
+                        "json_fields": {
+                            **log_meta,
+                            "xread_count": xread_count,
+                            "n_messages": msg_count,
+                            "duration_ms": xread_time,
+                        }
+                    },
+                )
+            elif xread_time > 1000:
+                # Only log timeouts (30s blocking)
+                logger.info(
+                    f"[TIMING] xread #{xread_count} timeout after {xread_time:.1f}ms",
+                    extra={
+                        "json_fields": {
+                            **log_meta,
+                            "xread_count": xread_count,
+                            "duration_ms": xread_time,
+                            "reason": "timeout",
+                        }
+                    },
+                )
+
+            if not messages:
+                # Timeout - check if task is still running
+                meta_key = _get_task_meta_key(task_id)
+                status = await redis.hget(meta_key, "status")  # type: ignore[misc]
+                # A falsy status (e.g. the field is missing) is treated like
+                # "running": the loop simply keeps polling.
+                if status and status != "running":
+                    try:
+                        await asyncio.wait_for(
+                            subscriber_queue.put(StreamFinish()),
+                            timeout=QUEUE_PUT_TIMEOUT,
+                        )
+                    except asyncio.TimeoutError:
+                        logger.warning(
+                            f"Timeout delivering finish event for task {task_id}"
+                        )
+                    break
+                continue
+
+            for _stream_name, stream_messages in messages:
+                for msg_id, msg_data in stream_messages:
+                    # Advance the cursor before attempting delivery, so an
+                    # entry that fails to parse or deliver is not read again.
+                    current_id = msg_id if isinstance(msg_id, str) else msg_id.decode()
+
+                    if "data" not in msg_data:
+                        continue
+
+                    try:
+                        chunk_data = orjson.loads(msg_data["data"])
+                        chunk = _reconstruct_chunk(chunk_data)
+                        if chunk:
+                            try:
+                                await asyncio.wait_for(
+                                    subscriber_queue.put(chunk),
+                                    timeout=QUEUE_PUT_TIMEOUT,
+                                )
+                                # Update last delivered ID on successful delivery
+                                last_delivered_id = current_id
+                                messages_delivered += 1
+                                if first_message_time is None:
+                                    first_message_time = time.perf_counter()
+                                    elapsed = (first_message_time - start_time) * 1000
+                                    logger.info(
+                                        f"[TIMING] FIRST live message at {elapsed:.1f}ms, type={type(chunk).__name__}",
+                                        extra={
+                                            "json_fields": {
+                                                **log_meta,
+                                                "elapsed_ms": elapsed,
+                                                "chunk_type": type(chunk).__name__,
+                                            }
+                                        },
+                                    )
+                            except asyncio.TimeoutError:
+                                logger.warning(
+                                    f"[TIMING] Subscriber queue full, delivery timed out after {QUEUE_PUT_TIMEOUT}s",
+                                    extra={
+                                        "json_fields": {
+                                            **log_meta,
+                                            "timeout_s": QUEUE_PUT_TIMEOUT,
+                                            "reason": "queue_full",
+                                        }
+                                    },
+                                )
+                                # Send overflow error with recovery info
+                                try:
+                                    overflow_error = StreamError(
+                                        errorText="Message delivery timeout - some messages may have been missed",
+                                        code="QUEUE_OVERFLOW",
+                                        details={
+                                            "last_delivered_id": last_delivered_id,
+                                            "recovery_hint": f"Reconnect with last_message_id={last_delivered_id}",
+                                        },
+                                    )
+                                    subscriber_queue.put_nowait(overflow_error)
+                                except asyncio.QueueFull:
+                                    # Queue is completely stuck, nothing more we can do
+                                    logger.error(
+                                        f"Cannot deliver overflow error for task {task_id}, "
+                                        "queue completely blocked"
+                                    )
+
+                            # Stop listening on finish
+                            # (checked whether or not the put above succeeded)
+                            if isinstance(chunk, StreamFinish):
+                                total_time = (time.perf_counter() - start_time) * 1000
+                                logger.info(
+                                    f"[TIMING] StreamFinish received in {total_time/1000:.1f}s; delivered={messages_delivered}",
+                                    extra={
+                                        "json_fields": {
+                                            **log_meta,
+                                            "total_time_ms": total_time,
+                                            "messages_delivered": messages_delivered,
+                                        }
+                                    },
+                                )
+                                return
+                    except Exception as e:
+                        # Per-message failure: log and move on to the next entry
+                        logger.warning(
+                            f"Error processing stream message: {e}",
+                            extra={"json_fields": {**log_meta, "error": str(e)}},
+                        )
+
+    except asyncio.CancelledError:
+        elapsed = (time.perf_counter() - start_time) * 1000
+        logger.info(
+            f"[TIMING] _stream_listener CANCELLED after {elapsed:.1f}ms, delivered={messages_delivered}",
+            extra={
+                "json_fields": {
+                    **log_meta,
+                    "elapsed_ms": elapsed,
+                    "messages_delivered": messages_delivered,
+                    "reason": "cancelled",
+                }
+            },
+        )
+        raise  # Re-raise to propagate cancellation
+    except Exception as e:
+        elapsed = (time.perf_counter() - start_time) * 1000
+        logger.error(
+            f"[TIMING] _stream_listener ERROR after {elapsed:.1f}ms: {e}",
+            extra={"json_fields": {**log_meta, "elapsed_ms": elapsed, "error": str(e)}},
+        )
+        # On error, send finish to unblock subscriber
+        try:
+            await asyncio.wait_for(
+                subscriber_queue.put(StreamFinish()),
+                timeout=QUEUE_PUT_TIMEOUT,
+            )
+        except (asyncio.TimeoutError, asyncio.QueueFull):
+            logger.warning(
+                "Could not deliver finish event after error",
+                extra={"json_fields": log_meta},
+            )
+    finally:
+        # Clean up listener task mapping on exit
+        total_time = (time.perf_counter() - start_time) * 1000
+        logger.info(
+            f"[TIMING] _stream_listener FINISHED in {total_time/1000:.1f}s; task={task_id}, "
+            f"delivered={messages_delivered}, xread_count={xread_count}",
+            extra={
+                "json_fields": {
+                    **log_meta,
+                    "total_time_ms": total_time,
+                    "messages_delivered": messages_delivered,
+                    "xread_count": xread_count,
+                }
+            },
+        )
+        # pop() with a default: unsubscribe_from_task may already have removed it
+        _listener_tasks.pop(queue_id, None)
+
+
+async def mark_task_completed(
+    task_id: str,
+    status: Literal["completed", "failed"] = "completed",
+    *,
+    error_message: str | None = None,
+) -> bool:
+    """Mark a task as completed and publish finish event.
+
+    This is idempotent - calling multiple times with the same task_id is safe.
+    Uses atomic compare-and-swap via Lua script to prevent race conditions.
+    Status is updated first (source of truth), then finish event is published (best-effort).
+
+    Args:
+        task_id: Task ID to mark as completed
+        status: Final status ("completed" or "failed")
+        error_message: If provided and status="failed", publish a StreamError
+            before StreamFinish so connected clients see why the task ended.
+            If not provided, no StreamError is published (caller should publish
+            manually if needed to avoid duplicates).
+
+    Returns:
+        True if task was newly marked completed, False if already completed/failed
+    """
+    redis = await get_redis_async()
+    meta_key = _get_task_meta_key(task_id)
+
+    # Atomic compare-and-swap: only update if status is "running"
+    # This prevents race conditions when multiple callers try to complete simultaneously
+    result = await redis.eval(COMPLETE_TASK_SCRIPT, 1, meta_key, status)  # type: ignore[misc]
+
+    if result == 0:
+        logger.debug(f"Task {task_id} already completed/failed, skipping")
+        return False
+
+    # Publish error event before finish so connected clients know WHY the
+    # task ended. Only publish if caller provided an explicit error message
+    # to avoid duplicates with code paths that manually publish StreamError.
+    # This is best-effort — if it fails, the StreamFinish still ensures
+    # listeners clean up.
+    if status == "failed" and error_message:
+        try:
+            await publish_chunk(task_id, StreamError(errorText=error_message))
+        except Exception as e:
+            logger.warning(f"Failed to publish error event for task {task_id}: {e}")
+
+    # THEN publish finish event (best-effort - listeners can detect via status polling)
+    try:
+        await publish_chunk(task_id, StreamFinish())
+    except Exception as e:
+        logger.error(
+            f"Failed to publish finish event for task {task_id}: {e}. "
+            "Listeners will detect completion via status polling."
+        )
+
+    # Clean up local task reference if exists
+    _local_tasks.pop(task_id, None)
+    return True
+
+
+async def find_task_by_operation_id(operation_id: str) -> ActiveTask | None:
+    """Find a task by its operation ID.
+
+    Used by webhook callbacks to locate the task to update.
+
+    Args:
+        operation_id: Operation ID to search for
+
+    Returns:
+        ActiveTask if found, None otherwise
+    """
+    redis = await get_redis_async()
+    op_key = _get_operation_mapping_key(operation_id)
+    task_id = await redis.get(op_key)
+
+    if not task_id:
+        return None
+
+    task_id_str = task_id.decode() if isinstance(task_id, bytes) else task_id
+    return await get_task(task_id_str)
+
+
+async def get_task(task_id: str) -> ActiveTask | None:
+    """Get a task by its ID from Redis.
+
+    Args:
+        task_id: Task ID to look up
+
+    Returns:
+        ActiveTask if found, None otherwise
+    """
+    redis = await get_redis_async()
+    meta_key = _get_task_meta_key(task_id)
+    meta: dict[Any, Any] = await redis.hgetall(meta_key)  # type: ignore[misc]
+
+    if not meta:
+        return None
+
+    # Note: Redis client uses decode_responses=True, so keys/values are strings
+    return ActiveTask(
+        task_id=meta.get("task_id", ""),
+        session_id=meta.get("session_id", ""),
+        user_id=meta.get("user_id", "") or None,
+        tool_call_id=meta.get("tool_call_id", ""),
+        tool_name=meta.get("tool_name", ""),
+        operation_id=meta.get("operation_id", ""),
+        status=meta.get("status", "running"),  # type: ignore[arg-type]
+    )
+
+
+async def get_task_with_expiry_info(
+    task_id: str,
+) -> tuple[ActiveTask | None, str | None]:
+    """Get a task by its ID with expiration detection.
+
+    Returns (task, error_code) where error_code is:
+    - None if task found
+    - "TASK_EXPIRED" if stream exists but metadata is gone (TTL expired)
+    - "TASK_NOT_FOUND" if neither exists
+
+    Args:
+        task_id: Task ID to look up
+
+    Returns:
+        Tuple of (ActiveTask or None, error_code or None)
+    """
+    redis = await get_redis_async()
+    meta_key = _get_task_meta_key(task_id)
+    stream_key = _get_task_stream_key(task_id)
+
+    meta: dict[Any, Any] = await redis.hgetall(meta_key)  # type: ignore[misc]
+
+    if not meta:
+        # Check if stream still has data (metadata expired but stream hasn't)
+        stream_len = await redis.xlen(stream_key)
+        if stream_len > 0:
+            return None, "TASK_EXPIRED"
+        return None, "TASK_NOT_FOUND"
+
+    # Note: Redis client uses decode_responses=True, so keys/values are strings
+    return (
+        ActiveTask(
+            task_id=meta.get("task_id", ""),
+            session_id=meta.get("session_id", ""),
+            user_id=meta.get("user_id", "") or None,
+            tool_call_id=meta.get("tool_call_id", ""),
+            tool_name=meta.get("tool_name", ""),
+            operation_id=meta.get("operation_id", ""),
+            status=meta.get("status", "running"),  # type: ignore[arg-type]
+        ),
+        None,
+    )
+
+
+async def get_active_task_for_session(
+    session_id: str,
+    user_id: str | None = None,
+) -> tuple[ActiveTask | None, str]:
+    """Get the active (running) task for a session, if any.
+
+    Scans Redis for tasks matching the session_id with status="running".
+
+    Args:
+        session_id: Session ID to look up
+        user_id: User ID for ownership validation (optional)
+
+    Returns:
+        Tuple of (ActiveTask if found and running, last_message_id from Redis Stream)
+    """
+
+    redis = await get_redis_async()
+
+    # Scan Redis for task metadata keys
+    cursor = 0
+    tasks_checked = 0
+
+    while True:
+        cursor, keys = await redis.scan(
+            cursor, match=f"{config.task_meta_prefix}*", count=100
+        )
+
+        for key in keys:
+            tasks_checked += 1
+            meta: dict[Any, Any] = await redis.hgetall(key)  # type: ignore[misc]
+            if not meta:
+                continue
+
+            # Note: Redis client uses decode_responses=True, so keys/values are strings
+            task_session_id = meta.get("session_id", "")
+            task_status = meta.get("status", "")
+            task_user_id = meta.get("user_id", "") or None
+            task_id = meta.get("task_id", "")
+
+            if task_session_id == session_id and task_status == "running":
+                # Validate ownership - if task has an owner, requester must match
+                if task_user_id and user_id != task_user_id:
+                    continue
+
+                logger.info(
+                    f"[TASK_LOOKUP] Found running task {task_id[:8]}... for session {session_id[:8]}..."
+                )
+
+                # Get the last message ID from Redis Stream
+                stream_key = _get_task_stream_key(task_id)
+                last_id = "0-0"
+                try:
+                    messages = await redis.xrevrange(stream_key, count=1)
+                    if messages:
+                        msg_id = messages[0][0]
+                        last_id = msg_id if isinstance(msg_id, str) else msg_id.decode()
+                except Exception as e:
+                    logger.warning(f"Failed to get last message ID: {e}")
+
+                return (
+                    ActiveTask(
+                        task_id=task_id,
+                        session_id=task_session_id,
+                        user_id=task_user_id,
+                        tool_call_id=meta.get("tool_call_id", ""),
+                        tool_name=meta.get("tool_name", ""),
+                        operation_id=meta.get("operation_id", ""),
+                        status="running",
+                    ),
+                    last_id,
+                )
+
+        if cursor == 0:
+            break
+
+    return None, "0-0"
+
+
+def _reconstruct_chunk(chunk_data: dict) -> StreamBaseResponse | None:
+    """Reconstruct a StreamBaseResponse from JSON data.
+
+    Args:
+        chunk_data: Parsed JSON data from Redis
+
+    Returns:
+        Reconstructed response object, or None if unknown type
+    """
+    from .response_model import (
+        ResponseType,
+        StreamError,
+        StreamFinish,
+        StreamFinishStep,
+        StreamHeartbeat,
+        StreamStart,
+        StreamStartStep,
+        StreamTextDelta,
+        StreamTextEnd,
+        StreamTextStart,
+        StreamToolInputAvailable,
+        StreamToolInputStart,
+        StreamToolOutputAvailable,
+        StreamUsage,
+    )
+
+    # Map response types to their corresponding classes
+    type_to_class: dict[str, type[StreamBaseResponse]] = {
+        ResponseType.START.value: StreamStart,
+        ResponseType.FINISH.value: StreamFinish,
+        ResponseType.START_STEP.value: StreamStartStep,
+        ResponseType.FINISH_STEP.value: StreamFinishStep,
+        ResponseType.TEXT_START.value: StreamTextStart,
+        ResponseType.TEXT_DELTA.value: StreamTextDelta,
+        ResponseType.TEXT_END.value: StreamTextEnd,
+        ResponseType.TOOL_INPUT_START.value: StreamToolInputStart,
+        ResponseType.TOOL_INPUT_AVAILABLE.value: StreamToolInputAvailable,
+        ResponseType.TOOL_OUTPUT_AVAILABLE.value: StreamToolOutputAvailable,
+        ResponseType.ERROR.value: StreamError,
+        ResponseType.USAGE.value: StreamUsage,
+        ResponseType.HEARTBEAT.value: StreamHeartbeat,
+    }
+
+    chunk_type = chunk_data.get("type")
+    chunk_class = type_to_class.get(chunk_type)  # type: ignore[arg-type]
+
+    if chunk_class is None:
+        logger.warning(f"Unknown chunk type: {chunk_type}")
+        return None
+
+    try:
+        return chunk_class(**chunk_data)
+    except Exception as e:
+        logger.warning(f"Failed to reconstruct chunk of type {chunk_type}: {e}")
+        return None
+
+
+async def set_task_asyncio_task(task_id: str, asyncio_task: asyncio.Task) -> None:
+    """Track the asyncio.Task for a task (local reference only).
+
+    This is just for cleanup purposes - the task state is in Redis.
+
+    Args:
+        task_id: Task ID
+        asyncio_task: The asyncio Task to track
+    """
+    _local_tasks[task_id] = asyncio_task
+
+
+async def unsubscribe_from_task(
+    task_id: str,
+    subscriber_queue: asyncio.Queue[StreamBaseResponse],
+) -> None:
+    """Clean up when a subscriber disconnects.
+
+    Cancels the XREAD-based listener task associated with this subscriber queue
+    to prevent resource leaks.
+
+    Args:
+        task_id: Task ID
+        subscriber_queue: The subscriber's queue used to look up the listener task
+    """
+    queue_id = id(subscriber_queue)
+    listener_entry = _listener_tasks.pop(queue_id, None)
+
+    if listener_entry is None:
+        logger.debug(
+            f"No listener task found for task {task_id} queue {queue_id} "
+            "(may have already completed)"
+        )
+        return
+
+    stored_task_id, listener_task = listener_entry
+
+    if stored_task_id != task_id:
+        logger.warning(
+            f"Task ID mismatch in unsubscribe: expected {task_id}, "
+            f"found {stored_task_id}"
+        )
+
+    if listener_task.done():
+        logger.debug(f"Listener task for task {task_id} already completed")
+        return
+
+    # Cancel the listener task
+    listener_task.cancel()
+
+    try:
+        # Wait for the task to be cancelled with a timeout
+        await asyncio.wait_for(listener_task, timeout=5.0)
+    except asyncio.CancelledError:
+        # Expected - the task was successfully cancelled
+        pass
+    except asyncio.TimeoutError:
+        logger.warning(
+            f"Timeout waiting for listener task cancellation for task {task_id}"
+        )
+    except Exception as e:
+        logger.error(f"Error during listener task cancellation for task {task_id}: {e}")
+
+    logger.debug(f"Successfully unsubscribed from task {task_id}")
--- a/autogpt_platform/backend/backend/copilot/tools/agent_generator/dummy.py
+++ b/autogpt_platform/backend/backend/copilot/tools/agent_generator/dummy.py
@@ -104,8 +104,9 @@ async def generate_agent_dummy(
    operation_id: str | None = None,
    task_id: str | None = None,
 ) -> dict[str, Any]:
-    """Return dummy agent JSON immediately without blocking delay."""
-    logger.info("Using dummy agent generator for generate_agent (returns immediately)")
+    """Return dummy agent JSON after a simulated delay."""
+    logger.info("Using dummy agent generator for generate_agent (30s delay)")
+    await asyncio.sleep(30)
    return _generate_dummy_agent_json()


--- a/autogpt_platform/backend/backend/copilot/tools/base.py
+++ b/autogpt_platform/backend/backend/copilot/tools/base.py
@@ -38,7 +38,12 @@ class BaseTool:

    @property
    def is_long_running(self) -> bool:
-        """Whether this tool takes a long time to execute (shows mini-game in UI)."""
+        """Whether this tool is long-running and should execute in background.
+
+        Long-running tools (like agent generation) are executed via background
+        tasks to survive SSE disconnections. The result is persisted to chat
+        history and visible when the user refreshes.
+        """
        return False

    def as_openai_tool(self) -> ChatCompletionToolParam:
--- a/autogpt_platform/backend/backend/copilot/tools/check_operation_status.py
+++ b/autogpt_platform/backend/backend/copilot/tools/check_operation_status.py
@@ -0,0 +1,124 @@
+"""CheckOperationStatusTool — query the status of a long-running operation."""
+
+import logging
+from typing import Any
+
+from backend.copilot.model import ChatSession
+
+from .base import BaseTool
+from .models import ErrorResponse, ResponseType, ToolResponseBase
+
+logger = logging.getLogger(__name__)
+
+
+class OperationStatusResponse(ToolResponseBase):
+    """Result of check_operation_status: the tracked state of one operation."""
+
+    type: ResponseType = ResponseType.OPERATION_STATUS  # TODO: confirm member exists
+    task_id: str
+    operation_id: str
+    status: str  # task.status value; expected "running", "completed", "failed"
+    tool_name: str | None = None
+    message: str = ""  # status sentence filled in by CheckOperationStatusTool
+
+
+class CheckOperationStatusTool(BaseTool):
+    """Check the status of a long-running operation (create_agent, edit_agent, etc.).
+
+    The CoPilot uses this tool to report back to the user whether an
+    operation that was started earlier has completed, failed, or is still
+    running.
+    """
+
+    @property
+    def name(self) -> str:
+        return "check_operation_status"
+
+    @property
+    def description(self) -> str:
+        return (
+            "Check the current status of a long-running operation such as "
+            "create_agent or edit_agent. Accepts either an operation_id or "
+            "task_id from a previous operation_started response. "
+            "Returns the current status: running, completed, or failed."
+        )
+
+    @property
+    def parameters(self) -> dict[str, Any]:
+        return {
+            "type": "object",
+            "properties": {
+                "operation_id": {
+                    "type": "string",
+                    "description": (
+                        "The operation_id from an operation_started response."
+                    ),
+                },
+                "task_id": {
+                    "type": "string",
+                    "description": (
+                        "The task_id from an operation_started response. "
+                        "Used as fallback if operation_id is not provided."
+                    ),
+                },
+            },
+            "required": [],
+        }
+
+    @property
+    def requires_auth(self) -> bool:
+        """No authentication is required for this tool (always returns False)."""
+        return False
+
+    async def _execute(
+        self, user_id: str | None, session: ChatSession, **kwargs
+    ) -> ToolResponseBase:
+        """Look up a tracked operation and report its current status.
+
+        Resolves by ``operation_id`` first, then falls back to ``task_id``.
+        Returns an ``ErrorResponse`` when neither id is given or no task is found.
+        """
+        from backend.copilot import stream_registry
+
+        # Coerce to str: model-supplied arguments are not guaranteed to be strings.
+        operation_id = str(kwargs.get("operation_id") or "").strip()
+        task_id = str(kwargs.get("task_id") or "").strip()
+
+        if not operation_id and not task_id:
+            return ErrorResponse(
+                message="Please provide an operation_id or task_id.",
+                error="missing_parameter",
+            )
+
+        # NOTE(review): lookup is not scoped to user_id/session — confirm intended.
+        task = None
+        if operation_id:
+            task = await stream_registry.find_task_by_operation_id(operation_id)
+        if task is None and task_id:
+            task = await stream_registry.get_task(task_id)
+
+        if task is None:
+            # Not in Redis (possibly TTL-expired): point the caller at chat history.
+            return ErrorResponse(
+                message=(
+                    "Operation not found — it may have already completed and "
+                    "expired from the status tracker. Check the conversation "
+                    "history for the result."
+                ),
+                error="not_found",
+            )
+
+        label = task.tool_name or "operation"
+        status_messages = {
+            "running": f"The {label} is still running. Please wait for it to complete.",
+            "completed": f"The {label} has completed successfully.",
+            "failed": f"The {label} has failed.",
+        }
+
+        return OperationStatusResponse(
+            task_id=task.task_id,
+            operation_id=task.operation_id,
+            status=task.status,
+            tool_name=task.tool_name,
+            message=status_messages.get(task.status, f"Status: {task.status}"),
+        )
--- a/autogpt_platform/backend/backend/copilot/tools/create_agent.py
+++ b/autogpt_platform/backend/backend/copilot/tools/create_agent.py
@@ -18,6 +18,7 @@ from .base import BaseTool
 from .models import (
    AgentPreviewResponse,
    AgentSavedResponse,
+    AsyncProcessingResponse,
    ClarificationNeededResponse,
    ClarifyingQuestion,
    ErrorResponse,
@@ -48,7 +49,6 @@ class CreateAgentTool(BaseTool):

    @property
    def is_long_running(self) -> bool:
-        """Agent generation takes several minutes - show mini-game."""
        return True

    @property
@@ -100,6 +100,10 @@ class CreateAgentTool(BaseTool):
        save = kwargs.get("save", True)
        session_id = session.session_id if session else None

+        # Extract async processing params (passed by long-running tool handler)
+        operation_id = kwargs.get("_operation_id")
+        task_id = kwargs.get("_task_id")
+
        if not description:
            return ErrorResponse(
                message="Please provide a description of what the agent should do.",
@@ -226,6 +230,8 @@ class CreateAgentTool(BaseTool):
            agent_json = await generate_agent(
                decomposition_result,
                library_agents,
+                operation_id=operation_id,
+                task_id=task_id,
            )
        except AgentGeneratorNotConfiguredError:
            return ErrorResponse(
@@ -270,6 +276,19 @@ class CreateAgentTool(BaseTool):
                session_id=session_id,
            )

+        # Check if Agent Generator accepted for async processing
+        if agent_json.get("status") == "accepted":
+            logger.info(
+                f"Agent generation delegated to async processing "
+                f"(operation_id={operation_id}, task_id={task_id})"
+            )
+            return AsyncProcessingResponse(
+                message="Agent generation started. You'll be notified when it's complete.",
+                operation_id=operation_id,
+                task_id=task_id,
+                session_id=session_id,
+            )
+
        agent_name = agent_json.get("name", "Generated Agent")
        agent_description = agent_json.get("description", "")
        node_count = len(agent_json.get("nodes", []))
--- a/autogpt_platform/backend/backend/copilot/tools/customize_agent.py
+++ b/autogpt_platform/backend/backend/copilot/tools/customize_agent.py
@@ -48,7 +48,6 @@ class CustomizeAgentTool(BaseTool):

    @property
    def is_long_running(self) -> bool:
-        """Agent customization takes several minutes - show mini-game."""
        return True

    @property
--- a/autogpt_platform/backend/backend/copilot/tools/edit_agent.py
+++ b/autogpt_platform/backend/backend/copilot/tools/edit_agent.py
@@ -17,6 +17,7 @@ from .base import BaseTool
 from .models import (
    AgentPreviewResponse,
    AgentSavedResponse,
+    AsyncProcessingResponse,
    ClarificationNeededResponse,
    ClarifyingQuestion,
    ErrorResponse,
@@ -46,7 +47,6 @@ class EditAgentTool(BaseTool):

    @property
    def is_long_running(self) -> bool:
-        """Agent editing takes several minutes - show mini-game."""
        return True

    @property
@@ -105,6 +105,10 @@ class EditAgentTool(BaseTool):
        save = kwargs.get("save", True)
        session_id = session.session_id if session else None

+        # Extract async processing params (passed by long-running tool handler)
+        operation_id = kwargs.get("_operation_id")
+        task_id = kwargs.get("_task_id")
+
        if not agent_id:
            return ErrorResponse(
                message="Please provide the agent ID to edit.",
@@ -153,6 +157,8 @@ class EditAgentTool(BaseTool):
                update_request,
                current_agent,
                library_agents,
+                operation_id=operation_id,
+                task_id=task_id,
            )
        except AgentGeneratorNotConfiguredError:
            return ErrorResponse(
@@ -172,6 +178,19 @@ class EditAgentTool(BaseTool):
                session_id=session_id,
            )

+        # Check if Agent Generator accepted for async processing
+        if result.get("status") == "accepted":
+            logger.info(
+                f"Agent edit delegated to async processing "
+                f"(operation_id={operation_id}, task_id={task_id})"
+            )
+            return AsyncProcessingResponse(
+                message="Agent edit started. You'll be notified when it's complete.",
+                operation_id=operation_id,
+                task_id=task_id,
+                session_id=session_id,
+            )
+
        # Check if the result is an error from the external service
        if isinstance(result, dict) and result.get("type") == "error":
            error_msg = result.get("error", "Unknown error")
--- a/autogpt_platform/backend/backend/copilot/tools/models.py
+++ b/autogpt_platform/backend/backend/copilot/tools/models.py
@@ -35,6 +35,10 @@ class ResponseType(str, Enum):
    WORKSPACE_FILE_METADATA = "workspace_file_metadata"
    WORKSPACE_FILE_WRITTEN = "workspace_file_written"
    WORKSPACE_FILE_DELETED = "workspace_file_deleted"
+    # Long-running operation types
+    OPERATION_STARTED = "operation_started"
+    OPERATION_PENDING = "operation_pending"
+    OPERATION_IN_PROGRESS = "operation_in_progress"
    # Input validation
    INPUT_VALIDATION_ERROR = "input_validation_error"
    # Web fetch
@@ -415,6 +419,63 @@ class BlockOutputResponse(ToolResponseBase):
    success: bool = True


+# Long-running operation models
+class OperationStartedResponse(ToolResponseBase):
+    """Response when a long-running operation has been started in the background.
+
+    This is returned immediately to the client while the operation continues
+    to execute. The user can close the tab and check back later.
+
+    The task_id can be used to reconnect to the SSE stream via
+    GET /chat/tasks/{task_id}/stream?last_idx=0
+    """
+
+    type: ResponseType = ResponseType.OPERATION_STARTED
+    operation_id: str  # also accepted by the check_operation_status tool
+    tool_name: str  # presumably the name of the tool that was started — confirm
+    task_id: str | None = None  # For SSE reconnection
+
+
+class OperationPendingResponse(ToolResponseBase):
+    """Response stored in chat history while a long-running operation is executing.
+
+    This is persisted to the database so users see a pending state when they
+    refresh before the operation completes.
+    """
+
+    type: ResponseType = ResponseType.OPERATION_PENDING
+    operation_id: str  # required; identifies the operation that is still executing
+    tool_name: str  # required; presumably the tool running the operation — confirm
+
+
+class OperationInProgressResponse(ToolResponseBase):
+    """Response when an operation is already in progress.
+
+    Returned for idempotency when the same tool_call_id is requested again
+    while the background task is still running.
+    """
+
+    type: ResponseType = ResponseType.OPERATION_IN_PROGRESS
+    tool_call_id: str  # the tool_call_id that was requested again
+
+
+class AsyncProcessingResponse(ToolResponseBase):
+    """Response when an operation has been delegated to async processing.
+
+    This is returned by tools when the external service accepts the request
+    for async processing (HTTP 202 Accepted). The Redis Streams completion
+    consumer will handle the result when the external service completes.
+
+    The status field is "accepted" so the long-running tool handler can detect
+    this response and skip LLM continuation. ``type`` reuses OPERATION_STARTED.
+    """
+
+    type: ResponseType = ResponseType.OPERATION_STARTED
+    status: str = "accepted"  # Must be "accepted" for detection
+    operation_id: str | None = None  # create/edit_agent pass their _operation_id
+    task_id: str | None = None  # create/edit_agent pass their _task_id kwarg
+
+
 class WebFetchResponse(ToolResponseBase):
    """Response for web_fetch tool."""

--- a/autogpt_platform/frontend/src/app/(platform)/copilot/components/LongRunningToolDisplay/LongRunningToolDisplay.tsx
+++ b/autogpt_platform/frontend/src/app/(platform)/copilot/components/LongRunningToolDisplay/LongRunningToolDisplay.tsx
@@ -2,7 +2,7 @@ import { PlusCircleIcon } from "@phosphor-icons/react";
 import {
  ContentGrid,
  ContentHint,
-} from "../../tools/CreateAgent/../../components/ToolAccordion/AccordionContent";
+} from "../../components/ToolAccordion/AccordionContent";
 import { ToolAccordion } from "../../components/ToolAccordion/ToolAccordion";
 import { MiniGame } from "../../tools/CreateAgent/components/MiniGame/MiniGame";