refactor(frontend): streamline NodeDataViewer component and execution results handling

### Changes

- Removed unused `NodeExecutionResult` type and `executionResults` prop from `NodeDataViewerProps`.
- Simplified the logic for resolving execution results by directly using the `useNodeStore` hook.
- Updated the component to ensure consistent handling of data types and improved readability.

### Impact

- Enhances code clarity and maintainability by reducing unnecessary complexity in the component.
- Ensures that the latest execution results are effectively utilized in the data viewer.

### Testing

- Verified that the component functions correctly with the updated logic and maintains expected behavior.
Merge branch 'dev' into abhi/show-all-execution-node
2026-01-25 06:58:21 -05:00 · 2026-01-25 12:25:49 +05:30 · 2026-01-25 12:17:28 +05:30 · 2026-01-25 12:17:12 +05:30 · 2026-01-25 12:03:22 +05:30 · 2026-01-25 11:54:05 +05:30
367 changed files with 12290 additions and 5009 deletions
--- a/.github/workflows/platform-frontend-ci.yml
+++ b/.github/workflows/platform-frontend-ci.yml
@@ -128,7 +128,7 @@ jobs:
          token: ${{ secrets.GITHUB_TOKEN }}
          exitOnceUploaded: true

-  test:
+  e2e_test:
    runs-on: big-boi
    needs: setup
    strategy:
@@ -258,3 +258,39 @@ jobs:
      - name: Print Final Docker Compose logs
        if: always()
        run: docker compose -f ../docker-compose.yml logs
+
+  integration_test:
+    runs-on: ubuntu-latest
+    needs: setup
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+        with:
+          submodules: recursive
+
+      - name: Set up Node.js
+        uses: actions/setup-node@v4
+        with:
+          node-version: "22.18.0"
+
+      - name: Enable corepack
+        run: corepack enable
+
+      - name: Restore dependencies cache
+        uses: actions/cache@v4
+        with:
+          path: ~/.pnpm-store
+          key: ${{ needs.setup.outputs.cache-key }}
+          restore-keys: |
+            ${{ runner.os }}-pnpm-${{ hashFiles('autogpt_platform/frontend/pnpm-lock.yaml') }}
+            ${{ runner.os }}-pnpm-
+
+      - name: Install dependencies
+        run: pnpm install --frozen-lockfile
+
+      - name: Generate API client
+        run: pnpm generate:api
+
+      - name: Run Integration Tests
+        run: pnpm test:unit
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -16,6 +16,32 @@ See `docs/content/platform/getting-started.md` for setup instructions.
 - Format Python code with `poetry run format`.
 - Format frontend code using `pnpm format`.

+
+## Frontend guidelines:
+
+See `/frontend/CONTRIBUTING.md` for complete patterns. Quick reference:
+
+1. **Pages**: Create in `src/app/(platform)/feature-name/page.tsx`
+   - Add `usePageName.ts` hook for logic
+   - Put sub-components in local `components/` folder
+2. **Components**: Structure as `ComponentName/ComponentName.tsx` + `useComponentName.ts` + `helpers.ts`
+   - Use design system components from `src/components/` (atoms, molecules, organisms)
+   - Never use `src/components/__legacy__/*`
+3. **Data fetching**: Use generated API hooks from `@/app/api/__generated__/endpoints/`
+   - Regenerate with `pnpm generate:api`
+   - Pattern: `use{Method}{Version}{OperationName}`
+4. **Styling**: Tailwind CSS only, use design tokens, Phosphor Icons only
+5. **Testing**: Add Storybook stories for new components, Playwright for E2E
+6. **Code conventions**: Function declarations (not arrow functions) for components/handlers
+- Component props should be `interface Props { ... }` (not exported) unless the interface needs to be used outside the component
+- Separate render logic from business logic (component.tsx + useComponent.ts + helpers.ts)
+- Colocate state when possible and avoid creating large components; use sub-components (in a local `/components` folder next to the parent component) when sensible
+- Avoid large hooks, abstract logic into `helpers.ts` files when sensible
+- Use function declarations for components, arrow functions only for callbacks
+- No barrel files or `index.ts` re-exports
+- Do not use `useCallback` or `useMemo` unless strictly needed
+- Avoid comments at all times unless the code is very complex
+
 ## Testing

 - Backend: `poetry run test` (runs pytest with a docker based postgres + prisma).
--- a/autogpt_platform/CLAUDE.md
+++ b/autogpt_platform/CLAUDE.md
@@ -201,7 +201,7 @@ If you get any pushback or hit complex block conditions check the new_blocks gui
 3. Write tests alongside the route file
 4. Run `poetry run test` to verify

-**Frontend feature development:**
+### Frontend guidelines:

 See `/frontend/CONTRIBUTING.md` for complete patterns. Quick reference:

@@ -217,6 +217,14 @@ See `/frontend/CONTRIBUTING.md` for complete patterns. Quick reference:
 4. **Styling**: Tailwind CSS only, use design tokens, Phosphor Icons only
 5. **Testing**: Add Storybook stories for new components, Playwright for E2E
 6. **Code conventions**: Function declarations (not arrow functions) for components/handlers
+- Component props should be `interface Props { ... }` (not exported) unless the interface needs to be used outside the component
+- Separate render logic from business logic (component.tsx + useComponent.ts + helpers.ts)
+- Colocate state when possible and avoid creating large components; use sub-components (in a local `/components` folder next to the parent component) when sensible
+- Avoid large hooks, abstract logic into `helpers.ts` files when sensible
+- Use function declarations for components, arrow functions only for callbacks
+- No barrel files or `index.ts` re-exports
+- Do not use `useCallback` or `useMemo` unless strictly needed
+- Avoid comments at all times unless the code is very complex

 ### Security Implementation

--- a/autogpt_platform/backend/backend/api/features/chat/model.py
+++ b/autogpt_platform/backend/backend/api/features/chat/model.py
@@ -290,6 +290,11 @@ async def _cache_session(session: ChatSession) -> None:
    await async_redis.setex(redis_key, config.session_ttl, session.model_dump_json())


+async def cache_chat_session(session: ChatSession) -> None:
+    """Cache a chat session without persisting to the database."""
+    await _cache_session(session)
+
+
 async def _get_session_from_db(session_id: str) -> ChatSession | None:
    """Get a chat session from the database."""
    prisma_session = await chat_db.get_chat_session(session_id)
--- a/autogpt_platform/backend/backend/api/features/chat/routes.py
+++ b/autogpt_platform/backend/backend/api/features/chat/routes.py
@@ -172,12 +172,12 @@ async def get_session(
        user_id: The optional authenticated user ID, or None for anonymous access.

    Returns:
-        SessionDetailResponse: Details for the requested session; raises NotFoundError if not found.
+        SessionDetailResponse: Details for the requested session. Raises NotFoundError if the session is not found.

    """
    session = await get_chat_session(session_id, user_id)
    if not session:
-        raise NotFoundError(f"Session {session_id} not found")
+        raise NotFoundError(f"Session {session_id} not found.")

    messages = [message.model_dump() for message in session.messages]
    logger.info(
@@ -222,6 +222,8 @@ async def stream_chat_post(
    session = await _validate_and_get_session(session_id, user_id)

    async def event_generator() -> AsyncGenerator[str, None]:
+        chunk_count = 0
+        first_chunk_type: str | None = None
        async for chunk in chat_service.stream_chat_completion(
            session_id,
            request.message,
@@ -230,7 +232,26 @@ async def stream_chat_post(
            session=session,  # Pass pre-fetched session to avoid double-fetch
            context=request.context,
        ):
+            if chunk_count < 3:
+                logger.info(
+                    "Chat stream chunk",
+                    extra={
+                        "session_id": session_id,
+                        "chunk_type": str(chunk.type),
+                    },
+                )
+            if not first_chunk_type:
+                first_chunk_type = str(chunk.type)
+            chunk_count += 1
            yield chunk.to_sse()
+        logger.info(
+            "Chat stream completed",
+            extra={
+                "session_id": session_id,
+                "chunk_count": chunk_count,
+                "first_chunk_type": first_chunk_type,
+            },
+        )
        # AI SDK protocol termination
        yield "data: [DONE]\n\n"

@@ -275,6 +296,8 @@ async def stream_chat_get(
    session = await _validate_and_get_session(session_id, user_id)

    async def event_generator() -> AsyncGenerator[str, None]:
+        chunk_count = 0
+        first_chunk_type: str | None = None
        async for chunk in chat_service.stream_chat_completion(
            session_id,
            message,
@@ -282,7 +305,26 @@ async def stream_chat_get(
            user_id=user_id,
            session=session,  # Pass pre-fetched session to avoid double-fetch
        ):
+            if chunk_count < 3:
+                logger.info(
+                    "Chat stream chunk",
+                    extra={
+                        "session_id": session_id,
+                        "chunk_type": str(chunk.type),
+                    },
+                )
+            if not first_chunk_type:
+                first_chunk_type = str(chunk.type)
+            chunk_count += 1
            yield chunk.to_sse()
+        logger.info(
+            "Chat stream completed",
+            extra={
+                "session_id": session_id,
+                "chunk_count": chunk_count,
+                "first_chunk_type": first_chunk_type,
+            },
+        )
        # AI SDK protocol termination
        yield "data: [DONE]\n\n"

--- a/autogpt_platform/backend/backend/api/features/chat/service.py
+++ b/autogpt_platform/backend/backend/api/features/chat/service.py
@@ -1,12 +1,20 @@
 import asyncio
 import logging
+import time
+from asyncio import CancelledError
 from collections.abc import AsyncGenerator
 from typing import Any

 import orjson
 from langfuse import get_client, propagate_attributes
 from langfuse.openai import openai  # type: ignore
-from openai import APIConnectionError, APIError, APIStatusError, RateLimitError
+from openai import (
+    APIConnectionError,
+    APIError,
+    APIStatusError,
+    PermissionDeniedError,
+    RateLimitError,
+)
 from openai.types.chat import ChatCompletionChunk, ChatCompletionToolParam

 from backend.data.understanding import (
@@ -21,6 +29,7 @@ from .model import (
    ChatMessage,
    ChatSession,
    Usage,
+    cache_chat_session,
    get_chat_session,
    update_session_title,
    upsert_chat_session,
@@ -296,6 +305,10 @@ async def stream_chat_completion(
                content="",
            )
            accumulated_tool_calls: list[dict[str, Any]] = []
+            has_saved_assistant_message = False
+            has_appended_streaming_message = False
+            last_cache_time = 0.0
+            last_cache_content_len = 0

            # Wrap main logic in try/finally to ensure Langfuse observations are always ended
            has_yielded_end = False
@@ -332,6 +345,23 @@ async def stream_chat_completion(
                        assert assistant_response.content is not None
                        assistant_response.content += delta
                        has_received_text = True
+                        if not has_appended_streaming_message:
+                            session.messages.append(assistant_response)
+                            has_appended_streaming_message = True
+                        current_time = time.monotonic()
+                        content_len = len(assistant_response.content)
+                        if (
+                            current_time - last_cache_time >= 1.0
+                            and content_len > last_cache_content_len
+                        ):
+                            try:
+                                await cache_chat_session(session)
+                            except Exception as e:
+                                logger.warning(
+                                    f"Failed to cache partial session {session.session_id}: {e}"
+                                )
+                            last_cache_time = current_time
+                            last_cache_content_len = content_len
                        yield chunk
                    elif isinstance(chunk, StreamTextEnd):
                        # Emit text-end after text completes
@@ -390,10 +420,42 @@ async def stream_chat_completion(
                            if has_received_text and not text_streaming_ended:
                                yield StreamTextEnd(id=text_block_id)
                                text_streaming_ended = True
+
+                            # Save assistant message before yielding finish to ensure it's persisted
+                            # even if client disconnects immediately after receiving StreamFinish
+                            if not has_saved_assistant_message:
+                                messages_to_save_early: list[ChatMessage] = []
+                                if accumulated_tool_calls:
+                                    assistant_response.tool_calls = (
+                                        accumulated_tool_calls
+                                    )
+                                if not has_appended_streaming_message and (
+                                    assistant_response.content
+                                    or assistant_response.tool_calls
+                                ):
+                                    messages_to_save_early.append(assistant_response)
+                                messages_to_save_early.extend(tool_response_messages)
+
+                                if messages_to_save_early:
+                                    session.messages.extend(messages_to_save_early)
+                                    logger.info(
+                                        f"Saving assistant message before StreamFinish: "
+                                        f"content_len={len(assistant_response.content or '')}, "
+                                        f"tool_calls={len(assistant_response.tool_calls or [])}, "
+                                        f"tool_responses={len(tool_response_messages)}"
+                                    )
+                                if (
+                                    messages_to_save_early
+                                    or has_appended_streaming_message
+                                ):
+                                    await upsert_chat_session(session)
+                                    has_saved_assistant_message = True
+
                            has_yielded_end = True
                            yield chunk
                    elif isinstance(chunk, StreamError):
                        has_yielded_error = True
+                        yield chunk
                    elif isinstance(chunk, StreamUsage):
                        session.usage.append(
                            Usage(
@@ -413,6 +475,27 @@ async def stream_chat_completion(
                    langfuse.update_current_trace(output=str(tool_response_messages))
                    langfuse.update_current_span(output=str(tool_response_messages))

+            except CancelledError:
+                if not has_saved_assistant_message:
+                    if accumulated_tool_calls:
+                        assistant_response.tool_calls = accumulated_tool_calls
+                    if assistant_response.content:
+                        assistant_response.content = (
+                            f"{assistant_response.content}\n\n[interrupted]"
+                        )
+                    else:
+                        assistant_response.content = "[interrupted]"
+                    if not has_appended_streaming_message:
+                        session.messages.append(assistant_response)
+                    if tool_response_messages:
+                        session.messages.extend(tool_response_messages)
+                    try:
+                        await upsert_chat_session(session)
+                    except Exception as e:
+                        logger.warning(
+                            f"Failed to save interrupted session {session.session_id}: {e}"
+                        )
+                raise
            except Exception as e:
                logger.error(f"Error during stream: {e!s}", exc_info=True)

@@ -434,14 +517,19 @@ async def stream_chat_completion(
                    # Add assistant message if it has content or tool calls
                    if accumulated_tool_calls:
                        assistant_response.tool_calls = accumulated_tool_calls
-                    if assistant_response.content or assistant_response.tool_calls:
+                    if not has_appended_streaming_message and (
+                        assistant_response.content or assistant_response.tool_calls
+                    ):
                        messages_to_save.append(assistant_response)

                    # Add tool response messages after assistant message
                    messages_to_save.extend(tool_response_messages)

-                    session.messages.extend(messages_to_save)
-                    await upsert_chat_session(session)
+                    if not has_saved_assistant_message:
+                        if messages_to_save:
+                            session.messages.extend(messages_to_save)
+                        if messages_to_save or has_appended_streaming_message:
+                            await upsert_chat_session(session)

                    if not has_yielded_error:
                        error_message = str(e)
@@ -472,38 +560,49 @@ async def stream_chat_completion(
                return  # Exit after retry to avoid double-saving in finally block

            # Normal completion path - save session and handle tool call continuation
-            logger.info(
-                f"Normal completion path: session={session.session_id}, "
-                f"current message_count={len(session.messages)}"
-            )
-
-            # Build the messages list in the correct order
-            messages_to_save: list[ChatMessage] = []
-
-            # Add assistant message with tool_calls if any
-            if accumulated_tool_calls:
-                assistant_response.tool_calls = accumulated_tool_calls
+            # Only save if we haven't already saved when StreamFinish was received
+            if not has_saved_assistant_message:
                logger.info(
-                    f"Added {len(accumulated_tool_calls)} tool calls to assistant message"
-                )
-            if assistant_response.content or assistant_response.tool_calls:
-                messages_to_save.append(assistant_response)
-                logger.info(
-                    f"Saving assistant message with content_len={len(assistant_response.content or '')}, tool_calls={len(assistant_response.tool_calls or [])}"
+                    f"Normal completion path: session={session.session_id}, "
+                    f"current message_count={len(session.messages)}"
                )

-            # Add tool response messages after assistant message
-            messages_to_save.extend(tool_response_messages)
-            logger.info(
-                f"Saving {len(tool_response_messages)} tool response messages, "
-                f"total_to_save={len(messages_to_save)}"
-            )
+                # Build the messages list in the correct order
+                messages_to_save: list[ChatMessage] = []

-            session.messages.extend(messages_to_save)
-            logger.info(
-                f"Extended session messages, new message_count={len(session.messages)}"
-            )
-            await upsert_chat_session(session)
+                # Add assistant message with tool_calls if any
+                if accumulated_tool_calls:
+                    assistant_response.tool_calls = accumulated_tool_calls
+                    logger.info(
+                        f"Added {len(accumulated_tool_calls)} tool calls to assistant message"
+                    )
+                if not has_appended_streaming_message and (
+                    assistant_response.content or assistant_response.tool_calls
+                ):
+                    messages_to_save.append(assistant_response)
+                    logger.info(
+                        f"Saving assistant message with content_len={len(assistant_response.content or '')}, tool_calls={len(assistant_response.tool_calls or [])}"
+                    )
+
+                # Add tool response messages after assistant message
+                messages_to_save.extend(tool_response_messages)
+                logger.info(
+                    f"Saving {len(tool_response_messages)} tool response messages, "
+                    f"total_to_save={len(messages_to_save)}"
+                )
+
+                if messages_to_save:
+                    session.messages.extend(messages_to_save)
+                    logger.info(
+                        f"Extended session messages, new message_count={len(session.messages)}"
+                    )
+                if messages_to_save or has_appended_streaming_message:
+                    await upsert_chat_session(session)
+            else:
+                logger.info(
+                    "Assistant message already saved when StreamFinish was received, "
+                    "skipping duplicate save"
+                )

            # If we did a tool call, stream the chat completion again to get the next response
            if has_done_tool_call:
@@ -545,6 +644,12 @@ def _is_retryable_error(error: Exception) -> bool:
    return False


+def _is_region_blocked_error(error: Exception) -> bool:
+    if isinstance(error, PermissionDeniedError):
+        return "not available in your region" in str(error).lower()
+    return "not available in your region" in str(error).lower()
+
+
 async def _stream_chat_chunks(
    session: ChatSession,
    tools: list[ChatCompletionToolParam],
@@ -737,7 +842,18 @@ async def _stream_chat_chunks(
                        f"Error in stream (not retrying): {e!s}",
                        exc_info=True,
                    )
-                    error_response = StreamError(errorText=str(e))
+                    error_code = None
+                    error_text = str(e)
+                    if _is_region_blocked_error(e):
+                        error_code = "MODEL_NOT_AVAILABLE_REGION"
+                        error_text = (
+                            "This model is not available in your region. "
+                            "Please connect via VPN and try again."
+                        )
+                    error_response = StreamError(
+                        errorText=error_text,
+                        code=error_code,
+                    )
                    yield error_response
                    yield StreamFinish()
                    return
--- a/autogpt_platform/backend/backend/api/features/chat/tools/agent_generator/init.py
+++ b/autogpt_platform/backend/backend/api/features/chat/tools/agent_generator/init.py
@@ -1,29 +1,28 @@
 """Agent generator package - Creates agents from natural language."""

 from .core import (
-    apply_agent_patch,
+    AgentGeneratorNotConfiguredError,
    decompose_goal,
    generate_agent,
    generate_agent_patch,
    get_agent_as_json,
+    json_to_graph,
    save_agent_to_library,
 )
-from .fixer import apply_all_fixes
-from .utils import get_blocks_info
-from .validator import validate_agent
+from .service import health_check as check_external_service_health
+from .service import is_external_service_configured

 __all__ = [
    # Core functions
    "decompose_goal",
    "generate_agent",
    "generate_agent_patch",
-    "apply_agent_patch",
    "save_agent_to_library",
    "get_agent_as_json",
-    # Fixer
-    "apply_all_fixes",
-    # Validator
-    "validate_agent",
-    # Utils
-    "get_blocks_info",
+    "json_to_graph",
+    # Exceptions
+    "AgentGeneratorNotConfiguredError",
+    # Service
+    "is_external_service_configured",
+    "check_external_service_health",
 ]
--- a/autogpt_platform/backend/backend/api/features/chat/tools/agent_generator/client.py
+++ b/autogpt_platform/backend/backend/api/features/chat/tools/agent_generator/client.py
@@ -1,25 +0,0 @@
-"""OpenRouter client configuration for agent generation."""
-
-import os
-
-from openai import AsyncOpenAI
-
-# Configuration - use OPEN_ROUTER_API_KEY for consistency with chat/config.py
-OPENROUTER_API_KEY = os.getenv("OPEN_ROUTER_API_KEY")
-AGENT_GENERATOR_MODEL = os.getenv("AGENT_GENERATOR_MODEL", "anthropic/claude-opus-4.5")
-
-# OpenRouter client (OpenAI-compatible API)
-_client: AsyncOpenAI | None = None
-
-
-def get_client() -> AsyncOpenAI:
-    """Get or create the OpenRouter client."""
-    global _client
-    if _client is None:
-        if not OPENROUTER_API_KEY:
-            raise ValueError("OPENROUTER_API_KEY environment variable is required")
-        _client = AsyncOpenAI(
-            base_url="https://openrouter.ai/api/v1",
-            api_key=OPENROUTER_API_KEY,
-        )
-    return _client
--- a/autogpt_platform/backend/backend/api/features/chat/tools/agent_generator/core.py
+++ b/autogpt_platform/backend/backend/api/features/chat/tools/agent_generator/core.py
@@ -1,7 +1,5 @@
 """Core agent generation functions."""

-import copy
-import json
 import logging
 import uuid
 from typing import Any
@@ -9,13 +7,35 @@ from typing import Any
 from backend.api.features.library import db as library_db
 from backend.data.graph import Graph, Link, Node, create_graph

-from .client import AGENT_GENERATOR_MODEL, get_client
-from .prompts import DECOMPOSITION_PROMPT, GENERATION_PROMPT, PATCH_PROMPT
-from .utils import get_block_summaries, parse_json_from_llm
+from .service import (
+    decompose_goal_external,
+    generate_agent_external,
+    generate_agent_patch_external,
+    is_external_service_configured,
+)

 logger = logging.getLogger(__name__)


+class AgentGeneratorNotConfiguredError(Exception):
+    """Raised when the external Agent Generator service is not configured."""
+
+    pass
+
+
+def _check_service_configured() -> None:
+    """Check if the external Agent Generator service is configured.
+
+    Raises:
+        AgentGeneratorNotConfiguredError: If the service is not configured.
+    """
+    if not is_external_service_configured():
+        raise AgentGeneratorNotConfiguredError(
+            "Agent Generator service is not configured. "
+            "Set AGENTGENERATOR_HOST environment variable to enable agent generation."
+        )
+
+
 async def decompose_goal(description: str, context: str = "") -> dict[str, Any] | None:
    """Break down a goal into steps or return clarifying questions.

@@ -28,40 +48,13 @@ async def decompose_goal(description: str, context: str = "") -> dict[str, Any]
        - {"type": "clarifying_questions", "questions": [...]}
        - {"type": "instructions", "steps": [...]}
        Or None on error
+
+    Raises:
+        AgentGeneratorNotConfiguredError: If the external service is not configured.
    """
-    client = get_client()
-    prompt = DECOMPOSITION_PROMPT.format(block_summaries=get_block_summaries())
-
-    full_description = description
-    if context:
-        full_description = f"{description}\n\nAdditional context:\n{context}"
-
-    try:
-        response = await client.chat.completions.create(
-            model=AGENT_GENERATOR_MODEL,
-            messages=[
-                {"role": "system", "content": prompt},
-                {"role": "user", "content": full_description},
-            ],
-            temperature=0,
-        )
-
-        content = response.choices[0].message.content
-        if content is None:
-            logger.error("LLM returned empty content for decomposition")
-            return None
-
-        result = parse_json_from_llm(content)
-
-        if result is None:
-            logger.error(f"Failed to parse decomposition response: {content[:200]}")
-            return None
-
-        return result
-
-    except Exception as e:
-        logger.error(f"Error decomposing goal: {e}")
-        return None
+    _check_service_configured()
+    logger.info("Calling external Agent Generator service for decompose_goal")
+    return await decompose_goal_external(description, context)


 async def generate_agent(instructions: dict[str, Any]) -> dict[str, Any] | None:
@@ -72,31 +65,14 @@ async def generate_agent(instructions: dict[str, Any]) -> dict[str, Any] | None:

    Returns:
        Agent JSON dict or None on error
+
+    Raises:
+        AgentGeneratorNotConfiguredError: If the external service is not configured.
    """
-    client = get_client()
-    prompt = GENERATION_PROMPT.format(block_summaries=get_block_summaries())
-
-    try:
-        response = await client.chat.completions.create(
-            model=AGENT_GENERATOR_MODEL,
-            messages=[
-                {"role": "system", "content": prompt},
-                {"role": "user", "content": json.dumps(instructions, indent=2)},
-            ],
-            temperature=0,
-        )
-
-        content = response.choices[0].message.content
-        if content is None:
-            logger.error("LLM returned empty content for agent generation")
-            return None
-
-        result = parse_json_from_llm(content)
-
-        if result is None:
-            logger.error(f"Failed to parse agent JSON: {content[:200]}")
-            return None
-
+    _check_service_configured()
+    logger.info("Calling external Agent Generator service for generate_agent")
+    result = await generate_agent_external(instructions)
+    if result:
        # Ensure required fields
        if "id" not in result:
            result["id"] = str(uuid.uuid4())
@@ -104,12 +80,7 @@ async def generate_agent(instructions: dict[str, Any]) -> dict[str, Any] | None:
            result["version"] = 1
        if "is_active" not in result:
            result["is_active"] = True
-
-        return result
-
-    except Exception as e:
-        logger.error(f"Error generating agent: {e}")
-        return None
+    return result


 def json_to_graph(agent_json: dict[str, Any]) -> Graph:
@@ -284,108 +255,23 @@ async def get_agent_as_json(
 async def generate_agent_patch(
    update_request: str, current_agent: dict[str, Any]
 ) -> dict[str, Any] | None:
-    """Generate a patch to update an existing agent.
+    """Update an existing agent using natural language.
+
+    The external Agent Generator service handles:
+    - Generating the patch
+    - Applying the patch
+    - Fixing and validating the result

    Args:
        update_request: Natural language description of changes
        current_agent: Current agent JSON

    Returns:
-        Patch dict or clarifying questions, or None on error
+        Updated agent JSON, clarifying questions dict, or None on error
+
+    Raises:
+        AgentGeneratorNotConfiguredError: If the external service is not configured.
    """
-    client = get_client()
-    prompt = PATCH_PROMPT.format(
-        current_agent=json.dumps(current_agent, indent=2),
-        block_summaries=get_block_summaries(),
-    )
-
-    try:
-        response = await client.chat.completions.create(
-            model=AGENT_GENERATOR_MODEL,
-            messages=[
-                {"role": "system", "content": prompt},
-                {"role": "user", "content": update_request},
-            ],
-            temperature=0,
-        )
-
-        content = response.choices[0].message.content
-        if content is None:
-            logger.error("LLM returned empty content for patch generation")
-            return None
-
-        return parse_json_from_llm(content)
-
-    except Exception as e:
-        logger.error(f"Error generating patch: {e}")
-        return None
-
-
-def apply_agent_patch(
-    current_agent: dict[str, Any], patch: dict[str, Any]
-) -> dict[str, Any]:
-    """Apply a patch to an existing agent.
-
-    Args:
-        current_agent: Current agent JSON
-        patch: Patch dict with operations
-
-    Returns:
-        Updated agent JSON
-    """
-    agent = copy.deepcopy(current_agent)
-    patches = patch.get("patches", [])
-
-    for p in patches:
-        patch_type = p.get("type")
-
-        if patch_type == "modify":
-            node_id = p.get("node_id")
-            changes = p.get("changes", {})
-
-            for node in agent.get("nodes", []):
-                if node["id"] == node_id:
-                    _deep_update(node, changes)
-                    logger.debug(f"Modified node {node_id}")
-                    break
-
-        elif patch_type == "add":
-            new_nodes = p.get("new_nodes", [])
-            new_links = p.get("new_links", [])
-
-            agent["nodes"] = agent.get("nodes", []) + new_nodes
-            agent["links"] = agent.get("links", []) + new_links
-            logger.debug(f"Added {len(new_nodes)} nodes, {len(new_links)} links")
-
-        elif patch_type == "remove":
-            node_ids_to_remove = set(p.get("node_ids", []))
-            link_ids_to_remove = set(p.get("link_ids", []))
-
-            # Remove nodes
-            agent["nodes"] = [
-                n for n in agent.get("nodes", []) if n["id"] not in node_ids_to_remove
-            ]
-
-            # Remove links (both explicit and those referencing removed nodes)
-            agent["links"] = [
-                link
-                for link in agent.get("links", [])
-                if link["id"] not in link_ids_to_remove
-                and link["source_id"] not in node_ids_to_remove
-                and link["sink_id"] not in node_ids_to_remove
-            ]
-
-            logger.debug(
-                f"Removed {len(node_ids_to_remove)} nodes, {len(link_ids_to_remove)} links"
-            )
-
-    return agent
-
-
-def _deep_update(target: dict, source: dict) -> None:
-    """Recursively update a dict with another dict."""
-    for key, value in source.items():
-        if key in target and isinstance(target[key], dict) and isinstance(value, dict):
-            _deep_update(target[key], value)
-        else:
-            target[key] = value
+    _check_service_configured()
+    logger.info("Calling external Agent Generator service for generate_agent_patch")
+    return await generate_agent_patch_external(update_request, current_agent)
--- a/autogpt_platform/backend/backend/api/features/chat/tools/agent_generator/fixer.py
+++ b/autogpt_platform/backend/backend/api/features/chat/tools/agent_generator/fixer.py
@@ -1,606 +0,0 @@
-"""Agent fixer - Fixes common LLM generation errors."""
-
-import logging
-import re
-import uuid
-from typing import Any
-
-from .utils import (
-    ADDTODICTIONARY_BLOCK_ID,
-    ADDTOLIST_BLOCK_ID,
-    CODE_EXECUTION_BLOCK_ID,
-    CONDITION_BLOCK_ID,
-    CREATEDICT_BLOCK_ID,
-    CREATELIST_BLOCK_ID,
-    DATA_SAMPLING_BLOCK_ID,
-    DOUBLE_CURLY_BRACES_BLOCK_IDS,
-    GET_CURRENT_DATE_BLOCK_ID,
-    STORE_VALUE_BLOCK_ID,
-    UNIVERSAL_TYPE_CONVERTER_BLOCK_ID,
-    get_blocks_info,
-    is_valid_uuid,
-)
-
-logger = logging.getLogger(__name__)
-
-
-def fix_agent_ids(agent: dict[str, Any]) -> dict[str, Any]:
-    """Fix invalid UUIDs in agent and link IDs."""
-    # Fix agent ID
-    if not is_valid_uuid(agent.get("id", "")):
-        agent["id"] = str(uuid.uuid4())
-        logger.debug(f"Fixed agent ID: {agent['id']}")
-
-    # Fix node IDs
-    id_mapping = {}  # Old ID -> New ID
-    for node in agent.get("nodes", []):
-        if not is_valid_uuid(node.get("id", "")):
-            old_id = node.get("id", "")
-            new_id = str(uuid.uuid4())
-            id_mapping[old_id] = new_id
-            node["id"] = new_id
-            logger.debug(f"Fixed node ID: {old_id} -> {new_id}")
-
-    # Fix link IDs and update references
-    for link in agent.get("links", []):
-        if not is_valid_uuid(link.get("id", "")):
-            link["id"] = str(uuid.uuid4())
-            logger.debug(f"Fixed link ID: {link['id']}")
-
-        # Update source/sink IDs if they were remapped
-        if link.get("source_id") in id_mapping:
-            link["source_id"] = id_mapping[link["source_id"]]
-        if link.get("sink_id") in id_mapping:
-            link["sink_id"] = id_mapping[link["sink_id"]]
-
-    return agent
-
-
-def fix_double_curly_braces(agent: dict[str, Any]) -> dict[str, Any]:
-    """Fix single curly braces to double in template blocks."""
-    for node in agent.get("nodes", []):
-        if node.get("block_id") not in DOUBLE_CURLY_BRACES_BLOCK_IDS:
-            continue
-
-        input_data = node.get("input_default", {})
-        for key in ("prompt", "format"):
-            if key in input_data and isinstance(input_data[key], str):
-                original = input_data[key]
-                # Fix simple variable references: {var} -> {{var}}
-                fixed = re.sub(
-                    r"(?<!\{)\{([a-zA-Z_][a-zA-Z0-9_]*)\}(?!\})",
-                    r"{{\1}}",
-                    original,
-                )
-                if fixed != original:
-                    input_data[key] = fixed
-                    logger.debug(f"Fixed curly braces in {key}")
-
-    return agent
-
-
-def fix_storevalue_before_condition(agent: dict[str, Any]) -> dict[str, Any]:
-    """Add StoreValueBlock before ConditionBlock if needed for value2."""
-    nodes = agent.get("nodes", [])
-    links = agent.get("links", [])
-
-    # Find all ConditionBlock nodes
-    condition_node_ids = {
-        node["id"] for node in nodes if node.get("block_id") == CONDITION_BLOCK_ID
-    }
-
-    if not condition_node_ids:
-        return agent
-
-    new_nodes = []
-    new_links = []
-    processed_conditions = set()
-
-    for link in links:
-        sink_id = link.get("sink_id")
-        sink_name = link.get("sink_name")
-
-        # Check if this link goes to a ConditionBlock's value2
-        if sink_id in condition_node_ids and sink_name == "value2":
-            source_node = next(
-                (n for n in nodes if n["id"] == link.get("source_id")), None
-            )
-
-            # Skip if source is already a StoreValueBlock
-            if source_node and source_node.get("block_id") == STORE_VALUE_BLOCK_ID:
-                continue
-
-            # Skip if we already processed this condition
-            if sink_id in processed_conditions:
-                continue
-
-            processed_conditions.add(sink_id)
-
-            # Create StoreValueBlock
-            store_node_id = str(uuid.uuid4())
-            store_node = {
-                "id": store_node_id,
-                "block_id": STORE_VALUE_BLOCK_ID,
-                "input_default": {"data": None},
-                "metadata": {"position": {"x": 0, "y": -100}},
-            }
-            new_nodes.append(store_node)
-
-            # Create link: original source -> StoreValueBlock
-            new_links.append(
-                {
-                    "id": str(uuid.uuid4()),
-                    "source_id": link["source_id"],
-                    "source_name": link["source_name"],
-                    "sink_id": store_node_id,
-                    "sink_name": "input",
-                    "is_static": False,
-                }
-            )
-
-            # Update original link: StoreValueBlock -> ConditionBlock
-            link["source_id"] = store_node_id
-            link["source_name"] = "output"
-
-            logger.debug(f"Added StoreValueBlock before ConditionBlock {sink_id}")
-
-    if new_nodes:
-        agent["nodes"] = nodes + new_nodes
-
-    return agent
-
-
-def fix_addtolist_blocks(agent: dict[str, Any]) -> dict[str, Any]:
-    """Fix AddToList blocks by adding prerequisite empty AddToList block.
-
-    When an AddToList block is found:
-    1. Checks if there's a CreateListBlock before it
-    2. Removes CreateListBlock if linked directly to AddToList
-    3. Adds an empty AddToList block before the original
-    4. Ensures the original has a self-referencing link
-    """
-    nodes = agent.get("nodes", [])
-    links = agent.get("links", [])
-    new_nodes = []
-    original_addtolist_ids = set()
-    nodes_to_remove = set()
-    links_to_remove = []
-
-    # First pass: identify CreateListBlock nodes to remove
-    for link in links:
-        source_node = next(
-            (n for n in nodes if n.get("id") == link.get("source_id")), None
-        )
-        sink_node = next((n for n in nodes if n.get("id") == link.get("sink_id")), None)
-
-        if (
-            source_node
-            and sink_node
-            and source_node.get("block_id") == CREATELIST_BLOCK_ID
-            and sink_node.get("block_id") == ADDTOLIST_BLOCK_ID
-        ):
-            nodes_to_remove.add(source_node.get("id"))
-            links_to_remove.append(link)
-            logger.debug(f"Removing CreateListBlock {source_node.get('id')}")
-
-    # Second pass: process AddToList blocks
-    filtered_nodes = []
-    for node in nodes:
-        if node.get("id") in nodes_to_remove:
-            continue
-
-        if node.get("block_id") == ADDTOLIST_BLOCK_ID:
-            original_addtolist_ids.add(node.get("id"))
-            node_id = node.get("id")
-            pos = node.get("metadata", {}).get("position", {"x": 0, "y": 0})
-
-            # Check if already has prerequisite
-            has_prereq = any(
-                link.get("sink_id") == node_id
-                and link.get("sink_name") == "list"
-                and link.get("source_name") == "updated_list"
-                for link in links
-            )
-
-            if not has_prereq:
-                # Remove links to "list" input (except self-reference)
-                for link in links:
-                    if (
-                        link.get("sink_id") == node_id
-                        and link.get("sink_name") == "list"
-                        and link.get("source_id") != node_id
-                        and link not in links_to_remove
-                    ):
-                        links_to_remove.append(link)
-
-                # Create prerequisite AddToList block
-                prereq_id = str(uuid.uuid4())
-                prereq_node = {
-                    "id": prereq_id,
-                    "block_id": ADDTOLIST_BLOCK_ID,
-                    "input_default": {"list": [], "entry": None, "entries": []},
-                    "metadata": {
-                        "position": {"x": pos.get("x", 0) - 800, "y": pos.get("y", 0)}
-                    },
-                }
-                new_nodes.append(prereq_node)
-
-                # Link prerequisite to original
-                links.append(
-                    {
-                        "id": str(uuid.uuid4()),
-                        "source_id": prereq_id,
-                        "source_name": "updated_list",
-                        "sink_id": node_id,
-                        "sink_name": "list",
-                        "is_static": False,
-                    }
-                )
-                logger.debug(f"Added prerequisite AddToList block for {node_id}")
-
-        filtered_nodes.append(node)
-
-    # Remove marked links
-    filtered_links = [link for link in links if link not in links_to_remove]
-
-    # Add self-referencing links for original AddToList blocks
-    for node in filtered_nodes + new_nodes:
-        if (
-            node.get("block_id") == ADDTOLIST_BLOCK_ID
-            and node.get("id") in original_addtolist_ids
-        ):
-            node_id = node.get("id")
-            has_self_ref = any(
-                link["source_id"] == node_id
-                and link["sink_id"] == node_id
-                and link["source_name"] == "updated_list"
-                and link["sink_name"] == "list"
-                for link in filtered_links
-            )
-            if not has_self_ref:
-                filtered_links.append(
-                    {
-                        "id": str(uuid.uuid4()),
-                        "source_id": node_id,
-                        "source_name": "updated_list",
-                        "sink_id": node_id,
-                        "sink_name": "list",
-                        "is_static": False,
-                    }
-                )
-                logger.debug(f"Added self-reference for AddToList {node_id}")
-
-    agent["nodes"] = filtered_nodes + new_nodes
-    agent["links"] = filtered_links
-    return agent
-
-
-def fix_addtodictionary_blocks(agent: dict[str, Any]) -> dict[str, Any]:
-    """Fix AddToDictionary blocks by removing empty CreateDictionary nodes."""
-    nodes = agent.get("nodes", [])
-    links = agent.get("links", [])
-    nodes_to_remove = set()
-    links_to_remove = []
-
-    for link in links:
-        source_node = next(
-            (n for n in nodes if n.get("id") == link.get("source_id")), None
-        )
-        sink_node = next((n for n in nodes if n.get("id") == link.get("sink_id")), None)
-
-        if (
-            source_node
-            and sink_node
-            and source_node.get("block_id") == CREATEDICT_BLOCK_ID
-            and sink_node.get("block_id") == ADDTODICTIONARY_BLOCK_ID
-        ):
-            nodes_to_remove.add(source_node.get("id"))
-            links_to_remove.append(link)
-            logger.debug(f"Removing CreateDictionary {source_node.get('id')}")
-
-    agent["nodes"] = [n for n in nodes if n.get("id") not in nodes_to_remove]
-    agent["links"] = [link for link in links if link not in links_to_remove]
-    return agent
-
-
-def fix_code_execution_output(agent: dict[str, Any]) -> dict[str, Any]:
-    """Fix CodeExecutionBlock output: change 'response' to 'stdout_logs'."""
-    nodes = agent.get("nodes", [])
-    links = agent.get("links", [])
-
-    for link in links:
-        source_node = next(
-            (n for n in nodes if n.get("id") == link.get("source_id")), None
-        )
-        if (
-            source_node
-            and source_node.get("block_id") == CODE_EXECUTION_BLOCK_ID
-            and link.get("source_name") == "response"
-        ):
-            link["source_name"] = "stdout_logs"
-            logger.debug("Fixed CodeExecutionBlock output: response -> stdout_logs")
-
-    return agent
-
-
-def fix_data_sampling_sample_size(agent: dict[str, Any]) -> dict[str, Any]:
-    """Fix DataSamplingBlock by setting sample_size to 1 as default."""
-    nodes = agent.get("nodes", [])
-    links = agent.get("links", [])
-    links_to_remove = []
-
-    for node in nodes:
-        if node.get("block_id") == DATA_SAMPLING_BLOCK_ID:
-            node_id = node.get("id")
-            input_default = node.get("input_default", {})
-
-            # Remove links to sample_size
-            for link in links:
-                if (
-                    link.get("sink_id") == node_id
-                    and link.get("sink_name") == "sample_size"
-                ):
-                    links_to_remove.append(link)
-
-            # Set default
-            input_default["sample_size"] = 1
-            node["input_default"] = input_default
-            logger.debug(f"Fixed DataSamplingBlock {node_id} sample_size to 1")
-
-    if links_to_remove:
-        agent["links"] = [link for link in links if link not in links_to_remove]
-
-    return agent
-
-
-def fix_node_x_coordinates(agent: dict[str, Any]) -> dict[str, Any]:
-    """Fix node x-coordinates to ensure 800+ unit spacing between linked nodes."""
-    nodes = agent.get("nodes", [])
-    links = agent.get("links", [])
-    node_lookup = {n.get("id"): n for n in nodes}
-
-    for link in links:
-        source_id = link.get("source_id")
-        sink_id = link.get("sink_id")
-
-        source_node = node_lookup.get(source_id)
-        sink_node = node_lookup.get(sink_id)
-
-        if not source_node or not sink_node:
-            continue
-
-        source_pos = source_node.get("metadata", {}).get("position", {})
-        sink_pos = sink_node.get("metadata", {}).get("position", {})
-
-        source_x = source_pos.get("x", 0)
-        sink_x = sink_pos.get("x", 0)
-
-        if abs(sink_x - source_x) < 800:
-            new_x = source_x + 800
-            if "metadata" not in sink_node:
-                sink_node["metadata"] = {}
-            if "position" not in sink_node["metadata"]:
-                sink_node["metadata"]["position"] = {}
-            sink_node["metadata"]["position"]["x"] = new_x
-            logger.debug(f"Fixed node {sink_id} x: {sink_x} -> {new_x}")
-
-    return agent
-
-
-def fix_getcurrentdate_offset(agent: dict[str, Any]) -> dict[str, Any]:
-    """Fix GetCurrentDateBlock offset to ensure it's positive."""
-    for node in agent.get("nodes", []):
-        if node.get("block_id") == GET_CURRENT_DATE_BLOCK_ID:
-            input_default = node.get("input_default", {})
-            if "offset" in input_default:
-                offset = input_default["offset"]
-                if isinstance(offset, (int, float)) and offset < 0:
-                    input_default["offset"] = abs(offset)
-                    logger.debug(f"Fixed offset: {offset} -> {abs(offset)}")
-
-    return agent
-
-
-def fix_ai_model_parameter(
-    agent: dict[str, Any],
-    blocks_info: list[dict[str, Any]],
-    default_model: str = "gpt-4o",
-) -> dict[str, Any]:
-    """Add default model parameter to AI blocks if missing."""
-    block_map = {b.get("id"): b for b in blocks_info}
-
-    for node in agent.get("nodes", []):
-        block_id = node.get("block_id")
-        block = block_map.get(block_id)
-
-        if not block:
-            continue
-
-        # Check if block has AI category
-        categories = block.get("categories", [])
-        is_ai_block = any(
-            cat.get("category") == "AI" for cat in categories if isinstance(cat, dict)
-        )
-
-        if is_ai_block:
-            input_default = node.get("input_default", {})
-            if "model" not in input_default:
-                input_default["model"] = default_model
-                node["input_default"] = input_default
-                logger.debug(
-                    f"Added model '{default_model}' to AI block {node.get('id')}"
-                )
-
-    return agent
-
-
-def fix_link_static_properties(
-    agent: dict[str, Any], blocks_info: list[dict[str, Any]]
-) -> dict[str, Any]:
-    """Fix is_static property based on source block's staticOutput."""
-    block_map = {b.get("id"): b for b in blocks_info}
-    node_lookup = {n.get("id"): n for n in agent.get("nodes", [])}
-
-    for link in agent.get("links", []):
-        source_node = node_lookup.get(link.get("source_id"))
-        if not source_node:
-            continue
-
-        source_block = block_map.get(source_node.get("block_id"))
-        if not source_block:
-            continue
-
-        static_output = source_block.get("staticOutput", False)
-        if link.get("is_static") != static_output:
-            link["is_static"] = static_output
-            logger.debug(f"Fixed link {link.get('id')} is_static to {static_output}")
-
-    return agent
-
-
-def fix_data_type_mismatch(
-    agent: dict[str, Any], blocks_info: list[dict[str, Any]]
-) -> dict[str, Any]:
-    """Fix data type mismatches by inserting UniversalTypeConverterBlock."""
-    nodes = agent.get("nodes", [])
-    links = agent.get("links", [])
-    block_map = {b.get("id"): b for b in blocks_info}
-    node_lookup = {n.get("id"): n for n in nodes}
-
-    def get_property_type(schema: dict, name: str) -> str | None:
-        if "_#_" in name:
-            parent, child = name.split("_#_", 1)
-            parent_schema = schema.get(parent, {})
-            if "properties" in parent_schema:
-                return parent_schema["properties"].get(child, {}).get("type")
-            return None
-        return schema.get(name, {}).get("type")
-
-    def are_types_compatible(src: str, sink: str) -> bool:
-        if {src, sink} <= {"integer", "number"}:
-            return True
-        return src == sink
-
-    type_mapping = {
-        "string": "string",
-        "text": "string",
-        "integer": "number",
-        "number": "number",
-        "float": "number",
-        "boolean": "boolean",
-        "bool": "boolean",
-        "array": "list",
-        "list": "list",
-        "object": "dictionary",
-        "dict": "dictionary",
-        "dictionary": "dictionary",
-    }
-
-    new_links = []
-    nodes_to_add = []
-
-    for link in links:
-        source_node = node_lookup.get(link.get("source_id"))
-        sink_node = node_lookup.get(link.get("sink_id"))
-
-        if not source_node or not sink_node:
-            new_links.append(link)
-            continue
-
-        source_block = block_map.get(source_node.get("block_id"))
-        sink_block = block_map.get(sink_node.get("block_id"))
-
-        if not source_block or not sink_block:
-            new_links.append(link)
-            continue
-
-        source_outputs = source_block.get("outputSchema", {}).get("properties", {})
-        sink_inputs = sink_block.get("inputSchema", {}).get("properties", {})
-
-        source_type = get_property_type(source_outputs, link.get("source_name", ""))
-        sink_type = get_property_type(sink_inputs, link.get("sink_name", ""))
-
-        if (
-            source_type
-            and sink_type
-            and not are_types_compatible(source_type, sink_type)
-        ):
-            # Insert type converter
-            converter_id = str(uuid.uuid4())
-            target_type = type_mapping.get(sink_type, sink_type)
-
-            converter_node = {
-                "id": converter_id,
-                "block_id": UNIVERSAL_TYPE_CONVERTER_BLOCK_ID,
-                "input_default": {"type": target_type},
-                "metadata": {"position": {"x": 0, "y": 100}},
-            }
-            nodes_to_add.append(converter_node)
-
-            # source -> converter
-            new_links.append(
-                {
-                    "id": str(uuid.uuid4()),
-                    "source_id": link["source_id"],
-                    "source_name": link["source_name"],
-                    "sink_id": converter_id,
-                    "sink_name": "value",
-                    "is_static": False,
-                }
-            )
-
-            # converter -> sink
-            new_links.append(
-                {
-                    "id": str(uuid.uuid4()),
-                    "source_id": converter_id,
-                    "source_name": "value",
-                    "sink_id": link["sink_id"],
-                    "sink_name": link["sink_name"],
-                    "is_static": False,
-                }
-            )
-
-            logger.debug(f"Inserted type converter: {source_type} -> {target_type}")
-        else:
-            new_links.append(link)
-
-    if nodes_to_add:
-        agent["nodes"] = nodes + nodes_to_add
-        agent["links"] = new_links
-
-    return agent
-
-
-def apply_all_fixes(
-    agent: dict[str, Any], blocks_info: list[dict[str, Any]] | None = None
-) -> dict[str, Any]:
-    """Apply all fixes to an agent JSON.
-
-    Args:
-        agent: Agent JSON dict
-        blocks_info: Optional list of block info dicts for advanced fixes
-
-    Returns:
-        Fixed agent JSON
-    """
-    # Basic fixes (no block info needed)
-    agent = fix_agent_ids(agent)
-    agent = fix_double_curly_braces(agent)
-    agent = fix_storevalue_before_condition(agent)
-    agent = fix_addtolist_blocks(agent)
-    agent = fix_addtodictionary_blocks(agent)
-    agent = fix_code_execution_output(agent)
-    agent = fix_data_sampling_sample_size(agent)
-    agent = fix_node_x_coordinates(agent)
-    agent = fix_getcurrentdate_offset(agent)
-
-    # Advanced fixes (require block info)
-    if blocks_info is None:
-        blocks_info = get_blocks_info()
-
-    agent = fix_ai_model_parameter(agent, blocks_info)
-    agent = fix_link_static_properties(agent, blocks_info)
-    agent = fix_data_type_mismatch(agent, blocks_info)
-
-    return agent
--- a/autogpt_platform/backend/backend/api/features/chat/tools/agent_generator/prompts.py
+++ b/autogpt_platform/backend/backend/api/features/chat/tools/agent_generator/prompts.py
@@ -1,225 +0,0 @@
-"""Prompt templates for agent generation."""
-
-DECOMPOSITION_PROMPT = """
-You are an expert AutoGPT Workflow Decomposer. Your task is to analyze a user's high-level goal and break it down into a clear, step-by-step plan using the available blocks.
-
-Each step should represent a distinct, automatable action suitable for execution by an AI automation system.
-
---
-
-FIRST: Analyze the user's goal and determine:
-1) Design-time configuration (fixed settings that won't change per run)
-2) Runtime inputs (values the agent's end-user will provide each time it runs)
-
-For anything that can vary per run (email addresses, names, dates, search terms, etc.):
- DO NOT ask for the actual value
- Instead, define it as an Agent Input with a clear name, type, and description
-
-Only ask clarifying questions about design-time config that affects how you build the workflow:
- Which external service to use (e.g., "Gmail vs Outlook", "Notion vs Google Docs")
- Required formats or structures (e.g., "CSV, JSON, or PDF output?")
- Business rules that must be hard-coded
-
-IMPORTANT CLARIFICATIONS POLICY:
- Ask no more than five essential questions
- Do not ask for concrete values that can be provided at runtime as Agent Inputs
- Do not ask for API keys or credentials; the platform handles those directly
- If there is enough information to infer reasonable defaults, prefer to propose defaults
-
---
-
-GUIDELINES:
-1. List each step as a numbered item
-2. Describe the action clearly and specify inputs/outputs
-3. Ensure steps are in logical, sequential order
-4. Mention block names naturally (e.g., "Use GetWeatherByLocationBlock to...")
-5. Help the user reach their goal efficiently
-
---
-
-RULES:
-1. OUTPUT FORMAT: Only output either clarifying questions OR step-by-step instructions, not both
-2. USE ONLY THE BLOCKS PROVIDED
-3. ALL required_input fields must be provided
-4. Data types of linked properties must match
-5. Write expert-level prompts for AI-related blocks
-
---
-
-CRITICAL BLOCK RESTRICTIONS:
-1. AddToListBlock: Outputs updated list EVERY addition, not after all additions
-2. SendEmailBlock: Draft the email for user review; set SMTP config based on email type
-3. ConditionBlock: value2 is reference, value1 is contrast
-4. CodeExecutionBlock: DO NOT USE - use AI blocks instead
-5. ReadCsvBlock: Only use the 'rows' output, not 'row'
-
---
-
-OUTPUT FORMAT:
-
-If more information is needed:
-```json
-{{
-  "type": "clarifying_questions",
-  "questions": [
-    {{
-      "question": "Which email provider should be used? (Gmail, Outlook, custom SMTP)",
-      "keyword": "email_provider",
-      "example": "Gmail"
-    }}
-  ]
-}}
-```
-
-If ready to proceed:
-```json
-{{
-  "type": "instructions",
-  "steps": [
-    {{
-      "step_number": 1,
-      "block_name": "AgentShortTextInputBlock",
-      "description": "Get the URL of the content to analyze.",
-      "inputs": [{{"name": "name", "value": "URL"}}],
-      "outputs": [{{"name": "result", "description": "The URL entered by user"}}]
-    }}
-  ]
-}}
-```
-
---
-
-AVAILABLE BLOCKS:
-{block_summaries}
-"""
-
-GENERATION_PROMPT = """
-You are an expert AI workflow builder. Generate a valid agent JSON from the given instructions.
-
---
-
-NODES:
-Each node must include:
- `id`: Unique UUID v4 (e.g. `a8f5b1e2-c3d4-4e5f-8a9b-0c1d2e3f4a5b`)
- `block_id`: The block identifier (must match an Allowed Block)
- `input_default`: Dict of inputs (can be empty if no static inputs needed)
- `metadata`: Must contain:
-  - `position`: {{"x": number, "y": number}} - adjacent nodes should differ by 800+ in X
-  - `customized_name`: Clear name describing this block's purpose in the workflow
-
---
-
-LINKS:
-Each link connects a source node's output to a sink node's input:
- `id`: MUST be UUID v4 (NOT "link-1", "link-2", etc.)
- `source_id`: ID of the source node
- `source_name`: Output field name from the source block
- `sink_id`: ID of the sink node
- `sink_name`: Input field name on the sink block
- `is_static`: true only if source block has static_output: true
-
-CRITICAL: All IDs must be valid UUID v4 format!
-
---
-
-AGENT (GRAPH):
-Wrap nodes and links in:
- `id`: UUID of the agent
- `name`: Short, generic name (avoid specific company names, URLs)
- `description`: Short, generic description
- `nodes`: List of all nodes
- `links`: List of all links
- `version`: 1
- `is_active`: true
-
---
-
-TIPS:
- All required_input fields must be provided via input_default or a valid link
- Ensure consistent source_id and sink_id references
- Avoid dangling links
- Input/output pins must match block schemas
- Do not invent unknown block_ids
-
---
-
-ALLOWED BLOCKS:
-{block_summaries}
-
---
-
-Generate the complete agent JSON. Output ONLY valid JSON, no explanation.
-"""
-
-PATCH_PROMPT = """
-You are an expert at modifying AutoGPT agent workflows. Given the current agent and a modification request, generate a JSON patch to update the agent.
-
-CURRENT AGENT:
-{current_agent}
-
-AVAILABLE BLOCKS:
-{block_summaries}
-
---
-
-PATCH FORMAT:
-Return a JSON object with the following structure:
-
-```json
-{{
-  "type": "patch",
-  "intent": "Brief description of what the patch does",
-  "patches": [
-    {{
-      "type": "modify",
-      "node_id": "uuid-of-node-to-modify",
-      "changes": {{
-        "input_default": {{"field": "new_value"}},
-        "metadata": {{"customized_name": "New Name"}}
-      }}
-    }},
-    {{
-      "type": "add",
-      "new_nodes": [
-        {{
-          "id": "new-uuid",
-          "block_id": "block-uuid",
-          "input_default": {{}},
-          "metadata": {{"position": {{"x": 0, "y": 0}}, "customized_name": "Name"}}
-        }}
-      ],
-      "new_links": [
-        {{
-          "id": "link-uuid",
-          "source_id": "source-node-id",
-          "source_name": "output_field",
-          "sink_id": "sink-node-id",
-          "sink_name": "input_field"
-        }}
-      ]
-    }},
-    {{
-      "type": "remove",
-      "node_ids": ["uuid-of-node-to-remove"],
-      "link_ids": ["uuid-of-link-to-remove"]
-    }}
-  ]
-}}
-```
-
-If you need more information, return:
-```json
-{{
-  "type": "clarifying_questions",
-  "questions": [
-    {{
-      "question": "What specific change do you want?",
-      "keyword": "change_type",
-      "example": "Add error handling"
-    }}
-  ]
-}}
-```
-
-Generate the minimal patch needed. Output ONLY valid JSON.
-"""
--- a/autogpt_platform/backend/backend/api/features/chat/tools/agent_generator/service.py
+++ b/autogpt_platform/backend/backend/api/features/chat/tools/agent_generator/service.py
@@ -0,0 +1,269 @@
+"""External Agent Generator service client.
+
+This module provides a client for communicating with the external Agent Generator
+microservice. The service counts as configured when AGENTGENERATOR_HOST is set; the
+request helpers below log any failure and return None rather than raising.
+"""
+
+import logging
+from typing import Any
+
+import httpx
+
+from backend.util.settings import Settings
+
+logger = logging.getLogger(__name__)
+
+_client: httpx.AsyncClient | None = None
+_settings: Settings | None = None
+
+
+def _get_settings() -> Settings:
+    """Return the module-level Settings instance, creating it on first call."""
+    global _settings
+    # Settings() is only constructed once; later calls reuse the cached object.
+    _settings = Settings() if _settings is None else _settings
+    return _settings
+
+
+def is_external_service_configured() -> bool:
+    """Report whether a host for the external Agent Generator has been set."""
+    host = _get_settings().config.agentgenerator_host
+    return bool(host)
+
+
+def _get_base_url() -> str:
+    """Build the ``http://host:port`` root URL of the external service."""
+    config = _get_settings().config
+    host, port = config.agentgenerator_host, config.agentgenerator_port
+    # The scheme is fixed to plain HTTP here; it is not read from settings.
+    return f"http://{host}:{port}"
+
+
+def _get_client() -> httpx.AsyncClient:
+    """Return the shared AsyncClient for the service, creating it on first use."""
+    global _client
+    if _client is not None:
+        return _client
+    # First call: bind the client to the configured base URL and timeout.
+    base_url = _get_base_url()
+    timeout = httpx.Timeout(_get_settings().config.agentgenerator_timeout)
+    _client = httpx.AsyncClient(base_url=base_url, timeout=timeout)
+    return _client
+
+
+async def decompose_goal_external(
+    description: str, context: str = ""
+) -> dict[str, Any] | None:
+    """Ask the external service to break a goal description into steps.
+
+    Args:
+        description: Natural language goal description
+        context: Additional context (e.g., answers to previous questions);
+            sent as ``user_instruction`` and left out of the payload when empty
+
+    Returns:
+        One of these dicts, or None if the call fails (the failure is logged):
+        - {"type": "clarifying_questions", "questions": [...]}
+        - {"type": "instructions", "steps": [...]}
+        - {"type": "unachievable_goal", "reason": ..., "suggested_goal": ...}
+        - {"type": "vague_goal", "suggested_goal": ...}
+    """
+    client = _get_client()
+
+    # Build the request payload
+    payload: dict[str, Any] = {"description": description}
+    if context:
+        # The external service uses user_instruction for additional context
+        payload["user_instruction"] = context
+
+    try:
+        response = await client.post("/api/decompose-description", json=payload)
+        response.raise_for_status()
+        data = response.json()
+
+        if not data.get("success"):
+            logger.error(f"External service returned error: {data.get('error')}")
+            return None
+
+        # Keep only the keys relevant to each response type
+        response_type = data.get("type")
+        if response_type == "instructions":
+            return {"type": "instructions", "steps": data.get("steps", [])}
+        elif response_type == "clarifying_questions":
+            return {
+                "type": "clarifying_questions",
+                "questions": data.get("questions", []),
+            }
+        elif response_type == "unachievable_goal":
+            return {
+                "type": "unachievable_goal",
+                "reason": data.get("reason"),
+                "suggested_goal": data.get("suggested_goal"),
+            }
+        elif response_type == "vague_goal":
+            return {
+                "type": "vague_goal",
+                "suggested_goal": data.get("suggested_goal"),
+            }
+        else:
+            logger.error(
+                f"Unknown response type from external service: {response_type}"
+            )
+            return None
+
+    except httpx.HTTPStatusError as e:
+        logger.error(f"HTTP error calling external agent generator: {e}")
+        return None
+    except httpx.RequestError as e:
+        logger.error(f"Request error calling external agent generator: {e}")
+        return None
+    except Exception as e:
+        logger.exception(f"Unexpected error calling external agent generator: {e}")
+        return None
+
+
+async def generate_agent_external(
+    instructions: dict[str, Any]
+) -> dict[str, Any] | None:
+    """Call the external service to generate an agent from instructions.
+
+    Args:
+        instructions: Structured instructions from decompose_goal
+
+    Returns:
+        The response's ``agent_json`` value, or None on error (errors are logged)
+    """
+    client = _get_client()
+
+    try:
+        response = await client.post(
+            "/api/generate-agent", json={"instructions": instructions}
+        )
+        response.raise_for_status()
+        data = response.json()
+
+        if not data.get("success"):
+            logger.error(f"External service returned error: {data.get('error')}")
+            return None
+
+        return data.get("agent_json")
+
+    except httpx.HTTPStatusError as e:
+        logger.error(f"HTTP error calling external agent generator: {e}")
+        return None
+    except httpx.RequestError as e:
+        logger.error(f"Request error calling external agent generator: {e}")
+        return None
+    except Exception as e:
+        logger.exception(f"Unexpected error calling external agent generator: {e}")
+        return None
+
+
+async def generate_agent_patch_external(
+    update_request: str, current_agent: dict[str, Any]
+) -> dict[str, Any] | None:
+    """Call the external service to apply requested changes to an existing agent.
+
+    Args:
+        update_request: Natural language description of changes
+        current_agent: Current agent JSON
+
+    Returns:
+        Updated agent JSON, clarifying questions dict, or None on error
+    """
+    client = _get_client()
+
+    try:
+        response = await client.post(
+            "/api/update-agent",
+            json={
+                "update_request": update_request,
+                "current_agent_json": current_agent,
+            },
+        )
+        response.raise_for_status()
+        data = response.json()
+
+        if not data.get("success"):
+            logger.error(f"External service returned error: {data.get('error')}")
+            return None
+
+        # Check if it's clarifying questions
+        if data.get("type") == "clarifying_questions":
+            return {
+                "type": "clarifying_questions",
+                "questions": data.get("questions", []),
+            }
+
+        # Otherwise return the updated agent JSON
+        return data.get("agent_json")
+
+    except httpx.HTTPStatusError as e:
+        logger.error(f"HTTP error calling external agent generator: {e}")
+        return None
+    except httpx.RequestError as e:
+        logger.error(f"Request error calling external agent generator: {e}")
+        return None
+    except Exception as e:
+        logger.exception(f"Unexpected error calling external agent generator: {e}")
+        return None
+
+
+async def get_blocks_external() -> list[dict[str, Any]] | None:
+    """Get available blocks from the external service.
+
+    Returns:
+        List of block info dicts (empty if the reply has none), or None on error
+    """
+    client = _get_client()
+
+    try:
+        response = await client.get("/api/blocks")
+        response.raise_for_status()
+        data = response.json()
+
+        if not data.get("success"):
+            logger.error("External service returned error getting blocks")
+            return None
+
+        return data.get("blocks", [])
+
+    except httpx.HTTPStatusError as e:
+        logger.error(f"HTTP error getting blocks from external service: {e}")
+        return None
+    except httpx.RequestError as e:
+        logger.error(f"Request error getting blocks from external service: {e}")
+        return None
+    except Exception as e:
+        logger.exception(f"Unexpected error getting blocks from external service: {e}")
+        return None
+
+
+async def health_check() -> bool:
+    """Check if the external service is healthy.
+
+    Returns True only when the service is configured and /health reports
+    status "healthy" with a truthy ``blocks_loaded``; False in every other case.
+    """
+    if not is_external_service_configured():
+        return False
+
+    client = _get_client()
+
+    try:
+        response = await client.get("/health")
+        response.raise_for_status()
+        data = response.json()
+        return bool(data.get("status") == "healthy" and data.get("blocks_loaded"))
+    except Exception as e:
+        logger.warning(f"External agent generator health check failed: {e}")
+        return False
+
+
+async def close_client() -> None:
+    """Close the shared HTTP client, if any, and forget it (safe to call twice)."""
+    global _client
+    if _client is not None:
+        client, _client = _client, None  # clear first so a failed close is not reused
+        await client.aclose()
--- a/autogpt_platform/backend/backend/api/features/chat/tools/agent_generator/utils.py
+++ b/autogpt_platform/backend/backend/api/features/chat/tools/agent_generator/utils.py
@@ -1,213 +0,0 @@
-"""Utilities for agent generation."""
-
-import json
-import re
-from typing import Any
-
-from backend.data.block import get_blocks
-
-# UUID validation regex
-UUID_REGEX = re.compile(
-    r"^[a-f0-9]{8}-[a-f0-9]{4}-4[a-f0-9]{3}-[89ab][a-f0-9]{3}-[a-f0-9]{12}$"
-)
-
-# Block IDs for various fixes
-STORE_VALUE_BLOCK_ID = "1ff065e9-88e8-4358-9d82-8dc91f622ba9"
-CONDITION_BLOCK_ID = "715696a0-e1da-45c8-b209-c2fa9c3b0be6"
-ADDTOLIST_BLOCK_ID = "aeb08fc1-2fc1-4141-bc8e-f758f183a822"
-ADDTODICTIONARY_BLOCK_ID = "31d1064e-7446-4693-a7d4-65e5ca1180d1"
-CREATELIST_BLOCK_ID = "a912d5c7-6e00-4542-b2a9-8034136930e4"
-CREATEDICT_BLOCK_ID = "b924ddf4-de4f-4b56-9a85-358930dcbc91"
-CODE_EXECUTION_BLOCK_ID = "0b02b072-abe7-11ef-8372-fb5d162dd712"
-DATA_SAMPLING_BLOCK_ID = "4a448883-71fa-49cf-91cf-70d793bd7d87"
-UNIVERSAL_TYPE_CONVERTER_BLOCK_ID = "95d1b990-ce13-4d88-9737-ba5c2070c97b"
-GET_CURRENT_DATE_BLOCK_ID = "b29c1b50-5d0e-4d9f-8f9d-1b0e6fcbf0b1"
-
-DOUBLE_CURLY_BRACES_BLOCK_IDS = [
-    "44f6c8ad-d75c-4ae1-8209-aad1c0326928",  # FillTextTemplateBlock
-    "6ab085e2-20b3-4055-bc3e-08036e01eca6",
-    "90f8c45e-e983-4644-aa0b-b4ebe2f531bc",
-    "363ae599-353e-4804-937e-b2ee3cef3da4",  # AgentOutputBlock
-    "3b191d9f-356f-482d-8238-ba04b6d18381",
-    "db7d8f02-2f44-4c55-ab7a-eae0941f0c30",
-    "3a7c4b8d-6e2f-4a5d-b9c1-f8d23c5a9b0e",
-    "ed1ae7a0-b770-4089-b520-1f0005fad19a",
-    "a892b8d9-3e4e-4e9c-9c1e-75f8efcf1bfa",
-    "b29c1b50-5d0e-4d9f-8f9d-1b0e6fcbf0b1",
-    "716a67b3-6760-42e7-86dc-18645c6e00fc",
-    "530cf046-2ce0-4854-ae2c-659db17c7a46",
-    "ed55ac19-356e-4243-a6cb-bc599e9b716f",
-    "1f292d4a-41a4-4977-9684-7c8d560b9f91",  # LLM blocks
-    "32a87eab-381e-4dd4-bdb8-4c47151be35a",
-]
-
-
-def is_valid_uuid(value: str) -> bool:
-    """Check if a string is a valid UUID v4."""
-    return isinstance(value, str) and UUID_REGEX.match(value) is not None
-
-
-def _compact_schema(schema: dict) -> dict[str, str]:
-    """Extract compact type info from a JSON schema properties dict.
-
-    Returns a dict of {field_name: type_string} for essential info only.
-    """
-    props = schema.get("properties", {})
-    result = {}
-
-    for name, prop in props.items():
-        # Skip internal/complex fields
-        if name.startswith("_"):
-            continue
-
-        # Get type string
-        type_str = prop.get("type", "any")
-
-        # Handle anyOf/oneOf (optional types)
-        if "anyOf" in prop:
-            types = [t.get("type", "?") for t in prop["anyOf"] if t.get("type")]
-            type_str = "|".join(types) if types else "any"
-        elif "allOf" in prop:
-            type_str = "object"
-
-        # Add array item type if present
-        if type_str == "array" and "items" in prop:
-            items = prop["items"]
-            if isinstance(items, dict):
-                item_type = items.get("type", "any")
-                type_str = f"array[{item_type}]"
-
-        result[name] = type_str
-
-    return result
-
-
-def get_block_summaries(include_schemas: bool = True) -> str:
-    """Generate compact block summaries for prompts.
-
-    Args:
-        include_schemas: Whether to include input/output type info
-
-    Returns:
-        Formatted string of block summaries (compact format)
-    """
-    blocks = get_blocks()
-    summaries = []
-
-    for block_id, block_cls in blocks.items():
-        block = block_cls()
-        name = block.name
-        desc = getattr(block, "description", "") or ""
-
-        # Truncate description
-        if len(desc) > 150:
-            desc = desc[:147] + "..."
-
-        if not include_schemas:
-            summaries.append(f"- {name} (id: {block_id}): {desc}")
-        else:
-            # Compact format with type info only
-            inputs = {}
-            outputs = {}
-            required = []
-
-            if hasattr(block, "input_schema"):
-                try:
-                    schema = block.input_schema.jsonschema()
-                    inputs = _compact_schema(schema)
-                    required = schema.get("required", [])
-                except Exception:
-                    pass
-
-            if hasattr(block, "output_schema"):
-                try:
-                    schema = block.output_schema.jsonschema()
-                    outputs = _compact_schema(schema)
-                except Exception:
-                    pass
-
-            # Build compact line format
-            # Format: NAME (id): desc | in: {field:type, ...} [required] | out: {field:type}
-            in_str = ", ".join(f"{k}:{v}" for k, v in inputs.items())
-            out_str = ", ".join(f"{k}:{v}" for k, v in outputs.items())
-            req_str = f" req=[{','.join(required)}]" if required else ""
-
-            static = " [static]" if getattr(block, "static_output", False) else ""
-
-            line = f"- {name} (id: {block_id}): {desc}"
-            if in_str:
-                line += f"\n  in: {{{in_str}}}{req_str}"
-            if out_str:
-                line += f"\n  out: {{{out_str}}}{static}"
-
-            summaries.append(line)
-
-    return "\n".join(summaries)
-
-
-def get_blocks_info() -> list[dict[str, Any]]:
-    """Get block information with schemas for validation and fixing."""
-    blocks = get_blocks()
-    blocks_info = []
-    for block_id, block_cls in blocks.items():
-        block = block_cls()
-        blocks_info.append(
-            {
-                "id": block_id,
-                "name": block.name,
-                "description": getattr(block, "description", ""),
-                "categories": getattr(block, "categories", []),
-                "staticOutput": getattr(block, "static_output", False),
-                "inputSchema": (
-                    block.input_schema.jsonschema()
-                    if hasattr(block, "input_schema")
-                    else {}
-                ),
-                "outputSchema": (
-                    block.output_schema.jsonschema()
-                    if hasattr(block, "output_schema")
-                    else {}
-                ),
-            }
-        )
-    return blocks_info
-
-
-def parse_json_from_llm(text: str) -> dict[str, Any] | None:
-    """Extract JSON from LLM response (handles markdown code blocks)."""
-    if not text:
-        return None
-
-    # Try fenced code block
-    match = re.search(r"```(?:json)?\s*([\s\S]*?)```", text, re.IGNORECASE)
-    if match:
-        try:
-            return json.loads(match.group(1).strip())
-        except json.JSONDecodeError:
-            pass
-
-    # Try raw text
-    try:
-        return json.loads(text.strip())
-    except json.JSONDecodeError:
-        pass
-
-    # Try finding {...} span
-    start = text.find("{")
-    end = text.rfind("}")
-    if start != -1 and end > start:
-        try:
-            return json.loads(text[start : end + 1])
-        except json.JSONDecodeError:
-            pass
-
-    # Try finding [...] span
-    start = text.find("[")
-    end = text.rfind("]")
-    if start != -1 and end > start:
-        try:
-            return json.loads(text[start : end + 1])
-        except json.JSONDecodeError:
-            pass
-
-    return None
--- a/autogpt_platform/backend/backend/api/features/chat/tools/agent_generator/validator.py
+++ b/autogpt_platform/backend/backend/api/features/chat/tools/agent_generator/validator.py
@@ -1,279 +0,0 @@
-"""Agent validator - Validates agent structure and connections."""
-
-import logging
-import re
-from typing import Any
-
-from .utils import get_blocks_info
-
-logger = logging.getLogger(__name__)
-
-
-class AgentValidator:
-    """Validator for AutoGPT agents with detailed error reporting."""
-
-    def __init__(self):
-        self.errors: list[str] = []
-
-    def add_error(self, error: str) -> None:
-        """Add an error message."""
-        self.errors.append(error)
-
-    def validate_block_existence(
-        self, agent: dict[str, Any], blocks_info: list[dict[str, Any]]
-    ) -> bool:
-        """Validate all block IDs exist in the blocks library."""
-        valid = True
-        valid_block_ids = {b.get("id") for b in blocks_info if b.get("id")}
-
-        for node in agent.get("nodes", []):
-            block_id = node.get("block_id")
-            node_id = node.get("id")
-
-            if not block_id:
-                self.add_error(f"Node '{node_id}' is missing 'block_id' field.")
-                valid = False
-                continue
-
-            if block_id not in valid_block_ids:
-                self.add_error(
-                    f"Node '{node_id}' references block_id '{block_id}' which does not exist."
-                )
-                valid = False
-
-        return valid
-
-    def validate_link_node_references(self, agent: dict[str, Any]) -> bool:
-        """Validate all node IDs referenced in links exist."""
-        valid = True
-        valid_node_ids = {n.get("id") for n in agent.get("nodes", []) if n.get("id")}
-
-        for link in agent.get("links", []):
-            link_id = link.get("id", "Unknown")
-            source_id = link.get("source_id")
-            sink_id = link.get("sink_id")
-
-            if not source_id:
-                self.add_error(f"Link '{link_id}' is missing 'source_id'.")
-                valid = False
-            elif source_id not in valid_node_ids:
-                self.add_error(
-                    f"Link '{link_id}' references non-existent source_id '{source_id}'."
-                )
-                valid = False
-
-            if not sink_id:
-                self.add_error(f"Link '{link_id}' is missing 'sink_id'.")
-                valid = False
-            elif sink_id not in valid_node_ids:
-                self.add_error(
-                    f"Link '{link_id}' references non-existent sink_id '{sink_id}'."
-                )
-                valid = False
-
-        return valid
-
-    def validate_required_inputs(
-        self, agent: dict[str, Any], blocks_info: list[dict[str, Any]]
-    ) -> bool:
-        """Validate required inputs are provided."""
-        valid = True
-        block_map = {b.get("id"): b for b in blocks_info}
-
-        for node in agent.get("nodes", []):
-            block_id = node.get("block_id")
-            block = block_map.get(block_id)
-
-            if not block:
-                continue
-
-            required_inputs = block.get("inputSchema", {}).get("required", [])
-            input_defaults = node.get("input_default", {})
-            node_id = node.get("id")
-
-            # Get linked inputs
-            linked_inputs = {
-                link["sink_name"]
-                for link in agent.get("links", [])
-                if link.get("sink_id") == node_id
-            }
-
-            for req_input in required_inputs:
-                if (
-                    req_input not in input_defaults
-                    and req_input not in linked_inputs
-                    and req_input != "credentials"
-                ):
-                    block_name = block.get("name", "Unknown Block")
-                    self.add_error(
-                        f"Node '{node_id}' ({block_name}) is missing required input '{req_input}'."
-                    )
-                    valid = False
-
-        return valid
-
-    def validate_data_type_compatibility(
-        self, agent: dict[str, Any], blocks_info: list[dict[str, Any]]
-    ) -> bool:
-        """Validate linked data types are compatible."""
-        valid = True
-        block_map = {b.get("id"): b for b in blocks_info}
-        node_lookup = {n.get("id"): n for n in agent.get("nodes", [])}
-
-        def get_type(schema: dict, name: str) -> str | None:
-            if "_#_" in name:
-                parent, child = name.split("_#_", 1)
-                parent_schema = schema.get(parent, {})
-                if "properties" in parent_schema:
-                    return parent_schema["properties"].get(child, {}).get("type")
-                return None
-            return schema.get(name, {}).get("type")
-
-        def are_compatible(src: str, sink: str) -> bool:
-            if {src, sink} <= {"integer", "number"}:
-                return True
-            return src == sink
-
-        for link in agent.get("links", []):
-            source_node = node_lookup.get(link.get("source_id"))
-            sink_node = node_lookup.get(link.get("sink_id"))
-
-            if not source_node or not sink_node:
-                continue
-
-            source_block = block_map.get(source_node.get("block_id"))
-            sink_block = block_map.get(sink_node.get("block_id"))
-
-            if not source_block or not sink_block:
-                continue
-
-            source_outputs = source_block.get("outputSchema", {}).get("properties", {})
-            sink_inputs = sink_block.get("inputSchema", {}).get("properties", {})
-
-            source_type = get_type(source_outputs, link.get("source_name", ""))
-            sink_type = get_type(sink_inputs, link.get("sink_name", ""))
-
-            if source_type and sink_type and not are_compatible(source_type, sink_type):
-                self.add_error(
-                    f"Type mismatch: {source_block.get('name')} output '{link['source_name']}' "
-                    f"({source_type}) -> {sink_block.get('name')} input '{link['sink_name']}' ({sink_type})."
-                )
-                valid = False
-
-        return valid
-
-    def validate_nested_sink_links(
-        self, agent: dict[str, Any], blocks_info: list[dict[str, Any]]
-    ) -> bool:
-        """Validate nested sink links (with _#_ notation)."""
-        valid = True
-        block_map = {b.get("id"): b for b in blocks_info}
-        node_lookup = {n.get("id"): n for n in agent.get("nodes", [])}
-
-        for link in agent.get("links", []):
-            sink_name = link.get("sink_name", "")
-
-            if "_#_" in sink_name:
-                parent, child = sink_name.split("_#_", 1)
-
-                sink_node = node_lookup.get(link.get("sink_id"))
-                if not sink_node:
-                    continue
-
-                block = block_map.get(sink_node.get("block_id"))
-                if not block:
-                    continue
-
-                input_props = block.get("inputSchema", {}).get("properties", {})
-                parent_schema = input_props.get(parent)
-
-                if not parent_schema:
-                    self.add_error(
-                        f"Invalid nested link '{sink_name}': parent '{parent}' not found."
-                    )
-                    valid = False
-                    continue
-
-                if not parent_schema.get("additionalProperties"):
-                    if not (
-                        isinstance(parent_schema, dict)
-                        and "properties" in parent_schema
-                        and child in parent_schema.get("properties", {})
-                    ):
-                        self.add_error(
-                            f"Invalid nested link '{sink_name}': child '{child}' not found in '{parent}'."
-                        )
-                        valid = False
-
-        return valid
-
-    def validate_prompt_spaces(self, agent: dict[str, Any]) -> bool:
-        """Validate prompts don't have spaces in template variables."""
-        valid = True
-
-        for node in agent.get("nodes", []):
-            input_default = node.get("input_default", {})
-            prompt = input_default.get("prompt", "")
-
-            if not isinstance(prompt, str):
-                continue
-
-            # Find {{...}} with spaces
-            matches = re.finditer(r"\{\{([^}]+)\}\}", prompt)
-            for match in matches:
-                content = match.group(1)
-                if " " in content:
-                    self.add_error(
-                        f"Node '{node.get('id')}' has spaces in template variable: "
-                        f"'{{{{{content}}}}}' should be '{{{{{content.replace(' ', '_')}}}}}'."
-                    )
-                    valid = False
-
-        return valid
-
-    def validate(
-        self, agent: dict[str, Any], blocks_info: list[dict[str, Any]] | None = None
-    ) -> tuple[bool, str | None]:
-        """Run all validations.
-
-        Returns:
-            Tuple of (is_valid, error_message)
-        """
-        self.errors = []
-
-        if blocks_info is None:
-            blocks_info = get_blocks_info()
-
-        checks = [
-            self.validate_block_existence(agent, blocks_info),
-            self.validate_link_node_references(agent),
-            self.validate_required_inputs(agent, blocks_info),
-            self.validate_data_type_compatibility(agent, blocks_info),
-            self.validate_nested_sink_links(agent, blocks_info),
-            self.validate_prompt_spaces(agent),
-        ]
-
-        all_passed = all(checks)
-
-        if all_passed:
-            logger.info("Agent validation successful")
-            return True, None
-
-        error_message = "Agent validation failed:\n"
-        for i, error in enumerate(self.errors, 1):
-            error_message += f"{i}. {error}\n"
-
-        logger.warning(f"Agent validation failed with {len(self.errors)} errors")
-        return False, error_message
-
-
-def validate_agent(
-    agent: dict[str, Any], blocks_info: list[dict[str, Any]] | None = None
-) -> tuple[bool, str | None]:
-    """Convenience function to validate an agent.
-
-    Returns:
-        Tuple of (is_valid, error_message)
-    """
-    validator = AgentValidator()
-    return validator.validate(agent, blocks_info)
--- a/autogpt_platform/backend/backend/api/features/chat/tools/create_agent.py
+++ b/autogpt_platform/backend/backend/api/features/chat/tools/create_agent.py
@@ -8,12 +8,10 @@ from langfuse import observe
 from backend.api.features.chat.model import ChatSession

 from .agent_generator import (
-    apply_all_fixes,
+    AgentGeneratorNotConfiguredError,
    decompose_goal,
    generate_agent,
-    get_blocks_info,
    save_agent_to_library,
-    validate_agent,
 )
 from .base import BaseTool
 from .models import (
@@ -27,9 +25,6 @@ from .models import (

 logger = logging.getLogger(__name__)

-# Maximum retries for agent generation with validation feedback
-MAX_GENERATION_RETRIES = 2
-

 class CreateAgentTool(BaseTool):
    """Tool for creating agents from natural language descriptions."""
@@ -91,9 +86,8 @@ class CreateAgentTool(BaseTool):

        Flow:
        1. Decompose the description into steps (may return clarifying questions)
-        2. Generate agent JSON from the steps
-        3. Apply fixes to correct common LLM errors
-        4. Preview or save based on the save parameter
+        2. Generate agent JSON (external service handles fixing and validation)
+        3. Preview or save based on the save parameter
        """
        description = kwargs.get("description", "").strip()
        context = kwargs.get("context", "")
@@ -110,11 +104,13 @@ class CreateAgentTool(BaseTool):
        # Step 1: Decompose goal into steps
        try:
            decomposition_result = await decompose_goal(description, context)
-        except ValueError as e:
-            # Handle missing API key or configuration errors
+        except AgentGeneratorNotConfiguredError:
            return ErrorResponse(
-                message=f"Agent generation is not configured: {str(e)}",
-                error="configuration_error",
+                message=(
+                    "Agent generation is not available. "
+                    "The Agent Generator service is not configured."
+                ),
+                error="service_not_configured",
                session_id=session_id,
            )

@@ -171,72 +167,32 @@ class CreateAgentTool(BaseTool):
                session_id=session_id,
            )

-        # Step 2: Generate agent JSON with retry on validation failure
-        blocks_info = get_blocks_info()
-        agent_json = None
-        validation_errors = None
-
-        for attempt in range(MAX_GENERATION_RETRIES + 1):
-            # Generate agent (include validation errors from previous attempt)
-            if attempt == 0:
-                agent_json = await generate_agent(decomposition_result)
-            else:
-                # Retry with validation error feedback
-                logger.info(
-                    f"Retry {attempt}/{MAX_GENERATION_RETRIES} with validation feedback"
-                )
-                retry_instructions = {
-                    **decomposition_result,
-                    "previous_errors": validation_errors,
-                    "retry_instructions": (
-                        "The previous generation had validation errors. "
-                        "Please fix these issues in the new generation:\n"
-                        f"{validation_errors}"
-                    ),
-                }
-                agent_json = await generate_agent(retry_instructions)
-
-            if agent_json is None:
-                if attempt == MAX_GENERATION_RETRIES:
-                    return ErrorResponse(
-                        message="Failed to generate the agent. Please try again.",
-                        error="Generation failed",
-                        session_id=session_id,
-                    )
-                continue
-
-            # Step 3: Apply fixes to correct common errors
-            agent_json = apply_all_fixes(agent_json, blocks_info)
-
-            # Step 4: Validate the agent
-            is_valid, validation_errors = validate_agent(agent_json, blocks_info)
-
-            if is_valid:
-                logger.info(f"Agent generated successfully on attempt {attempt + 1}")
-                break
-
-            logger.warning(
-                f"Validation failed on attempt {attempt + 1}: {validation_errors}"
+        # Step 2: Generate agent JSON (external service handles fixing and validation)
+        try:
+            agent_json = await generate_agent(decomposition_result)
+        except AgentGeneratorNotConfiguredError:
+            return ErrorResponse(
+                message=(
+                    "Agent generation is not available. "
+                    "The Agent Generator service is not configured."
+                ),
+                error="service_not_configured",
+                session_id=session_id,
            )

-            if attempt == MAX_GENERATION_RETRIES:
-                # Return error with validation details
-                return ErrorResponse(
-                    message=(
-                        f"Generated agent has validation errors after {MAX_GENERATION_RETRIES + 1} attempts. "
-                        f"Please try rephrasing your request or simplify the workflow."
-                    ),
-                    error="validation_failed",
-                    details={"validation_errors": validation_errors},
-                    session_id=session_id,
-                )
+        if agent_json is None:
+            return ErrorResponse(
+                message="Failed to generate the agent. Please try again.",
+                error="Generation failed",
+                session_id=session_id,
+            )

        agent_name = agent_json.get("name", "Generated Agent")
        agent_description = agent_json.get("description", "")
        node_count = len(agent_json.get("nodes", []))
        link_count = len(agent_json.get("links", []))

-        # Step 4: Preview or save
+        # Step 3: Preview or save
        if not save:
            return AgentPreviewResponse(
                message=(
--- a/autogpt_platform/backend/backend/api/features/chat/tools/edit_agent.py
+++ b/autogpt_platform/backend/backend/api/features/chat/tools/edit_agent.py
@@ -8,13 +8,10 @@ from langfuse import observe
 from backend.api.features.chat.model import ChatSession

 from .agent_generator import (
-    apply_agent_patch,
-    apply_all_fixes,
+    AgentGeneratorNotConfiguredError,
    generate_agent_patch,
    get_agent_as_json,
-    get_blocks_info,
    save_agent_to_library,
-    validate_agent,
 )
 from .base import BaseTool
 from .models import (
@@ -28,9 +25,6 @@ from .models import (

 logger = logging.getLogger(__name__)

-# Maximum retries for patch generation with validation feedback
-MAX_GENERATION_RETRIES = 2
-

 class EditAgentTool(BaseTool):
    """Tool for editing existing agents using natural language."""
@@ -43,7 +37,7 @@ class EditAgentTool(BaseTool):
    def description(self) -> str:
        return (
            "Edit an existing agent from the user's library using natural language. "
-            "Generates a patch to update the agent while preserving unchanged parts."
+            "Generates updates to the agent while preserving unchanged parts."
        )

    @property
@@ -98,9 +92,8 @@ class EditAgentTool(BaseTool):

        Flow:
        1. Fetch the current agent
-        2. Generate a patch based on the requested changes
-        3. Apply the patch to create an updated agent
-        4. Preview or save based on the save parameter
+        2. Generate updated agent (external service handles fixing and validation)
+        3. Preview or save based on the save parameter
        """
        agent_id = kwargs.get("agent_id", "").strip()
        changes = kwargs.get("changes", "").strip()
@@ -137,121 +130,58 @@ class EditAgentTool(BaseTool):
        if context:
            update_request = f"{changes}\n\nAdditional context:\n{context}"

-        # Step 2: Generate patch with retry on validation failure
-        blocks_info = get_blocks_info()
-        updated_agent = None
-        validation_errors = None
-        intent = "Applied requested changes"
-
-        for attempt in range(MAX_GENERATION_RETRIES + 1):
-            # Generate patch (include validation errors from previous attempt)
-            try:
-                if attempt == 0:
-                    patch_result = await generate_agent_patch(
-                        update_request, current_agent
-                    )
-                else:
-                    # Retry with validation error feedback
-                    logger.info(
-                        f"Retry {attempt}/{MAX_GENERATION_RETRIES} with validation feedback"
-                    )
-                    retry_request = (
-                        f"{update_request}\n\n"
-                        f"IMPORTANT: The previous edit had validation errors. "
-                        f"Please fix these issues:\n{validation_errors}"
-                    )
-                    patch_result = await generate_agent_patch(
-                        retry_request, current_agent
-                    )
-            except ValueError as e:
-                # Handle missing API key or configuration errors
-                return ErrorResponse(
-                    message=f"Agent generation is not configured: {str(e)}",
-                    error="configuration_error",
-                    session_id=session_id,
-                )
-
-            if patch_result is None:
-                if attempt == MAX_GENERATION_RETRIES:
-                    return ErrorResponse(
-                        message="Failed to generate changes. Please try rephrasing.",
-                        error="Patch generation failed",
-                        session_id=session_id,
-                    )
-                continue
-
-            # Check if LLM returned clarifying questions
-            if patch_result.get("type") == "clarifying_questions":
-                questions = patch_result.get("questions", [])
-                return ClarificationNeededResponse(
-                    message=(
-                        "I need some more information about the changes. "
-                        "Please answer the following questions:"
-                    ),
-                    questions=[
-                        ClarifyingQuestion(
-                            question=q.get("question", ""),
-                            keyword=q.get("keyword", ""),
-                            example=q.get("example"),
-                        )
-                        for q in questions
-                    ],
-                    session_id=session_id,
-                )
-
-            # Step 3: Apply patch and fixes
-            try:
-                updated_agent = apply_agent_patch(current_agent, patch_result)
-                updated_agent = apply_all_fixes(updated_agent, blocks_info)
-            except Exception as e:
-                if attempt == MAX_GENERATION_RETRIES:
-                    return ErrorResponse(
-                        message=f"Failed to apply changes: {str(e)}",
-                        error="patch_apply_failed",
-                        details={"exception": str(e)},
-                        session_id=session_id,
-                    )
-                validation_errors = str(e)
-                continue
-
-            # Step 4: Validate the updated agent
-            is_valid, validation_errors = validate_agent(updated_agent, blocks_info)
-
-            if is_valid:
-                logger.info(f"Agent edited successfully on attempt {attempt + 1}")
-                intent = patch_result.get("intent", "Applied requested changes")
-                break
-
-            logger.warning(
-                f"Validation failed on attempt {attempt + 1}: {validation_errors}"
+        # Step 2: Generate updated agent (external service handles fixing and validation)
+        try:
+            result = await generate_agent_patch(update_request, current_agent)
+        except AgentGeneratorNotConfiguredError:
+            return ErrorResponse(
+                message=(
+                    "Agent editing is not available. "
+                    "The Agent Generator service is not configured."
+                ),
+                error="service_not_configured",
+                session_id=session_id,
            )

-            if attempt == MAX_GENERATION_RETRIES:
-                # Return error with validation details
-                return ErrorResponse(
-                    message=(
-                        f"Updated agent has validation errors after "
-                        f"{MAX_GENERATION_RETRIES + 1} attempts. "
-                        f"Please try rephrasing your request or simplify the changes."
-                    ),
-                    error="validation_failed",
-                    details={"validation_errors": validation_errors},
-                    session_id=session_id,
-                )
+        if result is None:
+            return ErrorResponse(
+                message="Failed to generate changes. Please try rephrasing.",
+                error="Update generation failed",
+                session_id=session_id,
+            )

-        # At this point, updated_agent is guaranteed to be set (we return on all failure paths)
-        assert updated_agent is not None
+        # Check if LLM returned clarifying questions
+        if result.get("type") == "clarifying_questions":
+            questions = result.get("questions", [])
+            return ClarificationNeededResponse(
+                message=(
+                    "I need some more information about the changes. "
+                    "Please answer the following questions:"
+                ),
+                questions=[
+                    ClarifyingQuestion(
+                        question=q.get("question", ""),
+                        keyword=q.get("keyword", ""),
+                        example=q.get("example"),
+                    )
+                    for q in questions
+                ],
+                session_id=session_id,
+            )
+
+        # Result is the updated agent JSON
+        updated_agent = result

        agent_name = updated_agent.get("name", "Updated Agent")
        agent_description = updated_agent.get("description", "")
        node_count = len(updated_agent.get("nodes", []))
        link_count = len(updated_agent.get("links", []))

-        # Step 5: Preview or save
+        # Step 3: Preview or save
        if not save:
            return AgentPreviewResponse(
                message=(
-                    f"I've updated the agent. Changes: {intent}. "
+                    f"I've updated the agent. "
                    f"The agent now has {node_count} blocks. "
                    f"Review it and call edit_agent with save=true to save the changes."
                ),
@@ -277,10 +207,7 @@ class EditAgentTool(BaseTool):
            )

            return AgentSavedResponse(
-                message=(
-                    f"Updated agent '{created_graph.name}' has been saved to your library! "
-                    f"Changes: {intent}"
-                ),
+                message=f"Updated agent '{created_graph.name}' has been saved to your library!",
                agent_id=created_graph.id,
                agent_name=created_graph.name,
                library_agent_id=library_agent.id,
--- a/autogpt_platform/backend/backend/api/features/chat/tools/run_agent_test.py
+++ b/autogpt_platform/backend/backend/api/features/chat/tools/run_agent_test.py
@@ -29,7 +29,7 @@ def mock_embedding_functions():
        yield


-@pytest.mark.asyncio(scope="session")
+@pytest.mark.asyncio(loop_scope="session")
 async def test_run_agent(setup_test_data):
    """Test that the run_agent tool successfully executes an approved agent"""
    # Use test data from fixture
@@ -70,7 +70,7 @@ async def test_run_agent(setup_test_data):
    assert result_data["graph_name"] == "Test Agent"


-@pytest.mark.asyncio(scope="session")
+@pytest.mark.asyncio(loop_scope="session")
 async def test_run_agent_missing_inputs(setup_test_data):
    """Test that the run_agent tool returns error when inputs are missing"""
    # Use test data from fixture
@@ -106,7 +106,7 @@ async def test_run_agent_missing_inputs(setup_test_data):
    assert "message" in result_data


-@pytest.mark.asyncio(scope="session")
+@pytest.mark.asyncio(loop_scope="session")
 async def test_run_agent_invalid_agent_id(setup_test_data):
    """Test that the run_agent tool returns error for invalid agent ID"""
    # Use test data from fixture
@@ -141,7 +141,7 @@ async def test_run_agent_invalid_agent_id(setup_test_data):
    )


-@pytest.mark.asyncio(scope="session")
+@pytest.mark.asyncio(loop_scope="session")
 async def test_run_agent_with_llm_credentials(setup_llm_test_data):
    """Test that run_agent works with an agent requiring LLM credentials"""
    # Use test data from fixture
@@ -185,7 +185,7 @@ async def test_run_agent_with_llm_credentials(setup_llm_test_data):
    assert result_data["graph_name"] == "LLM Test Agent"


-@pytest.mark.asyncio(scope="session")
+@pytest.mark.asyncio(loop_scope="session")
 async def test_run_agent_shows_available_inputs_when_none_provided(setup_test_data):
    """Test that run_agent returns available inputs when called without inputs or use_defaults."""
    user = setup_test_data["user"]
@@ -219,7 +219,7 @@ async def test_run_agent_shows_available_inputs_when_none_provided(setup_test_da
    assert "inputs" in result_data["message"].lower()


-@pytest.mark.asyncio(scope="session")
+@pytest.mark.asyncio(loop_scope="session")
 async def test_run_agent_with_use_defaults(setup_test_data):
    """Test that run_agent executes successfully with use_defaults=True."""
    user = setup_test_data["user"]
@@ -251,7 +251,7 @@ async def test_run_agent_with_use_defaults(setup_test_data):
    assert result_data["graph_id"] == graph.id


-@pytest.mark.asyncio(scope="session")
+@pytest.mark.asyncio(loop_scope="session")
 async def test_run_agent_missing_credentials(setup_firecrawl_test_data):
    """Test that run_agent returns setup_requirements when credentials are missing."""
    user = setup_firecrawl_test_data["user"]
@@ -285,7 +285,7 @@ async def test_run_agent_missing_credentials(setup_firecrawl_test_data):
    assert len(setup_info["user_readiness"]["missing_credentials"]) > 0


-@pytest.mark.asyncio(scope="session")
+@pytest.mark.asyncio(loop_scope="session")
 async def test_run_agent_invalid_slug_format(setup_test_data):
    """Test that run_agent returns error for invalid slug format (no slash)."""
    user = setup_test_data["user"]
@@ -313,7 +313,7 @@ async def test_run_agent_invalid_slug_format(setup_test_data):
    assert "username/agent-name" in result_data["message"]


-@pytest.mark.asyncio(scope="session")
+@pytest.mark.asyncio(loop_scope="session")
 async def test_run_agent_unauthenticated():
    """Test that run_agent returns need_login for unauthenticated users."""
    tool = RunAgentTool()
@@ -340,7 +340,7 @@ async def test_run_agent_unauthenticated():
    assert "sign in" in result_data["message"].lower()


-@pytest.mark.asyncio(scope="session")
+@pytest.mark.asyncio(loop_scope="session")
 async def test_run_agent_schedule_without_cron(setup_test_data):
    """Test that run_agent returns error when scheduling without cron expression."""
    user = setup_test_data["user"]
@@ -372,7 +372,7 @@ async def test_run_agent_schedule_without_cron(setup_test_data):
    assert "cron" in result_data["message"].lower()


-@pytest.mark.asyncio(scope="session")
+@pytest.mark.asyncio(loop_scope="session")
 async def test_run_agent_schedule_without_name(setup_test_data):
    """Test that run_agent returns error when scheduling without schedule_name."""
    user = setup_test_data["user"]
--- a/autogpt_platform/backend/backend/api/features/executions/review/model.py
+++ b/autogpt_platform/backend/backend/api/features/executions/review/model.py
@@ -23,6 +23,7 @@ class PendingHumanReviewModel(BaseModel):
        id: Unique identifier for the review record
        user_id: ID of the user who must perform the review
        node_exec_id: ID of the node execution that created this review
+        node_id: ID of the node definition (for grouping reviews from same node)
        graph_exec_id: ID of the graph execution containing the node
        graph_id: ID of the graph template being executed
        graph_version: Version number of the graph template
@@ -37,6 +38,10 @@ class PendingHumanReviewModel(BaseModel):
    """

    node_exec_id: str = Field(description="Node execution ID (primary key)")
+    node_id: str = Field(
+        description="Node definition ID (for grouping)",
+        default="",  # Temporary default for test compatibility
+    )
    user_id: str = Field(description="User ID associated with the review")
    graph_exec_id: str = Field(description="Graph execution ID")
    graph_id: str = Field(description="Graph ID")
@@ -66,7 +71,9 @@ class PendingHumanReviewModel(BaseModel):
    )

    @classmethod
-    def from_db(cls, review: "PendingHumanReview") -> "PendingHumanReviewModel":
+    def from_db(
+        cls, review: "PendingHumanReview", node_id: str
+    ) -> "PendingHumanReviewModel":
        """
        Convert a database model to a response model.

@@ -74,9 +81,14 @@ class PendingHumanReviewModel(BaseModel):
        payload, instructions, and editable flag.

        Handles invalid data gracefully by using safe defaults.
+
+        Args:
+            review: Database review object
+            node_id: Node definition ID (fetched from NodeExecution)
        """
        return cls(
            node_exec_id=review.nodeExecId,
+            node_id=node_id,
            user_id=review.userId,
            graph_exec_id=review.graphExecId,
            graph_id=review.graphId,
@@ -107,6 +119,13 @@ class ReviewItem(BaseModel):
    reviewed_data: SafeJsonData | None = Field(
        None, description="Optional edited data (ignored if approved=False)"
    )
+    auto_approve_future: bool = Field(
+        default=False,
+        description=(
+            "If true and this review is approved, future executions of this same "
+            "block (node) will be automatically approved. This only affects approved reviews."
+        ),
+    )

    @field_validator("reviewed_data")
    @classmethod
@@ -174,6 +193,9 @@ class ReviewRequest(BaseModel):
    This request must include ALL pending reviews for a graph execution.
    Each review will be either approved (with optional data modifications)
    or rejected (data ignored). The execution will resume only after ALL reviews are processed.
+
+    Each review item can individually specify whether to auto-approve future executions
+    of the same block via the `auto_approve_future` field on ReviewItem.
    """

    reviews: List[ReviewItem] = Field(
--- a/autogpt_platform/backend/backend/api/features/executions/review/review_routes_test.py
+++ b/autogpt_platform/backend/backend/api/features/executions/review/review_routes_test.py
--- a/autogpt_platform/backend/backend/api/features/executions/review/routes.py
+++ b/autogpt_platform/backend/backend/api/features/executions/review/routes.py
@@ -1,17 +1,27 @@
+import asyncio
 import logging
-from typing import List
+from typing import Any, List

 import autogpt_libs.auth as autogpt_auth_lib
 from fastapi import APIRouter, HTTPException, Query, Security, status
 from prisma.enums import ReviewStatus

-from backend.data.execution import get_graph_execution_meta
+from backend.data.execution import (
+    ExecutionContext,
+    ExecutionStatus,
+    get_graph_execution_meta,
+)
+from backend.data.graph import get_graph_settings
 from backend.data.human_review import (
+    create_auto_approval_record,
+    get_pending_reviews_by_node_exec_ids,
    get_pending_reviews_for_execution,
    get_pending_reviews_for_user,
    has_pending_reviews_for_graph_exec,
    process_all_reviews_for_execution,
 )
+from backend.data.model import USER_TIMEZONE_NOT_SET
+from backend.data.user import get_user_by_id
 from backend.executor.utils import add_graph_execution

 from .model import PendingHumanReviewModel, ReviewRequest, ReviewResponse
@@ -127,17 +137,70 @@ async def process_review_action(
            detail="At least one review must be provided",
        )

-    # Build review decisions map
+    # Batch fetch all requested reviews
+    reviews_map = await get_pending_reviews_by_node_exec_ids(
+        list(all_request_node_ids), user_id
+    )
+
+    # Validate all reviews were found
+    missing_ids = all_request_node_ids - set(reviews_map.keys())
+    if missing_ids:
+        raise HTTPException(
+            status_code=status.HTTP_404_NOT_FOUND,
+            detail=f"No pending review found for node execution(s): {', '.join(missing_ids)}",
+        )
+
+    # Validate all reviews belong to the same execution
+    graph_exec_ids = {review.graph_exec_id for review in reviews_map.values()}
+    if len(graph_exec_ids) > 1:
+        raise HTTPException(
+            status_code=status.HTTP_409_CONFLICT,
+            detail="All reviews in a single request must belong to the same execution.",
+        )
+
+    graph_exec_id = next(iter(graph_exec_ids))
+
+    # Validate execution status before processing reviews
+    graph_exec_meta = await get_graph_execution_meta(
+        user_id=user_id, execution_id=graph_exec_id
+    )
+
+    if not graph_exec_meta:
+        raise HTTPException(
+            status_code=status.HTTP_404_NOT_FOUND,
+            detail=f"Graph execution #{graph_exec_id} not found",
+        )
+
+    # Only allow processing reviews if execution is paused for review
+    # or incomplete (partial execution with some reviews already processed)
+    if graph_exec_meta.status not in (
+        ExecutionStatus.REVIEW,
+        ExecutionStatus.INCOMPLETE,
+    ):
+        raise HTTPException(
+            status_code=status.HTTP_409_CONFLICT,
+            detail=f"Cannot process reviews while execution status is {graph_exec_meta.status}. "
+            f"Reviews can only be processed when execution is paused (REVIEW status). "
+            f"Current status: {graph_exec_meta.status}",
+        )
+
+    # Build review decisions map and track which reviews requested auto-approval
+    # Auto-approved reviews use original data (no modifications allowed)
    review_decisions = {}
+    auto_approve_requests = {}  # Map node_exec_id -> auto_approve_future flag
+
    for review in request.reviews:
        review_status = (
            ReviewStatus.APPROVED if review.approved else ReviewStatus.REJECTED
        )
+        # If this review requested auto-approval, don't allow data modifications
+        reviewed_data = None if review.auto_approve_future else review.reviewed_data
        review_decisions[review.node_exec_id] = (
            review_status,
-            review.reviewed_data,
+            reviewed_data,
            review.message,
        )
+        auto_approve_requests[review.node_exec_id] = review.auto_approve_future

    # Process all reviews
    updated_reviews = await process_all_reviews_for_execution(
@@ -145,6 +208,87 @@ async def process_review_action(
        review_decisions=review_decisions,
    )

+    # Create auto-approval records for approved reviews that requested it
+    # Deduplicate by node_id to avoid race conditions when multiple reviews
+    # for the same node are processed in parallel
+    async def create_auto_approval_for_node(
+        node_id: str, review_result
+    ) -> tuple[str, bool]:
+        """
+        Create auto-approval record for a node.
+        Returns (node_id, success) tuple for tracking failures.
+        """
+        try:
+            await create_auto_approval_record(
+                user_id=user_id,
+                graph_exec_id=review_result.graph_exec_id,
+                graph_id=review_result.graph_id,
+                graph_version=review_result.graph_version,
+                node_id=node_id,
+                payload=review_result.payload,
+            )
+            return (node_id, True)
+        except Exception as e:
+            logger.error(
+                f"Failed to create auto-approval record for node {node_id}",
+                exc_info=e,
+            )
+            return (node_id, False)
+
+    # Collect node_exec_ids that need auto-approval
+    node_exec_ids_needing_auto_approval = [
+        node_exec_id
+        for node_exec_id, review_result in updated_reviews.items()
+        if review_result.status == ReviewStatus.APPROVED
+        and auto_approve_requests.get(node_exec_id, False)
+    ]
+
+    # Batch-fetch node executions to get node_ids
+    nodes_needing_auto_approval: dict[str, Any] = {}
+    if node_exec_ids_needing_auto_approval:
+        from backend.data.execution import get_node_executions
+
+        node_execs = await get_node_executions(
+            graph_exec_id=graph_exec_id, include_exec_data=False
+        )
+        node_exec_map = {node_exec.node_exec_id: node_exec for node_exec in node_execs}
+
+        for node_exec_id in node_exec_ids_needing_auto_approval:
+            node_exec = node_exec_map.get(node_exec_id)
+            if node_exec:
+                review_result = updated_reviews[node_exec_id]
+                # Use the first approved review for this node (deduplicate by node_id)
+                if node_exec.node_id not in nodes_needing_auto_approval:
+                    nodes_needing_auto_approval[node_exec.node_id] = review_result
+            else:
+                logger.error(
+                    f"Failed to create auto-approval record for {node_exec_id}: "
+                    f"Node execution not found. This may indicate a race condition "
+                    f"or data inconsistency."
+                )
+
+    # Execute all auto-approval creations in parallel (deduplicated by node_id)
+    auto_approval_results = await asyncio.gather(
+        *[
+            create_auto_approval_for_node(node_id, review_result)
+            for node_id, review_result in nodes_needing_auto_approval.items()
+        ],
+        return_exceptions=True,
+    )
+
+    # Count auto-approval failures
+    auto_approval_failed_count = 0
+    for result in auto_approval_results:
+        if isinstance(result, Exception):
+            # Unexpected exception during auto-approval creation
+            auto_approval_failed_count += 1
+            logger.error(
+                f"Unexpected exception during auto-approval creation: {result}"
+            )
+        elif isinstance(result, tuple) and len(result) == 2 and not result[1]:
+            # Auto-approval creation failed (returned False)
+            auto_approval_failed_count += 1
+
    # Count results
    approved_count = sum(
        1
@@ -157,30 +301,53 @@ async def process_review_action(
        if review.status == ReviewStatus.REJECTED
    )

-    # Resume execution if we processed some reviews
+    # Resume execution only if ALL pending reviews for this execution have been processed
    if updated_reviews:
-        # Get graph execution ID from any processed review
-        first_review = next(iter(updated_reviews.values()))
-        graph_exec_id = first_review.graph_exec_id
-
-        # Check if any pending reviews remain for this execution
        still_has_pending = await has_pending_reviews_for_graph_exec(graph_exec_id)

        if not still_has_pending:
-            # Resume execution
+            # Get the graph_id from any processed review
+            first_review = next(iter(updated_reviews.values()))
+
            try:
+                # Fetch user and settings to build complete execution context
+                user = await get_user_by_id(user_id)
+                settings = await get_graph_settings(
+                    user_id=user_id, graph_id=first_review.graph_id
+                )
+
+                # Preserve user's timezone preference when resuming execution
+                user_timezone = (
+                    user.timezone if user.timezone != USER_TIMEZONE_NOT_SET else "UTC"
+                )
+
+                execution_context = ExecutionContext(
+                    human_in_the_loop_safe_mode=settings.human_in_the_loop_safe_mode,
+                    sensitive_action_safe_mode=settings.sensitive_action_safe_mode,
+                    user_timezone=user_timezone,
+                )
+
                await add_graph_execution(
                    graph_id=first_review.graph_id,
                    user_id=user_id,
                    graph_exec_id=graph_exec_id,
+                    execution_context=execution_context,
                )
                logger.info(f"Resumed execution {graph_exec_id}")
            except Exception as e:
                logger.error(f"Failed to resume execution {graph_exec_id}: {str(e)}")

+    # Build error message if auto-approvals failed
+    error_message = None
+    if auto_approval_failed_count > 0:
+        error_message = (
+            f"{auto_approval_failed_count} auto-approval setting(s) could not be saved. "
+            f"You may need to manually approve these reviews in future executions."
+        )
+
    return ReviewResponse(
        approved_count=approved_count,
        rejected_count=rejected_count,
-        failed_count=0,
-        error=None,
+        failed_count=auto_approval_failed_count,
+        error=error_message,
    )
--- a/autogpt_platform/backend/backend/api/features/library/db.py
+++ b/autogpt_platform/backend/backend/api/features/library/db.py
@@ -583,7 +583,13 @@ async def update_library_agent(
            )
        update_fields["isDeleted"] = is_deleted
    if settings is not None:
-        update_fields["settings"] = SafeJson(settings.model_dump())
+        existing_agent = await get_library_agent(id=library_agent_id, user_id=user_id)
+        current_settings_dict = (
+            existing_agent.settings.model_dump() if existing_agent.settings else {}
+        )
+        new_settings = settings.model_dump(exclude_unset=True)
+        merged_settings = {**current_settings_dict, **new_settings}
+        update_fields["settings"] = SafeJson(merged_settings)

    try:
        # If graph_version is provided, update to that specific version
--- a/autogpt_platform/backend/backend/api/features/oauth_test.py
+++ b/autogpt_platform/backend/backend/api/features/oauth_test.py
@@ -20,6 +20,7 @@ from typing import AsyncGenerator

 import httpx
 import pytest
+import pytest_asyncio
 from autogpt_libs.api_key.keysmith import APIKeySmith
 from prisma.enums import APIKeyPermission
 from prisma.models import OAuthAccessToken as PrismaOAuthAccessToken
@@ -38,13 +39,13 @@ keysmith = APIKeySmith()
 # ============================================================================


-@pytest.fixture
+@pytest.fixture(scope="session")
 def test_user_id() -> str:
    """Test user ID for OAuth tests."""
    return str(uuid.uuid4())


-@pytest.fixture
+@pytest_asyncio.fixture(scope="session", loop_scope="session")
 async def test_user(server, test_user_id: str):
    """Create a test user in the database."""
    await PrismaUser.prisma().create(
@@ -67,7 +68,7 @@ async def test_user(server, test_user_id: str):
    await PrismaUser.prisma().delete(where={"id": test_user_id})


-@pytest.fixture
+@pytest_asyncio.fixture
 async def test_oauth_app(test_user: str):
    """Create a test OAuth application in the database."""
    app_id = str(uuid.uuid4())
@@ -122,7 +123,7 @@ def pkce_credentials() -> tuple[str, str]:
    return generate_pkce()


-@pytest.fixture
+@pytest_asyncio.fixture
 async def client(server, test_user: str) -> AsyncGenerator[httpx.AsyncClient, None]:
    """
    Create an async HTTP client that talks directly to the FastAPI app.
@@ -287,7 +288,7 @@ async def test_authorize_invalid_client_returns_error(
    assert query_params["error"][0] == "invalid_client"


-@pytest.fixture
+@pytest_asyncio.fixture
 async def inactive_oauth_app(test_user: str):
    """Create an inactive test OAuth application in the database."""
    app_id = str(uuid.uuid4())
@@ -1004,7 +1005,7 @@ async def test_token_refresh_revoked(
    assert "revoked" in response.json()["detail"].lower()


-@pytest.fixture
+@pytest_asyncio.fixture
 async def other_oauth_app(test_user: str):
    """Create a second OAuth application for cross-app tests."""
    app_id = str(uuid.uuid4())
--- a/autogpt_platform/backend/backend/api/features/store/db.py
+++ b/autogpt_platform/backend/backend/api/features/store/db.py
@@ -1552,7 +1552,7 @@ async def review_store_submission(

                # Generate embedding for approved listing (blocking - admin operation)
                # Inside transaction: if embedding fails, entire transaction rolls back
-                embedding_success = await ensure_embedding(
+                await ensure_embedding(
                    version_id=store_listing_version_id,
                    name=store_listing_version.name,
                    description=store_listing_version.description,
@@ -1560,12 +1560,6 @@ async def review_store_submission(
                    categories=store_listing_version.categories or [],
                    tx=tx,
                )
-                if not embedding_success:
-                    raise ValueError(
-                        f"Failed to generate embedding for listing {store_listing_version_id}. "
-                        "This is likely due to OpenAI API being unavailable. "
-                        "Please try again later or contact support if the issue persists."
-                    )

                await prisma.models.StoreListing.prisma(tx).update(
                    where={"id": store_listing_version.StoreListing.id},
--- a/autogpt_platform/backend/backend/api/features/store/embeddings.py
+++ b/autogpt_platform/backend/backend/api/features/store/embeddings.py
@@ -21,7 +21,6 @@ from backend.util.json import dumps

 logger = logging.getLogger(__name__)

-
 # OpenAI embedding model configuration
 EMBEDDING_MODEL = "text-embedding-3-small"
 # Embedding dimension for the model above
@@ -63,49 +62,42 @@ def build_searchable_text(
    return " ".join(parts)


-async def generate_embedding(text: str) -> list[float] | None:
+async def generate_embedding(text: str) -> list[float]:
    """
    Generate embedding for text using OpenAI API.

-    Returns None if embedding generation fails.
-    Fail-fast: no retries to maintain consistency with approval flow.
+    Raises exceptions on failure - caller should handle.
    """
-    try:
-        client = get_openai_client()
-        if not client:
-            logger.error("openai_internal_api_key not set, cannot generate embedding")
-            return None
+    client = get_openai_client()
+    if not client:
+        raise RuntimeError("openai_internal_api_key not set, cannot generate embedding")

-        # Truncate text to token limit using tiktoken
-        # Character-based truncation is insufficient because token ratios vary by content type
-        enc = encoding_for_model(EMBEDDING_MODEL)
-        tokens = enc.encode(text)
-        if len(tokens) > EMBEDDING_MAX_TOKENS:
-            tokens = tokens[:EMBEDDING_MAX_TOKENS]
-            truncated_text = enc.decode(tokens)
-            logger.info(
-                f"Truncated text from {len(enc.encode(text))} to {len(tokens)} tokens"
-            )
-        else:
-            truncated_text = text
-
-        start_time = time.time()
-        response = await client.embeddings.create(
-            model=EMBEDDING_MODEL,
-            input=truncated_text,
-        )
-        latency_ms = (time.time() - start_time) * 1000
-
-        embedding = response.data[0].embedding
+    # Truncate text to token limit using tiktoken
+    # Character-based truncation is insufficient because token ratios vary by content type
+    enc = encoding_for_model(EMBEDDING_MODEL)
+    tokens = enc.encode(text)
+    if (original_token_count := len(tokens)) > EMBEDDING_MAX_TOKENS:
+        tokens = tokens[:EMBEDDING_MAX_TOKENS]
+        truncated_text = enc.decode(tokens)
        logger.info(
-            f"Generated embedding: {len(embedding)} dims, "
-            f"{len(tokens)} tokens, {latency_ms:.0f}ms"
+            f"Truncated text from {original_token_count} to {len(tokens)} tokens"
        )
-        return embedding
+    else:
+        truncated_text = text

-    except Exception as e:
-        logger.error(f"Failed to generate embedding: {e}")
-        return None
+    start_time = time.time()
+    response = await client.embeddings.create(
+        model=EMBEDDING_MODEL,
+        input=truncated_text,
+    )
+    latency_ms = (time.time() - start_time) * 1000
+
+    embedding = response.data[0].embedding
+    logger.info(
+        f"Generated embedding: {len(embedding)} dims, "
+        f"{len(tokens)} tokens, {latency_ms:.0f}ms"
+    )
+    return embedding


 async def store_embedding(
@@ -144,48 +136,45 @@ async def store_content_embedding(

    New function for unified content embedding storage.
    Uses raw SQL since Prisma doesn't natively support pgvector.
+
+    Raises exceptions on failure - caller should handle.
    """
-    try:
-        client = tx if tx else prisma.get_client()
+    client = tx if tx else prisma.get_client()

-        # Convert embedding to PostgreSQL vector format
-        embedding_str = embedding_to_vector_string(embedding)
-        metadata_json = dumps(metadata or {})
+    # Convert embedding to PostgreSQL vector format
+    embedding_str = embedding_to_vector_string(embedding)
+    metadata_json = dumps(metadata or {})

-        # Upsert the embedding
-        # WHERE clause in DO UPDATE prevents PostgreSQL 15 bug with NULLS NOT DISTINCT
-        # Use {pgvector_schema}.vector for explicit pgvector type qualification
-        await execute_raw_with_schema(
-            """
-            INSERT INTO {schema_prefix}"UnifiedContentEmbedding" (
-                "id", "contentType", "contentId", "userId", "embedding", "searchableText", "metadata", "createdAt", "updatedAt"
-            )
-            VALUES (gen_random_uuid()::text, $1::{schema_prefix}"ContentType", $2, $3, $4::{pgvector_schema}.vector, $5, $6::jsonb, NOW(), NOW())
-            ON CONFLICT ("contentType", "contentId", "userId")
-            DO UPDATE SET
-                "embedding" = $4::{pgvector_schema}.vector,
-                "searchableText" = $5,
-                "metadata" = $6::jsonb,
-                "updatedAt" = NOW()
-            WHERE {schema_prefix}"UnifiedContentEmbedding"."contentType" = $1::{schema_prefix}"ContentType"
-                AND {schema_prefix}"UnifiedContentEmbedding"."contentId" = $2
-                AND ({schema_prefix}"UnifiedContentEmbedding"."userId" = $3 OR ($3 IS NULL AND {schema_prefix}"UnifiedContentEmbedding"."userId" IS NULL))
-            """,
-            content_type,
-            content_id,
-            user_id,
-            embedding_str,
-            searchable_text,
-            metadata_json,
-            client=client,
+    # Upsert the embedding
+    # WHERE clause in DO UPDATE prevents PostgreSQL 15 bug with NULLS NOT DISTINCT
+    # Use unqualified ::vector - pgvector is in search_path on all environments
+    await execute_raw_with_schema(
+        """
+        INSERT INTO {schema_prefix}"UnifiedContentEmbedding" (
+            "id", "contentType", "contentId", "userId", "embedding", "searchableText", "metadata", "createdAt", "updatedAt"
        )
+        VALUES (gen_random_uuid()::text, $1::{schema_prefix}"ContentType", $2, $3, $4::vector, $5, $6::jsonb, NOW(), NOW())
+        ON CONFLICT ("contentType", "contentId", "userId")
+        DO UPDATE SET
+            "embedding" = $4::vector,
+            "searchableText" = $5,
+            "metadata" = $6::jsonb,
+            "updatedAt" = NOW()
+        WHERE {schema_prefix}"UnifiedContentEmbedding"."contentType" = $1::{schema_prefix}"ContentType"
+            AND {schema_prefix}"UnifiedContentEmbedding"."contentId" = $2
+            AND ({schema_prefix}"UnifiedContentEmbedding"."userId" = $3 OR ($3 IS NULL AND {schema_prefix}"UnifiedContentEmbedding"."userId" IS NULL))
+        """,
+        content_type,
+        content_id,
+        user_id,
+        embedding_str,
+        searchable_text,
+        metadata_json,
+        client=client,
+    )

-        logger.info(f"Stored embedding for {content_type}:{content_id}")
-        return True
-
-    except Exception as e:
-        logger.error(f"Failed to store embedding for {content_type}:{content_id}: {e}")
-        return False
+    logger.info(f"Stored embedding for {content_type}:{content_id}")
+    return True


 async def get_embedding(version_id: str) -> dict[str, Any] | None:
@@ -217,34 +206,31 @@ async def get_content_embedding(

    New function for unified content embedding retrieval.
    Returns dict with contentType, contentId, embedding, timestamps or None if not found.
+
+    Raises exceptions on failure - caller should handle.
    """
-    try:
-        result = await query_raw_with_schema(
-            """
-            SELECT
-                "contentType",
-                "contentId",
-                "userId",
-                "embedding"::text as "embedding",
-                "searchableText",
-                "metadata",
-                "createdAt",
-                "updatedAt"
-            FROM {schema_prefix}"UnifiedContentEmbedding"
-            WHERE "contentType" = $1::{schema_prefix}"ContentType" AND "contentId" = $2 AND ("userId" = $3 OR ($3 IS NULL AND "userId" IS NULL))
-            """,
-            content_type,
-            content_id,
-            user_id,
-        )
+    result = await query_raw_with_schema(
+        """
+        SELECT
+            "contentType",
+            "contentId",
+            "userId",
+            "embedding"::text as "embedding",
+            "searchableText",
+            "metadata",
+            "createdAt",
+            "updatedAt"
+        FROM {schema_prefix}"UnifiedContentEmbedding"
+        WHERE "contentType" = $1::{schema_prefix}"ContentType" AND "contentId" = $2 AND ("userId" = $3 OR ($3 IS NULL AND "userId" IS NULL))
+        """,
+        content_type,
+        content_id,
+        user_id,
+    )

-        if result and len(result) > 0:
-            return result[0]
-        return None
-
-    except Exception as e:
-        logger.error(f"Failed to get embedding for {content_type}:{content_id}: {e}")
-        return None
+    if result and len(result) > 0:
+        return result[0]
+    return None


 async def ensure_embedding(
@@ -272,46 +258,38 @@ async def ensure_embedding(
        tx: Optional transaction client

    Returns:
-        True if embedding exists/was created, False on failure
+        True if embedding exists/was created
+
+    Raises exceptions on failure - caller should handle.
    """
-    try:
-        # Check if embedding already exists
-        if not force:
-            existing = await get_embedding(version_id)
-            if existing and existing.get("embedding"):
-                logger.debug(f"Embedding for version {version_id} already exists")
-                return True
+    # Check if embedding already exists
+    if not force:
+        existing = await get_embedding(version_id)
+        if existing and existing.get("embedding"):
+            logger.debug(f"Embedding for version {version_id} already exists")
+            return True

-        # Build searchable text for embedding
-        searchable_text = build_searchable_text(
-            name, description, sub_heading, categories
-        )
+    # Build searchable text for embedding
+    searchable_text = build_searchable_text(name, description, sub_heading, categories)

-        # Generate new embedding
-        embedding = await generate_embedding(searchable_text)
-        if embedding is None:
-            logger.warning(f"Could not generate embedding for version {version_id}")
-            return False
+    # Generate new embedding
+    embedding = await generate_embedding(searchable_text)

-        # Store the embedding with metadata using new function
-        metadata = {
-            "name": name,
-            "subHeading": sub_heading,
-            "categories": categories,
-        }
-        return await store_content_embedding(
-            content_type=ContentType.STORE_AGENT,
-            content_id=version_id,
-            embedding=embedding,
-            searchable_text=searchable_text,
-            metadata=metadata,
-            user_id=None,  # Store agents are public
-            tx=tx,
-        )
-
-    except Exception as e:
-        logger.error(f"Failed to ensure embedding for version {version_id}: {e}")
-        return False
+    # Store the embedding with metadata using new function
+    metadata = {
+        "name": name,
+        "subHeading": sub_heading,
+        "categories": categories,
+    }
+    return await store_content_embedding(
+        content_type=ContentType.STORE_AGENT,
+        content_id=version_id,
+        embedding=embedding,
+        searchable_text=searchable_text,
+        metadata=metadata,
+        user_id=None,  # Store agents are public
+        tx=tx,
+    )


 async def delete_embedding(version_id: str) -> bool:
@@ -521,6 +499,24 @@ async def backfill_all_content_types(batch_size: int = 10) -> dict[str, Any]:
            success = sum(1 for result in results if result is True)
            failed = len(results) - success

+            # Aggregate unique errors to avoid Sentry spam
+            if failed > 0:
+                # Group errors by type and message
+                error_summary: dict[str, int] = {}
+                for result in results:
+                    if isinstance(result, Exception):
+                        error_key = f"{type(result).__name__}: {result!s}"
+                        error_summary[error_key] = error_summary.get(error_key, 0) + 1
+
+                # Log aggregated error summary
+                error_details = ", ".join(
+                    f"{error} ({count}x)" for error, count in error_summary.items()
+                )
+                logger.error(
+                    f"{content_type.value}: {failed}/{len(results)} embeddings failed. "
+                    f"Errors: {error_details}"
+                )
+
            results_by_type[content_type.value] = {
                "processed": len(missing_items),
                "success": success,
@@ -557,11 +553,12 @@ async def backfill_all_content_types(batch_size: int = 10) -> dict[str, Any]:
    }


-async def embed_query(query: str) -> list[float] | None:
+async def embed_query(query: str) -> list[float]:
    """
    Generate embedding for a search query.

    Same as generate_embedding but with clearer intent.
+    Raises exceptions on failure - caller should handle.
    """
    return await generate_embedding(query)

@@ -594,40 +591,30 @@ async def ensure_content_embedding(
        tx: Optional transaction client

    Returns:
-        True if embedding exists/was created, False on failure
+        True if embedding exists/was created
+
+    Raises exceptions on failure - caller should handle.
    """
-    try:
-        # Check if embedding already exists
-        if not force:
-            existing = await get_content_embedding(content_type, content_id, user_id)
-            if existing and existing.get("embedding"):
-                logger.debug(
-                    f"Embedding for {content_type}:{content_id} already exists"
-                )
-                return True
+    # Check if embedding already exists
+    if not force:
+        existing = await get_content_embedding(content_type, content_id, user_id)
+        if existing and existing.get("embedding"):
+            logger.debug(f"Embedding for {content_type}:{content_id} already exists")
+            return True

-        # Generate new embedding
-        embedding = await generate_embedding(searchable_text)
-        if embedding is None:
-            logger.warning(
-                f"Could not generate embedding for {content_type}:{content_id}"
-            )
-            return False
+    # Generate new embedding
+    embedding = await generate_embedding(searchable_text)

-        # Store the embedding
-        return await store_content_embedding(
-            content_type=content_type,
-            content_id=content_id,
-            embedding=embedding,
-            searchable_text=searchable_text,
-            metadata=metadata or {},
-            user_id=user_id,
-            tx=tx,
-        )
-
-    except Exception as e:
-        logger.error(f"Failed to ensure embedding for {content_type}:{content_id}: {e}")
-        return False
+    # Store the embedding
+    return await store_content_embedding(
+        content_type=content_type,
+        content_id=content_id,
+        embedding=embedding,
+        searchable_text=searchable_text,
+        metadata=metadata or {},
+        user_id=user_id,
+        tx=tx,
+    )


 async def cleanup_orphaned_embeddings() -> dict[str, Any]:
@@ -854,9 +841,8 @@ async def semantic_search(
        limit = 100

    # Generate query embedding
-    query_embedding = await embed_query(query)
-
-    if query_embedding is not None:
+    try:
+        query_embedding = await embed_query(query)
        # Semantic search with embeddings
        embedding_str = embedding_to_vector_string(query_embedding)

@@ -879,8 +865,7 @@ async def semantic_search(
        min_similarity_idx = len(params) + 1
        params.append(min_similarity)

-        # Use regular string (not f-string) for template to preserve {schema_prefix} and {schema} placeholders
-        # Use OPERATOR({pgvector_schema}.<=>) for explicit operator schema qualification
+        # Use unqualified ::vector and <=> operator - pgvector is in search_path on all environments
        sql = (
            """
            SELECT
@@ -888,9 +873,9 @@ async def semantic_search(
                "contentType" as content_type,
                "searchableText" as searchable_text,
                metadata,
-                1 - (embedding OPERATOR({pgvector_schema}.<=>) '"""
+                1 - (embedding <=> '"""
            + embedding_str
-            + """'::{pgvector_schema}.vector) as similarity
+            + """'::vector) as similarity
            FROM {schema_prefix}"UnifiedContentEmbedding"
            WHERE "contentType" IN ("""
            + content_type_placeholders
@@ -898,9 +883,9 @@ async def semantic_search(
            """
            + user_filter
            + """
-            AND 1 - (embedding OPERATOR({pgvector_schema}.<=>) '"""
+            AND 1 - (embedding <=> '"""
            + embedding_str
-            + """'::{pgvector_schema}.vector) >= $"""
+            + """'::vector) >= $"""
            + str(min_similarity_idx)
            + """
            ORDER BY similarity DESC
@@ -908,24 +893,21 @@ async def semantic_search(
        """
        )

-        try:
-            results = await query_raw_with_schema(sql, *params)
-            return [
-                {
-                    "content_id": row["content_id"],
-                    "content_type": row["content_type"],
-                    "searchable_text": row["searchable_text"],
-                    "metadata": row["metadata"],
-                    "similarity": float(row["similarity"]),
-                }
-                for row in results
-            ]
-        except Exception as e:
-            logger.error(f"Semantic search failed: {e}")
-            # Fall through to lexical search below
+        results = await query_raw_with_schema(sql, *params)
+        return [
+            {
+                "content_id": row["content_id"],
+                "content_type": row["content_type"],
+                "searchable_text": row["searchable_text"],
+                "metadata": row["metadata"],
+                "similarity": float(row["similarity"]),
+            }
+            for row in results
+        ]
+    except Exception as e:
+        logger.warning(f"Semantic search failed, falling back to lexical search: {e}")

    # Fallback to lexical search if embeddings unavailable
-    logger.warning("Falling back to lexical search (embeddings unavailable)")

    params_lexical: list[Any] = [limit]
    user_filter = ""
--- a/autogpt_platform/backend/backend/api/features/store/embeddings_schema_test.py
+++ b/autogpt_platform/backend/backend/api/features/store/embeddings_schema_test.py
@@ -298,17 +298,16 @@ async def test_schema_handling_error_cases():
            mock_client.execute_raw.side_effect = Exception("Database error")
            mock_get_client.return_value = mock_client

-            result = await embeddings.store_content_embedding(
-                content_type=ContentType.STORE_AGENT,
-                content_id="test-id",
-                embedding=[0.1] * EMBEDDING_DIM,
-                searchable_text="test",
-                metadata=None,
-                user_id=None,
-            )
-
-            # Should return False on error, not raise
-            assert result is False
+            # Should raise exception on error
+            with pytest.raises(Exception, match="Database error"):
+                await embeddings.store_content_embedding(
+                    content_type=ContentType.STORE_AGENT,
+                    content_id="test-id",
+                    embedding=[0.1] * EMBEDDING_DIM,
+                    searchable_text="test",
+                    metadata=None,
+                    user_id=None,
+                )


 if __name__ == "__main__":
--- a/autogpt_platform/backend/backend/api/features/store/embeddings_test.py
+++ b/autogpt_platform/backend/backend/api/features/store/embeddings_test.py
@@ -80,9 +80,8 @@ async def test_generate_embedding_no_api_key():
    ) as mock_get_client:
        mock_get_client.return_value = None

-        result = await embeddings.generate_embedding("test text")
-
-        assert result is None
+        with pytest.raises(RuntimeError, match="openai_internal_api_key not set"):
+            await embeddings.generate_embedding("test text")


@pytest.mark.asyncio(loop_scope="session")
@@ -97,9 +96,8 @@ async def test_generate_embedding_api_error():
    ) as mock_get_client:
        mock_get_client.return_value = mock_client

-        result = await embeddings.generate_embedding("test text")
-
-        assert result is None
+        with pytest.raises(Exception, match="API Error"):
+            await embeddings.generate_embedding("test text")


@pytest.mark.asyncio(loop_scope="session")
@@ -173,11 +171,10 @@ async def test_store_embedding_database_error(mocker):

    embedding = [0.1, 0.2, 0.3]

-    result = await embeddings.store_embedding(
-        version_id="test-version-id", embedding=embedding, tx=mock_client
-    )
-
-    assert result is False
+    with pytest.raises(Exception, match="Database error"):
+        await embeddings.store_embedding(
+            version_id="test-version-id", embedding=embedding, tx=mock_client
+        )


@pytest.mark.asyncio(loop_scope="session")
@@ -277,17 +274,16 @@ async def test_ensure_embedding_create_new(mock_get, mock_store, mock_generate):
 async def test_ensure_embedding_generation_fails(mock_get, mock_generate):
    """Test ensure_embedding when generation fails."""
    mock_get.return_value = None
-    mock_generate.return_value = None
+    mock_generate.side_effect = Exception("Generation failed")

-    result = await embeddings.ensure_embedding(
-        version_id="test-id",
-        name="Test",
-        description="Test description",
-        sub_heading="Test heading",
-        categories=["test"],
-    )
-
-    assert result is False
+    with pytest.raises(Exception, match="Generation failed"):
+        await embeddings.ensure_embedding(
+            version_id="test-id",
+            name="Test",
+            description="Test description",
+            sub_heading="Test heading",
+            categories=["test"],
+        )


@pytest.mark.asyncio(loop_scope="session")
--- a/autogpt_platform/backend/backend/api/features/store/hybrid_search.py
+++ b/autogpt_platform/backend/backend/api/features/store/hybrid_search.py
@@ -186,13 +186,12 @@ async def unified_hybrid_search(

    offset = (page - 1) * page_size

-    # Generate query embedding
-    query_embedding = await embed_query(query)
-
-    # Graceful degradation if embedding unavailable
-    if query_embedding is None or not query_embedding:
+    # Generate query embedding with graceful degradation
+    try:
+        query_embedding = await embed_query(query)
+    except Exception as e:
        logger.warning(
-            "Failed to generate query embedding - falling back to lexical-only search. "
+            f"Failed to generate query embedding - falling back to lexical-only search: {e}. "
            "Check that openai_internal_api_key is configured and OpenAI API is accessible."
        )
        query_embedding = [0.0] * EMBEDDING_DIM
@@ -295,7 +294,7 @@ async def unified_hybrid_search(
                FROM {{schema_prefix}}"UnifiedContentEmbedding" uce
                WHERE uce."contentType" = ANY({content_types_param}::{{schema_prefix}}"ContentType"[])
                {user_filter}
-                ORDER BY uce.embedding OPERATOR({{pgvector_schema}}.<=>)  {embedding_param}::{{pgvector_schema}}.vector
+                ORDER BY uce.embedding <=> {embedding_param}::vector
                LIMIT 200
            )
        ),
@@ -307,7 +306,7 @@ async def unified_hybrid_search(
                uce.metadata,
                uce."updatedAt" as updated_at,
                -- Semantic score: cosine similarity (1 - distance)
-                COALESCE(1 - (uce.embedding OPERATOR({{pgvector_schema}}.<=>)  {embedding_param}::{{pgvector_schema}}.vector), 0) as semantic_score,
+                COALESCE(1 - (uce.embedding <=> {embedding_param}::vector), 0) as semantic_score,
                -- Lexical score: ts_rank_cd
                COALESCE(ts_rank_cd(uce.search, plainto_tsquery('english', {query_param})), 0) as lexical_raw,
                -- Category match from metadata
@@ -464,13 +463,12 @@ async def hybrid_search(

    offset = (page - 1) * page_size

-    # Generate query embedding
-    query_embedding = await embed_query(query)
-
-    # Graceful degradation
-    if query_embedding is None or not query_embedding:
+    # Generate query embedding with graceful degradation
+    try:
+        query_embedding = await embed_query(query)
+    except Exception as e:
        logger.warning(
-            "Failed to generate query embedding - falling back to lexical-only search."
+            f"Failed to generate query embedding - falling back to lexical-only search: {e}"
        )
        query_embedding = [0.0] * EMBEDDING_DIM
        total_non_semantic = (
@@ -583,7 +581,7 @@ async def hybrid_search(
                WHERE uce."contentType" = 'STORE_AGENT'::{{schema_prefix}}"ContentType"
                AND uce."userId" IS NULL
                AND {where_clause}
-                ORDER BY uce.embedding OPERATOR({{pgvector_schema}}.<=>)  {embedding_param}::{{pgvector_schema}}.vector
+                ORDER BY uce.embedding <=> {embedding_param}::vector
                LIMIT 200
            ) uce
        ),
@@ -605,7 +603,7 @@ async def hybrid_search(
                -- Searchable text for BM25 reranking
                COALESCE(sa.agent_name, '') || ' ' || COALESCE(sa.sub_heading, '') || ' ' || COALESCE(sa.description, '') as searchable_text,
                -- Semantic score
-                COALESCE(1 - (uce.embedding OPERATOR({{pgvector_schema}}.<=>)  {embedding_param}::{{pgvector_schema}}.vector), 0) as semantic_score,
+                COALESCE(1 - (uce.embedding <=> {embedding_param}::vector), 0) as semantic_score,
                -- Lexical score (raw, will normalize)
                COALESCE(ts_rank_cd(uce.search, plainto_tsquery('english', {query_param})), 0) as lexical_raw,
                -- Category match
--- a/autogpt_platform/backend/backend/api/features/store/hybrid_search_test.py
+++ b/autogpt_platform/backend/backend/api/features/store/hybrid_search_test.py
@@ -172,8 +172,8 @@ async def test_hybrid_search_without_embeddings():
        with patch(
            "backend.api.features.store.hybrid_search.query_raw_with_schema"
        ) as mock_query:
-            # Simulate embedding failure
-            mock_embed.return_value = None
+            # Simulate embedding failure by raising exception
+            mock_embed.side_effect = Exception("Embedding generation failed")
            mock_query.return_value = mock_results

            # Should NOT raise - graceful degradation
@@ -613,7 +613,9 @@ async def test_unified_hybrid_search_graceful_degradation():
            "backend.api.features.store.hybrid_search.embed_query"
        ) as mock_embed:
            mock_query.return_value = mock_results
-            mock_embed.return_value = None  # Embedding failure
+            mock_embed.side_effect = Exception(
+                "Embedding generation failed"
+            )  # Embedding failure

            # Should NOT raise - graceful degradation
            results, total = await unified_hybrid_search(
--- a/autogpt_platform/backend/backend/blocks/basic.py
+++ b/autogpt_platform/backend/backend/blocks/basic.py
@@ -116,6 +116,7 @@ class PrintToConsoleBlock(Block):
            input_schema=PrintToConsoleBlock.Input,
            output_schema=PrintToConsoleBlock.Output,
            test_input={"text": "Hello, World!"},
+            is_sensitive_action=True,
            test_output=[
                ("output", "Hello, World!"),
                ("status", "printed"),
--- a/autogpt_platform/backend/backend/blocks/claude_code.py
+++ b/autogpt_platform/backend/backend/blocks/claude_code.py
@@ -0,0 +1,659 @@
+import json
+import shlex
+import uuid
+from typing import Literal, Optional
+
+from e2b import AsyncSandbox as BaseAsyncSandbox
+from pydantic import BaseModel, SecretStr
+
+from backend.data.block import (
+    Block,
+    BlockCategory,
+    BlockOutput,
+    BlockSchemaInput,
+    BlockSchemaOutput,
+)
+from backend.data.model import (
+    APIKeyCredentials,
+    CredentialsField,
+    CredentialsMetaInput,
+    SchemaField,
+)
+from backend.integrations.providers import ProviderName
+
+
+class ClaudeCodeExecutionError(Exception):
+    """Failure raised while running Claude Code inside a sandbox.
+
+    The error remembers which sandbox it came from: ``sandbox_id`` holds the
+    ID supplied by the raiser (an empty string when none was given), so a
+    handler can hand it back to the user for cleanup when the sandbox was
+    kept alive (dispose_sandbox=False).
+    """
+
+    def __init__(self, message: str, sandbox_id: str = ""):
+        # Record the sandbox reference, then let Exception store the message.
+        self.sandbox_id = sandbox_id
+        super().__init__(message)
+
+
+# Mock E2B API-key credentials. Supplied to the block as `test_credentials`
+# so the built-in block test does not need a real key.
+TEST_E2B_CREDENTIALS = APIKeyCredentials(
+    id="01234567-89ab-cdef-0123-456789abcdef",
+    provider="e2b",
+    api_key=SecretStr("mock-e2b-api-key"),
+    title="Mock E2B API key",
+    expires_at=None,
+)
+# Credentials reference (metadata only, no secret) derived from the mock above;
+# used as the `e2b_credentials` value in the block's `test_input`.
+TEST_E2B_CREDENTIALS_INPUT = {
+    "provider": TEST_E2B_CREDENTIALS.provider,
+    "id": TEST_E2B_CREDENTIALS.id,
+    "type": TEST_E2B_CREDENTIALS.type,
+    "title": TEST_E2B_CREDENTIALS.title,
+}
+
+# Mock Anthropic API-key credentials, the counterpart of the E2B mock above.
+TEST_ANTHROPIC_CREDENTIALS = APIKeyCredentials(
+    id="2e568a2b-b2ea-475a-8564-9a676bf31c56",
+    provider="anthropic",
+    api_key=SecretStr("mock-anthropic-api-key"),
+    title="Mock Anthropic API key",
+    expires_at=None,
+)
+# Credentials reference (metadata only, no secret) used as the
+# `anthropic_credentials` value in the block's `test_input`.
+TEST_ANTHROPIC_CREDENTIALS_INPUT = {
+    "provider": TEST_ANTHROPIC_CREDENTIALS.provider,
+    "id": TEST_ANTHROPIC_CREDENTIALS.id,
+    "type": TEST_ANTHROPIC_CREDENTIALS.type,
+    "title": TEST_ANTHROPIC_CREDENTIALS.title,
+}
+
+
+class ClaudeCodeBlock(Block):
+    """
+    Execute tasks using Claude Code (Anthropic's AI coding assistant) in an E2B sandbox.
+
+    Claude Code can create files, install tools, run commands, and perform complex
+    coding tasks autonomously within a secure sandbox environment.
+    """
+
+    # Use base template - we'll install Claude Code ourselves for latest version
+    DEFAULT_TEMPLATE = "base"
+
+    class Input(BlockSchemaInput):
+        # Input schema for the block. Two API keys are required (E2B for the
+        # sandbox, Anthropic for Claude Code); everything except `prompt` is
+        # flagged as an advanced setting.
+
+        # API key used to create / reconnect to the E2B sandbox.
+        e2b_credentials: CredentialsMetaInput[
+            Literal[ProviderName.E2B], Literal["api_key"]
+        ] = CredentialsField(
+            description=(
+                "API key for the E2B platform to create the sandbox. "
+                "Get one on the [e2b website](https://e2b.dev/docs)"
+            ),
+        )
+
+        # API key exported into a newly created sandbox as ANTHROPIC_API_KEY.
+        anthropic_credentials: CredentialsMetaInput[
+            Literal[ProviderName.ANTHROPIC], Literal["api_key"]
+        ] = CredentialsField(
+            description=(
+                "API key for Anthropic to power Claude Code. "
+                "Get one at [Anthropic's website](https://console.anthropic.com)"
+            ),
+        )
+
+        # The instruction piped to the `claude` CLI on stdin.
+        prompt: str = SchemaField(
+            description=(
+                "The task or instruction for Claude Code to execute. "
+                "Claude Code can create files, install packages, run commands, "
+                "and perform complex coding tasks."
+            ),
+            placeholder="Create a hello world index.html file",
+            default="",
+            advanced=False,
+        )
+
+        # Passed to sandbox creation only; not applied when reconnecting.
+        timeout: int = SchemaField(
+            description=(
+                "Sandbox timeout in seconds. Claude Code tasks can take "
+                "a while, so set this appropriately for your task complexity. "
+                "Note: This only applies when creating a new sandbox. "
+                "When reconnecting to an existing sandbox via sandbox_id, "
+                "the original timeout is retained."
+            ),
+            default=300,  # 5 minutes default
+            advanced=True,
+        )
+
+        # Run in order on a newly created sandbox, after Claude Code is
+        # installed; a non-zero exit code aborts the execution.
+        setup_commands: list[str] = SchemaField(
+            description=(
+                "Optional shell commands to run before executing Claude Code. "
+                "Useful for installing dependencies or setting up the environment."
+            ),
+            default_factory=list,
+            advanced=True,
+        )
+
+        # Directory the command `cd`s into and the root searched for output files.
+        working_directory: str = SchemaField(
+            description="Working directory for Claude Code to operate in.",
+            default="/home/user",
+            advanced=True,
+        )
+
+        # Session/continuation support
+        # A non-empty session_id requires sandbox_id as well (validated in
+        # execute_claude_code).
+        session_id: str = SchemaField(
+            description=(
+                "Session ID to resume a previous conversation. "
+                "Leave empty for a new conversation. "
+                "Use the session_id from a previous run to continue that conversation."
+            ),
+            default="",
+            advanced=True,
+        )
+
+        # When set, the block connects to this sandbox instead of creating one.
+        sandbox_id: str = SchemaField(
+            description=(
+                "Sandbox ID to reconnect to an existing sandbox. "
+                "Required when resuming a session (along with session_id). "
+                "Use the sandbox_id from a previous run where dispose_sandbox was False."
+            ),
+            default="",
+            advanced=True,
+        )
+
+        # Only injected (as appended system prompt) when session_id is empty.
+        conversation_history: str = SchemaField(
+            description=(
+                "Previous conversation history to continue from. "
+                "Use this to restore context on a fresh sandbox if the previous one timed out. "
+                "Pass the conversation_history output from a previous run."
+            ),
+            default="",
+            advanced=True,
+        )
+
+        # When True the sandbox is killed after the run, success or failure.
+        dispose_sandbox: bool = SchemaField(
+            description=(
+                "Whether to dispose of the sandbox immediately after execution. "
+                "Set to False if you want to continue the conversation later "
+                "(you'll need both sandbox_id and session_id from the output)."
+            ),
+            default=True,
+            advanced=True,
+        )
+
+    class FileOutput(BaseModel):
+        """A file extracted from the sandbox."""
+
+        # Path of the file exactly as listed by `find` inside the sandbox.
+        path: str
+        relative_path: str  # Path relative to working directory (for GitHub, etc.)
+        # Last path component (the text after the final "/").
+        name: str
+        # File contents as text; byte content is decoded as UTF-8 with
+        # undecodable sequences replaced.
+        content: str
+
+    class Output(BlockSchemaOutput):
+        # Output schema for the block. On success `run` yields response, files,
+        # conversation_history, session_id and sandbox_id; on failure it yields
+        # error (plus sandbox_id when a preserved sandbox is known).
+
+        # The "result" field of Claude's JSON output, or the raw stdout when the
+        # output is not valid JSON.
+        response: str = SchemaField(
+            description="The output/response from Claude Code execution"
+        )
+        # Yielded as a list of plain dicts (FileOutput.model_dump()).
+        files: list["ClaudeCodeBlock.FileOutput"] = SchemaField(
+            description=(
+                "List of text files created/modified by Claude Code during this execution. "
+                "Each file has 'path', 'relative_path', 'name', and 'content' fields."
+            )
+        )
+        # Prior history (if any) with a "User: ...\nClaude: ..." entry appended.
+        conversation_history: str = SchemaField(
+            description=(
+                "Full conversation history including this turn. "
+                "Pass this to conversation_history input to continue on a fresh sandbox "
+                "if the previous sandbox timed out."
+            )
+        )
+        # Either the session_id that was passed in or a freshly generated UUID.
+        session_id: str = SchemaField(
+            description=(
+                "Session ID for this conversation. "
+                "Pass this back along with sandbox_id to continue the conversation."
+            )
+        )
+        sandbox_id: Optional[str] = SchemaField(
+            description=(
+                "ID of the sandbox instance. "
+                "Pass this back along with session_id to continue the conversation. "
+                "This is None if dispose_sandbox was True (sandbox was disposed)."
+            ),
+            default=None,
+        )
+        error: str = SchemaField(description="Error message if execution failed")
+
+    def __init__(self):
+        """Register the block's metadata, schemas and self-test fixtures."""
+        # Canned values shared by the expected test outputs and the mocked
+        # executor, defined once so the two sides cannot drift apart.
+        mock_response = "Created index.html with hello world content"
+        mock_history = (
+            "User: Create a hello world HTML file\n"
+            "Claude: Created index.html with hello world content"
+        )
+        mock_file = {
+            "path": "/home/user/index.html",
+            "relative_path": "index.html",
+            "name": "index.html",
+            "content": "<html>Hello World</html>",
+        }
+
+        def fake_execute_claude_code(*args, **kwargs):
+            # Stand-in for execute_claude_code; returns the same 5-tuple shape:
+            # (response, files, conversation_history, session_id, sandbox_id).
+            return (
+                mock_response,
+                [ClaudeCodeBlock.FileOutput(**mock_file)],
+                mock_history,
+                "test-session-id",
+                "sandbox_id",
+            )
+
+        super().__init__(
+            id="4e34f4a5-9b89-4326-ba77-2dd6750b7194",
+            description=(
+                "Execute tasks using Claude Code in an E2B sandbox. "
+                "Claude Code can create files, install tools, run commands, "
+                "and perform complex coding tasks autonomously."
+            ),
+            categories={BlockCategory.DEVELOPER_TOOLS, BlockCategory.AI},
+            input_schema=ClaudeCodeBlock.Input,
+            output_schema=ClaudeCodeBlock.Output,
+            test_credentials={
+                "e2b_credentials": TEST_E2B_CREDENTIALS,
+                "anthropic_credentials": TEST_ANTHROPIC_CREDENTIALS,
+            },
+            test_input={
+                "e2b_credentials": TEST_E2B_CREDENTIALS_INPUT,
+                "anthropic_credentials": TEST_ANTHROPIC_CREDENTIALS_INPUT,
+                "prompt": "Create a hello world HTML file",
+                "timeout": 300,
+                "setup_commands": [],
+                "working_directory": "/home/user",
+                "session_id": "",
+                "sandbox_id": "",
+                "conversation_history": "",
+                "dispose_sandbox": True,
+            },
+            test_output=[
+                ("response", mock_response),
+                ("files", [mock_file]),
+                ("conversation_history", mock_history),
+                ("session_id", str),
+                # None because dispose_sandbox=True in test_input
+                ("sandbox_id", None),
+            ],
+            test_mock={"execute_claude_code": fake_execute_claude_code},
+        )
+
+    async def execute_claude_code(
+        self,
+        e2b_api_key: str,
+        anthropic_api_key: str,
+        prompt: str,
+        timeout: int,
+        setup_commands: list[str],
+        working_directory: str,
+        session_id: str,
+        existing_sandbox_id: str,
+        conversation_history: str,
+        dispose_sandbox: bool,
+    ) -> tuple[str, list["ClaudeCodeBlock.FileOutput"], str, str, str]:
+        """
+        Execute Claude Code in an E2B sandbox.
+
+        Reconnects to ``existing_sandbox_id`` when given; otherwise creates a
+        new sandbox, installs Claude Code via npm and runs ``setup_commands``.
+        The prompt is then piped to the ``claude`` CLI from
+        ``working_directory`` and the files modified since just before the run
+        are collected.
+
+        Args:
+            e2b_api_key: API key used to create or connect to the sandbox.
+            anthropic_api_key: Exported as ANTHROPIC_API_KEY in a new sandbox.
+            prompt: Instruction sent to Claude Code on stdin.
+            timeout: Sandbox timeout in seconds (new sandboxes only).
+            setup_commands: Shell commands run once on a new sandbox.
+            working_directory: Directory Claude Code runs in and that is
+                scanned for output files.
+            session_id: Session to resume; empty starts a new session with a
+                generated UUID.
+            existing_sandbox_id: Sandbox to reconnect to; empty creates one.
+            conversation_history: Earlier turns; injected as appended system
+                prompt only when no session is being resumed.
+            dispose_sandbox: Kill the sandbox when done, on success or failure.
+
+        Returns:
+            Tuple of (response, files, conversation_history, session_id, sandbox_id)
+
+        Raises:
+            ValueError: If ``session_id`` is given without ``existing_sandbox_id``.
+            ClaudeCodeExecutionError: For any failure after validation; carries
+                the sandbox ID whenever a sandbox had been obtained.
+        """
+
+        # Validate that sandbox_id is provided when resuming a session
+        if session_id and not existing_sandbox_id:
+            raise ValueError(
+                "sandbox_id is required when resuming a session with session_id. "
+                "The session state is stored in the original sandbox. "
+                "If the sandbox has timed out, use conversation_history instead "
+                "to restore context on a fresh sandbox."
+            )
+
+        sandbox = None
+        sandbox_id = ""
+
+        try:
+            # Either reconnect to existing sandbox or create a new one
+            if existing_sandbox_id:
+                # Reconnect to existing sandbox for conversation continuation
+                sandbox = await BaseAsyncSandbox.connect(
+                    sandbox_id=existing_sandbox_id,
+                    api_key=e2b_api_key,
+                )
+                sandbox_id = sandbox.sandbox_id
+            else:
+                # Create new sandbox
+                sandbox = await BaseAsyncSandbox.create(
+                    template=self.DEFAULT_TEMPLATE,
+                    api_key=e2b_api_key,
+                    timeout=timeout,
+                    envs={"ANTHROPIC_API_KEY": anthropic_api_key},
+                )
+                # Capture the ID *before* install/setup: if either step fails
+                # while dispose_sandbox=False, the error must still carry the
+                # ID so the caller can reconnect to or clean up the sandbox.
+                sandbox_id = sandbox.sandbox_id
+
+                # Install Claude Code from npm (ensures we get the latest version)
+                install_result = await sandbox.commands.run(
+                    "npm install -g @anthropic-ai/claude-code@latest",
+                    timeout=120,  # 2 min timeout for install
+                )
+                if install_result.exit_code != 0:
+                    raise Exception(
+                        f"Failed to install Claude Code: {install_result.stderr}"
+                    )
+
+                # Run any user-provided setup commands
+                for cmd in setup_commands:
+                    setup_result = await sandbox.commands.run(cmd)
+                    if setup_result.exit_code != 0:
+                        raise Exception(
+                            f"Setup command failed: {cmd}\n"
+                            f"Exit code: {setup_result.exit_code}\n"
+                            f"Stdout: {setup_result.stdout}\n"
+                            f"Stderr: {setup_result.stderr}"
+                        )
+
+            # Generate or use provided session ID
+            current_session_id = session_id if session_id else str(uuid.uuid4())
+
+            # Build base Claude flags
+            base_flags = "-p --dangerously-skip-permissions --output-format json"
+
+            # Add conversation history context if provided (for fresh sandbox continuation)
+            history_flag = ""
+            if conversation_history and not session_id:
+                # Inject previous conversation as context via system prompt
+                # Use consistent escaping via _escape_prompt helper
+                escaped_history = self._escape_prompt(
+                    f"Previous conversation context: {conversation_history}"
+                )
+                history_flag = f" --append-system-prompt {escaped_history}"
+
+            # Build Claude command based on whether we're resuming or starting new
+            # Use shlex.quote for working_directory and session IDs to prevent injection
+            safe_working_dir = shlex.quote(working_directory)
+            if session_id:
+                # Resuming existing session (sandbox still alive)
+                safe_session_id = shlex.quote(session_id)
+                claude_command = (
+                    f"cd {safe_working_dir} && "
+                    f"echo {self._escape_prompt(prompt)} | "
+                    f"claude --resume {safe_session_id} {base_flags}"
+                )
+            else:
+                # New session with specific ID
+                safe_current_session_id = shlex.quote(current_session_id)
+                claude_command = (
+                    f"cd {safe_working_dir} && "
+                    f"echo {self._escape_prompt(prompt)} | "
+                    f"claude --session-id {safe_current_session_id} {base_flags}{history_flag}"
+                )
+
+            # Capture timestamp before running Claude Code to filter files later
+            # Capture timestamp 1 second in the past to avoid race condition with file creation
+            timestamp_result = await sandbox.commands.run(
+                "date -u -d '1 second ago' +%Y-%m-%dT%H:%M:%S"
+            )
+            if timestamp_result.exit_code != 0:
+                raise RuntimeError(
+                    f"Failed to capture timestamp: {timestamp_result.stderr}"
+                )
+            start_timestamp = (
+                timestamp_result.stdout.strip() if timestamp_result.stdout else None
+            )
+
+            result = await sandbox.commands.run(
+                claude_command,
+                timeout=0,  # No command timeout - let sandbox timeout handle it
+            )
+
+            # Check for command failure
+            if result.exit_code != 0:
+                error_msg = result.stderr or result.stdout or "Unknown error"
+                raise Exception(
+                    f"Claude Code command failed with exit code {result.exit_code}:\n"
+                    f"{error_msg}"
+                )
+
+            raw_output = result.stdout or ""
+
+            # Parse the JSON output to extract the response. Fall back to the
+            # raw text when the output is not JSON *or* is JSON that is not an
+            # object (a bare string/list has no "result" key to read).
+            try:
+                output_data = json.loads(raw_output)
+            except json.JSONDecodeError:
+                output_data = None
+
+            if isinstance(output_data, dict):
+                response = output_data.get("result", raw_output)
+            else:
+                response = raw_output
+
+            # Append this turn to whatever history the caller passed in
+            turn_entry = f"User: {prompt}\nClaude: {response}"
+            if conversation_history:
+                new_conversation_history = f"{conversation_history}\n\n{turn_entry}"
+            else:
+                new_conversation_history = turn_entry
+
+            # Extract files created/modified during this run
+            files = await self._extract_files(
+                sandbox, working_directory, start_timestamp
+            )
+
+            return (
+                response,
+                files,
+                new_conversation_history,
+                current_session_id,
+                sandbox_id,
+            )
+
+        except Exception as e:
+            # Wrap exception with sandbox_id so caller can access/cleanup
+            # the preserved sandbox when dispose_sandbox=False
+            raise ClaudeCodeExecutionError(str(e), sandbox_id) from e
+
+        finally:
+            # Runs on both success and failure; a preserved sandbox is left alive
+            if dispose_sandbox and sandbox:
+                await sandbox.kill()
+
+    async def _extract_files(
+        self,
+        sandbox: BaseAsyncSandbox,
+        working_directory: str,
+        since_timestamp: str | None = None,
+    ) -> list["ClaudeCodeBlock.FileOutput"]:
+        """
+        Collect the text files under ``working_directory`` from the sandbox.
+
+        Files are listed with ``find`` (skipping ``node_modules`` and ``.git``),
+        kept only if their path ends with a known text suffix, and read back
+        one by one. This is best-effort: a file that cannot be read is skipped,
+        and a failure of the listing itself yields whatever was collected so far.
+
+        Args:
+            sandbox: The E2B sandbox instance
+            working_directory: Directory to search for files
+            since_timestamp: When given, passed to ``find -newermt`` so that
+                only files modified after this time are returned
+
+        Returns:
+            List of FileOutput objects with path, relative_path, name, and content
+        """
+        # Path suffixes treated as safely readable text. "Dockerfile" is a bare
+        # file name rather than an extension, but is matched the same way.
+        text_suffixes = (
+            # documents / markup
+            ".txt", ".md", ".mdx", ".rst", ".tex", ".html", ".htm", ".xml",
+            # web
+            ".css", ".js", ".ts", ".jsx", ".tsx", ".vue", ".svelte", ".astro",
+            # data / configuration
+            ".json", ".yaml", ".yml", ".toml", ".ini", ".cfg", ".conf",
+            ".csv", ".log", ".env", ".gitignore", ".dockerfile", "Dockerfile",
+            # programming languages
+            ".py", ".rb", ".php", ".java", ".c", ".cpp", ".h", ".hpp", ".cs",
+            ".go", ".rs", ".swift", ".kt", ".scala",
+            # shell / query languages
+            ".sh", ".bash", ".zsh", ".sql", ".graphql",
+        )
+        collected: list[ClaudeCodeBlock.FileOutput] = []
+
+        try:
+            # Hidden files such as .env stay eligible; only node_modules and
+            # .git trees are excluded. The optional -newermt clause narrows the
+            # listing to files touched during this run.
+            newer_clause = ""
+            if since_timestamp:
+                newer_clause = f"-newermt {shlex.quote(since_timestamp)} "
+            listing = await sandbox.commands.run(
+                f"find {shlex.quote(working_directory)} -type f "
+                f"{newer_clause}"
+                "-not -path '*/node_modules/*' "
+                "-not -path '*/.git/*' "
+                "2>/dev/null"
+            )
+
+            for found_path in (listing.stdout or "").strip().split("\n"):
+                # Ignore blank lines and anything that is not a known text file
+                if not found_path or not found_path.endswith(text_suffixes):
+                    continue
+
+                try:
+                    body = await sandbox.files.read(found_path)
+                    # Handle bytes or string
+                    if isinstance(body, bytes):
+                        body = body.decode("utf-8", errors="replace")
+
+                    # Strip the working directory (and one leading slash) to
+                    # obtain the relative path; otherwise keep the full path
+                    if found_path.startswith(working_directory):
+                        remainder = found_path[len(working_directory) :]
+                        rel_path = remainder.removeprefix("/")
+                    else:
+                        rel_path = found_path
+
+                    collected.append(
+                        ClaudeCodeBlock.FileOutput(
+                            path=found_path,
+                            relative_path=rel_path,
+                            name=found_path.split("/")[-1],
+                            content=body,
+                        )
+                    )
+                except Exception:
+                    # Skip files that can't be read
+                    continue
+
+        except Exception:
+            # If file extraction fails, return what has been gathered (if anything)
+            pass
+
+        return collected
+
+    def _escape_prompt(self, prompt: str) -> str:
+        """Return ``prompt`` wrapped as one single-quoted shell word.
+
+        Every embedded single quote is rewritten as ``'"'"'`` (close the
+        quoted string, emit a double-quoted ``'``, reopen the quoted string)
+        and the result is enclosed in single quotes.
+        """
+        quote_escape = "'\"'\"'"
+        pieces = prompt.split("'")
+        return "'" + quote_escape.join(pieces) + "'"
+
+    async def run(
+        self,
+        input_data: Input,
+        *,
+        e2b_credentials: APIKeyCredentials,
+        anthropic_credentials: APIKeyCredentials,
+        **kwargs,
+    ) -> BlockOutput:
+        """Run Claude Code for ``input_data`` and yield the block outputs.
+
+        On success yields response, files, conversation_history, session_id
+        and sandbox_id (None when the sandbox was disposed). On failure yields
+        error, followed by sandbox_id when the failure came from
+        execute_claude_code, the sandbox was preserved and its ID is known.
+        """
+        keep_sandbox = not input_data.dispose_sandbox
+
+        try:
+            outcome = await self.execute_claude_code(
+                e2b_api_key=e2b_credentials.api_key.get_secret_value(),
+                anthropic_api_key=anthropic_credentials.api_key.get_secret_value(),
+                prompt=input_data.prompt,
+                timeout=input_data.timeout,
+                setup_commands=input_data.setup_commands,
+                working_directory=input_data.working_directory,
+                session_id=input_data.session_id,
+                existing_sandbox_id=input_data.sandbox_id,
+                conversation_history=input_data.conversation_history,
+                dispose_sandbox=input_data.dispose_sandbox,
+            )
+            reply, produced_files, history, claude_session, live_sandbox_id = outcome
+
+            # Every output is always emitted so the result matches the Output
+            # schema: files may be an empty list, sandbox_id may be None.
+            yield "response", reply
+            yield "files", [item.model_dump() for item in produced_files]
+            yield "conversation_history", history
+            yield "session_id", claude_session
+            yield "sandbox_id", live_sandbox_id if keep_sandbox else None
+
+        except Exception as err:
+            yield "error", str(err)
+            # A preserved sandbox would otherwise be orphaned: surface its ID
+            # so the user can reconnect to it or clean it up.
+            if (
+                isinstance(err, ClaudeCodeExecutionError)
+                and keep_sandbox
+                and err.sandbox_id
+            ):
+                yield "sandbox_id", err.sandbox_id
--- a/autogpt_platform/backend/backend/blocks/helpers/review.py
+++ b/autogpt_platform/backend/backend/blocks/helpers/review.py
@@ -9,7 +9,7 @@ from typing import Any, Optional
 from prisma.enums import ReviewStatus
 from pydantic import BaseModel

-from backend.data.execution import ExecutionContext, ExecutionStatus
+from backend.data.execution import ExecutionStatus
 from backend.data.human_review import ReviewResult
 from backend.executor.manager import async_update_node_execution_status
 from backend.util.clients import get_database_manager_async_client
@@ -28,6 +28,11 @@ class ReviewDecision(BaseModel):
 class HITLReviewHelper:
    """Helper class for Human-In-The-Loop review operations."""

+    @staticmethod
+    async def check_approval(**kwargs) -> Optional[ReviewResult]:
+        """Check if there's an existing approval for this node execution."""
+        return await get_database_manager_async_client().check_approval(**kwargs)
+
    @staticmethod
    async def get_or_create_human_review(**kwargs) -> Optional[ReviewResult]:
        """Create or retrieve a human review from the database."""
@@ -55,11 +60,11 @@ class HITLReviewHelper:
    async def _handle_review_request(
        input_data: Any,
        user_id: str,
+        node_id: str,
        node_exec_id: str,
        graph_exec_id: str,
        graph_id: str,
        graph_version: int,
-        execution_context: ExecutionContext,
        block_name: str = "Block",
        editable: bool = False,
    ) -> Optional[ReviewResult]:
@@ -69,11 +74,11 @@ class HITLReviewHelper:
        Args:
            input_data: The input data to be reviewed
            user_id: ID of the user requesting the review
+            node_id: ID of the node in the graph definition
            node_exec_id: ID of the node execution
            graph_exec_id: ID of the graph execution
            graph_id: ID of the graph
            graph_version: Version of the graph
-            execution_context: Current execution context
            block_name: Name of the block requesting review
            editable: Whether the reviewer can edit the data

@@ -83,15 +88,41 @@ class HITLReviewHelper:
        Raises:
            Exception: If review creation or status update fails
        """
-        # Skip review if safe mode is disabled - return auto-approved result
-        if not execution_context.human_in_the_loop_safe_mode:
+        # Note: Safe mode checks (human_in_the_loop_safe_mode, sensitive_action_safe_mode)
+        # are handled by the caller:
+        # - HITL blocks check human_in_the_loop_safe_mode in their run() method
+        # - Sensitive action blocks check sensitive_action_safe_mode in is_block_exec_need_review()
+        # This function only handles checking for existing approvals.
+
+        # Check if this node has already been approved (normal or auto-approval)
+        if approval_result := await HITLReviewHelper.check_approval(
+            node_exec_id=node_exec_id,
+            graph_exec_id=graph_exec_id,
+            node_id=node_id,
+            user_id=user_id,
+            input_data=input_data,
+        ):
            logger.info(
-                f"Block {block_name} skipping review for node {node_exec_id} - safe mode disabled"
+                f"Block {block_name} skipping review for node {node_exec_id} - "
+                f"found existing approval"
+            )
+            # Return a new ReviewResult with the current node_exec_id but approved status
+            # For auto-approvals, always use current input_data
+            # For normal approvals, use approval_result.data unless it's None
+            is_auto_approval = approval_result.node_exec_id != node_exec_id
+            approved_data = (
+                input_data
+                if is_auto_approval
+                else (
+                    approval_result.data
+                    if approval_result.data is not None
+                    else input_data
+                )
            )
            return ReviewResult(
-                data=input_data,
+                data=approved_data,
                status=ReviewStatus.APPROVED,
-                message="Auto-approved (safe mode disabled)",
+                message=approval_result.message,
                processed=True,
                node_exec_id=node_exec_id,
            )
@@ -103,7 +134,7 @@ class HITLReviewHelper:
            graph_id=graph_id,
            graph_version=graph_version,
            input_data=input_data,
-            message=f"Review required for {block_name} execution",
+            message=block_name,  # Use block_name directly as the message
            editable=editable,
        )

@@ -129,11 +160,11 @@ class HITLReviewHelper:
    async def handle_review_decision(
        input_data: Any,
        user_id: str,
+        node_id: str,
        node_exec_id: str,
        graph_exec_id: str,
        graph_id: str,
        graph_version: int,
-        execution_context: ExecutionContext,
        block_name: str = "Block",
        editable: bool = False,
    ) -> Optional[ReviewDecision]:
@@ -143,11 +174,11 @@ class HITLReviewHelper:
        Args:
            input_data: The input data to be reviewed
            user_id: ID of the user requesting the review
+            node_id: ID of the node in the graph definition
            node_exec_id: ID of the node execution
            graph_exec_id: ID of the graph execution
            graph_id: ID of the graph
            graph_version: Version of the graph
-            execution_context: Current execution context
            block_name: Name of the block requesting review
            editable: Whether the reviewer can edit the data

@@ -158,11 +189,11 @@ class HITLReviewHelper:
        review_result = await HITLReviewHelper._handle_review_request(
            input_data=input_data,
            user_id=user_id,
+            node_id=node_id,
            node_exec_id=node_exec_id,
            graph_exec_id=graph_exec_id,
            graph_id=graph_id,
            graph_version=graph_version,
-            execution_context=execution_context,
            block_name=block_name,
            editable=editable,
        )
--- a/autogpt_platform/backend/backend/blocks/human_in_the_loop.py
+++ b/autogpt_platform/backend/backend/blocks/human_in_the_loop.py
@@ -97,6 +97,7 @@ class HumanInTheLoopBlock(Block):
        input_data: Input,
        *,
        user_id: str,
+        node_id: str,
        node_exec_id: str,
        graph_exec_id: str,
        graph_id: str,
@@ -115,12 +116,12 @@ class HumanInTheLoopBlock(Block):
        decision = await self.handle_review_decision(
            input_data=input_data.data,
            user_id=user_id,
+            node_id=node_id,
            node_exec_id=node_exec_id,
            graph_exec_id=graph_exec_id,
            graph_id=graph_id,
            graph_version=graph_version,
-            execution_context=execution_context,
-            block_name=self.name,
+            block_name=input_data.name,  # Use user-provided name instead of block type
            editable=input_data.editable,
        )

--- a/autogpt_platform/backend/backend/conftest.py
+++ b/autogpt_platform/backend/backend/conftest.py
@@ -1,7 +1,7 @@
 import logging
 import os

-import pytest
+import pytest_asyncio
 from dotenv import load_dotenv

 from backend.util.logging import configure_logging
@@ -19,7 +19,7 @@ if not os.getenv("PRISMA_DEBUG"):
    prisma_logger.setLevel(logging.INFO)


-@pytest.fixture(scope="session")
+@pytest_asyncio.fixture(scope="session", loop_scope="session")
 async def server():
    from backend.util.test import SpinTestServer

@@ -27,7 +27,7 @@ async def server():
        yield server


-@pytest.fixture(scope="session", autouse=True)
+@pytest_asyncio.fixture(scope="session", loop_scope="session", autouse=True)
 async def graph_cleanup(server):
    created_graph_ids = []
    original_create_graph = server.agent_server.test_create_graph
--- a/autogpt_platform/backend/backend/data/block.py
+++ b/autogpt_platform/backend/backend/data/block.py
@@ -441,6 +441,7 @@ class Block(ABC, Generic[BlockSchemaInputType, BlockSchemaOutputType]):
        static_output: bool = False,
        block_type: BlockType = BlockType.STANDARD,
        webhook_config: Optional[BlockWebhookConfig | BlockManualWebhookConfig] = None,
+        is_sensitive_action: bool = False,
    ):
        """
        Initialize the block with the given schema.
@@ -473,8 +474,8 @@ class Block(ABC, Generic[BlockSchemaInputType, BlockSchemaOutputType]):
        self.static_output = static_output
        self.block_type = block_type
        self.webhook_config = webhook_config
+        self.is_sensitive_action = is_sensitive_action
        self.execution_stats: NodeExecutionStats = NodeExecutionStats()
-        self.is_sensitive_action: bool = False

        if self.webhook_config:
            if isinstance(self.webhook_config, BlockWebhookConfig):
@@ -622,6 +623,7 @@ class Block(ABC, Generic[BlockSchemaInputType, BlockSchemaOutputType]):
        input_data: BlockInput,
        *,
        user_id: str,
+        node_id: str,
        node_exec_id: str,
        graph_exec_id: str,
        graph_id: str,
@@ -648,11 +650,11 @@ class Block(ABC, Generic[BlockSchemaInputType, BlockSchemaOutputType]):
        decision = await HITLReviewHelper.handle_review_decision(
            input_data=input_data,
            user_id=user_id,
+            node_id=node_id,
            node_exec_id=node_exec_id,
            graph_exec_id=graph_exec_id,
            graph_id=graph_id,
            graph_version=graph_version,
-            execution_context=execution_context,
            block_name=self.name,
            editable=True,
        )
--- a/autogpt_platform/backend/backend/data/db.py
+++ b/autogpt_platform/backend/backend/data/db.py
@@ -121,10 +121,14 @@ async def _raw_with_schema(
    Supports placeholders:
        - {schema_prefix}: Table/type prefix (e.g., "platform".)
        - {schema}: Raw schema name for application tables (e.g., platform)
-        - {pgvector_schema}: Schema where pgvector is installed (defaults to "public")
+
+    Note on pgvector types:
+        Use unqualified ::vector and <=> operator in queries. PostgreSQL resolves
+        these via search_path, which includes the schema where pgvector is installed
+        on all environments (local, CI, dev).

    Args:
-        query_template: SQL query with {schema_prefix}, {schema}, and/or {pgvector_schema} placeholders
+        query_template: SQL query with {schema_prefix} and/or {schema} placeholders
        *args: Query parameters
        execute: If False, executes SELECT query. If True, executes INSERT/UPDATE/DELETE.
        client: Optional Prisma client for transactions (only used when execute=True).
@@ -135,20 +139,16 @@ async def _raw_with_schema(

    Example with vector type:
        await execute_raw_with_schema(
-            'INSERT INTO {schema_prefix}"Embedding" (vec) VALUES ($1::{pgvector_schema}.vector)',
+            'INSERT INTO {schema_prefix}"Embedding" (vec) VALUES ($1::vector)',
            embedding_data
        )
    """
    schema = get_database_schema()
    schema_prefix = f'"{schema}".' if schema != "public" else ""
-    # pgvector extension is typically installed in "public" schema
-    # On Supabase it may be in "extensions" but "public" is the common default
-    pgvector_schema = "public"

    formatted_query = query_template.format(
        schema_prefix=schema_prefix,
        schema=schema,
-        pgvector_schema=pgvector_schema,
    )

    import prisma as prisma_module
--- a/autogpt_platform/backend/backend/data/event_bus.py
+++ b/autogpt_platform/backend/backend/data/event_bus.py
@@ -103,8 +103,18 @@ class RedisEventBus(BaseRedisEventBus[M], ABC):
        return redis.get_redis()

    def publish_event(self, event: M, channel_key: str):
-        message, full_channel_name = self._serialize_message(event, channel_key)
-        self.connection.publish(full_channel_name, message)
+        """
+        Publish an event to Redis. Best-effort: any exception (not only
+        connection failures) is logged and swallowed instead of being raised.
+        """
+        try:
+            message, full_channel_name = self._serialize_message(event, channel_key)
+            self.connection.publish(full_channel_name, message)
+        except Exception:
+            logger.exception(
+                f"Failed to publish event to Redis channel {channel_key}. "
+                "Event bus operation will continue without Redis connectivity."
+            )

    def listen_events(self, channel_key: str) -> Generator[M, None, None]:
        pubsub, full_channel_name = self._get_pubsub_channel(
@@ -128,9 +138,19 @@ class AsyncRedisEventBus(BaseRedisEventBus[M], ABC):
        return await redis.get_redis_async()

    async def publish_event(self, event: M, channel_key: str):
-        message, full_channel_name = self._serialize_message(event, channel_key)
-        connection = await self.connection
-        await connection.publish(full_channel_name, message)
+        """
+        Publish an event to Redis. Best-effort: any exception (not only
+        connection failures) is logged and swallowed instead of being raised.
+        """
+        try:
+            message, full_channel_name = self._serialize_message(event, channel_key)
+            connection = await self.connection
+            await connection.publish(full_channel_name, message)
+        except Exception:
+            logger.exception(
+                f"Failed to publish event to Redis channel {channel_key}. "
+                "Event bus operation will continue without Redis connectivity."
+            )

    async def listen_events(self, channel_key: str) -> AsyncGenerator[M, None]:
        pubsub, full_channel_name = self._get_pubsub_channel(
--- a/autogpt_platform/backend/backend/data/event_bus_test.py
+++ b/autogpt_platform/backend/backend/data/event_bus_test.py
@@ -0,0 +1,56 @@
+"""
+Tests for event_bus graceful degradation when Redis is unavailable.
+"""
+
+from unittest.mock import AsyncMock, patch
+
+import pytest
+from pydantic import BaseModel
+
+from backend.data.event_bus import AsyncRedisEventBus
+
+
+class TestEvent(BaseModel):
+    """Test event model."""
+
+    message: str
+
+
+class TestNotificationBus(AsyncRedisEventBus[TestEvent]):
+    """Test implementation of AsyncRedisEventBus."""
+
+    Model = TestEvent
+
+    @property
+    def event_bus_name(self) -> str:
+        return "test_event_bus"
+
+
+@pytest.mark.asyncio
+async def test_publish_event_handles_connection_failure_gracefully():
+    """Test that publish_event logs exception instead of raising when Redis is unavailable."""
+    bus = TestNotificationBus()
+    event = TestEvent(message="test message")
+
+    # Mock get_redis_async to raise connection error
+    with patch(
+        "backend.data.event_bus.redis.get_redis_async",
+        side_effect=ConnectionError("Authentication required."),
+    ):
+        # Should not raise exception
+        await bus.publish_event(event, "test_channel")
+
+
+@pytest.mark.asyncio
+async def test_publish_event_works_with_redis_available():
+    """Test that publish_event works normally when Redis is available."""
+    bus = TestNotificationBus()
+    event = TestEvent(message="test message")
+
+    # Mock successful Redis connection
+    mock_redis = AsyncMock()
+    mock_redis.publish = AsyncMock()
+
+    with patch("backend.data.event_bus.redis.get_redis_async", return_value=mock_redis):
+        await bus.publish_event(event, "test_channel")
+        mock_redis.publish.assert_called_once()
--- a/autogpt_platform/backend/backend/data/execution.py
+++ b/autogpt_platform/backend/backend/data/execution.py
@@ -81,6 +81,8 @@ class ExecutionContext(BaseModel):
    This includes information needed by blocks, sub-graphs, and execution management.
    """

+    model_config = {"extra": "ignore"}
+
    human_in_the_loop_safe_mode: bool = True
    sensitive_action_safe_mode: bool = False
    user_timezone: str = "UTC"
--- a/autogpt_platform/backend/backend/data/graph.py
+++ b/autogpt_platform/backend/backend/data/graph.py
@@ -64,6 +64,8 @@ logger = logging.getLogger(__name__)
 class GraphSettings(BaseModel):
    # Use Annotated with BeforeValidator to coerce None to default values.
    # This handles cases where the database has null values for these fields.
+    model_config = {"extra": "ignore"}
+
    human_in_the_loop_safe_mode: Annotated[
        bool, BeforeValidator(lambda v: v if v is not None else True)
    ] = True
--- a/autogpt_platform/backend/backend/data/human_review.py
+++ b/autogpt_platform/backend/backend/data/human_review.py
@@ -6,10 +6,10 @@ Handles all database operations for pending human reviews.
 import asyncio
 import logging
 from datetime import datetime, timezone
-from typing import Optional
+from typing import TYPE_CHECKING, Optional

 from prisma.enums import ReviewStatus
-from prisma.models import PendingHumanReview
+from prisma.models import AgentNodeExecution, PendingHumanReview
 from prisma.types import PendingHumanReviewUpdateInput
 from pydantic import BaseModel

@@ -17,8 +17,12 @@ from backend.api.features.executions.review.model import (
    PendingHumanReviewModel,
    SafeJsonData,
 )
+from backend.data.execution import get_graph_execution_meta
 from backend.util.json import SafeJson

+if TYPE_CHECKING:
+    pass
+
 logger = logging.getLogger(__name__)


@@ -32,6 +36,125 @@ class ReviewResult(BaseModel):
    node_exec_id: str


+def get_auto_approve_key(graph_exec_id: str, node_id: str) -> str:
+    """Generate the special nodeExecId key for auto-approval records."""
+    return f"auto_approve_{graph_exec_id}_{node_id}"
+
+
+async def check_approval(
+    node_exec_id: str,
+    graph_exec_id: str,
+    node_id: str,
+    user_id: str,
+    input_data: SafeJsonData | None = None,
+) -> Optional[ReviewResult]:
+    """
+    Check if there's an existing approval for this node execution.
+
+    Checks both:
+    1. Normal approval by node_exec_id (previous run of the same node execution)
+    2. Auto-approval by special key pattern "auto_approve_{graph_exec_id}_{node_id}"
+
+    Args:
+        node_exec_id: ID of the node execution
+        graph_exec_id: ID of the graph execution
+        node_id: ID of the node definition (not execution)
+        user_id: ID of the user (for data isolation)
+        input_data: Current input data (used for auto-approvals to avoid stale data)
+
+    Returns:
+        ReviewResult if approval found (either normal or auto), None otherwise
+    """
+    auto_approve_key = get_auto_approve_key(graph_exec_id, node_id)
+
+    # Check for either normal approval or auto-approval in a single query
+    existing_review = await PendingHumanReview.prisma().find_first(
+        where={
+            "OR": [
+                {"nodeExecId": node_exec_id},
+                {"nodeExecId": auto_approve_key},
+            ],
+            "status": ReviewStatus.APPROVED,
+            "userId": user_id,
+        },
+    )
+
+    if existing_review:
+        is_auto_approval = existing_review.nodeExecId == auto_approve_key
+        logger.info(
+            f"Found {'auto-' if is_auto_approval else ''}approval for node {node_id} "
+            f"(exec: {node_exec_id}) in execution {graph_exec_id}"
+        )
+        # For auto-approvals, use current input_data to avoid replaying stale payload
+        # For normal approvals, use the stored payload (which may have been edited)
+        return ReviewResult(
+            data=(
+                input_data
+                if is_auto_approval and input_data is not None
+                else existing_review.payload
+            ),
+            status=ReviewStatus.APPROVED,
+            message=(
+                "Auto-approved (user approved all future actions for this node)"
+                if is_auto_approval
+                else existing_review.reviewMessage or ""
+            ),
+            processed=True,
+            node_exec_id=existing_review.nodeExecId,
+        )
+
+    return None
+
+
+async def create_auto_approval_record(
+    user_id: str,
+    graph_exec_id: str,
+    graph_id: str,
+    graph_version: int,
+    node_id: str,
+    payload: SafeJsonData,
+) -> None:
+    """
+    Create an auto-approval record for a node in this execution.
+
+    This is stored as a PendingHumanReview with a special nodeExecId pattern and
+    status=APPROVED, so later runs of this node in the same graph execution skip review.
+
+    Raises:
+        ValueError: If the graph execution is not found or doesn't belong to the user
+    """
+    # Validate that the graph execution belongs to this user (defense in depth)
+    graph_exec = await get_graph_execution_meta(
+        user_id=user_id, execution_id=graph_exec_id
+    )
+    if not graph_exec:
+        raise ValueError(
+            f"Graph execution {graph_exec_id} not found or doesn't belong to user {user_id}"
+        )
+
+    auto_approve_key = get_auto_approve_key(graph_exec_id, node_id)
+
+    await PendingHumanReview.prisma().upsert(
+        where={"nodeExecId": auto_approve_key},
+        data={
+            "create": {
+                "nodeExecId": auto_approve_key,
+                "userId": user_id,
+                "graphExecId": graph_exec_id,
+                "graphId": graph_id,
+                "graphVersion": graph_version,
+                "payload": SafeJson(payload),
+                "instructions": "Auto-approval record",
+                "editable": False,
+                "status": ReviewStatus.APPROVED,
+                "processed": True,
+                "reviewedAt": datetime.now(timezone.utc),
+            },
+            "update": {},  # Already exists, no update needed
+        },
+    )
+
+
 async def get_or_create_human_review(
    user_id: str,
    node_exec_id: str,
@@ -108,6 +231,87 @@ async def get_or_create_human_review(
        )


+async def get_pending_review_by_node_exec_id(
+    node_exec_id: str, user_id: str
+) -> Optional["PendingHumanReviewModel"]:
+    """
+    Get a pending review by its node execution ID.
+
+    Args:
+        node_exec_id: The node execution ID to look up
+        user_id: User ID for authorization (only returns if review belongs to this user)
+
+    Returns:
+        The pending review if found and belongs to user, None otherwise
+    """
+    review = await PendingHumanReview.prisma().find_first(
+        where={
+            "nodeExecId": node_exec_id,
+            "userId": user_id,
+            "status": ReviewStatus.WAITING,
+        }
+    )
+
+    if not review:
+        return None
+
+    # Local import to avoid event loop conflicts in tests
+    from backend.data.execution import get_node_execution
+
+    node_exec = await get_node_execution(review.nodeExecId)
+    node_id = node_exec.node_id if node_exec else review.nodeExecId
+    return PendingHumanReviewModel.from_db(review, node_id=node_id)
+
+
+async def get_pending_reviews_by_node_exec_ids(
+    node_exec_ids: list[str], user_id: str
+) -> dict[str, "PendingHumanReviewModel"]:
+    """
+    Get multiple pending reviews by their node execution IDs using batched queries.
+
+    Args:
+        node_exec_ids: List of node execution IDs to look up
+        user_id: User ID for authorization (only returns reviews belonging to this user)
+
+    Returns:
+        Dictionary mapping node_exec_id -> PendingHumanReviewModel for found reviews
+    """
+    if not node_exec_ids:
+        return {}
+
+    reviews = await PendingHumanReview.prisma().find_many(
+        where={
+            "nodeExecId": {"in": node_exec_ids},
+            "userId": user_id,
+            "status": ReviewStatus.WAITING,
+        }
+    )
+
+    if not reviews:
+        return {}
+
+    # Batch fetch all node executions to avoid N+1 queries
+    node_exec_ids_to_fetch = [review.nodeExecId for review in reviews]
+    node_execs = await AgentNodeExecution.prisma().find_many(
+        where={"id": {"in": node_exec_ids_to_fetch}},
+        include={"Node": True},
+    )
+
+    # Create mapping from node_exec_id to node_id
+    node_exec_id_to_node_id = {
+        node_exec.id: node_exec.agentNodeId for node_exec in node_execs
+    }
+
+    result = {}
+    for review in reviews:
+        node_id = node_exec_id_to_node_id.get(review.nodeExecId, review.nodeExecId)
+        result[review.nodeExecId] = PendingHumanReviewModel.from_db(
+            review, node_id=node_id
+        )
+
+    return result
+
+
 async def has_pending_reviews_for_graph_exec(graph_exec_id: str) -> bool:
    """
    Check if a graph execution has any pending reviews.
@@ -137,8 +341,11 @@ async def get_pending_reviews_for_user(
        page_size: Number of reviews per page

    Returns:
-        List of pending review models
+        List of pending review models with node_id included
    """
+    # Local import to avoid event loop conflicts in tests
+    from backend.data.execution import get_node_execution
+
    # Calculate offset for pagination
    offset = (page - 1) * page_size

@@ -149,7 +356,14 @@ async def get_pending_reviews_for_user(
        take=page_size,
    )

-    return [PendingHumanReviewModel.from_db(review) for review in reviews]
+    # Fetch node_id for each review from NodeExecution
+    result = []
+    for review in reviews:
+        node_exec = await get_node_execution(review.nodeExecId)
+        node_id = node_exec.node_id if node_exec else review.nodeExecId
+        result.append(PendingHumanReviewModel.from_db(review, node_id=node_id))
+
+    return result


 async def get_pending_reviews_for_execution(
@@ -163,8 +377,11 @@ async def get_pending_reviews_for_execution(
        user_id: User ID for security validation

    Returns:
-        List of pending review models
+        List of pending review models with node_id included
    """
+    # Local import to avoid event loop conflicts in tests
+    from backend.data.execution import get_node_execution
+
    reviews = await PendingHumanReview.prisma().find_many(
        where={
            "userId": user_id,
@@ -174,7 +391,14 @@ async def get_pending_reviews_for_execution(
        order={"createdAt": "asc"},
    )

-    return [PendingHumanReviewModel.from_db(review) for review in reviews]
+    # Fetch node_id for each review from NodeExecution
+    result = []
+    for review in reviews:
+        node_exec = await get_node_execution(review.nodeExecId)
+        node_id = node_exec.node_id if node_exec else review.nodeExecId
+        result.append(PendingHumanReviewModel.from_db(review, node_id=node_id))
+
+    return result


 async def process_all_reviews_for_execution(
@@ -244,11 +468,19 @@ async def process_all_reviews_for_execution(
    # Note: Execution resumption is now handled at the API layer after ALL reviews
    # for an execution are processed (both approved and rejected)

-    # Return as dict for easy access
-    return {
-        review.nodeExecId: PendingHumanReviewModel.from_db(review)
-        for review in updated_reviews
-    }
+    # Fetch node_id for each review and return as dict for easy access
+    # Local import to avoid event loop conflicts in tests
+    from backend.data.execution import get_node_execution
+
+    result = {}
+    for review in updated_reviews:
+        node_exec = await get_node_execution(review.nodeExecId)
+        node_id = node_exec.node_id if node_exec else review.nodeExecId
+        result[review.nodeExecId] = PendingHumanReviewModel.from_db(
+            review, node_id=node_id
+        )
+
+    return result


 async def update_review_processed_status(node_exec_id: str, processed: bool) -> None:
@@ -256,3 +488,44 @@ async def update_review_processed_status(node_exec_id: str, processed: bool) ->
    await PendingHumanReview.prisma().update(
        where={"nodeExecId": node_exec_id}, data={"processed": processed}
    )
+
+
+async def cancel_pending_reviews_for_execution(graph_exec_id: str, user_id: str) -> int:
+    """
+    Cancel all pending reviews for a graph execution (e.g., when execution is stopped).
+
+    Marks all WAITING reviews as REJECTED with a message indicating the execution was stopped.
+
+    Args:
+        graph_exec_id: The graph execution ID
+        user_id: User ID who owns the execution (for security validation)
+
+    Returns:
+        Number of reviews cancelled
+
+    Raises:
+        ValueError: If the graph execution is not found or doesn't belong to the user
+    """
+    # Validate user ownership before cancelling reviews
+    graph_exec = await get_graph_execution_meta(
+        user_id=user_id, execution_id=graph_exec_id
+    )
+    if not graph_exec:
+        raise ValueError(
+            f"Graph execution {graph_exec_id} not found or doesn't belong to user {user_id}"
+        )
+
+    result = await PendingHumanReview.prisma().update_many(
+        where={
+            "graphExecId": graph_exec_id,
+            "userId": user_id,
+            "status": ReviewStatus.WAITING,
+        },
+        data={
+            "status": ReviewStatus.REJECTED,
+            "reviewMessage": "Execution was stopped by user",
+            "processed": True,
+            "reviewedAt": datetime.now(timezone.utc),
+        },
+    )
+    return result
--- a/autogpt_platform/backend/backend/data/human_review_test.py
+++ b/autogpt_platform/backend/backend/data/human_review_test.py
@@ -36,7 +36,7 @@ def sample_db_review():
    return mock_review


-@pytest.mark.asyncio
+@pytest.mark.asyncio(loop_scope="function")
 async def test_get_or_create_human_review_new(
    mocker: pytest_mock.MockFixture,
    sample_db_review,
@@ -46,8 +46,8 @@ async def test_get_or_create_human_review_new(
    sample_db_review.status = ReviewStatus.WAITING
    sample_db_review.processed = False

-    mock_upsert = mocker.patch("backend.data.human_review.PendingHumanReview.prisma")
-    mock_upsert.return_value.upsert = AsyncMock(return_value=sample_db_review)
+    mock_prisma = mocker.patch("backend.data.human_review.PendingHumanReview.prisma")
+    mock_prisma.return_value.upsert = AsyncMock(return_value=sample_db_review)

    result = await get_or_create_human_review(
        user_id="test-user-123",
@@ -64,7 +64,7 @@ async def test_get_or_create_human_review_new(
    assert result is None


-@pytest.mark.asyncio
+@pytest.mark.asyncio(loop_scope="function")
 async def test_get_or_create_human_review_approved(
    mocker: pytest_mock.MockFixture,
    sample_db_review,
@@ -75,8 +75,8 @@ async def test_get_or_create_human_review_approved(
    sample_db_review.processed = False
    sample_db_review.reviewMessage = "Looks good"

-    mock_upsert = mocker.patch("backend.data.human_review.PendingHumanReview.prisma")
-    mock_upsert.return_value.upsert = AsyncMock(return_value=sample_db_review)
+    mock_prisma = mocker.patch("backend.data.human_review.PendingHumanReview.prisma")
+    mock_prisma.return_value.upsert = AsyncMock(return_value=sample_db_review)

    result = await get_or_create_human_review(
        user_id="test-user-123",
@@ -96,7 +96,7 @@ async def test_get_or_create_human_review_approved(
    assert result.message == "Looks good"


-@pytest.mark.asyncio
+@pytest.mark.asyncio(loop_scope="function")
 async def test_has_pending_reviews_for_graph_exec_true(
    mocker: pytest_mock.MockFixture,
 ):
@@ -109,7 +109,7 @@ async def test_has_pending_reviews_for_graph_exec_true(
    assert result is True


-@pytest.mark.asyncio
+@pytest.mark.asyncio(loop_scope="function")
 async def test_has_pending_reviews_for_graph_exec_false(
    mocker: pytest_mock.MockFixture,
 ):
@@ -122,7 +122,7 @@ async def test_has_pending_reviews_for_graph_exec_false(
    assert result is False


-@pytest.mark.asyncio
+@pytest.mark.asyncio(loop_scope="function")
 async def test_get_pending_reviews_for_user(
    mocker: pytest_mock.MockFixture,
    sample_db_review,
@@ -131,10 +131,19 @@ async def test_get_pending_reviews_for_user(
    mock_find_many = mocker.patch("backend.data.human_review.PendingHumanReview.prisma")
    mock_find_many.return_value.find_many = AsyncMock(return_value=[sample_db_review])

+    # Mock get_node_execution to return node with node_id (async function)
+    mock_node_exec = Mock()
+    mock_node_exec.node_id = "test_node_def_789"
+    mocker.patch(
+        "backend.data.execution.get_node_execution",
+        new=AsyncMock(return_value=mock_node_exec),
+    )
+
    result = await get_pending_reviews_for_user("test_user", page=2, page_size=10)

    assert len(result) == 1
    assert result[0].node_exec_id == "test_node_123"
+    assert result[0].node_id == "test_node_def_789"

    # Verify pagination parameters
    call_args = mock_find_many.return_value.find_many.call_args
@@ -142,7 +151,7 @@ async def test_get_pending_reviews_for_user(
    assert call_args.kwargs["take"] == 10


-@pytest.mark.asyncio
+@pytest.mark.asyncio(loop_scope="function")
 async def test_get_pending_reviews_for_execution(
    mocker: pytest_mock.MockFixture,
    sample_db_review,
@@ -151,12 +160,21 @@ async def test_get_pending_reviews_for_execution(
    mock_find_many = mocker.patch("backend.data.human_review.PendingHumanReview.prisma")
    mock_find_many.return_value.find_many = AsyncMock(return_value=[sample_db_review])

+    # Mock get_node_execution to return node with node_id (async function)
+    mock_node_exec = Mock()
+    mock_node_exec.node_id = "test_node_def_789"
+    mocker.patch(
+        "backend.data.execution.get_node_execution",
+        new=AsyncMock(return_value=mock_node_exec),
+    )
+
    result = await get_pending_reviews_for_execution(
        "test_graph_exec_456", "test-user-123"
    )

    assert len(result) == 1
    assert result[0].graph_exec_id == "test_graph_exec_456"
+    assert result[0].node_id == "test_node_def_789"

    # Verify it filters by execution and user
    call_args = mock_find_many.return_value.find_many.call_args
@@ -166,7 +184,7 @@ async def test_get_pending_reviews_for_execution(
    assert where_clause["status"] == ReviewStatus.WAITING


-@pytest.mark.asyncio
+@pytest.mark.asyncio(loop_scope="function")
 async def test_process_all_reviews_for_execution_success(
    mocker: pytest_mock.MockFixture,
    sample_db_review,
@@ -201,6 +219,14 @@ async def test_process_all_reviews_for_execution_success(
        new=AsyncMock(return_value=[updated_review]),
    )

+    # Mock get_node_execution to return node with node_id (async function)
+    mock_node_exec = Mock()
+    mock_node_exec.node_id = "test_node_def_789"
+    mocker.patch(
+        "backend.data.execution.get_node_execution",
+        new=AsyncMock(return_value=mock_node_exec),
+    )
+
    result = await process_all_reviews_for_execution(
        user_id="test-user-123",
        review_decisions={
@@ -211,9 +237,10 @@ async def test_process_all_reviews_for_execution_success(
    assert len(result) == 1
    assert "test_node_123" in result
    assert result["test_node_123"].status == ReviewStatus.APPROVED
+    assert result["test_node_123"].node_id == "test_node_def_789"


-@pytest.mark.asyncio
+@pytest.mark.asyncio(loop_scope="function")
 async def test_process_all_reviews_for_execution_validation_errors(
    mocker: pytest_mock.MockFixture,
 ):
@@ -233,7 +260,7 @@ async def test_process_all_reviews_for_execution_validation_errors(
        )


-@pytest.mark.asyncio
+@pytest.mark.asyncio(loop_scope="function")
 async def test_process_all_reviews_edit_permission_error(
    mocker: pytest_mock.MockFixture,
    sample_db_review,
@@ -259,7 +286,7 @@ async def test_process_all_reviews_edit_permission_error(
        )


-@pytest.mark.asyncio
+@pytest.mark.asyncio(loop_scope="function")
 async def test_process_all_reviews_mixed_approval_rejection(
    mocker: pytest_mock.MockFixture,
    sample_db_review,
@@ -329,6 +356,14 @@ async def test_process_all_reviews_mixed_approval_rejection(
        new=AsyncMock(return_value=[approved_review, rejected_review]),
    )

+    # Mock get_node_execution to return node with node_id (async function)
+    mock_node_exec = Mock()
+    mock_node_exec.node_id = "test_node_def_789"
+    mocker.patch(
+        "backend.data.execution.get_node_execution",
+        new=AsyncMock(return_value=mock_node_exec),
+    )
+
    result = await process_all_reviews_for_execution(
        user_id="test-user-123",
        review_decisions={
@@ -340,3 +375,5 @@ async def test_process_all_reviews_mixed_approval_rejection(
    assert len(result) == 2
    assert "test_node_123" in result
    assert "test_node_456" in result
+    assert result["test_node_123"].node_id == "test_node_def_789"
+    assert result["test_node_456"].node_id == "test_node_def_789"
--- a/autogpt_platform/backend/backend/executor/database.py
+++ b/autogpt_platform/backend/backend/executor/database.py
@@ -50,6 +50,8 @@ from backend.data.graph import (
    validate_graph_execution_permissions,
 )
 from backend.data.human_review import (
+    cancel_pending_reviews_for_execution,
+    check_approval,
    get_or_create_human_review,
    has_pending_reviews_for_graph_exec,
    update_review_processed_status,
@@ -190,6 +192,8 @@ class DatabaseManager(AppService):
    get_user_notification_preference = _(get_user_notification_preference)

    # Human In The Loop
+    cancel_pending_reviews_for_execution = _(cancel_pending_reviews_for_execution)
+    check_approval = _(check_approval)
    get_or_create_human_review = _(get_or_create_human_review)
    has_pending_reviews_for_graph_exec = _(has_pending_reviews_for_graph_exec)
    update_review_processed_status = _(update_review_processed_status)
@@ -313,6 +317,8 @@ class DatabaseManagerAsyncClient(AppServiceClient):
    set_execution_kv_data = d.set_execution_kv_data

    # Human In The Loop
+    cancel_pending_reviews_for_execution = d.cancel_pending_reviews_for_execution
+    check_approval = d.check_approval
    get_or_create_human_review = d.get_or_create_human_review
    update_review_processed_status = d.update_review_processed_status

--- a/autogpt_platform/backend/backend/executor/utils.py
+++ b/autogpt_platform/backend/backend/executor/utils.py
@@ -10,6 +10,7 @@ from pydantic import BaseModel, JsonValue, ValidationError

 from backend.data import execution as execution_db
 from backend.data import graph as graph_db
+from backend.data import human_review as human_review_db
 from backend.data import onboarding as onboarding_db
 from backend.data import user as user_db
 from backend.data.block import (
@@ -749,9 +750,27 @@ async def stop_graph_execution(
        if graph_exec.status in [
            ExecutionStatus.QUEUED,
            ExecutionStatus.INCOMPLETE,
+            ExecutionStatus.REVIEW,
        ]:
-            # If the graph is still on the queue, we can prevent them from being executed
-            # by setting the status to TERMINATED.
+            # If the graph is queued/incomplete/paused for review, terminate immediately
+            # No need to wait for executor since it's not actively running
+
+            # If graph is in REVIEW status, clean up pending reviews before terminating
+            if graph_exec.status == ExecutionStatus.REVIEW:
+                # Use human_review_db if Prisma connected, else database manager
+                review_db = (
+                    human_review_db
+                    if prisma.is_connected()
+                    else get_database_manager_async_client()
+                )
+                # Mark all pending reviews as rejected/cancelled
+                cancelled_count = await review_db.cancel_pending_reviews_for_execution(
+                    graph_exec_id, user_id
+                )
+                logger.info(
+                    f"Cancelled {cancelled_count} pending review(s) for stopped execution {graph_exec_id}"
+                )
+
            graph_exec.status = ExecutionStatus.TERMINATED

            await asyncio.gather(
@@ -887,9 +906,28 @@ async def add_graph_execution(
            nodes_to_skip=nodes_to_skip,
            execution_context=execution_context,
        )
-        logger.info(f"Publishing execution {graph_exec.id} to execution queue")
+        logger.info(f"Queueing execution {graph_exec.id}")
+
+        # Update execution status to QUEUED BEFORE publishing to prevent race condition
+        # where two concurrent requests could both publish the same execution
+        updated_exec = await edb.update_graph_execution_stats(
+            graph_exec_id=graph_exec.id,
+            status=ExecutionStatus.QUEUED,
+        )
+
+        # Verify the status update succeeded (prevents duplicate queueing in race conditions)
+        # If another request already updated the status, this execution will not be QUEUED
+        if not updated_exec or updated_exec.status != ExecutionStatus.QUEUED:
+            logger.warning(
+                f"Skipping queue publish for execution {graph_exec.id} - "
+                f"status update failed or execution already queued by another request"
+            )
+            return graph_exec
+
+        graph_exec.status = ExecutionStatus.QUEUED

        # Publish to execution queue for executor to pick up
+        # This happens AFTER status update to ensure only one request publishes
        exec_queue = await get_async_execution_queue()
        await exec_queue.publish_message(
            routing_key=GRAPH_EXECUTION_ROUTING_KEY,
@@ -897,13 +935,6 @@ async def add_graph_execution(
            exchange=GRAPH_EXECUTION_EXCHANGE,
        )
        logger.info(f"Published execution {graph_exec.id} to RabbitMQ queue")
-
-        # Update execution status to QUEUED
-        graph_exec.status = ExecutionStatus.QUEUED
-        await edb.update_graph_execution_stats(
-            graph_exec_id=graph_exec.id,
-            status=graph_exec.status,
-        )
    except BaseException as e:
        err = str(e) or type(e).__name__
        if not graph_exec:
--- a/autogpt_platform/backend/backend/executor/utils_test.py
+++ b/autogpt_platform/backend/backend/executor/utils_test.py
@@ -4,6 +4,7 @@ import pytest
 from pytest_mock import MockerFixture

 from backend.data.dynamic_fields import merge_execution_input, parse_execution_output
+from backend.data.execution import ExecutionStatus
 from backend.util.mock import MockObject


@@ -346,6 +347,7 @@ async def test_add_graph_execution_is_repeatable(mocker: MockerFixture):
    mock_graph_exec = mocker.MagicMock(spec=GraphExecutionWithNodes)
    mock_graph_exec.id = "execution-id-123"
    mock_graph_exec.node_executions = []  # Add this to avoid AttributeError
+    mock_graph_exec.status = ExecutionStatus.QUEUED  # Required for race condition check
    mock_graph_exec.to_graph_execution_entry.return_value = mocker.MagicMock()

    # Mock the queue and event bus
@@ -611,6 +613,7 @@ async def test_add_graph_execution_with_nodes_to_skip(mocker: MockerFixture):
    mock_graph_exec = mocker.MagicMock(spec=GraphExecutionWithNodes)
    mock_graph_exec.id = "execution-id-123"
    mock_graph_exec.node_executions = []
+    mock_graph_exec.status = ExecutionStatus.QUEUED  # Required for race condition check

    # Track what's passed to to_graph_execution_entry
    captured_kwargs = {}
@@ -670,3 +673,232 @@ async def test_add_graph_execution_with_nodes_to_skip(mocker: MockerFixture):
    # Verify nodes_to_skip was passed to to_graph_execution_entry
    assert "nodes_to_skip" in captured_kwargs
    assert captured_kwargs["nodes_to_skip"] == nodes_to_skip
+
+
+@pytest.mark.asyncio
+async def test_stop_graph_execution_in_review_status_cancels_pending_reviews(
+    mocker: MockerFixture,
+):
+    """Test that stopping an execution in REVIEW status cancels pending reviews."""
+    from backend.data.execution import ExecutionStatus, GraphExecutionMeta
+    from backend.executor.utils import stop_graph_execution
+
+    user_id = "test-user"
+    graph_exec_id = "test-exec-123"
+
+    # Mock graph execution in REVIEW status
+    mock_graph_exec = mocker.MagicMock(spec=GraphExecutionMeta)
+    mock_graph_exec.id = graph_exec_id
+    mock_graph_exec.status = ExecutionStatus.REVIEW
+
+    # Mock dependencies
+    mock_get_queue = mocker.patch("backend.executor.utils.get_async_execution_queue")
+    mock_queue_client = mocker.AsyncMock()
+    mock_get_queue.return_value = mock_queue_client
+
+    mock_prisma = mocker.patch("backend.executor.utils.prisma")
+    mock_prisma.is_connected.return_value = True
+
+    mock_human_review_db = mocker.patch("backend.executor.utils.human_review_db")
+    mock_human_review_db.cancel_pending_reviews_for_execution = mocker.AsyncMock(
+        return_value=2  # 2 reviews cancelled
+    )
+
+    mock_execution_db = mocker.patch("backend.executor.utils.execution_db")
+    mock_execution_db.get_graph_execution_meta = mocker.AsyncMock(
+        return_value=mock_graph_exec
+    )
+    mock_execution_db.update_graph_execution_stats = mocker.AsyncMock()
+
+    mock_get_event_bus = mocker.patch(
+        "backend.executor.utils.get_async_execution_event_bus"
+    )
+    mock_event_bus = mocker.MagicMock()
+    mock_event_bus.publish = mocker.AsyncMock()
+    mock_get_event_bus.return_value = mock_event_bus
+
+    mock_get_child_executions = mocker.patch(
+        "backend.executor.utils._get_child_executions"
+    )
+    mock_get_child_executions.return_value = []  # No children
+
+    # Call stop_graph_execution with timeout to allow status check
+    await stop_graph_execution(
+        user_id=user_id,
+        graph_exec_id=graph_exec_id,
+        wait_timeout=1.0,  # Wait to allow status check
+        cascade=True,
+    )
+
+    # Verify pending reviews were cancelled
+    mock_human_review_db.cancel_pending_reviews_for_execution.assert_called_once_with(
+        graph_exec_id, user_id
+    )
+
+    # Verify execution status was updated to TERMINATED
+    mock_execution_db.update_graph_execution_stats.assert_called_once()
+    call_kwargs = mock_execution_db.update_graph_execution_stats.call_args[1]
+    assert call_kwargs["graph_exec_id"] == graph_exec_id
+    assert call_kwargs["status"] == ExecutionStatus.TERMINATED
+
+
+@pytest.mark.asyncio
+async def test_stop_graph_execution_with_database_manager_when_prisma_disconnected(
+    mocker: MockerFixture,
+):
+    """Test that stop uses database manager when Prisma is not connected."""
+    from backend.data.execution import ExecutionStatus, GraphExecutionMeta
+    from backend.executor.utils import stop_graph_execution
+
+    user_id = "test-user"
+    graph_exec_id = "test-exec-456"
+
+    # Mock graph execution in REVIEW status
+    mock_graph_exec = mocker.MagicMock(spec=GraphExecutionMeta)
+    mock_graph_exec.id = graph_exec_id
+    mock_graph_exec.status = ExecutionStatus.REVIEW
+
+    # Mock dependencies
+    mock_get_queue = mocker.patch("backend.executor.utils.get_async_execution_queue")
+    mock_queue_client = mocker.AsyncMock()
+    mock_get_queue.return_value = mock_queue_client
+
+    # Prisma is NOT connected
+    mock_prisma = mocker.patch("backend.executor.utils.prisma")
+    mock_prisma.is_connected.return_value = False
+
+    # Mock database manager client
+    mock_get_db_manager = mocker.patch(
+        "backend.executor.utils.get_database_manager_async_client"
+    )
+    mock_db_manager = mocker.AsyncMock()
+    mock_db_manager.get_graph_execution_meta = mocker.AsyncMock(
+        return_value=mock_graph_exec
+    )
+    mock_db_manager.cancel_pending_reviews_for_execution = mocker.AsyncMock(
+        return_value=3  # 3 reviews cancelled
+    )
+    mock_db_manager.update_graph_execution_stats = mocker.AsyncMock()
+    mock_get_db_manager.return_value = mock_db_manager
+
+    mock_get_event_bus = mocker.patch(
+        "backend.executor.utils.get_async_execution_event_bus"
+    )
+    mock_event_bus = mocker.MagicMock()
+    mock_event_bus.publish = mocker.AsyncMock()
+    mock_get_event_bus.return_value = mock_event_bus
+
+    mock_get_child_executions = mocker.patch(
+        "backend.executor.utils._get_child_executions"
+    )
+    mock_get_child_executions.return_value = []  # No children
+
+    # Call stop_graph_execution with timeout
+    await stop_graph_execution(
+        user_id=user_id,
+        graph_exec_id=graph_exec_id,
+        wait_timeout=1.0,
+        cascade=True,
+    )
+
+    # Verify database manager was used for cancel_pending_reviews
+    mock_db_manager.cancel_pending_reviews_for_execution.assert_called_once_with(
+        graph_exec_id, user_id
+    )
+
+    # Verify execution status was updated via database manager
+    mock_db_manager.update_graph_execution_stats.assert_called_once()
+
+
+@pytest.mark.asyncio
+async def test_stop_graph_execution_cascades_to_child_with_reviews(
+    mocker: MockerFixture,
+):
+    """Test that stopping parent execution cascades to children and cancels their reviews."""
+    from backend.data.execution import ExecutionStatus, GraphExecutionMeta
+    from backend.executor.utils import stop_graph_execution
+
+    user_id = "test-user"
+    parent_exec_id = "parent-exec"
+    child_exec_id = "child-exec"
+
+    # Mock parent execution in RUNNING status
+    mock_parent_exec = mocker.MagicMock(spec=GraphExecutionMeta)
+    mock_parent_exec.id = parent_exec_id
+    mock_parent_exec.status = ExecutionStatus.RUNNING
+
+    # Mock child execution in REVIEW status
+    mock_child_exec = mocker.MagicMock(spec=GraphExecutionMeta)
+    mock_child_exec.id = child_exec_id
+    mock_child_exec.status = ExecutionStatus.REVIEW
+
+    # Mock dependencies
+    mock_get_queue = mocker.patch("backend.executor.utils.get_async_execution_queue")
+    mock_queue_client = mocker.AsyncMock()
+    mock_get_queue.return_value = mock_queue_client
+
+    mock_prisma = mocker.patch("backend.executor.utils.prisma")
+    mock_prisma.is_connected.return_value = True
+
+    mock_human_review_db = mocker.patch("backend.executor.utils.human_review_db")
+    mock_human_review_db.cancel_pending_reviews_for_execution = mocker.AsyncMock(
+        return_value=1  # 1 child review cancelled
+    )
+
+    # Mock execution_db to return different status based on which execution is queried
+    mock_execution_db = mocker.patch("backend.executor.utils.execution_db")
+
+    # Track call count to simulate status transition
+    call_count = {"count": 0}
+
+    async def get_exec_meta_side_effect(execution_id, user_id):
+        call_count["count"] += 1
+        if execution_id == parent_exec_id:
+            # After a few calls (child processing happens), transition parent to TERMINATED
+            # This simulates the executor service processing the stop request
+            if call_count["count"] > 3:
+                mock_parent_exec.status = ExecutionStatus.TERMINATED
+            return mock_parent_exec
+        elif execution_id == child_exec_id:
+            return mock_child_exec
+        return None
+
+    mock_execution_db.get_graph_execution_meta = mocker.AsyncMock(
+        side_effect=get_exec_meta_side_effect
+    )
+    mock_execution_db.update_graph_execution_stats = mocker.AsyncMock()
+
+    mock_get_event_bus = mocker.patch(
+        "backend.executor.utils.get_async_execution_event_bus"
+    )
+    mock_event_bus = mocker.MagicMock()
+    mock_event_bus.publish = mocker.AsyncMock()
+    mock_get_event_bus.return_value = mock_event_bus
+
+    # Mock _get_child_executions to return the child
+    mock_get_child_executions = mocker.patch(
+        "backend.executor.utils._get_child_executions"
+    )
+
+    def get_children_side_effect(parent_id):
+        if parent_id == parent_exec_id:
+            return [mock_child_exec]
+        return []
+
+    mock_get_child_executions.side_effect = get_children_side_effect
+
+    # Call stop_graph_execution on parent with cascade=True
+    await stop_graph_execution(
+        user_id=user_id,
+        graph_exec_id=parent_exec_id,
+        wait_timeout=1.0,
+        cascade=True,
+    )
+
+    # Verify child reviews were cancelled
+    mock_human_review_db.cancel_pending_reviews_for_execution.assert_called_once_with(
+        child_exec_id, user_id
+    )
+
+    # Verify at least one status update was written (this check does not distinguish parent from child)
+    assert mock_execution_db.update_graph_execution_stats.call_count >= 1
--- a/autogpt_platform/backend/backend/util/settings.py
+++ b/autogpt_platform/backend/backend/util/settings.py
@@ -350,6 +350,19 @@ class Config(UpdateTrackingModel["Config"], BaseSettings):
        description="Whether to mark failed scans as clean or not",
    )

+    agentgenerator_host: str = Field(
+        default="",
+        description="The host for the Agent Generator service (empty to use built-in)",
+    )
+    agentgenerator_port: int = Field(
+        default=8000,
+        description="The port for the Agent Generator service",
+    )
+    agentgenerator_timeout: int = Field(
+        default=120,
+        description="The timeout in seconds for Agent Generator service requests",
+    )
+
    enable_example_blocks: bool = Field(
        default=False,
        description="Whether to enable example blocks in production",
--- a/autogpt_platform/backend/backend/util/test.py
+++ b/autogpt_platform/backend/backend/util/test.py
@@ -1,3 +1,4 @@
+import asyncio
 import inspect
 import logging
 import time
@@ -58,6 +59,11 @@ class SpinTestServer:
        self.db_api.__exit__(exc_type, exc_val, exc_tb)
        self.notif_manager.__exit__(exc_type, exc_val, exc_tb)

+        # Give services time to fully shut down
+        # This prevents event loop issues where services haven't fully cleaned up
+        # before the next test starts
+        await asyncio.sleep(0.5)
+
    def setup_dependency_overrides(self):
        # Override get_user_id for testing
        self.agent_server.set_test_dependency_overrides(
--- a/autogpt_platform/backend/migrations/20260109181714_add_docs_embedding/migration.sql
+++ b/autogpt_platform/backend/migrations/20260109181714_add_docs_embedding/migration.sql
@@ -1,11 +1,37 @@
 -- CreateExtension
 -- Supabase: pgvector must be enabled via Dashboard → Database → Extensions first
-- Create in public schema so vector type is available across all schemas
+-- Ensures vector extension is in the current schema (from DATABASE_URL ?schema= param)
+-- If it exists in a different schema (e.g., public), we drop and recreate it in the current schema
+-- This ensures vector type is in the same schema as tables, making ::vector work without explicit qualification
 DO $$
+DECLARE
+    current_schema_name text;
+    vector_schema text;
 BEGIN
-    CREATE EXTENSION IF NOT EXISTS "vector" WITH SCHEMA "public";
-EXCEPTION WHEN OTHERS THEN
-    RAISE NOTICE 'vector extension not available or already exists, skipping';
+    -- Get the current schema from search_path
+    SELECT current_schema() INTO current_schema_name;
+
+    -- Check if vector extension exists and which schema it's in
+    SELECT n.nspname INTO vector_schema
+    FROM pg_extension e
+    JOIN pg_namespace n ON e.extnamespace = n.oid
+    WHERE e.extname = 'vector';
+
+    -- Handle removal if in wrong schema
+    IF vector_schema IS NOT NULL AND vector_schema != current_schema_name THEN
+        BEGIN
+            -- Vector exists in a different schema, drop it first (CASCADE also drops objects that depend on it, e.g. vector columns)
+            RAISE WARNING 'pgvector found in schema "%" but need it in "%". Dropping and reinstalling...',
+                vector_schema, current_schema_name;
+            EXECUTE 'DROP EXTENSION IF EXISTS vector CASCADE';
+        EXCEPTION WHEN OTHERS THEN
+            RAISE EXCEPTION 'Failed to drop pgvector from schema "%": %. You may need to drop it manually.',
+                vector_schema, SQLERRM;
+        END;
+    END IF;
+
+    -- Create extension in current schema (let it fail naturally if not available)
+    EXECUTE format('CREATE EXTENSION IF NOT EXISTS vector SCHEMA %I', current_schema_name);
 END $$;

 -- CreateEnum
@@ -19,7 +45,7 @@ CREATE TABLE "UnifiedContentEmbedding" (
    "contentType" "ContentType" NOT NULL,
    "contentId" TEXT NOT NULL,
    "userId" TEXT,
-    "embedding" public.vector(1536) NOT NULL,
+    "embedding" vector(1536) NOT NULL,
    "searchableText" TEXT NOT NULL,
    "metadata" JSONB NOT NULL DEFAULT '{}',

@@ -45,4 +71,4 @@ CREATE UNIQUE INDEX "UnifiedContentEmbedding_contentType_contentId_userId_key" O
 -- Uses cosine distance operator (<=>), which matches the query in hybrid_search.py
 -- Note: Drop first in case Prisma created a btree index (Prisma doesn't support HNSW)
 DROP INDEX IF EXISTS "UnifiedContentEmbedding_embedding_idx";
-CREATE INDEX "UnifiedContentEmbedding_embedding_idx" ON "UnifiedContentEmbedding" USING hnsw ("embedding" public.vector_cosine_ops);
+CREATE INDEX "UnifiedContentEmbedding_embedding_idx" ON "UnifiedContentEmbedding" USING hnsw ("embedding" vector_cosine_ops);
--- a/autogpt_platform/backend/migrations/20260112173500_add_supabase_extensions_to_platform_schema/migration.sql
+++ b/autogpt_platform/backend/migrations/20260112173500_add_supabase_extensions_to_platform_schema/migration.sql
@@ -1,71 +0,0 @@
-- Acknowledge Supabase-managed extensions to prevent drift warnings
-- These extensions are pre-installed by Supabase in specific schemas
-- This migration ensures they exist where available (Supabase) or skips gracefully (CI)
-
-- Create schemas (safe in both CI and Supabase)
-CREATE SCHEMA IF NOT EXISTS "extensions";
-
-- Extensions that exist in both CI and Supabase
-DO $$
-BEGIN
-    CREATE EXTENSION IF NOT EXISTS "pgcrypto" WITH SCHEMA "extensions";
-EXCEPTION WHEN OTHERS THEN
-    RAISE NOTICE 'pgcrypto extension not available, skipping';
-END $$;
-
-DO $$
-BEGIN
-    CREATE EXTENSION IF NOT EXISTS "uuid-ossp" WITH SCHEMA "extensions";
-EXCEPTION WHEN OTHERS THEN
-    RAISE NOTICE 'uuid-ossp extension not available, skipping';
-END $$;
-
-- Supabase-specific extensions (skip gracefully in CI)
-DO $$
-BEGIN
-    CREATE EXTENSION IF NOT EXISTS "pg_stat_statements" WITH SCHEMA "extensions";
-EXCEPTION WHEN OTHERS THEN
-    RAISE NOTICE 'pg_stat_statements extension not available, skipping';
-END $$;
-
-DO $$
-BEGIN
-    CREATE EXTENSION IF NOT EXISTS "pg_net" WITH SCHEMA "extensions";
-EXCEPTION WHEN OTHERS THEN
-    RAISE NOTICE 'pg_net extension not available, skipping';
-END $$;
-
-DO $$
-BEGIN
-    CREATE EXTENSION IF NOT EXISTS "pgjwt" WITH SCHEMA "extensions";
-EXCEPTION WHEN OTHERS THEN
-    RAISE NOTICE 'pgjwt extension not available, skipping';
-END $$;
-
-DO $$
-BEGIN
-    CREATE SCHEMA IF NOT EXISTS "graphql";
-    CREATE EXTENSION IF NOT EXISTS "pg_graphql" WITH SCHEMA "graphql";
-EXCEPTION WHEN OTHERS THEN
-    RAISE NOTICE 'pg_graphql extension not available, skipping';
-END $$;
-
-DO $$
-BEGIN
-    CREATE SCHEMA IF NOT EXISTS "pgsodium";
-    CREATE EXTENSION IF NOT EXISTS "pgsodium" WITH SCHEMA "pgsodium";
-EXCEPTION WHEN OTHERS THEN
-    RAISE NOTICE 'pgsodium extension not available, skipping';
-END $$;
-
-DO $$
-BEGIN
-    CREATE SCHEMA IF NOT EXISTS "vault";
-    CREATE EXTENSION IF NOT EXISTS "supabase_vault" WITH SCHEMA "vault";
-EXCEPTION WHEN OTHERS THEN
-    RAISE NOTICE 'supabase_vault extension not available, skipping';
-END $$;
-
-
-- Return to platform
-CREATE SCHEMA IF NOT EXISTS "platform";
--- a/autogpt_platform/backend/migrations/20260121200000_remove_node_execution_fk_from_pending_human_review/migration.sql
+++ b/autogpt_platform/backend/migrations/20260121200000_remove_node_execution_fk_from_pending_human_review/migration.sql
@@ -0,0 +1,7 @@
+-- Remove NodeExecution foreign key from PendingHumanReview
+-- The nodeExecId column remains as the primary key, but we remove the FK constraint
+-- to AgentNodeExecution since PendingHumanReview records can persist after node
+-- execution records are deleted.
+
+-- Drop foreign key constraint that linked PendingHumanReview.nodeExecId to AgentNodeExecution.id
+ALTER TABLE "PendingHumanReview" DROP CONSTRAINT IF EXISTS "PendingHumanReview_nodeExecId_fkey";
--- a/autogpt_platform/backend/schema.prisma
+++ b/autogpt_platform/backend/schema.prisma
@@ -517,8 +517,6 @@ model AgentNodeExecution {

  stats Json?

-  PendingHumanReview PendingHumanReview?
-
  @@index([agentGraphExecutionId, agentNodeId, executionStatus])
  @@index([agentNodeId, executionStatus])
  @@index([addedTime, queuedTime])
@@ -567,6 +565,7 @@ enum ReviewStatus {
 }

 // Pending human reviews for Human-in-the-loop blocks
+// Also stores auto-approval records with special nodeExecId patterns (e.g., "auto_approve_{graph_exec_id}_{node_id}")
 model PendingHumanReview {
  nodeExecId    String       @id
  userId        String
@@ -585,7 +584,6 @@ model PendingHumanReview {
  reviewedAt    DateTime?

  User           User                @relation(fields: [userId], references: [id], onDelete: Cascade)
-  NodeExecution  AgentNodeExecution  @relation(fields: [nodeExecId], references: [id], onDelete: Cascade)
  GraphExecution AgentGraphExecution @relation(fields: [graphExecId], references: [id], onDelete: Cascade)

  @@unique([nodeExecId]) // One pending review per node execution
--- a/autogpt_platform/backend/scripts/generate_block_docs.py
+++ b/autogpt_platform/backend/scripts/generate_block_docs.py
@@ -34,7 +34,10 @@ logger = logging.getLogger(__name__)

 # Default output directory relative to repo root
 DEFAULT_OUTPUT_DIR = (
-    Path(__file__).parent.parent.parent.parent / "docs" / "integrations"
+    Path(__file__).parent.parent.parent.parent
+    / "docs"
+    / "integrations"
+    / "block-integrations"
 )


@@ -366,12 +369,12 @@ def generate_block_markdown(
    lines.append("")

    # What it is (full description)
-    lines.append(f"### What it is")
+    lines.append("### What it is")
    lines.append(block.description or "No description available.")
    lines.append("")

    # How it works (manual section)
-    lines.append(f"### How it works")
+    lines.append("### How it works")
    how_it_works = manual_content.get(
        "how_it_works", "_Add technical explanation here._"
    )
@@ -383,7 +386,7 @@ def generate_block_markdown(
    # Inputs table (auto-generated)
    visible_inputs = [f for f in block.inputs if not f.hidden]
    if visible_inputs:
-        lines.append(f"### Inputs")
+        lines.append("### Inputs")
        lines.append("")
        lines.append("| Input | Description | Type | Required |")
        lines.append("|-------|-------------|------|----------|")
@@ -400,7 +403,7 @@ def generate_block_markdown(
    # Outputs table (auto-generated)
    visible_outputs = [f for f in block.outputs if not f.hidden]
    if visible_outputs:
-        lines.append(f"### Outputs")
+        lines.append("### Outputs")
        lines.append("")
        lines.append("| Output | Description | Type |")
        lines.append("|--------|-------------|------|")
@@ -414,13 +417,21 @@ def generate_block_markdown(
        lines.append("")

    # Possible use case (manual section)
-    lines.append(f"### Possible use case")
+    lines.append("### Possible use case")
    use_case = manual_content.get("use_case", "_Add practical use case examples here._")
    lines.append("<!-- MANUAL: use_case -->")
    lines.append(use_case)
    lines.append("<!-- END MANUAL -->")
    lines.append("")

+    # Optional per-block extras (only include if has content)
+    extras = manual_content.get("extras", "")
+    if extras:
+        lines.append("<!-- MANUAL: extras -->")
+        lines.append(extras)
+        lines.append("<!-- END MANUAL -->")
+        lines.append("")
+
    lines.append("---")
    lines.append("")

@@ -456,25 +467,52 @@ def get_block_file_mapping(blocks: list[BlockDoc]) -> dict[str, list[BlockDoc]]:
    return dict(file_mapping)


-def generate_overview_table(blocks: list[BlockDoc]) -> str:
-    """Generate the overview table markdown (blocks.md)."""
+def generate_overview_table(blocks: list[BlockDoc], block_dir_prefix: str = "") -> str:
+    """Generate the overview table markdown (README.md).
+
+    Args:
+        blocks: List of block documentation objects
+        block_dir_prefix: Prefix for block file links (e.g., "block-integrations/")
+    """
    lines = []

+    # GitBook YAML frontmatter
+    lines.append("---")
+    lines.append("layout:")
+    lines.append("  width: default")
+    lines.append("  title:")
+    lines.append("    visible: true")
+    lines.append("  description:")
+    lines.append("    visible: true")
+    lines.append("  tableOfContents:")
+    lines.append("    visible: false")
+    lines.append("  outline:")
+    lines.append("    visible: true")
+    lines.append("  pagination:")
+    lines.append("    visible: true")
+    lines.append("  metadata:")
+    lines.append("    visible: true")
+    lines.append("---")
+    lines.append("")
+
    lines.append("# AutoGPT Blocks Overview")
    lines.append("")
    lines.append(
        'AutoGPT uses a modular approach with various "blocks" to handle different tasks. These blocks are the building blocks of AutoGPT workflows, allowing users to create complex automations by combining simple, specialized components.'
    )
    lines.append("")
-    lines.append('!!! info "Creating Your Own Blocks"')
-    lines.append("    Want to create your own custom blocks? Check out our guides:")
-    lines.append("    ")
+    lines.append('{% hint style="info" %}')
+    lines.append("**Creating Your Own Blocks**")
+    lines.append("")
+    lines.append("Want to create your own custom blocks? Check out our guides:")
+    lines.append("")
    lines.append(
-        "    - [Build your own Blocks](https://docs.agpt.co/platform/new_blocks/) - Step-by-step tutorial with examples"
+        "* [Build your own Blocks](https://docs.agpt.co/platform/new_blocks/) - Step-by-step tutorial with examples"
    )
    lines.append(
-        "    - [Block SDK Guide](https://docs.agpt.co/platform/block-sdk-guide/) - Advanced SDK patterns with OAuth, webhooks, and provider configuration"
+        "* [Block SDK Guide](https://docs.agpt.co/platform/block-sdk-guide/) - Advanced SDK patterns with OAuth, webhooks, and provider configuration"
    )
+    lines.append("{% endhint %}")
    lines.append("")
    lines.append(
        "Below is a comprehensive list of all available blocks, categorized by their primary function. Click on any block name to view its detailed documentation."
@@ -537,7 +575,8 @@ def generate_overview_table(blocks: list[BlockDoc]) -> str:
                    else "No description"
                )
                short_desc = short_desc.replace("\n", " ").replace("|", "\\|")
-                lines.append(f"| [{block.name}]({file_path}#{anchor}) | {short_desc} |")
+                link_path = f"{block_dir_prefix}{file_path}"
+                lines.append(f"| [{block.name}]({link_path}#{anchor}) | {short_desc} |")
            lines.append("")
            continue

@@ -563,13 +602,55 @@ def generate_overview_table(blocks: list[BlockDoc]) -> str:
            )
            short_desc = short_desc.replace("\n", " ").replace("|", "\\|")

-            lines.append(f"| [{block.name}]({file_path}#{anchor}) | {short_desc} |")
+            link_path = f"{block_dir_prefix}{file_path}"
+            lines.append(f"| [{block.name}]({link_path}#{anchor}) | {short_desc} |")

        lines.append("")

    return "\n".join(lines)


+def generate_summary_md(
+    blocks: list[BlockDoc], root_dir: Path, block_dir_prefix: str = ""
+) -> str:
+    """Generate SUMMARY.md for GitBook navigation.
+
+    Args:
+        blocks: List of block documentation objects
+        root_dir: The root docs directory (e.g., docs/integrations/)
+        block_dir_prefix: Prefix for block file links (e.g., "block-integrations/")
+    """
+    lines = []
+    lines.append("# Table of contents")
+    lines.append("")
+    lines.append("* [AutoGPT Blocks Overview](README.md)")
+    lines.append("")
+
+    # Check for guides/ directory at the root level (docs/integrations/guides/)
+    guides_dir = root_dir / "guides"
+    if guides_dir.exists():
+        lines.append("## Guides")
+        lines.append("")
+        for guide_file in sorted(guides_dir.glob("*.md")):
+            # Build the title from the file stem: hyphens become underscores, then file_path_to_title formats it
+            title = file_path_to_title(guide_file.stem.replace("-", "_") + ".md")
+            lines.append(f"* [{title}](guides/{guide_file.name})")
+        lines.append("")
+
+    lines.append("## Block Integrations")
+    lines.append("")
+
+    file_mapping = get_block_file_mapping(blocks)
+    for file_path in sorted(file_mapping.keys()):
+        title = file_path_to_title(file_path)
+        link_path = f"{block_dir_prefix}{file_path}"
+        lines.append(f"* [{title}]({link_path})")
+
+    lines.append("")
+
+    return "\n".join(lines)
+
+
 def load_all_blocks_for_docs() -> list[BlockDoc]:
    """Load all blocks and extract documentation."""
    from backend.blocks import load_all_blocks
@@ -653,6 +734,16 @@ def write_block_docs(
                )
            )

+        # Add file-level additional_content section if present
+        file_additional = extract_manual_content(existing_content).get(
+            "additional_content", ""
+        )
+        if file_additional:
+            content_parts.append("<!-- MANUAL: additional_content -->")
+            content_parts.append(file_additional)
+            content_parts.append("<!-- END MANUAL -->")
+            content_parts.append("")
+
        full_content = file_header + "\n" + "\n".join(content_parts)
        generated_files[str(file_path)] = full_content

@@ -661,14 +752,28 @@ def write_block_docs(

        full_path.write_text(full_content)

-    # Generate overview file
-    overview_content = generate_overview_table(blocks)
-    overview_path = output_dir / "README.md"
+    # Generate overview file at the parent directory (docs/integrations/)
+    # with links prefixed to point into block-integrations/
+    root_dir = output_dir.parent
+    block_dir_name = output_dir.name  # "block-integrations"
+    block_dir_prefix = f"{block_dir_name}/"
+
+    overview_content = generate_overview_table(blocks, block_dir_prefix)
+    overview_path = root_dir / "README.md"
    generated_files["README.md"] = overview_content
    overview_path.write_text(overview_content)

    if verbose:
-        print("  Writing README.md (overview)")
+        print("  Writing README.md (overview) to parent directory")
+
+    # Generate SUMMARY.md for GitBook navigation at the parent directory
+    summary_content = generate_summary_md(blocks, root_dir, block_dir_prefix)
+    summary_path = root_dir / "SUMMARY.md"
+    generated_files["SUMMARY.md"] = summary_content
+    summary_path.write_text(summary_content)
+
+    if verbose:
+        print("  Writing SUMMARY.md (navigation) to parent directory")

    return generated_files

@@ -748,6 +853,16 @@ def check_docs_in_sync(output_dir: Path, blocks: list[BlockDoc]) -> bool:
            elif block_match.group(1).strip() != expected_block_content.strip():
                mismatched_blocks.append(block.name)

+        # Add file-level additional_content to expected content (matches write_block_docs)
+        file_additional = extract_manual_content(existing_content).get(
+            "additional_content", ""
+        )
+        if file_additional:
+            content_parts.append("<!-- MANUAL: additional_content -->")
+            content_parts.append(file_additional)
+            content_parts.append("<!-- END MANUAL -->")
+            content_parts.append("")
+
        expected_content = file_header + "\n" + "\n".join(content_parts)

        if existing_content.strip() != expected_content.strip():
@@ -757,11 +872,15 @@ def check_docs_in_sync(output_dir: Path, blocks: list[BlockDoc]) -> bool:
            out_of_sync_details.append((file_path, mismatched_blocks))
            all_match = False

-    # Check overview
-    overview_path = output_dir / "README.md"
+    # Check overview at the parent directory (docs/integrations/)
+    root_dir = output_dir.parent
+    block_dir_name = output_dir.name  # "block-integrations"
+    block_dir_prefix = f"{block_dir_name}/"
+
+    overview_path = root_dir / "README.md"
    if overview_path.exists():
        existing_overview = overview_path.read_text()
-        expected_overview = generate_overview_table(blocks)
+        expected_overview = generate_overview_table(blocks, block_dir_prefix)
        if existing_overview.strip() != expected_overview.strip():
            print("OUT OF SYNC: README.md (overview)")
            print("  The blocks overview table needs regeneration")
@@ -772,6 +891,21 @@ def check_docs_in_sync(output_dir: Path, blocks: list[BlockDoc]) -> bool:
        out_of_sync_details.append(("README.md", ["overview table"]))
        all_match = False

+    # Check SUMMARY.md at the parent directory
+    summary_path = root_dir / "SUMMARY.md"
+    if summary_path.exists():
+        existing_summary = summary_path.read_text()
+        expected_summary = generate_summary_md(blocks, root_dir, block_dir_prefix)
+        if existing_summary.strip() != expected_summary.strip():
+            print("OUT OF SYNC: SUMMARY.md (navigation)")
+            print("  The GitBook navigation needs regeneration")
+            out_of_sync_details.append(("SUMMARY.md", ["navigation"]))
+            all_match = False
+    else:
+        print("MISSING: SUMMARY.md (navigation)")
+        out_of_sync_details.append(("SUMMARY.md", ["navigation"]))
+        all_match = False
+
    # Check for unfilled manual sections
    unfilled_patterns = [
        "_Add a description of this category of blocks._",
--- a/autogpt_platform/backend/test/agent_generator/__init__.py
+++ b/autogpt_platform/backend/test/agent_generator/__init__.py
@@ -0,0 +1 @@
+"""Tests for agent generator module."""
--- a/autogpt_platform/backend/test/agent_generator/test_core_integration.py
+++ b/autogpt_platform/backend/test/agent_generator/test_core_integration.py
@@ -0,0 +1,273 @@
+"""
+Tests for the Agent Generator core module.
+
+This test suite verifies that the core functions correctly delegate to
+the external Agent Generator service.
+"""
+
+from unittest.mock import AsyncMock, patch
+
+import pytest
+
+from backend.api.features.chat.tools.agent_generator import core
+from backend.api.features.chat.tools.agent_generator.core import (
+    AgentGeneratorNotConfiguredError,
+)
+
+
+class TestServiceNotConfigured:
+    """Test that functions raise AgentGeneratorNotConfiguredError when service is not configured."""
+
+    @pytest.mark.asyncio
+    async def test_decompose_goal_raises_when_not_configured(self):
+        """Test that decompose_goal raises error when service not configured."""
+        with patch.object(core, "is_external_service_configured", return_value=False):
+            with pytest.raises(AgentGeneratorNotConfiguredError):
+                await core.decompose_goal("Build a chatbot")
+
+    @pytest.mark.asyncio
+    async def test_generate_agent_raises_when_not_configured(self):
+        """Test that generate_agent raises error when service not configured."""
+        with patch.object(core, "is_external_service_configured", return_value=False):
+            with pytest.raises(AgentGeneratorNotConfiguredError):
+                await core.generate_agent({"steps": []})
+
+    @pytest.mark.asyncio
+    async def test_generate_agent_patch_raises_when_not_configured(self):
+        """Test that generate_agent_patch raises error when service not configured."""
+        with patch.object(core, "is_external_service_configured", return_value=False):
+            with pytest.raises(AgentGeneratorNotConfiguredError):
+                await core.generate_agent_patch("Add a node", {"nodes": []})
+
+
+class TestDecomposeGoal:
+    """Test decompose_goal function service delegation."""
+
+    @pytest.mark.asyncio
+    async def test_calls_external_service(self):
+        """Test that decompose_goal calls the external service."""
+        expected_result = {"type": "instructions", "steps": ["Step 1"]}
+
+        with patch.object(
+            core, "is_external_service_configured", return_value=True
+        ), patch.object(
+            core, "decompose_goal_external", new_callable=AsyncMock
+        ) as mock_external:
+            mock_external.return_value = expected_result
+
+            result = await core.decompose_goal("Build a chatbot")
+
+            mock_external.assert_called_once_with("Build a chatbot", "")
+            assert result == expected_result
+
+    @pytest.mark.asyncio
+    async def test_passes_context_to_external_service(self):
+        """Test that decompose_goal passes context to external service."""
+        expected_result = {"type": "instructions", "steps": ["Step 1"]}
+
+        with patch.object(
+            core, "is_external_service_configured", return_value=True
+        ), patch.object(
+            core, "decompose_goal_external", new_callable=AsyncMock
+        ) as mock_external:
+            mock_external.return_value = expected_result
+
+            await core.decompose_goal("Build a chatbot", "Use Python")
+
+            mock_external.assert_called_once_with("Build a chatbot", "Use Python")
+
+    @pytest.mark.asyncio
+    async def test_returns_none_on_service_failure(self):
+        """Test that decompose_goal returns None when external service fails."""
+        with patch.object(
+            core, "is_external_service_configured", return_value=True
+        ), patch.object(
+            core, "decompose_goal_external", new_callable=AsyncMock
+        ) as mock_external:
+            mock_external.return_value = None
+
+            result = await core.decompose_goal("Build a chatbot")
+
+            assert result is None
+
+
+class TestGenerateAgent:
+    """Test generate_agent function service delegation."""
+
+    @pytest.mark.asyncio
+    async def test_calls_external_service(self):
+        """Test that generate_agent calls the external service."""
+        expected_result = {"name": "Test Agent", "nodes": [], "links": []}
+
+        with patch.object(
+            core, "is_external_service_configured", return_value=True
+        ), patch.object(
+            core, "generate_agent_external", new_callable=AsyncMock
+        ) as mock_external:
+            mock_external.return_value = expected_result
+
+            instructions = {"type": "instructions", "steps": ["Step 1"]}
+            result = await core.generate_agent(instructions)
+
+            mock_external.assert_called_once_with(instructions)
+            # Result should have id, version, is_active added if not present
+            assert result is not None
+            assert result["name"] == "Test Agent"
+            assert "id" in result
+            assert result["version"] == 1
+            assert result["is_active"] is True
+
+    @pytest.mark.asyncio
+    async def test_preserves_existing_id_and_version(self):
+        """Test that external service result preserves existing id and version."""
+        expected_result = {
+            "id": "existing-id",
+            "version": 3,
+            "is_active": False,
+            "name": "Test Agent",
+        }
+
+        with patch.object(
+            core, "is_external_service_configured", return_value=True
+        ), patch.object(
+            core, "generate_agent_external", new_callable=AsyncMock
+        ) as mock_external:
+            mock_external.return_value = expected_result.copy()
+
+            result = await core.generate_agent({"steps": []})
+
+            assert result is not None
+            assert result["id"] == "existing-id"
+            assert result["version"] == 3
+            assert result["is_active"] is False
+
+    @pytest.mark.asyncio
+    async def test_returns_none_when_external_service_fails(self):
+        """Test that generate_agent returns None when external service fails."""
+        with patch.object(
+            core, "is_external_service_configured", return_value=True
+        ), patch.object(
+            core, "generate_agent_external", new_callable=AsyncMock
+        ) as mock_external:
+            mock_external.return_value = None
+
+            result = await core.generate_agent({"steps": []})
+
+            assert result is None
+
+
+class TestGenerateAgentPatch:
+    """Test generate_agent_patch function service delegation."""
+
+    @pytest.mark.asyncio
+    async def test_calls_external_service(self):
+        """Test that generate_agent_patch calls the external service."""
+        expected_result = {"name": "Updated Agent", "nodes": [], "links": []}
+
+        with patch.object(
+            core, "is_external_service_configured", return_value=True
+        ), patch.object(
+            core, "generate_agent_patch_external", new_callable=AsyncMock
+        ) as mock_external:
+            mock_external.return_value = expected_result
+
+            current_agent = {"nodes": [], "links": []}
+            result = await core.generate_agent_patch("Add a node", current_agent)
+
+            mock_external.assert_called_once_with("Add a node", current_agent)
+            assert result == expected_result
+
+    @pytest.mark.asyncio
+    async def test_returns_clarifying_questions(self):
+        """Test that generate_agent_patch returns clarifying questions."""
+        expected_result = {
+            "type": "clarifying_questions",
+            "questions": [{"question": "What type of node?"}],
+        }
+
+        with patch.object(
+            core, "is_external_service_configured", return_value=True
+        ), patch.object(
+            core, "generate_agent_patch_external", new_callable=AsyncMock
+        ) as mock_external:
+            mock_external.return_value = expected_result
+
+            result = await core.generate_agent_patch("Add a node", {"nodes": []})
+
+            assert result == expected_result
+
+    @pytest.mark.asyncio
+    async def test_returns_none_when_external_service_fails(self):
+        """Test that generate_agent_patch returns None when service fails."""
+        with patch.object(
+            core, "is_external_service_configured", return_value=True
+        ), patch.object(
+            core, "generate_agent_patch_external", new_callable=AsyncMock
+        ) as mock_external:
+            mock_external.return_value = None
+
+            result = await core.generate_agent_patch("Add a node", {"nodes": []})
+
+            assert result is None
+
+
+class TestJsonToGraph:
+    """Test json_to_graph function."""
+
+    def test_converts_agent_json_to_graph(self):
+        """Test conversion of agent JSON to Graph model."""
+        agent_json = {
+            "id": "test-id",
+            "version": 2,
+            "is_active": True,
+            "name": "Test Agent",
+            "description": "A test agent",
+            "nodes": [
+                {
+                    "id": "node1",
+                    "block_id": "block1",
+                    "input_default": {"key": "value"},
+                    "metadata": {"x": 100},
+                }
+            ],
+            "links": [
+                {
+                    "id": "link1",
+                    "source_id": "node1",
+                    "sink_id": "output",
+                    "source_name": "result",
+                    "sink_name": "input",
+                    "is_static": False,
+                }
+            ],
+        }
+
+        graph = core.json_to_graph(agent_json)
+
+        assert graph.id == "test-id"
+        assert graph.version == 2
+        assert graph.is_active is True
+        assert graph.name == "Test Agent"
+        assert graph.description == "A test agent"
+        assert len(graph.nodes) == 1
+        assert graph.nodes[0].id == "node1"
+        assert graph.nodes[0].block_id == "block1"
+        assert len(graph.links) == 1
+        assert graph.links[0].source_id == "node1"
+
+    def test_generates_ids_if_missing(self):
+        """Test that missing IDs are generated."""
+        agent_json = {
+            "name": "Test Agent",
+            "nodes": [{"block_id": "block1"}],
+            "links": [],
+        }
+
+        graph = core.json_to_graph(agent_json)
+
+        assert graph.id is not None
+        assert graph.nodes[0].id is not None
+
+
+if __name__ == "__main__":
+    pytest.main([__file__, "-v"])
--- a/autogpt_platform/backend/test/agent_generator/test_service.py
+++ b/autogpt_platform/backend/test/agent_generator/test_service.py
@@ -0,0 +1,422 @@
+"""
+Tests for the Agent Generator external service client.
+
+This test suite verifies the external Agent Generator service integration,
+including service detection, API calls, and error handling.
+"""
+
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import httpx
+import pytest
+
+from backend.api.features.chat.tools.agent_generator import service
+
+
+class TestServiceConfiguration:
+    """Test service configuration detection."""
+
+    def setup_method(self):
+        """Reset settings and client singletons before each test."""
+        service._settings = None
+        service._client = None
+
+    def test_external_service_not_configured_when_host_empty(self):
+        """Test that external service is not configured when host is empty."""
+        mock_settings = MagicMock()
+        mock_settings.config.agentgenerator_host = ""
+
+        with patch.object(service, "_get_settings", return_value=mock_settings):
+            assert service.is_external_service_configured() is False
+
+    def test_external_service_configured_when_host_set(self):
+        """Test that external service is configured when host is set."""
+        mock_settings = MagicMock()
+        mock_settings.config.agentgenerator_host = "agent-generator.local"
+
+        with patch.object(service, "_get_settings", return_value=mock_settings):
+            assert service.is_external_service_configured() is True
+
+    def test_get_base_url(self):
+        """Test base URL construction."""
+        mock_settings = MagicMock()
+        mock_settings.config.agentgenerator_host = "agent-generator.local"
+        mock_settings.config.agentgenerator_port = 8000
+
+        with patch.object(service, "_get_settings", return_value=mock_settings):
+            url = service._get_base_url()
+            assert url == "http://agent-generator.local:8000"
+
+
+class TestDecomposeGoalExternal:
+    """Test decompose_goal_external function."""
+
+    def setup_method(self):
+        """Reset settings and client singletons before each test."""
+        service._settings = None
+        service._client = None
+
+    @pytest.mark.asyncio
+    async def test_decompose_goal_returns_instructions(self):
+        """Test successful decomposition returning instructions."""
+        mock_response = MagicMock()
+        mock_response.json.return_value = {
+            "success": True,
+            "type": "instructions",
+            "steps": ["Step 1", "Step 2"],
+        }
+        mock_response.raise_for_status = MagicMock()
+
+        mock_client = AsyncMock()
+        mock_client.post.return_value = mock_response
+
+        with patch.object(service, "_get_client", return_value=mock_client):
+            result = await service.decompose_goal_external("Build a chatbot")
+
+        assert result == {"type": "instructions", "steps": ["Step 1", "Step 2"]}
+        mock_client.post.assert_called_once_with(
+            "/api/decompose-description", json={"description": "Build a chatbot"}
+        )
+
+    @pytest.mark.asyncio
+    async def test_decompose_goal_returns_clarifying_questions(self):
+        """Test decomposition returning clarifying questions."""
+        mock_response = MagicMock()
+        mock_response.json.return_value = {
+            "success": True,
+            "type": "clarifying_questions",
+            "questions": ["What platform?", "What language?"],
+        }
+        mock_response.raise_for_status = MagicMock()
+
+        mock_client = AsyncMock()
+        mock_client.post.return_value = mock_response
+
+        with patch.object(service, "_get_client", return_value=mock_client):
+            result = await service.decompose_goal_external("Build something")
+
+        assert result == {
+            "type": "clarifying_questions",
+            "questions": ["What platform?", "What language?"],
+        }
+
+    @pytest.mark.asyncio
+    async def test_decompose_goal_with_context(self):
+        """Test decomposition with additional context."""
+        mock_response = MagicMock()
+        mock_response.json.return_value = {
+            "success": True,
+            "type": "instructions",
+            "steps": ["Step 1"],
+        }
+        mock_response.raise_for_status = MagicMock()
+
+        mock_client = AsyncMock()
+        mock_client.post.return_value = mock_response
+
+        with patch.object(service, "_get_client", return_value=mock_client):
+            await service.decompose_goal_external(
+                "Build a chatbot", context="Use Python"
+            )
+
+        mock_client.post.assert_called_once_with(
+            "/api/decompose-description",
+            json={"description": "Build a chatbot", "user_instruction": "Use Python"},
+        )
+
+    @pytest.mark.asyncio
+    async def test_decompose_goal_returns_unachievable_goal(self):
+        """Test decomposition returning unachievable goal response."""
+        mock_response = MagicMock()
+        mock_response.json.return_value = {
+            "success": True,
+            "type": "unachievable_goal",
+            "reason": "Cannot do X",
+            "suggested_goal": "Try Y instead",
+        }
+        mock_response.raise_for_status = MagicMock()
+
+        mock_client = AsyncMock()
+        mock_client.post.return_value = mock_response
+
+        with patch.object(service, "_get_client", return_value=mock_client):
+            result = await service.decompose_goal_external("Do something impossible")
+
+        assert result == {
+            "type": "unachievable_goal",
+            "reason": "Cannot do X",
+            "suggested_goal": "Try Y instead",
+        }
+
+    @pytest.mark.asyncio
+    async def test_decompose_goal_handles_http_error(self):
+        """Test decomposition handles HTTP errors gracefully."""
+        mock_client = AsyncMock()
+        mock_client.post.side_effect = httpx.HTTPStatusError(
+            "Server error", request=MagicMock(), response=MagicMock()
+        )
+
+        with patch.object(service, "_get_client", return_value=mock_client):
+            result = await service.decompose_goal_external("Build a chatbot")
+
+        assert result is None
+
+    @pytest.mark.asyncio
+    async def test_decompose_goal_handles_request_error(self):
+        """Test decomposition handles request errors gracefully."""
+        mock_client = AsyncMock()
+        mock_client.post.side_effect = httpx.RequestError("Connection failed")
+
+        with patch.object(service, "_get_client", return_value=mock_client):
+            result = await service.decompose_goal_external("Build a chatbot")
+
+        assert result is None
+
+    @pytest.mark.asyncio
+    async def test_decompose_goal_handles_service_error(self):
+        """Test decomposition handles service returning error."""
+        mock_response = MagicMock()
+        mock_response.json.return_value = {
+            "success": False,
+            "error": "Internal error",
+        }
+        mock_response.raise_for_status = MagicMock()
+
+        mock_client = AsyncMock()
+        mock_client.post.return_value = mock_response
+
+        with patch.object(service, "_get_client", return_value=mock_client):
+            result = await service.decompose_goal_external("Build a chatbot")
+
+        assert result is None
+
+
+class TestGenerateAgentExternal:
+    """Test generate_agent_external function."""
+
+    def setup_method(self):
+        """Reset settings and client singletons before each test."""
+        service._settings = None
+        service._client = None
+
+    @pytest.mark.asyncio
+    async def test_generate_agent_success(self):
+        """Test successful agent generation."""
+        agent_json = {
+            "name": "Test Agent",
+            "nodes": [],
+            "links": [],
+        }
+        mock_response = MagicMock()
+        mock_response.json.return_value = {
+            "success": True,
+            "agent_json": agent_json,
+        }
+        mock_response.raise_for_status = MagicMock()
+
+        mock_client = AsyncMock()
+        mock_client.post.return_value = mock_response
+
+        instructions = {"type": "instructions", "steps": ["Step 1"]}
+
+        with patch.object(service, "_get_client", return_value=mock_client):
+            result = await service.generate_agent_external(instructions)
+
+        assert result == agent_json
+        mock_client.post.assert_called_once_with(
+            "/api/generate-agent", json={"instructions": instructions}
+        )
+
+    @pytest.mark.asyncio
+    async def test_generate_agent_handles_error(self):
+        """Test agent generation handles errors gracefully."""
+        mock_client = AsyncMock()
+        mock_client.post.side_effect = httpx.RequestError("Connection failed")
+
+        with patch.object(service, "_get_client", return_value=mock_client):
+            result = await service.generate_agent_external({"steps": []})
+
+        assert result is None
+
+
+class TestGenerateAgentPatchExternal:
+    """Test generate_agent_patch_external function."""
+
+    def setup_method(self):
+        """Reset settings and client singletons before each test."""
+        service._settings = None
+        service._client = None
+
+    @pytest.mark.asyncio
+    async def test_generate_patch_returns_updated_agent(self):
+        """Test successful patch generation returning updated agent."""
+        updated_agent = {
+            "name": "Updated Agent",
+            "nodes": [{"id": "1", "block_id": "test"}],
+            "links": [],
+        }
+        mock_response = MagicMock()
+        mock_response.json.return_value = {
+            "success": True,
+            "agent_json": updated_agent,
+        }
+        mock_response.raise_for_status = MagicMock()
+
+        mock_client = AsyncMock()
+        mock_client.post.return_value = mock_response
+
+        current_agent = {"name": "Old Agent", "nodes": [], "links": []}
+
+        with patch.object(service, "_get_client", return_value=mock_client):
+            result = await service.generate_agent_patch_external(
+                "Add a new node", current_agent
+            )
+
+        assert result == updated_agent
+        mock_client.post.assert_called_once_with(
+            "/api/update-agent",
+            json={
+                "update_request": "Add a new node",
+                "current_agent_json": current_agent,
+            },
+        )
+
+    @pytest.mark.asyncio
+    async def test_generate_patch_returns_clarifying_questions(self):
+        """Test patch generation returning clarifying questions."""
+        mock_response = MagicMock()
+        mock_response.json.return_value = {
+            "success": True,
+            "type": "clarifying_questions",
+            "questions": ["What type of node?"],
+        }
+        mock_response.raise_for_status = MagicMock()
+
+        mock_client = AsyncMock()
+        mock_client.post.return_value = mock_response
+
+        with patch.object(service, "_get_client", return_value=mock_client):
+            result = await service.generate_agent_patch_external(
+                "Add something", {"nodes": []}
+            )
+
+        assert result == {
+            "type": "clarifying_questions",
+            "questions": ["What type of node?"],
+        }
+
+
+class TestHealthCheck:
+    """Test health_check function."""
+
+    def setup_method(self):
+        """Reset singletons before each test."""
+        service._settings = None
+        service._client = None
+
+    @pytest.mark.asyncio
+    async def test_health_check_returns_false_when_not_configured(self):
+        """Test health check returns False when service not configured."""
+        with patch.object(
+            service, "is_external_service_configured", return_value=False
+        ):
+            result = await service.health_check()
+            assert result is False
+
+    @pytest.mark.asyncio
+    async def test_health_check_returns_true_when_healthy(self):
+        """Test health check returns True when service is healthy."""
+        mock_response = MagicMock()
+        mock_response.json.return_value = {
+            "status": "healthy",
+            "blocks_loaded": True,
+        }
+        mock_response.raise_for_status = MagicMock()
+
+        mock_client = AsyncMock()
+        mock_client.get.return_value = mock_response
+
+        with patch.object(service, "is_external_service_configured", return_value=True):
+            with patch.object(service, "_get_client", return_value=mock_client):
+                result = await service.health_check()
+
+        assert result is True
+        mock_client.get.assert_called_once_with("/health")
+
+    @pytest.mark.asyncio
+    async def test_health_check_returns_false_when_not_healthy(self):
+        """Test health check returns False when service is not healthy."""
+        mock_response = MagicMock()
+        mock_response.json.return_value = {
+            "status": "unhealthy",
+            "blocks_loaded": False,
+        }
+        mock_response.raise_for_status = MagicMock()
+
+        mock_client = AsyncMock()
+        mock_client.get.return_value = mock_response
+
+        with patch.object(service, "is_external_service_configured", return_value=True):
+            with patch.object(service, "_get_client", return_value=mock_client):
+                result = await service.health_check()
+
+        assert result is False
+
+    @pytest.mark.asyncio
+    async def test_health_check_returns_false_on_error(self):
+        """Test health check returns False on connection error."""
+        mock_client = AsyncMock()
+        mock_client.get.side_effect = httpx.RequestError("Connection failed")
+
+        with patch.object(service, "is_external_service_configured", return_value=True):
+            with patch.object(service, "_get_client", return_value=mock_client):
+                result = await service.health_check()
+
+        assert result is False
+
+
+class TestGetBlocksExternal:
+    """Test get_blocks_external function."""
+
+    def setup_method(self):
+        """Reset settings and client singletons before each test."""
+        service._settings = None
+        service._client = None
+
+    @pytest.mark.asyncio
+    async def test_get_blocks_success(self):
+        """Test successful blocks retrieval."""
+        blocks = [
+            {"id": "block1", "name": "Block 1"},
+            {"id": "block2", "name": "Block 2"},
+        ]
+        mock_response = MagicMock()
+        mock_response.json.return_value = {
+            "success": True,
+            "blocks": blocks,
+        }
+        mock_response.raise_for_status = MagicMock()
+
+        mock_client = AsyncMock()
+        mock_client.get.return_value = mock_response
+
+        with patch.object(service, "_get_client", return_value=mock_client):
+            result = await service.get_blocks_external()
+
+        assert result == blocks
+        mock_client.get.assert_called_once_with("/api/blocks")
+
+    @pytest.mark.asyncio
+    async def test_get_blocks_handles_error(self):
+        """Test blocks retrieval handles errors gracefully."""
+        mock_client = AsyncMock()
+        mock_client.get.side_effect = httpx.RequestError("Connection failed")
+
+        with patch.object(service, "_get_client", return_value=mock_client):
+            result = await service.get_blocks_external()
+
+        assert result is None
+
+
+if __name__ == "__main__":
+    pytest.main([__file__, "-v"])
--- a/autogpt_platform/frontend/.env.default
+++ b/autogpt_platform/frontend/.env.default
@@ -29,4 +29,4 @@ NEXT_PUBLIC_CLOUDFLARE_TURNSTILE_SITE_KEY=
 NEXT_PUBLIC_TURNSTILE=disabled

 # PR previews
-NEXT_PUBLIC_PREVIEW_STEALING_DEV=
+NEXT_PUBLIC_PREVIEW_STEALING_DEV=
--- a/autogpt_platform/frontend/CONTRIBUTING.md
+++ b/autogpt_platform/frontend/CONTRIBUTING.md
@@ -175,6 +175,8 @@ While server components and actions are cool and cutting-edge, they introduce a

 - Prefer [React Query](https://tanstack.com/query/latest/docs/framework/react/overview) for server state, colocated near consumers (see [state colocation](https://kentcdodds.com/blog/state-colocation-will-make-your-react-app-faster))
 - Co-locate UI state inside components/hooks; keep global state minimal
+- Avoid `useMemo` and `useCallback` unless you have a measured performance issue
+- Do not abuse `useEffect`; prefer state colocation and derive values directly when possible

 ### Styling and components

@@ -549,9 +551,48 @@ Files:
 Types:

 - Prefer `interface` for object shapes
-- Component props should be `interface Props { ... }`
+- Component props should be `interface Props { ... }` (not exported)
+- Only use specific exported names (e.g., `export interface MyComponentProps`) when the interface needs to be used outside the component
+- Keep type definitions inline with the component - do not create separate `types.ts` files unless types are shared across multiple files
 - Use precise types; avoid `any` and unsafe casts

+**Props naming examples:**
+
+```tsx
+// ✅ Good - internal props, not exported
+interface Props {
+  title: string;
+  onClose: () => void;
+}
+
+export function Modal({ title, onClose }: Props) {
+  // ...
+}
+
+// ✅ Good - exported when needed externally
+export interface ModalProps {
+  title: string;
+  onClose: () => void;
+}
+
+export function Modal({ title, onClose }: ModalProps) {
+  // ...
+}
+
+// ❌ Bad - unnecessarily specific name for internal use
+interface ModalComponentProps {
+  title: string;
+  onClose: () => void;
+}
+
+// ❌ Bad - separate types.ts file for single component
+// types.ts
+export interface ModalProps { ... }
+
+// Modal.tsx
+import type { ModalProps } from './types';
+```
+
 Parameters:

 - If more than one parameter is needed, pass a single `Args` object for clarity
--- a/autogpt_platform/frontend/orval.config.ts
+++ b/autogpt_platform/frontend/orval.config.ts
@@ -16,6 +16,12 @@ export default defineConfig({
      client: "react-query",
      httpClient: "fetch",
      indexFiles: false,
+      mock: {
+        type: "msw",
+        baseUrl: "http://localhost:3000/api/proxy",
+        generateEachHttpStatus: true,
+        delay: 0,
+      },
      override: {
        mutator: {
          path: "./mutators/custom-mutator.ts",
--- a/autogpt_platform/frontend/package.json
+++ b/autogpt_platform/frontend/package.json
@@ -15,6 +15,8 @@
    "types": "tsc --noEmit",
    "test": "NEXT_PUBLIC_PW_TEST=true next build --turbo && playwright test",
    "test-ui": "NEXT_PUBLIC_PW_TEST=true next build --turbo && playwright test --ui",
+    "test:unit": "vitest run",
+    "test:unit:watch": "vitest",
    "test:no-build": "playwright test",
    "gentests": "playwright codegen http://localhost:3000",
    "storybook": "storybook dev -p 6006",
@@ -118,6 +120,7 @@
  },
  "devDependencies": {
    "@chromatic-com/storybook": "4.1.2",
+    "happy-dom": "20.3.4",
    "@opentelemetry/instrumentation": "0.209.0",
    "@playwright/test": "1.56.1",
    "@storybook/addon-a11y": "9.1.5",
@@ -127,6 +130,8 @@
    "@storybook/nextjs": "9.1.5",
    "@tanstack/eslint-plugin-query": "5.91.2",
    "@tanstack/react-query-devtools": "5.90.2",
+    "@testing-library/dom": "10.4.1",
+    "@testing-library/react": "16.3.2",
    "@types/canvas-confetti": "1.9.0",
    "@types/lodash": "4.17.20",
    "@types/negotiator": "0.6.4",
@@ -135,6 +140,7 @@
    "@types/react-dom": "18.3.5",
    "@types/react-modal": "3.16.3",
    "@types/react-window": "1.8.8",
+    "@vitejs/plugin-react": "5.1.2",
    "axe-playwright": "2.2.2",
    "chromatic": "13.3.3",
    "concurrently": "9.2.1",
@@ -153,7 +159,9 @@
    "require-in-the-middle": "8.0.1",
    "storybook": "9.1.5",
    "tailwindcss": "3.4.17",
-    "typescript": "5.9.3"
+    "typescript": "5.9.3",
+    "vite-tsconfig-paths": "6.0.4",
+    "vitest": "4.0.17"
  },
  "msw": {
    "workerDirectory": [
--- a/autogpt_platform/frontend/pnpm-lock.yaml
+++ b/autogpt_platform/frontend/pnpm-lock.yaml
--- a/autogpt_platform/frontend/src/app/(no-navbar)/logout/page.tsx
+++ b/autogpt_platform/frontend/src/app/(no-navbar)/logout/page.tsx
@@ -0,0 +1,58 @@
+"use client";
+
+import { LoadingSpinner } from "@/components/atoms/LoadingSpinner/LoadingSpinner";
+import { Text } from "@/components/atoms/Text/Text";
+import { useToast } from "@/components/molecules/Toast/use-toast";
+import { useSupabase } from "@/lib/supabase/hooks/useSupabase";
+import { useRouter } from "next/navigation";
+import { useEffect, useRef } from "react";
+
+const LOGOUT_REDIRECT_DELAY_MS = 400; // Milliseconds awaited after the logout attempt before redirecting to /login.
+
+function wait(ms: number): Promise<void> { // Returns a promise that resolves once `ms` milliseconds have elapsed.
+  return new Promise(function resolveAfterDelay(resolve) {
+    setTimeout(resolve, ms); // The timer calls `resolve` when it fires; there is no rejection path.
+  });
+}
+
+export default function LogoutPage() { // Page that triggers logOut() from an effect, shows a spinner meanwhile, then redirects to /login.
+  const { logOut } = useSupabase();
+  const { toast } = useToast();
+  const router = useRouter();
+  const hasStartedRef = useRef(false); // Set to true the first time the effect runs; never reset in this component.
+
+  useEffect(
+    function handleLogoutEffect() {
+      if (hasStartedRef.current) return; // Skip when an earlier run of this effect already started the logout.
+      hasStartedRef.current = true;
+
+      async function runLogout() {
+        try {
+          await logOut();
+        } catch { // logOut() rejected: show a destructive toast; the redirect below still happens.
+          toast({
+            title: "Failed to log out. Redirecting to login.",
+            variant: "destructive",
+          });
+        } finally { // Runs after both the success and the failure path.
+          await wait(LOGOUT_REDIRECT_DELAY_MS);
+          router.replace("/login");
+        }
+      }
+
+      void runLogout(); // `void`: the returned promise is deliberately not awaited.
+    },
+    [logOut, router, toast],
+  );
+
+  return (
+    <div className="flex min-h-screen items-center justify-center px-4">
+      <div className="flex flex-col items-center justify-center gap-4 py-8">
+        <LoadingSpinner size="large" />
+        <Text variant="body" className="text-center">
+          Logging you out...
+        </Text>
+      </div>
+    </div>
+  );
+}
--- a/autogpt_platform/frontend/src/app/(platform)/auth/callback/route.ts
+++ b/autogpt_platform/frontend/src/app/(platform)/auth/callback/route.ts
@@ -9,7 +9,7 @@ export async function GET(request: Request) {
  const { searchParams, origin } = new URL(request.url);
  const code = searchParams.get("code");

-  let next = "/marketplace";
+  let next = "/";

  if (code) {
    const supabase = await getServerSupabase();
--- a/autogpt_platform/frontend/src/app/(platform)/build/components/BuilderActions/components/AgentOutputs/AgentOutputs.tsx
+++ b/autogpt_platform/frontend/src/app/(platform)/build/components/BuilderActions/components/AgentOutputs/AgentOutputs.tsx
@@ -38,8 +38,12 @@ export const AgentOutputs = ({ flowID }: { flowID: string | null }) => {

    return outputNodes
      .map((node) => {
-        const executionResult = node.data.nodeExecutionResult;
-        const outputData = executionResult?.output_data?.output;
+        const executionResults = node.data.nodeExecutionResults || [];
+        const latestResult =
+          executionResults.length > 0
+            ? executionResults[executionResults.length - 1]
+            : undefined;
+        const outputData = latestResult?.output_data?.output;

        const renderer = globalRegistry.getRenderer(outputData);

--- a/autogpt_platform/frontend/src/app/(platform)/build/components/BuilderActions/components/RunInputDialog/useRunInputDialog.ts
+++ b/autogpt_platform/frontend/src/app/(platform)/build/components/BuilderActions/components/RunInputDialog/useRunInputDialog.ts
@@ -153,6 +153,9 @@ export const useRunInputDialog = ({
      Object.entries(credentialValues).filter(([_, cred]) => cred && cred.id),
    );

+    useNodeStore.getState().clearAllNodeExecutionResults();
+    useNodeStore.getState().cleanNodesStatuses();
+
    await executeGraph({
      graphId: flowID ?? "",
      graphVersion: flowVersion || null,
--- a/autogpt_platform/frontend/src/app/(platform)/build/components/FloatingSafeModeToogle.tsx
+++ b/autogpt_platform/frontend/src/app/(platform)/build/components/FloatingSafeModeToogle.tsx
@@ -86,7 +86,6 @@ export function FloatingSafeModeToggle({
  const {
    currentHITLSafeMode,
    showHITLToggle,
-    isHITLStateUndetermined,
    handleHITLToggle,
    currentSensitiveActionSafeMode,
    showSensitiveActionToggle,
@@ -99,16 +98,9 @@ export function FloatingSafeModeToggle({
    return null;
  }

-  const showHITL = showHITLToggle && !isHITLStateUndetermined;
-  const showSensitive = showSensitiveActionToggle;
-
-  if (!showHITL && !showSensitive) {
-    return null;
-  }
-
  return (
    <div className={cn("fixed z-50 flex flex-col gap-2", className)}>
-      {showHITL && (
+      {showHITLToggle && (
        <SafeModeButton
          isEnabled={currentHITLSafeMode}
          label="Human in the loop block approval"
@@ -119,7 +111,7 @@ export function FloatingSafeModeToggle({
          fullWidth={fullWidth}
        />
      )}
-      {showSensitive && (
+      {showSensitiveActionToggle && (
        <SafeModeButton
          isEnabled={currentSensitiveActionSafeMode}
          label="Sensitive actions blocks approval"
--- a/autogpt_platform/frontend/src/app/(platform)/build/components/FlowEditor/nodes/CustomNode/CustomNode.tsx
+++ b/autogpt_platform/frontend/src/app/(platform)/build/components/FlowEditor/nodes/CustomNode/CustomNode.tsx
@@ -34,7 +34,7 @@ export type CustomNodeData = {
  uiType: BlockUIType;
  block_id: string;
  status?: AgentExecutionStatus;
-  nodeExecutionResult?: NodeExecutionResult;
+  nodeExecutionResults?: NodeExecutionResult[];
  staticOutput?: boolean;
  // TODO : We need better type safety for the following backend fields.
  costs: BlockCost[];
@@ -75,7 +75,11 @@ export const CustomNode: React.FC<NodeProps<CustomNode>> = React.memo(
        (value) => value !== null && value !== undefined && value !== "",
      );

-    const outputData = data.nodeExecutionResult?.output_data;
+    const latestResult =
+      data.nodeExecutionResults && data.nodeExecutionResults.length > 0
+        ? data.nodeExecutionResults[data.nodeExecutionResults.length - 1]
+        : undefined;
+    const outputData = latestResult?.output_data;
    const hasOutputError =
      typeof outputData === "object" &&
      outputData !== null &&
--- a/autogpt_platform/frontend/src/app/(platform)/build/components/FlowEditor/nodes/CustomNode/components/NodeOutput/NodeOutput.tsx
+++ b/autogpt_platform/frontend/src/app/(platform)/build/components/FlowEditor/nodes/CustomNode/components/NodeOutput/NodeOutput.tsx
@@ -14,10 +14,15 @@ import { useNodeOutput } from "./useNodeOutput";
 import { ViewMoreData } from "./components/ViewMoreData";

 export const NodeDataRenderer = ({ nodeId }: { nodeId: string }) => {
-  const { outputData, copiedKey, handleCopy, executionResultId, inputData } =
-    useNodeOutput(nodeId);
+  const {
+    latestOutputData,
+    copiedKey,
+    handleCopy,
+    executionResultId,
+    latestInputData,
+  } = useNodeOutput(nodeId);

-  if (Object.keys(outputData).length === 0) {
+  if (Object.keys(latestOutputData).length === 0) {
    return null;
  }

@@ -41,18 +46,19 @@ export const NodeDataRenderer = ({ nodeId }: { nodeId: string }) => {
              <div className="space-y-2">
                <Text variant="small-medium">Input</Text>

-                <ContentRenderer value={inputData} shortContent={false} />
+                <ContentRenderer value={latestInputData} shortContent={false} />

                <div className="mt-1 flex justify-end gap-1">
                  <NodeDataViewer
-                    data={inputData}
                    pinName="Input"
+                    nodeId={nodeId}
                    execId={executionResultId}
+                    dataType="input"
                  />
                  <Button
                    variant="secondary"
                    size="small"
-                    onClick={() => handleCopy("input", inputData)}
+                    onClick={() => handleCopy("input", latestInputData)}
                    className={cn(
                      "h-fit min-w-0 gap-1.5 border border-zinc-200 p-2 text-black hover:text-slate-900",
                      copiedKey === "input" &&
@@ -68,70 +74,72 @@ export const NodeDataRenderer = ({ nodeId }: { nodeId: string }) => {
                </div>
              </div>

-              {Object.entries(outputData)
+              {Object.entries(latestOutputData)
                .slice(0, 2)
-                .map(([key, value]) => (
-                  <div key={key} className="flex flex-col gap-2">
-                    <div className="flex items-center gap-2">
-                      <Text
-                        variant="small-medium"
-                        className="!font-semibold text-slate-600"
-                      >
-                        Pin:
-                      </Text>
-                      <Text variant="small" className="text-slate-700">
-                        {beautifyString(key)}
-                      </Text>
-                    </div>
-                    <div className="w-full space-y-2">
-                      <Text
-                        variant="small"
-                        className="!font-semibold text-slate-600"
-                      >
-                        Data:
-                      </Text>
-                      <div className="relative space-y-2">
-                        {value.map((item, index) => (
-                          <div key={index}>
-                            <ContentRenderer value={item} shortContent={true} />
-                          </div>
-                        ))}
+                .map(([key, value]) => {
+                  return (
+                    <div key={key} className="flex flex-col gap-2">
+                      <div className="flex items-center gap-2">
+                        <Text
+                          variant="small-medium"
+                          className="!font-semibold text-slate-600"
+                        >
+                          Pin:
+                        </Text>
+                        <Text variant="small" className="text-slate-700">
+                          {beautifyString(key)}
+                        </Text>
+                      </div>
+                      <div className="w-full space-y-2">
+                        <Text
+                          variant="small"
+                          className="!font-semibold text-slate-600"
+                        >
+                          Data:
+                        </Text>
+                        <div className="relative space-y-2">
+                          {value.map((item, index) => (
+                            <div key={index}>
+                              <ContentRenderer
+                                value={item}
+                                shortContent={true}
+                              />
+                            </div>
+                          ))}

-                        <div className="mt-1 flex justify-end gap-1">
-                          <NodeDataViewer
-                            data={value}
-                            pinName={key}
-                            execId={executionResultId}
-                          />
-                          <Button
-                            variant="secondary"
-                            size="small"
-                            onClick={() => handleCopy(key, value)}
-                            className={cn(
-                              "h-fit min-w-0 gap-1.5 border border-zinc-200 p-2 text-black hover:text-slate-900",
-                              copiedKey === key &&
-                                "border-green-400 bg-green-100 hover:border-green-400 hover:bg-green-200",
-                            )}
-                          >
-                            {copiedKey === key ? (
-                              <CheckIcon size={12} className="text-green-600" />
-                            ) : (
-                              <CopyIcon size={12} />
-                            )}
-                          </Button>
+                          <div className="mt-1 flex justify-end gap-1">
+                            <NodeDataViewer
+                              pinName={key}
+                              nodeId={nodeId}
+                              execId={executionResultId}
+                            />
+                            <Button
+                              variant="secondary"
+                              size="small"
+                              onClick={() => handleCopy(key, value)}
+                              className={cn(
+                                "h-fit min-w-0 gap-1.5 border border-zinc-200 p-2 text-black hover:text-slate-900",
+                                copiedKey === key &&
+                                  "border-green-400 bg-green-100 hover:border-green-400 hover:bg-green-200",
+                              )}
+                            >
+                              {copiedKey === key ? (
+                                <CheckIcon
+                                  size={12}
+                                  className="text-green-600"
+                                />
+                              ) : (
+                                <CopyIcon size={12} />
+                              )}
+                            </Button>
+                          </div>
                        </div>
                      </div>
                    </div>
-                  </div>
-                ))}
+                  );
+                })}
            </div>
-
-            {Object.keys(outputData).length > 2 && (
-              <ViewMoreData
-                outputData={outputData}
-                execId={executionResultId}
-              />
-            )}
+            <ViewMoreData nodeId={nodeId} />
          </AccordionContent>
        </AccordionItem>
      </Accordion>
--- a/autogpt_platform/frontend/src/app/(platform)/build/components/FlowEditor/nodes/CustomNode/components/NodeOutput/components/NodeDataViewer/NodeDataViewer.tsx
+++ b/autogpt_platform/frontend/src/app/(platform)/build/components/FlowEditor/nodes/CustomNode/components/NodeOutput/components/NodeDataViewer/NodeDataViewer.tsx
@@ -19,22 +19,51 @@ import {
  CopyIcon,
  DownloadIcon,
 } from "@phosphor-icons/react";
-import { FC } from "react";
+import React, { FC } from "react";
 import { useNodeDataViewer } from "./useNodeDataViewer";
+import { useNodeStore } from "@/app/(platform)/build/stores/nodeStore";
+import { useShallow } from "zustand/react/shallow";
+import { NodeDataType } from "../../helpers";

-interface NodeDataViewerProps {
-  data: any;
+export interface NodeDataViewerProps {
+  data?: any;
  pinName: string;
+  nodeId?: string;
  execId?: string;
  isViewMoreData?: boolean;
+  dataType?: NodeDataType;
 }

 export const NodeDataViewer: FC<NodeDataViewerProps> = ({
  data,
  pinName,
+  nodeId,
  execId = "N/A",
  isViewMoreData = false,
+  dataType = "output",
 }) => {
+  const executionResults = useNodeStore(
+    useShallow((state) =>
+      nodeId ? state.getNodeExecutionResults(nodeId) : [],
+    ),
+  );
+  const latestInputData = useNodeStore(
+    useShallow((state) =>
+      nodeId ? state.getLatestNodeInputData(nodeId) : undefined,
+    ),
+  );
+  const accumulatedOutputData = useNodeStore(
+    useShallow((state) =>
+      nodeId ? state.getAccumulatedNodeOutputData(nodeId) : {},
+    ),
+  );
+
+  const resolvedData =
+    data ??
+    (dataType === "input"
+      ? (latestInputData ?? {})
+      : (accumulatedOutputData[pinName] ?? []));
+
  const {
    outputItems,
    copyExecutionId,
@@ -42,7 +71,20 @@ export const NodeDataViewer: FC<NodeDataViewerProps> = ({
    handleDownloadItem,
    dataArray,
    copiedIndex,
-  } = useNodeDataViewer(data, pinName, execId);
+    groupedExecutions,
+    totalGroupedItems,
+    handleCopyGroupedItem,
+    handleDownloadGroupedItem,
+    copiedKey,
+  } = useNodeDataViewer(
+    resolvedData,
+    pinName,
+    execId,
+    executionResults,
+    dataType,
+  );
+
+  const shouldGroupExecutions = groupedExecutions.length > 0;
  return (
    <Dialog styling={{ width: "600px" }}>
      <TooltipProvider>
@@ -68,44 +110,141 @@ export const NodeDataViewer: FC<NodeDataViewerProps> = ({
          <div className="flex items-center gap-4">
            <div className="flex items-center gap-2">
              <Text variant="large-medium" className="text-slate-900">
-                Full Output Preview
+                Full {dataType === "input" ? "Input" : "Output"} Preview
              </Text>
            </div>
            <div className="rounded-full border border-slate-300 bg-slate-100 px-3 py-1.5 text-xs font-medium text-black">
-              {dataArray.length} item{dataArray.length !== 1 ? "s" : ""} total
+              {shouldGroupExecutions ? totalGroupedItems : dataArray.length}{" "}
+              item
+              {shouldGroupExecutions
+                ? totalGroupedItems !== 1
+                  ? "s"
+                  : ""
+                : dataArray.length !== 1
+                  ? "s"
+                  : ""}{" "}
+              total
            </div>
          </div>
          <div className="text-sm text-gray-600">
-            <div className="flex items-center gap-2">
-              <Text variant="body" className="text-slate-600">
-                Execution ID:
-              </Text>
-              <Text
-                variant="body-medium"
-                className="rounded-full border border-gray-300 bg-gray-50 px-2 py-1 font-mono text-xs"
-              >
-                {execId}
-              </Text>
-              <Button
-                variant="ghost"
-                size="small"
-                onClick={copyExecutionId}
-                className="h-6 w-6 min-w-0 p-0"
-              >
-                <CopyIcon size={14} />
-              </Button>
-            </div>
-            <div className="mt-2">
-              Pin:{" "}
-              <span className="font-semibold">{beautifyString(pinName)}</span>
-            </div>
+            {shouldGroupExecutions ? (
+              <div>
+                Pin:{" "}
+                <span className="font-semibold">{beautifyString(pinName)}</span>
+              </div>
+            ) : (
+              <>
+                <div className="flex items-center gap-2">
+                  <Text variant="body" className="text-slate-600">
+                    Execution ID:
+                  </Text>
+                  <Text
+                    variant="body-medium"
+                    className="rounded-full border border-gray-300 bg-gray-50 px-2 py-1 font-mono text-xs"
+                  >
+                    {execId}
+                  </Text>
+                  <Button
+                    variant="ghost"
+                    size="small"
+                    onClick={copyExecutionId}
+                    className="h-6 w-6 min-w-0 p-0"
+                  >
+                    <CopyIcon size={14} />
+                  </Button>
+                </div>
+                <div className="mt-2">
+                  Pin:{" "}
+                  <span className="font-semibold">
+                    {beautifyString(pinName)}
+                  </span>
+                </div>
+              </>
+            )}
          </div>
        </div>

        <div className="flex-1 overflow-hidden">
          <ScrollArea className="h-full">
            <div className="my-4">
-              {dataArray.length > 0 ? (
+              {shouldGroupExecutions ? (
+                <div className="space-y-4">
+                  {groupedExecutions.map((execution) => (
+                    <div
+                      key={execution.execId}
+                      className="rounded-3xl border border-slate-200 bg-white p-4 shadow-sm"
+                    >
+                      <div className="flex items-center gap-2">
+                        <Text variant="body" className="text-slate-600">
+                          Execution ID:
+                        </Text>
+                        <Text
+                          variant="body-medium"
+                          className="rounded-full border border-gray-300 bg-gray-50 px-2 py-1 font-mono text-xs"
+                        >
+                          {execution.execId}
+                        </Text>
+                      </div>
+                      <div className="mt-2 space-y-4">
+                        {execution.outputItems.length > 0 ? (
+                          execution.outputItems.map((item, index) => (
+                            <div
+                              key={item.key}
+                              className="group flex items-start gap-4"
+                            >
+                              <div className="w-full flex-1">
+                                <OutputItem
+                                  value={item.value}
+                                  metadata={item.metadata}
+                                  renderer={item.renderer}
+                                />
+                              </div>
+
+                              <div className="flex w-fit gap-3">
+                                <Button
+                                  variant="secondary"
+                                  className="min-w-0 p-1"
+                                  size="icon"
+                                  onClick={() =>
+                                    handleCopyGroupedItem(
+                                      execution.execId,
+                                      index,
+                                      item,
+                                    )
+                                  }
+                                  aria-label="Copy item"
+                                >
+                                  {copiedKey ===
+                                  `${execution.execId}-${index}` ? (
+                                    <CheckIcon className="size-4 text-green-600" />
+                                  ) : (
+                                    <CopyIcon className="size-4 text-black" />
+                                  )}
+                                </Button>
+                                <Button
+                                  variant="secondary"
+                                  size="icon"
+                                  className="min-w-0 p-1"
+                                  onClick={() =>
+                                    handleDownloadGroupedItem(item)
+                                  }
+                                  aria-label="Download item"
+                                >
+                                  <DownloadIcon className="size-4 text-black" />
+                                </Button>
+                              </div>
+                            </div>
+                          ))
+                        ) : (
+                          <div className="py-4 text-center text-gray-500">
+                            No data available
+                          </div>
+                        )}
+                      </div>
+                    </div>
+                  ))}
+                </div>
+              ) : dataArray.length > 0 ? (
                <div className="space-y-4">
                  {outputItems.map((item, index) => (
                    <div key={item.key} className="group relative">
--- a/autogpt_platform/frontend/src/app/(platform)/build/components/FlowEditor/nodes/CustomNode/components/NodeOutput/components/NodeDataViewer/useNodeDataViewer.ts
+++ b/autogpt_platform/frontend/src/app/(platform)/build/components/FlowEditor/nodes/CustomNode/components/NodeOutput/components/NodeDataViewer/useNodeDataViewer.ts
@@ -1,82 +1,70 @@
-import type { OutputMetadata } from "@/components/contextual/OutputRenderers";
-import { globalRegistry } from "@/components/contextual/OutputRenderers";
 import { downloadOutputs } from "@/components/contextual/OutputRenderers/utils/download";
 import { useToast } from "@/components/molecules/Toast/use-toast";
 import { beautifyString } from "@/lib/utils";
-import React, { useMemo, useState } from "react";
+import { useState } from "react";
+import type { NodeExecutionResult } from "@/app/api/__generated__/models/nodeExecutionResult";
+import {
+  NodeDataType,
+  createOutputItems,
+  getExecutionData,
+  normalizeToArray,
+  type OutputItem,
+} from "../../helpers";
+
+export type GroupedExecution = { // One entry per node execution; mirrors the objects assembled into `groupedExecutions` in useNodeDataViewer.
+  execId: string; // In useNodeDataViewer this is filled from the result's `node_exec_id`.
+  outputItems: Array<OutputItem>; // In useNodeDataViewer these come from createOutputItems for that execution's data.
+};

 export const useNodeDataViewer = (
  data: any,
  pinName: string,
  execId: string,
+  executionResults?: NodeExecutionResult[],
+  dataType?: NodeDataType,
 ) => {
  const { toast } = useToast();
  const [copiedIndex, setCopiedIndex] = useState<number | null>(null);
+  const [copiedKey, setCopiedKey] = useState<string | null>(null);

-  // Normalize data to array format
-  const dataArray = useMemo(() => {
-    return Array.isArray(data) ? data : [data];
-  }, [data]);
+  const dataArray = Array.isArray(data) ? data : [data];

-  // Prepare items for the enhanced renderer system
-  const outputItems = useMemo(() => {
-    if (!dataArray) return [];
-
-    const items: Array<{
-      key: string;
-      label: string;
-      value: unknown;
-      metadata?: OutputMetadata;
-      renderer: any;
-    }> = [];
-
-    dataArray.forEach((value, index) => {
-      const metadata: OutputMetadata = {};
-
-      // Extract metadata from the value if it's an object
-      if (
-        typeof value === "object" &&
-        value !== null &&
-        !React.isValidElement(value)
-      ) {
-        const objValue = value as any;
-        if (objValue.type) metadata.type = objValue.type;
-        if (objValue.mimeType) metadata.mimeType = objValue.mimeType;
-        if (objValue.filename) metadata.filename = objValue.filename;
-        if (objValue.language) metadata.language = objValue.language;
-      }
-
-      const renderer = globalRegistry.getRenderer(value, metadata);
-      if (renderer) {
-        items.push({
-          key: `item-${index}`,
+  const outputItems =
+    !dataArray || dataArray.length === 0
+      ? []
+      : createOutputItems(dataArray).map((item, index) => ({
+          ...item,
          label: index === 0 ? beautifyString(pinName) : "",
-          value,
-          metadata,
-          renderer,
-        });
-      } else {
-        // Fallback to text renderer
-        const textRenderer = globalRegistry
-          .getAllRenderers()
-          .find((r) => r.name === "TextRenderer");
-        if (textRenderer) {
-          items.push({
-            key: `item-${index}`,
-            label: index === 0 ? beautifyString(pinName) : "",
-            value:
-              typeof value === "string"
-                ? value
-                : JSON.stringify(value, null, 2),
-            metadata,
-            renderer: textRenderer,
-          });
-        }
-      }
-    });
+        }));

-    return items;
-  }, [dataArray, pinName]);
+  const groupedExecutions =
+    !executionResults || executionResults.length === 0
+      ? []
+      : [...executionResults].reverse().map((result) => {
+          const rawData = getExecutionData(
+            result,
+            dataType || "output",
+            pinName,
+          );
+          let dataArray: unknown[];
+          if (dataType === "input") {
+            dataArray =
+              rawData !== undefined && rawData !== null ? [rawData] : [];
+          } else {
+            dataArray = normalizeToArray(rawData);
+          }
+
+          const outputItems = createOutputItems(dataArray);
+          return {
+            execId: result.node_exec_id,
+            outputItems,
+          };
+        });
+
+  const totalGroupedItems = groupedExecutions.reduce(
+    (total, execution) => total + execution.outputItems.length,
+    0,
+  );

  const copyExecutionId = () => {
    navigator.clipboard.writeText(execId).then(() => {
@@ -122,6 +110,45 @@ export const useNodeDataViewer = (
    ]);
  };

+  const handleCopyGroupedItem = async ( // Copies one grouped item's text to the clipboard and marks it as copied for 2 seconds.
+    execId: string,
+    index: number,
+    item: OutputItem,
+  ) => {
+    const copyContent = item.renderer.getCopyContent(item.value, item.metadata);
+
+    if (!copyContent) { // The renderer offers nothing to copy for this item.
+      return;
+    }
+
+    try {
+      let text: string;
+      if (typeof copyContent.data === "string") {
+        text = copyContent.data;
+      } else if (copyContent.fallbackText) {
+        text = copyContent.fallbackText;
+      } else { // Neither string data nor fallback text: nothing is copied.
+        return;
+      }
+
+      await navigator.clipboard.writeText(text);
+      setCopiedKey(`${execId}-${index}`); // Key combines the execution id and the item's index within that execution.
+      setTimeout(() => setCopiedKey(null), 2000); // Clear the copied marker after 2 seconds.
+    } catch (error) { // A failed copy is only logged to the console.
+      console.error("Failed to copy:", error);
+    }
+  };
+
+  const handleDownloadGroupedItem = (item: OutputItem) => { // Downloads a single grouped item through downloadOutputs.
+    downloadOutputs([ // One-element list: only this item's value, metadata and renderer are passed.
+      {
+        value: item.value,
+        metadata: item.metadata,
+        renderer: item.renderer,
+      },
+    ]);
+  };
+
  return {
    outputItems,
    dataArray,
@@ -129,5 +156,10 @@ export const useNodeDataViewer = (
    handleCopyItem,
    handleDownloadItem,
    copiedIndex,
+    groupedExecutions,
+    totalGroupedItems,
+    handleCopyGroupedItem,
+    handleDownloadGroupedItem,
+    copiedKey,
  };
 };
--- a/autogpt_platform/frontend/src/app/(platform)/build/components/FlowEditor/nodes/CustomNode/components/NodeOutput/components/ViewMoreData.tsx
+++ b/autogpt_platform/frontend/src/app/(platform)/build/components/FlowEditor/nodes/CustomNode/components/NodeOutput/components/ViewMoreData.tsx
@@ -8,16 +8,28 @@ import { useState } from "react";
 import { NodeDataViewer } from "./NodeDataViewer/NodeDataViewer";
 import { useToast } from "@/components/molecules/Toast/use-toast";
 import { CheckIcon, CopyIcon } from "@phosphor-icons/react";
+import { useNodeStore } from "@/app/(platform)/build/stores/nodeStore";
+import { useShallow } from "zustand/react/shallow";
+import {
+  NodeDataType,
+  getExecutionEntries,
+  normalizeToArray,
+} from "../helpers";

 export const ViewMoreData = ({
-  outputData,
-  execId,
+  nodeId,
+  dataType = "output",
 }: {
-  outputData: Record<string, Array<any>>;
-  execId?: string;
+  nodeId: string;
+  dataType?: NodeDataType;
 }) => {
  const [copiedKey, setCopiedKey] = useState<string | null>(null);
  const { toast } = useToast();
+  const executionResults = useNodeStore(
+    useShallow((state) => state.getNodeExecutionResults(nodeId)),
+  );
+
+  const reversedExecutionResults = [...executionResults].reverse();

  const handleCopy = (key: string, value: any) => {
    const textToCopy =
@@ -29,8 +41,8 @@ export const ViewMoreData = ({
    setTimeout(() => setCopiedKey(null), 2000);
  };

-  const copyExecutionId = () => {
-    navigator.clipboard.writeText(execId || "N/A").then(() => {
+  const copyExecutionId = (executionId: string) => {
+    navigator.clipboard.writeText(executionId || "N/A").then(() => {
      toast({
        title: "Execution ID copied to clipboard!",
        duration: 2000,
@@ -42,7 +54,7 @@ export const ViewMoreData = ({
    <Dialog styling={{ width: "600px", paddingRight: "16px" }}>
      <Dialog.Trigger>
        <Button
-          variant="primary"
+          variant="secondary"
          size="small"
          className="h-fit w-fit min-w-0 !text-xs"
        >
@@ -52,83 +64,114 @@ export const ViewMoreData = ({
      <Dialog.Content>
        <div className="flex flex-col gap-4">
          <Text variant="h4" className="text-slate-900">
-            Complete Output Data
+            Complete {dataType === "input" ? "Input" : "Output"} Data
          </Text>

-          <div className="flex items-center gap-2">
-            <Text variant="body" className="text-slate-600">
-              Execution ID:
-            </Text>
-            <Text
-              variant="body-medium"
-              className="rounded-full border border-gray-300 bg-gray-50 px-2 py-1 font-mono text-xs"
-            >
-              {execId}
-            </Text>
-            <Button
-              variant="ghost"
-              size="small"
-              onClick={copyExecutionId}
-              className="h-6 w-6 min-w-0 p-0"
-            >
-              <CopyIcon size={14} />
-            </Button>
-          </div>
-
          <ScrollArea className="h-full">
            <div className="flex flex-col gap-4">
-              {Object.entries(outputData).map(([key, value]) => (
-                <div key={key} className="flex flex-col gap-2">
+              {reversedExecutionResults.map((result) => (
+                <div
+                  key={result.node_exec_id}
+                  className="rounded-3xl border border-slate-200 bg-white p-4 shadow-sm"
+                >
                  <div className="flex items-center gap-2">
+                    <Text variant="body" className="text-slate-600">
+                      Execution ID:
+                    </Text>
                    <Text
                      variant="body-medium"
-                      className="!font-semibold text-slate-600"
+                      className="rounded-full border border-gray-300 bg-gray-50 px-2 py-1 font-mono text-xs"
                    >
-                      Pin:
-                    </Text>
-                    <Text variant="body-medium" className="text-slate-700">
-                      {beautifyString(key)}
+                      {result.node_exec_id}
                    </Text>
+                    <Button
+                      variant="ghost"
+                      size="small"
+                      onClick={() => copyExecutionId(result.node_exec_id)}
+                      className="h-6 w-6 min-w-0 p-0"
+                    >
+                      <CopyIcon size={14} />
+                    </Button>
                  </div>
-                  <div className="w-full space-y-2">
-                    <Text
-                      variant="body-medium"
-                      className="!font-semibold text-slate-600"
-                    >
-                      Data:
-                    </Text>
-                    <div className="relative space-y-2">
-                      {value.map((item, index) => (
-                        <div key={index}>
-                          <ContentRenderer value={item} shortContent={false} />
-                        </div>
-                      ))}

-                      <div className="mt-1 flex justify-end gap-1">
-                        <NodeDataViewer
-                          data={value}
-                          pinName={key}
-                          execId={execId}
-                          isViewMoreData={true}
-                        />
-                        <Button
-                          variant="secondary"
-                          size="small"
-                          onClick={() => handleCopy(key, value)}
-                          className={cn(
-                            "h-fit min-w-0 gap-1.5 border border-zinc-200 p-2 text-black hover:text-slate-900",
-                            copiedKey === key &&
-                              "border-green-400 bg-green-100 hover:border-green-400 hover:bg-green-200",
-                          )}
-                        >
-                          {copiedKey === key ? (
-                            <CheckIcon size={16} className="text-green-600" />
-                          ) : (
-                            <CopyIcon size={16} />
-                          )}
-                        </Button>
-                      </div>
-                    </div>
+                  <div className="mt-4 flex flex-col gap-4">
+                    {getExecutionEntries(result, dataType).map(
+                      ([key, value]) => {
+                        const normalizedValue = normalizeToArray(value);
+                        return (
+                          <div key={key} className="flex flex-col gap-2">
+                            <div className="flex items-center gap-2">
+                              <Text
+                                variant="body-medium"
+                                className="!font-semibold text-slate-600"
+                              >
+                                Pin:
+                              </Text>
+                              <Text
+                                variant="body-medium"
+                                className="text-slate-700"
+                              >
+                                {beautifyString(key)}
+                              </Text>
+                            </div>
+                            <div className="w-full space-y-2">
+                              <Text
+                                variant="body-medium"
+                                className="!font-semibold text-slate-600"
+                              >
+                                Data:
+                              </Text>
+                              <div className="relative space-y-2">
+                                {normalizedValue.map((item, index) => (
+                                  <div key={index}>
+                                    <ContentRenderer
+                                      value={item}
+                                      shortContent={false}
+                                    />
+                                  </div>
+                                ))}
+
+                                <div className="mt-1 flex justify-end gap-1">
+                                  <NodeDataViewer
+                                    data={normalizedValue}
+                                    pinName={key}
+                                    execId={result.node_exec_id}
+                                    isViewMoreData={true}
+                                    dataType={dataType}
+                                  />
+                                  <Button
+                                    variant="secondary"
+                                    size="small"
+                                    onClick={() =>
+                                      handleCopy(
+                                        `${result.node_exec_id}-${key}`,
+                                        normalizedValue,
+                                      )
+                                    }
+                                    className={cn(
+                                      "h-fit min-w-0 gap-1.5 border border-zinc-200 p-2 text-black hover:text-slate-900",
+                                      copiedKey ===
+                                        `${result.node_exec_id}-${key}` &&
+                                        "border-green-400 bg-green-100 hover:border-green-400 hover:bg-green-200",
+                                    )}
+                                  >
+                                    {copiedKey ===
+                                    `${result.node_exec_id}-${key}` ? (
+                                      <CheckIcon
+                                        size={16}
+                                        className="text-green-600"
+                                      />
+                                    ) : (
+                                      <CopyIcon size={16} />
+                                    )}
+                                  </Button>
+                                </div>
+                              </div>
+                            </div>
+                          </div>
+                        );
+                      },
+                    )}
                  </div>
                </div>
              ))}
--- a/autogpt_platform/frontend/src/app/(platform)/build/components/FlowEditor/nodes/CustomNode/components/NodeOutput/helpers.ts
+++ b/autogpt_platform/frontend/src/app/(platform)/build/components/FlowEditor/nodes/CustomNode/components/NodeOutput/helpers.ts
@@ -0,0 +1,112 @@
+import type { NodeExecutionResult } from "@/app/api/__generated__/models/nodeExecutionResult";
+import type { OutputMetadata } from "@/components/contextual/OutputRenderers";
+import { globalRegistry } from "@/components/contextual/OutputRenderers";
+import React from "react";
+
+/** Selects which side of a node execution result is read: `input_data` or `output_data`. */
+export type NodeDataType = "input" | "output";
+
+/** A single value paired with the renderer chosen for it by `createOutputItems`. */
+export type OutputItem = {
+  key: string;
+  value: unknown;
+  metadata?: OutputMetadata;
+  renderer: any;
+};
+
+/**
+ * Coerces a value to an array: `undefined` becomes `[]`, an array is returned
+ * as-is (same reference), and anything else (including `null`) is wrapped in a
+ * one-element array.
+ */
+export const normalizeToArray = (value: unknown) => {
+  if (value === undefined) return [];
+  return Array.isArray(value) ? value : [value];
+};
+
+/**
+ * Picks the data to display from one execution result.
+ *
+ * For `"input"` the whole `input_data` object is returned and `pinName` is not
+ * consulted; for `"output"` only the `output_data` entry stored under `pinName`
+ * is returned (`undefined` when `output_data` or that entry is missing).
+ */
+export const getExecutionData = (
+  result: NodeExecutionResult,
+  dataType: NodeDataType,
+  pinName: string,
+) => {
+  if (dataType === "input") {
+    return result.input_data;
+  }
+
+  return result.output_data?.[pinName];
+};
+
+/**
+ * Builds one `OutputItem` per renderable entry of `dataArray`.
+ *
+ * Each item's key is `item-<index>` using the entry's position in `dataArray`,
+ * so keys can be non-contiguous when an entry is skipped.
+ */
+export const createOutputItems = (dataArray: unknown[]): Array<OutputItem> => {
+  const items: Array<OutputItem> = [];
+
+  dataArray.forEach((value, index) => {
+    const metadata: OutputMetadata = {};
+
+    // Copy type/mimeType/filename/language from non-null object values (truthy
+    // fields only); React elements are skipped.
+    if (
+      typeof value === "object" &&
+      value !== null &&
+      !React.isValidElement(value)
+    ) {
+      const objValue = value as any;
+      if (objValue.type) metadata.type = objValue.type;
+      if (objValue.mimeType) metadata.mimeType = objValue.mimeType;
+      if (objValue.filename) metadata.filename = objValue.filename;
+      if (objValue.language) metadata.language = objValue.language;
+    }
+
+    const renderer = globalRegistry.getRenderer(value, metadata);
+    if (renderer) {
+      items.push({
+        key: `item-${index}`,
+        value,
+        metadata,
+        renderer,
+      });
+    } else {
+      // No renderer matched: fall back to the renderer named "TextRenderer" and
+      // hand it a string (non-strings are pretty-printed as JSON). If that
+      // renderer is not registered either, the entry is dropped.
+      const textRenderer = globalRegistry
+        .getAllRenderers()
+        .find((r) => r.name === "TextRenderer");
+      if (textRenderer) {
+        items.push({
+          key: `item-${index}`,
+          value:
+            typeof value === "string" ? value : JSON.stringify(value, null, 2),
+          metadata,
+          renderer: textRenderer,
+        });
+      }
+    }
+  });
+
+  return items;
+};
+
+/**
+ * Lists the `[key, value]` pairs of a result's `input_data` or `output_data`,
+ * depending on `dataType`; yields an empty array when that data is missing.
+ */
+export const getExecutionEntries = (
+  result: NodeExecutionResult,
+  dataType: NodeDataType,
+) => {
+  const data = dataType === "input" ? result.input_data : result.output_data;
+  return Object.entries(data || {});
+};
--- a/autogpt_platform/frontend/src/app/(platform)/build/components/FlowEditor/nodes/CustomNode/components/NodeOutput/useNodeOutput.tsx
+++ b/autogpt_platform/frontend/src/app/(platform)/build/components/FlowEditor/nodes/CustomNode/components/NodeOutput/useNodeOutput.tsx
@@ -7,15 +7,18 @@ export const useNodeOutput = (nodeId: string) => {
  const [copiedKey, setCopiedKey] = useState<string | null>(null);
  const { toast } = useToast();

-  const nodeExecutionResult = useNodeStore(
-    useShallow((state) => state.getNodeExecutionResult(nodeId)),
+  const latestResult = useNodeStore(
+    useShallow((state) => state.getLatestNodeExecutionResult(nodeId)),
  );

-  const inputData = nodeExecutionResult?.input_data;
+  const latestInputData = useNodeStore(
+    useShallow((state) => state.getLatestNodeInputData(nodeId)),
+  );
+
+  const latestOutputData: Record<string, Array<any>> = useNodeStore(
+    useShallow((state) => state.getLatestNodeOutputData(nodeId) || {}),
+  );

-  const outputData: Record<string, Array<any>> = {
-    ...nodeExecutionResult?.output_data,
-  };
  const handleCopy = async (key: string, value: any) => {
    try {
      const text = JSON.stringify(value, null, 2);
@@ -35,11 +38,12 @@ export const useNodeOutput = (nodeId: string) => {
      });
    }
  };
+
  return {
-    outputData,
-    inputData,
+    latestOutputData,
+    latestInputData,
    copiedKey,
    handleCopy,
-    executionResultId: nodeExecutionResult?.node_exec_id,
+    executionResultId: latestResult?.node_exec_id,
  };
 };
--- a/autogpt_platform/frontend/src/app/(platform)/build/components/FlowEditor/nodes/CustomNode/components/SubAgentUpdate/useSubAgentUpdateState.ts
+++ b/autogpt_platform/frontend/src/app/(platform)/build/components/FlowEditor/nodes/CustomNode/components/SubAgentUpdate/useSubAgentUpdateState.ts
@@ -1,10 +1,7 @@
 import { useState, useCallback, useEffect } from "react";
 import { useShallow } from "zustand/react/shallow";
 import { useGraphStore } from "@/app/(platform)/build/stores/graphStore";
-import {
-  useNodeStore,
-  NodeResolutionData,
-} from "@/app/(platform)/build/stores/nodeStore";
+import { useNodeStore } from "@/app/(platform)/build/stores/nodeStore";
 import { useEdgeStore } from "@/app/(platform)/build/stores/edgeStore";
 import {
  useSubAgentUpdate,
@@ -13,6 +10,7 @@ import {
 } from "@/app/(platform)/build/hooks/useSubAgentUpdate";
 import { GraphInputSchema, GraphOutputSchema } from "@/lib/autogpt-server-api";
 import { CustomNodeData } from "../../CustomNode";
+import { NodeResolutionData } from "@/app/(platform)/build/stores/types";

 // Stable empty set to avoid creating new references in selectors
 const EMPTY_SET: Set<string> = new Set();
--- a/autogpt_platform/frontend/src/app/(platform)/build/components/FlowEditor/nodes/CustomNode/helpers.ts
+++ b/autogpt_platform/frontend/src/app/(platform)/build/components/FlowEditor/nodes/CustomNode/helpers.ts
@@ -1,5 +1,5 @@
 import { AgentExecutionStatus } from "@/app/api/__generated__/models/agentExecutionStatus";
-import { NodeResolutionData } from "@/app/(platform)/build/stores/nodeStore";
+import { NodeResolutionData } from "@/app/(platform)/build/stores/types";
 import { RJSFSchema } from "@rjsf/utils";

 export const nodeStyleBasedOnStatus: Record<AgentExecutionStatus, string> = {
--- a/autogpt_platform/frontend/src/app/(platform)/build/stores/helpers.ts
+++ b/autogpt_platform/frontend/src/app/(platform)/build/stores/helpers.ts
@@ -0,0 +1,26 @@
+/**
+ * Returns a new record in which every key of `data` has its value(s) appended
+ * to the array already accumulated under that key.
+ *
+ * Array values are concatenated element by element; any other value is
+ * appended as a single element. Neither `accumulated` nor its arrays are
+ * mutated. When `data` is undefined a shallow copy of `accumulated` is returned.
+ */
+export const accumulateExecutionData = (
+  accumulated: Record<string, unknown[]>,
+  data: Record<string, unknown> | undefined,
+) => {
+  const next = { ...accumulated };
+  if (!data) return next;
+  Object.entries(data).forEach(([key, values]) => {
+    const nextValues = Array.isArray(values) ? values : [values];
+    // Only own keys count as "already accumulated": a plain `next[key]` lookup
+    // would find inherited members such as `constructor` or `toString` and
+    // then throw when trying to spread a function.
+    const existing = Object.prototype.hasOwnProperty.call(next, key)
+      ? next[key]
+      : undefined;
+    next[key] = existing ? [...existing, ...nextValues] : [...nextValues];
+  });
+  return next;
+};
--- a/autogpt_platform/frontend/src/app/(platform)/build/stores/nodeStore.ts
+++ b/autogpt_platform/frontend/src/app/(platform)/build/stores/nodeStore.ts
@@ -10,6 +10,8 @@ import {
 import { Node } from "@/app/api/__generated__/models/node";
 import { AgentExecutionStatus } from "@/app/api/__generated__/models/agentExecutionStatus";
 import { NodeExecutionResult } from "@/app/api/__generated__/models/nodeExecutionResult";
+import { NodeExecutionResultInputData } from "@/app/api/__generated__/models/nodeExecutionResultInputData";
+import { NodeExecutionResultOutputData } from "@/app/api/__generated__/models/nodeExecutionResultOutputData";
 import { useHistoryStore } from "./historyStore";
 import { useEdgeStore } from "./edgeStore";
 import { BlockUIType } from "../components/types";
@@ -18,31 +20,10 @@ import {
  ensurePathExists,
  parseHandleIdToPath,
 } from "@/components/renderers/InputRenderer/helpers";
-import { IncompatibilityInfo } from "../hooks/useSubAgentUpdate/types";
+import { accumulateExecutionData } from "./helpers";
+import { NodeResolutionData } from "./types";

-// Resolution mode data stored per node
-export type NodeResolutionData = {
-  incompatibilities: IncompatibilityInfo;
-  // The NEW schema from the update (what we're updating TO)
-  pendingUpdate: {
-    input_schema: Record<string, unknown>;
-    output_schema: Record<string, unknown>;
-  };
-  // The OLD schema before the update (what we're updating FROM)
-  // Needed to merge and show removed inputs during resolution
-  currentSchema: {
-    input_schema: Record<string, unknown>;
-    output_schema: Record<string, unknown>;
-  };
-  // The full updated hardcoded values to apply when resolution completes
-  pendingHardcodedValues: Record<string, unknown>;
-};
-
-// Minimum movement (in pixels) required before logging position change to history
-// Prevents spamming history with small movements when clicking on inputs inside blocks
 const MINIMUM_MOVE_BEFORE_LOG = 50;
-
-// Track initial positions when drag starts (outside store to avoid re-renders)
 const dragStartPositions: Record<string, XYPosition> = {};

 let dragStartState: { nodes: CustomNode[]; edges: CustomEdge[] } | null = null;
@@ -52,6 +33,15 @@ type NodeStore = {
  nodeCounter: number;
  setNodeCounter: (nodeCounter: number) => void;
  nodeAdvancedStates: Record<string, boolean>;
+
+  latestNodeInputData: Record<string, NodeExecutionResultInputData | undefined>;
+  latestNodeOutputData: Record<
+    string,
+    NodeExecutionResultOutputData | undefined
+  >;
+  accumulatedNodeInputData: Record<string, Record<string, unknown[]>>;
+  accumulatedNodeOutputData: Record<string, Record<string, unknown[]>>;
+
  setNodes: (nodes: CustomNode[]) => void;
  onNodesChange: (changes: NodeChange<CustomNode>[]) => void;
  addNode: (node: CustomNode) => void;
@@ -72,12 +62,26 @@ type NodeStore = {

  updateNodeStatus: (nodeId: string, status: AgentExecutionStatus) => void;
  getNodeStatus: (nodeId: string) => AgentExecutionStatus | undefined;
+  cleanNodesStatuses: () => void;

  updateNodeExecutionResult: (
    nodeId: string,
    result: NodeExecutionResult,
  ) => void;
-  getNodeExecutionResult: (nodeId: string) => NodeExecutionResult | undefined;
+  getNodeExecutionResults: (nodeId: string) => NodeExecutionResult[];
+  getLatestNodeInputData: (
+    nodeId: string,
+  ) => NodeExecutionResultInputData | undefined;
+  getLatestNodeOutputData: (
+    nodeId: string,
+  ) => NodeExecutionResultOutputData | undefined;
+  getAccumulatedNodeInputData: (nodeId: string) => Record<string, unknown[]>;
+  getAccumulatedNodeOutputData: (nodeId: string) => Record<string, unknown[]>;
+  getLatestNodeExecutionResult: (
+    nodeId: string,
+  ) => NodeExecutionResult | undefined;
+  clearAllNodeExecutionResults: () => void;
+
  getNodeBlockUIType: (nodeId: string) => BlockUIType;
  hasWebhookNodes: () => boolean;

@@ -122,6 +126,10 @@ export const useNodeStore = create<NodeStore>((set, get) => ({
  nodeCounter: 0,
  setNodeCounter: (nodeCounter) => set({ nodeCounter }),
  nodeAdvancedStates: {},
+  latestNodeInputData: {},
+  latestNodeOutputData: {},
+  accumulatedNodeInputData: {},
+  accumulatedNodeOutputData: {},
  incrementNodeCounter: () =>
    set((state) => ({
      nodeCounter: state.nodeCounter + 1,
@@ -317,17 +325,162 @@ export const useNodeStore = create<NodeStore>((set, get) => ({
    return get().nodes.find((n) => n.id === nodeId)?.data?.status;
  },

-  updateNodeExecutionResult: (nodeId: string, result: NodeExecutionResult) => {
+  cleanNodesStatuses: () => {
    set((state) => ({
-      nodes: state.nodes.map((n) =>
-        n.id === nodeId
-          ? { ...n, data: { ...n.data, nodeExecutionResult: result } }
-          : n,
-      ),
+      nodes: state.nodes.map((n) => ({
+        ...n,
+        data: { ...n.data, status: undefined },
+      })),
    }));
  },
-  getNodeExecutionResult: (nodeId: string) => {
-    return get().nodes.find((n) => n.id === nodeId)?.data?.nodeExecutionResult;
+
+  updateNodeExecutionResult: (nodeId: string, result: NodeExecutionResult) => {
+    set((state) => {
+      let latestNodeInputData = state.latestNodeInputData;
+      let latestNodeOutputData = state.latestNodeOutputData;
+      let accumulatedNodeInputData = state.accumulatedNodeInputData;
+      let accumulatedNodeOutputData = state.accumulatedNodeOutputData;
+
+      const nodes = state.nodes.map((n) => {
+        if (n.id !== nodeId) return n;
+
+        const existingResults = n.data.nodeExecutionResults || [];
+        const duplicateIndex = existingResults.findIndex(
+          (r) => r.node_exec_id === result.node_exec_id,
+        );
+
+        if (duplicateIndex !== -1) {
+          const oldResult = existingResults[duplicateIndex];
+          const inputDataChanged =
+            JSON.stringify(oldResult.input_data) !==
+            JSON.stringify(result.input_data);
+          const outputDataChanged =
+            JSON.stringify(oldResult.output_data) !==
+            JSON.stringify(result.output_data);
+
+          if (!inputDataChanged && !outputDataChanged) {
+            return n;
+          }
+
+          const updatedResults = [...existingResults];
+          updatedResults[duplicateIndex] = result;
+
+          const recomputedAccumulatedInput = updatedResults.reduce(
+            (acc, r) => accumulateExecutionData(acc, r.input_data),
+            {} as Record<string, unknown[]>,
+          );
+          const recomputedAccumulatedOutput = updatedResults.reduce(
+            (acc, r) => accumulateExecutionData(acc, r.output_data),
+            {} as Record<string, unknown[]>,
+          );
+
+          const mostRecentResult = updatedResults[updatedResults.length - 1];
+          latestNodeInputData = {
+            ...latestNodeInputData,
+            [nodeId]: mostRecentResult.input_data,
+          };
+          latestNodeOutputData = {
+            ...latestNodeOutputData,
+            [nodeId]: mostRecentResult.output_data,
+          };
+
+          accumulatedNodeInputData = {
+            ...accumulatedNodeInputData,
+            [nodeId]: recomputedAccumulatedInput,
+          };
+          accumulatedNodeOutputData = {
+            ...accumulatedNodeOutputData,
+            [nodeId]: recomputedAccumulatedOutput,
+          };
+
+          return {
+            ...n,
+            data: {
+              ...n.data,
+              nodeExecutionResults: updatedResults,
+            },
+          };
+        }
+
+        accumulatedNodeInputData = {
+          ...accumulatedNodeInputData,
+          [nodeId]: accumulateExecutionData(
+            accumulatedNodeInputData[nodeId] || {},
+            result.input_data,
+          ),
+        };
+        accumulatedNodeOutputData = {
+          ...accumulatedNodeOutputData,
+          [nodeId]: accumulateExecutionData(
+            accumulatedNodeOutputData[nodeId] || {},
+            result.output_data,
+          ),
+        };
+
+        latestNodeInputData = {
+          ...latestNodeInputData,
+          [nodeId]: result.input_data,
+        };
+        latestNodeOutputData = {
+          ...latestNodeOutputData,
+          [nodeId]: result.output_data,
+        };
+
+        return {
+          ...n,
+          data: {
+            ...n.data,
+            nodeExecutionResults: [...existingResults, result],
+          },
+        };
+      });
+
+      return {
+        nodes,
+        latestNodeInputData,
+        latestNodeOutputData,
+        accumulatedNodeInputData,
+        accumulatedNodeOutputData,
+      };
+    });
+  },
+  getNodeExecutionResults: (nodeId: string) => {
+    return (
+      get().nodes.find((n) => n.id === nodeId)?.data?.nodeExecutionResults || []
+    );
+  },
+  getLatestNodeInputData: (nodeId: string) => {
+    return get().latestNodeInputData[nodeId];
+  },
+  getLatestNodeOutputData: (nodeId: string) => {
+    return get().latestNodeOutputData[nodeId];
+  },
+  getAccumulatedNodeInputData: (nodeId: string) => {
+    return get().accumulatedNodeInputData[nodeId] || {};
+  },
+  getAccumulatedNodeOutputData: (nodeId: string) => {
+    return get().accumulatedNodeOutputData[nodeId] || {};
+  },
+  getLatestNodeExecutionResult: (nodeId: string) => {
+    const results =
+      get().nodes.find((n) => n.id === nodeId)?.data?.nodeExecutionResults ||
+      [];
+    return results.length > 0 ? results[results.length - 1] : undefined;
+  },
+  clearAllNodeExecutionResults: () => {
+    set((state) => ({
+      nodes: state.nodes.map((n) => ({
+        ...n,
+        data: {
+          ...n.data,
+          nodeExecutionResults: [],
+        },
+      })),
+      latestNodeInputData: {},
+      latestNodeOutputData: {},
+      accumulatedNodeInputData: {},
+      accumulatedNodeOutputData: {},
+    }));
  },
  getNodeBlockUIType: (nodeId: string) => {
    return (
--- a/autogpt_platform/frontend/src/app/(platform)/build/stores/types.ts
+++ b/autogpt_platform/frontend/src/app/(platform)/build/stores/types.ts
@@ -0,0 +1,19 @@
+import { IncompatibilityInfo } from "../hooks/useSubAgentUpdate/types";
+
+// Resolution mode data stored per node
+export type NodeResolutionData = {
+  incompatibilities: IncompatibilityInfo;
+  // The NEW schema from the update (what we're updating TO)
+  pendingUpdate: {
+    input_schema: Record<string, unknown>;
+    output_schema: Record<string, unknown>;
+  };
+  // The OLD schema before the update (what we're updating FROM)
+  // Needed to merge and show removed inputs during resolution
+  currentSchema: {
+    input_schema: Record<string, unknown>;
+    output_schema: Record<string, unknown>;
+  };
+  // The full updated hardcoded values to apply when resolution completes
+  pendingHardcodedValues: Record<string, unknown>;
+};
--- a/autogpt_platform/frontend/src/app/(platform)/chat/components/Chat/Chat.tsx
+++ b/autogpt_platform/frontend/src/app/(platform)/chat/components/Chat/Chat.tsx
@@ -1,134 +0,0 @@
-"use client";
-
-import { Button } from "@/components/atoms/Button/Button";
-import { Text } from "@/components/atoms/Text/Text";
-import { cn } from "@/lib/utils";
-import { List } from "@phosphor-icons/react";
-import React, { useState } from "react";
-import { ChatContainer } from "./components/ChatContainer/ChatContainer";
-import { ChatErrorState } from "./components/ChatErrorState/ChatErrorState";
-import { ChatLoadingState } from "./components/ChatLoadingState/ChatLoadingState";
-import { SessionsDrawer } from "./components/SessionsDrawer/SessionsDrawer";
-import { useChat } from "./useChat";
-
-export interface ChatProps {
-  className?: string;
-  headerTitle?: React.ReactNode;
-  showHeader?: boolean;
-  showSessionInfo?: boolean;
-  showNewChatButton?: boolean;
-  onNewChat?: () => void;
-  headerActions?: React.ReactNode;
-}
-
-export function Chat({
-  className,
-  headerTitle = "AutoGPT Copilot",
-  showHeader = true,
-  showSessionInfo = true,
-  showNewChatButton = true,
-  onNewChat,
-  headerActions,
-}: ChatProps) {
-  const {
-    messages,
-    isLoading,
-    isCreating,
-    error,
-    sessionId,
-    createSession,
-    clearSession,
-    loadSession,
-  } = useChat();
-
-  const [isSessionsDrawerOpen, setIsSessionsDrawerOpen] = useState(false);
-
-  const handleNewChat = () => {
-    clearSession();
-    onNewChat?.();
-  };
-
-  const handleSelectSession = async (sessionId: string) => {
-    try {
-      await loadSession(sessionId);
-    } catch (err) {
-      console.error("Failed to load session:", err);
-    }
-  };
-
-  return (
-    <div className={cn("flex h-full flex-col", className)}>
-      {/* Header */}
-      {showHeader && (
-        <header className="shrink-0 border-t border-zinc-200 bg-white p-3">
-          <div className="flex items-center justify-between">
-            <div className="flex items-center gap-3">
-              <button
-                aria-label="View sessions"
-                onClick={() => setIsSessionsDrawerOpen(true)}
-                className="flex size-8 items-center justify-center rounded hover:bg-zinc-100"
-              >
-                <List width="1.25rem" height="1.25rem" />
-              </button>
-              {typeof headerTitle === "string" ? (
-                <Text variant="h2" className="text-lg font-semibold">
-                  {headerTitle}
-                </Text>
-              ) : (
-                headerTitle
-              )}
-            </div>
-            <div className="flex items-center gap-3">
-              {showSessionInfo && sessionId && (
-                <>
-                  {showNewChatButton && (
-                    <Button
-                      variant="outline"
-                      size="small"
-                      onClick={handleNewChat}
-                    >
-                      New Chat
-                    </Button>
-                  )}
-                </>
-              )}
-              {headerActions}
-            </div>
-          </div>
-        </header>
-      )}
-
-      {/* Main Content */}
-      <main className="flex min-h-0 flex-1 flex-col overflow-hidden">
-        {/* Loading State - show when explicitly loading/creating OR when we don't have a session yet and no error */}
-        {(isLoading || isCreating || (!sessionId && !error)) && (
-          <ChatLoadingState
-            message={isCreating ? "Creating session..." : "Loading..."}
-          />
-        )}
-
-        {/* Error State */}
-        {error && !isLoading && (
-          <ChatErrorState error={error} onRetry={createSession} />
-        )}
-
-        {/* Session Content */}
-        {sessionId && !isLoading && !error && (
-          <ChatContainer
-            sessionId={sessionId}
-            initialMessages={messages}
-            className="flex-1"
-          />
-        )}
-      </main>
-
-      {/* Sessions Drawer */}
-      <SessionsDrawer
-        isOpen={isSessionsDrawerOpen}
-        onClose={() => setIsSessionsDrawerOpen(false)}
-        onSelectSession={handleSelectSession}
-        currentSessionId={sessionId}
-      />
-    </div>
-  );
-}
--- a/autogpt_platform/frontend/src/app/(platform)/chat/components/Chat/components/ChatContainer/ChatContainer.tsx
+++ b/autogpt_platform/frontend/src/app/(platform)/chat/components/Chat/components/ChatContainer/ChatContainer.tsx
@@ -1,88 +0,0 @@
-import type { SessionDetailResponse } from "@/app/api/__generated__/models/sessionDetailResponse";
-import { cn } from "@/lib/utils";
-import { useCallback } from "react";
-import { usePageContext } from "../../usePageContext";
-import { ChatInput } from "../ChatInput/ChatInput";
-import { MessageList } from "../MessageList/MessageList";
-import { QuickActionsWelcome } from "../QuickActionsWelcome/QuickActionsWelcome";
-import { useChatContainer } from "./useChatContainer";
-
-export interface ChatContainerProps {
-  sessionId: string | null;
-  initialMessages: SessionDetailResponse["messages"];
-  className?: string;
-}
-
-export function ChatContainer({
-  sessionId,
-  initialMessages,
-  className,
-}: ChatContainerProps) {
-  const { messages, streamingChunks, isStreaming, sendMessage } =
-    useChatContainer({
-      sessionId,
-      initialMessages,
-    });
-  const { capturePageContext } = usePageContext();
-
-  // Wrap sendMessage to automatically capture page context
-  const sendMessageWithContext = useCallback(
-    async (content: string, isUserMessage: boolean = true) => {
-      const context = capturePageContext();
-      await sendMessage(content, isUserMessage, context);
-    },
-    [sendMessage, capturePageContext],
-  );
-
-  const quickActions = [
-    "Find agents for social media management",
-    "Show me agents for content creation",
-    "Help me automate my business",
-    "What can you help me with?",
-  ];
-
-  return (
-    <div
-      className={cn("flex h-full min-h-0 flex-col", className)}
-      style={{
-        backgroundColor: "#ffffff",
-        backgroundImage:
-          "radial-gradient(#e5e5e5 0.5px, transparent 0.5px), radial-gradient(#e5e5e5 0.5px, #ffffff 0.5px)",
-        backgroundSize: "20px 20px",
-        backgroundPosition: "0 0, 10px 10px",
-      }}
-    >
-      {/* Messages or Welcome Screen */}
-      <div className="flex min-h-0 flex-1 flex-col overflow-hidden pb-24">
-        {messages.length === 0 ? (
-          <QuickActionsWelcome
-            title="Welcome to AutoGPT Copilot"
-            description="Start a conversation to discover and run AI agents."
-            actions={quickActions}
-            onActionClick={sendMessageWithContext}
-            disabled={isStreaming || !sessionId}
-          />
-        ) : (
-          <MessageList
-            messages={messages}
-            streamingChunks={streamingChunks}
-            isStreaming={isStreaming}
-            onSendMessage={sendMessageWithContext}
-            className="flex-1"
-          />
-        )}
-      </div>
-
-      {/* Input - Always visible */}
-      <div className="fixed bottom-0 left-0 right-0 z-50 border-t border-zinc-200 bg-white p-4">
-        <ChatInput
-          onSend={sendMessageWithContext}
-          disabled={isStreaming || !sessionId}
-          placeholder={
-            sessionId ? "Type your message..." : "Creating session..."
-          }
-        />
-      </div>
-    </div>
-  );
-}
--- a/autogpt_platform/frontend/src/app/(platform)/chat/components/Chat/components/ChatInput/ChatInput.tsx
+++ b/autogpt_platform/frontend/src/app/(platform)/chat/components/Chat/components/ChatInput/ChatInput.tsx
@@ -1,64 +0,0 @@
-import { Input } from "@/components/atoms/Input/Input";
-import { cn } from "@/lib/utils";
-import { ArrowUpIcon } from "@phosphor-icons/react";
-import { useChatInput } from "./useChatInput";
-
-export interface ChatInputProps {
-  onSend: (message: string) => void;
-  disabled?: boolean;
-  placeholder?: string;
-  className?: string;
-}
-
-export function ChatInput({
-  onSend,
-  disabled = false,
-  placeholder = "Type your message...",
-  className,
-}: ChatInputProps) {
-  const inputId = "chat-input";
-  const { value, setValue, handleKeyDown, handleSend } = useChatInput({
-    onSend,
-    disabled,
-    maxRows: 5,
-    inputId,
-  });
-
-  return (
-    <div className={cn("relative flex-1", className)}>
-      <Input
-        id={inputId}
-        label="Chat message input"
-        hideLabel
-        type="textarea"
-        value={value}
-        onChange={(e) => setValue(e.target.value)}
-        onKeyDown={handleKeyDown}
-        placeholder={placeholder}
-        disabled={disabled}
-        rows={1}
-        wrapperClassName="mb-0 relative"
-        className="pr-12"
-      />
-      <span id="chat-input-hint" className="sr-only">
-        Press Enter to send, Shift+Enter for new line
-      </span>
-
-      <button
-        onClick={handleSend}
-        disabled={disabled || !value.trim()}
-        className={cn(
-          "absolute right-3 top-1/2 flex h-8 w-8 -translate-y-1/2 items-center justify-center rounded-full",
-          "border border-zinc-800 bg-zinc-800 text-white",
-          "hover:border-zinc-900 hover:bg-zinc-900",
-          "disabled:border-zinc-200 disabled:bg-zinc-200 disabled:text-white disabled:opacity-50",
-          "transition-colors focus-visible:outline-none focus-visible:ring-1 focus-visible:ring-neutral-950",
-          "disabled:pointer-events-none",
-        )}
-        aria-label="Send message"
-      >
-        <ArrowUpIcon className="h-3 w-3" weight="bold" />
-      </button>
-    </div>
-  );
-}
--- a/autogpt_platform/frontend/src/app/(platform)/chat/components/Chat/components/ChatInput/useChatInput.ts
+++ b/autogpt_platform/frontend/src/app/(platform)/chat/components/Chat/components/ChatInput/useChatInput.ts
@@ -1,60 +0,0 @@
-import { KeyboardEvent, useCallback, useEffect, useState } from "react";
-
-interface UseChatInputArgs {
-  onSend: (message: string) => void;
-  disabled?: boolean;
-  maxRows?: number;
-  inputId?: string;
-}
-
-export function useChatInput({
-  onSend,
-  disabled = false,
-  maxRows = 5,
-  inputId = "chat-input",
-}: UseChatInputArgs) {
-  const [value, setValue] = useState("");
-
-  useEffect(() => {
-    const textarea = document.getElementById(inputId) as HTMLTextAreaElement;
-    if (!textarea) return;
-    textarea.style.height = "auto";
-    const lineHeight = parseInt(
-      window.getComputedStyle(textarea).lineHeight,
-      10,
-    );
-    const maxHeight = lineHeight * maxRows;
-    const newHeight = Math.min(textarea.scrollHeight, maxHeight);
-    textarea.style.height = `${newHeight}px`;
-    textarea.style.overflowY =
-      textarea.scrollHeight > maxHeight ? "auto" : "hidden";
-  }, [value, maxRows, inputId]);
-
-  const handleSend = useCallback(() => {
-    if (disabled || !value.trim()) return;
-    onSend(value.trim());
-    setValue("");
-    const textarea = document.getElementById(inputId) as HTMLTextAreaElement;
-    if (textarea) {
-      textarea.style.height = "auto";
-    }
-  }, [value, onSend, disabled, inputId]);
-
-  const handleKeyDown = useCallback(
-    (event: KeyboardEvent<HTMLInputElement | HTMLTextAreaElement>) => {
-      if (event.key === "Enter" && !event.shiftKey) {
-        event.preventDefault();
-        handleSend();
-      }
-      // Shift+Enter allows default behavior (new line) - no need to handle explicitly
-    },
-    [handleSend],
-  );
-
-  return {
-    value,
-    setValue,
-    handleKeyDown,
-    handleSend,
-  };
-}
--- a/autogpt_platform/frontend/src/app/(platform)/chat/components/Chat/components/MessageList/MessageList.tsx
+++ b/autogpt_platform/frontend/src/app/(platform)/chat/components/Chat/components/MessageList/MessageList.tsx
@@ -1,121 +0,0 @@
-"use client";
-
-import { cn } from "@/lib/utils";
-import { ChatMessage } from "../ChatMessage/ChatMessage";
-import type { ChatMessageData } from "../ChatMessage/useChatMessage";
-import { StreamingMessage } from "../StreamingMessage/StreamingMessage";
-import { ThinkingMessage } from "../ThinkingMessage/ThinkingMessage";
-import { useMessageList } from "./useMessageList";
-
-export interface MessageListProps {
-  messages: ChatMessageData[];
-  streamingChunks?: string[];
-  isStreaming?: boolean;
-  className?: string;
-  onStreamComplete?: () => void;
-  onSendMessage?: (content: string) => void;
-}
-
-export function MessageList({
-  messages,
-  streamingChunks = [],
-  isStreaming = false,
-  className,
-  onStreamComplete,
-  onSendMessage,
-}: MessageListProps) {
-  const { messagesEndRef, messagesContainerRef } = useMessageList({
-    messageCount: messages.length,
-    isStreaming,
-  });
-
-  return (
-    <div
-      ref={messagesContainerRef}
-      className={cn(
-        "flex-1 overflow-y-auto",
-        "scrollbar-thin scrollbar-track-transparent scrollbar-thumb-zinc-300",
-        className,
-      )}
-    >
-      <div className="mx-auto flex max-w-3xl flex-col py-4">
-        {/* Render all persisted messages */}
-        {messages.map((message, index) => {
-          // Check if current message is an agent_output tool_response
-          // and if previous message is an assistant message
-          let agentOutput: ChatMessageData | undefined;
-
-          if (message.type === "tool_response" && message.result) {
-            let parsedResult: Record<string, unknown> | null = null;
-            try {
-              parsedResult =
-                typeof message.result === "string"
-                  ? JSON.parse(message.result)
-                  : (message.result as Record<string, unknown>);
-            } catch {
-              parsedResult = null;
-            }
-            if (parsedResult?.type === "agent_output") {
-              const prevMessage = messages[index - 1];
-              if (
-                prevMessage &&
-                prevMessage.type === "message" &&
-                prevMessage.role === "assistant"
-              ) {
-                // This agent output will be rendered inside the previous assistant message
-                // Skip rendering this message separately
-                return null;
-              }
-            }
-          }
-
-          // Check if next message is an agent_output tool_response to include in current assistant message
-          if (message.type === "message" && message.role === "assistant") {
-            const nextMessage = messages[index + 1];
-            if (
-              nextMessage &&
-              nextMessage.type === "tool_response" &&
-              nextMessage.result
-            ) {
-              let parsedResult: Record<string, unknown> | null = null;
-              try {
-                parsedResult =
-                  typeof nextMessage.result === "string"
-                    ? JSON.parse(nextMessage.result)
-                    : (nextMessage.result as Record<string, unknown>);
-              } catch {
-                parsedResult = null;
-              }
-              if (parsedResult?.type === "agent_output") {
-                agentOutput = nextMessage;
-              }
-            }
-          }
-
-          return (
-            <ChatMessage
-              key={index}
-              message={message}
-              onSendMessage={onSendMessage}
-              agentOutput={agentOutput}
-            />
-          );
-        })}
-
-        {/* Render thinking message when streaming but no chunks yet */}
-        {isStreaming && streamingChunks.length === 0 && <ThinkingMessage />}
-
-        {/* Render streaming message if active */}
-        {isStreaming && streamingChunks.length > 0 && (
-          <StreamingMessage
-            chunks={streamingChunks}
-            onComplete={onStreamComplete}
-          />
-        )}
-
-        {/* Invisible div to scroll to */}
-        <div ref={messagesEndRef} />
-      </div>
-    </div>
-  );
-}
--- a/autogpt_platform/frontend/src/app/(platform)/chat/components/Chat/components/ThinkingMessage/ThinkingMessage.tsx
+++ b/autogpt_platform/frontend/src/app/(platform)/chat/components/Chat/components/ThinkingMessage/ThinkingMessage.tsx
@@ -1,70 +0,0 @@
-import { cn } from "@/lib/utils";
-import { RobotIcon } from "@phosphor-icons/react";
-import { useEffect, useRef, useState } from "react";
-import { MessageBubble } from "../MessageBubble/MessageBubble";
-
-export interface ThinkingMessageProps {
-  className?: string;
-}
-
-export function ThinkingMessage({ className }: ThinkingMessageProps) {
-  const [showSlowLoader, setShowSlowLoader] = useState(false);
-  const timerRef = useRef<NodeJS.Timeout | null>(null);
-
-  useEffect(() => {
-    if (timerRef.current === null) {
-      timerRef.current = setTimeout(() => {
-        setShowSlowLoader(true);
-      }, 8000);
-    }
-
-    return () => {
-      if (timerRef.current) {
-        clearTimeout(timerRef.current);
-        timerRef.current = null;
-      }
-    };
-  }, []);
-
-  return (
-    <div
-      className={cn(
-        "group relative flex w-full justify-start gap-3 px-4 py-3",
-        className,
-      )}
-    >
-      <div className="flex w-full max-w-3xl gap-3">
-        <div className="flex-shrink-0">
-          <div className="flex h-7 w-7 items-center justify-center rounded-lg bg-indigo-500">
-            <RobotIcon className="h-4 w-4 text-indigo-50" />
-          </div>
-        </div>
-
-        <div className="flex min-w-0 flex-1 flex-col">
-          <MessageBubble variant="assistant">
-            <div className="transition-all duration-500 ease-in-out">
-              {showSlowLoader ? (
-                <div className="flex flex-col items-center gap-3 py-2">
-                  <div className="loader" style={{ flexShrink: 0 }} />
-                  <p className="text-sm text-slate-700">
-                    Taking a bit longer to think, wait a moment please
-                  </p>
-                </div>
-              ) : (
-                <span
-                  className="inline-block bg-gradient-to-r from-neutral-400 via-neutral-600 to-neutral-400 bg-clip-text text-transparent"
-                  style={{
-                    backgroundSize: "200% 100%",
-                    animation: "shimmer 2s ease-in-out infinite",
-                  }}
-                >
-                  Thinking...
-                </span>
-              )}
-            </div>
-          </MessageBubble>
-        </div>
-      </div>
-    </div>
-  );
-}
--- a/autogpt_platform/frontend/src/app/(platform)/chat/components/Chat/components/ToolCallMessage/ToolCallMessage.tsx
+++ b/autogpt_platform/frontend/src/app/(platform)/chat/components/Chat/components/ToolCallMessage/ToolCallMessage.tsx
@@ -1,24 +0,0 @@
-import { Text } from "@/components/atoms/Text/Text";
-import { cn } from "@/lib/utils";
-import { WrenchIcon } from "@phosphor-icons/react";
-import { getToolActionPhrase } from "../../helpers";
-
-export interface ToolCallMessageProps {
-  toolName: string;
-  className?: string;
-}
-
-export function ToolCallMessage({ toolName, className }: ToolCallMessageProps) {
-  return (
-    <div className={cn("flex items-center justify-center gap-2", className)}>
-      <WrenchIcon
-        size={14}
-        weight="bold"
-        className="flex-shrink-0 text-neutral-500"
-      />
-      <Text variant="small" className="text-neutral-500">
-        {getToolActionPhrase(toolName)}...
-      </Text>
-    </div>
-  );
-}
--- a/autogpt_platform/frontend/src/app/(platform)/chat/components/Chat/components/ToolResponseMessage/ToolResponseMessage.tsx
+++ b/autogpt_platform/frontend/src/app/(platform)/chat/components/Chat/components/ToolResponseMessage/ToolResponseMessage.tsx
@@ -1,260 +0,0 @@
-import { Text } from "@/components/atoms/Text/Text";
-import "@/components/contextual/OutputRenderers";
-import {
-  globalRegistry,
-  OutputItem,
-} from "@/components/contextual/OutputRenderers";
-import { cn } from "@/lib/utils";
-import type { ToolResult } from "@/types/chat";
-import { WrenchIcon } from "@phosphor-icons/react";
-import { getToolActionPhrase } from "../../helpers";
-
-export interface ToolResponseMessageProps {
-  toolName: string;
-  result?: ToolResult;
-  success?: boolean;
-  className?: string;
-}
-
-export function ToolResponseMessage({
-  toolName,
-  result,
-  success: _success = true,
-  className,
-}: ToolResponseMessageProps) {
-  if (!result) {
-    return (
-      <div className={cn("flex items-center justify-center gap-2", className)}>
-        <WrenchIcon
-          size={14}
-          weight="bold"
-          className="flex-shrink-0 text-neutral-500"
-        />
-        <Text variant="small" className="text-neutral-500">
-          {getToolActionPhrase(toolName)}...
-        </Text>
-      </div>
-    );
-  }
-
-  let parsedResult: Record<string, unknown> | null = null;
-  try {
-    parsedResult =
-      typeof result === "string"
-        ? JSON.parse(result)
-        : (result as Record<string, unknown>);
-  } catch {
-    parsedResult = null;
-  }
-
-  if (parsedResult && typeof parsedResult === "object") {
-    const responseType = parsedResult.type as string | undefined;
-
-    if (responseType === "agent_output") {
-      const execution = parsedResult.execution as
-        | {
-            outputs?: Record<string, unknown[]>;
-          }
-        | null
-        | undefined;
-      const outputs = execution?.outputs || {};
-      const message = parsedResult.message as string | undefined;
-
-      return (
-        <div className={cn("space-y-4 px-4 py-2", className)}>
-          <div className="flex items-center gap-2">
-            <WrenchIcon
-              size={14}
-              weight="bold"
-              className="flex-shrink-0 text-neutral-500"
-            />
-            <Text variant="small" className="text-neutral-500">
-              {getToolActionPhrase(toolName)}
-            </Text>
-          </div>
-          {message && (
-            <div className="rounded border p-4">
-              <Text variant="small" className="text-neutral-600">
-                {message}
-              </Text>
-            </div>
-          )}
-          {Object.keys(outputs).length > 0 && (
-            <div className="space-y-4">
-              {Object.entries(outputs).map(([outputName, values]) =>
-                values.map((value, index) => {
-                  const renderer = globalRegistry.getRenderer(value);
-                  if (renderer) {
-                    return (
-                      <OutputItem
-                        key={`${outputName}-${index}`}
-                        value={value}
-                        renderer={renderer}
-                        label={outputName}
-                      />
-                    );
-                  }
-                  return (
-                    <div
-                      key={`${outputName}-${index}`}
-                      className="rounded border p-4"
-                    >
-                      <Text variant="large-medium" className="mb-2 capitalize">
-                        {outputName}
-                      </Text>
-                      <pre className="overflow-auto text-sm">
-                        {JSON.stringify(value, null, 2)}
-                      </pre>
-                    </div>
-                  );
-                }),
-              )}
-            </div>
-          )}
-        </div>
-      );
-    }
-
-    if (responseType === "block_output" && parsedResult.outputs) {
-      const outputs = parsedResult.outputs as Record<string, unknown[]>;
-
-      return (
-        <div className={cn("space-y-4 px-4 py-2", className)}>
-          <div className="flex items-center gap-2">
-            <WrenchIcon
-              size={14}
-              weight="bold"
-              className="flex-shrink-0 text-neutral-500"
-            />
-            <Text variant="small" className="text-neutral-500">
-              {getToolActionPhrase(toolName)}
-            </Text>
-          </div>
-          <div className="space-y-4">
-            {Object.entries(outputs).map(([outputName, values]) =>
-              values.map((value, index) => {
-                const renderer = globalRegistry.getRenderer(value);
-                if (renderer) {
-                  return (
-                    <OutputItem
-                      key={`${outputName}-${index}`}
-                      value={value}
-                      renderer={renderer}
-                      label={outputName}
-                    />
-                  );
-                }
-                return (
-                  <div
-                    key={`${outputName}-${index}`}
-                    className="rounded border p-4"
-                  >
-                    <Text variant="large-medium" className="mb-2 capitalize">
-                      {outputName}
-                    </Text>
-                    <pre className="overflow-auto text-sm">
-                      {JSON.stringify(value, null, 2)}
-                    </pre>
-                  </div>
-                );
-              }),
-            )}
-          </div>
-        </div>
-      );
-    }
-
-    // Handle other response types with a message field (e.g., understanding_updated)
-    if (parsedResult.message && typeof parsedResult.message === "string") {
-      // Format tool name from snake_case to Title Case
-      const formattedToolName = toolName
-        .split("_")
-        .map((word) => word.charAt(0).toUpperCase() + word.slice(1))
-        .join(" ");
-
-      // Clean up message - remove incomplete user_name references
-      let cleanedMessage = parsedResult.message;
-      // Remove "Updated understanding with: user_name" pattern if user_name is just a placeholder
-      cleanedMessage = cleanedMessage.replace(
-        /Updated understanding with:\s*user_name\.?\s*/gi,
-        "",
-      );
-      // Remove standalone user_name references
-      cleanedMessage = cleanedMessage.replace(/\buser_name\b\.?\s*/gi, "");
-      cleanedMessage = cleanedMessage.trim();
-
-      // Only show message if it has content after cleaning
-      if (!cleanedMessage) {
-        return (
-          <div
-            className={cn(
-              "flex items-center justify-center gap-2 px-4 py-2",
-              className,
-            )}
-          >
-            <WrenchIcon
-              size={14}
-              weight="bold"
-              className="flex-shrink-0 text-neutral-500"
-            />
-            <Text variant="small" className="text-neutral-500">
-              {formattedToolName}
-            </Text>
-          </div>
-        );
-      }
-
-      return (
-        <div className={cn("space-y-2 px-4 py-2", className)}>
-          <div className="flex items-center justify-center gap-2">
-            <WrenchIcon
-              size={14}
-              weight="bold"
-              className="flex-shrink-0 text-neutral-500"
-            />
-            <Text variant="small" className="text-neutral-500">
-              {formattedToolName}
-            </Text>
-          </div>
-          <div className="rounded border p-4">
-            <Text variant="small" className="text-neutral-600">
-              {cleanedMessage}
-            </Text>
-          </div>
-        </div>
-      );
-    }
-  }
-
-  const renderer = globalRegistry.getRenderer(result);
-  if (renderer) {
-    return (
-      <div className={cn("px-4 py-2", className)}>
-        <div className="mb-2 flex items-center gap-2">
-          <WrenchIcon
-            size={14}
-            weight="bold"
-            className="flex-shrink-0 text-neutral-500"
-          />
-          <Text variant="small" className="text-neutral-500">
-            {getToolActionPhrase(toolName)}
-          </Text>
-        </div>
-        <OutputItem value={result} renderer={renderer} />
-      </div>
-    );
-  }
-
-  return (
-    <div className={cn("flex items-center justify-center gap-2", className)}>
-      <WrenchIcon
-        size={14}
-        weight="bold"
-        className="flex-shrink-0 text-neutral-500"
-      />
-      <Text variant="small" className="text-neutral-500">
-        {getToolActionPhrase(toolName)}...
-      </Text>
-    </div>
-  );
-}
--- a/autogpt_platform/frontend/src/app/(platform)/chat/components/Chat/helpers.ts
+++ b/autogpt_platform/frontend/src/app/(platform)/chat/components/Chat/helpers.ts
@@ -1,66 +0,0 @@
-/**
- * Maps internal tool names to user-friendly display names with emojis.
- * @deprecated Use getToolActionPhrase or getToolCompletionPhrase for status messages
- *
- * @param toolName - The internal tool name from the backend
- * @returns A user-friendly display name with an emoji prefix
- */
-export function getToolDisplayName(toolName: string): string {
-  const toolDisplayNames: Record<string, string> = {
-    find_agent: "🔍 Search Marketplace",
-    get_agent_details: "📋 Get Agent Details",
-    check_credentials: "🔑 Check Credentials",
-    setup_agent: "⚙️ Setup Agent",
-    run_agent: "▶️ Run Agent",
-    get_required_setup_info: "📝 Get Setup Requirements",
-  };
-  return toolDisplayNames[toolName] || toolName;
-}
-
-/**
- * Maps internal tool names to human-friendly action phrases (present continuous).
- * Used for tool call messages to indicate what action is currently happening.
- *
- * @param toolName - The internal tool name from the backend
- * @returns A human-friendly action phrase in present continuous tense
- */
-export function getToolActionPhrase(toolName: string): string {
-  const toolActionPhrases: Record<string, string> = {
-    find_agent: "Looking for agents in the marketplace",
-    agent_carousel: "Looking for agents in the marketplace",
-    get_agent_details: "Learning about the agent",
-    check_credentials: "Checking your credentials",
-    setup_agent: "Setting up the agent",
-    execution_started: "Running the agent",
-    run_agent: "Running the agent",
-    get_required_setup_info: "Getting setup requirements",
-    schedule_agent: "Scheduling the agent to run",
-  };
-
-  // Return mapped phrase or generate human-friendly fallback
-  return toolActionPhrases[toolName] || toolName;
-}
-
-/**
- * Maps internal tool names to human-friendly completion phrases (past tense).
- * Used for tool response messages to indicate what action was completed.
- *
- * @param toolName - The internal tool name from the backend
- * @returns A human-friendly completion phrase in past tense
- */
-export function getToolCompletionPhrase(toolName: string): string {
-  const toolCompletionPhrases: Record<string, string> = {
-    find_agent: "Finished searching the marketplace",
-    get_agent_details: "Got agent details",
-    check_credentials: "Checked credentials",
-    setup_agent: "Agent setup complete",
-    run_agent: "Agent execution started",
-    get_required_setup_info: "Got setup requirements",
-  };
-
-  // Return mapped phrase or generate human-friendly fallback
-  return (
-    toolCompletionPhrases[toolName] ||
-    `Finished ${toolName.replace(/_/g, " ").replace("...", "")}`
-  );
-}
--- a/autogpt_platform/frontend/src/app/(platform)/chat/components/Chat/useChatSession.ts
+++ b/autogpt_platform/frontend/src/app/(platform)/chat/components/Chat/useChatSession.ts
@@ -1,271 +0,0 @@
-import {
-  getGetV2GetSessionQueryKey,
-  getGetV2GetSessionQueryOptions,
-  postV2CreateSession,
-  useGetV2GetSession,
-  usePatchV2SessionAssignUser,
-  usePostV2CreateSession,
-} from "@/app/api/__generated__/endpoints/chat/chat";
-import type { SessionDetailResponse } from "@/app/api/__generated__/models/sessionDetailResponse";
-import { okData } from "@/app/api/helpers";
-import { isValidUUID } from "@/lib/utils";
-import { Key, storage } from "@/services/storage/local-storage";
-import { useQueryClient } from "@tanstack/react-query";
-import { useCallback, useEffect, useMemo, useRef, useState } from "react";
-import { toast } from "sonner";
-
-interface UseChatSessionArgs {
-  urlSessionId?: string | null;
-  autoCreate?: boolean;
-}
-
-export function useChatSession({
-  urlSessionId,
-  autoCreate = false,
-}: UseChatSessionArgs = {}) {
-  const queryClient = useQueryClient();
-  const [sessionId, setSessionId] = useState<string | null>(null);
-  const [error, setError] = useState<Error | null>(null);
-  const justCreatedSessionIdRef = useRef<string | null>(null);
-
-  useEffect(() => {
-    if (urlSessionId) {
-      if (!isValidUUID(urlSessionId)) {
-        console.error("Invalid session ID format:", urlSessionId);
-        toast.error("Invalid session ID", {
-          description:
-            "The session ID in the URL is not valid. Starting a new session...",
-        });
-        setSessionId(null);
-        storage.clean(Key.CHAT_SESSION_ID);
-        return;
-      }
-      setSessionId(urlSessionId);
-      storage.set(Key.CHAT_SESSION_ID, urlSessionId);
-    } else {
-      const storedSessionId = storage.get(Key.CHAT_SESSION_ID);
-      if (storedSessionId) {
-        if (!isValidUUID(storedSessionId)) {
-          console.error("Invalid stored session ID:", storedSessionId);
-          storage.clean(Key.CHAT_SESSION_ID);
-          setSessionId(null);
-        } else {
-          setSessionId(storedSessionId);
-        }
-      } else if (autoCreate) {
-        setSessionId(null);
-      }
-    }
-  }, [urlSessionId, autoCreate]);
-
-  const {
-    mutateAsync: createSessionMutation,
-    isPending: isCreating,
-    error: createError,
-  } = usePostV2CreateSession();
-
-  const {
-    data: sessionData,
-    isLoading: isLoadingSession,
-    error: loadError,
-    refetch,
-  } = useGetV2GetSession(sessionId || "", {
-    query: {
-      enabled: !!sessionId,
-      select: okData,
-      staleTime: Infinity, // Never mark as stale
-      refetchOnMount: false, // Don't refetch on component mount
-      refetchOnWindowFocus: false, // Don't refetch when window regains focus
-      refetchOnReconnect: false, // Don't refetch when network reconnects
-      retry: 1,
-    },
-  });
-
-  const { mutateAsync: claimSessionMutation } = usePatchV2SessionAssignUser();
-
-  const session = useMemo(() => {
-    if (sessionData) return sessionData;
-
-    if (sessionId && justCreatedSessionIdRef.current === sessionId) {
-      return {
-        id: sessionId,
-        user_id: null,
-        messages: [],
-        created_at: new Date().toISOString(),
-        updated_at: new Date().toISOString(),
-      } as SessionDetailResponse;
-    }
-    return null;
-  }, [sessionData, sessionId]);
-
-  const messages = session?.messages || [];
-  const isLoading = isCreating || isLoadingSession;
-
-  useEffect(() => {
-    if (createError) {
-      setError(
-        createError instanceof Error
-          ? createError
-          : new Error("Failed to create session"),
-      );
-    } else if (loadError) {
-      setError(
-        loadError instanceof Error
-          ? loadError
-          : new Error("Failed to load session"),
-      );
-    } else {
-      setError(null);
-    }
-  }, [createError, loadError]);
-
-  const createSession = useCallback(
-    async function createSession() {
-      try {
-        setError(null);
-        const response = await postV2CreateSession({
-          body: JSON.stringify({}),
-        });
-        if (response.status !== 200) {
-          throw new Error("Failed to create session");
-        }
-        const newSessionId = response.data.id;
-        setSessionId(newSessionId);
-        storage.set(Key.CHAT_SESSION_ID, newSessionId);
-        justCreatedSessionIdRef.current = newSessionId;
-        setTimeout(() => {
-          if (justCreatedSessionIdRef.current === newSessionId) {
-            justCreatedSessionIdRef.current = null;
-          }
-        }, 10000);
-        return newSessionId;
-      } catch (err) {
-        const error =
-          err instanceof Error ? err : new Error("Failed to create session");
-        setError(error);
-        toast.error("Failed to create chat session", {
-          description: error.message,
-        });
-        throw error;
-      }
-    },
-    [createSessionMutation],
-  );
-
-  const loadSession = useCallback(
-    async function loadSession(id: string) {
-      try {
-        setError(null);
-        // Invalidate the query cache for this session to force a fresh fetch
-        await queryClient.invalidateQueries({
-          queryKey: getGetV2GetSessionQueryKey(id),
-        });
-        // Set sessionId after invalidation to ensure the hook refetches
-        setSessionId(id);
-        storage.set(Key.CHAT_SESSION_ID, id);
-        // Force fetch with fresh data (bypass cache)
-        const queryOptions = getGetV2GetSessionQueryOptions(id, {
-          query: {
-            staleTime: 0, // Force fresh fetch
-            retry: 1,
-          },
-        });
-        const result = await queryClient.fetchQuery(queryOptions);
-        if (!result || ("status" in result && result.status !== 200)) {
-          console.warn("Session not found on server, clearing local state");
-          storage.clean(Key.CHAT_SESSION_ID);
-          setSessionId(null);
-          throw new Error("Session not found");
-        }
-      } catch (err) {
-        const error =
-          err instanceof Error ? err : new Error("Failed to load session");
-        setError(error);
-        throw error;
-      }
-    },
-    [queryClient],
-  );
-
-  const refreshSession = useCallback(
-    async function refreshSession() {
-      if (!sessionId) {
-        console.log("[refreshSession] Skipping - no session ID");
-        return;
-      }
-      try {
-        setError(null);
-        await refetch();
-      } catch (err) {
-        const error =
-          err instanceof Error ? err : new Error("Failed to refresh session");
-        setError(error);
-        throw error;
-      }
-    },
-    [sessionId, refetch],
-  );
-
-  const claimSession = useCallback(
-    async function claimSession(id: string) {
-      try {
-        setError(null);
-        await claimSessionMutation({ sessionId: id });
-        if (justCreatedSessionIdRef.current === id) {
-          justCreatedSessionIdRef.current = null;
-        }
-        await queryClient.invalidateQueries({
-          queryKey: getGetV2GetSessionQueryKey(id),
-        });
-        await refetch();
-        toast.success("Session claimed successfully", {
-          description: "Your chat history has been saved to your account",
-        });
-      } catch (err: unknown) {
-        const error =
-          err instanceof Error ? err : new Error("Failed to claim session");
-        const is404 =
-          (typeof err === "object" &&
-            err !== null &&
-            "status" in err &&
-            err.status === 404) ||
-          (typeof err === "object" &&
-            err !== null &&
-            "response" in err &&
-            typeof err.response === "object" &&
-            err.response !== null &&
-            "status" in err.response &&
-            err.response.status === 404);
-        if (!is404) {
-          setError(error);
-          toast.error("Failed to claim session", {
-            description: error.message || "Unable to claim session",
-          });
-        }
-        throw error;
-      }
-    },
-    [claimSessionMutation, queryClient, refetch],
-  );
-
-  const clearSession = useCallback(function clearSession() {
-    setSessionId(null);
-    setError(null);
-    storage.clean(Key.CHAT_SESSION_ID);
-    justCreatedSessionIdRef.current = null;
-  }, []);
-
-  return {
-    session,
-    sessionId,
-    messages,
-    isLoading,
-    isCreating,
-    error,
-    createSession,
-    loadSession,
-    refreshSession,
-    claimSession,
-    clearSession,
-  };
-}
--- a/autogpt_platform/frontend/src/app/(platform)/chat/page.tsx
+++ b/autogpt_platform/frontend/src/app/(platform)/chat/page.tsx
@@ -1,27 +0,0 @@
-"use client";
-
-import { Flag, useGetFlag } from "@/services/feature-flags/use-get-flag";
-import { useRouter } from "next/navigation";
-import { useEffect } from "react";
-import { Chat } from "./components/Chat/Chat";
-
-export default function ChatPage() {
-  const isChatEnabled = useGetFlag(Flag.CHAT);
-  const router = useRouter();
-
-  useEffect(() => {
-    if (isChatEnabled === false) {
-      router.push("/marketplace");
-    }
-  }, [isChatEnabled, router]);
-
-  if (isChatEnabled === null || isChatEnabled === false) {
-    return null;
-  }
-
-  return (
-    <div className="flex h-full flex-col">
-      <Chat className="flex-1" />
-    </div>
-  );
-}
--- a/autogpt_platform/frontend/src/app/(platform)/copilot/NewChatContext.tsx
+++ b/autogpt_platform/frontend/src/app/(platform)/copilot/NewChatContext.tsx
@@ -0,0 +1,41 @@
+"use client";
+
+import { createContext, useContext, useRef, type ReactNode } from "react";
+
+/**
+ * Shape of the value shared through `NewChatContext`.
+ *
+ * Each handler is exposed as a caller (`onNewChatClick`, `performNewChat`)
+ * paired with a setter that registers or clears the function it forwards to.
+ */
+interface NewChatContextValue {
+  /** Invokes the handler registered through `setOnNewChatClick`, if any. */
+  onNewChatClick: () => void;
+  /** Registers the click handler; call with no argument to clear it. */
+  setOnNewChatClick: (handler?: () => void) => void;
+  /** Invokes the handler registered through `setPerformNewChat`, if any. */
+  performNewChat?: () => void;
+  /** Registers the "perform new chat" handler; call with no argument to clear it. */
+  setPerformNewChat: (handler?: () => void) => void;
+}
+
+// Defaults to `null`, so consumers rendered outside a provider receive `null`.
+const NewChatContext = createContext<NewChatContextValue | null>(null);
+
+/**
+ * Provides a `NewChatContextValue` to `children`.
+ *
+ * The registered handlers live in refs and the context value object itself is
+ * created once and kept in a ref, so the same object is passed to the provider
+ * on every render; registering or clearing a handler only mutates a ref and
+ * does not trigger a re-render.
+ */
+export function NewChatProvider({ children }: { children: ReactNode }) {
+  // Currently registered handlers; `undefined` while nothing is registered.
+  const onNewChatRef = useRef<(() => void) | undefined>();
+  const performNewChatRef = useRef<(() => void) | undefined>();
+  const contextValueRef = useRef<NewChatContextValue>({
+    // Callers are no-ops while no handler is registered (optional call).
+    onNewChatClick() {
+      onNewChatRef.current?.();
+    },
+    setOnNewChatClick(handler?: () => void) {
+      onNewChatRef.current = handler;
+    },
+    performNewChat() {
+      performNewChatRef.current?.();
+    },
+    setPerformNewChat(handler?: () => void) {
+      performNewChatRef.current = handler;
+    },
+  });
+
+  return (
+    <NewChatContext.Provider value={contextValueRef.current}>
+      {children}
+    </NewChatContext.Provider>
+  );
+}
+
+/**
+ * Reads the new-chat context.
+ *
+ * @returns The value supplied by the nearest `NewChatProvider`, or `null`
+ * (the context default) when no provider is mounted above the caller.
+ */
+export function useNewChat() {
+  const newChatContextValue = useContext(NewChatContext);
+  return newChatContextValue;
+}
--- a/autogpt_platform/frontend/src/app/(platform)/copilot/components/CopilotShell/CopilotShell.tsx
+++ b/autogpt_platform/frontend/src/app/(platform)/copilot/components/CopilotShell/CopilotShell.tsx
@@ -0,0 +1,105 @@
+"use client";
+
+import { ChatLoader } from "@/components/contextual/Chat/components/ChatLoader/ChatLoader";
+import { NAVBAR_HEIGHT_PX } from "@/lib/constants";
+import type { ReactNode } from "react";
+import { useEffect } from "react";
+import { useNewChat } from "../../NewChatContext";
+import { DesktopSidebar } from "./components/DesktopSidebar/DesktopSidebar";
+import { LoadingState } from "./components/LoadingState/LoadingState";
+import { MobileDrawer } from "./components/MobileDrawer/MobileDrawer";
+import { MobileHeader } from "./components/MobileHeader/MobileHeader";
+import { useCopilotShell } from "./useCopilotShell";
+
+/** Props for `CopilotShell`. */
+interface Props {
+  /** Page content rendered in the main column once the shell is ready. */
+  children: ReactNode;
+}
+
+/**
+ * Layout shell for the copilot pages.
+ *
+ * Renders the sessions list as a sidebar on desktop or as a drawer (opened
+ * from a floating header button) on mobile, next to the page `children`.
+ * All state and handlers come from `useCopilotShell`. While the user is not
+ * logged in only a centered loader is rendered; while the shell is not ready
+ * to show content, `LoadingState` replaces `children`.
+ */
+export function CopilotShell({ children }: Props) {
+  const {
+    isMobile,
+    isDrawerOpen,
+    isLoading,
+    isLoggedIn,
+    hasActiveSession,
+    sessions,
+    currentSessionId,
+    handleSelectSession,
+    handleOpenDrawer,
+    handleCloseDrawer,
+    handleDrawerOpenChange,
+    handleNewChat,
+    hasNextPage,
+    isFetchingNextPage,
+    fetchNextPage,
+    isReadyToShowContent,
+  } = useCopilotShell();
+
+  const newChatContext = useNewChat();
+  // Prefer the click handler exposed by the new-chat context; fall back to the
+  // shell's own `handleNewChat` when the context has none.
+  // NOTE(review): the fallback presumably only applies when no provider is
+  // mounted (`useNewChat()` returns null) - confirm a click handler is always
+  // registered on the context when a provider is present.
+  const handleNewChatClickWrapper =
+    newChatContext?.onNewChatClick || handleNewChat;
+
+  // Expose the shell's `handleNewChat` through the context as `performNewChat`
+  // for as long as this component is mounted; cleared again on cleanup.
+  useEffect(
+    function registerNewChatHandler() {
+      if (!newChatContext) return;
+      newChatContext.setPerformNewChat(handleNewChat);
+      return function cleanup() {
+        newChatContext.setPerformNewChat(undefined);
+      };
+    },
+    [newChatContext, handleNewChat],
+  );
+
+  // Not logged in: show only the loader, no sidebar or content.
+  if (!isLoggedIn) {
+    return (
+      <div className="flex h-full items-center justify-center">
+        <ChatLoader />
+      </div>
+    );
+  }
+
+  // The shell fills the viewport height minus the navbar.
+  return (
+    <div
+      className="flex overflow-hidden bg-[#EFEFF0]"
+      style={{ height: `calc(100vh - ${NAVBAR_HEIGHT_PX}px)` }}
+    >
+      {!isMobile && (
+        <DesktopSidebar
+          sessions={sessions}
+          currentSessionId={currentSessionId}
+          isLoading={isLoading}
+          hasNextPage={hasNextPage}
+          isFetchingNextPage={isFetchingNextPage}
+          onSelectSession={handleSelectSession}
+          onFetchNextPage={fetchNextPage}
+          onNewChat={handleNewChatClickWrapper}
+          hasActiveSession={Boolean(hasActiveSession)}
+        />
+      )}
+
+      <div className="relative flex min-h-0 flex-1 flex-col">
+        {isMobile && <MobileHeader onOpenDrawer={handleOpenDrawer} />}
+        <div className="flex min-h-0 flex-1 flex-col">
+          {isReadyToShowContent ? children : <LoadingState />}
+        </div>
+      </div>
+
+      {isMobile && (
+        <MobileDrawer
+          isOpen={isDrawerOpen}
+          sessions={sessions}
+          currentSessionId={currentSessionId}
+          isLoading={isLoading}
+          hasNextPage={hasNextPage}
+          isFetchingNextPage={isFetchingNextPage}
+          onSelectSession={handleSelectSession}
+          onFetchNextPage={fetchNextPage}
+          onNewChat={handleNewChatClickWrapper}
+          onClose={handleCloseDrawer}
+          onOpenChange={handleDrawerOpenChange}
+          hasActiveSession={Boolean(hasActiveSession)}
+        />
+      )}
+    </div>
+  );
+}
--- a/autogpt_platform/frontend/src/app/(platform)/copilot/components/CopilotShell/components/DesktopSidebar/DesktopSidebar.tsx
+++ b/autogpt_platform/frontend/src/app/(platform)/copilot/components/CopilotShell/components/DesktopSidebar/DesktopSidebar.tsx
@@ -0,0 +1,70 @@
+import type { SessionSummaryResponse } from "@/app/api/__generated__/models/sessionSummaryResponse";
+import { Button } from "@/components/atoms/Button/Button";
+import { Text } from "@/components/atoms/Text/Text";
+import { scrollbarStyles } from "@/components/styles/scrollbars";
+import { cn } from "@/lib/utils";
+import { Plus } from "@phosphor-icons/react";
+import { SessionsList } from "../SessionsList/SessionsList";
+
+/** Props for `DesktopSidebar`. */
+interface Props {
+  /** Sessions to list. */
+  sessions: SessionSummaryResponse[];
+  /** Id of the session to highlight, or `null` for none. */
+  currentSessionId: string | null;
+  /** Forwarded to `SessionsList` as its loading flag. */
+  isLoading: boolean;
+  /** Forwarded to `SessionsList`: whether more sessions can be fetched. */
+  hasNextPage: boolean;
+  /** Forwarded to `SessionsList`: whether a further page is being fetched. */
+  isFetchingNextPage: boolean;
+  /** Called with the session id when a session is chosen. */
+  onSelectSession: (sessionId: string) => void;
+  /** Called when the list asks for the next page. */
+  onFetchNextPage: () => void;
+  /** Called when the "New Chat" button is pressed. */
+  onNewChat: () => void;
+  /** Controls whether the "New Chat" footer is rendered. */
+  hasActiveSession: boolean;
+}
+
+/**
+ * Fixed-width sidebar with a "Your chats" heading, a scrollable
+ * `SessionsList`, and - only when `hasActiveSession` is true - a footer with
+ * a "New Chat" button.
+ */
+export function DesktopSidebar({
+  sessions,
+  currentSessionId,
+  isLoading,
+  hasNextPage,
+  isFetchingNextPage,
+  onSelectSession,
+  onFetchNextPage,
+  onNewChat,
+  hasActiveSession,
+}: Props) {
+  return (
+    <aside className="flex h-full w-80 flex-col border-r border-zinc-100 bg-zinc-50">
+      <div className="shrink-0 px-6 py-4">
+        <Text variant="h3" size="body-medium">
+          Your chats
+        </Text>
+      </div>
+      <div
+        className={cn(
+          "flex min-h-0 flex-1 flex-col overflow-y-auto px-3 py-3",
+          scrollbarStyles,
+        )}
+      >
+        <SessionsList
+          sessions={sessions}
+          currentSessionId={currentSessionId}
+          isLoading={isLoading}
+          hasNextPage={hasNextPage}
+          isFetchingNextPage={isFetchingNextPage}
+          onSelectSession={onSelectSession}
+          onFetchNextPage={onFetchNextPage}
+        />
+      </div>
+      {hasActiveSession && (
+        <div className="shrink-0 bg-zinc-50 p-3 shadow-[0_-4px_6px_-1px_rgba(0,0,0,0.05)]">
+          <Button
+            variant="primary"
+            size="small"
+            onClick={onNewChat}
+            className="w-full"
+            leftIcon={<Plus width="1rem" height="1rem" />}
+          >
+            New Chat
+          </Button>
+        </div>
+      )}
+    </aside>
+  );
+}
--- a/autogpt_platform/frontend/src/app/(platform)/copilot/components/CopilotShell/components/LoadingState/LoadingState.tsx
+++ b/autogpt_platform/frontend/src/app/(platform)/copilot/components/CopilotShell/components/LoadingState/LoadingState.tsx
@@ -0,0 +1,15 @@
+import { Text } from "@/components/atoms/Text/Text";
+import { ChatLoader } from "@/components/contextual/Chat/components/ChatLoader/ChatLoader";
+
+/**
+ * Centered placeholder: a `ChatLoader` with the caption
+ * "Loading your chats..." underneath. Takes no props.
+ */
+export function LoadingState() {
+  return (
+    <div className="flex flex-1 items-center justify-center">
+      <div className="flex flex-col items-center gap-4">
+        <ChatLoader />
+        <Text variant="body" className="text-zinc-500">
+          Loading your chats...
+        </Text>
+      </div>
+    </div>
+  );
+}
--- a/autogpt_platform/frontend/src/app/(platform)/copilot/components/CopilotShell/components/MobileDrawer/MobileDrawer.tsx
+++ b/autogpt_platform/frontend/src/app/(platform)/copilot/components/CopilotShell/components/MobileDrawer/MobileDrawer.tsx
@@ -0,0 +1,91 @@
+import type { SessionSummaryResponse } from "@/app/api/__generated__/models/sessionSummaryResponse";
+import { Button } from "@/components/atoms/Button/Button";
+import { scrollbarStyles } from "@/components/styles/scrollbars";
+import { cn } from "@/lib/utils";
+import { PlusIcon, X } from "@phosphor-icons/react";
+import { Drawer } from "vaul";
+import { SessionsList } from "../SessionsList/SessionsList";
+
+/** Props for `MobileDrawer`. */
+interface Props {
+  /** Whether the drawer is open (controlled). */
+  isOpen: boolean;
+  /** Sessions to list. */
+  sessions: SessionSummaryResponse[];
+  /** Id of the session to highlight, or `null` for none. */
+  currentSessionId: string | null;
+  /** Forwarded to `SessionsList` as its loading flag. */
+  isLoading: boolean;
+  /** Forwarded to `SessionsList`: whether more sessions can be fetched. */
+  hasNextPage: boolean;
+  /** Forwarded to `SessionsList`: whether a further page is being fetched. */
+  isFetchingNextPage: boolean;
+  /** Called with the session id when a session is chosen. */
+  onSelectSession: (sessionId: string) => void;
+  /** Called when the list asks for the next page. */
+  onFetchNextPage: () => void;
+  /** Called when the "New Chat" button is pressed. */
+  onNewChat: () => void;
+  /** Called when the close (X) button is pressed. */
+  onClose: () => void;
+  /** Passed to the drawer root as its open-state change callback. */
+  onOpenChange: (open: boolean) => void;
+  /** Controls whether the "New Chat" footer is rendered. */
+  hasActiveSession: boolean;
+}
+
+/**
+ * Left-side drawer (vaul `Drawer`) containing a "Your chats" title with a
+ * close button, a scrollable `SessionsList`, and - only when
+ * `hasActiveSession` is true - a footer with a "New Chat" button.
+ */
+export function MobileDrawer({
+  isOpen,
+  sessions,
+  currentSessionId,
+  isLoading,
+  hasNextPage,
+  isFetchingNextPage,
+  onSelectSession,
+  onFetchNextPage,
+  onNewChat,
+  onClose,
+  onOpenChange,
+  hasActiveSession,
+}: Props) {
+  return (
+    <Drawer.Root open={isOpen} onOpenChange={onOpenChange} direction="left">
+      <Drawer.Portal>
+        <Drawer.Overlay className="fixed inset-0 z-[60] bg-black/10 backdrop-blur-sm" />
+        <Drawer.Content className="fixed left-0 top-0 z-[70] flex h-full w-80 flex-col border-r border-zinc-200 bg-zinc-50">
+          <div className="shrink-0 border-b border-zinc-200 p-4">
+            <div className="flex items-center justify-between">
+              <Drawer.Title className="text-lg font-semibold text-zinc-800">
+                Your chats
+              </Drawer.Title>
+              <Button
+                variant="icon"
+                size="icon"
+                aria-label="Close sessions"
+                onClick={onClose}
+              >
+                <X width="1.25rem" height="1.25rem" />
+              </Button>
+            </div>
+          </div>
+          <div
+            className={cn(
+              "flex min-h-0 flex-1 flex-col overflow-y-auto px-3 py-3",
+              scrollbarStyles,
+            )}
+          >
+            <SessionsList
+              sessions={sessions}
+              currentSessionId={currentSessionId}
+              isLoading={isLoading}
+              hasNextPage={hasNextPage}
+              isFetchingNextPage={isFetchingNextPage}
+              onSelectSession={onSelectSession}
+              onFetchNextPage={onFetchNextPage}
+            />
+          </div>
+          {hasActiveSession && (
+            <div className="shrink-0 bg-white p-3 shadow-[0_-4px_6px_-1px_rgba(0,0,0,0.05)]">
+              <Button
+                variant="primary"
+                size="small"
+                onClick={onNewChat}
+                className="w-full"
+                leftIcon={<PlusIcon width="1rem" height="1rem" />}
+              >
+                New Chat
+              </Button>
+            </div>
+          )}
+        </Drawer.Content>
+      </Drawer.Portal>
+    </Drawer.Root>
+  );
+}
--- a/autogpt_platform/frontend/src/app/(platform)/copilot/components/CopilotShell/components/MobileDrawer/useMobileDrawer.ts
+++ b/autogpt_platform/frontend/src/app/(platform)/copilot/components/CopilotShell/components/MobileDrawer/useMobileDrawer.ts
@@ -0,0 +1,24 @@
+import { useState } from "react";
+
+/**
+ * Tracks whether the mobile sessions drawer is open (initially closed).
+ *
+ * @returns The current `isDrawerOpen` flag plus handlers to open the drawer,
+ * close it, or set it directly from an open-change callback.
+ */
+export function useMobileDrawer() {
+  const [isDrawerOpen, setIsDrawerOpen] = useState(false);
+
+  const handleOpenDrawer = () => {
+    setIsDrawerOpen(true);
+  };
+  const handleCloseDrawer = () => {
+    setIsDrawerOpen(false);
+  };
+  const handleDrawerOpenChange = (open: boolean) => {
+    setIsDrawerOpen(open);
+  };
+
+  return {
+    isDrawerOpen,
+    handleOpenDrawer,
+    handleCloseDrawer,
+    handleDrawerOpenChange,
+  };
+}
--- a/autogpt_platform/frontend/src/app/(platform)/copilot/components/CopilotShell/components/MobileHeader/MobileHeader.tsx
+++ b/autogpt_platform/frontend/src/app/(platform)/copilot/components/CopilotShell/components/MobileHeader/MobileHeader.tsx
@@ -0,0 +1,22 @@
+import { Button } from "@/components/atoms/Button/Button";
+import { NAVBAR_HEIGHT_PX } from "@/lib/constants";
+import { ListIcon } from "@phosphor-icons/react";
+
+/** Props for `MobileHeader`. */
+interface Props {
+  /** Called when the button is pressed. */
+  onOpenDrawer: () => void;
+}
+
+/**
+ * Floating icon button, fixed 1rem from the left edge and 20px below the
+ * navbar, that calls `onOpenDrawer` when pressed.
+ */
+export function MobileHeader(props: Props) {
+  const { onOpenDrawer } = props;
+  const buttonPosition = {
+    left: "1rem",
+    top: `${NAVBAR_HEIGHT_PX + 20}px`,
+  };
+
+  return (
+    <Button
+      variant="icon"
+      size="icon"
+      aria-label="Open sessions"
+      onClick={onOpenDrawer}
+      className="fixed z-50 bg-white shadow-md"
+      style={buttonPosition}
+    >
+      <ListIcon width="1.25rem" height="1.25rem" />
+    </Button>
+  );
+}
--- a/autogpt_platform/frontend/src/app/(platform)/copilot/components/CopilotShell/components/SessionsList/SessionsList.tsx
+++ b/autogpt_platform/frontend/src/app/(platform)/copilot/components/CopilotShell/components/SessionsList/SessionsList.tsx
@@ -0,0 +1,80 @@
+import type { SessionSummaryResponse } from "@/app/api/__generated__/models/sessionSummaryResponse";
+import { Skeleton } from "@/components/__legacy__/ui/skeleton";
+import { Text } from "@/components/atoms/Text/Text";
+import { InfiniteList } from "@/components/molecules/InfiniteList/InfiniteList";
+import { cn } from "@/lib/utils";
+import { getSessionTitle } from "../../helpers";
+
+/** Props for `SessionsList`. */
+interface Props {
+  /** Sessions to render, in display order. */
+  sessions: SessionSummaryResponse[];
+  /** Id of the session to highlight, or `null` for none. */
+  currentSessionId: string | null;
+  /** When true, skeleton rows are rendered instead of the list. */
+  isLoading: boolean;
+  /** Whether more sessions can be fetched. */
+  hasNextPage: boolean;
+  /** Whether a further page is currently being fetched. */
+  isFetchingNextPage: boolean;
+  /** Called with the session id when a row is pressed. */
+  onSelectSession: (sessionId: string) => void;
+  /** Called when the end of the list is reached. */
+  onFetchNextPage: () => void;
+}
+
+/**
+ * Renders the chat sessions as a list of buttons.
+ *
+ * Shows five skeleton rows while `isLoading` is true, an empty-state message
+ * when there are no sessions, and otherwise an `InfiniteList` whose rows call
+ * `onSelectSession` with the session id. The row matching `currentSessionId`
+ * is styled as active and marked with `aria-current`.
+ */
+export function SessionsList({
+  sessions,
+  currentSessionId,
+  isLoading,
+  hasNextPage,
+  isFetchingNextPage,
+  onSelectSession,
+  onFetchNextPage,
+}: Props) {
+  if (isLoading) {
+    return (
+      <div className="space-y-1">
+        {Array.from({ length: 5 }).map((_, i) => (
+          <div key={i} className="rounded-lg px-3 py-2.5">
+            <Skeleton className="h-5 w-full" />
+          </div>
+        ))}
+      </div>
+    );
+  }
+
+  if (sessions.length === 0) {
+    return (
+      <div className="flex h-full items-center justify-center">
+        <Text variant="body" className="text-zinc-500">
+          You don&apos;t have previous chats
+        </Text>
+      </div>
+    );
+  }
+
+  return (
+    <InfiniteList
+      items={sessions}
+      hasMore={hasNextPage}
+      isFetchingMore={isFetchingNextPage}
+      onEndReached={onFetchNextPage}
+      className="space-y-1"
+      renderItem={(session) => {
+        const isActive = session.id === currentSessionId;
+        return (
+          <button
+            // Explicit type: a bare <button> defaults to "submit" and would
+            // submit any form this list is ever rendered inside.
+            type="button"
+            // Expose the selected row to assistive technology, not just visually.
+            aria-current={isActive ? "true" : undefined}
+            onClick={() => onSelectSession(session.id)}
+            className={cn(
+              "w-full rounded-lg px-3 py-2.5 text-left transition-colors",
+              isActive ? "bg-zinc-100" : "hover:bg-zinc-50",
+            )}
+          >
+            <Text
+              variant="body"
+              className={cn(
+                "font-normal",
+                isActive ? "text-zinc-600" : "text-zinc-800",
+              )}
+            >
+              {getSessionTitle(session)}
+            </Text>
+          </button>
+        );
+      }}
+    />
+  );
+}
--- a/autogpt_platform/frontend/src/app/(platform)/copilot/components/CopilotShell/components/SessionsList/useSessionsPagination.ts
+++ b/autogpt_platform/frontend/src/app/(platform)/copilot/components/CopilotShell/components/SessionsList/useSessionsPagination.ts
@@ -0,0 +1,92 @@
+import { useGetV2ListSessions } from "@/app/api/__generated__/endpoints/chat/chat";
+import type { SessionSummaryResponse } from "@/app/api/__generated__/models/sessionSummaryResponse";
+import { okData } from "@/app/api/helpers";
+import { useEffect, useMemo, useState } from "react";
+
+// Number of sessions requested per call (sent as the query's `limit`).
+const PAGE_SIZE = 50;
+
+/** Arguments for `useSessionsPagination`. */
+export interface UseSessionsPaginationArgs {
+  /** When false the sessions query is disabled. */
+  enabled: boolean;
+}
+
+/**
+ * Loads the chat sessions list page by page and accumulates the results.
+ *
+ * Keeps the current `offset`, the sessions accumulated so far and the total
+ * reported by the last response. Besides the manual `fetchNextPage`, an effect
+ * advances the offset automatically whenever more sessions remain and the
+ * query is idle, so pages keep loading until the accumulated count reaches
+ * the reported total.
+ *
+ * @returns The accumulated `sessions`, the query's `isLoading`/`isFetching`
+ * flags, `hasNextPage`, `areAllSessionsLoaded`, `totalCount`, and the
+ * `fetchNextPage` / `reset` actions.
+ */
+export function useSessionsPagination({ enabled }: UseSessionsPaginationArgs) {
+  const [offset, setOffset] = useState(0);
+  const [accumulatedSessions, setAccumulatedSessions] = useState<
+    SessionSummaryResponse[]
+  >([]);
+  // `null` until a response has reported a total (and again after a reset).
+  const [totalCount, setTotalCount] = useState<number | null>(null);
+
+  const { data, isLoading, isFetching, isError } = useGetV2ListSessions(
+    { limit: PAGE_SIZE, offset },
+    {
+      query: {
+        // NOTE(review): within this hook `offset` starts at 0 and is only
+        // increased by PAGE_SIZE or reset to 0, so `offset >= 0` looks
+        // always true - confirm it can be dropped.
+        enabled: enabled && offset >= 0,
+      },
+    },
+  );
+
+  // Fold each response into the accumulated list: the first page (offset 0)
+  // replaces the list, later pages are appended. With no usable response and
+  // the query disabled, the accumulated state is cleared.
+  // NOTE(review): this effect re-runs whenever `data` or `enabled` changes;
+  // for offset > 0 that presumably appends the same page again (e.g. after a
+  // refetch of the current page) - confirm whether de-duplication by id is
+  // needed.
+  useEffect(() => {
+    const responseData = okData(data);
+    if (responseData) {
+      const newSessions = responseData.sessions;
+      const total = responseData.total;
+      setTotalCount(total);
+
+      if (offset === 0) {
+        setAccumulatedSessions(newSessions);
+      } else {
+        setAccumulatedSessions((prev) => [...prev, ...newSessions]);
+      }
+    } else if (!enabled) {
+      setAccumulatedSessions([]);
+      setTotalCount(null);
+    }
+  }, [data, offset, enabled]);
+
+  // More pages exist while fewer sessions are accumulated than the total.
+  const hasNextPage = useMemo(() => {
+    if (totalCount === null) return false;
+    return accumulatedSessions.length < totalCount;
+  }, [accumulatedSessions.length, totalCount]);
+
+  // True once the accumulated count has reached the total and the query is idle.
+  const areAllSessionsLoaded = useMemo(() => {
+    if (totalCount === null) return false;
+    return (
+      accumulatedSessions.length >= totalCount && !isFetching && !isLoading
+    );
+  }, [accumulatedSessions.length, totalCount, isFetching, isLoading]);
+
+  // Eagerly request the next page whenever one remains, the query is idle
+  // and the last request did not fail.
+  useEffect(() => {
+    if (
+      hasNextPage &&
+      !isFetching &&
+      !isLoading &&
+      !isError &&
+      totalCount !== null
+    ) {
+      setOffset((prev) => prev + PAGE_SIZE);
+    }
+  }, [hasNextPage, isFetching, isLoading, isError, totalCount]);
+
+  // Manual trigger; ignored while a fetch is in flight or nothing remains.
+  function fetchNextPage() {
+    if (hasNextPage && !isFetching) {
+      setOffset((prev) => prev + PAGE_SIZE);
+    }
+  }
+
+  // Returns to the first page and drops everything accumulated so far.
+  function reset() {
+    setOffset(0);
+    setAccumulatedSessions([]);
+    setTotalCount(null);
+  }
+
+  return {
+    sessions: accumulatedSessions,
+    isLoading,
+    isFetching,
+    hasNextPage,
+    areAllSessionsLoaded,
+    totalCount,
+    fetchNextPage,
+    reset,
+  };
+}
--- a/autogpt_platform/frontend/src/app/(platform)/copilot/components/CopilotShell/helpers.ts
+++ b/autogpt_platform/frontend/src/app/(platform)/copilot/components/CopilotShell/helpers.ts
@@ -0,0 +1,165 @@
+import type { SessionDetailResponse } from "@/app/api/__generated__/models/sessionDetailResponse";
+import type { SessionSummaryResponse } from "@/app/api/__generated__/models/sessionSummaryResponse";
+import { format, formatDistanceToNow, isToday } from "date-fns";
+
+/**
+ * Builds a list-style summary from a session detail response.
+ *
+ * Copies `id`, `created_at` and `updated_at`; `title` is always `undefined`.
+ */
+export function convertSessionDetailToSummary(
+  session: SessionDetailResponse,
+): SessionSummaryResponse {
+  const { id, created_at, updated_at } = session;
+  return { id, created_at, updated_at, title: undefined };
+}
+
+/**
+ * Keeps only the sessions whose `updated_at` differs from their `created_at`.
+ *
+ * @returns A new array; the input is not modified.
+ */
+export function filterVisibleSessions(
+  sessions: SessionSummaryResponse[],
+): SessionSummaryResponse[] {
+  const wasUpdatedAfterCreation = (candidate: SessionSummaryResponse) =>
+    candidate.updated_at !== candidate.created_at;
+  return sessions.filter(wasUpdatedAfterCreation);
+}
+
+/**
+ * Picks the label shown for a session.
+ *
+ * Uses the session's own title when it has one. Otherwise a session whose
+ * `updated_at` differs from `created_at` is labelled "Untitled Chat", and one
+ * whose timestamps are equal is labelled by its creation date ("Today", or
+ * e.g. "Jan 5, 2026").
+ */
+export function getSessionTitle(session: SessionSummaryResponse): string {
+  if (session.title) return session.title;
+
+  const hasBeenUpdated = session.updated_at !== session.created_at;
+  if (hasBeenUpdated) return "Untitled Chat";
+
+  const createdAt = new Date(session.created_at);
+  return isToday(createdAt) ? "Today" : format(createdAt, "MMM d, yyyy");
+}
+
+/**
+ * Formats how long ago the session was last updated, with a suffix
+ * (via `formatDistanceToNow`).
+ *
+ * @returns The relative label, or `""` when the session has no `updated_at`.
+ */
+export function getSessionUpdatedLabel(
+  session: SessionSummaryResponse,
+): string {
+  const { updated_at: updatedAt } = session;
+  if (!updatedAt) return "";
+  return formatDistanceToNow(new Date(updatedAt), { addSuffix: true });
+}
+
+/**
+ * Builds the session list, making sure the current session is part of it.
+ *
+ * The result starts from the visible sessions (see `filterVisibleSessions`).
+ * If the current session exists in `accumulatedSessions` but was filtered
+ * out, it is placed first. If it is still missing and its detail data is
+ * available, a summary converted from that detail is placed first instead.
+ *
+ * @returns A new array; the inputs are not modified.
+ */
+export function mergeCurrentSessionIntoList(
+  accumulatedSessions: SessionSummaryResponse[],
+  currentSessionId: string | null,
+  currentSessionData: SessionDetailResponse | null | undefined,
+): SessionSummaryResponse[] {
+  const isCurrent = (candidate: SessionSummaryResponse) =>
+    candidate.id === currentSessionId;
+
+  const visibleSessions = filterVisibleSessions(accumulatedSessions);
+  const currentFromList = currentSessionId
+    ? accumulatedSessions.find(isCurrent)
+    : undefined;
+
+  // Re-add the current session in front when the visibility filter dropped it.
+  const merged: SessionSummaryResponse[] =
+    currentFromList && !visibleSessions.some(isCurrent)
+      ? [currentFromList, ...visibleSessions]
+      : [...visibleSessions];
+
+  // Not in the list at all: fall back to the separately loaded detail.
+  if (currentSessionId && currentSessionData && !merged.some(isCurrent)) {
+    merged.unshift(convertSessionDetailToSummary(currentSessionData));
+  }
+
+  return merged;
+}
+
+/**
+ * Reads the `sessionId` query parameter.
+ *
+ * @returns The parameter's value, or `null` when it is absent.
+ */
+export function getCurrentSessionId(
+  searchParams: URLSearchParams,
+): string | null {
+  const sessionIdParam = searchParams.get("sessionId");
+  return sessionIdParam;
+}
+
+/**
+ * Decides whether a session should be auto-selected or a new one created.
+ *
+ * Nothing happens until all sessions are loaded, or when a session was
+ * already auto-selected, or when the URL already names a session. Otherwise
+ * the first visible session is selected; if there is none, `shouldCreate` is
+ * true only when no sessions are accumulated, nothing is loading and the
+ * reported total is 0.
+ *
+ * @returns `shouldSelect` with the id in `sessionIdToSelect`, or
+ * `shouldCreate`; at most one of the two flags is true.
+ */
+export function shouldAutoSelectSession(
+  areAllSessionsLoaded: boolean,
+  hasAutoSelectedSession: boolean,
+  paramSessionId: string | null,
+  visibleSessions: SessionSummaryResponse[],
+  accumulatedSessions: SessionSummaryResponse[],
+  isLoading: boolean,
+  totalCount: number | null,
+): {
+  shouldSelect: boolean;
+  sessionIdToSelect: string | null;
+  shouldCreate: boolean;
+} {
+  const nothingToDecide =
+    !areAllSessionsLoaded || hasAutoSelectedSession || Boolean(paramSessionId);
+  if (nothingToDecide) {
+    return {
+      shouldSelect: false,
+      sessionIdToSelect: null,
+      shouldCreate: false,
+    };
+  }
+
+  if (visibleSessions.length > 0) {
+    return {
+      shouldSelect: true,
+      sessionIdToSelect: visibleSessions[0].id,
+      shouldCreate: false,
+    };
+  }
+
+  // No visible session: create one only when there are provably no sessions.
+  const hasNoSessionsAtAll =
+    accumulatedSessions.length === 0 && !isLoading && totalCount === 0;
+  return {
+    shouldSelect: false,
+    sessionIdToSelect: null,
+    shouldCreate: hasNoSessionsAtAll,
+  };
+}
+
+/**
+ * Tells whether the page content can be shown instead of the loading state.
+ *
+ * Never ready before all sessions are loaded. Without a session id in the
+ * URL, readiness follows `hasAutoSelectedSession`. With one, the content is
+ * ready when that session is in `accumulatedSessions`, or when its detail
+ * has finished loading and is available.
+ */
+export function checkReadyToShowContent(
+  areAllSessionsLoaded: boolean,
+  paramSessionId: string | null,
+  accumulatedSessions: SessionSummaryResponse[],
+  isCurrentSessionLoading: boolean,
+  currentSessionData: SessionDetailResponse | null | undefined,
+  hasAutoSelectedSession: boolean,
+): boolean {
+  if (!areAllSessionsLoaded) return false;
+  if (!paramSessionId) return hasAutoSelectedSession;
+
+  const isInLoadedList = accumulatedSessions.some(
+    (candidate) => candidate.id === paramSessionId,
+  );
+  if (isInLoadedList) return true;
+
+  // `!= null` covers both `null` and `undefined`.
+  return !isCurrentSessionLoading && currentSessionData != null;
+}
--- a/autogpt_platform/frontend/src/app/(platform)/copilot/components/CopilotShell/useCopilotShell.ts
+++ b/autogpt_platform/frontend/src/app/(platform)/copilot/components/CopilotShell/useCopilotShell.ts
@@ -0,0 +1,172 @@
+"use client";
+
+import {
+  getGetV2ListSessionsQueryKey,
+  useGetV2GetSession,
+} from "@/app/api/__generated__/endpoints/chat/chat";
+import { okData } from "@/app/api/helpers";
+import { useBreakpoint } from "@/lib/hooks/useBreakpoint";
+import { useSupabase } from "@/lib/supabase/hooks/useSupabase";
+import { useQueryClient } from "@tanstack/react-query";
+import { usePathname, useRouter, useSearchParams } from "next/navigation";
+import { useEffect, useRef, useState } from "react";
+import { useMobileDrawer } from "./components/MobileDrawer/useMobileDrawer";
+import { useSessionsPagination } from "./components/SessionsList/useSessionsPagination";
+import {
+  checkReadyToShowContent,
+  filterVisibleSessions,
+  getCurrentSessionId,
+  mergeCurrentSessionIntoList,
+} from "./helpers";
+
+/**
+ * State and handlers for the copilot shell (sessions sidebar/drawer plus
+ * page content).
+ *
+ * Combines the router/search params, the login state, the mobile drawer
+ * state, the paginated sessions list and the currently selected session
+ * (taken from the `sessionId` query parameter) into the values the shell
+ * component renders from.
+ *
+ * @returns Layout flags (`isMobile`, `isDrawerOpen`, `isLoggedIn`,
+ * `isReadyToShowContent`, `hasActiveSession`), the sessions to list with
+ * paging info, the id to highlight, and the select / new-chat / drawer
+ * handlers.
+ */
+export function useCopilotShell() {
+  const router = useRouter();
+  const pathname = usePathname();
+  const searchParams = useSearchParams();
+  const queryClient = useQueryClient();
+  const breakpoint = useBreakpoint();
+  const { isLoggedIn } = useSupabase();
+  // The "base", "sm" and "md" breakpoints get the mobile layout.
+  const isMobile =
+    breakpoint === "base" || breakpoint === "sm" || breakpoint === "md";
+
+  const isOnHomepage = pathname === "/copilot";
+  const paramSessionId = searchParams.get("sessionId");
+
+  const {
+    isDrawerOpen,
+    handleOpenDrawer,
+    handleCloseDrawer,
+    handleDrawerOpenChange,
+  } = useMobileDrawer();
+
+  // The sessions list is fetched on desktop, while the mobile drawer is open,
+  // or whenever a session id is present in the URL.
+  const paginationEnabled = !isMobile || isDrawerOpen || !!paramSessionId;
+
+  const {
+    sessions: accumulatedSessions,
+    isLoading: isSessionsLoading,
+    isFetching: isSessionsFetching,
+    hasNextPage,
+    areAllSessionsLoaded,
+    fetchNextPage,
+    reset: resetPagination,
+  } = useSessionsPagination({
+    enabled: paginationEnabled,
+  });
+
+  // NOTE(review): `getCurrentSessionId` appears to read the same `sessionId`
+  // query parameter as `paramSessionId` above, so the two values should
+  // always be equal - confirm and consider using a single variable.
+  const currentSessionId = getCurrentSessionId(searchParams);
+
+  // Detail of the selected session; the query is disabled without an id.
+  const { data: currentSessionData, isLoading: isCurrentSessionLoading } =
+    useGetV2GetSession(currentSessionId || "", {
+      query: {
+        enabled: !!currentSessionId,
+        select: okData,
+      },
+    });
+
+  // The ref mirrors the state so the effects below can check the flag without
+  // listing the state in their dependency arrays.
+  const [hasAutoSelectedSession, setHasAutoSelectedSession] = useState(false);
+  const hasAutoSelectedRef = useRef(false);
+
+  // Mark as auto-selected when sessionId is in URL
+  useEffect(() => {
+    if (paramSessionId && !hasAutoSelectedRef.current) {
+      hasAutoSelectedRef.current = true;
+      setHasAutoSelectedSession(true);
+    }
+  }, [paramSessionId]);
+
+  // On homepage without sessionId, mark as ready immediately
+  useEffect(() => {
+    if (isOnHomepage && !paramSessionId && !hasAutoSelectedRef.current) {
+      hasAutoSelectedRef.current = true;
+      setHasAutoSelectedSession(true);
+    }
+  }, [isOnHomepage, paramSessionId]);
+
+  // Invalidate sessions list when navigating to homepage (to show newly created sessions)
+  useEffect(() => {
+    if (isOnHomepage && !paramSessionId) {
+      queryClient.invalidateQueries({
+        queryKey: getGetV2ListSessionsQueryKey(),
+      });
+    }
+  }, [isOnHomepage, paramSessionId, queryClient]);
+
+  // Reset pagination when query becomes disabled
+  // (only on the enabled -> disabled transition, tracked through the ref).
+  // NOTE(review): `resetAutoSelect` is called here but is not listed in the
+  // dependency array; it is a function declaration further down in this
+  // hook (hoisted), so the call itself is valid - confirm the lint setup
+  // accepts the omission.
+  const prevPaginationEnabledRef = useRef(paginationEnabled);
+  useEffect(() => {
+    if (prevPaginationEnabledRef.current && !paginationEnabled) {
+      resetPagination();
+      resetAutoSelect();
+    }
+    prevPaginationEnabledRef.current = paginationEnabled;
+  }, [paginationEnabled, resetPagination]);
+
+  const sessions = mergeCurrentSessionIntoList(
+    accumulatedSessions,
+    currentSessionId,
+    currentSessionData,
+  );
+
+  // NOTE(review): the merged list is passed through `filterVisibleSessions`
+  // again. If that helper hides never-updated sessions, this would presumably
+  // also drop a never-updated current session that the merge step just put
+  // back - confirm that is intended.
+  const visibleSessions = filterVisibleSessions(sessions);
+
+  // No session is highlighted on the bare homepage.
+  const sidebarSelectedSessionId =
+    isOnHomepage && !paramSessionId ? null : currentSessionId;
+
+  // The homepage never waits for the sessions list.
+  const isReadyToShowContent = isOnHomepage
+    ? true
+    : checkReadyToShowContent(
+        areAllSessionsLoaded,
+        paramSessionId,
+        accumulatedSessions,
+        isCurrentSessionLoading,
+        currentSessionData,
+        hasAutoSelectedSession,
+      );
+
+  // Switches to the given session and closes the drawer on mobile.
+  function handleSelectSession(sessionId: string) {
+    // Navigate using replaceState to avoid full page reload
+    window.history.replaceState(null, "", `/copilot?sessionId=${sessionId}`);
+    // Force a re-render by updating the URL through router
+    router.replace(`/copilot?sessionId=${sessionId}`);
+    if (isMobile) handleCloseDrawer();
+  }
+
+  // Clears the selection state, restarts pagination and returns to /copilot.
+  function handleNewChat() {
+    resetAutoSelect();
+    resetPagination();
+    // Invalidate and refetch sessions list to ensure newly created sessions appear
+    queryClient.invalidateQueries({
+      queryKey: getGetV2ListSessionsQueryKey(),
+    });
+    window.history.replaceState(null, "", "/copilot");
+    router.replace("/copilot");
+    if (isMobile) handleCloseDrawer();
+  }
+
+  // Clears both the ref and the state copy of the auto-selected flag.
+  function resetAutoSelect() {
+    hasAutoSelectedRef.current = false;
+    setHasAutoSelectedSession(false);
+  }
+
+  // Only report loading while nothing has been accumulated yet.
+  const isLoading = isSessionsLoading && accumulatedSessions.length === 0;
+
+  return {
+    isMobile,
+    isDrawerOpen,
+    isLoggedIn,
+    hasActiveSession:
+      Boolean(currentSessionId) && (!isOnHomepage || Boolean(paramSessionId)),
+    isLoading,
+    sessions: visibleSessions,
+    currentSessionId: sidebarSelectedSessionId,
+    handleSelectSession,
+    handleOpenDrawer,
+    handleCloseDrawer,
+    handleDrawerOpenChange,
+    handleNewChat,
+    hasNextPage,
+    isFetchingNextPage: isSessionsFetching,
+    fetchNextPage,
+    isReadyToShowContent,
+  };
+}
--- a/autogpt_platform/frontend/src/app/(platform)/copilot/helpers.ts
+++ b/autogpt_platform/frontend/src/app/(platform)/copilot/helpers.ts
@@ -0,0 +1,56 @@
+import type { User } from "@supabase/supabase-js";
+
+/**
+ * State of the copilot page, discriminated by `type`.
+ *
+ * - `welcome` and `newChat` carry no data.
+ * - `creating` carries the `prompt` being used.
+ * - `chat` carries the `sessionId` and, optionally, an `initialPrompt`.
+ */
+export type PageState =
+  | { type: "welcome" }
+  | { type: "newChat" }
+  | { type: "creating"; prompt: string }
+  | { type: "chat"; sessionId: string; initialPrompt?: string };
+
+/**
+ * Resolves the initial prompt for the page.
+ *
+ * A non-empty `storedInitialPrompt` wins. Otherwise the prompt comes from the
+ * page state: `prompt` while creating, `initialPrompt` while chatting.
+ *
+ * @returns The prompt, or `undefined` when none is available.
+ */
+export function getInitialPromptFromState(
+  pageState: PageState,
+  storedInitialPrompt: string | undefined,
+) {
+  if (storedInitialPrompt) return storedInitialPrompt;
+
+  switch (pageState.type) {
+    case "creating":
+      return pageState.prompt;
+    case "chat":
+      return pageState.initialPrompt;
+    default:
+      return undefined;
+  }
+}
+
+/**
+ * Tells whether the page should go back to the welcome state.
+ *
+ * @returns `false` for the `newChat`, `creating` and `welcome` states,
+ * `true` for any other state.
+ */
+export function shouldResetToWelcome(pageState: PageState) {
+  switch (pageState.type) {
+    case "newChat":
+    case "creating":
+    case "welcome":
+      return false;
+    default:
+      return true;
+  }
+}
+
+/**
+ * Picks the name used to greet the user.
+ *
+ * Tries, in order: the first word of `user_metadata.full_name`, the first
+ * word of `user_metadata.name`, and the part of the email before "@".
+ * Names are trimmed before splitting, so leading whitespace can no longer
+ * produce an empty greeting name, and any run of whitespace separates words.
+ *
+ * @param user - The signed-in user, if any.
+ * @returns A non-empty name, or "there" when nothing usable is found.
+ */
+export function getGreetingName(user?: User | null): string {
+  const fallbackName = "there";
+  if (!user) return fallbackName;
+
+  const metadata = user.user_metadata as Record<string, unknown> | undefined;
+  for (const candidate of [metadata?.full_name, metadata?.name]) {
+    if (typeof candidate !== "string") continue;
+    // Trim first: " Ada Lovelace".split(" ")[0] would be the empty string.
+    const [firstWord] = candidate.trim().split(/\s+/);
+    if (firstWord) return firstWord;
+  }
+
+  if (user.email) {
+    // An address with nothing before "@" falls back instead of returning "".
+    return user.email.split("@")[0] || fallbackName;
+  }
+  return fallbackName;
+}
+
+/**
+ * Builds the copilot chat URL for a prompt.
+ *
+ * @param prompt - Raw prompt text; surrounding whitespace is ignored.
+ * @returns "/copilot/chat" for a blank prompt, otherwise the same path with
+ * the trimmed prompt URI-encoded in the `prompt` query parameter.
+ */
+export function buildCopilotChatUrl(prompt: string): string {
+  const chatPath = "/copilot/chat";
+  const cleanedPrompt = prompt.trim();
+  return cleanedPrompt
+    ? `${chatPath}?prompt=${encodeURIComponent(cleanedPrompt)}`
+    : chatPath;
+}
+
+// Suggested prompts, in display order.
+const QUICK_ACTIONS = [
+  "Show me what I can automate",
+  "Design a custom workflow",
+  "Help me with content creation",
+] as const;
+
+/**
+ * Lists the suggested quick-action prompts.
+ *
+ * @returns A fresh array on every call, so callers may modify it freely.
+ */
+export function getQuickActions(): string[] {
+  return [...QUICK_ACTIONS];
+}
--- a/autogpt_platform/frontend/src/app/(platform)/copilot/layout.tsx
+++ b/autogpt_platform/frontend/src/app/(platform)/copilot/layout.tsx
@@ -0,0 +1,11 @@
+import type { ReactNode } from "react";
+import { NewChatProvider } from "./NewChatContext";
+import { CopilotShell } from "./components/CopilotShell/CopilotShell";
+
+/**
+ * Layout for the copilot route: wraps the page `children` in `CopilotShell`,
+ * itself rendered inside a `NewChatProvider`.
+ */
+export default function CopilotLayout(props: { children: ReactNode }) {
+  const shell = <CopilotShell>{props.children}</CopilotShell>;
+
+  return <NewChatProvider>{shell}</NewChatProvider>;
+}
--- a/Show More
+++ b/Show More