Compare commits


1 Commit

Author SHA1 Message Date
Nick Tindle    49971439b5    test: verify e2e tests on dev (no-op change)    2026-02-01 15:46:51 -06:00
42 changed files with 880 additions and 1802 deletions

.gitignore vendored
View File

@@ -180,4 +180,3 @@ autogpt_platform/backend/settings.py
.claude/settings.local.json
CLAUDE.local.md
/autogpt_platform/backend/logs
.next

View File

@@ -3,13 +3,9 @@ import logging
import time
from asyncio import CancelledError
from collections.abc import AsyncGenerator
from typing import TYPE_CHECKING, Any, cast
from typing import Any
import openai
if TYPE_CHECKING:
from backend.util.prompt import CompressResult
import orjson
from langfuse import get_client
from openai import (
@@ -19,13 +15,7 @@ from openai import (
PermissionDeniedError,
RateLimitError,
)
from openai.types.chat import (
ChatCompletionChunk,
ChatCompletionMessageParam,
ChatCompletionStreamOptionsParam,
ChatCompletionSystemMessageParam,
ChatCompletionToolParam,
)
from openai.types.chat import ChatCompletionChunk, ChatCompletionToolParam
from backend.data.redis_client import get_redis_async
from backend.data.understanding import (
@@ -804,58 +794,207 @@ def _is_region_blocked_error(error: Exception) -> bool:
return "not available in your region" in str(error).lower()
async def _manage_context_window(
async def _summarize_messages(
messages: list,
model: str,
api_key: str | None = None,
base_url: str | None = None,
) -> "CompressResult":
"""
Manage context window using the unified compress_context function.
timeout: float = 30.0,
) -> str:
"""Summarize a list of messages into concise context.
This is a thin wrapper that creates an OpenAI client for summarization
and delegates to the shared compression logic in prompt.py.
Uses the same model as the chat for higher quality summaries.
Args:
messages: List of messages in OpenAI format
model: Model name for token counting and summarization
api_key: API key for summarization calls
base_url: Base URL for summarization calls
messages: List of message dicts to summarize
model: Model to use for summarization (same as chat model)
api_key: API key for OpenAI client
base_url: Base URL for OpenAI client
timeout: Request timeout in seconds (default: 30.0)
Returns:
CompressResult with compacted messages and metadata
Summarized text
"""
# Format messages for summarization
conversation = []
for msg in messages:
role = msg.get("role", "")
content = msg.get("content", "")
# Include user, assistant, and tool messages (tool outputs are important context)
if content and role in ("user", "assistant", "tool"):
conversation.append(f"{role.upper()}: {content}")
conversation_text = "\n\n".join(conversation)
# Handle empty conversation
if not conversation_text:
return "No conversation history available."
# Truncate conversation to fit within summarization model's context
# gpt-4o-mini has 128k context, but we limit to ~25k tokens (~100k chars) for safety
MAX_CHARS = 100_000
if len(conversation_text) > MAX_CHARS:
conversation_text = conversation_text[:MAX_CHARS] + "\n\n[truncated]"
# Call LLM to summarize
import openai
from backend.util.prompt import compress_context
summarization_client = openai.AsyncOpenAI(
api_key=api_key, base_url=base_url, timeout=timeout
)
# Convert messages to dict format
messages_dict = []
for msg in messages:
if isinstance(msg, dict):
msg_dict = {k: v for k, v in msg.items() if v is not None}
else:
msg_dict = dict(msg)
messages_dict.append(msg_dict)
response = await summarization_client.chat.completions.create(
model=model,
messages=[
{
"role": "system",
"content": (
"Create a detailed summary of the conversation so far. "
"This summary will be used as context when continuing the conversation.\n\n"
"Before writing the summary, analyze each message chronologically to identify:\n"
"- User requests and their explicit goals\n"
"- Your approach and key decisions made\n"
"- Technical specifics (file names, tool outputs, function signatures)\n"
"- Errors encountered and resolutions applied\n\n"
"You MUST include ALL of the following sections:\n\n"
"## 1. Primary Request and Intent\n"
"The user's explicit goals and what they are trying to accomplish.\n\n"
"## 2. Key Technical Concepts\n"
"Technologies, frameworks, tools, and patterns being used or discussed.\n\n"
"## 3. Files and Resources Involved\n"
"Specific files examined or modified, with relevant snippets and identifiers.\n\n"
"## 4. Errors and Fixes\n"
"Problems encountered, error messages, and their resolutions. "
"Include any user feedback on fixes.\n\n"
"## 5. Problem Solving\n"
"Issues that have been resolved and how they were addressed.\n\n"
"## 6. All User Messages\n"
"A complete list of all user inputs (excluding tool outputs) to preserve their exact requests.\n\n"
"## 7. Pending Tasks\n"
"Work items the user explicitly requested that have not yet been completed.\n\n"
"## 8. Current Work\n"
"Precise description of what was being worked on most recently, including relevant context.\n\n"
"## 9. Next Steps\n"
"What should happen next, aligned with the user's most recent requests. "
"Include verbatim quotes of recent instructions if relevant."
),
},
{"role": "user", "content": f"Summarize:\n\n{conversation_text}"},
],
max_tokens=1500,
temperature=0.3,
)
# Only create client if api_key is provided (enables summarization)
# Use context manager to avoid socket leaks
if api_key:
async with openai.AsyncOpenAI(
api_key=api_key, base_url=base_url, timeout=30.0
) as client:
return await compress_context(
messages=messages_dict,
model=model,
client=client,
)
else:
# No API key - use truncation-only mode
return await compress_context(
messages=messages_dict,
model=model,
client=None,
summary = response.choices[0].message.content
return summary or "No summary available."
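For orientation, here is a minimal, hypothetical way to exercise the summarizer above on its own; the message list, model name, and API key below are placeholders rather than values taken from this change.

import asyncio

async def demo_summarize() -> None:
    history = [
        {"role": "user", "content": "Build me an agent that collects RSS headlines."},
        {"role": "assistant", "content": "I drafted a graph with a feed-reader block."},
        {"role": "tool", "content": "execution_id=abc123 status=COMPLETED"},
    ]
    summary = await _summarize_messages(
        history,
        model="gpt-4o",        # assumed chat model
        api_key="sk-...",      # placeholder, not a real key
        base_url=None,
        timeout=30.0,
    )
    print(summary)

# asyncio.run(demo_summarize())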
def _ensure_tool_pairs_intact(
recent_messages: list[dict],
all_messages: list[dict],
start_index: int,
) -> list[dict]:
"""
Ensure tool_call/tool_response pairs stay together after slicing.
When slicing messages for context compaction, a naive slice can separate
an assistant message containing tool_calls from its corresponding tool
response messages. This causes API validation errors (e.g., Anthropic's
"unexpected tool_use_id found in tool_result blocks").
This function checks for orphan tool responses in the slice and extends
backwards to include their corresponding assistant messages.
Args:
recent_messages: The sliced messages to validate
all_messages: The complete message list (for looking up missing assistants)
start_index: The index in all_messages where recent_messages begins
Returns:
A potentially extended list of messages with tool pairs intact
"""
if not recent_messages:
return recent_messages
# Collect all tool_call_ids from assistant messages in the slice
available_tool_call_ids: set[str] = set()
for msg in recent_messages:
if msg.get("role") == "assistant" and msg.get("tool_calls"):
for tc in msg["tool_calls"]:
tc_id = tc.get("id")
if tc_id:
available_tool_call_ids.add(tc_id)
# Find orphan tool responses (tool messages whose tool_call_id is missing)
orphan_tool_call_ids: set[str] = set()
for msg in recent_messages:
if msg.get("role") == "tool":
tc_id = msg.get("tool_call_id")
if tc_id and tc_id not in available_tool_call_ids:
orphan_tool_call_ids.add(tc_id)
if not orphan_tool_call_ids:
# No orphans, slice is valid
return recent_messages
# Find the assistant messages that contain the orphan tool_call_ids
# Search backwards from start_index in all_messages
messages_to_prepend: list[dict] = []
for i in range(start_index - 1, -1, -1):
msg = all_messages[i]
if msg.get("role") == "assistant" and msg.get("tool_calls"):
msg_tool_ids = {tc.get("id") for tc in msg["tool_calls"] if tc.get("id")}
if msg_tool_ids & orphan_tool_call_ids:
# This assistant message has tool_calls we need
# Also collect its contiguous tool responses that follow it
assistant_and_responses: list[dict] = [msg]
# Scan forward from this assistant to collect tool responses
for j in range(i + 1, start_index):
following_msg = all_messages[j]
if following_msg.get("role") == "tool":
tool_id = following_msg.get("tool_call_id")
if tool_id and tool_id in msg_tool_ids:
assistant_and_responses.append(following_msg)
else:
# Stop at first non-tool message
break
# Prepend the assistant and its tool responses (maintain order)
messages_to_prepend = assistant_and_responses + messages_to_prepend
# Mark these as found
orphan_tool_call_ids -= msg_tool_ids
# Also add this assistant's tool_call_ids to available set
available_tool_call_ids |= msg_tool_ids
if not orphan_tool_call_ids:
# Found all missing assistants
break
if orphan_tool_call_ids:
# Some tool_call_ids couldn't be resolved - remove those tool responses
# This shouldn't happen in normal operation but handles edge cases
logger.warning(
f"Could not find assistant messages for tool_call_ids: {orphan_tool_call_ids}. "
"Removing orphan tool responses."
)
recent_messages = [
msg
for msg in recent_messages
if not (
msg.get("role") == "tool"
and msg.get("tool_call_id") in orphan_tool_call_ids
)
]
if messages_to_prepend:
logger.info(
f"Extended recent messages by {len(messages_to_prepend)} to preserve "
f"tool_call/tool_response pairs"
)
return messages_to_prepend + recent_messages
return recent_messages
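A small worked example of the orphan repair described above; the tool-call ID and message contents are invented for illustration.

all_msgs = [
    {"role": "user", "content": "Run the weather agent."},
    {
        "role": "assistant",
        "content": None,
        "tool_calls": [
            {"id": "call_1", "type": "function",
             "function": {"name": "run_agent", "arguments": "{}"}}
        ],
    },
    {"role": "tool", "tool_call_id": "call_1", "content": "Sunny, 21C"},
    {"role": "user", "content": "Thanks, schedule it daily."},
]

# A naive "keep the last two messages" slice separates the tool response
# from the assistant message that issued call_1.
recent = all_msgs[-2:]
repaired = _ensure_tool_pairs_intact(recent, all_msgs, start_index=2)
# The slice is extended backwards, so the assistant tool call is restored:
assert repaired[0]["role"] == "assistant"
assert len(repaired) == 3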
async def _stream_chat_chunks(
@@ -883,8 +1022,11 @@ async def _stream_chat_chunks(
logger.info("Starting pure chat stream")
# Build messages with system prompt prepended
messages = session.to_openai_messages()
if system_prompt:
from openai.types.chat import ChatCompletionSystemMessageParam
system_message = ChatCompletionSystemMessageParam(
role="system",
content=system_prompt,
@@ -892,38 +1034,314 @@ async def _stream_chat_chunks(
messages = [system_message] + messages
# Apply context window management
context_result = await _manage_context_window(
messages=messages,
model=model,
api_key=config.api_key,
base_url=config.base_url,
)
token_count = 0 # Initialize for exception handler
try:
from backend.util.prompt import estimate_token_count
if context_result.error:
if "System prompt dropped" in context_result.error:
# Warning only - continue with reduced context
yield StreamError(
errorText=(
"Warning: System prompt dropped due to size constraints. "
"Assistant behavior may be affected."
)
# Convert to dict for token counting
# OpenAI message types are TypedDicts, so they're already dict-like
messages_dict = []
for msg in messages:
# TypedDict objects are already dicts, just filter None values
if isinstance(msg, dict):
msg_dict = {k: v for k, v in msg.items() if v is not None}
else:
# Fallback for unexpected types
msg_dict = dict(msg)
messages_dict.append(msg_dict)
# Estimate tokens using appropriate tokenizer
# Normalize model name for token counting (tiktoken only supports OpenAI models)
token_count_model = model
if "/" in model:
# Strip provider prefix (e.g., "anthropic/claude-opus-4.5" -> "claude-opus-4.5")
token_count_model = model.split("/")[-1]
# For Claude and other non-OpenAI models, approximate with gpt-4o tokenizer
# Most modern LLMs have similar tokenization (~1 token per 4 chars)
if "claude" in token_count_model.lower() or not any(
known in token_count_model.lower()
for known in ["gpt", "o1", "chatgpt", "text-"]
):
token_count_model = "gpt-4o"
# Attempt token counting with error handling
try:
token_count = estimate_token_count(messages_dict, model=token_count_model)
except Exception as token_error:
# If token counting fails, use gpt-4o as fallback approximation
logger.warning(
f"Token counting failed for model {token_count_model}: {token_error}. "
"Using gpt-4o approximation."
)
else:
# Any other error - abort to prevent failed LLM calls
token_count = estimate_token_count(messages_dict, model="gpt-4o")
# If over threshold, summarize old messages
if token_count > 120_000:
KEEP_RECENT = 15
# Check if we have a system prompt at the start
has_system_prompt = (
len(messages) > 0 and messages[0].get("role") == "system"
)
# Always attempt mitigation when over limit, even with few messages
if messages:
# Split messages based on whether system prompt exists
# Calculate start index for the slice
slice_start = max(0, len(messages_dict) - KEEP_RECENT)
recent_messages = messages_dict[-KEEP_RECENT:]
# Ensure tool_call/tool_response pairs stay together
# This prevents API errors from orphan tool responses
recent_messages = _ensure_tool_pairs_intact(
recent_messages, messages_dict, slice_start
)
if has_system_prompt:
# Keep system prompt separate, summarize everything between system and recent
system_msg = messages[0]
old_messages_dict = messages_dict[1:-KEEP_RECENT]
else:
# No system prompt, summarize everything except recent
system_msg = None
old_messages_dict = messages_dict[:-KEEP_RECENT]
# Summarize any non-empty old messages (no minimum threshold)
# If we're over the token limit, we need to compress whatever we can
if old_messages_dict:
# Summarize old messages using the same model as chat
summary_text = await _summarize_messages(
old_messages_dict,
model=model,
api_key=config.api_key,
base_url=config.base_url,
)
# Build new message list
# Use assistant role (not system) to prevent privilege escalation
# of user-influenced content to instruction-level authority
from openai.types.chat import ChatCompletionAssistantMessageParam
summary_msg = ChatCompletionAssistantMessageParam(
role="assistant",
content=(
"[Previous conversation summary — for context only]: "
f"{summary_text}"
),
)
# Rebuild messages based on whether we have a system prompt
if has_system_prompt:
# system_prompt + summary + recent_messages
messages = [system_msg, summary_msg] + recent_messages
else:
# summary + recent_messages (no original system prompt)
messages = [summary_msg] + recent_messages
logger.info(
f"Context summarized: {token_count} tokens, "
f"summarized {len(old_messages_dict)} old messages, "
f"kept last {KEEP_RECENT} messages"
)
# Fallback: If still over limit after summarization, progressively drop recent messages
# This handles edge cases where recent messages are extremely large
new_messages_dict = []
for msg in messages:
if isinstance(msg, dict):
msg_dict = {k: v for k, v in msg.items() if v is not None}
else:
msg_dict = dict(msg)
new_messages_dict.append(msg_dict)
new_token_count = estimate_token_count(
new_messages_dict, model=token_count_model
)
if new_token_count > 120_000:
# Still over limit - progressively reduce KEEP_RECENT
logger.warning(
f"Still over limit after summarization: {new_token_count} tokens. "
"Reducing number of recent messages kept."
)
for keep_count in [12, 10, 8, 5, 3, 2, 1, 0]:
if keep_count == 0:
# Try with just system prompt + summary (no recent messages)
if has_system_prompt:
messages = [system_msg, summary_msg]
else:
messages = [summary_msg]
logger.info(
"Trying with 0 recent messages (system + summary only)"
)
else:
# Slice from ORIGINAL recent_messages to avoid duplicating summary
reduced_recent = (
recent_messages[-keep_count:]
if len(recent_messages) >= keep_count
else recent_messages
)
# Ensure tool pairs stay intact in the reduced slice
reduced_slice_start = max(
0, len(recent_messages) - keep_count
)
reduced_recent = _ensure_tool_pairs_intact(
reduced_recent, recent_messages, reduced_slice_start
)
if has_system_prompt:
messages = [
system_msg,
summary_msg,
] + reduced_recent
else:
messages = [summary_msg] + reduced_recent
new_messages_dict = []
for msg in messages:
if isinstance(msg, dict):
msg_dict = {
k: v for k, v in msg.items() if v is not None
}
else:
msg_dict = dict(msg)
new_messages_dict.append(msg_dict)
new_token_count = estimate_token_count(
new_messages_dict, model=token_count_model
)
if new_token_count <= 120_000:
logger.info(
f"Reduced to {keep_count} recent messages, "
f"now {new_token_count} tokens"
)
break
else:
logger.error(
f"Unable to reduce token count below threshold even with 0 messages. "
f"Final count: {new_token_count} tokens"
)
# ABSOLUTE LAST RESORT: Drop system prompt
# This should only happen if summary itself is massive
if has_system_prompt and len(messages) > 1:
messages = messages[1:] # Drop system prompt
logger.critical(
"CRITICAL: Dropped system prompt as absolute last resort. "
"Behavioral consistency may be affected."
)
# Yield error to user
yield StreamError(
errorText=(
"Warning: System prompt dropped due to size constraints. "
"Assistant behavior may be affected."
)
)
else:
# No old messages to summarize - all messages are "recent"
# Apply progressive truncation to reduce token count
logger.warning(
f"Token count {token_count} exceeds threshold but no old messages to summarize. "
f"Applying progressive truncation to recent messages."
)
# Create a base list excluding system prompt to avoid duplication
# This is the pool of messages we'll slice from in the loop
# Use messages_dict for type consistency with _ensure_tool_pairs_intact
base_msgs = (
messages_dict[1:] if has_system_prompt else messages_dict
)
# Try progressively smaller keep counts
new_token_count = token_count # Initialize with current count
for keep_count in [12, 10, 8, 5, 3, 2, 1, 0]:
if keep_count == 0:
# Try with just system prompt (no recent messages)
if has_system_prompt:
messages = [system_msg]
logger.info(
"Trying with 0 recent messages (system prompt only)"
)
else:
# No system prompt and no recent messages = empty messages list
# This is invalid, skip this iteration
continue
else:
if len(base_msgs) < keep_count:
continue # Skip if we don't have enough messages
# Slice from base_msgs to get recent messages (without system prompt)
recent_messages = base_msgs[-keep_count:]
# Ensure tool pairs stay intact in the reduced slice
reduced_slice_start = max(0, len(base_msgs) - keep_count)
recent_messages = _ensure_tool_pairs_intact(
recent_messages, base_msgs, reduced_slice_start
)
if has_system_prompt:
messages = [system_msg] + recent_messages
else:
messages = recent_messages
new_messages_dict = []
for msg in messages:
if msg is None:
continue # Skip None messages (type safety)
if isinstance(msg, dict):
msg_dict = {
k: v for k, v in msg.items() if v is not None
}
else:
msg_dict = dict(msg)
new_messages_dict.append(msg_dict)
new_token_count = estimate_token_count(
new_messages_dict, model=token_count_model
)
if new_token_count <= 120_000:
logger.info(
f"Reduced to {keep_count} recent messages, "
f"now {new_token_count} tokens"
)
break
else:
# Even with 0 messages still over limit
logger.error(
f"Unable to reduce token count below threshold even with 0 messages. "
f"Final count: {new_token_count} tokens. Messages may be extremely large."
)
# ABSOLUTE LAST RESORT: Drop system prompt
if has_system_prompt and len(messages) > 1:
messages = messages[1:] # Drop system prompt
logger.critical(
"CRITICAL: Dropped system prompt as absolute last resort. "
"Behavioral consistency may be affected."
)
# Yield error to user
yield StreamError(
errorText=(
"Warning: System prompt dropped due to size constraints. "
"Assistant behavior may be affected."
)
)
except Exception as e:
logger.error(f"Context summarization failed: {e}", exc_info=True)
# If we were over the token limit, yield error to user
# Don't silently continue with oversized messages that will fail
if token_count > 120_000:
yield StreamError(
errorText=(
f"Context window management failed: {context_result.error}. "
"Please start a new conversation."
f"Unable to manage context window (token limit exceeded: {token_count} tokens). "
"Context summarization failed. Please start a new conversation."
)
)
yield StreamFinish()
return
messages = context_result.messages
if context_result.was_compacted:
logger.info(
f"Context compacted for streaming: {context_result.token_count} tokens"
)
# Otherwise, continue with original messages (under limit)
# Loop to handle tool calls and continue conversation
while True:
@@ -951,6 +1369,14 @@ async def _stream_chat_chunks(
:128
] # OpenRouter limit
# Create the stream with proper types
from typing import cast
from openai.types.chat import (
ChatCompletionMessageParam,
ChatCompletionStreamOptionsParam,
)
stream = await client.chat.completions.create(
model=model,
messages=cast(list[ChatCompletionMessageParam], messages),
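The hunk above cuts off mid-call, so the remaining keyword arguments are not visible; the sketch below shows one plausible shape of such a typed streaming call. The trailing parameters (tools, stream, stream_options) are assumptions based on the imported types, not a quotation of the change.

from typing import cast

from openai import AsyncOpenAI
from openai.types.chat import (
    ChatCompletionChunk,
    ChatCompletionMessageParam,
    ChatCompletionStreamOptionsParam,
    ChatCompletionToolParam,
)

async def stream_once(
    client: AsyncOpenAI,
    model: str,
    messages: list,
    tools: list[ChatCompletionToolParam],
) -> None:
    stream = await client.chat.completions.create(
        model=model,
        messages=cast(list[ChatCompletionMessageParam], messages),
        tools=tools,
        stream=True,
        stream_options=ChatCompletionStreamOptionsParam(include_usage=True),
    )
    async for chunk in stream:
        assert isinstance(chunk, ChatCompletionChunk)
        # With include_usage=True the final chunk carries usage and no choices.
        if chunk.choices and chunk.choices[0].delta.content:
            print(chunk.choices[0].delta.content, end="")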
@@ -1474,36 +1900,17 @@ async def _generate_llm_continuation(
# Build system prompt
system_prompt, _ = await _build_system_prompt(user_id)
# Build messages in OpenAI format
messages = session.to_openai_messages()
if system_prompt:
from openai.types.chat import ChatCompletionSystemMessageParam
system_message = ChatCompletionSystemMessageParam(
role="system",
content=system_prompt,
)
messages = [system_message] + messages
# Apply context window management to prevent oversized requests
context_result = await _manage_context_window(
messages=messages,
model=config.model,
api_key=config.api_key,
base_url=config.base_url,
)
if context_result.error and "System prompt dropped" not in context_result.error:
logger.error(
f"Context window management failed for session {session_id}: "
f"{context_result.error} (tokens={context_result.token_count})"
)
return
messages = context_result.messages
if context_result.was_compacted:
logger.info(
f"Context compacted for LLM continuation: "
f"{context_result.token_count} tokens"
)
# Build extra_body for tracing
extra_body: dict[str, Any] = {
"posthogProperties": {
@@ -1516,54 +1923,19 @@ async def _generate_llm_continuation(
if session_id:
extra_body["session_id"] = session_id[:128]
retry_count = 0
last_error: Exception | None = None
response = None
# Make non-streaming LLM call (no tools - just text response)
from typing import cast
while retry_count <= MAX_RETRIES:
try:
logger.info(
f"Generating LLM continuation for session {session_id}"
f"{f' (retry {retry_count}/{MAX_RETRIES})' if retry_count > 0 else ''}"
)
from openai.types.chat import ChatCompletionMessageParam
response = await client.chat.completions.create(
model=config.model,
messages=cast(list[ChatCompletionMessageParam], messages),
extra_body=extra_body,
)
last_error = None # Clear any previous error on success
break # Success, exit retry loop
except Exception as e:
last_error = e
if _is_retryable_error(e) and retry_count < MAX_RETRIES:
retry_count += 1
delay = min(
BASE_DELAY_SECONDS * (2 ** (retry_count - 1)),
MAX_DELAY_SECONDS,
)
logger.warning(
f"Retryable error in LLM continuation: {e!s}. "
f"Retrying in {delay:.1f}s (attempt {retry_count}/{MAX_RETRIES})"
)
await asyncio.sleep(delay)
continue
else:
# Non-retryable error - log and exit gracefully
logger.error(
f"Non-retryable error in LLM continuation: {e!s}",
exc_info=True,
)
return
# No tools parameter = text-only response (no tool calls)
response = await client.chat.completions.create(
model=config.model,
messages=cast(list[ChatCompletionMessageParam], messages),
extra_body=extra_body,
)
if last_error:
logger.error(
f"Max retries ({MAX_RETRIES}) exceeded for LLM continuation. "
f"Last error: {last_error!s}"
)
return
if response and response.choices and response.choices[0].message.content:
if response.choices and response.choices[0].message.content:
assistant_content = response.choices[0].message.content
# Reload session from DB to avoid race condition with user messages
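The retry loop shown above follows a capped exponential backoff; isolated below for clarity, with placeholder constants since the real MAX_RETRIES, BASE_DELAY_SECONDS, and MAX_DELAY_SECONDS are defined outside this hunk.

# Hypothetical values; the actual constants live elsewhere in the module.
MAX_RETRIES = 3
BASE_DELAY_SECONDS = 1.0
MAX_DELAY_SECONDS = 16.0

def backoff_delay(retry_count: int) -> float:
    """Delay before retry attempt `retry_count` (1-based), capped at the maximum."""
    return min(BASE_DELAY_SECONDS * (2 ** (retry_count - 1)), MAX_DELAY_SECONDS)

# retry 1 -> 1.0s, retry 2 -> 2.0s, retry 3 -> 4.0s, retry 5 -> 16.0s (capped)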

View File

@@ -139,10 +139,11 @@ async def decompose_goal_external(
"""
client = _get_client()
if context:
description = f"{description}\n\nAdditional context from user:\n{context}"
# Build the request payload
payload: dict[str, Any] = {"description": description}
if context:
# The external service uses user_instruction for additional context
payload["user_instruction"] = context
if library_agents:
payload["library_agents"] = library_agents

View File

@@ -38,8 +38,6 @@ class ResponseType(str, Enum):
OPERATION_STARTED = "operation_started"
OPERATION_PENDING = "operation_pending"
OPERATION_IN_PROGRESS = "operation_in_progress"
# Input validation
INPUT_VALIDATION_ERROR = "input_validation_error"
# Base response model
@@ -70,10 +68,6 @@ class AgentInfo(BaseModel):
has_external_trigger: bool | None = None
new_output: bool | None = None
graph_id: str | None = None
inputs: dict[str, Any] | None = Field(
default=None,
description="Input schema for the agent, including field names, types, and defaults",
)
class AgentsFoundResponse(ToolResponseBase):
@@ -200,20 +194,6 @@ class ErrorResponse(ToolResponseBase):
details: dict[str, Any] | None = None
class InputValidationErrorResponse(ToolResponseBase):
"""Response when run_agent receives unknown input fields."""
type: ResponseType = ResponseType.INPUT_VALIDATION_ERROR
unrecognized_fields: list[str] = Field(
description="List of input field names that were not recognized"
)
inputs: dict[str, Any] = Field(
description="The agent's valid input schema for reference"
)
graph_id: str | None = None
graph_version: int | None = None
# Agent output models
class ExecutionOutputInfo(BaseModel):
"""Summary of a single execution's outputs."""

View File

@@ -30,7 +30,6 @@ from .models import (
ErrorResponse,
ExecutionOptions,
ExecutionStartedResponse,
InputValidationErrorResponse,
SetupInfo,
SetupRequirementsResponse,
ToolResponseBase,
@@ -274,22 +273,6 @@ class RunAgentTool(BaseTool):
input_properties = graph.input_schema.get("properties", {})
required_fields = set(graph.input_schema.get("required", []))
provided_inputs = set(params.inputs.keys())
valid_fields = set(input_properties.keys())
# Check for unknown input fields
unrecognized_fields = provided_inputs - valid_fields
if unrecognized_fields:
return InputValidationErrorResponse(
message=(
f"Unknown input field(s) provided: {', '.join(sorted(unrecognized_fields))}. "
f"Agent was not executed. Please use the correct field names from the schema."
),
session_id=session_id,
unrecognized_fields=sorted(unrecognized_fields),
inputs=graph.input_schema,
graph_id=graph.id,
graph_version=graph.version,
)
# If agent has inputs but none were provided AND use_defaults is not set,
# always show what's available first so user can decide
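The unknown-field check boils down to a set difference against the graph's input schema; a minimal sketch with an invented schema:

input_properties = {"query": {"type": "string"}, "limit": {"type": "integer"}}
valid_fields = set(input_properties.keys())
provided_inputs = {"query", "unknown_field"}

unrecognized_fields = provided_inputs - valid_fields
if unrecognized_fields:
    # Here the tool would return an input_validation_error response
    # instead of executing the agent.
    print(f"Unknown input field(s): {', '.join(sorted(unrecognized_fields))}")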

View File

@@ -402,42 +402,3 @@ async def test_run_agent_schedule_without_name(setup_test_data):
# Should return error about missing schedule_name
assert result_data.get("type") == "error"
assert "schedule_name" in result_data["message"].lower()
@pytest.mark.asyncio(loop_scope="session")
async def test_run_agent_rejects_unknown_input_fields(setup_test_data):
"""Test that run_agent returns input_validation_error for unknown input fields."""
user = setup_test_data["user"]
store_submission = setup_test_data["store_submission"]
tool = RunAgentTool()
agent_marketplace_id = f"{user.email.split('@')[0]}/{store_submission.slug}"
session = make_session(user_id=user.id)
# Execute with unknown input field names
response = await tool.execute(
user_id=user.id,
session_id=str(uuid.uuid4()),
tool_call_id=str(uuid.uuid4()),
username_agent_slug=agent_marketplace_id,
inputs={
"unknown_field": "some value",
"another_unknown": "another value",
},
session=session,
)
assert response is not None
assert hasattr(response, "output")
assert isinstance(response.output, str)
result_data = orjson.loads(response.output)
# Should return input_validation_error type with unrecognized fields
assert result_data.get("type") == "input_validation_error"
assert "unrecognized_fields" in result_data
assert set(result_data["unrecognized_fields"]) == {
"another_unknown",
"unknown_field",
}
assert "inputs" in result_data # Contains the valid schema
assert "Agent was not executed" in result_data["message"]

View File

@@ -5,8 +5,6 @@ import uuid
from collections import defaultdict
from typing import Any
from pydantic_core import PydanticUndefined
from backend.api.features.chat.model import ChatSession
from backend.data.block import get_block
from backend.data.execution import ExecutionContext
@@ -77,22 +75,15 @@ class RunBlockTool(BaseTool):
self,
user_id: str,
block: Any,
input_data: dict[str, Any] | None = None,
) -> tuple[dict[str, CredentialsMetaInput], list[CredentialsMetaInput]]:
"""
Check if user has required credentials for a block.
Args:
user_id: User ID
block: Block to check credentials for
input_data: Input data for the block (used to determine provider via discriminator)
Returns:
tuple[matched_credentials, missing_credentials]
"""
matched_credentials: dict[str, CredentialsMetaInput] = {}
missing_credentials: list[CredentialsMetaInput] = []
input_data = input_data or {}
# Get credential field info from block's input schema
credentials_fields_info = block.input_schema.get_credentials_fields_info()
@@ -105,33 +96,14 @@ class RunBlockTool(BaseTool):
available_creds = await creds_manager.store.get_all_creds(user_id)
for field_name, field_info in credentials_fields_info.items():
effective_field_info = field_info
if field_info.discriminator and field_info.discriminator_mapping:
# Get discriminator from input, falling back to schema default
discriminator_value = input_data.get(field_info.discriminator)
if discriminator_value is None:
field = block.input_schema.model_fields.get(
field_info.discriminator
)
if field and field.default is not PydanticUndefined:
discriminator_value = field.default
if (
discriminator_value
and discriminator_value in field_info.discriminator_mapping
):
effective_field_info = field_info.discriminate(discriminator_value)
logger.debug(
f"Discriminated provider for {field_name}: "
f"{discriminator_value} -> {effective_field_info.provider}"
)
# field_info.provider is a frozenset of acceptable providers
# field_info.supported_types is a frozenset of acceptable types
matching_cred = next(
(
cred
for cred in available_creds
if cred.provider in effective_field_info.provider
and cred.type in effective_field_info.supported_types
if cred.provider in field_info.provider
and cred.type in field_info.supported_types
),
None,
)
@@ -145,8 +117,8 @@ class RunBlockTool(BaseTool):
)
else:
# Create a placeholder for the missing credential
provider = next(iter(effective_field_info.provider), "unknown")
cred_type = next(iter(effective_field_info.supported_types), "api_key")
provider = next(iter(field_info.provider), "unknown")
cred_type = next(iter(field_info.supported_types), "api_key")
missing_credentials.append(
CredentialsMetaInput(
id=field_name,
@@ -214,9 +186,10 @@ class RunBlockTool(BaseTool):
logger.info(f"Executing block {block.name} ({block_id}) for user {user_id}")
# Check credentials
creds_manager = IntegrationCredentialsManager()
matched_credentials, missing_credentials = await self._check_block_credentials(
user_id, block, input_data
user_id, block
)
if missing_credentials:
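The discriminator fallback in this hunk can be read as a small helper; the sketch below restates that logic with `block`, `field_info`, and `input_data` standing in for the real objects, so treat it as an illustration rather than the module's actual API.

from pydantic_core import PydanticUndefined

def resolve_effective_field_info(block, field_info, input_data: dict):
    """Narrow the credential provider using the discriminator input, if any."""
    value = input_data.get(field_info.discriminator)
    if value is None:
        field = block.input_schema.model_fields.get(field_info.discriminator)
        if field and field.default is not PydanticUndefined:
            value = field.default
    if value and value in field_info.discriminator_mapping:
        return field_info.discriminate(value)  # provider narrowed by the input
    return field_info  # fall back to the broad provider/type sets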

View File

@@ -66,24 +66,18 @@ async def event_broadcaster(manager: ConnectionManager):
execution_bus = AsyncRedisExecutionEventBus()
notification_bus = AsyncRedisNotificationEventBus()
try:
async def execution_worker():
async for event in execution_bus.listen("*"):
await manager.send_execution_update(event)
async def execution_worker():
async for event in execution_bus.listen("*"):
await manager.send_execution_update(event)
async def notification_worker():
async for notification in notification_bus.listen("*"):
await manager.send_notification(
user_id=notification.user_id,
payload=notification.payload,
)
async def notification_worker():
async for notification in notification_bus.listen("*"):
await manager.send_notification(
user_id=notification.user_id,
payload=notification.payload,
)
await asyncio.gather(execution_worker(), notification_worker())
finally:
# Ensure PubSub connections are closed on any exit to prevent leaks
await execution_bus.close()
await notification_bus.close()
await asyncio.gather(execution_worker(), notification_worker())
async def authenticate_websocket(websocket: WebSocket) -> str:

View File

@@ -32,7 +32,7 @@ from backend.data.model import (
from backend.integrations.providers import ProviderName
from backend.util import json
from backend.util.logging import TruncatedLogger
from backend.util.prompt import compress_context, estimate_token_count
from backend.util.prompt import compress_prompt, estimate_token_count
from backend.util.text import TextFormatter
logger = TruncatedLogger(logging.getLogger(__name__), "[LLM-Block]")
@@ -634,18 +634,11 @@ async def llm_call(
context_window = llm_model.context_window
if compress_prompt_to_fit:
result = await compress_context(
prompt = compress_prompt(
messages=prompt,
target_tokens=llm_model.context_window // 2,
client=None, # Truncation-only, no LLM summarization
reserve=0, # Caller handles response token budget separately
lossy_ok=True,
)
if result.error:
logger.warning(
f"Prompt compression did not meet target: {result.error}. "
f"Proceeding with {result.token_count} tokens."
)
prompt = result.messages
# Calculate available tokens based on context window and input length
estimated_input_tokens = estimate_token_count(prompt)
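As a rough illustration of the budget arithmetic the comment above refers to (the exact formula in llm_call is outside this hunk, so the subtraction below is an assumption, with made-up numbers):

context_window = 128_000          # assumed model context window
prompt_tokens = 90_000            # estimate_token_count(prompt) on a large prompt

target = context_window // 2      # compress_prompt target used above
prompt_tokens = min(prompt_tokens, target)    # best effort after compression

available_for_response = context_window - prompt_tokens
print(available_for_response)     # 64000 with these numbers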

View File

@@ -133,23 +133,10 @@ class RedisEventBus(BaseRedisEventBus[M], ABC):
class AsyncRedisEventBus(BaseRedisEventBus[M], ABC):
def __init__(self):
self._pubsub: AsyncPubSub | None = None
@property
async def connection(self) -> redis.AsyncRedis:
return await redis.get_redis_async()
async def close(self) -> None:
"""Close the PubSub connection if it exists."""
if self._pubsub is not None:
try:
await self._pubsub.close()
except Exception:
logger.warning("Failed to close PubSub connection", exc_info=True)
finally:
self._pubsub = None
async def publish_event(self, event: M, channel_key: str):
"""
Publish an event to Redis. Gracefully handles connection failures
@@ -170,7 +157,6 @@ class AsyncRedisEventBus(BaseRedisEventBus[M], ABC):
await self.connection, channel_key
)
assert isinstance(pubsub, AsyncPubSub)
self._pubsub = pubsub
if "*" in channel_key:
await pubsub.psubscribe(full_channel_name)

View File

@@ -17,7 +17,6 @@ from backend.data.analytics import (
get_accuracy_trends_and_alerts,
get_marketplace_graphs_for_monitoring,
)
from backend.data.auth.oauth import cleanup_expired_oauth_tokens
from backend.data.credit import UsageTransactionMetadata, get_user_credit_model
from backend.data.execution import (
create_graph_execution,
@@ -220,9 +219,6 @@ class DatabaseManager(AppService):
# Onboarding
increment_onboarding_runs = _(increment_onboarding_runs)
# OAuth
cleanup_expired_oauth_tokens = _(cleanup_expired_oauth_tokens)
# Store
get_store_agents = _(get_store_agents)
get_store_agent_details = _(get_store_agent_details)
@@ -353,9 +349,6 @@ class DatabaseManagerAsyncClient(AppServiceClient):
# Onboarding
increment_onboarding_runs = d.increment_onboarding_runs
# OAuth
cleanup_expired_oauth_tokens = d.cleanup_expired_oauth_tokens
# Store
get_store_agents = d.get_store_agents
get_store_agent_details = d.get_store_agent_details

View File

@@ -24,9 +24,11 @@ from dotenv import load_dotenv
from pydantic import BaseModel, Field, ValidationError
from sqlalchemy import MetaData, create_engine
from backend.data.auth.oauth import cleanup_expired_oauth_tokens
from backend.data.block import BlockInput
from backend.data.execution import GraphExecutionWithNodes
from backend.data.model import CredentialsMetaInput
from backend.data.onboarding import increment_onboarding_runs
from backend.executor import utils as execution_utils
from backend.monitoring import (
NotificationJobArgs,
@@ -36,11 +38,7 @@ from backend.monitoring import (
report_execution_accuracy_alerts,
report_late_executions,
)
from backend.util.clients import (
get_database_manager_async_client,
get_database_manager_client,
get_scheduler_client,
)
from backend.util.clients import get_database_manager_client, get_scheduler_client
from backend.util.cloud_storage import cleanup_expired_files_async
from backend.util.exceptions import (
GraphNotFoundError,
@@ -150,7 +148,6 @@ def execute_graph(**kwargs):
async def _execute_graph(**kwargs):
args = GraphExecutionJobArgs(**kwargs)
start_time = asyncio.get_event_loop().time()
db = get_database_manager_async_client()
try:
logger.info(f"Executing recurring job for graph #{args.graph_id}")
graph_exec: GraphExecutionWithNodes = await execution_utils.add_graph_execution(
@@ -160,7 +157,7 @@ async def _execute_graph(**kwargs):
inputs=args.input_data,
graph_credentials_inputs=args.input_credentials,
)
await db.increment_onboarding_runs(args.user_id)
await increment_onboarding_runs(args.user_id)
elapsed = asyncio.get_event_loop().time() - start_time
logger.info(
f"Graph execution started with ID {graph_exec.id} for graph {args.graph_id} "
@@ -249,13 +246,8 @@ def cleanup_expired_files():
def cleanup_oauth_tokens():
"""Clean up expired OAuth tokens from the database."""
# Wait for completion
async def _cleanup():
db = get_database_manager_async_client()
return await db.cleanup_expired_oauth_tokens()
run_async(_cleanup())
run_async(cleanup_expired_oauth_tokens())
def execution_accuracy_alerts():

View File

@@ -1,19 +1,10 @@
from __future__ import annotations
import logging
from copy import deepcopy
from dataclasses import dataclass
from typing import TYPE_CHECKING, Any
from typing import Any
from tiktoken import encoding_for_model
from backend.util import json
if TYPE_CHECKING:
from openai import AsyncOpenAI
logger = logging.getLogger(__name__)
# ---------------------------------------------------------------------------#
# CONSTANTS #
# ---------------------------------------------------------------------------#
@@ -109,17 +100,9 @@ def _is_objective_message(msg: dict) -> bool:
def _truncate_tool_message_content(msg: dict, enc, max_tokens: int) -> None:
"""
Carefully truncate tool message content while preserving tool structure.
Handles both Anthropic-style (list content) and OpenAI-style (string content) tool messages.
Only truncates tool_result content, leaves tool_use intact.
"""
content = msg.get("content")
# OpenAI-style tool message: role="tool" with string content
if msg.get("role") == "tool" and isinstance(content, str):
if _tok_len(content, enc) > max_tokens:
msg["content"] = _truncate_middle_tokens(content, enc, max_tokens)
return
# Anthropic-style: list content with tool_result items
if not isinstance(content, list):
return
@@ -157,6 +140,141 @@ def _truncate_middle_tokens(text: str, enc, max_tok: int) -> str:
# ---------------------------------------------------------------------------#
def compress_prompt(
messages: list[dict],
target_tokens: int,
*,
model: str = "gpt-4o",
reserve: int = 2_048,
start_cap: int = 8_192,
floor_cap: int = 128,
lossy_ok: bool = True,
) -> list[dict]:
"""
Shrink *messages* so that::
token_count(prompt) + reserve ≤ target_tokens
Strategy
--------
1. **Token-aware truncation**: progressively halve a per-message cap
(`start_cap`, `start_cap/2`, … `floor_cap`) and apply it to the
*content* of every message except the first and last. Tool shells
are included: we keep the envelope but shorten huge payloads.
2. **Middle-out deletion**: if still over the limit, delete whole
messages working outward from the centre, **skipping** any message
that contains ``tool_calls`` or has ``role == "tool"``.
3. **Last-chance trim**: if still too big, truncate the *first* and
*last* message bodies down to `floor_cap` tokens.
4. If the prompt is *still* too large:
• raise ``ValueError`` when ``lossy_ok == False``
• return the partially-trimmed prompt when ``lossy_ok == True`` (default)
Parameters
----------
messages Complete chat history (will be deep-copied).
model Model name; passed to tiktoken to pick the right
tokenizer (gpt-4o → 'o200k_base', others fallback).
target_tokens Hard ceiling for prompt size **excluding** the model's
forthcoming answer.
reserve How many tokens you want to leave available for that
answer (`max_tokens` in your subsequent completion call).
start_cap Initial per-message truncation ceiling (tokens).
floor_cap Lowest cap we'll accept before moving to deletions.
lossy_ok If *True* return best-effort prompt instead of raising
after all trim passes have been exhausted.
Returns
-------
list[dict] A *new* messages list that abides by the rules above.
"""
enc = encoding_for_model(model) # best-match tokenizer
msgs = deepcopy(messages) # never mutate caller
def total_tokens() -> int:
"""Current size of *msgs* in tokens."""
return sum(_msg_tokens(m, enc) for m in msgs)
original_token_count = total_tokens()
if original_token_count + reserve <= target_tokens:
return msgs
# ---- STEP 0 : normalise content --------------------------------------
# Convert non-string payloads to strings so token counting is coherent.
for i, m in enumerate(msgs):
if not isinstance(m.get("content"), str) and m.get("content") is not None:
if _is_tool_message(m):
continue
# Keep first and last messages intact (unless they're tool messages)
if i == 0 or i == len(msgs) - 1:
continue
# Reasonable 20k-char ceiling prevents pathological blobs
content_str = json.dumps(m["content"], separators=(",", ":"))
if len(content_str) > 20_000:
content_str = _truncate_middle_tokens(content_str, enc, 20_000)
m["content"] = content_str
# ---- STEP 1 : token-aware truncation ---------------------------------
cap = start_cap
while total_tokens() + reserve > target_tokens and cap >= floor_cap:
for m in msgs[1:-1]: # keep first & last intact
if _is_tool_message(m):
# For tool messages, only truncate tool result content, preserve structure
_truncate_tool_message_content(m, enc, cap)
continue
if _is_objective_message(m):
# Never truncate objective messages - they contain the core task
continue
content = m.get("content") or ""
if _tok_len(content, enc) > cap:
m["content"] = _truncate_middle_tokens(content, enc, cap)
cap //= 2 # tighten the screw
# ---- STEP 2 : middle-out deletion -----------------------------------
while total_tokens() + reserve > target_tokens and len(msgs) > 2:
# Identify all deletable messages (not first/last, not tool messages, not objective messages)
deletable_indices = []
for i in range(1, len(msgs) - 1): # Skip first and last
if not _is_tool_message(msgs[i]) and not _is_objective_message(msgs[i]):
deletable_indices.append(i)
if not deletable_indices:
break # nothing more we can drop
# Delete from center outward - find the index closest to center
centre = len(msgs) // 2
to_delete = min(deletable_indices, key=lambda i: abs(i - centre))
del msgs[to_delete]
# ---- STEP 3 : final safety-net trim on first & last ------------------
cap = start_cap
while total_tokens() + reserve > target_tokens and cap >= floor_cap:
for idx in (0, -1): # first and last
if _is_tool_message(msgs[idx]):
# For tool messages at first/last position, truncate tool result content only
_truncate_tool_message_content(msgs[idx], enc, cap)
continue
text = msgs[idx].get("content") or ""
if _tok_len(text, enc) > cap:
msgs[idx]["content"] = _truncate_middle_tokens(text, enc, cap)
cap //= 2 # tighten the screw
# ---- STEP 4 : success or fail-gracefully -----------------------------
if total_tokens() + reserve > target_tokens and not lossy_ok:
raise ValueError(
"compress_prompt: prompt still exceeds budget "
f"({total_tokens() + reserve} > {target_tokens})."
)
return msgs
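A short usage sketch for compress_prompt; the toy history below deliberately contains one oversized middle message so the truncation pass has something to do.

history = [
    {"role": "system", "content": "You are a build assistant."},
    {"role": "user", "content": "Summarise this log: " + "x" * 50_000},
    {"role": "assistant", "content": "The log shows three failed runs."},
]

trimmed = compress_prompt(history, target_tokens=4_000, reserve=1_024, lossy_ok=True)
print(estimate_token_count(history), "->", estimate_token_count(trimmed))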
def estimate_token_count(
messages: list[dict],
*,
@@ -175,8 +293,7 @@ def estimate_token_count(
-------
int Token count.
"""
token_model = _normalize_model_for_tokenizer(model)
enc = encoding_for_model(token_model)
enc = encoding_for_model(model) # best-match tokenizer
return sum(_msg_tokens(m, enc) for m in messages)
@@ -198,543 +315,6 @@ def estimate_token_count_str(
-------
int Token count.
"""
token_model = _normalize_model_for_tokenizer(model)
enc = encoding_for_model(token_model)
enc = encoding_for_model(model) # best-match tokenizer
text = json.dumps(text) if not isinstance(text, str) else text
return _tok_len(text, enc)
# ---------------------------------------------------------------------------#
# UNIFIED CONTEXT COMPRESSION #
# ---------------------------------------------------------------------------#
# Default thresholds
DEFAULT_TOKEN_THRESHOLD = 120_000
DEFAULT_KEEP_RECENT = 15
@dataclass
class CompressResult:
"""Result of context compression."""
messages: list[dict]
token_count: int
was_compacted: bool
error: str | None = None
original_token_count: int = 0
messages_summarized: int = 0
messages_dropped: int = 0
def _normalize_model_for_tokenizer(model: str) -> str:
"""Normalize model name for tiktoken tokenizer selection."""
if "/" in model:
model = model.split("/")[-1]
if "claude" in model.lower() or not any(
known in model.lower() for known in ["gpt", "o1", "chatgpt", "text-"]
):
return "gpt-4o"
return model
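Concrete inputs and outputs for the normalization helper above:

assert _normalize_model_for_tokenizer("gpt-4o-mini") == "gpt-4o-mini"
assert _normalize_model_for_tokenizer("openai/gpt-4o") == "gpt-4o"
assert _normalize_model_for_tokenizer("anthropic/claude-opus-4.5") == "gpt-4o"
assert _normalize_model_for_tokenizer("llama-3.1-70b") == "gpt-4o"  # unknown family -> approximation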
def _extract_tool_call_ids_from_message(msg: dict) -> set[str]:
"""
Extract tool_call IDs from an assistant message.
Supports both formats:
- OpenAI: {"role": "assistant", "tool_calls": [{"id": "..."}]}
- Anthropic: {"role": "assistant", "content": [{"type": "tool_use", "id": "..."}]}
Returns:
Set of tool_call IDs found in the message.
"""
ids: set[str] = set()
if msg.get("role") != "assistant":
return ids
# OpenAI format: tool_calls array
if msg.get("tool_calls"):
for tc in msg["tool_calls"]:
tc_id = tc.get("id")
if tc_id:
ids.add(tc_id)
# Anthropic format: content list with tool_use blocks
content = msg.get("content")
if isinstance(content, list):
for block in content:
if isinstance(block, dict) and block.get("type") == "tool_use":
tc_id = block.get("id")
if tc_id:
ids.add(tc_id)
return ids
def _extract_tool_response_ids_from_message(msg: dict) -> set[str]:
"""
Extract tool_call IDs that this message is responding to.
Supports both formats:
- OpenAI: {"role": "tool", "tool_call_id": "..."}
- Anthropic: {"role": "user", "content": [{"type": "tool_result", "tool_use_id": "..."}]}
Returns:
Set of tool_call IDs this message responds to.
"""
ids: set[str] = set()
# OpenAI format: role=tool with tool_call_id
if msg.get("role") == "tool":
tc_id = msg.get("tool_call_id")
if tc_id:
ids.add(tc_id)
# Anthropic format: content list with tool_result blocks
content = msg.get("content")
if isinstance(content, list):
for block in content:
if isinstance(block, dict) and block.get("type") == "tool_result":
tc_id = block.get("tool_use_id")
if tc_id:
ids.add(tc_id)
return ids
def _is_tool_response_message(msg: dict) -> bool:
"""Check if message is a tool response (OpenAI or Anthropic format)."""
# OpenAI format
if msg.get("role") == "tool":
return True
# Anthropic format
content = msg.get("content")
if isinstance(content, list):
for block in content:
if isinstance(block, dict) and block.get("type") == "tool_result":
return True
return False
def _remove_orphan_tool_responses(
messages: list[dict], orphan_ids: set[str]
) -> list[dict]:
"""
Remove tool response messages/blocks that reference orphan tool_call IDs.
Supports both OpenAI and Anthropic formats.
For Anthropic messages with mixed valid/orphan tool_result blocks,
filters out only the orphan blocks instead of dropping the entire message.
"""
result = []
for msg in messages:
# OpenAI format: role=tool - drop entire message if orphan
if msg.get("role") == "tool":
tc_id = msg.get("tool_call_id")
if tc_id and tc_id in orphan_ids:
continue
result.append(msg)
continue
# Anthropic format: content list may have mixed tool_result blocks
content = msg.get("content")
if isinstance(content, list):
has_tool_results = any(
isinstance(b, dict) and b.get("type") == "tool_result" for b in content
)
if has_tool_results:
# Filter out orphan tool_result blocks, keep valid ones
filtered_content = [
block
for block in content
if not (
isinstance(block, dict)
and block.get("type") == "tool_result"
and block.get("tool_use_id") in orphan_ids
)
]
# Only keep message if it has remaining content
if filtered_content:
msg = msg.copy()
msg["content"] = filtered_content
result.append(msg)
continue
result.append(msg)
return result
def _ensure_tool_pairs_intact(
recent_messages: list[dict],
all_messages: list[dict],
start_index: int,
) -> list[dict]:
"""
Ensure tool_call/tool_response pairs stay together after slicing.
When slicing messages for context compaction, a naive slice can separate
an assistant message containing tool_calls from its corresponding tool
response messages. This causes API validation errors (e.g., Anthropic's
"unexpected tool_use_id found in tool_result blocks").
This function checks for orphan tool responses in the slice and extends
backwards to include their corresponding assistant messages.
Supports both formats:
- OpenAI: tool_calls array + role="tool" responses
- Anthropic: tool_use blocks + tool_result blocks
Args:
recent_messages: The sliced messages to validate
all_messages: The complete message list (for looking up missing assistants)
start_index: The index in all_messages where recent_messages begins
Returns:
A potentially extended list of messages with tool pairs intact
"""
if not recent_messages:
return recent_messages
# Collect all tool_call_ids from assistant messages in the slice
available_tool_call_ids: set[str] = set()
for msg in recent_messages:
available_tool_call_ids |= _extract_tool_call_ids_from_message(msg)
# Find orphan tool responses (responses whose tool_call_id is missing)
orphan_tool_call_ids: set[str] = set()
for msg in recent_messages:
response_ids = _extract_tool_response_ids_from_message(msg)
for tc_id in response_ids:
if tc_id not in available_tool_call_ids:
orphan_tool_call_ids.add(tc_id)
if not orphan_tool_call_ids:
# No orphans, slice is valid
return recent_messages
# Find the assistant messages that contain the orphan tool_call_ids
# Search backwards from start_index in all_messages
messages_to_prepend: list[dict] = []
for i in range(start_index - 1, -1, -1):
msg = all_messages[i]
msg_tool_ids = _extract_tool_call_ids_from_message(msg)
if msg_tool_ids & orphan_tool_call_ids:
# This assistant message has tool_calls we need
# Also collect its contiguous tool responses that follow it
assistant_and_responses: list[dict] = [msg]
# Scan forward from this assistant to collect tool responses
for j in range(i + 1, start_index):
following_msg = all_messages[j]
following_response_ids = _extract_tool_response_ids_from_message(
following_msg
)
if following_response_ids and following_response_ids & msg_tool_ids:
assistant_and_responses.append(following_msg)
elif not _is_tool_response_message(following_msg):
# Stop at first non-tool-response message
break
# Prepend the assistant and its tool responses (maintain order)
messages_to_prepend = assistant_and_responses + messages_to_prepend
# Mark these as found
orphan_tool_call_ids -= msg_tool_ids
# Also add this assistant's tool_call_ids to available set
available_tool_call_ids |= msg_tool_ids
if not orphan_tool_call_ids:
# Found all missing assistants
break
if orphan_tool_call_ids:
# Some tool_call_ids couldn't be resolved - remove those tool responses
# This shouldn't happen in normal operation but handles edge cases
logger.warning(
f"Could not find assistant messages for tool_call_ids: {orphan_tool_call_ids}. "
"Removing orphan tool responses."
)
recent_messages = _remove_orphan_tool_responses(
recent_messages, orphan_tool_call_ids
)
if messages_to_prepend:
logger.info(
f"Extended recent messages by {len(messages_to_prepend)} to preserve "
f"tool_call/tool_response pairs"
)
return messages_to_prepend + recent_messages
return recent_messages
async def _summarize_messages_llm(
messages: list[dict],
client: AsyncOpenAI,
model: str,
timeout: float = 30.0,
) -> str:
"""Summarize messages using an LLM."""
conversation = []
for msg in messages:
role = msg.get("role", "")
content = msg.get("content", "")
if content and role in ("user", "assistant", "tool"):
conversation.append(f"{role.upper()}: {content}")
conversation_text = "\n\n".join(conversation)
if not conversation_text:
return "No conversation history available."
# Limit to ~100k chars for safety
MAX_CHARS = 100_000
if len(conversation_text) > MAX_CHARS:
conversation_text = conversation_text[:MAX_CHARS] + "\n\n[truncated]"
response = await client.with_options(timeout=timeout).chat.completions.create(
model=model,
messages=[
{
"role": "system",
"content": (
"Create a detailed summary of the conversation so far. "
"This summary will be used as context when continuing the conversation.\n\n"
"Before writing the summary, analyze each message chronologically to identify:\n"
"- User requests and their explicit goals\n"
"- Your approach and key decisions made\n"
"- Technical specifics (file names, tool outputs, function signatures)\n"
"- Errors encountered and resolutions applied\n\n"
"You MUST include ALL of the following sections:\n\n"
"## 1. Primary Request and Intent\n"
"The user's explicit goals and what they are trying to accomplish.\n\n"
"## 2. Key Technical Concepts\n"
"Technologies, frameworks, tools, and patterns being used or discussed.\n\n"
"## 3. Files and Resources Involved\n"
"Specific files examined or modified, with relevant snippets and identifiers.\n\n"
"## 4. Errors and Fixes\n"
"Problems encountered, error messages, and their resolutions. "
"Include any user feedback on fixes.\n\n"
"## 5. Problem Solving\n"
"Issues that have been resolved and how they were addressed.\n\n"
"## 6. All User Messages\n"
"A complete list of all user inputs (excluding tool outputs) to preserve their exact requests.\n\n"
"## 7. Pending Tasks\n"
"Work items the user explicitly requested that have not yet been completed.\n\n"
"## 8. Current Work\n"
"Precise description of what was being worked on most recently, including relevant context.\n\n"
"## 9. Next Steps\n"
"What should happen next, aligned with the user's most recent requests. "
"Include verbatim quotes of recent instructions if relevant."
),
},
{"role": "user", "content": f"Summarize:\n\n{conversation_text}"},
],
max_tokens=1500,
temperature=0.3,
)
return response.choices[0].message.content or "No summary available."
async def compress_context(
messages: list[dict],
target_tokens: int = DEFAULT_TOKEN_THRESHOLD,
*,
model: str = "gpt-4o",
client: AsyncOpenAI | None = None,
keep_recent: int = DEFAULT_KEEP_RECENT,
reserve: int = 2_048,
start_cap: int = 8_192,
floor_cap: int = 128,
) -> CompressResult:
"""
Unified context compression that combines summarization and truncation strategies.
Strategy (in order):
1. **LLM summarization** If client provided, summarize old messages into a
single context message while keeping recent messages intact. This is the
primary strategy for chat service.
2. **Content truncation** Progressively halve a per-message cap and truncate
bloated message content (tool outputs, large pastes). Preserves all messages
but shortens their content. Primary strategy when client=None (LLM blocks).
3. **Middle-out deletion** Delete whole messages one at a time from the center
outward, skipping tool messages and objective messages.
4. **First/last trim** Truncate first and last message content as last resort.
Parameters
----------
messages Complete chat history (will be deep-copied).
target_tokens Hard ceiling for prompt size.
model Model name for tokenization and summarization.
client AsyncOpenAI client. If provided, enables LLM summarization
as the first strategy. If None, skips to truncation strategies.
keep_recent Number of recent messages to preserve during summarization.
reserve Tokens to reserve for model response.
start_cap Initial per-message truncation ceiling (tokens).
floor_cap Lowest cap before moving to deletions.
Returns
-------
CompressResult with compressed messages and metadata.
"""
# Guard clause for empty messages
if not messages:
return CompressResult(
messages=[],
token_count=0,
was_compacted=False,
original_token_count=0,
)
token_model = _normalize_model_for_tokenizer(model)
enc = encoding_for_model(token_model)
msgs = deepcopy(messages)
def total_tokens() -> int:
return sum(_msg_tokens(m, enc) for m in msgs)
original_count = total_tokens()
# Already under limit
if original_count + reserve <= target_tokens:
return CompressResult(
messages=msgs,
token_count=original_count,
was_compacted=False,
original_token_count=original_count,
)
messages_summarized = 0
messages_dropped = 0
# ---- STEP 1: LLM summarization (if client provided) -------------------
# This is the primary compression strategy for chat service.
# Summarize old messages while keeping recent ones intact.
if client is not None:
has_system = len(msgs) > 0 and msgs[0].get("role") == "system"
system_msg = msgs[0] if has_system else None
# Calculate old vs recent messages
if has_system:
if len(msgs) > keep_recent + 1:
old_msgs = msgs[1:-keep_recent]
recent_msgs = msgs[-keep_recent:]
else:
old_msgs = []
recent_msgs = msgs[1:] if len(msgs) > 1 else []
else:
if len(msgs) > keep_recent:
old_msgs = msgs[:-keep_recent]
recent_msgs = msgs[-keep_recent:]
else:
old_msgs = []
recent_msgs = msgs
# Ensure tool pairs stay intact
slice_start = max(0, len(msgs) - keep_recent)
recent_msgs = _ensure_tool_pairs_intact(recent_msgs, msgs, slice_start)
if old_msgs:
try:
summary_text = await _summarize_messages_llm(old_msgs, client, model)
summary_msg = {
"role": "assistant",
"content": f"[Previous conversation summary — for context only]: {summary_text}",
}
messages_summarized = len(old_msgs)
if has_system:
msgs = [system_msg, summary_msg] + recent_msgs
else:
msgs = [summary_msg] + recent_msgs
logger.info(
f"Context summarized: {original_count} -> {total_tokens()} tokens, "
f"summarized {messages_summarized} messages"
)
except Exception as e:
logger.warning(f"Summarization failed, continuing with truncation: {e}")
# Fall through to content truncation
# ---- STEP 2: Normalize content ----------------------------------------
# Convert non-string payloads to strings so token counting is coherent.
# Always run this before truncation to ensure consistent token counting.
for i, m in enumerate(msgs):
if not isinstance(m.get("content"), str) and m.get("content") is not None:
if _is_tool_message(m):
continue
if i == 0 or i == len(msgs) - 1:
continue
content_str = json.dumps(m["content"], separators=(",", ":"))
if len(content_str) > 20_000:
content_str = _truncate_middle_tokens(content_str, enc, 20_000)
m["content"] = content_str
# ---- STEP 3: Token-aware content truncation ---------------------------
# Progressively halve per-message cap and truncate bloated content.
# This preserves all messages but shortens their content.
cap = start_cap
while total_tokens() + reserve > target_tokens and cap >= floor_cap:
for m in msgs[1:-1]:
if _is_tool_message(m):
_truncate_tool_message_content(m, enc, cap)
continue
if _is_objective_message(m):
continue
content = m.get("content") or ""
if _tok_len(content, enc) > cap:
m["content"] = _truncate_middle_tokens(content, enc, cap)
cap //= 2
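# Illustrative note (assumed defaults): with start_cap=8_192 and floor_cap=128,
# the cap walks 8192 -> 4096 -> 2048 -> 1024 -> 512 -> 256 -> 128, so each pass
# of the loop above trims oversized content more aggressively until the budget
# is met or the floor is reached.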
# ---- STEP 4: Middle-out deletion --------------------------------------
# Delete messages one at a time from the center outward.
# This is more granular than dropping all old messages at once.
while total_tokens() + reserve > target_tokens and len(msgs) > 2:
deletable: list[int] = []
for i in range(1, len(msgs) - 1):
msg = msgs[i]
if (
msg is not None
and not _is_tool_message(msg)
and not _is_objective_message(msg)
):
deletable.append(i)
if not deletable:
break
centre = len(msgs) // 2
to_delete = min(deletable, key=lambda i: abs(i - centre))
del msgs[to_delete]
messages_dropped += 1
# ---- STEP 5: Final trim on first/last ---------------------------------
cap = start_cap
while total_tokens() + reserve > target_tokens and cap >= floor_cap:
for idx in (0, -1):
msg = msgs[idx]
if msg is None:
continue
if _is_tool_message(msg):
_truncate_tool_message_content(msg, enc, cap)
continue
text = msg.get("content") or ""
if _tok_len(text, enc) > cap:
msg["content"] = _truncate_middle_tokens(text, enc, cap)
cap //= 2
# Filter out any None values that may have been introduced
final_msgs: list[dict] = [m for m in msgs if m is not None]
final_count = sum(_msg_tokens(m, enc) for m in final_msgs)
error = None
if final_count + reserve > target_tokens:
error = f"Could not compress below target ({final_count + reserve} > {target_tokens})"
logger.warning(error)
return CompressResult(
messages=final_msgs,
token_count=final_count,
was_compacted=True,
error=error,
original_token_count=original_count,
messages_summarized=messages_summarized,
messages_dropped=messages_dropped,
)
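A minimal usage sketch of compress_context (not part of the diff above; the token budget, message volume, and client setup are assumptions, and AsyncOpenAI() expects OPENAI_API_KEY to be set). Passing client=None instead exercises the truncation-only path.

import asyncio

from openai import AsyncOpenAI

from backend.util.prompt import compress_context


async def main() -> None:
    # Build an oversized conversation that will blow past the target budget.
    messages = [{"role": "system", "content": "You are a helpful assistant."}]
    for i in range(200):
        messages.append({"role": "user", "content": f"Question {i}: " + "detail " * 200})
        messages.append({"role": "assistant", "content": f"Answer {i}: " + "reply " * 200})

    result = await compress_context(
        messages,
        target_tokens=8_000,
        model="gpt-4o",
        client=AsyncOpenAI(),  # use client=None to skip LLM summarization
        keep_recent=10,
        reserve=2_048,
    )
    print(result.was_compacted, result.original_token_count, "->", result.token_count)
    print("summarized:", result.messages_summarized, "dropped:", result.messages_dropped)


asyncio.run(main())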

View File

@@ -1,21 +1,10 @@
"""Tests for prompt utility functions, especially tool call token counting."""
from unittest.mock import AsyncMock, MagicMock
import pytest
from tiktoken import encoding_for_model
from backend.util import json
from backend.util.prompt import (
CompressResult,
_ensure_tool_pairs_intact,
_msg_tokens,
_normalize_model_for_tokenizer,
_truncate_middle_tokens,
_truncate_tool_message_content,
compress_context,
estimate_token_count,
)
from backend.util.prompt import _msg_tokens, estimate_token_count
class TestMsgTokens:
@@ -287,690 +276,3 @@ class TestEstimateTokenCount:
assert total_tokens == expected_total
assert total_tokens > 20 # Should be substantial
class TestNormalizeModelForTokenizer:
"""Test model name normalization for tiktoken."""
def test_openai_models_unchanged(self):
"""Test that OpenAI models are returned as-is."""
assert _normalize_model_for_tokenizer("gpt-4o") == "gpt-4o"
assert _normalize_model_for_tokenizer("gpt-4") == "gpt-4"
assert _normalize_model_for_tokenizer("gpt-3.5-turbo") == "gpt-3.5-turbo"
def test_claude_models_normalized(self):
"""Test that Claude models are normalized to gpt-4o."""
assert _normalize_model_for_tokenizer("claude-3-opus") == "gpt-4o"
assert _normalize_model_for_tokenizer("claude-3-sonnet") == "gpt-4o"
assert _normalize_model_for_tokenizer("anthropic/claude-3-haiku") == "gpt-4o"
def test_openrouter_paths_extracted(self):
"""Test that OpenRouter model paths are handled."""
assert _normalize_model_for_tokenizer("openai/gpt-4o") == "gpt-4o"
assert _normalize_model_for_tokenizer("anthropic/claude-3-opus") == "gpt-4o"
def test_unknown_models_default_to_gpt4o(self):
"""Test that unknown models default to gpt-4o."""
assert _normalize_model_for_tokenizer("some-random-model") == "gpt-4o"
assert _normalize_model_for_tokenizer("llama-3-70b") == "gpt-4o"
class TestTruncateToolMessageContent:
"""Test tool message content truncation."""
@pytest.fixture
def enc(self):
return encoding_for_model("gpt-4o")
def test_truncate_openai_tool_message(self, enc):
"""Test truncation of OpenAI-style tool message with string content."""
long_content = "x" * 10000
msg = {"role": "tool", "tool_call_id": "call_123", "content": long_content}
_truncate_tool_message_content(msg, enc, max_tokens=100)
# Content should be truncated
assert len(msg["content"]) < len(long_content)
assert "" in msg["content"] # Has ellipsis marker
def test_truncate_anthropic_tool_result(self, enc):
"""Test truncation of Anthropic-style tool_result."""
long_content = "y" * 10000
msg = {
"role": "user",
"content": [
{
"type": "tool_result",
"tool_use_id": "toolu_123",
"content": long_content,
}
],
}
_truncate_tool_message_content(msg, enc, max_tokens=100)
# Content should be truncated
result_content = msg["content"][0]["content"]
assert len(result_content) < len(long_content)
assert "" in result_content
def test_preserve_tool_use_blocks(self, enc):
"""Test that tool_use blocks are not truncated."""
msg = {
"role": "assistant",
"content": [
{
"type": "tool_use",
"id": "toolu_123",
"name": "some_function",
"input": {"key": "value" * 1000}, # Large input
}
],
}
original = json.dumps(msg["content"][0]["input"])
_truncate_tool_message_content(msg, enc, max_tokens=10)
# tool_use should be unchanged
assert json.dumps(msg["content"][0]["input"]) == original
def test_no_truncation_when_under_limit(self, enc):
"""Test that short content is not modified."""
msg = {"role": "tool", "tool_call_id": "call_123", "content": "Short content"}
original = msg["content"]
_truncate_tool_message_content(msg, enc, max_tokens=1000)
assert msg["content"] == original
class TestTruncateMiddleTokens:
"""Test middle truncation of text."""
@pytest.fixture
def enc(self):
return encoding_for_model("gpt-4o")
def test_truncates_long_text(self, enc):
"""Test that long text is truncated with ellipsis in middle."""
long_text = "word " * 1000
result = _truncate_middle_tokens(long_text, enc, max_tok=50)
assert len(enc.encode(result)) <= 52 # Allow some slack for ellipsis
assert "" in result
assert result.startswith("word") # Head preserved
assert result.endswith("word ") # Tail preserved
def test_preserves_short_text(self, enc):
"""Test that short text is not modified."""
short_text = "Hello world"
result = _truncate_middle_tokens(short_text, enc, max_tok=100)
assert result == short_text
class TestEnsureToolPairsIntact:
"""Test tool call/response pair preservation for both OpenAI and Anthropic formats."""
# ---- OpenAI Format Tests ----
def test_openai_adds_missing_tool_call(self):
"""Test that orphaned OpenAI tool_response gets its tool_call prepended."""
all_msgs = [
{"role": "system", "content": "You are helpful."},
{
"role": "assistant",
"tool_calls": [
{"id": "call_1", "type": "function", "function": {"name": "f1"}}
],
},
{"role": "tool", "tool_call_id": "call_1", "content": "result"},
{"role": "user", "content": "Thanks!"},
]
# Recent messages start at index 2 (the tool response)
recent = [all_msgs[2], all_msgs[3]]
start_index = 2
result = _ensure_tool_pairs_intact(recent, all_msgs, start_index)
# Should prepend the tool_call message
assert len(result) == 3
assert result[0]["role"] == "assistant"
assert "tool_calls" in result[0]
def test_openai_keeps_complete_pairs(self):
"""Test that complete OpenAI pairs are unchanged."""
all_msgs = [
{"role": "system", "content": "System"},
{
"role": "assistant",
"tool_calls": [
{"id": "call_1", "type": "function", "function": {"name": "f1"}}
],
},
{"role": "tool", "tool_call_id": "call_1", "content": "result"},
]
recent = all_msgs[1:] # Include both tool_call and response
start_index = 1
result = _ensure_tool_pairs_intact(recent, all_msgs, start_index)
assert len(result) == 2 # No messages added
def test_openai_multiple_tool_calls(self):
"""Test multiple OpenAI tool calls in one assistant message."""
all_msgs = [
{"role": "system", "content": "System"},
{
"role": "assistant",
"tool_calls": [
{"id": "call_1", "type": "function", "function": {"name": "f1"}},
{"id": "call_2", "type": "function", "function": {"name": "f2"}},
],
},
{"role": "tool", "tool_call_id": "call_1", "content": "result1"},
{"role": "tool", "tool_call_id": "call_2", "content": "result2"},
{"role": "user", "content": "Thanks!"},
]
# Recent messages start at index 2 (first tool response)
recent = [all_msgs[2], all_msgs[3], all_msgs[4]]
start_index = 2
result = _ensure_tool_pairs_intact(recent, all_msgs, start_index)
# Should prepend the assistant message with both tool_calls
assert len(result) == 4
assert result[0]["role"] == "assistant"
assert len(result[0]["tool_calls"]) == 2
# ---- Anthropic Format Tests ----
def test_anthropic_adds_missing_tool_use(self):
"""Test that orphaned Anthropic tool_result gets its tool_use prepended."""
all_msgs = [
{"role": "system", "content": "You are helpful."},
{
"role": "assistant",
"content": [
{
"type": "tool_use",
"id": "toolu_123",
"name": "get_weather",
"input": {"location": "SF"},
}
],
},
{
"role": "user",
"content": [
{
"type": "tool_result",
"tool_use_id": "toolu_123",
"content": "22°C and sunny",
}
],
},
{"role": "user", "content": "Thanks!"},
]
# Recent messages start at index 2 (the tool_result)
recent = [all_msgs[2], all_msgs[3]]
start_index = 2
result = _ensure_tool_pairs_intact(recent, all_msgs, start_index)
# Should prepend the tool_use message
assert len(result) == 3
assert result[0]["role"] == "assistant"
assert result[0]["content"][0]["type"] == "tool_use"
def test_anthropic_keeps_complete_pairs(self):
"""Test that complete Anthropic pairs are unchanged."""
all_msgs = [
{"role": "system", "content": "System"},
{
"role": "assistant",
"content": [
{
"type": "tool_use",
"id": "toolu_456",
"name": "calculator",
"input": {"expr": "2+2"},
}
],
},
{
"role": "user",
"content": [
{
"type": "tool_result",
"tool_use_id": "toolu_456",
"content": "4",
}
],
},
]
recent = all_msgs[1:] # Include both tool_use and result
start_index = 1
result = _ensure_tool_pairs_intact(recent, all_msgs, start_index)
assert len(result) == 2 # No messages added
def test_anthropic_multiple_tool_uses(self):
"""Test multiple Anthropic tool_use blocks in one message."""
all_msgs = [
{"role": "system", "content": "System"},
{
"role": "assistant",
"content": [
{"type": "text", "text": "Let me check both..."},
{
"type": "tool_use",
"id": "toolu_1",
"name": "get_weather",
"input": {"city": "NYC"},
},
{
"type": "tool_use",
"id": "toolu_2",
"name": "get_weather",
"input": {"city": "LA"},
},
],
},
{
"role": "user",
"content": [
{
"type": "tool_result",
"tool_use_id": "toolu_1",
"content": "Cold",
},
{
"type": "tool_result",
"tool_use_id": "toolu_2",
"content": "Warm",
},
],
},
{"role": "user", "content": "Thanks!"},
]
# Recent messages start at index 2 (tool_result)
recent = [all_msgs[2], all_msgs[3]]
start_index = 2
result = _ensure_tool_pairs_intact(recent, all_msgs, start_index)
# Should prepend the assistant message with both tool_uses
assert len(result) == 3
assert result[0]["role"] == "assistant"
tool_use_count = sum(
1 for b in result[0]["content"] if b.get("type") == "tool_use"
)
assert tool_use_count == 2
# ---- Mixed/Edge Case Tests ----
def test_anthropic_with_type_message_field(self):
"""Test Anthropic format with 'type': 'message' field (smart_decision_maker style)."""
all_msgs = [
{"role": "system", "content": "You are helpful."},
{
"role": "assistant",
"content": [
{
"type": "tool_use",
"id": "toolu_abc",
"name": "search",
"input": {"q": "test"},
}
],
},
{
"role": "user",
"type": "message", # Extra field from smart_decision_maker
"content": [
{
"type": "tool_result",
"tool_use_id": "toolu_abc",
"content": "Found results",
}
],
},
{"role": "user", "content": "Thanks!"},
]
# Recent messages start at index 2 (the tool_result with 'type': 'message')
recent = [all_msgs[2], all_msgs[3]]
start_index = 2
result = _ensure_tool_pairs_intact(recent, all_msgs, start_index)
# Should prepend the tool_use message
assert len(result) == 3
assert result[0]["role"] == "assistant"
assert result[0]["content"][0]["type"] == "tool_use"
def test_handles_no_tool_messages(self):
"""Test messages without tool calls."""
all_msgs = [
{"role": "user", "content": "Hello"},
{"role": "assistant", "content": "Hi there!"},
]
recent = all_msgs
start_index = 0
result = _ensure_tool_pairs_intact(recent, all_msgs, start_index)
assert result == all_msgs
def test_handles_empty_messages(self):
"""Test empty message list."""
result = _ensure_tool_pairs_intact([], [], 0)
assert result == []
def test_mixed_text_and_tool_content(self):
"""Test Anthropic message with mixed text and tool_use content."""
all_msgs = [
{
"role": "assistant",
"content": [
{"type": "text", "text": "I'll help you with that."},
{
"type": "tool_use",
"id": "toolu_mixed",
"name": "search",
"input": {"q": "test"},
},
],
},
{
"role": "user",
"content": [
{
"type": "tool_result",
"tool_use_id": "toolu_mixed",
"content": "Found results",
}
],
},
{"role": "assistant", "content": "Here are the results..."},
]
# Start from tool_result
recent = [all_msgs[1], all_msgs[2]]
start_index = 1
result = _ensure_tool_pairs_intact(recent, all_msgs, start_index)
# Should prepend the assistant message with tool_use
assert len(result) == 3
assert result[0]["content"][0]["type"] == "text"
assert result[0]["content"][1]["type"] == "tool_use"
class TestCompressContext:
"""Test the async compress_context function."""
@pytest.mark.asyncio
async def test_no_compression_needed(self):
"""Test messages under limit return without compression."""
messages = [
{"role": "system", "content": "You are helpful."},
{"role": "user", "content": "Hello!"},
]
result = await compress_context(messages, target_tokens=100000)
assert isinstance(result, CompressResult)
assert result.was_compacted is False
assert len(result.messages) == 2
assert result.error is None
@pytest.mark.asyncio
async def test_truncation_without_client(self):
"""Test that truncation works without LLM client."""
long_content = "x" * 50000
messages = [
{"role": "system", "content": "System"},
{"role": "user", "content": long_content},
{"role": "assistant", "content": "Response"},
]
result = await compress_context(
messages, target_tokens=1000, client=None, reserve=100
)
assert result.was_compacted is True
# Should have truncated without summarization
assert result.messages_summarized == 0
@pytest.mark.asyncio
async def test_with_mocked_llm_client(self):
"""Test summarization with mocked LLM client."""
# Create many messages to trigger summarization
messages = [{"role": "system", "content": "System prompt"}]
for i in range(30):
messages.append({"role": "user", "content": f"User message {i} " * 100})
messages.append(
{"role": "assistant", "content": f"Assistant response {i} " * 100}
)
# Mock the AsyncOpenAI client
mock_client = AsyncMock()
mock_response = MagicMock()
mock_response.choices = [MagicMock()]
mock_response.choices[0].message.content = "Summary of conversation"
mock_client.with_options.return_value.chat.completions.create = AsyncMock(
return_value=mock_response
)
result = await compress_context(
messages,
target_tokens=5000,
client=mock_client,
keep_recent=5,
reserve=500,
)
assert result.was_compacted is True
# Should have attempted summarization
assert mock_client.with_options.called or result.messages_summarized > 0
@pytest.mark.asyncio
async def test_preserves_tool_pairs(self):
"""Test that tool call/response pairs stay together."""
messages = [
{"role": "system", "content": "System"},
{"role": "user", "content": "Do something"},
{
"role": "assistant",
"tool_calls": [
{"id": "call_1", "type": "function", "function": {"name": "func"}}
],
},
{"role": "tool", "tool_call_id": "call_1", "content": "Result " * 1000},
{"role": "assistant", "content": "Done!"},
]
result = await compress_context(
messages, target_tokens=500, client=None, reserve=50
)
# Check that if tool response exists, its call exists too
tool_call_ids = set()
tool_response_ids = set()
for msg in result.messages:
if "tool_calls" in msg:
for tc in msg["tool_calls"]:
tool_call_ids.add(tc["id"])
if msg.get("role") == "tool":
tool_response_ids.add(msg.get("tool_call_id"))
# All tool responses should have their calls
assert tool_response_ids <= tool_call_ids
@pytest.mark.asyncio
async def test_returns_error_when_cannot_compress(self):
"""Test that error is returned when compression fails."""
# Single huge message that can't be compressed enough
messages = [
{"role": "user", "content": "x" * 100000},
]
result = await compress_context(
messages, target_tokens=100, client=None, reserve=50
)
# Should have an error since we can't get below 100 tokens
assert result.error is not None
assert result.was_compacted is True
@pytest.mark.asyncio
async def test_empty_messages(self):
"""Test that empty messages list returns early without error."""
result = await compress_context([], target_tokens=1000)
assert result.messages == []
assert result.token_count == 0
assert result.was_compacted is False
assert result.error is None
class TestRemoveOrphanToolResponses:
"""Test _remove_orphan_tool_responses helper function."""
def test_removes_openai_orphan(self):
"""Test removal of orphan OpenAI tool response."""
from backend.util.prompt import _remove_orphan_tool_responses
messages = [
{"role": "tool", "tool_call_id": "call_orphan", "content": "result"},
{"role": "user", "content": "Hello"},
]
orphan_ids = {"call_orphan"}
result = _remove_orphan_tool_responses(messages, orphan_ids)
assert len(result) == 1
assert result[0]["role"] == "user"
def test_keeps_valid_openai_tool(self):
"""Test that valid OpenAI tool responses are kept."""
from backend.util.prompt import _remove_orphan_tool_responses
messages = [
{"role": "tool", "tool_call_id": "call_valid", "content": "result"},
]
orphan_ids = {"call_other"}
result = _remove_orphan_tool_responses(messages, orphan_ids)
assert len(result) == 1
assert result[0]["tool_call_id"] == "call_valid"
def test_filters_anthropic_mixed_blocks(self):
"""Test filtering individual orphan blocks from Anthropic message with mixed valid/orphan."""
from backend.util.prompt import _remove_orphan_tool_responses
messages = [
{
"role": "user",
"content": [
{
"type": "tool_result",
"tool_use_id": "toolu_valid",
"content": "valid result",
},
{
"type": "tool_result",
"tool_use_id": "toolu_orphan",
"content": "orphan result",
},
],
},
]
orphan_ids = {"toolu_orphan"}
result = _remove_orphan_tool_responses(messages, orphan_ids)
assert len(result) == 1
# Should only have the valid tool_result, orphan filtered out
assert len(result[0]["content"]) == 1
assert result[0]["content"][0]["tool_use_id"] == "toolu_valid"
def test_removes_anthropic_all_orphan(self):
"""Test removal of Anthropic message when all tool_results are orphans."""
from backend.util.prompt import _remove_orphan_tool_responses
messages = [
{
"role": "user",
"content": [
{
"type": "tool_result",
"tool_use_id": "toolu_orphan1",
"content": "result1",
},
{
"type": "tool_result",
"tool_use_id": "toolu_orphan2",
"content": "result2",
},
],
},
]
orphan_ids = {"toolu_orphan1", "toolu_orphan2"}
result = _remove_orphan_tool_responses(messages, orphan_ids)
# Message should be completely removed since no content left
assert len(result) == 0
def test_preserves_non_tool_messages(self):
"""Test that non-tool messages are preserved."""
from backend.util.prompt import _remove_orphan_tool_responses
messages = [
{"role": "user", "content": "Hello"},
{"role": "assistant", "content": "Hi there!"},
]
orphan_ids = {"some_id"}
result = _remove_orphan_tool_responses(messages, orphan_ids)
assert result == messages
class TestCompressResultDataclass:
"""Test CompressResult dataclass."""
def test_default_values(self):
"""Test default values are set correctly."""
result = CompressResult(
messages=[{"role": "user", "content": "test"}],
token_count=10,
was_compacted=False,
)
assert result.error is None
assert result.original_token_count == 0 # Defaults to 0, not None
assert result.messages_summarized == 0
assert result.messages_dropped == 0
def test_all_fields(self):
"""Test all fields can be set."""
result = CompressResult(
messages=[{"role": "user", "content": "test"}],
token_count=100,
was_compacted=True,
error="Some error",
original_token_count=500,
messages_summarized=10,
messages_dropped=5,
)
assert result.token_count == 100
assert result.was_compacted is True
assert result.error == "Some error"
assert result.original_token_count == 500
assert result.messages_summarized == 10
assert result.messages_dropped == 5

View File

@@ -102,7 +102,7 @@ class TestDecomposeGoalExternal:
@pytest.mark.asyncio
async def test_decompose_goal_with_context(self):
"""Test decomposition with additional context enriched into description."""
"""Test decomposition with additional context."""
mock_response = MagicMock()
mock_response.json.return_value = {
"success": True,
@@ -119,12 +119,9 @@ class TestDecomposeGoalExternal:
"Build a chatbot", context="Use Python"
)
expected_description = (
"Build a chatbot\n\nAdditional context from user:\nUse Python"
)
mock_client.post.assert_called_once_with(
"/api/decompose-description",
json={"description": expected_description},
json={"description": "Build a chatbot", "user_instruction": "Use Python"},
)
@pytest.mark.asyncio

View File

@@ -1,9 +1,10 @@
"use client";
import { getV1OnboardingState } from "@/app/api/__generated__/endpoints/onboarding/onboarding";
import { getOnboardingStatus, resolveResponse } from "@/app/api/helpers";
import { LoadingSpinner } from "@/components/atoms/LoadingSpinner/LoadingSpinner";
import { useRouter } from "next/navigation";
import { useEffect } from "react";
import { resolveResponse, getOnboardingStatus } from "@/app/api/helpers";
import { getV1OnboardingState } from "@/app/api/__generated__/endpoints/onboarding/onboarding";
import { getHomepageRoute } from "@/lib/constants";
export default function OnboardingPage() {
const router = useRouter();
@@ -12,10 +13,12 @@ export default function OnboardingPage() {
async function redirectToStep() {
try {
// Check if onboarding is enabled (also gets chat flag for redirect)
const { shouldShowOnboarding } = await getOnboardingStatus();
const { shouldShowOnboarding, isChatEnabled } =
await getOnboardingStatus();
const homepageRoute = getHomepageRoute(isChatEnabled);
if (!shouldShowOnboarding) {
router.replace("/");
router.replace(homepageRoute);
return;
}
@@ -23,7 +26,7 @@ export default function OnboardingPage() {
// Handle completed onboarding
if (onboarding.completedSteps.includes("GET_RESULTS")) {
router.replace("/");
router.replace(homepageRoute);
return;
}

View File

@@ -1,8 +1,9 @@
import { getOnboardingStatus } from "@/app/api/helpers";
import BackendAPI from "@/lib/autogpt-server-api";
import { getServerSupabase } from "@/lib/supabase/server/getServerSupabase";
import { revalidatePath } from "next/cache";
import { getHomepageRoute } from "@/lib/constants";
import BackendAPI from "@/lib/autogpt-server-api";
import { NextResponse } from "next/server";
import { revalidatePath } from "next/cache";
import { getOnboardingStatus } from "@/app/api/helpers";
// Handle the callback to complete the user session login
export async function GET(request: Request) {
@@ -26,12 +27,13 @@ export async function GET(request: Request) {
await api.createUser();
// Get onboarding status from backend (includes chat flag evaluated for this user)
const { shouldShowOnboarding } = await getOnboardingStatus();
const { shouldShowOnboarding, isChatEnabled } =
await getOnboardingStatus();
if (shouldShowOnboarding) {
next = "/onboarding";
revalidatePath("/onboarding", "layout");
} else {
next = "/";
next = getHomepageRoute(isChatEnabled);
revalidatePath(next, "layout");
}
} catch (createUserError) {

View File

@@ -1,13 +1,6 @@
"use client";
import { FeatureFlagPage } from "@/services/feature-flags/FeatureFlagPage";
import { Flag } from "@/services/feature-flags/use-get-flag";
import { type ReactNode } from "react";
import type { ReactNode } from "react";
import { CopilotShell } from "./components/CopilotShell/CopilotShell";
export default function CopilotLayout({ children }: { children: ReactNode }) {
return (
<FeatureFlagPage flag={Flag.CHAT} whenDisabled="/library">
<CopilotShell>{children}</CopilotShell>
</FeatureFlagPage>
);
return <CopilotShell>{children}</CopilotShell>;
}

View File

@@ -14,8 +14,14 @@ export default function CopilotPage() {
const isInterruptModalOpen = useCopilotStore((s) => s.isInterruptModalOpen);
const confirmInterrupt = useCopilotStore((s) => s.confirmInterrupt);
const cancelInterrupt = useCopilotStore((s) => s.cancelInterrupt);
const { greetingName, quickActions, isLoading, hasSession, initialPrompt } =
state;
const {
greetingName,
quickActions,
isLoading,
hasSession,
initialPrompt,
isReady,
} = state;
const {
handleQuickAction,
startChatWithPrompt,
@@ -23,6 +29,8 @@ export default function CopilotPage() {
handleStreamingChange,
} = handlers;
if (!isReady) return null;
if (hasSession) {
return (
<div className="flex h-full flex-col">

View File

@@ -3,11 +3,18 @@ import {
postV2CreateSession,
} from "@/app/api/__generated__/endpoints/chat/chat";
import { useToast } from "@/components/molecules/Toast/use-toast";
import { getHomepageRoute } from "@/lib/constants";
import { useSupabase } from "@/lib/supabase/hooks/useSupabase";
import { useOnboarding } from "@/providers/onboarding/onboarding-provider";
import {
Flag,
type FlagValues,
useGetFlag,
} from "@/services/feature-flags/use-get-flag";
import { SessionKey, sessionStorage } from "@/services/storage/session-storage";
import * as Sentry from "@sentry/nextjs";
import { useQueryClient } from "@tanstack/react-query";
import { useFlags } from "launchdarkly-react-client-sdk";
import { useRouter } from "next/navigation";
import { useEffect } from "react";
import { useCopilotStore } from "./copilot-page-store";
@@ -26,6 +33,22 @@ export function useCopilotPage() {
const isCreating = useCopilotStore((s) => s.isCreatingSession);
const setIsCreating = useCopilotStore((s) => s.setIsCreatingSession);
// Complete VISIT_COPILOT onboarding step to grant $5 welcome bonus
useEffect(() => {
if (isLoggedIn) {
completeStep("VISIT_COPILOT");
}
}, [completeStep, isLoggedIn]);
const isChatEnabled = useGetFlag(Flag.CHAT);
const flags = useFlags<FlagValues>();
const homepageRoute = getHomepageRoute(isChatEnabled);
const envEnabled = process.env.NEXT_PUBLIC_LAUNCHDARKLY_ENABLED === "true";
const clientId = process.env.NEXT_PUBLIC_LAUNCHDARKLY_CLIENT_ID;
const isLaunchDarklyConfigured = envEnabled && Boolean(clientId);
const isFlagReady =
!isLaunchDarklyConfigured || flags[Flag.CHAT] !== undefined;
const greetingName = getGreetingName(user);
const quickActions = getQuickActions();
@@ -35,8 +58,11 @@ export function useCopilotPage() {
: undefined;
useEffect(() => {
if (isLoggedIn) completeStep("VISIT_COPILOT");
}, [completeStep, isLoggedIn]);
if (!isFlagReady) return;
if (isChatEnabled === false) {
router.replace(homepageRoute);
}
}, [homepageRoute, isChatEnabled, isFlagReady, router]);
async function startChatWithPrompt(prompt: string) {
if (!prompt?.trim()) return;
@@ -90,6 +116,7 @@ export function useCopilotPage() {
isLoading: isUserLoading,
hasSession,
initialPrompt,
isReady: isFlagReady && isChatEnabled !== false && isLoggedIn,
},
handlers: {
handleQuickAction,

View File

@@ -1,6 +1,8 @@
"use client";
import { ErrorCard } from "@/components/molecules/ErrorCard/ErrorCard";
import { getHomepageRoute } from "@/lib/constants";
import { Flag, useGetFlag } from "@/services/feature-flags/use-get-flag";
import { useSearchParams } from "next/navigation";
import { Suspense } from "react";
import { getErrorDetails } from "./helpers";
@@ -9,6 +11,8 @@ function ErrorPageContent() {
const searchParams = useSearchParams();
const errorMessage = searchParams.get("message");
const errorDetails = getErrorDetails(errorMessage);
const isChatEnabled = useGetFlag(Flag.CHAT);
const homepageRoute = getHomepageRoute(isChatEnabled);
function handleRetry() {
// Auth-related errors should redirect to login
@@ -26,7 +30,7 @@ function ErrorPageContent() {
}, 2000);
} else {
// For server/network errors, go to home
window.location.href = "/";
window.location.href = homepageRoute;
}
}

View File

@@ -1,5 +1,6 @@
"use server";
import { getHomepageRoute } from "@/lib/constants";
import BackendAPI from "@/lib/autogpt-server-api";
import { getServerSupabase } from "@/lib/supabase/server/getServerSupabase";
import { loginFormSchema } from "@/types/auth";
@@ -37,8 +38,10 @@ export async function login(email: string, password: string) {
await api.createUser();
// Get onboarding status from backend (includes chat flag evaluated for this user)
const { shouldShowOnboarding } = await getOnboardingStatus();
const next = shouldShowOnboarding ? "/onboarding" : "/";
const { shouldShowOnboarding, isChatEnabled } = await getOnboardingStatus();
const next = shouldShowOnboarding
? "/onboarding"
: getHomepageRoute(isChatEnabled);
return {
success: true,

View File

@@ -1,6 +1,8 @@
import { useToast } from "@/components/molecules/Toast/use-toast";
import { getHomepageRoute } from "@/lib/constants";
import { useSupabase } from "@/lib/supabase/hooks/useSupabase";
import { environment } from "@/services/environment";
import { Flag, useGetFlag } from "@/services/feature-flags/use-get-flag";
import { loginFormSchema, LoginProvider } from "@/types/auth";
import { zodResolver } from "@hookform/resolvers/zod";
import { useRouter, useSearchParams } from "next/navigation";
@@ -20,15 +22,17 @@ export function useLoginPage() {
const [isGoogleLoading, setIsGoogleLoading] = useState(false);
const [showNotAllowedModal, setShowNotAllowedModal] = useState(false);
const isCloudEnv = environment.isCloud();
const isChatEnabled = useGetFlag(Flag.CHAT);
const homepageRoute = getHomepageRoute(isChatEnabled);
// Get redirect destination from 'next' query parameter
const nextUrl = searchParams.get("next");
useEffect(() => {
if (isLoggedIn && !isLoggingIn) {
router.push(nextUrl || "/");
router.push(nextUrl || homepageRoute);
}
}, [isLoggedIn, isLoggingIn, nextUrl, router]);
}, [homepageRoute, isLoggedIn, isLoggingIn, nextUrl, router]);
const form = useForm<z.infer<typeof loginFormSchema>>({
resolver: zodResolver(loginFormSchema),
@@ -94,7 +98,7 @@ export function useLoginPage() {
}
// Prefer URL's next parameter, then use backend-determined route
router.replace(nextUrl || result.next || "/");
router.replace(nextUrl || result.next || homepageRoute);
} catch (error) {
toast({
title:

View File

@@ -1,5 +1,6 @@
"use server";
import { getHomepageRoute } from "@/lib/constants";
import { getServerSupabase } from "@/lib/supabase/server/getServerSupabase";
import { signupFormSchema } from "@/types/auth";
import * as Sentry from "@sentry/nextjs";
@@ -58,8 +59,10 @@ export async function signup(
}
// Get onboarding status from backend (includes chat flag evaluated for this user)
const { shouldShowOnboarding } = await getOnboardingStatus();
const next = shouldShowOnboarding ? "/onboarding" : "/";
const { shouldShowOnboarding, isChatEnabled } = await getOnboardingStatus();
const next = shouldShowOnboarding
? "/onboarding"
: getHomepageRoute(isChatEnabled);
return { success: true, next };
} catch (err) {

View File

@@ -1,6 +1,8 @@
import { useToast } from "@/components/molecules/Toast/use-toast";
import { getHomepageRoute } from "@/lib/constants";
import { useSupabase } from "@/lib/supabase/hooks/useSupabase";
import { environment } from "@/services/environment";
import { Flag, useGetFlag } from "@/services/feature-flags/use-get-flag";
import { LoginProvider, signupFormSchema } from "@/types/auth";
import { zodResolver } from "@hookform/resolvers/zod";
import { useRouter, useSearchParams } from "next/navigation";
@@ -20,15 +22,17 @@ export function useSignupPage() {
const [isGoogleLoading, setIsGoogleLoading] = useState(false);
const [showNotAllowedModal, setShowNotAllowedModal] = useState(false);
const isCloudEnv = environment.isCloud();
const isChatEnabled = useGetFlag(Flag.CHAT);
const homepageRoute = getHomepageRoute(isChatEnabled);
// Get redirect destination from 'next' query parameter
const nextUrl = searchParams.get("next");
useEffect(() => {
if (isLoggedIn && !isSigningUp) {
router.push(nextUrl || "/");
router.push(nextUrl || homepageRoute);
}
}, [isLoggedIn, isSigningUp, nextUrl, router]);
}, [homepageRoute, isLoggedIn, isSigningUp, nextUrl, router]);
const form = useForm<z.infer<typeof signupFormSchema>>({
resolver: zodResolver(signupFormSchema),
@@ -129,7 +133,7 @@ export function useSignupPage() {
}
// Prefer the URL's next parameter, then result.next (for onboarding), then default
const redirectTo = nextUrl || result.next || "/";
const redirectTo = nextUrl || result.next || homepageRoute;
router.replace(redirectTo);
} catch (error) {
setIsLoading(false);

View File

@@ -181,5 +181,6 @@ export async function getOnboardingStatus() {
const isCompleted = onboarding.completedSteps.includes("CONGRATS");
return {
shouldShowOnboarding: status.is_onboarding_enabled && !isCompleted,
isChatEnabled: status.is_chat_enabled,
};
}

View File

@@ -1,15 +1,27 @@
"use client";
import { LoadingSpinner } from "@/components/atoms/LoadingSpinner/LoadingSpinner";
import { getHomepageRoute } from "@/lib/constants";
import { Flag, useGetFlag } from "@/services/feature-flags/use-get-flag";
import { useRouter } from "next/navigation";
import { useEffect } from "react";
export default function Page() {
const isChatEnabled = useGetFlag(Flag.CHAT);
const router = useRouter();
const homepageRoute = getHomepageRoute(isChatEnabled);
const envEnabled = process.env.NEXT_PUBLIC_LAUNCHDARKLY_ENABLED === "true";
const clientId = process.env.NEXT_PUBLIC_LAUNCHDARKLY_CLIENT_ID;
const isLaunchDarklyConfigured = envEnabled && Boolean(clientId);
const isFlagReady =
!isLaunchDarklyConfigured || typeof isChatEnabled === "boolean";
useEffect(() => {
router.replace("/copilot");
}, [router]);
useEffect(
function redirectToHomepage() {
if (!isFlagReady) return;
router.replace(homepageRoute);
},
[homepageRoute, isFlagReady, router],
);
return <LoadingSpinner size="large" cover />;
return null;
}

View File

@@ -1,6 +1,7 @@
"use client";
import { IconLaptop } from "@/components/__legacy__/ui/icons";
import { getHomepageRoute } from "@/lib/constants";
import { cn } from "@/lib/utils";
import { Flag, useGetFlag } from "@/services/feature-flags/use-get-flag";
import { ListChecksIcon } from "@phosphor-icons/react/dist/ssr";
@@ -23,11 +24,11 @@ interface Props {
export function NavbarLink({ name, href }: Props) {
const pathname = usePathname();
const isChatEnabled = useGetFlag(Flag.CHAT);
const expectedHomeRoute = isChatEnabled ? "/copilot" : "/library";
const homepageRoute = getHomepageRoute(isChatEnabled);
const isActive =
href === expectedHomeRoute
? pathname === "/" || pathname.startsWith(expectedHomeRoute)
href === homepageRoute
? pathname === "/" || pathname.startsWith(homepageRoute)
: pathname.includes(href);
return (

View File

@@ -66,7 +66,7 @@ export default function useAgentGraph(
>(null);
const [xyNodes, setXYNodes] = useState<CustomNode[]>([]);
const [xyEdges, setXYEdges] = useState<CustomEdge[]>([]);
const betaBlocks = useGetFlag(Flag.BETA_BLOCKS) as string[];
const betaBlocks = useGetFlag(Flag.BETA_BLOCKS);
// Filter blocks based on beta flags
const availableBlocks = useMemo(() => {

View File

@@ -11,3 +11,10 @@ export const API_KEY_HEADER_NAME = "X-API-Key";
// Layout
export const NAVBAR_HEIGHT_PX = 60;
// Routes
export function getHomepageRoute(isChatEnabled?: boolean | null): string {
if (isChatEnabled === true) return "/copilot";
if (isChatEnabled === false) return "/library";
return "/";
}

View File

@@ -1,3 +1,4 @@
import { getHomepageRoute } from "@/lib/constants";
import { environment } from "@/services/environment";
import { Key, storage } from "@/services/storage/local-storage";
import { type CookieOptions } from "@supabase/ssr";
@@ -70,7 +71,7 @@ export function getRedirectPath(
}
if (isAdminPage(path) && userRole !== "admin") {
return "/";
return getHomepageRoute();
}
return null;

View File

@@ -1,3 +1,4 @@
import { getHomepageRoute } from "@/lib/constants";
import { environment } from "@/services/environment";
import { createServerClient } from "@supabase/ssr";
import { NextResponse, type NextRequest } from "next/server";
@@ -66,7 +67,7 @@ export async function updateSession(request: NextRequest) {
// 2. Check if user is authenticated but lacks admin role when accessing admin pages
if (user && userRole !== "admin" && isAdminPage(pathname)) {
url.pathname = "/";
url.pathname = getHomepageRoute();
return NextResponse.redirect(url);
}

View File

@@ -23,7 +23,9 @@ import {
WebSocketNotification,
} from "@/lib/autogpt-server-api";
import { useBackendAPI } from "@/lib/autogpt-server-api/context";
import { getHomepageRoute } from "@/lib/constants";
import { useSupabase } from "@/lib/supabase/hooks/useSupabase";
import { Flag, useGetFlag } from "@/services/feature-flags/use-get-flag";
import Link from "next/link";
import { usePathname, useRouter } from "next/navigation";
import {
@@ -102,6 +104,8 @@ export default function OnboardingProvider({
const pathname = usePathname();
const router = useRouter();
const { isLoggedIn } = useSupabase();
const isChatEnabled = useGetFlag(Flag.CHAT);
const homepageRoute = getHomepageRoute(isChatEnabled);
useOnboardingTimezoneDetection();
@@ -146,7 +150,7 @@ export default function OnboardingProvider({
if (isOnOnboardingRoute) {
const enabled = await resolveResponse(getV1IsOnboardingEnabled());
if (!enabled) {
router.push("/");
router.push(homepageRoute);
return;
}
}
@@ -158,7 +162,7 @@ export default function OnboardingProvider({
isOnOnboardingRoute &&
shouldRedirectFromOnboarding(onboarding.completedSteps, pathname)
) {
router.push("/");
router.push(homepageRoute);
}
} catch (error) {
console.error("Failed to initialize onboarding:", error);
@@ -173,7 +177,7 @@ export default function OnboardingProvider({
}
initializeOnboarding();
}, [api, isOnOnboardingRoute, router, isLoggedIn, pathname]);
}, [api, homepageRoute, isOnOnboardingRoute, router, isLoggedIn, pathname]);
const handleOnboardingNotification = useCallback(
(notification: WebSocketNotification) => {

View File

@@ -83,10 +83,6 @@ function getPostHogCredentials() {
};
}
function getLaunchDarklyClientId() {
return process.env.NEXT_PUBLIC_LAUNCHDARKLY_CLIENT_ID;
}
function isProductionBuild() {
return process.env.NODE_ENV === "production";
}
@@ -124,10 +120,7 @@ function isVercelPreview() {
}
function areFeatureFlagsEnabled() {
return (
process.env.NEXT_PUBLIC_LAUNCHDARKLY_ENABLED === "true" &&
Boolean(process.env.NEXT_PUBLIC_LAUNCHDARKLY_CLIENT_ID)
);
return process.env.NEXT_PUBLIC_LAUNCHDARKLY_ENABLED === "enabled";
}
function isPostHogEnabled() {
@@ -150,7 +143,6 @@ export const environment = {
getSupabaseAnonKey,
getPreviewStealingDev,
getPostHogCredentials,
getLaunchDarklyClientId,
// Assertions
isServerSide,
isClientSide,

View File

@@ -1,59 +0,0 @@
"use client";
import { LoadingSpinner } from "@/components/atoms/LoadingSpinner/LoadingSpinner";
import { useLDClient } from "launchdarkly-react-client-sdk";
import { useRouter } from "next/navigation";
import { ReactNode, useEffect, useState } from "react";
import { environment } from "../environment";
import { Flag, useGetFlag } from "./use-get-flag";
interface FeatureFlagRedirectProps {
flag: Flag;
whenDisabled: string;
children: ReactNode;
}
export function FeatureFlagPage({
flag,
whenDisabled,
children,
}: FeatureFlagRedirectProps) {
const [isLoading, setIsLoading] = useState(true);
const router = useRouter();
const flagValue = useGetFlag(flag);
const ldClient = useLDClient();
const ldEnabled = environment.areFeatureFlagsEnabled();
const ldReady = Boolean(ldClient);
const flagEnabled = Boolean(flagValue);
useEffect(() => {
const initialize = async () => {
if (!ldEnabled) {
router.replace(whenDisabled);
setIsLoading(false);
return;
}
// Wait for LaunchDarkly to initialize when enabled to prevent race conditions
if (ldEnabled && !ldReady) return;
try {
await ldClient?.waitForInitialization();
if (!flagEnabled) router.replace(whenDisabled);
} catch (error) {
console.error(error);
router.replace(whenDisabled);
} finally {
setIsLoading(false);
}
};
initialize();
}, [ldReady, flagEnabled]);
return isLoading || !flagEnabled ? (
<LoadingSpinner size="large" cover />
) : (
<>{children}</>
);
}

View File

@@ -1,51 +0,0 @@
"use client";
import { LoadingSpinner } from "@/components/atoms/LoadingSpinner/LoadingSpinner";
import { useLDClient } from "launchdarkly-react-client-sdk";
import { useRouter } from "next/navigation";
import { useEffect } from "react";
import { environment } from "../environment";
import { Flag, useGetFlag } from "./use-get-flag";
interface FeatureFlagRedirectProps {
flag: Flag;
whenEnabled: string;
whenDisabled: string;
}
export function FeatureFlagRedirect({
flag,
whenEnabled,
whenDisabled,
}: FeatureFlagRedirectProps) {
const router = useRouter();
const flagValue = useGetFlag(flag);
const ldEnabled = environment.areFeatureFlagsEnabled();
const ldClient = useLDClient();
const ldReady = Boolean(ldClient);
const flagEnabled = Boolean(flagValue);
useEffect(() => {
const initialize = async () => {
if (!ldEnabled) {
router.replace(whenDisabled);
return;
}
// Wait for LaunchDarkly to initialize when enabled to prevent race conditions
if (ldEnabled && !ldReady) return;
try {
await ldClient?.waitForInitialization();
router.replace(flagEnabled ? whenEnabled : whenDisabled);
} catch (error) {
console.error(error);
router.replace(whenDisabled);
}
};
initialize();
}, [ldReady, flagEnabled]);
return <LoadingSpinner size="large" cover />;
}

View File

@@ -1,6 +1,5 @@
"use client";
import { LoadingSpinner } from "@/components/atoms/LoadingSpinner/LoadingSpinner";
import { useSupabase } from "@/lib/supabase/hooks/useSupabase";
import * as Sentry from "@sentry/nextjs";
import { LDProvider } from "launchdarkly-react-client-sdk";
@@ -8,17 +7,17 @@ import type { ReactNode } from "react";
import { useMemo } from "react";
import { environment } from "../environment";
const clientId = process.env.NEXT_PUBLIC_LAUNCHDARKLY_CLIENT_ID;
const envEnabled = process.env.NEXT_PUBLIC_LAUNCHDARKLY_ENABLED === "true";
const LAUNCHDARKLY_INIT_TIMEOUT_MS = 5000;
export function LaunchDarklyProvider({ children }: { children: ReactNode }) {
const { user, isUserLoading } = useSupabase();
const envEnabled = environment.areFeatureFlagsEnabled();
const clientId = environment.getLaunchDarklyClientId();
const isCloud = environment.isCloud();
const isLaunchDarklyConfigured = isCloud && envEnabled && clientId;
const context = useMemo(() => {
if (isUserLoading) return;
if (!user) {
if (isUserLoading || !user) {
return {
kind: "user" as const,
key: "anonymous",
@@ -37,17 +36,15 @@ export function LaunchDarklyProvider({ children }: { children: ReactNode }) {
};
}, [user, isUserLoading]);
if (!envEnabled) {
if (!isLaunchDarklyConfigured) {
return <>{children}</>;
}
if (isUserLoading) {
return <LoadingSpinner size="large" cover />;
}
return (
<LDProvider
clientSideID={clientId ?? ""}
// Add this key prop. It will be 'anonymous' when logged out,
key={context.key}
clientSideID={clientId}
context={context}
timeout={LAUNCHDARKLY_INIT_TIMEOUT_MS}
reactOptions={{ useCamelCaseFlagKeys: false }}

View File

@@ -1,7 +1,6 @@
"use client";
import { DEFAULT_SEARCH_TERMS } from "@/app/(platform)/marketplace/components/HeroSection/helpers";
import { environment } from "@/services/environment";
import { useFlags } from "launchdarkly-react-client-sdk";
export enum Flag {
@@ -19,9 +18,24 @@ export enum Flag {
CHAT = "chat",
}
export type FlagValues = {
[Flag.BETA_BLOCKS]: string[];
[Flag.NEW_BLOCK_MENU]: boolean;
[Flag.NEW_AGENT_RUNS]: boolean;
[Flag.GRAPH_SEARCH]: boolean;
[Flag.ENABLE_ENHANCED_OUTPUT_HANDLING]: boolean;
[Flag.NEW_FLOW_EDITOR]: boolean;
[Flag.BUILDER_VIEW_SWITCH]: boolean;
[Flag.SHARE_EXECUTION_RESULTS]: boolean;
[Flag.AGENT_FAVORITING]: boolean;
[Flag.MARKETPLACE_SEARCH_TERMS]: string[];
[Flag.ENABLE_PLATFORM_PAYMENT]: boolean;
[Flag.CHAT]: boolean;
};
const isPwMockEnabled = process.env.NEXT_PUBLIC_PW_TEST === "true";
const defaultFlags = {
const mockFlags = {
[Flag.BETA_BLOCKS]: [],
[Flag.NEW_BLOCK_MENU]: false,
[Flag.NEW_AGENT_RUNS]: false,
@@ -36,16 +50,17 @@ const defaultFlags = {
[Flag.CHAT]: false,
};
type FlagValues = typeof defaultFlags;
export function useGetFlag<T extends Flag>(flag: T): FlagValues[T] {
export function useGetFlag<T extends Flag>(flag: T): FlagValues[T] | null {
const currentFlags = useFlags<FlagValues>();
const flagValue = currentFlags[flag];
const areFlagsEnabled = environment.areFeatureFlagsEnabled();
if (!areFlagsEnabled || isPwMockEnabled) {
return defaultFlags[flag];
const envEnabled = process.env.NEXT_PUBLIC_LAUNCHDARKLY_ENABLED === "true";
const clientId = process.env.NEXT_PUBLIC_LAUNCHDARKLY_CLIENT_ID;
const isLaunchDarklyConfigured = envEnabled && Boolean(clientId);
if (!isLaunchDarklyConfigured || isPwMockEnabled) {
return mockFlags[flag];
}
return flagValue ?? defaultFlags[flag];
return flagValue ?? mockFlags[flag];
}

View File

@@ -124,3 +124,4 @@ test("user can signup with existing email handling", async ({
console.error("❌ Duplicate email handling test failed:", error);
}
});

View File

@@ -8,7 +8,6 @@
.buildlog/
.history
.svn/
.next/
migrate_working_dir/
# IntelliJ related

View File

@@ -3,45 +3,45 @@ const MANIFEST = 'flutter-app-manifest';
const TEMP = 'flutter-temp-cache';
const CACHE_NAME = 'flutter-app-cache';
const RESOURCES = {"flutter.js": "6fef97aeca90b426343ba6c5c9dc5d4a",
"icons/Icon-512.png": "96e752610906ba2a93c65f8abe1645f1",
"icons/Icon-maskable-512.png": "301a7604d45b3e739efc881eb04896ea",
"icons/Icon-192.png": "ac9a721a12bbc803b44f645561ecb1e1",
"icons/Icon-maskable-192.png": "c457ef57daa1d16f64b27b786ec2ea3c",
"manifest.json": "0fa552613b8ec0fda5cda565914e3b16",
"index.html": "3442c510a9ea217672c82e799ae070f7",
"/": "3442c510a9ea217672c82e799ae070f7",
"assets/shaders/ink_sparkle.frag": "f8b80e740d33eb157090be4e995febdf",
"assets/assets/tree_structure.json": "cda9b1a239f956c547411efad9f7c794",
"assets/assets/coding_tree_structure.json": "017a857cf3e274346a0a7eab4ce02eed",
"assets/assets/general_tree_structure.json": "41dfbcdc2349dcdda2b082e597c6d5ee",
"assets/assets/github_logo.svg.png": "ba087b073efdc4996b035d3a12bad0e4",
"assets/assets/images/discord_logo.png": "0e4a4162c5de8665a7d63ae9665405ae",
"assets/assets/images/github_logo.svg.png": "ba087b073efdc4996b035d3a12bad0e4",
"assets/assets/images/twitter_logo.png": "af6c11b96a5e732b8dfda86a2351ecab",
"assets/assets/images/google_logo.svg.png": "0e29f8e1acfb8996437dbb2b0f591f19",
"assets/assets/images/autogpt_logo.png": "6a5362a7d1f2f840e43ee259e733476c",
"assets/assets/google_logo.svg.png": "0e29f8e1acfb8996437dbb2b0f591f19",
"assets/assets/scrape_synthesize_tree_structure.json": "a9665c1b465bb0cb939c7210f2bf0b13",
"assets/assets/data_tree_structure.json": "5f9627548304155821968182f3883ca7",
"assets/fonts/MaterialIcons-Regular.otf": "245e0462249d95ad589a087f1c9f58e1",
"assets/NOTICES": "28ba0c63fc6e4d1ef829af7441e27f78",
"assets/packages/fluttertoast/assets/toastify.css": "a85675050054f179444bc5ad70ffc635",
"assets/packages/fluttertoast/assets/toastify.js": "56e2c9cedd97f10e7e5f1cebd85d53e3",
"assets/packages/cupertino_icons/assets/CupertinoIcons.ttf": "055d9e87e4a40dbf72b2af1a20865d57",
"assets/FontManifest.json": "dc3d03800ccca4601324923c0b1d6d57",
"assets/AssetManifest.bin": "791447d17744ac2ade3999c1672fdbe8",
"assets/AssetManifest.json": "1b1e4a4276722b65eb1ef765e2991840",
"canvaskit/chromium/canvaskit.wasm": "393ec8fb05d94036734f8104fa550a67",
"canvaskit/chromium/canvaskit.js": "ffb2bb6484d5689d91f393b60664d530",
"canvaskit/skwasm.worker.js": "51253d3321b11ddb8d73fa8aa87d3b15",
const RESOURCES = {"canvaskit/skwasm.worker.js": "51253d3321b11ddb8d73fa8aa87d3b15",
"canvaskit/skwasm.js": "95f16c6690f955a45b2317496983dbe9",
"canvaskit/canvaskit.wasm": "d9f69e0f428f695dc3d66b3a83a4aa8e",
"canvaskit/canvaskit.js": "5caccb235fad20e9b72ea6da5a0094e6",
"canvaskit/skwasm.wasm": "d1fde2560be92c0b07ad9cf9acb10d05",
"canvaskit/canvaskit.js": "5caccb235fad20e9b72ea6da5a0094e6",
"canvaskit/chromium/canvaskit.wasm": "393ec8fb05d94036734f8104fa550a67",
"canvaskit/chromium/canvaskit.js": "ffb2bb6484d5689d91f393b60664d530",
"icons/Icon-maskable-192.png": "c457ef57daa1d16f64b27b786ec2ea3c",
"icons/Icon-maskable-512.png": "301a7604d45b3e739efc881eb04896ea",
"icons/Icon-512.png": "96e752610906ba2a93c65f8abe1645f1",
"icons/Icon-192.png": "ac9a721a12bbc803b44f645561ecb1e1",
"manifest.json": "0fa552613b8ec0fda5cda565914e3b16",
"favicon.png": "5dcef449791fa27946b3d35ad8803796",
"version.json": "46a52461e018faa623d9196334aa3f50",
"main.dart.js": "6fcbf8bbcb0a76fae9029f72ac7fbdc3"};
"index.html": "e6981504a32bf86f892909c1875df208",
"/": "e6981504a32bf86f892909c1875df208",
"main.dart.js": "6fcbf8bbcb0a76fae9029f72ac7fbdc3",
"assets/AssetManifest.json": "1b1e4a4276722b65eb1ef765e2991840",
"assets/packages/cupertino_icons/assets/CupertinoIcons.ttf": "055d9e87e4a40dbf72b2af1a20865d57",
"assets/packages/fluttertoast/assets/toastify.js": "56e2c9cedd97f10e7e5f1cebd85d53e3",
"assets/packages/fluttertoast/assets/toastify.css": "a85675050054f179444bc5ad70ffc635",
"assets/shaders/ink_sparkle.frag": "f8b80e740d33eb157090be4e995febdf",
"assets/fonts/MaterialIcons-Regular.otf": "245e0462249d95ad589a087f1c9f58e1",
"assets/assets/images/twitter_logo.png": "af6c11b96a5e732b8dfda86a2351ecab",
"assets/assets/images/discord_logo.png": "0e4a4162c5de8665a7d63ae9665405ae",
"assets/assets/images/google_logo.svg.png": "0e29f8e1acfb8996437dbb2b0f591f19",
"assets/assets/images/autogpt_logo.png": "6a5362a7d1f2f840e43ee259e733476c",
"assets/assets/images/github_logo.svg.png": "ba087b073efdc4996b035d3a12bad0e4",
"assets/assets/scrape_synthesize_tree_structure.json": "a9665c1b465bb0cb939c7210f2bf0b13",
"assets/assets/coding_tree_structure.json": "017a857cf3e274346a0a7eab4ce02eed",
"assets/assets/general_tree_structure.json": "41dfbcdc2349dcdda2b082e597c6d5ee",
"assets/assets/google_logo.svg.png": "0e29f8e1acfb8996437dbb2b0f591f19",
"assets/assets/tree_structure.json": "cda9b1a239f956c547411efad9f7c794",
"assets/assets/data_tree_structure.json": "5f9627548304155821968182f3883ca7",
"assets/assets/github_logo.svg.png": "ba087b073efdc4996b035d3a12bad0e4",
"assets/NOTICES": "28ba0c63fc6e4d1ef829af7441e27f78",
"assets/AssetManifest.bin": "791447d17744ac2ade3999c1672fdbe8",
"assets/FontManifest.json": "dc3d03800ccca4601324923c0b1d6d57",
"flutter.js": "6fef97aeca90b426343ba6c5c9dc5d4a"};
// The application shell files that are downloaded before a service worker can
// start.
const CORE = ["main.dart.js",

View File

@@ -35,7 +35,7 @@
<script>
// The value below is injected by flutter build, do not touch.
const serviceWorkerVersion = "1550046101";
const serviceWorkerVersion = "726743092";
</script>
<!-- This script adds the flutter initialization JS code -->
<script src="flutter.js" defer></script>