From 859f3f8c06a862abc068df7c3bd04f30a8a93325 Mon Sep 17 00:00:00 2001 From: Zamil Majdy Date: Tue, 27 Jan 2026 03:22:30 -0600 Subject: [PATCH 1/7] feat(frontend): implement clarification questions UI for agent generation (#11833) ## Summary Add interactive UI to collect user answers when the agent-generator service returns clarifying questions during agent creation/editing. Previously, when the backend asked clarifying questions, the frontend would just display them as text with no way for users to answer. This caused the chat to keep retrying without the necessary context. ## Changes - **ChatMessageData type**: Add `clarification_needed` variant with questions field - **ClarificationQuestionsWidget**: New component with interactive form to collect answers - **parseToolResponse**: Detect and parse `clarification_needed` responses from backend - **ChatMessage**: Render the widget when clarification is needed ## How It Works 1. User requests to create/edit agent 2. Backend returns `ClarificationNeededResponse` with list of questions 3. Frontend shows interactive form with text inputs for each question 4. User fills in answers and clicks "Submit Answers" 5. Answers are sent back as context to the tool 6. Backend receives full context and continues ## UI Features - Shows all questions with examples (if provided) - Input validation (all questions must be answered to submit) - Visual feedback (checkmarks when answered) - Numbered questions for clarity - Submit button disabled until all answered - Follows same design pattern as `credentials_needed` flow ## Related - Backend support for clarification was added in #11819 - Fixes the issue shown in the screenshot where users couldn't answer clarifying questions ## Test plan - [ ] Test creating agent that requires clarifying questions - [ ] Verify questions are displayed in interactive form - [ ] Verify all questions must be answered before submitting - [ ] Verify answers are sent back to backend as context - [ ] Verify agent creation continues with full context --- .../Chat/components/ChatContainer/helpers.ts | 17 ++ .../components/ChatMessage/ChatMessage.tsx | 25 +++ .../components/ChatMessage/useChatMessage.ts | 13 ++ .../ClarificationQuestionsWidget.tsx | 154 ++++++++++++++++++ 4 files changed, 209 insertions(+) create mode 100644 autogpt_platform/frontend/src/components/contextual/Chat/components/ClarificationQuestionsWidget/ClarificationQuestionsWidget.tsx diff --git a/autogpt_platform/frontend/src/components/contextual/Chat/components/ChatContainer/helpers.ts b/autogpt_platform/frontend/src/components/contextual/Chat/components/ChatContainer/helpers.ts index 9d51003a93..0edd1b411a 100644 --- a/autogpt_platform/frontend/src/components/contextual/Chat/components/ChatContainer/helpers.ts +++ b/autogpt_platform/frontend/src/components/contextual/Chat/components/ChatContainer/helpers.ts @@ -213,6 +213,23 @@ export function parseToolResponse( timestamp: timestamp || new Date(), }; } + if (responseType === "clarification_needed") { + return { + type: "clarification_needed", + toolName, + questions: + (parsedResult.questions as Array<{ + question: string; + keyword: string; + example?: string; + }>) || [], + message: + (parsedResult.message as string) || + "I need more information to proceed.", + sessionId: (parsedResult.session_id as string) || "", + timestamp: timestamp || new Date(), + }; + } if (responseType === "need_login") { return { type: "login_needed", diff --git 
a/autogpt_platform/frontend/src/components/contextual/Chat/components/ChatMessage/ChatMessage.tsx b/autogpt_platform/frontend/src/components/contextual/Chat/components/ChatMessage/ChatMessage.tsx index a2827ce611..0fee33dbc0 100644 --- a/autogpt_platform/frontend/src/components/contextual/Chat/components/ChatMessage/ChatMessage.tsx +++ b/autogpt_platform/frontend/src/components/contextual/Chat/components/ChatMessage/ChatMessage.tsx @@ -14,6 +14,7 @@ import { AgentCarouselMessage } from "../AgentCarouselMessage/AgentCarouselMessa import { AIChatBubble } from "../AIChatBubble/AIChatBubble"; import { AuthPromptWidget } from "../AuthPromptWidget/AuthPromptWidget"; import { ChatCredentialsSetup } from "../ChatCredentialsSetup/ChatCredentialsSetup"; +import { ClarificationQuestionsWidget } from "../ClarificationQuestionsWidget/ClarificationQuestionsWidget"; import { ExecutionStartedMessage } from "../ExecutionStartedMessage/ExecutionStartedMessage"; import { MarkdownContent } from "../MarkdownContent/MarkdownContent"; import { NoResultsMessage } from "../NoResultsMessage/NoResultsMessage"; @@ -69,6 +70,7 @@ export function ChatMessage({ isToolResponse, isLoginNeeded, isCredentialsNeeded, + isClarificationNeeded, } = useChatMessage(message); const displayContent = getDisplayContent(message, isUser); @@ -96,6 +98,18 @@ export function ChatMessage({ } } + function handleClarificationAnswers(answers: Record<string, string>) { + if (onSendMessage) { + const contextMessage = Object.entries(answers) + .map(([keyword, answer]) => `${keyword}: ${answer}`) + .join("\n"); + + onSendMessage( + `I have the answers to your questions:\n\n${contextMessage}\n\nPlease proceed with creating the agent.`, + ); + } + } + const handleCopy = useCallback( async function handleCopy() { if (message.type !== "message") return; @@ -141,6 +155,17 @@ export function ChatMessage({ ); } + if (isClarificationNeeded && message.type === "clarification_needed") { + return ( + <ClarificationQuestionsWidget + questions={message.questions} + message={message.message} + onSubmitAnswers={handleClarificationAnswers} + /> + ); + } + // Render login needed messages if (isLoginNeeded && message.type === "login_needed") { // If user is already logged in, show success message instead of auth prompt diff --git a/autogpt_platform/frontend/src/components/contextual/Chat/components/ChatMessage/useChatMessage.ts b/autogpt_platform/frontend/src/components/contextual/Chat/components/ChatMessage/useChatMessage.ts index 5ee61bc554..142b140c8b 100644 --- a/autogpt_platform/frontend/src/components/contextual/Chat/components/ChatMessage/useChatMessage.ts +++ b/autogpt_platform/frontend/src/components/contextual/Chat/components/ChatMessage/useChatMessage.ts @@ -91,6 +91,18 @@ export type ChatMessageData = credentialsSchema?: Record; message: string; timestamp?: string | Date; + } + | { + type: "clarification_needed"; + toolName: string; + questions: Array<{ + question: string; + keyword: string; + example?: string; + }>; + message: string; + sessionId: string; + timestamp?: string | Date; }; export function useChatMessage(message: ChatMessageData) { @@ -111,5 +123,6 @@ export function useChatMessage(message: ChatMessageData) { isAgentCarousel: message.type === "agent_carousel", isExecutionStarted: message.type === "execution_started", isInputsNeeded: message.type === "inputs_needed", + isClarificationNeeded: message.type === "clarification_needed", }; } diff --git a/autogpt_platform/frontend/src/components/contextual/Chat/components/ClarificationQuestionsWidget/ClarificationQuestionsWidget.tsx
b/autogpt_platform/frontend/src/components/contextual/Chat/components/ClarificationQuestionsWidget/ClarificationQuestionsWidget.tsx new file mode 100644 index 0000000000..b2d3608254 --- /dev/null +++ b/autogpt_platform/frontend/src/components/contextual/Chat/components/ClarificationQuestionsWidget/ClarificationQuestionsWidget.tsx @@ -0,0 +1,154 @@ +"use client"; + +import { Button } from "@/components/atoms/Button/Button"; +import { Card } from "@/components/atoms/Card/Card"; +import { Input } from "@/components/atoms/Input/Input"; +import { Text } from "@/components/atoms/Text/Text"; +import { cn } from "@/lib/utils"; +import { CheckCircleIcon, QuestionIcon } from "@phosphor-icons/react"; +import { useState } from "react"; + +export interface ClarifyingQuestion { + question: string; + keyword: string; + example?: string; +} + +interface Props { + questions: ClarifyingQuestion[]; + message: string; + onSubmitAnswers: (answers: Record<string, string>) => void; + onCancel?: () => void; + className?: string; +} + +export function ClarificationQuestionsWidget({ + questions, + message, + onSubmitAnswers, + onCancel, + className, +}: Props) { + const [answers, setAnswers] = useState<Record<string, string>>({}); + + function handleAnswerChange(keyword: string, value: string) { + setAnswers((prev) => ({ ...prev, [keyword]: value })); + } + + function handleSubmit() { + // Check if all questions are answered + const allAnswered = questions.every((q) => answers[q.keyword]?.trim()); + if (!allAnswered) { + return; + } + onSubmitAnswers(answers); + } + + const allAnswered = questions.every((q) => answers[q.keyword]?.trim()); + + return ( +
+
+
+
+ +
+
+ +
+ +
+ + I need more information + + + {message} + +
+ +
+ {questions.map((q, index) => { + const isAnswered = !!answers[q.keyword]?.trim(); + + return ( +
+
+ {isAnswered ? ( + + ) : ( +
+ {index + 1} +
+ )} +
+ + {q.question} + + {q.example && ( + + Example: {q.example} + + )} + + handleAnswerChange(q.keyword, e.target.value) + } + /> +
+
+
+ ); + })} +
+ +
+ + {onCancel && ( + + )} +
+
+
+
+
+ ); +} From bab436231a1e330e4213cca0993ad5dbcdc2efea Mon Sep 17 00:00:00 2001 From: Swifty Date: Tue, 27 Jan 2026 13:07:42 +0100 Subject: [PATCH 2/7] refactor(backend): remove Langfuse tracing from chat system (#11829) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We are removing Langfuse tracing from the chat/copilot system in favor of using OpenRouter's broadcast feature, which keeps our codebase simpler. Langfuse prompt management is retained for fetching system prompts. ### Changes ๐Ÿ—๏ธ **Removed Langfuse tracing:** - Removed `@observe` decorators from all 11 chat tool files - Removed `langfuse.openai` wrapper (now using standard `openai` client) - Removed `start_as_current_observation` and `propagate_attributes` context managers from `service.py` - Removed `update_current_trace()`, `update_current_span()`, `span.update()` calls **Retained Langfuse prompt management:** - `langfuse.get_prompt()` for fetching system prompts - `_is_langfuse_configured()` check for prompt availability - Configuration for `langfuse_prompt_name` **Files modified:** - `backend/api/features/chat/service.py` - `backend/api/features/chat/tools/*.py` (11 tool files) ### Checklist ๐Ÿ“‹ #### For code changes: - [x] I have clearly listed my changes in the PR description - [x] I have made a test plan - [x] I have tested my changes according to the test plan: - [x] Verified `poetry run format` passes - [x] Verified no `@observe` decorators remain in chat tools - [x] Verified Langfuse prompt fetching is still functional (code preserved) --- .../backend/api/features/chat/service.py | 609 ++++++++---------- .../features/chat/tools/add_understanding.py | 3 - .../api/features/chat/tools/agent_output.py | 2 - .../api/features/chat/tools/create_agent.py | 3 - .../api/features/chat/tools/edit_agent.py | 3 - .../api/features/chat/tools/find_agent.py | 3 - .../api/features/chat/tools/find_block.py | 2 - .../features/chat/tools/find_library_agent.py | 3 - .../api/features/chat/tools/get_doc_page.py | 3 - .../api/features/chat/tools/run_agent.py | 2 - .../api/features/chat/tools/run_block.py | 3 - .../api/features/chat/tools/search_docs.py | 2 - 12 files changed, 282 insertions(+), 356 deletions(-) diff --git a/autogpt_platform/backend/backend/api/features/chat/service.py b/autogpt_platform/backend/backend/api/features/chat/service.py index e10343fff6..3976cd5f38 100644 --- a/autogpt_platform/backend/backend/api/features/chat/service.py +++ b/autogpt_platform/backend/backend/api/features/chat/service.py @@ -5,9 +5,9 @@ from asyncio import CancelledError from collections.abc import AsyncGenerator from typing import Any +import openai import orjson -from langfuse import get_client, propagate_attributes -from langfuse.openai import openai # type: ignore +from langfuse import get_client from openai import ( APIConnectionError, APIError, @@ -299,347 +299,302 @@ async def stream_chat_completion( # Build system prompt with business understanding system_prompt, understanding = await _build_system_prompt(user_id) - # Create Langfuse trace for this LLM call (each call gets its own trace, grouped by session_id) - # Using v3 SDK: start_observation creates a root span, update_trace sets trace-level attributes - input = message - if not message and tool_call_response: - input = tool_call_response + # Initialize variables for streaming + assistant_response = ChatMessage( + role="assistant", + content="", + ) + accumulated_tool_calls: list[dict[str, Any]] = [] + has_saved_assistant_message = False 
+ has_appended_streaming_message = False + last_cache_time = 0.0 + last_cache_content_len = 0 - langfuse = get_client() - with langfuse.start_as_current_observation( - as_type="span", - name="user-copilot-request", - input=input, - ) as span: - with propagate_attributes( - session_id=session_id, - user_id=user_id, - tags=["copilot"], - metadata={ - "users_information": format_understanding_for_prompt(understanding)[ - :200 - ] # langfuse only accepts upto to 200 chars - }, + has_yielded_end = False + has_yielded_error = False + has_done_tool_call = False + has_received_text = False + text_streaming_ended = False + tool_response_messages: list[ChatMessage] = [] + should_retry = False + + # Generate unique IDs for AI SDK protocol + import uuid as uuid_module + + message_id = str(uuid_module.uuid4()) + text_block_id = str(uuid_module.uuid4()) + + # Yield message start + yield StreamStart(messageId=message_id) + + try: + async for chunk in _stream_chat_chunks( + session=session, + tools=tools, + system_prompt=system_prompt, + text_block_id=text_block_id, ): - # Initialize variables that will be used in finally block (must be defined before try) - assistant_response = ChatMessage( - role="assistant", - content="", - ) - accumulated_tool_calls: list[dict[str, Any]] = [] - has_saved_assistant_message = False - has_appended_streaming_message = False - last_cache_time = 0.0 - last_cache_content_len = 0 - - # Wrap main logic in try/finally to ensure Langfuse observations are always ended - has_yielded_end = False - has_yielded_error = False - has_done_tool_call = False - has_received_text = False - text_streaming_ended = False - tool_response_messages: list[ChatMessage] = [] - should_retry = False - - # Generate unique IDs for AI SDK protocol - import uuid as uuid_module - - message_id = str(uuid_module.uuid4()) - text_block_id = str(uuid_module.uuid4()) - - # Yield message start - yield StreamStart(messageId=message_id) - - try: - async for chunk in _stream_chat_chunks( - session=session, - tools=tools, - system_prompt=system_prompt, - text_block_id=text_block_id, + if isinstance(chunk, StreamTextStart): + # Emit text-start before first text delta + if not has_received_text: + yield chunk + elif isinstance(chunk, StreamTextDelta): + delta = chunk.delta or "" + assert assistant_response.content is not None + assistant_response.content += delta + has_received_text = True + if not has_appended_streaming_message: + session.messages.append(assistant_response) + has_appended_streaming_message = True + current_time = time.monotonic() + content_len = len(assistant_response.content) + if ( + current_time - last_cache_time >= 1.0 + and content_len > last_cache_content_len ): - - if isinstance(chunk, StreamTextStart): - # Emit text-start before first text delta - if not has_received_text: - yield chunk - elif isinstance(chunk, StreamTextDelta): - delta = chunk.delta or "" - assert assistant_response.content is not None - assistant_response.content += delta - has_received_text = True - if not has_appended_streaming_message: - session.messages.append(assistant_response) - has_appended_streaming_message = True - current_time = time.monotonic() - content_len = len(assistant_response.content) - if ( - current_time - last_cache_time >= 1.0 - and content_len > last_cache_content_len - ): - try: - await cache_chat_session(session) - except Exception as e: - logger.warning( - f"Failed to cache partial session {session.session_id}: {e}" - ) - last_cache_time = current_time - last_cache_content_len = content_len - yield 
chunk - elif isinstance(chunk, StreamTextEnd): - # Emit text-end after text completes - if has_received_text and not text_streaming_ended: - text_streaming_ended = True - if assistant_response.content: - logger.warn( - f"StreamTextEnd: Attempting to set output {assistant_response.content}" - ) - span.update_trace(output=assistant_response.content) - span.update(output=assistant_response.content) - yield chunk - elif isinstance(chunk, StreamToolInputStart): - # Emit text-end before first tool call, but only if we've received text - if has_received_text and not text_streaming_ended: - yield StreamTextEnd(id=text_block_id) - text_streaming_ended = True - yield chunk - elif isinstance(chunk, StreamToolInputAvailable): - # Accumulate tool calls in OpenAI format - accumulated_tool_calls.append( - { - "id": chunk.toolCallId, - "type": "function", - "function": { - "name": chunk.toolName, - "arguments": orjson.dumps(chunk.input).decode( - "utf-8" - ), - }, - } - ) - elif isinstance(chunk, StreamToolOutputAvailable): - result_content = ( - chunk.output - if isinstance(chunk.output, str) - else orjson.dumps(chunk.output).decode("utf-8") - ) - tool_response_messages.append( - ChatMessage( - role="tool", - content=result_content, - tool_call_id=chunk.toolCallId, - ) - ) - has_done_tool_call = True - # Track if any tool execution failed - if not chunk.success: - logger.warning( - f"Tool {chunk.toolName} (ID: {chunk.toolCallId}) execution failed" - ) - yield chunk - elif isinstance(chunk, StreamFinish): - if not has_done_tool_call: - # Emit text-end before finish if we received text but haven't closed it - if has_received_text and not text_streaming_ended: - yield StreamTextEnd(id=text_block_id) - text_streaming_ended = True - - # Save assistant message before yielding finish to ensure it's persisted - # even if client disconnects immediately after receiving StreamFinish - if not has_saved_assistant_message: - messages_to_save_early: list[ChatMessage] = [] - if accumulated_tool_calls: - assistant_response.tool_calls = ( - accumulated_tool_calls - ) - if not has_appended_streaming_message and ( - assistant_response.content - or assistant_response.tool_calls - ): - messages_to_save_early.append(assistant_response) - messages_to_save_early.extend(tool_response_messages) - - if messages_to_save_early: - session.messages.extend(messages_to_save_early) - logger.info( - f"Saving assistant message before StreamFinish: " - f"content_len={len(assistant_response.content or '')}, " - f"tool_calls={len(assistant_response.tool_calls or [])}, " - f"tool_responses={len(tool_response_messages)}" - ) - if ( - messages_to_save_early - or has_appended_streaming_message - ): - await upsert_chat_session(session) - has_saved_assistant_message = True - - has_yielded_end = True - yield chunk - elif isinstance(chunk, StreamError): - has_yielded_error = True - yield chunk - elif isinstance(chunk, StreamUsage): - session.usage.append( - Usage( - prompt_tokens=chunk.promptTokens, - completion_tokens=chunk.completionTokens, - total_tokens=chunk.totalTokens, - ) - ) - else: - logger.error( - f"Unknown chunk type: {type(chunk)}", exc_info=True - ) - if assistant_response.content: - langfuse.update_current_trace(output=assistant_response.content) - langfuse.update_current_span(output=assistant_response.content) - elif tool_response_messages: - langfuse.update_current_trace(output=str(tool_response_messages)) - langfuse.update_current_span(output=str(tool_response_messages)) - - except CancelledError: - if not 
has_saved_assistant_message: - if accumulated_tool_calls: - assistant_response.tool_calls = accumulated_tool_calls - if assistant_response.content: - assistant_response.content = ( - f"{assistant_response.content}\n\n[interrupted]" - ) - else: - assistant_response.content = "[interrupted]" - if not has_appended_streaming_message: - session.messages.append(assistant_response) - if tool_response_messages: - session.messages.extend(tool_response_messages) try: - await upsert_chat_session(session) + await cache_chat_session(session) except Exception as e: logger.warning( - f"Failed to save interrupted session {session.session_id}: {e}" + f"Failed to cache partial session {session.session_id}: {e}" ) - raise - except Exception as e: - logger.error(f"Error during stream: {e!s}", exc_info=True) - - # Check if this is a retryable error (JSON parsing, incomplete tool calls, etc.) - is_retryable = isinstance( - e, (orjson.JSONDecodeError, KeyError, TypeError) - ) - - if is_retryable and retry_count < config.max_retries: - logger.info( - f"Retryable error encountered. Attempt {retry_count + 1}/{config.max_retries}" - ) - should_retry = True - else: - # Non-retryable error or max retries exceeded - # Save any partial progress before reporting error - messages_to_save: list[ChatMessage] = [] - - # Add assistant message if it has content or tool calls - if accumulated_tool_calls: - assistant_response.tool_calls = accumulated_tool_calls - if not has_appended_streaming_message and ( - assistant_response.content or assistant_response.tool_calls - ): - messages_to_save.append(assistant_response) - - # Add tool response messages after assistant message - messages_to_save.extend(tool_response_messages) - - if not has_saved_assistant_message: - if messages_to_save: - session.messages.extend(messages_to_save) - if messages_to_save or has_appended_streaming_message: - await upsert_chat_session(session) - - if not has_yielded_error: - error_message = str(e) - if not is_retryable: - error_message = f"Non-retryable error: {error_message}" - elif retry_count >= config.max_retries: - error_message = f"Max retries ({config.max_retries}) exceeded: {error_message}" - - error_response = StreamError(errorText=error_message) - yield error_response - if not has_yielded_end: - yield StreamFinish() - return - - # Handle retry outside of exception handler to avoid nesting - if should_retry and retry_count < config.max_retries: - logger.info( - f"Retrying stream_chat_completion for session {session_id}, attempt {retry_count + 1}" - ) - async for chunk in stream_chat_completion( - session_id=session.session_id, - user_id=user_id, - retry_count=retry_count + 1, - session=session, - context=context, - ): + last_cache_time = current_time + last_cache_content_len = content_len + yield chunk + elif isinstance(chunk, StreamTextEnd): + # Emit text-end after text completes + if has_received_text and not text_streaming_ended: + text_streaming_ended = True yield chunk - return # Exit after retry to avoid double-saving in finally block + elif isinstance(chunk, StreamToolInputStart): + # Emit text-end before first tool call, but only if we've received text + if has_received_text and not text_streaming_ended: + yield StreamTextEnd(id=text_block_id) + text_streaming_ended = True + yield chunk + elif isinstance(chunk, StreamToolInputAvailable): + # Accumulate tool calls in OpenAI format + accumulated_tool_calls.append( + { + "id": chunk.toolCallId, + "type": "function", + "function": { + "name": chunk.toolName, + "arguments": 
orjson.dumps(chunk.input).decode("utf-8"), + }, + } + ) + yield chunk + elif isinstance(chunk, StreamToolOutputAvailable): + result_content = ( + chunk.output + if isinstance(chunk.output, str) + else orjson.dumps(chunk.output).decode("utf-8") + ) + tool_response_messages.append( + ChatMessage( + role="tool", + content=result_content, + tool_call_id=chunk.toolCallId, + ) + ) + has_done_tool_call = True + # Track if any tool execution failed + if not chunk.success: + logger.warning( + f"Tool {chunk.toolName} (ID: {chunk.toolCallId}) execution failed" + ) + yield chunk + elif isinstance(chunk, StreamFinish): + if not has_done_tool_call: + # Emit text-end before finish if we received text but haven't closed it + if has_received_text and not text_streaming_ended: + yield StreamTextEnd(id=text_block_id) + text_streaming_ended = True + + # Save assistant message before yielding finish to ensure it's persisted + # even if client disconnects immediately after receiving StreamFinish + if not has_saved_assistant_message: + messages_to_save_early: list[ChatMessage] = [] + if accumulated_tool_calls: + assistant_response.tool_calls = accumulated_tool_calls + if not has_appended_streaming_message and ( + assistant_response.content or assistant_response.tool_calls + ): + messages_to_save_early.append(assistant_response) + messages_to_save_early.extend(tool_response_messages) + + if messages_to_save_early: + session.messages.extend(messages_to_save_early) + logger.info( + f"Saving assistant message before StreamFinish: " + f"content_len={len(assistant_response.content or '')}, " + f"tool_calls={len(assistant_response.tool_calls or [])}, " + f"tool_responses={len(tool_response_messages)}" + ) + if messages_to_save_early or has_appended_streaming_message: + await upsert_chat_session(session) + has_saved_assistant_message = True + + has_yielded_end = True + yield chunk + elif isinstance(chunk, StreamError): + has_yielded_error = True + yield chunk + elif isinstance(chunk, StreamUsage): + session.usage.append( + Usage( + prompt_tokens=chunk.promptTokens, + completion_tokens=chunk.completionTokens, + total_tokens=chunk.totalTokens, + ) + ) + else: + logger.error(f"Unknown chunk type: {type(chunk)}", exc_info=True) + + except CancelledError: + if not has_saved_assistant_message: + if accumulated_tool_calls: + assistant_response.tool_calls = accumulated_tool_calls + if assistant_response.content: + assistant_response.content = ( + f"{assistant_response.content}\n\n[interrupted]" + ) + else: + assistant_response.content = "[interrupted]" + if not has_appended_streaming_message: + session.messages.append(assistant_response) + if tool_response_messages: + session.messages.extend(tool_response_messages) + try: + await upsert_chat_session(session) + except Exception as e: + logger.warning( + f"Failed to save interrupted session {session.session_id}: {e}" + ) + raise + except Exception as e: + logger.error(f"Error during stream: {e!s}", exc_info=True) + + # Check if this is a retryable error (JSON parsing, incomplete tool calls, etc.) + is_retryable = isinstance(e, (orjson.JSONDecodeError, KeyError, TypeError)) + + if is_retryable and retry_count < config.max_retries: + logger.info( + f"Retryable error encountered. 
Attempt {retry_count + 1}/{config.max_retries}" + ) + should_retry = True + else: + # Non-retryable error or max retries exceeded + # Save any partial progress before reporting error + messages_to_save: list[ChatMessage] = [] + + # Add assistant message if it has content or tool calls + if accumulated_tool_calls: + assistant_response.tool_calls = accumulated_tool_calls + if not has_appended_streaming_message and ( + assistant_response.content or assistant_response.tool_calls + ): + messages_to_save.append(assistant_response) + + # Add tool response messages after assistant message + messages_to_save.extend(tool_response_messages) - # Normal completion path - save session and handle tool call continuation - # Only save if we haven't already saved when StreamFinish was received if not has_saved_assistant_message: - logger.info( - f"Normal completion path: session={session.session_id}, " - f"current message_count={len(session.messages)}" - ) - - # Build the messages list in the correct order - messages_to_save: list[ChatMessage] = [] - - # Add assistant message with tool_calls if any - if accumulated_tool_calls: - assistant_response.tool_calls = accumulated_tool_calls - logger.info( - f"Added {len(accumulated_tool_calls)} tool calls to assistant message" - ) - if not has_appended_streaming_message and ( - assistant_response.content or assistant_response.tool_calls - ): - messages_to_save.append(assistant_response) - logger.info( - f"Saving assistant message with content_len={len(assistant_response.content or '')}, tool_calls={len(assistant_response.tool_calls or [])}" - ) - - # Add tool response messages after assistant message - messages_to_save.extend(tool_response_messages) - logger.info( - f"Saving {len(tool_response_messages)} tool response messages, " - f"total_to_save={len(messages_to_save)}" - ) - if messages_to_save: session.messages.extend(messages_to_save) - logger.info( - f"Extended session messages, new message_count={len(session.messages)}" - ) if messages_to_save or has_appended_streaming_message: await upsert_chat_session(session) - else: - logger.info( - "Assistant message already saved when StreamFinish was received, " - "skipping duplicate save" - ) - # If we did a tool call, stream the chat completion again to get the next response - if has_done_tool_call: - logger.info( - "Tool call executed, streaming chat completion again to get assistant response" - ) - async for chunk in stream_chat_completion( - session_id=session.session_id, - user_id=user_id, - session=session, # Pass session object to avoid Redis refetch - context=context, - tool_call_response=str(tool_response_messages), - ): - yield chunk + if not has_yielded_error: + error_message = str(e) + if not is_retryable: + error_message = f"Non-retryable error: {error_message}" + elif retry_count >= config.max_retries: + error_message = ( + f"Max retries ({config.max_retries}) exceeded: {error_message}" + ) + + error_response = StreamError(errorText=error_message) + yield error_response + if not has_yielded_end: + yield StreamFinish() + return + + # Handle retry outside of exception handler to avoid nesting + if should_retry and retry_count < config.max_retries: + logger.info( + f"Retrying stream_chat_completion for session {session_id}, attempt {retry_count + 1}" + ) + async for chunk in stream_chat_completion( + session_id=session.session_id, + user_id=user_id, + retry_count=retry_count + 1, + session=session, + context=context, + ): + yield chunk + return # Exit after retry to avoid double-saving in finally block + + 
# Normal completion path - save session and handle tool call continuation + # Only save if we haven't already saved when StreamFinish was received + if not has_saved_assistant_message: + logger.info( + f"Normal completion path: session={session.session_id}, " + f"current message_count={len(session.messages)}" + ) + + # Build the messages list in the correct order + messages_to_save: list[ChatMessage] = [] + + # Add assistant message with tool_calls if any + if accumulated_tool_calls: + assistant_response.tool_calls = accumulated_tool_calls + logger.info( + f"Added {len(accumulated_tool_calls)} tool calls to assistant message" + ) + if not has_appended_streaming_message and ( + assistant_response.content or assistant_response.tool_calls + ): + messages_to_save.append(assistant_response) + logger.info( + f"Saving assistant message with content_len={len(assistant_response.content or '')}, tool_calls={len(assistant_response.tool_calls or [])}" + ) + + # Add tool response messages after assistant message + messages_to_save.extend(tool_response_messages) + logger.info( + f"Saving {len(tool_response_messages)} tool response messages, " + f"total_to_save={len(messages_to_save)}" + ) + + if messages_to_save: + session.messages.extend(messages_to_save) + logger.info( + f"Extended session messages, new message_count={len(session.messages)}" + ) + if messages_to_save or has_appended_streaming_message: + await upsert_chat_session(session) + else: + logger.info( + "Assistant message already saved when StreamFinish was received, " + "skipping duplicate save" + ) + + # If we did a tool call, stream the chat completion again to get the next response + if has_done_tool_call: + logger.info( + "Tool call executed, streaming chat completion again to get assistant response" + ) + async for chunk in stream_chat_completion( + session_id=session.session_id, + user_id=user_id, + session=session, # Pass session object to avoid Redis refetch + context=context, + tool_call_response=str(tool_response_messages), + ): + yield chunk # Retry configuration for OpenAI API calls diff --git a/autogpt_platform/backend/backend/api/features/chat/tools/add_understanding.py b/autogpt_platform/backend/backend/api/features/chat/tools/add_understanding.py index bd93f0e2a6..fe3d5e8984 100644 --- a/autogpt_platform/backend/backend/api/features/chat/tools/add_understanding.py +++ b/autogpt_platform/backend/backend/api/features/chat/tools/add_understanding.py @@ -3,8 +3,6 @@ import logging from typing import Any -from langfuse import observe - from backend.api.features.chat.model import ChatSession from backend.data.understanding import ( BusinessUnderstandingInput, @@ -61,7 +59,6 @@ and automations for the user's specific needs.""" """Requires authentication to store user-specific data.""" return True - @observe(as_type="tool", name="add_understanding") async def _execute( self, user_id: str | None, diff --git a/autogpt_platform/backend/backend/api/features/chat/tools/agent_output.py b/autogpt_platform/backend/backend/api/features/chat/tools/agent_output.py index 00c6d8499b..457e4a4f9b 100644 --- a/autogpt_platform/backend/backend/api/features/chat/tools/agent_output.py +++ b/autogpt_platform/backend/backend/api/features/chat/tools/agent_output.py @@ -5,7 +5,6 @@ import re from datetime import datetime, timedelta, timezone from typing import Any -from langfuse import observe from pydantic import BaseModel, field_validator from backend.api.features.chat.model import ChatSession @@ -329,7 +328,6 @@ class AgentOutputTool(BaseTool): 
total_executions=len(available_executions) if available_executions else 1, ) - @observe(as_type="tool", name="view_agent_output") async def _execute( self, user_id: str | None, diff --git a/autogpt_platform/backend/backend/api/features/chat/tools/create_agent.py b/autogpt_platform/backend/backend/api/features/chat/tools/create_agent.py index 5a3c44fb94..6469cc4442 100644 --- a/autogpt_platform/backend/backend/api/features/chat/tools/create_agent.py +++ b/autogpt_platform/backend/backend/api/features/chat/tools/create_agent.py @@ -3,8 +3,6 @@ import logging from typing import Any -from langfuse import observe - from backend.api.features.chat.model import ChatSession from .agent_generator import ( @@ -75,7 +73,6 @@ class CreateAgentTool(BaseTool): "required": ["description"], } - @observe(as_type="tool", name="create_agent") async def _execute( self, user_id: str | None, diff --git a/autogpt_platform/backend/backend/api/features/chat/tools/edit_agent.py b/autogpt_platform/backend/backend/api/features/chat/tools/edit_agent.py index 777c39a254..df1e4a9c3e 100644 --- a/autogpt_platform/backend/backend/api/features/chat/tools/edit_agent.py +++ b/autogpt_platform/backend/backend/api/features/chat/tools/edit_agent.py @@ -3,8 +3,6 @@ import logging from typing import Any -from langfuse import observe - from backend.api.features.chat.model import ChatSession from .agent_generator import ( @@ -81,7 +79,6 @@ class EditAgentTool(BaseTool): "required": ["agent_id", "changes"], } - @observe(as_type="tool", name="edit_agent") async def _execute( self, user_id: str | None, diff --git a/autogpt_platform/backend/backend/api/features/chat/tools/find_agent.py b/autogpt_platform/backend/backend/api/features/chat/tools/find_agent.py index f231ef4484..477522757d 100644 --- a/autogpt_platform/backend/backend/api/features/chat/tools/find_agent.py +++ b/autogpt_platform/backend/backend/api/features/chat/tools/find_agent.py @@ -2,8 +2,6 @@ from typing import Any -from langfuse import observe - from backend.api.features.chat.model import ChatSession from .agent_search import search_agents @@ -37,7 +35,6 @@ class FindAgentTool(BaseTool): "required": ["query"], } - @observe(as_type="tool", name="find_agent") async def _execute( self, user_id: str | None, session: ChatSession, **kwargs ) -> ToolResponseBase: diff --git a/autogpt_platform/backend/backend/api/features/chat/tools/find_block.py b/autogpt_platform/backend/backend/api/features/chat/tools/find_block.py index fc20fdfc4a..a5e66f0a1c 100644 --- a/autogpt_platform/backend/backend/api/features/chat/tools/find_block.py +++ b/autogpt_platform/backend/backend/api/features/chat/tools/find_block.py @@ -1,7 +1,6 @@ import logging from typing import Any -from langfuse import observe from prisma.enums import ContentType from backend.api.features.chat.model import ChatSession @@ -56,7 +55,6 @@ class FindBlockTool(BaseTool): def requires_auth(self) -> bool: return True - @observe(as_type="tool", name="find_block") async def _execute( self, user_id: str | None, diff --git a/autogpt_platform/backend/backend/api/features/chat/tools/find_library_agent.py b/autogpt_platform/backend/backend/api/features/chat/tools/find_library_agent.py index d9b5edfa9b..108fba75ae 100644 --- a/autogpt_platform/backend/backend/api/features/chat/tools/find_library_agent.py +++ b/autogpt_platform/backend/backend/api/features/chat/tools/find_library_agent.py @@ -2,8 +2,6 @@ from typing import Any -from langfuse import observe - from backend.api.features.chat.model import ChatSession from .agent_search 
import search_agents @@ -43,7 +41,6 @@ class FindLibraryAgentTool(BaseTool): def requires_auth(self) -> bool: return True - @observe(as_type="tool", name="find_library_agent") async def _execute( self, user_id: str | None, session: ChatSession, **kwargs ) -> ToolResponseBase: diff --git a/autogpt_platform/backend/backend/api/features/chat/tools/get_doc_page.py b/autogpt_platform/backend/backend/api/features/chat/tools/get_doc_page.py index b2fdcccfcd..7040cd7db5 100644 --- a/autogpt_platform/backend/backend/api/features/chat/tools/get_doc_page.py +++ b/autogpt_platform/backend/backend/api/features/chat/tools/get_doc_page.py @@ -4,8 +4,6 @@ import logging from pathlib import Path from typing import Any -from langfuse import observe - from backend.api.features.chat.model import ChatSession from backend.api.features.chat.tools.base import BaseTool from backend.api.features.chat.tools.models import ( @@ -73,7 +71,6 @@ class GetDocPageTool(BaseTool): url_path = path.rsplit(".", 1)[0] if "." in path else path return f"{DOCS_BASE_URL}/{url_path}" - @observe(as_type="tool", name="get_doc_page") async def _execute( self, user_id: str | None, diff --git a/autogpt_platform/backend/backend/api/features/chat/tools/run_agent.py b/autogpt_platform/backend/backend/api/features/chat/tools/run_agent.py index 88d432a797..a7fa65348a 100644 --- a/autogpt_platform/backend/backend/api/features/chat/tools/run_agent.py +++ b/autogpt_platform/backend/backend/api/features/chat/tools/run_agent.py @@ -3,7 +3,6 @@ import logging from typing import Any -from langfuse import observe from pydantic import BaseModel, Field, field_validator from backend.api.features.chat.config import ChatConfig @@ -159,7 +158,6 @@ class RunAgentTool(BaseTool): """All operations require authentication.""" return True - @observe(as_type="tool", name="run_agent") async def _execute( self, user_id: str | None, diff --git a/autogpt_platform/backend/backend/api/features/chat/tools/run_block.py b/autogpt_platform/backend/backend/api/features/chat/tools/run_block.py index 0d233fcfec..3f57236564 100644 --- a/autogpt_platform/backend/backend/api/features/chat/tools/run_block.py +++ b/autogpt_platform/backend/backend/api/features/chat/tools/run_block.py @@ -4,8 +4,6 @@ import logging from collections import defaultdict from typing import Any -from langfuse import observe - from backend.api.features.chat.model import ChatSession from backend.data.block import get_block from backend.data.execution import ExecutionContext @@ -130,7 +128,6 @@ class RunBlockTool(BaseTool): return matched_credentials, missing_credentials - @observe(as_type="tool", name="run_block") async def _execute( self, user_id: str | None, diff --git a/autogpt_platform/backend/backend/api/features/chat/tools/search_docs.py b/autogpt_platform/backend/backend/api/features/chat/tools/search_docs.py index 4903230b40..edb0c0de1e 100644 --- a/autogpt_platform/backend/backend/api/features/chat/tools/search_docs.py +++ b/autogpt_platform/backend/backend/api/features/chat/tools/search_docs.py @@ -3,7 +3,6 @@ import logging from typing import Any -from langfuse import observe from prisma.enums import ContentType from backend.api.features.chat.model import ChatSession @@ -88,7 +87,6 @@ class SearchDocsTool(BaseTool): url_path = path.rsplit(".", 1)[0] if "." 
in path else path return f"{DOCS_BASE_URL}/{url_path}" - @observe(as_type="tool", name="search_docs") async def _execute( self, user_id: str | None, From 91c78968599a8bbe9ac15925a798a4023fb5acf9 Mon Sep 17 00:00:00 2001 From: Bently Date: Tue, 27 Jan 2026 15:37:17 +0100 Subject: [PATCH 3/7] fix(backend): implement context window management for long chat sessions (#11848) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Changes ๐Ÿ—๏ธ Implements automatic context window management to prevent chat failures when conversations exceed token limits. ### Problem - **Issue**: [SECRT-1800] Long chat conversations stop working when context grows beyond model limits (~113k tokens observed) - **Root Cause**: Chat service sends ALL messages to LLM without token-aware compression, eventually exceeding Claude Opus 4.5's 200k context window ### Solution Implements a sliding window with summarization strategy: 1. Monitors token count before sending to LLM (triggers at 120k tokens) 2. Keeps last 15 messages completely intact (preserves recent conversation flow) 3. Summarizes older messages using gpt-4o-mini (fast & cheap) 4. Rebuilds context: `[system_prompt] + [summary] + [recent_15_messages]` 5. Full history preserved in database (only compresses when sending to LLM) ### Changes Made - **Added** `_summarize_messages()` helper function to create concise summaries using gpt-4o-mini - **Modified** `_stream_chat_chunks()` to implement token counting and conditional summarization - **Integrated** existing `estimate_token_count()` utility for accurate token measurement - **Added** graceful fallback - continues with original messages if summarization fails ## Motivation and Context ๐ŸŽฏ Without context management, users with long chat sessions (250+ messages) experience: - Complete chat failure when hitting 200k token limit - Lost conversation context - Poor user experience This fix enables: - โœ… Unlimited conversation length - โœ… Transparent operation (no UX changes) - โœ… Preserved conversation quality (recent messages intact) - โœ… Cost-efficient (~$0.0001 per summarization) ## Testing ๐Ÿงช ### Expected Behavior - Conversations < 120k tokens: No change (normal operation) - Conversations > 120k tokens: - Log message: `Context summarized: {tokens} tokens, kept last 15 messages + summary` - Chat continues working smoothly - Recent context remains intact ### How to Verify 1. Start a chat session in copilot 2. Send 250-600 messages (or 50+ with large code blocks) 3. Check logs for "Context summarized:" message 4. Verify chat continues working without errors 5. 
Verify conversation quality remains good ## Checklist โœ… - [x] My code follows the style guidelines of this project - [x] I have performed a self-review of my own code - [x] I have commented my code, particularly in hard-to-understand areas - [x] My changes generate no new warnings - [x] I have tested my changes and verified they work as expected --- .../backend/api/features/chat/service.py | 392 +++++++++++++++++- 1 file changed, 390 insertions(+), 2 deletions(-) diff --git a/autogpt_platform/backend/backend/api/features/chat/service.py b/autogpt_platform/backend/backend/api/features/chat/service.py index 3976cd5f38..f8336b9107 100644 --- a/autogpt_platform/backend/backend/api/features/chat/service.py +++ b/autogpt_platform/backend/backend/api/features/chat/service.py @@ -628,6 +628,101 @@ def _is_region_blocked_error(error: Exception) -> bool: return "not available in your region" in str(error).lower() +async def _summarize_messages( + messages: list, + model: str, + api_key: str | None = None, + base_url: str | None = None, + timeout: float = 30.0, +) -> str: + """Summarize a list of messages into concise context. + + Uses the same model as the chat for higher quality summaries. + + Args: + messages: List of message dicts to summarize + model: Model to use for summarization (same as chat model) + api_key: API key for OpenAI client + base_url: Base URL for OpenAI client + timeout: Request timeout in seconds (default: 30.0) + + Returns: + Summarized text + """ + # Format messages for summarization + conversation = [] + for msg in messages: + role = msg.get("role", "") + content = msg.get("content", "") + # Include user, assistant, and tool messages (tool outputs are important context) + if content and role in ("user", "assistant", "tool"): + conversation.append(f"{role.upper()}: {content}") + + conversation_text = "\n\n".join(conversation) + + # Handle empty conversation + if not conversation_text: + return "No conversation history available." + + # Truncate conversation to fit within summarization model's context + # gpt-4o-mini has 128k context, but we limit to ~25k tokens (~100k chars) for safety + MAX_CHARS = 100_000 + if len(conversation_text) > MAX_CHARS: + conversation_text = conversation_text[:MAX_CHARS] + "\n\n[truncated]" + + # Call LLM to summarize + import openai + + summarization_client = openai.AsyncOpenAI( + api_key=api_key, base_url=base_url, timeout=timeout + ) + + response = await summarization_client.chat.completions.create( + model=model, + messages=[ + { + "role": "system", + "content": ( + "Create a detailed summary of the conversation so far. " + "This summary will be used as context when continuing the conversation.\n\n" + "Before writing the summary, analyze each message chronologically to identify:\n" + "- User requests and their explicit goals\n" + "- Your approach and key decisions made\n" + "- Technical specifics (file names, tool outputs, function signatures)\n" + "- Errors encountered and resolutions applied\n\n" + "You MUST include ALL of the following sections:\n\n" + "## 1. Primary Request and Intent\n" + "The user's explicit goals and what they are trying to accomplish.\n\n" + "## 2. Key Technical Concepts\n" + "Technologies, frameworks, tools, and patterns being used or discussed.\n\n" + "## 3. Files and Resources Involved\n" + "Specific files examined or modified, with relevant snippets and identifiers.\n\n" + "## 4. Errors and Fixes\n" + "Problems encountered, error messages, and their resolutions. 
" + "Include any user feedback on fixes.\n\n" + "## 5. Problem Solving\n" + "Issues that have been resolved and how they were addressed.\n\n" + "## 6. All User Messages\n" + "A complete list of all user inputs (excluding tool outputs) to preserve their exact requests.\n\n" + "## 7. Pending Tasks\n" + "Work items the user explicitly requested that have not yet been completed.\n\n" + "## 8. Current Work\n" + "Precise description of what was being worked on most recently, including relevant context.\n\n" + "## 9. Next Steps\n" + "What should happen next, aligned with the user's most recent requests. " + "Include verbatim quotes of recent instructions if relevant." + ), + }, + {"role": "user", "content": f"Summarize:\n\n{conversation_text}"}, + ], + max_tokens=1500, + temperature=0.3, + ) + + summary = response.choices[0].message.content + return summary or "No summary available." + + async def _stream_chat_chunks( session: ChatSession, tools: list[ChatCompletionToolParam], @@ -664,6 +759,292 @@ async def _stream_chat_chunks( ) messages = [system_message] + messages + # Apply context window management + token_count = 0 # Initialize for exception handler + try: + from backend.util.prompt import estimate_token_count + + # Convert to dict for token counting + # OpenAI message types are TypedDicts, so they're already dict-like + messages_dict = [] + for msg in messages: + # TypedDict objects are already dicts, just filter None values + if isinstance(msg, dict): + msg_dict = {k: v for k, v in msg.items() if v is not None} + else: + # Fallback for unexpected types + msg_dict = dict(msg) + messages_dict.append(msg_dict) + + # Estimate tokens using appropriate tokenizer + # Normalize model name for token counting (tiktoken only supports OpenAI models) + token_count_model = model + if "/" in model: + # Strip provider prefix (e.g., "anthropic/claude-opus-4.5" -> "claude-opus-4.5") + token_count_model = model.split("/")[-1] + + # For Claude and other non-OpenAI models, approximate with gpt-4o tokenizer + # Most modern LLMs have similar tokenization (~1 token per 4 chars) + if "claude" in token_count_model.lower() or not any( + known in token_count_model.lower() + for known in ["gpt", "o1", "chatgpt", "text-"] + ): + token_count_model = "gpt-4o" + + # Attempt token counting with error handling + try: + token_count = estimate_token_count(messages_dict, model=token_count_model) + except Exception as token_error: + # If token counting fails, use gpt-4o as fallback approximation + logger.warning( + f"Token counting failed for model {token_count_model}: {token_error}. " + "Using gpt-4o approximation." 
+ ) + token_count = estimate_token_count(messages_dict, model="gpt-4o") + + # If over threshold, summarize old messages + if token_count > 120_000: + KEEP_RECENT = 15 + + # Check if we have a system prompt at the start + has_system_prompt = ( + len(messages) > 0 and messages[0].get("role") == "system" + ) + + # Always attempt mitigation when over limit, even with few messages + if messages: + # Split messages based on whether system prompt exists + recent_messages = messages[-KEEP_RECENT:] + + if has_system_prompt: + # Keep system prompt separate, summarize everything between system and recent + system_msg = messages[0] + old_messages_dict = messages_dict[1:-KEEP_RECENT] + else: + # No system prompt, summarize everything except recent + system_msg = None + old_messages_dict = messages_dict[:-KEEP_RECENT] + + # Summarize any non-empty old messages (no minimum threshold) + # If we're over the token limit, we need to compress whatever we can + if old_messages_dict: + # Summarize old messages using the same model as chat + summary_text = await _summarize_messages( + old_messages_dict, + model=model, + api_key=config.api_key, + base_url=config.base_url, + ) + + # Build new message list + # Use assistant role (not system) to prevent privilege escalation + # of user-influenced content to instruction-level authority + from openai.types.chat import ChatCompletionAssistantMessageParam + + summary_msg = ChatCompletionAssistantMessageParam( + role="assistant", + content=( + "[Previous conversation summary โ€” for context only]: " + f"{summary_text}" + ), + ) + + # Rebuild messages based on whether we have a system prompt + if has_system_prompt: + # system_prompt + summary + recent_messages + messages = [system_msg, summary_msg] + recent_messages + else: + # summary + recent_messages (no original system prompt) + messages = [summary_msg] + recent_messages + + logger.info( + f"Context summarized: {token_count} tokens, " + f"summarized {len(old_messages_dict)} old messages, " + f"kept last {KEEP_RECENT} messages" + ) + + # Fallback: If still over limit after summarization, progressively drop recent messages + # This handles edge cases where recent messages are extremely large + new_messages_dict = [] + for msg in messages: + if isinstance(msg, dict): + msg_dict = {k: v for k, v in msg.items() if v is not None} + else: + msg_dict = dict(msg) + new_messages_dict.append(msg_dict) + + new_token_count = estimate_token_count( + new_messages_dict, model=token_count_model + ) + + if new_token_count > 120_000: + # Still over limit - progressively reduce KEEP_RECENT + logger.warning( + f"Still over limit after summarization: {new_token_count} tokens. " + "Reducing number of recent messages kept." 
+ ) + + for keep_count in [12, 10, 8, 5, 3, 2, 1, 0]: + if keep_count == 0: + # Try with just system prompt + summary (no recent messages) + if has_system_prompt: + messages = [system_msg, summary_msg] + else: + messages = [summary_msg] + logger.info( + "Trying with 0 recent messages (system + summary only)" + ) + else: + # Slice from ORIGINAL recent_messages to avoid duplicating summary + reduced_recent = ( + recent_messages[-keep_count:] + if len(recent_messages) >= keep_count + else recent_messages + ) + if has_system_prompt: + messages = [ + system_msg, + summary_msg, + ] + reduced_recent + else: + messages = [summary_msg] + reduced_recent + + new_messages_dict = [] + for msg in messages: + if isinstance(msg, dict): + msg_dict = { + k: v for k, v in msg.items() if v is not None + } + else: + msg_dict = dict(msg) + new_messages_dict.append(msg_dict) + + new_token_count = estimate_token_count( + new_messages_dict, model=token_count_model + ) + + if new_token_count <= 120_000: + logger.info( + f"Reduced to {keep_count} recent messages, " + f"now {new_token_count} tokens" + ) + break + else: + logger.error( + f"Unable to reduce token count below threshold even with 0 messages. " + f"Final count: {new_token_count} tokens" + ) + # ABSOLUTE LAST RESORT: Drop system prompt + # This should only happen if summary itself is massive + if has_system_prompt and len(messages) > 1: + messages = messages[1:] # Drop system prompt + logger.critical( + "CRITICAL: Dropped system prompt as absolute last resort. " + "Behavioral consistency may be affected." + ) + # Yield error to user + yield StreamError( + errorText=( + "Warning: System prompt dropped due to size constraints. " + "Assistant behavior may be affected." + ) + ) + else: + # No old messages to summarize - all messages are "recent" + # Apply progressive truncation to reduce token count + logger.warning( + f"Token count {token_count} exceeds threshold but no old messages to summarize. " + f"Applying progressive truncation to recent messages." 
+ ) + + # Create a base list excluding system prompt to avoid duplication + # This is the pool of messages we'll slice from in the loop + base_msgs = messages[1:] if has_system_prompt else messages + + # Try progressively smaller keep counts + new_token_count = token_count # Initialize with current count + for keep_count in [12, 10, 8, 5, 3, 2, 1, 0]: + if keep_count == 0: + # Try with just system prompt (no recent messages) + if has_system_prompt: + messages = [system_msg] + logger.info( + "Trying with 0 recent messages (system prompt only)" + ) + else: + # No system prompt and no recent messages = empty messages list + # This is invalid, skip this iteration + continue + else: + if len(base_msgs) < keep_count: + continue # Skip if we don't have enough messages + + # Slice from base_msgs to get recent messages (without system prompt) + recent_messages = base_msgs[-keep_count:] + + if has_system_prompt: + messages = [system_msg] + recent_messages + else: + messages = recent_messages + + new_messages_dict = [] + for msg in messages: + if msg is None: + continue # Skip None messages (type safety) + if isinstance(msg, dict): + msg_dict = { + k: v for k, v in msg.items() if v is not None + } + else: + msg_dict = dict(msg) + new_messages_dict.append(msg_dict) + + new_token_count = estimate_token_count( + new_messages_dict, model=token_count_model + ) + + if new_token_count <= 120_000: + logger.info( + f"Reduced to {keep_count} recent messages, " + f"now {new_token_count} tokens" + ) + break + else: + # Even with 0 messages still over limit + logger.error( + f"Unable to reduce token count below threshold even with 0 messages. " + f"Final count: {new_token_count} tokens. Messages may be extremely large." + ) + # ABSOLUTE LAST RESORT: Drop system prompt + if has_system_prompt and len(messages) > 1: + messages = messages[1:] # Drop system prompt + logger.critical( + "CRITICAL: Dropped system prompt as absolute last resort. " + "Behavioral consistency may be affected." + ) + # Yield error to user + yield StreamError( + errorText=( + "Warning: System prompt dropped due to size constraints. " + "Assistant behavior may be affected." + ) + ) + + except Exception as e: + logger.error(f"Context summarization failed: {e}", exc_info=True) + # If we were over the token limit, yield error to user + # Don't silently continue with oversized messages that will fail + if token_count > 120_000: + yield StreamError( + errorText=( + f"Unable to manage context window (token limit exceeded: {token_count} tokens). " + "Context summarization failed. Please start a new conversation." 
+ ) + ) + yield StreamFinish() + return + # Otherwise, continue with original messages (under limit) + # Loop to handle tool calls and continue conversation while True: retry_count = 0 @@ -691,13 +1072,20 @@ async def _stream_chat_chunks( ] # OpenRouter limit # Create the stream with proper types + from typing import cast + + from openai.types.chat import ( + ChatCompletionMessageParam, + ChatCompletionStreamOptionsParam, + ) + stream = await client.chat.completions.create( model=model, - messages=messages, + messages=cast(list[ChatCompletionMessageParam], messages), tools=tools, tool_choice="auto", stream=True, - stream_options={"include_usage": True}, + stream_options=ChatCompletionStreamOptionsParam(include_usage=True), extra_body=extra_body, ) From fac10c422bb49b164ab205142694199990136122 Mon Sep 17 00:00:00 2001 From: Swifty Date: Tue, 27 Jan 2026 15:41:58 +0100 Subject: [PATCH 4/7] fix(backend): add SSE heartbeats to prevent tool execution timeouts (#11855) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Summary Long-running chat tools (like `create_agent` and `edit_agent`) were timing out because no SSE data was sent during tool execution. GCP load balancers and proxies have idle connection timeouts (~60 seconds), and when the external Agent Generator service takes longer than this, the connection would drop. This PR adds SSE heartbeat comments during tool execution to keep connections alive. ### Changes ๐Ÿ—๏ธ - **response_model.py**: Added `StreamHeartbeat` response type that emits SSE comments (`: heartbeat\n\n`) - **service.py**: Modified `_yield_tool_call()` to: - Run tool execution in a background asyncio task - Yield heartbeat events every 15 seconds while waiting - Handle task failures with explicit error responses (no silent failures) - Handle cancellation gracefully - **create_agent.py**: Improved error messages with more context and details - **edit_agent.py**: Improved error messages with more context and details ### How It Works ``` Tool Call โ†’ Background Task Started โ”‚ โ”œโ”€โ”€ Every 15 seconds: yield `: heartbeat\n\n` (SSE comment) โ”‚ โ””โ”€โ”€ Task Complete โ†’ yield tool result OR error response ``` SSE comments (`: heartbeat\n\n`) are: - Ignored by SSE clients (don't trigger events) - Keep TCP connections alive through proxies/load balancers - Don't affect the AI SDK data protocol ### Checklist ๐Ÿ“‹ #### For code changes: - [x] I have clearly listed my changes in the PR description - [x] I have made a test plan - [x] I have tested my changes according to the test plan: - [x] All chat service tests pass (17 tests) - [x] Verified heartbeats are sent during long tool execution - [x] Verified errors are properly reported to frontend --- .../api/features/chat/response_model.py | 18 ++++++ .../backend/api/features/chat/service.py | 58 +++++++++++++++++-- .../api/features/chat/tools/create_agent.py | 14 +++-- .../api/features/chat/tools/edit_agent.py | 5 +- 4 files changed, 83 insertions(+), 12 deletions(-) diff --git a/autogpt_platform/backend/backend/api/features/chat/response_model.py b/autogpt_platform/backend/backend/api/features/chat/response_model.py index 49a9b38e8f..53a8cf3a1f 100644 --- a/autogpt_platform/backend/backend/api/features/chat/response_model.py +++ b/autogpt_platform/backend/backend/api/features/chat/response_model.py @@ -31,6 +31,7 @@ class ResponseType(str, Enum): # Other ERROR = "error" USAGE = "usage" + HEARTBEAT = "heartbeat" class StreamBaseResponse(BaseModel): @@ -142,3 +143,20 @@ class 
StreamError(StreamBaseResponse): details: dict[str, Any] | None = Field( default=None, description="Additional error details" ) + + +class StreamHeartbeat(StreamBaseResponse): + """Heartbeat to keep SSE connection alive during long-running operations. + + Uses SSE comment format (: comment) which is ignored by clients but keeps + the connection alive through proxies and load balancers. + """ + + type: ResponseType = ResponseType.HEARTBEAT + toolCallId: str | None = Field( + default=None, description="Tool call ID if heartbeat is for a specific tool" + ) + + def to_sse(self) -> str: + """Convert to SSE comment format to keep connection alive.""" + return ": heartbeat\n\n" diff --git a/autogpt_platform/backend/backend/api/features/chat/service.py b/autogpt_platform/backend/backend/api/features/chat/service.py index f8336b9107..386b37784d 100644 --- a/autogpt_platform/backend/backend/api/features/chat/service.py +++ b/autogpt_platform/backend/backend/api/features/chat/service.py @@ -38,6 +38,7 @@ from .response_model import ( StreamBaseResponse, StreamError, StreamFinish, + StreamHeartbeat, StreamStart, StreamTextDelta, StreamTextEnd, @@ -48,6 +49,7 @@ from .response_model import ( StreamUsage, ) from .tools import execute_tool, tools +from .tools.models import ErrorResponse from .tracking import track_user_message logger = logging.getLogger(__name__) @@ -1258,6 +1260,9 @@ async def _yield_tool_call( """ Yield a tool call and its execution result. + For long-running tools, yields heartbeat events every 15 seconds to keep + the SSE connection alive through proxies and load balancers. + Raises: orjson.JSONDecodeError: If tool call arguments cannot be parsed as JSON KeyError: If expected tool call fields are missing @@ -1280,12 +1285,53 @@ async def _yield_tool_call( input=arguments, ) - tool_execution_response: StreamToolOutputAvailable = await execute_tool( - tool_name=tool_name, - parameters=arguments, - tool_call_id=tool_call_id, - user_id=session.user_id, - session=session, + # Run tool execution in background task with heartbeats to keep connection alive + tool_task = asyncio.create_task( + execute_tool( + tool_name=tool_name, + parameters=arguments, + tool_call_id=tool_call_id, + user_id=session.user_id, + session=session, + ) ) + # Yield heartbeats every 15 seconds while waiting for tool to complete + heartbeat_interval = 15.0 # seconds + while not tool_task.done(): + try: + # Wait for either the task to complete or the heartbeat interval + await asyncio.wait_for( + asyncio.shield(tool_task), timeout=heartbeat_interval + ) + except asyncio.TimeoutError: + # Task still running, send heartbeat to keep connection alive + logger.debug(f"Sending heartbeat for tool {tool_name} ({tool_call_id})") + yield StreamHeartbeat(toolCallId=tool_call_id) + except CancelledError: + # Task was cancelled, clean up and propagate + tool_task.cancel() + logger.warning(f"Tool execution cancelled: {tool_name} ({tool_call_id})") + raise + + # Get the result - handle any exceptions that occurred during execution + try: + tool_execution_response: StreamToolOutputAvailable = await tool_task + except Exception as e: + # Task raised an exception - ensure we send an error response to the frontend + logger.error( + f"Tool execution failed: {tool_name} ({tool_call_id}): {e}", exc_info=True + ) + error_response = ErrorResponse( + message=f"Tool execution failed: {e!s}", + error=type(e).__name__, + session_id=session.session_id, + ) + tool_execution_response = StreamToolOutputAvailable( + toolCallId=tool_call_id, + 
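+            # Synthesized failure output: marking success=False surfaces the
+            # exception to the client as an explicit tool error instead of a
+            # silently dropped call.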
toolName=tool_name, + output=error_response.model_dump_json(), + success=False, + ) + yield tool_execution_response diff --git a/autogpt_platform/backend/backend/api/features/chat/tools/create_agent.py b/autogpt_platform/backend/backend/api/features/chat/tools/create_agent.py index 6469cc4442..87ca5ebca7 100644 --- a/autogpt_platform/backend/backend/api/features/chat/tools/create_agent.py +++ b/autogpt_platform/backend/backend/api/features/chat/tools/create_agent.py @@ -113,8 +113,11 @@ class CreateAgentTool(BaseTool): if decomposition_result is None: return ErrorResponse( - message="Failed to analyze the goal. Please try rephrasing.", - error="Decomposition failed", + message="Failed to analyze the goal. The agent generation service may be unavailable or timed out. Please try again.", + error="decomposition_failed", + details={ + "description": description[:100] + }, # Include context for debugging session_id=session_id, ) @@ -179,8 +182,11 @@ class CreateAgentTool(BaseTool): if agent_json is None: return ErrorResponse( - message="Failed to generate the agent. Please try again.", - error="Generation failed", + message="Failed to generate the agent. The agent generation service may be unavailable or timed out. Please try again.", + error="generation_failed", + details={ + "description": description[:100] + }, # Include context for debugging session_id=session_id, ) diff --git a/autogpt_platform/backend/backend/api/features/chat/tools/edit_agent.py b/autogpt_platform/backend/backend/api/features/chat/tools/edit_agent.py index df1e4a9c3e..d65b050f06 100644 --- a/autogpt_platform/backend/backend/api/features/chat/tools/edit_agent.py +++ b/autogpt_platform/backend/backend/api/features/chat/tools/edit_agent.py @@ -142,8 +142,9 @@ class EditAgentTool(BaseTool): if result is None: return ErrorResponse( - message="Failed to generate changes. Please try rephrasing.", - error="Update generation failed", + message="Failed to generate changes. The agent generation service may be unavailable or timed out. 
Please try again.", + error="update_generation_failed", + details={"agent_id": agent_id, "changes": changes[:100]}, session_id=session_id, ) From 3e9d5d0d50a81d87f2f7de3e1e91db91e21c0d3d Mon Sep 17 00:00:00 2001 From: Zamil Majdy Date: Tue, 27 Jan 2026 08:43:31 -0600 Subject: [PATCH 5/7] fix(backend): handle race condition in review processing gracefully (#11845) ## Summary - Fixes race condition when multiple concurrent requests try to process the same reviews (e.g., double-click, multiple browser tabs) - Previously the second request would fail with "Reviews not found, access denied, or not in WAITING status" - Now handles this gracefully by treating already-processed reviews with the same decision as success ## Changes - Added `get_reviews_by_node_exec_ids()` function that fetches reviews regardless of status - Modified `process_all_reviews_for_execution()` to handle already-processed reviews - Updated route to use idempotent validation ## Test plan - [x] Linter passes (`poetry run ruff check`) - [x] Type checker passes (`poetry run pyright`) - [x] Formatter passes (`poetry run format`) - [ ] Manual testing: double-click approve button should not cause errors Fixes AUTOGPT-SERVER-7HE --- .../executions/review/review_routes_test.py | 44 ++++++------- .../api/features/executions/review/routes.py | 10 +-- .../backend/backend/data/human_review.py | 65 ++++++++++++++----- 3 files changed, 77 insertions(+), 42 deletions(-) diff --git a/autogpt_platform/backend/backend/api/features/executions/review/review_routes_test.py b/autogpt_platform/backend/backend/api/features/executions/review/review_routes_test.py index d0c24f2cf8..c8bbfe4bb0 100644 --- a/autogpt_platform/backend/backend/api/features/executions/review/review_routes_test.py +++ b/autogpt_platform/backend/backend/api/features/executions/review/review_routes_test.py @@ -164,9 +164,9 @@ async def test_process_review_action_approve_success( """Test successful review approval""" # Mock the route functions - # Mock get_pending_reviews_by_node_exec_ids (called to find the graph_exec_id) + # Mock get_reviews_by_node_exec_ids (called to find the graph_exec_id) mock_get_reviews_for_user = mocker.patch( - "backend.api.features.executions.review.routes.get_pending_reviews_by_node_exec_ids" + "backend.api.features.executions.review.routes.get_reviews_by_node_exec_ids" ) mock_get_reviews_for_user.return_value = {"test_node_123": sample_pending_review} @@ -244,9 +244,9 @@ async def test_process_review_action_reject_success( """Test successful review rejection""" # Mock the route functions - # Mock get_pending_reviews_by_node_exec_ids (called to find the graph_exec_id) + # Mock get_reviews_by_node_exec_ids (called to find the graph_exec_id) mock_get_reviews_for_user = mocker.patch( - "backend.api.features.executions.review.routes.get_pending_reviews_by_node_exec_ids" + "backend.api.features.executions.review.routes.get_reviews_by_node_exec_ids" ) mock_get_reviews_for_user.return_value = {"test_node_123": sample_pending_review} @@ -339,9 +339,9 @@ async def test_process_review_action_mixed_success( # Mock the route functions - # Mock get_pending_reviews_by_node_exec_ids (called to find the graph_exec_id) + # Mock get_reviews_by_node_exec_ids (called to find the graph_exec_id) mock_get_reviews_for_user = mocker.patch( - "backend.api.features.executions.review.routes.get_pending_reviews_by_node_exec_ids" + "backend.api.features.executions.review.routes.get_reviews_by_node_exec_ids" ) mock_get_reviews_for_user.return_value = { "test_node_123": 
sample_pending_review, @@ -463,9 +463,9 @@ async def test_process_review_action_review_not_found( test_user_id: str, ) -> None: """Test error when review is not found""" - # Mock get_pending_reviews_by_node_exec_ids (called to find the graph_exec_id) + # Mock get_reviews_by_node_exec_ids (called to find the graph_exec_id) mock_get_reviews_for_user = mocker.patch( - "backend.api.features.executions.review.routes.get_pending_reviews_by_node_exec_ids" + "backend.api.features.executions.review.routes.get_reviews_by_node_exec_ids" ) # Return empty dict to simulate review not found mock_get_reviews_for_user.return_value = {} @@ -506,7 +506,7 @@ async def test_process_review_action_review_not_found( response = await client.post("/api/review/action", json=request_data) assert response.status_code == 404 - assert "No pending review found" in response.json()["detail"] + assert "Review(s) not found" in response.json()["detail"] @pytest.mark.asyncio(loop_scope="session") @@ -517,9 +517,9 @@ async def test_process_review_action_partial_failure( test_user_id: str, ) -> None: """Test handling of partial failures in review processing""" - # Mock get_pending_reviews_by_node_exec_ids (called to find the graph_exec_id) + # Mock get_reviews_by_node_exec_ids (called to find the graph_exec_id) mock_get_reviews_for_user = mocker.patch( - "backend.api.features.executions.review.routes.get_pending_reviews_by_node_exec_ids" + "backend.api.features.executions.review.routes.get_reviews_by_node_exec_ids" ) mock_get_reviews_for_user.return_value = {"test_node_123": sample_pending_review} @@ -567,9 +567,9 @@ async def test_process_review_action_invalid_node_exec_id( test_user_id: str, ) -> None: """Test failure when trying to process review with invalid node execution ID""" - # Mock get_pending_reviews_by_node_exec_ids (called to find the graph_exec_id) + # Mock get_reviews_by_node_exec_ids (called to find the graph_exec_id) mock_get_reviews_for_user = mocker.patch( - "backend.api.features.executions.review.routes.get_pending_reviews_by_node_exec_ids" + "backend.api.features.executions.review.routes.get_reviews_by_node_exec_ids" ) # Return empty dict to simulate review not found mock_get_reviews_for_user.return_value = {} @@ -596,7 +596,7 @@ async def test_process_review_action_invalid_node_exec_id( # Returns 404 when review is not found assert response.status_code == 404 - assert "No pending review found" in response.json()["detail"] + assert "Review(s) not found" in response.json()["detail"] @pytest.mark.asyncio(loop_scope="session") @@ -607,9 +607,9 @@ async def test_process_review_action_auto_approve_creates_auto_approval_records( test_user_id: str, ) -> None: """Test that auto_approve_future_actions flag creates auto-approval records""" - # Mock get_pending_reviews_by_node_exec_ids (called to find the graph_exec_id) + # Mock get_reviews_by_node_exec_ids (called to find the graph_exec_id) mock_get_reviews_for_user = mocker.patch( - "backend.api.features.executions.review.routes.get_pending_reviews_by_node_exec_ids" + "backend.api.features.executions.review.routes.get_reviews_by_node_exec_ids" ) mock_get_reviews_for_user.return_value = {"test_node_123": sample_pending_review} @@ -737,9 +737,9 @@ async def test_process_review_action_without_auto_approve_still_loads_settings( test_user_id: str, ) -> None: """Test that execution context is created with settings even without auto-approve""" - # Mock get_pending_reviews_by_node_exec_ids (called to find the graph_exec_id) + # Mock get_reviews_by_node_exec_ids (called to 
find the graph_exec_id) mock_get_reviews_for_user = mocker.patch( - "backend.api.features.executions.review.routes.get_pending_reviews_by_node_exec_ids" + "backend.api.features.executions.review.routes.get_reviews_by_node_exec_ids" ) mock_get_reviews_for_user.return_value = {"test_node_123": sample_pending_review} @@ -885,9 +885,9 @@ async def test_process_review_action_auto_approve_only_applies_to_approved_revie reviewed_at=FIXED_NOW, ) - # Mock get_pending_reviews_by_node_exec_ids (called to find the graph_exec_id) + # Mock get_reviews_by_node_exec_ids (called to find the graph_exec_id) mock_get_reviews_for_user = mocker.patch( - "backend.api.features.executions.review.routes.get_pending_reviews_by_node_exec_ids" + "backend.api.features.executions.review.routes.get_reviews_by_node_exec_ids" ) # Need to return both reviews in WAITING state (before processing) approved_review_waiting = PendingHumanReviewModel( @@ -1031,9 +1031,9 @@ async def test_process_review_action_per_review_auto_approve_granularity( test_user_id: str, ) -> None: """Test that auto-approval can be set per-review (granular control)""" - # Mock get_pending_reviews_by_node_exec_ids - return different reviews based on node_exec_id + # Mock get_reviews_by_node_exec_ids - return different reviews based on node_exec_id mock_get_reviews_for_user = mocker.patch( - "backend.api.features.executions.review.routes.get_pending_reviews_by_node_exec_ids" + "backend.api.features.executions.review.routes.get_reviews_by_node_exec_ids" ) # Create a mapping of node_exec_id to review diff --git a/autogpt_platform/backend/backend/api/features/executions/review/routes.py b/autogpt_platform/backend/backend/api/features/executions/review/routes.py index a10071e9cb..539c7fd87b 100644 --- a/autogpt_platform/backend/backend/api/features/executions/review/routes.py +++ b/autogpt_platform/backend/backend/api/features/executions/review/routes.py @@ -14,9 +14,9 @@ from backend.data.execution import ( from backend.data.graph import get_graph_settings from backend.data.human_review import ( create_auto_approval_record, - get_pending_reviews_by_node_exec_ids, get_pending_reviews_for_execution, get_pending_reviews_for_user, + get_reviews_by_node_exec_ids, has_pending_reviews_for_graph_exec, process_all_reviews_for_execution, ) @@ -137,17 +137,17 @@ async def process_review_action( detail="At least one review must be provided", ) - # Batch fetch all requested reviews - reviews_map = await get_pending_reviews_by_node_exec_ids( + # Batch fetch all requested reviews (regardless of status for idempotent handling) + reviews_map = await get_reviews_by_node_exec_ids( list(all_request_node_ids), user_id ) - # Validate all reviews were found + # Validate all reviews were found (must exist, any status is OK for now) missing_ids = all_request_node_ids - set(reviews_map.keys()) if missing_ids: raise HTTPException( status_code=status.HTTP_404_NOT_FOUND, - detail=f"No pending review found for node execution(s): {', '.join(missing_ids)}", + detail=f"Review(s) not found: {', '.join(missing_ids)}", ) # Validate all reviews belong to the same execution diff --git a/autogpt_platform/backend/backend/data/human_review.py b/autogpt_platform/backend/backend/data/human_review.py index c70eaa7b64..f198043a38 100644 --- a/autogpt_platform/backend/backend/data/human_review.py +++ b/autogpt_platform/backend/backend/data/human_review.py @@ -263,11 +263,14 @@ async def get_pending_review_by_node_exec_id( return PendingHumanReviewModel.from_db(review, node_id=node_id) -async def 
get_pending_reviews_by_node_exec_ids( +async def get_reviews_by_node_exec_ids( node_exec_ids: list[str], user_id: str ) -> dict[str, "PendingHumanReviewModel"]: """ - Get multiple pending reviews by their node execution IDs in a single batch query. + Get multiple reviews by their node execution IDs regardless of status. + + Unlike get_pending_reviews_by_node_exec_ids, this returns reviews in any status + (WAITING, APPROVED, REJECTED). Used for validation in idempotent operations. Args: node_exec_ids: List of node execution IDs to look up @@ -283,7 +286,6 @@ async def get_pending_reviews_by_node_exec_ids( where={ "nodeExecId": {"in": node_exec_ids}, "userId": user_id, - "status": ReviewStatus.WAITING, } ) @@ -407,38 +409,68 @@ async def process_all_reviews_for_execution( ) -> dict[str, PendingHumanReviewModel]: """Process all pending reviews for an execution with approve/reject decisions. + Handles race conditions gracefully: if a review was already processed with the + same decision by a concurrent request, it's treated as success rather than error. + Args: user_id: User ID for ownership validation review_decisions: Map of node_exec_id -> (status, reviewed_data, message) Returns: - Dict of node_exec_id -> updated review model + Dict of node_exec_id -> updated review model (includes already-processed reviews) """ if not review_decisions: return {} node_exec_ids = list(review_decisions.keys()) - # Get all reviews for validation - reviews = await PendingHumanReview.prisma().find_many( + # Get all reviews (both WAITING and already processed) for the user + all_reviews = await PendingHumanReview.prisma().find_many( where={ "nodeExecId": {"in": node_exec_ids}, "userId": user_id, - "status": ReviewStatus.WAITING, }, ) - # Validate all reviews can be processed - if len(reviews) != len(node_exec_ids): - missing_ids = set(node_exec_ids) - {review.nodeExecId for review in reviews} + # Separate into pending and already-processed reviews + reviews_to_process = [] + already_processed = [] + for review in all_reviews: + if review.status == ReviewStatus.WAITING: + reviews_to_process.append(review) + else: + already_processed.append(review) + + # Check for truly missing reviews (not found at all) + found_ids = {review.nodeExecId for review in all_reviews} + missing_ids = set(node_exec_ids) - found_ids + if missing_ids: raise ValueError( - f"Reviews not found, access denied, or not in WAITING status: {', '.join(missing_ids)}" + f"Reviews not found or access denied: {', '.join(missing_ids)}" ) - # Create parallel update tasks + # Validate already-processed reviews have compatible status (same decision) + # This handles race conditions where another request processed the same reviews + for review in already_processed: + requested_status = review_decisions[review.nodeExecId][0] + if review.status != requested_status: + raise ValueError( + f"Review {review.nodeExecId} was already processed with status " + f"{review.status}, cannot change to {requested_status}" + ) + + # Log if we're handling a race condition (some reviews already processed) + if already_processed: + already_processed_ids = [r.nodeExecId for r in already_processed] + logger.info( + f"Race condition handled: {len(already_processed)} review(s) already " + f"processed by concurrent request: {already_processed_ids}" + ) + + # Create parallel update tasks for reviews that still need processing update_tasks = [] - for review in reviews: + for review in reviews_to_process: new_status, reviewed_data, message = review_decisions[review.nodeExecId] 
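        # Only treat the review as edited when the reviewer actually supplied a
        # payload that differs from what was originally submitted for approval.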
has_data_changes = reviewed_data is not None and reviewed_data != review.payload @@ -463,7 +495,7 @@ async def process_all_reviews_for_execution( update_tasks.append(task) # Execute all updates in parallel and get updated reviews - updated_reviews = await asyncio.gather(*update_tasks) + updated_reviews = await asyncio.gather(*update_tasks) if update_tasks else [] # Note: Execution resumption is now handled at the API layer after ALL reviews # for an execution are processed (both approved and rejected) @@ -472,8 +504,11 @@ async def process_all_reviews_for_execution( # Local import to avoid event loop conflicts in tests from backend.data.execution import get_node_execution + # Combine updated reviews with already-processed ones (for idempotent response) + all_result_reviews = list(updated_reviews) + already_processed + result = {} - for review in updated_reviews: + for review in all_result_reviews: node_exec = await get_node_execution(review.nodeExecId) node_id = node_exec.node_id if node_exec else review.nodeExecId result[review.nodeExecId] = PendingHumanReviewModel.from_db( From 962824c8afd9a566531ee123262dc6fe57dcb679 Mon Sep 17 00:00:00 2001 From: Ubbe Date: Tue, 27 Jan 2026 22:09:25 +0700 Subject: [PATCH 6/7] refactor(frontend): copilot session management stream updates (#11853) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Changes ๐Ÿ—๏ธ - **Fix infinite loop in copilot page** - use Zustand selectors instead of full store object to get stable function references - **Centralize chat streaming logic** - move all streaming files from `providers/chat-stream/` to `components/contextual/Chat/` for better colocation and reusability - **Rename `copilot-store` โ†’ `copilot-page-store`**: Clarify scope - **Fix message duplication** - Only replay chunks from active streams (not completed ones) since backend already provides persisted messages in `initialMessages` - **Auto-focus chat input** - Focus textarea when streaming ends and input is re-enabled - **Graceful error display** - Render tool response errors in muted style (small text + warning icon) instead of raw "Error: ..." 
text ## Checklist ๐Ÿ“‹ ### For code changes: - [x] I have clearly listed my changes in the PR description - [x] I have made a test plan - [x] I have tested my changes according to the test plan: - [x] Navigate to copilot page - no infinite loop errors - [x] Start a new chat, send message, verify streaming works - [x] Navigate away and back to a completed session - no duplicate messages - [x] After stream completes, verify chat input receives focus - [x] Trigger a tool error - verify it displays with muted styling --- .../app/(platform)/copilot/NewChatContext.tsx | 41 -- .../components/CopilotShell/CopilotShell.tsx | 22 +- .../SessionsList/useSessionsPagination.ts | 73 ++- .../components/CopilotShell/helpers.ts | 70 +- .../CopilotShell/useCopilotShell.ts | 45 +- .../(platform)/copilot/copilot-page-store.ts | 54 ++ .../src/app/(platform)/copilot/layout.tsx | 7 +- .../src/app/(platform)/copilot/page.tsx | 30 +- .../app/(platform)/copilot/useCopilotPage.ts | 74 +-- .../frontend/src/app/(platform)/layout.tsx | 2 + .../components/atoms/Skeleton/Skeleton.tsx | 14 + .../atoms/Skeleton/skeleton.stories.tsx | 2 +- .../components/contextual/Chat/chat-store.ts | 234 +++++++ .../components/contextual/Chat/chat-types.ts | 94 +++ .../ChatContainer/ChatContainer.tsx | 24 +- .../createStreamEventDispatcher.ts | 2 +- .../Chat/components/ChatContainer/helpers.ts | 111 ++++ .../ChatContainer/useChatContainer.ts | 257 +++----- .../Chat/components/ChatInput/ChatInput.tsx | 11 +- .../Chat/components/ChatInput/useChatInput.ts | 55 +- .../LastToolResponse/LastToolResponse.tsx | 15 +- .../ToolResponseMessage.tsx | 27 +- .../components/ToolResponseMessage/helpers.ts | 43 +- .../contextual/Chat/stream-executor.ts | 142 ++++ .../contextual/Chat/stream-utils.ts | 84 +++ .../src/components/contextual/Chat/useChat.ts | 41 +- .../contextual/Chat/useChatDrawer.ts | 17 - .../contextual/Chat/useChatSession.ts | 12 + .../contextual/Chat/useChatStream.ts | 615 +++--------------- .../providers/posthog/posthog-provider.tsx | 7 +- .../network-status/NetworkStatusMonitor.tsx | 8 + .../network-status/useNetworkStatus.ts | 28 + 32 files changed, 1274 insertions(+), 987 deletions(-) delete mode 100644 autogpt_platform/frontend/src/app/(platform)/copilot/NewChatContext.tsx create mode 100644 autogpt_platform/frontend/src/app/(platform)/copilot/copilot-page-store.ts create mode 100644 autogpt_platform/frontend/src/components/atoms/Skeleton/Skeleton.tsx create mode 100644 autogpt_platform/frontend/src/components/contextual/Chat/chat-store.ts create mode 100644 autogpt_platform/frontend/src/components/contextual/Chat/chat-types.ts create mode 100644 autogpt_platform/frontend/src/components/contextual/Chat/stream-executor.ts create mode 100644 autogpt_platform/frontend/src/components/contextual/Chat/stream-utils.ts delete mode 100644 autogpt_platform/frontend/src/components/contextual/Chat/useChatDrawer.ts create mode 100644 autogpt_platform/frontend/src/services/network-status/NetworkStatusMonitor.tsx create mode 100644 autogpt_platform/frontend/src/services/network-status/useNetworkStatus.ts diff --git a/autogpt_platform/frontend/src/app/(platform)/copilot/NewChatContext.tsx b/autogpt_platform/frontend/src/app/(platform)/copilot/NewChatContext.tsx deleted file mode 100644 index 0826637043..0000000000 --- a/autogpt_platform/frontend/src/app/(platform)/copilot/NewChatContext.tsx +++ /dev/null @@ -1,41 +0,0 @@ -"use client"; - -import { createContext, useContext, useRef, type ReactNode } from "react"; - -interface NewChatContextValue { - 
onNewChatClick: () => void; - setOnNewChatClick: (handler?: () => void) => void; - performNewChat?: () => void; - setPerformNewChat: (handler?: () => void) => void; -} - -const NewChatContext = createContext(null); - -export function NewChatProvider({ children }: { children: ReactNode }) { - const onNewChatRef = useRef<(() => void) | undefined>(); - const performNewChatRef = useRef<(() => void) | undefined>(); - const contextValueRef = useRef({ - onNewChatClick() { - onNewChatRef.current?.(); - }, - setOnNewChatClick(handler?: () => void) { - onNewChatRef.current = handler; - }, - performNewChat() { - performNewChatRef.current?.(); - }, - setPerformNewChat(handler?: () => void) { - performNewChatRef.current = handler; - }, - }); - - return ( - - {children} - - ); -} - -export function useNewChat() { - return useContext(NewChatContext); -} diff --git a/autogpt_platform/frontend/src/app/(platform)/copilot/components/CopilotShell/CopilotShell.tsx b/autogpt_platform/frontend/src/app/(platform)/copilot/components/CopilotShell/CopilotShell.tsx index 44e32024a8..fb22640302 100644 --- a/autogpt_platform/frontend/src/app/(platform)/copilot/components/CopilotShell/CopilotShell.tsx +++ b/autogpt_platform/frontend/src/app/(platform)/copilot/components/CopilotShell/CopilotShell.tsx @@ -4,7 +4,7 @@ import { ChatLoader } from "@/components/contextual/Chat/components/ChatLoader/C import { NAVBAR_HEIGHT_PX } from "@/lib/constants"; import type { ReactNode } from "react"; import { useEffect } from "react"; -import { useNewChat } from "../../NewChatContext"; +import { useCopilotStore } from "../../copilot-page-store"; import { DesktopSidebar } from "./components/DesktopSidebar/DesktopSidebar"; import { LoadingState } from "./components/LoadingState/LoadingState"; import { MobileDrawer } from "./components/MobileDrawer/MobileDrawer"; @@ -35,21 +35,23 @@ export function CopilotShell({ children }: Props) { isReadyToShowContent, } = useCopilotShell(); - const newChatContext = useNewChat(); - const handleNewChatClickWrapper = - newChatContext?.onNewChatClick || handleNewChat; + const setNewChatHandler = useCopilotStore((s) => s.setNewChatHandler); + const requestNewChat = useCopilotStore((s) => s.requestNewChat); useEffect( function registerNewChatHandler() { - if (!newChatContext) return; - newChatContext.setPerformNewChat(handleNewChat); + setNewChatHandler(handleNewChat); return function cleanup() { - newChatContext.setPerformNewChat(undefined); + setNewChatHandler(null); }; }, - [newChatContext, handleNewChat], + [handleNewChat], ); + function handleNewChatClick() { + requestNewChat(); + } + if (!isLoggedIn) { return (
@@ -72,7 +74,7 @@ export function CopilotShell({ children }: Props) { isFetchingNextPage={isFetchingNextPage} onSelectSession={handleSelectSession} onFetchNextPage={fetchNextPage} - onNewChat={handleNewChatClickWrapper} + onNewChat={handleNewChatClick} hasActiveSession={Boolean(hasActiveSession)} /> )} @@ -94,7 +96,7 @@ export function CopilotShell({ children }: Props) { isFetchingNextPage={isFetchingNextPage} onSelectSession={handleSelectSession} onFetchNextPage={fetchNextPage} - onNewChat={handleNewChatClickWrapper} + onNewChat={handleNewChatClick} onClose={handleCloseDrawer} onOpenChange={handleDrawerOpenChange} hasActiveSession={Boolean(hasActiveSession)} diff --git a/autogpt_platform/frontend/src/app/(platform)/copilot/components/CopilotShell/components/SessionsList/useSessionsPagination.ts b/autogpt_platform/frontend/src/app/(platform)/copilot/components/CopilotShell/components/SessionsList/useSessionsPagination.ts index 8833a419c1..1f241f992a 100644 --- a/autogpt_platform/frontend/src/app/(platform)/copilot/components/CopilotShell/components/SessionsList/useSessionsPagination.ts +++ b/autogpt_platform/frontend/src/app/(platform)/copilot/components/CopilotShell/components/SessionsList/useSessionsPagination.ts @@ -1,7 +1,12 @@ -import { useGetV2ListSessions } from "@/app/api/__generated__/endpoints/chat/chat"; +import { + getGetV2ListSessionsQueryKey, + useGetV2ListSessions, +} from "@/app/api/__generated__/endpoints/chat/chat"; import type { SessionSummaryResponse } from "@/app/api/__generated__/models/sessionSummaryResponse"; import { okData } from "@/app/api/helpers"; -import { useEffect, useMemo, useState } from "react"; +import { useChatStore } from "@/components/contextual/Chat/chat-store"; +import { useQueryClient } from "@tanstack/react-query"; +import { useEffect, useState } from "react"; const PAGE_SIZE = 50; @@ -15,6 +20,8 @@ export function useSessionsPagination({ enabled }: UseSessionsPaginationArgs) { SessionSummaryResponse[] >([]); const [totalCount, setTotalCount] = useState(null); + const queryClient = useQueryClient(); + const onStreamComplete = useChatStore((state) => state.onStreamComplete); const { data, isLoading, isFetching, isError } = useGetV2ListSessions( { limit: PAGE_SIZE, offset }, @@ -25,35 +32,47 @@ export function useSessionsPagination({ enabled }: UseSessionsPaginationArgs) { }, ); - useEffect(() => { - const responseData = okData(data); - if (responseData) { - const newSessions = responseData.sessions; - const total = responseData.total; - setTotalCount(total); - - if (offset === 0) { - setAccumulatedSessions(newSessions); - } else { - setAccumulatedSessions((prev) => [...prev, ...newSessions]); - } - } else if (!enabled) { + useEffect(function refreshOnStreamComplete() { + const unsubscribe = onStreamComplete(function handleStreamComplete() { + setOffset(0); setAccumulatedSessions([]); setTotalCount(null); - } - }, [data, offset, enabled]); + queryClient.invalidateQueries({ + queryKey: getGetV2ListSessionsQueryKey(), + }); + }); + return unsubscribe; + }, []); - const hasNextPage = useMemo(() => { - if (totalCount === null) return false; - return accumulatedSessions.length < totalCount; - }, [accumulatedSessions.length, totalCount]); + useEffect( + function updateSessionsFromResponse() { + const responseData = okData(data); + if (responseData) { + const newSessions = responseData.sessions; + const total = responseData.total; + setTotalCount(total); - const areAllSessionsLoaded = useMemo(() => { - if (totalCount === null) return false; - return ( - 
accumulatedSessions.length >= totalCount && !isFetching && !isLoading - ); - }, [accumulatedSessions.length, totalCount, isFetching, isLoading]); + if (offset === 0) { + setAccumulatedSessions(newSessions); + } else { + setAccumulatedSessions((prev) => [...prev, ...newSessions]); + } + } else if (!enabled) { + setAccumulatedSessions([]); + setTotalCount(null); + } + }, + [data, offset, enabled], + ); + + const hasNextPage = + totalCount !== null && accumulatedSessions.length < totalCount; + + const areAllSessionsLoaded = + totalCount !== null && + accumulatedSessions.length >= totalCount && + !isFetching && + !isLoading; useEffect(() => { if ( diff --git a/autogpt_platform/frontend/src/app/(platform)/copilot/components/CopilotShell/helpers.ts b/autogpt_platform/frontend/src/app/(platform)/copilot/components/CopilotShell/helpers.ts index bf4eb70ccb..3e932848a0 100644 --- a/autogpt_platform/frontend/src/app/(platform)/copilot/components/CopilotShell/helpers.ts +++ b/autogpt_platform/frontend/src/app/(platform)/copilot/components/CopilotShell/helpers.ts @@ -2,9 +2,7 @@ import type { SessionDetailResponse } from "@/app/api/__generated__/models/sessi import type { SessionSummaryResponse } from "@/app/api/__generated__/models/sessionSummaryResponse"; import { format, formatDistanceToNow, isToday } from "date-fns"; -export function convertSessionDetailToSummary( - session: SessionDetailResponse, -): SessionSummaryResponse { +export function convertSessionDetailToSummary(session: SessionDetailResponse) { return { id: session.id, created_at: session.created_at, @@ -13,17 +11,25 @@ export function convertSessionDetailToSummary( }; } -export function filterVisibleSessions( - sessions: SessionSummaryResponse[], -): SessionSummaryResponse[] { - return sessions.filter( - (session) => session.updated_at !== session.created_at, - ); +export function filterVisibleSessions(sessions: SessionSummaryResponse[]) { + const fiveMinutesAgo = Date.now() - 5 * 60 * 1000; + return sessions.filter((session) => { + const hasBeenUpdated = session.updated_at !== session.created_at; + + if (hasBeenUpdated) return true; + + const isRecentlyCreated = + new Date(session.created_at).getTime() > fiveMinutesAgo; + + return isRecentlyCreated; + }); } -export function getSessionTitle(session: SessionSummaryResponse): string { +export function getSessionTitle(session: SessionSummaryResponse) { if (session.title) return session.title; + const isNewSession = session.updated_at === session.created_at; + if (isNewSession) { const createdDate = new Date(session.created_at); if (isToday(createdDate)) { @@ -31,12 +37,11 @@ export function getSessionTitle(session: SessionSummaryResponse): string { } return format(createdDate, "MMM d, yyyy"); } + return "Untitled Chat"; } -export function getSessionUpdatedLabel( - session: SessionSummaryResponse, -): string { +export function getSessionUpdatedLabel(session: SessionSummaryResponse) { if (!session.updated_at) return ""; return formatDistanceToNow(new Date(session.updated_at), { addSuffix: true }); } @@ -45,8 +50,10 @@ export function mergeCurrentSessionIntoList( accumulatedSessions: SessionSummaryResponse[], currentSessionId: string | null, currentSessionData: SessionDetailResponse | null | undefined, -): SessionSummaryResponse[] { + recentlyCreatedSessions?: Map, +) { const filteredSessions: SessionSummaryResponse[] = []; + const addedIds = new Set(); if (accumulatedSessions.length > 0) { const visibleSessions = filterVisibleSessions(accumulatedSessions); @@ -61,29 +68,40 @@ export function 
mergeCurrentSessionIntoList( ); if (!isInVisible) { filteredSessions.push(currentInAll); + addedIds.add(currentInAll.id); } } } - filteredSessions.push(...visibleSessions); + for (const session of visibleSessions) { + if (!addedIds.has(session.id)) { + filteredSessions.push(session); + addedIds.add(session.id); + } + } } if (currentSessionId && currentSessionData) { - const isCurrentInList = filteredSessions.some( - (s) => s.id === currentSessionId, - ); - if (!isCurrentInList) { + if (!addedIds.has(currentSessionId)) { const summarySession = convertSessionDetailToSummary(currentSessionData); filteredSessions.unshift(summarySession); + addedIds.add(currentSessionId); + } + } + + if (recentlyCreatedSessions) { + for (const [sessionId, sessionData] of recentlyCreatedSessions) { + if (!addedIds.has(sessionId)) { + filteredSessions.unshift(sessionData); + addedIds.add(sessionId); + } } } return filteredSessions; } -export function getCurrentSessionId( - searchParams: URLSearchParams, -): string | null { +export function getCurrentSessionId(searchParams: URLSearchParams) { return searchParams.get("sessionId"); } @@ -95,11 +113,7 @@ export function shouldAutoSelectSession( accumulatedSessions: SessionSummaryResponse[], isLoading: boolean, totalCount: number | null, -): { - shouldSelect: boolean; - sessionIdToSelect: string | null; - shouldCreate: boolean; -} { +) { if (!areAllSessionsLoaded || hasAutoSelectedSession) { return { shouldSelect: false, @@ -146,7 +160,7 @@ export function checkReadyToShowContent( isCurrentSessionLoading: boolean, currentSessionData: SessionDetailResponse | null | undefined, hasAutoSelectedSession: boolean, -): boolean { +) { if (!areAllSessionsLoaded) return false; if (paramSessionId) { diff --git a/autogpt_platform/frontend/src/app/(platform)/copilot/components/CopilotShell/useCopilotShell.ts b/autogpt_platform/frontend/src/app/(platform)/copilot/components/CopilotShell/useCopilotShell.ts index cadd98da3e..a3aa0b55b2 100644 --- a/autogpt_platform/frontend/src/app/(platform)/copilot/components/CopilotShell/useCopilotShell.ts +++ b/autogpt_platform/frontend/src/app/(platform)/copilot/components/CopilotShell/useCopilotShell.ts @@ -4,23 +4,25 @@ import { getGetV2ListSessionsQueryKey, useGetV2GetSession, } from "@/app/api/__generated__/endpoints/chat/chat"; +import type { SessionSummaryResponse } from "@/app/api/__generated__/models/sessionSummaryResponse"; import { okData } from "@/app/api/helpers"; import { useBreakpoint } from "@/lib/hooks/useBreakpoint"; import { useSupabase } from "@/lib/supabase/hooks/useSupabase"; import { useQueryClient } from "@tanstack/react-query"; -import { usePathname, useRouter, useSearchParams } from "next/navigation"; +import { parseAsString, useQueryState } from "nuqs"; +import { usePathname, useSearchParams } from "next/navigation"; import { useEffect, useRef, useState } from "react"; import { useMobileDrawer } from "./components/MobileDrawer/useMobileDrawer"; import { useSessionsPagination } from "./components/SessionsList/useSessionsPagination"; import { checkReadyToShowContent, + convertSessionDetailToSummary, filterVisibleSessions, getCurrentSessionId, mergeCurrentSessionIntoList, } from "./helpers"; export function useCopilotShell() { - const router = useRouter(); const pathname = usePathname(); const searchParams = useSearchParams(); const queryClient = useQueryClient(); @@ -29,6 +31,8 @@ export function useCopilotShell() { const isMobile = breakpoint === "base" || breakpoint === "sm" || breakpoint === "md"; + const [, 
setUrlSessionId] = useQueryState("sessionId", parseAsString); + const isOnHomepage = pathname === "/copilot"; const paramSessionId = searchParams.get("sessionId"); @@ -65,6 +69,9 @@ export function useCopilotShell() { const [hasAutoSelectedSession, setHasAutoSelectedSession] = useState(false); const hasAutoSelectedRef = useRef(false); + const recentlyCreatedSessionsRef = useRef< + Map + >(new Map()); // Mark as auto-selected when sessionId is in URL useEffect(() => { @@ -91,6 +98,30 @@ export function useCopilotShell() { } }, [isOnHomepage, paramSessionId, queryClient]); + // Track newly created sessions to ensure they stay visible even when switching away + useEffect(() => { + if (currentSessionId && currentSessionData) { + const isNewSession = + currentSessionData.updated_at === currentSessionData.created_at; + const isNotInAccumulated = !accumulatedSessions.some( + (s) => s.id === currentSessionId, + ); + if (isNewSession || isNotInAccumulated) { + const summary = convertSessionDetailToSummary(currentSessionData); + recentlyCreatedSessionsRef.current.set(currentSessionId, summary); + } + } + }, [currentSessionId, currentSessionData, accumulatedSessions]); + + // Clean up recently created sessions that are now in the accumulated list + useEffect(() => { + for (const sessionId of recentlyCreatedSessionsRef.current.keys()) { + if (accumulatedSessions.some((s) => s.id === sessionId)) { + recentlyCreatedSessionsRef.current.delete(sessionId); + } + } + }, [accumulatedSessions]); + // Reset pagination when query becomes disabled const prevPaginationEnabledRef = useRef(paginationEnabled); useEffect(() => { @@ -105,6 +136,7 @@ export function useCopilotShell() { accumulatedSessions, currentSessionId, currentSessionData, + recentlyCreatedSessionsRef.current, ); const visibleSessions = filterVisibleSessions(sessions); @@ -124,22 +156,17 @@ export function useCopilotShell() { ); function handleSelectSession(sessionId: string) { - // Navigate using replaceState to avoid full page reload - window.history.replaceState(null, "", `/copilot?sessionId=${sessionId}`); - // Force a re-render by updating the URL through router - router.replace(`/copilot?sessionId=${sessionId}`); + setUrlSessionId(sessionId, { shallow: false }); if (isMobile) handleCloseDrawer(); } function handleNewChat() { resetAutoSelect(); resetPagination(); - // Invalidate and refetch sessions list to ensure newly created sessions appear queryClient.invalidateQueries({ queryKey: getGetV2ListSessionsQueryKey(), }); - window.history.replaceState(null, "", "/copilot"); - router.replace("/copilot"); + setUrlSessionId(null, { shallow: false }); if (isMobile) handleCloseDrawer(); } diff --git a/autogpt_platform/frontend/src/app/(platform)/copilot/copilot-page-store.ts b/autogpt_platform/frontend/src/app/(platform)/copilot/copilot-page-store.ts new file mode 100644 index 0000000000..22bf5000a1 --- /dev/null +++ b/autogpt_platform/frontend/src/app/(platform)/copilot/copilot-page-store.ts @@ -0,0 +1,54 @@ +"use client"; + +import { create } from "zustand"; + +interface CopilotStoreState { + isStreaming: boolean; + isNewChatModalOpen: boolean; + newChatHandler: (() => void) | null; +} + +interface CopilotStoreActions { + setIsStreaming: (isStreaming: boolean) => void; + setNewChatHandler: (handler: (() => void) | null) => void; + requestNewChat: () => void; + confirmNewChat: () => void; + cancelNewChat: () => void; +} + +type CopilotStore = CopilotStoreState & CopilotStoreActions; + +export const useCopilotStore = create((set, get) => ({ + 
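+  // Consumers should select individual actions, e.g.
+  //   const requestNewChat = useCopilotStore((s) => s.requestNewChat);
+  // so components receive stable function references rather than a new object
+  // on every render (the infinite-loop fix described in this PR).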
isStreaming: false, + isNewChatModalOpen: false, + newChatHandler: null, + + setIsStreaming(isStreaming) { + set({ isStreaming }); + }, + + setNewChatHandler(handler) { + set({ newChatHandler: handler }); + }, + + requestNewChat() { + const { isStreaming, newChatHandler } = get(); + if (isStreaming) { + set({ isNewChatModalOpen: true }); + } else if (newChatHandler) { + newChatHandler(); + } + }, + + confirmNewChat() { + const { newChatHandler } = get(); + set({ isNewChatModalOpen: false }); + if (newChatHandler) { + newChatHandler(); + } + }, + + cancelNewChat() { + set({ isNewChatModalOpen: false }); + }, +})); diff --git a/autogpt_platform/frontend/src/app/(platform)/copilot/layout.tsx b/autogpt_platform/frontend/src/app/(platform)/copilot/layout.tsx index 0f40de8f25..89cf72e2ba 100644 --- a/autogpt_platform/frontend/src/app/(platform)/copilot/layout.tsx +++ b/autogpt_platform/frontend/src/app/(platform)/copilot/layout.tsx @@ -1,11 +1,6 @@ import type { ReactNode } from "react"; -import { NewChatProvider } from "./NewChatContext"; import { CopilotShell } from "./components/CopilotShell/CopilotShell"; export default function CopilotLayout({ children }: { children: ReactNode }) { - return ( - - {children} - - ); + return {children}; } diff --git a/autogpt_platform/frontend/src/app/(platform)/copilot/page.tsx b/autogpt_platform/frontend/src/app/(platform)/copilot/page.tsx index 3bbafd087b..83b21bf82e 100644 --- a/autogpt_platform/frontend/src/app/(platform)/copilot/page.tsx +++ b/autogpt_platform/frontend/src/app/(platform)/copilot/page.tsx @@ -1,16 +1,19 @@ "use client"; -import { Skeleton } from "@/components/__legacy__/ui/skeleton"; import { Button } from "@/components/atoms/Button/Button"; + +import { Skeleton } from "@/components/atoms/Skeleton/Skeleton"; import { Text } from "@/components/atoms/Text/Text"; import { Chat } from "@/components/contextual/Chat/Chat"; import { ChatInput } from "@/components/contextual/Chat/components/ChatInput/ChatInput"; import { ChatLoader } from "@/components/contextual/Chat/components/ChatLoader/ChatLoader"; import { Dialog } from "@/components/molecules/Dialog/Dialog"; +import { useCopilotStore } from "./copilot-page-store"; import { useCopilotPage } from "./useCopilotPage"; export default function CopilotPage() { const { state, handlers } = useCopilotPage(); + const confirmNewChat = useCopilotStore((s) => s.confirmNewChat); const { greetingName, quickActions, @@ -25,15 +28,11 @@ export default function CopilotPage() { handleSessionNotFound, handleStreamingChange, handleCancelNewChat, - proceedWithNewChat, handleNewChatModalOpen, } = handlers; - if (!isReady) { - return null; - } + if (!isReady) return null; - // Show Chat when we have an active session if (pageState.type === "chat") { return (
@@ -71,7 +70,7 @@ export default function CopilotPage() { @@ -83,7 +82,7 @@ export default function CopilotPage() { ); } - if (pageState.type === "newChat") { + if (pageState.type === "newChat" || pageState.type === "creating") { return (
@@ -96,21 +95,6 @@ export default function CopilotPage() { ); } - // Show loading state while creating session and sending first message - if (pageState.type === "creating") { - return ( -
-
- - - Loading your chats... - -
-
- ); - } - - // Show Welcome screen return (
diff --git a/autogpt_platform/frontend/src/app/(platform)/copilot/useCopilotPage.ts b/autogpt_platform/frontend/src/app/(platform)/copilot/useCopilotPage.ts index cb13137432..1d9c843d7d 100644 --- a/autogpt_platform/frontend/src/app/(platform)/copilot/useCopilotPage.ts +++ b/autogpt_platform/frontend/src/app/(platform)/copilot/useCopilotPage.ts @@ -1,4 +1,7 @@ -import { postV2CreateSession } from "@/app/api/__generated__/endpoints/chat/chat"; +import { + getGetV2ListSessionsQueryKey, + postV2CreateSession, +} from "@/app/api/__generated__/endpoints/chat/chat"; import { useToast } from "@/components/molecules/Toast/use-toast"; import { getHomepageRoute } from "@/lib/constants"; import { useSupabase } from "@/lib/supabase/hooks/useSupabase"; @@ -8,25 +11,22 @@ import { useGetFlag, } from "@/services/feature-flags/use-get-flag"; import * as Sentry from "@sentry/nextjs"; +import { useQueryClient } from "@tanstack/react-query"; import { useFlags } from "launchdarkly-react-client-sdk"; import { useRouter } from "next/navigation"; import { useEffect, useReducer } from "react"; -import { useNewChat } from "./NewChatContext"; +import { useCopilotStore } from "./copilot-page-store"; import { getGreetingName, getQuickActions, type PageState } from "./helpers"; import { useCopilotURLState } from "./useCopilotURLState"; type CopilotState = { pageState: PageState; - isStreaming: boolean; - isNewChatModalOpen: boolean; initialPrompts: Record; previousSessionId: string | null; }; type CopilotAction = | { type: "setPageState"; pageState: PageState } - | { type: "setStreaming"; isStreaming: boolean } - | { type: "setNewChatModalOpen"; isOpen: boolean } | { type: "setInitialPrompt"; sessionId: string; prompt: string } | { type: "setPreviousSessionId"; sessionId: string | null }; @@ -52,14 +52,6 @@ function copilotReducer( if (isSamePageState(action.pageState, state.pageState)) return state; return { ...state, pageState: action.pageState }; } - if (action.type === "setStreaming") { - if (action.isStreaming === state.isStreaming) return state; - return { ...state, isStreaming: action.isStreaming }; - } - if (action.type === "setNewChatModalOpen") { - if (action.isOpen === state.isNewChatModalOpen) return state; - return { ...state, isNewChatModalOpen: action.isOpen }; - } if (action.type === "setInitialPrompt") { if (state.initialPrompts[action.sessionId] === action.prompt) return state; return { @@ -79,9 +71,14 @@ function copilotReducer( export function useCopilotPage() { const router = useRouter(); + const queryClient = useQueryClient(); const { user, isLoggedIn, isUserLoading } = useSupabase(); const { toast } = useToast(); + const isNewChatModalOpen = useCopilotStore((s) => s.isNewChatModalOpen); + const setIsStreaming = useCopilotStore((s) => s.setIsStreaming); + const cancelNewChat = useCopilotStore((s) => s.cancelNewChat); + const isChatEnabled = useGetFlag(Flag.CHAT); const flags = useFlags(); const homepageRoute = getHomepageRoute(isChatEnabled); @@ -93,13 +90,10 @@ export function useCopilotPage() { const [state, dispatch] = useReducer(copilotReducer, { pageState: { type: "welcome" }, - isStreaming: false, - isNewChatModalOpen: false, initialPrompts: {}, previousSessionId: null, }); - const newChatContext = useNewChat(); const greetingName = getGreetingName(user); const quickActions = getQuickActions(); @@ -124,17 +118,6 @@ export function useCopilotPage() { setPreviousSessionId, }); - useEffect( - function registerNewChatHandler() { - if (!newChatContext) return; - 
newChatContext.setOnNewChatClick(handleNewChatClick); - return function cleanup() { - newChatContext.setOnNewChatClick(undefined); - }; - }, - [newChatContext, handleNewChatClick], - ); - useEffect( function transitionNewChatToWelcome() { if (state.pageState.type === "newChat") { @@ -189,6 +172,10 @@ export function useCopilotPage() { prompt: trimmedPrompt, }); + await queryClient.invalidateQueries({ + queryKey: getGetV2ListSessionsQueryKey(), + }); + await setUrlSessionId(sessionId, { shallow: false }); dispatch({ type: "setPageState", @@ -211,37 +198,15 @@ export function useCopilotPage() { } function handleStreamingChange(isStreamingValue: boolean) { - dispatch({ type: "setStreaming", isStreaming: isStreamingValue }); - } - - async function proceedWithNewChat() { - dispatch({ type: "setNewChatModalOpen", isOpen: false }); - if (newChatContext?.performNewChat) { - newChatContext.performNewChat(); - return; - } - try { - await setUrlSessionId(null, { shallow: false }); - } catch (error) { - console.error("[CopilotPage] Failed to clear session:", error); - } - router.replace("/copilot"); + setIsStreaming(isStreamingValue); } function handleCancelNewChat() { - dispatch({ type: "setNewChatModalOpen", isOpen: false }); + cancelNewChat(); } function handleNewChatModalOpen(isOpen: boolean) { - dispatch({ type: "setNewChatModalOpen", isOpen }); - } - - function handleNewChatClick() { - if (state.isStreaming) { - dispatch({ type: "setNewChatModalOpen", isOpen: true }); - } else { - proceedWithNewChat(); - } + if (!isOpen) cancelNewChat(); } return { @@ -250,7 +215,7 @@ export function useCopilotPage() { quickActions, isLoading: isUserLoading, pageState: state.pageState, - isNewChatModalOpen: state.isNewChatModalOpen, + isNewChatModalOpen, isReady: isFlagReady && isChatEnabled !== false && isLoggedIn, }, handlers: { @@ -259,7 +224,6 @@ export function useCopilotPage() { handleSessionNotFound, handleStreamingChange, handleCancelNewChat, - proceedWithNewChat, handleNewChatModalOpen, }, }; diff --git a/autogpt_platform/frontend/src/app/(platform)/layout.tsx b/autogpt_platform/frontend/src/app/(platform)/layout.tsx index f5e3f3b99b..048110f8b2 100644 --- a/autogpt_platform/frontend/src/app/(platform)/layout.tsx +++ b/autogpt_platform/frontend/src/app/(platform)/layout.tsx @@ -1,10 +1,12 @@ import { Navbar } from "@/components/layout/Navbar/Navbar"; +import { NetworkStatusMonitor } from "@/services/network-status/NetworkStatusMonitor"; import { ReactNode } from "react"; import { AdminImpersonationBanner } from "./admin/components/AdminImpersonationBanner"; export default function PlatformLayout({ children }: { children: ReactNode }) { return (
+      <NetworkStatusMonitor />
{children}
diff --git a/autogpt_platform/frontend/src/components/atoms/Skeleton/Skeleton.tsx b/autogpt_platform/frontend/src/components/atoms/Skeleton/Skeleton.tsx new file mode 100644 index 0000000000..4789e281ce --- /dev/null +++ b/autogpt_platform/frontend/src/components/atoms/Skeleton/Skeleton.tsx @@ -0,0 +1,14 @@ +import { cn } from "@/lib/utils"; + +interface Props extends React.HTMLAttributes { + className?: string; +} + +export function Skeleton({ className, ...props }: Props) { + return ( +
+ ); +} diff --git a/autogpt_platform/frontend/src/components/atoms/Skeleton/skeleton.stories.tsx b/autogpt_platform/frontend/src/components/atoms/Skeleton/skeleton.stories.tsx index 04d87a6e0e..69bb7c3440 100644 --- a/autogpt_platform/frontend/src/components/atoms/Skeleton/skeleton.stories.tsx +++ b/autogpt_platform/frontend/src/components/atoms/Skeleton/skeleton.stories.tsx @@ -1,4 +1,4 @@ -import { Skeleton } from "@/components/__legacy__/ui/skeleton"; +import { Skeleton } from "./Skeleton"; import type { Meta, StoryObj } from "@storybook/nextjs"; const meta: Meta = { diff --git a/autogpt_platform/frontend/src/components/contextual/Chat/chat-store.ts b/autogpt_platform/frontend/src/components/contextual/Chat/chat-store.ts new file mode 100644 index 0000000000..28028369a9 --- /dev/null +++ b/autogpt_platform/frontend/src/components/contextual/Chat/chat-store.ts @@ -0,0 +1,234 @@ +"use client"; + +import { create } from "zustand"; +import type { + ActiveStream, + StreamChunk, + StreamCompleteCallback, + StreamResult, + StreamStatus, +} from "./chat-types"; +import { executeStream } from "./stream-executor"; + +const COMPLETED_STREAM_TTL = 5 * 60 * 1000; // 5 minutes + +interface ChatStoreState { + activeStreams: Map; + completedStreams: Map; + activeSessions: Set; + streamCompleteCallbacks: Set; +} + +interface ChatStoreActions { + startStream: ( + sessionId: string, + message: string, + isUserMessage: boolean, + context?: { url: string; content: string }, + onChunk?: (chunk: StreamChunk) => void, + ) => Promise; + stopStream: (sessionId: string) => void; + subscribeToStream: ( + sessionId: string, + onChunk: (chunk: StreamChunk) => void, + skipReplay?: boolean, + ) => () => void; + getStreamStatus: (sessionId: string) => StreamStatus; + getCompletedStream: (sessionId: string) => StreamResult | undefined; + clearCompletedStream: (sessionId: string) => void; + isStreaming: (sessionId: string) => boolean; + registerActiveSession: (sessionId: string) => void; + unregisterActiveSession: (sessionId: string) => void; + isSessionActive: (sessionId: string) => boolean; + onStreamComplete: (callback: StreamCompleteCallback) => () => void; +} + +type ChatStore = ChatStoreState & ChatStoreActions; + +function notifyStreamComplete( + callbacks: Set, + sessionId: string, +) { + for (const callback of callbacks) { + try { + callback(sessionId); + } catch (err) { + console.warn("[ChatStore] Stream complete callback error:", err); + } + } +} + +function cleanupCompletedStreams(completedStreams: Map) { + const now = Date.now(); + for (const [sessionId, result] of completedStreams) { + if (now - result.completedAt > COMPLETED_STREAM_TTL) { + completedStreams.delete(sessionId); + } + } +} + +function moveToCompleted( + activeStreams: Map, + completedStreams: Map, + streamCompleteCallbacks: Set, + sessionId: string, +) { + const stream = activeStreams.get(sessionId); + if (!stream) return; + + const result: StreamResult = { + sessionId, + status: stream.status, + chunks: stream.chunks, + completedAt: Date.now(), + error: stream.error, + }; + + completedStreams.set(sessionId, result); + activeStreams.delete(sessionId); + cleanupCompletedStreams(completedStreams); + + if (stream.status === "completed" || stream.status === "error") { + notifyStreamComplete(streamCompleteCallbacks, sessionId); + } +} + +export const useChatStore = create((set, get) => ({ + activeStreams: new Map(), + completedStreams: new Map(), + activeSessions: new Set(), + streamCompleteCallbacks: new Set(), + + startStream: async function 
startStream( + sessionId, + message, + isUserMessage, + context, + onChunk, + ) { + const { activeStreams, completedStreams, streamCompleteCallbacks } = get(); + + const existingStream = activeStreams.get(sessionId); + if (existingStream) { + existingStream.abortController.abort(); + moveToCompleted( + activeStreams, + completedStreams, + streamCompleteCallbacks, + sessionId, + ); + } + + const abortController = new AbortController(); + const initialCallbacks = new Set<(chunk: StreamChunk) => void>(); + if (onChunk) initialCallbacks.add(onChunk); + + const stream: ActiveStream = { + sessionId, + abortController, + status: "streaming", + startedAt: Date.now(), + chunks: [], + onChunkCallbacks: initialCallbacks, + }; + + activeStreams.set(sessionId, stream); + + try { + await executeStream(stream, message, isUserMessage, context); + } finally { + if (onChunk) stream.onChunkCallbacks.delete(onChunk); + if (stream.status !== "streaming") { + moveToCompleted( + activeStreams, + completedStreams, + streamCompleteCallbacks, + sessionId, + ); + } + } + }, + + stopStream: function stopStream(sessionId) { + const { activeStreams, completedStreams, streamCompleteCallbacks } = get(); + const stream = activeStreams.get(sessionId); + if (stream) { + stream.abortController.abort(); + stream.status = "completed"; + moveToCompleted( + activeStreams, + completedStreams, + streamCompleteCallbacks, + sessionId, + ); + } + }, + + subscribeToStream: function subscribeToStream( + sessionId, + onChunk, + skipReplay = false, + ) { + const { activeStreams } = get(); + + const stream = activeStreams.get(sessionId); + if (stream) { + if (!skipReplay) { + for (const chunk of stream.chunks) { + onChunk(chunk); + } + } + stream.onChunkCallbacks.add(onChunk); + return function unsubscribe() { + stream.onChunkCallbacks.delete(onChunk); + }; + } + + return function noop() {}; + }, + + getStreamStatus: function getStreamStatus(sessionId) { + const { activeStreams, completedStreams } = get(); + + const active = activeStreams.get(sessionId); + if (active) return active.status; + + const completed = completedStreams.get(sessionId); + if (completed) return completed.status; + + return "idle"; + }, + + getCompletedStream: function getCompletedStream(sessionId) { + return get().completedStreams.get(sessionId); + }, + + clearCompletedStream: function clearCompletedStream(sessionId) { + get().completedStreams.delete(sessionId); + }, + + isStreaming: function isStreaming(sessionId) { + const stream = get().activeStreams.get(sessionId); + return stream?.status === "streaming"; + }, + + registerActiveSession: function registerActiveSession(sessionId) { + get().activeSessions.add(sessionId); + }, + + unregisterActiveSession: function unregisterActiveSession(sessionId) { + get().activeSessions.delete(sessionId); + }, + + isSessionActive: function isSessionActive(sessionId) { + return get().activeSessions.has(sessionId); + }, + + onStreamComplete: function onStreamComplete(callback) { + const { streamCompleteCallbacks } = get(); + streamCompleteCallbacks.add(callback); + return function unsubscribe() { + streamCompleteCallbacks.delete(callback); + }; + }, +})); diff --git a/autogpt_platform/frontend/src/components/contextual/Chat/chat-types.ts b/autogpt_platform/frontend/src/components/contextual/Chat/chat-types.ts new file mode 100644 index 0000000000..8c8aa7b704 --- /dev/null +++ b/autogpt_platform/frontend/src/components/contextual/Chat/chat-types.ts @@ -0,0 +1,94 @@ +import type { ToolArguments, ToolResult } from "@/types/chat"; + 
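+// Stream lifecycle as tracked by chat-store: "idle" -> "streaming" while chunks
+// arrive, then "completed" or "error". Finished results are retained for a short
+// TTL so late subscribers can still read the final status; only active streams
+// replay their chunks to new subscribers.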
+export type StreamStatus = "idle" | "streaming" | "completed" | "error"; + +export interface StreamChunk { + type: + | "text_chunk" + | "text_ended" + | "tool_call" + | "tool_call_start" + | "tool_response" + | "login_needed" + | "need_login" + | "credentials_needed" + | "error" + | "usage" + | "stream_end"; + timestamp?: string; + content?: string; + message?: string; + code?: string; + details?: Record; + tool_id?: string; + tool_name?: string; + arguments?: ToolArguments; + result?: ToolResult; + success?: boolean; + idx?: number; + session_id?: string; + agent_info?: { + graph_id: string; + name: string; + trigger_type: string; + }; + provider?: string; + provider_name?: string; + credential_type?: string; + scopes?: string[]; + title?: string; + [key: string]: unknown; +} + +export type VercelStreamChunk = + | { type: "start"; messageId: string } + | { type: "finish" } + | { type: "text-start"; id: string } + | { type: "text-delta"; id: string; delta: string } + | { type: "text-end"; id: string } + | { type: "tool-input-start"; toolCallId: string; toolName: string } + | { + type: "tool-input-available"; + toolCallId: string; + toolName: string; + input: Record; + } + | { + type: "tool-output-available"; + toolCallId: string; + toolName?: string; + output: unknown; + success?: boolean; + } + | { + type: "usage"; + promptTokens: number; + completionTokens: number; + totalTokens: number; + } + | { + type: "error"; + errorText: string; + code?: string; + details?: Record; + }; + +export interface ActiveStream { + sessionId: string; + abortController: AbortController; + status: StreamStatus; + startedAt: number; + chunks: StreamChunk[]; + error?: Error; + onChunkCallbacks: Set<(chunk: StreamChunk) => void>; +} + +export interface StreamResult { + sessionId: string; + status: StreamStatus; + chunks: StreamChunk[]; + completedAt: number; + error?: Error; +} + +export type StreamCompleteCallback = (sessionId: string) => void; diff --git a/autogpt_platform/frontend/src/components/contextual/Chat/components/ChatContainer/ChatContainer.tsx b/autogpt_platform/frontend/src/components/contextual/Chat/components/ChatContainer/ChatContainer.tsx index 17748f8dbc..f062df1397 100644 --- a/autogpt_platform/frontend/src/components/contextual/Chat/components/ChatContainer/ChatContainer.tsx +++ b/autogpt_platform/frontend/src/components/contextual/Chat/components/ChatContainer/ChatContainer.tsx @@ -4,6 +4,7 @@ import { Text } from "@/components/atoms/Text/Text"; import { Dialog } from "@/components/molecules/Dialog/Dialog"; import { useBreakpoint } from "@/lib/hooks/useBreakpoint"; import { cn } from "@/lib/utils"; +import { GlobeHemisphereEastIcon } from "@phosphor-icons/react"; import { useEffect } from "react"; import { ChatInput } from "../ChatInput/ChatInput"; import { MessageList } from "../MessageList/MessageList"; @@ -55,24 +56,37 @@ export function ChatContainer({ )} > + + + Service unavailable + +
+ } controlled={{ isOpen: isRegionBlockedModalOpen, set: handleRegionModalOpenChange, }} onClose={handleRegionModalClose} + styling={{ maxWidth: 550, width: "100%", minWidth: "auto" }} > -
+
-            This model is not available in your region. Please connect via VPN
-            and try again.
+            The AutoGPT AI model is not available in your region or your
+            connection is blocking it. Please try again with a different
+            connection.
+
diff --git a/autogpt_platform/frontend/src/components/contextual/Chat/components/ChatContainer/createStreamEventDispatcher.ts b/autogpt_platform/frontend/src/components/contextual/Chat/components/ChatContainer/createStreamEventDispatcher.ts index 791cf046d5..82e9b05e88 100644 --- a/autogpt_platform/frontend/src/components/contextual/Chat/components/ChatContainer/createStreamEventDispatcher.ts +++ b/autogpt_platform/frontend/src/components/contextual/Chat/components/ChatContainer/createStreamEventDispatcher.ts @@ -1,5 +1,5 @@ import { toast } from "sonner"; -import { StreamChunk } from "../../useChatStream"; +import type { StreamChunk } from "../../chat-types"; import type { HandlerDependencies } from "./handlers"; import { handleError, diff --git a/autogpt_platform/frontend/src/components/contextual/Chat/components/ChatContainer/helpers.ts b/autogpt_platform/frontend/src/components/contextual/Chat/components/ChatContainer/helpers.ts index 0edd1b411a..7dee924634 100644 --- a/autogpt_platform/frontend/src/components/contextual/Chat/components/ChatContainer/helpers.ts +++ b/autogpt_platform/frontend/src/components/contextual/Chat/components/ChatContainer/helpers.ts @@ -1,7 +1,118 @@ +import type { SessionDetailResponse } from "@/app/api/__generated__/models/sessionDetailResponse"; import { SessionKey, sessionStorage } from "@/services/storage/session-storage"; import type { ToolResult } from "@/types/chat"; import type { ChatMessageData } from "../ChatMessage/useChatMessage"; +export function processInitialMessages( + initialMessages: SessionDetailResponse["messages"], +): ChatMessageData[] { + const processedMessages: ChatMessageData[] = []; + const toolCallMap = new Map(); + + for (const msg of initialMessages) { + if (!isValidMessage(msg)) { + console.warn("Invalid message structure from backend:", msg); + continue; + } + + let content = String(msg.content || ""); + const role = String(msg.role || "assistant").toLowerCase(); + const toolCalls = msg.tool_calls; + const timestamp = msg.timestamp + ? 
new Date(msg.timestamp as string) + : undefined; + + if (role === "user") { + content = removePageContext(content); + if (!content.trim()) continue; + processedMessages.push({ + type: "message", + role: "user", + content, + timestamp, + }); + continue; + } + + if (role === "assistant") { + content = content + .replace(/[\s\S]*?<\/thinking>/gi, "") + .replace(/[\s\S]*?<\/internal_reasoning>/gi, "") + .trim(); + + if (toolCalls && isToolCallArray(toolCalls) && toolCalls.length > 0) { + for (const toolCall of toolCalls) { + const toolName = toolCall.function.name; + const toolId = toolCall.id; + toolCallMap.set(toolId, toolName); + + try { + const args = JSON.parse(toolCall.function.arguments || "{}"); + processedMessages.push({ + type: "tool_call", + toolId, + toolName, + arguments: args, + timestamp, + }); + } catch (err) { + console.warn("Failed to parse tool call arguments:", err); + processedMessages.push({ + type: "tool_call", + toolId, + toolName, + arguments: {}, + timestamp, + }); + } + } + if (content.trim()) { + processedMessages.push({ + type: "message", + role: "assistant", + content, + timestamp, + }); + } + } else if (content.trim()) { + processedMessages.push({ + type: "message", + role: "assistant", + content, + timestamp, + }); + } + continue; + } + + if (role === "tool") { + const toolCallId = (msg.tool_call_id as string) || ""; + const toolName = toolCallMap.get(toolCallId) || "unknown"; + const toolResponse = parseToolResponse( + content, + toolCallId, + toolName, + timestamp, + ); + if (toolResponse) { + processedMessages.push(toolResponse); + } + continue; + } + + if (content.trim()) { + processedMessages.push({ + type: "message", + role: role as "user" | "assistant" | "system", + content, + timestamp, + }); + } + } + + return processedMessages; +} + export function hasSentInitialPrompt(sessionId: string): boolean { try { const sent = JSON.parse( diff --git a/autogpt_platform/frontend/src/components/contextual/Chat/components/ChatContainer/useChatContainer.ts b/autogpt_platform/frontend/src/components/contextual/Chat/components/ChatContainer/useChatContainer.ts index 42dd04670d..b7f9d305dd 100644 --- a/autogpt_platform/frontend/src/components/contextual/Chat/components/ChatContainer/useChatContainer.ts +++ b/autogpt_platform/frontend/src/components/contextual/Chat/components/ChatContainer/useChatContainer.ts @@ -1,5 +1,6 @@ import type { SessionDetailResponse } from "@/app/api/__generated__/models/sessionDetailResponse"; -import { useCallback, useEffect, useMemo, useRef, useState } from "react"; +import { useEffect, useMemo, useRef, useState } from "react"; +import { useChatStore } from "../../chat-store"; import { toast } from "sonner"; import { useChatStream } from "../../useChatStream"; import { usePageContext } from "../../usePageContext"; @@ -9,11 +10,8 @@ import { createUserMessage, filterAuthMessages, hasSentInitialPrompt, - isToolCallArray, - isValidMessage, markInitialPromptSent, - parseToolResponse, - removePageContext, + processInitialMessages, } from "./helpers"; interface Args { @@ -41,11 +39,18 @@ export function useChatContainer({ sendMessage: sendStreamMessage, stopStreaming, } = useChatStream(); + const activeStreams = useChatStore((s) => s.activeStreams); + const subscribeToStream = useChatStore((s) => s.subscribeToStream); const isStreaming = isStreamingInitiated || hasTextChunks; - useEffect(() => { - if (sessionId !== previousSessionIdRef.current) { - stopStreaming(previousSessionIdRef.current ?? 
undefined, true); + useEffect( + function handleSessionChange() { + if (sessionId === previousSessionIdRef.current) return; + + const prevSession = previousSessionIdRef.current; + if (prevSession) { + stopStreaming(prevSession); + } previousSessionIdRef.current = sessionId; setMessages([]); setStreamingChunks([]); @@ -53,138 +58,11 @@ export function useChatContainer({ setHasTextChunks(false); setIsStreamingInitiated(false); hasResponseRef.current = false; - } - }, [sessionId, stopStreaming]); - const allMessages = useMemo(() => { - const processedInitialMessages: ChatMessageData[] = []; - const toolCallMap = new Map(); + if (!sessionId) return; - for (const msg of initialMessages) { - if (!isValidMessage(msg)) { - console.warn("Invalid message structure from backend:", msg); - continue; - } - - let content = String(msg.content || ""); - const role = String(msg.role || "assistant").toLowerCase(); - const toolCalls = msg.tool_calls; - const timestamp = msg.timestamp - ? new Date(msg.timestamp as string) - : undefined; - - if (role === "user") { - content = removePageContext(content); - if (!content.trim()) continue; - processedInitialMessages.push({ - type: "message", - role: "user", - content, - timestamp, - }); - continue; - } - - if (role === "assistant") { - content = content - .replace(/[\s\S]*?<\/thinking>/gi, "") - .trim(); - - if (toolCalls && isToolCallArray(toolCalls) && toolCalls.length > 0) { - for (const toolCall of toolCalls) { - const toolName = toolCall.function.name; - const toolId = toolCall.id; - toolCallMap.set(toolId, toolName); - - try { - const args = JSON.parse(toolCall.function.arguments || "{}"); - processedInitialMessages.push({ - type: "tool_call", - toolId, - toolName, - arguments: args, - timestamp, - }); - } catch (err) { - console.warn("Failed to parse tool call arguments:", err); - processedInitialMessages.push({ - type: "tool_call", - toolId, - toolName, - arguments: {}, - timestamp, - }); - } - } - if (content.trim()) { - processedInitialMessages.push({ - type: "message", - role: "assistant", - content, - timestamp, - }); - } - } else if (content.trim()) { - processedInitialMessages.push({ - type: "message", - role: "assistant", - content, - timestamp, - }); - } - continue; - } - - if (role === "tool") { - const toolCallId = (msg.tool_call_id as string) || ""; - const toolName = toolCallMap.get(toolCallId) || "unknown"; - const toolResponse = parseToolResponse( - content, - toolCallId, - toolName, - timestamp, - ); - if (toolResponse) { - processedInitialMessages.push(toolResponse); - } - continue; - } - - if (content.trim()) { - processedInitialMessages.push({ - type: "message", - role: role as "user" | "assistant" | "system", - content, - timestamp, - }); - } - } - - return [...processedInitialMessages, ...messages]; - }, [initialMessages, messages]); - - const sendMessage = useCallback( - async function sendMessage( - content: string, - isUserMessage: boolean = true, - context?: { url: string; content: string }, - ) { - if (!sessionId) { - console.error("[useChatContainer] Cannot send message: no session ID"); - return; - } - setIsRegionBlockedModalOpen(false); - if (isUserMessage) { - const userMessage = createUserMessage(content); - setMessages((prev) => [...filterAuthMessages(prev), userMessage]); - } else { - setMessages((prev) => filterAuthMessages(prev)); - } - setStreamingChunks([]); - streamingChunksRef.current = []; - setHasTextChunks(false); - setIsStreamingInitiated(true); - hasResponseRef.current = false; + const activeStream = 
activeStreams.get(sessionId); + if (!activeStream || activeStream.status !== "streaming") return; const dispatcher = createStreamEventDispatcher({ setHasTextChunks, @@ -197,42 +75,85 @@ export function useChatContainer({ setIsStreamingInitiated, }); - try { - await sendStreamMessage( - sessionId, - content, - dispatcher, - isUserMessage, - context, - ); - } catch (err) { - console.error("[useChatContainer] Failed to send message:", err); - setIsStreamingInitiated(false); - - // Don't show error toast for AbortError (expected during cleanup) - if (err instanceof Error && err.name === "AbortError") return; - - const errorMessage = - err instanceof Error ? err.message : "Failed to send message"; - toast.error("Failed to send message", { - description: errorMessage, - }); - } + setIsStreamingInitiated(true); + const skipReplay = initialMessages.length > 0; + return subscribeToStream(sessionId, dispatcher, skipReplay); }, - [sessionId, sendStreamMessage], + [sessionId, stopStreaming, activeStreams, subscribeToStream], ); - const handleStopStreaming = useCallback(() => { + const allMessages = useMemo( + () => [...processInitialMessages(initialMessages), ...messages], + [initialMessages, messages], + ); + + async function sendMessage( + content: string, + isUserMessage: boolean = true, + context?: { url: string; content: string }, + ) { + if (!sessionId) { + console.error("[useChatContainer] Cannot send message: no session ID"); + return; + } + setIsRegionBlockedModalOpen(false); + if (isUserMessage) { + const userMessage = createUserMessage(content); + setMessages((prev) => [...filterAuthMessages(prev), userMessage]); + } else { + setMessages((prev) => filterAuthMessages(prev)); + } + setStreamingChunks([]); + streamingChunksRef.current = []; + setHasTextChunks(false); + setIsStreamingInitiated(true); + hasResponseRef.current = false; + + const dispatcher = createStreamEventDispatcher({ + setHasTextChunks, + setStreamingChunks, + streamingChunksRef, + hasResponseRef, + setMessages, + setIsRegionBlockedModalOpen, + sessionId, + setIsStreamingInitiated, + }); + + try { + await sendStreamMessage( + sessionId, + content, + dispatcher, + isUserMessage, + context, + ); + } catch (err) { + console.error("[useChatContainer] Failed to send message:", err); + setIsStreamingInitiated(false); + + if (err instanceof Error && err.name === "AbortError") return; + + const errorMessage = + err instanceof Error ? 
err.message : "Failed to send message"; + toast.error("Failed to send message", { + description: errorMessage, + }); + } + } + + function handleStopStreaming() { stopStreaming(); setStreamingChunks([]); streamingChunksRef.current = []; setHasTextChunks(false); setIsStreamingInitiated(false); - }, [stopStreaming]); + } const { capturePageContext } = usePageContext(); + const sendMessageRef = useRef(sendMessage); + sendMessageRef.current = sendMessage; - // Send initial prompt if provided (for new sessions from homepage) useEffect( function handleInitialPrompt() { if (!initialPrompt || !sessionId) return; @@ -241,15 +162,9 @@ export function useChatContainer({ markInitialPromptSent(sessionId); const context = capturePageContext(); - sendMessage(initialPrompt, true, context); + sendMessageRef.current(initialPrompt, true, context); }, - [ - initialPrompt, - sessionId, - initialMessages.length, - sendMessage, - capturePageContext, - ], + [initialPrompt, sessionId, initialMessages.length, capturePageContext], ); async function sendMessageWithContext( diff --git a/autogpt_platform/frontend/src/components/contextual/Chat/components/ChatInput/ChatInput.tsx b/autogpt_platform/frontend/src/components/contextual/Chat/components/ChatInput/ChatInput.tsx index 8cdecf0bf4..c45e8dc250 100644 --- a/autogpt_platform/frontend/src/components/contextual/Chat/components/ChatInput/ChatInput.tsx +++ b/autogpt_platform/frontend/src/components/contextual/Chat/components/ChatInput/ChatInput.tsx @@ -21,7 +21,7 @@ export function ChatInput({ className, }: Props) { const inputId = "chat-input"; - const { value, setValue, handleKeyDown, handleSend, hasMultipleLines } = + const { value, handleKeyDown, handleSubmit, handleChange, hasMultipleLines } = useChatInput({ onSend, disabled: disabled || isStreaming, @@ -29,15 +29,6 @@ export function ChatInput({ inputId, }); - function handleSubmit(e: React.FormEvent) { - e.preventDefault(); - handleSend(); - } - - function handleChange(e: React.ChangeEvent) { - setValue(e.target.value); - } - return (
diff --git a/autogpt_platform/frontend/src/components/contextual/Chat/components/ChatInput/useChatInput.ts b/autogpt_platform/frontend/src/components/contextual/Chat/components/ChatInput/useChatInput.ts index 93d764b026..6fa8e7252b 100644 --- a/autogpt_platform/frontend/src/components/contextual/Chat/components/ChatInput/useChatInput.ts +++ b/autogpt_platform/frontend/src/components/contextual/Chat/components/ChatInput/useChatInput.ts @@ -1,4 +1,10 @@ -import { KeyboardEvent, useCallback, useEffect, useState } from "react"; +import { + ChangeEvent, + FormEvent, + KeyboardEvent, + useEffect, + useState, +} from "react"; interface UseChatInputArgs { onSend: (message: string) => void; @@ -16,6 +22,23 @@ export function useChatInput({ const [value, setValue] = useState(""); const [hasMultipleLines, setHasMultipleLines] = useState(false); + useEffect( + function focusOnMount() { + const textarea = document.getElementById(inputId) as HTMLTextAreaElement; + if (textarea) textarea.focus(); + }, + [inputId], + ); + + useEffect( + function focusWhenEnabled() { + if (disabled) return; + const textarea = document.getElementById(inputId) as HTMLTextAreaElement; + if (textarea) textarea.focus(); + }, + [disabled, inputId], + ); + useEffect(() => { const textarea = document.getElementById(inputId) as HTMLTextAreaElement; const wrapper = document.getElementById( @@ -77,7 +100,7 @@ export function useChatInput({ } }, [value, maxRows, inputId]); - const handleSend = useCallback(() => { + const handleSend = () => { if (disabled || !value.trim()) return; onSend(value.trim()); setValue(""); @@ -93,23 +116,31 @@ export function useChatInput({ wrapper.style.height = ""; wrapper.style.maxHeight = ""; } - }, [value, onSend, disabled, inputId]); + }; - const handleKeyDown = useCallback( - (event: KeyboardEvent) => { - if (event.key === "Enter" && !event.shiftKey) { - event.preventDefault(); - handleSend(); - } - }, - [handleSend], - ); + function handleKeyDown(event: KeyboardEvent) { + if (event.key === "Enter" && !event.shiftKey) { + event.preventDefault(); + handleSend(); + } + } + + function handleSubmit(e: FormEvent) { + e.preventDefault(); + handleSend(); + } + + function handleChange(e: ChangeEvent) { + setValue(e.target.value); + } return { value, setValue, handleKeyDown, handleSend, + handleSubmit, + handleChange, hasMultipleLines, }; } diff --git a/autogpt_platform/frontend/src/components/contextual/Chat/components/MessageList/components/LastToolResponse/LastToolResponse.tsx b/autogpt_platform/frontend/src/components/contextual/Chat/components/MessageList/components/LastToolResponse/LastToolResponse.tsx index 3e6bf91ad2..15b10e5715 100644 --- a/autogpt_platform/frontend/src/components/contextual/Chat/components/MessageList/components/LastToolResponse/LastToolResponse.tsx +++ b/autogpt_platform/frontend/src/components/contextual/Chat/components/MessageList/components/LastToolResponse/LastToolResponse.tsx @@ -1,7 +1,5 @@ -import { AIChatBubble } from "../../../AIChatBubble/AIChatBubble"; import type { ChatMessageData } from "../../../ChatMessage/useChatMessage"; -import { MarkdownContent } from "../../../MarkdownContent/MarkdownContent"; -import { formatToolResponse } from "../../../ToolResponseMessage/helpers"; +import { ToolResponseMessage } from "../../../ToolResponseMessage/ToolResponseMessage"; import { shouldSkipAgentOutput } from "../../helpers"; export interface LastToolResponseProps { @@ -15,16 +13,15 @@ export function LastToolResponse({ }: LastToolResponseProps) { if (message.type !== 
"tool_response") return null; - // Skip if this is an agent_output that should be rendered inside assistant message if (shouldSkipAgentOutput(message, prevMessage)) return null; - const formattedText = formatToolResponse(message.result, message.toolName); - return (
- - - +
); } diff --git a/autogpt_platform/frontend/src/components/contextual/Chat/components/ToolResponseMessage/ToolResponseMessage.tsx b/autogpt_platform/frontend/src/components/contextual/Chat/components/ToolResponseMessage/ToolResponseMessage.tsx index 1ba10dd248..27da02beb8 100644 --- a/autogpt_platform/frontend/src/components/contextual/Chat/components/ToolResponseMessage/ToolResponseMessage.tsx +++ b/autogpt_platform/frontend/src/components/contextual/Chat/components/ToolResponseMessage/ToolResponseMessage.tsx @@ -1,7 +1,14 @@ +import { Text } from "@/components/atoms/Text/Text"; +import { cn } from "@/lib/utils"; import type { ToolResult } from "@/types/chat"; +import { WarningCircleIcon } from "@phosphor-icons/react"; import { AIChatBubble } from "../AIChatBubble/AIChatBubble"; import { MarkdownContent } from "../MarkdownContent/MarkdownContent"; -import { formatToolResponse } from "./helpers"; +import { + formatToolResponse, + getErrorMessage, + isErrorResponse, +} from "./helpers"; export interface ToolResponseMessageProps { toolId?: string; @@ -18,6 +25,24 @@ export function ToolResponseMessage({ success: _success, className, }: ToolResponseMessageProps) { + if (isErrorResponse(result)) { + const errorMessage = getErrorMessage(result); + return ( + +
+ + + {errorMessage} + +
+
+ ); + } + const formattedText = formatToolResponse(result, toolName); return ( diff --git a/autogpt_platform/frontend/src/components/contextual/Chat/components/ToolResponseMessage/helpers.ts b/autogpt_platform/frontend/src/components/contextual/Chat/components/ToolResponseMessage/helpers.ts index cf2bca95f7..400f32936e 100644 --- a/autogpt_platform/frontend/src/components/contextual/Chat/components/ToolResponseMessage/helpers.ts +++ b/autogpt_platform/frontend/src/components/contextual/Chat/components/ToolResponseMessage/helpers.ts @@ -1,3 +1,42 @@ +function stripInternalReasoning(content: string): string { + return content + .replace(/[\s\S]*?<\/internal_reasoning>/gi, "") + .replace(/[\s\S]*?<\/thinking>/gi, "") + .replace(/\n{3,}/g, "\n\n") + .trim(); +} + +export function isErrorResponse(result: unknown): boolean { + if (typeof result === "string") { + const lower = result.toLowerCase(); + return ( + lower.startsWith("error:") || + lower.includes("not found") || + lower.includes("does not exist") || + lower.includes("failed to") || + lower.includes("unable to") + ); + } + if (typeof result === "object" && result !== null) { + const response = result as Record; + return response.type === "error" || response.error !== undefined; + } + return false; +} + +export function getErrorMessage(result: unknown): string { + if (typeof result === "string") { + return stripInternalReasoning(result.replace(/^error:\s*/i, "")); + } + if (typeof result === "object" && result !== null) { + const response = result as Record; + if (response.error) return stripInternalReasoning(String(response.error)); + if (response.message) + return stripInternalReasoning(String(response.message)); + } + return "An error occurred"; +} + function getToolCompletionPhrase(toolName: string): string { const toolCompletionPhrases: Record = { add_understanding: "Updated your business information", @@ -28,10 +67,10 @@ export function formatToolResponse(result: unknown, toolName: string): string { const parsed = JSON.parse(trimmed); return formatToolResponse(parsed, toolName); } catch { - return trimmed; + return stripInternalReasoning(trimmed); } } - return result; + return stripInternalReasoning(result); } if (typeof result !== "object" || result === null) { diff --git a/autogpt_platform/frontend/src/components/contextual/Chat/stream-executor.ts b/autogpt_platform/frontend/src/components/contextual/Chat/stream-executor.ts new file mode 100644 index 0000000000..b0d970c286 --- /dev/null +++ b/autogpt_platform/frontend/src/components/contextual/Chat/stream-executor.ts @@ -0,0 +1,142 @@ +import type { + ActiveStream, + StreamChunk, + VercelStreamChunk, +} from "./chat-types"; +import { + INITIAL_RETRY_DELAY, + MAX_RETRIES, + normalizeStreamChunk, + parseSSELine, +} from "./stream-utils"; + +function notifySubscribers(stream: ActiveStream, chunk: StreamChunk) { + stream.chunks.push(chunk); + for (const callback of stream.onChunkCallbacks) { + try { + callback(chunk); + } catch (err) { + console.warn("[StreamExecutor] Subscriber callback error:", err); + } + } +} + +export async function executeStream( + stream: ActiveStream, + message: string, + isUserMessage: boolean, + context?: { url: string; content: string }, + retryCount: number = 0, +): Promise { + const { sessionId, abortController } = stream; + + try { + const url = `/api/chat/sessions/${sessionId}/stream`; + const body = JSON.stringify({ + message, + is_user_message: isUserMessage, + context: context || null, + }); + + const response = await fetch(url, { + method: "POST", 
+ headers: { + "Content-Type": "application/json", + Accept: "text/event-stream", + }, + body, + signal: abortController.signal, + }); + + if (!response.ok) { + const errorText = await response.text(); + throw new Error(errorText || `HTTP ${response.status}`); + } + + if (!response.body) { + throw new Error("Response body is null"); + } + + const reader = response.body.getReader(); + const decoder = new TextDecoder(); + let buffer = ""; + + while (true) { + const { done, value } = await reader.read(); + + if (done) { + notifySubscribers(stream, { type: "stream_end" }); + stream.status = "completed"; + return; + } + + buffer += decoder.decode(value, { stream: true }); + const lines = buffer.split("\n"); + buffer = lines.pop() || ""; + + for (const line of lines) { + const data = parseSSELine(line); + if (data !== null) { + if (data === "[DONE]") { + notifySubscribers(stream, { type: "stream_end" }); + stream.status = "completed"; + return; + } + + try { + const rawChunk = JSON.parse(data) as + | StreamChunk + | VercelStreamChunk; + const chunk = normalizeStreamChunk(rawChunk); + if (!chunk) continue; + + notifySubscribers(stream, chunk); + + if (chunk.type === "stream_end") { + stream.status = "completed"; + return; + } + + if (chunk.type === "error") { + stream.status = "error"; + stream.error = new Error( + chunk.message || chunk.content || "Stream error", + ); + return; + } + } catch (err) { + console.warn("[StreamExecutor] Failed to parse SSE chunk:", err); + } + } + } + } + } catch (err) { + if (err instanceof Error && err.name === "AbortError") { + notifySubscribers(stream, { type: "stream_end" }); + stream.status = "completed"; + return; + } + + if (retryCount < MAX_RETRIES) { + const retryDelay = INITIAL_RETRY_DELAY * Math.pow(2, retryCount); + console.log( + `[StreamExecutor] Retrying in ${retryDelay}ms (attempt ${retryCount + 1}/${MAX_RETRIES})`, + ); + await new Promise((resolve) => setTimeout(resolve, retryDelay)); + return executeStream( + stream, + message, + isUserMessage, + context, + retryCount + 1, + ); + } + + stream.status = "error"; + stream.error = err instanceof Error ? 
err : new Error("Stream failed"); + notifySubscribers(stream, { + type: "error", + message: stream.error.message, + }); + } +} diff --git a/autogpt_platform/frontend/src/components/contextual/Chat/stream-utils.ts b/autogpt_platform/frontend/src/components/contextual/Chat/stream-utils.ts new file mode 100644 index 0000000000..4100926e79 --- /dev/null +++ b/autogpt_platform/frontend/src/components/contextual/Chat/stream-utils.ts @@ -0,0 +1,84 @@ +import type { ToolArguments, ToolResult } from "@/types/chat"; +import type { StreamChunk, VercelStreamChunk } from "./chat-types"; + +const LEGACY_STREAM_TYPES = new Set([ + "text_chunk", + "text_ended", + "tool_call", + "tool_call_start", + "tool_response", + "login_needed", + "need_login", + "credentials_needed", + "error", + "usage", + "stream_end", +]); + +export function isLegacyStreamChunk( + chunk: StreamChunk | VercelStreamChunk, +): chunk is StreamChunk { + return LEGACY_STREAM_TYPES.has(chunk.type as StreamChunk["type"]); +} + +export function normalizeStreamChunk( + chunk: StreamChunk | VercelStreamChunk, +): StreamChunk | null { + if (isLegacyStreamChunk(chunk)) return chunk; + + switch (chunk.type) { + case "text-delta": + return { type: "text_chunk", content: chunk.delta }; + case "text-end": + return { type: "text_ended" }; + case "tool-input-available": + return { + type: "tool_call_start", + tool_id: chunk.toolCallId, + tool_name: chunk.toolName, + arguments: chunk.input as ToolArguments, + }; + case "tool-output-available": + return { + type: "tool_response", + tool_id: chunk.toolCallId, + tool_name: chunk.toolName, + result: chunk.output as ToolResult, + success: chunk.success ?? true, + }; + case "usage": + return { + type: "usage", + promptTokens: chunk.promptTokens, + completionTokens: chunk.completionTokens, + totalTokens: chunk.totalTokens, + }; + case "error": + return { + type: "error", + message: chunk.errorText, + code: chunk.code, + details: chunk.details, + }; + case "finish": + return { type: "stream_end" }; + case "start": + case "text-start": + return null; + case "tool-input-start": + return { + type: "tool_call_start", + tool_id: chunk.toolCallId, + tool_name: chunk.toolName, + arguments: {}, + }; + } +} + +export const MAX_RETRIES = 3; +export const INITIAL_RETRY_DELAY = 1000; + +export function parseSSELine(line: string): string | null { + if (line.startsWith("data: ")) return line.slice(6); + return null; +} diff --git a/autogpt_platform/frontend/src/components/contextual/Chat/useChat.ts b/autogpt_platform/frontend/src/components/contextual/Chat/useChat.ts index cf629a287c..f6b2031059 100644 --- a/autogpt_platform/frontend/src/components/contextual/Chat/useChat.ts +++ b/autogpt_platform/frontend/src/components/contextual/Chat/useChat.ts @@ -2,7 +2,6 @@ import { useSupabase } from "@/lib/supabase/hooks/useSupabase"; import { useEffect, useRef, useState } from "react"; -import { toast } from "sonner"; import { useChatSession } from "./useChatSession"; import { useChatStream } from "./useChatStream"; @@ -67,38 +66,16 @@ export function useChat({ urlSessionId }: UseChatArgs = {}) { ], ); - useEffect(() => { - if (isLoading || isCreating) { - const timer = setTimeout(() => { - setShowLoader(true); - }, 300); - return () => clearTimeout(timer); - } else { + useEffect( + function showLoaderWithDelay() { + if (isLoading || isCreating) { + const timer = setTimeout(() => setShowLoader(true), 300); + return () => clearTimeout(timer); + } setShowLoader(false); - } - }, [isLoading, isCreating]); - - useEffect(function 
monitorNetworkStatus() { - function handleOnline() { - toast.success("Connection restored", { - description: "You're back online", - }); - } - - function handleOffline() { - toast.error("You're offline", { - description: "Check your internet connection", - }); - } - - window.addEventListener("online", handleOnline); - window.addEventListener("offline", handleOffline); - - return () => { - window.removeEventListener("online", handleOnline); - window.removeEventListener("offline", handleOffline); - }; - }, []); + }, + [isLoading, isCreating], + ); function clearSession() { clearSessionBase(); diff --git a/autogpt_platform/frontend/src/components/contextual/Chat/useChatDrawer.ts b/autogpt_platform/frontend/src/components/contextual/Chat/useChatDrawer.ts deleted file mode 100644 index 62e1a5a569..0000000000 --- a/autogpt_platform/frontend/src/components/contextual/Chat/useChatDrawer.ts +++ /dev/null @@ -1,17 +0,0 @@ -"use client"; - -import { create } from "zustand"; - -interface ChatDrawerState { - isOpen: boolean; - open: () => void; - close: () => void; - toggle: () => void; -} - -export const useChatDrawer = create((set) => ({ - isOpen: false, - open: () => set({ isOpen: true }), - close: () => set({ isOpen: false }), - toggle: () => set((state) => ({ isOpen: !state.isOpen })), -})); diff --git a/autogpt_platform/frontend/src/components/contextual/Chat/useChatSession.ts b/autogpt_platform/frontend/src/components/contextual/Chat/useChatSession.ts index 553e348f79..dd743874f7 100644 --- a/autogpt_platform/frontend/src/components/contextual/Chat/useChatSession.ts +++ b/autogpt_platform/frontend/src/components/contextual/Chat/useChatSession.ts @@ -1,6 +1,7 @@ import { getGetV2GetSessionQueryKey, getGetV2GetSessionQueryOptions, + getGetV2ListSessionsQueryKey, postV2CreateSession, useGetV2GetSession, usePatchV2SessionAssignUser, @@ -101,6 +102,17 @@ export function useChatSession({ } }, [createError, loadError]); + useEffect( + function refreshSessionsListOnLoad() { + if (sessionId && sessionData && !isLoadingSession) { + queryClient.invalidateQueries({ + queryKey: getGetV2ListSessionsQueryKey(), + }); + } + }, + [sessionId, sessionData, isLoadingSession, queryClient], + ); + async function createSession() { try { setError(null); diff --git a/autogpt_platform/frontend/src/components/contextual/Chat/useChatStream.ts b/autogpt_platform/frontend/src/components/contextual/Chat/useChatStream.ts index 903c19cd30..5a9f637457 100644 --- a/autogpt_platform/frontend/src/components/contextual/Chat/useChatStream.ts +++ b/autogpt_platform/frontend/src/components/contextual/Chat/useChatStream.ts @@ -1,543 +1,110 @@ -import type { ToolArguments, ToolResult } from "@/types/chat"; -import { useCallback, useEffect, useRef, useState } from "react"; +"use client"; + +import { useEffect, useRef, useState } from "react"; import { toast } from "sonner"; +import { useChatStore } from "./chat-store"; +import type { StreamChunk } from "./chat-types"; -const MAX_RETRIES = 3; -const INITIAL_RETRY_DELAY = 1000; - -export interface StreamChunk { - type: - | "text_chunk" - | "text_ended" - | "tool_call" - | "tool_call_start" - | "tool_response" - | "login_needed" - | "need_login" - | "credentials_needed" - | "error" - | "usage" - | "stream_end"; - timestamp?: string; - content?: string; - message?: string; - code?: string; - details?: Record; - tool_id?: string; - tool_name?: string; - arguments?: ToolArguments; - result?: ToolResult; - success?: boolean; - idx?: number; - session_id?: string; - agent_info?: { - graph_id: 
string; - name: string; - trigger_type: string; - }; - provider?: string; - provider_name?: string; - credential_type?: string; - scopes?: string[]; - title?: string; - [key: string]: unknown; -} - -type VercelStreamChunk = - | { type: "start"; messageId: string } - | { type: "finish" } - | { type: "text-start"; id: string } - | { type: "text-delta"; id: string; delta: string } - | { type: "text-end"; id: string } - | { type: "tool-input-start"; toolCallId: string; toolName: string } - | { - type: "tool-input-available"; - toolCallId: string; - toolName: string; - input: ToolArguments; - } - | { - type: "tool-output-available"; - toolCallId: string; - toolName?: string; - output: ToolResult; - success?: boolean; - } - | { - type: "usage"; - promptTokens: number; - completionTokens: number; - totalTokens: number; - } - | { - type: "error"; - errorText: string; - code?: string; - details?: Record; - }; - -const LEGACY_STREAM_TYPES = new Set([ - "text_chunk", - "text_ended", - "tool_call", - "tool_call_start", - "tool_response", - "login_needed", - "need_login", - "credentials_needed", - "error", - "usage", - "stream_end", -]); - -function isLegacyStreamChunk( - chunk: StreamChunk | VercelStreamChunk, -): chunk is StreamChunk { - return LEGACY_STREAM_TYPES.has(chunk.type as StreamChunk["type"]); -} - -function normalizeStreamChunk( - chunk: StreamChunk | VercelStreamChunk, -): StreamChunk | null { - if (isLegacyStreamChunk(chunk)) { - return chunk; - } - switch (chunk.type) { - case "text-delta": - return { type: "text_chunk", content: chunk.delta }; - case "text-end": - return { type: "text_ended" }; - case "tool-input-available": - return { - type: "tool_call_start", - tool_id: chunk.toolCallId, - tool_name: chunk.toolName, - arguments: chunk.input, - }; - case "tool-output-available": - return { - type: "tool_response", - tool_id: chunk.toolCallId, - tool_name: chunk.toolName, - result: chunk.output, - success: chunk.success ?? true, - }; - case "usage": - return { - type: "usage", - promptTokens: chunk.promptTokens, - completionTokens: chunk.completionTokens, - totalTokens: chunk.totalTokens, - }; - case "error": - return { - type: "error", - message: chunk.errorText, - code: chunk.code, - details: chunk.details, - }; - case "finish": - return { type: "stream_end" }; - case "start": - case "text-start": - return null; - case "tool-input-start": - const toolInputStart = chunk as Extract< - VercelStreamChunk, - { type: "tool-input-start" } - >; - return { - type: "tool_call_start", - tool_id: toolInputStart.toolCallId, - tool_name: toolInputStart.toolName, - arguments: {}, - }; - } -} +export type { StreamChunk } from "./chat-types"; export function useChatStream() { const [isStreaming, setIsStreaming] = useState(false); const [error, setError] = useState(null); - const retryCountRef = useRef(0); - const retryTimeoutRef = useRef(null); - const abortControllerRef = useRef(null); const currentSessionIdRef = useRef(null); - const requestStartTimeRef = useRef(null); - - const stopStreaming = useCallback( - (sessionId?: string, force: boolean = false) => { - console.log("[useChatStream] stopStreaming called", { - hasAbortController: !!abortControllerRef.current, - isAborted: abortControllerRef.current?.signal.aborted, - currentSessionId: currentSessionIdRef.current, - requestedSessionId: sessionId, - requestStartTime: requestStartTimeRef.current, - timeSinceStart: requestStartTimeRef.current - ? 
Date.now() - requestStartTimeRef.current - : null, - force, - stack: new Error().stack, - }); - - if ( - sessionId && - currentSessionIdRef.current && - currentSessionIdRef.current !== sessionId - ) { - console.log( - "[useChatStream] Session changed, aborting previous stream", - { - oldSessionId: currentSessionIdRef.current, - newSessionId: sessionId, - }, - ); - } - - const controller = abortControllerRef.current; - if (controller) { - const timeSinceStart = requestStartTimeRef.current - ? Date.now() - requestStartTimeRef.current - : null; - - if (!force && timeSinceStart !== null && timeSinceStart < 100) { - console.log( - "[useChatStream] Request just started (<100ms), skipping abort to prevent race condition", - { - timeSinceStart, - }, - ); - return; - } - - try { - const signal = controller.signal; - - if ( - signal && - typeof signal.aborted === "boolean" && - !signal.aborted - ) { - console.log("[useChatStream] Aborting stream"); - controller.abort(); - } else { - console.log( - "[useChatStream] Stream already aborted or signal invalid", - ); - } - } catch (error) { - if (error instanceof Error && error.name === "AbortError") { - console.log( - "[useChatStream] AbortError caught (expected during cleanup)", - ); - } else { - console.warn("[useChatStream] Error aborting stream:", error); - } - } finally { - abortControllerRef.current = null; - requestStartTimeRef.current = null; - } - } - if (retryTimeoutRef.current) { - clearTimeout(retryTimeoutRef.current); - retryTimeoutRef.current = null; - } - setIsStreaming(false); - }, - [], + const onChunkCallbackRef = useRef<((chunk: StreamChunk) => void) | null>( + null, ); + const stopStream = useChatStore((s) => s.stopStream); + const unregisterActiveSession = useChatStore( + (s) => s.unregisterActiveSession, + ); + const isSessionActive = useChatStore((s) => s.isSessionActive); + const onStreamComplete = useChatStore((s) => s.onStreamComplete); + const getCompletedStream = useChatStore((s) => s.getCompletedStream); + const registerActiveSession = useChatStore((s) => s.registerActiveSession); + const startStream = useChatStore((s) => s.startStream); + const getStreamStatus = useChatStore((s) => s.getStreamStatus); + + function stopStreaming(sessionId?: string) { + const targetSession = sessionId || currentSessionIdRef.current; + if (targetSession) { + stopStream(targetSession); + unregisterActiveSession(targetSession); + } + setIsStreaming(false); + } + useEffect(() => { - console.log("[useChatStream] Component mounted"); - return () => { - const sessionIdAtUnmount = currentSessionIdRef.current; - console.log( - "[useChatStream] Component unmounting, calling stopStreaming", - { - sessionIdAtUnmount, - }, - ); - stopStreaming(undefined, false); + return function cleanup() { + const sessionId = currentSessionIdRef.current; + if (sessionId && !isSessionActive(sessionId)) { + stopStream(sessionId); + } currentSessionIdRef.current = null; + onChunkCallbackRef.current = null; }; - }, [stopStreaming]); + }, []); - const sendMessage = useCallback( - async ( - sessionId: string, - message: string, - onChunk: (chunk: StreamChunk) => void, - isUserMessage: boolean = true, - context?: { url: string; content: string }, - isRetry: boolean = false, - ) => { - console.log("[useChatStream] sendMessage called", { - sessionId, - message: message.substring(0, 50), - isUserMessage, - isRetry, - stack: new Error().stack, - }); + useEffect(() => { + const unsubscribe = onStreamComplete( + function handleStreamComplete(completedSessionId) { + if 
(completedSessionId !== currentSessionIdRef.current) return; - const previousSessionId = currentSessionIdRef.current; - stopStreaming(sessionId, true); - currentSessionIdRef.current = sessionId; - - const abortController = new AbortController(); - abortControllerRef.current = abortController; - requestStartTimeRef.current = Date.now(); - console.log("[useChatStream] Created new AbortController", { - sessionId, - previousSessionId, - requestStartTime: requestStartTimeRef.current, - }); - - if (abortController.signal.aborted) { - console.warn( - "[useChatStream] AbortController was aborted before request started", - ); - requestStartTimeRef.current = null; - return Promise.reject(new Error("Request aborted")); - } - - if (!isRetry) { - retryCountRef.current = 0; - } - setIsStreaming(true); - setError(null); - - try { - const url = `/api/chat/sessions/${sessionId}/stream`; - const body = JSON.stringify({ - message, - is_user_message: isUserMessage, - context: context || null, - }); - - const response = await fetch(url, { - method: "POST", - headers: { - "Content-Type": "application/json", - Accept: "text/event-stream", - }, - body, - signal: abortController.signal, - }); - - console.info("[useChatStream] Stream response", { - sessionId, - status: response.status, - ok: response.ok, - contentType: response.headers.get("content-type"), - }); - - if (!response.ok) { - const errorText = await response.text(); - console.warn("[useChatStream] Stream response error", { - sessionId, - status: response.status, - errorText, - }); - throw new Error(errorText || `HTTP ${response.status}`); - } - - if (!response.body) { - console.warn("[useChatStream] Response body is null", { sessionId }); - throw new Error("Response body is null"); - } - - const reader = response.body.getReader(); - const decoder = new TextDecoder(); - let buffer = ""; - let receivedChunkCount = 0; - let firstChunkAt: number | null = null; - let loggedLineCount = 0; - - return new Promise((resolve, reject) => { - let didDispatchStreamEnd = false; - - function dispatchStreamEnd() { - if (didDispatchStreamEnd) return; - didDispatchStreamEnd = true; - onChunk({ type: "stream_end" }); - } - - const cleanup = () => { - reader.cancel().catch(() => { - // Ignore cancel errors - }); - }; - - async function readStream() { - try { - while (true) { - const { done, value } = await reader.read(); - - if (done) { - cleanup(); - console.info("[useChatStream] Stream closed", { - sessionId, - receivedChunkCount, - timeSinceStart: requestStartTimeRef.current - ? Date.now() - requestStartTimeRef.current - : null, - }); - dispatchStreamEnd(); - retryCountRef.current = 0; - stopStreaming(); - resolve(); - return; - } - - buffer += decoder.decode(value, { stream: true }); - const lines = buffer.split("\n"); - buffer = lines.pop() || ""; - - for (const line of lines) { - if (line.startsWith("data: ")) { - const data = line.slice(6); - if (loggedLineCount < 3) { - console.info("[useChatStream] Raw stream line", { - sessionId, - data: - data.length > 300 ? `${data.slice(0, 300)}...` : data, - }); - loggedLineCount += 1; - } - if (data === "[DONE]") { - cleanup(); - console.info("[useChatStream] Stream done marker", { - sessionId, - receivedChunkCount, - timeSinceStart: requestStartTimeRef.current - ? 
Date.now() - requestStartTimeRef.current - : null, - }); - dispatchStreamEnd(); - retryCountRef.current = 0; - stopStreaming(); - resolve(); - return; - } - - try { - const rawChunk = JSON.parse(data) as - | StreamChunk - | VercelStreamChunk; - const chunk = normalizeStreamChunk(rawChunk); - if (!chunk) { - continue; - } - - if (!firstChunkAt) { - firstChunkAt = Date.now(); - console.info("[useChatStream] First stream chunk", { - sessionId, - chunkType: chunk.type, - timeSinceStart: requestStartTimeRef.current - ? firstChunkAt - requestStartTimeRef.current - : null, - }); - } - receivedChunkCount += 1; - - // Call the chunk handler - onChunk(chunk); - - // Handle stream lifecycle - if (chunk.type === "stream_end") { - didDispatchStreamEnd = true; - cleanup(); - console.info("[useChatStream] Stream end chunk", { - sessionId, - receivedChunkCount, - timeSinceStart: requestStartTimeRef.current - ? Date.now() - requestStartTimeRef.current - : null, - }); - retryCountRef.current = 0; - stopStreaming(); - resolve(); - return; - } else if (chunk.type === "error") { - cleanup(); - reject( - new Error( - chunk.message || chunk.content || "Stream error", - ), - ); - return; - } - } catch (err) { - // Skip invalid JSON lines - console.warn("Failed to parse SSE chunk:", err, data); - } - } - } - } - } catch (err) { - if (err instanceof Error && err.name === "AbortError") { - cleanup(); - dispatchStreamEnd(); - stopStreaming(); - resolve(); - return; - } - - const streamError = - err instanceof Error ? err : new Error("Failed to read stream"); - - if (retryCountRef.current < MAX_RETRIES) { - retryCountRef.current += 1; - const retryDelay = - INITIAL_RETRY_DELAY * Math.pow(2, retryCountRef.current - 1); - - toast.info("Connection interrupted", { - description: `Retrying in ${retryDelay / 1000} seconds...`, - }); - - retryTimeoutRef.current = setTimeout(() => { - sendMessage( - sessionId, - message, - onChunk, - isUserMessage, - context, - true, - ).catch((_err) => { - // Retry failed - }); - }, retryDelay); - } else { - setError(streamError); - toast.error("Connection Failed", { - description: - "Unable to connect to chat service. Please try again.", - }); - cleanup(); - dispatchStreamEnd(); - retryCountRef.current = 0; - stopStreaming(); - reject(streamError); - } - } - } - - readStream(); - }); - } catch (err) { - if (err instanceof Error && err.name === "AbortError") { - setIsStreaming(false); - return Promise.resolve(); - } - const streamError = - err instanceof Error ? 
err : new Error("Failed to start stream"); - setError(streamError); setIsStreaming(false); - throw streamError; + const completed = getCompletedStream(completedSessionId); + if (completed?.error) { + setError(completed.error); + } + unregisterActiveSession(completedSessionId); + }, + ); + + return unsubscribe; + }, []); + + async function sendMessage( + sessionId: string, + message: string, + onChunk: (chunk: StreamChunk) => void, + isUserMessage: boolean = true, + context?: { url: string; content: string }, + ) { + const previousSessionId = currentSessionIdRef.current; + if (previousSessionId && previousSessionId !== sessionId) { + stopStreaming(previousSessionId); + } + + currentSessionIdRef.current = sessionId; + onChunkCallbackRef.current = onChunk; + setIsStreaming(true); + setError(null); + + registerActiveSession(sessionId); + + try { + await startStream(sessionId, message, isUserMessage, context, onChunk); + + const status = getStreamStatus(sessionId); + if (status === "error") { + const completed = getCompletedStream(sessionId); + if (completed?.error) { + setError(completed.error); + toast.error("Connection Failed", { + description: "Unable to connect to chat service. Please try again.", + }); + throw completed.error; + } } - }, - [stopStreaming], - ); + } catch (err) { + const streamError = + err instanceof Error ? err : new Error("Failed to start stream"); + setError(streamError); + throw streamError; + } finally { + setIsStreaming(false); + } + } return { isStreaming, diff --git a/autogpt_platform/frontend/src/providers/posthog/posthog-provider.tsx b/autogpt_platform/frontend/src/providers/posthog/posthog-provider.tsx index 249d74596a..674f6c55eb 100644 --- a/autogpt_platform/frontend/src/providers/posthog/posthog-provider.tsx +++ b/autogpt_platform/frontend/src/providers/posthog/posthog-provider.tsx @@ -9,11 +9,12 @@ import { ReactNode, useEffect, useRef } from "react"; export function PostHogProvider({ children }: { children: ReactNode }) { const isPostHogEnabled = environment.isPostHogEnabled(); + const postHogCredentials = environment.getPostHogCredentials(); useEffect(() => { - if (process.env.NEXT_PUBLIC_POSTHOG_KEY) { - posthog.init(process.env.NEXT_PUBLIC_POSTHOG_KEY, { - api_host: process.env.NEXT_PUBLIC_POSTHOG_HOST, + if (postHogCredentials.key) { + posthog.init(postHogCredentials.key, { + api_host: postHogCredentials.host, defaults: "2025-11-30", capture_pageview: false, capture_pageleave: true, diff --git a/autogpt_platform/frontend/src/services/network-status/NetworkStatusMonitor.tsx b/autogpt_platform/frontend/src/services/network-status/NetworkStatusMonitor.tsx new file mode 100644 index 0000000000..7552bbf78c --- /dev/null +++ b/autogpt_platform/frontend/src/services/network-status/NetworkStatusMonitor.tsx @@ -0,0 +1,8 @@ +"use client"; + +import { useNetworkStatus } from "./useNetworkStatus"; + +export function NetworkStatusMonitor() { + useNetworkStatus(); + return null; +} diff --git a/autogpt_platform/frontend/src/services/network-status/useNetworkStatus.ts b/autogpt_platform/frontend/src/services/network-status/useNetworkStatus.ts new file mode 100644 index 0000000000..472a6e0e90 --- /dev/null +++ b/autogpt_platform/frontend/src/services/network-status/useNetworkStatus.ts @@ -0,0 +1,28 @@ +"use client"; + +import { useEffect } from "react"; +import { toast } from "sonner"; + +export function useNetworkStatus() { + useEffect(function monitorNetworkStatus() { + function handleOnline() { + toast.success("Connection restored", { + description: "You're back 
online", + }); + } + + function handleOffline() { + toast.error("You're offline", { + description: "Check your internet connection", + }); + } + + window.addEventListener("online", handleOnline); + window.addEventListener("offline", handleOffline); + + return function cleanup() { + window.removeEventListener("online", handleOnline); + window.removeEventListener("offline", handleOffline); + }; + }, []); +} From 2134d777bef69cc9d64a11beb843d3165dadadb7 Mon Sep 17 00:00:00 2001 From: Swifty Date: Tue, 27 Jan 2026 16:21:13 +0100 Subject: [PATCH 7/7] fix(backend): exclude disabled blocks from chat search and indexing (#11854) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Summary Disabled blocks (e.g., webhook blocks without `platform_base_url` configured) were being indexed and returned in chat tool search results. This PR ensures they are properly filtered out. ### Changes ๐Ÿ—๏ธ - **find_block.py**: Skip disabled blocks when enriching search results - **content_handlers.py**: - Skip disabled blocks during embedding indexing - Update `get_stats()` to only count enabled blocks for accurate coverage metrics ### Why Blocks can be disabled for various reasons (missing OAuth config, no platform URL for webhooks, etc.). These blocks shouldn't appear in search results since users cannot use them. ### Checklist ๐Ÿ“‹ #### For code changes: - [x] I have clearly listed my changes in the PR description - [x] I have made a test plan - [x] I have tested my changes according to the test plan: - [x] Verified disabled blocks are filtered from search results - [x] Verified disabled blocks are not indexed - [x] Verified stats accurately reflect enabled block count --- .../api/features/chat/tools/find_block.py | 3 ++- .../api/features/store/content_handlers.py | 15 +++++++++++++-- .../features/store/content_handlers_test.py | 18 ++++++++++++++---- 3 files changed, 29 insertions(+), 7 deletions(-) diff --git a/autogpt_platform/backend/backend/api/features/chat/tools/find_block.py b/autogpt_platform/backend/backend/api/features/chat/tools/find_block.py index a5e66f0a1c..7ca85961f9 100644 --- a/autogpt_platform/backend/backend/api/features/chat/tools/find_block.py +++ b/autogpt_platform/backend/backend/api/features/chat/tools/find_block.py @@ -107,7 +107,8 @@ class FindBlockTool(BaseTool): block_id = result["content_id"] block = get_block(block_id) - if block: + # Skip disabled blocks + if block and not block.disabled: # Get input/output schemas input_schema = {} output_schema = {} diff --git a/autogpt_platform/backend/backend/api/features/store/content_handlers.py b/autogpt_platform/backend/backend/api/features/store/content_handlers.py index 1560db421c..cbbdcfbebf 100644 --- a/autogpt_platform/backend/backend/api/features/store/content_handlers.py +++ b/autogpt_platform/backend/backend/api/features/store/content_handlers.py @@ -188,6 +188,10 @@ class BlockHandler(ContentHandler): try: block_instance = block_cls() + # Skip disabled blocks - they shouldn't be indexed + if block_instance.disabled: + continue + # Build searchable text from block metadata parts = [] if hasattr(block_instance, "name") and block_instance.name: @@ -248,12 +252,19 @@ class BlockHandler(ContentHandler): from backend.data.block import get_blocks all_blocks = get_blocks() - total_blocks = len(all_blocks) + + # Filter out disabled blocks - they're not indexed + enabled_block_ids = [ + block_id + for block_id, block_cls in all_blocks.items() + if not block_cls().disabled + ] + total_blocks = 
len(enabled_block_ids) if total_blocks == 0: return {"total": 0, "with_embeddings": 0, "without_embeddings": 0} - block_ids = list(all_blocks.keys()) + block_ids = enabled_block_ids placeholders = ",".join([f"${i+1}" for i in range(len(block_ids))]) embedded_result = await query_raw_with_schema( diff --git a/autogpt_platform/backend/backend/api/features/store/content_handlers_test.py b/autogpt_platform/backend/backend/api/features/store/content_handlers_test.py index 28bc88e270..fee879fae0 100644 --- a/autogpt_platform/backend/backend/api/features/store/content_handlers_test.py +++ b/autogpt_platform/backend/backend/api/features/store/content_handlers_test.py @@ -81,6 +81,7 @@ async def test_block_handler_get_missing_items(mocker): mock_block_instance.name = "Calculator Block" mock_block_instance.description = "Performs calculations" mock_block_instance.categories = [MagicMock(value="MATH")] + mock_block_instance.disabled = False mock_block_instance.input_schema.model_json_schema.return_value = { "properties": {"expression": {"description": "Math expression to evaluate"}} } @@ -116,11 +117,18 @@ async def test_block_handler_get_stats(mocker): """Test BlockHandler returns correct stats.""" handler = BlockHandler() - # Mock get_blocks + # Mock get_blocks - each block class returns an instance with disabled=False + def make_mock_block_class(): + mock_class = MagicMock() + mock_instance = MagicMock() + mock_instance.disabled = False + mock_class.return_value = mock_instance + return mock_class + mock_blocks = { - "block-1": MagicMock(), - "block-2": MagicMock(), - "block-3": MagicMock(), + "block-1": make_mock_block_class(), + "block-2": make_mock_block_class(), + "block-3": make_mock_block_class(), } # Mock embedded count query (2 blocks have embeddings) @@ -309,6 +317,7 @@ async def test_block_handler_handles_missing_attributes(): mock_block_class = MagicMock() mock_block_instance = MagicMock() mock_block_instance.name = "Minimal Block" + mock_block_instance.disabled = False # No description, categories, or schema del mock_block_instance.description del mock_block_instance.categories @@ -342,6 +351,7 @@ async def test_block_handler_skips_failed_blocks(): good_instance.name = "Good Block" good_instance.description = "Works fine" good_instance.categories = [] + good_instance.disabled = False good_block.return_value = good_instance bad_block = MagicMock()