diff --git a/.claude/skills/write-frontend-tests/SKILL.md b/.claude/skills/write-frontend-tests/SKILL.md index 177ce64a68..389de2023b 100644 --- a/.claude/skills/write-frontend-tests/SKILL.md +++ b/.claude/skills/write-frontend-tests/SKILL.md @@ -48,14 +48,15 @@ git diff "$BASE_BRANCH"...HEAD -- src/ | head -500 For each changed file, determine: 1. **Is it a page?** (`page.tsx`) — these are the primary test targets -2. **Is it a hook?** (`use*.ts`) — test via the page that uses it +2. **Is it a hook?** (`use*.ts`) — test via the page/component that uses it; avoid direct `renderHook()` tests unless it is a shared reusable hook with standalone business logic 3. **Is it a component?** (`.tsx` in `components/`) — test via the parent page unless it's complex enough to warrant isolation 4. **Is it a helper?** (`helpers.ts`, `utils.ts`) — unit test directly if pure logic **Priority order:** + 1. Pages with new/changed data fetching or user interactions 2. Components with complex internal logic (modals, forms, wizards) -3. Hooks with non-trivial business logic +3. Shared hooks with standalone business logic when UI-level coverage is impractical 4. Pure helper functions Skip: styling-only changes, type-only changes, config changes. @@ -163,6 +164,7 @@ describe("LibraryPage", () => { - Use `waitFor` when asserting side effects or state changes after interactions - Import `fireEvent` or `userEvent` from the test-utils for interactions - Do NOT mock internal hooks or functions — mock at the API boundary via MSW +- Prefer Orval-generated MSW handlers and response builders over hand-built API response objects - Do NOT use `act()` manually — `render` and `fireEvent` handle it - Keep tests focused: one behavior per test - Use descriptive test names that read like sentences @@ -190,9 +192,7 @@ import { http, HttpResponse } from "msw"; server.use( http.get("http://localhost:3000/api/proxy/api/v2/library/agents", () => { return HttpResponse.json({ - agents: [ - { id: "1", name: "Test Agent", description: "A test agent" }, - ], + agents: [{ id: "1", name: "Test Agent", description: "A test agent" }], pagination: { total_items: 1, total_pages: 1, page: 1, page_size: 10 }, }); }), @@ -211,6 +211,7 @@ pnpm test:unit --reporter=verbose ``` If tests fail: + 1. Read the error output carefully 2. Fix the test (not the source code, unless there is a genuine bug) 3. 
Re-run until all pass diff --git a/.github/workflows/platform-fullstack-ci.yml b/.github/workflows/platform-fullstack-ci.yml index 5020f8aa2e..605c13c38b 100644 --- a/.github/workflows/platform-fullstack-ci.yml +++ b/.github/workflows/platform-fullstack-ci.yml @@ -160,6 +160,7 @@ jobs: run: | cp ../backend/.env.default ../backend/.env echo "OPENAI_INTERNAL_API_KEY=${{ secrets.OPENAI_API_KEY }}" >> ../backend/.env + echo "SCHEDULER_STARTUP_EMBEDDING_BACKFILL=false" >> ../backend/.env env: # Used by E2E test data script to generate embeddings for approved store agents OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} @@ -288,6 +289,14 @@ jobs: cache: "pnpm" cache-dependency-path: autogpt_platform/frontend/pnpm-lock.yaml + - name: Set up tests - Cache Playwright browsers + uses: actions/cache@v5 + with: + path: ~/.cache/ms-playwright + key: playwright-${{ runner.os }}-${{ hashFiles('autogpt_platform/frontend/pnpm-lock.yaml') }} + restore-keys: | + playwright-${{ runner.os }}- + - name: Copy source maps from Docker for E2E coverage run: | FRONTEND_CONTAINER=$(docker compose -f ../docker-compose.resolved.yml ps -q frontend) @@ -299,8 +308,8 @@ jobs: - name: Set up tests - Install browser 'chromium' run: pnpm playwright install --with-deps chromium - - name: Run Playwright tests - run: pnpm test:no-build + - name: Run Playwright E2E suite + run: pnpm test:e2e:no-build continue-on-error: false - name: Upload E2E coverage to Codecov diff --git a/.gitignore b/.gitignore index 2b209b957a..97d6b18a76 100644 --- a/.gitignore +++ b/.gitignore @@ -194,3 +194,4 @@ test.db .next # Implementation plans (generated by AI agents) plans/ +.claude/worktrees/ diff --git a/autogpt_platform/backend/agents/calculator-agent.json b/autogpt_platform/backend/agents/calculator-agent.json new file mode 100644 index 0000000000..9851b1496b --- /dev/null +++ b/autogpt_platform/backend/agents/calculator-agent.json @@ -0,0 +1,166 @@ +{ + "id": "858e2226-e047-4d19-a832-3be4a134d155", + "version": 2, + "is_active": true, + "name": "Calculator agent", + "description": "", + "instructions": null, + "recommended_schedule_cron": null, + "forked_from_id": null, + "forked_from_version": null, + "user_id": "", + "created_at": "2026-04-13T03:45:11.241Z", + "nodes": [ + { + "id": "6762da5d-6915-4836-a431-6dcd7d36a54a", + "block_id": "c0a8e994-ebf1-4a9c-a4d8-89d09c86741b", + "input_default": { + "name": "Input", + "secret": false, + "advanced": false + }, + "metadata": { + "position": { + "x": -188.2244873046875, + "y": 95 + } + }, + "input_links": [], + "output_links": [ + { + "id": "432c7caa-49b9-4b70-bd21-2fa33a569601", + "source_id": "6762da5d-6915-4836-a431-6dcd7d36a54a", + "sink_id": "bf4a15ff-b0c4-4032-a21b-5880224af690", + "source_name": "result", + "sink_name": "a", + "is_static": true + } + ], + "graph_id": "858e2226-e047-4d19-a832-3be4a134d155", + "graph_version": 2, + "webhook_id": null + }, + { + "id": "65429c9e-a0c6-4032-a421-6899c394fa74", + "block_id": "363ae599-353e-4804-937e-b2ee3cef3da4", + "input_default": { + "name": "Output", + "secret": false, + "advanced": false, + "escape_html": false + }, + "metadata": { + "position": { + "x": 825.198974609375, + "y": 123.75 + } + }, + "input_links": [ + { + "id": "8cdb2f33-5b10-4cc2-8839-f8ccb70083a3", + "source_id": "bf4a15ff-b0c4-4032-a21b-5880224af690", + "sink_id": "65429c9e-a0c6-4032-a421-6899c394fa74", + "source_name": "result", + "sink_name": "value", + "is_static": false + } + ], + "output_links": [], + "graph_id": "858e2226-e047-4d19-a832-3be4a134d155", + "graph_version": 
2, + "webhook_id": null + }, + { + "id": "bf4a15ff-b0c4-4032-a21b-5880224af690", + "block_id": "b1ab9b19-67a6-406d-abf5-2dba76d00c79", + "input_default": { + "b": 34, + "operation": "Add", + "round_result": false + }, + "metadata": { + "position": { + "x": 323.0255126953125, + "y": 121.25 + } + }, + "input_links": [ + { + "id": "432c7caa-49b9-4b70-bd21-2fa33a569601", + "source_id": "6762da5d-6915-4836-a431-6dcd7d36a54a", + "sink_id": "bf4a15ff-b0c4-4032-a21b-5880224af690", + "source_name": "result", + "sink_name": "a", + "is_static": true + } + ], + "output_links": [ + { + "id": "8cdb2f33-5b10-4cc2-8839-f8ccb70083a3", + "source_id": "bf4a15ff-b0c4-4032-a21b-5880224af690", + "sink_id": "65429c9e-a0c6-4032-a421-6899c394fa74", + "source_name": "result", + "sink_name": "value", + "is_static": false + } + ], + "graph_id": "858e2226-e047-4d19-a832-3be4a134d155", + "graph_version": 2, + "webhook_id": null + } + ], + "links": [ + { + "id": "8cdb2f33-5b10-4cc2-8839-f8ccb70083a3", + "source_id": "bf4a15ff-b0c4-4032-a21b-5880224af690", + "sink_id": "65429c9e-a0c6-4032-a421-6899c394fa74", + "source_name": "result", + "sink_name": "value", + "is_static": false + }, + { + "id": "432c7caa-49b9-4b70-bd21-2fa33a569601", + "source_id": "6762da5d-6915-4836-a431-6dcd7d36a54a", + "sink_id": "bf4a15ff-b0c4-4032-a21b-5880224af690", + "source_name": "result", + "sink_name": "a", + "is_static": true + } + ], + "sub_graphs": [], + "input_schema": { + "type": "object", + "properties": { + "Input": { + "advanced": false, + "secret": false, + "title": "Input" + } + }, + "required": [ + "Input" + ] + }, + "output_schema": { + "type": "object", + "properties": { + "Output": { + "advanced": false, + "secret": false, + "title": "Output" + } + }, + "required": [ + "Output" + ] + }, + "has_external_trigger": false, + "has_human_in_the_loop": false, + "has_sensitive_action": false, + "trigger_setup_info": null, + "credentials_input_schema": { + "type": "object", + "properties": {}, + "required": [] + } +} \ No newline at end of file diff --git a/autogpt_platform/backend/backend/blocks/_base.py b/autogpt_platform/backend/backend/blocks/_base.py index 56986d15c4..2a26421c91 100644 --- a/autogpt_platform/backend/backend/blocks/_base.py +++ b/autogpt_platform/backend/backend/blocks/_base.py @@ -25,6 +25,7 @@ from backend.data.model import ( Credentials, CredentialsFieldInfo, CredentialsMetaInput, + NodeExecutionStats, SchemaField, is_credentials_field_name, ) @@ -43,7 +44,7 @@ logger = logging.getLogger(__name__) if TYPE_CHECKING: from backend.data.execution import ExecutionContext - from backend.data.model import ContributorDetails, NodeExecutionStats + from backend.data.model import ContributorDetails from ..data.graph import Link @@ -420,6 +421,19 @@ class BlockWebhookConfig(BlockManualWebhookConfig): class Block(ABC, Generic[BlockSchemaInputType, BlockSchemaOutputType]): _optimized_description: ClassVar[str | None] = None + def extra_runtime_cost(self, execution_stats: NodeExecutionStats) -> int: + """Return extra runtime cost to charge after this block run completes. + + Called by the executor after a block finishes with COMPLETED status. + The return value is the number of additional base-cost credits to + charge beyond the single credit already collected by charge_usage + at the start of execution. Defaults to 0 (no extra charges). + + Override in blocks (e.g. OrchestratorBlock) that make multiple LLM + calls within one run and should be billed per call. 
+ """ + return 0 + def __init__( self, id: str = "", @@ -455,8 +469,6 @@ class Block(ABC, Generic[BlockSchemaInputType, BlockSchemaOutputType]): disabled: If the block is disabled, it will not be available for execution. static_output: Whether the output links of the block are static by default. """ - from backend.data.model import NodeExecutionStats - self.id = id self.input_schema = input_schema self.output_schema = output_schema @@ -474,7 +486,7 @@ class Block(ABC, Generic[BlockSchemaInputType, BlockSchemaOutputType]): self.is_sensitive_action = is_sensitive_action # Read from ClassVar set by initialize_blocks() self.optimized_description: str | None = type(self)._optimized_description - self.execution_stats: "NodeExecutionStats" = NodeExecutionStats() + self.execution_stats: NodeExecutionStats = NodeExecutionStats() if self.webhook_config: if isinstance(self.webhook_config, BlockWebhookConfig): @@ -554,7 +566,7 @@ class Block(ABC, Generic[BlockSchemaInputType, BlockSchemaOutputType]): return data raise ValueError(f"{self.name} did not produce any output for {output}") - def merge_stats(self, stats: "NodeExecutionStats") -> "NodeExecutionStats": + def merge_stats(self, stats: NodeExecutionStats) -> NodeExecutionStats: self.execution_stats += stats return self.execution_stats diff --git a/autogpt_platform/backend/backend/blocks/orchestrator.py b/autogpt_platform/backend/backend/blocks/orchestrator.py index 6fbff643fb..b2a6df8481 100644 --- a/autogpt_platform/backend/backend/blocks/orchestrator.py +++ b/autogpt_platform/backend/backend/blocks/orchestrator.py @@ -36,6 +36,7 @@ from backend.data.execution import ExecutionContext from backend.data.model import NodeExecutionStats, SchemaField from backend.util import json from backend.util.clients import get_database_manager_async_client +from backend.util.exceptions import InsufficientBalanceError from backend.util.prompt import MAIN_OBJECTIVE_PREFIX from backend.util.security import SENSITIVE_FIELD_NAMES from backend.util.tool_call_loop import ( @@ -364,10 +365,31 @@ def _disambiguate_tool_names(tools: list[dict[str, Any]]) -> None: class OrchestratorBlock(Block): + """A block that uses a language model to orchestrate tool calls. + + Supports both single-shot and iterative agent mode execution. + + **InsufficientBalanceError propagation contract**: ``InsufficientBalanceError`` + (IBE) must always re-raise through every ``except`` block in this class. + Swallowing IBE would let the agent loop continue with unpaid work. Every + exception handler that catches ``Exception`` includes an explicit IBE + re-raise carve-out for this reason. """ - A block that uses a language model to orchestrate tool calls, supporting both - single-shot and iterative agent mode execution. - """ + + def extra_runtime_cost(self, execution_stats: NodeExecutionStats) -> int: + """Charge one extra runtime cost per LLM call beyond the first. + + In agent mode each iteration makes one LLM call. The first is already + covered by charge_usage(); this returns the number of additional + credits so the executor can bill the remaining calls post-completion. + + SDK-mode exemption: when the block runs via _execute_tools_sdk_mode, + the SDK manages its own conversation loop and only exposes aggregate + usage. We hardcode llm_call_count=1 there (the SDK does not report a + per-turn call count), so this method always returns 0 for SDK-mode + executions. Per-iteration billing does not apply to SDK mode. 
+ """ + return max(0, execution_stats.llm_call_count - 1) # MCP server name used by the Claude Code SDK execution mode. Keep in sync # with _create_graph_mcp_server and the MCP_PREFIX derivation in _execute_tools_sdk_mode. @@ -1077,7 +1099,10 @@ class OrchestratorBlock(Block): input_data=input_value, ) - assert node_exec_result is not None, "node_exec_result should not be None" + if node_exec_result is None: + raise RuntimeError( + f"upsert_execution_input returned None for node {sink_node_id}" + ) # Create NodeExecutionEntry for execution manager node_exec_entry = NodeExecutionEntry( @@ -1112,15 +1137,86 @@ class OrchestratorBlock(Block): task=node_exec_future, ) - # Execute the node directly since we're in the Orchestrator context - node_exec_future.set_result( - await execution_processor.on_node_execution( + # Execute the node directly since we're in the Orchestrator context. + # Wrap in try/except so the future is always resolved, even on + # error — an unresolved Future would block anything awaiting it. + # + # on_node_execution is decorated with @async_error_logged(swallow=True), + # which catches BaseException and returns None rather than raising. + # Treat a None return as a failure: set_exception so the future + # carries an error state rather than a None result, and return an + # error response so the LLM knows the tool failed. + try: + tool_node_stats = await execution_processor.on_node_execution( node_exec=node_exec_entry, node_exec_progress=node_exec_progress, nodes_input_masks=None, graph_stats_pair=graph_stats_pair, ) - ) + if tool_node_stats is None: + nil_err = RuntimeError( + f"on_node_execution returned None for node {sink_node_id} " + "(error was swallowed by @async_error_logged)" + ) + node_exec_future.set_exception(nil_err) + resp = _create_tool_response( + tool_call.id, + "Tool execution returned no result", + responses_api=responses_api, + ) + resp["_is_error"] = True + return resp + node_exec_future.set_result(tool_node_stats) + except Exception as exec_err: + node_exec_future.set_exception(exec_err) + raise + + # Charge user credits AFTER successful tool execution. Tools + # spawned by the orchestrator bypass the main execution queue + # (where _charge_usage is called), so we must charge here to + # avoid free tool execution. Charging post-completion (vs. + # pre-execution) avoids billing users for failed tool calls. + # Skipped for dry runs. + # + # `error is None` intentionally excludes both Exception and + # BaseException subclasses (e.g. CancelledError) so cancelled + # or terminated tool runs are not billed. + # + # Billing errors (including non-balance exceptions) are kept + # in a separate try/except so they are never silently swallowed + # by the generic tool-error handler below. + if ( + not execution_params.execution_context.dry_run + and tool_node_stats.error is None + ): + try: + tool_cost, _ = await execution_processor.charge_node_usage( + node_exec_entry, + ) + except InsufficientBalanceError: + # IBE must propagate — see OrchestratorBlock class docstring. + # Log the billing failure here so the discarded tool result + # is traceable before the loop aborts. + logger.warning( + "Insufficient balance charging for tool node %s after " + "successful execution; agent loop will be aborted", + sink_node_id, + ) + raise + except Exception: + # Non-billing charge failures (DB outage, network, etc.) + # must NOT propagate to the outer except handler because + # the tool itself succeeded. 
Re-raising would mark the + # tool as failed (_is_error=True), causing the LLM to + # retry side-effectful operations. Log and continue. + logger.exception( + "Unexpected error charging for tool node %s; " + "tool execution was successful", + sink_node_id, + ) + tool_cost = 0 + if tool_cost > 0: + self.merge_stats(NodeExecutionStats(extra_cost=tool_cost)) # Get outputs from database after execution completes using database manager client node_outputs = await db_client.get_execution_outputs_by_node_exec_id( @@ -1133,18 +1229,26 @@ class OrchestratorBlock(Block): if node_outputs else "Tool executed successfully" ) - return _create_tool_response( + resp = _create_tool_response( tool_call.id, tool_response_content, responses_api=responses_api ) + resp["_is_error"] = False + return resp + except InsufficientBalanceError: + # IBE must propagate — see class docstring. + raise except Exception as e: - logger.warning("Tool execution with manager failed: %s", e) - # Return error response - return _create_tool_response( + logger.warning("Tool execution with manager failed: %s", e, exc_info=True) + # Return a generic error to the LLM — internal exception messages + # may contain server paths, DB details, or infrastructure info. + resp = _create_tool_response( tool_call.id, - f"Tool execution failed: {e}", + "Tool execution failed due to an internal error", responses_api=responses_api, ) + resp["_is_error"] = True + return resp async def _agent_mode_llm_caller( self, @@ -1244,13 +1348,16 @@ class OrchestratorBlock(Block): content = str(raw_content) else: content = "Tool executed successfully" - tool_failed = content.startswith("Tool execution failed:") + tool_failed = result.get("_is_error", True) return ToolCallResult( tool_call_id=tool_call.id, tool_name=tool_call.name, content=content, is_error=tool_failed, ) + except InsufficientBalanceError: + # IBE must propagate — see class docstring. + raise except Exception as e: logger.error("Tool execution failed: %s", e) return ToolCallResult( @@ -1370,9 +1477,13 @@ class OrchestratorBlock(Block): "arguments": tc.arguments, }, ) + except InsufficientBalanceError: + # IBE must propagate — see class docstring. + raise except Exception as e: - # Catch all errors (validation, network, API) so that the block - # surfaces them as user-visible output instead of crashing. + # Catch all OTHER errors (validation, network, API) so that + # the block surfaces them as user-visible output instead of + # crashing. yield "error", str(e) return @@ -1450,11 +1561,14 @@ class OrchestratorBlock(Block): text = content else: text = json.dumps(content) - tool_failed = text.startswith("Tool execution failed:") + tool_failed = result.get("_is_error", True) return { "content": [{"type": "text", "text": text}], "isError": tool_failed, } + except InsufficientBalanceError: + # IBE must propagate — see class docstring. + raise except Exception as e: logger.error("SDK tool execution failed: %s", e) return { @@ -1733,11 +1847,15 @@ class OrchestratorBlock(Block): await pending_task except (asyncio.CancelledError, StopAsyncIteration): pass + except InsufficientBalanceError: + # IBE must propagate — see class docstring. The `finally` + # block below still runs and records partial token usage. + raise except Exception as e: - # Surface SDK errors as user-visible output instead of crashing, - # consistent with _execute_tools_agent_mode error handling. - # Don't return yet — fall through to merge_stats below so - # partial token usage is always recorded. 
+ # Surface OTHER SDK errors as user-visible output instead + # of crashing, consistent with _execute_tools_agent_mode + # error handling. Don't return yet — fall through to + # merge_stats below so partial token usage is always recorded. sdk_error = e finally: # Always record usage stats, even on error. The SDK may have diff --git a/autogpt_platform/backend/backend/blocks/test/test_orchestrator.py b/autogpt_platform/backend/backend/blocks/test/test_orchestrator.py index 55f137428f..2eb27012dc 100644 --- a/autogpt_platform/backend/backend/blocks/test/test_orchestrator.py +++ b/autogpt_platform/backend/backend/blocks/test/test_orchestrator.py @@ -922,6 +922,11 @@ async def test_orchestrator_agent_mode(): mock_execution_processor.on_node_execution = AsyncMock( return_value=mock_node_stats ) + # Mock charge_node_usage (called after successful tool execution). + # Returns (cost, remaining_balance). Must be AsyncMock because it is + # an async method and is directly awaited in _execute_single_tool_with_manager. + # Use a non-zero cost so the merge_stats branch is exercised. + mock_execution_processor.charge_node_usage = AsyncMock(return_value=(10, 990)) # Mock the get_execution_outputs_by_node_exec_id method mock_db_client.get_execution_outputs_by_node_exec_id.return_value = { @@ -967,6 +972,11 @@ async def test_orchestrator_agent_mode(): # Verify tool was executed via execution processor assert mock_execution_processor.on_node_execution.call_count == 1 + # Verify charge_node_usage was actually called for the successful + # tool execution — this guards against regressions where the + # post-execution tool charging is accidentally removed. + assert mock_execution_processor.charge_node_usage.call_count == 1 + @pytest.mark.asyncio async def test_orchestrator_traditional_mode_default(): diff --git a/autogpt_platform/backend/backend/blocks/test/test_orchestrator_dynamic_fields.py b/autogpt_platform/backend/backend/blocks/test/test_orchestrator_dynamic_fields.py index 1069fc8ad5..f2242ea527 100644 --- a/autogpt_platform/backend/backend/blocks/test/test_orchestrator_dynamic_fields.py +++ b/autogpt_platform/backend/backend/blocks/test/test_orchestrator_dynamic_fields.py @@ -641,6 +641,14 @@ async def test_validation_errors_dont_pollute_conversation(): mock_execution_processor.on_node_execution.return_value = ( mock_node_stats ) + # Mock charge_node_usage (called after successful tool execution). + # Must be AsyncMock because it is async and is awaited in + # _execute_single_tool_with_manager — a plain MagicMock would + # return a non-awaitable tuple and TypeError out, then be + # silently swallowed by the orchestrator's catch-all. + mock_execution_processor.charge_node_usage = AsyncMock( + return_value=(0, 0) + ) async for output_name, output_value in block.run( input_data, diff --git a/autogpt_platform/backend/backend/blocks/test/test_orchestrator_per_iteration_cost.py b/autogpt_platform/backend/backend/blocks/test/test_orchestrator_per_iteration_cost.py new file mode 100644 index 0000000000..441bc08a42 --- /dev/null +++ b/autogpt_platform/backend/backend/blocks/test/test_orchestrator_per_iteration_cost.py @@ -0,0 +1,1020 @@ +"""Tests for OrchestratorBlock per-iteration cost charging. + +The OrchestratorBlock in agent mode makes multiple LLM calls in a single +node execution. The executor uses ``Block.extra_runtime_cost`` to detect +this and charge ``base_cost * (llm_call_count - 1)`` extra credits after +the block completes. 
+""" + +import threading +from collections import defaultdict +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest + +from backend.blocks._base import Block +from backend.blocks.orchestrator import ExecutionParams, OrchestratorBlock +from backend.data.execution import ExecutionContext, ExecutionStatus +from backend.data.model import NodeExecutionStats +from backend.executor import billing, manager +from backend.util.exceptions import InsufficientBalanceError + +# ── extra_runtime_cost hook ──────────────────────────────────────── + + +class _NoOpBlock(Block): + """Minimal concrete Block subclass that does not override extra_runtime_cost.""" + + def __init__(self): + super().__init__( + id="00000000-0000-0000-0000-000000000001", description="No-op test block" + ) + + def run(self, input_data, **kwargs): # type: ignore[override] + yield "out", {} + + +class TestExtraRuntimeCost: + """OrchestratorBlock opts into per-LLM-call billing via extra_runtime_cost.""" + + def test_orchestrator_returns_nonzero_for_multiple_calls(self): + block = OrchestratorBlock() + stats = NodeExecutionStats(llm_call_count=3) + assert block.extra_runtime_cost(stats) == 2 + + def test_orchestrator_returns_zero_for_single_call(self): + block = OrchestratorBlock() + stats = NodeExecutionStats(llm_call_count=1) + assert block.extra_runtime_cost(stats) == 0 + + def test_orchestrator_returns_zero_for_zero_calls(self): + block = OrchestratorBlock() + stats = NodeExecutionStats(llm_call_count=0) + assert block.extra_runtime_cost(stats) == 0 + + def test_default_block_returns_zero(self): + """A block that does not override extra_runtime_cost returns 0.""" + block = _NoOpBlock() + stats = NodeExecutionStats(llm_call_count=10) + assert block.extra_runtime_cost(stats) == 0 + + +# ── charge_extra_runtime_cost math ─────────────────────────────────── + + +@pytest.fixture() +def fake_node_exec(): + node_exec = MagicMock() + node_exec.user_id = "u" + node_exec.graph_exec_id = "g" + node_exec.graph_id = "g" + node_exec.node_exec_id = "ne" + node_exec.node_id = "n" + node_exec.block_id = "b" + node_exec.inputs = {} + return node_exec + + +@pytest.fixture() +def patched_processor(monkeypatch): + """ExecutionProcessor with stubbed db client / block lookup helpers. + + Returns the processor and a list of credit amounts spent so tests can + assert on what was charged. + + Note: ``ExecutionProcessor.__new__()`` bypasses ``__init__`` — if + ``__init__`` gains required state in the future this fixture will need + updating. 
+ """ + spent: list[int] = [] + + class FakeDb: + def spend_credits(self, *, user_id, cost, metadata): + spent.append(cost) + return 1000 # remaining balance + + fake_block = MagicMock() + fake_block.name = "FakeBlock" + + monkeypatch.setattr(billing, "get_db_client", lambda: FakeDb()) + monkeypatch.setattr(billing, "get_block", lambda block_id: fake_block) + monkeypatch.setattr( + billing, + "block_usage_cost", + lambda block, input_data, **_kw: (10, {"model": "claude-sonnet-4-6"}), + ) + + proc = manager.ExecutionProcessor.__new__(manager.ExecutionProcessor) + return proc, spent + + +class TestChargeExtraRuntimeCost: + @pytest.mark.asyncio + async def test_zero_extra_iterations_charges_nothing( + self, patched_processor, fake_node_exec + ): + proc, spent = patched_processor + cost, balance = await proc.charge_extra_runtime_cost( + fake_node_exec, extra_count=0 + ) + assert cost == 0 + assert balance == 0 + assert spent == [] + + @pytest.mark.asyncio + async def test_extra_iterations_multiplies_base_cost( + self, patched_processor, fake_node_exec + ): + proc, spent = patched_processor + cost, balance = await proc.charge_extra_runtime_cost( + fake_node_exec, extra_count=4 + ) + assert cost == 40 # 4 × 10 + assert balance == 1000 + assert spent == [40] + + @pytest.mark.asyncio + async def test_negative_extra_iterations_charges_nothing( + self, patched_processor, fake_node_exec + ): + proc, spent = patched_processor + cost, balance = await proc.charge_extra_runtime_cost( + fake_node_exec, extra_count=-1 + ) + assert cost == 0 + assert balance == 0 + assert spent == [] + + @pytest.mark.asyncio + async def test_capped_at_max(self, monkeypatch, fake_node_exec): + """Runaway llm_call_count is capped at _MAX_EXTRA_RUNTIME_COST.""" + + spent: list[int] = [] + + class FakeDb: + def spend_credits(self, *, user_id, cost, metadata): + spent.append(cost) + return 1000 + + fake_block = MagicMock() + fake_block.name = "FakeBlock" + + monkeypatch.setattr(billing, "get_db_client", lambda: FakeDb()) + monkeypatch.setattr(billing, "get_block", lambda block_id: fake_block) + monkeypatch.setattr( + billing, + "block_usage_cost", + lambda block, input_data, **_kw: (10, {}), + ) + + proc = manager.ExecutionProcessor.__new__(manager.ExecutionProcessor) + cap = billing._MAX_EXTRA_RUNTIME_COST + cost, _ = await proc.charge_extra_runtime_cost( + fake_node_exec, extra_count=cap * 100 + ) + # Charged at most cap × 10 + assert cost == cap * 10 + assert spent == [cap * 10] + + @pytest.mark.asyncio + async def test_zero_base_cost_skips_charge(self, monkeypatch, fake_node_exec): + + spent: list[int] = [] + + class FakeDb: + def spend_credits(self, *, user_id, cost, metadata): + spent.append(cost) + return 0 + + fake_block = MagicMock() + fake_block.name = "FakeBlock" + + monkeypatch.setattr(billing, "get_db_client", lambda: FakeDb()) + monkeypatch.setattr(billing, "get_block", lambda block_id: fake_block) + monkeypatch.setattr( + billing, "block_usage_cost", lambda block, input_data, **_kw: (0, {}) + ) + + proc = manager.ExecutionProcessor.__new__(manager.ExecutionProcessor) + cost, balance = await proc.charge_extra_runtime_cost( + fake_node_exec, extra_count=4 + ) + assert cost == 0 + assert balance == 0 + assert spent == [] + + @pytest.mark.asyncio + async def test_block_not_found_skips_charge(self, monkeypatch, fake_node_exec): + + spent: list[int] = [] + + class FakeDb: + def spend_credits(self, *, user_id, cost, metadata): + spent.append(cost) + return 0 + + monkeypatch.setattr(billing, "get_db_client", lambda: 
FakeDb()) + monkeypatch.setattr(billing, "get_block", lambda block_id: None) + monkeypatch.setattr( + billing, "block_usage_cost", lambda block, input_data, **_kw: (10, {}) + ) + + proc = manager.ExecutionProcessor.__new__(manager.ExecutionProcessor) + cost, balance = await proc.charge_extra_runtime_cost( + fake_node_exec, extra_count=3 + ) + assert cost == 0 + assert balance == 0 + assert spent == [] + + @pytest.mark.asyncio + async def test_propagates_insufficient_balance_error( + self, monkeypatch, fake_node_exec + ): + """Out-of-credits errors must propagate, not be silently swallowed.""" + + class FakeDb: + def spend_credits(self, *, user_id, cost, metadata): + raise InsufficientBalanceError( + user_id=user_id, + message="Insufficient balance", + balance=0, + amount=cost, + ) + + fake_block = MagicMock() + fake_block.name = "FakeBlock" + + monkeypatch.setattr(billing, "get_db_client", lambda: FakeDb()) + monkeypatch.setattr(billing, "get_block", lambda block_id: fake_block) + monkeypatch.setattr( + billing, "block_usage_cost", lambda block, input_data, **_kw: (10, {}) + ) + + proc = manager.ExecutionProcessor.__new__(manager.ExecutionProcessor) + with pytest.raises(InsufficientBalanceError): + await proc.charge_extra_runtime_cost(fake_node_exec, extra_count=4) + + +# ── charge_node_usage ────────────────────────────────────────────── + + +class TestChargeNodeUsage: + """charge_node_usage delegates to billing.charge_usage with execution_count=0.""" + + @pytest.mark.asyncio + async def test_delegates_with_zero_execution_count( + self, monkeypatch, fake_node_exec + ): + """Nested tool charges should NOT inflate the per-execution counter.""" + + captured: dict = {} + + def fake_charge_usage(node_exec, execution_count): + captured["execution_count"] = execution_count + captured["node_exec"] = node_exec + return (5, 100) + + def fake_handle_low_balance( + db_client, user_id, current_balance, transaction_cost + ): + pass + + monkeypatch.setattr(billing, "charge_usage", fake_charge_usage) + monkeypatch.setattr(billing, "handle_low_balance", fake_handle_low_balance) + monkeypatch.setattr(billing, "get_db_client", lambda: MagicMock()) + + proc = manager.ExecutionProcessor.__new__(manager.ExecutionProcessor) + cost, balance = await proc.charge_node_usage(fake_node_exec) + assert cost == 5 + assert balance == 100 + assert captured["execution_count"] == 0 + + @pytest.mark.asyncio + async def test_calls_handle_low_balance_when_cost_nonzero( + self, monkeypatch, fake_node_exec + ): + """charge_node_usage should call handle_low_balance when total_cost > 0.""" + + low_balance_calls: list[dict] = [] + + def fake_charge_usage(node_exec, execution_count): + return (10, 50) + + def fake_handle_low_balance( + db_client, user_id, current_balance, transaction_cost + ): + low_balance_calls.append( + { + "user_id": user_id, + "current_balance": current_balance, + "transaction_cost": transaction_cost, + } + ) + + monkeypatch.setattr(billing, "charge_usage", fake_charge_usage) + monkeypatch.setattr(billing, "handle_low_balance", fake_handle_low_balance) + monkeypatch.setattr(billing, "get_db_client", lambda: MagicMock()) + + proc = manager.ExecutionProcessor.__new__(manager.ExecutionProcessor) + cost, balance = await proc.charge_node_usage(fake_node_exec) + assert cost == 10 + assert balance == 50 + assert len(low_balance_calls) == 1 + assert low_balance_calls[0]["user_id"] == "u" + assert low_balance_calls[0]["current_balance"] == 50 + assert low_balance_calls[0]["transaction_cost"] == 10 + + 
@pytest.mark.asyncio + async def test_skips_handle_low_balance_when_cost_zero( + self, monkeypatch, fake_node_exec + ): + """charge_node_usage should NOT call handle_low_balance when cost is 0.""" + + low_balance_calls: list = [] + + def fake_charge_usage(node_exec, execution_count): + return (0, 200) + + def fake_handle_low_balance( + db_client, user_id, current_balance, transaction_cost + ): + low_balance_calls.append(True) + + monkeypatch.setattr(billing, "charge_usage", fake_charge_usage) + monkeypatch.setattr(billing, "handle_low_balance", fake_handle_low_balance) + monkeypatch.setattr(billing, "get_db_client", lambda: MagicMock()) + + proc = manager.ExecutionProcessor.__new__(manager.ExecutionProcessor) + cost, balance = await proc.charge_node_usage(fake_node_exec) + assert cost == 0 + assert low_balance_calls == [] + + +# ── on_node_execution charging gate ──────────────────────────────── + + +class _FakeNode: + """Minimal stand-in for a ``Node`` object with a block attribute.""" + + def __init__(self, extra_charges: int = 0, block_name: str = "FakeBlock"): + self.block = MagicMock() + self.block.name = block_name + self.block.extra_runtime_cost = MagicMock(return_value=extra_charges) + + +class _FakeExecContext: + def __init__(self, dry_run: bool = False): + self.dry_run = dry_run + + +def _make_node_exec(dry_run: bool = False) -> MagicMock: + """Build a NodeExecutionEntry-like mock for on_node_execution tests.""" + ne = MagicMock() + ne.user_id = "u" + ne.graph_id = "g" + ne.graph_exec_id = "ge" + ne.node_id = "n" + ne.node_exec_id = "ne" + ne.block_id = "b" + ne.inputs = {} + ne.execution_context = _FakeExecContext(dry_run=dry_run) + return ne + + +@pytest.fixture() +def gated_processor(monkeypatch): + """ExecutionProcessor with on_node_execution's downstream calls stubbed. + + Lets tests flip the gate conditions (status, extra_runtime_cost result, + llm_call_count, dry_run) and observe whether charge_extra_runtime_cost + was called. + """ + + calls: dict[str, list] = { + "charge_extra_runtime_cost": [], + "handle_low_balance": [], + "handle_insufficient_funds_notif": [], + } + + # Stub node lookup + DB client so the wrapper doesn't touch real infra. + fake_db = MagicMock() + fake_db.get_node = AsyncMock(return_value=_FakeNode(extra_charges=2)) + monkeypatch.setattr(manager, "get_db_async_client", lambda: fake_db) + monkeypatch.setattr(billing, "get_db_client", lambda: fake_db) + # get_block is called by LogMetadata construction in on_node_execution. + monkeypatch.setattr( + manager, + "get_block", + lambda block_id: MagicMock(name="FakeBlock"), + ) + # Persistence + cost logging are not under test here. + monkeypatch.setattr( + manager, + "async_update_node_execution_status", + AsyncMock(return_value=None), + ) + monkeypatch.setattr( + manager, + "async_update_graph_execution_state", + AsyncMock(return_value=None), + ) + monkeypatch.setattr( + manager, + "log_system_credential_cost", + AsyncMock(return_value=None), + ) + + proc = manager.ExecutionProcessor.__new__(manager.ExecutionProcessor) + + # Control the status returned by the inner execution call. 
+ inner_result = {"status": ExecutionStatus.COMPLETED, "llm_call_count": 3} + + async def fake_inner( + self, + *, + node, + node_exec, + node_exec_progress, + stats, + db_client, + log_metadata, + nodes_input_masks=None, + nodes_to_skip=None, + ): + stats.llm_call_count = inner_result["llm_call_count"] + return MagicMock(wall_time=0.1, cpu_time=0.1), inner_result["status"] + + monkeypatch.setattr( + manager.ExecutionProcessor, + "_on_node_execution", + fake_inner, + ) + + async def fake_charge_extra(node_exec, extra_count): + calls["charge_extra_runtime_cost"].append(extra_count) + return (extra_count * 10, 500) + + monkeypatch.setattr(billing, "charge_extra_runtime_cost", fake_charge_extra) + + def fake_low_balance(db_client, user_id, current_balance, transaction_cost): + calls["handle_low_balance"].append( + { + "user_id": user_id, + "current_balance": current_balance, + "transaction_cost": transaction_cost, + } + ) + + monkeypatch.setattr(billing, "handle_low_balance", fake_low_balance) + + def fake_notif(db_client, user_id, graph_id, e): + calls["handle_insufficient_funds_notif"].append( + {"user_id": user_id, "graph_id": graph_id, "error": e} + ) + + monkeypatch.setattr(billing, "handle_insufficient_funds_notif", fake_notif) + + return proc, calls, inner_result, fake_db, NodeExecutionStats + + +@pytest.mark.asyncio +async def test_on_node_execution_charges_extra_iterations_when_gate_passes( + gated_processor, +): + """COMPLETED + extra_runtime_cost > 0 + not dry_run → charged.""" + + proc, calls, inner, fake_db, _ = gated_processor + inner["status"] = ExecutionStatus.COMPLETED + inner["llm_call_count"] = 3 # → extra_charges = 2 + fake_db.get_node = AsyncMock(return_value=_FakeNode(extra_charges=2)) + + stats_pair = ( + MagicMock( + node_count=0, nodes_cputime=0, nodes_walltime=0, cost=0, node_error_count=0 + ), + threading.Lock(), + ) + await proc.on_node_execution( + node_exec=_make_node_exec(dry_run=False), + node_exec_progress=MagicMock(), + nodes_input_masks=None, + graph_stats_pair=stats_pair, + ) + assert calls["charge_extra_runtime_cost"] == [2] + # handle_low_balance must be called with the remaining balance returned by + # charge_extra_runtime_cost (500) so users are alerted when balance drops low. 
+ assert len(calls["handle_low_balance"]) == 1 + + +@pytest.mark.asyncio +async def test_on_node_execution_skips_when_status_not_completed(gated_processor): + + proc, calls, inner, fake_db, _ = gated_processor + inner["status"] = ExecutionStatus.FAILED + inner["llm_call_count"] = 5 + fake_db.get_node = AsyncMock(return_value=_FakeNode(extra_charges=4)) + + stats_pair = ( + MagicMock( + node_count=0, nodes_cputime=0, nodes_walltime=0, cost=0, node_error_count=0 + ), + threading.Lock(), + ) + await proc.on_node_execution( + node_exec=_make_node_exec(dry_run=False), + node_exec_progress=MagicMock(), + nodes_input_masks=None, + graph_stats_pair=stats_pair, + ) + assert calls["charge_extra_runtime_cost"] == [] + + +@pytest.mark.asyncio +async def test_on_node_execution_skips_when_extra_charges_zero(gated_processor): + + proc, calls, inner, fake_db, _ = gated_processor + inner["status"] = ExecutionStatus.COMPLETED + inner["llm_call_count"] = 5 + # Block returns 0 extra charges (base class default) + fake_db.get_node = AsyncMock(return_value=_FakeNode(extra_charges=0)) + + stats_pair = ( + MagicMock( + node_count=0, nodes_cputime=0, nodes_walltime=0, cost=0, node_error_count=0 + ), + threading.Lock(), + ) + await proc.on_node_execution( + node_exec=_make_node_exec(dry_run=False), + node_exec_progress=MagicMock(), + nodes_input_masks=None, + graph_stats_pair=stats_pair, + ) + assert calls["charge_extra_runtime_cost"] == [] + + +@pytest.mark.asyncio +async def test_on_node_execution_skips_when_dry_run(gated_processor): + + proc, calls, inner, fake_db, _ = gated_processor + inner["status"] = ExecutionStatus.COMPLETED + inner["llm_call_count"] = 5 + fake_db.get_node = AsyncMock(return_value=_FakeNode(extra_charges=4)) + + stats_pair = ( + MagicMock( + node_count=0, nodes_cputime=0, nodes_walltime=0, cost=0, node_error_count=0 + ), + threading.Lock(), + ) + await proc.on_node_execution( + node_exec=_make_node_exec(dry_run=True), + node_exec_progress=MagicMock(), + nodes_input_masks=None, + graph_stats_pair=stats_pair, + ) + assert calls["charge_extra_runtime_cost"] == [] + + +@pytest.mark.asyncio +async def test_on_node_execution_insufficient_balance_records_error_and_notifies( + monkeypatch, + gated_processor, +): + """When extra-iteration charging fails with InsufficientBalanceError: + + - the run still reports COMPLETED (the work is already done) + - execution_stats.error is NOT set (would flip node_error_count and + leak balance amounts into persisted node_stats — see manager.py + comment in the IBE handler) + - _handle_insufficient_funds_notif is called so the user is notified + - the structured ERROR log is the alerting hook + """ + + proc, calls, inner, fake_db, _ = gated_processor + inner["status"] = ExecutionStatus.COMPLETED + inner["llm_call_count"] = 4 + fake_db.get_node = AsyncMock(return_value=_FakeNode(extra_charges=3)) + + async def raise_ibe(node_exec, extra_count): + raise InsufficientBalanceError( + user_id=node_exec.user_id, + message="Insufficient balance", + balance=0, + amount=extra_count * 10, + ) + + monkeypatch.setattr(billing, "charge_extra_runtime_cost", raise_ibe) + + stats_pair = ( + MagicMock( + node_count=0, nodes_cputime=0, nodes_walltime=0, cost=0, node_error_count=0 + ), + threading.Lock(), + ) + result_stats = await proc.on_node_execution( + node_exec=_make_node_exec(dry_run=False), + node_exec_progress=MagicMock(), + nodes_input_masks=None, + graph_stats_pair=stats_pair, + ) + # error stays None — node ran to completion, only the post-hoc + # charge failed. 
Setting .error would (a) flip node_error_count++
+    # creating an "errored COMPLETED node" inconsistency, and (b) leak
+    # balance amounts into persisted node_stats.
+    assert result_stats.error is None
+    # User notification fired.
+    assert len(calls["handle_insufficient_funds_notif"]) == 1
+    assert calls["handle_insufficient_funds_notif"][0]["user_id"] == "u"
+
+
+# ── Orchestrator _execute_single_tool_with_manager charging gates ──
+
+
+async def _run_tool_exec_with_stats(
+    *,
+    dry_run: bool,
+    tool_stats_error,
+    charge_node_usage_mock=None,
+):
+    """Invoke _execute_single_tool_with_manager against fully mocked deps
+    and return the ``charge_node_usage`` mock and the raised exception, if any.
+
+    Used to prove the dry_run and error guards around charge_node_usage
+    behave as documented, and that InsufficientBalanceError propagates.
+    """
+    block = OrchestratorBlock()
+
+    # Mocked async DB client used inside orchestrator.
+    mock_db_client = AsyncMock()
+    mock_target_node = MagicMock()
+    mock_target_node.block_id = "test-block-id"
+    mock_target_node.input_default = {}
+    mock_db_client.get_node.return_value = mock_target_node
+    mock_node_exec_result = MagicMock()
+    mock_node_exec_result.node_exec_id = "test-tool-exec-id"
+    mock_db_client.upsert_execution_input.return_value = (
+        mock_node_exec_result,
+        {"query": "t"},
+    )
+    mock_db_client.get_execution_outputs_by_node_exec_id.return_value = {"result": "ok"}
+
+    # ExecutionProcessor mock: on_node_execution returns supplied error.
+    mock_processor = AsyncMock()
+    mock_processor.running_node_execution = defaultdict(MagicMock)
+    mock_processor.execution_stats = MagicMock()
+    mock_processor.execution_stats_lock = threading.Lock()
+    mock_node_stats = MagicMock()
+    mock_node_stats.error = tool_stats_error
+    mock_processor.on_node_execution = AsyncMock(return_value=mock_node_stats)
+    mock_processor.charge_node_usage = charge_node_usage_mock or AsyncMock(
+        return_value=(10, 990)
+    )
+
+    # Build a tool_info shaped like _build_tool_info_from_args output.
+    tool_def = {
+        "type": "function",
+        "function": {
+            "name": "search_keywords",
+            "_sink_node_id": "test-sink-node-id",
+            "_field_mapping": {},
+            "parameters": {
+                "properties": {"query": {"type": "string"}},
+                "required": ["query"],
+            },
+        },
+    }
+    tool_info = OrchestratorBlock._build_tool_info_from_args(
+        tool_call_id="call-1",
+        tool_name="search_keywords",
+        tool_args={"query": "t"},
+        tool_def=tool_def,
+    )
+
+    exec_params = ExecutionParams(
+        user_id="u",
+        graph_id="g",
+        node_id="n",
+        graph_version=1,
+        graph_exec_id="ge",
+        node_exec_id="ne",
+        execution_context=ExecutionContext(
+            human_in_the_loop_safe_mode=False, dry_run=dry_run
+        ),
+    )
+
+    with patch(
+        "backend.blocks.orchestrator.get_database_manager_async_client",
+        return_value=mock_db_client,
+    ):
+        try:
+            await block._execute_single_tool_with_manager(
+                tool_info, exec_params, mock_processor, responses_api=False
+            )
+            raised = None
+        except Exception as e:
+            raised = e
+
+    return mock_processor.charge_node_usage, raised
+
+
+@pytest.mark.asyncio
+async def test_tool_execution_skips_charging_on_dry_run():
+    """dry_run=True → charge_node_usage is NOT called."""
+    charge_mock, raised = await _run_tool_exec_with_stats(
+        dry_run=True, tool_stats_error=None
+    )
+    assert raised is None
+    assert charge_mock.call_count == 0
+
+
+@pytest.mark.asyncio
+async def test_tool_execution_skips_charging_on_failed_tool():
+    """tool_node_stats.error is an Exception → charge_node_usage NOT called."""
+    charge_mock, raised = await _run_tool_exec_with_stats(
+        dry_run=False, tool_stats_error=RuntimeError("tool blew up")
+    )
+    assert raised is None
+    assert charge_mock.call_count == 0
+
+
+@pytest.mark.asyncio
+async def test_tool_execution_skips_charging_on_cancelled_tool():
+    """Cancellation (BaseException subclass) → charge_node_usage NOT called.
+
+    Guards the fix for Sentry's BaseException concern: the old
+    `isinstance(error, Exception)` check would have treated CancelledError
+    as "no error" and billed the user for a terminated run.
+    """
+    import asyncio as _asyncio
+
+    charge_mock, raised = await _run_tool_exec_with_stats(
+        dry_run=False, tool_stats_error=_asyncio.CancelledError()
+    )
+    assert raised is None
+    assert charge_mock.call_count == 0
+
+
+@pytest.mark.asyncio
+async def test_tool_execution_insufficient_balance_propagates():
+    """InsufficientBalanceError from charge_node_usage must propagate out.
+
+    If this leaked into a ToolCallResult the LLM loop would keep running
+    with 'tool failed' errors and the user would get unpaid work.
+    """
+    raising_charge = AsyncMock(
+        side_effect=InsufficientBalanceError(
+            user_id="u", message="nope", balance=0, amount=10
+        )
+    )
+    _, raised = await _run_tool_exec_with_stats(
+        dry_run=False,
+        tool_stats_error=None,
+        charge_node_usage_mock=raising_charge,
+    )
+    assert isinstance(raised, InsufficientBalanceError)
+
+
+@pytest.mark.asyncio
+async def test_tool_execution_on_node_execution_returns_none_sets_is_error():
+    """on_node_execution returning None (swallowed by @async_error_logged) must
+    result in a tool response with _is_error=True so the LLM loop knows the
+    tool failed and does not treat a silent error as a successful execution.
+ """ + block = OrchestratorBlock() + + mock_db_client = AsyncMock() + mock_target_node = MagicMock() + mock_target_node.block_id = "test-block-id" + mock_target_node.input_default = {} + mock_db_client.get_node.return_value = mock_target_node + mock_node_exec_result = MagicMock() + mock_node_exec_result.node_exec_id = "test-tool-exec-id" + mock_db_client.upsert_execution_input.return_value = ( + mock_node_exec_result, + {"query": "t"}, + ) + + mock_processor = AsyncMock() + mock_processor.running_node_execution = defaultdict(MagicMock) + mock_processor.execution_stats = MagicMock() + mock_processor.execution_stats_lock = threading.Lock() + # on_node_execution returns None — simulates @async_error_logged(swallow=True) + # swallowing an internal error + mock_processor.on_node_execution = AsyncMock(return_value=None) + + tool_call = MagicMock() + tool_call.id = "call-none" + tool_call.name = "search_keywords" + tool_call.arguments = '{"query":"t"}' + tool_def = { + "type": "function", + "function": { + "name": "search_keywords", + "_sink_node_id": "test-sink-node-id", + "_field_mapping": {}, + "parameters": { + "properties": {"query": {"type": "string"}}, + "required": ["query"], + }, + }, + } + tool_info = OrchestratorBlock._build_tool_info_from_args( + tool_call_id="call-none", + tool_name="search_keywords", + tool_args={"query": "t"}, + tool_def=tool_def, + ) + + exec_params = ExecutionParams( + user_id="u", + graph_id="g", + node_id="n", + graph_version=1, + graph_exec_id="ge", + node_exec_id="ne", + execution_context=ExecutionContext( + human_in_the_loop_safe_mode=False, dry_run=False + ), + ) + + with patch( + "backend.blocks.orchestrator.get_database_manager_async_client", + return_value=mock_db_client, + ): + resp = await block._execute_single_tool_with_manager( + tool_info, exec_params, mock_processor, responses_api=False + ) + + assert resp.get("_is_error") is True + # charge_node_usage must NOT be called for a failed tool execution + mock_processor.charge_node_usage.assert_not_called() + + +# ── on_node_execution FAILED + InsufficientBalanceError notification ── + + +@pytest.mark.asyncio +async def test_on_node_execution_failed_ibe_sends_notification( + monkeypatch, + gated_processor, +): + """When status == FAILED and execution_stats.error is InsufficientBalanceError, + _handle_insufficient_funds_notif must be called. + + This path fires when a nested tool charge inside the orchestrator raises + InsufficientBalanceError, which propagates out of the block's run() generator + and is caught by _on_node_execution's broad except, setting status=FAILED and + execution_stats.error=IBE. on_node_execution's post-execution block then + sends the user notification so they understand why the run stopped. + """ + + proc, calls, inner, fake_db, NodeExecutionStats = gated_processor + ibe = InsufficientBalanceError( + user_id="u", + message="Insufficient balance", + balance=0, + amount=30, + ) + + # Simulate _on_node_execution returning FAILED with IBE in stats.error. 
+ async def fake_inner_failed( + self, + *, + node, + node_exec, + node_exec_progress, + stats, + db_client, + log_metadata, + nodes_input_masks=None, + nodes_to_skip=None, + ): + stats.error = ibe + return MagicMock(wall_time=0.1, cpu_time=0.1), ExecutionStatus.FAILED + + monkeypatch.setattr( + manager.ExecutionProcessor, + "_on_node_execution", + fake_inner_failed, + ) + fake_db.get_node = AsyncMock(return_value=_FakeNode(extra_charges=0)) + + stats_pair = ( + MagicMock( + node_count=0, nodes_cputime=0, nodes_walltime=0, cost=0, node_error_count=0 + ), + threading.Lock(), + ) + await proc.on_node_execution( + node_exec=_make_node_exec(dry_run=False), + node_exec_progress=MagicMock(), + nodes_input_masks=None, + graph_stats_pair=stats_pair, + ) + # The notification must have fired so the user knows why their run stopped. + assert len(calls["handle_insufficient_funds_notif"]) == 1 + assert calls["handle_insufficient_funds_notif"][0]["user_id"] == "u" + # charge_extra_runtime_cost must NOT be called — status is FAILED. + assert calls["charge_extra_runtime_cost"] == [] + + +# ── Billing leak: non-IBE exception during extra-iteration charging ── + + +@pytest.mark.asyncio +async def test_on_node_execution_non_ibe_billing_failure_keeps_completed( + monkeypatch, + gated_processor, +): + """When charge_extra_runtime_cost raises a non-IBE exception (e.g. DB outage): + + - execution_stats.error stays None (node ran to completion) + - status stays COMPLETED (work already done) + - the billing_leak error is logged but does not corrupt execution_stats + """ + proc, calls, inner, fake_db, _ = gated_processor + inner["status"] = ExecutionStatus.COMPLETED + inner["llm_call_count"] = 4 + fake_db.get_node = AsyncMock(return_value=_FakeNode(extra_charges=3)) + + async def raise_conn_error(node_exec, extra_count): + raise ConnectionError("DB connection lost") + + monkeypatch.setattr(billing, "charge_extra_runtime_cost", raise_conn_error) + + stats_pair = ( + MagicMock( + node_count=0, + nodes_cputime=0, + nodes_walltime=0, + cost=0, + node_error_count=0, + ), + threading.Lock(), + ) + result_stats = await proc.on_node_execution( + node_exec=_make_node_exec(dry_run=False), + node_exec_progress=MagicMock(), + nodes_input_masks=None, + graph_stats_pair=stats_pair, + ) + # error stays None — node completed, only billing failed. + assert result_stats.error is None + # No notification was sent (only IBE triggers notification). 
+ assert len(calls["handle_insufficient_funds_notif"]) == 0 + + +# ── _charge_usage with execution_count=0 ── + + +class TestChargeUsageZeroExecutionCount: + """Verify _charge_usage(node_exec, 0) does not invoke execution_usage_cost.""" + + def test_execution_count_zero_skips_execution_tier(self, monkeypatch): + """_charge_usage with execution_count=0 must not call execution_usage_cost.""" + execution_tier_called = [] + + def fake_execution_usage_cost(count): + execution_tier_called.append(count) + return (100, count) + + spent: list[int] = [] + + class FakeDb: + def spend_credits(self, *, user_id, cost, metadata): + spent.append(cost) + return 500 + + fake_block = MagicMock() + fake_block.name = "FakeBlock" + + monkeypatch.setattr(billing, "get_db_client", lambda: FakeDb()) + monkeypatch.setattr(billing, "get_block", lambda block_id: fake_block) + monkeypatch.setattr( + billing, + "block_usage_cost", + lambda block, input_data, **_kw: (10, {}), + ) + monkeypatch.setattr(billing, "execution_usage_cost", fake_execution_usage_cost) + + ne = MagicMock() + ne.user_id = "u" + ne.graph_exec_id = "ge" + ne.graph_id = "g" + ne.node_exec_id = "ne" + ne.node_id = "n" + ne.block_id = "b" + ne.inputs = {} + + total_cost, remaining = billing.charge_usage(ne, 0) + assert total_cost == 10 # block cost only + assert remaining == 500 + assert spent == [10] + # execution_usage_cost must NOT have been called + assert execution_tier_called == [] diff --git a/autogpt_platform/backend/backend/blocks/test/test_orchestrator_responses_api.py b/autogpt_platform/backend/backend/blocks/test/test_orchestrator_responses_api.py index f9ec7676ba..ac78b6d35b 100644 --- a/autogpt_platform/backend/backend/blocks/test/test_orchestrator_responses_api.py +++ b/autogpt_platform/backend/backend/blocks/test/test_orchestrator_responses_api.py @@ -956,6 +956,12 @@ async def test_agent_mode_conversation_valid_for_responses_api(): ep.execution_stats_lock = threading.Lock() ns = MagicMock(error=None) ep.on_node_execution = AsyncMock(return_value=ns) + # Mock charge_node_usage (called after successful tool execution). + # Must be AsyncMock because it is async and is awaited in + # _execute_single_tool_with_manager — a plain MagicMock would return a + # non-awaitable tuple and TypeError out, then be silently swallowed by + # the orchestrator's catch-all. + ep.charge_node_usage = AsyncMock(return_value=(0, 0)) with patch("backend.blocks.llm.llm_call", llm_mock), patch.object( block, "_create_tool_node_signatures", return_value=tool_sigs diff --git a/autogpt_platform/backend/backend/copilot/config.py b/autogpt_platform/backend/backend/copilot/config.py index 28fa24f868..cfbc6feef4 100644 --- a/autogpt_platform/backend/backend/copilot/config.py +++ b/autogpt_platform/backend/backend/copilot/config.py @@ -197,6 +197,15 @@ class ChatConfig(BaseSettings): description="Maximum number of retries for transient API errors " "(429, 5xx, ECONNRESET) before surfacing the error to the user.", ) + claude_agent_cross_user_prompt_cache: bool = Field( + default=True, + description="Enable cross-user prompt caching via SystemPromptPreset. " + "The Claude Code default prompt becomes a cacheable prefix shared " + "across all users, and our custom prompt is appended after it. " + "Dynamic sections (working dir, git status, auto-memory) are excluded " + "from the prefix. 
Set to False to fall back to passing the system " + "prompt as a raw string.", + ) claude_agent_cli_path: str | None = Field( default=None, description="Optional explicit path to a Claude Code CLI binary. " diff --git a/autogpt_platform/backend/backend/copilot/sdk/context_fallback_test.py b/autogpt_platform/backend/backend/copilot/sdk/context_fallback_test.py new file mode 100644 index 0000000000..5b99296314 --- /dev/null +++ b/autogpt_platform/backend/backend/copilot/sdk/context_fallback_test.py @@ -0,0 +1,555 @@ +"""Tests for context fallback paths introduced in fix/copilot-transcript-resume-gate. + +Scenario table +============== + +| # | use_resume | transcript_msg_count | gap | target_tokens | Expected output | +|---|------------|----------------------|---------|---------------|--------------------------------------------| +| A | True | covers all | empty | None | bare message (--resume has full context) | +| B | True | stale | 2 msgs | None | gap context prepended | +| C | True | stale | 2 msgs | 50_000 | gap compressed to budget, prepended | +| D | False | 0 | N/A | None | full session compressed, prepended | +| E | False | 0 | N/A | 50_000 | full session compressed to budget | +| F | False | 2 (partial) | 2 msgs | None | full session compressed (not just gap; | +| | | | | | CLI has zero context without --resume) | +| G | False | 2 (partial) | 2 msgs | 50_000 | full session compressed to budget | +| H | False | covers all | empty | None | full session compressed | +| | | | | | (NOT bare message — the bug that was fixed)| +| I | False | covers all | empty | 50_000 | full session compressed to tight budget | +| J | False | 2 (partial) | n/a | None | exactly ONE compression call (full prior) | + +Compression unit tests +======================= + +| # | Input | target_tokens | Expected | +|---|----------------------|---------------|-----------------------------------------------| +| K | [] | None | ([], False) — empty guard | +| L | [1 msg] | None | ([msg], False) — single-msg guard | +| M | [2+ msgs] | None | target_tokens=None forwarded to _run_compression | +| N | [2+ msgs] | 30_000 | target_tokens=30_000 forwarded | +| O | [2+ msgs], run fails | None | returns originals, False | +""" + +from __future__ import annotations + +from datetime import UTC, datetime +from unittest.mock import AsyncMock, patch + +import pytest + +from backend.copilot.model import ChatMessage, ChatSession +from backend.copilot.sdk.service import _build_query_message, _compress_messages +from backend.util.prompt import CompressResult + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def _make_session(messages: list[ChatMessage]) -> ChatSession: + now = datetime.now(UTC) + return ChatSession( + session_id="test-session", + user_id="user-1", + messages=messages, + title="test", + usage=[], + started_at=now, + updated_at=now, + ) + + +def _msgs(*pairs: tuple[str, str]) -> list[ChatMessage]: + return [ChatMessage(role=r, content=c) for r, c in pairs] + + +def _passthrough_compress(target_tokens=None): + """Return a mock that passes messages through and records its call args.""" + calls: list[tuple[list, int | None]] = [] + + async def _mock(msgs, tok=None): + calls.append((msgs, tok)) + return msgs, False + + _mock.calls = calls # type: ignore[attr-defined] + return _mock + + +# --------------------------------------------------------------------------- +# _build_query_message — scenario 
A–J +# --------------------------------------------------------------------------- + + +class TestBuildQueryMessageResume: + """use_resume=True paths (--resume supplies history; only inject gap if stale).""" + + @pytest.mark.asyncio + async def test_scenario_a_transcript_current_returns_bare_message(self): + """Scenario A: --resume covers full context → no prefix injected.""" + session = _make_session( + _msgs(("user", "q1"), ("assistant", "a1"), ("user", "q2")) + ) + result, compacted = await _build_query_message( + "q2", session, use_resume=True, transcript_msg_count=2, session_id="s" + ) + assert result == "q2" + assert compacted is False + + @pytest.mark.asyncio + async def test_scenario_b_stale_transcript_injects_gap(self, monkeypatch): + """Scenario B: stale transcript → gap context prepended.""" + session = _make_session( + _msgs( + ("user", "q1"), + ("assistant", "a1"), + ("user", "q2"), + ("assistant", "a2"), + ("user", "q3"), + ) + ) + + async def _mock_compress(msgs, target_tokens=None): + return msgs, False + + monkeypatch.setattr( + "backend.copilot.sdk.service._compress_messages", _mock_compress + ) + + result, compacted = await _build_query_message( + "q3", session, use_resume=True, transcript_msg_count=2, session_id="s" + ) + assert "" in result + assert "q2" in result + assert "a2" in result + assert "Now, the user says:\nq3" in result + # q1/a1 are covered by the transcript — must NOT appear in gap context + assert "q1" not in result + + @pytest.mark.asyncio + async def test_scenario_c_stale_transcript_passes_target_tokens(self, monkeypatch): + """Scenario C: target_tokens is forwarded to _compress_messages for the gap.""" + session = _make_session( + _msgs( + ("user", "q1"), + ("assistant", "a1"), + ("user", "q2"), + ("assistant", "a2"), + ("user", "q3"), + ) + ) + captured: list[int | None] = [] + + async def _mock_compress(msgs, target_tokens=None): + captured.append(target_tokens) + return msgs, False + + monkeypatch.setattr( + "backend.copilot.sdk.service._compress_messages", _mock_compress + ) + + await _build_query_message( + "q3", + session, + use_resume=True, + transcript_msg_count=2, + session_id="s", + target_tokens=50_000, + ) + assert captured == [50_000] + + +class TestBuildQueryMessageNoResumeNoTranscript: + """use_resume=False, transcript_msg_count=0 — full session compressed.""" + + @pytest.mark.asyncio + async def test_scenario_d_full_session_compressed(self, monkeypatch): + """Scenario D: no resume, no transcript → compress all prior messages.""" + session = _make_session( + _msgs(("user", "q1"), ("assistant", "a1"), ("user", "q2")) + ) + + async def _mock_compress(msgs, target_tokens=None): + return msgs, False + + monkeypatch.setattr( + "backend.copilot.sdk.service._compress_messages", _mock_compress + ) + + result, compacted = await _build_query_message( + "q2", session, use_resume=False, transcript_msg_count=0, session_id="s" + ) + assert "" in result + assert "q1" in result + assert "a1" in result + assert "Now, the user says:\nq2" in result + + @pytest.mark.asyncio + async def test_scenario_e_passes_target_tokens_to_compression(self, monkeypatch): + """Scenario E: target_tokens forwarded to _compress_messages.""" + session = _make_session( + _msgs(("user", "q1"), ("assistant", "a1"), ("user", "q2")) + ) + captured: list[int | None] = [] + + async def _mock_compress(msgs, target_tokens=None): + captured.append(target_tokens) + return msgs, False + + monkeypatch.setattr( + "backend.copilot.sdk.service._compress_messages", _mock_compress + ) + + 
await _build_query_message( + "q2", + session, + use_resume=False, + transcript_msg_count=0, + session_id="s", + target_tokens=15_000, + ) + assert captured == [15_000] + + +class TestBuildQueryMessageNoResumeWithTranscript: + """use_resume=False, transcript_msg_count > 0 — gap or full-session fallback.""" + + @pytest.mark.asyncio + async def test_scenario_f_no_resume_always_injects_full_session(self, monkeypatch): + """Scenario F: use_resume=False with transcript_msg_count > 0 still injects + the FULL prior session — not just the gap since the transcript end. + + When there is no --resume the CLI starts with zero context, so injecting + only the post-transcript gap would silently drop all transcript-covered + history. The correct fix is to always compress the full session. + """ + session = _make_session( + _msgs( + ("user", "q1"), # transcript_msg_count=2 covers these + ("assistant", "a1"), + ("user", "q2"), # post-transcript gap starts here + ("assistant", "a2"), + ("user", "q3"), # current message + ) + ) + compressed_msgs: list[list] = [] + + async def _mock_compress(msgs, target_tokens=None): + compressed_msgs.append(list(msgs)) + return msgs, False + + monkeypatch.setattr( + "backend.copilot.sdk.service._compress_messages", _mock_compress + ) + + result, _ = await _build_query_message( + "q3", + session, + use_resume=False, + transcript_msg_count=2, # transcript covers q1/a1 but no --resume + session_id="s", + ) + assert "" in result + # Full session must be injected — transcript-covered turns ARE included + assert "q1" in result + assert "a1" in result + assert "q2" in result + assert "a2" in result + assert "Now, the user says:\nq3" in result + # Compressed exactly once with all 4 prior messages + assert len(compressed_msgs) == 1 + assert len(compressed_msgs[0]) == 4 + + @pytest.mark.asyncio + async def test_scenario_g_no_resume_passes_target_tokens(self, monkeypatch): + """Scenario G: target_tokens forwarded when use_resume=False + transcript_msg_count > 0.""" + session = _make_session( + _msgs( + ("user", "q1"), + ("assistant", "a1"), + ("user", "q2"), + ("assistant", "a2"), + ("user", "q3"), + ) + ) + captured: list[int | None] = [] + + async def _mock_compress(msgs, target_tokens=None): + captured.append(target_tokens) + return msgs, False + + monkeypatch.setattr( + "backend.copilot.sdk.service._compress_messages", _mock_compress + ) + + await _build_query_message( + "q3", + session, + use_resume=False, + transcript_msg_count=2, + session_id="s", + target_tokens=50_000, + ) + assert captured == [50_000] + + @pytest.mark.asyncio + async def test_scenario_h_no_resume_transcript_current_injects_full_session( + self, monkeypatch + ): + """Scenario H: the bug that was fixed. + + Old code path: use_resume=False, transcript_msg_count covers all prior + messages → gap sub-path: gap = [] → ``return current_message, False`` + → model received ZERO context (bare message only). + + New code path: use_resume=False always compresses the full prior session + regardless of transcript_msg_count — model always gets context. 
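+
+        Illustrative walk-through with the session built below (5 messages,
+        so prior has 4): old: gap = prior[4:] == [] → bare "q3", no context;
+        new: _compress_messages(prior) → full history prefix + "q3".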
+ """ + session = _make_session( + _msgs( + ("user", "q1"), + ("assistant", "a1"), + ("user", "q2"), + ("assistant", "a2"), + ("user", "q3"), + ) + ) + + async def _mock_compress(msgs, target_tokens=None): + return msgs, False + + monkeypatch.setattr( + "backend.copilot.sdk.service._compress_messages", _mock_compress + ) + + result, _ = await _build_query_message( + "q3", + session, + use_resume=False, + transcript_msg_count=4, # covers ALL prior → old code returned bare msg + session_id="s", + ) + # NEW: must inject full session, NOT return bare message + assert result != "q3" + assert "" in result + assert "q1" in result + assert "Now, the user says:\nq3" in result + + @pytest.mark.asyncio + async def test_scenario_i_no_resume_target_tokens_forwarded_any_transcript_count( + self, monkeypatch + ): + """Scenario I: target_tokens forwarded even when transcript_msg_count covers all.""" + session = _make_session( + _msgs(("user", "q1"), ("assistant", "a1"), ("user", "q2")) + ) + captured: list[int | None] = [] + + async def _mock_compress(msgs, target_tokens=None): + captured.append(target_tokens) + return msgs, False + + monkeypatch.setattr( + "backend.copilot.sdk.service._compress_messages", _mock_compress + ) + + await _build_query_message( + "q2", + session, + use_resume=False, + transcript_msg_count=2, + session_id="s", + target_tokens=15_000, + ) + assert 15_000 in captured + + @pytest.mark.asyncio + async def test_scenario_j_no_resume_single_compression_call(self, monkeypatch): + """Scenario J: use_resume=False always makes exactly ONE compression call + (the full session), regardless of transcript coverage. + + This verifies there is no two-step gap+fallback pattern for no-resume — + compression is called once with the full prior session. + """ + session = _make_session( + _msgs( + ("user", "q1"), + ("assistant", "a1"), + ("user", "q2"), + ("assistant", "a2"), + ("user", "q3"), + ) + ) + call_count = 0 + + async def _mock_compress(msgs, target_tokens=None): + nonlocal call_count + call_count += 1 + return msgs, False + + monkeypatch.setattr( + "backend.copilot.sdk.service._compress_messages", _mock_compress + ) + + await _build_query_message( + "q3", + session, + use_resume=False, + transcript_msg_count=2, + session_id="s", + ) + assert call_count == 1 + + +# --------------------------------------------------------------------------- +# _compress_messages — unit tests K–O +# --------------------------------------------------------------------------- + + +class TestCompressMessages: + @pytest.mark.asyncio + async def test_scenario_k_empty_list_returns_empty(self): + """Scenario K: empty input → short-circuit, no compression.""" + result, compacted = await _compress_messages([]) + assert result == [] + assert compacted is False + + @pytest.mark.asyncio + async def test_scenario_l_single_message_returns_as_is(self): + """Scenario L: single message → short-circuit (< 2 guard).""" + msg = ChatMessage(role="user", content="hello") + result, compacted = await _compress_messages([msg]) + assert result == [msg] + assert compacted is False + + @pytest.mark.asyncio + async def test_scenario_m_target_tokens_none_forwarded(self): + """Scenario M: target_tokens=None forwarded to _run_compression.""" + msgs = [ + ChatMessage(role="user", content="q"), + ChatMessage(role="assistant", content="a"), + ] + fake_result = CompressResult( + messages=[ + {"role": "user", "content": "q"}, + {"role": "assistant", "content": "a"}, + ], + token_count=10, + was_compacted=False, + original_token_count=10, + ) + 
with patch( + "backend.copilot.sdk.service._run_compression", + new_callable=AsyncMock, + return_value=fake_result, + ) as mock_run: + await _compress_messages(msgs, target_tokens=None) + + mock_run.assert_awaited_once() + _, kwargs = mock_run.call_args + assert kwargs.get("target_tokens") is None + + @pytest.mark.asyncio + async def test_scenario_n_explicit_target_tokens_forwarded(self): + """Scenario N: explicit target_tokens forwarded to _run_compression.""" + msgs = [ + ChatMessage(role="user", content="q"), + ChatMessage(role="assistant", content="a"), + ] + fake_result = CompressResult( + messages=[{"role": "user", "content": "summary"}], + token_count=5, + was_compacted=True, + original_token_count=50, + ) + with patch( + "backend.copilot.sdk.service._run_compression", + new_callable=AsyncMock, + return_value=fake_result, + ) as mock_run: + result, compacted = await _compress_messages(msgs, target_tokens=30_000) + + mock_run.assert_awaited_once() + _, kwargs = mock_run.call_args + assert kwargs.get("target_tokens") == 30_000 + assert compacted is True + + @pytest.mark.asyncio + async def test_scenario_o_run_compression_exception_returns_originals(self): + """Scenario O: _run_compression raises → return original messages, False.""" + msgs = [ + ChatMessage(role="user", content="q"), + ChatMessage(role="assistant", content="a"), + ] + with patch( + "backend.copilot.sdk.service._run_compression", + new_callable=AsyncMock, + side_effect=RuntimeError("compression timeout"), + ): + result, compacted = await _compress_messages(msgs) + + assert result == msgs + assert compacted is False + + @pytest.mark.asyncio + async def test_compaction_messages_filtered_before_compression(self): + """filter_compaction_messages is applied before _run_compression is called.""" + # A compaction message is one with role=assistant and specific content pattern. + # We verify that only real messages reach _run_compression. 
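+        # Smoke check only: two plain user/assistant turns must survive
+        # filter_compaction_messages unchanged; no compaction marker is
+        # constructed here.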
+ from backend.copilot.sdk.service import filter_compaction_messages + + msgs = [ + ChatMessage(role="user", content="q"), + ChatMessage(role="assistant", content="a"), + ] + # filter_compaction_messages should not remove these plain messages + filtered = filter_compaction_messages(msgs) + assert len(filtered) == len(msgs) + + +# --------------------------------------------------------------------------- +# target_tokens threading — _retry_target_tokens values match expectations +# --------------------------------------------------------------------------- + + +class TestRetryTargetTokens: + def test_first_retry_uses_first_slot(self): + from backend.copilot.sdk.service import _RETRY_TARGET_TOKENS + + assert _RETRY_TARGET_TOKENS[0] == 50_000 + + def test_second_retry_uses_second_slot(self): + from backend.copilot.sdk.service import _RETRY_TARGET_TOKENS + + assert _RETRY_TARGET_TOKENS[1] == 15_000 + + def test_second_slot_smaller_than_first(self): + from backend.copilot.sdk.service import _RETRY_TARGET_TOKENS + + assert _RETRY_TARGET_TOKENS[1] < _RETRY_TARGET_TOKENS[0] + + +# --------------------------------------------------------------------------- +# Single-message session edge cases +# --------------------------------------------------------------------------- + + +class TestSingleMessageSessions: + @pytest.mark.asyncio + async def test_no_resume_single_message_returns_bare(self): + """First turn (1 message): no prior history to inject.""" + session = _make_session([ChatMessage(role="user", content="hello")]) + result, compacted = await _build_query_message( + "hello", session, use_resume=False, transcript_msg_count=0, session_id="s" + ) + assert result == "hello" + assert compacted is False + + @pytest.mark.asyncio + async def test_resume_single_message_returns_bare(self): + """First turn with resume flag: transcript is empty so no gap.""" + session = _make_session([ChatMessage(role="user", content="hello")]) + result, compacted = await _build_query_message( + "hello", session, use_resume=True, transcript_msg_count=0, session_id="s" + ) + assert result == "hello" + assert compacted is False diff --git a/autogpt_platform/backend/backend/copilot/sdk/query_builder_test.py b/autogpt_platform/backend/backend/copilot/sdk/query_builder_test.py index 57f037baba..a6e88889c3 100644 --- a/autogpt_platform/backend/backend/copilot/sdk/query_builder_test.py +++ b/autogpt_platform/backend/backend/copilot/sdk/query_builder_test.py @@ -6,6 +6,7 @@ import pytest from backend.copilot.model import ChatMessage, ChatSession from backend.copilot.sdk.service import ( + _BARE_MESSAGE_TOKEN_FLOOR, _build_query_message, _format_conversation_context, ) @@ -130,6 +131,34 @@ async def test_build_query_resume_up_to_date(): assert was_compacted is False +@pytest.mark.asyncio +async def test_build_query_resume_misaligned_watermark(): + """With --resume and watermark pointing at a user message, skip gap.""" + # Simulates a deleted message shifting DB positions so the watermark + # lands on a user turn instead of the expected assistant turn. 
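+    # Worked example for the session below: prior = 4 messages and
+    # transcript_msg_count=3, so prior[2] should be the last covered turn.
+    # An aligned watermark ends on an assistant reply; here prior[2] is the
+    # user message "turn 2", so the gap is skipped entirely.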
+ session = _make_session( + [ + ChatMessage(role="user", content="turn 1"), + ChatMessage(role="assistant", content="reply 1"), + ChatMessage( + role="user", content="turn 2" + ), # ← watermark points here (role=user) + ChatMessage(role="assistant", content="reply 2"), + ChatMessage(role="user", content="turn 3"), + ] + ) + result, was_compacted = await _build_query_message( + "turn 3", + session, + use_resume=True, + transcript_msg_count=3, # prior[2].role == "user" — misaligned + session_id="test-session", + ) + # Misaligned watermark → skip gap, return bare message + assert result == "turn 3" + assert was_compacted is False + + @pytest.mark.asyncio async def test_build_query_resume_stale_transcript(): """With --resume and stale transcript, gap context is prepended.""" @@ -204,7 +233,7 @@ async def test_build_query_no_resume_multi_message(monkeypatch): ) # Mock _compress_messages to return the messages as-is - async def _mock_compress(msgs): + async def _mock_compress(msgs, target_tokens=None): return msgs, False monkeypatch.setattr( @@ -237,7 +266,7 @@ async def test_build_query_no_resume_multi_message_compacted(monkeypatch): ] ) - async def _mock_compress(msgs): + async def _mock_compress(msgs, target_tokens=None): return msgs, True # Simulate actual compaction monkeypatch.setattr( @@ -253,3 +282,85 @@ async def test_build_query_no_resume_multi_message_compacted(monkeypatch): session_id="test-session", ) assert was_compacted is True + + +@pytest.mark.asyncio +async def test_build_query_no_resume_at_token_floor(): + """When target_tokens is at or below the floor, return bare message. + + This is the final escape hatch: if the retry budget is exhausted and + even the most aggressive compression might not fit, skip history + injection entirely so the user always gets a response. 
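+
+    Reference values from service.py: retries request 50_000 then 15_000
+    tokens via _RETRY_TARGET_TOKENS; only a budget at or below
+    _BARE_MESSAGE_TOKEN_FLOOR (5_000) drops history entirely.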
+ """ + session = _make_session( + [ + ChatMessage(role="user", content="old question"), + ChatMessage(role="assistant", content="old answer"), + ChatMessage(role="user", content="new question"), + ] + ) + result, was_compacted = await _build_query_message( + "new question", + session, + use_resume=False, + transcript_msg_count=0, + session_id="test-session", + target_tokens=_BARE_MESSAGE_TOKEN_FLOOR, + ) + # At the floor threshold, no history is injected + assert result == "new question" + assert was_compacted is False + + +@pytest.mark.asyncio +async def test_build_query_no_resume_below_token_floor(): + """target_tokens strictly below floor also returns bare message.""" + session = _make_session( + [ + ChatMessage(role="user", content="old"), + ChatMessage(role="assistant", content="reply"), + ChatMessage(role="user", content="new"), + ] + ) + result, was_compacted = await _build_query_message( + "new", + session, + use_resume=False, + transcript_msg_count=0, + session_id="test-session", + target_tokens=_BARE_MESSAGE_TOKEN_FLOOR - 1, + ) + assert result == "new" + assert was_compacted is False + + +@pytest.mark.asyncio +async def test_build_query_no_resume_above_token_floor_compresses(monkeypatch): + """target_tokens just above the floor still triggers compression.""" + session = _make_session( + [ + ChatMessage(role="user", content="old"), + ChatMessage(role="assistant", content="reply"), + ChatMessage(role="user", content="new"), + ] + ) + + async def _mock_compress(msgs, target_tokens=None): + return msgs, False + + monkeypatch.setattr( + "backend.copilot.sdk.service._compress_messages", + _mock_compress, + ) + + result, was_compacted = await _build_query_message( + "new", + session, + use_resume=False, + transcript_msg_count=0, + session_id="test-session", + target_tokens=_BARE_MESSAGE_TOKEN_FLOOR + 1, + ) + # Above the floor → history is injected (not the bare message) + assert "" in result + assert "Now, the user says:\nnew" in result diff --git a/autogpt_platform/backend/backend/copilot/sdk/sdk_compat_test.py b/autogpt_platform/backend/backend/copilot/sdk/sdk_compat_test.py index c705d26c22..5d132aa94d 100644 --- a/autogpt_platform/backend/backend/copilot/sdk/sdk_compat_test.py +++ b/autogpt_platform/backend/backend/copilot/sdk/sdk_compat_test.py @@ -7,6 +7,7 @@ tests will catch it immediately. """ import inspect +from typing import cast import pytest @@ -90,6 +91,39 @@ def test_agent_options_accepts_required_fields(): assert opts.cwd == "/tmp" +def test_agent_options_accepts_system_prompt_preset_with_exclude_dynamic_sections(): + """Verify ClaudeAgentOptions accepts the exact preset dict _build_system_prompt_value produces. + + The production code always includes ``exclude_dynamic_sections=True`` in the preset + dict. This compat test mirrors that exact shape so any SDK version that starts + rejecting unknown keys will be caught here rather than at runtime. + """ + from claude_agent_sdk import ClaudeAgentOptions + from claude_agent_sdk.types import SystemPromptPreset + + from .service import _build_system_prompt_value + + # Call the production helper directly so this test is tied to the real + # dict shape rather than a hand-rolled copy. 
+    preset = _build_system_prompt_value("custom system prompt", cross_user_cache=True)
+    assert isinstance(
+        preset, dict
+    ), "_build_system_prompt_value must return a dict when caching is on"
+
+    sdk_preset = cast(SystemPromptPreset, preset)
+    opts = ClaudeAgentOptions(system_prompt=sdk_preset)
+    assert opts.system_prompt == sdk_preset
+
+
+def test_build_system_prompt_value_returns_plain_string_when_cross_user_cache_off():
+    """When cross_user_cache=False (e.g. on --resume turns), the helper must return
+    a plain string so the preset+resume crash is avoided."""
+    from .service import _build_system_prompt_value
+
+    result = _build_system_prompt_value("my prompt", cross_user_cache=False)
+    assert result == "my prompt", "Must return the raw string, not a preset dict"
+
+
 def test_agent_options_accepts_all_our_fields():
     """Comprehensive check of every field we use in service.py."""
     from claude_agent_sdk import ClaudeAgentOptions
diff --git a/autogpt_platform/backend/backend/copilot/sdk/service.py b/autogpt_platform/backend/backend/copilot/sdk/service.py
index 251fc5f8f6..c7d166adba 100644
--- a/autogpt_platform/backend/backend/copilot/sdk/service.py
+++ b/autogpt_platform/backend/backend/copilot/sdk/service.py
@@ -29,6 +29,7 @@ from claude_agent_sdk import (
     ToolResultBlock,
     ToolUseBlock,
 )
+from claude_agent_sdk.types import SystemPromptPreset
 from langfuse import propagate_attributes
 from langsmith.integrations.claude_agent_sdk import configure_claude_agent_sdk
 from opentelemetry import trace as otel_trace
@@ -260,6 +261,11 @@ class ReducedContext(NamedTuple):
     resume_file: str | None
     transcript_lost: bool
     tried_compaction: bool
+    # Token budget for history compression on the DB-message fallback path.
+    # None means "use model-aware default". Stepped down on each retry so
+    # compress_context applies progressively more aggressive reduction
+    # (LLM summarize → content truncate → middle-out delete → first/last trim).
+    target_tokens: int | None = None


 @dataclass
@@ -304,6 +310,10 @@ class _RetryState:
     adapter: SDKResponseAdapter
     transcript_builder: TranscriptBuilder
     usage: _TokenUsage
+    # Token budget for history compression on retries (DB-message fallback path).
+    # None = model-aware default. Stepped down each retry for progressively more
+    # aggressive compression (LLM summarize → truncate → middle-out → trim).
+    target_tokens: int | None = None


 @dataclass
@@ -335,12 +345,34 @@ class _StreamContext:
     lock: AsyncClusterLock


+# Per-retry token budgets for the no-transcript (use_resume=False) path.
+# When there is no CLI native session to --resume, context is built from DB
+# messages via _format_conversation_context. For large sessions this text
+# can exceed the model context window; each retry lowers the token budget so
+# compress_context applies progressively more aggressive reduction:
+# LLM summarize → content truncate → middle-out delete → first/last trim.
+# Index 0 = first retry, 1 = second retry; last value applies beyond that.
+_RETRY_TARGET_TOKENS: tuple[int, ...] = (50_000, 15_000)
+
+# Below this token budget the model context is so tight that injecting any
+# conversation history would likely exceed the limit regardless of content.
+# _build_query_message returns the bare message when target_tokens falls to
+# or below this floor, giving the user a response instead of a hard error.
+_BARE_MESSAGE_TOKEN_FLOOR: int = 5_000
+
+# Tight token budget for seeding the transcript builder on turns where no
+# CLI native session exists. Kept below _RETRY_TARGET_TOKENS[0] so the
+# seeded JSONL upload stays compact and future gap injections are small.
+_SEED_TARGET_TOKENS: int = 30_000
+
+
 async def _reduce_context(
     transcript_content: str,
     tried_compaction: bool,
     session_id: str,
     sdk_cwd: str,
     log_prefix: str,
+    attempt: int = 1,
 ) -> ReducedContext:
     """Prepare reduced context for a retry attempt.

@@ -348,9 +380,19 @@
     On subsequent retries (or if compaction fails), drops the transcript
     entirely so the query is rebuilt from DB messages only.

-    `transcript_lost` is True when the transcript was dropped (caller
-    should set `skip_transcript_upload`).
+    When no transcript is available (use_resume=False fallback path), returns
+    a decreasing ``target_tokens`` budget so ``compress_context`` applies
+    progressively more aggressive reduction (LLM summarize → content truncate
+    → middle-out delete → first/last trim). The budget applies in
+    ``_build_query_message`` and steps down on each retry per
+    ``_RETRY_TARGET_TOKENS``.
+
+    ``transcript_lost`` is True when the transcript was dropped (caller
+    should set ``skip_transcript_upload``).
     """
+    # Token budget for the DB fallback on this attempt (no-transcript path).
+    idx = max(0, attempt - 1)
+    retry_target = _RETRY_TARGET_TOKENS[min(idx, len(_RETRY_TARGET_TOKENS) - 1)]
+
     # First retry: try compacting our transcript builder state.
     # Note: the CLI native --resume file is not updated with the compacted
     # content (it would require emitting CLI-native JSONL format), so the
@@ -374,9 +416,14 @@
             return ReducedContext(tb, False, None, False, True)
         logger.warning("%s Compaction failed, dropping transcript", log_prefix)

-    # Subsequent retry or compaction failed: drop transcript entirely
-    logger.warning("%s Dropping transcript, rebuilding from DB messages", log_prefix)
-    return ReducedContext(TranscriptBuilder(), False, None, True, True)
+    # Subsequent retry or compaction failed: drop transcript entirely.
+    # Return retry_target so the caller compresses DB messages to that budget.
+    logger.warning(
+        "%s Dropping transcript, rebuilding from DB messages" " (target_tokens=%d)",
+        log_prefix,
+        retry_target,
+    )
+    return ReducedContext(TranscriptBuilder(), False, None, True, True, retry_target)


 def _append_error_marker(
@@ -705,6 +752,34 @@ def _is_fallback_stderr(line: str) -> bool:
     return "fallback model" in line.lower()


+def _build_system_prompt_value(
+    system_prompt: str,
+    cross_user_cache: bool,
+) -> str | SystemPromptPreset:
+    """Build the ``system_prompt`` argument for :class:`ClaudeAgentOptions`.
+
+    When *cross_user_cache* is enabled, returns a :class:`SystemPromptPreset`
+    dict so the Claude Code default prompt becomes a cacheable prefix shared
+    across all users; our custom *system_prompt* is appended after it.
+
+    When disabled, the raw *system_prompt* string is returned unchanged
+    (also the safe choice if the installed SDK predates ``SystemPromptPreset``).
+
+    An empty *system_prompt* is accepted: the preset dict will have
+    ``append: ""`` which the SDK treats as no custom suffix.
+    """
+    if cross_user_cache:
+        logger.debug("Using SystemPromptPreset for cross-user prompt cache")
+        return SystemPromptPreset(
+            type="preset",
+            preset="claude_code",
+            append=system_prompt,
+            exclude_dynamic_sections=True,
+        )
+    logger.debug("Cross-user prompt cache disabled, using raw string")
+    return system_prompt
+
+
 def _make_sdk_cwd(session_id: str) -> str:
     """Create a safe, session-specific working directory path.
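A minimal usage sketch of the two shapes the helper above can produce (the prompt text and variable names here are illustrative; `ClaudeAgentOptions(system_prompt=...)` is the same call exercised in sdk_compat_test.py):

```python
# Sketch only: mirrors _build_system_prompt_value; the prompt text is made up.
from claude_agent_sdk import ClaudeAgentOptions

# Turn 1 (no --resume): preset dict → the Claude Code default prompt becomes
# a cacheable cross-user prefix; the custom prompt is appended after it.
cached = ClaudeAgentOptions(
    system_prompt={
        "type": "preset",
        "preset": "claude_code",
        "append": "You are the AutoGPT copilot.",
        "exclude_dynamic_sections": True,
    }
)

# Resumed turn: plain string, avoiding the CLI 2.1.97 preset+--resume crash.
resumed = ClaudeAgentOptions(system_prompt="You are the AutoGPT copilot.")
```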
@@ -801,6 +876,7 @@ def _format_sdk_content_blocks(blocks: list) -> list[dict[str, Any]]: async def _compress_messages( messages: list[ChatMessage], + target_tokens: int | None = None, ) -> tuple[list[ChatMessage], bool]: """Compress a list of messages if they exceed the token threshold. @@ -809,6 +885,10 @@ async def _compress_messages( `_compress_messages` and `compact_transcript` share this helper so client acquisition and error handling are consistent. + ``target_tokens`` sets a hard ceiling for the compressed output so + callers can enforce a tighter budget on retries. When ``None``, + ``compress_context`` uses the model-aware default. + See also: `_run_compression` — shared compression with timeout guards. `compact_transcript` — compresses JSONL transcript entries. @@ -832,7 +912,9 @@ async def _compress_messages( messages_dict.append(msg_dict) try: - result = await _run_compression(messages_dict, config.model, "[SDK]") + result = await _run_compression( + messages_dict, config.model, "[SDK]", target_tokens=target_tokens + ) except Exception as exc: # Guard against timeouts or unexpected errors in compression — # return the original messages so the caller can proceed without @@ -961,44 +1043,139 @@ async def _build_query_message( use_resume: bool, transcript_msg_count: int, session_id: str, + target_tokens: int | None = None, ) -> tuple[str, bool]: """Build the query message with appropriate context. + When ``use_resume=True``, the CLI has the full session via ``--resume``; + only a gap-fill prefix is injected when the transcript is stale. + + When ``use_resume=False``, the CLI starts a fresh session with no prior + context, so the full prior session is always compressed and injected via + ``_format_conversation_context``. ``compress_context`` handles size + reduction internally (LLM summarize → content truncate → middle-out delete + → first/last trim). ``target_tokens`` decreases on each retry to force + progressively more aggressive compression when the first attempt exceeds + context limits. + Returns: Tuple of (query_message, was_compacted). """ msg_count = len(session.messages) + prior = session.messages[:-1] # all turns except the current user message + + logger.info( + "[SDK] [%s] Context path: use_resume=%s, transcript_msg_count=%d," + " db_msg_count=%d, target_tokens=%s", + session_id[:8], + use_resume, + transcript_msg_count, + msg_count, + target_tokens, + ) if use_resume and transcript_msg_count > 0: if transcript_msg_count < msg_count - 1: - gap = session.messages[transcript_msg_count:-1] - compressed, was_compressed = await _compress_messages(gap) + # Sanity-check the watermark: the last covered position should be + # an assistant turn. A user-role message here means the count is + # misaligned (e.g. a message was deleted and DB positions shifted). + # Skip the gap rather than injecting wrong context — the CLI session + # loaded via --resume still has good history. 
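+            # e.g. prior = [user, assistant, user, assistant] with
+            # transcript_msg_count=3: prior[2] is a user turn, so the count
+            # no longer lines up with turn boundaries and any gap slice
+            # would start mid-exchange.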
+ if prior[transcript_msg_count - 1].role != "assistant": + logger.warning( + "[SDK] [%s] Watermark misaligned: prior[%d].role=%r" + " (expected 'assistant') — skipping gap to avoid" + " injecting wrong context (transcript=%d, db=%d)", + session_id[:8], + transcript_msg_count - 1, + prior[transcript_msg_count - 1].role, + transcript_msg_count, + msg_count, + ) + return current_message, False + gap = prior[transcript_msg_count:] + compressed, was_compressed = await _compress_messages(gap, target_tokens) gap_context = _format_conversation_context(compressed) if gap_context: logger.info( "[SDK] Transcript stale: covers %d of %d messages, " - "gap=%d (compressed=%s)", + "gap=%d (compressed=%s), gap_context_bytes=%d", transcript_msg_count, msg_count, len(gap), was_compressed, + len(gap_context), ) return ( f"{gap_context}\n\nNow, the user says:\n{current_message}", was_compressed, ) + logger.warning( + "[SDK] [%s] Transcript stale: gap produced empty context" + " (%d msgs, transcript=%d/%d) — sending message without gap prefix", + session_id[:8], + len(gap), + transcript_msg_count, + msg_count, + ) + else: + logger.info( + "[SDK] [%s] --resume covers full context (%d messages)", + session_id[:8], + transcript_msg_count, + ) + return current_message, False + elif not use_resume and msg_count > 1: + # No --resume: the CLI starts a fresh session with no prior context. + # Injecting only the post-transcript gap would omit the transcript-covered + # prefix entirely, so always compress the full prior session here. + # compress_context handles size reduction internally (LLM summarize → + # content truncate → middle-out delete → first/last trim). + + # Final escape hatch: if the token budget is at or below the floor, + # the model context is so tight that even fully compressed history + # would risk a "prompt too long" error. Return the bare message so + # the user always gets a response rather than a hard failure. 
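+        # With the current table this never fires via retries alone:
+        # _RETRY_TARGET_TOKENS bottoms out at 15_000, above the 5_000 floor,
+        # so it guards callers that pass an explicitly tighter budget.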
+ if target_tokens is not None and target_tokens <= _BARE_MESSAGE_TOKEN_FLOOR: + logger.warning( + "[SDK] [%s] target_tokens=%d at or below floor (%d) —" + " skipping history injection to guarantee response delivery" + " (session has %d messages)", + session_id[:8], + target_tokens, + _BARE_MESSAGE_TOKEN_FLOOR, + msg_count, + ) + return current_message, False + logger.warning( - f"[SDK] Using compression fallback for session " - f"{session_id} ({msg_count} messages) — no transcript for --resume" + "[SDK] [%s] No --resume for %d-message session — compressing" + " full session history (pod affinity issue or first turn after" + " restore failure); target_tokens=%s", + session_id[:8], + msg_count, + target_tokens, ) - compressed, was_compressed = await _compress_messages(session.messages[:-1]) + compressed, was_compressed = await _compress_messages(prior, target_tokens) history_context = _format_conversation_context(compressed) if history_context: + logger.info( + "[SDK] [%s] Fallback context built: compressed=%s," " context_bytes=%d", + session_id[:8], + was_compressed, + len(history_context), + ) return ( f"{history_context}\n\nNow, the user says:\n{current_message}", was_compressed, ) + logger.warning( + "[SDK] [%s] Fallback context empty after compression" + " (%d messages) — sending message without history", + session_id[:8], + len(prior), + ) return current_message, False @@ -1927,6 +2104,48 @@ async def _run_stream_attempt( ) +async def _seed_transcript( + session: ChatSession, + transcript_builder: TranscriptBuilder, + transcript_covers_prefix: bool, + transcript_msg_count: int, + log_prefix: str, +) -> tuple[str, bool, int]: + """Seed the transcript builder from compressed DB messages. + + Called when ``use_resume=False`` and no prior transcript exists in storage + so that ``upload_transcript`` saves a compact version for future turns. + This ensures the next turn can use the full-session compression path with + the benefit of an already-compressed baseline, and a restored CLI session + on the next pod gets a usable compact base even for sessions that started + on old pods. + + Returns ``(transcript_content, transcript_covers_prefix, transcript_msg_count)`` + updated values — unchanged if seeding is not possible. + """ + if len(session.messages) <= 1: + return "", transcript_covers_prefix, transcript_msg_count + + _prior = session.messages[:-1] + _comp, _ = await _compress_messages(_prior, _SEED_TARGET_TOKENS) + if not _comp: + return "", transcript_covers_prefix, transcript_msg_count + + _seeded = _session_messages_to_transcript(_comp) + if not _seeded or not validate_transcript(_seeded): + return "", transcript_covers_prefix, transcript_msg_count + + transcript_builder.load_previous(_seeded, log_prefix=log_prefix) + logger.info( + "%s Seeded transcript from %d compressed DB messages" + " for next-turn upload (seed_target_tokens=%d)", + log_prefix, + len(_comp), + _SEED_TARGET_TOKENS, + ) + return _seeded, True, len(_prior) + + async def stream_chat_completion_sdk( session_id: str, message: str | None = None, @@ -2198,9 +2417,20 @@ async def stream_chat_completion_sdk( # Builder loaded but CLI native session not available. # --resume will not be used this turn; upload after turn # will seed the native session for the next turn. + # + # Still record transcript_msg_count so _build_query_message + # can use the transcript-aware gap path (inject only new + # messages since the transcript end) instead of compressing + # the full DB history. 
This avoids prompt-too-long on + # large sessions where the CLI session is temporarily + # unavailable (e.g. mixed-version rolling deployment). + transcript_msg_count = dl.message_count logger.info( - "%s CLI session not restored — running without --resume this turn", + "%s CLI session not restored — running without" + " --resume this turn (transcript_msg_count=%d for" + " gap-aware fallback)", log_prefix, + transcript_msg_count, ) else: logger.warning("%s Transcript downloaded but invalid", log_prefix) @@ -2295,8 +2525,19 @@ async def stream_chat_completion_sdk( sid, ) + # Use SystemPromptPreset for cross-user prompt caching. + # WORKAROUND: CLI 2.1.97 (sdk 0.1.58) exits code 1 when + # excludeDynamicSections=True is in the initialize request AND + # --resume is active. Disable the preset on resumed turns. + # Turn 1 still gets the preset (no --resume). + _cross_user = config.claude_agent_cross_user_prompt_cache and not use_resume + system_prompt_value = _build_system_prompt_value( + system_prompt, + cross_user_cache=_cross_user, + ) + sdk_options_kwargs: dict[str, Any] = { - "system_prompt": system_prompt, + "system_prompt": system_prompt_value, "mcp_servers": {"copilot": mcp_server}, "allowed_tools": allowed, "disallowed_tools": disallowed, @@ -2425,6 +2666,22 @@ async def stream_chat_completion_sdk( if attachments.hint: query_message = f"{query_message}\n\n{attachments.hint}" + # When running without --resume and no prior transcript in storage, + # seed the transcript builder from compressed DB messages so that + # upload_transcript saves a compact version for future turns. + if not use_resume and not transcript_content and not skip_transcript_upload: + ( + transcript_content, + transcript_covers_prefix, + transcript_msg_count, + ) = await _seed_transcript( + session, + transcript_builder, + transcript_covers_prefix, + transcript_msg_count, + log_prefix, + ) + tried_compaction = False # Build the per-request context carrier (shared across attempts). @@ -2507,12 +2764,14 @@ async def stream_chat_completion_sdk( session_id, sdk_cwd, log_prefix, + attempt=attempt, ) state.transcript_builder = ctx.builder state.use_resume = ctx.use_resume state.resume_file = ctx.resume_file tried_compaction = ctx.tried_compaction state.transcript_msg_count = 0 + state.target_tokens = ctx.target_tokens if ctx.transcript_lost: skip_transcript_upload = True @@ -2530,9 +2789,18 @@ async def stream_chat_completion_sdk( # T2+ retry without --resume: do not pass --session-id. # The T1 session file already exists at that path; re-using # the same ID would fail with "Session ID already in use". - # The upload guard skips T2+ no-resume turns anyway. sdk_options_kwargs_retry.pop("resume", None) sdk_options_kwargs_retry.pop("session_id", None) + # Recompute system_prompt for retry — ctx.use_resume may have + # changed (context reduction enabled --resume). CLI 2.1.97 + # crashes when excludeDynamicSections=True is combined with + # --resume, so disable the cross-user preset on resumed turns. 
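+                # Mirrors the initial _cross_user computation so retries
+                # follow the same preset/--resume compatibility rule.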
+ _cross_user_retry = ( + config.claude_agent_cross_user_prompt_cache and not ctx.use_resume + ) + sdk_options_kwargs_retry["system_prompt"] = _build_system_prompt_value( + system_prompt, cross_user_cache=_cross_user_retry + ) state.options = ClaudeAgentOptions(**sdk_options_kwargs_retry) # type: ignore[arg-type] # dynamic kwargs state.query_message, state.was_compacted = await _build_query_message( current_message, @@ -2540,6 +2808,7 @@ async def stream_chat_completion_sdk( state.use_resume, state.transcript_msg_count, session_id, + target_tokens=state.target_tokens, ) if attachments.hint: state.query_message = f"{state.query_message}\n\n{attachments.hint}" @@ -3025,6 +3294,21 @@ async def stream_chat_completion_sdk( # the shielded inner coroutine continues running to completion so the # upload is not lost. This is intentional and matches the pattern # used for upload_transcript immediately above. + # + # NOTE: upload is attempted regardless of state.use_resume — even when + # this turn ran without --resume (restore failed or first T2+ on a new + # pod), the T1 session file at the expected path may still be present + # and should be re-uploaded so the next turn can resume from it. + # upload_cli_session silently skips when the file is absent, so this is + # always safe. + # + # Intentionally NOT gated on skip_transcript_upload: that flag is set + # when our custom JSONL transcript is dropped (transcript_lost=True on + # reduced-context retries) but the CLI's native session file is written + # independently. Blocking CLI upload on transcript_lost would prevent + # T1 prompt-too-long retries from uploading their valid session file, + # breaking --resume on the next pod. The ended_with_stream_error gate + # above already covers actual turn failures. if ( config.claude_agent_use_resume and user_id @@ -3032,9 +3316,15 @@ async def stream_chat_completion_sdk( and session is not None and state is not None and not ended_with_stream_error - and not skip_transcript_upload - and (not has_history or state.use_resume) ): + logger.info( + "%s Attempting CLI session upload" + " (use_resume=%s, has_history=%s, skip_transcript=%s)", + log_prefix, + state.use_resume, + has_history, + skip_transcript_upload, + ) try: await asyncio.shield( upload_cli_session( diff --git a/autogpt_platform/backend/backend/copilot/sdk/service_helpers_test.py b/autogpt_platform/backend/backend/copilot/sdk/service_helpers_test.py index bfc5227bde..5f1487c43b 100644 --- a/autogpt_platform/backend/backend/copilot/sdk/service_helpers_test.py +++ b/autogpt_platform/backend/backend/copilot/sdk/service_helpers_test.py @@ -15,6 +15,7 @@ from claude_agent_sdk import AssistantMessage, TextBlock, ToolUseBlock from .conftest import build_test_transcript as _build_transcript from .service import ( + _RETRY_TARGET_TOKENS, ReducedContext, _is_prompt_too_long, _is_tool_only_message, @@ -208,6 +209,24 @@ class TestReduceContext: assert ctx.transcript_lost is True + @pytest.mark.asyncio + async def test_drop_returns_target_tokens_attempt_1(self) -> None: + ctx = await _reduce_context("", False, "sess-1", "/tmp", "[t]", attempt=1) + assert ctx.transcript_lost is True + assert ctx.target_tokens == _RETRY_TARGET_TOKENS[0] + + @pytest.mark.asyncio + async def test_drop_returns_target_tokens_attempt_2(self) -> None: + ctx = await _reduce_context("", False, "sess-1", "/tmp", "[t]", attempt=2) + assert ctx.transcript_lost is True + assert ctx.target_tokens == _RETRY_TARGET_TOKENS[1] + + @pytest.mark.asyncio + async def 
test_drop_clamps_attempt_beyond_limits(self) -> None: + ctx = await _reduce_context("", False, "sess-1", "/tmp", "[t]", attempt=99) + assert ctx.transcript_lost is True + assert ctx.target_tokens == _RETRY_TARGET_TOKENS[-1] + # --------------------------------------------------------------------------- # _iter_sdk_messages diff --git a/autogpt_platform/backend/backend/copilot/sdk/service_test.py b/autogpt_platform/backend/backend/copilot/sdk/service_test.py index 5eb9981c5b..caa3d1b597 100644 --- a/autogpt_platform/backend/backend/copilot/sdk/service_test.py +++ b/autogpt_platform/backend/backend/copilot/sdk/service_test.py @@ -8,7 +8,10 @@ from unittest.mock import AsyncMock, MagicMock, patch import pytest +from backend.copilot import config as cfg_mod + from .service import ( + _build_system_prompt_value, _is_sdk_disconnect_error, _normalize_model_name, _prepare_file_attachments, @@ -397,6 +400,7 @@ _CONFIG_ENV_VARS = ( "OPENAI_BASE_URL", "CHAT_USE_CLAUDE_CODE_SUBSCRIPTION", "CHAT_USE_CLAUDE_AGENT_SDK", + "CHAT_CLAUDE_AGENT_CROSS_USER_PROMPT_CACHE", ) @@ -656,3 +660,62 @@ class TestSafeCloseSdkClient: client.__aexit__ = AsyncMock(side_effect=ValueError("invalid argument")) with pytest.raises(ValueError, match="invalid argument"): await _safe_close_sdk_client(client, "[test]") + + +# --------------------------------------------------------------------------- +# SystemPromptPreset — cross-user prompt caching +# --------------------------------------------------------------------------- + + +class TestSystemPromptPreset: + """Tests for _build_system_prompt_value — cross-user prompt caching.""" + + def test_preset_dict_structure_when_enabled(self): + """When cross_user_cache is True, returns a _SystemPromptPreset dict.""" + custom_prompt = "You are a helpful assistant." + result = _build_system_prompt_value(custom_prompt, cross_user_cache=True) + + assert isinstance(result, dict) + assert result["type"] == "preset" + assert result["preset"] == "claude_code" + assert result["append"] == custom_prompt + assert result["exclude_dynamic_sections"] is True + + def test_raw_string_when_disabled(self): + """When cross_user_cache is False, returns the raw string.""" + custom_prompt = "You are a helpful assistant." 
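+        # The raw-string path is the one resumed turns take (see the
+        # CLI 2.1.97 preset/--resume workaround in service.py).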
+ result = _build_system_prompt_value(custom_prompt, cross_user_cache=False) + + assert isinstance(result, str) + assert result == custom_prompt + + def test_empty_string_with_cache_enabled(self): + """Empty system_prompt with cross_user_cache=True produces append=''.""" + result = _build_system_prompt_value("", cross_user_cache=True) + + assert isinstance(result, dict) + assert result["type"] == "preset" + assert result["preset"] == "claude_code" + assert result["append"] == "" + assert result["exclude_dynamic_sections"] is True + + def test_default_config_is_enabled(self, _clean_config_env): + """The default value for claude_agent_cross_user_prompt_cache is True.""" + cfg = cfg_mod.ChatConfig( + use_openrouter=False, + api_key=None, + base_url=None, + use_claude_code_subscription=False, + ) + assert cfg.claude_agent_cross_user_prompt_cache is True + + def test_env_var_disables_cache(self, _clean_config_env, monkeypatch): + """CHAT_CLAUDE_AGENT_CROSS_USER_PROMPT_CACHE=false disables caching.""" + monkeypatch.setenv("CHAT_CLAUDE_AGENT_CROSS_USER_PROMPT_CACHE", "false") + cfg = cfg_mod.ChatConfig( + use_openrouter=False, + api_key=None, + base_url=None, + use_claude_code_subscription=False, + ) + assert cfg.claude_agent_cross_user_prompt_cache is False diff --git a/autogpt_platform/backend/backend/copilot/sdk/transcript_test.py b/autogpt_platform/backend/backend/copilot/sdk/transcript_test.py index bd2932854a..14e404a994 100644 --- a/autogpt_platform/backend/backend/copilot/sdk/transcript_test.py +++ b/autogpt_platform/backend/backend/copilot/sdk/transcript_test.py @@ -960,7 +960,7 @@ class TestRunCompression: ) call_count = [0] - async def _compress_side_effect(*, messages, model, client): + async def _compress_side_effect(*, messages, model, client, target_tokens=None): call_count[0] += 1 if client is not None: # Simulate a hang that exceeds the timeout diff --git a/autogpt_platform/backend/backend/copilot/transcript.py b/autogpt_platform/backend/backend/copilot/transcript.py index a59130c478..a1e11f352d 100644 --- a/autogpt_platform/backend/backend/copilot/transcript.py +++ b/autogpt_platform/backend/backend/copilot/transcript.py @@ -1179,6 +1179,7 @@ async def _run_compression( messages: list[dict], model: str, log_prefix: str, + target_tokens: int | None = None, ) -> CompressResult: """Run LLM-based compression with truncation fallback. @@ -1187,6 +1188,12 @@ async def _run_compression( truncation-based compression which drops older messages without summarization. + ``target_tokens`` sets a hard token ceiling for the compressed output. + When ``None``, ``compress_context`` derives the limit from the model's + context window. Pass a smaller value on retries to force more aggressive + compression — the compressor will LLM-summarize, content-truncate, + middle-out delete, and first/last trim until the result fits. + A 60-second timeout prevents a hung LLM call from blocking the retry path indefinitely. 
The truncation fallback also has a 30-second timeout to guard against slow tokenization on very large @@ -1196,18 +1203,27 @@ async def _run_compression( if client is None: logger.warning("%s No OpenAI client configured, using truncation", log_prefix) return await asyncio.wait_for( - compress_context(messages=messages, model=model, client=None), + compress_context( + messages=messages, model=model, client=None, target_tokens=target_tokens + ), timeout=_TRUNCATION_TIMEOUT_SECONDS, ) try: return await asyncio.wait_for( - compress_context(messages=messages, model=model, client=client), + compress_context( + messages=messages, + model=model, + client=client, + target_tokens=target_tokens, + ), timeout=_COMPACTION_TIMEOUT_SECONDS, ) except Exception as e: logger.warning("%s LLM compaction failed, using truncation: %s", log_prefix, e) return await asyncio.wait_for( - compress_context(messages=messages, model=model, client=None), + compress_context( + messages=messages, model=model, client=None, target_tokens=target_tokens + ), timeout=_TRUNCATION_TIMEOUT_SECONDS, ) diff --git a/autogpt_platform/backend/backend/data/credit.py b/autogpt_platform/backend/backend/data/credit.py index 0959c15d34..24b5aae80d 100644 --- a/autogpt_platform/backend/backend/data/credit.py +++ b/autogpt_platform/backend/backend/data/credit.py @@ -349,7 +349,7 @@ class UserCreditBase(ABC): CreditTransactionType.GRANT, CreditTransactionType.TOP_UP, ]: - from backend.executor.manager import ( + from backend.executor.billing import ( clear_insufficient_funds_notifications, ) @@ -554,7 +554,7 @@ class UserCreditBase(ABC): in [CreditTransactionType.GRANT, CreditTransactionType.TOP_UP] ): # Lazy import to avoid circular dependency with executor.manager - from backend.executor.manager import ( + from backend.executor.billing import ( clear_insufficient_funds_notifications, ) diff --git a/autogpt_platform/backend/backend/data/model.py b/autogpt_platform/backend/backend/data/model.py index f0393133e6..09fdaa6cf8 100644 --- a/autogpt_platform/backend/backend/data/model.py +++ b/autogpt_platform/backend/backend/data/model.py @@ -852,6 +852,7 @@ class NodeExecutionStats(BaseModel): output_token_count: int = 0 cache_read_token_count: int = 0 cache_creation_token_count: int = 0 + cost: int = 0 extra_cost: int = 0 extra_steps: int = 0 provider_cost: float | None = None diff --git a/autogpt_platform/backend/backend/data/platform_cost.py b/autogpt_platform/backend/backend/data/platform_cost.py index ec27572058..aa539bc66b 100644 --- a/autogpt_platform/backend/backend/data/platform_cost.py +++ b/autogpt_platform/backend/backend/data/platform_cost.py @@ -8,6 +8,7 @@ from prisma.models import User as PrismaUser from prisma.types import PlatformCostLogCreateInput, PlatformCostLogWhereInput from pydantic import BaseModel +from backend.data.db import query_raw_with_schema from backend.util.cache import cached from backend.util.json import SafeJson @@ -142,6 +143,7 @@ class UserCostSummary(BaseModel): total_cache_read_tokens: int = 0 total_cache_creation_tokens: int = 0 request_count: int + cost_bearing_request_count: int = 0 class CostLogRow(BaseModel): @@ -163,12 +165,27 @@ class CostLogRow(BaseModel): cache_creation_tokens: int | None = None +class CostBucket(BaseModel): + bucket: str + count: int + + class PlatformCostDashboard(BaseModel): by_provider: list[ProviderCostSummary] by_user: list[UserCostSummary] total_cost_microdollars: int total_requests: int total_users: int + total_input_tokens: int = 0 + total_output_tokens: int = 0 + 
avg_input_tokens_per_request: float = 0.0 + avg_output_tokens_per_request: float = 0.0 + avg_cost_microdollars_per_request: float = 0.0 + cost_p50_microdollars: float = 0.0 + cost_p75_microdollars: float = 0.0 + cost_p95_microdollars: float = 0.0 + cost_p99_microdollars: float = 0.0 + cost_buckets: list[CostBucket] = [] def _si(row: dict, field: str) -> int: @@ -228,6 +245,66 @@ def _build_prisma_where( return where +def _build_raw_where( + start: datetime | None, + end: datetime | None, + provider: str | None, + user_id: str | None, + model: str | None = None, + block_name: str | None = None, + tracking_type: str | None = None, +) -> tuple[str, list]: + """Build a parameterised WHERE clause for raw SQL queries. + + Mirrors the filter logic of ``_build_prisma_where`` so there is a single + source of truth for which columns are filtered and how. The first clause + always restricts to ``cost_usd`` tracking type unless *tracking_type* is + explicitly provided by the caller. + """ + params: list = [] + clauses: list[str] = [] + idx = 1 + + # Always filter by tracking type — defaults to cost_usd for percentile / + # bucket queries that only make sense on cost-denominated rows. + tt = tracking_type if tracking_type is not None else "cost_usd" + clauses.append(f'"trackingType" = ${idx}') + params.append(tt) + idx += 1 + + if start is not None: + clauses.append(f'"createdAt" >= ${idx}::timestamptz') + params.append(start) + idx += 1 + + if end is not None: + clauses.append(f'"createdAt" <= ${idx}::timestamptz') + params.append(end) + idx += 1 + + if provider is not None: + clauses.append(f'"provider" = ${idx}') + params.append(provider.lower()) + idx += 1 + + if user_id is not None: + clauses.append(f'"userId" = ${idx}') + params.append(user_id) + idx += 1 + + if model is not None: + clauses.append(f'"model" = ${idx}') + params.append(model) + idx += 1 + + if block_name is not None: + clauses.append(f'LOWER("blockName") = LOWER(${idx})') + params.append(block_name) + idx += 1 + + return (" AND ".join(clauses), params) + + @cached(ttl_seconds=30) async def get_platform_cost_dashboard( start: datetime | None = None, @@ -256,6 +333,14 @@ async def get_platform_cost_dashboard( start, end, provider, user_id, model, block_name, tracking_type ) + # For per-user tracking-type breakdown we intentionally omit the + # tracking_type filter so cost_usd and tokens rows are always present. + # This ensures cost_bearing_request_count is correct even when the caller + # is filtering the main view by a different tracking_type. + where_no_tracking_type = _build_prisma_where( + start, end, provider, user_id, model, block_name, tracking_type=None + ) + sum_fields = { "costMicrodollars": True, "inputTokens": True, @@ -266,13 +351,18 @@ async def get_platform_cost_dashboard( "trackingAmount": True, } - # Run all four aggregation queries in parallel. - ( - by_provider_groups, - by_user_groups, - total_user_groups, - total_agg_groups, - ) = await asyncio.gather( + # Build parameterised WHERE clause for the raw SQL percentile/bucket + # queries. Uses _build_raw_where so filter logic is shared with + # _build_prisma_where and only maintained in one place. + # Always force tracking_type=None here so _build_raw_where defaults to + # "cost_usd" — percentile and histogram queries only make sense on + # cost-denominated rows, regardless of what the caller is filtering. 
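+    # e.g. with only provider="OpenAI" set, this yields:
+    #   '"trackingType" = $1 AND "provider" = $2', ["cost_usd", "openai"]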
+ raw_where, raw_params = _build_raw_where( + start, end, provider, user_id, model, block_name, tracking_type=None + ) + + # Queries that always run regardless of tracking_type filter. + common_queries = [ # (provider, trackingType, model) aggregation — no ORDER BY in ORM; # sort by total cost descending in Python after fetch. PrismaLog.prisma().group_by( @@ -288,20 +378,125 @@ async def get_platform_cost_dashboard( sum=sum_fields, count=True, ), + # Per-user cost-bearing request count: group by (userId, trackingType) + # so we can compute the correct denominator for per-user avg cost. + # Uses where_no_tracking_type so cost_usd rows are always included + # even when the caller filters the main view by a different tracking_type. + PrismaLog.prisma().group_by( + by=["userId", "trackingType"], + where=where_no_tracking_type, + count=True, + ), # Distinct user count: group by userId, count groups. PrismaLog.prisma().group_by( by=["userId"], where=where, count=True, ), - # Total aggregate: group by provider (no limit) to sum across all - # matching rows. Summed in Python to get grand totals. + # Total aggregate (filtered): group by (provider, trackingType) so we can + # compute cost-bearing and token-bearing denominators for avg stats. PrismaLog.prisma().group_by( - by=["provider"], + by=["provider", "trackingType"], where=where, - sum={"costMicrodollars": True}, + sum={ + "costMicrodollars": True, + "inputTokens": True, + "outputTokens": True, + }, count=True, ), + # Percentile distribution of cost per request (respects all filters). + query_raw_with_schema( + "SELECT" + " percentile_cont(0.5) WITHIN GROUP" + ' (ORDER BY "costMicrodollars") as p50,' + " percentile_cont(0.75) WITHIN GROUP" + ' (ORDER BY "costMicrodollars") as p75,' + " percentile_cont(0.95) WITHIN GROUP" + ' (ORDER BY "costMicrodollars") as p95,' + " percentile_cont(0.99) WITHIN GROUP" + ' (ORDER BY "costMicrodollars") as p99' + ' FROM {schema_prefix}"PlatformCostLog"' + f" WHERE {raw_where}", + *raw_params, + ), + # Histogram buckets for cost distribution (respects all filters). + # NULL costMicrodollars is excluded explicitly to prevent such rows + # from falling through all WHEN clauses into the ELSE '$10+' bucket. + query_raw_with_schema( + "SELECT" + " CASE" + ' WHEN "costMicrodollars" < 500000' + " THEN '$0-0.50'" + ' WHEN "costMicrodollars" < 1000000' + " THEN '$0.50-1'" + ' WHEN "costMicrodollars" < 2000000' + " THEN '$1-2'" + ' WHEN "costMicrodollars" < 5000000' + " THEN '$2-5'" + ' WHEN "costMicrodollars" < 10000000' + " THEN '$5-10'" + " ELSE '$10+'" + " END as bucket," + " COUNT(*) as count" + ' FROM {schema_prefix}"PlatformCostLog"' + f' WHERE {raw_where} AND "costMicrodollars" IS NOT NULL' + " GROUP BY bucket" + ' ORDER BY MIN("costMicrodollars")', + *raw_params, + ), + ] + + # Only run the unfiltered aggregate query when tracking_type is set; + # when tracking_type is None, the filtered query already contains all + # tracking types and reusing it avoids a redundant full aggregation. + if tracking_type is not None: + common_queries.append( + # Total aggregate (no tracking_type filter): used to compute + # cost_bearing_requests and token_bearing_requests denominators so + # global avg stats remain meaningful when the caller filters the + # main view by a specific tracking_type (e.g. 'tokens'). 
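+            # Example: with tracking_type="tokens" the filtered aggregate
+            # contains no cost_usd rows, so avg cost would read 0 without
+            # this unfiltered pass.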
+ PrismaLog.prisma().group_by( + by=["provider", "trackingType"], + where=where_no_tracking_type, + sum={ + "costMicrodollars": True, + "inputTokens": True, + "outputTokens": True, + }, + count=True, + ) + ) + + results = await asyncio.gather(*common_queries) + + # Unpack results by name for clarity. + by_provider_groups = results[0] + by_user_groups = results[1] + by_user_tracking_groups = results[2] + total_user_groups = results[3] + total_agg_groups = results[4] + percentile_rows = results[5] + bucket_rows = results[6] + # When tracking_type is None, the filtered and unfiltered queries are + # identical — reuse total_agg_groups to avoid the extra DB round-trip. + total_agg_no_tracking_type_groups = ( + results[7] if tracking_type is not None else total_agg_groups + ) + + # Compute token grand-totals from the unfiltered aggregate so they remain + # consistent with the avg-token stats (which also use unfiltered data). + # Using by_provider_groups here would give 0 tokens when tracking_type='cost_usd' + # because cost_usd rows carry no token data, contradicting non-zero averages. + total_input_tokens = sum( + _si(r, "inputTokens") + for r in total_agg_no_tracking_type_groups + if r.get("trackingType") == "tokens" + ) + total_output_tokens = sum( + _si(r, "outputTokens") + for r in total_agg_no_tracking_type_groups + if r.get("trackingType") == "tokens" ) # Sort by_provider by total cost descending and cap at MAX_PROVIDER_ROWS. @@ -328,6 +523,61 @@ async def get_platform_cost_dashboard( total_cost = sum(_si(r, "costMicrodollars") for r in total_agg_groups) total_requests = sum(_ca(r) for r in total_agg_groups) + # Extract percentile values from the raw query result. + pctl = percentile_rows[0] if percentile_rows else {} + cost_p50 = float(pctl.get("p50") or 0) + cost_p75 = float(pctl.get("p75") or 0) + cost_p95 = float(pctl.get("p95") or 0) + cost_p99 = float(pctl.get("p99") or 0) + + # Build cost bucket list. + cost_buckets: list[CostBucket] = [ + CostBucket(bucket=r["bucket"], count=int(r["count"])) for r in bucket_rows + ] + + # Avg-stat numerators and denominators are derived from the unfiltered + # aggregate so they remain meaningful when the caller filters by a specific + # tracking_type. Example: filtering by 'tokens' excludes cost_usd rows from + # total_agg_groups, so avg_cost would always be 0 if we used that; using + # total_agg_no_tracking_type_groups gives the correct cost_usd total/count. + avg_cost_total = sum( + _si(r, "costMicrodollars") + for r in total_agg_no_tracking_type_groups + if r.get("trackingType") == "cost_usd" + ) + cost_bearing_requests = sum( + _ca(r) + for r in total_agg_no_tracking_type_groups + if r.get("trackingType") == "cost_usd" + ) + avg_input_total = sum( + _si(r, "inputTokens") + for r in total_agg_no_tracking_type_groups + if r.get("trackingType") == "tokens" + ) + avg_output_total = sum( + _si(r, "outputTokens") + for r in total_agg_no_tracking_type_groups + if r.get("trackingType") == "tokens" + ) + # Token-bearing request count: only rows where trackingType == "tokens". + # Token averages must use this denominator; cost_usd rows do not carry tokens. + token_bearing_requests = sum( + _ca(r) + for r in total_agg_no_tracking_type_groups + if r.get("trackingType") == "tokens" + ) + + # Per-user cost-bearing request count: used for per-user avg cost so the + # denominator matches the numerator (cost_usd rows only, per user). 
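+    # Illustrative shape (hypothetical rows): groups such as
+    #   [{"userId": "u1", "trackingType": "cost_usd", "_count": {"_all": 7}},
+    #    {"userId": "u1", "trackingType": "tokens", "_count": {"_all": 5}}]
+    # collapse to user_cost_bearing_counts == {"u1": 7}; tokens rows are skipped.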
+ user_cost_bearing_counts: dict[str, int] = {} + for r in by_user_tracking_groups: + if r.get("trackingType") == "cost_usd" and r.get("userId"): + uid = r["userId"] + user_cost_bearing_counts[uid] = user_cost_bearing_counts.get(uid, 0) + _ca( + r + ) + return PlatformCostDashboard( by_provider=[ ProviderCostSummary( @@ -355,12 +605,35 @@ async def get_platform_cost_dashboard( total_cache_read_tokens=_si(r, "cacheReadTokens"), total_cache_creation_tokens=_si(r, "cacheCreationTokens"), request_count=_ca(r), + cost_bearing_request_count=user_cost_bearing_counts.get( + r.get("userId") or "", 0 + ), ) for r in by_user_groups ], total_cost_microdollars=total_cost, total_requests=total_requests, total_users=total_users, + total_input_tokens=total_input_tokens, + total_output_tokens=total_output_tokens, + avg_input_tokens_per_request=( + avg_input_total / token_bearing_requests + if token_bearing_requests > 0 + else 0.0 + ), + avg_output_tokens_per_request=( + avg_output_total / token_bearing_requests + if token_bearing_requests > 0 + else 0.0 + ), + avg_cost_microdollars_per_request=( + avg_cost_total / cost_bearing_requests if cost_bearing_requests > 0 else 0.0 + ), + cost_p50_microdollars=cost_p50, + cost_p75_microdollars=cost_p75, + cost_p95_microdollars=cost_p95, + cost_p99_microdollars=cost_p99, + cost_buckets=cost_buckets, ) diff --git a/autogpt_platform/backend/backend/data/platform_cost_test.py b/autogpt_platform/backend/backend/data/platform_cost_test.py index 4a2372628b..ad15fb425b 100644 --- a/autogpt_platform/backend/backend/data/platform_cost_test.py +++ b/autogpt_platform/backend/backend/data/platform_cost_test.py @@ -10,6 +10,8 @@ from backend.util.json import SafeJson from .platform_cost import ( PlatformCostEntry, + _build_prisma_where, + _build_raw_where, _build_where, _mask_email, get_platform_cost_dashboard, @@ -156,6 +158,84 @@ class TestBuildWhere: assert 'p."trackingType" = $3' in sql +class TestBuildPrismaWhere: + def test_both_start_and_end(self): + start = datetime(2026, 1, 1, tzinfo=timezone.utc) + end = datetime(2026, 6, 1, tzinfo=timezone.utc) + where = _build_prisma_where(start, end, None, None) + assert where["createdAt"] == {"gte": start, "lte": end} + + def test_end_only(self): + end = datetime(2026, 6, 1, tzinfo=timezone.utc) + where = _build_prisma_where(None, end, None, None) + assert where["createdAt"] == {"lte": end} + + def test_start_only(self): + start = datetime(2026, 1, 1, tzinfo=timezone.utc) + where = _build_prisma_where(start, None, None, None) + assert where["createdAt"] == {"gte": start} + + def test_no_filters(self): + where = _build_prisma_where(None, None, None, None) + assert "createdAt" not in where + + def test_provider_lowercased(self): + where = _build_prisma_where(None, None, "OpenAI", None) + assert where["provider"] == "openai" + + def test_model_filter(self): + where = _build_prisma_where(None, None, None, None, model="gpt-4") + assert where["model"] == "gpt-4" + + def test_block_name_case_insensitive(self): + where = _build_prisma_where(None, None, None, None, block_name="LLMBlock") + assert where["blockName"] == {"equals": "LLMBlock", "mode": "insensitive"} + + def test_tracking_type(self): + where = _build_prisma_where(None, None, None, None, tracking_type="tokens") + assert where["trackingType"] == "tokens" + + +class TestBuildRawWhere: + def test_end_filter(self): + end = datetime(2026, 6, 1, tzinfo=timezone.utc) + sql, params = _build_raw_where(None, end, None, None) + assert '"createdAt" <= $2::timestamptz' in sql + assert end in 
params + + def test_model_filter(self): + sql, params = _build_raw_where(None, None, None, None, model="gpt-4") + assert '"model" = $' in sql + assert "gpt-4" in params + + def test_block_name_filter(self): + sql, params = _build_raw_where(None, None, None, None, block_name="LLMBlock") + assert 'LOWER("blockName") = LOWER($' in sql + assert "LLMBlock" in params + + def test_all_filters_combined(self): + start = datetime(2026, 1, 1, tzinfo=timezone.utc) + end = datetime(2026, 6, 1, tzinfo=timezone.utc) + sql, params = _build_raw_where( + start, end, "anthropic", "u1", model="claude-3", block_name="LLM" + ) + # trackingType (default), start, end, provider, user_id, model, block_name + assert len(params) == 7 + assert "anthropic" in params + assert "u1" in params + assert "claude-3" in params + assert "LLM" in params + + def test_default_tracking_type_is_cost_usd(self): + sql, params = _build_raw_where(None, None, None, None) + assert '"trackingType" = $1' in sql + assert params[0] == "cost_usd" + + def test_explicit_tracking_type_overrides_default(self): + sql, params = _build_raw_where(None, None, None, None, tracking_type="tokens") + assert params[0] == "tokens" + + def _make_entry(**overrides: object) -> PlatformCostEntry: return PlatformCostEntry.model_validate( { @@ -286,8 +366,9 @@ class TestGetPlatformCostDashboard: side_effect=[ [provider_row], # by_provider [user_row], # by_user + [], # by_user_tracking_groups (no cost_usd rows for this user) [{"userId": "u1"}], # distinct users - [provider_row], # total agg + [provider_row], # total agg (tracking_type=None → same as unfiltered) ] ) mock_actions.find_many = AsyncMock(return_value=[mock_user]) @@ -301,6 +382,14 @@ class TestGetPlatformCostDashboard: "backend.data.platform_cost.PrismaUser.prisma", return_value=mock_actions, ), + patch( + "backend.data.platform_cost.query_raw_with_schema", + new_callable=AsyncMock, + side_effect=[ + [{"p50": 1000, "p75": 2000, "p95": 4000, "p99": 5000}], + [{"bucket": "$0-0.50", "count": 3}], + ], + ), ): dashboard = await get_platform_cost_dashboard() @@ -313,6 +402,131 @@ class TestGetPlatformCostDashboard: assert dashboard.by_provider[0].total_duration_seconds == 10.5 assert len(dashboard.by_user) == 1 assert dashboard.by_user[0].email == "a***@b.com" + assert dashboard.cost_p50_microdollars == 1000 + assert dashboard.cost_p75_microdollars == 2000 + assert dashboard.cost_p95_microdollars == 4000 + assert dashboard.cost_p99_microdollars == 5000 + assert len(dashboard.cost_buckets) == 1 + # total_input/output_tokens come from total_agg_no_tracking_type_groups + # (provider_row has 1000/500) + assert dashboard.total_input_tokens == 1000 + assert dashboard.total_output_tokens == 500 + # Token averages must use token_bearing_requests (3) not cost_bearing (0) + assert dashboard.avg_input_tokens_per_request == pytest.approx(1000 / 3) + assert dashboard.avg_output_tokens_per_request == pytest.approx(500 / 3) + # No cost_usd rows in total_agg → avg_cost should be 0 + assert dashboard.avg_cost_microdollars_per_request == 0.0 + + @pytest.mark.asyncio + async def test_cost_bearing_request_count_nonzero_when_filtering_by_tokens(self): + """When filtering by tracking_type='tokens', cost_bearing_request_count + must still reflect cost_usd rows because by_user_tracking_groups is + queried without the tracking_type constraint.""" + # total_agg only has a tokens row (because of the tracking_type filter) + total_row = _make_group_by_row( + provider="openai", tracking_type="tokens", cost=0, count=5 + ) + # 
by_user_tracking_groups returns BOTH rows (no tracking_type filter) + user_tracking_cost_usd_row = { + "_count": {"_all": 7}, + "userId": "u1", + "trackingType": "cost_usd", + } + user_tracking_tokens_row = { + "_count": {"_all": 5}, + "userId": "u1", + "trackingType": "tokens", + } + + mock_actions = MagicMock() + mock_actions.group_by = AsyncMock( + side_effect=[ + [total_row], # by_provider + [{"_sum": {}, "_count": {"_all": 5}, "userId": "u1"}], # by_user + [ + user_tracking_cost_usd_row, + user_tracking_tokens_row, + ], # by_user_tracking + [{"userId": "u1"}], # distinct users + [total_row], # total agg (filtered) + [total_row], # total agg (no tracking_type filter) + ] + ) + mock_actions.find_many = AsyncMock(return_value=[]) + + with ( + patch( + "backend.data.platform_cost.PrismaLog.prisma", + return_value=mock_actions, + ), + patch( + "backend.data.platform_cost.PrismaUser.prisma", + return_value=mock_actions, + ), + patch( + "backend.data.platform_cost.query_raw_with_schema", + new_callable=AsyncMock, + side_effect=[[], []], + ), + ): + dashboard = await get_platform_cost_dashboard(tracking_type="tokens") + + # by_user has 1 user with 5 total requests (tokens rows only due to filter) + # but per-user cost_bearing count should be 7 (from cost_usd rows in + # by_user_tracking_groups which uses where_no_tracking_type) + assert len(dashboard.by_user) == 1 + assert dashboard.by_user[0].cost_bearing_request_count == 7 + + @pytest.mark.asyncio + async def test_global_avg_cost_nonzero_when_filtering_by_tokens(self): + """When filtering by tracking_type='tokens', avg_cost_microdollars_per_request + must still reflect cost_usd rows from total_agg_no_tracking_type_groups, + not the filtered total_agg_groups which only has tokens rows.""" + # filtered total_agg only has tokens rows (zero cost) + tokens_row = _make_group_by_row( + provider="openai", tracking_type="tokens", cost=0, count=5 + ) + # unfiltered total_agg has both rows (cost_usd carries the actual cost) + cost_usd_row = _make_group_by_row( + provider="openai", tracking_type="cost_usd", cost=10_000, count=4 + ) + + mock_actions = MagicMock() + mock_actions.group_by = AsyncMock( + side_effect=[ + [tokens_row], # by_provider + [{"_sum": {}, "_count": {"_all": 5}, "userId": "u1"}], # by_user + [], # by_user_tracking_groups + [{"userId": "u1"}], # distinct users + [tokens_row], # total agg (filtered — tokens only) + [tokens_row, cost_usd_row], # total agg (no tracking_type filter) + ] + ) + mock_actions.find_many = AsyncMock(return_value=[]) + + with ( + patch( + "backend.data.platform_cost.PrismaLog.prisma", + return_value=mock_actions, + ), + patch( + "backend.data.platform_cost.PrismaUser.prisma", + return_value=mock_actions, + ), + patch( + "backend.data.platform_cost.query_raw_with_schema", + new_callable=AsyncMock, + side_effect=[[], []], + ), + ): + dashboard = await get_platform_cost_dashboard(tracking_type="tokens") + + # avg_cost_microdollars_per_request must be non-zero: cost_usd row + # (10_000 microdollars, 4 requests) is present in the unfiltered agg. 
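+        # That is 2_500.0 microdollars per cost-bearing request (i.e. $0.0025).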
+ assert dashboard.avg_cost_microdollars_per_request == pytest.approx(10_000 / 4) + # avg token stats use token_bearing_requests from unfiltered agg (5) + assert dashboard.avg_input_tokens_per_request == pytest.approx(1000 / 5) + assert dashboard.avg_output_tokens_per_request == pytest.approx(500 / 5) @pytest.mark.asyncio async def test_cache_tokens_aggregated_not_hardcoded(self): @@ -335,8 +549,9 @@ class TestGetPlatformCostDashboard: side_effect=[ [provider_row], # by_provider [user_row], # by_user + [], # by_user_tracking_groups [{"userId": "u2"}], # distinct users - [provider_row], # total agg + [provider_row], # total agg (tracking_type=None → same as unfiltered) ] ) mock_actions.find_many = AsyncMock(return_value=[]) @@ -350,6 +565,14 @@ class TestGetPlatformCostDashboard: "backend.data.platform_cost.PrismaUser.prisma", return_value=mock_actions, ), + patch( + "backend.data.platform_cost.query_raw_with_schema", + new_callable=AsyncMock, + side_effect=[ + [{"p50": 0, "p75": 0, "p95": 0, "p99": 0}], + [], + ], + ), ): dashboard = await get_platform_cost_dashboard() @@ -361,7 +584,7 @@ class TestGetPlatformCostDashboard: @pytest.mark.asyncio async def test_returns_empty_dashboard(self): mock_actions = MagicMock() - mock_actions.group_by = AsyncMock(side_effect=[[], [], [], []]) + mock_actions.group_by = AsyncMock(side_effect=[[], [], [], [], []]) mock_actions.find_many = AsyncMock(return_value=[]) with ( @@ -373,6 +596,11 @@ class TestGetPlatformCostDashboard: "backend.data.platform_cost.PrismaUser.prisma", return_value=mock_actions, ), + patch( + "backend.data.platform_cost.query_raw_with_schema", + new_callable=AsyncMock, + side_effect=[[], []], + ), ): dashboard = await get_platform_cost_dashboard() @@ -381,13 +609,56 @@ class TestGetPlatformCostDashboard: assert dashboard.total_users == 0 assert dashboard.by_provider == [] assert dashboard.by_user == [] + assert dashboard.cost_p50_microdollars == 0 + assert dashboard.cost_buckets == [] @pytest.mark.asyncio async def test_passes_filters_to_queries(self): start = datetime(2026, 1, 1, tzinfo=timezone.utc) mock_actions = MagicMock() - mock_actions.group_by = AsyncMock(side_effect=[[], [], [], []]) + mock_actions.group_by = AsyncMock(side_effect=[[], [], [], [], []]) + mock_actions.find_many = AsyncMock(return_value=[]) + + raw_mock = AsyncMock(side_effect=[[], []]) + with ( + patch( + "backend.data.platform_cost.PrismaLog.prisma", + return_value=mock_actions, + ), + patch( + "backend.data.platform_cost.PrismaUser.prisma", + return_value=mock_actions, + ), + patch( + "backend.data.platform_cost.query_raw_with_schema", + raw_mock, + ), + ): + await get_platform_cost_dashboard( + start=start, provider="openai", user_id="u1" + ) + + # group_by called 5 times (by_provider, by_user, by_user_tracking, distinct users, + # total agg filtered); the 6th call (total agg no-tracking-type) only runs + # when tracking_type is set. 
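+        # (A run with tracking_type set would issue a 6th group_by call; see
+        # test_user_tracking_groups_excludes_tracking_type_filter below.)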
+ assert mock_actions.group_by.await_count == 5 + # The where dict passed to the first call should include createdAt + first_call_kwargs = mock_actions.group_by.call_args_list[0][1] + assert "createdAt" in first_call_kwargs.get("where", {}) + # Raw SQL queries should receive provider and user_id as parameters + assert raw_mock.await_count == 2 + raw_call_args = raw_mock.call_args_list[0][0] # positional args of 1st call + raw_params = raw_call_args[1:] # first arg is the query template + assert "openai" in raw_params + assert "u1" in raw_params + + @pytest.mark.asyncio + async def test_user_tracking_groups_excludes_tracking_type_filter(self): + """by_user_tracking_groups must NOT apply the tracking_type filter so that + cost_usd rows are always included even when the caller filters by 'tokens'.""" + mock_actions = MagicMock() + mock_actions.group_by = AsyncMock(side_effect=[[], [], [], [], [], []]) mock_actions.find_many = AsyncMock(return_value=[]) with ( @@ -399,16 +670,23 @@ class TestGetPlatformCostDashboard: "backend.data.platform_cost.PrismaUser.prisma", return_value=mock_actions, ), + patch( + "backend.data.platform_cost.query_raw_with_schema", + new_callable=AsyncMock, + side_effect=[[], []], + ), ): - await get_platform_cost_dashboard( - start=start, provider="openai", user_id="u1" - ) + await get_platform_cost_dashboard(tracking_type="tokens") - # group_by called 4 times (by_provider, by_user, distinct users, totals) - assert mock_actions.group_by.await_count == 4 - # The where dict passed to the first call should include createdAt - first_call_kwargs = mock_actions.group_by.call_args_list[0][1] - assert "createdAt" in first_call_kwargs.get("where", {}) + # Call index 2 is by_user_tracking_groups (0=by_provider, 1=by_user, + # 2=by_user_tracking, 3=distinct_users, 4=total_agg, 5=total_agg_no_tt). + tracking_call_where = mock_actions.group_by.call_args_list[2][1]["where"] + # The main filter applies trackingType; by_user_tracking must NOT. + assert "trackingType" not in tracking_call_where + # Other filters (e.g., date range, provider) are still passed through. + # The first call (by_provider) should have trackingType in its where dict. 
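+        # e.g. with tracking_type="tokens" and no other filters, the by_provider
+        # where is effectively {"trackingType": "tokens"}, while
+        # tracking_call_where carries no trackingType key at all.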
+ provider_call_where = mock_actions.group_by.call_args_list[0][1]["where"] + assert "trackingType" in provider_call_where def _make_prisma_log_row( diff --git a/autogpt_platform/backend/backend/executor/billing.py b/autogpt_platform/backend/backend/executor/billing.py new file mode 100644 index 0000000000..24bdec2c5c --- /dev/null +++ b/autogpt_platform/backend/backend/executor/billing.py @@ -0,0 +1,509 @@ +import asyncio +import logging +from typing import TYPE_CHECKING, Any, cast + +from backend.blocks import get_block +from backend.blocks._base import Block +from backend.blocks.io import AgentOutputBlock +from backend.data import redis_client as redis +from backend.data.credit import UsageTransactionMetadata +from backend.data.execution import ( + ExecutionStatus, + GraphExecutionEntry, + NodeExecutionEntry, +) +from backend.data.graph import Node +from backend.data.model import GraphExecutionStats, NodeExecutionStats +from backend.data.notifications import ( + AgentRunData, + LowBalanceData, + NotificationEventModel, + NotificationType, + ZeroBalanceData, +) +from backend.notifications.notifications import queue_notification +from backend.util.clients import ( + get_database_manager_client, + get_notification_manager_client, +) +from backend.util.exceptions import InsufficientBalanceError +from backend.util.logging import TruncatedLogger +from backend.util.metrics import DiscordChannel +from backend.util.settings import Settings + +from .utils import LogMetadata, block_usage_cost, execution_usage_cost + +if TYPE_CHECKING: + from backend.data.db_manager import DatabaseManagerClient + +_logger = logging.getLogger(__name__) +logger = TruncatedLogger(_logger, prefix="[Billing]") +settings = Settings() + +# Redis key prefix for tracking insufficient funds Discord notifications. +# We only send one notification per user per agent until they top up credits. +INSUFFICIENT_FUNDS_NOTIFIED_PREFIX = "insufficient_funds_discord_notified" +# TTL for the notification flag (30 days) - acts as a fallback cleanup +INSUFFICIENT_FUNDS_NOTIFIED_TTL_SECONDS = 30 * 24 * 60 * 60 + +# Hard cap on the multiplier passed to charge_extra_runtime_cost to +# protect against a corrupted llm_call_count draining a user's balance. +# Real agent-mode runs are bounded by agent_mode_max_iterations (~50); +# 200 leaves headroom while preventing runaway charges. +_MAX_EXTRA_RUNTIME_COST = 200 + + +def get_db_client() -> "DatabaseManagerClient": + return get_database_manager_client() + + +async def clear_insufficient_funds_notifications(user_id: str) -> int: + """ + Clear all insufficient funds notification flags for a user. + + This should be called when a user tops up their credits, allowing + Discord notifications to be sent again if they run out of funds. + + Args: + user_id: The user ID to clear notifications for. + + Returns: + The number of keys that were deleted. + """ + try: + redis_client = await redis.get_redis_async() + pattern = f"{INSUFFICIENT_FUNDS_NOTIFIED_PREFIX}:{user_id}:*" + keys = [key async for key in redis_client.scan_iter(match=pattern)] + if keys: + return await redis_client.delete(*keys) + return 0 + except Exception as e: + logger.warning( + f"Failed to clear insufficient funds notification flags for user " + f"{user_id}: {e}" + ) + return 0 + + +def resolve_block_cost( + node_exec: NodeExecutionEntry, +) -> tuple["Block | None", int, dict[str, Any]]: + """Look up the block and compute its base usage cost for an exec. 
+ + Shared by charge_usage and charge_extra_runtime_cost so the + (get_block, block_usage_cost) lookup lives in exactly one place. + Returns ``(block, cost, matching_filter)``. ``block`` is ``None`` if + the block id can't be resolved — callers should treat that as + "nothing to charge". + """ + block = get_block(node_exec.block_id) + if not block: + logger.error(f"Block {node_exec.block_id} not found.") + return None, 0, {} + cost, matching_filter = block_usage_cost(block=block, input_data=node_exec.inputs) + return block, cost, matching_filter + + +def charge_usage( + node_exec: NodeExecutionEntry, + execution_count: int, +) -> tuple[int, int]: + total_cost = 0 + remaining_balance = 0 + db_client = get_db_client() + block, cost, matching_filter = resolve_block_cost(node_exec) + if not block: + return total_cost, 0 + + if cost > 0: + remaining_balance = db_client.spend_credits( + user_id=node_exec.user_id, + cost=cost, + metadata=UsageTransactionMetadata( + graph_exec_id=node_exec.graph_exec_id, + graph_id=node_exec.graph_id, + node_exec_id=node_exec.node_exec_id, + node_id=node_exec.node_id, + block_id=node_exec.block_id, + block=block.name, + input=matching_filter, + reason=f"Ran block {node_exec.block_id} {block.name}", + ), + ) + total_cost += cost + + # execution_count=0 is used by charge_node_usage for nested tool calls + # which must not be pushed into higher execution-count tiers. + # execution_usage_cost(0) would trigger a charge because 0 % threshold == 0, + # so skip it entirely when execution_count is 0. + cost, usage_count = ( + execution_usage_cost(execution_count) if execution_count > 0 else (0, 0) + ) + if cost > 0: + remaining_balance = db_client.spend_credits( + user_id=node_exec.user_id, + cost=cost, + metadata=UsageTransactionMetadata( + graph_exec_id=node_exec.graph_exec_id, + graph_id=node_exec.graph_id, + input={ + "execution_count": usage_count, + "charge": "Execution Cost", + }, + reason=f"Execution Cost for {usage_count} blocks of ex_id:{node_exec.graph_exec_id} g_id:{node_exec.graph_id}", + ), + ) + total_cost += cost + + return total_cost, remaining_balance + + +def _charge_extra_runtime_cost_sync( + node_exec: NodeExecutionEntry, + capped_count: int, +) -> tuple[int, int]: + """Synchronous implementation — runs in a thread-pool worker. + + Called only from charge_extra_runtime_cost. Do not call directly from + async code. + + Note: ``resolve_block_cost`` is called again here (rather than reusing + the result from ``charge_usage`` at the start of execution) because the + two calls happen in separate thread-pool workers and sharing mutable + state across workers would require locks. The block config is immutable + during a run, so the repeated lookup is safe and produces the same cost; + the only overhead is an extra registry lookup. 
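+
+    Example (hypothetical numbers): a block whose base cost resolves to
+    5 credits, charged with capped_count=3, spends 5 * 3 = 15 extra
+    credits in a single spend_credits transaction.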
+ """ + db_client = get_db_client() + block, cost, matching_filter = resolve_block_cost(node_exec) + if not block or cost <= 0: + return 0, 0 + total_extra_cost = cost * capped_count + remaining_balance = db_client.spend_credits( + user_id=node_exec.user_id, + cost=total_extra_cost, + metadata=UsageTransactionMetadata( + graph_exec_id=node_exec.graph_exec_id, + graph_id=node_exec.graph_id, + node_exec_id=node_exec.node_exec_id, + node_id=node_exec.node_id, + block_id=node_exec.block_id, + block=block.name, + input={ + **matching_filter, + "extra_runtime_cost_count": capped_count, + }, + reason=( + f"Extra agent-mode iterations for {block.name} " + f"({capped_count} additional LLM calls)" + ), + ), + ) + return total_extra_cost, remaining_balance + + +async def charge_extra_runtime_cost( + node_exec: NodeExecutionEntry, + extra_count: int, +) -> tuple[int, int]: + """Charge a block extra runtime cost beyond the initial run. + + Used by agent-mode blocks (e.g. OrchestratorBlock) that make multiple + LLM calls within a single node execution. The first iteration is already + charged by charge_usage; this method charges *extra_count* additional + copies of the block's base cost. + + Returns ``(total_extra_cost, remaining_balance)``. May raise + ``InsufficientBalanceError`` if the user can't afford the charge. + """ + if extra_count <= 0: + return 0, 0 + # Cap to protect against a corrupted llm_call_count. + capped = min(extra_count, _MAX_EXTRA_RUNTIME_COST) + if extra_count > _MAX_EXTRA_RUNTIME_COST: + logger.warning( + f"extra_count {extra_count} exceeds cap {_MAX_EXTRA_RUNTIME_COST};" + f" charging {_MAX_EXTRA_RUNTIME_COST} (llm_call_count may be corrupted)" + ) + return await asyncio.to_thread(_charge_extra_runtime_cost_sync, node_exec, capped) + + +async def charge_node_usage(node_exec: NodeExecutionEntry) -> tuple[int, int]: + """Charge a single node execution to the user. + + Public async wrapper around charge_usage for blocks (e.g. the + OrchestratorBlock) that spawn nested node executions outside the main + queue and therefore need to charge them explicitly. + + Also handles low-balance notification so callers don't need to touch + private functions directly. + + Note: this **does not** increment the global execution counter + (``increment_execution_count``). Nested tool executions are sub-steps + of a single block run from the user's perspective and should not push + them into higher per-execution cost tiers. + """ + + def _run(): + total_cost, remaining = charge_usage(node_exec, 0) + if total_cost > 0: + handle_low_balance( + get_db_client(), node_exec.user_id, remaining, total_cost + ) + return total_cost, remaining + + return await asyncio.to_thread(_run) + + +async def try_send_insufficient_funds_notif( + user_id: str, + graph_id: str, + error: InsufficientBalanceError, + log_metadata: LogMetadata, +) -> None: + """Send an insufficient-funds notification, swallowing failures.""" + try: + await asyncio.to_thread( + handle_insufficient_funds_notif, + get_db_client(), + user_id, + graph_id, + error, + ) + except Exception as notif_error: # pragma: no cover + log_metadata.warning( + f"Failed to send insufficient funds notification: {notif_error}" + ) + + +async def handle_post_execution_billing( + node: Node, + node_exec: NodeExecutionEntry, + execution_stats: NodeExecutionStats, + status: ExecutionStatus, + log_metadata: LogMetadata, +) -> None: + """Charge extra runtime cost for blocks that opt into per-LLM-call billing. 
+ + The first LLM call is already covered by charge_usage(); each additional + call costs another base_cost. Skipped for dry runs and failed runs. + + InsufficientBalanceError here is a post-hoc billing leak: the work is + already done but the user can no longer pay. The run stays COMPLETED and + the error is logged with ``billing_leak: True`` for alerting. + """ + extra_iterations = ( + cast(Block, node.block).extra_runtime_cost(execution_stats) + if status == ExecutionStatus.COMPLETED + and not node_exec.execution_context.dry_run + else 0 + ) + if extra_iterations <= 0: + return + + try: + extra_cost, remaining_balance = await charge_extra_runtime_cost( + node_exec, + extra_iterations, + ) + if extra_cost > 0: + execution_stats.extra_cost += extra_cost + await asyncio.to_thread( + handle_low_balance, + get_db_client(), + node_exec.user_id, + remaining_balance, + extra_cost, + ) + except InsufficientBalanceError as e: + log_metadata.error( + "billing_leak: insufficient balance after " + f"{node.block.name} completed {extra_iterations} " + f"extra iterations", + extra={ + "billing_leak": True, + "user_id": node_exec.user_id, + "graph_id": node_exec.graph_id, + "block_id": node_exec.block_id, + "extra_runtime_cost_count": extra_iterations, + "error": str(e), + }, + ) + # Do NOT set execution_stats.error — the node ran to completion, + # only the post-hoc charge failed. See class-level billing-leak + # contract documentation. + await try_send_insufficient_funds_notif( + node_exec.user_id, + node_exec.graph_id, + e, + log_metadata, + ) + except Exception as e: + log_metadata.error( + f"billing_leak: failed to charge extra iterations for {node.block.name}", + extra={ + "billing_leak": True, + "user_id": node_exec.user_id, + "graph_id": node_exec.graph_id, + "block_id": node_exec.block_id, + "extra_runtime_cost_count": extra_iterations, + "error_type": type(e).__name__, + "error": str(e), + }, + exc_info=True, + ) + + +def handle_agent_run_notif( + db_client: "DatabaseManagerClient", + graph_exec: GraphExecutionEntry, + exec_stats: GraphExecutionStats, +) -> None: + metadata = db_client.get_graph_metadata( + graph_exec.graph_id, graph_exec.graph_version + ) + outputs = db_client.get_node_executions( + graph_exec.graph_exec_id, + block_ids=[AgentOutputBlock().id], + ) + + named_outputs = [ + { + key: value[0] if key == "name" else value + for key, value in output.output_data.items() + } + for output in outputs + ] + + queue_notification( + NotificationEventModel( + user_id=graph_exec.user_id, + type=NotificationType.AGENT_RUN, + data=AgentRunData( + outputs=named_outputs, + agent_name=metadata.name if metadata else "Unknown Agent", + credits_used=exec_stats.cost, + execution_time=exec_stats.walltime, + graph_id=graph_exec.graph_id, + node_count=exec_stats.node_count, + ), + ) + ) + + +def handle_insufficient_funds_notif( + db_client: "DatabaseManagerClient", + user_id: str, + graph_id: str, + e: InsufficientBalanceError, +) -> None: + # Check if we've already sent a notification for this user+agent combo. + # We only send one notification per user per agent until they top up credits. 
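+    # Key shape: "insufficient_funds_discord_notified:<user_id>:<graph_id>";
+    # clear_insufficient_funds_notifications later removes every key matching
+    # "insufficient_funds_discord_notified:<user_id>:*" when the user tops up.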
+ redis_key = f"{INSUFFICIENT_FUNDS_NOTIFIED_PREFIX}:{user_id}:{graph_id}" + try: + redis_client = redis.get_redis() + # SET NX returns True only if the key was newly set (didn't exist) + is_new_notification = redis_client.set( + redis_key, + "1", + nx=True, + ex=INSUFFICIENT_FUNDS_NOTIFIED_TTL_SECONDS, + ) + if not is_new_notification: + # Already notified for this user+agent, skip all notifications + logger.debug( + f"Skipping duplicate insufficient funds notification for " + f"user={user_id}, graph={graph_id}" + ) + return + except Exception as redis_error: + # If Redis fails, log and continue to send the notification + # (better to occasionally duplicate than to never notify) + logger.warning( + f"Failed to check/set insufficient funds notification flag in Redis: " + f"{redis_error}" + ) + + shortfall = abs(e.amount) - e.balance + metadata = db_client.get_graph_metadata(graph_id) + base_url = settings.config.frontend_base_url or settings.config.platform_base_url + + # Queue user email notification + queue_notification( + NotificationEventModel( + user_id=user_id, + type=NotificationType.ZERO_BALANCE, + data=ZeroBalanceData( + current_balance=e.balance, + billing_page_link=f"{base_url}/profile/credits", + shortfall=shortfall, + agent_name=metadata.name if metadata else "Unknown Agent", + ), + ) + ) + + # Send Discord system alert + try: + user_email = db_client.get_user_email_by_id(user_id) + + alert_message = ( + f"❌ **Insufficient Funds Alert**\n" + f"User: {user_email or user_id}\n" + f"Agent: {metadata.name if metadata else 'Unknown Agent'}\n" + f"Current balance: ${e.balance / 100:.2f}\n" + f"Attempted cost: ${abs(e.amount) / 100:.2f}\n" + f"Shortfall: ${abs(shortfall) / 100:.2f}\n" + f"[View User Details]({base_url}/admin/spending?search={user_email})" + ) + + get_notification_manager_client().discord_system_alert( + alert_message, DiscordChannel.PRODUCT + ) + except Exception as alert_error: + logger.error(f"Failed to send insufficient funds Discord alert: {alert_error}") + + +def handle_low_balance( + db_client: "DatabaseManagerClient", + user_id: str, + current_balance: int, + transaction_cost: int, +) -> None: + """Check and handle low balance scenarios after a transaction""" + LOW_BALANCE_THRESHOLD = settings.config.low_balance_threshold + + balance_before = current_balance + transaction_cost + + if ( + current_balance < LOW_BALANCE_THRESHOLD + and balance_before >= LOW_BALANCE_THRESHOLD + ): + base_url = ( + settings.config.frontend_base_url or settings.config.platform_base_url + ) + queue_notification( + NotificationEventModel( + user_id=user_id, + type=NotificationType.LOW_BALANCE, + data=LowBalanceData( + current_balance=current_balance, + billing_page_link=f"{base_url}/profile/credits", + ), + ) + ) + + try: + user_email = db_client.get_user_email_by_id(user_id) + alert_message = ( + f"⚠️ **Low Balance Alert**\n" + f"User: {user_email or user_id}\n" + f"Balance dropped below ${LOW_BALANCE_THRESHOLD / 100:.2f}\n" + f"Current balance: ${current_balance / 100:.2f}\n" + f"Transaction cost: ${transaction_cost / 100:.2f}\n" + f"[View User Details]({base_url}/admin/spending?search={user_email})" + ) + get_notification_manager_client().discord_system_alert( + alert_message, DiscordChannel.PRODUCT + ) + except Exception as e: + logger.warning(f"Failed to send low balance Discord alert: {e}") diff --git a/autogpt_platform/backend/backend/executor/manager.py b/autogpt_platform/backend/backend/executor/manager.py index bd718d168f..2af3ce784e 100644 --- 
a/autogpt_platform/backend/backend/executor/manager.py +++ b/autogpt_platform/backend/backend/executor/manager.py @@ -21,11 +21,9 @@ from sentry_sdk.api import get_current_scope as _sentry_get_current_scope from backend.blocks import get_block from backend.blocks._base import BlockSchema from backend.blocks.agent import AgentExecutorBlock -from backend.blocks.io import AgentOutputBlock from backend.blocks.mcp.block import MCPToolBlock from backend.data import redis_client as redis from backend.data.block import BlockInput, BlockOutput, BlockOutputEntry -from backend.data.credit import UsageTransactionMetadata from backend.data.dynamic_fields import parse_execution_output from backend.data.execution import ( ExecutionContext, @@ -39,27 +37,18 @@ from backend.data.execution import ( ) from backend.data.graph import Link, Node from backend.data.model import GraphExecutionStats, NodeExecutionStats -from backend.data.notifications import ( - AgentRunData, - LowBalanceData, - NotificationEventModel, - NotificationType, - ZeroBalanceData, -) from backend.data.rabbitmq import SyncRabbitMQ from backend.executor.cost_tracking import ( drain_pending_cost_logs, log_system_credential_cost, ) from backend.integrations.creds_manager import IntegrationCredentialsManager -from backend.notifications.notifications import queue_notification from backend.util import json from backend.util.clients import ( get_async_execution_event_bus, get_database_manager_async_client, get_database_manager_client, get_execution_event_bus, - get_notification_manager_client, ) from backend.util.decorator import ( async_error_logged, @@ -75,7 +64,6 @@ from backend.util.exceptions import ( ) from backend.util.file import clean_exec_files from backend.util.logging import TruncatedLogger, configure_logging -from backend.util.metrics import DiscordChannel from backend.util.process import AppProcess, set_service_name from backend.util.retry import ( continuous_retry, @@ -84,6 +72,7 @@ from backend.util.retry import ( ) from backend.util.settings import Settings +from . import billing from .activity_status_generator import generate_activity_status_for_execution from .automod.manager import automod_manager from .cluster_lock import ClusterLock @@ -98,9 +87,7 @@ from .utils import ( ExecutionOutputEntry, LogMetadata, NodeExecutionProgress, - block_usage_cost, create_execution_queue_config, - execution_usage_cost, validate_exec, ) @@ -126,40 +113,6 @@ utilization_gauge = Gauge( "Ratio of active graph runs to max graph workers", ) -# Redis key prefix for tracking insufficient funds Discord notifications. -# We only send one notification per user per agent until they top up credits. -INSUFFICIENT_FUNDS_NOTIFIED_PREFIX = "insufficient_funds_discord_notified" -# TTL for the notification flag (30 days) - acts as a fallback cleanup -INSUFFICIENT_FUNDS_NOTIFIED_TTL_SECONDS = 30 * 24 * 60 * 60 - - -async def clear_insufficient_funds_notifications(user_id: str) -> int: - """ - Clear all insufficient funds notification flags for a user. - - This should be called when a user tops up their credits, allowing - Discord notifications to be sent again if they run out of funds. - - Args: - user_id: The user ID to clear notifications for. - - Returns: - The number of keys that were deleted. 
- """ - try: - redis_client = await redis.get_redis_async() - pattern = f"{INSUFFICIENT_FUNDS_NOTIFIED_PREFIX}:{user_id}:*" - keys = [key async for key in redis_client.scan_iter(match=pattern)] - if keys: - return await redis_client.delete(*keys) - return 0 - except Exception as e: - logger.warning( - f"Failed to clear insufficient funds notification flags for user " - f"{user_id}: {e}" - ) - return 0 - # Thread-local storage for ExecutionProcessor instances _tls = threading.local() @@ -681,12 +634,16 @@ class ExecutionProcessor: execution_stats.walltime = timing_info.wall_time execution_stats.cputime = timing_info.cpu_time + await billing.handle_post_execution_billing( + node, node_exec, execution_stats, status, log_metadata + ) + graph_stats, graph_stats_lock = graph_stats_pair with graph_stats_lock: graph_stats.node_count += 1 + execution_stats.extra_steps graph_stats.nodes_cputime += execution_stats.cputime graph_stats.nodes_walltime += execution_stats.walltime - graph_stats.cost += execution_stats.extra_cost + graph_stats.cost += execution_stats.cost + execution_stats.extra_cost if isinstance(execution_stats.error, Exception): graph_stats.node_error_count += 1 @@ -716,6 +673,18 @@ class ExecutionProcessor: db_client=db_client, ) + # If the node failed because a nested tool charge raised IBE, + # send the user notification so they understand why the run stopped. + if status == ExecutionStatus.FAILED and isinstance( + execution_stats.error, InsufficientBalanceError + ): + await billing.try_send_insufficient_funds_notif( + node_exec.user_id, + node_exec.graph_id, + execution_stats.error, + log_metadata, + ) + return execution_stats @async_time_measured @@ -935,7 +904,7 @@ class ExecutionProcessor: ) finally: # Communication handling - self._handle_agent_run_notif(db_client, graph_exec, exec_stats) + billing.handle_agent_run_notif(db_client, graph_exec, exec_stats) update_graph_execution_state( db_client=db_client, @@ -944,57 +913,18 @@ class ExecutionProcessor: stats=exec_stats, ) - def _charge_usage( + async def charge_node_usage( self, node_exec: NodeExecutionEntry, - execution_count: int, ) -> tuple[int, int]: - total_cost = 0 - remaining_balance = 0 - db_client = get_db_client() - block = get_block(node_exec.block_id) - if not block: - logger.error(f"Block {node_exec.block_id} not found.") - return total_cost, 0 + return await billing.charge_node_usage(node_exec) - cost, matching_filter = block_usage_cost( - block=block, input_data=node_exec.inputs - ) - if cost > 0: - remaining_balance = db_client.spend_credits( - user_id=node_exec.user_id, - cost=cost, - metadata=UsageTransactionMetadata( - graph_exec_id=node_exec.graph_exec_id, - graph_id=node_exec.graph_id, - node_exec_id=node_exec.node_exec_id, - node_id=node_exec.node_id, - block_id=node_exec.block_id, - block=block.name, - input=matching_filter, - reason=f"Ran block {node_exec.block_id} {block.name}", - ), - ) - total_cost += cost - - cost, usage_count = execution_usage_cost(execution_count) - if cost > 0: - remaining_balance = db_client.spend_credits( - user_id=node_exec.user_id, - cost=cost, - metadata=UsageTransactionMetadata( - graph_exec_id=node_exec.graph_exec_id, - graph_id=node_exec.graph_id, - input={ - "execution_count": usage_count, - "charge": "Execution Cost", - }, - reason=f"Execution Cost for {usage_count} blocks of ex_id:{node_exec.graph_exec_id} g_id:{node_exec.graph_id}", - ), - ) - total_cost += cost - - return total_cost, remaining_balance + async def charge_extra_runtime_cost( + self, + node_exec: 
NodeExecutionEntry, + extra_count: int, + ) -> tuple[int, int]: + return await billing.charge_extra_runtime_cost(node_exec, extra_count) @time_measured def _on_graph_execution( @@ -1106,7 +1036,7 @@ class ExecutionProcessor: # Charge usage (may raise) — skipped for dry runs try: if not graph_exec.execution_context.dry_run: - cost, remaining_balance = self._charge_usage( + cost, remaining_balance = billing.charge_usage( node_exec=queued_node_exec, execution_count=increment_execution_count( graph_exec.user_id @@ -1115,7 +1045,7 @@ class ExecutionProcessor: with execution_stats_lock: execution_stats.cost += cost # Check if we crossed the low balance threshold - self._handle_low_balance( + billing.handle_low_balance( db_client=db_client, user_id=graph_exec.user_id, current_balance=remaining_balance, @@ -1135,7 +1065,7 @@ class ExecutionProcessor: status=ExecutionStatus.FAILED, ) - self._handle_insufficient_funds_notif( + billing.handle_insufficient_funds_notif( db_client, graph_exec.user_id, graph_exec.graph_id, @@ -1397,165 +1327,6 @@ class ExecutionProcessor: ): execution_queue.add(next_execution) - def _handle_agent_run_notif( - self, - db_client: "DatabaseManagerClient", - graph_exec: GraphExecutionEntry, - exec_stats: GraphExecutionStats, - ): - metadata = db_client.get_graph_metadata( - graph_exec.graph_id, graph_exec.graph_version - ) - outputs = db_client.get_node_executions( - graph_exec.graph_exec_id, - block_ids=[AgentOutputBlock().id], - ) - - named_outputs = [ - { - key: value[0] if key == "name" else value - for key, value in output.output_data.items() - } - for output in outputs - ] - - queue_notification( - NotificationEventModel( - user_id=graph_exec.user_id, - type=NotificationType.AGENT_RUN, - data=AgentRunData( - outputs=named_outputs, - agent_name=metadata.name if metadata else "Unknown Agent", - credits_used=exec_stats.cost, - execution_time=exec_stats.walltime, - graph_id=graph_exec.graph_id, - node_count=exec_stats.node_count, - ), - ) - ) - - def _handle_insufficient_funds_notif( - self, - db_client: "DatabaseManagerClient", - user_id: str, - graph_id: str, - e: InsufficientBalanceError, - ): - # Check if we've already sent a notification for this user+agent combo. - # We only send one notification per user per agent until they top up credits. 
- redis_key = f"{INSUFFICIENT_FUNDS_NOTIFIED_PREFIX}:{user_id}:{graph_id}" - try: - redis_client = redis.get_redis() - # SET NX returns True only if the key was newly set (didn't exist) - is_new_notification = redis_client.set( - redis_key, - "1", - nx=True, - ex=INSUFFICIENT_FUNDS_NOTIFIED_TTL_SECONDS, - ) - if not is_new_notification: - # Already notified for this user+agent, skip all notifications - logger.debug( - f"Skipping duplicate insufficient funds notification for " - f"user={user_id}, graph={graph_id}" - ) - return - except Exception as redis_error: - # If Redis fails, log and continue to send the notification - # (better to occasionally duplicate than to never notify) - logger.warning( - f"Failed to check/set insufficient funds notification flag in Redis: " - f"{redis_error}" - ) - - shortfall = abs(e.amount) - e.balance - metadata = db_client.get_graph_metadata(graph_id) - base_url = ( - settings.config.frontend_base_url or settings.config.platform_base_url - ) - - # Queue user email notification - queue_notification( - NotificationEventModel( - user_id=user_id, - type=NotificationType.ZERO_BALANCE, - data=ZeroBalanceData( - current_balance=e.balance, - billing_page_link=f"{base_url}/profile/credits", - shortfall=shortfall, - agent_name=metadata.name if metadata else "Unknown Agent", - ), - ) - ) - - # Send Discord system alert - try: - user_email = db_client.get_user_email_by_id(user_id) - - alert_message = ( - f"❌ **Insufficient Funds Alert**\n" - f"User: {user_email or user_id}\n" - f"Agent: {metadata.name if metadata else 'Unknown Agent'}\n" - f"Current balance: ${e.balance / 100:.2f}\n" - f"Attempted cost: ${abs(e.amount) / 100:.2f}\n" - f"Shortfall: ${abs(shortfall) / 100:.2f}\n" - f"[View User Details]({base_url}/admin/spending?search={user_email})" - ) - - get_notification_manager_client().discord_system_alert( - alert_message, DiscordChannel.PRODUCT - ) - except Exception as alert_error: - logger.error( - f"Failed to send insufficient funds Discord alert: {alert_error}" - ) - - def _handle_low_balance( - self, - db_client: "DatabaseManagerClient", - user_id: str, - current_balance: int, - transaction_cost: int, - ): - """Check and handle low balance scenarios after a transaction""" - LOW_BALANCE_THRESHOLD = settings.config.low_balance_threshold - - balance_before = current_balance + transaction_cost - - if ( - current_balance < LOW_BALANCE_THRESHOLD - and balance_before >= LOW_BALANCE_THRESHOLD - ): - base_url = ( - settings.config.frontend_base_url or settings.config.platform_base_url - ) - queue_notification( - NotificationEventModel( - user_id=user_id, - type=NotificationType.LOW_BALANCE, - data=LowBalanceData( - current_balance=current_balance, - billing_page_link=f"{base_url}/profile/credits", - ), - ) - ) - - try: - user_email = db_client.get_user_email_by_id(user_id) - alert_message = ( - f"⚠️ **Low Balance Alert**\n" - f"User: {user_email or user_id}\n" - f"Balance dropped below ${LOW_BALANCE_THRESHOLD / 100:.2f}\n" - f"Current balance: ${current_balance / 100:.2f}\n" - f"Transaction cost: ${transaction_cost / 100:.2f}\n" - f"[View User Details]({base_url}/admin/spending?search={user_email})" - ) - get_notification_manager_client().discord_system_alert( - alert_message, DiscordChannel.PRODUCT - ) - except Exception as e: - logger.warning(f"Failed to send low balance Discord alert: {e}") - class ExecutionManager(AppProcess): def __init__(self): diff --git a/autogpt_platform/backend/backend/executor/manager_insufficient_funds_test.py 
b/autogpt_platform/backend/backend/executor/manager_insufficient_funds_test.py index 276c9f4f7a..ddbb4e0e1c 100644 --- a/autogpt_platform/backend/backend/executor/manager_insufficient_funds_test.py +++ b/autogpt_platform/backend/backend/executor/manager_insufficient_funds_test.py @@ -4,9 +4,9 @@ import pytest from prisma.enums import NotificationType from backend.data.notifications import ZeroBalanceData -from backend.executor.manager import ( +from backend.executor import billing +from backend.executor.billing import ( INSUFFICIENT_FUNDS_NOTIFIED_PREFIX, - ExecutionProcessor, clear_insufficient_funds_notifications, ) from backend.util.exceptions import InsufficientBalanceError @@ -25,7 +25,6 @@ async def test_handle_insufficient_funds_sends_discord_alert_first_time( ): """Test that the first insufficient funds notification sends a Discord alert.""" - execution_processor = ExecutionProcessor() user_id = "test-user-123" graph_id = "test-graph-456" error = InsufficientBalanceError( @@ -36,13 +35,13 @@ async def test_handle_insufficient_funds_sends_discord_alert_first_time( ) with patch( - "backend.executor.manager.queue_notification" + "backend.executor.billing.queue_notification" ) as mock_queue_notif, patch( - "backend.executor.manager.get_notification_manager_client" + "backend.executor.billing.get_notification_manager_client" ) as mock_get_client, patch( - "backend.executor.manager.settings" + "backend.executor.billing.settings" ) as mock_settings, patch( - "backend.executor.manager.redis" + "backend.executor.billing.redis" ) as mock_redis_module: # Setup mocks @@ -63,7 +62,7 @@ async def test_handle_insufficient_funds_sends_discord_alert_first_time( mock_db_client.get_user_email_by_id.return_value = "test@example.com" # Test the insufficient funds handler - execution_processor._handle_insufficient_funds_notif( + billing.handle_insufficient_funds_notif( db_client=mock_db_client, user_id=user_id, graph_id=graph_id, @@ -99,7 +98,6 @@ async def test_handle_insufficient_funds_skips_duplicate_notifications( ): """Test that duplicate insufficient funds notifications skip both email and Discord.""" - execution_processor = ExecutionProcessor() user_id = "test-user-123" graph_id = "test-graph-456" error = InsufficientBalanceError( @@ -110,13 +108,13 @@ async def test_handle_insufficient_funds_skips_duplicate_notifications( ) with patch( - "backend.executor.manager.queue_notification" + "backend.executor.billing.queue_notification" ) as mock_queue_notif, patch( - "backend.executor.manager.get_notification_manager_client" + "backend.executor.billing.get_notification_manager_client" ) as mock_get_client, patch( - "backend.executor.manager.settings" + "backend.executor.billing.settings" ) as mock_settings, patch( - "backend.executor.manager.redis" + "backend.executor.billing.redis" ) as mock_redis_module: # Setup mocks @@ -134,7 +132,7 @@ async def test_handle_insufficient_funds_skips_duplicate_notifications( mock_db_client.get_graph_metadata.return_value = MagicMock(name="Test Agent") # Test the insufficient funds handler - execution_processor._handle_insufficient_funds_notif( + billing.handle_insufficient_funds_notif( db_client=mock_db_client, user_id=user_id, graph_id=graph_id, @@ -154,7 +152,6 @@ async def test_handle_insufficient_funds_different_agents_get_separate_alerts( ): """Test that different agents for the same user get separate Discord alerts.""" - execution_processor = ExecutionProcessor() user_id = "test-user-123" graph_id_1 = "test-graph-111" graph_id_2 = "test-graph-222" @@ -166,12 
+163,12 @@ async def test_handle_insufficient_funds_different_agents_get_separate_alerts( amount=-714, ) - with patch("backend.executor.manager.queue_notification"), patch( - "backend.executor.manager.get_notification_manager_client" + with patch("backend.executor.billing.queue_notification"), patch( + "backend.executor.billing.get_notification_manager_client" ) as mock_get_client, patch( - "backend.executor.manager.settings" + "backend.executor.billing.settings" ) as mock_settings, patch( - "backend.executor.manager.redis" + "backend.executor.billing.redis" ) as mock_redis_module: mock_client = MagicMock() @@ -190,7 +187,7 @@ async def test_handle_insufficient_funds_different_agents_get_separate_alerts( mock_db_client.get_user_email_by_id.return_value = "test@example.com" # First agent notification - execution_processor._handle_insufficient_funds_notif( + billing.handle_insufficient_funds_notif( db_client=mock_db_client, user_id=user_id, graph_id=graph_id_1, @@ -198,7 +195,7 @@ async def test_handle_insufficient_funds_different_agents_get_separate_alerts( ) # Second agent notification - execution_processor._handle_insufficient_funds_notif( + billing.handle_insufficient_funds_notif( db_client=mock_db_client, user_id=user_id, graph_id=graph_id_2, @@ -227,7 +224,7 @@ async def test_clear_insufficient_funds_notifications(server: SpinTestServer): user_id = "test-user-123" - with patch("backend.executor.manager.redis") as mock_redis_module: + with patch("backend.executor.billing.redis") as mock_redis_module: mock_redis_client = MagicMock() # get_redis_async is an async function, so we need AsyncMock for it @@ -263,7 +260,7 @@ async def test_clear_insufficient_funds_notifications_no_keys(server: SpinTestSe user_id = "test-user-no-notifications" - with patch("backend.executor.manager.redis") as mock_redis_module: + with patch("backend.executor.billing.redis") as mock_redis_module: mock_redis_client = MagicMock() # get_redis_async is an async function, so we need AsyncMock for it @@ -290,7 +287,7 @@ async def test_clear_insufficient_funds_notifications_handles_redis_error( user_id = "test-user-redis-error" - with patch("backend.executor.manager.redis") as mock_redis_module: + with patch("backend.executor.billing.redis") as mock_redis_module: # Mock get_redis_async to raise an error mock_redis_module.get_redis_async = AsyncMock( @@ -310,7 +307,6 @@ async def test_handle_insufficient_funds_continues_on_redis_error( ): """Test that both email and Discord notifications are still sent when Redis fails.""" - execution_processor = ExecutionProcessor() user_id = "test-user-123" graph_id = "test-graph-456" error = InsufficientBalanceError( @@ -321,13 +317,13 @@ async def test_handle_insufficient_funds_continues_on_redis_error( ) with patch( - "backend.executor.manager.queue_notification" + "backend.executor.billing.queue_notification" ) as mock_queue_notif, patch( - "backend.executor.manager.get_notification_manager_client" + "backend.executor.billing.get_notification_manager_client" ) as mock_get_client, patch( - "backend.executor.manager.settings" + "backend.executor.billing.settings" ) as mock_settings, patch( - "backend.executor.manager.redis" + "backend.executor.billing.redis" ) as mock_redis_module: mock_client = MagicMock() @@ -346,7 +342,7 @@ async def test_handle_insufficient_funds_continues_on_redis_error( mock_db_client.get_user_email_by_id.return_value = "test@example.com" # Test the insufficient funds handler - execution_processor._handle_insufficient_funds_notif( + 
billing.handle_insufficient_funds_notif( db_client=mock_db_client, user_id=user_id, graph_id=graph_id, @@ -370,7 +366,7 @@ async def test_add_transaction_clears_notifications_on_grant(server: SpinTestSer user_id = "test-user-grant-clear" with patch("backend.data.credit.query_raw_with_schema") as mock_query, patch( - "backend.executor.manager.redis" + "backend.executor.billing.redis" ) as mock_redis_module: # Mock the query to return a successful transaction @@ -412,7 +408,7 @@ async def test_add_transaction_clears_notifications_on_top_up(server: SpinTestSe user_id = "test-user-topup-clear" with patch("backend.data.credit.query_raw_with_schema") as mock_query, patch( - "backend.executor.manager.redis" + "backend.executor.billing.redis" ) as mock_redis_module: # Mock the query to return a successful transaction @@ -450,7 +446,7 @@ async def test_add_transaction_skips_clearing_for_inactive_transaction( user_id = "test-user-inactive" with patch("backend.data.credit.query_raw_with_schema") as mock_query, patch( - "backend.executor.manager.redis" + "backend.executor.billing.redis" ) as mock_redis_module: # Mock the query to return a successful transaction @@ -486,7 +482,7 @@ async def test_add_transaction_skips_clearing_for_usage_transaction( user_id = "test-user-usage" with patch("backend.data.credit.query_raw_with_schema") as mock_query, patch( - "backend.executor.manager.redis" + "backend.executor.billing.redis" ) as mock_redis_module: # Mock the query to return a successful transaction @@ -521,7 +517,7 @@ async def test_enable_transaction_clears_notifications(server: SpinTestServer): with patch("backend.data.credit.CreditTransaction") as mock_credit_tx, patch( "backend.data.credit.query_raw_with_schema" - ) as mock_query, patch("backend.executor.manager.redis") as mock_redis_module: + ) as mock_query, patch("backend.executor.billing.redis") as mock_redis_module: # Mock finding the pending transaction mock_transaction = MagicMock() diff --git a/autogpt_platform/backend/backend/executor/manager_low_balance_test.py b/autogpt_platform/backend/backend/executor/manager_low_balance_test.py index d51ffb2511..fe99379782 100644 --- a/autogpt_platform/backend/backend/executor/manager_low_balance_test.py +++ b/autogpt_platform/backend/backend/executor/manager_low_balance_test.py @@ -4,26 +4,25 @@ import pytest from prisma.enums import NotificationType from backend.data.notifications import LowBalanceData -from backend.executor.manager import ExecutionProcessor +from backend.executor import billing from backend.util.test import SpinTestServer @pytest.mark.asyncio(loop_scope="session") async def test_handle_low_balance_threshold_crossing(server: SpinTestServer): - """Test that _handle_low_balance triggers notification when crossing threshold.""" + """Test that handle_low_balance triggers notification when crossing threshold.""" - execution_processor = ExecutionProcessor() user_id = "test-user-123" current_balance = 400 # $4 - below $5 threshold transaction_cost = 600 # $6 transaction # Mock dependencies with patch( - "backend.executor.manager.queue_notification" + "backend.executor.billing.queue_notification" ) as mock_queue_notif, patch( - "backend.executor.manager.get_notification_manager_client" + "backend.executor.billing.get_notification_manager_client" ) as mock_get_client, patch( - "backend.executor.manager.settings" + "backend.executor.billing.settings" ) as mock_settings: # Setup mocks @@ -37,7 +36,7 @@ async def test_handle_low_balance_threshold_crossing(server: SpinTestServer): 
mock_db_client.get_user_email_by_id.return_value = "test@example.com" # Test the low balance handler - execution_processor._handle_low_balance( + billing.handle_low_balance( db_client=mock_db_client, user_id=user_id, current_balance=current_balance, @@ -69,7 +68,6 @@ async def test_handle_low_balance_no_notification_when_not_crossing( ): """Test that no notification is sent when not crossing the threshold.""" - execution_processor = ExecutionProcessor() user_id = "test-user-123" current_balance = 600 # $6 - above $5 threshold transaction_cost = ( @@ -78,11 +76,11 @@ async def test_handle_low_balance_no_notification_when_not_crossing( # Mock dependencies with patch( - "backend.executor.manager.queue_notification" + "backend.executor.billing.queue_notification" ) as mock_queue_notif, patch( - "backend.executor.manager.get_notification_manager_client" + "backend.executor.billing.get_notification_manager_client" ) as mock_get_client, patch( - "backend.executor.manager.settings" + "backend.executor.billing.settings" ) as mock_settings: # Setup mocks @@ -94,7 +92,7 @@ async def test_handle_low_balance_no_notification_when_not_crossing( mock_db_client = MagicMock() # Test the low balance handler - execution_processor._handle_low_balance( + billing.handle_low_balance( db_client=mock_db_client, user_id=user_id, current_balance=current_balance, @@ -112,7 +110,6 @@ async def test_handle_low_balance_no_duplicate_when_already_below( ): """Test that no notification is sent when already below threshold.""" - execution_processor = ExecutionProcessor() user_id = "test-user-123" current_balance = 300 # $3 - below $5 threshold transaction_cost = ( @@ -121,11 +118,11 @@ async def test_handle_low_balance_no_duplicate_when_already_below( # Mock dependencies with patch( - "backend.executor.manager.queue_notification" + "backend.executor.billing.queue_notification" ) as mock_queue_notif, patch( - "backend.executor.manager.get_notification_manager_client" + "backend.executor.billing.get_notification_manager_client" ) as mock_get_client, patch( - "backend.executor.manager.settings" + "backend.executor.billing.settings" ) as mock_settings: # Setup mocks @@ -137,7 +134,7 @@ async def test_handle_low_balance_no_duplicate_when_already_below( mock_db_client = MagicMock() # Test the low balance handler - execution_processor._handle_low_balance( + billing.handle_low_balance( db_client=mock_db_client, user_id=user_id, current_balance=current_balance, diff --git a/autogpt_platform/backend/test/e2e_test_data.py b/autogpt_platform/backend/test/e2e_test_data.py index add6013893..974b60fb1a 100644 --- a/autogpt_platform/backend/test/e2e_test_data.py +++ b/autogpt_platform/backend/test/e2e_test_data.py @@ -18,9 +18,13 @@ images: { """ import asyncio +import json import random +from pathlib import Path from typing import Any, Dict, List +import prisma.enums as prisma_enums +import prisma.models as prisma_models from faker import Faker # Import API functions from the backend @@ -30,10 +34,12 @@ from backend.api.features.store.db import ( create_store_submission, review_store_submission, ) +from backend.api.features.store.model import StoreSubmission +from backend.blocks.io import AgentInputBlock from backend.data.auth.api_key import create_api_key from backend.data.credit import get_user_credit_model from backend.data.db import prisma -from backend.data.graph import Graph, Link, Node, create_graph +from backend.data.graph import Graph, Link, Node, create_graph, make_graph_model from backend.data.user import get_or_create_user from 
backend.util.clients import get_supabase @@ -60,6 +66,31 @@ MAX_REVIEWS_PER_VERSION = 5 GUARANTEED_FEATURED_AGENTS = 8 GUARANTEED_FEATURED_CREATORS = 5 GUARANTEED_TOP_AGENTS = 10 +E2E_MARKETPLACE_CREATOR_EMAIL = "test123@example.com" +E2E_MARKETPLACE_CREATOR_USERNAME = "e2e-marketplace" +E2E_MARKETPLACE_AGENT_SLUG = "e2e-calculator-agent" +E2E_MARKETPLACE_AGENT_NAME = "E2E Calculator Agent" +E2E_MARKETPLACE_AGENT_INPUT_VALUE = 8 +E2E_MARKETPLACE_AGENT_OUTPUT_VALUE = 42 +_LOCAL_TEMPLATE_PATH = ( + Path(__file__).resolve().parents[1] / "agents" / "calculator-agent.json" +) +_DOCKER_TEMPLATE_PATH = Path( + "/app/autogpt_platform/backend/agents/calculator-agent.json" +) +E2E_MARKETPLACE_AGENT_TEMPLATE_PATH = ( + _LOCAL_TEMPLATE_PATH if _LOCAL_TEMPLATE_PATH.exists() else _DOCKER_TEMPLATE_PATH +) +SEEDED_TEST_EMAILS = [ + "test123@example.com", + "e2e.qa.auth@example.com", + "e2e.qa.builder@example.com", + "e2e.qa.library@example.com", + "e2e.qa.marketplace@example.com", + "e2e.qa.settings@example.com", + "e2e.qa.parallel.a@example.com", + "e2e.qa.parallel.b@example.com", +] def get_image(): @@ -100,6 +131,25 @@ def get_category(): return random.choice(categories) +def load_deterministic_marketplace_graph() -> Graph: + graph = Graph.model_validate( + json.loads(E2E_MARKETPLACE_AGENT_TEMPLATE_PATH.read_text()) + ) + graph.name = E2E_MARKETPLACE_AGENT_NAME + graph.description = ( + "Deterministic marketplace calculator graph for Playwright PR E2E coverage." + ) + + for node in graph.nodes: + if ( + node.block_id == AgentInputBlock().id + and node.input_default.get("value") is None + ): + node.input_default["value"] = E2E_MARKETPLACE_AGENT_INPUT_VALUE + + return graph + + class TestDataCreator: """Creates test data using API functions for E2E tests.""" @@ -123,9 +173,9 @@ class TestDataCreator: for i in range(NUM_USERS): try: # Generate test user data - if i == 0: - # First user should have test123@gmail.com email for testing - email = "test123@gmail.com" + if i < len(SEEDED_TEST_EMAILS): + # Keep a deterministic pool for Playwright global setup and PR smoke flows + email = SEEDED_TEST_EMAILS[i] else: email = faker.unique.email() password = "testpassword123" # Standard test password # pragma: allowlist secret # noqa @@ -547,6 +597,46 @@ class TestDataCreator: print(f"Error updating profile {profile.id}: {e}") continue + deterministic_creator = next( + ( + user + for user in self.users + if user["email"] == E2E_MARKETPLACE_CREATOR_EMAIL + ), + None, + ) + if deterministic_creator: + deterministic_profile = next( + ( + profile + for profile in existing_profiles + if profile.userId == deterministic_creator["id"] + ), + None, + ) + if deterministic_profile: + try: + updated_profile = await prisma.profile.update( + where={"id": deterministic_profile.id}, + data={ + "name": "E2E Marketplace Creator", + "username": E2E_MARKETPLACE_CREATOR_USERNAME, + "description": "Deterministic marketplace creator for Playwright PR E2E coverage.", + "links": ["https://example.com/e2e-marketplace"], + "avatarUrl": get_image(), + "isFeatured": True, + }, + ) + profiles = [ + profile + for profile in profiles + if profile.get("id") != deterministic_profile.id + ] + if updated_profile is not None: + profiles.append(updated_profile.model_dump()) + except Exception as e: + print(f"Error updating deterministic E2E creator profile: {e}") + self.profiles = profiles return profiles @@ -562,58 +652,184 @@ class TestDataCreator: featured_count = 0 submission_counter = 0 - # Create a special test submission for test123@gmail.com 
(ALWAYS approved + featured) + # Create a deterministic calculator marketplace agent for PR E2E coverage test_user = next( - (user for user in self.users if user["email"] == "test123@gmail.com"), None + ( + user + for user in self.users + if user["email"] == E2E_MARKETPLACE_CREATOR_EMAIL + ), + None, ) - if test_user and self.agent_graphs: - test_submission_data = { - "user_id": test_user["id"], - "graph_id": self.agent_graphs[0]["id"], - "graph_version": 1, - "slug": "test-agent-submission", - "name": "Test Agent Submission", - "sub_heading": "A test agent for frontend testing", - "video_url": "https://www.youtube.com/watch?v=test123", - "image_urls": [ - "https://picsum.photos/200/300", - "https://picsum.photos/200/301", - "https://picsum.photos/200/302", - ], - "description": "This is a test agent submission specifically created for frontend testing purposes.", - "categories": ["test", "demo", "frontend"], - "changes_summary": "Initial test submission", - } + if test_user: + deterministic_graph = None try: - test_submission = await create_store_submission(**test_submission_data) - submissions.append(test_submission.model_dump()) - print("✅ Created special test store submission for test123@gmail.com") - - # ALWAYS approve and feature the test submission - if test_submission.listing_version_id: - approved_submission = await review_store_submission( - store_listing_version_id=test_submission.listing_version_id, - is_approved=True, - external_comments="Test submission approved", - internal_comments="Auto-approved test submission", - reviewer_id=test_user["id"], + existing_graph = await prisma_models.AgentGraph.prisma().find_first( + where={ + "userId": test_user["id"], + "name": E2E_MARKETPLACE_AGENT_NAME, + "isActive": True, + }, + order={"version": "desc"}, + ) + if existing_graph: + deterministic_graph = { + "id": existing_graph.id, + "version": existing_graph.version, + "name": existing_graph.name, + "userId": test_user["id"], + } + self.agent_graphs.append(deterministic_graph) + print( + "✅ Reused existing deterministic marketplace graph: " + f"{existing_graph.id}" ) - approved_submissions.append(approved_submission.model_dump()) - print("✅ Approved test store submission") - - await prisma.storelistingversion.update( - where={"id": test_submission.listing_version_id}, - data={"isFeatured": True}, + else: + deterministic_graph_model = make_graph_model( + load_deterministic_marketplace_graph(), + test_user["id"], ) - featured_count += 1 - print("🌟 Marked test agent as FEATURED") - + deterministic_graph_model.reassign_ids( + user_id=test_user["id"], + reassign_graph_id=True, + ) + created_deterministic_graph = await create_graph( + deterministic_graph_model, + test_user["id"], + ) + deterministic_graph = created_deterministic_graph.model_dump() + deterministic_graph["userId"] = test_user["id"] + self.agent_graphs.append(deterministic_graph) + print("✅ Created deterministic marketplace graph") except Exception as e: - print(f"Error creating test store submission: {e}") - import traceback + print(f"Error creating deterministic marketplace graph: {e}") - traceback.print_exc() + if deterministic_graph is None and self.agent_graphs: + test_user_graphs = [ + graph + for graph in self.agent_graphs + if graph.get("userId") == test_user["id"] + ] + deterministic_graph = next( + ( + graph + for graph in test_user_graphs + if not graph.get("name", "").startswith("DummyInput ") + ), + test_user_graphs[0] if test_user_graphs else None, + ) + + if deterministic_graph: + test_submission_data = { + 
"user_id": test_user["id"], + "graph_id": deterministic_graph["id"], + "graph_version": deterministic_graph.get("version", 1), + "slug": E2E_MARKETPLACE_AGENT_SLUG, + "name": E2E_MARKETPLACE_AGENT_NAME, + "sub_heading": "A deterministic calculator agent for PR E2E coverage", + "video_url": "https://www.youtube.com/watch?v=test123", + "image_urls": [ + "https://picsum.photos/seed/e2e-marketplace-1/200/300", + "https://picsum.photos/seed/e2e-marketplace-2/200/301", + "https://picsum.photos/seed/e2e-marketplace-3/200/302", + ], + "description": ( + "A deterministic marketplace calculator agent that adds " + f"{E2E_MARKETPLACE_AGENT_INPUT_VALUE} and 34 to produce " + f"{E2E_MARKETPLACE_AGENT_OUTPUT_VALUE} for frontend E2E coverage." + ), + "categories": ["test", "demo", "frontend"], + "changes_summary": ( + "Initial deterministic calculator submission seeded from " + "backend/agents/calculator-agent.json" + ), + } + + try: + existing_deterministic_submission = ( + await prisma_models.StoreListingVersion.prisma().find_first( + where={ + "isDeleted": False, + "StoreListing": { + "is": { + "owningUserId": test_user["id"], + "slug": E2E_MARKETPLACE_AGENT_SLUG, + "isDeleted": False, + } + }, + }, + include={"StoreListing": True}, + order={"version": "desc"}, + ) + ) + + if existing_deterministic_submission: + test_submission = StoreSubmission.from_listing_version( + existing_deterministic_submission + ) + submissions.append(test_submission.model_dump()) + print( + "✅ Reused deterministic marketplace submission: " + f"{E2E_MARKETPLACE_AGENT_NAME}" + ) + else: + test_submission = await create_store_submission( + **test_submission_data + ) + submissions.append(test_submission.model_dump()) + print( + "✅ Created deterministic marketplace submission: " + f"{E2E_MARKETPLACE_AGENT_NAME}" + ) + + current_status = ( + existing_deterministic_submission.submissionStatus + if existing_deterministic_submission + else test_submission.status + ) + is_featured = bool( + existing_deterministic_submission + and existing_deterministic_submission.isFeatured + ) + + if test_submission.listing_version_id: + if current_status != prisma_enums.SubmissionStatus.APPROVED: + approved_submission = await review_store_submission( + store_listing_version_id=test_submission.listing_version_id, + is_approved=True, + external_comments="Deterministic calculator submission approved", + internal_comments="Auto-approved PR E2E marketplace submission", + reviewer_id=test_user["id"], + ) + approved_submissions.append( + approved_submission.model_dump() + ) + print("✅ Approved deterministic marketplace submission") + else: + approved_submissions.append(test_submission.model_dump()) + print( + "✅ Deterministic marketplace submission already approved" + ) + + if is_featured: + featured_count += 1 + print("🌟 Deterministic marketplace agent already FEATURED") + else: + await prisma.storelistingversion.update( + where={"id": test_submission.listing_version_id}, + data={"isFeatured": True}, + ) + featured_count += 1 + print( + "🌟 Marked deterministic marketplace agent as FEATURED" + ) + + except Exception as e: + print(f"Error creating deterministic marketplace submission: {e}") + import traceback + + traceback.print_exc() # Create regular submissions for all users for user in self.users: diff --git a/autogpt_platform/docker-compose.platform.yml b/autogpt_platform/docker-compose.platform.yml index 29ab586a47..1b3ff8338f 100644 --- a/autogpt_platform/docker-compose.platform.yml +++ b/autogpt_platform/docker-compose.platform.yml @@ -6,7 +6,8 @@ # 
5. CLI arguments - docker compose run -e VAR=value # Common backend environment - Docker service names -x-backend-env: &backend-env # Docker internal service hostnames (override localhost defaults) +x-backend-env: + &backend-env # Docker internal service hostnames (override localhost defaults) PYRO_HOST: "0.0.0.0" AGENTSERVER_HOST: rest_server SCHEDULER_HOST: scheduler_server @@ -39,7 +40,12 @@ services: context: ../ dockerfile: autogpt_platform/backend/Dockerfile target: migrate - command: ["sh", "-c", "prisma generate && python3 scripts/gen_prisma_types_stub.py && prisma migrate deploy"] + command: + [ + "sh", + "-c", + "prisma generate && python3 scripts/gen_prisma_types_stub.py && prisma migrate deploy", + ] develop: watch: - path: ./ @@ -79,8 +85,8 @@ services: falkordb: image: falkordb/falkordb:latest ports: - - "6380:6379" # FalkorDB Redis protocol (6380 to avoid clash with Redis on 6379) - - "3001:3000" # FalkorDB web UI + - "6380:6379" # FalkorDB Redis protocol (6380 to avoid clash with Redis on 6379) + - "3001:3000" # FalkorDB web UI environment: - REDIS_ARGS=--requirepass ${GRAPHITI_FALKORDB_PASSWORD:-} volumes: @@ -88,7 +94,11 @@ services: networks: - app-network healthcheck: - test: ["CMD-SHELL", "redis-cli -p 6379 -a \"${GRAPHITI_FALKORDB_PASSWORD:-}\" --no-auth-warning ping && wget --spider -q http://localhost:3000 || exit 1"] + test: + [ + "CMD-SHELL", + 'redis-cli -p 6379 -a "${GRAPHITI_FALKORDB_PASSWORD:-}" --no-auth-warning ping && wget --spider -q http://localhost:3000 || exit 1', + ] interval: 10s timeout: 5s retries: 5 @@ -300,19 +310,6 @@ services: condition: service_completed_successfully database_manager: condition: service_started - # healthcheck: - # test: - # [ - # "CMD", - # "curl", - # "-f", - # "-X", - # "POST", - # "http://localhost:8003/health_check", - # ] - # interval: 10s - # timeout: 10s - # retries: 5 <<: *backend-env-files environment: <<: *backend-env diff --git a/autogpt_platform/docker-compose.yml b/autogpt_platform/docker-compose.yml index ef9c738834..f7b4b105fc 100644 --- a/autogpt_platform/docker-compose.yml +++ b/autogpt_platform/docker-compose.yml @@ -193,3 +193,4 @@ services: - copilot_executor - websocket_server - database_manager + - scheduler_server diff --git a/autogpt_platform/frontend/.storybook/main.ts b/autogpt_platform/frontend/.storybook/main.ts index 4e3070bfe1..235dbf4749 100644 --- a/autogpt_platform/frontend/.storybook/main.ts +++ b/autogpt_platform/frontend/.storybook/main.ts @@ -8,6 +8,7 @@ const config: StorybookConfig = { "../src/components/molecules/**/*.stories.@(js|jsx|mjs|ts|tsx)", "../src/components/ai-elements/**/*.stories.@(js|jsx|mjs|ts|tsx)", "../src/components/renderers/**/*.stories.@(js|jsx|mjs|ts|tsx)", + "../src/app/[(]platform[)]/copilot/**/*.stories.@(js|jsx|mjs|ts|tsx)", ], addons: [ "@storybook/addon-a11y", diff --git a/autogpt_platform/frontend/README.md b/autogpt_platform/frontend/README.md index abea810fd2..aec05dfbbb 100644 --- a/autogpt_platform/frontend/README.md +++ b/autogpt_platform/frontend/README.md @@ -81,8 +81,10 @@ Every time a new Front-end dependency is added by you or others, you will need t - `pnpm lint` - Run ESLint and Prettier checks - `pnpm format` - Format code with Prettier - `pnpm types` - Run TypeScript type checking -- `pnpm test` - Run Playwright tests -- `pnpm test-ui` - Run Playwright tests with UI +- `pnpm test:unit` - Run the Vitest integration and unit suite with coverage +- `pnpm test` - Run the Playwright E2E suite used in CI +- `pnpm test-ui` - Run the same Playwright E2E 
suite with UI +- `pnpm test:e2e:no-build` - Run the same Playwright E2E suite against a running app - `pnpm fetch:openapi` - Fetch OpenAPI spec from backend - `pnpm generate:api-client` - Generate API client from OpenAPI spec - `pnpm generate:api` - Fetch OpenAPI spec and generate API client diff --git a/autogpt_platform/frontend/TESTING.md b/autogpt_platform/frontend/TESTING.md index 0b95f8eaab..ee8ed5d9cf 100644 --- a/autogpt_platform/frontend/TESTING.md +++ b/autogpt_platform/frontend/TESTING.md @@ -121,35 +121,49 @@ Only when the component has complex internal logic that is hard to exercise thro ### Running ```bash -pnpm test # build + run all Playwright tests -pnpm test-ui # run with Playwright UI -pnpm test:no-build # run against a running dev server +pnpm test # build + run the Playwright E2E suite used in CI +pnpm test-ui # run the same E2E suite with Playwright UI +pnpm test:e2e:no-build # run the same E2E suite against a running dev server +pnpm exec playwright test # run the same eight-spec Playwright suite directly ``` ### Setup 1. Start the backend + Supabase stack: - From `autogpt_platform`: `docker compose --profile local up deps_backend -d` -2. Seed rich E2E data (creates `test123@gmail.com` with library agents): +2. Seed rich E2E data (creates `test123@example.com` with library agents): - From `autogpt_platform/backend`: `poetry run python test/e2e_test_data.py` ### How Playwright setup works -- Playwright runs from `frontend/playwright.config.ts` with a global setup step -- Global setup creates a user pool via the real signup UI, stored in `frontend/.auth/user-pool.json` -- `getTestUser()` (from `src/tests/utils/auth.ts`) pulls a random user from the pool +- Playwright runs from `frontend/playwright.config.ts` and keeps browser-only code in `frontend/src/playwright/` +- Global setup creates reusable auth states for deterministic seeded accounts in `frontend/.auth/states/` +- `getTestUser()` (from `src/playwright/utils/auth.ts`) picks one seeded account for general auth coverage - `getTestUserWithLibraryAgents()` uses the rich user created by the data script ### Test users -- **User pool (basic users)** — created automatically by Playwright global setup. Used by `getTestUser()` +- **Seeded E2E accounts** — created by backend fixtures and logged in during Playwright global setup. Used by `getTestUser()` and `E2E_AUTH_STATES` - **Rich user with library agents** — created by `backend/test/e2e_test_data.py`. Used by `getTestUserWithLibraryAgents()` +### Current Playwright E2E suite + +The CI suite is intentionally limited to the cross-page journeys we still require a real browser for. Playwright discovers the PR-gating specs by the `*-happy-path.spec.ts` naming pattern inside `src/playwright/`: + +- `src/playwright/auth-happy-path.spec.ts` +- `src/playwright/settings-happy-path.spec.ts` +- `src/playwright/api-keys-happy-path.spec.ts` +- `src/playwright/builder-happy-path.spec.ts` +- `src/playwright/library-happy-path.spec.ts` +- `src/playwright/marketplace-happy-path.spec.ts` +- `src/playwright/publish-happy-path.spec.ts` +- `src/playwright/copilot-happy-path.spec.ts` + ### Resetting the DB If you reset the Docker DB and logins start failing: -1. Delete `frontend/.auth/user-pool.json` +1. Delete `frontend/.auth/states/*` and `frontend/.auth/user-pool.json` if it exists 2. 
Re-run `poetry run python test/e2e_test_data.py` ## Storybook diff --git a/autogpt_platform/frontend/package.json b/autogpt_platform/frontend/package.json index 00e9e6fc8a..4661ab2050 100644 --- a/autogpt_platform/frontend/package.json +++ b/autogpt_platform/frontend/package.json @@ -13,11 +13,13 @@ "lint": "next lint && prettier --check .", "format": "next lint --fix; prettier --write .", "types": "tsc --noEmit", - "test": "NEXT_PUBLIC_PW_TEST=true next build --turbo && playwright test", - "test-ui": "NEXT_PUBLIC_PW_TEST=true next build --turbo && playwright test --ui", + "test": "NEXT_PUBLIC_PW_TEST=true next build --turbo && pnpm test:e2e:no-build", + "test-ui": "NEXT_PUBLIC_PW_TEST=true next build --turbo && pnpm test:e2e:ui", "test:unit": "vitest run --coverage", "test:unit:watch": "vitest", - "test:no-build": "playwright test", + "test:e2e": "NEXT_PUBLIC_PW_TEST=true next build --turbo && pnpm test:e2e:no-build", + "test:e2e:no-build": "playwright test", + "test:e2e:ui": "playwright test --ui", "gentests": "playwright codegen http://localhost:3000", "storybook": "storybook dev -p 6006", "build-storybook": "storybook build", diff --git a/autogpt_platform/frontend/playwright.config.ts b/autogpt_platform/frontend/playwright.config.ts index bf3c19845f..0805443035 100644 --- a/autogpt_platform/frontend/playwright.config.ts +++ b/autogpt_platform/frontend/playwright.config.ts @@ -7,10 +7,22 @@ import { defineConfig, devices } from "@playwright/test"; import dotenv from "dotenv"; import fs from "fs"; import path from "path"; +import { buildCookieConsentStorageState } from "./src/playwright/credentials/storage-state"; dotenv.config({ path: path.resolve(__dirname, ".env") }); dotenv.config({ path: path.resolve(__dirname, "../backend/.env") }); const frontendRoot = __dirname.replaceAll("\\", "/"); +const configuredBaseURL = + process.env.PLAYWRIGHT_BASE_URL ?? "http://localhost:3000"; +const parsedBaseURL = new URL(configuredBaseURL); +const baseURL = parsedBaseURL.toString().replace(/\/$/, ""); +const baseOrigin = parsedBaseURL.origin; +const jsonReporterOutputFile = process.env.PLAYWRIGHT_JSON_OUTPUT_FILE; +const configuredWorkers = process.env.PLAYWRIGHT_WORKERS + ? Number(process.env.PLAYWRIGHT_WORKERS) + : process.env.CI + ? 8 + : undefined; // Directory where CI copies .next/static from the Docker container const staticCoverageDir = path.resolve(__dirname, ".next-static-coverage"); @@ -57,17 +69,18 @@ function resolveSourceMap(sourcePath: string) { } export default defineConfig({ - testDir: "./src/tests", + testDir: "./src/playwright", + testMatch: /.*-happy-path\.spec\.ts/, /* Global setup file that runs before all tests */ - globalSetup: "./src/tests/global-setup.ts", + globalSetup: "./src/playwright/global-setup.ts", /* Run tests in files in parallel */ fullyParallel: true, /* Fail the build on CI if you accidentally left test.only in the source code. */ forbidOnly: !!process.env.CI, /* Retry on CI only */ - retries: process.env.CI ? 1 : 0, - /* use more workers on CI. */ - workers: process.env.CI ? 4 : undefined, + retries: process.env.CI ? Number(process.env.PLAYWRIGHT_RETRIES ?? 2) : 0, + /* Higher worker count keeps PR smoke runtime down without sharing page state. */ + workers: configuredWorkers, /* Reporter to use. See https://playwright.dev/docs/test-reporters */ reporter: [ ["list"], @@ -92,40 +105,25 @@ export default defineConfig({ }, }, ], + ...(jsonReporterOutputFile + ? 
[["json", { outputFile: jsonReporterOutputFile }] as const] + : []), ], /* Shared settings for all the projects below. See https://playwright.dev/docs/api/class-testoptions. */ use: { /* Base URL to use in actions like `await page.goto('/')`. */ - baseURL: "http://localhost:3000/", + baseURL, /* Collect trace when retrying the failed test. See https://playwright.dev/docs/trace-viewer */ screenshot: "only-on-failure", bypassCSP: true, /* Helps debugging failures */ - trace: "retain-on-failure", - video: "retain-on-failure", + trace: process.env.CI ? "on-first-retry" : "retain-on-failure", + video: process.env.CI ? "off" : "retain-on-failure", /* Auto-accept cookies in all tests to prevent banner interference */ - storageState: { - cookies: [], - origins: [ - { - origin: "http://localhost:3000", - localStorage: [ - { - name: "autogpt_cookie_consent", - value: JSON.stringify({ - hasConsented: true, - timestamp: Date.now(), - analytics: true, - monitoring: true, - }), - }, - ], - }, - ], - }, + storageState: buildCookieConsentStorageState(baseOrigin), }, /* Maximum time one test can run for */ timeout: 25000, @@ -133,7 +131,7 @@ export default defineConfig({ /* Configure web server to start automatically (local dev only) */ webServer: { command: "pnpm start", - url: "http://localhost:3000", + url: baseURL, reuseExistingServer: true, }, diff --git a/autogpt_platform/frontend/src/app/(platform)/admin/platform-costs/__tests__/PlatformCostContent.test.tsx b/autogpt_platform/frontend/src/app/(platform)/admin/platform-costs/__tests__/PlatformCostContent.test.tsx index 5944e94ea7..bde8507b37 100644 --- a/autogpt_platform/frontend/src/app/(platform)/admin/platform-costs/__tests__/PlatformCostContent.test.tsx +++ b/autogpt_platform/frontend/src/app/(platform)/admin/platform-costs/__tests__/PlatformCostContent.test.tsx @@ -29,6 +29,16 @@ const emptyDashboard: PlatformCostDashboard = { total_cost_microdollars: 0, total_requests: 0, total_users: 0, + total_input_tokens: 0, + total_output_tokens: 0, + avg_input_tokens_per_request: 0, + avg_output_tokens_per_request: 0, + avg_cost_microdollars_per_request: 0, + cost_p50_microdollars: 0, + cost_p75_microdollars: 0, + cost_p95_microdollars: 0, + cost_p99_microdollars: 0, + cost_buckets: [], by_provider: [], by_user: [], }; @@ -47,6 +57,20 @@ const dashboardWithData: PlatformCostDashboard = { total_cost_microdollars: 5_000_000, total_requests: 100, total_users: 5, + total_input_tokens: 150000, + total_output_tokens: 60000, + avg_input_tokens_per_request: 2500, + avg_output_tokens_per_request: 1000, + avg_cost_microdollars_per_request: 83333, + cost_p50_microdollars: 50000, + cost_p75_microdollars: 100000, + cost_p95_microdollars: 250000, + cost_p99_microdollars: 500000, + cost_buckets: [ + { bucket: "$0-0.50", count: 80 }, + { bucket: "$0.50-1", count: 15 }, + { bucket: "$1-2", count: 5 }, + ], by_provider: [ { provider: "openai", @@ -75,6 +99,7 @@ const dashboardWithData: PlatformCostDashboard = { total_input_tokens: 50000, total_output_tokens: 20000, request_count: 60, + cost_bearing_request_count: 40, }, ], }; @@ -134,9 +159,14 @@ describe("PlatformCostContent", () => { await waitFor(() => expect(document.querySelector(".animate-pulse")).toBeNull(), ); - // Verify the two summary cards that show $0.0000 — Known Cost and Estimated Total + // Known Cost and Estimated Total cards render $0.0000 + // "Known Cost" appears in both the SummaryCard and the ProviderTable header + expect(screen.getAllByText("Known Cost").length).toBeGreaterThanOrEqual(1); + 
expect(screen.getByText("Estimated Total")).toBeDefined(); + // All cost summary cards (Known Cost, Estimated Total, Avg Cost, + // Typical/Upper/High/Peak Cost) show $0.0000 const zeroCostItems = screen.getAllByText("$0.0000"); - expect(zeroCostItems.length).toBe(2); + expect(zeroCostItems.length).toBe(7); expect(screen.getByText("No cost data yet")).toBeDefined(); }); @@ -155,7 +185,9 @@ describe("PlatformCostContent", () => { ); expect(screen.getByText("$5.0000")).toBeDefined(); expect(screen.getByText("100")).toBeDefined(); - expect(screen.getByText("5")).toBeDefined(); + // "5" appears in multiple places (Active Users card + bucket count), + // so verify at least one element renders it. + expect(screen.getAllByText("5").length).toBeGreaterThanOrEqual(1); expect(screen.getByText("openai")).toBeDefined(); expect(screen.getByText("google_maps")).toBeDefined(); }); @@ -223,10 +255,83 @@ describe("PlatformCostContent", () => { await waitFor(() => expect(document.querySelector(".animate-pulse")).toBeNull(), ); + // Original 4 cards expect(screen.getAllByText("Known Cost").length).toBeGreaterThanOrEqual(1); expect(screen.getByText("Estimated Total")).toBeDefined(); expect(screen.getByText("Total Requests")).toBeDefined(); expect(screen.getByText("Active Users")).toBeDefined(); + // New average/token cards + expect(screen.getByText("Avg Cost / Request")).toBeDefined(); + expect(screen.getByText("Avg Input Tokens")).toBeDefined(); + expect(screen.getByText("Avg Output Tokens")).toBeDefined(); + expect(screen.getByText("Total Tokens")).toBeDefined(); + // Percentile cards (friendlier labels) + expect(screen.getByText("Typical Cost (P50)")).toBeDefined(); + expect(screen.getByText("Upper Cost (P75)")).toBeDefined(); + expect(screen.getByText("High Cost (P95)")).toBeDefined(); + expect(screen.getByText("Peak Cost (P99)")).toBeDefined(); + }); + + it("renders cost distribution buckets", async () => { + mockUseGetDashboard.mockReturnValue({ + data: dashboardWithData, + isLoading: false, + }); + mockUseGetLogs.mockReturnValue({ + data: logsWithData, + isLoading: false, + }); + renderComponent(); + await waitFor(() => + expect(document.querySelector(".animate-pulse")).toBeNull(), + ); + expect(screen.getByText("Cost Distribution by Bucket")).toBeDefined(); + expect(screen.getByText("$0-0.50")).toBeDefined(); + expect(screen.getByText("$0.50-1")).toBeDefined(); + expect(screen.getByText("$1-2")).toBeDefined(); + expect(screen.getByText("80")).toBeDefined(); + expect(screen.getByText("15")).toBeDefined(); + }); + + it("renders new summary card values from fixture data", async () => { + mockUseGetDashboard.mockReturnValue({ + data: dashboardWithData, + isLoading: false, + }); + mockUseGetLogs.mockReturnValue({ + data: logsWithData, + isLoading: false, + }); + renderComponent(); + await waitFor(() => + expect(document.querySelector(".animate-pulse")).toBeNull(), + ); + // Avg Input Tokens: 2500 formatted + expect(screen.getByText("2,500")).toBeDefined(); + // Avg Output Tokens: 1000 formatted + expect(screen.getByText("1,000")).toBeDefined(); + // P50 cost: 50000 microdollars = $0.0500 + expect(screen.getByText("$0.0500")).toBeDefined(); + }); + + it("renders user table avg cost column with fixture data", async () => { + mockUseGetDashboard.mockReturnValue({ + data: dashboardWithData, + isLoading: false, + }); + mockUseGetLogs.mockReturnValue({ + data: logsWithData, + isLoading: false, + }); + renderComponent({ tab: "by-user" }); + await waitFor(() => + 
expect(document.querySelector(".animate-pulse")).toBeNull(), + ); + // User table should show Avg Cost / Req header + expect(screen.getByText("Avg Cost / Req")).toBeDefined(); + // Input/Output token columns + expect(screen.getByText("Input Tokens")).toBeDefined(); + expect(screen.getByText("Output Tokens")).toBeDefined(); }); it("renders filter inputs", async () => { diff --git a/autogpt_platform/frontend/src/app/(platform)/admin/platform-costs/components/PlatformCostContent.tsx b/autogpt_platform/frontend/src/app/(platform)/admin/platform-costs/components/PlatformCostContent.tsx index 749a2136a3..ce0329af19 100644 --- a/autogpt_platform/frontend/src/app/(platform)/admin/platform-costs/components/PlatformCostContent.tsx +++ b/autogpt_platform/frontend/src/app/(platform)/admin/platform-costs/components/PlatformCostContent.tsx @@ -2,12 +2,13 @@ import { Alert, AlertDescription } from "@/components/molecules/Alert/Alert"; import { Skeleton } from "@/components/atoms/Skeleton/Skeleton"; -import { formatMicrodollars } from "../helpers"; +import { formatMicrodollars, formatTokens } from "../helpers"; import { SummaryCard } from "./SummaryCard"; import { ProviderTable } from "./ProviderTable"; import { UserTable } from "./UserTable"; import { LogsTable } from "./LogsTable"; import { usePlatformCostContent } from "./usePlatformCostContent"; +import type { CostBucket } from "@/app/api/__generated__/models/costBucket"; interface Props { searchParams: { @@ -54,6 +55,76 @@ export function PlatformCostContent({ searchParams }: Props) { handleExport, } = usePlatformCostContent(searchParams); + const summaryCards: { label: string; value: string; subtitle?: string }[] = + dashboard + ? [ + { + label: "Known Cost", + value: formatMicrodollars(dashboard.total_cost_microdollars), + subtitle: "From providers that report USD cost", + }, + { + label: "Estimated Total", + value: formatMicrodollars(totalEstimatedCost), + subtitle: "Including per-run cost estimates", + }, + { + label: "Total Requests", + value: dashboard.total_requests.toLocaleString(), + }, + { + label: "Active Users", + value: dashboard.total_users.toLocaleString(), + }, + { + label: "Avg Cost / Request", + value: formatMicrodollars( + dashboard.avg_cost_microdollars_per_request ?? 0, + ), + subtitle: "Known cost divided by cost-bearing requests", + }, + { + label: "Avg Input Tokens", + value: Math.round( + dashboard.avg_input_tokens_per_request ?? 0, + ).toLocaleString(), + subtitle: "Prompt tokens per request (context size)", + }, + { + label: "Avg Output Tokens", + value: Math.round( + dashboard.avg_output_tokens_per_request ?? 0, + ).toLocaleString(), + subtitle: "Completion tokens per request (response length)", + }, + { + label: "Total Tokens", + value: `${formatTokens(dashboard.total_input_tokens ?? 0)} in / ${formatTokens(dashboard.total_output_tokens ?? 0)} out`, + subtitle: "Prompt vs completion token split", + }, + { + label: "Typical Cost (P50)", + value: formatMicrodollars(dashboard.cost_p50_microdollars ?? 0), + subtitle: "Median cost per request", + }, + { + label: "Upper Cost (P75)", + value: formatMicrodollars(dashboard.cost_p75_microdollars ?? 0), + subtitle: "75th percentile cost", + }, + { + label: "High Cost (P95)", + value: formatMicrodollars(dashboard.cost_p95_microdollars ?? 0), + subtitle: "95th percentile cost", + }, + { + label: "Peak Cost (P99)", + value: formatMicrodollars(dashboard.cost_p99_microdollars ?? 0), + subtitle: "99th percentile cost", + }, + ] + : []; + return (
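Several assertions above switch from `getByText` to `getAllByText` because labels like "Known Cost" and values like "5" now render in more than one place. A sketch of how those checks could stay strict instead, scoping queries with Testing Library's `within`; the `data-testid="summary-cards"` hook is an assumption for illustration, not an attribute this PR adds:

```ts
import { screen, within } from "@testing-library/react";

// Scope to the summary grid so the ProviderTable header with the same
// label cannot satisfy the query. The test id is hypothetical.
const grid = screen.getByTestId("summary-cards");
expect(within(grid).getByText("Known Cost")).toBeDefined();
// The $0.0000 count then tracks only the cards, not table cells.
expect(within(grid).getAllByText("$0.0000")).toHaveLength(7);
```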
@@ -204,37 +275,54 @@ export function PlatformCostContent({ searchParams }: Props) { {loading ? (
-        <div className="…">
-          {[...Array(4)].map((_, i) => (
+        <div className="…">
+          {/* 12 skeleton placeholders — one per summary card */}
+          {Array.from({ length: 12 }, (_, i) => (
             <Skeleton key={i} className="…" />
           ))}
         </div>
       ) : (
         <>
           {dashboard && (
-            <div className="…">
-              <SummaryCard … />
-              <SummaryCard … />
-              <SummaryCard … />
-              <SummaryCard … />
+            <>
+              <div className="…">
+                {summaryCards.map((card) => (
+                  <SummaryCard key={card.label} … />
+                ))}
+              </div>
+
+              {dashboard.cost_buckets && dashboard.cost_buckets.length > 0 && (
+                <div className="…">
+                  <div className="…">Cost Distribution by Bucket</div>
+                  <div className="…">
+                    {dashboard.cost_buckets.map((b: CostBucket) => (
+                      <div key={b.bucket} className="…">
+                        <span>{b.bucket}</span>
+                        <span>{b.count.toLocaleString()}</span>
+                      </div>
+                    ))}
+                  </div>
+                </div>
Usage + + Input Tokens + + + Output Tokens + Requests @@ -74,6 +89,16 @@ function ProviderTable({ data, rateOverrides, onRateOverride }: Props) { {trackingValue(row)} + + {row.total_input_tokens > 0 + ? formatTokens(row.total_input_tokens) + : "-"} + + + {row.total_output_tokens > 0 + ? formatTokens(row.total_output_tokens) + : "-"} + {row.request_count.toLocaleString()} @@ -124,7 +149,7 @@ function ProviderTable({ data, rateOverrides, onRateOverride }: Props) { {data.length === 0 && ( No cost data yet diff --git a/autogpt_platform/frontend/src/app/(platform)/admin/platform-costs/components/UserTable.tsx b/autogpt_platform/frontend/src/app/(platform)/admin/platform-costs/components/UserTable.tsx index c2ee70ce72..aa14ca175c 100644 --- a/autogpt_platform/frontend/src/app/(platform)/admin/platform-costs/components/UserTable.tsx +++ b/autogpt_platform/frontend/src/app/(platform)/admin/platform-costs/components/UserTable.tsx @@ -27,10 +27,7 @@ function UserTable({ data }: Props) { Output Tokens - Cache Read - - - Cache Write + Avg Cost / Req @@ -61,13 +58,12 @@ function UserTable({ data }: Props) { {formatTokens(row.total_output_tokens)} - {(row.total_cache_read_tokens ?? 0) > 0 - ? formatTokens(row.total_cache_read_tokens ?? 0) - : "-"} - - - {(row.total_cache_creation_tokens ?? 0) > 0 - ? formatTokens(row.total_cache_creation_tokens ?? 0) + {(row.cost_bearing_request_count ?? 0) > 0 && + row.total_cost_microdollars > 0 + ? formatMicrodollars( + row.total_cost_microdollars / + (row.cost_bearing_request_count ?? 1), + ) : "-"} @@ -75,7 +71,7 @@ function UserTable({ data }: Props) { {data.length === 0 && ( No cost data yet diff --git a/autogpt_platform/frontend/src/app/(platform)/copilot/components/ArtifactCard/ArtifactCard.stories.tsx b/autogpt_platform/frontend/src/app/(platform)/copilot/components/ArtifactCard/ArtifactCard.stories.tsx new file mode 100644 index 0000000000..d4fc07fb48 --- /dev/null +++ b/autogpt_platform/frontend/src/app/(platform)/copilot/components/ArtifactCard/ArtifactCard.stories.tsx @@ -0,0 +1,145 @@ +import type { Meta, StoryObj } from "@storybook/nextjs"; +import { ArtifactCard } from "./ArtifactCard"; +import type { ArtifactRef } from "../../store"; +import { useCopilotUIStore } from "../../store"; + +function makeArtifact(overrides?: Partial): ArtifactRef { + return { + id: "file-001", + title: "report.html", + mimeType: "text/html", + sourceUrl: "/api/proxy/api/workspace/files/file-001/download", + origin: "agent", + ...overrides, + }; +} + +const meta: Meta = { + title: "Copilot/ArtifactCard", + component: ArtifactCard, + tags: ["autodocs"], + parameters: { + layout: "padded", + docs: { + description: { + component: + "Inline artifact card rendered in chat messages. Openable artifacts show a caret and open the ArtifactPanel on click. Download-only artifacts trigger a file download.", + }, + }, + }, + decorators: [ + (Story) => ( +
+      <div className="…">
+        <Story />
+      </div>
+ ), + ], +}; + +export default meta; +type Story = StoryObj; + +export const OpenableHTML: Story = { + name: "Openable (HTML)", + args: { + artifact: makeArtifact({ + title: "dashboard.html", + mimeType: "text/html", + }), + }, +}; + +export const OpenableImage: Story = { + name: "Openable (Image)", + args: { + artifact: makeArtifact({ + id: "img-card", + title: "chart.png", + mimeType: "image/png", + }), + }, +}; + +export const OpenableCode: Story = { + name: "Openable (Code)", + args: { + artifact: makeArtifact({ + title: "script.py", + mimeType: "text/x-python", + }), + }, +}; + +export const DownloadOnly: Story = { + name: "Download Only (ZIP)", + args: { + artifact: makeArtifact({ + title: "archive.zip", + mimeType: "application/zip", + sizeBytes: 2_500_000, + }), + }, +}; + +export const PreviewableVideo: Story = { + name: "Previewable (Video)", + args: { + artifact: makeArtifact({ + title: "demo.mp4", + mimeType: "video/mp4", + sizeBytes: 15_000_000, + }), + }, + parameters: { + docs: { + description: { + story: + "Videos with supported formats (MP4, WebM, M4V) are previewable inline in the artifact panel.", + }, + }, + }, +}; + +export const WithSize: Story = { + name: "With File Size", + args: { + artifact: makeArtifact({ + title: "data.csv", + mimeType: "text/csv", + sizeBytes: 524_288, + }), + }, +}; + +export const UserUpload: Story = { + name: "User Upload Origin", + args: { + artifact: makeArtifact({ + title: "requirements.txt", + mimeType: "text/plain", + origin: "user-upload", + }), + }, +}; + +export const ActiveState: Story = { + name: "Active (Panel Open)", + args: { + artifact: makeArtifact({ id: "active-card" }), + }, + decorators: [ + (Story) => { + useCopilotUIStore.setState({ + artifactPanel: { + isOpen: true, + isMinimized: false, + isMaximized: false, + width: 600, + activeArtifact: makeArtifact({ id: "active-card" }), + history: [], + }, + }); + return ; + }, + ], +}; diff --git a/autogpt_platform/frontend/src/app/(platform)/copilot/components/ArtifactPanel/ArtifactPanel.stories.tsx b/autogpt_platform/frontend/src/app/(platform)/copilot/components/ArtifactPanel/ArtifactPanel.stories.tsx new file mode 100644 index 0000000000..e7b457c6a9 --- /dev/null +++ b/autogpt_platform/frontend/src/app/(platform)/copilot/components/ArtifactPanel/ArtifactPanel.stories.tsx @@ -0,0 +1,223 @@ +import type { Meta, StoryObj } from "@storybook/nextjs"; +import { http, HttpResponse } from "msw"; +import { ArtifactPanel } from "./ArtifactPanel"; +import { useCopilotUIStore } from "../../store"; +import type { ArtifactRef } from "../../store"; + +const PROXY_BASE = "/api/proxy/api/workspace/files"; + +function makeArtifact(overrides?: Partial): ArtifactRef { + return { + id: "file-001", + title: "report.html", + mimeType: "text/html", + sourceUrl: `${PROXY_BASE}/file-001/download`, + origin: "agent", + ...overrides, + }; +} + +function openPanelWith(artifact: ArtifactRef) { + useCopilotUIStore.setState({ + artifactPanel: { + isOpen: true, + isMinimized: false, + isMaximized: false, + width: 600, + activeArtifact: artifact, + history: [], + }, + }); +} + +const meta: Meta = { + title: "Copilot/ArtifactPanel", + component: ArtifactPanel, + tags: ["autodocs"], + parameters: { + layout: "fullscreen", + docs: { + description: { + component: + "Side panel for previewing workspace artifacts. Supports resize, minimize, maximize, and navigation history. Bug: panel auto-opens on chat switch instead of staying collapsed.", + }, + }, + }, + decorators: [ + (Story) => ( +
+      <div className="…">
+        <div className="…">
+          Chat area
+        </div>
+        <Story />
+      </div>
+ ), + ], +}; + +export default meta; +type Story = StoryObj; + +export const OpenWithTextArtifact: Story = { + name: "Open — Text File", + decorators: [ + (Story) => { + openPanelWith( + makeArtifact({ title: "notes.txt", mimeType: "text/plain" }), + ); + return ; + }, + ], + parameters: { + msw: { + handlers: [ + http.get(`${PROXY_BASE}/file-001/download`, () => { + return HttpResponse.text( + "These are some notes from the agent execution.\n\nKey findings:\n1. Performance improved by 23%\n2. Memory usage reduced\n3. Error rate dropped to 0.1%", + ); + }), + ], + }, + }, +}; + +export const OpenWithHTMLArtifact: Story = { + name: "Open — HTML", + decorators: [ + (Story) => { + openPanelWith( + makeArtifact({ + id: "html-panel", + title: "dashboard.html", + mimeType: "text/html", + sourceUrl: `${PROXY_BASE}/html-panel/download`, + }), + ); + return ; + }, + ], + parameters: { + msw: { + handlers: [ + http.get(`${PROXY_BASE}/html-panel/download`, () => { + return HttpResponse.text( + `

+<html><body>
+<h1>Dashboard</h1>
+<p>HTML artifact in the panel.</p>
+</body></html>
`, + ); + }), + ], + }, + }, +}; + +export const OpenWithImageArtifact: Story = { + name: "Open — Image (Bug: No Loading State)", + decorators: [ + (Story) => { + openPanelWith( + makeArtifact({ + id: "img-panel", + title: "chart.png", + mimeType: "image/png", + sourceUrl: `${PROXY_BASE}/img-panel/download`, + }), + ); + return ; + }, + ], + parameters: { + msw: { + handlers: [ + http.get(`${PROXY_BASE}/img-panel/download`, () => { + return HttpResponse.text( + 'Image Preview (no skeleton)', + { headers: { "Content-Type": "image/svg+xml" } }, + ); + }), + ], + }, + docs: { + description: { + story: + "**BUG:** Image artifacts render with a bare `` tag — no loading skeleton or error handling. Compare with text/HTML artifacts which show a proper skeleton while loading.", + }, + }, + }, +}; + +export const MinimizedStrip: Story = { + name: "Minimized", + decorators: [ + (Story) => { + useCopilotUIStore.setState({ + artifactPanel: { + isOpen: true, + isMinimized: true, + isMaximized: false, + width: 600, + activeArtifact: makeArtifact(), + history: [], + }, + }); + return ; + }, + ], +}; + +export const ErrorState: Story = { + name: "Error — Failed to Load (Stale Artifact)", + decorators: [ + (Story) => { + openPanelWith( + makeArtifact({ + id: "stale-panel", + title: "old-report.html", + mimeType: "text/html", + sourceUrl: `${PROXY_BASE}/stale-panel/download`, + }), + ); + return ; + }, + ], + parameters: { + msw: { + handlers: [ + http.get(`${PROXY_BASE}/stale-panel/download`, () => { + return new HttpResponse(null, { status: 404 }); + }), + ], + }, + docs: { + description: { + story: + "Shows what users see when opening a previously generated artifact that no longer exists on the backend (404). The 'Try again' button retries the fetch.", + }, + }, + }, +}; + +export const Closed: Story = { + name: "Closed (Default State)", + decorators: [ + (Story) => { + useCopilotUIStore.setState({ + artifactPanel: { + isOpen: false, + isMinimized: false, + isMaximized: false, + width: 600, + activeArtifact: null, + history: [], + }, + }); + return ; + }, + ], + parameters: { + docs: { + description: { + story: + "The default state — panel is closed. 
It should only open when a user clicks on an artifact card in the chat.", + }, + }, + }, +}; diff --git a/autogpt_platform/frontend/src/app/(platform)/copilot/components/ArtifactPanel/__tests__/downloadArtifact.test.ts b/autogpt_platform/frontend/src/app/(platform)/copilot/components/ArtifactPanel/__tests__/downloadArtifact.test.ts new file mode 100644 index 0000000000..4095841e89 --- /dev/null +++ b/autogpt_platform/frontend/src/app/(platform)/copilot/components/ArtifactPanel/__tests__/downloadArtifact.test.ts @@ -0,0 +1,413 @@ +import { describe, expect, it, vi, beforeEach, afterEach } from "vitest"; +import { downloadArtifact } from "../downloadArtifact"; +import type { ArtifactRef } from "../../../store"; + +function makeArtifact(overrides?: Partial): ArtifactRef { + return { + id: "file-001", + title: "report.pdf", + mimeType: "application/pdf", + sourceUrl: "/api/proxy/api/workspace/files/file-001/download", + origin: "agent", + ...overrides, + }; +} + +describe("downloadArtifact", () => { + let clickSpy: ReturnType; + let removeSpy: ReturnType; + + beforeEach(() => { + clickSpy = vi.fn(); + removeSpy = vi.fn(); + + vi.stubGlobal( + "URL", + Object.assign(URL, { + createObjectURL: vi.fn().mockReturnValue("blob:fake-url"), + revokeObjectURL: vi.fn(), + }), + ); + + vi.spyOn(document, "createElement").mockReturnValue({ + href: "", + download: "", + click: clickSpy, + remove: removeSpy, + } as unknown as HTMLAnchorElement); + + vi.spyOn(document.body, "appendChild").mockImplementation( + (node) => node as ChildNode, + ); + }); + + afterEach(() => { + vi.restoreAllMocks(); + vi.unstubAllGlobals(); + }); + + it("downloads file successfully on 200 response", async () => { + vi.stubGlobal( + "fetch", + vi.fn().mockResolvedValue({ + ok: true, + blob: () => Promise.resolve(new Blob(["pdf content"])), + }), + ); + + await downloadArtifact(makeArtifact()); + + expect(fetch).toHaveBeenCalledWith( + "/api/proxy/api/workspace/files/file-001/download", + ); + expect(clickSpy).toHaveBeenCalled(); + expect(removeSpy).toHaveBeenCalled(); + expect(URL.revokeObjectURL).toHaveBeenCalledWith("blob:fake-url"); + }); + + it("rejects on persistent server error after exhausting retries", async () => { + vi.stubGlobal( + "fetch", + vi.fn().mockResolvedValue({ + ok: false, + status: 500, + }), + ); + + await expect(downloadArtifact(makeArtifact())).rejects.toThrow( + "Download failed: 500", + ); + expect(clickSpy).not.toHaveBeenCalled(); + }); + + it("rejects on persistent network error after exhausting retries", async () => { + let callCount = 0; + vi.stubGlobal( + "fetch", + vi.fn().mockImplementation(() => { + callCount++; + return Promise.reject(new Error("Network error")); + }), + ); + + await expect(downloadArtifact(makeArtifact())).rejects.toThrow( + "Network error", + ); + expect(callCount).toBe(3); + expect(clickSpy).not.toHaveBeenCalled(); + }); + + it("retries on transient network error and succeeds", async () => { + let callCount = 0; + vi.stubGlobal( + "fetch", + vi.fn().mockImplementation(() => { + callCount++; + if (callCount === 1) { + return Promise.reject(new Error("Connection reset")); + } + return Promise.resolve({ + ok: true, + blob: () => Promise.resolve(new Blob(["content"])), + }); + }), + ); + + await downloadArtifact(makeArtifact()); + expect(callCount).toBe(2); + expect(clickSpy).toHaveBeenCalled(); + }); + + it("retries on transient 500 and succeeds", async () => { + let callCount = 0; + vi.stubGlobal( + "fetch", + vi.fn().mockImplementation(() => { + callCount++; + if (callCount === 
1) { + return Promise.resolve({ ok: false, status: 500 }); + } + return Promise.resolve({ + ok: true, + blob: () => Promise.resolve(new Blob(["content"])), + }); + }), + ); + + // Should succeed on second attempt + await downloadArtifact(makeArtifact()); + expect(callCount).toBe(2); + expect(clickSpy).toHaveBeenCalled(); + }); + + it("sanitizes dangerous filenames", async () => { + vi.stubGlobal( + "fetch", + vi.fn().mockResolvedValue({ + ok: true, + blob: () => Promise.resolve(new Blob(["content"])), + }), + ); + + const anchor = { + href: "", + download: "", + click: clickSpy, + remove: removeSpy, + }; + vi.spyOn(document, "createElement").mockReturnValue( + anchor as unknown as HTMLAnchorElement, + ); + + await downloadArtifact(makeArtifact({ title: "../../../etc/passwd" })); + + expect(anchor.download).not.toContain(".."); + expect(anchor.download).not.toContain("/"); + }); + + // ── Transient retry codes ───────────────────────────────────────── + + it("retries on 408 (Request Timeout) and succeeds", async () => { + let callCount = 0; + vi.stubGlobal( + "fetch", + vi.fn().mockImplementation(() => { + callCount++; + if (callCount === 1) { + return Promise.resolve({ ok: false, status: 408 }); + } + return Promise.resolve({ + ok: true, + blob: () => Promise.resolve(new Blob(["content"])), + }); + }), + ); + + await downloadArtifact(makeArtifact()); + expect(callCount).toBe(2); + expect(clickSpy).toHaveBeenCalled(); + }); + + it("retries on 429 (Too Many Requests) and succeeds", async () => { + let callCount = 0; + vi.stubGlobal( + "fetch", + vi.fn().mockImplementation(() => { + callCount++; + if (callCount === 1) { + return Promise.resolve({ ok: false, status: 429 }); + } + return Promise.resolve({ + ok: true, + blob: () => Promise.resolve(new Blob(["content"])), + }); + }), + ); + + await downloadArtifact(makeArtifact()); + expect(callCount).toBe(2); + expect(clickSpy).toHaveBeenCalled(); + }); + + // ── Non-transient errors ────────────────────────────────────────── + + it("rejects immediately on 403 (non-transient) without retry", async () => { + let callCount = 0; + vi.stubGlobal( + "fetch", + vi.fn().mockImplementation(() => { + callCount++; + return Promise.resolve({ ok: false, status: 403 }); + }), + ); + + await expect(downloadArtifact(makeArtifact())).rejects.toThrow( + "Download failed: 403", + ); + expect(callCount).toBe(1); + expect(clickSpy).not.toHaveBeenCalled(); + }); + + it("rejects immediately on 404 without retry", async () => { + let callCount = 0; + vi.stubGlobal( + "fetch", + vi.fn().mockImplementation(() => { + callCount++; + return Promise.resolve({ ok: false, status: 404 }); + }), + ); + + await expect(downloadArtifact(makeArtifact())).rejects.toThrow( + "Download failed: 404", + ); + expect(callCount).toBe(1); + }); + + // ── Exhausted retries ───────────────────────────────────────────── + + it("rejects after exhausting all retries on persistent 500", async () => { + let callCount = 0; + vi.stubGlobal( + "fetch", + vi.fn().mockImplementation(() => { + callCount++; + return Promise.resolve({ ok: false, status: 500 }); + }), + ); + + await expect(downloadArtifact(makeArtifact())).rejects.toThrow( + "Download failed: 500", + ); + // Initial attempt + 2 retries = 3 total + expect(callCount).toBe(3); + expect(clickSpy).not.toHaveBeenCalled(); + }); + + // ── Filename edge cases ─────────────────────────────────────────── + + it("falls back to 'download' when title is empty", async () => { + vi.stubGlobal( + "fetch", + vi.fn().mockResolvedValue({ + ok: true, + blob: 
() => Promise.resolve(new Blob(["content"])), + }), + ); + + const anchor = { + href: "", + download: "", + click: clickSpy, + remove: removeSpy, + }; + vi.spyOn(document, "createElement").mockReturnValue( + anchor as unknown as HTMLAnchorElement, + ); + + await downloadArtifact(makeArtifact({ title: "" })); + expect(anchor.download).toBe("download"); + }); + + it("falls back to 'download' when title is only dots", async () => { + vi.stubGlobal( + "fetch", + vi.fn().mockResolvedValue({ + ok: true, + blob: () => Promise.resolve(new Blob(["content"])), + }), + ); + + const anchor = { + href: "", + download: "", + click: clickSpy, + remove: removeSpy, + }; + vi.spyOn(document, "createElement").mockReturnValue( + anchor as unknown as HTMLAnchorElement, + ); + + // Dot-only names should not produce a hidden or empty filename. + await downloadArtifact(makeArtifact({ title: "...." })); + expect(anchor.download).toBe("download"); + }); + + it("replaces special chars with underscores (not empty)", async () => { + vi.stubGlobal( + "fetch", + vi.fn().mockResolvedValue({ + ok: true, + blob: () => Promise.resolve(new Blob(["content"])), + }), + ); + + const anchor = { + href: "", + download: "", + click: clickSpy, + remove: removeSpy, + }; + vi.spyOn(document, "createElement").mockReturnValue( + anchor as unknown as HTMLAnchorElement, + ); + + await downloadArtifact(makeArtifact({ title: '***???"' })); + // Special chars become underscores, not removed + expect(anchor.download).toBe("_______"); + }); + + it("strips leading dots from filename", async () => { + vi.stubGlobal( + "fetch", + vi.fn().mockResolvedValue({ + ok: true, + blob: () => Promise.resolve(new Blob(["content"])), + }), + ); + + const anchor = { + href: "", + download: "", + click: clickSpy, + remove: removeSpy, + }; + vi.spyOn(document, "createElement").mockReturnValue( + anchor as unknown as HTMLAnchorElement, + ); + + await downloadArtifact(makeArtifact({ title: "...hidden.txt" })); + expect(anchor.download).not.toMatch(/^\./); + expect(anchor.download).toContain("hidden.txt"); + }); + + it("replaces Windows-reserved characters", async () => { + vi.stubGlobal( + "fetch", + vi.fn().mockResolvedValue({ + ok: true, + blob: () => Promise.resolve(new Blob(["content"])), + }), + ); + + const anchor = { + href: "", + download: "", + click: clickSpy, + remove: removeSpy, + }; + vi.spyOn(document, "createElement").mockReturnValue( + anchor as unknown as HTMLAnchorElement, + ); + + await downloadArtifact( + makeArtifact({ title: "filewith:bad*chars?.txt" }), + ); + expect(anchor.download).not.toMatch(/[<>:*?]/); + }); + + it("replaces control characters in filename", async () => { + vi.stubGlobal( + "fetch", + vi.fn().mockResolvedValue({ + ok: true, + blob: () => Promise.resolve(new Blob(["content"])), + }), + ); + + const anchor = { + href: "", + download: "", + click: clickSpy, + remove: removeSpy, + }; + vi.spyOn(document, "createElement").mockReturnValue( + anchor as unknown as HTMLAnchorElement, + ); + + await downloadArtifact( + makeArtifact({ title: "file\x00with\x1fcontrol.txt" }), + ); + expect(anchor.download).not.toMatch(/[\x00-\x1f]/); + }); +}); diff --git a/autogpt_platform/frontend/src/app/(platform)/copilot/components/ArtifactPanel/components/ArtifactContent.stories.tsx b/autogpt_platform/frontend/src/app/(platform)/copilot/components/ArtifactPanel/components/ArtifactContent.stories.tsx new file mode 100644 index 0000000000..6b9ef31631 --- /dev/null +++ 
b/autogpt_platform/frontend/src/app/(platform)/copilot/components/ArtifactPanel/components/ArtifactContent.stories.tsx @@ -0,0 +1,460 @@ +import type { Meta, StoryObj } from "@storybook/nextjs"; +import { http, HttpResponse } from "msw"; +import { ArtifactContent } from "./ArtifactContent"; +import type { ArtifactRef } from "../../../store"; +import type { ArtifactClassification } from "../helpers"; +import { + Code, + File, + FileHtml, + FileText, + Image, + Table, +} from "@phosphor-icons/react"; + +const PROXY_BASE = "/api/proxy/api/workspace/files"; + +function makeArtifact(overrides?: Partial): ArtifactRef { + return { + id: "file-001", + title: "test.txt", + mimeType: "text/plain", + sourceUrl: `${PROXY_BASE}/file-001/download`, + origin: "agent", + ...overrides, + }; +} + +function makeClassification( + overrides?: Partial, +): ArtifactClassification { + return { + type: "text", + icon: FileText, + label: "Text", + openable: true, + hasSourceToggle: false, + ...overrides, + }; +} + +const meta: Meta = { + title: "Copilot/ArtifactContent", + component: ArtifactContent, + tags: ["autodocs"], + parameters: { + layout: "padded", + docs: { + description: { + component: + "Renders artifact content based on file type classification. Supports images, HTML, code, CSV, JSON, markdown, PDF, and plain text. Bug: image artifacts render as bare with no loading/error states.", + }, + }, + }, + decorators: [ + (Story) => ( +
+ <div className="h-[600px] w-full"> + <Story /> + </div>
+ ), + ], +}; + +export default meta; +type Story = StoryObj<typeof ArtifactContent>; + +export const ImageArtifactPNG: Story = { + name: "Image (PNG) — No Loading Skeleton (Bug #1)", + args: { + artifact: makeArtifact({ + id: "img-png", + title: "chart.png", + mimeType: "image/png", + sourceUrl: `${PROXY_BASE}/img-png/download`, + }), + isSourceView: false, + classification: makeClassification({ type: "image", icon: Image }), + }, + parameters: { + msw: { + handlers: [ + http.get(`${PROXY_BASE}/img-png/download`, () => { + return HttpResponse.text( + '<svg xmlns="http://www.w3.org/2000/svg" width="400" height="200"><text x="20" y="100">PNG Placeholder</text></svg>', + { headers: { "Content-Type": "image/svg+xml" } }, + ); + }), + ], + }, + docs: { + description: { + story: + "**BUG:** This renders a bare `<img>` tag with no loading skeleton or error handling. Compare with WorkspaceFileRenderer which has proper Skeleton + onError states.", + }, + }, + }, +}; + +export const ImageArtifactSVG: Story = { + name: "Image (SVG)", + args: { + artifact: makeArtifact({ + id: "img-svg", + title: "diagram.svg", + mimeType: "image/svg+xml", + sourceUrl: `${PROXY_BASE}/img-svg/download`, + }), + isSourceView: false, + classification: makeClassification({ type: "image", icon: Image }), + }, + parameters: { + msw: { + handlers: [ + http.get(`${PROXY_BASE}/img-svg/download`, () => { + return HttpResponse.text( + '<svg xmlns="http://www.w3.org/2000/svg" width="200" height="80"><text x="20" y="40">SVG OK</text></svg>', + { headers: { "Content-Type": "image/svg+xml" } }, + ); + }), + ], + }, + }, +}; + +export const HTMLArtifact: Story = { + name: "HTML", + args: { + artifact: makeArtifact({ + id: "html-001", + title: "page.html", + mimeType: "text/html", + sourceUrl: `${PROXY_BASE}/html-001/download`, + }), + isSourceView: false, + classification: makeClassification({ + type: "html", + icon: FileHtml, + label: "HTML", + hasSourceToggle: true, + }), + }, + parameters: { + msw: { + handlers: [ + http.get(`${PROXY_BASE}/html-001/download`, () => { + return HttpResponse.text( + `<!DOCTYPE html> +<html> +<head> +<title>Artifact Preview</title> +</head> +<body>
+<h1>HTML Artifact</h1> +<p> +This is an HTML artifact rendered in a sandboxed iframe with Tailwind CSS injected. +</p> + +<div> +<p>Interactive content works via allow-scripts sandbox.</p> +</div> + +</body> +</html>`, + { headers: { "Content-Type": "text/html" } }, + ); + }), + ], + }, + }, +}; + +export const CodeArtifact: Story = { + name: "Code (Python)", + args: { + artifact: makeArtifact({ + id: "code-001", + title: "analysis.py", + mimeType: "text/x-python", + sourceUrl: `${PROXY_BASE}/code-001/download`, + }), + isSourceView: false, + classification: makeClassification({ + type: "code", + icon: Code, + label: "Code", + }), + }, + parameters: { + msw: { + handlers: [ + http.get(`${PROXY_BASE}/code-001/download`, () => { + return HttpResponse.text( + `import pandas as pd +import matplotlib.pyplot as plt + +def analyze_data(filepath: str) -> pd.DataFrame: + """Load and analyze CSV data.""" + df = pd.read_csv(filepath) + summary = df.describe() + print(f"Loaded {len(df)} rows") + return summary + +if __name__ == "__main__": + result = analyze_data("data.csv") + print(result)`, + { headers: { "Content-Type": "text/plain" } }, + ); + }), + ], + }, + }, +}; + +export const CSVArtifact: Story = { + name: "CSV (Spreadsheet)", + args: { + artifact: makeArtifact({ + id: "csv-001", + title: "data.csv", + mimeType: "text/csv", + sourceUrl: `${PROXY_BASE}/csv-001/download`, + }), + isSourceView: false, + classification: makeClassification({ + type: "csv", + icon: Table, + label: "Spreadsheet", + hasSourceToggle: true, + }), + }, + parameters: { + msw: { + handlers: [ + http.get(`${PROXY_BASE}/csv-001/download`, () => { + return HttpResponse.text( + `Name,Age,City,Score +Alice,28,New York,92 +Bob,35,San Francisco,87 +Charlie,22,Chicago,95 +Diana,31,Boston,88 +Eve,27,Seattle,91`, + { headers: { "Content-Type": "text/csv" } }, + ); + }), + ], + }, + }, +}; + +export const JSONArtifact: Story = { + name: "JSON (Data)", + args: { + artifact: makeArtifact({ + id: "json-001", + title: "config.json", + mimeType: "application/json", + sourceUrl: `${PROXY_BASE}/json-001/download`, + }), + isSourceView: false, + classification: makeClassification({ + type: "json", + icon: Code, + label: "Data", + hasSourceToggle: true, + }), + }, + parameters: { + msw: { + handlers: [ + http.get(`${PROXY_BASE}/json-001/download`, () => { + return HttpResponse.text( + JSON.stringify( + { + name: "AutoGPT Agent", + version: "2.0", + capabilities: ["web_search", "code_execution", "file_io"], + settings: { maxTokens: 4096, temperature: 0.7 }, + }, + null, + 2, + ), + { headers: { "Content-Type": "application/json" } }, + ); + }), + ], + }, + }, +}; + +export const MarkdownArtifact: Story = { + name: "Markdown", + args: { + artifact: makeArtifact({ + id: "md-001", + title: "README.md", + mimeType: "text/markdown", + sourceUrl: `${PROXY_BASE}/md-001/download`, + }), + isSourceView: false, + classification: makeClassification({ + type: "markdown", + icon: FileText, + label: "Document", + hasSourceToggle: true, + }), + }, + parameters: { + msw: { + handlers: [ + http.get(`${PROXY_BASE}/md-001/download`, () => { + return HttpResponse.text( + `# Project Summary + +## Overview +This is a **markdown** artifact rendered through the global renderer registry.
+ +## Features +- Headings and paragraphs +- **Bold** and *italic* text +- Lists and code blocks + +\`\`\`python +print("Hello from markdown!") +\`\`\` + +> Blockquotes are also supported.`, + { headers: { "Content-Type": "text/plain" } }, + ); + }), + ], + }, + }, +}; + +export const PDFArtifact: Story = { + name: "PDF", + args: { + artifact: makeArtifact({ + id: "pdf-001", + title: "report.pdf", + mimeType: "application/pdf", + sourceUrl: `${PROXY_BASE}/pdf-001/download`, + }), + isSourceView: false, + classification: makeClassification({ + type: "pdf", + icon: FileText, + label: "PDF", + }), + }, + parameters: { + msw: { + handlers: [ + http.get(`${PROXY_BASE}/pdf-001/download`, () => { + return HttpResponse.arrayBuffer(new ArrayBuffer(100), { + headers: { "Content-Type": "application/pdf" }, + }); + }), + ], + }, + docs: { + description: { + story: + "PDF artifacts are rendered in an unsandboxed iframe using a blob URL (Chromium bug #413851 prevents sandboxed PDF rendering).", + }, + }, + }, +}; + +export const ErrorState: Story = { + name: "Error — Failed to Load Content", + args: { + artifact: makeArtifact({ + id: "error-001", + title: "old-report.html", + mimeType: "text/html", + sourceUrl: `${PROXY_BASE}/error-001/download`, + }), + isSourceView: false, + classification: makeClassification({ + type: "html", + icon: FileHtml, + label: "HTML", + hasSourceToggle: true, + }), + }, + parameters: { + msw: { + handlers: [ + http.get(`${PROXY_BASE}/error-001/download`, () => { + return new HttpResponse(null, { status: 404 }); + }), + ], + }, + docs: { + description: { + story: + "Shows the error state when an artifact fails to load (e.g., old/expired file returning 404). Includes a 'Try again' retry button.", + }, + }, + }, +}; + +export const LoadingSkeleton: Story = { + name: "Loading State", + args: { + artifact: makeArtifact({ + id: "loading-001", + title: "loading.html", + mimeType: "text/html", + sourceUrl: `${PROXY_BASE}/loading-001/download`, + }), + isSourceView: false, + classification: makeClassification({ + type: "html", + icon: FileHtml, + label: "HTML", + }), + }, + parameters: { + msw: { + handlers: [ + http.get(`${PROXY_BASE}/loading-001/download`, async () => { + // Delay response to show loading state + await new Promise((r) => setTimeout(r, 999999)); + return HttpResponse.text("never resolves"); + }), + ], + }, + docs: { + description: { + story: + "Shows the skeleton loading state while content is being fetched.", + }, + }, + }, +}; + +export const DownloadOnly: Story = { + name: "Download Only (Binary)", + args: { + artifact: makeArtifact({ + id: "bin-001", + title: "archive.zip", + mimeType: "application/zip", + sourceUrl: `${PROXY_BASE}/bin-001/download`, + }), + isSourceView: false, + classification: makeClassification({ + type: "download-only", + icon: File, + label: "File", + openable: false, + }), + }, + parameters: { + docs: { + description: { + story: + "Download-only files (binary, video, etc.) are not rendered inline. 
The ArtifactPanel shows nothing for these — they are handled by ArtifactCard with a download button.", + }, + }, + }, +}; diff --git a/autogpt_platform/frontend/src/app/(platform)/copilot/components/ArtifactPanel/components/ArtifactContent.tsx b/autogpt_platform/frontend/src/app/(platform)/copilot/components/ArtifactPanel/components/ArtifactContent.tsx index 6e057293b5..506cbc3b60 100644 --- a/autogpt_platform/frontend/src/app/(platform)/copilot/components/ArtifactPanel/components/ArtifactContent.tsx +++ b/autogpt_platform/frontend/src/app/(platform)/copilot/components/ArtifactPanel/components/ArtifactContent.tsx @@ -2,7 +2,8 @@ import { globalRegistry } from "@/components/contextual/OutputRenderers"; import { codeRenderer } from "@/components/contextual/OutputRenderers/renderers/CodeRenderer"; -import { Suspense } from "react"; +import { Suspense, useState } from "react"; +import { Skeleton } from "@/components/ui/skeleton"; import type { ArtifactRef } from "../../../store"; import type { ArtifactClassification } from "../helpers"; import { ArtifactReactPreview } from "./ArtifactReactPreview"; @@ -63,6 +64,90 @@ function ArtifactContentLoader({ ); } +function ArtifactImage({ src, alt }: { src: string; alt: string }) { + const [loaded, setLoaded] = useState(false); + const [error, setError] = useState(false); + + if (error) { + return ( +
+ <div className="flex h-full w-full flex-col items-center justify-center gap-2 p-4"> + <p className="text-sm text-zinc-500">Failed to load image</p> + </div> + ); + } + + return ( + <div className="relative flex h-full w-full items-center justify-center"> + {!loaded && ( + <Skeleton className="absolute inset-0 h-full w-full" /> + )} + {/* eslint-disable-next-line @next/next/no-img-element */} + <img + src={src} + alt={alt} + className="max-h-full max-w-full object-contain" + onLoad={() => setLoaded(true)} + onError={() => setError(true)} + /> + </div> + ); +} + +function ArtifactVideo({ src }: { src: string }) { + const [loaded, setLoaded] = useState(false); + const [error, setError] = useState(false); + + if (error) { + return ( + <div className="flex h-full w-full flex-col items-center justify-center gap-2 p-4"> + <p className="text-sm text-zinc-500">Failed to load video</p> + </div> + ); + } + + return ( + <div className="relative h-full w-full"> + {!loaded && ( + <Skeleton className="absolute inset-0 h-full w-full" /> + )} + <video + src={src} + controls + className="h-full w-full" + onLoadedData={() => setLoaded(true)} + onError={() => setError(true)} + /> + </div> + ); +} + function ArtifactRenderer({ artifact, content, @@ -79,17 +164,19 @@ function ArtifactRenderer({ // Image: render directly from URL (no content fetch) if (classification.type === "image") { return ( - <div className="flex h-full w-full items-center justify-center p-4"> - {/* eslint-disable-next-line @next/next/no-img-element */} - <img src={artifact.sourceUrl} alt={artifact.title} /> - </div> + <ArtifactImage src={artifact.sourceUrl} alt={artifact.title} /> ); } + // Video: render with <video> directly from URL (no content fetch) + if (classification.type === "video") { + return <ArtifactVideo src={artifact.sourceUrl} />; + }
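The skeleton-until-loaded / fallback-on-error pattern above is easy to pin down in a unit test. A minimal sketch, assuming `ArtifactImage` were exported for testing (in the diff it is module-private); `fireEvent.error` dispatches the native `error` event that the component's `onError` handler listens for:

```tsx
import { fireEvent, render, screen } from "@testing-library/react";
import { expect, it } from "vitest";
// Hypothetical import: ArtifactImage is not exported in the diff above.
import { ArtifactImage } from "./ArtifactContent";

it("swaps the skeleton for a fallback when the image fails to load", () => {
  render(<ArtifactImage src="/broken.png" alt="chart" />);
  // Simulate the browser firing the native error event on the <img>.
  fireEvent.error(screen.getByAltText("chart"));
  expect(screen.getByText("Failed to load image")).toBeDefined();
});
```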
diff --git a/autogpt_platform/frontend/src/app/(platform)/signup/__tests__/page.test.tsx b/autogpt_platform/frontend/src/app/(platform)/signup/__tests__/page.test.tsx new file mode 100644 index 0000000000..4ac1e3dc50 --- /dev/null +++ b/autogpt_platform/frontend/src/app/(platform)/signup/__tests__/page.test.tsx @@ -0,0 +1,73 @@ +import type { ReactNode } from "react"; +import { + render, + screen, + fireEvent, + waitFor, +} from "@/tests/integrations/test-utils"; +import SignupPage from "../page"; +import { beforeEach, describe, expect, test, vi } from "vitest"; + +const mockUseSupabase = vi.hoisted(() => vi.fn()); +const mockSignupAction = vi.hoisted(() => vi.fn()); + +vi.mock("@/providers/onboarding/onboarding-provider", () => ({ + default: ({ children }: { children: ReactNode }) => <>{children}</>, +})); + +vi.mock("@/lib/supabase/hooks/useSupabase", () => ({ + useSupabase: mockUseSupabase, +})); + +vi.mock("../actions", () => ({ + signup: mockSignupAction, +})); + +describe("SignupPage", () => { + beforeEach(() => { + mockUseSupabase.mockReturnValue({ + supabase: {}, + user: null, + isUserLoading: false, + isLoggedIn: false, + }); + mockSignupAction.mockReset(); + }); + + test("shows existing user feedback from signup action", async () => { + mockSignupAction.mockResolvedValue({ + success: false, + error: "user_already_exists", + }); + + render(<SignupPage />); + + fireEvent.change(screen.getByLabelText("Email"), { + target: { value: "existing@example.com" }, + }); + fireEvent.change(screen.getByLabelText("Password", { selector: "input" }), { + target: { value: "validpassword123" }, + }); + fireEvent.change( + screen.getByLabelText("Confirm Password", { selector: "input" }), + { + target: { value: "validpassword123" }, + }, + ); + fireEvent.click(screen.getByRole("checkbox")); + fireEvent.click(screen.getByRole("button", { name: "Sign up" })); + + await waitFor(() => { + expect(mockSignupAction).toHaveBeenCalledWith( + "existing@example.com", + "validpassword123", + "validpassword123", + true, + ); + }); + + expect( + await screen.findByText("User with this email already exists"), + ).toBeDefined(); + }); +}); diff --git a/autogpt_platform/frontend/src/app/api/__generated__/models/blockOutputResponse.ts b/autogpt_platform/frontend/src/app/api/__generated__/models/blockOutputResponse.ts deleted file mode 100644 index a25b1a04d3..0000000000 --- a/autogpt_platform/frontend/src/app/api/__generated__/models/blockOutputResponse.ts +++ /dev/null @@ -1,25 +0,0 @@ -/** - * Generated by orval v7.13.0 🍺 - * Do not edit manually. - * AutoGPT Agent Server - * This server is used to execute agents that are created by the AutoGPT system. - * OpenAPI spec version: 0.1 - */ -import type { ResponseType } from "./responseType"; -import type { BlockOutputResponseSessionId } from "./blockOutputResponseSessionId"; -import type { BlockOutputResponseOutputs } from "./blockOutputResponseOutputs"; -import type { BlockOutputResponseIsDryRun } from "./blockOutputResponseIsDryRun"; - -/** - * Response for run_block tool.
- */ -export interface BlockOutputResponse { - type?: ResponseType; - message: string; - session_id?: BlockOutputResponseSessionId; - block_id: string; - block_name: string; - outputs: BlockOutputResponseOutputs; - success?: boolean; - is_dry_run?: BlockOutputResponseIsDryRun; -} diff --git a/autogpt_platform/frontend/src/app/api/__generated__/models/graphExecutionMeta.ts b/autogpt_platform/frontend/src/app/api/__generated__/models/graphExecutionMeta.ts deleted file mode 100644 index c8bf7115ce..0000000000 --- a/autogpt_platform/frontend/src/app/api/__generated__/models/graphExecutionMeta.ts +++ /dev/null @@ -1,36 +0,0 @@ -/** - * Generated by orval v7.13.0 🍺 - * Do not edit manually. - * AutoGPT Agent Server - * This server is used to execute agents that are created by the AutoGPT system. - * OpenAPI spec version: 0.1 - */ -import type { GraphExecutionMetaInputs } from "./graphExecutionMetaInputs"; -import type { GraphExecutionMetaCredentialInputs } from "./graphExecutionMetaCredentialInputs"; -import type { GraphExecutionMetaNodesInputMasks } from "./graphExecutionMetaNodesInputMasks"; -import type { GraphExecutionMetaPresetId } from "./graphExecutionMetaPresetId"; -import type { AgentExecutionStatus } from "./agentExecutionStatus"; -import type { GraphExecutionMetaStartedAt } from "./graphExecutionMetaStartedAt"; -import type { GraphExecutionMetaEndedAt } from "./graphExecutionMetaEndedAt"; -import type { GraphExecutionMetaShareToken } from "./graphExecutionMetaShareToken"; -import type { GraphExecutionMetaStats } from "./graphExecutionMetaStats"; - -export interface GraphExecutionMeta { - id: string; - user_id: string; - graph_id: string; - graph_version: number; - inputs: GraphExecutionMetaInputs; - credential_inputs: GraphExecutionMetaCredentialInputs; - nodes_input_masks: GraphExecutionMetaNodesInputMasks; - preset_id: GraphExecutionMetaPresetId; - status: AgentExecutionStatus; - /** When execution started running. Null if not yet started (QUEUED). */ - started_at?: GraphExecutionMetaStartedAt; - /** When execution finished. Null if not yet completed (QUEUED, RUNNING, INCOMPLETE, REVIEW). */ - ended_at?: GraphExecutionMetaEndedAt; - is_shared?: boolean; - share_token?: GraphExecutionMetaShareToken; - is_dry_run?: boolean; - stats: GraphExecutionMetaStats; -} diff --git a/autogpt_platform/frontend/src/app/api/__generated__/models/suggestedPromptsResponse.ts b/autogpt_platform/frontend/src/app/api/__generated__/models/suggestedPromptsResponse.ts deleted file mode 100644 index 9f8b44c585..0000000000 --- a/autogpt_platform/frontend/src/app/api/__generated__/models/suggestedPromptsResponse.ts +++ /dev/null @@ -1,15 +0,0 @@ -/** - * Generated by orval v7.13.0 🍺 - * Do not edit manually. - * AutoGPT Agent Server - * This server is used to execute agents that are created by the AutoGPT system. - * OpenAPI spec version: 0.1 - */ -import type { SuggestedTheme } from "./suggestedTheme"; - -/** - * Response model for user-specific suggested prompts grouped by theme. - */ -export interface SuggestedPromptsResponse { - themes: SuggestedTheme[]; -} diff --git a/autogpt_platform/frontend/src/app/api/__generated__/models/suggestedTheme.ts b/autogpt_platform/frontend/src/app/api/__generated__/models/suggestedTheme.ts deleted file mode 100644 index 5fec92e394..0000000000 --- a/autogpt_platform/frontend/src/app/api/__generated__/models/suggestedTheme.ts +++ /dev/null @@ -1,15 +0,0 @@ -/** - * Generated by orval v7.13.0 🍺 - * Do not edit manually. 
- * AutoGPT Agent Server - * This server is used to execute agents that are created by the AutoGPT system. - * OpenAPI spec version: 0.1 - */ - -/** - * A themed group of suggested prompts. - */ -export interface SuggestedTheme { - name: string; - prompts: string[]; -} diff --git a/autogpt_platform/frontend/src/app/api/openapi.json b/autogpt_platform/frontend/src/app/api/openapi.json index 43f14a13fd..732ef569d9 100644 --- a/autogpt_platform/frontend/src/app/api/openapi.json +++ b/autogpt_platform/frontend/src/app/api/openapi.json @@ -9123,6 +9123,15 @@ ], "title": "ContentType" }, + "CostBucket": { + "properties": { + "bucket": { "type": "string", "title": "Bucket" }, + "count": { "type": "integer", "title": "Count" } + }, + "type": "object", + "required": ["bucket", "count"], + "title": "CostBucket" + }, "CostLogRow": { "properties": { "id": { "type": "string", "title": "Id" }, @@ -12141,7 +12150,58 @@ "title": "Total Cost Microdollars" }, "total_requests": { "type": "integer", "title": "Total Requests" }, - "total_users": { "type": "integer", "title": "Total Users" } + "total_users": { "type": "integer", "title": "Total Users" }, + "total_input_tokens": { + "type": "integer", + "title": "Total Input Tokens", + "default": 0 + }, + "total_output_tokens": { + "type": "integer", + "title": "Total Output Tokens", + "default": 0 + }, + "avg_input_tokens_per_request": { + "type": "number", + "title": "Avg Input Tokens Per Request", + "default": 0.0 + }, + "avg_output_tokens_per_request": { + "type": "number", + "title": "Avg Output Tokens Per Request", + "default": 0.0 + }, + "avg_cost_microdollars_per_request": { + "type": "number", + "title": "Avg Cost Microdollars Per Request", + "default": 0.0 + }, + "cost_p50_microdollars": { + "type": "number", + "title": "Cost P50 Microdollars", + "default": 0.0 + }, + "cost_p75_microdollars": { + "type": "number", + "title": "Cost P75 Microdollars", + "default": 0.0 + }, + "cost_p95_microdollars": { + "type": "number", + "title": "Cost P95 Microdollars", + "default": 0.0 + }, + "cost_p99_microdollars": { + "type": "number", + "title": "Cost P99 Microdollars", + "default": 0.0 + }, + "cost_buckets": { + "items": { "$ref": "#/components/schemas/CostBucket" }, + "type": "array", + "title": "Cost Buckets", + "default": [] + } }, "type": "object", "required": [ @@ -15585,7 +15645,12 @@ "title": "Total Cache Creation Tokens", "default": 0 }, - "request_count": { "type": "integer", "title": "Request Count" } + "request_count": { "type": "integer", "title": "Request Count" }, + "cost_bearing_request_count": { + "type": "integer", + "title": "Cost Bearing Request Count", + "default": 0 + } }, "type": "object", "required": [ diff --git a/autogpt_platform/frontend/src/app/api/proxy/[...path]/route.helpers.test.ts b/autogpt_platform/frontend/src/app/api/proxy/[...path]/route.helpers.test.ts new file mode 100644 index 0000000000..c5f8f6d9f9 --- /dev/null +++ b/autogpt_platform/frontend/src/app/api/proxy/[...path]/route.helpers.test.ts @@ -0,0 +1,282 @@ +import { describe, expect, it, vi, beforeEach, afterEach } from "vitest"; +import { + isWorkspaceDownloadRequest, + isRedirectStatus, + isTransientWorkspaceDownloadStatus, + getWorkspaceDownloadErrorMessage, + fetchWorkspaceDownloadOnce, + fetchWorkspaceDownloadWithRetry, +} from "./route.helpers"; + +describe("isWorkspaceDownloadRequest", () => { + it("matches api/workspace/files/{id}/download pattern", () => { + expect( + isWorkspaceDownloadRequest([ + "api", + "workspace", + "files", + "abc-123", + "download", + 
]), + ).toBe(true); + }); + + it("rejects paths with wrong segment count", () => { + expect( + isWorkspaceDownloadRequest(["api", "workspace", "files", "download"]), + ).toBe(false); + expect( + isWorkspaceDownloadRequest([ + "api", + "workspace", + "files", + "id", + "download", + "extra", + ]), + ).toBe(false); + }); + + it("rejects paths with wrong prefix", () => { + expect( + isWorkspaceDownloadRequest([ + "v1", + "workspace", + "files", + "id", + "download", + ]), + ).toBe(false); + }); + + it("rejects paths not ending with download", () => { + expect( + isWorkspaceDownloadRequest([ + "api", + "workspace", + "files", + "id", + "metadata", + ]), + ).toBe(false); + }); +}); + +describe("isRedirectStatus", () => { + it.each([301, 302, 303, 307, 308])("returns true for %d", (status) => { + expect(isRedirectStatus(status)).toBe(true); + }); + + it.each([200, 304, 400, 404, 500])("returns false for %d", (status) => { + expect(isRedirectStatus(status)).toBe(false); + }); +}); + +describe("isTransientWorkspaceDownloadStatus", () => { + it.each([408, 429, 500, 502, 503, 504])( + "returns true for transient %d", + (status) => { + expect(isTransientWorkspaceDownloadStatus(status)).toBe(true); + }, + ); + + it.each([400, 401, 403, 404, 405])( + "returns false for non-transient %d", + (status) => { + expect(isTransientWorkspaceDownloadStatus(status)).toBe(false); + }, + ); +}); + +describe("getWorkspaceDownloadErrorMessage", () => { + it("extracts detail string from object", () => { + expect(getWorkspaceDownloadErrorMessage({ detail: "Not found" })).toBe( + "Not found", + ); + }); + + it("extracts error string from object", () => { + expect(getWorkspaceDownloadErrorMessage({ error: "Server error" })).toBe( + "Server error", + ); + }); + + it("extracts nested detail.message", () => { + expect( + getWorkspaceDownloadErrorMessage({ + detail: { message: "Nested error" }, + }), + ).toBe("Nested error"); + }); + + it("returns trimmed string body", () => { + expect(getWorkspaceDownloadErrorMessage(" error text ")).toBe( + "error text", + ); + }); + + it("returns null for empty string", () => { + expect(getWorkspaceDownloadErrorMessage("")).toBeNull(); + }); + + it("returns null for whitespace-only string", () => { + expect(getWorkspaceDownloadErrorMessage(" ")).toBeNull(); + }); + + it("returns null for null/undefined", () => { + expect(getWorkspaceDownloadErrorMessage(null)).toBeNull(); + expect(getWorkspaceDownloadErrorMessage(undefined)).toBeNull(); + }); + + it("returns null for object with empty detail", () => { + expect(getWorkspaceDownloadErrorMessage({ detail: "" })).toBeNull(); + }); + + it("returns null for object with no recognized keys", () => { + expect(getWorkspaceDownloadErrorMessage({ foo: "bar" })).toBeNull(); + }); + + it("prefers detail over error", () => { + expect( + getWorkspaceDownloadErrorMessage({ + detail: "detail msg", + error: "error msg", + }), + ).toBe("detail msg"); + }); +}); + +describe("fetchWorkspaceDownloadOnce", () => { + beforeEach(() => { + vi.stubGlobal("fetch", vi.fn()); + }); + + afterEach(() => { + vi.restoreAllMocks(); + vi.unstubAllGlobals(); + }); + + it("returns response directly for non-redirect status", async () => { + const mockResponse = { ok: true, status: 200, headers: new Headers() }; + vi.mocked(fetch).mockResolvedValue(mockResponse as unknown as Response); + + const result = await fetchWorkspaceDownloadOnce("https://backend/file", {}); + expect(result).toBe(mockResponse); + expect(fetch).toHaveBeenCalledOnce(); + }); + + it("follows redirect when 
Location header is present", async () => { + const redirectResponse = { + ok: false, + status: 302, + headers: new Headers({ Location: "https://storage.example.com/file" }), + }; + const finalResponse = { ok: true, status: 200, headers: new Headers() }; + vi.mocked(fetch) + .mockResolvedValueOnce(redirectResponse as unknown as Response) + .mockResolvedValueOnce(finalResponse as unknown as Response); + + const result = await fetchWorkspaceDownloadOnce("https://backend/file", { + Authorization: "Bearer token", + }); + expect(result).toBe(finalResponse); + expect(fetch).toHaveBeenCalledTimes(2); + expect(fetch).toHaveBeenNthCalledWith( + 2, + "https://storage.example.com/file", + { method: "GET", redirect: "follow" }, + ); + }); + + it("returns redirect response when Location header is missing", async () => { + const redirectResponse = { + ok: false, + status: 307, + headers: new Headers(), + }; + vi.mocked(fetch).mockResolvedValue(redirectResponse as unknown as Response); + + const result = await fetchWorkspaceDownloadOnce("https://backend/file", {}); + expect(result).toBe(redirectResponse); + expect(fetch).toHaveBeenCalledOnce(); + }); +}); + +describe("fetchWorkspaceDownloadWithRetry", () => { + beforeEach(() => { + vi.stubGlobal("fetch", vi.fn()); + }); + + afterEach(() => { + vi.restoreAllMocks(); + vi.unstubAllGlobals(); + }); + + it("returns immediately on success", async () => { + const okResponse = { ok: true, status: 200, headers: new Headers() }; + vi.mocked(fetch).mockResolvedValue(okResponse as unknown as Response); + + const result = await fetchWorkspaceDownloadWithRetry( + "https://backend/file", + {}, + 2, + 0, + ); + expect(result).toBe(okResponse); + expect(fetch).toHaveBeenCalledOnce(); + }); + + it("returns immediately on non-transient error without retrying", async () => { + const notFound = { ok: false, status: 404, headers: new Headers() }; + vi.mocked(fetch).mockResolvedValue(notFound as unknown as Response); + + const result = await fetchWorkspaceDownloadWithRetry( + "https://backend/file", + {}, + 2, + 0, + ); + expect(result.status).toBe(404); + expect(fetch).toHaveBeenCalledOnce(); + }); + + it("retries on transient 502 and succeeds", async () => { + const bad = { ok: false, status: 502, headers: new Headers() }; + const ok = { ok: true, status: 200, headers: new Headers() }; + vi.mocked(fetch) + .mockResolvedValueOnce(bad as unknown as Response) + .mockResolvedValueOnce(ok as unknown as Response); + + const result = await fetchWorkspaceDownloadWithRetry( + "https://backend/file", + {}, + 2, + 0, + ); + expect(result).toBe(ok); + expect(fetch).toHaveBeenCalledTimes(2); + }); + + it("returns last transient response after exhausting retries", async () => { + const bad = { ok: false, status: 503, headers: new Headers() }; + vi.mocked(fetch).mockResolvedValue(bad as unknown as Response); + + const result = await fetchWorkspaceDownloadWithRetry( + "https://backend/file", + {}, + 2, + 0, + ); + expect(result.status).toBe(503); + expect(fetch).toHaveBeenCalledTimes(3); + }); + + it("retries on network error and throws after exhausting retries", async () => { + vi.mocked(fetch).mockRejectedValue(new Error("Connection reset")); + + await expect( + fetchWorkspaceDownloadWithRetry("https://backend/file", {}, 1, 0), + ).rejects.toThrow("Connection reset"); + expect(fetch).toHaveBeenCalledTimes(2); + }); +}); diff --git a/autogpt_platform/frontend/src/app/api/proxy/[...path]/route.helpers.ts b/autogpt_platform/frontend/src/app/api/proxy/[...path]/route.helpers.ts new file mode 
100644 index 0000000000..cd83c7274d --- /dev/null +++ b/autogpt_platform/frontend/src/app/api/proxy/[...path]/route.helpers.ts @@ -0,0 +1,108 @@ +export function isWorkspaceDownloadRequest(path: string[]): boolean { + return ( + path.length === 5 && + path[0] === "api" && + path[1] === "workspace" && + path[2] === "files" && + path[path.length - 1] === "download" + ); +} + +export function isRedirectStatus(status: number): boolean { + return [301, 302, 303, 307, 308].includes(status); +} + +export function isTransientWorkspaceDownloadStatus(status: number): boolean { + return status === 408 || status === 429 || status >= 500; +} + +export function sleep(ms: number): Promise<void> { + return new Promise<void>((resolve) => setTimeout(resolve, ms)); +} + +export async function fetchWorkspaceDownloadOnce( + backendUrl: string, + headers: Record<string, string>, +): Promise<Response> { + const backendResponse = await fetch(backendUrl, { + method: "GET", + headers, + redirect: "manual", + }); + + if (!isRedirectStatus(backendResponse.status)) { + return backendResponse; + } + + const location = backendResponse.headers.get("Location"); + if (!location) return backendResponse; + + return await fetch(location, { + method: "GET", + redirect: "follow", + }); +} + +export async function fetchWorkspaceDownloadWithRetry( + backendUrl: string, + headers: Record<string, string>, + maxRetries: number, + retryDelayMs: number, +): Promise<Response> { + for (let attempt = 0; attempt <= maxRetries; attempt++) { + try { + const response = await fetchWorkspaceDownloadOnce(backendUrl, headers); + if ( + response.ok || + !isTransientWorkspaceDownloadStatus(response.status) || + attempt === maxRetries + ) { + return response; + } + } catch (error) { + if (attempt === maxRetries) throw error; + } + + await sleep(retryDelayMs); + } + + throw new Error("Workspace download failed after retries"); +} + +export function getWorkspaceDownloadErrorMessage(body: unknown): string | null { + if (typeof body === "string") { + const trimmed = body.trim(); + return trimmed || null; + } + + if (!body || typeof body !== "object") return null; + + if ( + "detail" in body && + typeof body.detail === "string" && + body.detail.trim().length > 0 + ) { + return body.detail.trim(); + } + + if ( + "error" in body && + typeof body.error === "string" && + body.error.trim().length > 0 + ) { + return body.error.trim(); + } + + if ( + "detail" in body && + body.detail && + typeof body.detail === "object" && + "message" in body.detail && + typeof body.detail.message === "string" && + body.detail.message.trim().length > 0 + ) { + return body.detail.message.trim(); + } + + return null; +}
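Taken together, the helpers give the proxy a bounded retry budget: success and non-transient statuses return on the first attempt, while transient statuses (408, 429, 5xx) and thrown network errors are retried after a fixed delay. A minimal usage sketch matching the call `route.ts` makes below; the URL and bearer token are illustrative placeholders:

```ts
import { fetchWorkspaceDownloadWithRetry } from "./route.helpers";

async function downloadWorkspaceFile(): Promise<void> {
  // One initial attempt plus up to 2 retries, 500 ms apart (the budget route.ts uses).
  const response = await fetchWorkspaceDownloadWithRetry(
    "https://backend.example.com/api/workspace/files/abc-123/download",
    { Authorization: "Bearer illustrative-token" },
    2,
    500,
  );
  if (!response.ok) {
    // A 404 comes back after a single attempt; a 503 only after the budget is spent.
    console.error("download failed:", response.status);
  }
}
```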
diff --git a/autogpt_platform/frontend/src/app/api/proxy/[...path]/route.ts b/autogpt_platform/frontend/src/app/api/proxy/[...path]/route.ts index 74f36d8d0a..605d4cf16c 100644 --- a/autogpt_platform/frontend/src/app/api/proxy/[...path]/route.ts +++ b/autogpt_platform/frontend/src/app/api/proxy/[...path]/route.ts @@ -11,25 +11,17 @@ import { NextRequest, NextResponse } from "next/server"; export const maxDuration = 300; // 5 minutes timeout for large uploads export const dynamic = "force-dynamic"; +import { + fetchWorkspaceDownloadWithRetry, + getWorkspaceDownloadErrorMessage, + isWorkspaceDownloadRequest, +} from "./route.helpers"; + function buildBackendUrl(path: string[], queryString: string): string { const backendPath = path.join("/"); return `${environment.getAGPTServerBaseUrl()}/${backendPath}${queryString}`; } -/** - * Check if this is a workspace file download request that needs binary response handling. - */ -function isWorkspaceDownloadRequest(path: string[]): boolean { - // Match pattern: api/workspace/files/{id}/download (5 segments) - return ( - path.length == 5 && - path[0] === "api" && - path[1] === "workspace" && - path[2] === "files" && - path[path.length - 1] === "download" - ); -} - /** * Handle workspace file download requests with proper binary response streaming. */ @@ -44,17 +36,15 @@ async function handleWorkspaceDownload( headers["Authorization"] = `Bearer ${token}`; } - const response = await fetch(backendUrl, { - method: "GET", + const response = await fetchWorkspaceDownloadWithRetry( + backendUrl, headers, - redirect: "follow", // Follow redirects to signed URLs - }); + 2, + 500, + ); if (!response.ok) { - return NextResponse.json( - { error: `Failed to download file: ${response.statusText}` }, - { status: response.status }, - ); + return await createWorkspaceDownloadErrorResponse(response); } // Fully buffer the response before forwarding. Passing response.body as a @@ -81,6 +71,34 @@ }); } +async function createWorkspaceDownloadErrorResponse( + response: Response, +): Promise<NextResponse> { + const contentType = response.headers.get("Content-Type")?.toLowerCase() ?? ""; + + try { + if (contentType.includes("application/json")) { + const body = await response.json(); + return NextResponse.json(body, { status: response.status }); + } + + const text = await response.text(); + const detail = + getWorkspaceDownloadErrorMessage(text) || + response.statusText || + "Failed to download file"; + + return NextResponse.json({ detail }, { status: response.status }); + } catch { + return NextResponse.json( + { + detail: response.statusText || "Failed to download file", + }, + { status: response.status }, + ); + } +} + async function handleJsonRequest( req: NextRequest, method: string, diff --git a/autogpt_platform/frontend/src/components/__legacy__/__tests__/ProfileInfoForm.test.tsx b/autogpt_platform/frontend/src/components/__legacy__/__tests__/ProfileInfoForm.test.tsx new file mode 100644 index 0000000000..3ee732912c --- /dev/null +++ b/autogpt_platform/frontend/src/components/__legacy__/__tests__/ProfileInfoForm.test.tsx @@ -0,0 +1,94 @@ +import { describe, expect, it } from "vitest"; +import { + fireEvent, + render, + screen, + waitFor, +} from "@/tests/integrations/test-utils"; +import { + getPostV2UpdateUserProfileMockHandler200, + getPostV2UpdateUserProfileMockHandler422, + getPostV2UpdateUserProfileResponseMock422, +} from "@/app/api/__generated__/endpoints/store/store.msw"; +import { server } from "@/mocks/mock-server"; +import type { ProfileDetails } from "@/app/api/__generated__/models/profileDetails"; +import { ProfileInfoForm } from "../ProfileInfoForm"; + +function makeProfile(overrides: Partial<ProfileDetails> = {}): ProfileDetails { + return { + name: "Initial Name", + username: "initial-user", + description: "Initial description", + links: [], + avatar_url: "", + ...overrides, + } as ProfileDetails; +} + +describe("ProfileInfoForm", () => { + it("renders the existing profile values into editable fields", () => { + render(<ProfileInfoForm profile={makeProfile({ name: "Hello World" })} />); + const nameInput = screen.getByTestId( + "profile-info-form-display-name", + ) as HTMLInputElement; + expect(nameInput.defaultValue).toBe("Hello World"); + }); + + it("submits the new display name to POST /api/store/profile and reflects the response", async () => { + let receivedBody: Record<string, unknown> | null = null; + + server.use( + getPostV2UpdateUserProfileMockHandler200(async ({ request }) => { + receivedBody = (await request.json()) as
Record<string, unknown>; + return makeProfile({ name: receivedBody?.name as string }); + }), + ); + + render(<ProfileInfoForm profile={makeProfile()} />); + + const nameInput = screen.getByTestId("profile-info-form-display-name"); + fireEvent.change(nameInput, { target: { value: "Brand New Name" } }); + + fireEvent.click(screen.getByRole("button", { name: "Save changes" })); + + await waitFor(() => { + expect( + receivedBody, + "POST /api/store/profile must fire when the user clicks Save", + ).not.toBeNull(); + }); + + expect(receivedBody!.name).toBe("Brand New Name"); + }); + + it("does not silently swallow the request when the API returns 422", async () => { + let calls = 0; + server.use( + getPostV2UpdateUserProfileMockHandler422(() => { + calls += 1; + return getPostV2UpdateUserProfileResponseMock422({ + detail: [ + { + loc: ["body", "name"], + msg: "validation error", + type: "value_error", + }, + ], + }); + }), + ); + + render(<ProfileInfoForm profile={makeProfile()} />); + + const nameInput = screen.getByTestId("profile-info-form-display-name"); + fireEvent.change(nameInput, { target: { value: "Anything" } }); + fireEvent.click(screen.getByRole("button", { name: "Save changes" })); + + await waitFor(() => { + expect( + calls, + "save click must hit the backend even when validation fails", + ).toBeGreaterThan(0); + }); + }); +});
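The 422 test above deliberately stops at proving the request still fires. If the form also surfaces the validation message, a follow-up test can pin that down. A hedged sketch reusing the imports and helpers from the file above; whether `ProfileInfoForm` renders the API's `detail` text, and with what wording, is an assumption to verify against the component:

```tsx
it("surfaces validation feedback from a 422 response", async () => {
  // Assumes the generated handler serves its default 422 mock when called
  // without an override resolver.
  server.use(getPostV2UpdateUserProfileMockHandler422());

  render(<ProfileInfoForm profile={makeProfile()} />);
  fireEvent.click(screen.getByRole("button", { name: "Save changes" }));

  // Illustrative copy; adjust to the form's actual error rendering.
  expect(await screen.findByText(/validation error/i)).toBeDefined();
});
```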
diff --git a/autogpt_platform/frontend/src/components/contextual/OutputRenderers/renderers/CSVRenderer.test.ts b/autogpt_platform/frontend/src/components/contextual/OutputRenderers/renderers/CSVRenderer.test.ts index 6fc650cd1a..5576c54064 100644 --- a/autogpt_platform/frontend/src/components/contextual/OutputRenderers/renderers/CSVRenderer.test.ts +++ b/autogpt_platform/frontend/src/components/contextual/OutputRenderers/renderers/CSVRenderer.test.ts @@ -1,3 +1,5 @@ +import { render, screen } from "@testing-library/react"; +import type React from "react"; import { describe, expect, it } from "vitest"; import { csvRenderer } from "./CSVRenderer"; @@ -16,6 +18,16 @@ describe("csvRenderer.canRender", () => { it("matches .csv filename case-insensitively", () => { expect(csvRenderer.canRender("a,b", { filename: "data.CSV" })).toBe(true); }); + it("matches TSV mime type", () => { + expect( + csvRenderer.canRender("a\tb\n1\t2", { + mimeType: "text/tab-separated-values", + }), + ).toBe(true); + }); + it("matches .tsv filename case-insensitively", () => { + expect(csvRenderer.canRender("a\tb", { filename: "data.TSV" })).toBe(true); + }); it("rejects non-string values", () => { expect(csvRenderer.canRender(42, { mimeType: "text/csv" })).toBe(false); }); @@ -64,4 +76,16 @@ describe("csvRenderer.render (parse via render output smoke)", () => { const csv = 'name\n"She said ""hi"""'; expect(() => csvRenderer.render(csv)).not.toThrow(); }); + it("renders TSV columns using tabs as the delimiter", () => { + render( + csvRenderer.render("name\tage\nAlice\t30", { + filename: "data.tsv", + }) as React.ReactElement, + ); + + expect(screen.getByText("name")).toBeDefined(); + expect(screen.getByText("age")).toBeDefined(); + expect(screen.getByText("Alice")).toBeDefined(); + expect(screen.getByText("30")).toBeDefined(); + }); }); diff --git a/autogpt_platform/frontend/src/components/contextual/OutputRenderers/renderers/CSVRenderer.tsx b/autogpt_platform/frontend/src/components/contextual/OutputRenderers/renderers/CSVRenderer.tsx index 7f39064eb1..594aecb306 100644 --- a/autogpt_platform/frontend/src/components/contextual/OutputRenderers/renderers/CSVRenderer.tsx +++ b/autogpt_platform/frontend/src/components/contextual/OutputRenderers/renderers/CSVRenderer.tsx @@ -6,7 +6,35 @@ import { CopyContent, } from "../types"; -function parseCSV(text: string): { headers: string[]; rows: string[][] } { +function normalizeMime(mime?: string): string | undefined { + return mime?.toLowerCase().split(";")[0]?.trim(); +} + +function getDelimiter(metadata?: OutputMetadata): "," | "\t" { + if ( + normalizeMime(metadata?.mimeType) === "text/tab-separated-values" || + metadata?.filename?.toLowerCase().endsWith(".tsv") + ) { + return "\t"; + } + + return ","; +} + +function getDelimitedMimeType(metadata?: OutputMetadata): string { + return getDelimiter(metadata) === "\t" + ? "text/tab-separated-values" + : "text/csv"; +} + +function getDelimitedFallbackFilename(metadata?: OutputMetadata): string { + return getDelimiter(metadata) === "\t" ? "data.tsv" : "data.csv"; +} + +function parseDelimitedText( + text: string, + delimiter: "," | "\t", +): { headers: string[]; rows: string[][] } { const normalized = text .replace(/\r\n?/g, "\n") .replace(/^\ufeff/, "") @@ -32,7 +60,7 @@ } } else if (ch === '"') { inQuotes = true; - } else if (ch === ",") { + } else if (ch === delimiter) { row.push(current); current = ""; } else if (ch === "\n") { @@ -51,8 +79,17 @@ return { headers, rows: rows.slice(1) }; } -function CSVTable({ value }: { value: string }) { - const { headers, rows } = useMemo(() => parseCSV(value), [value]); +function CSVTable({ + value, + delimiter, +}: { + value: string; + delimiter: "," | "\t"; +}) { + const { headers, rows } = useMemo( + () => parseDelimitedText(value, delimiter), + [delimiter, value], + ); const [sortCol, setSortCol] = useState<number | null>(null); const [sortAsc, setSortAsc] = useState(true); @@ -134,16 +171,17 @@ function CSVTable({ value }: { value: string }) { function canRenderCSV(value: unknown, metadata?: OutputMetadata): boolean { if (typeof value !== "string") return false; - if (metadata?.mimeType === "text/csv") return true; + const mime = normalizeMime(metadata?.mimeType); + if (mime === "text/csv" || mime === "text/tab-separated-values") { + return true; + } if (metadata?.filename?.toLowerCase().endsWith(".csv")) return true; + if (metadata?.filename?.toLowerCase().endsWith(".tsv")) return true; return false; } -function renderCSV( - value: unknown, - _metadata?: OutputMetadata, -): React.ReactNode { - return <CSVTable value={String(value)} />; +function renderCSV(value: unknown, metadata?: OutputMetadata): React.ReactNode { + return <CSVTable value={String(value)} delimiter={getDelimiter(metadata)} />; } function getCopyContentCSV( @@ -159,10 +197,11 @@ function getDownloadContentCSV( value: unknown, metadata?: OutputMetadata, ): DownloadContent | null { const text = String(value); + const mimeType = getDelimitedMimeType(metadata); return { - data: new Blob([text], { type: "text/csv" }), - filename: metadata?.filename || "data.csv", - mimeType: "text/csv", + data: new Blob([text], { type: mimeType }), + filename: metadata?.filename || getDelimitedFallbackFilename(metadata), + mimeType, }; } diff --git a/autogpt_platform/frontend/src/components/layout/Navbar/components/AgentActivityDropdown/__tests__/AgentActivityDropdown.test.tsx b/autogpt_platform/frontend/src/components/layout/Navbar/components/AgentActivityDropdown/__tests__/AgentActivityDropdown.test.tsx new file mode 100644 index 0000000000..5c45af03f4 --- /dev/null +++ b/autogpt_platform/frontend/src/components/layout/Navbar/components/AgentActivityDropdown/__tests__/AgentActivityDropdown.test.tsx @@ -0,0 +1,76 @@ +import { render, screen } from "@/tests/integrations/test-utils"; +import { AgentExecutionStatus } from "@/app/api/__generated__/models/agentExecutionStatus"; +import { AgentActivityDropdown } from "../AgentActivityDropdown"; +import { AgentExecutionWithInfo } from "../helpers"; +import { beforeEach, describe, expect, test, vi } from "vitest"; + +const mockUseAgentActivityDropdown = vi.hoisted(() => vi.fn()); + +vi.mock("../useAgentActivityDropdown", () => ({ + useAgentActivityDropdown: mockUseAgentActivityDropdown, +})); + +function makeExecution( + overrides: Partial<AgentExecutionWithInfo> = {}, +): AgentExecutionWithInfo { + return { + id: "exec-1", + graph_id: "graph-1", + status: AgentExecutionStatus.RUNNING, + started_at: new Date(), + ended_at: null, + user_id: "user-1", + graph_version: 1, + inputs: {}, + credential_inputs: {}, + nodes_input_masks: {}, + preset_id: null, + stats: null, + agent_name: "Test Agent", + agent_description: "A running agent", + library_agent_id: "library-1", + ...overrides, + }; +} + +describe("AgentActivityDropdown", () => { + beforeEach(() => { + mockUseAgentActivityDropdown.mockReturnValue({ + activeExecutions: [makeExecution(), makeExecution({ id: "exec-2" })], + recentCompletions: [], + recentFailures: [], + totalCount: 2, + isReady: true, + error: null, + isOpen: false, + setIsOpen: vi.fn(), + }); + }); + + test("shows the active execution badge count", () => { + render(<AgentActivityDropdown />); + + expect(screen.getByTestId("agent-activity-badge").textContent).toContain( + "2", + ); + expect(screen.getByTestId("agent-activity-button")).toBeDefined(); + }); + + test("renders the dropdown content when open", async () => { + mockUseAgentActivityDropdown.mockReturnValue({ + activeExecutions: [makeExecution()], + recentCompletions: [], + recentFailures: [], + totalCount: 1, + isReady: true, + error: null, + isOpen: true, + setIsOpen: vi.fn(), + }); + + render(<AgentActivityDropdown />); + + expect(screen.getByTestId("agent-activity-dropdown")).toBeDefined(); + expect(await screen.findByText("Test Agent")).toBeDefined(); + }); +}); diff --git a/autogpt_platform/frontend/src/lib/utils.test.ts b/autogpt_platform/frontend/src/lib/utils.test.ts new file mode 100644 index 0000000000..62742ac574 --- /dev/null +++ b/autogpt_platform/frontend/src/lib/utils.test.ts @@ -0,0 +1,97 @@ +import { describe, expect, test } from "vitest"; +import { setNestedProperty } from "./utils"; + +const testCases = [ + { + name: "simple property assignment", + path: "name", + value: "John", + expected: { name: "John" }, + }, + { + name: "nested property with dot notation", + path: "user.settings.theme", + value: "dark", + expected: { user: { settings: { theme: "dark" } } }, + }, + { + name: "nested property with slash notation", + path: "user/settings/language", + value: "en", + expected: { user: { settings: { language: "en" } } }, + }, + { + name: "mixed dot and slash notation", + path: "user.settings/preferences.color", + value: "blue", + expected: { user: { settings: { preferences: { color: "blue" } } } }, + }, + { + name: "overwrite primitive with object", + path: "user.details", + value: { age: 30 }, + expected: { user: { details: { age: 30 } } }, + }, +]; + +describe("setNestedProperty", () => { + for (const { name, path, value, expected } of testCases) { + test(name, () => { + const obj = {}; + setNestedProperty(obj, path, value); + expect(obj).toEqual(expected); + }); + } + + test("throws for null object", () => { + expect(() => { + setNestedProperty(null, "test", "value"); + }).toThrow("Target must be a non-null object"); + }); + + test("throws for undefined object", () =>
{ + expect(() => { + setNestedProperty(undefined, "test", "value"); + }).toThrow("Target must be a non-null object"); + }); + + test("throws for non-object target", () => { + expect(() => { + setNestedProperty("string", "test", "value"); + }).toThrow("Target must be a non-null object"); + }); + + test("throws for empty path", () => { + expect(() => { + setNestedProperty({}, "", "value"); + }).toThrow("Path must be a non-empty string"); + }); + + test("throws for __proto__ access", () => { + expect(() => { + setNestedProperty({}, "__proto__.malicious", "attack"); + }).toThrow("Invalid property name: __proto__"); + }); + + test("throws for constructor access", () => { + expect(() => { + setNestedProperty({}, "constructor.prototype.malicious", "attack"); + }).toThrow("Invalid property name: constructor"); + }); + + test("throws for prototype access", () => { + expect(() => { + setNestedProperty({}, "obj.prototype.malicious", "attack"); + }).toThrow("Invalid property name: prototype"); + }); + + test("prevents prototype pollution", () => { + const obj = {}; + + expect(() => { + setNestedProperty(obj, "__proto__.polluted", true); + }).toThrow("Invalid property name: __proto__"); + + expect(({} as { polluted?: boolean }).polluted).toBeUndefined(); + }); +}); diff --git a/autogpt_platform/frontend/src/playwright/api-keys-happy-path.spec.ts b/autogpt_platform/frontend/src/playwright/api-keys-happy-path.spec.ts new file mode 100644 index 0000000000..9d0cbf8afc --- /dev/null +++ b/autogpt_platform/frontend/src/playwright/api-keys-happy-path.spec.ts @@ -0,0 +1,100 @@ +import { randomUUID } from "crypto"; +import { expect, test } from "./coverage-fixture"; +import { E2E_AUTH_STATES } from "./credentials/accounts"; + +test.use({ storageState: E2E_AUTH_STATES.parallelB }); + +test("api keys happy path: user can create, copy, and revoke an API key", async ({ + page, + context, +}) => { + test.setTimeout(120000); + + await context.grantPermissions(["clipboard-read", "clipboard-write"]); + + const keyName = `E2E CLI Key ${randomUUID().slice(0, 8)}`; + + await page.goto("/profile/api-keys"); + await expect(page).toHaveURL(/\/profile\/api-keys/); + await expect( + page.getByText( + "Manage your AutoGPT Platform API keys for programmatic access", + ), + ).toBeVisible(); + + await page.getByRole("button", { name: "Create Key" }).click(); + await page.getByLabel("Name").fill(keyName); + const executeGraphCheckbox = page.getByRole("checkbox", { + name: /EXECUTE_GRAPH/i, + }); + const executeGraphChecked = + (await executeGraphCheckbox.getAttribute("aria-checked")) === "true"; + if (!executeGraphChecked) { + await executeGraphCheckbox.click(); + } + await expect(executeGraphCheckbox).toHaveAttribute("aria-checked", "true"); + + await page.getByRole("button", { name: "Create" }).click(); + + const secretDialog = page.getByRole("dialog", { + name: "AutoGPT Platform API Key Created", + }); + await expect + .poll( + async () => { + if (await secretDialog.isVisible().catch(() => false)) { + return "created"; + } + + const creationFailed = await page + .getByText("Failed to create AutoGPT Platform API key") + .isVisible() + .catch(() => false); + if (creationFailed) { + return "failed"; + } + + return "pending"; + }, + { + timeout: 30000, + message: + "API key creation should either open the created-key dialog or surface an explicit failure toast", + }, + ) + .toBe("created"); + await expect(secretDialog).toBeVisible(); + + const createdSecret = ( + (await secretDialog.locator("code").textContent()) ?? 
"" + ).trim(); + expect(createdSecret.length).toBeGreaterThan(0); + + await secretDialog.getByRole("button").first().click(); + await expect(page.getByText("Copied", { exact: true })).toBeVisible({ + timeout: 15000, + }); + await expect + .poll(() => page.evaluate(() => navigator.clipboard.readText()), { + timeout: 10000, + }) + .toBe(createdSecret); + + await secretDialog.getByRole("button", { name: "Close" }).first().click(); + + const createdKeyRow = page + .getByTestId("api-key-row") + .filter({ hasText: keyName }) + .first(); + await expect(createdKeyRow).toBeVisible({ timeout: 15000 }); + + await createdKeyRow.getByTestId("api-key-actions").click(); + await page.getByRole("menuitem", { name: "Revoke" }).click(); + + await expect( + page.getByText("AutoGPT Platform API key revoked successfully"), + ).toBeVisible({ timeout: 15000 }); + await expect( + page.getByTestId("api-key-row").filter({ hasText: keyName }), + ).toHaveCount(0); +}); diff --git a/autogpt_platform/frontend/src/tests/assets/testing_agent.json b/autogpt_platform/frontend/src/playwright/assets/testing_agent.json similarity index 100% rename from autogpt_platform/frontend/src/tests/assets/testing_agent.json rename to autogpt_platform/frontend/src/playwright/assets/testing_agent.json diff --git a/autogpt_platform/frontend/src/playwright/auth-happy-path.spec.ts b/autogpt_platform/frontend/src/playwright/auth-happy-path.spec.ts new file mode 100644 index 0000000000..a7872cb706 --- /dev/null +++ b/autogpt_platform/frontend/src/playwright/auth-happy-path.spec.ts @@ -0,0 +1,158 @@ +import { expect, test } from "./coverage-fixture"; +import { getSeededTestUser } from "./credentials/accounts"; +import { BuildPage } from "./pages/build.page"; +import { LoginPage } from "./pages/login.page"; +import { + completeOnboardingWizard, + skipOnboardingIfPresent, +} from "./utils/onboarding"; +import { signupTestUser } from "./utils/signup"; + +test("auth happy path: user can sign up with a fresh account", async ({ + page, +}) => { + test.setTimeout(60000); + + await signupTestUser(page, undefined, undefined, false); + await expect(page).toHaveURL(/\/onboarding/); + await expect(page.getByText("Welcome to AutoGPT")).toBeVisible(); +}); + +test("auth happy path: user can sign up, enter the app, and log out", async ({ + page, +}) => { + test.setTimeout(90000); + + await signupTestUser(page, undefined, undefined, false); + await expect(page).toHaveURL(/\/onboarding/); + await expect(page.getByText("Welcome to AutoGPT")).toBeVisible(); + + await skipOnboardingIfPresent(page, "/marketplace"); + await expect(page).toHaveURL(/\/marketplace/); + await expect(page.getByTestId("profile-popout-menu-trigger")).toBeVisible(); + + await page.getByTestId("profile-popout-menu-trigger").click(); + await page.getByRole("button", { name: "Log out" }).click(); + + await expect(page).toHaveURL(/\/login/); + + await page.goto("/library"); + await expect(page).toHaveURL(/\/login\?next=%2Flibrary/); +}); + +test("auth happy path: seeded user can log in", async ({ page }) => { + test.setTimeout(60000); + + const testUser = getSeededTestUser("smokeAuth"); + const loginPage = new LoginPage(page); + + await page.goto("/login"); + await loginPage.login(testUser.email, testUser.password); + + await expect(page).toHaveURL(/\/marketplace/); + await expect(page.getByTestId("profile-popout-menu-trigger")).toBeVisible(); +}); + +test("auth happy path: seeded user can log out and protected routes redirect to login", async ({ + page, +}) => { + test.setTimeout(60000); + + 
const testUser = getSeededTestUser("primary"); + const loginPage = new LoginPage(page); + + await page.goto("/login"); + await loginPage.login(testUser.email, testUser.password); + + await expect(page).toHaveURL(/\/marketplace/); + await page.getByTestId("profile-popout-menu-trigger").click(); + await page.getByRole("button", { name: "Log out" }).click(); + + await expect(page).toHaveURL(/\/login/, { timeout: 15000 }); + + await page.goto("/profile"); + await expect(page).toHaveURL(/\/login\?next=%2Fprofile/); +}); + +test("auth happy path: user can complete onboarding and land in the app", async ({ + page, +}) => { + test.setTimeout(60000); + + await signupTestUser(page, undefined, undefined, false); + await expect(page).toHaveURL(/\/onboarding/); + + await completeOnboardingWizard(page, { + name: "Smoke User", + role: "Engineering", + painPoints: ["Research", "Reports & data"], + }); + + await expect(page).toHaveURL(/\/copilot/); + await expect(page.getByTestId("profile-popout-menu-trigger")).toBeVisible(); +}); + +test("auth happy path: multi-tab logout clears shared builder sessions", async ({ + context, +}) => { + // Two pages + builder load + logout sequence justifies a higher timeout + test.setTimeout(90000); + + const consoleErrors: string[] = []; + + const page1 = await context.newPage(); + const page2 = await context.newPage(); + const buildPage = new BuildPage(page1); + + const recordWebSocketErrors = + (label: string) => (msg: { type: () => string; text: () => string }) => { + if (msg.type() === "error" && msg.text().includes("WebSocket")) { + consoleErrors.push(`${label}: ${msg.text()}`); + } + }; + + page1.on("console", recordWebSocketErrors("page1")); + page2.on("console", recordWebSocketErrors("page2")); + + await signupTestUser(page1, undefined, undefined, false); + await expect(page1).toHaveURL(/\/onboarding/); + await skipOnboardingIfPresent(page1, "/build"); + + await page1.goto("/build"); + await expect(page1).toHaveURL(/\/build/); + await buildPage.closeTutorial(); + await expect(page1.getByTestId("profile-popout-menu-trigger")).toBeVisible(); + + await page2.goto("/build"); + await expect(page2).toHaveURL(/\/build/); + await expect(page2.getByTestId("profile-popout-menu-trigger")).toBeVisible(); + + await page1.getByTestId("profile-popout-menu-trigger").click(); + await page1.getByRole("button", { name: "Log out" }).click(); + await expect(page1).toHaveURL(/\/login/); + + await page2.reload(); + await expect(page2).toHaveURL(/\/login\?next=%2Fbuild/); + await expect(page2.getByTestId("profile-popout-menu-trigger")).toBeHidden(); + + expect(consoleErrors).toHaveLength(0); + + // Prove the auth token is actually gone, not just the UI hidden. Supabase + // overwrites the cookie on signout with an empty value + past expiry + // rather than deleting it. An assertion that is silently skipped when the + // cookie is missing under the expected name would hide a real regression, + // so we assert on every non-empty sb-*auth-token* cookie explicitly. 
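+ // Cookie names look like "sb-<project-ref>-auth-token"; the project-ref segment varies per environment, hence the prefix and substring match below.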
+ const cookiesAfterLogout = await context.cookies(); + const authCookies = cookiesAfterLogout.filter( + (c) => c.name.startsWith("sb-") && c.name.includes("auth-token"), + ); + for (const cookie of authCookies) { + expect( + cookie.value, + `supabase auth cookie ${cookie.name} must be empty after logout`, + ).toBe(""); + } + + await page1.close(); + await page2.close(); +}); diff --git a/autogpt_platform/frontend/src/playwright/builder-happy-path.spec.ts b/autogpt_platform/frontend/src/playwright/builder-happy-path.spec.ts new file mode 100644 index 0000000000..b6c2f8d8c2 --- /dev/null +++ b/autogpt_platform/frontend/src/playwright/builder-happy-path.spec.ts @@ -0,0 +1,83 @@ +import { expect, test } from "./coverage-fixture"; +import { E2E_AUTH_STATES } from "./credentials/accounts"; +import { BuildPage } from "./pages/build.page"; + +test.use({ storageState: E2E_AUTH_STATES.builder }); + +test("builder happy path: user can walk through the builder tutorial and cancel midway, persisting canceled state", async ({ + page, +}) => { + test.setTimeout(180000); + + const buildPage = new BuildPage(page); + await buildPage.startTutorial(); + await buildPage.walkWelcomeToBlockMenu(); + await buildPage.walkSearchAndAddCalculator(); + await buildPage.cancelTutorial(); + + expect(await buildPage.getTutorialStateFromStorage()).toBe("canceled"); + expect(await buildPage.getNodeCount()).toBeGreaterThanOrEqual(1); +}); + +test("builder happy path: user can skip the builder tutorial from the welcome step", async ({ + page, +}) => { + test.setTimeout(60000); + + const buildPage = new BuildPage(page); + await buildPage.startTutorial(); + await buildPage.skipTutorialFromWelcome(); +}); + +test("builder happy path: user can create a simple agent in builder with core blocks", async ({ + page, +}) => { + test.setTimeout(120000); + + const buildPage = new BuildPage(page); + await buildPage.open(); + await buildPage.addSimpleAgentBlocks(); + + await expect(buildPage.getNodeLocator()).toHaveCount(2); + await expect( + buildPage + .getNodeLocator(0) + .locator('input[placeholder="Enter string value..."]'), + ).toHaveValue("smoke-value"); + await expect(buildPage.getNodeTextInput("Add to Dictionary", 0)).toHaveValue( + "smoke-key", + ); + await expect(buildPage.getNodeTextInput("Add to Dictionary", 1)).toHaveValue( + "smoke-value", + ); +}); + +test("builder happy path: user can save the created agent", async ({ + page, +}) => { + test.setTimeout(120000); + + const buildPage = new BuildPage(page); + await buildPage.createAndSaveSimpleAgent("Smoke Save Agent"); + + await expect(page).toHaveURL(/flowID=/); + expect(await buildPage.isRunButtonEnabled()).toBeTruthy(); +}); + +test("builder happy path: user can run the saved agent from builder and see execution state", async ({ + page, +}) => { + test.setTimeout(120000); + + const buildPage = new BuildPage(page); + await buildPage.createAndSaveSimpleAgent("Smoke Run Agent"); + + await buildPage.startRun(); + await expect( + page.locator('[data-id="stop-graph-button"], [data-id="run-graph-button"]'), + ).toBeVisible({ timeout: 15000 }); + + await expect + .poll(() => buildPage.getExecutionState(), { timeout: 15000 }) + .not.toBe("unknown"); +}); diff --git a/autogpt_platform/frontend/src/playwright/copilot-happy-path.spec.ts b/autogpt_platform/frontend/src/playwright/copilot-happy-path.spec.ts new file mode 100644 index 0000000000..5af1fc7a86 --- /dev/null +++ b/autogpt_platform/frontend/src/playwright/copilot-happy-path.spec.ts @@ -0,0 +1,44 @@ +import { expect, test } 
from "./coverage-fixture"; +import { E2E_AUTH_STATES } from "./credentials/accounts"; +import { CopilotPage } from "./pages/copilot.page"; + +test.use({ storageState: E2E_AUTH_STATES.marketplace }); + +test("copilot happy path: user can create a deterministic AutoPilot session and keep it after reload", async ({ + page, +}) => { + test.setTimeout(120000); + + const copilotPage = new CopilotPage(page); + await copilotPage.open(); + + const sessionId = await copilotPage.createSessionViaApi(); + + await copilotPage.open(sessionId); + await copilotPage.waitForChatInput(); + + await page.reload(); + await page.waitForLoadState("domcontentloaded"); + await copilotPage.dismissNotificationPrompt(); + + await expect + .poll(() => new URL(page.url()).searchParams.get("sessionId"), { + timeout: 15000, + }) + .toBe(sessionId); + await copilotPage.waitForChatInput(); + + // Sending a message must render the user's prompt in the conversation + // immediately. This catches a regression where the chat input accepts + // text but Enter is a no-op, without depending on knowing the exact + // backend endpoint name (which has shifted historically). + const userPrompt = `ping from e2e ${Date.now().toString().slice(-6)}`; + const chatInput = copilotPage.getChatInput(); + await chatInput.fill(userPrompt); + await chatInput.press("Enter"); + + await expect( + page.getByText(userPrompt, { exact: false }).first(), + "user's typed prompt must appear in the chat after pressing Enter", + ).toBeVisible({ timeout: 15000 }); +}); diff --git a/autogpt_platform/frontend/src/tests/coverage-fixture.ts b/autogpt_platform/frontend/src/playwright/coverage-fixture.ts similarity index 100% rename from autogpt_platform/frontend/src/tests/coverage-fixture.ts rename to autogpt_platform/frontend/src/playwright/coverage-fixture.ts diff --git a/autogpt_platform/frontend/src/playwright/credentials/accounts.ts b/autogpt_platform/frontend/src/playwright/credentials/accounts.ts new file mode 100644 index 0000000000..f0fef0cfea --- /dev/null +++ b/autogpt_platform/frontend/src/playwright/credentials/accounts.ts @@ -0,0 +1,85 @@ +import path from "path"; + +export const SEEDED_TEST_PASSWORD = + process.env.SEEDED_TEST_PASSWORD || "testpassword123"; +export const SEEDED_USER_POOL_VERSION = "2.0.0"; + +export const SEEDED_TEST_ACCOUNTS = { + primary: { + key: "primary", + email: "test123@example.com", + password: SEEDED_TEST_PASSWORD, + }, + smokeAuth: { + key: "smokeAuth", + email: "e2e.qa.auth@example.com", + password: SEEDED_TEST_PASSWORD, + }, + smokeBuilder: { + key: "smokeBuilder", + email: "e2e.qa.builder@example.com", + password: SEEDED_TEST_PASSWORD, + }, + smokeLibrary: { + key: "smokeLibrary", + email: "e2e.qa.library@example.com", + password: SEEDED_TEST_PASSWORD, + }, + smokeMarketplace: { + key: "smokeMarketplace", + email: "e2e.qa.marketplace@example.com", + password: SEEDED_TEST_PASSWORD, + }, + smokeSettings: { + key: "smokeSettings", + email: "e2e.qa.settings@example.com", + password: SEEDED_TEST_PASSWORD, + }, + parallelA: { + key: "parallelA", + email: "e2e.qa.parallel.a@example.com", + password: SEEDED_TEST_PASSWORD, + }, + parallelB: { + key: "parallelB", + email: "e2e.qa.parallel.b@example.com", + password: SEEDED_TEST_PASSWORD, + }, +} as const; + +export type SeededTestAccountKey = keyof typeof SEEDED_TEST_ACCOUNTS; +export type SeededTestAccount = + (typeof SEEDED_TEST_ACCOUNTS)[SeededTestAccountKey]; + +export const SEEDED_TEST_USERS = Object.values(SEEDED_TEST_ACCOUNTS); +export const SEEDED_AUTH_STATE_ACCOUNT_KEYS 
= [ + "smokeBuilder", + "smokeLibrary", + "smokeMarketplace", + "smokeSettings", + "parallelA", + "parallelB", +] as const; + +export const AUTH_DIRECTORY = path.resolve(process.cwd(), ".auth"); + +export function getAuthStatePath(accountKey: SeededTestAccountKey) { + return path.join(AUTH_DIRECTORY, "states", `${accountKey}.json`); +} + +export const E2E_AUTH_STATES = { + builder: getAuthStatePath("smokeBuilder"), + library: getAuthStatePath("smokeLibrary"), + marketplace: getAuthStatePath("smokeMarketplace"), + settings: getAuthStatePath("smokeSettings"), + parallelA: getAuthStatePath("parallelA"), + parallelB: getAuthStatePath("parallelB"), +} as const; + +export const SMOKE_AUTH_STATES = E2E_AUTH_STATES; + +export function getSeededTestUser( + accountKey: SeededTestAccountKey = "primary", +): SeededTestAccount { + return SEEDED_TEST_ACCOUNTS[accountKey]; +} diff --git a/autogpt_platform/frontend/src/playwright/credentials/index.ts b/autogpt_platform/frontend/src/playwright/credentials/index.ts new file mode 100644 index 0000000000..cefa3931cb --- /dev/null +++ b/autogpt_platform/frontend/src/playwright/credentials/index.ts @@ -0,0 +1,27 @@ +import { getSeededTestUser } from "./accounts"; + +// E2E Test Credentials and Constants +export const TEST_CREDENTIALS = getSeededTestUser("primary"); + +export function getTestUserWithLibraryAgents() { + return TEST_CREDENTIALS; +} + +// Dummy constant to help developers identify agents that don't need input +export const DummyInput = "DummyInput"; + +// This will be used for testing agent submission for test123@example.com +export const TEST_AGENT_DATA = { + name: "E2E Calculator Agent", + description: + "A deterministic marketplace agent built from Calculator and Agent Output blocks for frontend E2E coverage.", + image_urls: [ + "https://picsum.photos/seed/e2e-marketplace-1/200/300", + "https://picsum.photos/seed/e2e-marketplace-2/200/301", + "https://picsum.photos/seed/e2e-marketplace-3/200/302", + ], + video_url: "https://www.youtube.com/watch?v=test123", + sub_heading: "A deterministic calculator agent for PR E2E coverage", + categories: ["test", "demo", "frontend"], + changes_summary: "Initial deterministic calculator submission", +} as const; diff --git a/autogpt_platform/frontend/src/playwright/credentials/storage-state.ts b/autogpt_platform/frontend/src/playwright/credentials/storage-state.ts new file mode 100644 index 0000000000..1dbaaa1616 --- /dev/null +++ b/autogpt_platform/frontend/src/playwright/credentials/storage-state.ts @@ -0,0 +1,23 @@ +export function buildCookieConsentStorageState( + origin: string = "http://localhost:3000", +) { + return { + cookies: [], + origins: [ + { + origin, + localStorage: [ + { + name: "autogpt_cookie_consent", + value: JSON.stringify({ + hasConsented: true, + timestamp: Date.now(), + analytics: true, + monitoring: true, + }), + }, + ], + }, + ], + }; +} diff --git a/autogpt_platform/frontend/src/playwright/global-setup.ts b/autogpt_platform/frontend/src/playwright/global-setup.ts new file mode 100644 index 0000000000..90270d32a0 --- /dev/null +++ b/autogpt_platform/frontend/src/playwright/global-setup.ts @@ -0,0 +1,49 @@ +import { FullConfig } from "@playwright/test"; +import { + ensureSeededAuthStates, + getInvalidSeededAuthStateKeys, +} from "./utils/auth"; + +function resolveBaseURL(config: FullConfig) { + const configuredBaseURL = + config.projects[0]?.use?.baseURL ?? 
"http://localhost:3000"; + + if (typeof configuredBaseURL !== "string") { + throw new Error( + `Playwright baseURL must be a string during global setup. Received ${String( + configuredBaseURL, + )}.`, + ); + } + + return configuredBaseURL; +} + +async function globalSetup(config: FullConfig) { + console.log("🚀 Starting global test setup..."); + + try { + const baseURL = resolveBaseURL(config); + const invalidKeys = await getInvalidSeededAuthStateKeys(baseURL); + + if (invalidKeys.length === 0) { + console.log("♻️ Reusing stored seeded auth states"); + return; + } + + console.log( + `🔐 Refreshing seeded auth states for: ${invalidKeys.join(", ")}`, + ); + await ensureSeededAuthStates(baseURL); + + console.log("✅ Global setup completed successfully!"); + } catch (error) { + console.error("❌ Global setup failed:", error); + console.error( + "💡 Run backend/test/e2e_test_data.py to seed the deterministic Playwright accounts before retrying.", + ); + throw error; + } +} + +export default globalSetup; diff --git a/autogpt_platform/frontend/src/playwright/library-happy-path.spec.ts b/autogpt_platform/frontend/src/playwright/library-happy-path.spec.ts new file mode 100644 index 0000000000..f7ed0e796c --- /dev/null +++ b/autogpt_platform/frontend/src/playwright/library-happy-path.spec.ts @@ -0,0 +1,559 @@ +import path from "path"; +import type { Page } from "@playwright/test"; +import { expect, test } from "./coverage-fixture"; +import { E2E_AUTH_STATES } from "./credentials/accounts"; +import { BuildPage, createUniqueAgentName } from "./pages/build.page"; +import { + clickRunButton, + dismissFeedbackDialog, + getActiveItemId, + importAgentFromFile, + LibraryPage, +} from "./pages/library.page"; + +test.use({ storageState: E2E_AUTH_STATES.library }); + +const TEST_AGENT_PATH = path.resolve(__dirname, "assets", "testing_agent.json"); +const CALCULATOR_BLOCK_ID = "b1ab9b19-67a6-406d-abf5-2dba76d00c79"; +const AGENT_OUTPUT_BLOCK_ID = "363ae599-353e-4804-937e-b2ee3cef3da4"; +const STOPPED_RUN_STATUSES = new Set([ + "terminated", + "failed", + "incomplete", + "completed", +]); + +type UploadedGraphNode = { + id: string; + block_id: string; + input_default: Record; + metadata: { + position: { + x: number; + y: number; + }; + }; + input_links: unknown[]; + output_links: unknown[]; +}; + +function createLongRunningCalculatorGraph( + agentName: string, + calculatorCount: number = 150, +) { + const nodes: UploadedGraphNode[] = Array.from( + { length: calculatorCount }, + (_, index) => ({ + id: `calc-${index + 1}`, + block_id: CALCULATOR_BLOCK_ID, + input_default: + index === 0 + ? 
{
+              operation: "Add",
+              a: 1,
+              b: 1,
+              round_result: false,
+            }
+          : {
+              operation: "Add",
+              b: 1,
+              round_result: false,
+            },
+      metadata: {
+        position: { x: 320 * index, y: 120 },
+      },
+      input_links: [],
+      output_links: [],
+    }),
+  );
+
+  const links = Array.from({ length: calculatorCount - 1 }, (_, index) => ({
+    source_id: `calc-${index + 1}`,
+    sink_id: `calc-${index + 2}`,
+    source_name: "result",
+    sink_name: "a",
+  }));
+
+  nodes.push({
+    id: "final-output",
+    block_id: AGENT_OUTPUT_BLOCK_ID,
+    input_default: {
+      name: "Final result",
+      description: "Long-running calculator chain output",
+    },
+    metadata: {
+      position: { x: 320 * calculatorCount, y: 120 },
+    },
+    input_links: [],
+    output_links: [],
+  });
+  links.push({
+    source_id: `calc-${calculatorCount}`,
+    sink_id: "final-output",
+    source_name: "result",
+    sink_name: "value",
+  });
+
+  return {
+    name: agentName,
+    description:
+      "Deterministic long-running calculator chain for runner stop coverage",
+    is_active: true,
+    nodes,
+    links,
+  };
+}
+
+async function createLongRunningSavedAgent(
+  page: Page,
+  agentName: string,
+): Promise<{ graphId: string; graphVersion: number }> {
+  const response = await page.request.post("/api/proxy/api/graphs", {
+    data: {
+      graph: createLongRunningCalculatorGraph(agentName),
+      source: "upload",
+    },
+  });
+  expect(response.ok(), "expected graph creation API request to succeed").toBe(
+    true,
+  );
+
+  const body = (await response.json()) as {
+    id?: string;
+    version?: number;
+    data?: { id?: string; version?: number };
+  };
+  expect(
+    body.data?.id ?? body.id,
+    "graph creation should return a graph id",
+  ).toBeTruthy();
+
+  return {
+    graphId: String(body.data?.id ?? body.id),
+    graphVersion: Number(body.data?.version ?? body.version ?? 1),
+  };
+}
+
+async function createDeterministicCalculatorSavedAgent(
+  page: Page,
+  agentName: string,
+  outputName: string,
+): Promise<void> {
+  const response = await page.request.post("/api/proxy/api/graphs", {
+    data: {
+      graph: {
+        name: agentName,
+        description:
+          "Deterministic calculator output for run-result assertions",
+        is_active: true,
+        nodes: [
+          {
+            id: "calc-1",
+            block_id: CALCULATOR_BLOCK_ID,
+            input_default: {
+              operation: "Add",
+              a: 1,
+              b: 1,
+              round_result: false,
+            },
+            metadata: {
+              position: { x: 120, y: 160 },
+            },
+            input_links: [],
+            output_links: [],
+          },
+          {
+            id: "final-output",
+            block_id: AGENT_OUTPUT_BLOCK_ID,
+            input_default: {
+              name: outputName,
+              description: "Deterministic result output",
+            },
+            metadata: {
+              position: { x: 520, y: 160 },
+            },
+            input_links: [],
+            output_links: [],
+          },
+        ],
+        links: [
+          {
+            source_id: "calc-1",
+            sink_id: "final-output",
+            source_name: "result",
+            sink_name: "value",
+          },
+        ],
+      },
+      source: "upload",
+    },
+  });
+  expect(
+    response.ok(),
+    "expected deterministic calculator graph creation API request to succeed",
+  ).toBe(true);
+}
+
+async function getExecutionStatusFromApi(
+  page: Page,
+  graphId: string,
+  runId: string,
+): Promise<string> {
+  const response = await page.request.get(
+    `/api/proxy/api/graphs/${graphId}/executions/${runId}`,
+  );
+  expect(response.ok(), "execution details API should succeed").toBe(true);
+
+  const body = (await response.json()) as { status?: string };
+  return body.status?.toLowerCase() ??
"unknown"; +} + +async function createAndSaveDeterministicOutputAgent( + page: Page, + prefix: string, +): Promise<{ agentName: string; expectedOutput: string; outputName: string }> { + const buildPage = new BuildPage(page); + const agentName = createUniqueAgentName(prefix); + const expectedOutput = `e2e-output-${Date.now()}`; + const outputName = `e2e-result-${Date.now()}`; + + await buildPage.open(); + await buildPage.addBlockByClick("Store Value"); + await buildPage.waitForNodeOnCanvas(1); + await buildPage.fillBlockInputByPlaceholder( + "Enter string value...", + expectedOutput, + 0, + ); + + await buildPage.addBlockByClick("Agent Output"); + await buildPage.waitForNodeOnCanvas(2); + await buildPage.connectNodes(0, 1); + await buildPage.fillLastNodeTextInput("Agent Output", outputName); + + await buildPage.saveAgent( + agentName, + "Deterministic output agent for library run verification", + ); + await buildPage.waitForSaveComplete(); + await buildPage.waitForSaveButton(); + + return { agentName, expectedOutput, outputName }; +} + +test("library happy path: user can import an agent file into Library", async ({ + page, +}) => { + test.setTimeout(120000); + + const { importedAgent } = await importAgentFromFile( + page, + TEST_AGENT_PATH, + createUniqueAgentName("E2E Import Agent"), + ); + + expect(importedAgent.name).toContain("E2E Import Agent"); +}); + +test("library happy path: user can open the imported or saved agent from Library in builder", async ({ + page, +}) => { + test.setTimeout(120000); + + const { libraryPage, importedAgent } = await importAgentFromFile( + page, + TEST_AGENT_PATH, + createUniqueAgentName("E2E Open Agent"), + ); + + // Register the popup listener before clicking so we don't miss a fast open. + // A short timeout covers the case where the link opens in the current tab. + const popupPromise = page + .context() + .waitForEvent("page", { timeout: 10000 }) + .catch(() => null); + await libraryPage.clickOpenInBuilder(importedAgent); + const builderPage = (await popupPromise) ?? 
page; + + await builderPage.waitForLoadState("domcontentloaded"); + await expect(builderPage).toHaveURL(/\/build/); + const importedBuildPage = new BuildPage(builderPage); + await importedBuildPage.waitForNodeOnCanvas(); + expect(await importedBuildPage.getNodeCount()).toBeGreaterThan(0); + if (builderPage !== page) { + await builderPage.close(); + } +}); + +test("library happy path: user can start and stop a saved task from runner UI", async ({ + page, +}) => { + test.setTimeout(180000); + + const agentName = createUniqueAgentName("E2E Stop Task Agent"); + const { graphId } = await createLongRunningSavedAgent(page, agentName); + + const libraryPage = new LibraryPage(page); + await libraryPage.openSavedAgent(agentName); + await clickRunButton(page); + + await expect + .poll(() => getActiveItemId(page), { timeout: 45000 }) + .not.toBe(null); + const runId = getActiveItemId(page); + expect(runId, "run id should be present after starting task").toBeTruthy(); + await expect + .poll(() => libraryPage.getRunStatus(), { timeout: 45000 }) + .toBe("running"); + + const stopTaskButton = page.getByRole("button", { name: /Stop task/i }); + await expect(stopTaskButton).toBeVisible({ timeout: 30000 }); + const stopResponsePromise = page.waitForResponse( + (response) => + response.request().method() === "POST" && + response + .url() + .includes(`/api/graphs/${graphId}/executions/${runId}/stop`), + { timeout: 15000 }, + ); + await stopTaskButton.click(); + const stopResponse = await stopResponsePromise; + + expect(stopResponse.ok(), "stop run API should succeed").toBe(true); + await expect(page.getByText("Run stopped")).toBeVisible({ timeout: 15000 }); + await expect + .poll( + async () => { + const status = await getExecutionStatusFromApi( + page, + graphId, + String(runId), + ); + return STOPPED_RUN_STATUSES.has(status) ? 
status : "running"; + }, + { timeout: 45000 }, + ) + .not.toBe("running"); +}); + +test("library happy path: user can run a saved agent and verify expected output", async ({ + page, +}) => { + test.setTimeout(150000); + + const agentName = createUniqueAgentName("E2E Expected Output Agent"); + const outputName = `e2e-result-${Date.now()}`; + await createDeterministicCalculatorSavedAgent(page, agentName, outputName); + + const libraryPage = new LibraryPage(page); + await libraryPage.openSavedAgent(agentName); + await clickRunButton(page); + await libraryPage.waitForRunToComplete(); + await dismissFeedbackDialog(page); + + await libraryPage.assertRunProducedOutput(); + await libraryPage.assertRunOutputValue(outputName, /^2(?:\.0+)?$/); + await expect + .poll(() => libraryPage.getRunStatus(), { timeout: 15000 }) + .toBe("completed"); +}); + +test("library happy path: user can edit a saved agent from Library and keep changes after refresh", async ({ + page, +}) => { + test.setTimeout(150000); + + const { agentName } = await createAndSaveDeterministicOutputAgent( + page, + "E2E Edit Persist Agent", + ); + const editedValue = `edited-value-${Date.now()}`; + + const libraryPage = new LibraryPage(page); + await page.goto("/library"); + await libraryPage.waitForAgentsToLoad(); + await libraryPage.searchAgents(agentName); + await libraryPage.waitForAgentsToLoad(); + + const agentCard = page + .getByTestId("library-agent-card") + .filter({ hasText: agentName }) + .first(); + await expect(agentCard).toBeVisible({ timeout: 15000 }); + + const popupPromise = page + .context() + .waitForEvent("page", { timeout: 10000 }) + .catch(() => null); + await agentCard + .getByTestId("library-agent-card-open-in-builder-link") + .first() + .click(); + const builderPage = (await popupPromise) ?? 
page; + + const builderTabPage = new BuildPage(builderPage); + await builderTabPage.waitForNodeOnCanvas(); + await builderTabPage.fillBlockInputByPlaceholder( + "Enter string value...", + editedValue, + 0, + ); + + await builderPage.getByTestId("save-control-save-button").click(); + const saveAgentButton = builderPage.getByRole("button", { + name: "Save Agent", + }); + if (await saveAgentButton.isVisible({ timeout: 3000 }).catch(() => false)) { + await expect(saveAgentButton).toBeEnabled({ timeout: 10000 }); + await saveAgentButton.click(); + await expect(saveAgentButton).toBeHidden({ timeout: 15000 }); + } + + await builderPage.reload(); + await builderTabPage.waitForNodeOnCanvas(); + await expect( + builderTabPage + .getNodeLocator(0) + .locator('input[placeholder="Enter string value..."]'), + ).toHaveValue(editedValue); + + if (builderPage !== page) { + await builderPage.close(); + } +}); + +test("library happy path: user can rerun a completed task from the Library agent page", async ({ + page, +}) => { + test.setTimeout(120000); + + const buildPage = new BuildPage(page); + const { agentName } = + await buildPage.createAndSaveSimpleAgent("E2E Rerun Agent"); + + const libraryPage = new LibraryPage(page); + await libraryPage.openSavedAgent(agentName); + await clickRunButton(page); + await libraryPage.waitForRunToComplete(); + await dismissFeedbackDialog(page); + + const rerunTaskButton = page.getByRole("button", { name: /Rerun task/i }); + await expect(rerunTaskButton).toBeVisible({ timeout: 45000 }); + + await expect + .poll(() => getActiveItemId(page), { timeout: 45000 }) + .not.toBe(null); + + const initialRunId = getActiveItemId(page); + expect(initialRunId).toBeTruthy(); + + await rerunTaskButton.click(); + + await expect(page.getByText("Run started", { exact: true })).toBeVisible({ + timeout: 15000, + }); + + await expect + .poll(() => getActiveItemId(page), { timeout: 45000 }) + .not.toBe(initialRunId); + + await libraryPage.waitForRunToComplete(); + + // Simple agent has no AgentOutputBlock — verify run completion only. + const runStatus = await libraryPage.getRunStatus(); + expect(runStatus).toBe("completed"); +}); + +test("library happy path: user can delete a completed task from the run sidebar", async ({ + page, +}) => { + test.setTimeout(120000); + + const buildPage = new BuildPage(page); + const { agentName } = await buildPage.createAndSaveSimpleAgent( + "E2E Delete Task Agent", + ); + + const libraryPage = new LibraryPage(page); + await libraryPage.openSavedAgent(agentName); + await clickRunButton(page); + await libraryPage.waitForRunToComplete(); + await dismissFeedbackDialog(page); + + // Open the per-task actions dropdown ("More actions" three-dot button) + // and use the menu's Delete task option to remove the run. + const moreActionsButton = page + .getByRole("button", { name: "More actions" }) + .first(); + await expect(moreActionsButton).toBeVisible({ timeout: 15000 }); + await moreActionsButton.click(); + + await page.getByRole("menuitem", { name: /Delete( this)? task/i }).click(); + + const confirmDialog = page.getByRole("dialog", { name: /Delete task/i }); + await expect(confirmDialog).toBeVisible({ timeout: 10000 }); + await confirmDialog.getByRole("button", { name: /^Delete Task$/ }).click(); + + // Toast confirms the backend actually deleted (not just dialog closed). + await expect(page.getByText("Task deleted", { exact: true })).toBeVisible({ + timeout: 15000, + }); + + // Sidebar should drop the only run, returning the page to initial + // task-entry state. 
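+  // (Either label is accepted so the assertion doesn't pin one copy variant
+  // of that state.)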
+ await expect( + page.getByRole("button", { name: /^(Setup your task|New task)$/i }), + ).toBeVisible({ timeout: 15000 }); +}); + +test("library happy path: user can open the agent in builder from the exact runner customise-agent path", async ({ + page, + context, +}) => { + test.setTimeout(120000); + + const buildPage = new BuildPage(page); + const { agentName } = await buildPage.createAndSaveSimpleAgent( + "E2E View Task Agent", + ); + + const libraryPage = new LibraryPage(page); + await libraryPage.openSavedAgent(agentName); + await clickRunButton(page); + await libraryPage.waitForRunToComplete(); + await dismissFeedbackDialog(page); + + // The "View task details" eye-icon button on a completed run opens the + // agent in the builder in a new tab. This exercises the runner → builder + // navigation that QA item #22 ("Customise Agent" from Runner UI) covers. + const selectedRunId = getActiveItemId(page); + expect(selectedRunId).toBeTruthy(); + + const viewTaskButton = page + .locator('[aria-label="View task details"]') + .first(); + await expect(viewTaskButton).toBeVisible({ timeout: 15000 }); + const customiseAgentHref = await viewTaskButton.getAttribute("href"); + expect(customiseAgentHref).toContain("flowID="); + expect(customiseAgentHref).toContain("flowVersion="); + expect(customiseAgentHref).toContain(`flowExecutionID=${selectedRunId}`); + + const popupPromise = context.waitForEvent("page", { timeout: 15000 }); + await viewTaskButton.click(); + const builderTab = await popupPromise; + + await builderTab.waitForLoadState("domcontentloaded"); + await expect(builderTab).toHaveURL(/\/build/); + await expect(builderTab).toHaveURL( + new RegExp(`flowExecutionID=${selectedRunId}`), + ); + + // Verify the builder canvas actually rendered with the agent's nodes — + // a navigation that lands on /build but never paints the graph would + // otherwise pass on URL alone. 
+  const builderTabPage = new BuildPage(builderTab);
+  await builderTabPage.waitForNodeOnCanvas();
+  expect(await builderTabPage.getNodeCount()).toBeGreaterThan(0);
+
+  await builderTab.close();
+});
diff --git a/autogpt_platform/frontend/src/playwright/marketplace-happy-path.spec.ts b/autogpt_platform/frontend/src/playwright/marketplace-happy-path.spec.ts
new file mode 100644
index 0000000000..f81386ea40
--- /dev/null
+++ b/autogpt_platform/frontend/src/playwright/marketplace-happy-path.spec.ts
@@ -0,0 +1,48 @@
+import { expect, test } from "./coverage-fixture";
+import { E2E_AUTH_STATES } from "./credentials/accounts";
+import {
+  clickRunButton,
+  dismissFeedbackDialog,
+  LibraryPage,
+} from "./pages/library.page";
+import { MarketplacePage } from "./pages/marketplace.page";
+
+test.use({ storageState: E2E_AUTH_STATES.marketplace });
+
+test("marketplace happy path: user can browse Marketplace and open an agent detail page", async ({
+  page,
+}) => {
+  test.setTimeout(90000);
+
+  const marketplacePage = new MarketplacePage(page);
+  await marketplacePage.openFeaturedAgent();
+
+  await expect(page.getByTestId("agent-description")).toBeVisible();
+});
+
+test("marketplace happy path: user can add a Marketplace agent to Library and run it", async ({
+  page,
+}) => {
+  test.setTimeout(120000);
+
+  const marketplacePage = new MarketplacePage(page);
+  await marketplacePage.openRunnableAgent();
+
+  const agentName = await page.getByTestId("agent-title").innerText();
+
+  await page.getByTestId("agent-add-library-button").click();
+  await expect(page.getByText("Redirecting to your library...")).toBeVisible();
+  await expect(page).toHaveURL(/\/library\/agents\//);
+
+  const libraryPage = new LibraryPage(page);
+  await libraryPage.openSavedAgent(agentName);
+  await clickRunButton(page);
+
+  await libraryPage.waitForRunToComplete();
+  await dismissFeedbackDialog(page);
+
+  const runStatus = await libraryPage.getRunStatus();
+  expect(runStatus).toBe("completed");
+  await libraryPage.assertRunProducedOutput();
+  await libraryPage.assertFirstRunOutputValue(/^\d+(?:\.0+)?$/);
+});
diff --git a/autogpt_platform/frontend/src/tests/pages/base.page.ts b/autogpt_platform/frontend/src/playwright/pages/base.page.ts
similarity index 100%
rename from autogpt_platform/frontend/src/tests/pages/base.page.ts
rename to autogpt_platform/frontend/src/playwright/pages/base.page.ts
diff --git a/autogpt_platform/frontend/src/playwright/pages/build.page.ts b/autogpt_platform/frontend/src/playwright/pages/build.page.ts
new file mode 100644
index 0000000000..7c3649201f
--- /dev/null
+++ b/autogpt_platform/frontend/src/playwright/pages/build.page.ts
@@ -0,0 +1,642 @@
+import { randomUUID } from "crypto";
+import { expect, Locator, Page } from "@playwright/test";
+import { BasePage } from "./base.page";
+
+export function createUniqueAgentName(prefix: string): string {
+  return `${prefix} ${Date.now()}-${randomUUID().slice(0, 8)}`;
+}
+
+function escapeRegex(text: string): string {
+  return text.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
+}
+
+export class BuildPage extends BasePage {
+  constructor(page: Page) {
+    super(page);
+  }
+
+  // --- Navigation ---
+
+  async goto(): Promise<void> {
+    await this.page.goto("/build");
+    await this.page.waitForLoadState("domcontentloaded");
+  }
+
+  async isLoaded(): Promise<boolean> {
+    try {
+      await this.page.waitForLoadState("domcontentloaded", { timeout: 10_000 });
+      await this.page
+        .locator(".react-flow")
+        .waitFor({ state: "visible", timeout: 10_000 });
+      return true;
+    } catch {
+      return false;
+    }
+  }
+
+  async closeTutorial(): Promise<void> {
+    try {
+      await this.page
+        .getByRole("button", { name: "Skip Tutorial", exact: true })
+        .click({ timeout: 3000 });
+    } catch {
+      // Tutorial not shown or already dismissed
+    }
+  }
+
+  // --- Block Menu ---
+
+  async openBlocksPanel(): Promise<void> {
+    const popoverContent = this.page.locator(
+      '[data-id="blocks-control-popover-content"]',
+    );
+    if (!(await popoverContent.isVisible())) {
+      await this.page.getByTestId("blocks-control-blocks-button").click();
+      await popoverContent.waitFor({ state: "visible", timeout: 5000 });
+    }
+  }
+
+  async closeBlocksPanel(): Promise<void> {
+    const popoverContent = this.page.locator(
+      '[data-id="blocks-control-popover-content"]',
+    );
+    if (await popoverContent.isVisible()) {
+      await this.page.getByTestId("blocks-control-blocks-button").click();
+      await popoverContent.waitFor({ state: "hidden", timeout: 5000 });
+    }
+  }
+
+  async searchBlock(searchTerm: string): Promise<void> {
+    const searchInput = this.page.locator(
+      '[data-id="blocks-control-search-bar"] input[type="text"]',
+    );
+    await searchInput.clear();
+    await searchInput.fill(searchTerm);
+    await expect(searchInput).toHaveValue(searchTerm);
+  }
+
+  private getBlockCardByName(name: string): Locator {
+    const escapedName = name.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
+    const exactName = new RegExp(`^\\s*${escapedName}\\s*$`, "i");
+    return this.page
+      .locator('[data-id^="block-card-"]')
+      .filter({ has: this.page.locator("span", { hasText: exactName }) })
+      .first();
+  }
+
+  async addBlockByClick(searchTerm: string): Promise<void> {
+    await this.openBlocksPanel();
+    const blockCard = this.getBlockCardByName(searchTerm);
+
+    for (let attempt = 0; attempt < 2; attempt++) {
+      await this.searchBlock(searchTerm);
+
+      const cardVisible = await blockCard
+        .waitFor({
+          state: "visible",
+          timeout: attempt === 0 ? 15000 : 5000,
+        })
+        .then(() => true)
+        .catch(() => false);
+
+      if (cardVisible) {
+        break;
+      }
+    }
+
+    await expect(blockCard).toBeVisible({ timeout: 5000 });
+    await blockCard.click();
+
+    // Close the panel so it doesn't overlay the canvas
+    await this.closeBlocksPanel();
+  }
+
+  async dragBlockToCanvas(searchTerm: string): Promise<void> {
+    await this.openBlocksPanel();
+    await this.searchBlock(searchTerm);
+
+    const anyCard = this.page.locator('[data-id^="block-card-"]').first();
+    await anyCard.waitFor({ state: "visible", timeout: 10000 });
+
+    const blockCard = this.getBlockCardByName(searchTerm);
+    await blockCard.waitFor({ state: "visible", timeout: 5000 });
+
+    const canvas = this.page.locator(".react-flow__pane").first();
+    await blockCard.dragTo(canvas);
+  }
+
+  // --- Nodes on Canvas ---
+
+  getNodeLocator(index?: number): Locator {
+    const locator = this.page.locator('[data-id^="custom-node-"]');
+    return index !== undefined ?
locator.nth(index) : locator;
+  }
+
+  getNodeLocatorByTitle(title: string): Locator {
+    const exactTitle = new RegExp(`^\\s*${escapeRegex(title)}\\s*$`, "i");
+    return this.page
+      .locator('[data-id^="custom-node-"]')
+      .filter({ has: this.page.getByText(exactTitle) })
+      .first();
+  }
+
+  getNodeTextInputs(nodeTitle: string): Locator {
+    return this.getNodeLocatorByTitle(nodeTitle).locator(
+      'input[placeholder="Enter string value..."]:visible',
+    );
+  }
+
+  getNodeTextInput(nodeTitle: string, inputIndex = 0): Locator {
+    return this.getNodeTextInputs(nodeTitle).nth(inputIndex);
+  }
+
+  async fillNodeTextInput(
+    nodeTitle: string,
+    value: string,
+    inputIndex = 0,
+  ): Promise<void> {
+    const node = this.getNodeLocatorByTitle(nodeTitle);
+    await expect(node).toBeVisible({ timeout: 15000 });
+    await expect
+      .poll(async () => await this.getNodeTextInputs(nodeTitle).count(), {
+        timeout: 15000,
+      })
+      .toBeGreaterThan(inputIndex);
+    const input = this.getNodeTextInput(nodeTitle, inputIndex);
+    await input.scrollIntoViewIfNeeded();
+    await input.fill(value);
+  }
+
+  async fillLastNodeTextInput(nodeTitle: string, value: string): Promise<void> {
+    const node = this.getNodeLocatorByTitle(nodeTitle);
+    await expect(node).toBeVisible({ timeout: 15000 });
+    await expect
+      .poll(async () => await this.getNodeTextInputs(nodeTitle).count(), {
+        timeout: 15000,
+      })
+      .toBeGreaterThan(0);
+    const input = this.getNodeTextInputs(nodeTitle).last();
+    await input.scrollIntoViewIfNeeded();
+    await input.fill(value);
+  }
+
+  async getNodeCount(): Promise<number> {
+    return await this.getNodeLocator().count();
+  }
+
+  async waitForNodeOnCanvas(expectedCount?: number): Promise<void> {
+    if (expectedCount !== undefined) {
+      await expect(this.getNodeLocator()).toHaveCount(expectedCount, {
+        timeout: 10000,
+      });
+    } else {
+      await this.getNodeLocator()
+        .first()
+        .waitFor({ state: "visible", timeout: 10000 });
+    }
+  }
+
+  async selectNode(index: number = 0): Promise<void> {
+    const node = this.getNodeLocator(index);
+    await node.click();
+  }
+
+  async selectAllNodes(): Promise<void> {
+    await this.page.locator(".react-flow__pane").first().click();
+    const isMac = process.platform === "darwin";
+    await this.page.keyboard.press(isMac ?
"Meta+a" : "Control+a"); + } + + async deleteSelectedNodes(): Promise { + await this.page.keyboard.press("Backspace"); + } + + // --- Connections (Edges) --- + + async connectNodes( + sourceNodeIndex: number, + targetNodeIndex: number, + ): Promise { + // Get the node wrapper elements to scope handle search + const sourceNode = this.getNodeLocator(sourceNodeIndex); + const targetNode = this.getNodeLocator(targetNodeIndex); + + // ReactFlow renders Handle components as .react-flow__handle elements + // Output handles have class .react-flow__handle-right (Position.Right) + // Input handles have class .react-flow__handle-left (Position.Left) + const sourceHandle = sourceNode + .locator(".react-flow__handle-right") + .first(); + const targetHandle = targetNode.locator(".react-flow__handle-left").first(); + + // Get precise center coordinates using evaluate to avoid CSS transform issues + const getHandleCenter = async (locator: Locator) => { + const el = await locator.elementHandle(); + if (!el) throw new Error("Handle element not found"); + const rect = await el.evaluate((node) => { + const r = node.getBoundingClientRect(); + return { x: r.x + r.width / 2, y: r.y + r.height / 2 }; + }); + return rect; + }; + + const source = await getHandleCenter(sourceHandle); + const target = await getHandleCenter(targetHandle); + + // ReactFlow requires a proper drag sequence with intermediate moves + await this.page.mouse.move(source.x, source.y); + await this.page.mouse.down(); + // Move in steps to trigger ReactFlow's connection detection + const steps = 20; + for (let i = 1; i <= steps; i++) { + const ratio = i / steps; + await this.page.mouse.move( + source.x + (target.x - source.x) * ratio, + source.y + (target.y - source.y) * ratio, + ); + } + await this.page.mouse.up(); + } + + async getEdgeCount(): Promise { + return await this.page.locator(".react-flow__edge").count(); + } + + // --- Save --- + + async saveAgent( + name: string = "Test Agent", + description: string = "", + ): Promise { + await this.page.getByTestId("save-control-save-button").click(); + + const nameInput = this.page.getByTestId("save-control-name-input"); + await nameInput.waitFor({ state: "visible", timeout: 5000 }); + await nameInput.fill(name); + + if (description) { + await this.page + .getByTestId("save-control-description-input") + .fill(description); + } + + await this.page.getByTestId("save-control-save-agent-button").click(); + } + + async waitForSaveComplete(): Promise { + await expect(this.page).toHaveURL(/flowID=/, { timeout: 15000 }); + } + + async waitForSaveButton(): Promise { + await this.page.waitForSelector( + '[data-testid="save-control-save-button"]:not([disabled])', + { timeout: 10000 }, + ); + } + + // --- Run --- + + async isRunButtonEnabled(): Promise { + const runButton = this.page.locator('[data-id="run-graph-button"]'); + return await runButton.isEnabled(); + } + + async clickRunButton(): Promise { + // Dismiss any post-save toast that may be intercepting pointer events on + // the run button. Actively close it rather than waiting for Sonner's + // default auto-dismiss — the auto-dismiss + fade-out routinely runs over + // 5s and caused flakes here. The toast is optional (only after save), so + // the dismissal is guarded. 
+    await this.dismissSaveToast();
+    const runButton = this.page.locator('[data-id="run-graph-button"]');
+    await runButton.click();
+  }
+
+  // --- Undo / Redo ---
+
+  async isUndoEnabled(): Promise<boolean> {
+    const btn = this.page.locator('[data-id="undo-button"]');
+    return !(await btn.isDisabled());
+  }
+
+  async isRedoEnabled(): Promise<boolean> {
+    const btn = this.page.locator('[data-id="redo-button"]');
+    return !(await btn.isDisabled());
+  }
+
+  async clickUndo(): Promise<void> {
+    await this.page.locator('[data-id="undo-button"]').click();
+  }
+
+  async clickRedo(): Promise<void> {
+    await this.page.locator('[data-id="redo-button"]').click();
+  }
+
+  // --- Copy / Paste ---
+
+  async copyViaKeyboard(): Promise<void> {
+    const isMac = process.platform === "darwin";
+    await this.page.keyboard.press(isMac ? "Meta+c" : "Control+c");
+  }
+
+  async pasteViaKeyboard(): Promise<void> {
+    const isMac = process.platform === "darwin";
+    await this.page.keyboard.press(isMac ? "Meta+v" : "Control+v");
+  }
+
+  // --- Helpers ---
+
+  async fillBlockInputByPlaceholder(
+    placeholder: string,
+    value: string,
+    nodeIndex: number = 0,
+  ): Promise<void> {
+    const node = this.getNodeLocator(nodeIndex);
+    const input = node.getByPlaceholder(placeholder);
+    await input.fill(value);
+  }
+
+  async clickCanvas(): Promise<void> {
+    const pane = this.page.locator(".react-flow__pane").first();
+    const box = await pane.boundingBox();
+    if (box) {
+      // Click in the center of the canvas to avoid sidebar/toolbar overlaps
+      await pane.click({
+        position: { x: box.width / 2, y: box.height / 2 },
+      });
+    } else {
+      await pane.click();
+    }
+  }
+
+  getPlaywrightPage(): Page {
+    return this.page;
+  }
+
+  getSavedGraphRef(): { graphId: string; graphVersion: number } {
+    const currentUrl = new URL(this.page.url());
+    const graphId = currentUrl.searchParams.get("flowID");
+    const graphVersion = Number(currentUrl.searchParams.get("flowVersion"));
+
+    if (!graphId || Number.isNaN(graphVersion)) {
+      throw new Error(
+        `Saved graph reference missing from builder URL: ${this.page.url()}`,
+      );
+    }
+
+    return { graphId, graphVersion };
+  }
+
+  async createDummyAgent(): Promise<void> {
+    await this.closeTutorial();
+    await this.addBlockByClick("Add to Dictionary");
+    await this.waitForNodeOnCanvas(1);
+    await this.saveAgent("Test Agent", "Test Description");
+    await this.waitForSaveComplete();
+  }
+
+  // --- Happy-path flows shared across PR smoke specs ---
+
+  async open(): Promise<void> {
+    await this.goto();
+    await this.closeTutorial();
+    await expect(this.page.locator(".react-flow")).toBeVisible({
+      timeout: 15000,
+    });
+    await expect(
+      this.page.getByTestId("blocks-control-blocks-button"),
+    ).toBeVisible({ timeout: 15000 });
+  }
+
+  async addSimpleAgentBlocks(): Promise<void> {
+    await this.addBlockByClick("Store Value");
+    await this.waitForNodeOnCanvas(1);
+    await this.fillBlockInputByPlaceholder(
+      "Enter string value...",
+      "smoke-value",
+      0,
+    );
+
+    await this.addBlockByClick("Add to Dictionary");
+    await this.waitForNodeOnCanvas(2);
+
+    await this.fillNodeTextInput("Add to Dictionary", "smoke-key", 0);
+    await this.fillNodeTextInput("Add to Dictionary", "smoke-value", 1);
+
+    // Connect Store Value's output to Add to Dictionary so the graph has a
+    // real edge and actually produces output when run. Without this edge the
+    // graph runs but emits no output, and `assertRunProducedOutput` rightly
+    // fails — catching exactly the "I forgot to connect the blocks" bug
+    // manual QA would catch.
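+    // (connectNodes takes canvas indices in insertion order: node 0 is the
+    // Store Value block, node 1 is Add to Dictionary.)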
+    await this.connectNodes(0, 1);
+  }
+
+  async createAndSaveSimpleAgent(
+    prefix: string,
+  ): Promise<{ agentName: string; graphId: string; graphVersion: number }> {
+    await this.open();
+    const agentName = createUniqueAgentName(prefix);
+
+    await this.addSimpleAgentBlocks();
+    await this.saveAgent(agentName, "PR E2E builder coverage");
+    await this.waitForSaveComplete();
+    await this.waitForSaveButton();
+    const { graphId, graphVersion } = this.getSavedGraphRef();
+
+    return { agentName, graphId, graphVersion };
+  }
+
+  async dismissSaveToast(): Promise<void> {
+    const closeToastButton = this.page.getByRole("button", {
+      name: "Close toast",
+    });
+    // Toast is optional — only shown after a save action
+    if (await closeToastButton.isVisible({ timeout: 1000 })) {
+      await closeToastButton.click();
+    }
+
+    // If the toast appeared but is not yet hidden, wait for it. If it never
+    // appeared at all the locator is simply hidden already — no-op.
+    const savedToast = this.page.getByText("Graph saved successfully");
+    if (await savedToast.isVisible({ timeout: 500 })) {
+      await expect(savedToast).toBeHidden({ timeout: 10000 });
+    }
+  }
+
+  async startRun(): Promise<void> {
+    await this.clickRunButton();
+
+    // The run-input dialog is optional — agents without required inputs skip it
+    const runDialog = this.page.locator('[data-id="run-input-dialog-content"]');
+    if (await runDialog.isVisible({ timeout: 5000 })) {
+      await this.page
+        .locator('[data-id="run-input-manual-run-button"]')
+        .click();
+    }
+  }
+
+  async getExecutionState(): Promise<"running" | "idle" | "unknown"> {
+    const stopButton = this.page.locator('[data-id="stop-graph-button"]');
+    if (await stopButton.isVisible().catch(() => false)) {
+      return "running";
+    }
+
+    const runButton = this.page.locator('[data-id="run-graph-button"]');
+    if (await runButton.isVisible().catch(() => false)) {
+      return "idle";
+    }
+
+    return "unknown";
+  }
+
+  // --- Tutorial (Shepherd.js tour) ---
+
+  // Each Shepherd step's title has id `${stepId}-label`; using it avoids
+  // title-overlap collisions like "Open the Block Menu" vs "The Block Menu".
+  private getShepherdStep(stepId: string): Locator {
+    return this.page.locator(`#${stepId}-label`);
+  }
+
+  // Scope to .shepherd-enabled so we don't click buttons on hidden-but-still-
+  // attached previous steps.
+  private getShepherdButton(name: string | RegExp): Locator {
+    return this.page
+      .locator(".shepherd-element.shepherd-enabled")
+      .getByRole("button", { name });
+  }
+
+  async startTutorial(): Promise<void> {
+    // Tutorial only starts from pristine /build; a flowID query param routes
+    // the tutorial button to /build?view=new instead.
+    await this.page.goto("/build");
+    await this.page.waitForLoadState("domcontentloaded");
+    await expect(this.page.locator(".react-flow")).toBeVisible({
+      timeout: 15000,
+    });
+
+    await this.page.evaluate(() => {
+      window.localStorage.removeItem("shepherd-tour");
+    });
+
+    const tutorialButton = this.page.locator('[data-id="tutorial-button"]');
+    await expect(tutorialButton).toBeVisible({ timeout: 15000 });
+    await expect(tutorialButton).toBeEnabled({ timeout: 15000 });
+    await tutorialButton.click();
+
+    await expect(this.getShepherdStep("welcome")).toBeVisible({
+      timeout: 15000,
+    });
+  }
+
+  async walkWelcomeToBlockMenu(): Promise<void> {
+    await this.getShepherdButton("Let's Begin").click();
+
+    await expect(this.getShepherdStep("open-block-menu")).toBeVisible({
+      timeout: 10000,
+    });
+    await this.page
+      .locator('[data-id="blocks-control-popover-trigger"]')
+      .click();
+
+    await expect(this.getShepherdStep("block-menu-overview")).toBeVisible({
+      timeout: 10000,
+    });
+    await this.getShepherdButton("Next").click();
+  }
+
+  async walkSearchAndAddCalculator(): Promise<void> {
+    // search-calculator auto-advances once the Calculator block card appears
+    // in the filtered results; select-calculator auto-advances once the
+    // Calculator is added to the node store.
+    await expect(this.getShepherdStep("search-calculator")).toBeVisible({
+      timeout: 10000,
+    });
+    await this.page
+      .locator('[data-id="blocks-control-search-bar"] input[type="text"]')
+      .fill("Calculator");
+
+    const calculatorCard = this.page.locator(
+      '[data-id="blocks-control-search-results"] [data-id="block-card-b1ab9b1967a6406dabf52dba76d00c79"]',
+    );
+    await expect(calculatorCard).toBeVisible({ timeout: 15000 });
+
+    await expect(this.getShepherdStep("select-calculator")).toBeVisible({
+      timeout: 15000,
+    });
+    await calculatorCard.scrollIntoViewIfNeeded();
+    await calculatorCard.click();
+
+    await expect(this.getShepherdStep("focus-new-block")).toBeVisible({
+      timeout: 10000,
+    });
+    await this.waitForNodeOnCanvas(1);
+  }
+
+  // Use dispatchEvent — the Shepherd cancel icon sits inside a step that's
+  // pinned to an off-screen React Flow node, so Playwright's visibility
+  // checks reject a normal click. A synthetic click event still triggers
+  // tour.cancel() via Shepherd's listener.
+  async cancelTutorial(): Promise<void> {
+    await this.page
+      .locator(".shepherd-element.shepherd-enabled .shepherd-cancel-icon")
+      .first()
+      .dispatchEvent("click");
+    await expect(
+      this.page.locator(".shepherd-element.shepherd-enabled"),
+    ).toHaveCount(0, { timeout: 10000 });
+  }
+
+  // NOTE: welcome.ts "Skip Tutorial" only calls handleTutorialSkip, which
+  // writes localStorage but does NOT call tour.cancel(). The tour UI stays
+  // open — the skip state is persisted so the next /build visit knows the
+  // user already dismissed the tour.
Callers that want the UI closed must
+  // also call cancelTutorial().
+  async skipTutorialFromWelcome(): Promise<void> {
+    await expect(this.getShepherdStep("welcome")).toBeVisible({
+      timeout: 10000,
+    });
+    await this.getShepherdButton(/Skip Tutorial/i).click();
+    await expect
+      .poll(() => this.getTutorialStateFromStorage(), { timeout: 5000 })
+      .toBe("skipped");
+  }
+
+  async getTutorialStateFromStorage(): Promise<string | null> {
+    return this.page.evaluate(() =>
+      window.localStorage.getItem("shepherd-tour"),
+    );
+  }
+
+  async createScheduleForSavedAgent(agentName: string): Promise<void> {
+    await this.dismissSaveToast();
+
+    const { graphId, graphVersion } = this.getSavedGraphRef();
+    const scheduleName = `Daily ${agentName}`;
+    const scheduleCreateUrl = `/api/proxy/api/graphs/${graphId}/schedules`;
+    const timeoutAt = Date.now() + 45000;
+    let lastFailure = "schedule request did not run";
+
+    while (Date.now() < timeoutAt) {
+      const createResponse = await this.page.request.post(scheduleCreateUrl, {
+        data: {
+          name: scheduleName,
+          graph_version: graphVersion,
+          cron: "0 10 * * *",
+          inputs: {},
+          credentials: {},
+          timezone: "UTC",
+        },
+      });
+
+      const createResponseBody = await createResponse.text();
+      if (createResponse.ok()) {
+        return;
+      }
+
+      lastFailure = `${createResponse.status()} ${createResponseBody}`;
+      await this.page.waitForTimeout(1000);
+    }
+
+    throw new Error(`schedule creation API should succeed: ${lastFailure}`);
+  }
+}
diff --git a/autogpt_platform/frontend/src/playwright/pages/copilot.page.ts b/autogpt_platform/frontend/src/playwright/pages/copilot.page.ts
new file mode 100644
index 0000000000..d67e20ef6e
--- /dev/null
+++ b/autogpt_platform/frontend/src/playwright/pages/copilot.page.ts
@@ -0,0 +1,44 @@
+import { expect, Locator, Page } from "@playwright/test";
+import { BasePage } from "./base.page";
+
+export class CopilotPage extends BasePage {
+  constructor(page: Page) {
+    super(page);
+  }
+
+  async open(sessionId?: string): Promise<void> {
+    const url = sessionId ?
`/copilot?sessionId=${sessionId}` : "/copilot";
+    await this.page.goto(url);
+    await expect(this.page).toHaveURL(/\/copilot/);
+    await this.dismissNotificationPrompt();
+  }
+
+  async dismissNotificationPrompt(): Promise<void> {
+    // Notification permission prompt is optional — only shown on first visit
+    const notNowButton = this.page.getByRole("button", { name: "Not now" });
+    if (await notNowButton.isVisible({ timeout: 3000 })) {
+      await notNowButton.click();
+    }
+  }
+
+  async createSessionViaApi(): Promise<string> {
+    const response = await this.page.request.post(
+      "/api/proxy/api/chat/sessions",
+      { data: null },
+    );
+    expect(response.ok()).toBeTruthy();
+
+    const session = await response.json();
+    const sessionId = session?.id;
+    expect(sessionId).toBeTruthy();
+    return sessionId as string;
+  }
+
+  getChatInput(): Locator {
+    return this.page.locator("#chat-input-session");
+  }
+
+  async waitForChatInput(): Promise<void> {
+    await expect(this.getChatInput()).toBeVisible({ timeout: 15000 });
+  }
+}
diff --git a/autogpt_platform/frontend/src/tests/pages/header.page.ts b/autogpt_platform/frontend/src/playwright/pages/header.page.ts
similarity index 100%
rename from autogpt_platform/frontend/src/tests/pages/header.page.ts
rename to autogpt_platform/frontend/src/playwright/pages/header.page.ts
diff --git a/autogpt_platform/frontend/src/playwright/pages/library.page.ts b/autogpt_platform/frontend/src/playwright/pages/library.page.ts
new file mode 100644
index 0000000000..85c3f3978a
--- /dev/null
+++ b/autogpt_platform/frontend/src/playwright/pages/library.page.ts
@@ -0,0 +1,1342 @@
+import { expect, Locator, Page } from "@playwright/test";
+import { getSeededTestUser } from "../credentials/accounts";
+import { getSelectors } from "../utils/selectors";
+import { BasePage } from "./base.page";
+
+export interface Agent {
+  id: string;
+  name: string;
+  description: string;
+  imageUrl?: string;
+  seeRunsUrl: string;
+  openInBuilderUrl: string;
+}
+
+export class LibraryPage extends BasePage {
+  constructor(page: Page) {
+    super(page);
+  }
+
+  async isLoaded(): Promise<boolean> {
+    console.log(`checking if library page is loaded`);
+    try {
+      await this.page.waitForLoadState("domcontentloaded", { timeout: 10_000 });
+
+      await this.page.waitForSelector('[data-testid="library-textbox"]', {
+        state: "visible",
+        timeout: 10_000,
+      });
+
+      console.log("Library page is loaded successfully");
+      return true;
+    } catch (error) {
+      console.log("Library page failed to load:", error);
+      return false;
+    }
+  }
+
+  async navigateToLibrary(): Promise<void> {
+    await this.page.goto("/library");
+    await this.isLoaded();
+  }
+
+  async openSavedAgent(agentName: string): Promise<void> {
+    await openSavedAgentInLibrary(this.page, agentName);
+  }
+
+  async waitForRunToComplete(timeout = 45000): Promise<void> {
+    await waitForRunToComplete(this.page, timeout);
+  }
+
+  async getRunStatus(): Promise<string> {
+    return getRunStatus(this.page);
+  }
+
+  async assertRunProducedOutput(timeout = 15000): Promise<void> {
+    await assertRunProducedOutput(this.page, timeout);
+  }
+
+  async assertRunOutputValue(
+    outputName: string,
+    expectedValue: RegExp | string,
+    timeout = 15000,
+  ): Promise<void> {
+    await assertRunOutputValue(this.page, outputName, expectedValue, timeout);
+  }
+
+  async assertFirstRunOutputValue(
+    expectedValue: RegExp | string,
+    timeout = 15000,
+  ): Promise<void> {
+    await assertRunOutputContainsText(this.page, expectedValue, timeout);
+  }
+
+  async clickExportAgent(): Promise<void> {
+    await clickExportAgent(this.page);
+  }
+
+  async getAgentCount(): Promise<number> {
+    const { getId
} = getSelectors(this.page);
+    const countText = await getId("agents-count").textContent();
+    const match = countText?.match(/^(\d+)/);
+    return match ? parseInt(match[1], 10) : 0;
+  }
+
+  async getAgentCountByListLength(): Promise<number> {
+    const { getId } = getSelectors(this.page);
+    const agentCards = await getId("library-agent-card").all();
+    return agentCards.length;
+  }
+
+  async searchAgents(searchTerm: string): Promise<void> {
+    console.log(`searching for agents with term: ${searchTerm}`);
+    const { getRole } = getSelectors(this.page);
+    const searchInput = getRole("textbox", "Search agents");
+    await searchInput.fill(searchTerm);
+    await expect(searchInput).toHaveValue(searchTerm);
+  }
+
+  async clearSearch(): Promise<void> {
+    console.log(`clearing search`);
+    // Look for the clear button (X icon)
+    const clearButton = this.page.locator(".lucide.lucide-x");
+    const searchInput = this.page.getByRole("textbox", {
+      name: "Search agents",
+    });
+    if (await clearButton.isVisible()) {
+      await clearButton.click();
+    } else {
+      // If no clear button, clear the search input directly
+      await searchInput.fill("");
+    }
+    await expect(searchInput).toHaveValue("");
+  }
+
+  async selectSortOption(
+    page: Page,
+    sortOption: "Creation Date" | "Last Modified",
+  ): Promise<void> {
+    const { getRole } = getSelectors(page);
+    await getRole("combobox").click();
+
+    await getRole("option", sortOption).click();
+  }
+
+  async getCurrentSortOption(): Promise<string> {
+    console.log(`getting current sort option`);
+    try {
+      const sortCombobox = this.page.getByRole("combobox");
+      const currentOption = await sortCombobox.textContent();
+      return currentOption?.trim() || "";
+    } catch (error) {
+      console.error("Error getting current sort option:", error);
+      return "";
+    }
+  }
+
+  async openUploadDialog(): Promise<void> {
+    console.log(`opening upload dialog`);
+    // Open the unified Import dialog first
+    await this.page.getByRole("button", { name: "Import" }).click();
+
+    // Wait for dialog to appear
+    await this.page.getByRole("dialog", { name: "Import" }).waitFor({
+      state: "visible",
+      timeout: 5_000,
+    });
+
+    // Click the "AutoGPT agent" tab
+    await this.page.getByRole("tab", { name: "AutoGPT agent" }).click();
+  }
+
+  async closeUploadDialog(): Promise<void> {
+    await this.page.getByRole("button", { name: "Close" }).click();
+
+    await this.page.getByRole("dialog", { name: "Import" }).waitFor({
+      state: "hidden",
+      timeout: 5_000,
+    });
+  }
+
+  async isUploadDialogVisible(): Promise<boolean> {
+    console.log(`checking if upload dialog is visible`);
+    try {
+      const dialog = this.page.getByRole("dialog", { name: "Import" });
+      return await dialog.isVisible();
+    } catch {
+      return false;
+    }
+  }
+
+  async fillUploadForm(agentName: string, description: string): Promise<void> {
+    console.log(
+      `filling upload form with name: ${agentName}, description: ${description}`,
+    );
+
+    // Fill agent name
+    await this.page
+      .getByRole("textbox", { name: "Agent name" })
+      .fill(agentName);
+
+    // Fill description
+    await this.page
+      .getByRole("textbox", { name: "Agent description" })
+      .fill(description);
+  }
+
+  async isUploadButtonEnabled(): Promise<boolean> {
+    console.log(`checking if upload button is enabled`);
+    try {
+      const uploadButton = this.page.getByRole("button", {
+        name: "Upload",
+      });
+      return await uploadButton.isEnabled();
+    } catch {
+      return false;
+    }
+  }
+
+  async getAgents(): Promise<Agent[]> {
+    const { getId } = getSelectors(this.page);
+    const agents: Agent[] = [];
+
+    await getId("library-agent-card")
+      .first()
+      .waitFor({ state: "visible", timeout: 10_000 });
+
const agentCards = await getId("library-agent-card").all();
+
+    for (const card of agentCards) {
+      const name = await getId("library-agent-card-name", card).textContent();
+      const seeRunsLink = getId("library-agent-card-see-runs-link", card);
+      const openInBuilderLink = getId(
+        "library-agent-card-open-in-builder-link",
+        card,
+      );
+
+      const seeRunsUrl = await seeRunsLink.getAttribute("href");
+
+      // Check if the "Open in builder" link exists before getting its href
+      const openInBuilderLinkCount = await openInBuilderLink.count();
+      const openInBuilderUrl =
+        openInBuilderLinkCount > 0
+          ? await openInBuilderLink.getAttribute("href")
+          : null;
+
+      if (name && seeRunsUrl) {
+        const idMatch = seeRunsUrl.match(/\/library\/agents\/([^\/]+)/);
+        const id = idMatch ? idMatch[1] : "";
+
+        agents.push({
+          id,
+          name: name.trim(),
+          description: "", // Description is not currently rendered in the card
+          seeRunsUrl,
+          openInBuilderUrl: openInBuilderUrl || "",
+        });
+      }
+    }
+
+    console.log(`found ${agents.length} agents`);
+    return agents;
+  }
+
+  async clickAgent(agent: Agent): Promise<void> {
+    const { getId } = getSelectors(this.page);
+    const nameElement = getId("library-agent-card-name").filter({
+      hasText: agent.name,
+    });
+    await nameElement.first().click();
+  }
+
+  async clickSeeRuns(agent: Agent): Promise<void> {
+    console.log(`clicking see runs for agent: ${agent.name}`);
+
+    const { getId } = getSelectors(this.page);
+    const agentCard = getId("library-agent-card").filter({
+      hasText: agent.name,
+    });
+    const seeRunsLink = getId("library-agent-card-see-runs-link", agentCard);
+    await seeRunsLink.first().click();
+  }
+
+  async clickOpenInBuilder(agent: Agent): Promise<void> {
+    console.log(`clicking open in builder for agent: ${agent.name}`);
+
+    const { getId } = getSelectors(this.page);
+    const agentCard = getId("library-agent-card").filter({
+      hasText: agent.name,
+    });
+    const builderLink = getId(
+      "library-agent-card-open-in-builder-link",
+      agentCard,
+    );
+    await builderLink.first().click();
+  }
+
+  async waitForAgentsToLoad(): Promise<void> {
+    const { getId } = getSelectors(this.page);
+    await expect
+      .poll(
+        async () => {
+          const [agentCardVisible, agentsCountVisible] = await Promise.all([
+            getId("library-agent-card")
+              .first()
+              .isVisible()
+              .catch(() => false),
+            getId("agents-count")
+              .isVisible()
+              .catch(() => false),
+          ]);
+
+          return agentCardVisible || agentsCountVisible;
+        },
+        { timeout: 10_000 },
+      )
+      .toBe(true);
+  }
+
+  async getSearchValue(): Promise<string> {
+    console.log(`getting search input value`);
+    try {
+      const searchInput = this.page.getByRole("textbox", {
+        name: "Search agents",
+      });
+      return await searchInput.inputValue();
+    } catch {
+      return "";
+    }
+  }
+
+  async hasNoAgentsMessage(): Promise<boolean> {
+    const { getText } = getSelectors(this.page);
+    const noAgentsText = getText("0 agents");
+    return noAgentsText.isVisible();
+  }
+
+  async scrollToBottom(): Promise<void> {
+    console.log(`scrolling to bottom to trigger pagination`);
+    await this.page.keyboard.press("End");
+  }
+
+  async scrollDown(): Promise<void> {
+    console.log(`scrolling down to trigger pagination`);
+    await this.page.keyboard.press("PageDown");
+  }
+
+  // Returns true if more agents loaded, false if we're on the last page.
+  // Callers must distinguish these cases so a broken pagination pipeline
+  // doesn't quietly look like "we reached the end".
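+  // Hypothetical caller sketch — on `false`, verify it really is the last
+  // page instead of trusting the scroll:
+  //   const loadedMore = await libraryPage.scrollToLoadMore();
+  //   if (!loadedMore) {
+  //     expect(await libraryPage.getAgentCountByListLength()).toBe(
+  //       await libraryPage.getAgentCount(),
+  //     );
+  //   }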
+
+  // Returns true if more agents loaded, false if we're on the last page.
+  // Callers must distinguish these cases so a broken pagination pipeline
+  // doesn't quietly look like "we reached the end".
+  async scrollToLoadMore(): Promise<boolean> {
+    const initialCount = await this.getAgentCountByListLength();
+    console.log(`Initial agent count (DOM cards): ${initialCount}`);
+
+    await this.scrollToBottom();
+
+    try {
+      await this.page.waitForFunction(
+        (prevCount) =>
+          document.querySelectorAll('[data-testid="library-agent-card"]')
+            .length > prevCount,
+        initialCount,
+        { timeout: 10000 },
+      );
+      return true;
+    } catch {
+      // No new cards — caller should verify this is actually the last page
+      // (e.g., by comparing against `getAgentCount()`), not a broken fetch.
+      return false;
+    }
+  }
+
+  async testPagination(): Promise<{
+    initialCount: number;
+    finalCount: number;
+    hasMore: boolean;
+  }> {
+    const initialCount = await this.getAgentCountByListLength();
+    await this.scrollToLoadMore();
+    const finalCount = await this.getAgentCountByListLength();
+
+    const hasMore = finalCount > initialCount;
+    return {
+      initialCount,
+      finalCount,
+      hasMore,
+    };
+  }
+
+  async getAgentsWithPagination(): Promise<Agent[]> {
+    console.log(`getting all agents with pagination`);
+
+    let allAgents: Agent[] = [];
+    let previousCount = 0;
+    let currentCount = 0;
+    const maxAttempts = 5; // Prevent infinite loop
+    let attempts = 0;
+
+    do {
+      previousCount = currentCount;
+
+      // Get current agents
+      const currentAgents = await this.getAgents();
+      allAgents = currentAgents;
+      currentCount = currentAgents.length;
+
+      console.log(`Attempt ${attempts + 1}: Found ${currentCount} agents`);
+
+      // Try to load more by scrolling
+      await this.scrollToLoadMore();
+
+      attempts++;
+    } while (currentCount > previousCount && attempts < maxAttempts);
+
+    console.log(`Total agents found with pagination: ${allAgents.length}`);
+    return allAgents;
+  }
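+
+  // Note (illustrative): getAgentsWithPagination() caps at 5 scroll attempts,
+  // so a library larger than that would be silently truncated. A spec can
+  // guard against this by comparing with the header count:
+  //
+  //   const agents = await libraryPage.getAgentsWithPagination();
+  //   expect(agents.length).toBe(await libraryPage.getAgentCount());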
+
+  async waitForPaginationLoad(): Promise<void> {
+    // Wait until the agent count header stops changing. Poll every 500ms
+    // and declare stable after two consecutive equal reads, capped at 10s.
+    // The previous implementation had no delay between reads and so hit
+    // "stable" instantly — effectively a no-op.
+    const deadline = Date.now() + 10000;
+    let previousCount = -1;
+    let stableChecks = 0;
+
+    while (Date.now() < deadline && stableChecks < 2) {
+      const currentCount = await this.getAgentCount();
+      if (currentCount === previousCount) {
+        stableChecks += 1;
+      } else {
+        stableChecks = 0;
+        previousCount = currentCount;
+      }
+      await this.page.waitForTimeout(500);
+    }
+  }
+
+  async scrollAndWaitForNewAgents(): Promise<number> {
+    const initialCount = await this.getAgentCountByListLength();
+
+    await this.scrollDown();
+
+    await this.waitForPaginationLoad();
+
+    const finalCount = await this.getAgentCountByListLength();
+    const newAgentsLoaded = finalCount - initialCount;
+
+    console.log(
+      `Loaded ${newAgentsLoaded} new agents (${initialCount} -> ${finalCount})`,
+    );
+
+    return newAgentsLoaded;
+  }
+
+  async isPaginationWorking(): Promise<boolean> {
+    const newAgentsLoaded = await this.scrollAndWaitForNewAgents();
+    return newAgentsLoaded > 0;
+  }
+}
+
+// Locator functions
+export function getLibraryTab(page: Page): Locator {
+  return page.locator('a[href="/library"]');
+}
+
+export function getAgentCards(page: Page): Locator {
+  return page.getByTestId("library-agent-card");
+}
+
+export function getNewRunButton(page: Page): Locator {
+  return page.getByRole("button", { name: "New run" });
+}
+
+export function getAgentTitle(page: Page): Locator {
+  return page.locator("h1").first();
+}
+
+// Action functions
+export async function navigateToLibrary(page: Page): Promise<void> {
+  await getLibraryTab(page).click();
+  await page.waitForURL(/.*\/library/);
+}
+
+export async function clickFirstAgent(page: Page): Promise<void> {
+  const firstAgent = getAgentCards(page).first();
+  await firstAgent.click();
+}
+
+export async function navigateToAgentByName(
+  page: Page,
+  agentName: string,
+): Promise<void> {
+  const agentCard = getAgentCards(page).filter({ hasText: agentName }).first();
+  // Wait for the agent card to be visible before clicking
+  // This handles async loading of agents after page navigation
+  await agentCard.waitFor({ state: "visible", timeout: 15000 });
+  // Click the link inside the card to navigate reliably through
+  // the motion.div + draggable wrapper layers.
+ const link = agentCard.locator('a[href*="/library/agents/"]').first(); + await link.click(); +} + +export async function clickRunButton(page: Page): Promise { + const setupTaskButton = page.getByRole("button", { + name: /Setup your task/i, + }); + const newTaskButton = page.getByRole("button", { name: /^New task$/i }); + const rerunTaskButton = page.getByRole("button", { name: /Rerun task/i }); + const runNowButton = page.getByRole("button", { name: /Run now/i }); + const actionButtons = [ + setupTaskButton, + newTaskButton, + rerunTaskButton, + runNowButton, + ]; + + await page.waitForLoadState("domcontentloaded"); + await page.waitForLoadState("networkidle").catch(() => undefined); + + const timeoutAt = Date.now() + 20000; + + while (Date.now() < timeoutAt) { + if ( + await setupTaskButton + .first() + .isVisible() + .catch(() => false) + ) { + const clicked = await clickActionButton(setupTaskButton.first()); + if (!clicked) { + await page.waitForTimeout(250); + continue; + } + + const runDialog = await waitForRunDialog(page); + await fillVisibleTaskInputs(runDialog); + await clickStartOrSimulateTask(page, runDialog); + return; + } + + if ( + await newTaskButton + .first() + .isVisible() + .catch(() => false) + ) { + const clicked = await clickActionButton(newTaskButton.first()); + if (!clicked) { + await page.waitForTimeout(250); + continue; + } + + const runDialog = await waitForRunDialog(page); + await fillVisibleTaskInputs(runDialog); + await clickStartOrSimulateTask(page, runDialog); + return; + } + + if ( + await rerunTaskButton + .first() + .isVisible() + .catch(() => false) + ) { + const clicked = await clickActionButton(rerunTaskButton.first()); + if (!clicked) { + await page.waitForTimeout(250); + continue; + } + + return; + } + + if ( + await runNowButton + .first() + .isVisible() + .catch(() => false) + ) { + const clicked = await clickActionButton(runNowButton.first()); + if (!clicked) { + await page.waitForTimeout(250); + continue; + } + + return; + } + + await page.waitForTimeout(250); + } + + const visibleButtons = await page + .getByRole("button") + .evaluateAll((elements) => + elements + .filter((element) => { + const htmlElement = element as HTMLElement; + const rect = htmlElement.getBoundingClientRect(); + return rect.width > 0 && rect.height > 0; + }) + .map((element) => element.textContent?.trim()) + .filter(Boolean), + ); + + throw new Error( + `Could not find run/start task button. URL: ${page.url()}. Visible buttons: ${visibleButtons.join(", ") || "none"}. 
Expected one of: ${actionButtons
+      .map((button) => button.toString())
+      .join(", ")}`,
+  );
+}
+
+async function clickActionButton(button: Locator): Promise<boolean> {
+  try {
+    await expect(button).toBeVisible({ timeout: 2000 });
+    await expect(button).toBeEnabled({ timeout: 2000 });
+    await button.click({ timeout: 3000 });
+    return true;
+  } catch {
+    return false;
+  }
+}
+
+async function waitForRunDialog(page: Page): Promise<Locator> {
+  const runDialog = page
+    .locator("[data-dialog-content]")
+    .filter({
+      has: page.getByRole("button", { name: /^Start Task$/i }),
+    })
+    .last();
+  await expect(runDialog).toBeVisible({ timeout: 15000 });
+  return runDialog;
+}
+
+async function dismissRunSafetyPopup(page: Page): Promise<void> {
+  const safetyPopup = page
+    .locator("[data-dialog-content]")
+    .filter({
+      has: page.getByText("Safety Checks Enabled", { exact: true }),
+    })
+    .last();
+
+  if (!(await safetyPopup.isVisible({ timeout: 2000 }).catch(() => false))) {
+    return;
+  }
+
+  await safetyPopup.getByRole("button", { name: /^Got it$/i }).click();
+  await expect(safetyPopup).toBeHidden({ timeout: 10000 });
+}
+
+async function clickStartOrSimulateTask(
+  page: Page,
+  runDialog: Locator,
+): Promise<void> {
+  const startBtn = runDialog.getByRole("button", { name: /^Start Task$/i });
+  // Happy-path tests must exercise a real run — do NOT fall back to the
+  // "Simulate" button if Start fails, because a broken Start code path is
+  // exactly the regression these tests exist to catch.
+  await expect(startBtn).toBeVisible({ timeout: 10000 });
+  await expect(startBtn).toBeEnabled({ timeout: 10000 });
+  await startBtn.click();
+  await dismissRunSafetyPopup(page);
+
+  await expect
+    .poll(
+      () => {
+        const currentUrl = new URL(page.url());
+        return (
+          currentUrl.searchParams.get("activeTab") === "runs" &&
+          currentUrl.searchParams.get("activeItem") !== null
+        );
+      },
+      {
+        timeout: 15000,
+        message:
+          "Start Task click did not navigate to a run detail (?activeTab=runs&activeItem=...)",
+      },
+    )
+    .toBe(true);
+}
+
+async function fillVisibleTaskInputs(container: Page | Locator): Promise<void> {
+  const seededEmail = getSeededTestUser("smokeMarketplace").email;
+  const inputs = container.locator(
+    'input:visible:not([type="hidden"]):not([type="file"]):not([disabled]), textarea:visible:not([disabled])',
+  );
+  const inputCount = await inputs.count();
+
+  for (let index = 0; index < inputCount; index += 1) {
+    const input = inputs.nth(index);
+    const currentValue = await input.inputValue().catch(() => "");
+    if (currentValue.trim()) {
+      continue;
+    }
+
+    const type = (await input.getAttribute("type"))?.toLowerCase() ?? "text";
+    const inputMetadata = await input.evaluate((element) => {
+      const formField = element as HTMLInputElement | HTMLTextAreaElement;
+      const closestLabel = formField.closest("label")?.textContent ?? "";
+      const forLabel = formField.id
+        ? (document.querySelector(`label[for="${CSS.escape(formField.id)}"]`)
+            ?.textContent ?? "")
+        : "";
+
+      return {
+        placeholder: formField.getAttribute("placeholder") ?? "",
+        ariaLabel: formField.getAttribute("aria-label") ?? "",
+        name: formField.getAttribute("name") ?? "",
+        labelText: `${closestLabel} ${forLabel}`.trim(),
+      };
+    });
+    const fieldDescriptor = [
+      inputMetadata.placeholder,
+      inputMetadata.ariaLabel,
+      inputMetadata.name,
+      inputMetadata.labelText,
+    ]
+      .join(" ")
+      .toLowerCase();
+
+    if (type === "checkbox" || type === "radio") {
+      continue;
+    }
+
+    const value =
+      type === "email" || fieldDescriptor.includes("email")
+        ? seededEmail
+        : type === "number" ||
+            /\b(a|b)\b/.test(fieldDescriptor) ||
+            fieldDescriptor.includes("number")
+          ? "1"
+          : "e2e-input";
+
+    await input.fill(value).catch(() => {});
+  }
+}
+
+export async function clickNewRunButton(page: Page): Promise<void> {
+  await getNewRunButton(page).click();
+}
+
+export async function runAgent(page: Page): Promise<void> {
+  await clickRunButton(page);
+}
+
+export async function waitForAgentPageLoad(
+  page: Page,
+  agentName?: string,
+): Promise<void> {
+  await page.waitForURL(/.*\/library\/agents\/[^/]+/);
+  // Wait for the primary content area to be present so the page has settled
+  // into its final state (empty view vs sidebar view)
+  await page.waitForLoadState("domcontentloaded");
+
+  // Transient "Something went wrong — All connection attempts failed" error
+  // boundary appears when the library agent page loads before the backend
+  // has indexed a newly-cloned agent (race between marketplace "Add to
+  // Library" and backend availability). Click "Try Again" and re-settle.
+  const errorHeading = page.getByText("Something went wrong", {
+    exact: false,
+  });
+  let errorResolved = false;
+  for (let attempt = 0; attempt < 3; attempt += 1) {
+    if (!(await errorHeading.isVisible({ timeout: 300 }).catch(() => false))) {
+      errorResolved = true;
+      break;
+    }
+    const tryAgain = page.getByRole("button", { name: "Try Again" });
+    if (await tryAgain.isVisible({ timeout: 500 }).catch(() => false)) {
+      await tryAgain.click();
+    } else {
+      await page.reload();
+    }
+    await page.waitForLoadState("domcontentloaded");
+  }
+
+  if (!errorResolved) {
+    errorResolved = !(await errorHeading
+      .isVisible({ timeout: 300 })
+      .catch(() => false));
+  }
+
+  if (!errorResolved) {
+    throw new Error(
+      "Library agent page remained on the connection-failure screen after 3 retries",
+    );
+  }
+
+  await waitForAgentDetailShell(page, agentName);
+}
+
+async function waitForLibraryListToLeave(page: Page): Promise<void> {
+  const librarySearch = page.getByTestId("library-textbox");
+  await expect
+    .poll(
+      async () => {
+        const count = await librarySearch.count();
+        if (count === 0) {
+          return "gone";
+        }
+
+        if (
+          !(await librarySearch
+            .first()
+            .isVisible()
+            .catch(() => false))
+        ) {
+          return "gone";
+        }
+
+        return "visible";
+      },
+      { timeout: 15000 },
+    )
+    .toBe("gone");
+}
+
+async function getVisibleAgentDetailSurface(page: Page): Promise<string> {
+  const visibleSurfaces: Array<[string, Locator]> = [
+    [
+      "about-agent",
+      page.getByText("About this agent", { exact: true }).first(),
+    ],
+    [
+      "setup-task",
+      page.getByRole("button", { name: /^Setup your task$/i }).first(),
+    ],
+    ["new-task", page.getByRole("button", { name: /^New task$/i }).first()],
+    ["scheduled-tab", page.getByRole("tab", { name: /^Scheduled$/i }).first()],
+  ];
+
+  for (const [surface, locator] of visibleSurfaces) {
+    if (await locator.isVisible().catch(() => false)) {
+      return surface;
+    }
+  }
+
+  return "pending";
+}
+
+async function waitForAgentDetailShell(
+  page: Page,
+  agentName?: string,
+): Promise<void> {
+  await waitForLibraryListToLeave(page);
+
+  await expect(
+    page.getByRole("link", { name: "My Library" }).first(),
+  ).toBeVisible({
+    timeout: 15000,
+  });
+
+  if (agentName) {
+    await expect(
+      page
+        .locator(`a[href*="/library/agents/"]`)
+        .filter({ hasText: agentName })
+        .first(),
+    ).toBeVisible({ timeout: 15000 });
+  }
+
+  await expect
+    .poll(() => getVisibleAgentDetailSurface(page), { timeout: 15000 })
+    .not.toBe("pending");
+}
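+
+/*
+ * Usage sketch (illustrative, not a real spec): the navigation helpers above
+ * compose into the standard "open an agent from the library" flow:
+ *
+ *   await navigateToLibrary(page);
+ *   await navigateToAgentByName(page, "Calculator agent");
+ *   await waitForAgentPageLoad(page, "Calculator agent");
+ */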
+
+export async function getAgentName(page: Page): Promise<string> {
+  return (await getAgentTitle(page).textContent()) || "";
+}
+
+export async function isLoaded(page: Page): Promise<boolean> {
+  return await page.locator("h1").isVisible();
+}
+
+const SUCCESS_RUN_STATUS = "completed";
+const FAILURE_RUN_STATUSES = new Set(["failed", "terminated", "incomplete"]);
+const RUN_ERROR_RECOVERY_GRACE_PERIOD_MS = 1500;
+const RUN_ERROR_RECOVERY_ATTEMPTS = 2;
+
+/**
+ * Assert that a completed run actually produced output.
+ *
+ * The Library run-detail Output panel renders "No output from this run." when
+ * the run object has no `outputs` field. There's a brief window after the run
+ * reaches "completed" status where the run object is loaded without outputs,
+ * then outputs arrive and the panel re-renders. We poll for up to `timeout`
+ * ms waiting for the "No output" placeholder to GO AWAY before concluding
+ * the run genuinely produced nothing.
+ *
+ * This catches the "agent runs but produces nothing" failure mode
+ * (disconnected edges, broken graph, runtime crash before any output node
+ * fired) — the exact regression that ACCEPTED_RUN_STATUSES previously hid.
+ */
+export async function assertRunProducedOutput(
+  page: Page,
+  timeout = 15000,
+): Promise<void> {
+  await openRunOutputTab(page);
+
+  // A completed run must surface output on the CURRENT render without a
+  // page reload. Reloading to "rule out stale cache" would mask a real
+  // user-visible regression where the frontend only shows output after a
+  // manual refresh.
+  const noOutput = page.getByText("No output from this run.", { exact: true });
+  await expect(noOutput, {
+    message:
+      'run completed but produced no output ("No output from this run." still shown) — broken graph, missing output node, or stale React Query cache',
+  }).toBeHidden({ timeout });
+}
+
+function escapeRegex(text: string): string {
+  return text.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
+}
+
+async function openRunOutputTab(page: Page): Promise<void> {
+  const outputTab = page.getByRole("tab", { name: /^Output$/i }).first();
+  if (await outputTab.isVisible().catch(() => false)) {
+    await outputTab.click();
+    return;
+  }
+
+  const outputButton = page.getByRole("button", { name: /^Output$/i }).first();
+  if (await outputButton.isVisible().catch(() => false)) {
+    await outputButton.click();
+  }
+}
+
+export async function assertRunOutputValue(
+  page: Page,
+  outputName: string,
+  expectedValue: RegExp | string,
+  timeout = 15000,
+): Promise<void> {
+  await openRunOutputTab(page);
+
+  const outputLabel = page.locator("p.capitalize:visible").filter({
+    hasText: new RegExp(`^${escapeRegex(outputName)}$`, "i"),
+  });
+
+  await expect(
+    outputLabel,
+    `run output should include output key "${outputName}"`,
+  ).toBeVisible({ timeout });
+
+  const outputValue = outputLabel.locator("xpath=following-sibling::*[1]");
+  if (expectedValue instanceof RegExp) {
+    await expect(
+      outputValue,
+      `run output value for "${outputName}" should match ${expectedValue.toString()}`,
+    ).toHaveText(expectedValue, { timeout });
+    return;
+  }
+
+  await expect(
+    outputValue,
+    `run output value for "${outputName}" should be "${expectedValue}"`,
+  ).toHaveText(expectedValue, { timeout });
+}
+
+export async function assertFirstRunOutputValue(
+  page: Page,
+  expectedValue: RegExp | string,
+  timeout = 15000,
+): Promise<void> {
+  await assertRunOutputContainsText(page, expectedValue, timeout);
+}
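+
+/*
+ * Sketch of the two output-assertion shapes (values are illustrative):
+ *
+ *   await assertRunOutputValue(page, "output", /^2$/); // named key + value
+ *   await assertRunOutputContainsText(page, "2");      // free text in card
+ */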
+
+export async function assertRunOutputContainsText(
+  page: Page,
+  expectedValue: RegExp | string,
+  timeout = 15000,
+): Promise<void> {
+  await openRunOutputTab(page);
+
+  const outputCard = page
+    .locator("div")
+    .filter({
+      has: page.getByRole("button", { name: "Copy all text outputs" }),
+    })
+    .first();
+  await expect(outputCard, "run output card should be visible").toBeVisible({
+    timeout,
+  });
+
+  if (expectedValue instanceof RegExp) {
+    await expect(
+      outputCard.getByText(expectedValue).first(),
+      `run output should contain text matching ${expectedValue.toString()}`,
+    ).toBeVisible({ timeout });
+    return;
+  }
+
+  await expect(
+    outputCard.getByText(expectedValue, { exact: true }).first(),
+    `run output should contain "${expectedValue}"`,
+  ).toBeVisible({ timeout });
+}
+
+export async function waitForRunToComplete(
+  page: Page,
+  timeout = 45000,
+): Promise<void> {
+  const start = Date.now();
+  let lastStatus = "unknown";
+  let runErrorDetectedAt: number | null = null;
+  let recoveryAttempts = 0;
+  while (Date.now() - start < timeout) {
+    lastStatus = await getRunStatus(page);
+    if (lastStatus === SUCCESS_RUN_STATUS) {
+      return;
+    }
+    if (lastStatus === "error") {
+      runErrorDetectedAt ??= Date.now();
+      if (
+        Date.now() - runErrorDetectedAt >=
+        RUN_ERROR_RECOVERY_GRACE_PERIOD_MS
+      ) {
+        if (recoveryAttempts >= RUN_ERROR_RECOVERY_ATTEMPTS) {
+          throw new Error(`Run reached terminal failure state "${lastStatus}"`);
+        }
+        recoveryAttempts += 1;
+        runErrorDetectedAt = null;
+        await page.reload();
+        await waitForAgentPageLoad(page);
+        continue;
+      }
+    } else {
+      runErrorDetectedAt = null;
+    }
+    if (FAILURE_RUN_STATUSES.has(lastStatus)) {
+      throw new Error(`Run reached terminal failure state "${lastStatus}"`);
+    }
+    await page.waitForTimeout(250);
+  }
+  throw new Error(
+    `waitForRunToComplete timed out after ${timeout}ms — last status was "${lastStatus}" (expected "${SUCCESS_RUN_STATUS}")`,
+  );
+}
+
+export function getActiveItemId(page: Page): string | null {
+  return new URL(page.url()).searchParams.get("activeItem");
+}
+
+export async function dismissFeedbackDialog(page: Page): Promise<void> {
+  const feedbackDialog = page.getByRole("dialog", {
+    name: "We'd love your feedback",
+  });
+  // Dialog is genuinely optional — it only appears on some run completions.
+  // Give it a realistic window to animate in; 500ms races the dialog
+  // transition and causes later clicks to land on it instead of the button
+  // behind it.
+ if (!(await feedbackDialog.isVisible({ timeout: 3000 }).catch(() => false))) { + return; + } + + const cancelButton = feedbackDialog.getByRole("button", { name: "Cancel" }); + if (await cancelButton.isVisible()) { + await cancelButton.click(); + await expect(feedbackDialog).toBeHidden({ timeout: 15000 }); + return; + } + + await feedbackDialog.getByRole("button", { name: "Close" }).click(); + await expect(feedbackDialog).toBeHidden({ timeout: 15000 }); +} + +export async function importAgentFromFile( + page: Page, + filePath: string, + agentName: string, + description: string = "PR E2E library coverage", +): Promise<{ libraryPage: LibraryPage; importedAgent: Agent }> { + const libraryPage = new LibraryPage(page); + const importDialog = page.getByRole("dialog", { name: "Import" }); + + await page.goto("/library"); + await libraryPage.openUploadDialog(); + await libraryPage.fillUploadForm(agentName, description); + + const fileInput = importDialog.locator('input[type="file"]'); + await fileInput.setInputFiles(filePath); + const uploadButton = importDialog.getByRole("button", { name: "Upload" }); + await expect(uploadButton).toBeEnabled({ + timeout: 10000, + }); + await uploadButton.click(); + const uploadingButton = importDialog.getByRole("button", { + name: /Uploading\.\.\./i, + }); + const sawUploadingState = await uploadingButton + .waitFor({ state: "visible", timeout: 2000 }) + .then(() => true) + .catch(() => false); + if (sawUploadingState) { + await expect + .poll( + async () => { + if (/\/build/.test(page.url())) { + return "build"; + } + if (!(await uploadingButton.isVisible().catch(() => false))) { + return "gone"; + } + return (await uploadingButton.isDisabled().catch(() => false)) + ? "disabled" + : "enabled"; + }, + { + timeout: 5000, + message: + 'upload button should either stay disabled while "Uploading..." is visible or disappear because navigation already started', + }, + ) + .not.toBe("enabled"); + } + + // Upload → backend creates the graph → router pushes /build?flowID=... + // This pipeline includes file parsing plus a backend graph creation call. + // On a cold stack it can take longer than a normal UI transition, so poll + // for the real terminal states: builder navigation or an explicit error. + await expect + .poll( + async () => { + if (/\/build/.test(page.url())) { + return "build"; + } + + const uploadFailed = await page + .getByText("Error Uploading agent") + .isVisible() + .catch(() => false); + if (uploadFailed) { + return "failed"; + } + + return "pending"; + }, + { + timeout: 60000, + message: + "agent import should either navigate to /build or surface an explicit upload error toast", + }, + ) + .toBe("build"); + await expect(page).toHaveURL(/\/build/, { timeout: 15000 }); + + // Import should produce a real graph, not an empty canvas. Lazy-import + // BuildPage locally to avoid a circular dependency between the two + // page-object modules. + const { BuildPage } = await import("./build.page"); + const importedBuildPage = new BuildPage(page); + await importedBuildPage.waitForNodeOnCanvas(); + const importedNodeCount = await importedBuildPage.getNodeCount(); + expect( + importedNodeCount, + "imported agent must render at least one node on canvas", + ).toBeGreaterThan(0); + + await page.goto("/library"); + await libraryPage.searchAgents(agentName); + await libraryPage.waitForAgentsToLoad(); + + // Look up the specific imported card directly rather than calling + // getAgents() in a loop. 
getAgents() iterates every visible card and
+  // reads hrefs via `.getAttribute`, which deadlocks if the library list
+  // re-renders mid-iteration (previously caused this test to hang 120s on
+  // the 8th card). A filter-based lookup on the agent name is both faster
+  // and immune to list churn.
+  const { getId } = getSelectors(page);
+  const importedCard = getId("library-agent-card")
+    .filter({ hasText: agentName })
+    .first();
+  await expect(
+    importedCard,
+    `imported agent card "${agentName}" must appear in the library search results`,
+  ).toBeVisible({ timeout: 15000 });
+
+  const seeRunsLink = getId("library-agent-card-see-runs-link", importedCard);
+  const seeRunsUrl = (await seeRunsLink.getAttribute("href")) ?? "";
+  const openInBuilderLink = getId(
+    "library-agent-card-open-in-builder-link",
+    importedCard,
+  );
+  const openInBuilderUrl =
+    (await openInBuilderLink.count()) > 0
+      ? ((await openInBuilderLink.getAttribute("href")) ?? "")
+      : "";
+
+  const idMatch = seeRunsUrl.match(/\/library\/agents\/([^/]+)/);
+  const importedAgent: Agent = {
+    id: idMatch ? idMatch[1] : "",
+    name:
+      (
+        await getId("library-agent-card-name", importedCard).textContent()
+      )?.trim() ?? agentName,
+    description: "",
+    seeRunsUrl,
+    openInBuilderUrl,
+  };
+
+  expect(
+    importedAgent.name,
+    "imported agent name should contain the requested name",
+  ).toContain(agentName);
+
+  return { libraryPage, importedAgent };
+}
+
+export async function openSavedAgentInLibrary(
+  page: Page,
+  agentName: string,
+): Promise<void> {
+  const libraryPage = new LibraryPage(page);
+
+  await page.goto("/library");
+  await libraryPage.waitForAgentsToLoad();
+  await libraryPage.searchAgents(agentName);
+  await libraryPage.waitForAgentsToLoad();
+  await navigateToAgentByName(page, agentName);
+  await waitForAgentPageLoad(page, agentName);
+}
+
+async function waitForExportActionSurface(
+  page: Page,
+): Promise<"direct" | "menu"> {
+  await expect
+    .poll(
+      async () => {
+        if (
+          await getFirstVisibleLocator(page, "button", "Export agent to file")
+        ) {
+          return "direct";
+        }
+
+        if (await getFirstVisibleLocator(page, "button", "More actions")) {
+          return "menu";
+        }
+
+        return "pending";
+      },
+      { timeout: 30000 },
+    )
+    .not.toBe("pending");
+
+  if (await getFirstVisibleLocator(page, "button", "Export agent to file")) {
+    return "direct";
+  }
+
+  return "menu";
+}
+
+async function getFirstVisibleLocator(
+  page: Page,
+  role: "button" | "menuitem",
+  name: string,
+): Promise<Locator | null> {
+  const locator = page.getByRole(role, { name });
+  const count = await locator.count();
+
+  for (let index = 0; index < count; index += 1) {
+    const candidate = locator.nth(index);
+    if (await candidate.isVisible().catch(() => false)) {
+      return candidate;
+    }
+  }
+
+  return null;
+}
+
+export async function clickExportAgent(page: Page): Promise<void> {
+  const exportSurface = await waitForExportActionSurface(page);
+
+  if (exportSurface === "direct") {
+    const directExportButton = await getFirstVisibleLocator(
+      page,
+      "button",
+      "Export agent to file",
+    );
+    if (!directExportButton) {
+      throw new Error(
+        "Export button was not visible after export surface resolved",
+      );
+    }
+
+    await directExportButton.click({ timeout: 15000 });
+    return;
+  }
+
+  const moreActionsButtons = page.getByRole("button", { name: "More actions" });
+  const moreActionsCount = await moreActionsButtons.count();
+
+  for (let index = 0; index < moreActionsCount; index += 1) {
+    const moreActionsButton = moreActionsButtons.nth(index);
+
+    if (!(await moreActionsButton.isVisible().catch(() => false))) {
+      continue;
+    }
+
+    await moreActionsButton.click({ timeout: 15000 });
+
+    const exportMenuItem = await getFirstVisibleLocator(
+      page,
+      "menuitem",
+      "Export agent to file",
+    );
+    if (exportMenuItem) {
+      await exportMenuItem.click({ timeout: 15000 });
+      return;
+    }
+
+    await page.keyboard.press("Escape").catch(() => {});
+  }
+
+  throw new Error(
+    "Export action was not available from any visible More actions menu",
+  );
+}
+
+// The run status is rendered by RunStatusBadge as lowercase text inside a
+// `.capitalize` element (uppercased via CSS). Scoping to that class prevents
+// false positives from free-text occurrences of words like "completed"
+// elsewhere on the page (filter chips, tooltips, etc.).
+const RUN_STATUS_WORDS = [
+  "completed",
+  "failed",
+  "terminated",
+  "incomplete",
+  "queued",
+  "review",
+  "running",
+] as const;
+
+export async function getRunStatus(page: Page): Promise<string> {
+  // 1. Detect React error boundary first — fast loud failure if the page
+  // crashed mid-run, instead of polling until timeout.
+  const errorBoundary = page.getByText(
+    /Something went wrong|We had the following error|Application error/i,
+  );
+  if (
+    await errorBoundary
+      .first()
+      .isVisible({ timeout: 200 })
+      .catch(() => false)
+  ) {
+    return "error";
+  }
+
+  // 2. Read the status from the scoped RunStatusBadge element. This is the
+  // only source of truth — no free-text matching across the whole page,
+  // no spinner heuristics that confuse a skeleton loader with a live run.
+  const badges = page.locator(".capitalize");
+  const badgeCount = await badges.count().catch(() => 0);
+  for (let i = 0; i < badgeCount; i += 1) {
+    const badge = badges.nth(i);
+    if (!(await badge.isVisible().catch(() => false))) continue;
+    const text = ((await badge.textContent()) ?? "").trim().toLowerCase();
+    if ((RUN_STATUS_WORDS as readonly string[]).includes(text)) {
+      return text;
+    }
+  }
+
+  return "unknown";
+}
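Taken together, the run helpers above support a compact happy-path flow. A minimal sketch, assuming the spec has already opened a runnable agent's library page (the test name and setup are illustrative, not part of this PR):

```ts
import { expect, test } from "./coverage-fixture";
import {
  assertRunProducedOutput,
  clickRunButton,
  getActiveItemId,
  waitForRunToComplete,
} from "./pages/library.page";

test("run happy path (sketch)", async ({ page }) => {
  // Assumes navigation to the agent detail page happened earlier in the spec.
  await clickRunButton(page); // fills inputs and lands on ?activeTab=runs
  await waitForRunToComplete(page);
  await assertRunProducedOutput(page);
  expect(getActiveItemId(page)).not.toBeNull();
});
```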
"").trim().toLowerCase(); + if ((RUN_STATUS_WORDS as readonly string[]).includes(text)) { + return text; + } + } + + return "unknown"; +} diff --git a/autogpt_platform/frontend/src/playwright/pages/login.page.ts b/autogpt_platform/frontend/src/playwright/pages/login.page.ts new file mode 100644 index 0000000000..e5aab2d678 --- /dev/null +++ b/autogpt_platform/frontend/src/playwright/pages/login.page.ts @@ -0,0 +1,123 @@ +import { Page } from "@playwright/test"; +import { + getSeededTestUser, + type SeededTestAccountKey, +} from "../credentials/accounts"; +import { skipOnboardingIfPresent } from "../utils/onboarding"; + +export class LoginPage { + constructor(private page: Page) {} + + async goto() { + await this.page.goto("/login"); + } + + async loginAsSeededUser(userKey: SeededTestAccountKey): Promise { + const user = getSeededTestUser(userKey); + await this.page.goto("/login"); + await this.login(user.email, user.password); + } + + async login(email: string, password: string) { + console.log(`ℹ️ Attempting login on ${this.page.url()} with`, { + email, + password, + }); + + // Wait for the form to be ready + await this.page.waitForSelector("form", { state: "visible" }); + + // Fill email using input selector instead of label + const emailInput = this.page.locator('input[type="email"]'); + await emailInput.waitFor({ state: "visible" }); + await emailInput.fill(email); + + // Fill password using input selector instead of label + const passwordInput = this.page.locator('input[type="password"]'); + await passwordInput.waitFor({ state: "visible" }); + await passwordInput.fill(password); + + // Wait for the button to be ready + const loginButton = this.page.getByRole("button", { + name: "Login", + exact: true, + }); + await loginButton.waitFor({ state: "visible" }); + + // Attach navigation logger for debug purposes + this.page.once("load", (page) => + console.log(`ℹ️ Now at URL: ${page.url()}`), + ); + + const hasReachedPostLoginRoute = () => + this.page.waitForFunction( + () => { + const pathname = window.location.pathname; + return /^\/(marketplace|onboarding(\/.*)?|library|copilot)$/.test( + pathname, + ); + }, + { timeout: 15_000 }, + ); + + console.log(`🖱️ Clicking login button...`); + for (let attempt = 0; attempt < 2; attempt += 1) { + await loginButton.click(); + + console.log("⏳ Waiting for navigation away from /login ..."); + try { + await hasReachedPostLoginRoute(); + break; + } catch (reason) { + const currentPathname = new URL(this.page.url()).pathname; + if (attempt === 1 || currentPathname !== "/login") { + console.error( + `🚨 Navigation away from /login timed out (current URL: ${this.page.url()}):`, + reason, + ); + throw reason; + } + } + } + + console.log(`⌛ Post-login redirected to ${this.page.url()}`); + + await this.page.waitForLoadState("load", { timeout: 10_000 }); + + // If redirected to onboarding, complete it via API so tests can proceed + await skipOnboardingIfPresent(this.page, "/marketplace"); + + console.log("➡️ Navigating to /marketplace ..."); + await this.page.goto("/marketplace", { timeout: 20_000 }); + console.log("✅ Login process complete"); + + // If Wallet popover auto-opens, close it to avoid blocking account menu interactions. + // The popover is genuinely optional — only appears on some accounts/environments. 
+ const walletPanel = this.page.getByText("Your credits").first(); + const walletPanelVisible = await walletPanel + .waitFor({ state: "visible", timeout: 2500 }) + .then(() => true) + .catch(() => false); + if (walletPanelVisible) { + const closeWalletButton = this.page.getByRole("button", { + name: /Close wallet/i, + }); + const closeWalletButtonVisible = await closeWalletButton + .waitFor({ state: "visible", timeout: 1000 }) + .then(() => true) + .catch(() => false); + if (closeWalletButtonVisible) { + await closeWalletButton.click(); + } else { + await this.page.keyboard.press("Escape"); + } + const walletStillVisible = await walletPanel + .waitFor({ state: "hidden", timeout: 3000 }) + .then(() => false) + .catch(() => true); + if (walletStillVisible) { + await this.page.mouse.click(5, 5); + } + } + } +} diff --git a/autogpt_platform/frontend/src/playwright/pages/marketplace.page.ts b/autogpt_platform/frontend/src/playwright/pages/marketplace.page.ts new file mode 100644 index 0000000000..b0d334449f --- /dev/null +++ b/autogpt_platform/frontend/src/playwright/pages/marketplace.page.ts @@ -0,0 +1,294 @@ +import { expect, Page } from "@playwright/test"; +import { BasePage } from "./base.page"; +import { dismissFeedbackDialog } from "./library.page"; +import { getSelectors } from "../utils/selectors"; + +const DETERMINISTIC_MARKETPLACE_AGENT_SEARCH = "E2E Calculator Agent"; + +export class MarketplacePage extends BasePage { + constructor(page: Page) { + super(page); + } + + async goto(page: Page) { + await page.goto("/marketplace"); + await page + .locator( + '[data-testid="store-card"], [data-testid="featured-store-card"]', + ) + .first() + .waitFor({ state: "visible", timeout: 20000 }); + } + + async getMarketplaceTitle(page: Page) { + const { getText } = getSelectors(page); + return getText("Explore AI agents", { exact: false }); + } + + async getCreatorsSection(page: Page) { + const { getId, getText } = getSelectors(page); + return getId("creators-section") || getText("Creators", { exact: false }); + } + + async getAgentsSection(page: Page) { + const { getId, getText } = getSelectors(page); + return getId("agents-section") || getText("Agents", { exact: false }); + } + + async getCreatorsLink(page: Page) { + const { getLink } = getSelectors(page); + return getLink(/creators/i); + } + + async getAgentsLink(page: Page) { + const { getLink } = getSelectors(page); + return getLink(/agents/i); + } + + async getSearchInput(page: Page) { + const visibleSearchInput = page + .locator('[data-testid="store-search-input"]:visible') + .first(); + if (await visibleSearchInput.isVisible().catch(() => false)) { + return visibleSearchInput; + } + + const { getField, getId } = getSelectors(page); + return getId("store-search-input").first() || getField(/search/i).first(); + } + + async getFilterDropdown(page: Page) { + const { getId, getButton } = getSelectors(page); + return getId("filter-dropdown") || getButton(/filter/i); + } + + async searchFor(query: string, page: Page) { + const searchInput = await this.getSearchInput(page); + await searchInput.fill(query); + await searchInput.press("Enter"); + } + + async clickCreators(page: Page) { + const creatorsLink = await this.getCreatorsLink(page); + await creatorsLink.click(); + } + + async clickAgents(page: Page) { + const agentsLink = await this.getAgentsLink(page); + await agentsLink.click(); + } + + async openFilter(page: Page) { + const filterDropdown = await this.getFilterDropdown(page); + await filterDropdown.click(); + } + + async 
getFeaturedAgentsSection(page: Page) { + const { getText } = getSelectors(page); + return getText("Featured agents"); + } + + async getTopAgentsSection(page: Page) { + const { getText } = getSelectors(page); + return getText("All Agents"); + } + + async getFeaturedCreatorsSection(page: Page) { + const { getText } = getSelectors(page); + return getText("Featured Creators"); + } + + async getFeaturedAgentCards(page: Page) { + const { getId } = getSelectors(page); + return getId("featured-store-card"); + } + + async getTopAgentCards(page: Page) { + const { getId } = getSelectors(page); + return getId("store-card"); + } + + async getCreatorProfiles(page: Page) { + const { getId } = getSelectors(page); + return getId("creator-card"); + } + + async searchAndNavigate(query: string, page: Page) { + const searchInput = (await this.getSearchInput(page)).first(); + await searchInput.fill(query); + await searchInput.press("Enter"); + } + + async waitForSearchResults() { + await this.page.waitForURL("**/marketplace/search**"); + } + + async getFirstFeaturedAgent(page: Page) { + const { getId } = getSelectors(page); + const card = getId("featured-store-card").first(); + await card.waitFor({ state: "visible", timeout: 15000 }); + return card; + } + + async getFirstTopAgent() { + const card = this.page + .locator('[data-testid="store-card"]:visible') + .first(); + await card.waitFor({ state: "visible", timeout: 15000 }); + return card; + } + + async getFirstCreatorProfile(page: Page) { + const { getId } = getSelectors(page); + const card = getId("creator-card").first(); + await card.waitFor({ state: "visible", timeout: 15000 }); + return card; + } + + async getSearchResultsCount(page: Page) { + const { getId } = getSelectors(page); + const storeCards = getId("store-card"); + return await storeCards.count(); + } + + // --- Happy-path flows shared across PR smoke specs --- + + async openRunnableAgent(): Promise<{ path: string }> { + await this.searchAndOpenAgent(DETERMINISTIC_MARKETPLACE_AGENT_SEARCH); + + await expect(this.page.getByTestId("agent-add-library-button")).toBeVisible( + { + timeout: 15000, + }, + ); + + return { path: this.page.url() }; + } + + async openFeaturedAgent(): Promise { + await this.searchAndOpenAgent(DETERMINISTIC_MARKETPLACE_AGENT_SEARCH); + await dismissFeedbackDialog(this.page); + } + + private async searchAndOpenAgent(agentName: string): Promise { + const searchURL = `/marketplace/search?searchTerm=${encodeURIComponent(agentName)}`; + + const agentCard = this.page + .locator('[data-testid="store-card"]:visible') + .filter({ hasText: agentName }) + .first(); + + for (let attempt = 0; attempt < 3; attempt++) { + await this.page.goto(searchURL); + await this.page.waitForLoadState("networkidle"); + + const visible = await agentCard + .waitFor({ state: "visible", timeout: 15000 }) + .then(() => true) + .catch(() => false); + + if (visible) break; + + if (attempt === 2) { + await expect(agentCard).toBeVisible({ timeout: 15000 }); + } + } + + await agentCard.click(); + + await expect(this.page).toHaveURL(/\/marketplace\/agent\//, { + timeout: 15000, + }); + await expect(this.page.getByTestId("agent-title")).toBeVisible({ + timeout: 15000, + }); + } + + async submitAgentForReview(publishableAgentName: string): Promise<{ + agentTitle: string; + agentSlug: string; + }> { + await this.page.goto("/marketplace"); + await this.page.getByRole("button", { name: "Become a Creator" }).click(); + + const publishAgentModal = this.page.getByTestId("publish-agent-modal"); + await 
expect(publishAgentModal).toBeVisible(); + await expect( + publishAgentModal.getByText( + "Select your project that you'd like to publish", + ), + ).toBeVisible(); + + const publishableAgentCard = publishAgentModal + .getByTestId("agent-card") + .filter({ hasText: publishableAgentName }) + .first(); + await expect(publishableAgentCard).toBeVisible({ timeout: 15000 }); + await publishableAgentCard.click(); + await publishAgentModal + .getByRole("button", { name: "Next", exact: true }) + .click(); + + await expect( + publishAgentModal.getByText("Write a bit of details about your agent"), + ).toBeVisible(); + + const suffix = Date.now().toString().slice(-6); + const agentTitle = `Publish Flow ${suffix}`; + const agentSlug = `publish-flow-${suffix}`; + + await publishAgentModal.getByLabel("Title").fill(agentTitle); + await publishAgentModal + .getByLabel("Subheader") + .fill("A deterministic marketplace submission"); + await publishAgentModal.getByLabel("Slug").fill(agentSlug); + await publishAgentModal + .getByLabel("YouTube video link") + .fill("https://www.youtube.com/watch?v=test123"); + + await publishAgentModal.getByRole("combobox", { name: "Category" }).click(); + await this.page.getByRole("option", { name: "Other" }).click(); + + await publishAgentModal + .getByLabel("Description") + .fill( + "A deterministic publish flow for consolidated Playwright coverage.", + ); + + const submitButton = publishAgentModal.getByRole("button", { + name: "Submit for review", + }); + await expect(submitButton).toBeEnabled(); + await submitButton.click(); + + await expect( + publishAgentModal.getByText("Agent is awaiting review"), + ).toBeVisible(); + await expect( + publishAgentModal.getByTestId("view-progress-button"), + ).toBeVisible(); + + return { agentTitle, agentSlug }; + } + + async waitForDashboardSubmission(agentTitle: string) { + for (let attempt = 0; attempt < 3; attempt += 1) { + const submissionRow = this.page + .getByTestId("agent-table-row") + .filter({ hasText: agentTitle }) + .first(); + + // Row may not appear immediately after redirect — allow a short render + // window before deciding the submission is absent on this attempt. 
+      if (await submissionRow.isVisible({ timeout: 5000 }).catch(() => false)) {
+        return submissionRow;
+      }
+
+      await this.page.reload();
+      await expect(this.page).toHaveURL(/\/profile\/dashboard/);
+      await expect(this.page.getByText("Agent dashboard")).toBeVisible();
+    }
+
+    throw new Error(`Submission row for "${agentTitle}" did not appear`);
+  }
+}
diff --git a/autogpt_platform/frontend/src/tests/pages/navbar.page.ts b/autogpt_platform/frontend/src/playwright/pages/navbar.page.ts
similarity index 100%
rename from autogpt_platform/frontend/src/tests/pages/navbar.page.ts
rename to autogpt_platform/frontend/src/playwright/pages/navbar.page.ts
diff --git a/autogpt_platform/frontend/src/tests/pages/profile-form.page.ts b/autogpt_platform/frontend/src/playwright/pages/profile-form.page.ts
similarity index 100%
rename from autogpt_platform/frontend/src/tests/pages/profile-form.page.ts
rename to autogpt_platform/frontend/src/playwright/pages/profile-form.page.ts
diff --git a/autogpt_platform/frontend/src/tests/pages/profile.page.ts b/autogpt_platform/frontend/src/playwright/pages/profile.page.ts
similarity index 100%
rename from autogpt_platform/frontend/src/tests/pages/profile.page.ts
rename to autogpt_platform/frontend/src/playwright/pages/profile.page.ts
diff --git a/autogpt_platform/frontend/src/playwright/pages/settings.page.ts b/autogpt_platform/frontend/src/playwright/pages/settings.page.ts
new file mode 100644
index 0000000000..7d32ccc23a
--- /dev/null
+++ b/autogpt_platform/frontend/src/playwright/pages/settings.page.ts
@@ -0,0 +1,29 @@
+import { expect, Locator, Page } from "@playwright/test";
+import { BasePage } from "./base.page";
+
+export class SettingsPage extends BasePage {
+  constructor(page: Page) {
+    super(page);
+  }
+
+  async open(): Promise<void> {
+    await this.page.goto("/profile/settings");
+    await expect(this.page).toHaveURL(/\/profile\/settings/);
+    await expect(
+      this.page.getByText("Manage your account settings and preferences."),
+    ).toBeVisible();
+  }
+
+  getAgentRunNotificationsSwitch(): Locator {
+    return this.page.getByRole("switch", {
+      name: "Agent Run Notifications",
+    });
+  }
+
+  async savePreferences(): Promise<void> {
+    await this.page.getByRole("button", { name: "Save preferences" }).click();
+    await expect(
+      this.page.getByText("Successfully updated notification preferences"),
+    ).toBeVisible({ timeout: 15000 });
+  }
+}
diff --git a/autogpt_platform/frontend/src/playwright/publish-happy-path.spec.ts b/autogpt_platform/frontend/src/playwright/publish-happy-path.spec.ts
new file mode 100644
index 0000000000..00fcbaf1d4
--- /dev/null
+++ b/autogpt_platform/frontend/src/playwright/publish-happy-path.spec.ts
@@ -0,0 +1,77 @@
+import { expect, test } from "./coverage-fixture";
+import { E2E_AUTH_STATES } from "./credentials/accounts";
+import { BuildPage } from "./pages/build.page";
+import { LibraryPage } from "./pages/library.page";
+import { MarketplacePage } from "./pages/marketplace.page";
+
+test.use({ storageState: E2E_AUTH_STATES.parallelA });
+
+test("publish happy path: user can submit, track, and delete an agent submission from the dashboard", async ({
+  page,
+}) => {
+  test.setTimeout(180000);
+
+  const buildPage = new BuildPage(page);
+  const libraryPage = new LibraryPage(page);
+  const marketplacePage = new MarketplacePage(page);
+
+  const { agentName: publishableAgentName } =
+    await buildPage.createAndSaveSimpleAgent("Publish Flow Agent");
+
+  await page.goto("/library");
+  await libraryPage.waitForAgentsToLoad();
+  await 
libraryPage.searchAgents(publishableAgentName); + await libraryPage.waitForAgentsToLoad(); + + const createdAgent = page + .getByTestId("library-agent-card") + .filter({ hasText: publishableAgentName }) + .first(); + await expect(createdAgent).toBeVisible({ timeout: 15000 }); + + const { agentTitle, agentSlug } = + await marketplacePage.submitAgentForReview(publishableAgentName); + + await page.getByTestId("view-progress-button").click(); + await expect(page).toHaveURL(/\/profile\/dashboard/); + await expect(page.getByText("Agent dashboard")).toBeVisible(); + + const submissionRow = + await marketplacePage.waitForDashboardSubmission(agentTitle); + await expect( + submissionRow.getByTestId("agent-status"), + `submission "${agentTitle}" should appear in the dashboard review-pending state`, + ).toContainText(/awaiting review/i); + await submissionRow.getByTestId("agent-table-row-actions").click(); + await expect(page.getByRole("menuitem", { name: "Edit" })).toBeVisible(); + + // Delete the submission via the actions menu. The dashboard does not show + // a confirmation dialog — clicking Delete fires the API directly. We then + // assert the row is gone, proving the backend actually removed it (not + // just the menu item disappeared). + await page.getByRole("menuitem", { name: "Delete" }).click(); + + await expect( + page.getByTestId("agent-table-row").filter({ hasText: agentTitle }), + `submission row "${agentTitle}" must be removed from the dashboard after delete`, + ).toHaveCount(0, { timeout: 15000 }); + + // Validate the deleted submission is no longer discoverable in Marketplace. + await page.goto("/marketplace"); + const searchInput = page + .locator('[data-testid="store-search-input"]:visible') + .first(); + await expect(searchInput).toBeVisible({ timeout: 15000 }); + await searchInput.fill(agentSlug); + await searchInput.press("Enter"); + await expect(page).toHaveURL(/\/marketplace\/search/); + + await expect( + page + .locator( + '[data-testid="store-card"], [data-testid="featured-store-card"]', + ) + .filter({ hasText: agentTitle }), + `deleted submission "${agentTitle}" should not appear in marketplace results`, + ).toHaveCount(0, { timeout: 15000 }); +}); diff --git a/autogpt_platform/frontend/src/playwright/settings-happy-path.spec.ts b/autogpt_platform/frontend/src/playwright/settings-happy-path.spec.ts new file mode 100644 index 0000000000..29dcd5187d --- /dev/null +++ b/autogpt_platform/frontend/src/playwright/settings-happy-path.spec.ts @@ -0,0 +1,75 @@ +import { expect, test } from "./coverage-fixture"; +import { LoginPage } from "./pages/login.page"; +import { ProfileFormPage } from "./pages/profile-form.page"; +import { SettingsPage } from "./pages/settings.page"; + +test("settings happy path: user can save notification preferences and keep them after reload and re-login", async ({ + page, +}) => { + test.setTimeout(90000); + + const loginPage = new LoginPage(page); + const settingsPage = new SettingsPage(page); + + await loginPage.loginAsSeededUser("smokeSettings"); + await settingsPage.open(); + + const agentRunSwitch = settingsPage.getAgentRunNotificationsSwitch(); + // Assert the attribute exists before reading it — defaulting to "false" + // would silently pass a regression that removes `aria-checked` entirely. + await expect(agentRunSwitch).toHaveAttribute( + "aria-checked", + /^(true|false)$/, + ); + const initialState = await agentRunSwitch.getAttribute("aria-checked"); + const expectedState = initialState === "true" ? 
"false" : "true"; + + await agentRunSwitch.click(); + await settingsPage.savePreferences(); + await expect(agentRunSwitch).toHaveAttribute("aria-checked", expectedState); + + await page.reload(); + await settingsPage.open(); + await expect(settingsPage.getAgentRunNotificationsSwitch()).toHaveAttribute( + "aria-checked", + expectedState, + ); + + await page.getByTestId("profile-popout-menu-trigger").click(); + await page.getByRole("button", { name: "Log out" }).click(); + await expect(page).toHaveURL(/\/login/); + + await loginPage.loginAsSeededUser("smokeSettings"); + await settingsPage.open(); + await expect(settingsPage.getAgentRunNotificationsSwitch()).toHaveAttribute( + "aria-checked", + expectedState, + ); +}); + +test("settings happy path: user can edit display name and keep it after refresh", async ({ + page, +}) => { + test.setTimeout(90000); + + const loginPage = new LoginPage(page); + const profileFormPage = new ProfileFormPage(page); + const updatedDisplayName = `E2E Display ${Date.now()}`; + + await loginPage.loginAsSeededUser("smokeSettings"); + await page.goto("/profile"); + await expect(await profileFormPage.isLoaded()).toBe(true); + + await profileFormPage.setDisplayName(updatedDisplayName); + await profileFormPage.saveChanges(); + + await expect + .poll(() => profileFormPage.getDisplayName(), { timeout: 15000 }) + .toBe(updatedDisplayName); + + await page.reload(); + await expect(await profileFormPage.isLoaded()).toBe(true); + await expect + .poll(() => profileFormPage.getDisplayName(), { timeout: 15000 }) + .toBe(updatedDisplayName); +}); diff --git a/autogpt_platform/frontend/src/tests/utils/assertion.ts b/autogpt_platform/frontend/src/playwright/utils/assertion.ts similarity index 100% rename from autogpt_platform/frontend/src/tests/utils/assertion.ts rename to autogpt_platform/frontend/src/playwright/utils/assertion.ts diff --git a/autogpt_platform/frontend/src/playwright/utils/auth.ts b/autogpt_platform/frontend/src/playwright/utils/auth.ts new file mode 100644 index 0000000000..2e737aa780 --- /dev/null +++ b/autogpt_platform/frontend/src/playwright/utils/auth.ts @@ -0,0 +1,284 @@ +import fs from "fs"; +import path from "path"; +import { LoginPage } from "../pages/login.page"; +import { + SEEDED_AUTH_STATE_ACCOUNT_KEYS, + SEEDED_TEST_ACCOUNTS, + SEEDED_TEST_USERS, + getAuthStatePath, +} from "../credentials/accounts"; +import { buildCookieConsentStorageState } from "../credentials/storage-state"; +import { signupTestUser } from "./signup"; +import { getBrowser } from "./get-browser"; +import { skipOnboardingIfPresent } from "./onboarding"; + +export interface TestUser { + email: string; + password: string; + id?: string; + createdAt?: string; +} + +export interface UserPool { + users: TestUser[]; + createdAt: string; + version: string; +} + +const AUTH_STATE_KEYS = [...SEEDED_AUTH_STATE_ACCOUNT_KEYS]; + +export async function createTestUser( + email?: string, + password?: string, + ignoreOnboarding: boolean = true, +): Promise { + const { faker } = await import("@faker-js/faker"); + const userEmail = email || faker.internet.email(); + const userPassword = password || faker.internet.password({ length: 12 }); + + try { + const browser = await getBrowser(); + const context = await browser.newContext(); + const page = await context.newPage(); + + // Auto-accept cookies in test environment to prevent banner from appearing + await page.addInitScript(() => { + window.localStorage.setItem( + "autogpt_cookie_consent", + JSON.stringify({ + hasConsented: true, + timestamp: 
Date.now(), + analytics: true, + monitoring: true, + }), + ); + }); + + try { + const testUser = await signupTestUser( + page, + userEmail, + userPassword, + ignoreOnboarding, + false, + ); + return testUser; + } finally { + await page.close(); + await context.close(); + await browser.close(); + } + } catch (error) { + console.error(`❌ Error creating test user ${userEmail}:`, error); + throw error; + } +} + +export async function createTestUsers(count: number): Promise { + console.log(`👥 Creating ${count} test users...`); + + const users: TestUser[] = []; + let consecutiveFailures = 0; + + for (let i = 0; i < count; i++) { + try { + const user = await createTestUser(); + users.push(user); + consecutiveFailures = 0; // Reset failure counter on success + console.log(`✅ Created user ${i + 1}/${count}: ${user.email}`); + } catch (error) { + consecutiveFailures++; + console.error(`❌ Failed to create user ${i + 1}/${count}:`, error); + + // If we have too many consecutive failures, stop trying + if (consecutiveFailures >= 3) { + console.error( + `⚠️ Stopping after ${consecutiveFailures} consecutive failures`, + ); + break; + } + } + } + + console.log(`🎉 Successfully created ${users.length}/${count} test users`); + return users; +} + +export async function getTestUser(accountKey?: string): Promise { + if (SEEDED_TEST_USERS.length === 0) { + throw new Error("No seeded E2E users are configured"); + } + + if (accountKey) { + const matchedUser = SEEDED_TEST_USERS.find( + (user) => user.key === accountKey || user.email === accountKey, + ); + + if (!matchedUser) { + throw new Error( + `No seeded E2E user found for account key or email: ${accountKey}`, + ); + } + + return { email: matchedUser.email, password: matchedUser.password }; + } + + const rawWorkerIndex = Number.parseInt( + process.env.TEST_WORKER_INDEX ?? process.env.PLAYWRIGHT_WORKER_INDEX ?? "0", + 10, + ); + const workerIndex = Number.isNaN(rawWorkerIndex) ? 0 : rawWorkerIndex; + const deterministicIndex = + ((workerIndex % SEEDED_TEST_USERS.length) + SEEDED_TEST_USERS.length) % + SEEDED_TEST_USERS.length; + const { email, password } = SEEDED_TEST_USERS[deterministicIndex]; + return { email, password }; +} + +function hasStoredAuthState(accountKey: (typeof AUTH_STATE_KEYS)[number]) { + return fs.existsSync(getAuthStatePath(accountKey)); +} + +function authStateMatchesOrigin( + accountKey: (typeof AUTH_STATE_KEYS)[number], + origin: string, +): boolean { + const statePath = getAuthStatePath(accountKey); + if (!fs.existsSync(statePath)) { + return false; + } + + try { + const state = JSON.parse(fs.readFileSync(statePath, "utf8")) as { + origins?: Array<{ origin?: string }>; + }; + return ( + state.origins?.some((storedOrigin) => storedOrigin.origin === origin) ?? 
+ false + ); + } catch { + return false; + } +} + +export function hasSeededAuthStates(baseURL: string): boolean { + const origin = new URL(baseURL).origin; + return AUTH_STATE_KEYS.every( + (accountKey) => + hasStoredAuthState(accountKey) && + authStateMatchesOrigin(accountKey, origin), + ); +} + +async function authStateHasLiveSession( + baseURL: string, + accountKey: (typeof AUTH_STATE_KEYS)[number], +): Promise { + const browser = await getBrowser(); + + try { + const context = await browser.newContext({ + baseURL, + storageState: getAuthStatePath(accountKey), + }); + const page = await context.newPage(); + + try { + await page.goto("/marketplace"); + await page.waitForLoadState("domcontentloaded"); + await skipOnboardingIfPresent(page, "/marketplace"); + return await page + .getByTestId("profile-popout-menu-trigger") + .waitFor({ state: "visible", timeout: 10_000 }) + .then(() => true) + .catch(() => false); + } finally { + await page.close(); + await context.close(); + } + } catch { + return false; + } finally { + await browser.close(); + } +} + +export async function getInvalidSeededAuthStateKeys( + baseURL: string, +): Promise<(typeof AUTH_STATE_KEYS)[number][]> { + const origin = new URL(baseURL).origin; + const invalidKeys = await Promise.all( + AUTH_STATE_KEYS.map(async (accountKey) => { + if ( + !hasStoredAuthState(accountKey) || + !authStateMatchesOrigin(accountKey, origin) + ) { + return accountKey; + } + + return (await authStateHasLiveSession(baseURL, accountKey)) + ? null + : accountKey; + }), + ); + + return invalidKeys.filter( + (accountKey): accountKey is (typeof AUTH_STATE_KEYS)[number] => + accountKey !== null, + ); +} + +async function createAuthStateForUser( + baseURL: string, + accountKey: (typeof AUTH_STATE_KEYS)[number], +): Promise { + const browser = await getBrowser(); + + try { + const { email, password } = SEEDED_TEST_ACCOUNTS[accountKey]; + const origin = new URL(baseURL).origin; + const context = await browser.newContext({ + baseURL, + storageState: buildCookieConsentStorageState(origin), + }); + const page = await context.newPage(); + const loginPage = new LoginPage(page); + + await page.goto("/login"); + await loginPage.login(email, password); + await page.waitForURL( + (url: URL) => + /\/(onboarding|marketplace|copilot|library)/.test(url.pathname), + { timeout: 20000 }, + ); + await skipOnboardingIfPresent(page, "/marketplace"); + await page.getByTestId("profile-popout-menu-trigger").waitFor({ + state: "visible", + timeout: 10000, + }); + + const statePath = getAuthStatePath(accountKey); + fs.mkdirSync(path.dirname(statePath), { recursive: true }); + await context.storageState({ path: statePath }); + await context.close(); + } catch (error) { + const { email } = SEEDED_TEST_ACCOUNTS[accountKey]; + throw new Error( + `Failed to create auth state for ${email}: ${String( + error, + )}. 
If these seeded QA accounts are missing, seed them with backend/test/e2e_test_data.py before running Playwright.`, + ); + } finally { + await browser.close(); + } +} + +export async function ensureSeededAuthStates(baseURL: string): Promise<void> { + const invalidKeys = await getInvalidSeededAuthStateKeys(baseURL); + + await Promise.all( + invalidKeys.map((accountKey) => + createAuthStateForUser(baseURL, accountKey), + ), + ); +} diff --git a/autogpt_platform/frontend/src/tests/utils/get-browser.ts b/autogpt_platform/frontend/src/playwright/utils/get-browser.ts similarity index 100% rename from autogpt_platform/frontend/src/tests/utils/get-browser.ts rename to autogpt_platform/frontend/src/playwright/utils/get-browser.ts diff --git a/autogpt_platform/frontend/src/tests/utils/onboarding.ts b/autogpt_platform/frontend/src/playwright/utils/onboarding.ts similarity index 70% rename from autogpt_platform/frontend/src/tests/utils/onboarding.ts rename to autogpt_platform/frontend/src/playwright/utils/onboarding.ts index 375babc743..b5fa79abda 100644 --- a/autogpt_platform/frontend/src/tests/utils/onboarding.ts +++ b/autogpt_platform/frontend/src/playwright/utils/onboarding.ts @@ -1,5 +1,14 @@ import { Page, expect } from "@playwright/test"; +function resolveAppUrl(page: Page, destination: string) { + const baseURL = + page.url().startsWith("http://") || page.url().startsWith("https://") + ? page.url() + : (process.env.PLAYWRIGHT_BASE_URL ?? "http://localhost:3000"); + + return new URL(destination, baseURL).toString(); +} + /** * Complete the onboarding wizard via API. * Use this when a test needs an authenticated user who has already finished onboarding @@ -10,8 +19,11 @@ import { Page, expect } from "@playwright/test"; */ export async function completeOnboardingViaAPI(page: Page) { await page.request.post( - "http://localhost:3000/api/proxy/api/onboarding/step?step=VISIT_COPILOT", - { headers: { "Content-Type": "application/json" } }, + resolveAppUrl(page, "/api/proxy/api/onboarding/step"), + { + headers: { "Content-Type": "application/json" }, + params: { step: "VISIT_COPILOT" }, + }, ); } @@ -28,7 +40,7 @@ export async function skipOnboardingIfPresent( if (!url.includes("/onboarding")) return; await completeOnboardingViaAPI(page); - await page.goto(`http://localhost:3000${destination}`); + await page.goto(resolveAppUrl(page, destination)); await page.waitForLoadState("domcontentloaded", { timeout: 10000 }); } @@ -70,8 +82,15 @@ export async function completeOnboardingWizard( } await page.getByRole("button", { name: "Launch Autopilot" }).click(); - // Step 4: Preparing — wait for animation to complete and redirect to /copilot - await page.waitForURL(/\/copilot/, { timeout: 15000 }); + // Step 4: Preparing — require the real transition state to appear first, + // then wait for the app shell on /copilot rather than racing the redirect. 
+ await expect( + page.getByText("Preparing your workspace...", { exact: false }), + ).toBeVisible({ timeout: 10000 }); + await page.waitForURL(/\/copilot/, { timeout: 30000 }); + await expect(page.getByTestId("profile-popout-menu-trigger")).toBeVisible({ + timeout: 15000, + }); return { name, role, painPoints }; } diff --git a/autogpt_platform/frontend/src/tests/utils/selectors.ts b/autogpt_platform/frontend/src/playwright/utils/selectors.ts similarity index 100% rename from autogpt_platform/frontend/src/tests/utils/selectors.ts rename to autogpt_platform/frontend/src/playwright/utils/selectors.ts diff --git a/autogpt_platform/frontend/src/tests/utils/signin.ts b/autogpt_platform/frontend/src/playwright/utils/signin.ts similarity index 100% rename from autogpt_platform/frontend/src/tests/utils/signin.ts rename to autogpt_platform/frontend/src/playwright/utils/signin.ts diff --git a/autogpt_platform/frontend/src/tests/utils/signup.ts b/autogpt_platform/frontend/src/playwright/utils/signup.ts similarity index 98% rename from autogpt_platform/frontend/src/tests/utils/signup.ts rename to autogpt_platform/frontend/src/playwright/utils/signup.ts index 6b7802db9d..c83c760102 100644 --- a/autogpt_platform/frontend/src/tests/utils/signup.ts +++ b/autogpt_platform/frontend/src/playwright/utils/signup.ts @@ -19,7 +19,7 @@ export async function signupTestUser( try { // Navigate to signup page - await page.goto("http://localhost:3000/signup"); + await page.goto("/signup"); // Wait for page to load getText("Create a new account"); @@ -122,7 +122,7 @@ export async function signupAndNavigateToMarketplace( export async function validateSignupForm(page: any): Promise<void> { console.log("🧪 Validating signup form..."); - await page.goto("http://localhost:3000/signup"); + await page.goto("/signup"); // Test empty form submission console.log("❌ Testing empty form submission..."); diff --git a/autogpt_platform/frontend/src/tests/AGENTS.md b/autogpt_platform/frontend/src/tests/AGENTS.md index 1969708e8c..87222559af 100644 --- a/autogpt_platform/frontend/src/tests/AGENTS.md +++ b/autogpt_platform/frontend/src/tests/AGENTS.md @@ -22,7 +22,7 @@ - Flows requiring real browser APIs (clipboard, downloads) - Cross-page navigation that must work end-to-end -**Location:** `src/tests/*.spec.ts` (centralized, as there will be fewer of them) +**Location:** `src/playwright/*.spec.ts` (centralized, as there will be fewer of them) **Import:** Always import `test` and `expect` from `./coverage-fixture` instead of `@playwright/test`. This auto-collects V8 coverage per test for Codecov reporting. @@ -74,6 +74,10 @@ Start with a `main.test.tsx` file and split into smaller files as it grows. 2. Mock API requests via MSW 3. Assert UI scenarios via Testing Library +**Prefer the UI surface over direct hook tests:** if a `use*.ts` hook only exists to support a page/component, test that page/component instead of adding a `renderHook()` test. Reserve direct hook tests for shared hooks with standalone business logic that cannot be exercised cleanly through the UI. + +**Prefer Orval-generated mocks:** use the generated MSW handlers and response builders from `src/app/api/__generated__/endpoints/*/*.msw.ts` instead of hand-built API response objects or mocking a page/component hook.
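+
+ For example, a minimal sketch of the generated-mock pattern (the handler and builder names below are illustrative, not the real exports — look them up in the endpoint's `*.msw.ts` file):
+
+ ```tsx
+ // Hypothetical names: Orval typically emits a `get<Operation>MockHandler` /
+ // `get<Operation>ResponseMock` pair per endpoint; check the generated file.
+ import { server } from "@/mocks/mock-server";
+ import {
+   getListLibraryAgentsMockHandler,
+   getListLibraryAgentsResponseMock,
+ } from "@/app/api/__generated__/endpoints/library/library.msw";
+
+ test("renders agents from the generated mock", async () => {
+   // Register the generated handler for this test only, seeded with the
+   // generated response builder instead of a hand-written JSON object.
+   server.use(getListLibraryAgentsMockHandler(getListLibraryAgentsResponseMock()));
+
+   // ...render the page and assert on the mocked data as usual.
+ });
+ ```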
+ ```tsx // Example: Test page renders data from API import { server } from "@/mocks/mock-server"; @@ -98,7 +102,7 @@ test("shows error when submission fails", async () => { - Pure utility functions (`lib/utils.ts`) - Component rendering with various props - Component state changes -- Custom hooks +- Shared hooks with standalone business logic **Location:** Co-located with the file: `Component.test.tsx` next to `Component.tsx` @@ -172,25 +176,29 @@ src/ ├── mocks/ │ ├── mock-handlers.ts # MSW handlers (auto-generated via Orval) │ └── mock-server.ts # MSW server setup +├── playwright/ +│ ├── *.spec.ts # E2E tests (Playwright) - centralized +│ ├── pages/ # Playwright page objects +│ └── utils/ # Playwright helpers/fixtures └── tests/ ├── integrations/ │ ├── test-utils.tsx # Testing utilities │ └── vitest.setup.tsx # Integration test setup - └── *.spec.ts # E2E tests (Playwright) - centralized + └── AGENTS.md # Testing guidance for agents ``` --- ## Priority Matrix -| Component Type | Test Priority | Recommended Test | -| ------------------- | ------------- | ---------------- | -| Pages/Features | **Highest** | Integration | -| Custom Hooks | High | Unit | -| Utility Functions | High | Unit | -| Organisms (complex) | High | Integration | -| Molecules | Medium | Unit + Storybook | -| Atoms | Medium | Storybook only\* | +| Component Type | Test Priority | Recommended Test | +| ------------------- | ------------- | -------------------------------------- | +| Pages/Features | **Highest** | Integration | +| Custom Hooks | Medium | Parent integration or shared-hook unit | +| Utility Functions | High | Unit | +| Organisms (complex) | High | Integration | +| Molecules | Medium | Unit + Storybook | +| Atoms | Medium | Storybook only\* | \*Atoms are typically simple enough that Storybook visual tests suffice. @@ -218,6 +226,8 @@ test("shows error when deletion fails", async () => { **Generated handlers location:** `src/app/api/__generated__/endpoints/*/` - each endpoint has handlers for different status codes. +For Playwright support code, keep browser-only helpers in `src/playwright/` rather than `src/tests/`. + --- ## Golden Rules @@ -228,3 +238,5 @@ test("shows error when deletion fails", async () => { 4. **Co-locate integration tests** - Keep `__tests__/` folder next to the component 5. **E2E is expensive** - Only for critical happy paths; prefer integration tests 6. **AI agents are good at writing integration tests** - Start with these when adding test coverage +7. **Prefer component/page tests over hook tests** - Don't add `renderHook()` coverage for component implementation details +8. 
**Use generated API mocks** - Prefer Orval MSW helpers over manual API object stubs diff --git a/autogpt_platform/frontend/src/tests/agent-activity.spec.ts b/autogpt_platform/frontend/src/tests/agent-activity.spec.ts deleted file mode 100644 index 4ae4a11d0c..0000000000 --- a/autogpt_platform/frontend/src/tests/agent-activity.spec.ts +++ /dev/null @@ -1,96 +0,0 @@ -import { test, expect } from "./coverage-fixture"; -import { BuildPage } from "./pages/build.page"; -import * as LibraryPage from "./pages/library.page"; -import { LoginPage } from "./pages/login.page"; -import { hasTextContent, hasUrl, isVisible } from "./utils/assertion"; -import { getTestUser } from "./utils/auth"; -import { getSelectors } from "./utils/selectors"; - -test.beforeEach(async ({ page }) => { - const loginPage = new LoginPage(page); - const buildPage = new BuildPage(page); - const testUser = await getTestUser(); - - await page.goto("/login"); - await loginPage.login(testUser.email, testUser.password); - await hasUrl(page, "/marketplace"); - - await page.goto("/build"); - await buildPage.closeTutorial(); - - await buildPage.addBlockByClick("Add to Dictionary"); - await buildPage.waitForNodeOnCanvas(1); - - await buildPage.saveAgent("Test Agent", "Test Description"); - await test - .expect(page) - .toHaveURL(({ searchParams }) => !!searchParams.get("flowID")); - - // Wait for save to complete - await page.waitForTimeout(1000); - - await page.goto("/library"); - // Navigate to the specific agent we just created, not just the first one - await LibraryPage.navigateToAgentByName(page, "Test Agent"); - await LibraryPage.waitForAgentPageLoad(page); -}); - -test("shows badge with count when agent is running", async ({ page }) => { - const { getId } = getSelectors(page); - - // Start the agent run - await LibraryPage.clickRunButton(page); - - // Wait for the badge to appear and check it has a valid count - const badge = getId("agent-activity-badge"); - await isVisible(badge); - - // Check that badge shows a positive number (more flexible than exact count) - await expect(async () => { - const badgeText = await badge.textContent(); - const count = parseInt(badgeText || "0"); - - if (count < 1) { - throw new Error(`Expected badge count >= 1, got: ${badgeText}`); - } - }).toPass({ timeout: 10000 }); -}); - -test("displays the runs on the activity dropdown", async ({ page }) => { - const { getId } = getSelectors(page); - - const activityBtn = getId("agent-activity-button"); - await isVisible(activityBtn); - - // Start the agent run - await LibraryPage.clickRunButton(page); - - // Wait for the activity badge to appear (indicating execution started) - const badge = getId("agent-activity-badge"); - await isVisible(badge); - - // Click to open the dropdown - await activityBtn.click(); - - const dropdown = getId("agent-activity-dropdown"); - await isVisible(dropdown); - - // Check that the agent name appears in the dropdown - await hasTextContent(dropdown, "Test Agent"); - - // Check for execution status - be more flexible with text matching - await expect(async () => { - const dropdownText = await dropdown.textContent(); - const hasAgentName = dropdownText?.includes("Test Agent"); - const hasExecutionStatus = - dropdownText?.includes("queued") || - dropdownText?.includes("running") || - dropdownText?.includes("Started"); - - if (!hasAgentName || !hasExecutionStatus) { - throw new Error( - `Expected agent name and execution status, got: ${dropdownText}`, - ); - } - }).toPass({ timeout: 8000 }); -}); diff --git 
a/autogpt_platform/frontend/src/tests/agent-dashboard.spec.ts b/autogpt_platform/frontend/src/tests/agent-dashboard.spec.ts deleted file mode 100644 index ec7ac3bfa0..0000000000 --- a/autogpt_platform/frontend/src/tests/agent-dashboard.spec.ts +++ /dev/null @@ -1,260 +0,0 @@ -import { test, expect } from "./coverage-fixture"; -import { getTestUserWithLibraryAgents } from "./credentials"; -import { LoginPage } from "./pages/login.page"; -import { hasUrl, isHidden } from "./utils/assertion"; -import { getSelectors } from "./utils/selectors"; - -test.beforeEach(async ({ page }) => { - const loginPage = new LoginPage(page); - await page.goto("/login"); - const richUser = getTestUserWithLibraryAgents(); - await loginPage.login(richUser.email, richUser.password); - await hasUrl(page, "/marketplace"); -}); - -test("dashboard page loads successfully", async ({ page }) => { - const { getText } = getSelectors(page); - await page.goto("/profile/dashboard"); - - await expect(getText("Agent dashboard")).toBeVisible(); - await expect(getText("Submit a New Agent")).toBeVisible(); - await expect(getText("Your uploaded agents")).toBeVisible(); -}); - -test("submit agent button works correctly", async ({ page }) => { - const { getId, getText } = getSelectors(page); - - await page.goto("/profile/dashboard"); - const submitAgentButton = getId("submit-agent-button"); - await expect(submitAgentButton).toBeVisible(); - await submitAgentButton.click(); - - await expect(getText("Publish Agent")).toBeVisible(); - await expect( - getText("Select your project that you'd like to publish"), - ).toBeVisible(); - - await page.locator('button[aria-label="Close"]').click(); - await expect(getText("Publish Agent")).not.toBeVisible(); -}); - -test("agent table view action works correctly for rejected agents", async ({ - page, -}) => { - await page.goto("/profile/dashboard"); - - const agentTable = page.getByTestId("agent-table"); - await expect(agentTable).toBeVisible(); - - const rows = agentTable.getByTestId("agent-table-row"); - - // Find a row with rejected status - const rejectedRow = rows.filter({ hasText: "Rejected" }).first(); - if (!(await rejectedRow.count())) { - console.log("No rejected agents available; skipping view test."); - return; - } - - await rejectedRow.scrollIntoViewIfNeeded(); - - const actionsButton = rejectedRow.getByTestId("agent-table-row-actions"); - await actionsButton.waitFor({ state: "visible", timeout: 10000 }); - await actionsButton.scrollIntoViewIfNeeded(); - await actionsButton.click(); - - // View button testing - const viewButton = page.getByRole("menuitem", { name: "View" }); - await expect(viewButton).toBeVisible(); - await viewButton.click(); - - const modal = page.getByTestId("publish-agent-modal"); - await expect(modal).toBeVisible(); - const viewAgentName = modal.getByText("Agent is awaiting review"); - await expect(viewAgentName).toBeVisible(); - - await page.getByRole("button", { name: "Done" }).click(); - await expect(modal).not.toBeVisible(); -}); - -test("agent table delete action works correctly", async ({ page }) => { - await page.goto("/profile/dashboard"); - - const agentTable = page.getByTestId("agent-table"); - await expect(agentTable).toBeVisible(); - - const rows = agentTable.getByTestId("agent-table-row"); - - // Delete button testing — only works for PENDING submissions - const beforeCount = await rows.count(); - - if (beforeCount === 0) { - console.log("No agents available; skipping delete flow."); - return; - } - - // Find a PENDING submission to delete - const 
pendingRow = rows.filter({ hasText: "Pending" }).first(); - if (!(await pendingRow.count())) { - console.log("No pending agents available; skipping delete flow."); - return; - } - - const deletedSubmissionId = - await pendingRow.getAttribute("data-submission-id"); - await pendingRow.scrollIntoViewIfNeeded(); - - const delActionsButton = pendingRow.getByTestId("agent-table-row-actions"); - await delActionsButton.waitFor({ state: "visible", timeout: 10000 }); - await delActionsButton.scrollIntoViewIfNeeded(); - await delActionsButton.click(); - - const deleteButton = page.getByRole("menuitem", { name: "Delete" }); - await expect(deleteButton).toBeVisible(); - await deleteButton.click(); - - // Assert that the card with the deleted agent ID is not visible - await isHidden(page.locator(`[data-submission-id="${deletedSubmissionId}"]`)); -}); - -test("edit and delete actions are unavailable for non-pending submissions", async ({ - page, -}) => { - await page.goto("/profile/dashboard"); - - const agentTable = page.getByTestId("agent-table"); - await expect(agentTable).toBeVisible(); - - const rows = agentTable.getByTestId("agent-table-row"); - - // Test with rejected submissions (view only) - const rejectedRow = rows.filter({ hasText: "Rejected" }).first(); - if (await rejectedRow.count()) { - await rejectedRow.scrollIntoViewIfNeeded(); - const actionsButton = rejectedRow.getByTestId("agent-table-row-actions"); - await actionsButton.waitFor({ state: "visible", timeout: 10000 }); - await actionsButton.scrollIntoViewIfNeeded(); - await actionsButton.click(); - - await expect(page.getByRole("menuitem", { name: "View" })).toBeVisible(); - await expect(page.getByRole("menuitem", { name: "Edit" })).toHaveCount(0); - await expect(page.getByRole("menuitem", { name: "Delete" })).toHaveCount(0); - - // Close the menu - await page.keyboard.press("Escape"); - } - - // Test with approved submissions (view only) - const approvedRow = rows.filter({ hasText: "Approved" }).first(); - if (await approvedRow.count()) { - await approvedRow.scrollIntoViewIfNeeded(); - const actionsButton = approvedRow.getByTestId("agent-table-row-actions"); - await actionsButton.waitFor({ state: "visible", timeout: 10000 }); - await actionsButton.scrollIntoViewIfNeeded(); - await actionsButton.click(); - - await expect(page.getByRole("menuitem", { name: "View" })).toBeVisible(); - await expect(page.getByRole("menuitem", { name: "Edit" })).toHaveCount(0); - await expect(page.getByRole("menuitem", { name: "Delete" })).toHaveCount(0); - } -}); - -test("editing a pending submission works correctly", async ({ page }) => { - await page.goto("/profile/dashboard"); - - const agentTable = page.getByTestId("agent-table"); - await expect(agentTable).toBeVisible(); - - const rows = agentTable.getByTestId("agent-table-row"); - - // Find a PENDING submission to edit (only PENDING submissions can be edited) - const pendingRow = rows.filter({ hasText: "Pending" }).first(); - if (!(await pendingRow.count())) { - console.log("No pending agents available; skipping edit test."); - return; - } - - const beforeCount = await rows.count(); - - await pendingRow.scrollIntoViewIfNeeded(); - const actionsButton = pendingRow.getByTestId("agent-table-row-actions"); - await actionsButton.waitFor({ state: "visible", timeout: 10000 }); - await actionsButton.scrollIntoViewIfNeeded(); - await actionsButton.click(); - - const editButton = page.getByRole("menuitem", { name: "Edit" }); - await expect(editButton).toBeVisible(); - await editButton.click(); - - const 
editModal = page.getByTestId("edit-agent-modal"); - await expect(editModal).toBeVisible(); - - const newTitle = `E2E Edit Pending ${Date.now()}`; - await page.getByRole("textbox", { name: "Title" }).fill(newTitle); - await page - .getByRole("textbox", { name: "Changes Summary" }) - .fill("E2E change - updating pending submission"); - - await page.getByRole("button", { name: "Update submission" }).click(); - await expect(editModal).not.toBeVisible(); - - // A new submission should appear with pending state - await expect(async () => { - const afterCount = await rows.count(); - expect(afterCount).toBeGreaterThan(beforeCount); - }).toPass(); - - const newRow = rows.filter({ hasText: newTitle }).first(); - await expect(newRow).toBeVisible(); - await expect(newRow).toContainText(/Awaiting review/); -}); - -test("editing a pending agent updates the same submission in place", async ({ - page, -}) => { - await page.goto("/profile/dashboard"); - - const agentTable = page.getByTestId("agent-table"); - await expect(agentTable).toBeVisible(); - - const rows = agentTable.getByTestId("agent-table-row"); - - const pendingRow = rows.filter({ hasText: /Awaiting review/ }).first(); - if (!(await pendingRow.count())) { - console.log("No pending agents available; skipping pending edit test."); - return; - } - - const beforeCount = await rows.count(); - - await pendingRow.scrollIntoViewIfNeeded(); - const actionsButton = pendingRow.getByTestId("agent-table-row-actions"); - await actionsButton.waitFor({ state: "visible", timeout: 10000 }); - await actionsButton.scrollIntoViewIfNeeded(); - await actionsButton.click(); - - const editButton = page.getByRole("menuitem", { name: "Edit" }); - await expect(editButton).toBeVisible(); - await editButton.click(); - - const editModal = page.getByTestId("edit-agent-modal"); - await expect(editModal).toBeVisible(); - - const newTitle = `E2E Edit Pending ${Date.now()}`; - await page.getByRole("textbox", { name: "Title" }).fill(newTitle); - await page - .getByRole("textbox", { name: "Changes Summary" }) - .fill("E2E change - pending -> same submission"); - - await page.getByRole("button", { name: "Update submission" }).click(); - await expect(editModal).not.toBeVisible(); - - // Count should remain the same - await expect(async () => { - const afterCount = await rows.count(); - expect(afterCount).toBe(beforeCount); - }).toPass(); - - const updatedRow = rows.filter({ hasText: newTitle }).first(); - await expect(updatedRow).toBeVisible(); - await expect(updatedRow).toContainText(/Awaiting review/); -}); diff --git a/autogpt_platform/frontend/src/tests/api-keys.spec.ts b/autogpt_platform/frontend/src/tests/api-keys.spec.ts deleted file mode 100644 index 8c59ced981..0000000000 --- a/autogpt_platform/frontend/src/tests/api-keys.spec.ts +++ /dev/null @@ -1,65 +0,0 @@ -import { expect, test } from "./coverage-fixture"; -import { getTestUserWithLibraryAgents } from "./credentials"; -import { LoginPage } from "./pages/login.page"; -import { hasUrl } from "./utils/assertion"; -import { getSelectors } from "./utils/selectors"; - -test.describe("API Keys Page", () => { - test.beforeEach(async ({ page }) => { - const loginPage = new LoginPage(page); - await page.goto("/login"); - const richUser = getTestUserWithLibraryAgents(); - await loginPage.login(richUser.email, richUser.password); - await hasUrl(page, "/marketplace"); - }); - - test("should redirect to login page when user is not authenticated", async ({ - browser, - }) => { - const context = await browser.newContext(); - const page 
= await context.newPage(); - - try { - await page.goto("/profile/api-keys"); - await hasUrl(page, "/login?next=%2Fprofile%2Fapi-keys"); - } finally { - await page.close(); - await context.close(); - } - }); - - test("should create a new API key successfully", async ({ page }) => { - const { getButton, getField } = getSelectors(page); - await page.goto("/profile/api-keys"); - await getButton("Create Key").click(); - - await getField("Name").fill("Test Key"); - await getButton("Create").click(); - - await expect( - page.getByText("AutoGPT Platform API Key Created"), - ).toBeVisible(); - await getButton("Close").first().click(); - - await expect(page.getByText("Test Key").first()).toBeVisible(); - }); - - test("should revoke an existing API key", async ({ page }) => { - const { getRole, getId } = getSelectors(page); - await page.goto("/profile/api-keys"); - - const apiKeyRow = getId("api-key-row").first(); - const apiKeyContent = await apiKeyRow - .getByTestId("api-key-id") - .textContent(); - const apiKeyActions = apiKeyRow.getByTestId("api-key-actions").first(); - - await apiKeyActions.click(); - await getRole("menuitem", "Revoke").click(); - await expect( - page.getByText("AutoGPT Platform API key revoked successfully"), - ).toBeVisible(); - - await expect(page.getByText(apiKeyContent!)).not.toBeVisible(); - }); -}); diff --git a/autogpt_platform/frontend/src/tests/build.spec.ts b/autogpt_platform/frontend/src/tests/build.spec.ts deleted file mode 100644 index ad0b9524d0..0000000000 --- a/autogpt_platform/frontend/src/tests/build.spec.ts +++ /dev/null @@ -1,134 +0,0 @@ -import { test, expect } from "./coverage-fixture"; -import { BuildPage } from "./pages/build.page"; -import { LoginPage } from "./pages/login.page"; -import { hasUrl } from "./utils/assertion"; -import { getTestUser } from "./utils/auth"; - -test.describe("Builder", () => { - let buildPage: BuildPage; - - test.beforeEach(async ({ page }) => { - test.setTimeout(60000); - const loginPage = new LoginPage(page); - const testUser = await getTestUser(); - - buildPage = new BuildPage(page); - - await page.goto("/login"); - await loginPage.login(testUser.email, testUser.password); - await hasUrl(page, "/marketplace"); - - await page.goto("/build"); - await page.waitForLoadState("domcontentloaded"); - await buildPage.closeTutorial(); - }); - - // --- Core tests --- - - test("build page loads successfully", async () => { - await expect(buildPage.isLoaded()).resolves.toBeTruthy(); - await expect( - buildPage.getPlaywrightPage().getByTestId("blocks-control-blocks-button"), - ).toBeVisible(); - await expect( - buildPage.getPlaywrightPage().getByTestId("save-control-save-button"), - ).toBeVisible(); - }); - - test("user can add a block via block menu", async () => { - const initialCount = await buildPage.getNodeCount(); - await buildPage.addBlockByClick("Store Value"); - await buildPage.waitForNodeOnCanvas(initialCount + 1); - expect(await buildPage.getNodeCount()).toBe(initialCount + 1); - }); - - test("user can add multiple blocks", async () => { - await buildPage.addBlockByClick("Store Value"); - await buildPage.waitForNodeOnCanvas(1); - - await buildPage.addBlockByClick("Store Value"); - await buildPage.waitForNodeOnCanvas(2); - - expect(await buildPage.getNodeCount()).toBe(2); - }); - - test("user can remove a block", async () => { - await buildPage.addBlockByClick("Store Value"); - await buildPage.waitForNodeOnCanvas(1); - - // Deselect, then re-select the node and delete - await buildPage.clickCanvas(); - await 
buildPage.selectNode(0); - await buildPage.deleteSelectedNodes(); - - await expect(buildPage.getNodeLocator()).toHaveCount(0, { timeout: 5000 }); - }); - - test("user can save an agent", async ({ page }) => { - await buildPage.addBlockByClick("Store Value"); - await buildPage.waitForNodeOnCanvas(1); - - await buildPage.saveAgent("E2E Test Agent", "Created by e2e test"); - await buildPage.waitForSaveComplete(); - - expect(page.url()).toContain("flowID="); - }); - - test("user can save and run button becomes enabled", async () => { - await buildPage.addBlockByClick("Store Value"); - await buildPage.waitForNodeOnCanvas(1); - - await buildPage.saveAgent("Runnable Agent", "Test run button"); - await buildPage.waitForSaveComplete(); - await buildPage.waitForSaveButton(); - - await expect(buildPage.isRunButtonEnabled()).resolves.toBeTruthy(); - }); - - // --- Copy / Paste test --- - - test("user can copy and paste a node", async ({ context }) => { - await context.grantPermissions(["clipboard-read", "clipboard-write"]); - - await buildPage.addBlockByClick("Store Value"); - await buildPage.waitForNodeOnCanvas(1); - - await buildPage.selectNode(0); - await buildPage.copyViaKeyboard(); - await buildPage.pasteViaKeyboard(); - - await buildPage.waitForNodeOnCanvas(2); - expect(await buildPage.getNodeCount()).toBe(2); - }); - - // --- Run agent test --- - - test("user can run an agent from the builder", async () => { - await buildPage.addBlockByClick("Store Value"); - await buildPage.waitForNodeOnCanvas(1); - - // Save the agent (required before running) - await buildPage.saveAgent("Run Test Agent", "Testing run from builder"); - await buildPage.waitForSaveComplete(); - await buildPage.waitForSaveButton(); - - // Click run button - await buildPage.clickRunButton(); - - // Either the run dialog appears or the agent starts running directly - const runDialogOrRunning = await Promise.race([ - buildPage - .getPlaywrightPage() - .locator('[data-id="run-input-dialog-content"]') - .waitFor({ state: "visible", timeout: 10000 }) - .then(() => "dialog"), - buildPage - .getPlaywrightPage() - .locator('[data-id="stop-graph-button"]') - .waitFor({ state: "visible", timeout: 10000 }) - .then(() => "running"), - ]).catch(() => "timeout"); - - expect(["dialog", "running"]).toContain(runDialogOrRunning); - }); -}); diff --git a/autogpt_platform/frontend/src/tests/credentials/index.ts b/autogpt_platform/frontend/src/tests/credentials/index.ts deleted file mode 100644 index bc4663a045..0000000000 --- a/autogpt_platform/frontend/src/tests/credentials/index.ts +++ /dev/null @@ -1,28 +0,0 @@ -// E2E Test Credentials and Constants -export const TEST_CREDENTIALS = { - email: "test123@gmail.com", - password: "testpassword123", -} as const; - -export function getTestUserWithLibraryAgents() { - return TEST_CREDENTIALS; -} - -// Dummy constant to help developers identify agents that don't need input -export const DummyInput = "DummyInput"; - -// This will be used for testing agent submission for test123@gmail.com -export const TEST_AGENT_DATA = { - name: "Test Agent Submission", - description: - "This is a test agent submission specifically created for frontend testing purposes.", - image_urls: [ - "https://picsum.photos/200/300", - "https://picsum.photos/200/301", - "https://picsum.photos/200/302", - ], - video_url: "https://www.youtube.com/watch?v=test123", - sub_heading: "A test agent for frontend testing", - categories: ["test", "demo", "frontend"], - changes_summary: "Initial test submission", -} as const; diff --git 
a/autogpt_platform/frontend/src/tests/global-setup.ts b/autogpt_platform/frontend/src/tests/global-setup.ts deleted file mode 100644 index 901eb117ef..0000000000 --- a/autogpt_platform/frontend/src/tests/global-setup.ts +++ /dev/null @@ -1,52 +0,0 @@ -import { FullConfig } from "@playwright/test"; -import { createTestUsers, saveUserPool, loadUserPool } from "./utils/auth"; - -async function globalSetup(config: FullConfig) { - console.log("🚀 Starting global test setup..."); - - try { - const existingUserPool = await loadUserPool(); - - if (existingUserPool && existingUserPool.users.length > 0) { - console.log( - `♻️ Found existing user pool with ${existingUserPool.users.length} users`, - ); - console.log("✅ Using existing user pool"); - return; - } - - // Create test users using signup page - const numberOfUsers = (config.workers || 1) + 8; // workers + buffer - console.log(`👥 Creating ${numberOfUsers} test users via signup...`); - console.log("⏳ Note: This may take a few minutes in CI environments"); - - const users = await createTestUsers(numberOfUsers); - - if (users.length === 0) { - throw new Error("Failed to create any test users"); - } - - // Require at least a minimum number of users for tests to work - const minUsers = Math.max(config.workers || 1, 2); - if (users.length < minUsers) { - throw new Error( - `Only created ${users.length} users but need at least ${minUsers} for tests to run properly`, - ); - } - - // Save user pool - await saveUserPool(users); - - console.log("✅ Global setup completed successfully!"); - console.log(`📊 Created ${users.length} test users via signup page`); - } catch (error) { - console.error("❌ Global setup failed:", error); - console.error("💡 This is likely due to:"); - console.error(" 1. Backend services not fully ready"); - console.error(" 2. Network timeouts in CI environment"); - console.error(" 3. 
Database or authentication issues"); - throw error; - } -} - -export default globalSetup; diff --git a/autogpt_platform/frontend/src/tests/integrations/vitest.setup.tsx b/autogpt_platform/frontend/src/tests/integrations/vitest.setup.tsx index bda6a2679d..c4931856bc 100644 --- a/autogpt_platform/frontend/src/tests/integrations/vitest.setup.tsx +++ b/autogpt_platform/frontend/src/tests/integrations/vitest.setup.tsx @@ -2,11 +2,15 @@ import { beforeAll, afterAll, afterEach } from "vitest"; import { server } from "@/mocks/mock-server"; import { mockNextjsModules } from "./setup-nextjs-mocks"; import { mockSupabaseRequest } from "./mock-supabase-request"; +import { cleanup } from "@testing-library/react"; beforeAll(() => { mockNextjsModules(); mockSupabaseRequest(); // If you need user's data - please mock supabase actions in your specific test - it sends null user [It's only to avoid cookies() call] return server.listen({ onUnhandledRequest: "error" }); }); -afterEach(() => server.resetHandlers()); +afterEach(() => { + cleanup(); + server.resetHandlers(); +}); afterAll(() => server.close()); diff --git a/autogpt_platform/frontend/src/tests/library.spec.ts b/autogpt_platform/frontend/src/tests/library.spec.ts deleted file mode 100644 index 98ba698398..0000000000 --- a/autogpt_platform/frontend/src/tests/library.spec.ts +++ /dev/null @@ -1,250 +0,0 @@ -import { test, expect } from "./coverage-fixture"; -import path from "path"; -import { getTestUserWithLibraryAgents } from "./credentials"; -import { LibraryPage } from "./pages/library.page"; -import { LoginPage } from "./pages/login.page"; -import { hasUrl } from "./utils/assertion"; -import { getSelectors } from "./utils/selectors"; - -test.describe("Library", () => { - let libraryPage: LibraryPage; - - test.beforeEach(async ({ page }) => { - libraryPage = new LibraryPage(page); - - await page.goto("/login"); - const loginPage = new LoginPage(page); - const richUser = getTestUserWithLibraryAgents(); - await loginPage.login(richUser.email, richUser.password); - await hasUrl(page, "/marketplace"); - }); - - test("library page loads successfully", async ({ page }) => { - const { getId } = getSelectors(page); - await page.goto("/library"); - - await expect(getId("search-bar").first()).toBeVisible(); - await expect(getId("import-button").first()).toBeVisible(); - await expect(getId("sort-by-dropdown").first()).toBeVisible(); - }); - - test("agents are visible and cards work correctly", async ({ page }) => { - await page.goto("/library"); - - const agents = await libraryPage.getAgents(); - expect(agents.length).toBeGreaterThan(0); - - const firstAgent = agents[0]; - expect(firstAgent).toBeTruthy(); - - await libraryPage.clickAgent(firstAgent); - await hasUrl(page, `/library/agents/${firstAgent.id}`); - - await libraryPage.navigateToLibrary(); - - const updatedAgents = await libraryPage.getAgents(); - const agentWithBuilder = updatedAgents.find((agent) => - agent.openInBuilderUrl.includes("/build"), - ); - - if (agentWithBuilder) { - const [newPage] = await Promise.all([ - page.context().waitForEvent("page"), - libraryPage.clickOpenInBuilder(agentWithBuilder), - ]); - await newPage.waitForLoadState(); - test.expect(newPage.url()).toContain(`/build`); - await newPage.close(); - } - }); - - test("pagination works correctly", async ({ page }, testInfo) => { - test.setTimeout(testInfo.timeout * 3); - await page.goto("/library"); - - const PAGE_SIZE = 20; - const paginationResult = await libraryPage.testPagination(); - - if (paginationResult.initialCount 
>= PAGE_SIZE) { - expect(paginationResult.finalCount).toBeGreaterThanOrEqual( - paginationResult.initialCount, - ); - expect(paginationResult.hasMore).toBeTruthy(); - } - - await libraryPage.isPaginationWorking(); - - const allAgents = await libraryPage.getAgentsWithPagination(); - test.expect(allAgents.length).toBeGreaterThan(0); - - const displayedCount = await libraryPage.getAgentCount(); - test.expect(allAgents.length).toEqual(displayedCount); - }); - - test("searching works correctly", async ({ page }) => { - await page.goto("/library"); - - const allAgents = await libraryPage.getAgents(); - expect(allAgents.length).toBeGreaterThan(0); - - const initialAgentCount = await libraryPage.getAgentCount(); - expect(initialAgentCount).toBeGreaterThan(0); - - const firstAgent = allAgents[0]; - await libraryPage.searchAgents(firstAgent.name); - await libraryPage.waitForAgentsToLoad(); - - const searchResults = await libraryPage.getAgents(); - expect(searchResults.length).toBeGreaterThan(0); - - const foundAgent = searchResults.find( - (agent) => agent.name === firstAgent.name, - ); - expect(foundAgent).toBeTruthy(); - - const searchValue = await libraryPage.getSearchValue(); - expect(searchValue).toBe(firstAgent.name); - - const partialSearchTerm = firstAgent.name.substring(0, 3); - await libraryPage.searchAgents(partialSearchTerm); - await libraryPage.waitForAgentsToLoad(); - - const partialSearchResults = await libraryPage.getAgents(); - expect(partialSearchResults.length).toBeGreaterThan(0); - - const matchingAgents = partialSearchResults.filter((agent) => - agent.name.toLowerCase().includes(partialSearchTerm.toLowerCase()), - ); - expect(matchingAgents.length).toBeGreaterThan(0); - - await libraryPage.searchAgents("nonexistentagentnamethatdoesnotexist"); - const noResults = await libraryPage.getAgentCount(); - expect(noResults).toBe(0); - - const hasNoAgentsMessage = await libraryPage.hasNoAgentsMessage(); - expect(hasNoAgentsMessage).toBeTruthy(); - - await libraryPage.clearSearch(); - await libraryPage.waitForAgentsToLoad(); - - const clearedSearchCount = await libraryPage.getAgentCount(); - test.expect(clearedSearchCount).toEqual(initialAgentCount); - - const clearedSearchValue = await libraryPage.getSearchValue(); - test.expect(clearedSearchValue).toBe(""); - }); - - test("pagination while searching works correctly", async ({ - page, - }, testInfo) => { - test.setTimeout(testInfo.timeout * 3); - await page.goto("/library"); - - const allAgents = await libraryPage.getAgents(); - test.expect(allAgents.length).toBeGreaterThan(0); - - const searchTerm = "Agent"; - - await libraryPage.searchAgents(searchTerm); - await libraryPage.waitForAgentsToLoad(); - - const initialSearchResults = await libraryPage.getAgents(); - expect(initialSearchResults.length).toBeGreaterThan(0); - - const matchingResults = initialSearchResults.filter((agent) => - agent.name.toLowerCase().includes(searchTerm.toLowerCase()), - ); - expect(matchingResults.length).toEqual(initialSearchResults.length); - - const PAGE_SIZE = 20; - const searchPaginationResult = await libraryPage.testPagination(); - - if (searchPaginationResult.initialCount >= PAGE_SIZE) { - expect(searchPaginationResult.finalCount).toBeGreaterThanOrEqual( - searchPaginationResult.initialCount, - ); - - const allPaginatedResults = await libraryPage.getAgentsWithPagination(); - const matchingPaginatedResults = allPaginatedResults.filter((agent) => - agent.name.toLowerCase().includes(searchTerm.toLowerCase()), - ); - 
expect(matchingPaginatedResults.length).toEqual( - allPaginatedResults.length, - ); - } - - await libraryPage.scrollAndWaitForNewAgents(); - - const finalSearchResults = await libraryPage.getAgents(); - const finalMatchingResults = finalSearchResults.filter((agent) => - agent.name.toLowerCase().includes(searchTerm.toLowerCase()), - ); - expect(finalMatchingResults.length).toEqual(finalSearchResults.length); - - const preservedSearchValue = await libraryPage.getSearchValue(); - expect(preservedSearchValue).toBe(searchTerm); - - await libraryPage.clearSearch(); - await libraryPage.waitForAgentsToLoad(); - - const clearedResults = await libraryPage.getAgents(); - expect(clearedResults.length).toBeGreaterThanOrEqual( - initialSearchResults.length, - ); - }); - - test("uploading an agent works correctly", async ({ page }) => { - await page.goto("/library"); - - await libraryPage.openUploadDialog(); - - expect(await libraryPage.isUploadDialogVisible()).toBeTruthy(); - expect(await libraryPage.isUploadButtonEnabled()).toBeFalsy(); - - const testAgentName = "Test Upload Agent"; - const testAgentDescription = "This is a test agent uploaded via automation"; - await libraryPage.fillUploadForm(testAgentName, testAgentDescription); - - const fileInput = page.locator('input[type="file"]'); - const testAgentPath = path.resolve( - __dirname, - "assets", - "testing_agent.json", - ); - await fileInput.setInputFiles(testAgentPath); - - // Wait for file to be processed and upload button to be enabled - const uploadButton = page.getByRole("button", { name: "Upload" }); - await uploadButton.waitFor({ state: "visible", timeout: 10000 }); - await expect(uploadButton).toBeEnabled({ timeout: 10000 }); - - expect(await libraryPage.isUploadButtonEnabled()).toBeTruthy(); - - await page.getByRole("button", { name: "Upload" }).click(); - - await page.waitForURL("**/build**", { timeout: 10000 }); - expect(page.url()).toContain("/build"); - - await page.goto("/library"); - - await libraryPage.searchAgents(testAgentName); - await libraryPage.waitForAgentsToLoad(); - - const searchResults = await libraryPage.getAgents(); - test.expect(searchResults.length).toBeGreaterThan(0); - - const uploadedAgent = searchResults.find((agent) => - agent.name.includes(testAgentName), - ); - test.expect(uploadedAgent).toBeTruthy(); - - if (uploadedAgent) { - test.expect(uploadedAgent.name).toContain(testAgentName); - test.expect(uploadedAgent.seeRunsUrl).toBeTruthy(); - test.expect(uploadedAgent.openInBuilderUrl).toBeTruthy(); - } - - await libraryPage.clearSearch(); - await libraryPage.waitForAgentsToLoad(); - }); -}); diff --git a/autogpt_platform/frontend/src/tests/marketplace-agent.spec.ts b/autogpt_platform/frontend/src/tests/marketplace-agent.spec.ts deleted file mode 100644 index fb38b90d63..0000000000 --- a/autogpt_platform/frontend/src/tests/marketplace-agent.spec.ts +++ /dev/null @@ -1,120 +0,0 @@ -import { expect, test } from "./coverage-fixture"; -import { getTestUserWithLibraryAgents } from "./credentials"; -import { LoginPage } from "./pages/login.page"; -import { MarketplacePage } from "./pages/marketplace.page"; -import { hasUrl, isVisible, matchesUrl } from "./utils/assertion"; -import { getSelectors } from "./utils/selectors"; - -function escapeRegExp(value: string) { - return value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); -} - -test.describe("Marketplace Agent Page - Basic Functionality", () => { - test("User can access agent page when logged out", async ({ page }) => { - const marketplacePage = new MarketplacePage(page); 
- - await marketplacePage.goto(page); - await hasUrl(page, "/marketplace"); - - const firstStoreCard = await marketplacePage.getFirstTopAgent(); - await firstStoreCard.click(); - - await page.waitForURL("**/marketplace/agent/**"); - await matchesUrl(page, /\/marketplace\/agent\/.+/); - }); - - test("User can access agent page when logged in", async ({ page }) => { - const loginPage = new LoginPage(page); - const marketplacePage = new MarketplacePage(page); - - await loginPage.goto(); - const richUser = getTestUserWithLibraryAgents(); - await loginPage.login(richUser.email, richUser.password); - await hasUrl(page, "/marketplace"); - await marketplacePage.goto(page); - await hasUrl(page, "/marketplace"); - - const firstStoreCard = await marketplacePage.getFirstTopAgent(); - await firstStoreCard.click(); - - await page.waitForURL("**/marketplace/agent/**"); - await matchesUrl(page, /\/marketplace\/agent\/.+/); - }); - - test("Agent page details are visible", async ({ page }) => { - const { getId } = getSelectors(page); - - const marketplacePage = new MarketplacePage(page); - await marketplacePage.goto(page); - - const firstStoreCard = await marketplacePage.getFirstTopAgent(); - await firstStoreCard.click(); - await page.waitForURL("**/marketplace/agent/**"); - - const agentTitle = getId("agent-title"); - await isVisible(agentTitle); - - const agentDescription = getId("agent-description"); - await isVisible(agentDescription); - - const creatorInfo = getId("agent-creator"); - await isVisible(creatorInfo); - }); - - test("Download button functionality works", async ({ page }) => { - const { getId, getText } = getSelectors(page); - - const marketplacePage = new MarketplacePage(page); - await marketplacePage.goto(page); - - const firstStoreCard = await marketplacePage.getFirstTopAgent(); - await firstStoreCard.click(); - await page.waitForURL("**/marketplace/agent/**"); - - const downloadButton = getId("agent-download-button"); - await isVisible(downloadButton); - await downloadButton.click(); - - const downloadSuccessMessage = getText( - "Your agent has been successfully downloaded.", - ); - await isVisible(downloadSuccessMessage); - }); - - test("Add to library button works and agent appears in library", async ({ - page, - }) => { - const { getId, getText } = getSelectors(page); - - const loginPage = new LoginPage(page); - const marketplacePage = new MarketplacePage(page); - - await loginPage.goto(); - const richUser = getTestUserWithLibraryAgents(); - await loginPage.login(richUser.email, richUser.password); - await hasUrl(page, "/marketplace"); - await marketplacePage.goto(page); - - const firstStoreCard = await marketplacePage.getFirstTopAgent(); - await firstStoreCard.click(); - await page.waitForURL("**/marketplace/agent/**"); - - const agentTitle = await getId("agent-title").textContent(); - if (!agentTitle || !agentTitle.trim()) { - throw new Error("Agent title not found on marketplace agent page"); - } - const agentName = agentTitle.trim(); - - const addToLibraryButton = getId("agent-add-library-button"); - await isVisible(addToLibraryButton); - await addToLibraryButton.click(); - - const addSuccessMessage = getText("Redirecting to your library..."); - await isVisible(addSuccessMessage); - - await page.waitForURL("**/library/agents/**"); - await expect(page).toHaveTitle( - new RegExp(`${escapeRegExp(agentName)} - Library - AutoGPT Platform`), - ); - }); -}); diff --git a/autogpt_platform/frontend/src/tests/marketplace-creator.spec.ts 
b/autogpt_platform/frontend/src/tests/marketplace-creator.spec.ts deleted file mode 100644 index 6fbf4d39be..0000000000 --- a/autogpt_platform/frontend/src/tests/marketplace-creator.spec.ts +++ /dev/null @@ -1,82 +0,0 @@ -import { test } from "./coverage-fixture"; -import { getTestUserWithLibraryAgents } from "./credentials"; -import { LoginPage } from "./pages/login.page"; -import { MarketplacePage } from "./pages/marketplace.page"; -import { hasUrl, isVisible, matchesUrl } from "./utils/assertion"; -import { getSelectors } from "./utils/selectors"; - -test.describe("Marketplace Creator Page – Basic Functionality", () => { - test("User can access creator's page when logged out", async ({ page }) => { - const marketplacePage = new MarketplacePage(page); - - await marketplacePage.goto(page); - await hasUrl(page, "/marketplace"); - - const firstCreatorProfile = - await marketplacePage.getFirstCreatorProfile(page); - await firstCreatorProfile.click(); - - await page.waitForURL("**/marketplace/creator/**"); - await matchesUrl(page, /\/marketplace\/creator\/.+/); - }); - - test("User can access creator's page when logged in", async ({ page }) => { - const loginPage = new LoginPage(page); - const marketplacePage = new MarketplacePage(page); - - await loginPage.goto(); - const richUser = getTestUserWithLibraryAgents(); - await loginPage.login(richUser.email, richUser.password); - await hasUrl(page, "/marketplace"); - - await marketplacePage.goto(page); - await hasUrl(page, "/marketplace"); - - const firstCreatorProfile = - await marketplacePage.getFirstCreatorProfile(page); - await firstCreatorProfile.click(); - - await page.waitForURL("**/marketplace/creator/**"); - await matchesUrl(page, /\/marketplace\/creator\/.+/); - }); - - test("Creator page details are visible", async ({ page }) => { - const { getId } = getSelectors(page); - const marketplacePage = new MarketplacePage(page); - - await marketplacePage.goto(page); - await hasUrl(page, "/marketplace"); - - const firstCreatorProfile = - await marketplacePage.getFirstCreatorProfile(page); - await firstCreatorProfile.click(); - await page.waitForURL("**/marketplace/creator/**"); - - const creatorTitle = getId("creator-title"); - await isVisible(creatorTitle); - - const creatorDescription = getId("creator-description"); - await isVisible(creatorDescription); - }); - - test("Agents in agent by sections navigation works", async ({ page }) => { - const marketplacePage = new MarketplacePage(page); - - await marketplacePage.goto(page); - await hasUrl(page, "/marketplace"); - - const firstCreatorProfile = - await marketplacePage.getFirstCreatorProfile(page); - await firstCreatorProfile.click(); - await page.waitForURL("**/marketplace/creator/**"); - - const firstAgent = page - .locator('[data-testid="store-card"]:visible') - .first(); - await firstAgent.waitFor({ state: "visible", timeout: 15000 }); - - await firstAgent.click(); - await page.waitForURL("**/marketplace/agent/**"); - await matchesUrl(page, /\/marketplace\/agent\/.+/); - }); -}); diff --git a/autogpt_platform/frontend/src/tests/marketplace.spec.ts b/autogpt_platform/frontend/src/tests/marketplace.spec.ts deleted file mode 100644 index 83b0d81d92..0000000000 --- a/autogpt_platform/frontend/src/tests/marketplace.spec.ts +++ /dev/null @@ -1,168 +0,0 @@ -import { expect, test } from "./coverage-fixture"; -import { getTestUserWithLibraryAgents } from "./credentials"; -import { LoginPage } from "./pages/login.page"; -import { MarketplacePage } from "./pages/marketplace.page"; -import { 
hasMinCount, hasUrl, isVisible, matchesUrl } from "./utils/assertion"; - -// Marketplace tests for store agent search functionality -test.describe("Marketplace – Basic Functionality", () => { - test("User can access marketplace page when logged out", async ({ page }) => { - const marketplacePage = new MarketplacePage(page); - - await marketplacePage.goto(page); - await hasUrl(page, "/marketplace"); - - const marketplaceTitle = await marketplacePage.getMarketplaceTitle(page); - await isVisible(marketplaceTitle); - - console.log( - "User can access marketplace page when logged out test passed ✅", - ); - }); - - test("User can access marketplace page when logged in", async ({ page }) => { - const loginPage = new LoginPage(page); - const marketplacePage = new MarketplacePage(page); - - await loginPage.goto(); - const richUser = getTestUserWithLibraryAgents(); - await loginPage.login(richUser.email, richUser.password); - await hasUrl(page, "/marketplace"); - - await marketplacePage.goto(page); - await hasUrl(page, "/marketplace"); - - const marketplaceTitle = await marketplacePage.getMarketplaceTitle(page); - await isVisible(marketplaceTitle); - - console.log( - "User can access marketplace page when logged in test passed ✅", - ); - }); - - test("Featured agents, top agents, and featured creators are visible", async ({ - page, - }) => { - const marketplacePage = new MarketplacePage(page); - await marketplacePage.goto(page); - - const featuredAgentsSection = - await marketplacePage.getFeaturedAgentsSection(page); - await isVisible(featuredAgentsSection); - const featuredAgentCards = - await marketplacePage.getFeaturedAgentCards(page); - await hasMinCount(featuredAgentCards, 1); - - const topAgentsSection = await marketplacePage.getTopAgentsSection(page); - await isVisible(topAgentsSection); - const topAgentCards = await marketplacePage.getTopAgentCards(page); - await hasMinCount(topAgentCards, 1); - - const featuredCreatorsSection = - await marketplacePage.getFeaturedCreatorsSection(page); - await isVisible(featuredCreatorsSection); - const creatorProfiles = await marketplacePage.getCreatorProfiles(page); - await hasMinCount(creatorProfiles, 1); - - console.log( - "Featured agents, top agents, and featured creators are visible test passed ✅", - ); - }); - - test("Can navigate and interact with marketplace elements", async ({ - page, - }) => { - const marketplacePage = new MarketplacePage(page); - await marketplacePage.goto(page); - - const firstFeaturedAgent = - await marketplacePage.getFirstFeaturedAgent(page); - await firstFeaturedAgent.click(); - await page.waitForURL("**/marketplace/agent/**"); - await matchesUrl(page, /\/marketplace\/agent\/.+/); - await marketplacePage.goto(page); - - const firstTopAgent = await marketplacePage.getFirstTopAgent(); - await firstTopAgent.click(); - await page.waitForURL("**/marketplace/agent/**"); - await matchesUrl(page, /\/marketplace\/agent\/.+/); - await marketplacePage.goto(page); - - const firstCreatorProfile = - await marketplacePage.getFirstCreatorProfile(page); - await firstCreatorProfile.click(); - await page.waitForURL("**/marketplace/creator/**"); - await matchesUrl(page, /\/marketplace\/creator\/.+/); - - console.log( - "Can navigate and interact with marketplace elements test passed ✅", - ); - }); - - test("Complete search flow works correctly", async ({ page }) => { - const marketplacePage = new MarketplacePage(page); - await marketplacePage.goto(page); - - await marketplacePage.searchAndNavigate("DummyInput", page); - - await 
marketplacePage.waitForSearchResults(); - - await matchesUrl(page, /\/marketplace\/search\?searchTerm=/); - - const resultsHeading = page.getByText("Results for:"); - await isVisible(resultsHeading); - - const searchTerm = page.getByText("DummyInput").first(); - await isVisible(searchTerm); - - await expect - .poll(() => marketplacePage.getSearchResultsCount(page), { - timeout: 15000, - }) - .toBeGreaterThan(0); - - console.log("Complete search flow works correctly test passed ✅"); - }); - - // We need to add a test search with filters, but the current business logic for filters doesn't work as expected. We'll add it once we modify that. -}); - -test.describe("Marketplace – Edge Cases", () => { - test("Search for non-existent item renders search page correctly", async ({ - page, - }) => { - const marketplacePage = new MarketplacePage(page); - await marketplacePage.goto(page); - - await marketplacePage.searchAndNavigate("xyznonexistentitemxyz123", page); - - await marketplacePage.waitForSearchResults(); - - await matchesUrl(page, /\/marketplace\/search\?searchTerm=/); - - const resultsHeading = page.getByText("Results for:"); - await isVisible(resultsHeading); - - const searchTerm = page.getByText("xyznonexistentitemxyz123"); - await isVisible(searchTerm); - - // The search page should render either results or a "No results found" message - await expect - .poll( - async () => { - const hasResults = - (await page.locator('[data-testid="store-card"]').count()) > 0; - const hasNoResultsMsg = await page - .getByText("No results found") - .isVisible(); - return hasResults || hasNoResultsMsg; - }, - { timeout: 15000 }, - ) - .toBe(true); - - console.log( - "Search for non-existent item renders search page correctly test passed ✅", - ); - }); -}); diff --git a/autogpt_platform/frontend/src/tests/onboarding.spec.ts b/autogpt_platform/frontend/src/tests/onboarding.spec.ts deleted file mode 100644 index 321469c268..0000000000 --- a/autogpt_platform/frontend/src/tests/onboarding.spec.ts +++ /dev/null @@ -1,114 +0,0 @@ -import { test, expect } from "./coverage-fixture"; -import { signupTestUser } from "./utils/signup"; -import { completeOnboardingWizard } from "./utils/onboarding"; -import { getSelectors } from "./utils/selectors"; - -test("new user completes full onboarding wizard", async ({ page }) => { - // Signup WITHOUT skipping onboarding (ignoreOnboarding=false) - await signupTestUser(page, undefined, undefined, false); - - // Should be on onboarding - await expect(page).toHaveURL(/\/onboarding/); - - // Complete the wizard - await completeOnboardingWizard(page, { - name: "Alice", - role: "Marketing", - painPoints: ["Social media", "Email & outreach"], - }); - - // Should have been redirected to /copilot - await expect(page).toHaveURL(/\/copilot/); - - // User should be authenticated - await page - .getByTestId("profile-popout-menu-trigger") - .waitFor({ state: "visible", timeout: 10000 }); -}); - -test("onboarding wizard step navigation works", async ({ page }) => { - await signupTestUser(page, undefined, undefined, false); - await expect(page).toHaveURL(/\/onboarding/); - - // Step 1: Welcome - await expect(page.getByText("Welcome to AutoGPT")).toBeVisible(); - await page.getByLabel("What should I call you?").fill("Bob"); - await page.getByRole("button", { name: "Continue" }).click(); - - // Step 2: Role — verify we're here, then go back - await expect(page.getByText("What best describes you")).toBeVisible(); - await page.getByText("Back").click(); - - // Should be back on step 1 with name 
preserved - await expect(page.getByText("Welcome to AutoGPT")).toBeVisible(); - await expect(page.getByLabel("What should I call you?")).toHaveValue("Bob"); -}); - -test("onboarding wizard validates required fields", async ({ page }) => { - await signupTestUser(page, undefined, undefined, false); - await expect(page).toHaveURL(/\/onboarding/); - - // Step 1: Continue should be disabled without a name - const continueButton = page.getByRole("button", { name: "Continue" }); - await expect(continueButton).toBeDisabled(); - - // Fill name — continue should become enabled - await page.getByLabel("What should I call you?").fill("Charlie"); - await expect(continueButton).toBeEnabled(); - await continueButton.click(); - - // Step 2: Role — selecting auto-advances to step 3 - await expect(page.getByText("What best describes you")).toBeVisible(); - await page.getByText("Engineering").click(); - - // Step 3: Launch Autopilot should be disabled without any pain points - const launchButton = page.getByRole("button", { name: "Launch Autopilot" }); - await expect(launchButton).toBeDisabled(); - - // Select a pain point — button should become enabled - await page.getByText("Research", { exact: true }).click(); - await expect(launchButton).toBeEnabled(); -}); - -test("completed onboarding redirects away from /onboarding", async ({ - page, -}) => { - // Create user and complete onboarding - await signupTestUser(page, undefined, undefined, false); - await completeOnboardingWizard(page); - - // Try to navigate back to onboarding — should be redirected to /copilot - await page.goto("http://localhost:3000/onboarding"); - await page.waitForURL(/\/copilot/, { timeout: 10000 }); -}); - -test("onboarding URL params sync with steps", async ({ page }) => { - await signupTestUser(page, undefined, undefined, false); - await expect(page).toHaveURL(/\/onboarding/); - - // Step 1: URL may or may not include step=1 on initial load (no param is equivalent to step 1) - await expect(page.getByText("Welcome to AutoGPT")).toBeVisible(); - - // Fill name and go to step 2 - await page.getByLabel("What should I call you?").fill("Test"); - await page.getByRole("button", { name: "Continue" }).click(); - - // URL should show step=2 - await expect(page).toHaveURL(/step=2/); -}); - -test("role-based pain point ordering works", async ({ page }) => { - await signupTestUser(page, undefined, undefined, false); - - // Complete step 1 - await page.getByLabel("What should I call you?").fill("Test"); - await page.getByRole("button", { name: "Continue" }).click(); - - // Select Sales/BD role (auto-advances to step 3) - await page.getByText("Sales / BD").click(); - - // On pain points step, "Finding leads" should be visible (top pick for Sales) - await expect(page.getByText("What's eating your time?")).toBeVisible(); - const { getText } = getSelectors(page); - await expect(getText("Finding leads")).toBeVisible(); -}); diff --git a/autogpt_platform/frontend/src/tests/pages/build.page.ts b/autogpt_platform/frontend/src/tests/pages/build.page.ts deleted file mode 100644 index ad44f94f94..0000000000 --- a/autogpt_platform/frontend/src/tests/pages/build.page.ts +++ /dev/null @@ -1,310 +0,0 @@ -import { expect, Locator, Page } from "@playwright/test"; -import { BasePage } from "./base.page"; - -export class BuildPage extends BasePage { - constructor(page: Page) { - super(page); - } - - // --- Navigation --- - - async goto(): Promise<void> { - await this.page.goto("/build"); - await this.page.waitForLoadState("domcontentloaded"); - } - - async isLoaded(): 
Promise { - try { - await this.page.waitForLoadState("domcontentloaded", { timeout: 10_000 }); - await this.page - .locator(".react-flow") - .waitFor({ state: "visible", timeout: 10_000 }); - return true; - } catch { - return false; - } - } - - async closeTutorial(): Promise { - try { - await this.page - .getByRole("button", { name: "Skip Tutorial", exact: true }) - .click({ timeout: 3000 }); - } catch { - // Tutorial not shown or already dismissed - } - } - - // --- Block Menu --- - - async openBlocksPanel(): Promise { - const popoverContent = this.page.locator( - '[data-id="blocks-control-popover-content"]', - ); - if (!(await popoverContent.isVisible())) { - await this.page.getByTestId("blocks-control-blocks-button").click(); - await popoverContent.waitFor({ state: "visible", timeout: 5000 }); - } - } - - async closeBlocksPanel(): Promise { - const popoverContent = this.page.locator( - '[data-id="blocks-control-popover-content"]', - ); - if (await popoverContent.isVisible()) { - await this.page.getByTestId("blocks-control-blocks-button").click(); - await popoverContent.waitFor({ state: "hidden", timeout: 5000 }); - } - } - - async searchBlock(searchTerm: string): Promise { - const searchInput = this.page.locator( - '[data-id="blocks-control-search-bar"] input[type="text"]', - ); - await searchInput.clear(); - await searchInput.fill(searchTerm); - await this.page.waitForTimeout(300); - } - - private getBlockCardByName(name: string): Locator { - const escapedName = name.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); - const exactName = new RegExp(`^\\s*${escapedName}\\s*$`, "i"); - return this.page - .locator('[data-id^="block-card-"]') - .filter({ has: this.page.locator("span", { hasText: exactName }) }) - .first(); - } - - async addBlockByClick(searchTerm: string): Promise { - await this.openBlocksPanel(); - await this.searchBlock(searchTerm); - - // Wait for any search results to appear - const anyCard = this.page.locator('[data-id^="block-card-"]').first(); - await anyCard.waitFor({ state: "visible", timeout: 10000 }); - - // Click the card matching the search term name - const blockCard = this.getBlockCardByName(searchTerm); - await blockCard.waitFor({ state: "visible", timeout: 5000 }); - await blockCard.click(); - - // Close the panel so it doesn't overlay the canvas - await this.closeBlocksPanel(); - } - - async dragBlockToCanvas(searchTerm: string): Promise { - await this.openBlocksPanel(); - await this.searchBlock(searchTerm); - - const anyCard = this.page.locator('[data-id^="block-card-"]').first(); - await anyCard.waitFor({ state: "visible", timeout: 10000 }); - - const blockCard = this.getBlockCardByName(searchTerm); - await blockCard.waitFor({ state: "visible", timeout: 5000 }); - - const canvas = this.page.locator(".react-flow__pane").first(); - await blockCard.dragTo(canvas); - } - - // --- Nodes on Canvas --- - - getNodeLocator(index?: number): Locator { - const locator = this.page.locator('[data-id^="custom-node-"]'); - return index !== undefined ? 
locator.nth(index) : locator; - } - - async getNodeCount(): Promise { - return await this.getNodeLocator().count(); - } - - async waitForNodeOnCanvas(expectedCount?: number): Promise { - if (expectedCount !== undefined) { - await expect(this.getNodeLocator()).toHaveCount(expectedCount, { - timeout: 10000, - }); - } else { - await this.getNodeLocator() - .first() - .waitFor({ state: "visible", timeout: 10000 }); - } - } - - async selectNode(index: number = 0): Promise { - const node = this.getNodeLocator(index); - await node.click(); - } - - async selectAllNodes(): Promise { - await this.page.locator(".react-flow__pane").first().click(); - const isMac = process.platform === "darwin"; - await this.page.keyboard.press(isMac ? "Meta+a" : "Control+a"); - } - - async deleteSelectedNodes(): Promise { - await this.page.keyboard.press("Backspace"); - } - - // --- Connections (Edges) --- - - async connectNodes( - sourceNodeIndex: number, - targetNodeIndex: number, - ): Promise { - // Get the node wrapper elements to scope handle search - const sourceNode = this.getNodeLocator(sourceNodeIndex); - const targetNode = this.getNodeLocator(targetNodeIndex); - - // ReactFlow renders Handle components as .react-flow__handle elements - // Output handles have class .react-flow__handle-right (Position.Right) - // Input handles have class .react-flow__handle-left (Position.Left) - const sourceHandle = sourceNode - .locator(".react-flow__handle-right") - .first(); - const targetHandle = targetNode.locator(".react-flow__handle-left").first(); - - // Get precise center coordinates using evaluate to avoid CSS transform issues - const getHandleCenter = async (locator: Locator) => { - const el = await locator.elementHandle(); - if (!el) throw new Error("Handle element not found"); - const rect = await el.evaluate((node) => { - const r = node.getBoundingClientRect(); - return { x: r.x + r.width / 2, y: r.y + r.height / 2 }; - }); - return rect; - }; - - const source = await getHandleCenter(sourceHandle); - const target = await getHandleCenter(targetHandle); - - // ReactFlow requires a proper drag sequence with intermediate moves - await this.page.mouse.move(source.x, source.y); - await this.page.mouse.down(); - // Move in steps to trigger ReactFlow's connection detection - const steps = 20; - for (let i = 1; i <= steps; i++) { - const ratio = i / steps; - await this.page.mouse.move( - source.x + (target.x - source.x) * ratio, - source.y + (target.y - source.y) * ratio, - ); - } - await this.page.mouse.up(); - } - - async getEdgeCount(): Promise { - return await this.page.locator(".react-flow__edge").count(); - } - - // --- Save --- - - async saveAgent( - name: string = "Test Agent", - description: string = "", - ): Promise { - await this.page.getByTestId("save-control-save-button").click(); - - const nameInput = this.page.getByTestId("save-control-name-input"); - await nameInput.waitFor({ state: "visible", timeout: 5000 }); - await nameInput.fill(name); - - if (description) { - await this.page - .getByTestId("save-control-description-input") - .fill(description); - } - - await this.page.getByTestId("save-control-save-agent-button").click(); - } - - async waitForSaveComplete(): Promise { - await expect(this.page).toHaveURL(/flowID=/, { timeout: 15000 }); - } - - async waitForSaveButton(): Promise { - await this.page.waitForSelector( - '[data-testid="save-control-save-button"]:not([disabled])', - { timeout: 10000 }, - ); - } - - // --- Run --- - - async isRunButtonEnabled(): Promise { - const runButton = 
this.page.locator('[data-id="run-graph-button"]'); - return await runButton.isEnabled(); - } - - async clickRunButton(): Promise { - const runButton = this.page.locator('[data-id="run-graph-button"]'); - await runButton.click(); - } - - // --- Undo / Redo --- - - async isUndoEnabled(): Promise { - const btn = this.page.locator('[data-id="undo-button"]'); - return !(await btn.isDisabled()); - } - - async isRedoEnabled(): Promise { - const btn = this.page.locator('[data-id="redo-button"]'); - return !(await btn.isDisabled()); - } - - async clickUndo(): Promise { - await this.page.locator('[data-id="undo-button"]').click(); - } - - async clickRedo(): Promise { - await this.page.locator('[data-id="redo-button"]').click(); - } - - // --- Copy / Paste --- - - async copyViaKeyboard(): Promise { - const isMac = process.platform === "darwin"; - await this.page.keyboard.press(isMac ? "Meta+c" : "Control+c"); - } - - async pasteViaKeyboard(): Promise { - const isMac = process.platform === "darwin"; - await this.page.keyboard.press(isMac ? "Meta+v" : "Control+v"); - } - - // --- Helpers --- - - async fillBlockInputByPlaceholder( - placeholder: string, - value: string, - nodeIndex: number = 0, - ): Promise { - const node = this.getNodeLocator(nodeIndex); - const input = node.getByPlaceholder(placeholder); - await input.fill(value); - } - - async clickCanvas(): Promise { - const pane = this.page.locator(".react-flow__pane").first(); - const box = await pane.boundingBox(); - if (box) { - // Click in the center of the canvas to avoid sidebar/toolbar overlaps - await pane.click({ - position: { x: box.width / 2, y: box.height / 2 }, - }); - } else { - await pane.click(); - } - } - - getPlaywrightPage(): Page { - return this.page; - } - - async createDummyAgent(): Promise { - await this.closeTutorial(); - await this.addBlockByClick("Add to Dictionary"); - await this.waitForNodeOnCanvas(1); - await this.saveAgent("Test Agent", "Test Description"); - await this.waitForSaveComplete(); - } -} diff --git a/autogpt_platform/frontend/src/tests/pages/library.page.ts b/autogpt_platform/frontend/src/tests/pages/library.page.ts deleted file mode 100644 index 716e6c3188..0000000000 --- a/autogpt_platform/frontend/src/tests/pages/library.page.ts +++ /dev/null @@ -1,559 +0,0 @@ -import { Locator, Page } from "@playwright/test"; -import { getSelectors } from "../utils/selectors"; -import { BasePage } from "./base.page"; - -export interface Agent { - id: string; - name: string; - description: string; - imageUrl?: string; - seeRunsUrl: string; - openInBuilderUrl: string; -} - -export class LibraryPage extends BasePage { - constructor(page: Page) { - super(page); - } - - async isLoaded(): Promise { - console.log(`checking if library page is loaded`); - try { - await this.page.waitForLoadState("domcontentloaded", { timeout: 10_000 }); - - await this.page.waitForSelector('[data-testid="library-textbox"]', { - state: "visible", - timeout: 10_000, - }); - - console.log("Library page is loaded successfully"); - return true; - } catch (error) { - console.log("Library page failed to load:", error); - return false; - } - } - - async navigateToLibrary(): Promise { - await this.page.goto("/library"); - await this.isLoaded(); - } - - async getAgentCount(): Promise { - const { getId } = getSelectors(this.page); - const countText = await getId("agents-count").textContent(); - const match = countText?.match(/^(\d+)/); - return match ? 
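For context on how the removed `BuildPage` object was consumed, here is a minimal sketch of a spec driving it end to end. The import paths match the deleted files elsewhere in this diff; the test title and agent name are illustrative, and the login step assumes the same `getTestUser` flow the other specs use.

```ts
import { test, expect } from "./coverage-fixture";
import { BuildPage } from "./pages/build.page";
import { LoginPage } from "./pages/login.page";
import { getTestUser } from "./utils/auth";

// Illustrative spec: authenticate, add a block, and save the agent.
test("user can add a block and save the agent", async ({ page }) => {
  // The builder requires an authenticated session.
  const testUser = await getTestUser();
  const loginPage = new LoginPage(page);
  await page.goto("/login");
  await loginPage.login(testUser.email, testUser.password);

  const buildPage = new BuildPage(page);
  await buildPage.goto();
  expect(await buildPage.isLoaded()).toBeTruthy();
  await buildPage.closeTutorial();

  // "Add to Dictionary" mirrors the block createDummyAgent() uses above.
  await buildPage.addBlockByClick("Add to Dictionary");
  await buildPage.waitForNodeOnCanvas(1);
  expect(await buildPage.getNodeCount()).toBe(1);

  await buildPage.saveAgent("Sketch Agent", "Created by an example spec");
  await buildPage.waitForSaveComplete();
});
```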
diff --git a/autogpt_platform/frontend/src/tests/pages/library.page.ts b/autogpt_platform/frontend/src/tests/pages/library.page.ts
deleted file mode 100644
index 716e6c3188..0000000000
--- a/autogpt_platform/frontend/src/tests/pages/library.page.ts
+++ /dev/null
@@ -1,559 +0,0 @@
-import { Locator, Page } from "@playwright/test";
-import { getSelectors } from "../utils/selectors";
-import { BasePage } from "./base.page";
-
-export interface Agent {
-  id: string;
-  name: string;
-  description: string;
-  imageUrl?: string;
-  seeRunsUrl: string;
-  openInBuilderUrl: string;
-}
-
-export class LibraryPage extends BasePage {
-  constructor(page: Page) {
-    super(page);
-  }
-
-  async isLoaded(): Promise<boolean> {
-    console.log(`checking if library page is loaded`);
-    try {
-      await this.page.waitForLoadState("domcontentloaded", { timeout: 10_000 });
-
-      await this.page.waitForSelector('[data-testid="library-textbox"]', {
-        state: "visible",
-        timeout: 10_000,
-      });
-
-      console.log("Library page is loaded successfully");
-      return true;
-    } catch (error) {
-      console.log("Library page failed to load:", error);
-      return false;
-    }
-  }
-
-  async navigateToLibrary(): Promise<void> {
-    await this.page.goto("/library");
-    await this.isLoaded();
-  }
-
-  async getAgentCount(): Promise<number> {
-    const { getId } = getSelectors(this.page);
-    const countText = await getId("agents-count").textContent();
-    const match = countText?.match(/^(\d+)/);
-    return match ? parseInt(match[1], 10) : 0;
-  }
-
-  async getAgentCountByListLength(): Promise<number> {
-    const { getId } = getSelectors(this.page);
-    const agentCards = await getId("library-agent-card").all();
-    return agentCards.length;
-  }
-
-  async searchAgents(searchTerm: string): Promise<void> {
-    console.log(`searching for agents with term: ${searchTerm}`);
-    const { getRole } = getSelectors(this.page);
-    const searchInput = getRole("textbox", "Search agents");
-    await searchInput.fill(searchTerm);
-
-    await this.page.waitForTimeout(500);
-  }
-
-  async clearSearch(): Promise<void> {
-    console.log(`clearing search`);
-    try {
-      // Look for the clear button (X icon)
-      const clearButton = this.page.locator(".lucide.lucide-x");
-      if (await clearButton.isVisible()) {
-        await clearButton.click();
-      } else {
-        // If no clear button, clear the search input directly
-        const searchInput = this.page.getByRole("textbox", {
-          name: "Search agents",
-        });
-        await searchInput.fill("");
-      }
-
-      // Wait for results to update
-      await this.page.waitForTimeout(500);
-    } catch (error) {
-      console.error("Error clearing search:", error);
-    }
-  }
-
-  async selectSortOption(
-    page: Page,
-    sortOption: "Creation Date" | "Last Modified",
-  ): Promise<void> {
-    const { getRole } = getSelectors(page);
-    await getRole("combobox").click();
-
-    await getRole("option", sortOption).click();
-
-    await this.page.waitForTimeout(500);
-  }
-
-  async getCurrentSortOption(): Promise<string> {
-    console.log(`getting current sort option`);
-    try {
-      const sortCombobox = this.page.getByRole("combobox");
-      const currentOption = await sortCombobox.textContent();
-      return currentOption?.trim() || "";
-    } catch (error) {
-      console.error("Error getting current sort option:", error);
-      return "";
-    }
-  }
-
-  async openUploadDialog(): Promise<void> {
-    console.log(`opening upload dialog`);
-    // Open the unified Import dialog first
-    await this.page.getByRole("button", { name: "Import" }).click();
-
-    // Wait for dialog to appear
-    await this.page.getByRole("dialog", { name: "Import" }).waitFor({
-      state: "visible",
-      timeout: 5_000,
-    });
-
-    // Click the "AutoGPT agent" tab
-    await this.page.getByRole("tab", { name: "AutoGPT agent" }).click();
-  }
-
-  async closeUploadDialog(): Promise<void> {
-    await this.page.getByRole("button", { name: "Close" }).click();
-
-    await this.page.getByRole("dialog", { name: "Import" }).waitFor({
-      state: "hidden",
-      timeout: 5_000,
-    });
-  }
-
-  async isUploadDialogVisible(): Promise<boolean> {
-    console.log(`checking if upload dialog is visible`);
-    try {
-      const dialog = this.page.getByRole("dialog", { name: "Import" });
-      return await dialog.isVisible();
-    } catch {
-      return false;
-    }
-  }
-
-  async fillUploadForm(agentName: string, description: string): Promise<void> {
-    console.log(
-      `filling upload form with name: ${agentName}, description: ${description}`,
-    );
-
-    // Fill agent name
-    await this.page
-      .getByRole("textbox", { name: "Agent name" })
-      .fill(agentName);
-
-    // Fill description
-    await this.page
-      .getByRole("textbox", { name: "Agent description" })
-      .fill(description);
-  }
-
-  async isUploadButtonEnabled(): Promise<boolean> {
-    console.log(`checking if upload button is enabled`);
-    try {
-      const uploadButton = this.page.getByRole("button", {
-        name: "Upload",
-      });
-      return await uploadButton.isEnabled();
-    } catch {
-      return false;
-    }
-  }
-
-  async getAgents(): Promise<Agent[]> {
-    const { getId } = getSelectors(this.page);
-    const agents: Agent[] = [];
-
-    await getId("library-agent-card")
-      .first()
-      .waitFor({ state: "visible", timeout: 10_000 });
-    const agentCards = await getId("library-agent-card").all();
-
-    for (const card of agentCards) {
-      const name = await getId("library-agent-card-name", card).textContent();
-      const seeRunsLink = getId("library-agent-card-see-runs-link", card);
-      const openInBuilderLink = getId(
-        "library-agent-card-open-in-builder-link",
-        card,
-      );
-
-      const seeRunsUrl = await seeRunsLink.getAttribute("href");
-
-      // Check if the "Open in builder" link exists before getting its href
-      const openInBuilderLinkCount = await openInBuilderLink.count();
-      const openInBuilderUrl =
-        openInBuilderLinkCount > 0
-          ? await openInBuilderLink.getAttribute("href")
-          : null;
-
-      if (name && seeRunsUrl) {
-        const idMatch = seeRunsUrl.match(/\/library\/agents\/([^\/]+)/);
-        const id = idMatch ? idMatch[1] : "";
-
-        agents.push({
-          id,
-          name: name.trim(),
-          description: "", // Description is not currently rendered in the card
-          seeRunsUrl,
-          openInBuilderUrl: openInBuilderUrl || "",
-        });
-      }
-    }
-
-    console.log(`found ${agents.length} agents`);
-    return agents;
-  }
-
-  async clickAgent(agent: Agent): Promise<void> {
-    const { getId } = getSelectors(this.page);
-    const nameElement = getId("library-agent-card-name").filter({
-      hasText: agent.name,
-    });
-    await nameElement.first().click();
-  }
-
-  async clickSeeRuns(agent: Agent): Promise<void> {
-    console.log(`clicking see runs for agent: ${agent.name}`);
-
-    const { getId } = getSelectors(this.page);
-    const agentCard = getId("library-agent-card").filter({
-      hasText: agent.name,
-    });
-    const seeRunsLink = getId("library-agent-card-see-runs-link", agentCard);
-    await seeRunsLink.first().click();
-  }
-
-  async clickOpenInBuilder(agent: Agent): Promise<void> {
-    console.log(`clicking open in builder for agent: ${agent.name}`);
-
-    const { getId } = getSelectors(this.page);
-    const agentCard = getId("library-agent-card").filter({
-      hasText: agent.name,
-    });
-    const builderLink = getId(
-      "library-agent-card-open-in-builder-link",
-      agentCard,
-    );
-    await builderLink.first().click();
-  }
-
-  async waitForAgentsToLoad(): Promise<void> {
-    const { getId } = getSelectors(this.page);
-    await Promise.race([
-      getId("library-agent-card")
-        .first()
-        .waitFor({ state: "visible", timeout: 10_000 }),
-      getId("agents-count").waitFor({ state: "visible", timeout: 10_000 }),
-    ]);
-  }
-
-  async getSearchValue(): Promise<string> {
-    console.log(`getting search input value`);
-    try {
-      const searchInput = this.page.getByRole("textbox", {
-        name: "Search agents",
-      });
-      return await searchInput.inputValue();
-    } catch {
-      return "";
-    }
-  }
-
-  async hasNoAgentsMessage(): Promise<boolean> {
-    const { getText } = getSelectors(this.page);
-    const noAgentsText = getText("0 agents");
-    return noAgentsText !== null;
-  }
-
-  async scrollToBottom(): Promise<void> {
-    console.log(`scrolling to bottom to trigger pagination`);
-    await this.page.keyboard.press("End");
-    await this.page.waitForTimeout(1000);
-  }
-
-  async scrollDown(): Promise<void> {
-    console.log(`scrolling down to trigger pagination`);
-    await this.page.keyboard.press("PageDown");
-    await this.page.waitForTimeout(1000);
-  }
-
-  async scrollToLoadMore(): Promise<void> {
-    console.log(`scrolling to load more agents`);
-
-    const initialCount = await this.getAgentCountByListLength();
-    console.log(`Initial agent count (DOM cards): ${initialCount}`);
-
-    await this.scrollToBottom();
-
-    await this.page
-      .waitForLoadState("networkidle", { timeout: 10000 })
-      .catch(() => console.log("Network idle timeout, continuing..."));
-
-    await this.page
-      .waitForFunction(
-        (prevCount) =>
-          document.querySelectorAll('[data-testid="library-agent-card"]')
-            .length > prevCount,
-        initialCount,
-        { timeout: 5000 },
-      )
-      .catch(() => {});
-
-    const newCount = await this.getAgentCountByListLength();
-    console.log(`New agent count after scroll (DOM cards): ${newCount}`);
-  }
-
-  async testPagination(): Promise<{
-    initialCount: number;
-    finalCount: number;
-    hasMore: boolean;
-  }> {
-    const initialCount = await this.getAgentCountByListLength();
-    await this.scrollToLoadMore();
-    const finalCount = await this.getAgentCountByListLength();
-
-    const hasMore = finalCount > initialCount;
-    return {
-      initialCount,
-      finalCount,
-      hasMore,
-    };
-  }
-
-  async getAgentsWithPagination(): Promise<Agent[]> {
-    console.log(`getting all agents with pagination`);
-
-    let allAgents: Agent[] = [];
-    let previousCount = 0;
-    let currentCount = 0;
-    const maxAttempts = 5; // Prevent infinite loop
-    let attempts = 0;
-
-    do {
-      previousCount = currentCount;
-
-      // Get current agents
-      const currentAgents = await this.getAgents();
-      allAgents = currentAgents;
-      currentCount = currentAgents.length;
-
-      console.log(`Attempt ${attempts + 1}: Found ${currentCount} agents`);
-
-      // Try to load more by scrolling
-      await this.scrollToLoadMore();
-
-      attempts++;
-    } while (currentCount > previousCount && attempts < maxAttempts);
-
-    console.log(`Total agents found with pagination: ${allAgents.length}`);
-    return allAgents;
-  }
-
-  async waitForPaginationLoad(): Promise<void> {
-    console.log(`waiting for pagination to load`);
-
-    // Wait for any loading states to complete
-    await this.page.waitForTimeout(1000);
-
-    // Wait for agent count to stabilize
-    let previousCount = 0;
-    let currentCount = 0;
-    let stableChecks = 0;
-    const maxChecks = 5; // Reduced from 10 to prevent excessive waiting
-
-    while (stableChecks < 2 && stableChecks < maxChecks) {
-      currentCount = await this.getAgentCount();
-
-      if (currentCount === previousCount) {
-        stableChecks++;
-      } else {
-        stableChecks = 0;
-      }
-
-      previousCount = currentCount;
-      if (stableChecks < 2) {
-        // Only wait if we haven't stabilized yet
-        await this.page.waitForTimeout(500);
-      }
-    }
-
-    console.log(`Pagination load stabilized with ${currentCount} agents`);
-  }
-
-  async scrollAndWaitForNewAgents(): Promise<number> {
-    const initialCount = await this.getAgentCountByListLength();
-
-    await this.scrollDown();
-
-    await this.waitForPaginationLoad();
-
-    const finalCount = await this.getAgentCountByListLength();
-    const newAgentsLoaded = finalCount - initialCount;
-
-    console.log(
-      `Loaded ${newAgentsLoaded} new agents (${initialCount} -> ${finalCount})`,
-    );
-
-    return newAgentsLoaded;
-  }
-
-  async isPaginationWorking(): Promise<boolean> {
-    const newAgentsLoaded = await this.scrollAndWaitForNewAgents();
-    return newAgentsLoaded > 0;
-  }
-}
-
-// Locator functions
-export function getLibraryTab(page: Page): Locator {
-  return page.locator('a[href="/library"]');
-}
-
-export function getAgentCards(page: Page): Locator {
-  return page.getByTestId("library-agent-card");
-}
-
-export function getNewRunButton(page: Page): Locator {
-  return page.getByRole("button", { name: "New run" });
-}
-
-export function getAgentTitle(page: Page): Locator {
-  return page.locator("h1").first();
-}
-
-// Action functions
-export async function navigateToLibrary(page: Page): Promise<void> {
-  await getLibraryTab(page).click();
-  await page.waitForURL(/.*\/library/);
-}
-
-export async function clickFirstAgent(page: Page): Promise<void> {
-  const firstAgent = getAgentCards(page).first();
-  await firstAgent.click();
-}
-
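A brief usage sketch for the pagination helpers defined above; it assumes an already-authenticated session (e.g. via `LoginPage`, as in the other specs), and the test title is illustrative rather than taken from the deleted suite.

```ts
import { test, expect } from "./coverage-fixture";
import { LibraryPage } from "./pages/library.page";

// Illustrative spec: exercise the scroll-based pagination helpers.
test("library pagination loads more agents on scroll", async ({ page }) => {
  const libraryPage = new LibraryPage(page);
  await libraryPage.navigateToLibrary();
  await libraryPage.waitForAgentsToLoad();

  const { initialCount, finalCount, hasMore } =
    await libraryPage.testPagination();

  // The card count never shrinks; it only grows when a second page exists.
  expect(finalCount).toBeGreaterThanOrEqual(initialCount);
  if (hasMore) {
    expect(finalCount).toBeGreaterThan(initialCount);
  }
});
```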
-export async function navigateToAgentByName(
-  page: Page,
-  agentName: string,
-): Promise<void> {
-  const agentCard = getAgentCards(page).filter({ hasText: agentName }).first();
-  // Wait for the agent card to be visible before clicking
-  // This handles async loading of agents after page navigation
-  await agentCard.waitFor({ state: "visible", timeout: 15000 });
-  // Click the link inside the card to navigate reliably through
-  // the motion.div + draggable wrapper layers.
-  const link = agentCard.locator('a[href*="/library/agents/"]').first();
-  await link.click();
-}
-
-export async function clickRunButton(page: Page): Promise<void> {
-  const { getId } = getSelectors(page);
-
-  // Wait for sidebar loading to complete before detecting buttons.
-  // During sidebar loading, the "New task" button appears transiently
-  // even for agents with no items, then switches to "Setup your task"
-  // once loading finishes. Waiting for network idle ensures the page
-  // has settled into its final state.
-  await page.waitForLoadState("networkidle");
-
-  const setupTaskButton = page.getByRole("button", {
-    name: /Setup your task/i,
-  });
-  const newTaskButton = page.getByRole("button", { name: /New task/i });
-  const runButton = getId("agent-run-button");
-  const runAgainButton = getId("run-again-button");
-
-  // Wait for any of the buttons to appear
-  try {
-    await Promise.race([
-      setupTaskButton.waitFor({ state: "visible", timeout: 15000 }),
-      newTaskButton.waitFor({ state: "visible", timeout: 15000 }),
-      runButton.waitFor({ state: "visible", timeout: 15000 }),
-      runAgainButton.waitFor({ state: "visible", timeout: 15000 }),
-    ]);
-  } catch {
-    throw new Error(
-      "Could not find run/start task button - none of the expected buttons appeared",
-    );
-  }
-
-  // Check which button is visible and click it
-  if (await setupTaskButton.isVisible()) {
-    await setupTaskButton.click();
-    const startBtn = page.getByRole("button", { name: /Start Task/i }).first();
-    await startBtn.waitFor({ state: "visible", timeout: 15000 });
-    await startBtn.click();
-    return;
-  }
-
-  if (await newTaskButton.isVisible()) {
-    await newTaskButton.click();
-    const startBtn = page.getByRole("button", { name: /Start Task/i }).first();
-    await startBtn.waitFor({ state: "visible", timeout: 15000 });
-    await startBtn.click();
-    return;
-  }
-
-  if (await runButton.isVisible()) {
-    await runButton.click();
-    return;
-  }
-
-  if (await runAgainButton.isVisible()) {
-    await runAgainButton.click();
-    return;
-  }
-
-  throw new Error("Could not find run/start task button");
-}
-
-export async function clickNewRunButton(page: Page): Promise<void> {
-  await getNewRunButton(page).click();
-}
-
-export async function runAgent(page: Page): Promise<void> {
-  await clickRunButton(page);
-}
-
-export async function waitForAgentPageLoad(page: Page): Promise<void> {
-  await page.waitForURL(/.*\/library\/agents\/[^/]+/);
-  // Wait for sidebar data to finish loading so the page settles
-  // into its final state (empty view vs sidebar view)
-  await page.waitForLoadState("networkidle");
-}
-
-export async function getAgentName(page: Page): Promise<string> {
-  return (await getAgentTitle(page).textContent()) || "";
-}
-
-export async function isLoaded(page: Page): Promise<boolean> {
-  return await page.locator("h1").isVisible();
-}
-
-export async function waitForRunToComplete(
-  page: Page,
-  timeout = 30000,
-): Promise<void> {
-  await page.waitForSelector(".bg-green-500, .bg-red-500, .bg-purple-500", {
-    timeout,
-  });
-}
-
-export async function getRunStatus(page: Page): Promise<string> {
-  if (await
page.locator(".animate-spin").isVisible()) { - return "running"; - } else if (await page.locator(".bg-green-500").isVisible()) { - return "completed"; - } else if (await page.locator(".bg-red-500").isVisible()) { - return "failed"; - } - return "unknown"; -} diff --git a/autogpt_platform/frontend/src/tests/pages/login.page.ts b/autogpt_platform/frontend/src/tests/pages/login.page.ts deleted file mode 100644 index 8472de06ed..0000000000 --- a/autogpt_platform/frontend/src/tests/pages/login.page.ts +++ /dev/null @@ -1,102 +0,0 @@ -import { Page } from "@playwright/test"; -import { skipOnboardingIfPresent } from "../utils/onboarding"; - -export class LoginPage { - constructor(private page: Page) {} - - async goto() { - await this.page.goto("/login"); - } - - async login(email: string, password: string) { - console.log(`ℹ️ Attempting login on ${this.page.url()} with`, { - email, - password, - }); - - // Wait for the form to be ready - await this.page.waitForSelector("form", { state: "visible" }); - - // Fill email using input selector instead of label - const emailInput = this.page.locator('input[type="email"]'); - await emailInput.waitFor({ state: "visible" }); - await emailInput.fill(email); - - // Fill password using input selector instead of label - const passwordInput = this.page.locator('input[type="password"]'); - await passwordInput.waitFor({ state: "visible" }); - await passwordInput.fill(password); - - // Wait for the button to be ready - const loginButton = this.page.getByRole("button", { - name: "Login", - exact: true, - }); - await loginButton.waitFor({ state: "visible" }); - - // Attach navigation logger for debug purposes - this.page.on("load", (page) => console.log(`ℹ️ Now at URL: ${page.url()}`)); - - // Start waiting for navigation before clicking - // Wait for redirect to marketplace, onboarding, library, or copilot (new landing pages) - const leaveLoginPage = this.page - .waitForURL( - (url: URL) => - /^\/(marketplace|onboarding(\/.*)?|library|copilot)?$/.test( - url.pathname, - ), - { timeout: 10_000 }, - ) - .catch((reason) => { - console.error( - `🚨 Navigation away from /login timed out (current URL: ${this.page.url()}):`, - reason, - ); - throw reason; - }); - - console.log(`🖱️ Clicking login button...`); - await loginButton.click(); - - console.log("⏳ Waiting for navigation away from /login ..."); - await leaveLoginPage; - console.log(`⌛ Post-login redirected to ${this.page.url()}`); - - await new Promise((resolve) => setTimeout(resolve, 200)); // allow time for client-side redirect - await this.page.waitForLoadState("load", { timeout: 10_000 }); - - // If redirected to onboarding, complete it via API so tests can proceed - await skipOnboardingIfPresent(this.page, "/marketplace"); - - console.log("➡️ Navigating to /marketplace ..."); - await this.page.goto("/marketplace", { timeout: 20_000 }); - console.log("✅ Login process complete"); - - // If Wallet popover auto-opens, close it to avoid blocking account menu interactions - try { - const walletPanel = this.page.getByText("Your credits").first(); - // Wait briefly for wallet to appear after navigation (it may open asynchronously) - const appeared = await walletPanel - .waitFor({ state: "visible", timeout: 2500 }) - .then(() => true) - .catch(() => false); - if (appeared) { - const closeWalletButton = this.page.getByRole("button", { - name: /Close wallet/i, - }); - await closeWalletButton.click({ timeout: 3000 }).catch(async () => { - // Fallbacks: try Escape, then click outside - await 
this.page.keyboard.press("Escape").catch(() => {}); - }); - await walletPanel - .waitFor({ state: "hidden", timeout: 3000 }) - .catch(async () => { - await this.page.mouse.click(5, 5).catch(() => {}); - }); - } - } catch (_e) { - // Non-fatal in tests; continue - console.log("(info) Wallet popover not present or already closed"); - } - } -} diff --git a/autogpt_platform/frontend/src/tests/pages/marketplace.page.ts b/autogpt_platform/frontend/src/tests/pages/marketplace.page.ts deleted file mode 100644 index 51c2935abf..0000000000 --- a/autogpt_platform/frontend/src/tests/pages/marketplace.page.ts +++ /dev/null @@ -1,143 +0,0 @@ -import { Page } from "@playwright/test"; -import { BasePage } from "./base.page"; -import { getSelectors } from "../utils/selectors"; - -export class MarketplacePage extends BasePage { - constructor(page: Page) { - super(page); - } - - async goto(page: Page) { - await page.goto("/marketplace"); - await page - .locator( - '[data-testid="store-card"], [data-testid="featured-store-card"]', - ) - .first() - .waitFor({ state: "visible", timeout: 20000 }); - } - - async getMarketplaceTitle(page: Page) { - const { getText } = getSelectors(page); - return getText("Explore AI agents", { exact: false }); - } - - async getCreatorsSection(page: Page) { - const { getId, getText } = getSelectors(page); - return getId("creators-section") || getText("Creators", { exact: false }); - } - - async getAgentsSection(page: Page) { - const { getId, getText } = getSelectors(page); - return getId("agents-section") || getText("Agents", { exact: false }); - } - - async getCreatorsLink(page: Page) { - const { getLink } = getSelectors(page); - return getLink(/creators/i); - } - - async getAgentsLink(page: Page) { - const { getLink } = getSelectors(page); - return getLink(/agents/i); - } - - async getSearchInput(page: Page) { - const { getField, getId } = getSelectors(page); - return getId("store-search-input") || getField(/search/i); - } - - async getFilterDropdown(page: Page) { - const { getId, getButton } = getSelectors(page); - return getId("filter-dropdown") || getButton(/filter/i); - } - - async searchFor(query: string, page: Page) { - const searchInput = await this.getSearchInput(page); - await searchInput.fill(query); - await searchInput.press("Enter"); - } - - async clickCreators(page: Page) { - const creatorsLink = await this.getCreatorsLink(page); - await creatorsLink.click(); - } - - async clickAgents(page: Page) { - const agentsLink = await this.getAgentsLink(page); - await agentsLink.click(); - } - - async openFilter(page: Page) { - const filterDropdown = await this.getFilterDropdown(page); - await filterDropdown.click(); - } - - async getFeaturedAgentsSection(page: Page) { - const { getText } = getSelectors(page); - return getText("Featured agents"); - } - - async getTopAgentsSection(page: Page) { - const { getText } = getSelectors(page); - return getText("All Agents"); - } - - async getFeaturedCreatorsSection(page: Page) { - const { getText } = getSelectors(page); - return getText("Featured Creators"); - } - - async getFeaturedAgentCards(page: Page) { - const { getId } = getSelectors(page); - return getId("featured-store-card"); - } - - async getTopAgentCards(page: Page) { - const { getId } = getSelectors(page); - return getId("store-card"); - } - - async getCreatorProfiles(page: Page) { - const { getId } = getSelectors(page); - return getId("creator-card"); - } - - async searchAndNavigate(query: string, page: Page) { - const searchInput = (await 
this.getSearchInput(page)).first(); - await searchInput.fill(query); - await searchInput.press("Enter"); - } - - async waitForSearchResults() { - await this.page.waitForURL("**/marketplace/search**"); - } - - async getFirstFeaturedAgent(page: Page) { - const { getId } = getSelectors(page); - const card = getId("featured-store-card").first(); - await card.waitFor({ state: "visible", timeout: 15000 }); - return card; - } - - async getFirstTopAgent() { - const card = this.page - .locator('[data-testid="store-card"]:visible') - .first(); - await card.waitFor({ state: "visible", timeout: 15000 }); - return card; - } - - async getFirstCreatorProfile(page: Page) { - const { getId } = getSelectors(page); - const card = getId("creator-card").first(); - await card.waitFor({ state: "visible", timeout: 15000 }); - return card; - } - - async getSearchResultsCount(page: Page) { - const { getId } = getSelectors(page); - const storeCards = getId("store-card"); - return await storeCards.count(); - } -} diff --git a/autogpt_platform/frontend/src/tests/profile-form.spec.ts b/autogpt_platform/frontend/src/tests/profile-form.spec.ts deleted file mode 100644 index 3ca593809c..0000000000 --- a/autogpt_platform/frontend/src/tests/profile-form.spec.ts +++ /dev/null @@ -1,109 +0,0 @@ -import { test, expect } from "./coverage-fixture"; -import { getTestUserWithLibraryAgents } from "./credentials"; -import { LoginPage } from "./pages/login.page"; -import { ProfileFormPage } from "./pages/profile-form.page"; -import { hasUrl } from "./utils/assertion"; - -test.describe("Profile Form", () => { - let profileFormPage: ProfileFormPage; - - test.beforeEach(async ({ page }) => { - profileFormPage = new ProfileFormPage(page); - - const loginPage = new LoginPage(page); - await loginPage.goto(); - const richUser = getTestUserWithLibraryAgents(); - await loginPage.login(richUser.email, richUser.password); - await hasUrl(page, "/marketplace"); - }); - - test("redirects to login when user is not authenticated", async ({ - browser, - }) => { - const context = await browser.newContext(); - const page = await context.newPage(); - - try { - await page.goto("/profile"); - await hasUrl(page, "/login?next=%2Fprofile"); - } finally { - await page.close(); - await context.close(); - } - }); - - test("can save profile changes successfully", async ({ page }) => { - await profileFormPage.navbar.clickProfileLink(); - - await expect(profileFormPage.isLoaded()).resolves.toBeTruthy(); - await hasUrl(page, new RegExp("/profile")); - - const suffix = Date.now().toString().slice(-6); - const newDisplayName = `E2E Name ${suffix}`; - const newHandle = `e2euser${suffix}`; - const newBio = `E2E bio ${suffix}`; - const newLinks = [ - `https://example.com/${suffix}/1`, - `https://example.com/${suffix}/2`, - `https://example.com/${suffix}/3`, - `https://example.com/${suffix}/4`, - `https://example.com/${suffix}/5`, - ]; - - await profileFormPage.setDisplayName(newDisplayName); - await profileFormPage.setHandle(newHandle); - await profileFormPage.setBio(newBio); - await profileFormPage.setLinks(newLinks); - await profileFormPage.saveChanges(); - - expect(await profileFormPage.getDisplayName()).toBe(newDisplayName); - expect(await profileFormPage.getHandle()).toBe(newHandle); - expect(await profileFormPage.getBio()).toBe(newBio); - for (let i = 1; i <= 5; i++) { - expect(await profileFormPage.getLink(i)).toBe(newLinks[i - 1]); - } - - await page.reload(); - await expect(profileFormPage.isLoaded()).resolves.toBeTruthy(); - - expect(await 
profileFormPage.getDisplayName()).toBe(newDisplayName);
-    expect(await profileFormPage.getHandle()).toBe(newHandle);
-    expect(await profileFormPage.getBio()).toBe(newBio);
-    for (let i = 1; i <= 5; i++) {
-      expect(await profileFormPage.getLink(i)).toBe(newLinks[i - 1]);
-    }
-  });
-
-  // The profile form doesn't use react-hook-form yet, so the Cancel button doesn't reset values as expected; unskip this test once that's fixed.
-  test.skip("can cancel profile changes", async ({ page }) => {
-    await profileFormPage.navbar.clickProfileLink();
-
-    await expect(profileFormPage.isLoaded()).resolves.toBeTruthy();
-    await hasUrl(page, new RegExp("/profile"));
-
-    const originalDisplayName = await profileFormPage.getDisplayName();
-    const originalHandle = await profileFormPage.getHandle();
-    const originalBio = await profileFormPage.getBio();
-    const originalLinks: string[] = [];
-    for (let i = 1; i <= 5; i++) {
-      originalLinks.push(await profileFormPage.getLink(i));
-    }
-
-    const suffix = `${Date.now().toString().slice(-6)}_cancel`;
-    await profileFormPage.setDisplayName(`Tmp Name ${suffix}`);
-    await profileFormPage.setHandle(`tmpuser${suffix}`);
-    await profileFormPage.setBio(`Tmp bio ${suffix}`);
-    for (let i = 1; i <= 5; i++) {
-      await profileFormPage.setLink(i, `https://tmp.example/${suffix}/${i}`);
-    }
-
-    await profileFormPage.clickCancel();
-
-    expect(await profileFormPage.getDisplayName()).toBe(originalDisplayName);
-    expect(await profileFormPage.getHandle()).toBe(originalHandle);
-    expect(await profileFormPage.getBio()).toBe(originalBio);
-    for (let i = 1; i <= 5; i++) {
-      expect(await profileFormPage.getLink(i)).toBe(originalLinks[i - 1]);
-    }
-  });
-});
diff --git a/autogpt_platform/frontend/src/tests/profile.spec.ts b/autogpt_platform/frontend/src/tests/profile.spec.ts
deleted file mode 100644
index 60f28e7372..0000000000
--- a/autogpt_platform/frontend/src/tests/profile.spec.ts
+++ /dev/null
@@ -1,47 +0,0 @@
-import { LoginPage } from "./pages/login.page";
-import { ProfilePage } from "./pages/profile.page";
-import { test, expect } from "./coverage-fixture";
-import { getTestUser } from "./utils/auth";
-import { hasUrl } from "./utils/assertion";
-
-test.beforeEach(async ({ page }) => {
-  const loginPage = new LoginPage(page);
-  const testUser = await getTestUser();
-
-  await page.goto("/login");
-  await loginPage.login(testUser.email, testUser.password);
-  await hasUrl(page, "/marketplace");
-});
-
-test("user can view their profile information", async ({ page }) => {
-  const profilePage = new ProfilePage(page);
-
-  await profilePage.navbar.clickProfileLink();
-
-  // workaround for #8788
-  // sleep for 10 seconds to allow the page to load, due to a bug in our system
-  await page.waitForTimeout(10000);
-  await page.reload();
-  await page.reload();
-  await expect(profilePage.isLoaded()).resolves.toBeTruthy();
-  await hasUrl(page, new RegExp("/profile"));
-
-  // Verify the displayed profile name/handle is present and non-empty
-  const displayedHandle = await profilePage.getDisplayedName();
-  expect(displayedHandle).not.toBeNull();
-  expect(displayedHandle).not.toBe("");
-  expect(displayedHandle).toBeDefined();
-});
-
-test("profile navigation is accessible from navbar", async ({ page }) => {
-  const profilePage = new ProfilePage(page);
-
-  await profilePage.navbar.clickProfileLink();
-  await hasUrl(page, new RegExp("/profile"));
-  await expect(profilePage.isLoaded()).resolves.toBeTruthy();
-});
-
-test("profile displays user Credential providers", async ({ page }) => {
-  const profilePage = new
ProfilePage(page); - await profilePage.navbar.clickProfileLink(); -}); diff --git a/autogpt_platform/frontend/src/tests/publish-agent.spec.ts b/autogpt_platform/frontend/src/tests/publish-agent.spec.ts deleted file mode 100644 index e2dafef873..0000000000 --- a/autogpt_platform/frontend/src/tests/publish-agent.spec.ts +++ /dev/null @@ -1,276 +0,0 @@ -import { test } from "./coverage-fixture"; -import { getTestUserWithLibraryAgents } from "./credentials"; -import { LoginPage } from "./pages/login.page"; -import { - hasUrl, - isDisabled, - isEnabled, - isHidden, - isVisible, -} from "./utils/assertion"; -import { getSelectors } from "./utils/selectors"; - -test("user can publish an agent through the complete flow", async ({ - page, -}) => { - const { getId, getText, getButton } = getSelectors(page); - - const loginPage = new LoginPage(page); - await page.goto("/login"); - const richUser = getTestUserWithLibraryAgents(); - await loginPage.login(richUser.email, richUser.password); - await hasUrl(page, "/marketplace"); - - await page.goto("/marketplace"); - await getButton("Become a creator").click(); - - const publishAgentModal = getId("publish-agent-modal"); - await isVisible(publishAgentModal, 10000); - - await isVisible( - publishAgentModal.getByText( - "Select your project that you'd like to publish", - ), - ); - - const agentToSelect = publishAgentModal.getByTestId("agent-card").first(); - await agentToSelect.click(); - - const nextButton = publishAgentModal.getByRole("button", { - name: "Next", - exact: true, - }); - - await isEnabled(nextButton); - await nextButton.click(); - - // 2. Adding details of agent - await isVisible(getText("Write a bit of details about your agent")); - - const agentName = "Test Agent Name"; - - const agentTitle = publishAgentModal.getByLabel("Title"); - await agentTitle.fill(agentName); - - const agentSubheader = publishAgentModal.getByLabel("Subheader"); - await agentSubheader.fill("Test Agent Subheader"); - - const agentSlug = publishAgentModal.getByLabel("Slug"); - await agentSlug.fill("test-agent-slug"); - - const youtubeInput = publishAgentModal.getByLabel("Youtube video link"); - await youtubeInput.fill("https://www.youtube.com/watch?v=test"); - - const categorySelect = publishAgentModal.locator( - 'select[aria-hidden="true"]', - ); - await categorySelect.selectOption({ value: "other" }); - - const descriptionInput = publishAgentModal.getByLabel("Description"); - await descriptionInput.fill( - "This is a test agent description for the automated test.", - ); - - await isEnabled(publishAgentModal.getByRole("button", { name: "Submit" })); -}); - -test("should display appropriate content in agent creation modal when user is logged out", async ({ - page, -}) => { - const { getText, getButton } = getSelectors(page); - - await page.goto("/marketplace"); - await getButton("Become a creator").click(); - - await isVisible( - getText( - "Log in or create an account to publish your agents to the marketplace and join a community of creators", - ), - ); -}); - -test("should validate all form fields in publish agent form", async ({ - page, -}) => { - const { getId, getText, getButton } = getSelectors(page); - - const loginPage = new LoginPage(page); - await page.goto("/login"); - const richUser = getTestUserWithLibraryAgents(); - await loginPage.login(richUser.email, richUser.password); - await hasUrl(page, "/marketplace"); - - await page.goto("/marketplace"); - await getButton("Become a creator").click(); - - const publishAgentModal = getId("publish-agent-modal"); - 
await isVisible(publishAgentModal, 10000); - - const agentToSelect = publishAgentModal.getByTestId("agent-card").first(); - await agentToSelect.click(); - - const nextButton = publishAgentModal.getByRole("button", { - name: "Next", - exact: true, - }); - await nextButton.click(); - - await isVisible(getText("Write a bit of details about your agent")); - - // Get form elements - const agentTitle = publishAgentModal.getByLabel("Title"); - const agentSubheader = publishAgentModal.getByLabel("Subheader"); - const agentSlug = publishAgentModal.getByLabel("Slug"); - const youtubeInput = publishAgentModal.getByLabel("Youtube video link"); - const categorySelect = publishAgentModal.locator( - 'select[aria-hidden="true"]', - ); - const descriptionInput = publishAgentModal.getByLabel("Description"); - const submitButton = publishAgentModal.getByRole("button", { - name: "Submit", - }); - - async function clearForm() { - await agentTitle.clear(); - await agentSubheader.clear(); - await agentSlug.clear(); - await youtubeInput.clear(); - await descriptionInput.clear(); - } - - // 1. Test required field validations - await clearForm(); - await submitButton.click(); - - await isVisible(publishAgentModal.getByText("Title is required")); - await isVisible(publishAgentModal.getByText("Subheader is required")); - await isVisible(publishAgentModal.getByText("Slug is required")); - await isVisible(publishAgentModal.getByText("Category is required")); - await isVisible(publishAgentModal.getByText("Description is required")); - - // 2. Test field length limits - await clearForm(); - - // Test title length limit (100 characters) - const longTitle = "a".repeat(101); - await agentTitle.fill(longTitle); - await agentTitle.blur(); - await isVisible( - publishAgentModal.getByText("Title must be less than 100 characters"), - ); - - // Test subheader length limit (200 characters) - const longSubheader = "b".repeat(201); - await agentSubheader.fill(longSubheader); - await agentSubheader.blur(); - await isVisible( - publishAgentModal.getByText("Subheader must be less than 200 characters"), - ); - - // Test slug length limit (50 characters) - const longSlug = "c".repeat(51); - await agentSlug.fill(longSlug); - await agentSlug.blur(); - await isVisible( - publishAgentModal.getByText("Slug must be less than 50 characters"), - ); - - // Test description length limit (1000 characters) - const longDescription = "d".repeat(1001); - await descriptionInput.fill(longDescription); - await descriptionInput.blur(); - await isVisible( - publishAgentModal.getByText( - "Description must be less than 1000 characters", - ), - ); - - // Test invalid characters in slug - await agentSlug.fill("Invalid Slug With Spaces"); - await agentSlug.blur(); - await isVisible( - publishAgentModal.getByText( - "Slug can only contain lowercase letters, numbers, and hyphens", - ), - ); - - await agentSlug.clear(); - await agentSlug.fill("InvalidSlugWithCapitals"); - await agentSlug.blur(); - await isVisible( - publishAgentModal.getByText( - "Slug can only contain lowercase letters, numbers, and hyphens", - ), - ); - - await agentSlug.clear(); - await agentSlug.fill("invalid-slug-with-@#$"); - await agentSlug.blur(); - await isVisible( - publishAgentModal.getByText( - "Slug can only contain lowercase letters, numbers, and hyphens", - ), - ); - - // Test valid slug format should not show error - await agentSlug.clear(); - await agentSlug.fill("valid-slug-123"); - await agentSlug.blur(); - await page.waitForTimeout(500); - - await isHidden( - 
publishAgentModal.getByText( - "Slug can only contain lowercase letters, numbers, and hyphens", - ), - ); - - // Test invalid YouTube URL - await youtubeInput.fill("https://www.google.com/invalid-url"); - await youtubeInput.blur(); - await isVisible( - publishAgentModal.getByText("Please enter a valid YouTube URL"), - ); - - await youtubeInput.clear(); - await youtubeInput.fill("not-a-url-at-all"); - await youtubeInput.blur(); - await isVisible( - publishAgentModal.getByText("Please enter a valid YouTube URL"), - ); - - // Test valid YouTube URLs should not show error - await youtubeInput.clear(); - await youtubeInput.fill("https://www.youtube.com/watch?v=test"); - await youtubeInput.blur(); - await page.waitForTimeout(500); - - await isHidden( - publishAgentModal.getByText("Please enter a valid YouTube URL"), - ); - - await youtubeInput.clear(); - await youtubeInput.fill("https://youtu.be/test123"); - await youtubeInput.blur(); - await page.waitForTimeout(500); - - await isHidden( - publishAgentModal.getByText("Please enter a valid YouTube URL"), - ); - - // 5. Test submit button enabled/disabled state - await clearForm(); - - // Submit button should be disabled when form is empty - await page.waitForTimeout(1000); - await isDisabled(submitButton); - - // Fill all required fields with valid data - await agentTitle.fill("Valid Title"); - await agentSubheader.fill("Valid Subheader"); - await agentSlug.fill("valid-slug"); - await categorySelect.selectOption({ value: "other" }); - await descriptionInput.fill("Valid description text"); - - // Submit button should now be enabled - await isEnabled(submitButton); -}); diff --git a/autogpt_platform/frontend/src/tests/settings.spec.ts b/autogpt_platform/frontend/src/tests/settings.spec.ts deleted file mode 100644 index 25ca0c337a..0000000000 --- a/autogpt_platform/frontend/src/tests/settings.spec.ts +++ /dev/null @@ -1,144 +0,0 @@ -import { test, expect } from "./coverage-fixture"; -import { getTestUser } from "./utils/auth"; -import { LoginPage } from "./pages/login.page"; -import { hasAttribute, hasUrl, isHidden, isVisible } from "./utils/assertion"; -import { getSelectors } from "./utils/selectors"; - -test.beforeEach(async ({ page }) => { - const testUser = await getTestUser(); - const loginPage = new LoginPage(page); - - // Login and navigate to settings - await page.goto("/login"); - await loginPage.login(testUser.email, testUser.password); - await hasUrl(page, "/marketplace"); - - // Navigate to settings page - await page.goto("/profile/settings"); - await hasUrl(page, "/profile/settings"); -}); - -test("should display email form elements correctly", async ({ page }) => { - const { getField, getButton, getText, getLink } = getSelectors(page); - - // Check email form elements are displayed - await isVisible(getText("Security & Access")); - await isVisible(getField("Email")); - await isVisible(getLink("Reset password")); - await isVisible(getButton("Update email")); - - const updateEmailButton = getButton("Update email"); - const resetPasswordButton = getLink("Reset password"); - - // Button should be disabled initially (no changes) - await expect(updateEmailButton).toBeDisabled(); - - // Test reset password navigation - await hasAttribute(resetPasswordButton, "href", "/reset-password"); -}); - -test("should show validation error for empty email", async ({ page }) => { - const { getField, getButton } = getSelectors(page); - - const emailField = getField("Email"); - const updateEmailButton = getButton("Update email"); - - await 
emailField.fill(""); - await updateEmailButton.click(); - await isVisible(page.getByText("Email is required")); -}); - -test("should show validation error for invalid email", async ({ page }) => { - const { getField, getButton } = getSelectors(page); - - const emailField = getField("Email"); - const updateEmailButton = getButton("Update email"); - - await emailField.fill("invalid email"); - await updateEmailButton.click(); - await isVisible(page.getByText("Please enter a valid email address")); -}); - -test("should handle valid email", async ({ page }) => { - const { getField, getButton } = getSelectors(page); - - const emailField = getField("Email"); - const updateEmailButton = getButton("Update email"); - - // Test successful email update - const newEmail = `test+${Date.now()}@example.com`; - await emailField.fill(newEmail); - await expect(updateEmailButton).toBeEnabled(); - await updateEmailButton.click(); - await isHidden(page.getByText("Email is required")); - await isHidden(page.getByText("Please enter a valid email address")); -}); - -test("should handle complete notification form functionality and form interactions", async ({ - page, -}) => { - const { getButton } = getSelectors(page); - - // Check notification form elements are displayed - await isVisible( - page.getByRole("heading", { name: "Notifications", exact: true }), - ); - - await isVisible(getButton("Cancel")); - await isVisible(getButton("Save preferences")); - - // Check all notification switches are present - get all switches on page - const switches = await page.getByRole("switch").all(); - - for (const switchElement of switches) { - await isVisible(switchElement); - } - - const savePreferencesButton = getButton("Save preferences"); - const cancelButton = getButton("Cancel"); - - // Button should be disabled initially (no changes) - await expect(savePreferencesButton).toBeDisabled(); - - // Test switch toggling functionality - for (const switchElement of switches) { - const initialState = await switchElement.isChecked(); - await switchElement.click(); - const newState = await switchElement.isChecked(); - expect(newState).toBe(!initialState); - } - - // Test button enabling when changes are made - if (switches.length > 0) { - await expect(savePreferencesButton).toBeEnabled(); - } - - // Test cancel functionality - await cancelButton.click(); - // Wait for form state to update after cancel - await page.waitForTimeout(100); - await expect(savePreferencesButton).toBeDisabled(); - - // Test successful save with multiple switches - const testSwitches = switches.slice(0, Math.min(3, switches.length)); - for (const switchElement of testSwitches) { - await switchElement.click(); - } - await expect(savePreferencesButton).toBeEnabled(); - await savePreferencesButton.click(); - await isVisible(getButton("Saving...")); - await isVisible( - page.getByText("Successfully updated notification preferences"), - ); - - // Test persistence after page reload - if (testSwitches.length > 0) { - const finalState = await testSwitches[0].isChecked(); - await page.reload(); - await hasUrl(page, "/profile/settings"); - const reloadedSwitches = await page.getByRole("switch").all(); - if (reloadedSwitches.length > 0) { - expect(await reloadedSwitches[0].isChecked()).toBe(finalState); - } - } -}); diff --git a/autogpt_platform/frontend/src/tests/signin.spec.ts b/autogpt_platform/frontend/src/tests/signin.spec.ts deleted file mode 100644 index f7249ca059..0000000000 --- a/autogpt_platform/frontend/src/tests/signin.spec.ts +++ /dev/null @@ -1,199 +0,0 
@@ -// auth.spec.ts - -import { test } from "./coverage-fixture"; -import { BuildPage } from "./pages/build.page"; -import { LoginPage } from "./pages/login.page"; -import { hasUrl, isHidden, isVisible } from "./utils/assertion"; -import { getTestUser } from "./utils/auth"; -import { getSelectors } from "./utils/selectors"; - -test.beforeEach(async ({ page }) => { - await page.goto("/login"); -}); - -test("check the navigation when logged out", async ({ page }) => { - const { getButton, getText, getLink } = getSelectors(page); - - // Test marketplace link - const marketplaceLink = getLink("Marketplace"); - await isVisible(marketplaceLink); - await marketplaceLink.click(); - await hasUrl(page, "/marketplace"); - await isVisible(getText("Explore AI agents", { exact: false })); - - // Test login button - const loginBtn = getButton("Log In"); - await isVisible(loginBtn); - await loginBtn.click(); - await hasUrl(page, "/login"); - await isHidden(loginBtn); -}); - -test("user can login successfully", async ({ page }) => { - const testUser = await getTestUser(); - const loginPage = new LoginPage(page); - const { getId, getButton, getRole } = getSelectors(page); - - await loginPage.login(testUser.email, testUser.password); - await hasUrl(page, "/marketplace"); - - const accountMenuTrigger = getId("profile-popout-menu-trigger"); - - await isVisible(accountMenuTrigger); - - await accountMenuTrigger.click(); - const accountMenuPopover = getRole("dialog"); - await isVisible(accountMenuPopover); - - const accountMenuUserEmail = getId("account-menu-user-email"); - await isVisible(accountMenuUserEmail); - await test - .expect(accountMenuUserEmail) - .toHaveText(testUser.email.split("@")[0].toLowerCase()); - - const logoutBtn = getButton("Log out"); - await isVisible(logoutBtn); - await logoutBtn.click(); -}); - -test("user can logout successfully", async ({ page }) => { - const testUser = await getTestUser(); - const loginPage = new LoginPage(page); - const { getButton, getId } = getSelectors(page); - - await loginPage.login(testUser.email, testUser.password); - await hasUrl(page, "/marketplace"); - - // Open account menu - await getId("profile-popout-menu-trigger").click(); - - // Logout - await getButton("Log out").click(); - await hasUrl(page, "/login"); -}); - -test("login in, then out, then in again", async ({ page }) => { - const testUser = await getTestUser(); - const loginPage = new LoginPage(page); - const { getButton, getId } = getSelectors(page); - - await loginPage.login(testUser.email, testUser.password); - await hasUrl(page, "/marketplace"); - - // Click on the profile menu trigger to open account menu - await getId("profile-popout-menu-trigger").click(); - - // Click the logout button in the popout menu - await getButton("Log out").click(); - - await test.expect(page).toHaveURL("/login"); - await loginPage.login(testUser.email, testUser.password); - await test.expect(page).toHaveURL("/marketplace"); - await test - .expect(page.getByTestId("profile-popout-menu-trigger")) - .toBeVisible(); -}); - -test("multi-tab logout with WebSocket cleanup", async ({ context }) => { - const testUser = await getTestUser(); - - // Tab 1 - const page1 = await context.newPage(); - const builderPage1 = new BuildPage(page1); - - // Capture console errors to ensure WebSocket cleanup prevents errors - const consoleErrors: string[] = []; - page1.on("console", (msg) => { - if (msg.type() === "error" && msg.text().includes("WebSocket")) { - consoleErrors.push(`Page1: ${msg.text()}`); - } - }); - - const loginPage1 = 
new LoginPage(page1); - const { getButton: getButton1, getId: getId1 } = getSelectors(page1); - - // Login - await page1.goto("/login"); - await loginPage1.login(testUser.email, testUser.password); - await hasUrl(page1, "/marketplace"); - - // Navigate to builder + wait for WebSocket connection - await page1.goto("/build"); - await hasUrl(page1, "/build"); - await builderPage1.closeTutorial(); - await page1.waitForTimeout(1000); - await isVisible(getId1("profile-popout-menu-trigger")); - - // Tab 2 - const page2 = await context.newPage(); - - const { getId: getId2 } = getSelectors(page2); - - page2.on("console", (msg) => { - if (msg.type() === "error" && msg.text().includes("WebSocket")) { - consoleErrors.push(`Page2: ${msg.text()}`); - } - }); - - // Navigate to builder + wait for WebSocket connection - await page2.goto("/build"); - await hasUrl(page2, "/build"); - await page2.waitForTimeout(1000); - await isVisible(getId2("profile-popout-menu-trigger")); - - // Tab 1: Logout - await getId1("profile-popout-menu-trigger").click(); - await getButton1("Log out").click(); - await hasUrl(page1, "/login"); - - // Tab 2: Wait for cross-tab logout to take effect and check if redirected to login - await page2.waitForTimeout(2000); // Give time for cross-tab logout mechanism - - // Check if Tab 2 has been redirected to login or refresh the page to trigger redirect - try { - await page2.reload(); - await hasUrl(page2, "/login?next=%2Fbuild"); - } catch { - // If reload fails, the page might already be redirecting - await hasUrl(page2, "/login?next=%2Fbuild"); - } - - // Verify the profile menu is no longer visible (user is logged out) - await isHidden(getId2("profile-popout-menu-trigger")); - - // Verify no WebSocket connection errors occurred during logout - test.expect(consoleErrors).toHaveLength(0); - if (consoleErrors.length > 0) { - console.log("WebSocket errors during logout:", consoleErrors); - } - - // Clean up - await page1.close(); - await page2.close(); -}); - -test("logged in user is redirected from /login to /copilot", async ({ - page, -}) => { - const testUser = await getTestUser(); - const loginPage = new LoginPage(page); - - await loginPage.login(testUser.email, testUser.password); - await hasUrl(page, "/marketplace"); - - await page.goto("/login"); - await hasUrl(page, "/copilot"); -}); - -test("logged in user is redirected from /signup to /copilot", async ({ - page, -}) => { - const testUser = await getTestUser(); - const loginPage = new LoginPage(page); - - await loginPage.login(testUser.email, testUser.password); - await hasUrl(page, "/marketplace"); - - await page.goto("/signup"); - await hasUrl(page, "/copilot"); -}); diff --git a/autogpt_platform/frontend/src/tests/signup.spec.ts b/autogpt_platform/frontend/src/tests/signup.spec.ts deleted file mode 100644 index bcf5ea3725..0000000000 --- a/autogpt_platform/frontend/src/tests/signup.spec.ts +++ /dev/null @@ -1,126 +0,0 @@ -import { test, expect } from "./coverage-fixture"; -import { - generateTestEmail, - generateTestPassword, - signupTestUser, - validateSignupForm, -} from "./utils/signup"; -import { getSelectors } from "./utils/selectors"; -import { hasUrl, isVisible } from "./utils/assertion"; - -test("user can signup successfully", async ({ page }) => { - try { - const testUser = await signupTestUser(page); - const { getText, getId } = getSelectors(page); - - // Verify user was created - expect(testUser.email).toBeTruthy(); - expect(testUser.password).toBeTruthy(); - expect(testUser.createdAt).toBeTruthy(); - - const 
-    const marketplaceText = getText(
-      "Bringing you AI agents designed by thinkers from around the world",
-    ).first();
-
-    // Verify we're on marketplace and authenticated
-    await hasUrl(page, "/marketplace");
-    await isVisible(marketplaceText);
-    await isVisible(getId("profile-popout-menu-trigger"));
-  } catch (error) {
-    console.error("❌ Signup test failed:", error);
-  }
-});
-
-test("signup form validation works", async ({ page }) => {
-  const { getField, getRole, getButton } = getSelectors(page);
-  const emailInput = getField("Email");
-  const passwordInput = page.locator("#password");
-  const confirmPasswordInput = page.locator("#confirmPassword");
-  const signupButton = getButton("Sign up");
-  const termsCheckbox = getRole("checkbox");
-
-  await validateSignupForm(page);
-
-  // Additional validation tests
-  await page.goto("/signup");
-
-  // Test with mismatched passwords
-  await emailInput.fill(generateTestEmail());
-  await passwordInput.fill("password1");
-  await confirmPasswordInput.fill("password2");
-  await termsCheckbox.click();
-  await signupButton.click();
-
-  // Should still be on signup page
-  await hasUrl(page, /\/signup/);
-});
-
-test("user can signup with custom credentials", async ({ page }) => {
-  const { getId } = getSelectors(page);
-
-  try {
-    const customEmail = generateTestEmail();
-    const customPassword = await generateTestPassword();
-
-    const testUser = await signupTestUser(page, customEmail, customPassword);
-
-    // Verify correct credentials were used
-    expect(testUser.email).toBe(customEmail);
-    expect(testUser.password).toBe(customPassword);
-
-    // Verify successful signup
-    await hasUrl(page, "/marketplace");
-    await isVisible(getId("profile-popout-menu-trigger"));
-  } catch (error) {
-    console.error("❌ Custom credentials signup test failed:", error);
-  }
-});
-
-test("user can signup with existing email handling", async ({
-  page,
-  browser,
-}) => {
-  try {
-    const testEmail = generateTestEmail();
-    const testPassword = await generateTestPassword();
-
-    // First signup
-    const firstUser = await signupTestUser(page, testEmail, testPassword);
-    expect(firstUser.email).toBe(testEmail);
-
-    // Create new browser context for second signup (simulates new browser window)
-    const newContext = await browser.newContext();
-    const newPage = await newContext.newPage();
-
-    try {
-      const { getText, getField, getRole, getButton } = getSelectors(newPage);
-
-      // Second signup attempt with same email in new browser context
-      // Navigate to signup page
-      await newPage.goto("http://localhost:3000/signup");
-
-      // Wait for page to load
-      getText("Create a new account");
-
-      // Fill form
-      const emailInput = getField("Email");
-      await emailInput.fill(testEmail);
-      const passwordInput = newPage.locator("#password");
-      await passwordInput.fill(testPassword);
-      const confirmPasswordInput = newPage.locator("#confirmPassword");
-      await confirmPasswordInput.fill(testPassword);
-
-      // Agree to terms and submit
-      await getRole("checkbox").click();
-      const signupButton = getButton("Sign up");
-      await signupButton.click();
-      await isVisible(getText("User with this email already exists"));
-    } catch (_error) {
-    } finally {
-      // Clean up new browser context
-      await newContext.close();
-    }
-  } catch (error) {
-    console.error("❌ Duplicate email handling test failed:", error);
-  }
-});
diff --git a/autogpt_platform/frontend/src/tests/title.spec.ts b/autogpt_platform/frontend/src/tests/title.spec.ts
deleted file mode 100644
index 87cac8fe53..0000000000
--- a/autogpt_platform/frontend/src/tests/title.spec.ts
+++ /dev/null
@@ -1,6 +0,0 @@
-import { test, expect } from "./coverage-fixture";
-
-test("has title", async ({ page }) => {
-  await page.goto("/");
-  await expect(page).toHaveTitle(/AutoGPT Platform/);
-});
diff --git a/autogpt_platform/frontend/src/tests/util.spec.ts b/autogpt_platform/frontend/src/tests/util.spec.ts
deleted file mode 100644
index 7e766457ac..0000000000
--- a/autogpt_platform/frontend/src/tests/util.spec.ts
+++ /dev/null
@@ -1,97 +0,0 @@
-import { test, expect } from "./coverage-fixture";
-import { setNestedProperty } from "../lib/utils";
-
-const testCases = [
-  {
-    name: "simple property assignment",
-    path: "name",
-    value: "John",
-    expected: { name: "John" },
-  },
-  {
-    name: "nested property with dot notation",
-    path: "user.settings.theme",
-    value: "dark",
-    expected: { user: { settings: { theme: "dark" } } },
-  },
-  {
-    name: "nested property with slash notation",
-    path: "user/settings/language",
-    value: "en",
-    expected: { user: { settings: { language: "en" } } },
-  },
-  {
-    name: "mixed dot and slash notation",
-    path: "user.settings/preferences.color",
-    value: "blue",
-    expected: { user: { settings: { preferences: { color: "blue" } } } },
-  },
-  {
-    name: "overwrite primitive with object",
-    path: "user.details",
-    value: { age: 30 },
-    expected: { user: { details: { age: 30 } } },
-  },
-];
-
-for (const { name, path, value, expected } of testCases) {
-  test(name, () => {
-    const obj = {};
-    setNestedProperty(obj, path, value);
-    expect(obj).toEqual(expected);
-  });
-}
-
-test("should throw error for null object", () => {
-  expect(() => {
-    setNestedProperty(null, "test", "value");
-  }).toThrow("Target must be a non-null object");
-});
-
-test("should throw error for undefined object", () => {
-  expect(() => {
-    setNestedProperty(undefined, "test", "value");
-  }).toThrow("Target must be a non-null object");
-});
-
-test("should throw error for non-object target", () => {
-  expect(() => {
-    setNestedProperty("string", "test", "value");
-  }).toThrow("Target must be a non-null object");
-});
-
-test("should throw error for empty path", () => {
-  expect(() => {
-    setNestedProperty({}, "", "value");
-  }).toThrow("Path must be a non-empty string");
-});
-
-test("should throw error for __proto__ access", () => {
-  expect(() => {
-    setNestedProperty({}, "__proto__.malicious", "attack");
-  }).toThrow("Invalid property name: __proto__");
-});
-
-test("should throw error for constructor access", () => {
-  expect(() => {
-    setNestedProperty({}, "constructor.prototype.malicious", "attack");
-  }).toThrow("Invalid property name: constructor");
-});
-
-test("should throw error for prototype access", () => {
-  expect(() => {
-    setNestedProperty({}, "obj.prototype.malicious", "attack");
-  }).toThrow("Invalid property name: prototype");
-});
-
-test("secure implementation prevents prototype pollution", () => {
-  const obj = {};
-  expect(() => {
-    setNestedProperty(obj, "__proto__.polluted", true);
-  }).toThrow("Invalid property name: __proto__");
-
-  // Verify no pollution occurred
-  // eslint-disable-next-line @typescript-eslint/ban-ts-comment
-  // @ts-ignore
-  expect({}.polluted).toBeUndefined();
-});
diff --git a/autogpt_platform/frontend/src/tests/utils/auth.ts b/autogpt_platform/frontend/src/tests/utils/auth.ts
deleted file mode 100644
index 8e5c0a90f7..0000000000
--- a/autogpt_platform/frontend/src/tests/utils/auth.ts
+++ /dev/null
@@ -1,175 +0,0 @@
-import fs from "fs";
-import path from "path";
-import { signupTestUser } from "./signup";
-import { getBrowser } from "./get-browser";
-
-export interface TestUser {
-  email: string;
-  password: string;
-  id?: string;
-  createdAt?: string;
-}
-
-export interface UserPool {
-  users: TestUser[];
-  createdAt: string;
-  version: string;
-}
-
-export async function createTestUser(
-  email?: string,
-  password?: string,
-  ignoreOnboarding: boolean = true,
-): Promise<TestUser> {
-  const { faker } = await import("@faker-js/faker");
-  const userEmail = email || faker.internet.email();
-  const userPassword = password || faker.internet.password({ length: 12 });
-
-  try {
-    const browser = await getBrowser();
-    const context = await browser.newContext();
-    const page = await context.newPage();
-
-    // Auto-accept cookies in test environment to prevent banner from appearing
-    await page.addInitScript(() => {
-      window.localStorage.setItem(
-        "autogpt_cookie_consent",
-        JSON.stringify({
-          hasConsented: true,
-          timestamp: Date.now(),
-          analytics: true,
-          monitoring: true,
-        }),
-      );
-    });
-
-    try {
-      const testUser = await signupTestUser(
-        page,
-        userEmail,
-        userPassword,
-        ignoreOnboarding,
-        false,
-      );
-      return testUser;
-    } finally {
-      await page.close();
-      await context.close();
-      await browser.close();
-    }
-  } catch (error) {
-    console.error(`❌ Error creating test user ${userEmail}:`, error);
-    throw error;
-  }
-}
-
-export async function createTestUsers(count: number): Promise<TestUser[]> {
-  console.log(`👥 Creating ${count} test users...`);
-
-  const users: TestUser[] = [];
-  let consecutiveFailures = 0;
-
-  for (let i = 0; i < count; i++) {
-    try {
-      const user = await createTestUser();
-      users.push(user);
-      consecutiveFailures = 0; // Reset failure counter on success
-      console.log(`✅ Created user ${i + 1}/${count}: ${user.email}`);
-
-      // Small delay to prevent overwhelming the system
-      if (i < count - 1) {
-        await new Promise((resolve) => setTimeout(resolve, 500));
-      }
-    } catch (error) {
-      consecutiveFailures++;
-      console.error(`❌ Failed to create user ${i + 1}/${count}:`, error);
-
-      // If we have too many consecutive failures, stop trying
-      if (consecutiveFailures >= 3) {
-        console.error(
-          `⚠️ Stopping after ${consecutiveFailures} consecutive failures`,
-        );
-        break;
-      }
-
-      // Add a longer delay after failure to let system recover
-      await new Promise((resolve) => setTimeout(resolve, 1000));
-    }
-  }
-
-  console.log(`🎉 Successfully created ${users.length}/${count} test users`);
-  return users;
-}
-
-export async function saveUserPool(
-  users: TestUser[],
-  filePath?: string,
-): Promise<void> {
-  const defaultPath = path.resolve(process.cwd(), ".auth", "user-pool.json");
-  const finalPath = filePath || defaultPath;
-
-  // Ensure .auth directory exists
-  const dirPath = path.dirname(finalPath);
-  if (!fs.existsSync(dirPath)) {
-    fs.mkdirSync(dirPath, { recursive: true });
-  }
-
-  const userPool: UserPool = {
-    users,
-    createdAt: new Date().toISOString(),
-    version: "1.0.0",
-  };
-
-  try {
-    fs.writeFileSync(finalPath, JSON.stringify(userPool, null, 2));
-    console.log(`✅ Successfully saved user pool to: ${finalPath}`);
-  } catch (error) {
-    console.error(`❌ Failed to save user pool to ${finalPath}:`, error);
-    throw error;
-  }
-}
-
-export async function loadUserPool(
-  filePath?: string,
-): Promise<UserPool | null> {
-  const defaultPath = path.resolve(process.cwd(), ".auth", "user-pool.json");
-  const finalPath = filePath || defaultPath;
-
-  console.log(`📖 Loading user pool from: ${finalPath}`);
-
-  try {
-    if (!fs.existsSync(finalPath)) {
-      console.log(`⚠️ User pool file not found: ${finalPath}`);
-      return null;
-    }
-
-    const fileContent = fs.readFileSync(finalPath, "utf-8");
-    const userPool: UserPool = JSON.parse(fileContent);
-
-    console.log(
-      `✅ Successfully loaded ${userPool.users.length} users from: ${finalPath}`,
-    );
-    console.log(`📅 User pool created at: ${userPool.createdAt}`);
-    console.log(`🔖 User pool version: ${userPool.version}`);
-
-    return userPool;
-  } catch (error) {
-    console.error(`❌ Failed to load user pool from ${finalPath}:`, error);
-    return null;
-  }
-}
-
-export async function getTestUser(): Promise<TestUser> {
-  const userPool = await loadUserPool();
-  if (!userPool) {
-    throw new Error("User pool not found");
-  }
-
-  if (userPool.users.length === 0) {
-    throw new Error("No users available in the pool");
-  }
-
-  // Return a random user from the pool
-  const randomIndex = Math.floor(Math.random() * userPool.users.length);
-  return userPool.users[randomIndex];
-}
diff --git a/autogpt_platform/frontend/src/types/auth.test.ts b/autogpt_platform/frontend/src/types/auth.test.ts
new file mode 100644
index 0000000000..ef5c0b38e1
--- /dev/null
+++ b/autogpt_platform/frontend/src/types/auth.test.ts
@@ -0,0 +1,41 @@
+import { describe, expect, test } from "vitest";
+import { signupFormSchema } from "./auth";
+
+describe("signupFormSchema", () => {
+  test("rejects invalid signup input", () => {
+    const result = signupFormSchema.safeParse({
+      email: "not-an-email",
+      password: "short",
+      confirmPassword: "different",
+      agreeToTerms: false,
+    });
+
+    expect(result.success).toBe(false);
+
+    if (result.success) {
+      return;
+    }
+
+    const { fieldErrors } = result.error.flatten();
+
+    expect(fieldErrors.email?.length).toBeGreaterThan(0);
+    expect(fieldErrors.password).toContain(
+      "Password must contain at least 12 characters",
+    );
+    expect(fieldErrors.confirmPassword).toContain("Passwords don't match");
+    expect(fieldErrors.agreeToTerms).toContain(
+      "You must agree to the Terms of Use and Privacy Policy",
+    );
+  });
+
+  test("accepts a valid signup payload", () => {
+    const result = signupFormSchema.safeParse({
+      email: "valid@example.com",
+      password: "validpassword123",
+      confirmPassword: "validpassword123",
+      agreeToTerms: true,
+    });
+
+    expect(result.success).toBe(true);
+  });
+});
diff --git a/autogpt_platform/frontend/vitest.config.mts b/autogpt_platform/frontend/vitest.config.mts
index f91fc7442e..4e8c035673 100644
--- a/autogpt_platform/frontend/vitest.config.mts
+++ b/autogpt_platform/frontend/vitest.config.mts
@@ -16,6 +16,7 @@ export default defineConfig({
       exclude: [
         "src/**/*.test.{ts,tsx}",
         "src/**/*.stories.{ts,tsx}",
+        "src/playwright/**",
        "src/tests/**",
       ],
     },
diff --git a/codecov.yml b/codecov.yml
index 8a09885275..c59e08e110 100644
--- a/codecov.yml
+++ b/codecov.yml
@@ -4,14 +4,36 @@ codecov:
 coverage:
   status:
     project:
-      default:
+      platform-backend:
+        target: auto
+        threshold: 1%
+        flags:
+          - platform-backend
+      platform-frontend:
         target: auto
         threshold: 1%
         informational: true
+        flags:
+          - platform-frontend
     patch:
-      default:
+      platform-backend:
+        target: 80%
+        flags:
+          - platform-backend
+      platform-frontend:
+        target: 80%
+        flags:
+          - platform-frontend
+      autogpt-libs:
         target: 80%
         informational: true
+        flags:
+          - autogpt-libs
+      classic:
+        target: 80%
+        informational: true
+        flags:
+          - autogpt-agent

 flags:
   platform-backend:
     paths:
       - autogpt_platform/backend/
     carryforward: true
@@ -26,6 +48,10 @@ flags:
     paths:
       - autogpt_platform/frontend/src/
     carryforward: true
+  autogpt-libs:
+    paths:
+      - autogpt_platform/autogpt_libs/
+    carryforward: true
   autogpt-agent:
     paths:
       - classic/
     carryforward: true
@@ -36,8 +62,10 @@ component_management:
   statuses:
     - type: project
       target: auto
+      informational: true
     - type: patch
       target: 80%
+      informational: true
   individual_components:
     - component_id: platform-backend
       name: "Platform Backend"
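
For reference, `signupFormSchema` itself lives in `src/types/auth.ts` (imported above as `./auth`) and is not part of this diff. A minimal zod sketch consistent with the messages the new `auth.test.ts` asserts — the actual schema in the repo may differ — could look like:

```ts
// Hypothetical sketch of src/types/auth.ts — only the shape implied by the
// assertions in auth.test.ts; not the repo's actual implementation.
import { z } from "zod";

export const signupFormSchema = z
  .object({
    email: z.string().email(),
    password: z
      .string()
      .min(12, "Password must contain at least 12 characters"),
    confirmPassword: z.string(),
    agreeToTerms: z.literal(true, {
      errorMap: () => ({
        message: "You must agree to the Terms of Use and Privacy Policy",
      }),
    }),
  })
  // Cross-field check; reported under the confirmPassword field
  .refine((data) => data.password === data.confirmPassword, {
    message: "Passwords don't match",
    path: ["confirmPassword"],
  });

export type SignupFormValues = z.infer<typeof signupFormSchema>;
```

If the real schema uses `.refine` like this, note that in zod v3 the refinement still runs when sibling field checks fail (the inner parse is marked dirty rather than aborted), which is what lets the test collect all four field errors from a single `safeParse` call.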