Merge remote-tracking branch 'origin/dev' into fix/copilot-transcript-compaction

fix(copilot): replace simulated retry logic in test scenarios 4-7 with _reduce_context calls
Scenarios 4 and 6 (and parts of 5 and 7) were testing Python if/for logic that mimicked the retry tree instead of calling production code. They now call _reduce_context directly, which exercises the actual decision path.
2026-03-17 03:00:27 -04:00 · 2026-03-17 06:16:56 +07:00 · 2026-03-16 06:28:15 +07:00 · 2026-03-16 06:23:59 +07:00 · 2026-03-16 06:23:02 +07:00 · 2026-03-16 06:21:59 +07:00
29 changed files with 3397 additions and 1500 deletions
--- a/autogpt_platform/backend/backend/copilot/config.py
+++ b/autogpt_platform/backend/backend/copilot/config.py
@@ -115,7 +115,7 @@ class ChatConfig(BaseSettings):
        description="E2B sandbox template to use for copilot sessions.",
    )
    e2b_sandbox_timeout: int = Field(
-        default=300,  # 5 min safety net — explicit per-turn pause is the primary mechanism
+        default=420,  # 7 min safety net — allows headroom for compaction retries
        description="E2B sandbox running-time timeout (seconds). "
        "E2B timeout is wall-clock (not idle). Explicit per-turn pause is the primary "
        "mechanism; this is the safety net.",
--- a/autogpt_platform/backend/backend/copilot/integration_creds.py
+++ b/autogpt_platform/backend/backend/copilot/integration_creds.py
@@ -1,162 +0,0 @@
-"""Integration credential lookup with per-process TTL cache.
-
-Provides token retrieval for connected integrations so that copilot tools
-(e.g. bash_exec) can inject auth tokens into the execution environment without
-hitting the database on every command.
-
-Cache semantics (handled automatically by TTLCache):
- Token found → cached for _TOKEN_CACHE_TTL (5 min).  Avoids repeated DB hits
-  for users who have credentials and are running many bash commands.
- No credentials found → cached for _NULL_CACHE_TTL (60 s).  Avoids a DB hit
-  on every E2B command for users who haven't connected an account yet, while
-  still picking up a newly-connected account within one minute.
-
-Both caches are bounded to _CACHE_MAX_SIZE entries; cachetools evicts the
-least-recently-used entry when the limit is reached.
-
-Multi-worker note: both caches are in-process only.  Each worker/replica
-maintains its own independent cache, so a credential fetch may be duplicated
-across processes.  This is acceptable for the current goal (reduce DB hits per
-session per-process), but if cache efficiency across replicas becomes important
-a shared cache (e.g. Redis) should be used instead.
-"""
-
-import logging
-from typing import cast
-
-from cachetools import TTLCache
-
-from backend.data.model import APIKeyCredentials, OAuth2Credentials
-from backend.integrations.creds_manager import (
-    IntegrationCredentialsManager,
-    register_creds_changed_hook,
-)
-
-logger = logging.getLogger(__name__)
-
-# Maps provider slug → env var names to inject when the provider is connected.
-# Add new providers here when adding integration support.
-# NOTE: keep in sync with connect_integration._PROVIDER_INFO — both registries
-# must be updated when adding a new provider.
-PROVIDER_ENV_VARS: dict[str, list[str]] = {
-    "github": ["GH_TOKEN", "GITHUB_TOKEN"],
-}
-
-_TOKEN_CACHE_TTL = 300.0  # seconds — for found tokens
-_NULL_CACHE_TTL = 60.0  # seconds — for "not connected" results
-_CACHE_MAX_SIZE = 10_000
-
-# (user_id, provider) → token string.  TTLCache handles expiry + eviction.
-# Thread-safety note: TTLCache is NOT thread-safe, but that is acceptable here
-# because all callers (get_provider_token, invalidate_user_provider_cache) run
-# exclusively on the asyncio event loop.  There are no await points between a
-# cache read and its corresponding write within any function, so no concurrent
-# coroutine can interleave.  If ThreadPoolExecutor workers are ever added to
-# this path, a threading.RLock should be wrapped around these caches.
-_token_cache: TTLCache[tuple[str, str], str] = TTLCache(
-    maxsize=_CACHE_MAX_SIZE, ttl=_TOKEN_CACHE_TTL
-)
-# Separate cache for "no credentials" results with a shorter TTL.
-_null_cache: TTLCache[tuple[str, str], bool] = TTLCache(
-    maxsize=_CACHE_MAX_SIZE, ttl=_NULL_CACHE_TTL
-)
-
-
-def invalidate_user_provider_cache(user_id: str, provider: str) -> None:
-    """Remove the cached entry for *user_id*/*provider* from both caches.
-
-    Call this after storing new credentials so that the next
-    ``get_provider_token()`` call performs a fresh DB lookup instead of
-    serving a stale TTL-cached result.
-    """
-    key = (user_id, provider)
-    _token_cache.pop(key, None)
-    _null_cache.pop(key, None)
-
-
-# Register this module's cache-bust function with the credentials manager so
-# that any create/update/delete operation immediately evicts stale cache
-# entries.  This avoids a lazy import inside creds_manager and eliminates the
-# circular-import risk.
-register_creds_changed_hook(invalidate_user_provider_cache)
-
-# Module-level singleton to avoid re-instantiating IntegrationCredentialsManager
-# on every cache-miss call to get_provider_token().
-_manager = IntegrationCredentialsManager()
-
-
-async def get_provider_token(user_id: str, provider: str) -> str | None:
-    """Return the user's access token for *provider*, or ``None`` if not connected.
-
-    OAuth2 tokens are preferred (refreshed if needed); API keys are the fallback.
-    Found tokens are cached for _TOKEN_CACHE_TTL (5 min).  "Not connected" results
-    are cached for _NULL_CACHE_TTL (60 s) to avoid a DB hit on every bash_exec
-    command for users who haven't connected yet, while still picking up a
-    newly-connected account within one minute.
-    """
-    cache_key = (user_id, provider)
-
-    if cache_key in _null_cache:
-        return None
-    if cached := _token_cache.get(cache_key):
-        return cached
-
-    manager = _manager
-    try:
-        creds_list = await manager.store.get_creds_by_provider(user_id, provider)
-    except Exception:
-        logger.debug("Failed to fetch %s credentials for user %s", provider, user_id)
-        return None
-
-    # Pass 1: prefer OAuth2 (carry scope info, refreshable via token endpoint).
-    # Sort so broader-scoped tokens come first: a token with "repo" scope covers
-    # full git access, while a public-data-only token lacks push/pull permission.
-    # lock=False — background injection; not worth a distributed lock acquisition.
-    oauth2_creds = sorted(
-        [c for c in creds_list if c.type == "oauth2"],
-        key=lambda c: 0 if "repo" in (cast(OAuth2Credentials, c).scopes or []) else 1,
-    )
-    for creds in oauth2_creds:
-        if creds.type == "oauth2":
-            try:
-                fresh = await manager.refresh_if_needed(
-                    user_id, cast(OAuth2Credentials, creds), lock=False
-                )
-                token = fresh.access_token.get_secret_value()
-            except Exception:
-                logger.warning(
-                    "Failed to refresh %s OAuth token for user %s; "
-                    "falling back to potentially stale token",
-                    provider,
-                    user_id,
-                )
-                token = cast(OAuth2Credentials, creds).access_token.get_secret_value()
-            _token_cache[cache_key] = token
-            return token
-
-    # Pass 2: fall back to API key (no expiry, no refresh needed).
-    for creds in creds_list:
-        if creds.type == "api_key":
-            token = cast(APIKeyCredentials, creds).api_key.get_secret_value()
-            _token_cache[cache_key] = token
-            return token
-
-    # No credentials found — cache to avoid repeated DB hits.
-    _null_cache[cache_key] = True
-    return None
-
-
-async def get_integration_env_vars(user_id: str) -> dict[str, str]:
-    """Return env vars for all providers the user has connected.
-
-    Iterates :data:`PROVIDER_ENV_VARS`, fetches each token, and builds a flat
-    ``{env_var: token}`` dict ready to pass to a subprocess or E2B sandbox.
-    Only providers with a stored credential contribute entries.
-    """
-    env: dict[str, str] = {}
-    for provider, var_names in PROVIDER_ENV_VARS.items():
-        token = await get_provider_token(user_id, provider)
-        if token:
-            for var in var_names:
-                env[var] = token
-    return env
--- a/autogpt_platform/backend/backend/copilot/integration_creds_test.py
+++ b/autogpt_platform/backend/backend/copilot/integration_creds_test.py
@@ -1,193 +0,0 @@
-"""Tests for integration_creds — TTL cache and token lookup paths."""
-
-from unittest.mock import AsyncMock, MagicMock, patch
-
-import pytest
-from pydantic import SecretStr
-
-from backend.copilot.integration_creds import (
-    _NULL_CACHE_TTL,
-    _TOKEN_CACHE_TTL,
-    PROVIDER_ENV_VARS,
-    _null_cache,
-    _token_cache,
-    get_integration_env_vars,
-    get_provider_token,
-    invalidate_user_provider_cache,
-)
-from backend.data.model import APIKeyCredentials, OAuth2Credentials
-
-_USER = "user-integration-creds-test"
-_PROVIDER = "github"
-
-
-def _make_api_key_creds(key: str = "test-api-key") -> APIKeyCredentials:
-    return APIKeyCredentials(
-        id="creds-api-key",
-        provider=_PROVIDER,
-        api_key=SecretStr(key),
-        title="Test API Key",
-        expires_at=None,
-    )
-
-
-def _make_oauth2_creds(token: str = "test-oauth-token") -> OAuth2Credentials:
-    return OAuth2Credentials(
-        id="creds-oauth2",
-        provider=_PROVIDER,
-        title="Test OAuth",
-        access_token=SecretStr(token),
-        refresh_token=SecretStr("test-refresh"),
-        access_token_expires_at=None,
-        refresh_token_expires_at=None,
-        scopes=[],
-    )
-
-
-@pytest.fixture(autouse=True)
-def clear_caches():
-    """Ensure clean caches before and after every test."""
-    _token_cache.clear()
-    _null_cache.clear()
-    yield
-    _token_cache.clear()
-    _null_cache.clear()
-
-
-class TestInvalidateUserProviderCache:
-    def test_removes_token_entry(self):
-        key = (_USER, _PROVIDER)
-        _token_cache[key] = "tok"
-        invalidate_user_provider_cache(_USER, _PROVIDER)
-        assert key not in _token_cache
-
-    def test_removes_null_entry(self):
-        key = (_USER, _PROVIDER)
-        _null_cache[key] = True
-        invalidate_user_provider_cache(_USER, _PROVIDER)
-        assert key not in _null_cache
-
-    def test_noop_when_key_not_cached(self):
-        # Should not raise even when there is no cache entry.
-        invalidate_user_provider_cache("no-such-user", _PROVIDER)
-
-    def test_only_removes_targeted_key(self):
-        other_key = ("other-user", _PROVIDER)
-        _token_cache[other_key] = "other-tok"
-        invalidate_user_provider_cache(_USER, _PROVIDER)
-        assert other_key in _token_cache
-
-
-class TestGetProviderToken:
-    @pytest.mark.asyncio(loop_scope="session")
-    async def test_returns_cached_token_without_db_hit(self):
-        _token_cache[(_USER, _PROVIDER)] = "cached-tok"
-
-        mock_manager = MagicMock()
-        with patch("backend.copilot.integration_creds._manager", mock_manager):
-            result = await get_provider_token(_USER, _PROVIDER)
-
-        assert result == "cached-tok"
-        mock_manager.store.get_creds_by_provider.assert_not_called()
-
-    @pytest.mark.asyncio(loop_scope="session")
-    async def test_returns_none_for_null_cached_provider(self):
-        _null_cache[(_USER, _PROVIDER)] = True
-
-        mock_manager = MagicMock()
-        with patch("backend.copilot.integration_creds._manager", mock_manager):
-            result = await get_provider_token(_USER, _PROVIDER)
-
-        assert result is None
-        mock_manager.store.get_creds_by_provider.assert_not_called()
-
-    @pytest.mark.asyncio(loop_scope="session")
-    async def test_api_key_creds_returned_and_cached(self):
-        api_creds = _make_api_key_creds("my-api-key")
-        mock_manager = MagicMock()
-        mock_manager.store.get_creds_by_provider = AsyncMock(return_value=[api_creds])
-
-        with patch("backend.copilot.integration_creds._manager", mock_manager):
-            result = await get_provider_token(_USER, _PROVIDER)
-
-        assert result == "my-api-key"
-        assert _token_cache.get((_USER, _PROVIDER)) == "my-api-key"
-
-    @pytest.mark.asyncio(loop_scope="session")
-    async def test_oauth2_preferred_over_api_key(self):
-        oauth_creds = _make_oauth2_creds("oauth-tok")
-        api_creds = _make_api_key_creds("api-tok")
-        mock_manager = MagicMock()
-        mock_manager.store.get_creds_by_provider = AsyncMock(
-            return_value=[api_creds, oauth_creds]
-        )
-        mock_manager.refresh_if_needed = AsyncMock(return_value=oauth_creds)
-
-        with patch("backend.copilot.integration_creds._manager", mock_manager):
-            result = await get_provider_token(_USER, _PROVIDER)
-
-        assert result == "oauth-tok"
-
-    @pytest.mark.asyncio(loop_scope="session")
-    async def test_oauth2_refresh_failure_falls_back_to_stale_token(self):
-        oauth_creds = _make_oauth2_creds("stale-oauth-tok")
-        mock_manager = MagicMock()
-        mock_manager.store.get_creds_by_provider = AsyncMock(return_value=[oauth_creds])
-        mock_manager.refresh_if_needed = AsyncMock(side_effect=RuntimeError("network"))
-
-        with patch("backend.copilot.integration_creds._manager", mock_manager):
-            result = await get_provider_token(_USER, _PROVIDER)
-
-        assert result == "stale-oauth-tok"
-
-    @pytest.mark.asyncio(loop_scope="session")
-    async def test_no_credentials_caches_null_entry(self):
-        mock_manager = MagicMock()
-        mock_manager.store.get_creds_by_provider = AsyncMock(return_value=[])
-
-        with patch("backend.copilot.integration_creds._manager", mock_manager):
-            result = await get_provider_token(_USER, _PROVIDER)
-
-        assert result is None
-        assert _null_cache.get((_USER, _PROVIDER)) is True
-
-    @pytest.mark.asyncio(loop_scope="session")
-    async def test_db_exception_returns_none_without_caching(self):
-        mock_manager = MagicMock()
-        mock_manager.store.get_creds_by_provider = AsyncMock(
-            side_effect=RuntimeError("db down")
-        )
-
-        with patch("backend.copilot.integration_creds._manager", mock_manager):
-            result = await get_provider_token(_USER, _PROVIDER)
-
-        assert result is None
-        # DB errors are not cached — next call will retry
-        assert (_USER, _PROVIDER) not in _token_cache
-        assert (_USER, _PROVIDER) not in _null_cache
-
-    @pytest.mark.asyncio(loop_scope="session")
-    async def test_null_cache_has_shorter_ttl_than_token_cache(self):
-        """Verify the TTL constants are set correctly for each cache."""
-        assert _null_cache.ttl == _NULL_CACHE_TTL
-        assert _token_cache.ttl == _TOKEN_CACHE_TTL
-        assert _NULL_CACHE_TTL < _TOKEN_CACHE_TTL
-
-
-class TestGetIntegrationEnvVars:
-    @pytest.mark.asyncio(loop_scope="session")
-    async def test_injects_all_env_vars_for_provider(self):
-        _token_cache[(_USER, "github")] = "gh-tok"
-
-        result = await get_integration_env_vars(_USER)
-
-        for var in PROVIDER_ENV_VARS["github"]:
-            assert result[var] == "gh-tok"
-
-    @pytest.mark.asyncio(loop_scope="session")
-    async def test_empty_dict_when_no_credentials(self):
-        _null_cache[(_USER, "github")] = True
-
-        result = await get_integration_env_vars(_USER)
-
-        assert result == {}
--- a/autogpt_platform/backend/backend/copilot/prompting.py
+++ b/autogpt_platform/backend/backend/copilot/prompting.py
@@ -95,25 +95,6 @@ Example — committing an image file to GitHub:
  All tasks must run in the foreground.
 """

-# E2B-only notes — E2B has full internet access so gh CLI works there.
-# Not shown in local (bubblewrap) mode: --unshare-net blocks all network.
-_E2B_TOOL_NOTES = """
-### GitHub CLI (`gh`) and git
- If the user has connected their GitHub account, both `gh` and `git` are
-  pre-authenticated — use them directly without any manual login step.
-  `git` HTTPS operations (clone, push, pull) work automatically.
- If the token changes mid-session (e.g. user reconnects with a new token),
-  run `gh auth setup-git` to re-register the credential helper.
- If `gh` or `git` fails with an authentication error (e.g. "authentication
-  required", "could not read Username", or exit code 128), call
-  `connect_integration(provider="github")` to surface the GitHub credentials
-  setup card so the user can connect their account. Once connected, retry
-  the operation.
- For operations that need broader access (e.g. private org repos, GitHub
-  Actions), pass the required scopes: e.g.
-  `connect_integration(provider="github", scopes=["repo", "read:org"])`.
-"""
-

 # Environment-specific supplement templates
 def _build_storage_supplement(
@@ -124,7 +105,6 @@ def _build_storage_supplement(
    storage_system_1_persistence: list[str],
    file_move_name_1_to_2: str,
    file_move_name_2_to_1: str,
-    extra_notes: str = "",
 ) -> str:
    """Build storage/filesystem supplement for a specific environment.

@@ -139,7 +119,6 @@ def _build_storage_supplement(
        storage_system_1_persistence: List of persistence behavior descriptions
        file_move_name_1_to_2: Direction label for primary→persistent
        file_move_name_2_to_1: Direction label for persistent→primary
-        extra_notes: Environment-specific notes appended after shared notes
    """
    # Format lists as bullet points with proper indentation
    characteristics = "\n".join(f"   - {c}" for c in storage_system_1_characteristics)
@@ -173,16 +152,12 @@ def _build_storage_supplement(

 ### File persistence
 Important files (code, configs, outputs) should be saved to workspace to ensure they persist.
-{_SHARED_TOOL_NOTES}{extra_notes}"""
+{_SHARED_TOOL_NOTES}"""


 # Pre-built supplements for common environments
 def _get_local_storage_supplement(cwd: str) -> str:
-    """Local ephemeral storage (files lost between turns).
-
-    Network is isolated (bubblewrap --unshare-net), so internet-dependent CLIs
-    like gh will not work — no integration env-var notes are included.
-    """
+    """Local ephemeral storage (files lost between turns)."""
    return _build_storage_supplement(
        working_dir=cwd,
        sandbox_type="in a network-isolated sandbox",
@@ -200,11 +175,7 @@ def _get_local_storage_supplement(cwd: str) -> str:


 def _get_cloud_sandbox_supplement() -> str:
-    """Cloud persistent sandbox (files survive across turns in session).
-
-    E2B has full internet access, so integration tokens (GH_TOKEN etc.) are
-    injected per command in bash_exec — include the CLI guidance notes.
-    """
+    """Cloud persistent sandbox (files survive across turns in session)."""
    return _build_storage_supplement(
        working_dir="/home/user",
        sandbox_type="in a cloud sandbox with full internet access",
@@ -219,7 +190,6 @@ def _get_cloud_sandbox_supplement() -> str:
        ],
        file_move_name_1_to_2="Sandbox → Persistent",
        file_move_name_2_to_1="Persistent → Sandbox",
-        extra_notes=_E2B_TOOL_NOTES,
    )


--- a/autogpt_platform/backend/backend/copilot/response_model.py
+++ b/autogpt_platform/backend/backend/copilot/response_model.py
@@ -43,6 +43,7 @@ class ResponseType(str, Enum):
    ERROR = "error"
    USAGE = "usage"
    HEARTBEAT = "heartbeat"
+    STATUS = "status"


 class StreamBaseResponse(BaseModel):
@@ -232,3 +233,26 @@ class StreamHeartbeat(StreamBaseResponse):
    def to_sse(self) -> str:
        """Convert to SSE comment format to keep connection alive."""
        return ": heartbeat\n\n"
+
+
+class StreamStatus(StreamBaseResponse):
+    """Transient status notification shown to the user during long operations.
+
+    Used to provide feedback when the backend performs behind-the-scenes work
+    (e.g., compacting conversation context on a retry) that would otherwise
+    leave the user staring at an unexplained pause.
+    """
+
+    type: ResponseType = ResponseType.STATUS
+    message: str = Field(..., description="Human-readable status message")
+
+    def to_sse(self) -> str:
+        """Encode as a one-line SSE comment so the AI SDK stream parser ignores it.
+
+        The frontend AI SDK validates every ``data:`` line against a strict Zod
+        union of known chunk types; ``"status"`` is not in it, so a ``data:``
+        line would break the whole stream.  An SSE comment (``:``) keeps the
+        connection alive and is silently discarded by ``EventSource`` parsers.
+        Line breaks in ``message`` become spaces so it stays a single comment.
+        """
+        return ": status " + " ".join(self.message.splitlines()) + "\n\n"
--- a/autogpt_platform/backend/backend/copilot/sdk/compaction.py
+++ b/autogpt_platform/backend/backend/copilot/sdk/compaction.py
@@ -12,6 +12,7 @@ import asyncio
 import logging
 import uuid
 from dataclasses import dataclass, field
+from typing import Any

 from ..constants import COMPACTION_DONE_MSG, COMPACTION_TOOL_NAME
 from ..model import ChatMessage, ChatSession
@@ -119,14 +120,12 @@ def filter_compaction_messages(
    filtered: list[ChatMessage] = []
    for msg in messages:
        if msg.role == "assistant" and msg.tool_calls:
+            real_calls: list[dict[str, Any]] = []
            for tc in msg.tool_calls:
                if tc.get("function", {}).get("name") == COMPACTION_TOOL_NAME:
                    compaction_ids.add(tc.get("id", ""))
-            real_calls = [
-                tc
-                for tc in msg.tool_calls
-                if tc.get("function", {}).get("name") != COMPACTION_TOOL_NAME
-            ]
+                else:
+                    real_calls.append(tc)
            if not real_calls and not msg.content:
                continue
        if msg.role == "tool" and msg.tool_call_id in compaction_ids:
@@ -222,6 +221,7 @@ class CompactionTracker:

    def reset_for_query(self) -> None:
        """Reset per-query state before a new SDK query."""
+        self._compact_start.clear()
        self._done = False
        self._start_emitted = False
        self._tool_call_id = ""
--- a/autogpt_platform/backend/backend/copilot/sdk/conftest.py
+++ b/autogpt_platform/backend/backend/copilot/sdk/conftest.py
@@ -0,0 +1,41 @@
+"""Shared test helpers for copilot SDK tests."""
+
+from __future__ import annotations
+
+from uuid import uuid4
+
+from backend.util import json
+
+
+def build_test_transcript(pairs: list[tuple[str, str]]) -> str:
+    """Build a minimal valid JSONL transcript from (role, content) pairs.
+
+    Use this helper in any copilot SDK test that needs a well-formed
+    transcript without hitting the real storage layer.
+    """
+    lines: list[str] = []
+    last_uuid: str | None = None
+    for role, content in pairs:
+        uid = str(uuid4())
+        entry_type = "assistant" if role == "assistant" else "user"
+        msg: dict = {"role": role, "content": content}
+        if role == "assistant":
+            msg.update(
+                {
+                    "model": "",
+                    "id": f"msg_{uid[:8]}",
+                    "type": "message",
+                    "content": [{"type": "text", "text": content}],
+                    "stop_reason": "end_turn",
+                    "stop_sequence": None,
+                }
+            )
+        entry = {
+            "type": entry_type,
+            "uuid": uid,
+            "parentUuid": last_uuid,
+            "message": msg,
+        }
+        lines.append(json.dumps(entry, separators=(",", ":")))
+        last_uuid = uid
+    return "\n".join(lines) + "\n"
--- a/autogpt_platform/backend/backend/copilot/sdk/prompt_too_long_test.py
+++ b/autogpt_platform/backend/backend/copilot/sdk/prompt_too_long_test.py
@@ -0,0 +1,552 @@
+"""Tests for retry logic and transcript compaction helpers."""
+
+from __future__ import annotations
+
+from unittest.mock import AsyncMock, patch
+from uuid import uuid4
+
+import pytest
+
+from backend.util import json
+
+from .conftest import build_test_transcript as _build_transcript
+from .service import _is_prompt_too_long
+from .transcript import (
+    _flatten_assistant_content,
+    _flatten_tool_result_content,
+    _messages_to_transcript,
+    _transcript_to_messages,
+    compact_transcript,
+    validate_transcript,
+)
+
+# ---------------------------------------------------------------------------
+# _flatten_assistant_content
+# ---------------------------------------------------------------------------
+
+
+class TestFlattenAssistantContent:
+    def test_text_blocks(self):
+        blocks = [
+            {"type": "text", "text": "Hello"},
+            {"type": "text", "text": "World"},
+        ]
+        assert _flatten_assistant_content(blocks) == "Hello\nWorld"
+
+    def test_tool_use_blocks(self):
+        blocks = [{"type": "tool_use", "name": "read_file", "input": {}}]
+        assert _flatten_assistant_content(blocks) == "[tool_use: read_file]"
+
+    def test_mixed_blocks(self):
+        blocks = [
+            {"type": "text", "text": "Let me read that."},
+            {"type": "tool_use", "name": "Read", "input": {"path": "/foo"}},
+        ]
+        result = _flatten_assistant_content(blocks)
+        assert "Let me read that." in result
+        assert "[tool_use: Read]" in result
+
+    def test_raw_strings(self):
+        assert _flatten_assistant_content(["hello", "world"]) == "hello\nworld"
+
+    def test_unknown_block_type_preserved_as_placeholder(self):
+        blocks = [
+            {"type": "text", "text": "See this image:"},
+            {"type": "image", "source": {"type": "base64", "data": "..."}},
+        ]
+        result = _flatten_assistant_content(blocks)
+        assert "See this image:" in result
+        assert "[__image__]" in result
+
+    def test_empty(self):
+        assert _flatten_assistant_content([]) == ""
+
+
+# ---------------------------------------------------------------------------
+# _flatten_tool_result_content
+# ---------------------------------------------------------------------------
+
+
+class TestFlattenToolResultContent:
+    def test_tool_result_with_text(self):
+        blocks = [
+            {
+                "type": "tool_result",
+                "tool_use_id": "123",
+                "content": [{"type": "text", "text": "file contents here"}],
+            }
+        ]
+        assert _flatten_tool_result_content(blocks) == "file contents here"
+
+    def test_tool_result_with_string_content(self):
+        blocks = [{"type": "tool_result", "tool_use_id": "123", "content": "ok"}]
+        assert _flatten_tool_result_content(blocks) == "ok"
+
+    def test_text_block(self):
+        blocks = [{"type": "text", "text": "plain text"}]
+        assert _flatten_tool_result_content(blocks) == "plain text"
+
+    def test_raw_string(self):
+        assert _flatten_tool_result_content(["raw"]) == "raw"
+
+    def test_tool_result_with_none_content(self):
+        """tool_result with content=None should produce empty string."""
+        blocks = [{"type": "tool_result", "tool_use_id": "x", "content": None}]
+        assert _flatten_tool_result_content(blocks) == ""
+
+    def test_tool_result_with_empty_list_content(self):
+        """tool_result with content=[] should produce empty string."""
+        blocks = [{"type": "tool_result", "tool_use_id": "x", "content": []}]
+        assert _flatten_tool_result_content(blocks) == ""
+
+    def test_empty(self):
+        assert _flatten_tool_result_content([]) == ""
+
+    def test_nested_dict_without_text(self):
+        """Dict blocks without text key use json.dumps fallback."""
+        blocks = [
+            {
+                "type": "tool_result",
+                "tool_use_id": "x",
+                "content": [{"type": "image", "source": "data:..."}],
+            }
+        ]
+        result = _flatten_tool_result_content(blocks)
+        assert "image" in result  # json.dumps fallback
+
+    def test_unknown_block_type_preserved_as_placeholder(self):
+        blocks = [{"type": "image", "source": {"type": "base64", "data": "..."}}]
+        result = _flatten_tool_result_content(blocks)
+        assert "[__image__]" in result
+
+
+# ---------------------------------------------------------------------------
+# _transcript_to_messages
+# ---------------------------------------------------------------------------
+
+
+def _make_entry(entry_type: str, role: str, content: str | list, **kwargs) -> str:
+    """Build a JSONL line for testing."""
+    uid = str(uuid4())
+    msg: dict = {"role": role, "content": content}
+    msg.update(kwargs)
+    entry = {
+        "type": entry_type,
+        "uuid": uid,
+        "parentUuid": None,
+        "message": msg,
+    }
+    return json.dumps(entry, separators=(",", ":"))
+
+
+class TestTranscriptToMessages:
+    def test_basic_roundtrip(self):
+        lines = [
+            _make_entry("user", "user", "Hello"),
+            _make_entry("assistant", "assistant", [{"type": "text", "text": "Hi"}]),
+        ]
+        content = "\n".join(lines) + "\n"
+        messages = _transcript_to_messages(content)
+        assert len(messages) == 2
+        assert messages[0] == {"role": "user", "content": "Hello"}
+        assert messages[1] == {"role": "assistant", "content": "Hi"}
+
+    def test_skips_strippable_types(self):
+        """Progress and metadata entries are excluded."""
+        lines = [
+            _make_entry("user", "user", "Hello"),
+            json.dumps(
+                {
+                    "type": "progress",
+                    "uuid": str(uuid4()),
+                    "parentUuid": None,
+                    "message": {"role": "assistant", "content": "..."},
+                }
+            ),
+            _make_entry("assistant", "assistant", [{"type": "text", "text": "Hi"}]),
+        ]
+        content = "\n".join(lines) + "\n"
+        messages = _transcript_to_messages(content)
+        assert len(messages) == 2
+
+    def test_empty_content(self):
+        assert _transcript_to_messages("") == []
+
+    def test_tool_result_content(self):
+        """User entries with tool_result content blocks are flattened."""
+        lines = [
+            _make_entry(
+                "user",
+                "user",
+                [
+                    {
+                        "type": "tool_result",
+                        "tool_use_id": "123",
+                        "content": "tool output",
+                    }
+                ],
+            ),
+        ]
+        content = "\n".join(lines) + "\n"
+        messages = _transcript_to_messages(content)
+        assert len(messages) == 1
+        assert messages[0]["content"] == "tool output"
+
+    def test_malformed_json_lines_skipped(self):
+        """Malformed JSON lines in transcript are silently skipped."""
+        lines = [
+            _make_entry("user", "user", "Hello"),
+            "this is not valid json",
+            _make_entry("assistant", "assistant", [{"type": "text", "text": "Hi"}]),
+        ]
+        content = "\n".join(lines) + "\n"
+        messages = _transcript_to_messages(content)
+        assert len(messages) == 2
+
+    def test_empty_lines_skipped(self):
+        """Empty lines and whitespace-only lines are skipped."""
+        lines = [
+            _make_entry("user", "user", "Hello"),
+            "",
+            "   ",
+            _make_entry("assistant", "assistant", [{"type": "text", "text": "Hi"}]),
+        ]
+        content = "\n".join(lines) + "\n"
+        messages = _transcript_to_messages(content)
+        assert len(messages) == 2
+
+    def test_unicode_content_preserved(self):
+        """Non-ASCII text comes back unchanged from transcript parsing."""
+        user_text = "Hello 你好 🌍"
+        assistant_text = "Bonjour 日本語 émojis 🎉"
+        user_row = _make_entry("user", "user", user_text)
+        assistant_row = _make_entry(
+            "assistant", "assistant", [{"type": "text", "text": assistant_text}]
+        )
+        parsed = _transcript_to_messages(f"{user_row}\n{assistant_row}\n")
+        assert parsed[0]["content"] == user_text
+        assert parsed[1]["content"] == assistant_text
+
+    def test_entry_without_role_skipped(self):
+        """An entry whose message has no role is left out of the result."""
+        roleless_payload = {
+            "type": "user",
+            "uuid": str(uuid4()),
+            "parentUuid": None,
+            "message": {"content": "no role here"},
+        }
+        roleless_row = json.dumps(roleless_payload)
+        valid_row = _make_entry("user", "user", "Hello")
+        parsed = _transcript_to_messages(f"{roleless_row}\n{valid_row}\n")
+        # Only the well-formed entry should survive.
+        assert len(parsed) == 1
+        assert parsed[0]["content"] == "Hello"
+
+    def test_tool_use_and_result_pairs(self):
+        """An assistant tool_use turn and its tool_result reply flatten to text."""
+        assistant_blocks = [
+            {"type": "text", "text": "Let me check."},
+            {"type": "tool_use", "name": "read_file", "input": {"path": "/x"}},
+        ]
+        result_blocks = [
+            {
+                "type": "tool_result",
+                "tool_use_id": "abc",
+                "content": [{"type": "text", "text": "file contents"}],
+            }
+        ]
+        assistant_row = _make_entry("assistant", "assistant", assistant_blocks)
+        user_row = _make_entry("user", "user", result_blocks)
+        parsed = _transcript_to_messages(f"{assistant_row}\n{user_row}\n")
+        assert len(parsed) == 2
+        # The assistant turn keeps its text and gets a tool_use marker.
+        assert "Let me check." in parsed[0]["content"]
+        assert "[tool_use: read_file]" in parsed[0]["content"]
+        # The nested text block of the tool_result becomes the user content.
+        assert parsed[1]["content"] == "file contents"
+
+
+# ---------------------------------------------------------------------------
+# _messages_to_transcript
+# ---------------------------------------------------------------------------
+
+
+class TestMessagesToTranscript:
+    """Checks on the JSONL produced by ``_messages_to_transcript``."""
+
+    def test_produces_valid_jsonl(self):
+        """Each message becomes one JSON row carrying type, uuid and message."""
+        conversation = [
+            {"role": "user", "content": "Hello"},
+            {"role": "assistant", "content": "Hi there"},
+        ]
+        jsonl = _messages_to_transcript(conversation)
+        raw_rows = jsonl.strip().split("\n")
+        assert len(raw_rows) == 2
+        for raw in raw_rows:
+            row = json.loads(raw)
+            assert "type" in row
+            assert "uuid" in row
+            assert "message" in row
+
+    def test_assistant_has_proper_structure(self):
+        """An assistant row wraps its text in a message with a content list."""
+        jsonl = _messages_to_transcript([{"role": "assistant", "content": "Hello"}])
+        row = json.loads(jsonl.strip())
+        assert row["type"] == "assistant"
+        payload = row["message"]
+        assert payload["role"] == "assistant"
+        assert payload["type"] == "message"
+        assert payload["stop_reason"] == "end_turn"
+        assert isinstance(payload["content"], list)
+        assert payload["content"][0]["type"] == "text"
+
+    def test_user_has_plain_content(self):
+        """A user row keeps its content as the original plain string."""
+        jsonl = _messages_to_transcript([{"role": "user", "content": "Hi"}])
+        row = json.loads(jsonl.strip())
+        assert row["type"] == "user"
+        assert row["message"]["content"] == "Hi"
+
+    def test_parent_uuid_chain(self):
+        """Each row's parentUuid is the previous row's uuid; the first is empty."""
+        conversation = [
+            {"role": "user", "content": "A"},
+            {"role": "assistant", "content": "B"},
+            {"role": "user", "content": "C"},
+        ]
+        jsonl = _messages_to_transcript(conversation)
+        rows = [json.loads(raw) for raw in jsonl.strip().split("\n")]
+        root, child, grandchild = rows[0], rows[1], rows[2]
+        assert root["parentUuid"] == ""
+        assert child["parentUuid"] == root["uuid"]
+        assert grandchild["parentUuid"] == child["uuid"]
+
+    def test_empty_messages(self):
+        """No messages in, empty string out."""
+        jsonl = _messages_to_transcript([])
+        assert jsonl == ""
+
+    def test_output_is_valid_transcript(self):
+        """Output should pass validate_transcript if it has assistant entries."""
+        conversation = [
+            {"role": "user", "content": "Hello"},
+            {"role": "assistant", "content": "Hi"},
+        ]
+        assert validate_transcript(_messages_to_transcript(conversation))
+
+    def test_roundtrip_to_messages(self):
+        """Messages → transcript → messages preserves role and content."""
+        source = [
+            {"role": "user", "content": "Hello"},
+            {"role": "assistant", "content": "Hi there"},
+            {"role": "user", "content": "How are you?"},
+        ]
+        recovered = _transcript_to_messages(_messages_to_transcript(source))
+        assert len(recovered) == len(source)
+        for index, expected in enumerate(source):
+            actual = recovered[index]
+            assert expected["role"] == actual["role"]
+            assert expected["content"] == actual["content"]
+
+
+# ---------------------------------------------------------------------------
+# compact_transcript
+# ---------------------------------------------------------------------------
+
+
+class TestCompactTranscript:
+    """Tests for ``compact_transcript`` with config and compression mocked out."""
+
+    # Patch target for the compression call made by compact_transcript.
+    _RUN_COMPRESSION = "backend.copilot.sdk.transcript._run_compression"
+
+    @staticmethod
+    def _patch_config():
+        """Return a patcher that makes ``ChatConfig()`` yield a minimal stub."""
+        stub = type("Cfg", (), {"model": "m", "api_key": "k", "base_url": "u"})()
+        return patch("backend.copilot.config.ChatConfig", return_value=stub)
+
+    @staticmethod
+    def _compress_result(
+        *, was_compacted, messages, original_token_count, token_count, summarized
+    ):
+        """Build a stand-in object exposing the compression result attributes."""
+        return type(
+            "CompressResult",
+            (),
+            {
+                "was_compacted": was_compacted,
+                "messages": messages,
+                "original_token_count": original_token_count,
+                "token_count": token_count,
+                "messages_summarized": summarized,
+                "messages_dropped": 0,
+            },
+        )()
+
+    @pytest.mark.asyncio
+    async def test_too_few_messages_returns_none(self):
+        """compact_transcript returns None when transcript has < 2 messages.
+
+        ``_run_compression`` is mocked so the test never reaches a real
+        client, and so a None produced by the error-handling path cannot be
+        mistaken for the short-transcript guard.
+        """
+        transcript = _build_transcript([("user", "Hello")])
+        with (
+            self._patch_config(),
+            patch(self._RUN_COMPRESSION, new_callable=AsyncMock) as run_compression,
+        ):
+            result = await compact_transcript(transcript, model="test-model")
+        assert result is None
+        run_compression.assert_not_awaited()
+
+    @pytest.mark.asyncio
+    async def test_returns_none_when_not_compacted(self):
+        """When compress_context says no compaction needed, returns None.
+        The compressor couldn't reduce it, so retrying with the same
+        content would fail identically."""
+        transcript = _build_transcript(
+            [
+                ("user", "Hello"),
+                ("assistant", "Hi there"),
+            ]
+        )
+        not_compacted = self._compress_result(
+            was_compacted=False,
+            messages=[],
+            original_token_count=100,
+            token_count=100,
+            summarized=0,
+        )
+        with (
+            self._patch_config(),
+            patch(
+                self._RUN_COMPRESSION,
+                new_callable=AsyncMock,
+                return_value=not_compacted,
+            ) as run_compression,
+        ):
+            result = await compact_transcript(transcript, model="test-model")
+        assert result is None
+        # The None must come from the "not compacted" branch, not an early exit.
+        run_compression.assert_awaited()
+
+    @pytest.mark.asyncio
+    async def test_returns_compacted_transcript(self):
+        """When compaction succeeds, returns a valid compacted transcript."""
+        transcript = _build_transcript(
+            [
+                ("user", "Hello"),
+                ("assistant", "Hi"),
+                ("user", "More"),
+                ("assistant", "Details"),
+            ]
+        )
+        compacted_msgs = [
+            {"role": "user", "content": "[summary]"},
+            {"role": "assistant", "content": "Summarized response"},
+        ]
+        compacted = self._compress_result(
+            was_compacted=True,
+            messages=compacted_msgs,
+            original_token_count=500,
+            token_count=100,
+            summarized=2,
+        )
+        with (
+            self._patch_config(),
+            patch(
+                self._RUN_COMPRESSION,
+                new_callable=AsyncMock,
+                return_value=compacted,
+            ),
+        ):
+            result = await compact_transcript(transcript, model="test-model")
+        assert result is not None
+        assert validate_transcript(result)
+        msgs = _transcript_to_messages(result)
+        assert len(msgs) == 2
+        assert msgs[1]["content"] == "Summarized response"
+
+    @pytest.mark.asyncio
+    async def test_returns_none_on_compression_failure(self):
+        """When _run_compression raises, returns None."""
+        transcript = _build_transcript(
+            [
+                ("user", "Hello"),
+                ("assistant", "Hi"),
+            ]
+        )
+        with (
+            self._patch_config(),
+            patch(
+                self._RUN_COMPRESSION,
+                new_callable=AsyncMock,
+                side_effect=RuntimeError("LLM unavailable"),
+            ) as run_compression,
+        ):
+            result = await compact_transcript(transcript, model="test-model")
+        assert result is None
+        # Confirm the failure path was actually exercised.
+        run_compression.assert_awaited()
+
+
+# ---------------------------------------------------------------------------
+# _is_prompt_too_long
+# ---------------------------------------------------------------------------
+
+
+class TestIsPromptTooLong:
+    """Unit tests for _is_prompt_too_long pattern matching."""
+
+    def test_prompt_is_too_long(self):
+        matched = _is_prompt_too_long(
+            RuntimeError("prompt is too long for model context")
+        )
+        assert matched is True
+
+    def test_request_too_large(self):
+        matched = _is_prompt_too_long(Exception("request too large: 250000 tokens"))
+        assert matched is True
+
+    def test_maximum_context_length(self):
+        matched = _is_prompt_too_long(ValueError("maximum context length exceeded"))
+        assert matched is True
+
+    def test_context_length_exceeded(self):
+        matched = _is_prompt_too_long(Exception("context_length_exceeded"))
+        assert matched is True
+
+    def test_input_tokens_exceed(self):
+        matched = _is_prompt_too_long(
+            Exception("input tokens exceed the max_tokens limit")
+        )
+        assert matched is True
+
+    def test_input_is_too_long(self):
+        matched = _is_prompt_too_long(Exception("input is too long for the model"))
+        assert matched is True
+
+    def test_content_length_exceeds(self):
+        matched = _is_prompt_too_long(Exception("content length exceeds maximum"))
+        assert matched is True
+
+    def test_unrelated_error_returns_false(self):
+        matched = _is_prompt_too_long(RuntimeError("network timeout"))
+        assert matched is False
+
+    def test_auth_error_returns_false(self):
+        matched = _is_prompt_too_long(
+            Exception("authentication failed: invalid API key")
+        )
+        assert matched is False
+
+    def test_chained_exception_detected(self):
+        """A matching error set as ``__cause__`` of another error is detected."""
+        wrapper = Exception("SDK error")
+        wrapper.__cause__ = RuntimeError("prompt is too long")
+        assert _is_prompt_too_long(wrapper) is True
+
+    def test_case_insensitive(self):
+        matched = _is_prompt_too_long(Exception("PROMPT IS TOO LONG"))
+        assert matched is True
+
+    def test_old_max_tokens_exceeded_not_matched(self):
+        """The old broad 'max_tokens_exceeded' pattern was removed.
+        Only 'input tokens exceed' should match now."""
+        matched = _is_prompt_too_long(Exception("max_tokens_exceeded"))
+        assert matched is False
--- a/autogpt_platform/backend/backend/copilot/sdk/response_adapter.py
+++ b/autogpt_platform/backend/backend/copilot/sdk/response_adapter.py
@@ -226,7 +226,7 @@ class SDKResponseAdapter:
                responses.append(StreamFinish())

        else:
-            logger.debug(f"Unhandled SDK message type: {type(sdk_message).__name__}")
+            logger.debug("Unhandled SDK message type: %s", type(sdk_message).__name__)

        return responses

--- a/autogpt_platform/backend/backend/copilot/sdk/retry_scenarios_test.py
+++ b/autogpt_platform/backend/backend/copilot/sdk/retry_scenarios_test.py
--- a/autogpt_platform/backend/backend/copilot/sdk/security_hooks.py
+++ b/autogpt_platform/backend/backend/copilot/sdk/security_hooks.py
@@ -52,7 +52,7 @@ def _validate_workspace_path(
    if is_allowed_local_path(path, sdk_cwd):
        return {}

-    logger.warning(f"Blocked {tool_name} outside workspace: {path}")
+    logger.warning("Blocked %s outside workspace: %s", tool_name, path)
    workspace_hint = f" Allowed workspace: {sdk_cwd}" if sdk_cwd else ""
    return _deny(
        f"[SECURITY] Tool '{tool_name}' can only access files within the workspace "
@@ -71,7 +71,7 @@ def _validate_tool_access(
    """
    # Block forbidden tools
    if tool_name in BLOCKED_TOOLS:
-        logger.warning(f"Blocked tool access attempt: {tool_name}")
+        logger.warning("Blocked tool access attempt: %s", tool_name)
        return _deny(
            f"[SECURITY] Tool '{tool_name}' is blocked for security. "
            "This is enforced by the platform and cannot be bypassed. "
@@ -89,7 +89,9 @@ def _validate_tool_access(
    for pattern in DANGEROUS_PATTERNS:
        if re.search(pattern, input_str, re.IGNORECASE):
            logger.warning(
-                f"Blocked dangerous pattern in tool input: {pattern} in {tool_name}"
+                "Blocked dangerous pattern in tool input: %s in %s",
+                pattern,
+                tool_name,
            )
            return _deny(
                "[SECURITY] Input contains a blocked pattern. "
@@ -111,7 +113,9 @@ def _validate_user_isolation(
        # the tool itself via _validate_ephemeral_path.
        path = tool_input.get("path", "") or tool_input.get("file_path", "")
        if path and ".." in path:
-            logger.warning(f"Blocked path traversal attempt: {path} by user {user_id}")
+            logger.warning(
+                "Blocked path traversal attempt: %s by user %s", path, user_id
+            )
            return {
                "hookSpecificOutput": {
                    "hookEventName": "PreToolUse",
@@ -170,7 +174,7 @@ def create_security_hooks(
                # Block background task execution first — denied calls
                # should not consume a subtask slot.
                if tool_input.get("run_in_background"):
-                    logger.info(f"[SDK] Blocked background Task, user={user_id}")
+                    logger.info("[SDK] Blocked background Task, user=%s", user_id)
                    return cast(
                        SyncHookJSONOutput,
                        _deny(
@@ -181,7 +185,9 @@ def create_security_hooks(
                    )
                if len(task_tool_use_ids) >= max_subtasks:
                    logger.warning(
-                        f"[SDK] Task limit reached ({max_subtasks}), user={user_id}"
+                        "[SDK] Task limit reached (%d), user=%s",
+                        max_subtasks,
+                        user_id,
                    )
                    return cast(
                        SyncHookJSONOutput,
@@ -212,7 +218,7 @@ def create_security_hooks(
            if tool_name == "Task" and tool_use_id is not None:
                task_tool_use_ids.add(tool_use_id)

-            logger.debug(f"[SDK] Tool start: {tool_name}, user={user_id}")
+            logger.debug("[SDK] Tool start: %s, user=%s", tool_name, user_id)
            return cast(SyncHookJSONOutput, {})

        def _release_task_slot(tool_name: str, tool_use_id: str | None) -> None:
@@ -282,8 +288,11 @@ def create_security_hooks(
            tool_name = cast(str, input_data.get("tool_name", ""))
            error = input_data.get("error", "Unknown error")
            logger.warning(
-                f"[SDK] Tool failed: {tool_name}, error={error}, "
-                f"user={user_id}, tool_use_id={tool_use_id}"
+                "[SDK] Tool failed: %s, error=%s, user=%s, tool_use_id=%s",
+                tool_name,
+                str(error).replace("\n", "").replace("\r", ""),
+                user_id,
+                tool_use_id,
            )

            _release_task_slot(tool_name, tool_use_id)
@@ -301,16 +310,19 @@ def create_security_hooks(
            This hook provides visibility into when compaction happens.
            """
            _ = context, tool_use_id
-            trigger = input_data.get("trigger", "auto")
            # Sanitize untrusted input before logging to prevent log injection
+            trigger = (
+                str(input_data.get("trigger", "auto"))
+                .replace("\n", "")
+                .replace("\r", "")
+            )
            transcript_path = (
                str(input_data.get("transcript_path", ""))
                .replace("\n", "")
                .replace("\r", "")
            )
            logger.info(
-                "[SDK] Context compaction triggered: %s, user=%s, "
-                "transcript_path=%s",
+                "[SDK] Context compaction triggered: %s, user=%s, transcript_path=%s",
                trigger,
                user_id,
                transcript_path,
--- a/autogpt_platform/backend/backend/copilot/sdk/service.py
+++ b/autogpt_platform/backend/backend/copilot/sdk/service.py
--- a/autogpt_platform/backend/backend/copilot/sdk/service_helpers_test.py
+++ b/autogpt_platform/backend/backend/copilot/sdk/service_helpers_test.py
@@ -0,0 +1,283 @@
+"""Unit tests for extracted service helpers.
+
+Covers ``_is_prompt_too_long``, ``_reduce_context``, ``_iter_sdk_messages``,
+and the ``ReducedContext`` named tuple.
+"""
+
+from __future__ import annotations
+
+import asyncio
+from collections.abc import AsyncGenerator
+from unittest.mock import AsyncMock, patch
+
+import pytest
+
+from .conftest import build_test_transcript as _build_transcript
+from .service import (
+    ReducedContext,
+    _is_prompt_too_long,
+    _iter_sdk_messages,
+    _reduce_context,
+)
+
+# ---------------------------------------------------------------------------
+# _is_prompt_too_long
+# ---------------------------------------------------------------------------
+
+
+class TestIsPromptTooLong:
+    """Message matching and exception-chain walking in ``_is_prompt_too_long``."""
+
+    def test_direct_match(self) -> None:
+        err = Exception("prompt is too long")
+        assert _is_prompt_too_long(err) is True
+
+    def test_case_insensitive(self) -> None:
+        err = Exception("PROMPT IS TOO LONG")
+        assert _is_prompt_too_long(err) is True
+
+    def test_no_match(self) -> None:
+        err = Exception("network timeout")
+        assert _is_prompt_too_long(err) is False
+
+    def test_request_too_large(self) -> None:
+        err = Exception("request too large for model")
+        assert _is_prompt_too_long(err) is True
+
+    def test_context_length_exceeded(self) -> None:
+        err = Exception("context_length_exceeded")
+        assert _is_prompt_too_long(err) is True
+
+    def test_max_tokens_exceeded_not_matched(self) -> None:
+        """'max_tokens_exceeded' is intentionally excluded (too broad)."""
+        err = Exception("max_tokens_exceeded")
+        assert _is_prompt_too_long(err) is False
+
+    def test_max_tokens_config_error_no_match(self) -> None:
+        """'max_tokens must be at least 1' should NOT match."""
+        err = Exception("max_tokens must be at least 1")
+        assert _is_prompt_too_long(err) is False
+
+    def test_chained_cause(self) -> None:
+        """A match reachable through ``__cause__`` is detected."""
+        wrapper = RuntimeError("SDK error")
+        wrapper.__cause__ = Exception("prompt is too long")
+        assert _is_prompt_too_long(wrapper) is True
+
+    def test_chained_context(self) -> None:
+        """A match reachable through ``__context__`` is detected."""
+        wrapper = RuntimeError("wrapped")
+        wrapper.__context__ = Exception("request too large")
+        assert _is_prompt_too_long(wrapper) is True
+
+    def test_deep_chain(self) -> None:
+        """A match two ``__cause__`` links down is detected."""
+        top = ValueError("top")
+        middle = RuntimeError("middle")
+        top.__cause__ = middle
+        middle.__cause__ = Exception("maximum context length")
+        assert _is_prompt_too_long(top) is True
+
+    def test_chain_no_match(self) -> None:
+        wrapper = RuntimeError("wrapped")
+        wrapper.__cause__ = Exception("rate limit exceeded")
+        assert _is_prompt_too_long(wrapper) is False
+
+    def test_cycle_detection(self) -> None:
+        """Exception chain with a cycle should not infinite-loop."""
+        first = Exception("error a")
+        second = Exception("error b")
+        first.__cause__ = second
+        second.__cause__ = first  # cycle
+        assert _is_prompt_too_long(first) is False
+
+    def test_all_patterns(self) -> None:
+        """Every listed phrase is recognised on its own."""
+        phrases = (
+            "prompt is too long",
+            "request too large",
+            "maximum context length",
+            "context_length_exceeded",
+            "input tokens exceed",
+            "input is too long",
+            "content length exceeds",
+        )
+        for phrase in phrases:
+            assert _is_prompt_too_long(Exception(phrase)) is True, phrase
+
+
+# ---------------------------------------------------------------------------
+# _reduce_context
+# ---------------------------------------------------------------------------
+
+
+class TestReduceContext:
+    """Outcomes of ``_reduce_context`` with its collaborators patched."""
+
+    # Patch targets inside the service module under test.
+    _COMPACT = "backend.copilot.sdk.service.compact_transcript"
+    _VALIDATE = "backend.copilot.sdk.service.validate_transcript"
+    _WRITE = "backend.copilot.sdk.service.write_transcript_to_tempfile"
+
+    @pytest.mark.asyncio
+    async def test_first_retry_compaction_success(self) -> None:
+        """A compacted, valid, written transcript leads to a resume."""
+        transcript = _build_transcript([("user", "hi"), ("assistant", "hello")])
+        compacted = _build_transcript([("user", "hi"), ("assistant", "[summary]")])
+
+        compact_patch = patch(
+            self._COMPACT, new_callable=AsyncMock, return_value=compacted
+        )
+        validate_patch = patch(self._VALIDATE, return_value=True)
+        write_patch = patch(self._WRITE, return_value="/tmp/resume.jsonl")
+        with compact_patch, validate_patch, write_patch:
+            ctx = await _reduce_context(
+                transcript, False, "sess-123", "/tmp/cwd", "[test]"
+            )
+
+        assert isinstance(ctx, ReducedContext)
+        assert ctx.use_resume is True
+        assert ctx.resume_file == "/tmp/resume.jsonl"
+        assert ctx.transcript_lost is False
+        assert ctx.tried_compaction is True
+
+    @pytest.mark.asyncio
+    async def test_compaction_fails_drops_transcript(self) -> None:
+        """When compaction yields None the transcript is reported lost."""
+        transcript = _build_transcript([("user", "hi"), ("assistant", "hello")])
+
+        compact_patch = patch(self._COMPACT, new_callable=AsyncMock, return_value=None)
+        with compact_patch:
+            ctx = await _reduce_context(
+                transcript, False, "sess-123", "/tmp/cwd", "[test]"
+            )
+
+        assert ctx.use_resume is False
+        assert ctx.resume_file is None
+        assert ctx.transcript_lost is True
+        assert ctx.tried_compaction is True
+
+    @pytest.mark.asyncio
+    async def test_already_tried_compaction_skips(self) -> None:
+        """With the tried-compaction flag already set, the transcript is dropped."""
+        transcript = _build_transcript([("user", "hi"), ("assistant", "hello")])
+
+        ctx = await _reduce_context(transcript, True, "sess-123", "/tmp/cwd", "[test]")
+
+        assert ctx.use_resume is False
+        assert ctx.transcript_lost is True
+        assert ctx.tried_compaction is True
+
+    @pytest.mark.asyncio
+    async def test_empty_transcript_drops(self) -> None:
+        """An empty transcript cannot be resumed and is reported lost."""
+        ctx = await _reduce_context("", False, "sess-123", "/tmp/cwd", "[test]")
+
+        assert ctx.use_resume is False
+        assert ctx.transcript_lost is True
+
+    @pytest.mark.asyncio
+    async def test_compaction_returns_same_content_drops(self) -> None:
+        """Compaction that hands back identical content counts as lost."""
+        transcript = _build_transcript([("user", "hi"), ("assistant", "hello")])
+
+        compact_patch = patch(
+            self._COMPACT,
+            new_callable=AsyncMock,
+            return_value=transcript,  # same content
+        )
+        with compact_patch:
+            ctx = await _reduce_context(
+                transcript, False, "sess-123", "/tmp/cwd", "[test]"
+            )
+
+        assert ctx.transcript_lost is True
+
+    @pytest.mark.asyncio
+    async def test_write_tempfile_fails_drops(self) -> None:
+        """If the resume file cannot be written the transcript is reported lost."""
+        transcript = _build_transcript([("user", "hi"), ("assistant", "hello")])
+        compacted = _build_transcript([("user", "hi"), ("assistant", "[summary]")])
+
+        compact_patch = patch(
+            self._COMPACT, new_callable=AsyncMock, return_value=compacted
+        )
+        validate_patch = patch(self._VALIDATE, return_value=True)
+        write_patch = patch(self._WRITE, return_value=None)
+        with compact_patch, validate_patch, write_patch:
+            ctx = await _reduce_context(
+                transcript, False, "sess-123", "/tmp/cwd", "[test]"
+            )
+
+        assert ctx.transcript_lost is True
+
+
+# ---------------------------------------------------------------------------
+# _iter_sdk_messages
+# ---------------------------------------------------------------------------
+
+
+class TestIterSdkMessages:
+    """Tests for ``_iter_sdk_messages`` driven by fake ``receive_response`` streams."""
+
+    @pytest.mark.asyncio
+    async def test_yields_messages(self) -> None:
+        """Messages from the client are passed through in order."""
+        messages = ["msg1", "msg2", "msg3"]
+        client = AsyncMock()
+
+        async def _fake_receive() -> AsyncGenerator[str, None]:
+            for m in messages:
+                yield m
+
+        client.receive_response = _fake_receive
+        result = [msg async for msg in _iter_sdk_messages(client)]
+        assert result == messages
+
+    @pytest.mark.asyncio
+    async def test_heartbeat_on_timeout(self) -> None:
+        """Yields None when asyncio.wait times out."""
+        client = AsyncMock()
+        received: list[str | None] = []
+
+        async def _slow_receive() -> AsyncGenerator[str, None]:
+            await asyncio.sleep(100)  # never completes
+            yield "never"  # pragma: no cover — unreachable, yield makes this an async generator
+
+        client.receive_response = _slow_receive
+
+        with patch("backend.copilot.sdk.service._HEARTBEAT_INTERVAL", 0.01):
+            gen = _iter_sdk_messages(client)
+            try:
+                # Pull exactly three items; an early end of the stream raises
+                # instead of letting the assertion below pass vacuously.
+                for _ in range(3):
+                    received.append(await gen.__anext__())
+            finally:
+                # Close explicitly rather than abandoning the generator, so
+                # nothing still waiting on _slow_receive outlives the test.
+                await gen.aclose()
+
+        assert received == [None, None, None]
+
+    @pytest.mark.asyncio
+    async def test_exception_propagates(self) -> None:
+        """An error raised by the client stream reaches the consumer."""
+        client = AsyncMock()
+
+        async def _error_receive() -> AsyncGenerator[str, None]:
+            raise RuntimeError("SDK crash")
+            yield  # pragma: no cover — unreachable, yield makes this an async generator
+
+        client.receive_response = _error_receive
+
+        with pytest.raises(RuntimeError, match="SDK crash"):
+            async for _ in _iter_sdk_messages(client):
+                pass
+
+    @pytest.mark.asyncio
+    async def test_task_cleanup_on_break(self) -> None:
+        """Pending task is cancelled when generator is closed."""
+        client = AsyncMock()
+
+        async def _slow_receive() -> AsyncGenerator[str, None]:
+            yield "first"
+            await asyncio.sleep(100)
+            yield "second"
+
+        client.receive_response = _slow_receive
+
+        gen = _iter_sdk_messages(client)
+        first = await gen.__anext__()
+        assert first == "first"
+        await gen.aclose()  # should cancel pending task cleanly
+        # A closed async generator must report exhaustion, not resume.
+        with pytest.raises(StopAsyncIteration):
+            await gen.__anext__()
--- a/autogpt_platform/backend/backend/copilot/sdk/tool_adapter.py
+++ b/autogpt_platform/backend/backend/copilot/sdk/tool_adapter.py
@@ -234,7 +234,9 @@ def create_tool_handler(base_tool: BaseTool):
        try:
            return await _execute_tool_sync(base_tool, user_id, session, args)
        except Exception as e:
-            logger.error(f"Error executing tool {base_tool.name}: {e}", exc_info=True)
+            logger.error(
+                "Error executing tool %s: %s", base_tool.name, e, exc_info=True
+            )
            return _mcp_error(f"Failed to execute {base_tool.name}: {e}")

    return tool_handler
--- a/autogpt_platform/backend/backend/copilot/sdk/transcript.py
+++ b/autogpt_platform/backend/backend/copilot/sdk/transcript.py
@@ -10,6 +10,9 @@ Storage is handled via ``WorkspaceStorageBackend`` (GCS in prod, local
 filesystem for self-hosted) — no DB column needed.
 """

+from __future__ import annotations
+
+import asyncio
 import logging
 import os
 import re
@@ -17,8 +20,12 @@ import shutil
 import time
 from dataclasses import dataclass
 from pathlib import Path
+from uuid import uuid4

 from backend.util import json
+from backend.util.clients import get_openai_client
+from backend.util.prompt import CompressResult, compress_context
+from backend.util.workspace_storage import GCSWorkspaceStorage, get_workspace_storage

 logger = logging.getLogger(__name__)

@@ -99,7 +106,14 @@ def strip_progress_entries(content: str) -> str:
            continue
        parent = entry.get("parentUuid", "")
        original_parent = parent
-        while parent in stripped_uuids:
+        # seen_parents is local per-entry (not shared across iterations) so
+        # it can only detect cycles within a single ancestry walk, not across
+        # entries.  This is intentional: each entry's parent chain is
+        # independent, and reusing a global set would incorrectly short-circuit
+        # valid re-use of the same UUID as a parent in different subtrees.
+        seen_parents: set[str] = set()
+        while parent in stripped_uuids and parent not in seen_parents:
+            seen_parents.add(parent)
            parent = uuid_to_parent.get(parent, "")
        if parent != original_parent:
            entry["parentUuid"] = parent
@@ -327,7 +341,7 @@ def write_transcript_to_tempfile(
    # Validate cwd is under the expected sandbox prefix (CodeQL sanitizer).
    real_cwd = os.path.realpath(cwd)
    if not real_cwd.startswith(_SAFE_CWD_PREFIX):
-        logger.warning(f"[Transcript] cwd outside sandbox: {cwd}")
+        logger.warning("[Transcript] cwd outside sandbox: %s", cwd)
        return None

    try:
@@ -337,17 +351,17 @@ def write_transcript_to_tempfile(
            os.path.join(real_cwd, f"transcript-{safe_id}.jsonl")
        )
        if not jsonl_path.startswith(real_cwd):
-            logger.warning(f"[Transcript] Path escaped cwd: {jsonl_path}")
+            logger.warning("[Transcript] Path escaped cwd: %s", jsonl_path)
            return None

        with open(jsonl_path, "w") as f:
            f.write(transcript_content)

-        logger.info(f"[Transcript] Wrote resume file: {jsonl_path}")
+        logger.info("[Transcript] Wrote resume file: %s", jsonl_path)
        return jsonl_path

    except OSError as e:
-        logger.warning(f"[Transcript] Failed to write resume file: {e}")
+        logger.warning("[Transcript] Failed to write resume file: %s", e)
        return None


@@ -408,8 +422,6 @@ def _meta_storage_path_parts(user_id: str, session_id: str) -> tuple[str, str, s

 def _build_path_from_parts(parts: tuple[str, str, str], backend: object) -> str:
    """Build a full storage path from (workspace_id, file_id, filename) parts."""
-    from backend.util.workspace_storage import GCSWorkspaceStorage
-
    wid, fid, fname = parts
    if isinstance(backend, GCSWorkspaceStorage):
        blob = f"workspaces/{wid}/{fid}/{fname}"
@@ -448,17 +460,15 @@ async def upload_transcript(
        content: Complete JSONL transcript (from TranscriptBuilder).
        message_count: ``len(session.messages)`` at upload time.
    """
-    from backend.util.workspace_storage import get_workspace_storage
-
    # Strip metadata entries (progress, file-history-snapshot, etc.)
    # Note: SDK-built transcripts shouldn't have these, but strip for safety
    stripped = strip_progress_entries(content)
    if not validate_transcript(stripped):
        # Log entry types for debugging — helps identify why validation failed
-        entry_types: list[str] = []
-        for line in stripped.strip().split("\n"):
-            entry = json.loads(line, fallback={"type": "INVALID_JSON"})
-            entry_types.append(entry.get("type", "?"))
+        entry_types = [
+            json.loads(line, fallback={"type": "INVALID_JSON"}).get("type", "?")
+            for line in stripped.strip().split("\n")
+        ]
        logger.warning(
            "%s Skipping upload — stripped content not valid "
            "(types=%s, stripped_len=%d, raw_len=%d)",
@@ -494,11 +504,14 @@ async def upload_transcript(
            content=json.dumps(meta).encode("utf-8"),
        )
    except Exception as e:
-        logger.warning(f"{log_prefix} Failed to write metadata: {e}")
+        logger.warning("%s Failed to write metadata: %s", log_prefix, e)

    logger.info(
-        f"{log_prefix} Uploaded {len(encoded)}B "
-        f"(stripped from {len(content)}B, msg_count={message_count})"
+        "%s Uploaded %dB (stripped from %dB, msg_count=%d)",
+        log_prefix,
+        len(encoded),
+        len(content),
+        message_count,
    )


@@ -512,8 +525,6 @@ async def download_transcript(
    Returns a ``TranscriptDownload`` with the JSONL content and the
    ``message_count`` watermark from the upload, or ``None`` if not found.
    """
-    from backend.util.workspace_storage import get_workspace_storage
-
    storage = await get_workspace_storage()
    path = _build_storage_path(user_id, session_id, storage)

@@ -521,10 +532,10 @@ async def download_transcript(
        data = await storage.retrieve(path)
        content = data.decode("utf-8")
    except FileNotFoundError:
-        logger.debug(f"{log_prefix} No transcript in storage")
+        logger.debug("%s No transcript in storage", log_prefix)
        return None
    except Exception as e:
-        logger.warning(f"{log_prefix} Failed to download transcript: {e}")
+        logger.warning("%s Failed to download transcript: %s", log_prefix, e)
        return None

    # Try to load metadata (best-effort — old transcripts won't have it)
@@ -536,10 +547,14 @@ async def download_transcript(
        meta = json.loads(meta_data.decode("utf-8"), fallback={})
        message_count = meta.get("message_count", 0)
        uploaded_at = meta.get("uploaded_at", 0.0)
-    except (FileNotFoundError, Exception):
+    except FileNotFoundError:
        pass  # No metadata — treat as unknown (msg_count=0 → always fill gap)
+    except Exception as e:
+        logger.debug("%s Failed to load transcript metadata: %s", log_prefix, e)

-    logger.info(f"{log_prefix} Downloaded {len(content)}B (msg_count={message_count})")
+    logger.info(
+        "%s Downloaded %dB (msg_count=%d)", log_prefix, len(content), message_count
+    )
    return TranscriptDownload(
        content=content,
        message_count=message_count,
@@ -553,8 +568,6 @@ async def delete_transcript(user_id: str, session_id: str) -> None:
    Removes both the ``.jsonl`` transcript and the companion ``.meta.json``
    so stale ``message_count`` watermarks cannot corrupt gap-fill logic.
    """
-    from backend.util.workspace_storage import get_workspace_storage
-
    storage = await get_workspace_storage()
    path = _build_storage_path(user_id, session_id, storage)

@@ -571,3 +584,280 @@ async def delete_transcript(user_id: str, session_id: str) -> None:
        logger.info("[Transcript] Deleted metadata for session %s", session_id)
    except Exception as e:
        logger.warning("[Transcript] Failed to delete metadata: %s", e)
+
+
+# ---------------------------------------------------------------------------
+# Transcript compaction — LLM summarization for prompt-too-long recovery
+# ---------------------------------------------------------------------------
+
+# JSONL protocol values used in transcript serialization.
+STOP_REASON_END_TURN = "end_turn"
+COMPACT_MSG_ID_PREFIX = "msg_compact_"
+ENTRY_TYPE_MESSAGE = "message"
+
+
+def _flatten_assistant_content(blocks: list) -> str:
+    """Flatten assistant content blocks into a single plain-text string.
+
+    Structured ``tool_use`` blocks are converted to ``[tool_use: name]``
+    placeholders.  This is intentional: ``compress_context`` requires plain
+    text for token counting and LLM summarization.  The structural loss is
+    acceptable because compaction only runs when the original transcript was
+    already too large for the model — a summarized plain-text version is
+    better than no context at all.
+    """
+    parts: list[str] = []
+    for block in blocks:
+        if isinstance(block, dict):
+            btype = block.get("type", "")
+            if btype == "text":
+                parts.append(block.get("text", ""))
+            elif btype == "tool_use":
+                parts.append(f"[tool_use: {block.get('name', '?')}]")
+            else:
+                # Preserve non-text blocks (e.g. image) as placeholders.
+                # Use __prefix__ to distinguish from literal user text.
+                parts.append(f"[__{btype}__]")
+        elif isinstance(block, str):
+            parts.append(block)
+    return "\n".join(parts) if parts else ""
+
+
+def _flatten_tool_result_content(blocks: list) -> str:
+    """Flatten tool_result and other content blocks into plain text.
+
+    Handles nested tool_result structures, text blocks, and raw strings.
+    Uses ``json.dumps`` as fallback for non-image/document dict sub-blocks of
+    a ``tool_result`` that are not typed ``text`` and carry no ``text`` value.
+
+    Like ``_flatten_assistant_content``, structured blocks (images, nested
+    tool results) are reduced to text representations for compression.
+    """
+    str_parts: list[str] = []
+    for block in blocks:
+        if isinstance(block, dict) and block.get("type") == "tool_result":
+            inner = block.get("content") or ""
+            if isinstance(inner, list):
+                for sub in inner:
+                    if isinstance(sub, dict):
+                        sub_type = sub.get("type")
+                        if sub_type in ("image", "document"):
+                            # Avoid serializing base64 binary data into
+                            # the compaction input — use a placeholder.
+                            str_parts.append(f"[__{sub_type}__]")
+                        elif sub_type == "text" or sub.get("text") is not None:
+                            str_parts.append(str(sub.get("text", "")))
+                        else:
+                            str_parts.append(json.dumps(sub))
+                    else:
+                        str_parts.append(str(sub))
+            else:
+                str_parts.append(str(inner))
+        elif isinstance(block, dict) and block.get("type") == "text":
+            str_parts.append(str(block.get("text", "")))
+        elif isinstance(block, dict):
+            # Preserve non-text/non-tool_result blocks (e.g. image) as placeholders.
+            # Use __prefix__ to distinguish from literal user text.
+            btype = block.get("type", "unknown")
+            str_parts.append(f"[__{btype}__]")
+        elif isinstance(block, str):
+            str_parts.append(block)
+    return "\n".join(str_parts) if str_parts else ""
+
+
+def _transcript_to_messages(content: str) -> list[dict]:
+    """Convert JSONL transcript entries to plain message dicts for compression.
+
+    Parses each line of the JSONL *content*, skips strippable metadata entries
+    (progress, file-history-snapshot, etc.) unless flagged ``isCompactSummary``,
+    and extracts ``role`` and flattened ``content`` from each entry's ``message``.
+
+    Structured content blocks (``tool_use``, ``tool_result``, images) are
+    flattened to plain text via ``_flatten_assistant_content`` and
+    ``_flatten_tool_result_content`` so that ``compress_context`` can
+    perform token counting and LLM summarization on uniform strings.
+
+    Returns:
+        A list of ``{"role": str, "content": str}`` dicts suitable for
+        ``compress_context``.
+    """
+    messages: list[dict] = []
+    for line in content.strip().split("\n"):
+        if not line.strip():
+            continue
+        entry = json.loads(line, fallback=None)
+        if not isinstance(entry, dict):
+            continue
+        if entry.get("type", "") in STRIPPABLE_TYPES and not entry.get(
+            "isCompactSummary"
+        ):
+            continue
+        msg = entry.get("message", {})
+        role = msg.get("role", "")
+        if not role:
+            continue
+        msg_dict: dict = {"role": role}
+        raw_content = msg.get("content")
+        if role == "assistant" and isinstance(raw_content, list):
+            msg_dict["content"] = _flatten_assistant_content(raw_content)
+        elif isinstance(raw_content, list):
+            msg_dict["content"] = _flatten_tool_result_content(raw_content)
+        else:
+            msg_dict["content"] = raw_content or ""
+        messages.append(msg_dict)
+    return messages
+
+
+def _messages_to_transcript(messages: list[dict]) -> str:
+    """Convert compressed message dicts back to JSONL transcript format.
+
+    Rebuilds a minimal JSONL transcript from the ``{"role", "content"}``
+    dicts returned by ``compress_context``.  Each message becomes one JSONL
+    line with a fresh ``uuid`` / ``parentUuid`` chain so the CLI's
+    ``--resume`` flag can reconstruct a valid conversation tree.
+
+    Assistant messages are wrapped in the full ``message`` envelope
+    (``id``, ``model``, ``stop_reason``, structured ``content`` blocks)
+    that the CLI expects.  User messages use the simpler ``{role, content}``
+    form.
+
+    Returns:
+        A newline-terminated JSONL string, or an empty string if *messages*
+        is empty.
+    """
+    lines: list[str] = []
+    last_uuid: str = ""  # root entry uses empty string, not null
+    for msg in messages:
+        role = msg.get("role", "user")
+        entry_type = "assistant" if role == "assistant" else "user"
+        uid = str(uuid4())
+        content = msg.get("content", "")
+        if role == "assistant":
+            message: dict = {
+                "role": "assistant",
+                "model": "",
+                "id": f"{COMPACT_MSG_ID_PREFIX}{uuid4().hex[:24]}",
+                "type": ENTRY_TYPE_MESSAGE,
+                "content": [{"type": "text", "text": content}] if content else [],
+                "stop_reason": STOP_REASON_END_TURN,
+                "stop_sequence": None,
+            }
+        else:
+            message = {"role": role, "content": content}
+        entry = {
+            "type": entry_type,
+            "uuid": uid,
+            "parentUuid": last_uuid,
+            "message": message,
+        }
+        lines.append(json.dumps(entry, separators=(",", ":")))
+        last_uuid = uid
+    return "\n".join(lines) + "\n" if lines else ""
+
+
+_COMPACTION_TIMEOUT_SECONDS = 60
+_TRUNCATION_TIMEOUT_SECONDS = 30
+
+
+async def _run_compression(
+    messages: list[dict],
+    model: str,
+    log_prefix: str,
+) -> CompressResult:
+    """Run LLM-based compression with truncation fallback.
+
+    Uses the shared OpenAI client from ``get_openai_client()``.
+    If no client is configured or the LLM call fails, falls back to
+    truncation-based compression which drops older messages without
+    summarization.
+
+    A 60-second timeout prevents a hung LLM call from blocking the
+    retry path indefinitely.  The truncation fallback also has a
+    30-second timeout to guard against slow tokenization on very large
+    transcripts.
+    """
+    client = get_openai_client()
+    if client is None:
+        logger.warning("%s No OpenAI client configured, using truncation", log_prefix)
+        return await asyncio.wait_for(
+            compress_context(messages=messages, model=model, client=None),
+            timeout=_TRUNCATION_TIMEOUT_SECONDS,
+        )
+    try:
+        return await asyncio.wait_for(
+            compress_context(messages=messages, model=model, client=client),
+            timeout=_COMPACTION_TIMEOUT_SECONDS,
+        )
+    except Exception as e:
+        logger.warning("%s LLM compaction failed, using truncation: %s", log_prefix, e)
+        return await asyncio.wait_for(
+            compress_context(messages=messages, model=model, client=None),
+            timeout=_TRUNCATION_TIMEOUT_SECONDS,
+        )
+
+
+async def compact_transcript(
+    content: str,
+    *,
+    model: str,
+    log_prefix: str = "[Transcript]",
+) -> str | None:
+    """Compact an oversized JSONL transcript using LLM summarization.
+
+    Converts transcript entries to plain messages, runs ``compress_context``
+    (the same compressor used for pre-query history), and rebuilds JSONL.
+
+    Structured content (``tool_use`` blocks, ``tool_result`` nesting, images)
+    is flattened to plain text for compression.  This matches the fidelity of
+    the Plan C (DB compression) fallback path, where
+    ``_format_conversation_context`` similarly renders tool calls as
+    ``You called tool: name(args)`` and results as ``Tool result: ...``.
+    Neither path preserves structured API content blocks — the compacted
+    context serves as text history for the LLM, which creates proper
+    structured tool calls going forward.
+
+    Images are per-turn attachments loaded from workspace storage by file ID
+    (via ``_prepare_file_attachments``), not part of the conversation history.
+    They are re-attached each turn and are unaffected by compaction.
+
+    Returns the compacted JSONL string, or ``None`` on failure.
+
+    See also:
+        ``_compress_messages`` in ``service.py`` — compresses ``ChatMessage``
+        lists for pre-query DB history.  Both share ``compress_context()``
+        but operate on different input formats (JSONL transcript entries
+        here vs. ChatMessage dicts there).
+    """
+    messages = _transcript_to_messages(content)
+    if len(messages) < 2:
+        logger.warning("%s Too few messages to compact (%d)", log_prefix, len(messages))
+        return None
+    try:
+        result = await _run_compression(messages, model, log_prefix)
+        if not result.was_compacted:
+            # Compressor says it's within budget, but the SDK rejected it.
+            # Return None so the caller falls through to DB fallback.
+            logger.warning(
+                "%s Compressor reports within budget but SDK rejected — "
+                "signalling failure",
+                log_prefix,
+            )
+            return None
+        logger.info(
+            "%s Compacted transcript: %d->%d tokens (%d summarized, %d dropped)",
+            log_prefix,
+            result.original_token_count,
+            result.token_count,
+            result.messages_summarized,
+            result.messages_dropped,
+        )
+        compacted = _messages_to_transcript(result.messages)
+        if not validate_transcript(compacted):
+            logger.warning("%s Compacted transcript failed validation", log_prefix)
+            return None
+        return compacted
+    except Exception as e:
+        logger.error(
+            "%s Transcript compaction failed: %s", log_prefix, e, exc_info=True
+        )
+        return None
--- a/autogpt_platform/backend/backend/copilot/sdk/transcript_builder.py
+++ b/autogpt_platform/backend/backend/copilot/sdk/transcript_builder.py
@@ -68,7 +68,7 @@ class TranscriptBuilder:
            type=entry_type,
            uuid=data.get("uuid") or str(uuid4()),
            parentUuid=data.get("parentUuid"),
-            isCompactSummary=data.get("isCompactSummary") or None,
+            isCompactSummary=data.get("isCompactSummary"),
            message=data.get("message", {}),
        )

--- a/autogpt_platform/backend/backend/copilot/sdk/transcript_test.py
+++ b/autogpt_platform/backend/backend/copilot/sdk/transcript_test.py
@@ -1,7 +1,7 @@
 """Unit tests for JSONL transcript management utilities."""

 import os
-from unittest.mock import AsyncMock, patch
+from unittest.mock import AsyncMock, MagicMock, patch

 import pytest

@@ -382,7 +382,7 @@ class TestDeleteTranscript:
        mock_storage.delete = AsyncMock()

        with patch(
-            "backend.util.workspace_storage.get_workspace_storage",
+            "backend.copilot.sdk.transcript.get_workspace_storage",
            new_callable=AsyncMock,
            return_value=mock_storage,
        ):
@@ -402,7 +402,7 @@ class TestDeleteTranscript:
        )

        with patch(
-            "backend.util.workspace_storage.get_workspace_storage",
+            "backend.copilot.sdk.transcript.get_workspace_storage",
            new_callable=AsyncMock,
            return_value=mock_storage,
        ):
@@ -420,7 +420,7 @@ class TestDeleteTranscript:
        )

        with patch(
-            "backend.util.workspace_storage.get_workspace_storage",
+            "backend.copilot.sdk.transcript.get_workspace_storage",
            new_callable=AsyncMock,
            return_value=mock_storage,
        ):
@@ -897,3 +897,134 @@ class TestCompactionFlowIntegration:
        output2 = builder2.to_jsonl()
        lines2 = [json.loads(line) for line in output2.strip().split("\n")]
        assert lines2[-1]["parentUuid"] == "a2"
+
+
+# ---------------------------------------------------------------------------
+# _run_compression (direct tests for the 3 code paths)
+# ---------------------------------------------------------------------------
+
+
+class TestRunCompression:
+    """Direct tests for ``_run_compression`` covering all 3 code paths.
+
+    Paths:
+    (a) No OpenAI client configured → truncation fallback immediately.
+    (b) LLM success → returns LLM-compressed result.
+    (c) LLM call raises → truncation fallback.
+    """
+
+    def _make_compress_result(self, was_compacted: bool, msgs=None):
+        """Build a minimal CompressResult-like object."""
+        from types import SimpleNamespace
+
+        return SimpleNamespace(
+            was_compacted=was_compacted,
+            messages=msgs or [{"role": "user", "content": "summary"}],
+            original_token_count=500,
+            token_count=100 if was_compacted else 500,
+            messages_summarized=2 if was_compacted else 0,
+            messages_dropped=0,
+        )
+
+    @pytest.mark.asyncio
+    async def test_no_client_uses_truncation(self):
+        """Path (a): ``get_openai_client()`` returns None → truncation only."""
+        from .transcript import _run_compression
+
+        truncation_result = self._make_compress_result(
+            True, [{"role": "user", "content": "truncated"}]
+        )
+
+        with (
+            patch(
+                "backend.copilot.sdk.transcript.get_openai_client",
+                return_value=None,
+            ),
+            patch(
+                "backend.copilot.sdk.transcript.compress_context",
+                new_callable=AsyncMock,
+                return_value=truncation_result,
+            ) as mock_compress,
+        ):
+            result = await _run_compression(
+                [{"role": "user", "content": "hello"}],
+                model="test-model",
+                log_prefix="[test]",
+            )
+
+        # compress_context called with client=None (truncation mode)
+        call_kwargs = mock_compress.call_args
+        assert (
+            call_kwargs.kwargs.get("client") is None
+            or (call_kwargs.args and call_kwargs.args[2] is None)
+            or mock_compress.call_args[1].get("client") is None
+        )
+        assert result is truncation_result
+
+    @pytest.mark.asyncio
+    async def test_llm_success_returns_llm_result(self):
+        """Path (b): ``get_openai_client()`` returns a client → LLM compresses."""
+        from .transcript import _run_compression
+
+        llm_result = self._make_compress_result(
+            True, [{"role": "user", "content": "LLM summary"}]
+        )
+        mock_client = MagicMock()
+
+        with (
+            patch(
+                "backend.copilot.sdk.transcript.get_openai_client",
+                return_value=mock_client,
+            ),
+            patch(
+                "backend.copilot.sdk.transcript.compress_context",
+                new_callable=AsyncMock,
+                return_value=llm_result,
+            ) as mock_compress,
+        ):
+            result = await _run_compression(
+                [{"role": "user", "content": "long conversation"}],
+                model="test-model",
+                log_prefix="[test]",
+            )
+
+        # compress_context was invoked and its result is returned unchanged
+        assert mock_compress.called
+        assert result is llm_result
+
+    @pytest.mark.asyncio
+    async def test_llm_failure_falls_back_to_truncation(self):
+        """Path (c): LLM call raises → truncation fallback used instead."""
+        from .transcript import _run_compression
+
+        truncation_result = self._make_compress_result(
+            True, [{"role": "user", "content": "truncated fallback"}]
+        )
+        mock_client = MagicMock()
+        call_count = [0]
+
+        async def _compress_side_effect(**kwargs):
+            call_count[0] += 1
+            if kwargs.get("client") is not None:
+                raise RuntimeError("LLM timeout")
+            return truncation_result
+
+        with (
+            patch(
+                "backend.copilot.sdk.transcript.get_openai_client",
+                return_value=mock_client,
+            ),
+            patch(
+                "backend.copilot.sdk.transcript.compress_context",
+                side_effect=_compress_side_effect,
+            ),
+        ):
+            result = await _run_compression(
+                [{"role": "user", "content": "long conversation"}],
+                model="test-model",
+                log_prefix="[test]",
+            )
+
+        # compress_context called twice: once for LLM (raises), once for truncation
+        assert call_count[0] == 2
+        assert result is truncation_result
--- a/autogpt_platform/backend/backend/copilot/tools/init.py
+++ b/autogpt_platform/backend/backend/copilot/tools/init.py
@@ -12,7 +12,6 @@ from .agent_browser import BrowserActTool, BrowserNavigateTool, BrowserScreensho
 from .agent_output import AgentOutputTool
 from .base import BaseTool
 from .bash_exec import BashExecTool
-from .connect_integration import ConnectIntegrationTool
 from .continue_run_block import ContinueRunBlockTool
 from .create_agent import CreateAgentTool
 from .customize_agent import CustomizeAgentTool
@@ -85,7 +84,6 @@ TOOL_REGISTRY: dict[str, BaseTool] = {
    "browser_screenshot": BrowserScreenshotTool(),
    # Sandboxed code execution (bubblewrap)
    "bash_exec": BashExecTool(),
-    "connect_integration": ConnectIntegrationTool(),
    # Persistent workspace tools (cloud storage, survives across sessions)
    # Feature request tools
    "search_feature_requests": SearchFeatureRequestsTool(),
--- a/autogpt_platform/backend/backend/copilot/tools/bash_exec.py
+++ b/autogpt_platform/backend/backend/copilot/tools/bash_exec.py
@@ -22,7 +22,6 @@ from e2b import AsyncSandbox
 from e2b.exceptions import TimeoutException

 from backend.copilot.context import E2B_WORKDIR, get_current_sandbox
-from backend.copilot.integration_creds import get_integration_env_vars
 from backend.copilot.model import ChatSession

 from .base import BaseTool
@@ -97,9 +96,7 @@ class BashExecTool(BaseTool):

        sandbox = get_current_sandbox()
        if sandbox is not None:
-            return await self._execute_on_e2b(
-                sandbox, command, timeout, session_id, user_id
-            )
+            return await self._execute_on_e2b(sandbox, command, timeout, session_id)

        # Bubblewrap fallback: local isolated execution.
        if not has_full_sandbox():
@@ -136,27 +133,14 @@ class BashExecTool(BaseTool):
        command: str,
        timeout: int,
        session_id: str | None,
-        user_id: str | None = None,
    ) -> ToolResponseBase:
-        """Execute *command* on the E2B sandbox via commands.run().
-
-        Integration tokens (e.g. GH_TOKEN) are injected into the sandbox env
-        for any user with connected accounts. E2B has full internet access, so
-        CLI tools like ``gh`` work without manual authentication.
-        """
-        envs: dict[str, str] = {
-            "PATH": "/usr/local/bin:/usr/bin:/bin:/usr/sbin:/sbin",
-        }
-        if user_id is not None:
-            integration_env = await get_integration_env_vars(user_id)
-            envs.update(integration_env)
-
+        """Execute *command* on the E2B sandbox via commands.run()."""
        try:
            result = await sandbox.commands.run(
                f"bash -c {shlex.quote(command)}",
                cwd=E2B_WORKDIR,
                timeout=timeout,
-                envs=envs,
+                envs={"PATH": "/usr/local/bin:/usr/bin:/bin:/usr/sbin:/sbin"},
            )
            return BashExecResponse(
                message=f"Command executed on E2B (exit {result.exit_code})",
--- a/autogpt_platform/backend/backend/copilot/tools/bash_exec_test.py
+++ b/autogpt_platform/backend/backend/copilot/tools/bash_exec_test.py
@@ -1,78 +0,0 @@
-"""Tests for BashExecTool — E2B path with token injection."""
-
-from unittest.mock import AsyncMock, MagicMock, patch
-
-import pytest
-
-from ._test_data import make_session
-from .bash_exec import BashExecTool
-from .models import BashExecResponse
-
-_USER = "user-bash-exec-test"
-
-
-def _make_tool() -> BashExecTool:
-    return BashExecTool()
-
-
-def _make_sandbox(exit_code: int = 0, stdout: str = "", stderr: str = "") -> MagicMock:
-    result = MagicMock()
-    result.exit_code = exit_code
-    result.stdout = stdout
-    result.stderr = stderr
-
-    sandbox = MagicMock()
-    sandbox.commands.run = AsyncMock(return_value=result)
-    return sandbox
-
-
-class TestBashExecE2BTokenInjection:
-    @pytest.mark.asyncio(loop_scope="session")
-    async def test_token_injected_when_user_id_set(self):
-        """When user_id is provided, integration env vars are merged into sandbox envs."""
-        tool = _make_tool()
-        session = make_session(user_id=_USER)
-        sandbox = _make_sandbox(stdout="ok")
-        env_vars = {"GH_TOKEN": "gh-secret", "GITHUB_TOKEN": "gh-secret"}
-
-        with patch(
-            "backend.copilot.tools.bash_exec.get_integration_env_vars",
-            new=AsyncMock(return_value=env_vars),
-        ) as mock_get_env:
-            result = await tool._execute_on_e2b(
-                sandbox=sandbox,
-                command="echo hi",
-                timeout=10,
-                session_id=session.session_id,
-                user_id=_USER,
-            )
-
-        mock_get_env.assert_awaited_once_with(_USER)
-        call_kwargs = sandbox.commands.run.call_args[1]
-        assert call_kwargs["envs"]["GH_TOKEN"] == "gh-secret"
-        assert call_kwargs["envs"]["GITHUB_TOKEN"] == "gh-secret"
-        assert isinstance(result, BashExecResponse)
-
-    @pytest.mark.asyncio(loop_scope="session")
-    async def test_no_token_injection_when_user_id_is_none(self):
-        """When user_id is None, get_integration_env_vars must NOT be called."""
-        tool = _make_tool()
-        session = make_session(user_id=_USER)
-        sandbox = _make_sandbox(stdout="ok")
-
-        with patch(
-            "backend.copilot.tools.bash_exec.get_integration_env_vars",
-            new=AsyncMock(return_value={"GH_TOKEN": "should-not-appear"}),
-        ) as mock_get_env:
-            result = await tool._execute_on_e2b(
-                sandbox=sandbox,
-                command="echo hi",
-                timeout=10,
-                session_id=session.session_id,
-                user_id=None,
-            )
-
-        mock_get_env.assert_not_called()
-        call_kwargs = sandbox.commands.run.call_args[1]
-        assert "GH_TOKEN" not in call_kwargs["envs"]
-        assert isinstance(result, BashExecResponse)
--- a/autogpt_platform/backend/backend/copilot/tools/connect_integration.py
+++ b/autogpt_platform/backend/backend/copilot/tools/connect_integration.py
@@ -1,215 +0,0 @@
-"""Tool for prompting the user to connect a required integration.
-
-When the copilot encounters an authentication failure (e.g. `gh` CLI returns
-"authentication required"), it calls this tool to surface the credentials
-setup card in the chat — the same UI that appears when a GitHub block runs
-without configured credentials.
-"""
-
-import functools
-from typing import Any, TypedDict
-
-from backend.copilot.model import ChatSession
-from backend.copilot.tools.models import (
-    ErrorResponse,
-    ResponseType,
-    SetupInfo,
-    SetupRequirementsResponse,
-    ToolResponseBase,
-    UserReadiness,
-)
-
-from .base import BaseTool
-
-
-class _ProviderInfo(TypedDict):
-    name: str
-    types: list[str]
-    # Default OAuth scopes requested when the agent doesn't specify any.
-    scopes: list[str]
-
-
-class _CredentialEntry(TypedDict):
-    """Shape of each entry inside SetupRequirementsResponse.user_readiness.missing_credentials."""
-
-    id: str
-    title: str
-    provider: str
-    provider_name: str
-    type: str
-    types: list[str]
-    scopes: list[str]
-
-
-@functools.lru_cache(maxsize=1)
-def _is_github_oauth_configured() -> bool:
-    """Return True if GitHub OAuth env vars are set.
-
-    Evaluated lazily (not at import time) to avoid triggering Secrets() during
-    module import, which can fail in environments where secrets are not loaded.
-    """
-    from backend.blocks.github._auth import GITHUB_OAUTH_IS_CONFIGURED
-
-    return GITHUB_OAUTH_IS_CONFIGURED
-
-
-# Registry of known providers: name + supported credential types for the UI.
-# When adding a new provider, also add its env var names to
-# backend.copilot.integration_creds.PROVIDER_ENV_VARS.
-def _get_provider_info() -> dict[str, _ProviderInfo]:
-    """Build the provider registry, evaluating OAuth config lazily."""
-    return {
-        "github": {
-            "name": "GitHub",
-            "types": (
-                ["api_key", "oauth2"] if _is_github_oauth_configured() else ["api_key"]
-            ),
-            # Default: repo scope covers clone/push/pull for public and private repos.
-            # Agent can request additional scopes (e.g. "read:org") via the scopes param.
-            "scopes": ["repo"],
-        },
-    }
-
-
-class ConnectIntegrationTool(BaseTool):
-    """Surface the credentials setup UI when an integration is not connected."""
-
-    @property
-    def name(self) -> str:
-        return "connect_integration"
-
-    @property
-    def description(self) -> str:
-        return (
-            "Prompt the user to connect a required integration (e.g. GitHub). "
-            "Call this when an external CLI or API call fails because the user "
-            "has not connected the relevant account. "
-            "The tool surfaces a credentials setup card in the chat so the user "
-            "can authenticate without leaving the page. "
-            "After the user connects the account, retry the operation. "
-            "In E2B/cloud sandbox mode the token (GH_TOKEN/GITHUB_TOKEN) is "
-            "automatically injected per-command in bash_exec — no manual export needed. "
-            "In local bubblewrap mode network is isolated so GitHub CLI commands "
-            "will still fail after connecting; inform the user of this limitation."
-        )
-
-    @property
-    def parameters(self) -> dict[str, Any]:
-        return {
-            "type": "object",
-            "properties": {
-                "provider": {
-                    "type": "string",
-                    "description": (
-                        "Integration provider slug, e.g. 'github'. "
-                        "Must be one of the supported providers."
-                    ),
-                    "enum": list(_get_provider_info().keys()),
-                },
-                "reason": {
-                    "type": "string",
-                    "description": (
-                        "Brief explanation of why the integration is needed, "
-                        "shown to the user in the setup card."
-                    ),
-                    "maxLength": 500,
-                },
-                "scopes": {
-                    "type": "array",
-                    "items": {"type": "string"},
-                    "description": (
-                        "OAuth scopes to request. Omit to use the provider default. "
-                        "Add extra scopes when you need more access — e.g. for GitHub: "
-                        "'repo' (clone/push/pull), 'read:org' (org membership), "
-                        "'workflow' (GitHub Actions). "
-                        "Requesting only the scopes you actually need is best practice."
-                    ),
-                },
-            },
-            "required": ["provider"],
-        }
-
-    @property
-    def requires_auth(self) -> bool:
-        # Require auth so only authenticated users can trigger the setup card.
-        # The card itself is user-agnostic (no per-user data needed), so
-        # user_id is intentionally unused in _execute.
-        return True
-
-    async def _execute(
-        self,
-        user_id: str | None,
-        session: ChatSession,
-        **kwargs: Any,
-    ) -> ToolResponseBase:
-        del user_id  # setup card is user-agnostic; auth is enforced via requires_auth
-        session_id = session.session_id if session else None
-        provider: str = (kwargs.get("provider") or "").strip().lower()
-        reason: str = (kwargs.get("reason") or "").strip()[
-            :500
-        ]  # cap LLM-controlled text
-        extra_scopes: list[str] = [
-            str(s).strip() for s in (kwargs.get("scopes") or []) if str(s).strip()
-        ]
-
-        provider_info = _get_provider_info()
-        info = provider_info.get(provider)
-        if not info:
-            supported = ", ".join(f"'{p}'" for p in provider_info)
-            return ErrorResponse(
-                message=(
-                    f"Unknown provider '{provider}'. "
-                    f"Supported providers: {supported}."
-                ),
-                error="unknown_provider",
-                session_id=session_id,
-            )
-
-        provider_name: str = info["name"]
-        supported_types: list[str] = info["types"]
-        # Merge agent-requested scopes with provider defaults (deduplicated, order preserved).
-        default_scopes: list[str] = info["scopes"]
-        seen: set[str] = set()
-        scopes: list[str] = []
-        for s in default_scopes + extra_scopes:
-            if s not in seen:
-                seen.add(s)
-                scopes.append(s)
-        field_key = f"{provider}_credentials"
-
-        message_parts = [
-            f"To continue, please connect your {provider_name} account.",
-        ]
-        if reason:
-            message_parts.append(reason)
-
-        credential_entry: _CredentialEntry = {
-            "id": field_key,
-            "title": f"{provider_name} Credentials",
-            "provider": provider,
-            "provider_name": provider_name,
-            "type": supported_types[0],
-            "types": supported_types,
-            "scopes": scopes,
-        }
-        missing_credentials: dict[str, _CredentialEntry] = {field_key: credential_entry}
-
-        return SetupRequirementsResponse(
-            type=ResponseType.SETUP_REQUIREMENTS,
-            message=" ".join(message_parts),
-            session_id=session_id,
-            setup_info=SetupInfo(
-                agent_id=f"connect_{provider}",
-                agent_name=provider_name,
-                user_readiness=UserReadiness(
-                    has_all_credentials=False,
-                    missing_credentials=missing_credentials,
-                    ready_to_run=False,
-                ),
-                requirements={
-                    "credentials": [missing_credentials[field_key]],
-                    "inputs": [],
-                    "execution_modes": [],
-                },
-            ),
-        )
--- a/autogpt_platform/backend/backend/copilot/tools/connect_integration_test.py
+++ b/autogpt_platform/backend/backend/copilot/tools/connect_integration_test.py
@@ -1,135 +0,0 @@
-"""Tests for ConnectIntegrationTool."""
-
-import pytest
-
-from ._test_data import make_session
-from .connect_integration import ConnectIntegrationTool
-from .models import ErrorResponse, SetupRequirementsResponse
-
-_TEST_USER_ID = "test-user-connect-integration"
-
-
-class TestConnectIntegrationTool:
-    def _make_tool(self) -> ConnectIntegrationTool:
-        return ConnectIntegrationTool()
-
-    @pytest.mark.asyncio(loop_scope="session")
-    async def test_unknown_provider_returns_error(self):
-        tool = self._make_tool()
-        session = make_session(user_id=_TEST_USER_ID)
-        result = await tool._execute(
-            user_id=_TEST_USER_ID, session=session, provider="nonexistent"
-        )
-        assert isinstance(result, ErrorResponse)
-        assert result.error == "unknown_provider"
-        assert "nonexistent" in result.message
-        assert "github" in result.message
-
-    @pytest.mark.asyncio(loop_scope="session")
-    async def test_empty_provider_returns_error(self):
-        tool = self._make_tool()
-        session = make_session(user_id=_TEST_USER_ID)
-        result = await tool._execute(
-            user_id=_TEST_USER_ID, session=session, provider=""
-        )
-        assert isinstance(result, ErrorResponse)
-        assert result.error == "unknown_provider"
-
-    @pytest.mark.asyncio(loop_scope="session")
-    async def test_github_provider_returns_setup_response(self):
-        tool = self._make_tool()
-        session = make_session(user_id=_TEST_USER_ID)
-        result = await tool._execute(
-            user_id=_TEST_USER_ID, session=session, provider="github"
-        )
-        assert isinstance(result, SetupRequirementsResponse)
-        assert result.setup_info.agent_name == "GitHub"
-        assert result.setup_info.agent_id == "connect_github"
-
-    @pytest.mark.asyncio(loop_scope="session")
-    async def test_github_has_missing_credentials_in_readiness(self):
-        tool = self._make_tool()
-        session = make_session(user_id=_TEST_USER_ID)
-        result = await tool._execute(
-            user_id=_TEST_USER_ID, session=session, provider="github"
-        )
-        assert isinstance(result, SetupRequirementsResponse)
-        readiness = result.setup_info.user_readiness
-        assert readiness.has_all_credentials is False
-        assert readiness.ready_to_run is False
-        assert "github_credentials" in readiness.missing_credentials
-
-    @pytest.mark.asyncio(loop_scope="session")
-    async def test_github_requirements_include_credential_entry(self):
-        tool = self._make_tool()
-        session = make_session(user_id=_TEST_USER_ID)
-        result = await tool._execute(
-            user_id=_TEST_USER_ID, session=session, provider="github"
-        )
-        assert isinstance(result, SetupRequirementsResponse)
-        creds = result.setup_info.requirements["credentials"]
-        assert len(creds) == 1
-        assert creds[0]["provider"] == "github"
-        assert creds[0]["id"] == "github_credentials"
-
-    @pytest.mark.asyncio(loop_scope="session")
-    async def test_reason_appears_in_message(self):
-        tool = self._make_tool()
-        session = make_session(user_id=_TEST_USER_ID)
-        reason = "Needed to create a pull request."
-        result = await tool._execute(
-            user_id=_TEST_USER_ID, session=session, provider="github", reason=reason
-        )
-        assert isinstance(result, SetupRequirementsResponse)
-        assert reason in result.message
-
-    @pytest.mark.asyncio(loop_scope="session")
-    async def test_session_id_propagated(self):
-        tool = self._make_tool()
-        session = make_session(user_id=_TEST_USER_ID)
-        result = await tool._execute(
-            user_id=_TEST_USER_ID, session=session, provider="github"
-        )
-        assert isinstance(result, SetupRequirementsResponse)
-        assert result.session_id == session.session_id
-
-    @pytest.mark.asyncio(loop_scope="session")
-    async def test_provider_case_insensitive(self):
-        """Provider slug is normalised to lowercase before lookup."""
-        tool = self._make_tool()
-        session = make_session(user_id=_TEST_USER_ID)
-        result = await tool._execute(
-            user_id=_TEST_USER_ID, session=session, provider="GitHub"
-        )
-        assert isinstance(result, SetupRequirementsResponse)
-
-    def test_tool_name(self):
-        assert ConnectIntegrationTool().name == "connect_integration"
-
-    def test_requires_auth(self):
-        assert ConnectIntegrationTool().requires_auth is True
-
-    @pytest.mark.asyncio(loop_scope="session")
-    async def test_unauthenticated_user_gets_need_login_response(self):
-        """execute() with user_id=None must return NeedLoginResponse, not the setup card.
-
-        This verifies that the requires_auth guard in BaseTool.execute() fires
-        before _execute() is called, so unauthenticated callers cannot probe
-        which integrations are configured.
-        """
-        import json
-
-        tool = self._make_tool()
-        # Session still needs a user_id string; the None is passed to execute()
-        # to simulate an unauthenticated call.
-        session = make_session(user_id=_TEST_USER_ID)
-        result = await tool.execute(
-            user_id=None,
-            session=session,
-            tool_call_id="test-call-id",
-            provider="github",
-        )
-        raw = result.output
-        output = json.loads(raw) if isinstance(raw, str) else raw
-        assert output.get("type") == "need_login"
-        assert result.success is False
--- a/autogpt_platform/backend/backend/copilot/tools/e2b_sandbox.py
+++ b/autogpt_platform/backend/backend/copilot/tools/e2b_sandbox.py
@@ -41,8 +41,7 @@ import contextlib
 import logging
 from typing import Any, Awaitable, Callable, Literal

-from e2b import AsyncSandbox
-from e2b.sandbox.sandbox_api import SandboxLifecycle
+from e2b import AsyncSandbox, SandboxLifecycle

 from backend.data.redis_client import get_redis_async

--- a/autogpt_platform/backend/backend/integrations/creds_manager.py
+++ b/autogpt_platform/backend/backend/integrations/creds_manager.py
@@ -25,35 +25,6 @@ logger = logging.getLogger(__name__)
 settings = Settings()


-_on_creds_changed: Callable[[str, str], None] | None = None
-
-
-def register_creds_changed_hook(hook: Callable[[str, str], None]) -> None:
-    """Register a callback invoked after any credential is created/updated/deleted.
-
-    The callback receives ``(user_id, provider)`` and should be idempotent.
-    Only one hook can be registered at a time; calling this again replaces the
-    previous hook.  Intended to be called once at application startup by the
-    copilot module to bust its token cache without creating an import cycle.
-    """
-    global _on_creds_changed
-    _on_creds_changed = hook
-
-
-def _bust_copilot_cache(user_id: str, provider: str) -> None:
-    """Invoke the registered hook (if any) to bust downstream token caches."""
-    if _on_creds_changed is not None:
-        try:
-            _on_creds_changed(user_id, provider)
-        except Exception:
-            logger.warning(
-                "Credential-change hook failed for user=%s provider=%s",
-                user_id,
-                provider,
-                exc_info=True,
-            )
-
-
 class IntegrationCredentialsManager:
    """
    Handles the lifecycle of integration credentials.
@@ -98,11 +69,7 @@ class IntegrationCredentialsManager:
        return self._locks

    async def create(self, user_id: str, credentials: Credentials) -> None:
-        result = await self.store.add_creds(user_id, credentials)
-        # Bust the copilot token cache so that the next bash_exec picks up the
-        # new credential immediately instead of waiting for _NULL_CACHE_TTL.
-        _bust_copilot_cache(user_id, credentials.provider)
-        return result
+        return await self.store.add_creds(user_id, credentials)

    async def exists(self, user_id: str, credentials_id: str) -> bool:
        return (await self.store.get_creds_by_id(user_id, credentials_id)) is not None
@@ -189,8 +156,6 @@ class IntegrationCredentialsManager:

                fresh_credentials = await oauth_handler.refresh_tokens(credentials)
                await self.store.update_creds(user_id, fresh_credentials)
-                # Bust copilot cache so the refreshed token is picked up immediately.
-                _bust_copilot_cache(user_id, fresh_credentials.provider)
                if _lock and (await _lock.locked()) and (await _lock.owned()):
                    try:
                        await _lock.release()
@@ -203,17 +168,10 @@ class IntegrationCredentialsManager:
    async def update(self, user_id: str, updated: Credentials) -> None:
        async with self._locked(user_id, updated.id):
            await self.store.update_creds(user_id, updated)
-        # Bust the copilot token cache so the updated credential is picked up immediately.
-        _bust_copilot_cache(user_id, updated.provider)

    async def delete(self, user_id: str, credentials_id: str) -> None:
        async with self._locked(user_id, credentials_id):
-            # Read inside the lock to avoid TOCTOU — another coroutine could
-            # delete the same credential between the read and the delete.
-            creds = await self.store.get_creds_by_id(user_id, credentials_id)
            await self.store.delete_creds_by_id(user_id, credentials_id)
-        if creds:
-            _bust_copilot_cache(user_id, creds.provider)

    # -- Locking utilities -- #

--- a/autogpt_platform/backend/backend/util/prompt.py
+++ b/autogpt_platform/backend/backend/util/prompt.py
@@ -70,6 +70,10 @@ def _msg_tokens(msg: dict, enc) -> int:
                # Count tool result tokens
                tool_call_tokens += _tok_len(item.get("tool_use_id", ""), enc)
                tool_call_tokens += _tok_len(item.get("content", ""), enc)
+            elif isinstance(item, dict) and item.get("type") == "text":
+                # Count text block tokens (standard: "text" key, fallback: "content")
+                text_val = item.get("text") or item.get("content", "")
+                tool_call_tokens += _tok_len(text_val, enc)
            elif isinstance(item, dict) and "content" in item:
                # Other content types with content field
                tool_call_tokens += _tok_len(item.get("content", ""), enc)
@@ -145,10 +149,16 @@ def _truncate_middle_tokens(text: str, enc, max_tok: int) -> str:
    if len(ids) <= max_tok:
        return text  # nothing to do

+    # Need at least 3 tokens (head + ellipsis + tail) for meaningful truncation
+    if max_tok < 1:
+        return ""
+    mid = enc.encode(" … ")
+    if max_tok < 3:
+        return enc.decode(ids[:max_tok])
+
    # Split the allowance between the two ends:
    head = max_tok // 2 - 1  # -1 for the ellipsis
    tail = max_tok - head - 1
-    mid = enc.encode(" … ")
    return enc.decode(ids[:head] + mid + ids[-tail:])


@@ -545,6 +555,14 @@ async def _summarize_messages_llm(
                    "- Actions taken and key decisions made\n"
                    "- Technical specifics (file names, tool outputs, function signatures)\n"
                    "- Errors encountered and resolutions applied\n\n"
+                    "IMPORTANT: Preserve all concrete references verbatim — these are small but "
+                    "critical for continuing the conversation:\n"
+                    "- File paths and directory paths (e.g. /src/app/page.tsx, ./output/result.csv)\n"
+                    "- Image/media file paths from tool outputs\n"
+                    "- URLs, API endpoints, and webhook addresses\n"
+                    "- Resource IDs, session IDs, and identifiers\n"
+                    "- Tool names that were called and their key parameters\n"
+                    "- Environment variables, config keys, and credentials names (not values)\n\n"
                    "Include ONLY the sections below that have relevant content "
                    "(skip sections with nothing to report):\n\n"
                    "## 1. Primary Request and Intent\n"
@@ -552,7 +570,8 @@ async def _summarize_messages_llm(
                    "## 2. Key Technical Concepts\n"
                    "Technologies, frameworks, tools, and patterns being used or discussed.\n\n"
                    "## 3. Files and Resources Involved\n"
-                    "Specific files examined or modified, with relevant snippets and identifiers.\n\n"
+                    "Specific files examined or modified, with relevant snippets and identifiers. "
+                    "Include exact file paths, image paths from tool outputs, and resource URLs.\n\n"
                    "## 4. Errors and Fixes\n"
                    "Problems encountered, error messages, and their resolutions.\n\n"
                    "## 5. All User Messages\n"
@@ -566,7 +585,7 @@ async def _summarize_messages_llm(
            },
            {"role": "user", "content": f"Summarize:\n\n{conversation_text}"},
        ],
-        max_tokens=1500,
+        max_tokens=2000,
        temperature=0.3,
    )

@@ -686,11 +705,15 @@ async def compress_context(
                    msgs = [summary_msg] + recent_msgs

                logger.info(
-                    f"Context summarized: {original_count} -> {total_tokens()} tokens, "
-                    f"summarized {messages_summarized} messages"
+                    "Context summarized: %d -> %d tokens, summarized %d messages",
+                    original_count,
+                    total_tokens(),
+                    messages_summarized,
                )
            except Exception as e:
-                logger.warning(f"Summarization failed, continuing with truncation: {e}")
+                logger.warning(
+                    "Summarization failed, continuing with truncation: %s", e
+                )
                # Fall through to content truncation

    # ---- STEP 2: Normalize content ----------------------------------------
--- a/autogpt_platform/frontend/src/app/(platform)/copilot/components/ChatMessagesContainer/components/MessagePartRenderer.tsx
+++ b/autogpt_platform/frontend/src/app/(platform)/copilot/components/ChatMessagesContainer/components/MessagePartRenderer.tsx
@@ -3,7 +3,6 @@ import { ErrorCard } from "@/components/molecules/ErrorCard/ErrorCard";
 import { ExclamationMarkIcon } from "@phosphor-icons/react";
 import { ToolUIPart, UIDataTypes, UIMessage, UITools } from "ai";
 import { useState } from "react";
-import { ConnectIntegrationTool } from "../../../tools/ConnectIntegrationTool/ConnectIntegrationTool";
 import { CreateAgentTool } from "../../../tools/CreateAgent/CreateAgent";
 import { EditAgentTool } from "../../../tools/EditAgent/EditAgent";
 import {
@@ -130,8 +129,6 @@ export function MessagePartRenderer({ part, messageID, partIndex }: Props) {
    case "tool-search_docs":
    case "tool-get_doc_page":
      return <SearchDocsTool key={key} part={part as ToolUIPart} />;
-    case "tool-connect_integration":
-      return <ConnectIntegrationTool key={key} part={part as ToolUIPart} />;
    case "tool-run_block":
    case "tool-continue_run_block":
      return <RunBlockTool key={key} part={part as ToolUIPart} />;
--- a/autogpt_platform/frontend/src/app/(platform)/copilot/tools/ConnectIntegrationTool/ConnectIntegrationTool.tsx
+++ b/autogpt_platform/frontend/src/app/(platform)/copilot/tools/ConnectIntegrationTool/ConnectIntegrationTool.tsx
@@ -1,104 +0,0 @@
-"use client";
-
-import type { SetupRequirementsResponse } from "@/app/api/__generated__/models/setupRequirementsResponse";
-import type { ToolUIPart } from "ai";
-import { useState } from "react";
-import { MorphingTextAnimation } from "../../components/MorphingTextAnimation/MorphingTextAnimation";
-import { ContentMessage } from "../../components/ToolAccordion/AccordionContent";
-import { SetupRequirementsCard } from "../RunBlock/components/SetupRequirementsCard/SetupRequirementsCard";
-
-type Props = {
-  part: ToolUIPart;
-};
-
-function parseJson(raw: unknown): unknown {
-  if (typeof raw === "string") {
-    try {
-      return JSON.parse(raw);
-    } catch {
-      return null;
-    }
-  }
-  return raw;
-}
-
-function parseOutput(raw: unknown): SetupRequirementsResponse | null {
-  const parsed = parseJson(raw);
-  if (parsed && typeof parsed === "object" && "setup_info" in parsed) {
-    return parsed as SetupRequirementsResponse;
-  }
-  return null;
-}
-
-function parseError(raw: unknown): string | null {
-  const parsed = parseJson(raw);
-  if (parsed && typeof parsed === "object" && "message" in parsed) {
-    return String((parsed as { message: unknown }).message);
-  }
-  return null;
-}
-
-export function ConnectIntegrationTool({ part }: Props) {
-  // Persist dismissed state here so SetupRequirementsCard remounts don't re-enable Proceed.
-  const [isDismissed, setIsDismissed] = useState(false);
-
-  const isStreaming =
-    part.state === "input-streaming" || part.state === "input-available";
-  const isError = part.state === "output-error";
-
-  const output =
-    part.state === "output-available"
-      ? parseOutput((part as { output?: unknown }).output)
-      : null;
-
-  const errorMessage = isError
-    ? (parseError((part as { output?: unknown }).output) ??
-      "Failed to connect integration")
-    : null;
-
-  const rawProvider =
-    (part as { input?: { provider?: string } }).input?.provider ?? "";
-  const providerName =
-    output?.setup_info?.agent_name ??
-    // Sanitize LLM-controlled provider slug: trim and cap at 64 chars to
-    // prevent runaway text in the DOM.
-    (rawProvider ? rawProvider.trim().slice(0, 64) : "integration");
-
-  const label = isStreaming
-    ? `Connecting ${providerName}…`
-    : isError
-      ? `Failed to connect ${providerName}`
-      : output
-        ? `Connect ${output.setup_info?.agent_name ?? providerName}`
-        : `Connect ${providerName}`;
-
-  return (
-    <div className="py-2">
-      <div className="flex items-center gap-2 text-sm text-muted-foreground">
-        <MorphingTextAnimation
-          text={label}
-          className={isError ? "text-red-500" : undefined}
-        />
-      </div>
-
-      {isError && errorMessage && (
-        <p className="mt-1 text-sm text-red-500">{errorMessage}</p>
-      )}
-
-      {output && (
-        <div className="mt-2">
-          {isDismissed ? (
-            <ContentMessage>Connected. Continuing…</ContentMessage>
-          ) : (
-            <SetupRequirementsCard
-              output={output}
-              credentialsLabel={`${output.setup_info?.agent_name ?? providerName} credentials`}
-              retryInstruction="I've connected my account. Please continue."
-              onComplete={() => setIsDismissed(true)}
-            />
-          )}
-        </div>
-      )}
-    </div>
-  );
-}
--- a/autogpt_platform/frontend/src/app/(platform)/copilot/tools/RunBlock/components/SetupRequirementsCard/SetupRequirementsCard.tsx
+++ b/autogpt_platform/frontend/src/app/(platform)/copilot/tools/RunBlock/components/SetupRequirementsCard/SetupRequirementsCard.tsx
@@ -23,16 +23,12 @@ interface Props {
  /** Override the label shown above the credentials section.
   * Defaults to "Credentials". */
  credentialsLabel?: string;
-  /** Called after Proceed is clicked so the parent can persist the dismissed state
-   * across remounts (avoids re-enabling the Proceed button on remount). */
-  onComplete?: () => void;
 }

 export function SetupRequirementsCard({
  output,
  retryInstruction,
  credentialsLabel,
-  onComplete,
 }: Props) {
  const { onSend } = useCopilotChatActions();

@@ -72,17 +68,13 @@ export function SetupRequirementsCard({
      return v !== undefined && v !== null && v !== "";
    });

-  if (hasSent) {
-    return <ContentMessage>Connected. Continuing…</ContentMessage>;
-  }
-
  const canRun =
+    !hasSent &&
    (!needsCredentials || isAllCredentialsComplete) &&
    (!needsInputs || isAllInputsComplete);

  function handleRun() {
    setHasSent(true);
-    onComplete?.();

    const parts: string[] = [];
    if (needsCredentials) {
--- a/autogpt_platform/frontend/src/components/contextual/CredentialsInput/useCredentialsInput.ts
+++ b/autogpt_platform/frontend/src/components/contextual/CredentialsInput/useCredentialsInput.ts
@@ -125,9 +125,9 @@ export function useCredentialsInput({
      if (hasAttemptedAutoSelect.current) return;
      hasAttemptedAutoSelect.current = true;

-      // Auto-select only when there is exactly one saved credential.
-      // With multiple options the user must choose — regardless of optional/required.
-      if (savedCreds.length > 1) return;
+      // Auto-select if exactly one credential matches.
+      // For optional fields with multiple options, let the user choose.
+      if (isOptional && savedCreds.length > 1) return;

      const cred = savedCreds[0];
      onSelectCredential({