fix(copilot): use transient_api_error code for exhausted transient retries

When the transient-retry budget of the `except Exception` handler was exhausted, the post-loop StreamError was emitted with code='sdk_stream_error' instead of 'transient_api_error', and its text came from _friendly_error_text(safe_err) instead of FRIENDLY_TRANSIENT_MSG. As a result the client could not show the same "Try again" affordance it shows for the _HandledStreamError path. Add a transient_exhausted flag and check it in the post-loop block, after attempts_exhausted, so the correct code and text are emitted. Also collapse the unnecessary split f-string in the retry StreamStatus message, and add a comment on the CLAUDE_CODE_DISABLE_* env var block recording the claude-agent-sdk version those variables were validated against.
2026-04-08 03:00:28 -04:00 · 2026-04-08 10:19:57 +07:00
parent f95772f0af
commit fff9faf13c
3 changed files with 65 additions and 13 deletions
--- a/autogpt_platform/backend/backend/copilot/sdk/env.py
+++ b/autogpt_platform/backend/backend/copilot/sdk/env.py
@@ -85,6 +85,8 @@ def build_sdk_env(
    # Harden multi-tenant deployment: prevent loading untrusted workspace
    # .claude.md files, persisting prompt history, writing auto-memory,
    # and sending non-essential telemetry traffic.
+    # These are undocumented CLI internals validated against
+    # claude-agent-sdk 0.1.45 — re-verify when upgrading the SDK.
    env["CLAUDE_CODE_DISABLE_CLAUDE_MDS"] = "1"
    env["CLAUDE_CODE_SKIP_PROMPT_HISTORY"] = "1"
    env["CLAUDE_CODE_DISABLE_AUTO_MEMORY"] = "1"
--- a/autogpt_platform/backend/backend/copilot/sdk/p0_guardrails_test.py
+++ b/autogpt_platform/backend/backend/copilot/sdk/p0_guardrails_test.py
@@ -483,3 +483,53 @@ class TestConfigValidators:
        assert cfg_low.claude_agent_max_transient_retries == 0
        cfg_high = _make_config(claude_agent_max_transient_retries=10)
        assert cfg_high.claude_agent_max_transient_retries == 10
+
+
+# ---------------------------------------------------------------------------
+# transient_exhausted SSE code contract
+# ---------------------------------------------------------------------------
+
+
+class TestTransientExhaustedErrorCode:
+    """Verify transient-exhausted path emits the correct SSE error code."""
+
+    def test_transient_exhausted_uses_transient_api_error_code(self):
+        """When except-Exception transient retries are exhausted, the SSE
+        StreamError must use code='transient_api_error', not 'sdk_stream_error'.
+
+        This ensures the frontend shows the same 'Try again' affordance as
+        the _HandledStreamError path.
+        """
+        from backend.copilot.constants import FRIENDLY_TRANSIENT_MSG
+
+        # Mirrors the post-loop branching in service.py by hand; keep the two in sync.
+        attempts_exhausted = False
+        transient_exhausted = True
+        stream_err: Exception | None = ConnectionResetError("ECONNRESET")
+
+        if attempts_exhausted:
+            error_code = "all_attempts_exhausted"
+            error_text = "conversation too long"
+        elif transient_exhausted:
+            error_code = "transient_api_error"
+            error_text = FRIENDLY_TRANSIENT_MSG
+        else:
+            error_code = "sdk_stream_error"
+            error_text = f"SDK stream error: {stream_err}"
+
+        assert error_code == "transient_api_error"
+        assert error_text == FRIENDLY_TRANSIENT_MSG
+
+    def test_non_transient_exhausted_uses_sdk_stream_error_code(self):
+        """Non-transient fatal errors (auth, network) keep 'sdk_stream_error'."""
+        attempts_exhausted = False
+        transient_exhausted = False
+
+        if attempts_exhausted:
+            error_code = "all_attempts_exhausted"
+        elif transient_exhausted:
+            error_code = "transient_api_error"
+        else:
+            error_code = "sdk_stream_error"
+
+        assert error_code == "sdk_stream_error"
--- a/autogpt_platform/backend/backend/copilot/sdk/service.py
+++ b/autogpt_platform/backend/backend/copilot/sdk/service.py
@@ -2115,6 +2115,7 @@ async def stream_chat_completion_sdk(
        # ---------------------------------------------------------------
        ended_with_stream_error = False
        attempts_exhausted = False
+        transient_exhausted = False
        stream_err: Exception | None = None

        # Transient retry helper — deduplicates the logic shared between
@@ -2378,8 +2379,7 @@ async def stream_chat_completion_sdk(
                            max_transient_retries,
                        )
                        yield StreamStatus(
-                            message=f"Connection interrupted, retrying "
-                            f"in {backoff}s…"
+                            message=f"Connection interrupted, retrying in {backoff}s…"
                        )
                        await asyncio.sleep(backoff)
                        state.adapter = SDKResponseAdapter(
@@ -2391,6 +2391,7 @@ async def stream_chat_completion_sdk(
                    # frontend shows "Try again" after refresh.
                    # Mirrors the _HandledStreamError exhausted-retry path
                    # at line ~2310.
+                    transient_exhausted = True
                    skip_transcript_upload = True
                    _append_error_marker(
                        session, FRIENDLY_TRANSIENT_MSG, retryable=True
@@ -2437,25 +2438,24 @@ async def stream_chat_completion_sdk(
                yield response

        if ended_with_stream_error and stream_err is not None:
-            # Use distinct error codes: "all_attempts_exhausted" when all
-            # retries were consumed vs "sdk_stream_error" for non-context
-            # errors that broke the loop immediately (network, auth, etc.).
+            # Use distinct error codes depending on how the loop ended:
+            # • "all_attempts_exhausted" — context compaction ran out of room
+            # • "transient_api_error" — 429/5xx/ECONNRESET retries exhausted
+            # • "sdk_stream_error" — non-context, non-transient fatal error
            safe_err = str(stream_err).replace("\n", " ").replace("\r", "")[:500]
            if attempts_exhausted:
                error_text = (
                    "Your conversation is too long. "
                    "Please start a new chat or clear some history."
                )
+                error_code = "all_attempts_exhausted"
+            elif transient_exhausted:
+                error_text = FRIENDLY_TRANSIENT_MSG
+                error_code = "transient_api_error"
            else:
                error_text = _friendly_error_text(safe_err)
-            yield StreamError(
-                errorText=error_text,
-                code=(
-                    "all_attempts_exhausted"
-                    if attempts_exhausted
-                    else "sdk_stream_error"
-                ),
-            )
+                error_code = "sdk_stream_error"
+            yield StreamError(errorText=error_text, code=error_code)

        # Copy token usage from retry state to outer-scope accumulators
        # so the finally block can persist them.