fix(copilot): use transient_api_error code for exhausted transient retries

When the except-Exception transient-retry budget was exhausted, the post-loop
StreamError was yielded with code='sdk_stream_error' instead of
'transient_api_error', and took its text from _friendly_error_text(raw)
instead of FRIENDLY_TRANSIENT_MSG.
This left the client unable to show the same "Try again" affordance as the
_HandledStreamError path.

Add a transient_exhausted flag and check it in the post-loop block alongside
attempts_exhausted to emit the correct code/text.  Also collapse the
unnecessary split f-string in the retry StreamStatus message, and add a
version comment on the CLAUDE_CODE_* hardening env var block.
This commit is contained in:
Zamil Majdy
2026-04-08 10:19:57 +07:00
parent f95772f0af
commit fff9faf13c
3 changed files with 65 additions and 13 deletions

View File

@@ -85,6 +85,8 @@ def build_sdk_env(
# Harden multi-tenant deployment: prevent loading untrusted workspace
# .claude.md files, persisting prompt history, writing auto-memory,
# and sending non-essential telemetry traffic.
# These are undocumented CLI internals validated against
# claude-agent-sdk 0.1.45 — re-verify when upgrading the SDK.
env["CLAUDE_CODE_DISABLE_CLAUDE_MDS"] = "1"
env["CLAUDE_CODE_SKIP_PROMPT_HISTORY"] = "1"
env["CLAUDE_CODE_DISABLE_AUTO_MEMORY"] = "1"

View File

@@ -483,3 +483,53 @@ class TestConfigValidators:
assert cfg_low.claude_agent_max_transient_retries == 0
cfg_high = _make_config(claude_agent_max_transient_retries=10)
assert cfg_high.claude_agent_max_transient_retries == 10
# ---------------------------------------------------------------------------
# transient_exhausted SSE code contract
# ---------------------------------------------------------------------------
class TestTransientExhaustedErrorCode:
    """Pin which SSE error code is chosen once the retry loop has given up.

    NOTE(review): these tests replay a local copy of the post-loop branching
    from service.py instead of driving the real stream, so they guard the
    precedence of the two flags but not the service wiring itself. Keep the
    helper below in sync with service.py.
    """

    @staticmethod
    def _pick_error_code(
        attempts_exhausted: bool, transient_exhausted: bool
    ) -> str:
        """Return the error code for the given loop-exit flags.

        ``attempts_exhausted`` takes precedence over ``transient_exhausted``;
        when neither is set the generic stream-error code is returned.
        """
        if attempts_exhausted:
            return "all_attempts_exhausted"
        if transient_exhausted:
            return "transient_api_error"
        return "sdk_stream_error"

    def test_transient_exhausted_uses_transient_api_error_code(self):
        """Exhausted except-Exception transient retries map to
        code='transient_api_error' (not 'sdk_stream_error') together with
        the friendly transient message, so the frontend can offer the same
        'Try again' affordance as the _HandledStreamError path.
        """
        from backend.copilot.constants import FRIENDLY_TRANSIENT_MSG

        stream_err: Exception | None = ConnectionResetError("ECONNRESET")
        # Text that each branch of the simulated post-loop logic would emit.
        text_by_code = {
            "all_attempts_exhausted": "conversation too long",
            "transient_api_error": FRIENDLY_TRANSIENT_MSG,
            "sdk_stream_error": f"SDK stream error: {stream_err}",
        }

        error_code = self._pick_error_code(
            attempts_exhausted=False, transient_exhausted=True
        )
        error_text = text_by_code[error_code]

        assert error_code == "transient_api_error"
        assert error_text == FRIENDLY_TRANSIENT_MSG

    def test_non_transient_exhausted_uses_sdk_stream_error_code(self):
        """Non-transient fatal errors (auth, network) keep 'sdk_stream_error'."""
        error_code = self._pick_error_code(
            attempts_exhausted=False, transient_exhausted=False
        )

        assert error_code == "sdk_stream_error"

View File

@@ -2115,6 +2115,7 @@ async def stream_chat_completion_sdk(
# ---------------------------------------------------------------
ended_with_stream_error = False
attempts_exhausted = False
transient_exhausted = False
stream_err: Exception | None = None
# Transient retry helper — deduplicates the logic shared between
@@ -2378,8 +2379,7 @@ async def stream_chat_completion_sdk(
max_transient_retries,
)
yield StreamStatus(
message=f"Connection interrupted, retrying "
f"in {backoff}s…"
message=f"Connection interrupted, retrying in {backoff}s…"
)
await asyncio.sleep(backoff)
state.adapter = SDKResponseAdapter(
@@ -2391,6 +2391,7 @@ async def stream_chat_completion_sdk(
# frontend shows "Try again" after refresh.
# Mirrors the _HandledStreamError exhausted-retry path
# at line ~2310.
transient_exhausted = True
skip_transcript_upload = True
_append_error_marker(
session, FRIENDLY_TRANSIENT_MSG, retryable=True
@@ -2437,25 +2438,24 @@ async def stream_chat_completion_sdk(
yield response
if ended_with_stream_error and stream_err is not None:
# Use distinct error codes: "all_attempts_exhausted" when all
# retries were consumed vs "sdk_stream_error" for non-context
# errors that broke the loop immediately (network, auth, etc.).
# Use distinct error codes depending on how the loop ended:
# • "all_attempts_exhausted" — context compaction ran out of room
# • "transient_api_error" — 429/5xx/ECONNRESET retries exhausted
# • "sdk_stream_error" — non-context, non-transient fatal error
safe_err = str(stream_err).replace("\n", " ").replace("\r", "")[:500]
if attempts_exhausted:
error_text = (
"Your conversation is too long. "
"Please start a new chat or clear some history."
)
error_code = "all_attempts_exhausted"
elif transient_exhausted:
error_text = FRIENDLY_TRANSIENT_MSG
error_code = "transient_api_error"
else:
error_text = _friendly_error_text(safe_err)
yield StreamError(
errorText=error_text,
code=(
"all_attempts_exhausted"
if attempts_exhausted
else "sdk_stream_error"
),
)
error_code = "sdk_stream_error"
yield StreamError(errorText=error_text, code=error_code)
# Copy token usage from retry state to outer-scope accumulators
# so the finally block can persist them.