mirror of
https://github.com/Significant-Gravitas/AutoGPT.git
synced 2026-04-08 03:00:28 -04:00
fix(copilot): P0 guardrails — SDK limits, security env vars, transient retry
Based on an analysis of the Claude Code CLI internals, this adds critical guardrails, rebased onto the current dev architecture (env.py extraction):
1. SDK guardrails: fallback_model (auto-retry on 529), max_turns=50 (runaway prevention), max_budget_usd=5.0 (per-query cost cap).
2. TMPDIR redirect: sets CLAUDE_CODE_TMPDIR to sdk_cwd so CLI output is routed into the per-session workspace for isolation/cleanup.
3. Security env vars: DISABLE_CLAUDE_MDS, SKIP_PROMPT_HISTORY, DISABLE_AUTO_MEMORY, DISABLE_NONESSENTIAL_TRAFFIC.
4. Transient error retry: 429/5xx/ECONNRESET errors now retry with exponential backoff (1s, 2s, 4s) in both the _HandledStreamError and generic Exception handlers. The retry is skipped if events have already been yielded.
This commit is contained in:
@@ -129,6 +129,26 @@ class ChatConfig(BaseSettings):
|
||||
description="Use --resume for multi-turn conversations instead of "
|
||||
"history compression. Falls back to compression when unavailable.",
|
||||
)
|
||||
claude_agent_fallback_model: str = Field(
|
||||
default="claude-sonnet-4-20250514",
|
||||
description="Fallback model when the primary model is unavailable (e.g. 529 "
|
||||
"overloaded). The SDK automatically retries with this cheaper model.",
|
||||
)
|
||||
claude_agent_max_turns: int = Field(
|
||||
default=50,
|
||||
description="Maximum number of agentic turns (tool-use loops) per query. "
|
||||
"Prevents runaway tool loops from burning budget.",
|
||||
)
|
||||
claude_agent_max_budget_usd: float = Field(
|
||||
default=5.0,
|
||||
description="Maximum spend in USD per SDK query. The CLI aborts the "
|
||||
"request if this budget is exceeded.",
|
||||
)
|
||||
claude_agent_max_transient_retries: int = Field(
|
||||
default=3,
|
||||
description="Maximum number of retries for transient API errors "
|
||||
"(429, 5xx, ECONNRESET) before surfacing the error to the user.",
|
||||
)
|
||||
use_openrouter: bool = Field(
|
||||
default=True,
|
||||
description="Enable routing API calls through the OpenRouter proxy. "
|
||||
|
||||
@@ -569,6 +569,22 @@ def _resolve_sdk_model() -> str | None:
|
||||
return model
|
||||
|
||||
|
||||
def _resolve_fallback_model() -> str | None:
    """Return the configured fallback model name, or ``None`` if unset.

    Reads ``config.claude_agent_fallback_model``. An empty value means no
    fallback is configured and yields ``None``. Otherwise the name is
    returned unchanged while OpenRouter is active; when it is not active,
    every ``.`` in the name is replaced with ``-``.
    """
    fallback = config.claude_agent_fallback_model
    if not fallback:
        return None
    # Dots in the model name are only kept when routing through OpenRouter.
    return fallback if config.openrouter_active else fallback.replace(".", "-")
|
||||
|
||||
|
||||
def _make_sdk_cwd(session_id: str) -> str:
|
||||
"""Create a safe, session-specific working directory path.
|
||||
|
||||
@@ -1859,6 +1875,19 @@ async def stream_chat_completion_sdk(
|
||||
|
||||
# Fail fast when no API credentials are available at all.
|
||||
sdk_env = build_sdk_env(session_id=session_id, user_id=user_id)
|
||||
|
||||
# --- Security & isolation env vars (all auth modes) ---
|
||||
# Route CLI temp files into the per-session workspace so they are
|
||||
# isolated and cleaned up automatically.
|
||||
if sdk_cwd:
|
||||
sdk_env["CLAUDE_CODE_TMPDIR"] = sdk_cwd
|
||||
# Prevent loading untrusted workspace .claude.md files, persisting
|
||||
# prompt history, writing auto-memory, and non-essential traffic.
|
||||
sdk_env["CLAUDE_CODE_DISABLE_CLAUDE_MDS"] = "1"
|
||||
sdk_env["CLAUDE_CODE_SKIP_PROMPT_HISTORY"] = "1"
|
||||
sdk_env["CLAUDE_CODE_DISABLE_AUTO_MEMORY"] = "1"
|
||||
sdk_env["CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC"] = "1"
|
||||
|
||||
if not config.api_key and not config.use_claude_code_subscription:
|
||||
raise RuntimeError(
|
||||
"No API key configured. Set OPEN_ROUTER_API_KEY, "
|
||||
@@ -1901,6 +1930,15 @@ async def stream_chat_completion_sdk(
|
||||
"cwd": sdk_cwd,
|
||||
"max_buffer_size": config.claude_agent_max_buffer_size,
|
||||
"stderr": _on_stderr,
|
||||
# --- P0 guardrails ---
|
||||
# fallback_model: SDK auto-retries with this cheaper model on
|
||||
# 529 (overloaded) errors, avoiding user-visible failures.
|
||||
"fallback_model": _resolve_fallback_model(),
|
||||
# max_turns: hard cap on agentic tool-use loops per query to
|
||||
# prevent runaway execution from burning budget.
|
||||
"max_turns": config.claude_agent_max_turns,
|
||||
# max_budget_usd: per-query spend ceiling enforced by the CLI.
|
||||
"max_budget_usd": config.claude_agent_max_budget_usd,
|
||||
}
|
||||
if sdk_model:
|
||||
sdk_options_kwargs["model"] = sdk_model
|
||||
@@ -1982,6 +2020,24 @@ async def stream_chat_completion_sdk(
|
||||
attempts_exhausted = False
|
||||
stream_err: Exception | None = None
|
||||
|
||||
# Transient retry helper — deduplicates the logic shared between
|
||||
# _HandledStreamError and the generic except-Exception handler.
|
||||
transient_retries = 0
|
||||
max_transient = config.claude_agent_max_transient_retries
|
||||
|
||||
def _can_retry_transient() -> int | None:
|
||||
"""Check if a transient retry is possible.
|
||||
|
||||
Returns the backoff seconds if a retry should be attempted,
|
||||
or ``None`` if the error should be surfaced to the user.
|
||||
Mutates outer ``transient_retries`` via nonlocal.
|
||||
"""
|
||||
nonlocal transient_retries
|
||||
transient_retries += 1
|
||||
if events_yielded > 0 or transient_retries >= max_transient:
|
||||
return None
|
||||
return 2 ** (transient_retries - 1) # 1s, 2s, 4s, ...
|
||||
|
||||
state = _RetryState(
|
||||
options=options,
|
||||
query_message=query_message,
|
||||
@@ -1995,6 +2051,10 @@ async def stream_chat_completion_sdk(
|
||||
)
|
||||
|
||||
for attempt in range(_MAX_STREAM_ATTEMPTS):
|
||||
# Reset transient retry counter per context-level attempt so
|
||||
# each attempt (original, compacted, no-transcript) gets the
|
||||
# full retry budget for transient errors.
|
||||
transient_retries = 0
|
||||
# Clear any stale stash signal from the previous attempt so
|
||||
# wait_for_stash() doesn't fire prematurely on a leftover event.
|
||||
reset_stash_event()
|
||||
@@ -2084,6 +2144,27 @@ async def stream_chat_completion_sdk(
|
||||
# session messages and set the error flag — do NOT set
|
||||
# stream_err so the post-loop code won't emit a
|
||||
# duplicate StreamError.
|
||||
session.messages = session.messages[:pre_attempt_msg_count]
|
||||
# Check if this is a transient error we can retry with backoff.
|
||||
if exc.code == "transient" or is_transient_api_error(str(exc)):
|
||||
backoff = _can_retry_transient()
|
||||
if backoff is not None:
|
||||
logger.warning(
|
||||
"%s Transient error — retrying in %ds (%d/%d)",
|
||||
log_prefix,
|
||||
backoff,
|
||||
transient_retries,
|
||||
max_transient,
|
||||
)
|
||||
yield StreamStatus(
|
||||
message=f"Connection interrupted, retrying in {backoff}s…"
|
||||
)
|
||||
await asyncio.sleep(backoff)
|
||||
state.adapter = SDKResponseAdapter(
|
||||
message_id=message_id, session_id=session_id
|
||||
)
|
||||
state.usage.reset()
|
||||
continue # retry the same context-level attempt
|
||||
logger.warning(
|
||||
"%s Stream error handled in attempt "
|
||||
"(attempt %d/%d, code=%s, events_yielded=%d)",
|
||||
@@ -2093,7 +2174,6 @@ async def stream_chat_completion_sdk(
|
||||
exc.code or "transient",
|
||||
events_yielded,
|
||||
)
|
||||
session.messages = session.messages[:pre_attempt_msg_count]
|
||||
# transcript_builder still contains entries from the aborted
|
||||
# attempt that no longer match session.messages. Skip upload
|
||||
# so a future --resume doesn't replay rolled-back content.
|
||||
@@ -2112,13 +2192,15 @@ async def stream_chat_completion_sdk(
|
||||
except Exception as e:
|
||||
stream_err = e
|
||||
is_context_error = _is_prompt_too_long(e)
|
||||
is_transient = is_transient_api_error(str(e))
|
||||
logger.warning(
|
||||
"%s Stream error (attempt %d/%d, context_error=%s, "
|
||||
"events_yielded=%d): %s",
|
||||
"transient=%s, events_yielded=%d): %s",
|
||||
log_prefix,
|
||||
attempt + 1,
|
||||
_MAX_STREAM_ATTEMPTS,
|
||||
is_context_error,
|
||||
is_transient,
|
||||
events_yielded,
|
||||
stream_err,
|
||||
exc_info=True,
|
||||
@@ -2136,9 +2218,32 @@ async def stream_chat_completion_sdk(
|
||||
skip_transcript_upload = True
|
||||
ended_with_stream_error = True
|
||||
break
|
||||
# Transient API errors (ECONNRESET, 429, 5xx) — retry
|
||||
# with exponential backoff via the shared helper.
|
||||
if is_transient:
|
||||
backoff = _can_retry_transient()
|
||||
if backoff is not None:
|
||||
logger.warning(
|
||||
"%s Transient exception — retrying in %ds (%d/%d)",
|
||||
log_prefix,
|
||||
backoff,
|
||||
transient_retries,
|
||||
max_transient,
|
||||
)
|
||||
yield StreamStatus(
|
||||
message=f"Connection interrupted, retrying "
|
||||
f"in {backoff}s…"
|
||||
)
|
||||
await asyncio.sleep(backoff)
|
||||
state.adapter = SDKResponseAdapter(
|
||||
message_id=message_id, session_id=session_id
|
||||
)
|
||||
state.usage.reset()
|
||||
continue # retry same context-level attempt
|
||||
|
||||
if not is_context_error:
|
||||
# Non-context errors (network, auth, rate-limit) should
|
||||
# not trigger compaction — surface the error immediately.
|
||||
# Non-context, non-transient errors (auth, fatal)
|
||||
# should not trigger compaction — surface immediately.
|
||||
skip_transcript_upload = True
|
||||
ended_with_stream_error = True
|
||||
break
|
||||
|
||||
Reference in New Issue
Block a user