fix: treat HTTP 503 as failover-eligible for LLM provider errors (#21086)

* fix: treat HTTP 503 as failover-eligible for LLM provider errors

When LLM SDKs wrap 503 responses, the leading "503" prefix is lost
(e.g. Google Gemini returns "high demand" / "UNAVAILABLE" without a
numeric prefix). The existing isTransientHttpError only matches
messages starting with "503 ...", so these wrapped errors silently
skip failover — no profile rotation, no model fallback.
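
For concreteness, a minimal TypeScript sketch of the two message shapes. The wrapped strings are copied from the new test cases below; the prefixed string and the startsWith check are only illustrative stand-ins for what isTransientHttpError matches, not its actual implementation.

// Prefixed form: already recognized as transient by the existing check (illustrative string).
const prefixed = "503 Service Unavailable";
console.log(prefixed.startsWith("503")); // true

// SDK-wrapped Gemini-style forms (copied from the new tests): no leading status code,
// so a prefix-only check never fires and the error previously fell through unclassified.
const wrapped = [
  "This model is currently experiencing high demand. Please try again later.",
  '{"error":{"code":503,"message":"The model is overloaded. Please try later","status":"UNAVAILABLE"}}',
];
console.log(wrapped.some((m) => m.startsWith("503"))); // false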

This patch closes that gap:

- resolveFailoverReasonFromError: map HTTP status 503 → rate_limit
  (covers structured error objects with a status field)
- ERROR_PATTERNS.overloaded: add /\b503\b/, "service unavailable",
  "high demand" (covers message-only classification when the leading
  status prefix is absent)

Existing isTransientHttpError behavior is unchanged; these additions
are complementary and only fire for errors that previously fell
through unclassified.
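
Below is a rough TypeScript sketch of how a pattern table like ERROR_PATTERNS is typically consumed, to show where these additions take effect. It is an assumption about the surrounding classifier, not the project's actual code: only the overloaded entries mirror this commit, and the overloaded → rate_limit mapping is inferred from the classifyFailoverReason tests added later in this PR.

// Hypothetical, reduced classifier; the real FailoverReason union and the
// other pattern categories are omitted.
type Reason = "rate_limit" | "timeout" | null;

const PATTERNS: Record<string, Array<RegExp | string>> = {
  overloaded: [
    /overloaded_error|"type"\s*:\s*"overloaded_error"/i,
    "overloaded",
    /\b503\b/, // added here, dropped again in the review follow-up below
    "service unavailable",
    "high demand",
  ],
};

// Assumed category → reason mapping.
const REASON: Record<string, Exclude<Reason, null>> = { overloaded: "rate_limit" };

function classifySketch(message: string): Reason {
  const lower = message.toLowerCase();
  for (const [category, patterns] of Object.entries(PATTERNS)) {
    const hit = patterns.some((p) =>
      typeof p === "string" ? lower.includes(p) : p.test(message),
    );
    if (hit) return REASON[category] ?? null;
  }
  return null;
}

console.log(classifySketch("This model is currently experiencing high demand.")); // "rate_limit"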

* fix: address review feedback — drop /\b503\b/ pattern, add test coverage

- Remove `/\b503\b/` from ERROR_PATTERNS.overloaded to resolve the
  semantic inconsistency noted by reviewers: `isTransientHttpError`
  already handles messages prefixed with "503" (→ "timeout"), so a
  redundant overloaded pattern would classify the same class of errors
  differently depending on message formatting (see the sketch after this list).

- Keep "service unavailable" and "high demand" patterns — these are the
  real gap-fillers for SDK-rewritten messages that lack a numeric prefix.

- Add test case for JSON-wrapped 503 error body containing "overloaded"
  to strengthen coverage.
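
The sketch below spells out that inconsistency. The classification outcomes follow the behavior described in this PR (a "503 ..."-prefixed message is handled by isTransientHttpError as "timeout", while an overloaded pattern match is reported as "rate_limit"); the concrete strings are illustrative.

// Two illustrative messages for the same underlying 503 condition:
const prefixed = "503 Service Unavailable";
const wrapped = '{"error":{"code":503,"status":"UNAVAILABLE"}}';

// A /\b503\b/ overloaded pattern would match both strings ...
console.log(/\b503\b/.test(prefixed)); // true
console.log(/\b503\b/.test(wrapped));  // true

// ... but the prefixed form is already intercepted by isTransientHttpError and
// reported as "timeout", while the wrapped form would be reported as "rate_limit":
// the same failure class, classified differently purely because of message formatting.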

* fix: unify 503 classification — status 503 → timeout (consistent with isTransientHttpError)

resolveFailoverReasonFromError previously mapped status 503 → "rate_limit",
while the string-based isTransientHttpError mapped "503 ..." → "timeout".

Align both paths: structured {status: 503} now also returns "timeout",
matching the existing transient-error convention. Both reasons are
failover-eligible, so runtime behavior is unchanged.
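
As a quick check of the unified behavior, the calls and expected values below are taken from the updated resolveFailoverReasonFromError tests; the import path is an assumption.

import { resolveFailoverReasonFromError } from "./failover-error"; // module path assumed

console.log(resolveFailoverReasonFromError({ status: 408 })); // "timeout" (existing behavior)
console.log(resolveFailoverReasonFromError({ status: 503 })); // "timeout" (new, aligned with the "503 ..." message path)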

---------

Co-authored-by: Vincent Koc <vincentkoc@ieee.org>
Author: Protocol Zero
Date: 2026-02-20 04:45:09 +08:00
Committed by: GitHub
Commit: 2af3415fac (parent: 8ae2d5110f)
4 changed files with 23 additions and 1 deletion


@@ -13,6 +13,7 @@ describe("failover-error", () => {
     expect(resolveFailoverReasonFromError({ status: 403 })).toBe("auth");
     expect(resolveFailoverReasonFromError({ status: 408 })).toBe("timeout");
     expect(resolveFailoverReasonFromError({ status: 400 })).toBe("format");
+    expect(resolveFailoverReasonFromError({ status: 503 })).toBe("timeout");
   });
   it("infers format errors from error messages", () => {


@@ -161,6 +161,9 @@ export function resolveFailoverReasonFromError(err: unknown): FailoverReason | null {
   if (status === 408) {
     return "timeout";
   }
+  if (status === 503) {
+    return "timeout";
+  }
   if (status === 400) {
     return "format";
   }


@@ -348,4 +348,17 @@ describe("classifyFailoverReason", () => {
       "rate_limit",
     );
   });
+  it("classifies provider high-demand / service-unavailable messages as rate_limit", () => {
+    expect(
+      classifyFailoverReason(
+        "This model is currently experiencing high demand. Please try again later.",
+      ),
+    ).toBe("rate_limit");
+    expect(classifyFailoverReason("LLM error: service unavailable")).toBe("rate_limit");
+    expect(
+      classifyFailoverReason(
+        '{"error":{"code":503,"message":"The model is overloaded. Please try later","status":"UNAVAILABLE"}}',
+      ),
+    ).toBe("rate_limit");
+  });
 });


@@ -583,7 +583,12 @@ const ERROR_PATTERNS = {
     "resource_exhausted",
     "usage limit",
   ],
-  overloaded: [/overloaded_error|"type"\s*:\s*"overloaded_error"/i, "overloaded"],
+  overloaded: [
+    /overloaded_error|"type"\s*:\s*"overloaded_error"/i,
+    "overloaded",
+    "service unavailable",
+    "high demand",
+  ],
   timeout: [
     "timeout",
     "timed out",