mirror of
https://github.com/openclaw/openclaw.git
synced 2026-02-19 18:39:20 -05:00
fix(failover): align abort timeout detection and regressions
This commit is contained in:
@@ -44,6 +44,7 @@ Docs: https://docs.openclaw.ai
|
||||
- Agents/Tools: make loop detection progress-aware and phased by hard-blocking known `process(action=poll|log)` no-progress loops, warning on generic identical-call repeats, warning + no-progress-blocking ping-pong alternation loops (10/20), coalescing repeated warning spam into threshold buckets (including canonical ping-pong pairs), adding a global circuit breaker at 30 no-progress repeats, and emitting structured diagnostic `tool.loop` warning/error events for loop actions. (#16808) Thanks @akramcodez and @beca-oc.
|
||||
- Agents/Tools: scope the `message` tool schema to the active channel so Telegram uses `buttons` and Discord uses `components`. (#18215) Thanks @obviyus.
|
||||
- Agents/Models: probe the primary model when its auth-profile cooldown is near expiry (with per-provider throttling), so runs recover from temporary rate limits without staying on fallback models until restart. (#17478) Thanks @PlayerGhost.
|
||||
- Agents/Failover: classify provider abort stop-reason errors (`Unhandled stop reason: abort`, `stop reason: abort`, `reason: abort`) as timeout-class failures so configured model fallback chains trigger instead of surfacing raw abort failures. (#18618) Thanks @sauerdaniel.
|
||||
- Agents/Context: raise default total bootstrap prompt cap from `24000` to `150000` chars (keeping `bootstrapMaxChars` at `20000`), include total-cap visibility in `/context`, and mark truncation from injected-vs-raw sizes so total-cap clipping is reflected accurately.
|
||||
- Memory/QMD: scope managed collection names per agent and precreate glob-backed collection directories before registration, preventing cross-agent collection clobbering and startup ENOENT failures in fresh workspaces. (#17194) Thanks @jonathanadams96.
|
||||
- Cron: preserve per-job schedule-error isolation in post-run maintenance recompute so malformed sibling jobs no longer abort persistence of successful runs. (#17852) Thanks @pierreeurope.
|
||||
|
||||
@@ -2,6 +2,7 @@ import { describe, expect, it } from "vitest";
|
||||
import {
|
||||
coerceToFailoverError,
|
||||
describeFailoverError,
|
||||
isTimeoutError,
|
||||
resolveFailoverReasonFromError,
|
||||
} from "./failover-error.js";
|
||||
|
||||
@@ -27,6 +28,22 @@ describe("failover-error", () => {
|
||||
expect(resolveFailoverReasonFromError({ code: "ECONNRESET" })).toBe("timeout");
|
||||
});
|
||||
|
||||
it("infers timeout from abort stop-reason messages", () => {
|
||||
expect(resolveFailoverReasonFromError({ message: "Unhandled stop reason: abort" })).toBe(
|
||||
"timeout",
|
||||
);
|
||||
expect(resolveFailoverReasonFromError({ message: "stop reason: abort" })).toBe("timeout");
|
||||
expect(resolveFailoverReasonFromError({ message: "reason: abort" })).toBe("timeout");
|
||||
});
|
||||
|
||||
it("treats AbortError reason=abort as timeout", () => {
|
||||
const err = Object.assign(new Error("aborted"), {
|
||||
name: "AbortError",
|
||||
reason: "reason: abort",
|
||||
});
|
||||
expect(isTimeoutError(err)).toBe(true);
|
||||
});
|
||||
|
||||
it("coerces failover-worthy errors into FailoverError with metadata", () => {
|
||||
const err = coerceToFailoverError("credit balance too low", {
|
||||
provider: "anthropic",
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import { classifyFailoverReason, type FailoverReason } from "./pi-embedded-helpers.js";
|
||||
|
||||
const TIMEOUT_HINT_RE =
|
||||
/timeout|timed out|deadline exceeded|context deadline exceeded|stop reason:\s*abort|unhandled stop reason:\s*abort/i;
|
||||
/timeout|timed out|deadline exceeded|context deadline exceeded|stop reason:\s*abort|reason:\s*abort|unhandled stop reason:\s*abort/i;
|
||||
const ABORT_TIMEOUT_RE = /request was aborted|request aborted/i;
|
||||
|
||||
export class FailoverError extends Error {
|
||||
|
||||
@@ -400,6 +400,17 @@ describe("runWithModelFallback", () => {
|
||||
});
|
||||
});
|
||||
|
||||
it("falls back on abort errors with reason: abort", async () => {
|
||||
await expectFallsBackToHaiku({
|
||||
provider: "openai",
|
||||
model: "gpt-4.1-mini",
|
||||
firstError: Object.assign(new Error("aborted"), {
|
||||
name: "AbortError",
|
||||
reason: "reason: abort",
|
||||
}),
|
||||
});
|
||||
});
|
||||
|
||||
it("falls back when message says aborted but error is a timeout", async () => {
|
||||
await expectFallsBackToHaiku({
|
||||
provider: "openai",
|
||||
|
||||
@@ -10,6 +10,7 @@ import {
|
||||
isFailoverErrorMessage,
|
||||
isImageDimensionErrorMessage,
|
||||
isLikelyContextOverflowError,
|
||||
isTimeoutErrorMessage,
|
||||
isTransientHttpError,
|
||||
parseImageDimensionError,
|
||||
parseImageSizeError,
|
||||
@@ -286,6 +287,15 @@ describe("isFailoverErrorMessage", () => {
|
||||
expect(isFailoverErrorMessage(sample)).toBe(true);
|
||||
}
|
||||
});
|
||||
|
||||
it("matches abort stop-reason timeout variants", () => {
|
||||
const samples = ["Unhandled stop reason: abort", "stop reason: abort", "reason: abort"];
|
||||
for (const sample of samples) {
|
||||
expect(isTimeoutErrorMessage(sample)).toBe(true);
|
||||
expect(classifyFailoverReason(sample)).toBe("timeout");
|
||||
expect(isFailoverErrorMessage(sample)).toBe(true);
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
describe("parseImageSizeError", () => {
|
||||
|
||||
Reference in New Issue
Block a user