From d224776ffbb16e87600c7d7e52135e41f3ee343a Mon Sep 17 00:00:00 2001
From: sebslight <19554889+sebslight@users.noreply.github.com>
Date: Mon, 16 Feb 2026 08:07:14 -0500
Subject: [PATCH] refactor(agents): extract cooldown probe decision helper

---
 src/agents/model-fallback.probe.test.ts | 81 ++++++++++---------------
 src/agents/model-fallback.ts            | 72 +++++++++++++++-------
 2 files changed, 82 insertions(+), 71 deletions(-)

diff --git a/src/agents/model-fallback.probe.test.ts b/src/agents/model-fallback.probe.test.ts
index b51c6bad53..bc8ffe704c 100644
--- a/src/agents/model-fallback.probe.test.ts
+++ b/src/agents/model-fallback.probe.test.ts
@@ -284,42 +284,6 @@ describe("runWithModelFallback – probe logic", () => {
     expect(run).toHaveBeenCalledWith("openai", "gpt-4.1-mini");
   });
 
-  it("single candidate (no fallbacks) → no probe, normal skip behavior", async () => {
-    const cfg = makeCfg({
-      agents: {
-        defaults: {
-          model: {
-            primary: "openai/gpt-4.1-mini",
-            fallbacks: [], // no fallbacks
-          },
-        },
-      },
-    } as Partial);
-
-    // Cooldown expires within probe margin
-    const almostExpired = NOW + 30 * 1000;
-    mockedGetSoonestCooldownExpiry.mockReturnValue(almostExpired);
-
-    const run = vi.fn().mockResolvedValue("should-not-probe");
-
-    // With single candidate + hasFallbackCandidates === false,
-    // shouldProbe is false → skip with rate_limit
-    await expect(
-      runWithModelFallback({
-        cfg,
-        provider: "openai",
-        model: "gpt-4.1-mini",
-        fallbacksOverride: [],
-        run,
-      }),
-    ).rejects.toThrow();
-
-    // run should still be called once (single candidate, no fallbacks = try it directly?
-    // Actually with all profiles in cooldown and no fallback candidates,
-    // it skips the primary and throws "all candidates exhausted"
-    // Let's verify the attempt shows rate_limit
-  });
-
   it("single candidate skips with rate_limit and exhausts candidates", async () => {
     const cfg = makeCfg({
       agents: {
@@ -332,27 +296,48 @@ describe("runWithModelFallback – probe logic", () => {
       },
     } as Partial);
 
-    // Cooldown within probe margin — but probe only applies when hasFallbackCandidates
     const almostExpired = NOW + 30 * 1000;
     mockedGetSoonestCooldownExpiry.mockReturnValue(almostExpired);
 
     const run = vi.fn().mockResolvedValue("unreachable");
 
-    try {
-      await runWithModelFallback({
+    await expect(
+      runWithModelFallback({
         cfg,
         provider: "openai",
         model: "gpt-4.1-mini",
         fallbacksOverride: [],
         run,
-      });
-      // Should not reach here
-      expect.unreachable("should have thrown");
-    } catch {
-      // With no fallbacks and all profiles in cooldown,
-      // shouldProbe = isPrimary && hasFallbackCandidates(false) && ... = false
-      // So it skips, then exhausts all candidates
-      expect(run).not.toHaveBeenCalled();
-    }
+      }),
+    ).rejects.toThrow("All models failed");
+
+    expect(run).not.toHaveBeenCalled();
   });
+
+  it("scopes probe throttling by agentDir to avoid cross-agent suppression", async () => {
+    const cfg = makeCfg();
+    const almostExpired = NOW + 30 * 1000;
+    mockedGetSoonestCooldownExpiry.mockReturnValue(almostExpired);
+
+    const run = vi.fn().mockResolvedValue("probed-ok");
+
+    await runWithModelFallback({
+      cfg,
+      provider: "openai",
+      model: "gpt-4.1-mini",
+      agentDir: "/tmp/agent-a",
+      run,
+    });
+
+    await runWithModelFallback({
+      cfg,
+      provider: "openai",
+      model: "gpt-4.1-mini",
+      agentDir: "/tmp/agent-b",
+      run,
+    });
+
+    expect(run).toHaveBeenNthCalledWith(1, "openai", "gpt-4.1-mini");
+    expect(run).toHaveBeenNthCalledWith(2, "openai", "gpt-4.1-mini");
+  });
 });
diff --git a/src/agents/model-fallback.ts b/src/agents/model-fallback.ts
index 09c84a0606..6e24fbda17 100644
--- a/src/agents/model-fallback.ts
+++ b/src/agents/model-fallback.ts
@@ -219,12 +219,47 @@ function resolveFallbackCandidates(params: {
 }
 
 const lastProbeAttempt = new Map();
-const MIN_PROBE_INTERVAL_MS = 30_000; // 30 seconds between probes per provider
+const MIN_PROBE_INTERVAL_MS = 30_000; // 30 seconds between probes per key
+const PROBE_MARGIN_MS = 2 * 60 * 1000;
+const PROBE_SCOPE_DELIMITER = "::";
+
+function resolveProbeThrottleKey(provider: string, agentDir?: string): string {
+  const scope = String(agentDir ?? "").trim();
+  return scope ? `${scope}${PROBE_SCOPE_DELIMITER}${provider}` : provider;
+}
+
+function shouldProbePrimaryDuringCooldown(params: {
+  isPrimary: boolean;
+  hasFallbackCandidates: boolean;
+  now: number;
+  throttleKey: string;
+  authStore: ReturnType;
+  profileIds: string[];
+}): boolean {
+  if (!params.isPrimary || !params.hasFallbackCandidates) {
+    return false;
+  }
+
+  const lastProbe = lastProbeAttempt.get(params.throttleKey) ?? 0;
+  if (params.now - lastProbe < MIN_PROBE_INTERVAL_MS) {
+    return false;
+  }
+
+  const soonest = getSoonestCooldownExpiry(params.authStore, params.profileIds);
+  if (soonest === null || !Number.isFinite(soonest)) {
+    return true;
+  }
+
+  // Probe when cooldown already expired or within the configured margin.
+  return params.now >= soonest - PROBE_MARGIN_MS;
+}
 
 /** @internal – exposed for unit tests only */
 export const _probeThrottleInternals = {
   lastProbeAttempt,
   MIN_PROBE_INTERVAL_MS,
+  PROBE_MARGIN_MS,
+  resolveProbeThrottleKey,
 } as const;
 
 export async function runWithModelFallback(params: {
@@ -264,27 +299,18 @@ export async function runWithModelFallback(params: {
     if (profileIds.length > 0 && !isAnyProfileAvailable) {
       // All profiles for this provider are in cooldown.
       // For the primary model (i === 0), probe it if the soonest cooldown
-      // expiry is close (within 2 minutes) or already past. This avoids
-      // staying on a fallback model long after the rate-limit window clears
-      // when exponential backoff cooldowns exceed the actual provider limit.
-      const isPrimary = i === 0;
-      const shouldProbe =
-        isPrimary &&
-        hasFallbackCandidates &&
-        (() => {
-          const lastProbe = lastProbeAttempt.get(candidate.provider) ?? 0;
-          if (Date.now() - lastProbe < MIN_PROBE_INTERVAL_MS) {
-            return false; // throttled
-          }
-          const soonest = getSoonestCooldownExpiry(authStore, profileIds);
-          if (soonest === null || !Number.isFinite(soonest)) {
-            return true;
-          }
-          const now = Date.now();
-          // Probe when cooldown already expired or within 2 min of expiry
-          const PROBE_MARGIN_MS = 2 * 60 * 1000;
-          return now >= soonest - PROBE_MARGIN_MS;
-        })();
+      // expiry is close or already past. This avoids staying on a fallback
+      // model long after the real rate-limit window clears.
+      const now = Date.now();
+      const probeThrottleKey = resolveProbeThrottleKey(candidate.provider, params.agentDir);
+      const shouldProbe = shouldProbePrimaryDuringCooldown({
+        isPrimary: i === 0,
+        hasFallbackCandidates,
+        now,
+        throttleKey: probeThrottleKey,
+        authStore,
+        profileIds,
+      });
       if (!shouldProbe) {
         // Skip without attempting
         attempts.push({
@@ -298,7 +324,7 @@ export async function runWithModelFallback(params: {
         // Primary model probe: attempt it despite cooldown to detect recovery.
         // If it fails, the error is caught below and we fall through to the
         // next candidate as usual.
-        lastProbeAttempt.set(candidate.provider, Date.now());
+        lastProbeAttempt.set(probeThrottleKey, now);
       }
     }
     try {
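
For reference, here is a minimal standalone sketch of the decision rule this patch extracts into shouldProbePrimaryDuringCooldown: probe only the primary model, only while fallback candidates exist, at most once per 30 seconds per agentDir-scoped provider key, and only once the soonest cooldown expiry is past or within the two-minute margin. This is illustrative only; the standalone function names, the lastProbeAt and soonestCooldownExpiry parameters, and the inlined constants are assumptions made so the sketch runs on its own, since the real helper is module-internal and reads cooldown state from the auth store.

// Illustrative sketch only (assumed standalone API, not the module's real exports).
const MIN_PROBE_INTERVAL_MS = 30_000; // at most one probe per throttle key per 30s
const PROBE_MARGIN_MS = 2 * 60 * 1000; // probe when expiry is within 2 minutes

function probeThrottleKey(provider: string, agentDir?: string): string {
  // Scoping by agentDir keeps one agent's probe from suppressing another's.
  const scope = (agentDir ?? "").trim();
  return scope ? `${scope}::${provider}` : provider;
}

function shouldProbe(opts: {
  isPrimary: boolean;
  hasFallbackCandidates: boolean;
  now: number;
  lastProbeAt: number | undefined; // last probe recorded for this throttle key
  soonestCooldownExpiry: number | null; // null when no expiry is known
}): boolean {
  if (!opts.isPrimary || !opts.hasFallbackCandidates) return false;
  if (opts.now - (opts.lastProbeAt ?? 0) < MIN_PROBE_INTERVAL_MS) return false; // throttled
  if (opts.soonestCooldownExpiry === null || !Number.isFinite(opts.soonestCooldownExpiry)) {
    return true; // no known expiry, so try the primary again
  }
  return opts.now >= opts.soonestCooldownExpiry - PROBE_MARGIN_MS;
}

// Example: cooldown expires in 30s, which is inside the 2-minute margin, so probe.
const now = Date.now();
const key = probeThrottleKey("openai", "/tmp/agent-a"); // "/tmp/agent-a::openai"
const probe = shouldProbe({
  isPrimary: true,
  hasFallbackCandidates: true,
  now,
  lastProbeAt: undefined,
  soonestCooldownExpiry: now + 30_000,
});
// probe === true; a caller would then record the attempt under `key` before running.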