From d224776ffbb16e87600c7d7e52135e41f3ee343a Mon Sep 17 00:00:00 2001
From: sebslight <19554889+sebslight@users.noreply.github.com>
Date: Mon, 16 Feb 2026 08:07:14 -0500
Subject: [PATCH] refactor(agents): extract cooldown probe decision helper

---
 src/agents/model-fallback.probe.test.ts | 81 ++++++++++---------------
 src/agents/model-fallback.ts            | 72 +++++++++++++++-------
 2 files changed, 82 insertions(+), 71 deletions(-)

diff --git a/src/agents/model-fallback.probe.test.ts b/src/agents/model-fallback.probe.test.ts
index b51c6bad53..bc8ffe704c 100644
--- a/src/agents/model-fallback.probe.test.ts
+++ b/src/agents/model-fallback.probe.test.ts
@@ -284,42 +284,6 @@ describe("runWithModelFallback – probe logic", () => {
     expect(run).toHaveBeenCalledWith("openai", "gpt-4.1-mini");
   });
 
-  it("single candidate (no fallbacks) → no probe, normal skip behavior", async () => {
-    const cfg = makeCfg({
-      agents: {
-        defaults: {
-          model: {
-            primary: "openai/gpt-4.1-mini",
-            fallbacks: [], // no fallbacks
-          },
-        },
-      },
-    } as Partial);
-
-    // Cooldown expires within probe margin
-    const almostExpired = NOW + 30 * 1000;
-    mockedGetSoonestCooldownExpiry.mockReturnValue(almostExpired);
-
-    const run = vi.fn().mockResolvedValue("should-not-probe");
-
-    // With single candidate + hasFallbackCandidates === false,
-    // shouldProbe is false → skip with rate_limit
-    await expect(
-      runWithModelFallback({
-        cfg,
-        provider: "openai",
-        model: "gpt-4.1-mini",
-        fallbacksOverride: [],
-        run,
-      }),
-    ).rejects.toThrow();
-
-    // run should still be called once (single candidate, no fallbacks = try it directly?
-    // Actually with all profiles in cooldown and no fallback candidates,
-    // it skips the primary and throws "all candidates exhausted"
-    // Let's verify the attempt shows rate_limit
-  });
-
   it("single candidate skips with rate_limit and exhausts candidates", async () => {
     const cfg = makeCfg({
       agents: {
@@ -332,27 +296,48 @@ describe("runWithModelFallback – probe logic", () => {
       },
     } as Partial);
 
-    // Cooldown within probe margin — but probe only applies when hasFallbackCandidates
     const almostExpired = NOW + 30 * 1000;
     mockedGetSoonestCooldownExpiry.mockReturnValue(almostExpired);
 
     const run = vi.fn().mockResolvedValue("unreachable");
 
-    try {
-      await runWithModelFallback({
+    await expect(
+      runWithModelFallback({
         cfg,
         provider: "openai",
         model: "gpt-4.1-mini",
         fallbacksOverride: [],
         run,
-      });
-      // Should not reach here
-      expect.unreachable("should have thrown");
-    } catch {
-      // With no fallbacks and all profiles in cooldown,
-      // shouldProbe = isPrimary && hasFallbackCandidates(false) && ... = false
-      // So it skips, then exhausts all candidates
-      expect(run).not.toHaveBeenCalled();
-    }
+      }),
+    ).rejects.toThrow("All models failed");
+
+    expect(run).not.toHaveBeenCalled();
   });
+
+  it("scopes probe throttling by agentDir to avoid cross-agent suppression", async () => {
+    const cfg = makeCfg();
+    const almostExpired = NOW + 30 * 1000;
+    mockedGetSoonestCooldownExpiry.mockReturnValue(almostExpired);
+
+    const run = vi.fn().mockResolvedValue("probed-ok");
+
+    await runWithModelFallback({
+      cfg,
+      provider: "openai",
+      model: "gpt-4.1-mini",
+      agentDir: "/tmp/agent-a",
+      run,
+    });
+
+    await runWithModelFallback({
+      cfg,
+      provider: "openai",
+      model: "gpt-4.1-mini",
+      agentDir: "/tmp/agent-b",
+      run,
+    });
+
+    expect(run).toHaveBeenNthCalledWith(1, "openai", "gpt-4.1-mini");
+    expect(run).toHaveBeenNthCalledWith(2, "openai", "gpt-4.1-mini");
+  });
 });
diff --git a/src/agents/model-fallback.ts b/src/agents/model-fallback.ts
index 09c84a0606..6e24fbda17 100644
--- a/src/agents/model-fallback.ts
+++ b/src/agents/model-fallback.ts
@@ -219,12 +219,47 @@ function resolveFallbackCandidates(params: {
 }
 
 const lastProbeAttempt = new Map();
-const MIN_PROBE_INTERVAL_MS = 30_000; // 30 seconds between probes per provider
+const MIN_PROBE_INTERVAL_MS = 30_000; // 30 seconds between probes per key
+const PROBE_MARGIN_MS = 2 * 60 * 1000;
+const PROBE_SCOPE_DELIMITER = "::";
+
+function resolveProbeThrottleKey(provider: string, agentDir?: string): string {
+  const scope = String(agentDir ?? "").trim();
+  return scope ? `${scope}${PROBE_SCOPE_DELIMITER}${provider}` : provider;
+}
+
+function shouldProbePrimaryDuringCooldown(params: {
+  isPrimary: boolean;
+  hasFallbackCandidates: boolean;
+  now: number;
+  throttleKey: string;
+  authStore: ReturnType;
+  profileIds: string[];
+}): boolean {
+  if (!params.isPrimary || !params.hasFallbackCandidates) {
+    return false;
+  }
+
+  const lastProbe = lastProbeAttempt.get(params.throttleKey) ?? 0;
+  if (params.now - lastProbe < MIN_PROBE_INTERVAL_MS) {
+    return false;
+  }
+
+  const soonest = getSoonestCooldownExpiry(params.authStore, params.profileIds);
+  if (soonest === null || !Number.isFinite(soonest)) {
+    return true;
+  }
+
+  // Probe when cooldown already expired or within the configured margin.
+  return params.now >= soonest - PROBE_MARGIN_MS;
+}
 
 /** @internal – exposed for unit tests only */
 export const _probeThrottleInternals = {
   lastProbeAttempt,
   MIN_PROBE_INTERVAL_MS,
+  PROBE_MARGIN_MS,
+  resolveProbeThrottleKey,
 } as const;
 
 export async function runWithModelFallback(params: {
@@ -264,27 +299,18 @@ export async function runWithModelFallback(params: {
     if (profileIds.length > 0 && !isAnyProfileAvailable) {
       // All profiles for this provider are in cooldown.
       // For the primary model (i === 0), probe it if the soonest cooldown
-      // expiry is close (within 2 minutes) or already past. This avoids
-      // staying on a fallback model long after the rate-limit window clears
-      // when exponential backoff cooldowns exceed the actual provider limit.
-      const isPrimary = i === 0;
-      const shouldProbe =
-        isPrimary &&
-        hasFallbackCandidates &&
-        (() => {
-          const lastProbe = lastProbeAttempt.get(candidate.provider) ?? 0;
-          if (Date.now() - lastProbe < MIN_PROBE_INTERVAL_MS) {
-            return false; // throttled
-          }
-          const soonest = getSoonestCooldownExpiry(authStore, profileIds);
-          if (soonest === null || !Number.isFinite(soonest)) {
-            return true;
-          }
-          const now = Date.now();
-          // Probe when cooldown already expired or within 2 min of expiry
-          const PROBE_MARGIN_MS = 2 * 60 * 1000;
-          return now >= soonest - PROBE_MARGIN_MS;
-        })();
+      // expiry is close or already past. This avoids staying on a fallback
+      // model long after the real rate-limit window clears.
+      const now = Date.now();
+      const probeThrottleKey = resolveProbeThrottleKey(candidate.provider, params.agentDir);
+      const shouldProbe = shouldProbePrimaryDuringCooldown({
+        isPrimary: i === 0,
+        hasFallbackCandidates,
+        now,
+        throttleKey: probeThrottleKey,
+        authStore,
+        profileIds,
+      });
       if (!shouldProbe) {
         // Skip without attempting
         attempts.push({
@@ -298,7 +324,7 @@ export async function runWithModelFallback(params: {
         // Primary model probe: attempt it despite cooldown to detect recovery.
         // If it fails, the error is caught below and we fall through to the
         // next candidate as usual.
-        lastProbeAttempt.set(candidate.provider, Date.now());
+        lastProbeAttempt.set(probeThrottleKey, now);
       }
     }
     try {
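
For reference, here is a minimal standalone sketch of the decision rule this patch extracts into shouldProbePrimaryDuringCooldown: probe only the primary model, only while fallback candidates exist, at most once per 30 seconds per agentDir-scoped provider key, and only once the soonest cooldown expiry is past or within the two-minute margin. This is illustrative only; the standalone function names, the lastProbeAt and soonestCooldownExpiry parameters, and the inlined constants are assumptions made so the sketch runs on its own, since the real helper is module-internal and reads cooldown state from the auth store.

// Illustrative sketch only (assumed standalone API, not the module's real exports).
const MIN_PROBE_INTERVAL_MS = 30_000; // at most one probe per throttle key per 30s
const PROBE_MARGIN_MS = 2 * 60 * 1000; // probe when expiry is within 2 minutes

function probeThrottleKey(provider: string, agentDir?: string): string {
  // Scoping by agentDir keeps one agent's probe from suppressing another's.
  const scope = (agentDir ?? "").trim();
  return scope ? `${scope}::${provider}` : provider;
}

function shouldProbe(opts: {
  isPrimary: boolean;
  hasFallbackCandidates: boolean;
  now: number;
  lastProbeAt: number | undefined; // last probe recorded for this throttle key
  soonestCooldownExpiry: number | null; // null when no expiry is known
}): boolean {
  if (!opts.isPrimary || !opts.hasFallbackCandidates) return false;
  if (opts.now - (opts.lastProbeAt ?? 0) < MIN_PROBE_INTERVAL_MS) return false; // throttled
  if (opts.soonestCooldownExpiry === null || !Number.isFinite(opts.soonestCooldownExpiry)) {
    return true; // no known expiry, so try the primary again
  }
  return opts.now >= opts.soonestCooldownExpiry - PROBE_MARGIN_MS;
}

// Example: cooldown expires in 30s, which is inside the 2-minute margin, so probe.
const now = Date.now();
const key = probeThrottleKey("openai", "/tmp/agent-a"); // "/tmp/agent-a::openai"
const probe = shouldProbe({
  isPrimary: true,
  hasFallbackCandidates: true,
  now,
  lastProbeAt: undefined,
  soonestCooldownExpiry: now + 30_000,
});
// probe === true; a caller would then record the attempt under `key` before running.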