From 274ec379b07d8b80424915cbb081d01e80d15e19 Mon Sep 17 00:00:00 2001
From: Vignesh Natarajan <vigneshnatarajan92@gmail.com>
Date: Thu, 12 Feb 2026 17:08:01 -0800
Subject: [PATCH] fix: avoid auth cooldowns for format-only failures

---
 .../run.overflow-compaction.test.ts           | 44 +++++++++++++++++++
 src/agents/pi-embedded-runner/run.ts          | 14 ++++--
 src/agents/session-transcript-repair.test.ts  | 17 +++++++
 src/agents/session-transcript-repair.ts       | 15 ++++---
 4 files changed, 82 insertions(+), 8 deletions(-)

diff --git a/src/agents/pi-embedded-runner/run.overflow-compaction.test.ts b/src/agents/pi-embedded-runner/run.overflow-compaction.test.ts
index df85d888cf..c76ccc376d 100644
--- a/src/agents/pi-embedded-runner/run.overflow-compaction.test.ts
+++ b/src/agents/pi-embedded-runner/run.overflow-compaction.test.ts
@@ -158,6 +158,8 @@ vi.mock("../pi-embedded-helpers.js", async () => {
 });
 
 import type { EmbeddedRunAttemptResult } from "./run/types.js";
+import { markAuthProfileFailure } from "../auth-profiles.js";
+import * as piEmbeddedHelpers from "../pi-embedded-helpers.js";
 import { compactEmbeddedPiSessionDirect } from "./compact.js";
 import { log } from "./logger.js";
 import { runEmbeddedPiAgent } from "./run.js";
@@ -173,6 +175,9 @@ const mockedSessionLikelyHasOversizedToolResults = vi.mocked(sessionLikelyHasOve
 const mockedTruncateOversizedToolResultsInSession = vi.mocked(
   truncateOversizedToolResultsInSession,
 );
+const mockedMarkAuthProfileFailure = vi.mocked(markAuthProfileFailure);
+const mockedClassifyFailoverReason = vi.mocked(piEmbeddedHelpers.classifyFailoverReason);
+const mockedIsFailoverAssistantError = vi.mocked(piEmbeddedHelpers.isFailoverAssistantError);
 
 function makeAttemptResult(
   overrides: Partial<EmbeddedRunAttemptResult> = {},
@@ -433,4 +438,43 @@ describe("overflow compaction in run loop", () => {
     expect(mockedCompactDirect).not.toHaveBeenCalled();
     expect(log.warn).not.toHaveBeenCalledWith(expect.stringContaining("source=assistantError"));
   });
+
+  it("does not cooldown auth profile for assistant format errors", async () => {
+    mockedClassifyFailoverReason.mockReturnValue("format");
+    mockedIsFailoverAssistantError.mockReturnValue(true);
+
+    mockedRunEmbeddedAttempt.mockResolvedValueOnce(
+      makeAttemptResult({
+        promptError: null,
+        lastAssistant: {
+          stopReason: "error",
+          errorMessage: "Cloud Code Assist format error",
+        } as EmbeddedRunAttemptResult["lastAssistant"],
+      }),
+    );
+
+    const result = await runEmbeddedPiAgent(baseParams);
+
+    expect(result.meta.error).toBeUndefined();
+    expect(mockedMarkAuthProfileFailure).not.toHaveBeenCalled();
+    expect(mockedRunEmbeddedAttempt).toHaveBeenCalledTimes(1);
+  });
+
+  it("does not cooldown auth profile for prompt format errors", async () => {
+    mockedClassifyFailoverReason.mockReturnValue("format");
+
+    mockedRunEmbeddedAttempt.mockResolvedValueOnce(
+      makeAttemptResult({
+        promptError: new Error("Cloud Code Assist format error"),
+        lastAssistant: {
+          stopReason: "error",
+          errorMessage: "Cloud Code Assist format error",
+        } as EmbeddedRunAttemptResult["lastAssistant"],
+      }),
+    );
+
+    await expect(runEmbeddedPiAgent(baseParams)).rejects.toThrow("Cloud Code Assist format error");
+    expect(mockedMarkAuthProfileFailure).not.toHaveBeenCalled();
+    expect(mockedRunEmbeddedAttempt).toHaveBeenCalledTimes(1);
+  });
 });
diff --git a/src/agents/pi-embedded-runner/run.ts b/src/agents/pi-embedded-runner/run.ts
index bcb448f573..0d1a3a9b95 100644
--- a/src/agents/pi-embedded-runner/run.ts
+++ b/src/agents/pi-embedded-runner/run.ts
@@ -678,7 +678,12 @@ export async function runEmbeddedPiAgent(
             // NOT a provider/auth issue. Cooling down the profile cascades failures to
             // all sessions sharing the same auth profile.
             // See: https://github.com/openclaw/openclaw/issues/15037
-            if (promptFailoverReason && promptFailoverReason !== "timeout" && promptFailoverReason !== "format" && lastProfileId) {
+            if (
+              promptFailoverReason &&
+              promptFailoverReason !== "timeout" &&
+              promptFailoverReason !== "format" &&
+              lastProfileId
+            ) {
               await markAuthProfileFailure({
                 store: authStore,
                 profileId: lastProfileId,
@@ -758,8 +763,11 @@ export async function runEmbeddedPiAgent(
             );
           }
 
-          // Treat timeout as potential rate limit (Antigravity hangs on rate limit)
-          const shouldRotate = (!aborted && failoverFailure) || timedOut;
+          // Treat timeout as potential rate limit (Antigravity hangs on rate limit).
+          // Don't rotate profiles for format errors; those are usually session input
+          // issues and shouldn't affect shared auth profile health.
+          const shouldRotate =
+            timedOut || (!aborted && failoverFailure && assistantFailoverReason !== "format");
 
           if (shouldRotate) {
             if (lastProfileId) {
diff --git a/src/agents/session-transcript-repair.test.ts b/src/agents/session-transcript-repair.test.ts
index 3bfc1ea1fc..824b45dabb 100644
--- a/src/agents/session-transcript-repair.test.ts
+++ b/src/agents/session-transcript-repair.test.ts
@@ -138,6 +138,23 @@ describe("sanitizeToolUseResultPairing", () => {
     expect(result.messages[0]?.role).toBe("user");
   });
 
+  it("keeps errored assistant text-only messages unchanged", () => {
+    const input = [
+      {
+        role: "assistant",
+        content: [{ type: "text", text: "I ran into trouble and explained it." }],
+        stopReason: "error",
+      },
+      { role: "user", content: "okay" },
+    ] as AgentMessage[];
+
+    const result = repairToolUseResultPairing(input);
+
+    // No tool calls were removed, so no transcript rewrite should happen.
+    expect(result.messages).toBe(input);
+    expect(result.messages).toHaveLength(2);
+  });
+
   it("strips tool_use blocks but keeps text from errored assistant messages", () => {
     // When an errored assistant message has both text and tool_use blocks,
     // strip the tool_use blocks but keep the text content.
diff --git a/src/agents/session-transcript-repair.ts b/src/agents/session-transcript-repair.ts
index 97d7c39353..281224a544 100644
--- a/src/agents/session-transcript-repair.ts
+++ b/src/agents/session-transcript-repair.ts
@@ -227,15 +227,20 @@ export function repairToolUseResultPairing(messages: AgentMessage[]): ToolUseRep
     if (stopReason === "error" || stopReason === "aborted") {
       if (Array.isArray(assistant.content)) {
         const nonToolContent = assistant.content.filter((block) => {
-          if (!block || typeof block !== "object") return true;
-          const rec = block as { type?: unknown };
-          return !TOOL_CALL_TYPES.has(rec.type as string);
+          if (!block || typeof block !== "object") {
+            return true;
+          }
+          return !isToolCallBlock(block);
         });
+        const removedToolCalls = nonToolContent.length !== assistant.content.length;
         if (nonToolContent.length > 0) {
           out.push({ ...msg, content: nonToolContent } as AgentMessage);
         }
-        // If all content was tool calls, drop the entire message
-        changed = true;
+        if (removedToolCalls) {
+          // Tool-call blocks were stripped (the message is dropped entirely when nothing
+          // else remained), so flag the rewrite. Messages without tool calls are not flagged.
+          changed = true;
+        }
       } else {
         out.push(msg);
       }