fix: enforce reasoning tags across providers (#801) (thanks @mcinteerj)

2026-02-19 18:39:20 -05:00 · 2026-01-12 22:58:44 +00:00
parent cd169aceb5
commit 382681178b
5 changed files with 101 additions and 10 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -34,6 +34,7 @@

 ### Fixes
 - Auto-reply: inline `/status` now honors allowlists (authorized stripped + replied inline; unauthorized leaves text for the agent) to match command gating tests.
+- Auto-reply: enforce `<final>` tag for all reasoning-tag providers (Gemini Antigravity, MiniMax, etc.), not just Ollama. (#801 — thanks @mcinteerj)
 - Models: normalize `${ENV_VAR}` apiKey config values and auto-fill missing provider `apiKey` from env/auth when custom provider models are configured (fixes MiniMax “Unknown model” on fresh installs).
 - Models/Tools: include `MiniMax-VL-01` in implicit MiniMax provider so image pairing uses a real vision model.
 - Telegram: show typing indicator in General forum topics. (#779) — thanks @azade-c.
--- a/src/auto-reply/reply.reasoning-tags.test.ts
+++ b/src/auto-reply/reply.reasoning-tags.test.ts
@@ -0,0 +1,83 @@
+import path from "node:path";
+
+import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
+
+import { withTempHome as withTempHomeBase } from "../../test/helpers/temp-home.js";
+import { loadModelCatalog } from "../agents/model-catalog.js";
+import { runEmbeddedPiAgent } from "../agents/pi-embedded.js";
+import { getReplyFromConfig } from "./reply.js";
+
+vi.mock("../agents/pi-embedded.js", () => ({
+  abortEmbeddedPiRun: vi.fn().mockReturnValue(false),
+  runEmbeddedPiAgent: vi.fn(),
+  queueEmbeddedPiMessage: vi.fn().mockReturnValue(false),
+  resolveEmbeddedSessionLane: (key: string) =>
+    `session:${key.trim() || "main"}`,
+  isEmbeddedPiRunActive: vi.fn().mockReturnValue(false),
+  isEmbeddedPiRunStreaming: vi.fn().mockReturnValue(false),
+}));
+
+vi.mock("../agents/model-catalog.js", () => ({
+  loadModelCatalog: vi.fn(),
+}));
+
+async function withTempHome<T>(fn: (home: string) => Promise<T>): Promise<T> {
+  return withTempHomeBase(
+    async (home) => {
+      return await fn(home);
+    },
+    { prefix: "clawdbot-reasoning-tags-" },
+  );
+}
+
+describe("reasoning tag enforcement", () => {
+  const reasoningModel = "google-antigravity/gemini-3";
+
+  beforeEach(() => {
+    vi.mocked(runEmbeddedPiAgent).mockReset();
+    vi.mocked(loadModelCatalog).mockResolvedValue([
+      { id: "gemini-3", name: "Gemini 3", provider: "google-antigravity" },
+    ]);
+  });
+
+  afterEach(() => {
+    vi.clearAllMocks();
+  });
+
+  it("sets enforceFinalTag for providers that require reasoning tags", async () => {
+    await withTempHome(async (home) => {
+      vi.mocked(runEmbeddedPiAgent).mockResolvedValue({
+        payloads: [{ text: "ok" }],
+        meta: {
+          durationMs: 1,
+          agentMeta: {
+            sessionId: "s",
+            provider: "google-antigravity",
+            model: "gemini-3",
+          },
+        },
+      });
+
+      await getReplyFromConfig(
+        { Body: "hello", From: "+1999", To: "+2000" },
+        {},
+        {
+          agents: {
+            defaults: {
+              model: reasoningModel,
+              models: { [reasoningModel]: {} },
+              workspace: path.join(home, "clawd"),
+            },
+          },
+          whatsapp: { allowFrom: ["*"] },
+          session: { store: path.join(home, "sessions.json") },
+        },
+      );
+
+      expect(runEmbeddedPiAgent).toHaveBeenCalledTimes(1);
+      const args = vi.mocked(runEmbeddedPiAgent).mock.calls[0]?.[0];
+      expect(args?.enforceFinalTag).toBe(true);
+      expect(args?.provider).toBe("google-antigravity");
+    });
+  });
+});
--- a/src/auto-reply/reply.ts
+++ b/src/auto-reply/reply.ts
@@ -849,7 +849,8 @@ export async function getReplyFromConfig(
      formatModelSwitchEvent,
      agentCfg,
      modelState: {
-        resolveDefaultThinkingLevel: modelState.resolveDefaultThinkingLevel,
+        resolveDefaultThinkingLevel: async () =>
+          (await modelState.resolveDefaultThinkingLevel()) ?? "off",
        allowedModelKeys: modelState.allowedModelKeys,
        allowedModelCatalog: modelState.allowedModelCatalog,
        resetModelOverride: modelState.resetModelOverride,
--- a/src/auto-reply/reply/agent-runner.ts
+++ b/src/auto-reply/reply/agent-runner.ts
@@ -565,7 +565,8 @@ export async function runReplyAgent(params: {
            }
            text = stripped.text;
          }
-          if (isSilentReplyText(text, SILENT_REPLY_TOKEN)) return { skip: true };
+          if (isSilentReplyText(text, SILENT_REPLY_TOKEN))
+            return { skip: true };
          return { text, skip: false };
        };
        const handlePartialForTyping = async (
@@ -713,8 +714,6 @@ export async function runReplyAgent(params: {
                blockStreamingEnabled && opts?.onBlockReply
                  ? async (payload) => {
                      const { text, skip } = normalizeStreamingText(payload);
-                      const hasMedia = (payload.mediaUrls?.length ?? 0) > 0;
-                      if (skip && !hasMedia) return;
                      const taggedPayload = applyReplyTagsToPayload(
                        {
                          text,
@@ -723,6 +722,10 @@ export async function runReplyAgent(params: {
                        },
                        sessionCtx.MessageSid,
                      );
+                      const hasMedia =
+                        Boolean(taggedPayload.mediaUrl) ||
+                        (taggedPayload.mediaUrls?.length ?? 0) > 0;
+                      if (skip && !hasMedia) return;
                      // Let through payloads with audioAsVoice flag even if empty (need to track it)
                      if (
                        !isRenderablePayload(taggedPayload) &&
@@ -737,9 +740,6 @@ export async function runReplyAgent(params: {
                        },
                      );
                      const cleaned = parsed.text || undefined;
-                      const hasMedia =
-                        Boolean(taggedPayload.mediaUrl) ||
-                        (taggedPayload.mediaUrls?.length ?? 0) > 0;
                      // Skip empty payloads unless they have audioAsVoice flag (need to track it)
                      if (
                        !cleaned &&
--- a/src/auto-reply/reply/directive-handling.ts
+++ b/src/auto-reply/reply/directive-handling.ts
@@ -635,7 +635,9 @@ export async function applyInlineDirectivesFastLane(params: {
    resolveDefaultThinkingLevel: () => Promise<ThinkLevel>;
    allowedModelKeys: Set<string>;
    allowedModelCatalog: Awaited<
-      ReturnType<typeof import("../../agents/model-catalog.js").loadModelCatalog>
+      ReturnType<
+        typeof import("../../agents/model-catalog.js").loadModelCatalog
+      >
    >;
    resetModelOverride: boolean;
  };
@@ -1357,7 +1359,9 @@ export async function handleDirectiveOnly(params: {
    }
  }
  if (directives.hasQueueDirective && directives.queueMode) {
-    parts.push(formatDirectiveAck(`Queue mode set to ${directives.queueMode}.`));
+    parts.push(
+      formatDirectiveAck(`Queue mode set to ${directives.queueMode}.`),
+    );
  } else if (directives.hasQueueDirective && directives.queueReset) {
    parts.push(formatDirectiveAck("Queue mode reset to default."));
  }
@@ -1373,7 +1377,9 @@ export async function handleDirectiveOnly(params: {
    parts.push(formatDirectiveAck(`Queue cap set to ${directives.cap}.`));
  }
  if (directives.hasQueueDirective && directives.dropPolicy) {
-    parts.push(formatDirectiveAck(`Queue drop set to ${directives.dropPolicy}.`));
+    parts.push(
+      formatDirectiveAck(`Queue drop set to ${directives.dropPolicy}.`),
+    );
  }
  const ack = parts.join(" ").trim();
  if (!ack && directives.hasStatusDirective) return undefined;