diff --git a/CHANGELOG.md b/CHANGELOG.md index 5fed92e9fb..3f2113a455 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -34,6 +34,7 @@ ### Fixes - Auto-reply: inline `/status` now honors allowlists (authorized stripped + replied inline; unauthorized leaves text for the agent) to match command gating tests. +- Auto-reply: enforce `<final>` tag for all reasoning-tag providers (Gemini Antigravity, MiniMax, etc.), not just Ollama. (#801 — thanks @mcinteerj) - Models: normalize `${ENV_VAR}` apiKey config values and auto-fill missing provider `apiKey` from env/auth when custom provider models are configured (fixes MiniMax “Unknown model” on fresh installs). - Models/Tools: include `MiniMax-VL-01` in implicit MiniMax provider so image pairing uses a real vision model. - Telegram: show typing indicator in General forum topics. (#779) — thanks @azade-c. diff --git a/src/auto-reply/reply.reasoning-tags.test.ts b/src/auto-reply/reply.reasoning-tags.test.ts new file mode 100644 index 0000000000..4b9ed3b0ad --- /dev/null +++ b/src/auto-reply/reply.reasoning-tags.test.ts @@ -0,0 +1,83 @@ +import path from "node:path"; + +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; + +import { withTempHome as withTempHomeBase } from "../../test/helpers/temp-home.js"; +import { loadModelCatalog } from "../agents/model-catalog.js"; +import { runEmbeddedPiAgent } from "../agents/pi-embedded.js"; +import { getReplyFromConfig } from "./reply.js"; + +vi.mock("../agents/pi-embedded.js", () => ({ + abortEmbeddedPiRun: vi.fn().mockReturnValue(false), + runEmbeddedPiAgent: vi.fn(), + queueEmbeddedPiMessage: vi.fn().mockReturnValue(false), + resolveEmbeddedSessionLane: (key: string) => + `session:${key.trim() || "main"}`, + isEmbeddedPiRunActive: vi.fn().mockReturnValue(false), + isEmbeddedPiRunStreaming: vi.fn().mockReturnValue(false), +})); + +vi.mock("../agents/model-catalog.js", () => ({ + loadModelCatalog: vi.fn(), +})); + +async function withTempHome<T>(fn: (home: string) => 
Promise<T>): Promise<T> { + return withTempHomeBase( + async (home) => { + return await fn(home); + }, + { prefix: "clawdbot-reasoning-tags-" }, + ); +} + +describe("reasoning tag enforcement", () => { + const reasoningModel = "google-antigravity/gemini-3"; + + beforeEach(() => { + vi.mocked(runEmbeddedPiAgent).mockReset(); + vi.mocked(loadModelCatalog).mockResolvedValue([ + { id: "gemini-3", name: "Gemini 3", provider: "google-antigravity" }, + ]); + }); + + afterEach(() => { + vi.clearAllMocks(); + }); + + it("sets enforceFinalTag for providers that require reasoning tags", async () => { + await withTempHome(async (home) => { + vi.mocked(runEmbeddedPiAgent).mockResolvedValue({ + payloads: [{ text: "ok" }], + meta: { + durationMs: 1, + agentMeta: { + sessionId: "s", + provider: "google-antigravity", + model: "gemini-3", + }, + }, + }); + + await getReplyFromConfig( + { Body: "hello", From: "+1999", To: "+2000" }, + {}, + { + agents: { + defaults: { + model: reasoningModel, + models: { [reasoningModel]: {} }, + workspace: path.join(home, "clawd"), + }, + }, + whatsapp: { allowFrom: ["*"] }, + session: { store: path.join(home, "sessions.json") }, + }, + ); + + expect(runEmbeddedPiAgent).toHaveBeenCalledTimes(1); + const args = vi.mocked(runEmbeddedPiAgent).mock.calls[0]?.[0]; + expect(args?.enforceFinalTag).toBe(true); + expect(args?.provider).toBe("google-antigravity"); + }); + }); +}); diff --git a/src/auto-reply/reply.ts b/src/auto-reply/reply.ts index 90aaf7e430..963bf622a2 100644 --- a/src/auto-reply/reply.ts +++ b/src/auto-reply/reply.ts @@ -849,7 +849,8 @@ export async function getReplyFromConfig( formatModelSwitchEvent, agentCfg, modelState: { - resolveDefaultThinkingLevel: modelState.resolveDefaultThinkingLevel, + resolveDefaultThinkingLevel: async () => + (await modelState.resolveDefaultThinkingLevel()) ?? 
"off", allowedModelKeys: modelState.allowedModelKeys, allowedModelCatalog: modelState.allowedModelCatalog, resetModelOverride: modelState.resetModelOverride, diff --git a/src/auto-reply/reply/agent-runner.ts b/src/auto-reply/reply/agent-runner.ts index 26d9814162..4103051045 100644 --- a/src/auto-reply/reply/agent-runner.ts +++ b/src/auto-reply/reply/agent-runner.ts @@ -565,7 +565,8 @@ export async function runReplyAgent(params: { } text = stripped.text; } - if (isSilentReplyText(text, SILENT_REPLY_TOKEN)) return { skip: true }; + if (isSilentReplyText(text, SILENT_REPLY_TOKEN)) + return { skip: true }; return { text, skip: false }; }; const handlePartialForTyping = async ( @@ -713,8 +714,6 @@ export async function runReplyAgent(params: { blockStreamingEnabled && opts?.onBlockReply ? async (payload) => { const { text, skip } = normalizeStreamingText(payload); - const hasMedia = (payload.mediaUrls?.length ?? 0) > 0; - if (skip && !hasMedia) return; const taggedPayload = applyReplyTagsToPayload( { text, @@ -723,6 +722,10 @@ export async function runReplyAgent(params: { }, sessionCtx.MessageSid, ); + const hasMedia = + Boolean(taggedPayload.mediaUrl) || + (taggedPayload.mediaUrls?.length ?? 0) > 0; + if (skip && !hasMedia) return; // Let through payloads with audioAsVoice flag even if empty (need to track it) if ( !isRenderablePayload(taggedPayload) && @@ -737,9 +740,6 @@ export async function runReplyAgent(params: { }, ); const cleaned = parsed.text || undefined; - const hasMedia = - Boolean(taggedPayload.mediaUrl) || - (taggedPayload.mediaUrls?.length ?? 
0) > 0; // Skip empty payloads unless they have audioAsVoice flag (need to track it) if ( !cleaned && diff --git a/src/auto-reply/reply/directive-handling.ts b/src/auto-reply/reply/directive-handling.ts index 658425b125..afeb5cbabd 100644 --- a/src/auto-reply/reply/directive-handling.ts +++ b/src/auto-reply/reply/directive-handling.ts @@ -635,7 +635,9 @@ export async function applyInlineDirectivesFastLane(params: { resolveDefaultThinkingLevel: () => Promise; allowedModelKeys: Set; allowedModelCatalog: Awaited< - ReturnType<typeof import("../../agents/model-catalog.js").loadModelCatalog> + ReturnType< + typeof import("../../agents/model-catalog.js").loadModelCatalog + > >; resetModelOverride: boolean; }; @@ -1357,7 +1359,9 @@ export async function handleDirectiveOnly(params: { } } if (directives.hasQueueDirective && directives.queueMode) { - parts.push(formatDirectiveAck(`Queue mode set to ${directives.queueMode}.`)); + parts.push( + formatDirectiveAck(`Queue mode set to ${directives.queueMode}.`), + ); } else if (directives.hasQueueDirective && directives.queueReset) { parts.push(formatDirectiveAck("Queue mode reset to default.")); } @@ -1373,7 +1377,9 @@ export async function handleDirectiveOnly(params: { parts.push(formatDirectiveAck(`Queue cap set to ${directives.cap}.`)); } if (directives.hasQueueDirective && directives.dropPolicy) { - parts.push(formatDirectiveAck(`Queue drop set to ${directives.dropPolicy}.`)); + parts.push( + formatDirectiveAck(`Queue drop set to ${directives.dropPolicy}.`), + ); } const ack = parts.join(" ").trim(); if (!ack && directives.hasStatusDirective) return undefined;