fix: enforce reasoning tags across providers (#801) (thanks @mcinteerj)

This commit is contained in:
Peter Steinberger
2026-01-12 22:58:44 +00:00
parent cd169aceb5
commit 382681178b
5 changed files with 101 additions and 10 deletions

View File

@@ -34,6 +34,7 @@
### Fixes
- Auto-reply: inline `/status` now honors allowlists (authorized stripped + replied inline; unauthorized leaves text for the agent) to match command gating tests.
- Auto-reply: enforce `<final>` tag for all reasoning-tag providers (Gemini Antigravity, MiniMax, etc.), not just Ollama. (#801 — thanks @mcinteerj)
- Models: normalize `${ENV_VAR}` apiKey config values and auto-fill missing provider `apiKey` from env/auth when custom provider models are configured (fixes MiniMax “Unknown model” on fresh installs).
- Models/Tools: include `MiniMax-VL-01` in implicit MiniMax provider so image pairing uses a real vision model.
- Telegram: show typing indicator in General forum topics. (#779) — thanks @azade-c.

View File

@@ -0,0 +1,83 @@
import path from "node:path";
import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
import { withTempHome as withTempHomeBase } from "../../test/helpers/temp-home.js";
import { loadModelCatalog } from "../agents/model-catalog.js";
import { runEmbeddedPiAgent } from "../agents/pi-embedded.js";
import { getReplyFromConfig } from "./reply.js";
vi.mock("../agents/pi-embedded.js", () => ({
abortEmbeddedPiRun: vi.fn().mockReturnValue(false),
runEmbeddedPiAgent: vi.fn(),
queueEmbeddedPiMessage: vi.fn().mockReturnValue(false),
resolveEmbeddedSessionLane: (key: string) =>
`session:${key.trim() || "main"}`,
isEmbeddedPiRunActive: vi.fn().mockReturnValue(false),
isEmbeddedPiRunStreaming: vi.fn().mockReturnValue(false),
}));
vi.mock("../agents/model-catalog.js", () => ({
loadModelCatalog: vi.fn(),
}));
async function withTempHome<T>(fn: (home: string) => Promise<T>): Promise<T> {
return withTempHomeBase(
async (home) => {
return await fn(home);
},
{ prefix: "clawdbot-reasoning-tags-" },
);
}
describe("reasoning tag enforcement", () => {
const reasoningModel = "google-antigravity/gemini-3";
beforeEach(() => {
vi.mocked(runEmbeddedPiAgent).mockReset();
vi.mocked(loadModelCatalog).mockResolvedValue([
{ id: "gemini-3", name: "Gemini 3", provider: "google-antigravity" },
]);
});
afterEach(() => {
vi.clearAllMocks();
});
it("sets enforceFinalTag for providers that require reasoning tags", async () => {
await withTempHome(async (home) => {
vi.mocked(runEmbeddedPiAgent).mockResolvedValue({
payloads: [{ text: "ok" }],
meta: {
durationMs: 1,
agentMeta: {
sessionId: "s",
provider: "google-antigravity",
model: "gemini-3",
},
},
});
await getReplyFromConfig(
{ Body: "hello", From: "+1999", To: "+2000" },
{},
{
agents: {
defaults: {
model: reasoningModel,
models: { [reasoningModel]: {} },
workspace: path.join(home, "clawd"),
},
},
whatsapp: { allowFrom: ["*"] },
session: { store: path.join(home, "sessions.json") },
},
);
expect(runEmbeddedPiAgent).toHaveBeenCalledTimes(1);
const args = vi.mocked(runEmbeddedPiAgent).mock.calls[0]?.[0];
expect(args?.enforceFinalTag).toBe(true);
expect(args?.provider).toBe("google-antigravity");
});
});
});

View File

@@ -849,7 +849,8 @@ export async function getReplyFromConfig(
formatModelSwitchEvent,
agentCfg,
modelState: {
resolveDefaultThinkingLevel: modelState.resolveDefaultThinkingLevel,
resolveDefaultThinkingLevel: async () =>
(await modelState.resolveDefaultThinkingLevel()) ?? "off",
allowedModelKeys: modelState.allowedModelKeys,
allowedModelCatalog: modelState.allowedModelCatalog,
resetModelOverride: modelState.resetModelOverride,

View File

@@ -565,7 +565,8 @@ export async function runReplyAgent(params: {
}
text = stripped.text;
}
if (isSilentReplyText(text, SILENT_REPLY_TOKEN)) return { skip: true };
if (isSilentReplyText(text, SILENT_REPLY_TOKEN))
return { skip: true };
return { text, skip: false };
};
const handlePartialForTyping = async (
@@ -713,8 +714,6 @@ export async function runReplyAgent(params: {
blockStreamingEnabled && opts?.onBlockReply
? async (payload) => {
const { text, skip } = normalizeStreamingText(payload);
const hasMedia = (payload.mediaUrls?.length ?? 0) > 0;
if (skip && !hasMedia) return;
const taggedPayload = applyReplyTagsToPayload(
{
text,
@@ -723,6 +722,10 @@ export async function runReplyAgent(params: {
},
sessionCtx.MessageSid,
);
const hasMedia =
Boolean(taggedPayload.mediaUrl) ||
(taggedPayload.mediaUrls?.length ?? 0) > 0;
if (skip && !hasMedia) return;
// Let through payloads with audioAsVoice flag even if empty (need to track it)
if (
!isRenderablePayload(taggedPayload) &&
@@ -737,9 +740,6 @@ export async function runReplyAgent(params: {
},
);
const cleaned = parsed.text || undefined;
const hasMedia =
Boolean(taggedPayload.mediaUrl) ||
(taggedPayload.mediaUrls?.length ?? 0) > 0;
// Skip empty payloads unless they have audioAsVoice flag (need to track it)
if (
!cleaned &&

View File

@@ -635,7 +635,9 @@ export async function applyInlineDirectivesFastLane(params: {
resolveDefaultThinkingLevel: () => Promise<ThinkLevel>;
allowedModelKeys: Set<string>;
allowedModelCatalog: Awaited<
ReturnType<typeof import("../../agents/model-catalog.js").loadModelCatalog>
ReturnType<
typeof import("../../agents/model-catalog.js").loadModelCatalog
>
>;
resetModelOverride: boolean;
};
@@ -1357,7 +1359,9 @@ export async function handleDirectiveOnly(params: {
}
}
if (directives.hasQueueDirective && directives.queueMode) {
parts.push(formatDirectiveAck(`Queue mode set to ${directives.queueMode}.`));
parts.push(
formatDirectiveAck(`Queue mode set to ${directives.queueMode}.`),
);
} else if (directives.hasQueueDirective && directives.queueReset) {
parts.push(formatDirectiveAck("Queue mode reset to default."));
}
@@ -1373,7 +1377,9 @@ export async function handleDirectiveOnly(params: {
parts.push(formatDirectiveAck(`Queue cap set to ${directives.cap}.`));
}
if (directives.hasQueueDirective && directives.dropPolicy) {
parts.push(formatDirectiveAck(`Queue drop set to ${directives.dropPolicy}.`));
parts.push(
formatDirectiveAck(`Queue drop set to ${directives.dropPolicy}.`),
);
}
const ack = parts.join(" ").trim();
if (!ack && directives.hasStatusDirective) return undefined;