fix(agents): strip [Historical context: ...] and tool call text from streaming path

- Add [Historical context: ...] marker pattern to stripDowngradedToolCallText
- Apply stripDowngradedToolCallText in emitBlockChunk streaming path
- Previously only stripBlockTags ran during streaming, leaking [Tool Call: ...] markers to users
- Add 7 test cases for the new pattern stripping
2026-02-19 18:39:20 -05:00 · 2026-02-10 23:51:30 +09:00
parent 67d25c6533
commit 7afecce69d
3 changed files with 48 additions and 4 deletions
--- a/src/agents/pi-embedded-subscribe.ts
+++ b/src/agents/pi-embedded-subscribe.ts
@@ -15,7 +15,7 @@ import {
  normalizeTextForComparison,
 } from "./pi-embedded-helpers.js";
 import { createEmbeddedPiSessionEventHandler } from "./pi-embedded-subscribe.handlers.js";
-import { formatReasoningMessage } from "./pi-embedded-utils.js";
+import { formatReasoningMessage, stripDowngradedToolCallText } from "./pi-embedded-utils.js";
 import { hasNonzeroUsage, normalizeUsage, type UsageLike } from "./usage.js";

 const THINKING_TAG_SCAN_RE = /<\s*(\/?)\s*(?:think(?:ing)?|thought|antthinking)\s*>/gi;
@@ -449,7 +449,8 @@ export function subscribeEmbeddedPiSession(params: SubscribeEmbeddedPiSessionPar
      return;
    }
    // Strip <think> and <final> blocks across chunk boundaries to avoid leaking reasoning.
-    const chunk = stripBlockTags(text, state.blockState).trimEnd();
+    // Also strip downgraded tool call text ([Tool Call: ...], [Historical context: ...], etc.).
+    const chunk = stripDowngradedToolCallText(stripBlockTags(text, state.blockState)).trimEnd();
    if (!chunk) {
      return;
    }
--- a/src/agents/pi-embedded-utils.test.ts
+++ b/src/agents/pi-embedded-utils.test.ts
@@ -1,6 +1,10 @@
 import type { AssistantMessage } from "@mariozechner/pi-ai";
 import { describe, expect, it } from "vitest";
-import { extractAssistantText, formatReasoningMessage } from "./pi-embedded-utils.js";
+import {
+  extractAssistantText,
+  formatReasoningMessage,
+  stripDowngradedToolCallText,
+} from "./pi-embedded-utils.js";

 describe("extractAssistantText", () => {
  it("strips Minimax tool invocation XML from text", () => {
@@ -559,3 +563,39 @@ describe("formatReasoningMessage", () => {
    );
  });
 });
+
+describe("stripDowngradedToolCallText", () => {
+  it("strips [Historical context: ...] blocks", () => {
+    const text = `[Historical context: a different model called tool "exec" with arguments {"command":"git status"}]`;
+    expect(stripDowngradedToolCallText(text)).toBe("");
+  });
+
+  it("preserves text before [Historical context: ...] blocks", () => {
+    const text = `Here is the answer.\n[Historical context: a different model called tool "read"]`;
+    expect(stripDowngradedToolCallText(text)).toBe("Here is the answer.");
+  });
+
+  it("preserves text around [Historical context: ...] blocks", () => {
+    const text = `Before.\n[Historical context: tool call info]\nAfter.`;
+    expect(stripDowngradedToolCallText(text)).toBe("Before.\nAfter.");
+  });
+
+  it("strips multiple [Historical context: ...] blocks", () => {
+    const text = `[Historical context: first tool call]\n[Historical context: second tool call]`;
+    expect(stripDowngradedToolCallText(text)).toBe("");
+  });
+
+  it("strips mixed [Tool Call: ...] and [Historical context: ...] blocks", () => {
+    const text = `Intro.\n[Tool Call: exec (ID: toolu_1)]\nArguments: { "command": "ls" }\n[Historical context: a different model called tool "read"]`;
+    expect(stripDowngradedToolCallText(text)).toBe("Intro.");
+  });
+
+  it("returns text unchanged when no markers are present", () => {
+    const text = "Just a normal response with no markers.";
+    expect(stripDowngradedToolCallText(text)).toBe("Just a normal response with no markers.");
+  });
+
+  it("returns empty string for empty input", () => {
+    expect(stripDowngradedToolCallText("")).toBe("");
+  });
+});
--- a/src/agents/pi-embedded-utils.ts
+++ b/src/agents/pi-embedded-utils.ts
@@ -37,7 +37,7 @@ export function stripDowngradedToolCallText(text: string): string {
  if (!text) {
    return text;
  }
-  if (!/\[Tool (?:Call|Result)/i.test(text)) {
+  if (!/\[Tool (?:Call|Result)/i.test(text) && !/\[Historical context/i.test(text)) {
    return text;
  }

@@ -186,6 +186,9 @@ export function stripDowngradedToolCallText(text: string): string {
  // Remove [Tool Result for ID ...] blocks and their content.
  cleaned = cleaned.replace(/\[Tool Result for ID[^\]]*\]\n?[\s\S]*?(?=\n*\[Tool |\n*$)/gi, "");

+  // Remove [Historical context: ...] markers (self-contained within brackets).
+  cleaned = cleaned.replace(/\[Historical context:[^\]]*\]\n?/gi, "");
+
  return cleaned.trim();
 }