refactor(shared): dedupe chat content text extraction

This commit is contained in:
Peter Steinberger
2026-02-15 17:21:36 +00:00
parent ac3db098ab
commit b74c3d80cc
4 changed files with 71 additions and 61 deletions

View File

@@ -3,6 +3,7 @@ import type { CronDelivery, CronMessageChannel } from "../../cron/types.js";
import { loadConfig } from "../../config/config.js";
import { normalizeCronJobCreate, normalizeCronJobPatch } from "../../cron/normalize.js";
import { parseAgentSessionKey } from "../../sessions/session-key-utils.js";
import { extractTextFromChatContent } from "../../shared/chat-content.js";
import { isRecord, truncateUtf16Safe } from "../../utils.js";
import { resolveSessionAgentId } from "../agent-scope.js";
import { optionalStringEnum, stringEnum } from "../schema/typebox.js";
@@ -69,38 +70,13 @@ function truncateText(input: string, maxLen: number) {
return `${truncated}...`;
}
/** Collapses every run of whitespace into a single space and trims both ends. */
function normalizeContextText(raw: string) {
  const singleSpaced = raw.replace(/\s+/g, " ");
  return singleSpaced.trim();
}
/**
 * Extracts the plain text of a chat message for use as reminder context.
 *
 * Only messages whose role is the string `"user"` or `"assistant"` are
 * considered; any other role (including a non-string role) yields `null`.
 * Text extraction is delegated to the shared `extractTextFromChatContent`
 * helper instead of being re-implemented inline, so string and text-block
 * array content are handled in one place.
 *
 * @param message - Loosely-typed chat message; `role` and `content` are narrowed here.
 * @returns `{ role, text }` when the message has an accepted role and the
 *   helper returns non-empty text, otherwise `null`.
 */
function extractMessageText(message: ChatMessage): { role: string; text: string } | null {
  const role = typeof message.role === "string" ? message.role : "";
  if (role !== "user" && role !== "assistant") {
    return null;
  }
  // The helper returns null for content it cannot turn into non-empty text.
  const text = extractTextFromChatContent(message.content);
  return text ? { role, text } : null;
}
async function buildReminderContextLines(params: {

View File

@@ -27,6 +27,7 @@ import { callGateway } from "../../gateway/call.js";
import { logVerbose } from "../../globals.js";
import { formatTimeAgo } from "../../infra/format-time/format-relative.ts";
import { parseAgentSessionKey } from "../../routing/session-key.js";
import { extractTextFromChatContent } from "../../shared/chat-content.js";
import {
formatDurationCompact,
formatTokenUsageDisplay,
@@ -202,45 +203,15 @@ function buildSubagentsHelp() {
// Loosely-typed chat message shape. Every field is optional and typed
// `unknown`, so consumers must narrow a field before using it.
type ChatMessage = {
// Narrowed with a `typeof ... === "string"` check in extractMessageText.
role?: unknown;
// Passed as-is to extractTextFromChatContent by extractMessageText.
content?: unknown;
// NOTE(review): not read by the code visible in this hunk — confirm usage elsewhere in the file.
name?: unknown;
// NOTE(review): not read by the code visible in this hunk — confirm usage elsewhere in the file.
toolName?: unknown;
};
/** Collapses every run of whitespace into a single space and trims both ends. */
function normalizeMessageText(text: string) {
  const singleSpaced = text.replace(/\s+/g, " ");
  return singleSpaced.trim();
}
/**
 * Extracts the plain text of a chat message together with its role.
 *
 * Text extraction is delegated to the shared `extractTextFromChatContent`
 * helper instead of being re-implemented inline. For messages whose role is
 * `"assistant"`, `sanitizeTextContent` is passed as the helper's
 * `sanitizeText` option; other roles are extracted unsanitized.
 *
 * @param message - Loosely-typed chat message; a non-string `role` is treated as `""`.
 * @returns `{ role, text }` when the helper returns non-empty text, otherwise `null`.
 */
export function extractMessageText(message: ChatMessage): { role: string; text: string } | null {
  const role = typeof message.role === "string" ? message.role : "";
  // Only assistant output is run through the sanitizer.
  const shouldSanitize = role === "assistant";
  const text = extractTextFromChatContent(message.content, {
    sanitizeText: shouldSanitize ? sanitizeTextContent : undefined,
  });
  return text ? { role, text } : null;
}
function formatLogLines(messages: ChatMessage[]) {

View File

@@ -0,0 +1,26 @@
import { describe, expect, it } from "vitest";
import { extractTextFromChatContent } from "./chat-content.js";
// Behavioural checks for extractTextFromChatContent: whitespace normalization
// of plain strings, text-block extraction from arrays, and the optional sanitizer.
describe("extractTextFromChatContent", () => {
  it("normalizes string content", () => {
    const result = extractTextFromChatContent(" hello\nworld ");
    expect(result).toBe("hello world");
  });

  it("extracts text blocks from array content", () => {
    // The non-text block in the middle must be ignored.
    const blocks = [
      { type: "text", text: " hello " },
      { type: "image_url", image_url: "https://example.com" },
      { type: "text", text: "world" },
    ];
    expect(extractTextFromChatContent(blocks)).toBe("hello world");
  });

  it("applies sanitizer when provided", () => {
    const stripToolCalls = (text: string) => text.replace(/\[Tool Call:[^\]]+\]\s*/g, "");
    const result = extractTextFromChatContent("Here [Tool Call: foo (ID: 1)] ok", {
      sanitizeText: stripToolCalls,
    });
    expect(result).toBe("Here ok");
  });
});

View File

@@ -0,0 +1,37 @@
/**
 * Extracts whitespace-normalized plain text from chat message content.
 *
 * - String content is (optionally) sanitized, then normalized.
 * - Array content contributes only object entries whose `type` is `"text"`
 *   and whose `text` is a string; each is (optionally) sanitized, entries
 *   that are blank after sanitizing are dropped, and the rest are joined
 *   with a single space and normalized.
 * - Any other content yields `null`.
 *
 * Normalization collapses every whitespace run to one space and trims.
 *
 * @param content - Raw message content of unknown shape.
 * @param opts - Optional settings; `sanitizeText` is applied to each raw
 *   string before normalization.
 * @returns The normalized text, or `null` when nothing non-empty remains.
 */
export function extractTextFromChatContent(
  content: unknown,
  opts?: { sanitizeText?: (text: string) => string },
): string | null {
  const collapseWhitespace = (raw: string) => raw.replace(/\s+/g, " ").trim();
  const applySanitizer = (raw: string) =>
    opts?.sanitizeText ? opts.sanitizeText(raw) : raw;

  if (typeof content === "string") {
    return collapseWhitespace(applySanitizer(content)) || null;
  }
  if (!Array.isArray(content)) {
    return null;
  }

  const pieces: string[] = [];
  for (const entry of content) {
    // Skip anything that is not a `{ type: "text", ... }` object.
    if (
      !entry ||
      typeof entry !== "object" ||
      (entry as { type?: unknown }).type !== "text"
    ) {
      continue;
    }
    const rawText = (entry as { text?: unknown }).text;
    if (typeof rawText === "string") {
      const cleaned = applySanitizer(rawText);
      // Drop blocks that are blank once sanitized.
      if (cleaned.trim()) {
        pieces.push(cleaned);
      }
    }
  }
  return collapseWhitespace(pieces.join(" ")) || null;
}