Share one chunking loop between chunkText and chunkTextForOutbound.

Summary of what this patch does (derived from the hunks below):

  * src/shared/text-chunking.ts (new) adds
    chunkTextByBreakResolver(text, limit, resolveBreakIndex):
      - returns [] for empty text, and [text] when limit <= 0 or the text
        already fits within limit;
      - otherwise repeatedly passes the first `limit` characters (the
        "window") to resolveBreakIndex and cuts at the returned index, but
        only when that index is finite, > 0 and <= limit; any other value
        falls back to a hard break at `limit`;
      - each emitted chunk is trimEnd()-ed and dropped if empty; when the
        character at the break position is whitespace it is skipped, and the
        remainder is trimStart()-ed before the next iteration.
  * src/auto-reply/chunk.ts: chunkText keeps its existing early return and
    delegates the loop, resolving breaks with scanParenAwareBreakpoints
    (lastNewline when > 0, otherwise lastWhitespace).
  * src/plugin-sdk/text-chunking.ts: chunkTextForOutbound delegates as well,
    resolving breaks with lastIndexOf("\n") / lastIndexOf(" ").
  * src/plugin-sdk/text-chunking.test.ts (new): covers empty input,
    newline/space breaks and the hard-limit fallback.

NOTE(review): the hard-break fallback cuts with String.prototype.slice at a
UTF-16 code-unit index, so a surrogate pair that straddles `limit` would be
split across two chunks. The removed loops cut the same way, so this patch
does not change that; worth confirming whether callers care.

diff --git a/src/auto-reply/chunk.ts b/src/auto-reply/chunk.ts
index e91b9e8683..a40eebb82c 100644
--- a/src/auto-reply/chunk.ts
+++ b/src/auto-reply/chunk.ts
@@ -6,6 +6,7 @@ import type { ChannelId } from "../channels/plugins/types.js";
 import type { OpenClawConfig } from "../config/config.js";
 import { findFenceSpanAt, isSafeFenceBreak, parseFenceSpans } from "../markdown/fences.js";
 import { normalizeAccountId } from "../routing/session-key.js";
+import { chunkTextByBreakResolver } from "../shared/text-chunking.js";
 import { INTERNAL_MESSAGE_CHANNEL } from "../utils/message-channel.js";
 
 export type TextChunkProvider = ChannelId | typeof INTERNAL_MESSAGE_CHANNEL;
@@ -316,41 +317,12 @@ export function chunkText(text: string, limit: number): string[] {
   if (early) {
     return early;
   }
-
-  const chunks: string[] = [];
-  let remaining = text;
-
-  while (remaining.length > limit) {
-    const window = remaining.slice(0, limit);
-
+  return chunkTextByBreakResolver(text, limit, (window) => {
     // 1) Prefer a newline break inside the window (outside parentheses).
     const { lastNewline, lastWhitespace } = scanParenAwareBreakpoints(window);
-
     // 2) Otherwise prefer the last whitespace (word boundary) inside the window.
-    let breakIdx = lastNewline > 0 ? lastNewline : lastWhitespace;
-
-    // 3) Fallback: hard break exactly at the limit.
-    if (breakIdx <= 0) {
-      breakIdx = limit;
-    }
-
-    const rawChunk = remaining.slice(0, breakIdx);
-    const chunk = rawChunk.trimEnd();
-    if (chunk.length > 0) {
-      chunks.push(chunk);
-    }
-
-    // If we broke on whitespace/newline, skip that separator; for hard breaks keep it.
-    const brokeOnSeparator = breakIdx < remaining.length && /\s/.test(remaining[breakIdx]);
-    const nextStart = Math.min(remaining.length, breakIdx + (brokeOnSeparator ? 1 : 0));
-    remaining = remaining.slice(nextStart).trimStart();
-  }
-
-  if (remaining.length) {
-    chunks.push(remaining);
-  }
-
-  return chunks;
+    return lastNewline > 0 ? lastNewline : lastWhitespace;
+  });
 }
 
 export function chunkMarkdownText(text: string, limit: number): string[] {
diff --git a/src/plugin-sdk/text-chunking.test.ts b/src/plugin-sdk/text-chunking.test.ts
new file mode 100644
index 0000000000..b96b00cd99
--- /dev/null
+++ b/src/plugin-sdk/text-chunking.test.ts
@@ -0,0 +1,16 @@
+import { describe, expect, it } from "vitest";
+import { chunkTextForOutbound } from "./text-chunking.js";
+
+describe("chunkTextForOutbound", () => {
+  it("returns empty for empty input", () => {
+    expect(chunkTextForOutbound("", 10)).toEqual([]);
+  });
+
+  it("splits on newline or whitespace boundaries", () => {
+    expect(chunkTextForOutbound("alpha\nbeta gamma", 8)).toEqual(["alpha", "beta", "gamma"]);
+  });
+
+  it("falls back to hard limit when no separator exists", () => {
+    expect(chunkTextForOutbound("abcdefghij", 4)).toEqual(["abcd", "efgh", "ij"]);
+  });
+});
diff --git a/src/plugin-sdk/text-chunking.ts b/src/plugin-sdk/text-chunking.ts
index 3c86e43f6f..47c98c1085 100644
--- a/src/plugin-sdk/text-chunking.ts
+++ b/src/plugin-sdk/text-chunking.ts
@@ -1,31 +1,9 @@
+import { chunkTextByBreakResolver } from "../shared/text-chunking.js";
+
 export function chunkTextForOutbound(text: string, limit: number): string[] {
-  if (!text) {
-    return [];
-  }
-  if (limit <= 0 || text.length <= limit) {
-    return [text];
-  }
-  const chunks: string[] = [];
-  let remaining = text;
-  while (remaining.length > limit) {
-    const window = remaining.slice(0, limit);
+  return chunkTextByBreakResolver(text, limit, (window) => {
     const lastNewline = window.lastIndexOf("\n");
     const lastSpace = window.lastIndexOf(" ");
-    let breakIdx = lastNewline > 0 ? lastNewline : lastSpace;
-    if (breakIdx <= 0) {
-      breakIdx = limit;
-    }
-    const rawChunk = remaining.slice(0, breakIdx);
-    const chunk = rawChunk.trimEnd();
-    if (chunk.length > 0) {
-      chunks.push(chunk);
-    }
-    const brokeOnSeparator = breakIdx < remaining.length && /\s/.test(remaining[breakIdx]);
-    const nextStart = Math.min(remaining.length, breakIdx + (brokeOnSeparator ? 1 : 0));
-    remaining = remaining.slice(nextStart).trimStart();
-  }
-  if (remaining.length) {
-    chunks.push(remaining);
-  }
-  return chunks;
+    return lastNewline > 0 ? lastNewline : lastSpace;
+  });
 }
diff --git a/src/shared/text-chunking.ts b/src/shared/text-chunking.ts
new file mode 100644
index 0000000000..9b75368fcd
--- /dev/null
+++ b/src/shared/text-chunking.ts
@@ -0,0 +1,34 @@
+export function chunkTextByBreakResolver(
+  text: string,
+  limit: number,
+  resolveBreakIndex: (window: string) => number,
+): string[] {
+  if (!text) {
+    return [];
+  }
+  if (limit <= 0 || text.length <= limit) {
+    return [text];
+  }
+  const chunks: string[] = [];
+  let remaining = text;
+  while (remaining.length > limit) {
+    const window = remaining.slice(0, limit);
+    const candidateBreak = resolveBreakIndex(window);
+    const breakIdx =
+      Number.isFinite(candidateBreak) && candidateBreak > 0 && candidateBreak <= limit
+        ? candidateBreak
+        : limit;
+    const rawChunk = remaining.slice(0, breakIdx);
+    const chunk = rawChunk.trimEnd();
+    if (chunk.length > 0) {
+      chunks.push(chunk);
+    }
+    const brokeOnSeparator = breakIdx < remaining.length && /\s/.test(remaining[breakIdx]);
+    const nextStart = Math.min(remaining.length, breakIdx + (brokeOnSeparator ? 1 : 0));
+    remaining = remaining.slice(nextStart).trimStart();
+  }
+  if (remaining.length) {
+    chunks.push(remaining);
+  }
+  return chunks;
+}