refactor(shared): reuse outbound text chunking core

This commit is contained in:
Peter Steinberger
2026-02-19 06:48:14 +00:00
parent d5c58ce8d9
commit 2f6b8663ff
4 changed files with 59 additions and 59 deletions

View File

@@ -6,6 +6,7 @@ import type { ChannelId } from "../channels/plugins/types.js";
import type { OpenClawConfig } from "../config/config.js";
import { findFenceSpanAt, isSafeFenceBreak, parseFenceSpans } from "../markdown/fences.js";
import { normalizeAccountId } from "../routing/session-key.js";
import { chunkTextByBreakResolver } from "../shared/text-chunking.js";
import { INTERNAL_MESSAGE_CHANNEL } from "../utils/message-channel.js";
export type TextChunkProvider = ChannelId | typeof INTERNAL_MESSAGE_CHANNEL;
@@ -316,41 +317,12 @@ export function chunkText(text: string, limit: number): string[] {
if (early) {
return early;
}
const chunks: string[] = [];
let remaining = text;
while (remaining.length > limit) {
const window = remaining.slice(0, limit);
return chunkTextByBreakResolver(text, limit, (window) => {
// 1) Prefer a newline break inside the window (outside parentheses).
const { lastNewline, lastWhitespace } = scanParenAwareBreakpoints(window);
// 2) Otherwise prefer the last whitespace (word boundary) inside the window.
let breakIdx = lastNewline > 0 ? lastNewline : lastWhitespace;
// 3) Fallback: hard break exactly at the limit.
if (breakIdx <= 0) {
breakIdx = limit;
}
const rawChunk = remaining.slice(0, breakIdx);
const chunk = rawChunk.trimEnd();
if (chunk.length > 0) {
chunks.push(chunk);
}
// If we broke on whitespace/newline, skip that separator; for hard breaks keep it.
const brokeOnSeparator = breakIdx < remaining.length && /\s/.test(remaining[breakIdx]);
const nextStart = Math.min(remaining.length, breakIdx + (brokeOnSeparator ? 1 : 0));
remaining = remaining.slice(nextStart).trimStart();
}
if (remaining.length) {
chunks.push(remaining);
}
return chunks;
return lastNewline > 0 ? lastNewline : lastWhitespace;
});
}
export function chunkMarkdownText(text: string, limit: number): string[] {

View File

@@ -0,0 +1,16 @@
import { describe, expect, it } from "vitest";
import { chunkTextForOutbound } from "./text-chunking.js";
// Behavioural checks for chunkTextForOutbound: empty input, separator-based
// splitting, and the hard-limit fallback when the text has no separators.
describe("chunkTextForOutbound", () => {
  it("returns empty for empty input", () => {
    const chunks = chunkTextForOutbound("", 10);
    expect(chunks).toEqual([]);
  });

  it("splits on newline or whitespace boundaries", () => {
    // The first window contains a newline; the second only contains a space.
    const chunks = chunkTextForOutbound("alpha\nbeta gamma", 8);
    const expected = ["alpha", "beta", "gamma"];
    expect(chunks).toEqual(expected);
  });

  it("falls back to hard limit when no separator exists", () => {
    // No newline or space anywhere, so every cut lands exactly on the limit.
    const chunks = chunkTextForOutbound("abcdefghij", 4);
    const expected = ["abcd", "efgh", "ij"];
    expect(chunks).toEqual(expected);
  });
});

View File

@@ -1,31 +1,9 @@
import { chunkTextByBreakResolver } from "../shared/text-chunking.js";
/**
 * Splits `text` into chunks of at most `limit` characters for outbound delivery.
 *
 * Within each window of `limit` characters the break position is the last
 * newline when it is not at index 0, otherwise the last space; when neither
 * yields a positive index the text is cut exactly at the limit.
 *
 * @param text - Text to split; an empty string yields an empty array.
 * @param limit - Maximum chunk length; when it is `<= 0` or the text already
 *   fits, the text is returned unchanged as a single chunk.
 * @returns The chunks in order.
 */
export function chunkTextForOutbound(text: string, limit: number): string[] {
if (!text) {
return [];
}
if (limit <= 0 || text.length <= limit) {
return [text];
}
const chunks: string[] = [];
let remaining = text;
while (remaining.length > limit) {
const window = remaining.slice(0, limit);
return chunkTextByBreakResolver(text, limit, (window) => {
// Candidate separators inside the current window; a newline wins over a space.
const lastNewline = window.lastIndexOf("\n");
const lastSpace = window.lastIndexOf(" ");
let breakIdx = lastNewline > 0 ? lastNewline : lastSpace;
// No usable separator (missing, or at index 0): hard-break at the limit.
if (breakIdx <= 0) {
breakIdx = limit;
}
const rawChunk = remaining.slice(0, breakIdx);
const chunk = rawChunk.trimEnd();
if (chunk.length > 0) {
chunks.push(chunk);
}
// When the break landed on whitespace, skip that one character; a hard break keeps it.
const brokeOnSeparator = breakIdx < remaining.length && /\s/.test(remaining[breakIdx]);
const nextStart = Math.min(remaining.length, breakIdx + (brokeOnSeparator ? 1 : 0));
remaining = remaining.slice(nextStart).trimStart();
}
if (remaining.length) {
chunks.push(remaining);
}
return chunks;
return lastNewline > 0 ? lastNewline : lastSpace;
});
}

View File

@@ -0,0 +1,34 @@
/**
 * Splits `text` into chunks of at most `limit` UTF-16 code units, letting the
 * caller choose where each chunk ends.
 *
 * For every window of `limit` code units taken from the front of the remaining
 * text, `resolveBreakIndex` is asked for a break position. Each emitted chunk
 * has trailing whitespace removed, the whitespace character the break landed
 * on (if any) is skipped, and leading whitespace of the remainder is dropped.
 *
 * @param text - Text to split; an empty string yields an empty array.
 * @param limit - Maximum chunk length. Fractional values are floored; when the
 *   floored value is below 1 (or not a number), or the text already fits, the
 *   text is returned unchanged as a single chunk.
 * @param resolveBreakIndex - Receives the current window and returns the index
 *   to break at. The result is floored, and anything outside `1..limit`
 *   (including `NaN`/`Infinity`) falls back to a hard break at the limit.
 * @returns The non-empty chunks in order.
 */
export function chunkTextByBreakResolver(
  text: string,
  limit: number,
  resolveBreakIndex: (window: string) => number,
): string[] {
  if (!text) {
    return [];
  }
  // Work in whole code units. A limit in (0, 1) used to produce empty slices
  // and never advance; treat it like any other non-positive limit.
  const maxLength = Math.floor(limit);
  if (!(maxLength >= 1) || text.length <= maxLength) {
    return [text];
  }

  const whitespace = /\s/;
  const chunks: string[] = [];
  let remaining = text;
  while (remaining.length > maxLength) {
    const window = remaining.slice(0, maxLength);
    // Floor the resolver result so a fractional index below 1 cannot yield a
    // zero-length slice that makes no forward progress.
    const candidate = Math.floor(resolveBreakIndex(window));
    let breakIdx = candidate >= 1 && candidate <= maxLength ? candidate : maxLength;

    // Do not cut a surrogate pair in half: when the break sits between a high
    // and a low surrogate, move it one unit back (only if the chunk stays
    // non-empty, so the loop always advances).
    if (
      breakIdx > 1 &&
      (remaining.charCodeAt(breakIdx - 1) & 0xfc00) === 0xd800 &&
      (remaining.charCodeAt(breakIdx) & 0xfc00) === 0xdc00
    ) {
      breakIdx -= 1;
    }

    const chunk = remaining.slice(0, breakIdx).trimEnd();
    if (chunk.length > 0) {
      chunks.push(chunk);
    }

    // Inside the loop breakIdx < remaining.length always holds, so charAt is in range.
    // Skip the separator we broke on; after a hard break the next character is kept.
    const brokeOnSeparator = whitespace.test(remaining.charAt(breakIdx));
    const nextStart = breakIdx + (brokeOnSeparator ? 1 : 0);
    remaining = remaining.slice(nextStart).trimStart();
  }
  if (remaining.length > 0) {
    chunks.push(remaining);
  }
  return chunks;
}