mirror of
https://github.com/openclaw/openclaw.git
synced 2026-02-19 18:39:20 -05:00
refactor(shared): reuse outbound text chunking core
This commit is contained in:
@@ -6,6 +6,7 @@ import type { ChannelId } from "../channels/plugins/types.js";
|
||||
import type { OpenClawConfig } from "../config/config.js";
|
||||
import { findFenceSpanAt, isSafeFenceBreak, parseFenceSpans } from "../markdown/fences.js";
|
||||
import { normalizeAccountId } from "../routing/session-key.js";
|
||||
import { chunkTextByBreakResolver } from "../shared/text-chunking.js";
|
||||
import { INTERNAL_MESSAGE_CHANNEL } from "../utils/message-channel.js";
|
||||
|
||||
export type TextChunkProvider = ChannelId | typeof INTERNAL_MESSAGE_CHANNEL;
|
||||
@@ -316,41 +317,12 @@ export function chunkText(text: string, limit: number): string[] {
|
||||
if (early) {
|
||||
return early;
|
||||
}
|
||||
|
||||
const chunks: string[] = [];
|
||||
let remaining = text;
|
||||
|
||||
while (remaining.length > limit) {
|
||||
const window = remaining.slice(0, limit);
|
||||
|
||||
return chunkTextByBreakResolver(text, limit, (window) => {
|
||||
// 1) Prefer a newline break inside the window (outside parentheses).
|
||||
const { lastNewline, lastWhitespace } = scanParenAwareBreakpoints(window);
|
||||
|
||||
// 2) Otherwise prefer the last whitespace (word boundary) inside the window.
|
||||
let breakIdx = lastNewline > 0 ? lastNewline : lastWhitespace;
|
||||
|
||||
// 3) Fallback: hard break exactly at the limit.
|
||||
if (breakIdx <= 0) {
|
||||
breakIdx = limit;
|
||||
}
|
||||
|
||||
const rawChunk = remaining.slice(0, breakIdx);
|
||||
const chunk = rawChunk.trimEnd();
|
||||
if (chunk.length > 0) {
|
||||
chunks.push(chunk);
|
||||
}
|
||||
|
||||
// If we broke on whitespace/newline, skip that separator; for hard breaks keep it.
|
||||
const brokeOnSeparator = breakIdx < remaining.length && /\s/.test(remaining[breakIdx]);
|
||||
const nextStart = Math.min(remaining.length, breakIdx + (brokeOnSeparator ? 1 : 0));
|
||||
remaining = remaining.slice(nextStart).trimStart();
|
||||
}
|
||||
|
||||
if (remaining.length) {
|
||||
chunks.push(remaining);
|
||||
}
|
||||
|
||||
return chunks;
|
||||
return lastNewline > 0 ? lastNewline : lastWhitespace;
|
||||
});
|
||||
}
|
||||
|
||||
export function chunkMarkdownText(text: string, limit: number): string[] {
|
||||
|
||||
16
src/plugin-sdk/text-chunking.test.ts
Normal file
16
src/plugin-sdk/text-chunking.test.ts
Normal file
@@ -0,0 +1,16 @@
|
||||
import { describe, expect, it } from "vitest";
|
||||
import { chunkTextForOutbound } from "./text-chunking.js";
|
||||
|
||||
// Behavioral checks for chunkTextForOutbound: empty input, separator-based
// splitting, and the hard-limit fallback when no separator is available.
describe("chunkTextForOutbound", () => {
  it("returns empty for empty input", () => {
    const chunks = chunkTextForOutbound("", 10);

    expect(chunks).toEqual([]);
  });

  it("splits on newline or whitespace boundaries", () => {
    // The first break is the newline, the second is the space.
    const chunks = chunkTextForOutbound("alpha\nbeta gamma", 8);

    expect(chunks).toEqual(["alpha", "beta", "gamma"]);
  });

  it("falls back to hard limit when no separator exists", () => {
    // No newline or space anywhere, so every cut lands exactly at the limit.
    const chunks = chunkTextForOutbound("abcdefghij", 4);

    expect(chunks).toEqual(["abcd", "efgh", "ij"]);
  });
});
|
||||
@@ -1,31 +1,9 @@
|
||||
import { chunkTextByBreakResolver } from "../shared/text-chunking.js";
|
||||
|
||||
export function chunkTextForOutbound(text: string, limit: number): string[] {
|
||||
if (!text) {
|
||||
return [];
|
||||
}
|
||||
if (limit <= 0 || text.length <= limit) {
|
||||
return [text];
|
||||
}
|
||||
const chunks: string[] = [];
|
||||
let remaining = text;
|
||||
while (remaining.length > limit) {
|
||||
const window = remaining.slice(0, limit);
|
||||
return chunkTextByBreakResolver(text, limit, (window) => {
|
||||
const lastNewline = window.lastIndexOf("\n");
|
||||
const lastSpace = window.lastIndexOf(" ");
|
||||
let breakIdx = lastNewline > 0 ? lastNewline : lastSpace;
|
||||
if (breakIdx <= 0) {
|
||||
breakIdx = limit;
|
||||
}
|
||||
const rawChunk = remaining.slice(0, breakIdx);
|
||||
const chunk = rawChunk.trimEnd();
|
||||
if (chunk.length > 0) {
|
||||
chunks.push(chunk);
|
||||
}
|
||||
const brokeOnSeparator = breakIdx < remaining.length && /\s/.test(remaining[breakIdx]);
|
||||
const nextStart = Math.min(remaining.length, breakIdx + (brokeOnSeparator ? 1 : 0));
|
||||
remaining = remaining.slice(nextStart).trimStart();
|
||||
}
|
||||
if (remaining.length) {
|
||||
chunks.push(remaining);
|
||||
}
|
||||
return chunks;
|
||||
return lastNewline > 0 ? lastNewline : lastSpace;
|
||||
});
|
||||
}
|
||||
|
||||
34
src/shared/text-chunking.ts
Normal file
34
src/shared/text-chunking.ts
Normal file
@@ -0,0 +1,34 @@
|
||||
/**
 * Splits `text` into chunks of at most `limit` UTF-16 code units, delegating
 * the choice of each break position to `resolveBreakIndex`.
 *
 * For every chunk, the resolver receives the next `limit`-sized window of the
 * remaining text and returns the index at which to cut. If the result is not a
 * finite index in `(0, limit]`, the cut falls back to a hard break at `limit`.
 * Trailing whitespace is trimmed from each chunk, the separator at the cut (if
 * it is whitespace) is skipped, and leading whitespace of the remainder is
 * dropped. Chunks that are empty after trimming are not emitted.
 *
 * @param text - Text to split. An empty string yields `[]`.
 * @param limit - Maximum chunk length. Fractional values are floored; a value
 *   that is not at least 1 after flooring (including `NaN`) disables chunking
 *   and returns `[text]`.
 * @param resolveBreakIndex - Returns the preferred break index inside the
 *   window. Fractional results are floored.
 * @returns The chunks in order.
 */
export function chunkTextByBreakResolver(
  text: string,
  limit: number,
  resolveBreakIndex: (window: string) => number,
): string[] {
  if (!text) {
    return [];
  }

  // String indices are integers. A limit in (0, 1) would otherwise produce an
  // empty window and a cut that never advances, looping forever.
  const maxLength = Math.floor(limit);
  if (!(maxLength > 0) || text.length <= maxLength) {
    return [text];
  }

  const isHighSurrogate = (code: number): boolean => code >= 0xd800 && code <= 0xdbff;
  const isLowSurrogate = (code: number): boolean => code >= 0xdc00 && code <= 0xdfff;

  const chunks: string[] = [];
  let remaining = text;

  while (remaining.length > maxLength) {
    const window = remaining.slice(0, maxLength);

    // Flooring keeps a fractional result in (0, 1) from stalling the loop;
    // NaN and Infinity fail the range check and fall back to a hard break.
    const candidateBreak = Math.floor(resolveBreakIndex(window));
    let breakIdx =
      Number.isFinite(candidateBreak) && candidateBreak > 0 && candidateBreak <= maxLength
        ? candidateBreak
        : maxLength;

    // Do not cut a surrogate pair in half: move the cut before the pair.
    // Skipped when that would leave a zero-length cut (breakIdx === 1).
    if (
      breakIdx > 1 &&
      isHighSurrogate(remaining.charCodeAt(breakIdx - 1)) &&
      isLowSurrogate(remaining.charCodeAt(breakIdx))
    ) {
      breakIdx -= 1;
    }

    const chunk = remaining.slice(0, breakIdx).trimEnd();
    if (chunk.length > 0) {
      chunks.push(chunk);
    }

    // If the cut landed on whitespace/newline, skip that separator; for hard
    // breaks the character at the cut belongs to the next chunk.
    const brokeOnSeparator = breakIdx < remaining.length && /\s/.test(remaining.charAt(breakIdx));
    const nextStart = Math.min(remaining.length, breakIdx + (brokeOnSeparator ? 1 : 0));
    remaining = remaining.slice(nextStart).trimStart();
  }

  if (remaining.length) {
    chunks.push(remaining);
  }

  return chunks;
}
|
||||
Reference in New Issue
Block a user