From aee002a39b45f456bea17d928ed898a408f352c5 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Thu, 19 Feb 2026 00:17:38 +0000 Subject: [PATCH] refactor(agents): dedupe paragraph/newline break search in chunker --- src/agents/pi-embedded-block-chunker.ts | 121 ++++++++++++++++-------- 1 file changed, 79 insertions(+), 42 deletions(-) diff --git a/src/agents/pi-embedded-block-chunker.ts b/src/agents/pi-embedded-block-chunker.ts index d3b5638a08..b1266a1557 100644 --- a/src/agents/pi-embedded-block-chunker.ts +++ b/src/agents/pi-embedded-block-chunker.ts @@ -44,6 +44,53 @@ function findSafeSentenceBreakIndex( return sentenceIdx >= minChars ? sentenceIdx : -1; } +function findSafeParagraphBreakIndex(params: { + text: string; + fenceSpans: FenceSpan[]; + minChars: number; + reverse: boolean; +}): number { + const { text, fenceSpans, minChars, reverse } = params; + let paragraphIdx = reverse ? text.lastIndexOf("\n\n") : text.indexOf("\n\n"); + while (reverse ? paragraphIdx >= minChars : paragraphIdx !== -1) { + const candidates = [paragraphIdx, paragraphIdx + 1]; + for (const candidate of candidates) { + if (candidate < minChars) { + continue; + } + if (candidate < 0 || candidate >= text.length) { + continue; + } + if (isSafeFenceBreak(fenceSpans, candidate)) { + return candidate; + } + } + paragraphIdx = reverse + ? text.lastIndexOf("\n\n", paragraphIdx - 1) + : text.indexOf("\n\n", paragraphIdx + 2); + } + return -1; +} + +function findSafeNewlineBreakIndex(params: { + text: string; + fenceSpans: FenceSpan[]; + minChars: number; + reverse: boolean; +}): number { + const { text, fenceSpans, minChars, reverse } = params; + let newlineIdx = reverse ? text.lastIndexOf("\n") : text.indexOf("\n"); + while (reverse ? newlineIdx >= minChars : newlineIdx !== -1) { + if (newlineIdx >= minChars && isSafeFenceBreak(fenceSpans, newlineIdx)) { + return newlineIdx; + } + newlineIdx = reverse + ? text.lastIndexOf("\n", newlineIdx - 1) + : text.indexOf("\n", newlineIdx + 1); + } + return -1; +} + export class EmbeddedBlockChunker { #buffer = ""; readonly #chunking: BlockReplyChunking; @@ -202,31 +249,26 @@ export class EmbeddedBlockChunker { const preference = this.#chunking.breakPreference ?? "paragraph"; if (preference === "paragraph") { - let paragraphIdx = buffer.indexOf("\n\n"); - while (paragraphIdx !== -1) { - const candidates = [paragraphIdx, paragraphIdx + 1]; - for (const candidate of candidates) { - if (candidate < minChars) { - continue; - } - if (candidate < 0 || candidate >= buffer.length) { - continue; - } - if (isSafeFenceBreak(fenceSpans, candidate)) { - return { index: candidate }; - } - } - paragraphIdx = buffer.indexOf("\n\n", paragraphIdx + 2); + const paragraphIdx = findSafeParagraphBreakIndex({ + text: buffer, + fenceSpans, + minChars, + reverse: false, + }); + if (paragraphIdx !== -1) { + return { index: paragraphIdx }; } } if (preference === "paragraph" || preference === "newline") { - let newlineIdx = buffer.indexOf("\n"); - while (newlineIdx !== -1) { - if (newlineIdx >= minChars && isSafeFenceBreak(fenceSpans, newlineIdx)) { - return { index: newlineIdx }; - } - newlineIdx = buffer.indexOf("\n", newlineIdx + 1); + const newlineIdx = findSafeNewlineBreakIndex({ + text: buffer, + fenceSpans, + minChars, + reverse: false, + }); + if (newlineIdx !== -1) { + return { index: newlineIdx }; } } @@ -251,31 +293,26 @@ export class EmbeddedBlockChunker { const preference = this.#chunking.breakPreference ?? "paragraph"; if (preference === "paragraph") { - let paragraphIdx = window.lastIndexOf("\n\n"); - while (paragraphIdx >= minChars) { - const candidates = [paragraphIdx, paragraphIdx + 1]; - for (const candidate of candidates) { - if (candidate < minChars) { - continue; - } - if (candidate < 0 || candidate >= buffer.length) { - continue; - } - if (isSafeFenceBreak(fenceSpans, candidate)) { - return { index: candidate }; - } - } - paragraphIdx = window.lastIndexOf("\n\n", paragraphIdx - 1); + const paragraphIdx = findSafeParagraphBreakIndex({ + text: window, + fenceSpans, + minChars, + reverse: true, + }); + if (paragraphIdx !== -1) { + return { index: paragraphIdx }; } } if (preference === "paragraph" || preference === "newline") { - let newlineIdx = window.lastIndexOf("\n"); - while (newlineIdx >= minChars) { - if (isSafeFenceBreak(fenceSpans, newlineIdx)) { - return { index: newlineIdx }; - } - newlineIdx = window.lastIndexOf("\n", newlineIdx - 1); + const newlineIdx = findSafeNewlineBreakIndex({ + text: window, + fenceSpans, + minChars, + reverse: true, + }); + if (newlineIdx !== -1) { + return { index: newlineIdx }; } }