diff --git a/CHANGELOG.md b/CHANGELOG.md index b0c9ca8f9f..865d448a42 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -30,6 +30,7 @@ ### Fixes - Sub-agents: route announce delivery through the correct channel account IDs. (#1061, #1058) — thanks @adam91holt. +- Telegram: split long media captions into follow-up text messages in bot delivery. (#1063) — thanks @mukhtharcm. - Repo: fix oxlint config filename and move ignore pattern into config. (#1064) — thanks @connorshea. - Messages: `/stop` now hard-aborts queued followups and sub-agent runs; suppress zero-count stop notes. - Sessions: reset `compactionCount` on `/new` and `/reset`, and preserve `sessions.json` file mode (0600). diff --git a/src/telegram/bot/delivery.test.ts b/src/telegram/bot/delivery.test.ts index 65328af906..72bf29f3aa 100644 --- a/src/telegram/bot/delivery.test.ts +++ b/src/telegram/bot/delivery.test.ts @@ -74,4 +74,51 @@ describe("deliverReplies", () => { expect(sendVoice).toHaveBeenCalledTimes(1); expect(events).toEqual(["recordVoice", "sendVoice"]); }); + + it("splits long captions into media + follow-up text after the first media", async () => { + const events: string[] = []; + const runtime = { error: vi.fn() }; + const sendPhoto = vi.fn(async () => { + events.push("photo"); + return { message_id: 1, chat: { id: "123" } }; + }); + const sendMessage = vi.fn(async () => { + events.push("text"); + return { message_id: 2, chat: { id: "123" } }; + }); + const bot = { api: { sendPhoto, sendMessage } } as unknown as Bot; + const longText = "A".repeat(1100); + + loadWebMedia + .mockResolvedValueOnce({ + buffer: Buffer.from("photo-a"), + contentType: "image/jpeg", + fileName: "a.jpg", + }) + .mockResolvedValueOnce({ + buffer: Buffer.from("photo-b"), + contentType: "image/jpeg", + fileName: "b.jpg", + }); + + await deliverReplies({ + replies: [{ text: longText, mediaUrls: ["https://example.com/a.jpg", "https://example.com/b.jpg"] }], + chatId: "123", + token: "tok", + runtime, + bot, + replyToMode: 
"off", + textLimit: 4000, + }); + + expect(sendPhoto).toHaveBeenCalledTimes(2); + expect(sendPhoto).toHaveBeenNthCalledWith( + 1, + "123", + expect.anything(), + expect.objectContaining({ caption: undefined }), + ); + expect(sendMessage).toHaveBeenCalledWith("123", longText, {}); + expect(events).toEqual(["photo", "text", "photo"]); + }); }); diff --git a/src/telegram/bot/delivery.ts b/src/telegram/bot/delivery.ts index fa7e42fc93..1af108cd68 100644 --- a/src/telegram/bot/delivery.ts +++ b/src/telegram/bot/delivery.ts @@ -73,6 +73,7 @@ export async function deliverReplies(params: { // (when caption exceeds Telegram's 1024-char limit) let pendingFollowUpText: string | undefined; for (const mediaUrl of mediaList) { + const isFirstMedia = first; const media = await loadWebMedia(mediaUrl); const kind = mediaKindFromMime(media.contentType ?? undefined); const isGif = isGifMedia({ @@ -82,11 +83,12 @@ export async function deliverReplies(params: { const fileName = media.fileName ?? (isGif ? "animation.gif" : "file"); const file = new InputFile(media.buffer, fileName); // Caption only on first item; if text exceeds limit, defer to follow-up message. - const rawCaption = first ? (reply.text ?? undefined) : undefined; - const captionTooLong = rawCaption != null && rawCaption.length > TELEGRAM_MAX_CAPTION_LENGTH; - const caption = captionTooLong ? undefined : rawCaption; - if (captionTooLong && rawCaption) { - pendingFollowUpText = rawCaption; + const rawCaption = isFirstMedia ? (reply.text ?? undefined) : undefined; + const trimmedCaption = rawCaption?.trim() ?? ""; + const captionTooLong = trimmedCaption.length > TELEGRAM_MAX_CAPTION_LENGTH; + const caption = captionTooLong ? 
undefined : trimmedCaption || undefined; + if (captionTooLong && trimmedCaption) { + pendingFollowUpText = trimmedCaption; } first = false; const replyToMessageId = @@ -138,22 +140,26 @@ export async function deliverReplies(params: { if (replyToId && !hasReplied) { hasReplied = true; } - } - // Send deferred follow-up text when caption was too long for media. - // Chunk it in case it's extremely long (same logic as text-only replies). - if (pendingFollowUpText) { - const chunks = markdownToTelegramChunks(pendingFollowUpText, textLimit); - for (const chunk of chunks) { - await sendTelegramText(bot, chatId, chunk.html, runtime, { - replyToMessageId: - replyToId && (replyToMode === "all" || !hasReplied) ? replyToId : undefined, - messageThreadId, - textMode: "html", - plainText: chunk.text, - }); - if (replyToId && !hasReplied) { - hasReplied = true; + // Send deferred follow-up text right after the first media item. + // Chunk it in case it's extremely long (same logic as text-only replies). + if (pendingFollowUpText && isFirstMedia) { + const chunks = markdownToTelegramChunks(pendingFollowUpText, textLimit); + for (const chunk of chunks) { + const replyToMessageIdFollowup = + replyToId && (replyToMode === "all" || !hasReplied) ? replyToId : undefined; + const textParams: Record<string, unknown> = {}; + if (replyToMessageIdFollowup) { + textParams.reply_to_message_id = replyToMessageIdFollowup; + } + if (threadParams) { + textParams.message_thread_id = threadParams.message_thread_id; + } + await bot.api.sendMessage(chatId, chunk.text, textParams); + if (replyToId && !hasReplied) { + hasReplied = true; + } } + pendingFollowUpText = undefined; } } }