fix: split telegram captions in bot delivery (#1063) (thanks @mukhtharcm)

2026-02-19 18:39:20 -05:00 · 2026-01-17 03:41:47 +00:00
parent 6e10f1c1f2
commit 1cdd3f29da
3 changed files with 74 additions and 20 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -30,6 +30,7 @@

 ### Fixes
 - Sub-agents: route announce delivery through the correct channel account IDs. (#1061, #1058) — thanks @adam91holt.
+- Telegram: split long media captions into follow-up text messages in bot delivery. (#1063) — thanks @mukhtharcm.
 - Repo: fix oxlint config filename and move ignore pattern into config. (#1064) — thanks @connorshea.
 - Messages: `/stop` now hard-aborts queued followups and sub-agent runs; suppress zero-count stop notes.
 - Sessions: reset `compactionCount` on `/new` and `/reset`, and preserve `sessions.json` file mode (0600).
--- a/src/telegram/bot/delivery.test.ts
+++ b/src/telegram/bot/delivery.test.ts
@@ -74,4 +74,51 @@ describe("deliverReplies", () => {
    expect(sendVoice).toHaveBeenCalledTimes(1);
    expect(events).toEqual(["recordVoice", "sendVoice"]);
  });
+
+  it("splits long captions into media + follow-up text after the first media", async () => {
+    const events: string[] = [];
+    const runtime = { error: vi.fn() };
+    const sendPhoto = vi.fn(async () => {
+      events.push("photo");
+      return { message_id: 1, chat: { id: "123" } };
+    });
+    const sendMessage = vi.fn(async () => {
+      events.push("text");
+      return { message_id: 2, chat: { id: "123" } };
+    });
+    const bot = { api: { sendPhoto, sendMessage } } as unknown as Bot;
+    const longText = "A".repeat(1100);
+
+    loadWebMedia
+      .mockResolvedValueOnce({
+        buffer: Buffer.from("photo-a"),
+        contentType: "image/jpeg",
+        fileName: "a.jpg",
+      })
+      .mockResolvedValueOnce({
+        buffer: Buffer.from("photo-b"),
+        contentType: "image/jpeg",
+        fileName: "b.jpg",
+      });
+
+    await deliverReplies({
+      replies: [{ text: longText, mediaUrls: ["https://example.com/a.jpg", "https://example.com/b.jpg"] }],
+      chatId: "123",
+      token: "tok",
+      runtime,
+      bot,
+      replyToMode: "off",
+      textLimit: 4000,
+    });
+
+    expect(sendPhoto).toHaveBeenCalledTimes(2);
+    expect(sendPhoto).toHaveBeenNthCalledWith(
+      1,
+      "123",
+      expect.anything(),
+      expect.objectContaining({ caption: undefined }),
+    );
+    expect(sendMessage).toHaveBeenCalledWith("123", longText, {});
+    expect(events).toEqual(["photo", "text", "photo"]);
+  });
 });
--- a/src/telegram/bot/delivery.ts
+++ b/src/telegram/bot/delivery.ts
@@ -73,6 +73,7 @@ export async function deliverReplies(params: {
    // (when caption exceeds Telegram's 1024-char limit)
    let pendingFollowUpText: string | undefined;
    for (const mediaUrl of mediaList) {
+      const isFirstMedia = first;
      const media = await loadWebMedia(mediaUrl);
      const kind = mediaKindFromMime(media.contentType ?? undefined);
      const isGif = isGifMedia({
@@ -82,11 +83,12 @@ export async function deliverReplies(params: {
      const fileName = media.fileName ?? (isGif ? "animation.gif" : "file");
      const file = new InputFile(media.buffer, fileName);
      // Caption only on first item; if text exceeds limit, defer to follow-up message.
-      const rawCaption = first ? (reply.text ?? undefined) : undefined;
-      const captionTooLong = rawCaption != null && rawCaption.length > TELEGRAM_MAX_CAPTION_LENGTH;
-      const caption = captionTooLong ? undefined : rawCaption;
-      if (captionTooLong && rawCaption) {
-        pendingFollowUpText = rawCaption;
+      const rawCaption = isFirstMedia ? (reply.text ?? undefined) : undefined;
+      const trimmedCaption = rawCaption?.trim() ?? "";
+      const captionTooLong = trimmedCaption.length > TELEGRAM_MAX_CAPTION_LENGTH;
+      const caption = captionTooLong ? undefined : trimmedCaption || undefined;
+      if (captionTooLong && trimmedCaption) {
+        pendingFollowUpText = trimmedCaption;
      }
      first = false;
      const replyToMessageId =
@@ -138,22 +140,26 @@ export async function deliverReplies(params: {
      if (replyToId && !hasReplied) {
        hasReplied = true;
      }
-    }
-    // Send deferred follow-up text when caption was too long for media.
-    // Chunk it in case it's extremely long (same logic as text-only replies).
-    if (pendingFollowUpText) {
-      const chunks = markdownToTelegramChunks(pendingFollowUpText, textLimit);
-      for (const chunk of chunks) {
-        await sendTelegramText(bot, chatId, chunk.html, runtime, {
-          replyToMessageId:
-            replyToId && (replyToMode === "all" || !hasReplied) ? replyToId : undefined,
-          messageThreadId,
-          textMode: "html",
-          plainText: chunk.text,
-        });
-        if (replyToId && !hasReplied) {
-          hasReplied = true;
+      // Send deferred follow-up text right after the first media item, before any remaining media.
+      // Chunk it via markdownToTelegramChunks in case it exceeds textLimit; each chunk's `text` is sent without a parse mode.
+      if (pendingFollowUpText && isFirstMedia) {
+        const chunks = markdownToTelegramChunks(pendingFollowUpText, textLimit);
+        for (const chunk of chunks) {
+          const replyToMessageIdFollowup =
+            replyToId && (replyToMode === "all" || !hasReplied) ? replyToId : undefined;
+          const textParams: Record<string, unknown> = {};
+          if (replyToMessageIdFollowup) {
+            textParams.reply_to_message_id = replyToMessageIdFollowup;
+          }
+          if (threadParams) {
+            textParams.message_thread_id = threadParams.message_thread_id;
+          }
+          await bot.api.sendMessage(chatId, chunk.text, textParams);
+          if (replyToId && !hasReplied) {
+            hasReplied = true;
+          }
        }
+        pendingFollowUpText = undefined;
      }
    }
  }