diff --git a/CHANGELOG.md b/CHANGELOG.md
index b0c9ca8f9f..865d448a42 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -30,6 +30,7 @@
 
 ### Fixes
 - Sub-agents: route announce delivery through the correct channel account IDs. (#1061, #1058) — thanks @adam91holt.
+- Telegram: split long media captions into follow-up text messages in bot delivery. (#1063) — thanks @mukhtharcm.
 - Repo: fix oxlint config filename and move ignore pattern into config. (#1064) — thanks @connorshea.
 - Messages: `/stop` now hard-aborts queued followups and sub-agent runs; suppress zero-count stop notes.
 - Sessions: reset `compactionCount` on `/new` and `/reset`, and preserve `sessions.json` file mode (0600).
diff --git a/src/telegram/bot/delivery.test.ts b/src/telegram/bot/delivery.test.ts
index 65328af906..72bf29f3aa 100644
--- a/src/telegram/bot/delivery.test.ts
+++ b/src/telegram/bot/delivery.test.ts
@@ -74,4 +74,51 @@ describe("deliverReplies", () => {
     expect(sendVoice).toHaveBeenCalledTimes(1);
     expect(events).toEqual(["recordVoice", "sendVoice"]);
   });
+
+  it("splits long captions into media + follow-up text after the first media", async () => {
+    const events: string[] = []; // records the order in which the stubbed API calls are made
+    const runtime = { error: vi.fn() };
+    const sendPhoto = vi.fn(async () => {
+      events.push("photo");
+      return { message_id: 1, chat: { id: "123" } };
+    });
+    const sendMessage = vi.fn(async () => {
+      events.push("text");
+      return { message_id: 2, chat: { id: "123" } };
+    });
+    const bot = { api: { sendPhoto, sendMessage } } as unknown as Bot; // stub exposing only the two API methods this test observes
+    const longText = "A".repeat(1100); // 1100 chars: over the 1024-char caption limit, under the textLimit passed below
+
+    loadWebMedia
+      .mockResolvedValueOnce({ // resolved for the first loadWebMedia call (a.jpg)
+        buffer: Buffer.from("photo-a"),
+        contentType: "image/jpeg",
+        fileName: "a.jpg",
+      })
+      .mockResolvedValueOnce({ // resolved for the second loadWebMedia call (b.jpg)
+        buffer: Buffer.from("photo-b"),
+        contentType: "image/jpeg",
+        fileName: "b.jpg",
+      });
+
+    await deliverReplies({
+      replies: [{ text: longText, mediaUrls: ["https://example.com/a.jpg", "https://example.com/b.jpg"] }],
+      chatId: "123",
+      token: "tok",
+      runtime,
+      bot,
+      replyToMode: "off",
+      textLimit: 4000,
+    });
+
+    expect(sendPhoto).toHaveBeenCalledTimes(2);
+    expect(sendPhoto).toHaveBeenNthCalledWith(
+      1,
+      "123",
+      expect.anything(),
+      expect.objectContaining({ caption: undefined }), // the over-limit text must not be attached as the first photo's caption
+    );
+    expect(sendMessage).toHaveBeenCalledWith("123", longText, {}); // full text sent as one message with an empty params object
+    expect(events).toEqual(["photo", "text", "photo"]); // the follow-up text goes out before the second photo
+  });
 });
diff --git a/src/telegram/bot/delivery.ts b/src/telegram/bot/delivery.ts
index fa7e42fc93..1af108cd68 100644
--- a/src/telegram/bot/delivery.ts
+++ b/src/telegram/bot/delivery.ts
@@ -73,6 +73,7 @@ export async function deliverReplies(params: {
     // (when caption exceeds Telegram's 1024-char limit)
     let pendingFollowUpText: string | undefined;
     for (const mediaUrl of mediaList) {
+      const isFirstMedia = first;
       const media = await loadWebMedia(mediaUrl);
       const kind = mediaKindFromMime(media.contentType ?? undefined);
       const isGif = isGifMedia({
@@ -82,11 +83,12 @@ export async function deliverReplies(params: {
       const fileName = media.fileName ?? (isGif ? "animation.gif" : "file");
       const file = new InputFile(media.buffer, fileName);
       // Caption only on first item; if text exceeds limit, defer to follow-up message.
-      const rawCaption = first ? (reply.text ?? undefined) : undefined;
-      const captionTooLong = rawCaption != null && rawCaption.length > TELEGRAM_MAX_CAPTION_LENGTH;
-      const caption = captionTooLong ? undefined : rawCaption;
-      if (captionTooLong && rawCaption) {
-        pendingFollowUpText = rawCaption;
+      const rawCaption = isFirstMedia ? (reply.text ?? undefined) : undefined;
+      const trimmedCaption = rawCaption?.trim() ?? "";
+      const captionTooLong = trimmedCaption.length > TELEGRAM_MAX_CAPTION_LENGTH;
+      const caption = captionTooLong ? undefined : trimmedCaption || undefined;
+      if (captionTooLong && trimmedCaption) {
+        pendingFollowUpText = trimmedCaption;
       }
       first = false;
       const replyToMessageId =
@@ -138,22 +140,26 @@ export async function deliverReplies(params: {
       if (replyToId && !hasReplied) {
         hasReplied = true;
       }
-    }
-    // Send deferred follow-up text when caption was too long for media.
-    // Chunk it in case it's extremely long (same logic as text-only replies).
-    if (pendingFollowUpText) {
-      const chunks = markdownToTelegramChunks(pendingFollowUpText, textLimit);
-      for (const chunk of chunks) {
-        await sendTelegramText(bot, chatId, chunk.html, runtime, {
-          replyToMessageId:
-            replyToId && (replyToMode === "all" || !hasReplied) ? replyToId : undefined,
-          messageThreadId,
-          textMode: "html",
-          plainText: chunk.text,
-        });
-        if (replyToId && !hasReplied) {
-          hasReplied = true;
+      // Send deferred follow-up text right after the first media item.
+      // Chunk it in case it's extremely long. NOTE(review): sends chunk.text with no parse mode, not chunk.html — confirm intended.
+      if (pendingFollowUpText && isFirstMedia) {
+        const chunks = markdownToTelegramChunks(pendingFollowUpText, textLimit);
+        for (const chunk of chunks) {
+          const replyToMessageIdFollowup =
+            replyToId && (replyToMode === "all" || !hasReplied) ? replyToId : undefined;
+          const textParams: Record<string, unknown> = {};
+          if (replyToMessageIdFollowup) {
+            textParams.reply_to_message_id = replyToMessageIdFollowup;
+          }
+          if (threadParams) {
+            textParams.message_thread_id = threadParams.message_thread_id;
+          }
+          await bot.api.sendMessage(chatId, chunk.text, textParams);
+          if (replyToId && !hasReplied) {
+            hasReplied = true;
+          }
         }
+        pendingFollowUpText = undefined;
       }
     }
   }