From 7a0591ef879350d74ed399b66008f1525531d56b Mon Sep 17 00:00:00 2001 From: Karim Naguib Date: Wed, 11 Feb 2026 21:21:21 -0800 Subject: [PATCH] fix(whatsapp): allow media-only sends and normalize leading blank payloads (#14408) Co-authored-by: Tak Hoffman <781889+Takhoffman@users.noreply.github.com> --- .github/workflows/ci.yml | 4 + .../OpenClawProtocol/GatewayModels.swift | 5 +- .../OpenClawProtocol/GatewayModels.swift | 5 +- scripts/protocol-gen-swift.ts | 2 +- src/gateway/protocol/schema/agent.ts | 2 +- src/gateway/server-methods/send.test.ts | 63 +++++++- src/gateway/server-methods/send.ts | 26 ++- src/infra/outbound/deliver.test.ts | 67 ++++++++ src/infra/outbound/deliver.ts | 26 ++- .../outbound/message-action-runner.test.ts | 152 +++++++++++++++++- src/infra/outbound/message-action-runner.ts | 14 ++ 11 files changed, 352 insertions(+), 14 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index e2680707a0..b84ca6da4b 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -84,6 +84,10 @@ jobs: esac case "$path" in + # Generated protocol models are already covered by protocol:check and + # should not force the full native macOS lane. + apps/macos/Sources/OpenClawProtocol/*|apps/shared/OpenClawKit/Sources/OpenClawProtocol/*) + ;; apps/macos/*|apps/ios/*|apps/shared/*|Swabble/*) run_macos=true ;; diff --git a/apps/macos/Sources/OpenClawProtocol/GatewayModels.swift b/apps/macos/Sources/OpenClawProtocol/GatewayModels.swift index 9e88442266..c82e218c64 100644 --- a/apps/macos/Sources/OpenClawProtocol/GatewayModels.swift +++ b/apps/macos/Sources/OpenClawProtocol/GatewayModels.swift @@ -1,4 +1,5 @@ // Generated by scripts/protocol-gen-swift.ts — do not edit by hand +// swiftlint:disable file_length import Foundation public let GATEWAY_PROTOCOL_VERSION = 3 @@ -383,7 +384,7 @@ public struct AgentEvent: Codable, Sendable { public struct SendParams: Codable, Sendable { public let to: String - public let message: String + public let message: String? public let mediaurl: String? public let mediaurls: [String]? public let gifplayback: Bool? @@ -394,7 +395,7 @@ public struct SendParams: Codable, Sendable { public init( to: String, - message: String, + message: String?, mediaurl: String?, mediaurls: [String]?, gifplayback: Bool?, diff --git a/apps/shared/OpenClawKit/Sources/OpenClawProtocol/GatewayModels.swift b/apps/shared/OpenClawKit/Sources/OpenClawProtocol/GatewayModels.swift index 9e88442266..c82e218c64 100644 --- a/apps/shared/OpenClawKit/Sources/OpenClawProtocol/GatewayModels.swift +++ b/apps/shared/OpenClawKit/Sources/OpenClawProtocol/GatewayModels.swift @@ -1,4 +1,5 @@ // Generated by scripts/protocol-gen-swift.ts — do not edit by hand +// swiftlint:disable file_length import Foundation public let GATEWAY_PROTOCOL_VERSION = 3 @@ -383,7 +384,7 @@ public struct AgentEvent: Codable, Sendable { public struct SendParams: Codable, Sendable { public let to: String - public let message: String + public let message: String? public let mediaurl: String? public let mediaurls: [String]? public let gifplayback: Bool? @@ -394,7 +395,7 @@ public struct SendParams: Codable, Sendable { public init( to: String, - message: String, + message: String?, mediaurl: String?, mediaurls: [String]?, gifplayback: Bool?, diff --git a/scripts/protocol-gen-swift.ts b/scripts/protocol-gen-swift.ts index 66ff0dbdb1..8c62311cda 100644 --- a/scripts/protocol-gen-swift.ts +++ b/scripts/protocol-gen-swift.ts @@ -27,7 +27,7 @@ const outPaths = [ ), ]; -const header = `// Generated by scripts/protocol-gen-swift.ts — do not edit by hand\nimport Foundation\n\npublic let GATEWAY_PROTOCOL_VERSION = ${PROTOCOL_VERSION}\n\npublic enum ErrorCode: String, Codable, Sendable {\n${Object.values( +const header = `// Generated by scripts/protocol-gen-swift.ts — do not edit by hand\n// swiftlint:disable file_length\nimport Foundation\n\npublic let GATEWAY_PROTOCOL_VERSION = ${PROTOCOL_VERSION}\n\npublic enum ErrorCode: String, Codable, Sendable {\n${Object.values( ErrorCodes, ) .map((c) => ` case ${camelCase(c)} = "${c}"`) diff --git a/src/gateway/protocol/schema/agent.ts b/src/gateway/protocol/schema/agent.ts index 3d6123df63..f82f4f98e5 100644 --- a/src/gateway/protocol/schema/agent.ts +++ b/src/gateway/protocol/schema/agent.ts @@ -15,7 +15,7 @@ export const AgentEventSchema = Type.Object( export const SendParamsSchema = Type.Object( { to: NonEmptyString, - message: NonEmptyString, + message: Type.Optional(Type.String()), mediaUrl: Type.Optional(Type.String()), mediaUrls: Type.Optional(Type.Array(Type.String())), gifPlayback: Type.Optional(Type.Boolean()), diff --git a/src/gateway/server-methods/send.test.ts b/src/gateway/server-methods/send.test.ts index e581aed2c5..96743976bf 100644 --- a/src/gateway/server-methods/send.test.ts +++ b/src/gateway/server-methods/send.test.ts @@ -1,4 +1,4 @@ -import { describe, expect, it, vi } from "vitest"; +import { beforeEach, describe, expect, it, vi } from "vitest"; import type { GatewayRequestContext } from "./types.js"; import { sendHandlers } from "./send.js"; @@ -47,6 +47,67 @@ const makeContext = (): GatewayRequestContext => }) as unknown as GatewayRequestContext; describe("gateway send mirroring", () => { + beforeEach(() => { + vi.clearAllMocks(); + }); + + it("accepts media-only sends without message", async () => { + mocks.deliverOutboundPayloads.mockResolvedValue([{ messageId: "m-media", channel: "slack" }]); + + const respond = vi.fn(); + await sendHandlers.send({ + params: { + to: "channel:C1", + mediaUrl: "https://example.com/a.png", + channel: "slack", + idempotencyKey: "idem-media-only", + }, + respond, + context: makeContext(), + req: { type: "req", id: "1", method: "send" }, + client: null, + isWebchatConnect: () => false, + }); + + expect(mocks.deliverOutboundPayloads).toHaveBeenCalledWith( + expect.objectContaining({ + payloads: [{ text: "", mediaUrl: "https://example.com/a.png", mediaUrls: undefined }], + }), + ); + expect(respond).toHaveBeenCalledWith( + true, + expect.objectContaining({ messageId: "m-media" }), + undefined, + expect.objectContaining({ channel: "slack" }), + ); + }); + + it("rejects empty sends when neither text nor media is present", async () => { + const respond = vi.fn(); + await sendHandlers.send({ + params: { + to: "channel:C1", + message: " ", + channel: "slack", + idempotencyKey: "idem-empty", + }, + respond, + context: makeContext(), + req: { type: "req", id: "1", method: "send" }, + client: null, + isWebchatConnect: () => false, + }); + + expect(mocks.deliverOutboundPayloads).not.toHaveBeenCalled(); + expect(respond).toHaveBeenCalledWith( + false, + undefined, + expect.objectContaining({ + message: expect.stringContaining("text or media is required"), + }), + ); + }); + it("does not mirror when delivery returns no results", async () => { mocks.deliverOutboundPayloads.mockResolvedValue([]); diff --git a/src/gateway/server-methods/send.ts b/src/gateway/server-methods/send.ts index 246ee27e27..c7d42f7ce3 100644 --- a/src/gateway/server-methods/send.ts +++ b/src/gateway/server-methods/send.ts @@ -58,7 +58,7 @@ export const sendHandlers: GatewayRequestHandlers = { } const request = p as { to: string; - message: string; + message?: string; mediaUrl?: string; mediaUrls?: string[]; gifPlayback?: boolean; @@ -85,8 +85,24 @@ export const sendHandlers: GatewayRequestHandlers = { return; } const to = request.to.trim(); - const message = request.message.trim(); - const mediaUrls = Array.isArray(request.mediaUrls) ? request.mediaUrls : undefined; + const message = typeof request.message === "string" ? request.message.trim() : ""; + const mediaUrl = + typeof request.mediaUrl === "string" && request.mediaUrl.trim().length > 0 + ? request.mediaUrl.trim() + : undefined; + const mediaUrls = Array.isArray(request.mediaUrls) + ? request.mediaUrls + .map((entry) => (typeof entry === "string" ? entry.trim() : "")) + .filter((entry) => entry.length > 0) + : undefined; + if (!message && !mediaUrl && (mediaUrls?.length ?? 0) === 0) { + respond( + false, + undefined, + errorShape(ErrorCodes.INVALID_REQUEST, "invalid send params: text or media is required"), + ); + return; + } const channelInput = typeof request.channel === "string" ? request.channel : undefined; const normalizedChannel = channelInput ? normalizeChannelId(channelInput) : null; if (channelInput && !normalizedChannel) { @@ -132,7 +148,7 @@ export const sendHandlers: GatewayRequestHandlers = { } const outboundDeps = context.deps ? createOutboundSendDeps(context.deps) : undefined; const mirrorPayloads = normalizeReplyPayloadsForDelivery([ - { text: message, mediaUrl: request.mediaUrl, mediaUrls }, + { text: message, mediaUrl, mediaUrls }, ]); const mirrorText = mirrorPayloads .map((payload) => payload.text) @@ -170,7 +186,7 @@ export const sendHandlers: GatewayRequestHandlers = { channel: outboundChannel, to: resolved.to, accountId, - payloads: [{ text: message, mediaUrl: request.mediaUrl, mediaUrls }], + payloads: [{ text: message, mediaUrl, mediaUrls }], gifPlayback: request.gifPlayback, deps: outboundDeps, mirror: providedSessionKey diff --git a/src/infra/outbound/deliver.test.ts b/src/infra/outbound/deliver.test.ts index 417e037f03..967ac254a3 100644 --- a/src/infra/outbound/deliver.test.ts +++ b/src/infra/outbound/deliver.test.ts @@ -196,6 +196,73 @@ describe("deliverOutboundPayloads", () => { ); }); + it("strips leading blank lines for WhatsApp text payloads", async () => { + const sendWhatsApp = vi.fn().mockResolvedValue({ messageId: "w1", toJid: "jid" }); + const cfg: OpenClawConfig = { + channels: { whatsapp: { textChunkLimit: 4000 } }, + }; + + await deliverOutboundPayloads({ + cfg, + channel: "whatsapp", + to: "+1555", + payloads: [{ text: "\n\nHello from WhatsApp" }], + deps: { sendWhatsApp }, + }); + + expect(sendWhatsApp).toHaveBeenCalledTimes(1); + expect(sendWhatsApp).toHaveBeenNthCalledWith( + 1, + "+1555", + "Hello from WhatsApp", + expect.objectContaining({ verbose: false }), + ); + }); + + it("drops whitespace-only WhatsApp text payloads when no media is attached", async () => { + const sendWhatsApp = vi.fn().mockResolvedValue({ messageId: "w1", toJid: "jid" }); + const cfg: OpenClawConfig = { + channels: { whatsapp: { textChunkLimit: 4000 } }, + }; + + const results = await deliverOutboundPayloads({ + cfg, + channel: "whatsapp", + to: "+1555", + payloads: [{ text: " \n\t " }], + deps: { sendWhatsApp }, + }); + + expect(sendWhatsApp).not.toHaveBeenCalled(); + expect(results).toEqual([]); + }); + + it("keeps WhatsApp media payloads but clears whitespace-only captions", async () => { + const sendWhatsApp = vi.fn().mockResolvedValue({ messageId: "w1", toJid: "jid" }); + const cfg: OpenClawConfig = { + channels: { whatsapp: { textChunkLimit: 4000 } }, + }; + + await deliverOutboundPayloads({ + cfg, + channel: "whatsapp", + to: "+1555", + payloads: [{ text: " \n\t ", mediaUrl: "https://example.com/photo.png" }], + deps: { sendWhatsApp }, + }); + + expect(sendWhatsApp).toHaveBeenCalledTimes(1); + expect(sendWhatsApp).toHaveBeenNthCalledWith( + 1, + "+1555", + "", + expect.objectContaining({ + mediaUrl: "https://example.com/photo.png", + verbose: false, + }), + ); + }); + it("preserves fenced blocks for markdown chunkers in newline mode", async () => { const chunker = vi.fn((text: string) => (text ? [text] : [])); const sendText = vi.fn().mockImplementation(async ({ text }: { text: string }) => ({ diff --git a/src/infra/outbound/deliver.ts b/src/infra/outbound/deliver.ts index 186f30a748..f9d756f741 100644 --- a/src/infra/outbound/deliver.ts +++ b/src/infra/outbound/deliver.ts @@ -312,7 +312,31 @@ export async function deliverOutboundPayloads(params: { })), }; }; - const normalizedPayloads = normalizeReplyPayloadsForDelivery(payloads); + const normalizeWhatsAppPayload = (payload: ReplyPayload): ReplyPayload | null => { + const hasMedia = Boolean(payload.mediaUrl) || (payload.mediaUrls?.length ?? 0) > 0; + const rawText = typeof payload.text === "string" ? payload.text : ""; + const normalizedText = rawText.replace(/^(?:[ \t]*\r?\n)+/, ""); + if (!normalizedText.trim()) { + if (!hasMedia) { + return null; + } + return { + ...payload, + text: "", + }; + } + return { + ...payload, + text: normalizedText, + }; + }; + const normalizedPayloads = normalizeReplyPayloadsForDelivery(payloads).flatMap((payload) => { + if (channel !== "whatsapp") { + return [payload]; + } + const normalized = normalizeWhatsAppPayload(payload); + return normalized ? [normalized] : []; + }); for (const payload of normalizedPayloads) { const payloadSummary: NormalizedOutboundPayload = { text: payload.text ?? "", diff --git a/src/infra/outbound/message-action-runner.test.ts b/src/infra/outbound/message-action-runner.test.ts index 5926050ee3..6b8bfd4ef7 100644 --- a/src/infra/outbound/message-action-runner.test.ts +++ b/src/infra/outbound/message-action-runner.test.ts @@ -9,7 +9,11 @@ import { telegramPlugin } from "../../../extensions/telegram/src/channel.js"; import { whatsappPlugin } from "../../../extensions/whatsapp/src/channel.js"; import { jsonResult } from "../../agents/tools/common.js"; import { setActivePluginRegistry } from "../../plugins/runtime.js"; -import { createIMessageTestPlugin, createTestRegistry } from "../../test-utils/channel-plugins.js"; +import { + createIMessageTestPlugin, + createOutboundTestPlugin, + createTestRegistry, +} from "../../test-utils/channel-plugins.js"; import { loadWebMedia } from "../../web/media.js"; import { runMessageAction } from "./message-action-runner.js"; @@ -609,6 +613,152 @@ describe("runMessageAction sandboxed media validation", () => { }); }); +describe("runMessageAction media caption behavior", () => { + afterEach(() => { + setActivePluginRegistry(createTestRegistry([])); + }); + + it("promotes caption to message for media sends when message is empty", async () => { + const sendMedia = vi.fn().mockResolvedValue({ + channel: "testchat", + messageId: "m1", + chatId: "c1", + }); + setActivePluginRegistry( + createTestRegistry([ + { + pluginId: "testchat", + source: "test", + plugin: createOutboundTestPlugin({ + id: "testchat", + outbound: { + deliveryMode: "direct", + sendText: vi.fn().mockResolvedValue({ + channel: "testchat", + messageId: "t1", + chatId: "c1", + }), + sendMedia, + }, + }), + }, + ]), + ); + const cfg = { + channels: { + testchat: { + enabled: true, + }, + }, + } as OpenClawConfig; + + const result = await runMessageAction({ + cfg, + action: "send", + params: { + channel: "testchat", + target: "channel:abc", + media: "https://example.com/cat.png", + caption: "caption-only text", + }, + dryRun: false, + }); + + expect(result.kind).toBe("send"); + expect(sendMedia).toHaveBeenCalledWith( + expect.objectContaining({ + text: "caption-only text", + mediaUrl: "https://example.com/cat.png", + }), + ); + }); +}); + +describe("runMessageAction card-only send behavior", () => { + const handleAction = vi.fn(async ({ params }: { params: Record }) => + jsonResult({ + ok: true, + card: params.card ?? null, + message: params.message ?? null, + }), + ); + + const cardPlugin: ChannelPlugin = { + id: "cardchat", + meta: { + id: "cardchat", + label: "Card Chat", + selectionLabel: "Card Chat", + docsPath: "/channels/cardchat", + blurb: "Card-only send test plugin.", + }, + capabilities: { chatTypes: ["direct"] }, + config: { + listAccountIds: () => ["default"], + resolveAccount: () => ({ enabled: true }), + isConfigured: () => true, + }, + actions: { + listActions: () => ["send"], + supportsAction: ({ action }) => action === "send", + handleAction, + }, + }; + + beforeEach(() => { + setActivePluginRegistry( + createTestRegistry([ + { + pluginId: "cardchat", + source: "test", + plugin: cardPlugin, + }, + ]), + ); + handleAction.mockClear(); + }); + + afterEach(() => { + setActivePluginRegistry(createTestRegistry([])); + vi.clearAllMocks(); + }); + + it("allows card-only sends without text or media", async () => { + const cfg = { + channels: { + cardchat: { + enabled: true, + }, + }, + } as OpenClawConfig; + + const card = { + type: "AdaptiveCard", + version: "1.4", + body: [{ type: "TextBlock", text: "Card-only payload" }], + }; + + const result = await runMessageAction({ + cfg, + action: "send", + params: { + channel: "cardchat", + target: "channel:test-card", + card, + }, + dryRun: false, + }); + + expect(result.kind).toBe("send"); + expect(result.handledBy).toBe("plugin"); + expect(handleAction).toHaveBeenCalled(); + expect(result.payload).toMatchObject({ + ok: true, + card, + }); + }); +}); + describe("runMessageAction accountId defaults", () => { const handleAction = vi.fn(async () => jsonResult({ ok: true })); const accountPlugin: ChannelPlugin = { diff --git a/src/infra/outbound/message-action-runner.ts b/src/infra/outbound/message-action-runner.ts index fc842a7efc..16d5029ec2 100644 --- a/src/infra/outbound/message-action-runner.ts +++ b/src/infra/outbound/message-action-runner.ts @@ -745,6 +745,7 @@ async function handleSendAction(ctx: ResolvedActionContext): Promise