fix(whatsapp): allow media-only sends and normalize leading blank payloads (#14408)

Co-authored-by: Tak Hoffman <781889+Takhoffman@users.noreply.github.com>
This commit is contained in:
Karim Naguib
2026-02-11 21:21:21 -08:00
committed by GitHub
parent 186dc0363f
commit 7a0591ef87
11 changed files with 352 additions and 14 deletions

View File

@@ -84,6 +84,10 @@ jobs:
esac
case "$path" in
# Generated protocol models are already covered by protocol:check and
# should not force the full native macOS lane.
apps/macos/Sources/OpenClawProtocol/*|apps/shared/OpenClawKit/Sources/OpenClawProtocol/*)
;;
apps/macos/*|apps/ios/*|apps/shared/*|Swabble/*)
run_macos=true
;;

View File

@@ -1,4 +1,5 @@
// Generated by scripts/protocol-gen-swift.ts — do not edit by hand
// swiftlint:disable file_length
import Foundation
public let GATEWAY_PROTOCOL_VERSION = 3
@@ -383,7 +384,7 @@ public struct AgentEvent: Codable, Sendable {
public struct SendParams: Codable, Sendable {
public let to: String
public let message: String
public let message: String?
public let mediaurl: String?
public let mediaurls: [String]?
public let gifplayback: Bool?
@@ -394,7 +395,7 @@ public struct SendParams: Codable, Sendable {
public init(
to: String,
message: String,
message: String?,
mediaurl: String?,
mediaurls: [String]?,
gifplayback: Bool?,

View File

@@ -1,4 +1,5 @@
// Generated by scripts/protocol-gen-swift.ts — do not edit by hand
// swiftlint:disable file_length
import Foundation
public let GATEWAY_PROTOCOL_VERSION = 3
@@ -383,7 +384,7 @@ public struct AgentEvent: Codable, Sendable {
public struct SendParams: Codable, Sendable {
public let to: String
public let message: String
public let message: String?
public let mediaurl: String?
public let mediaurls: [String]?
public let gifplayback: Bool?
@@ -394,7 +395,7 @@ public struct SendParams: Codable, Sendable {
public init(
to: String,
message: String,
message: String?,
mediaurl: String?,
mediaurls: [String]?,
gifplayback: Bool?,

View File

@@ -27,7 +27,7 @@ const outPaths = [
),
];
const header = `// Generated by scripts/protocol-gen-swift.ts — do not edit by hand\nimport Foundation\n\npublic let GATEWAY_PROTOCOL_VERSION = ${PROTOCOL_VERSION}\n\npublic enum ErrorCode: String, Codable, Sendable {\n${Object.values(
const header = `// Generated by scripts/protocol-gen-swift.ts — do not edit by hand\n// swiftlint:disable file_length\nimport Foundation\n\npublic let GATEWAY_PROTOCOL_VERSION = ${PROTOCOL_VERSION}\n\npublic enum ErrorCode: String, Codable, Sendable {\n${Object.values(
ErrorCodes,
)
.map((c) => ` case ${camelCase(c)} = "${c}"`)

View File

@@ -15,7 +15,7 @@ export const AgentEventSchema = Type.Object(
export const SendParamsSchema = Type.Object(
{
to: NonEmptyString,
message: NonEmptyString,
message: Type.Optional(Type.String()),
mediaUrl: Type.Optional(Type.String()),
mediaUrls: Type.Optional(Type.Array(Type.String())),
gifPlayback: Type.Optional(Type.Boolean()),

View File

@@ -1,4 +1,4 @@
import { describe, expect, it, vi } from "vitest";
import { beforeEach, describe, expect, it, vi } from "vitest";
import type { GatewayRequestContext } from "./types.js";
import { sendHandlers } from "./send.js";
@@ -47,6 +47,67 @@ const makeContext = (): GatewayRequestContext =>
}) as unknown as GatewayRequestContext;
describe("gateway send mirroring", () => {
// Clear every vitest mock before each test in this suite; the cases below
// assert on call history (e.g. `not.toHaveBeenCalled()`).
beforeEach(() => {
vi.clearAllMocks();
});
// A request that carries only `mediaUrl` (no `message`) must be delivered with
// an empty text field and answered as a success.
it("accepts media-only sends without message", async () => {
  const deliveryResult = { messageId: "m-media", channel: "slack" };
  mocks.deliverOutboundPayloads.mockResolvedValue([deliveryResult]);
  const respond = vi.fn();
  const params = {
    to: "channel:C1",
    mediaUrl: "https://example.com/a.png",
    channel: "slack",
    idempotencyKey: "idem-media-only",
  };

  await sendHandlers.send({
    params,
    respond,
    context: makeContext(),
    req: { type: "req", id: "1", method: "send" },
    client: null,
    isWebchatConnect: () => false,
  });

  // The outbound payload keeps the media URL and carries an empty text.
  const expectedPayload = {
    text: "",
    mediaUrl: "https://example.com/a.png",
    mediaUrls: undefined,
  };
  expect(mocks.deliverOutboundPayloads).toHaveBeenCalledWith(
    expect.objectContaining({ payloads: [expectedPayload] }),
  );

  // Success response echoes the delivered message id and the channel.
  const expectedResult = expect.objectContaining({ messageId: "m-media" });
  const expectedMeta = expect.objectContaining({ channel: "slack" });
  expect(respond).toHaveBeenCalledWith(true, expectedResult, undefined, expectedMeta);
});
// A whitespace-only `message` with no media must be rejected before any
// delivery is attempted.
it("rejects empty sends when neither text nor media is present", async () => {
  const respond = vi.fn();
  const params = {
    to: "channel:C1",
    message: " ",
    channel: "slack",
    idempotencyKey: "idem-empty",
  };

  await sendHandlers.send({
    params,
    respond,
    context: makeContext(),
    req: { type: "req", id: "1", method: "send" },
    client: null,
    isWebchatConnect: () => false,
  });

  // Nothing is handed to the outbound layer.
  expect(mocks.deliverOutboundPayloads).not.toHaveBeenCalled();

  // The failure response explains that text or media is required.
  const expectedError = expect.objectContaining({
    message: expect.stringContaining("text or media is required"),
  });
  expect(respond).toHaveBeenCalledWith(false, undefined, expectedError);
});
it("does not mirror when delivery returns no results", async () => {
mocks.deliverOutboundPayloads.mockResolvedValue([]);

View File

@@ -58,7 +58,7 @@ export const sendHandlers: GatewayRequestHandlers = {
}
const request = p as {
to: string;
message: string;
message?: string;
mediaUrl?: string;
mediaUrls?: string[];
gifPlayback?: boolean;
@@ -85,8 +85,24 @@ export const sendHandlers: GatewayRequestHandlers = {
return;
}
const to = request.to.trim();
const message = request.message.trim();
const mediaUrls = Array.isArray(request.mediaUrls) ? request.mediaUrls : undefined;
const message = typeof request.message === "string" ? request.message.trim() : "";
const mediaUrl =
typeof request.mediaUrl === "string" && request.mediaUrl.trim().length > 0
? request.mediaUrl.trim()
: undefined;
const mediaUrls = Array.isArray(request.mediaUrls)
? request.mediaUrls
.map((entry) => (typeof entry === "string" ? entry.trim() : ""))
.filter((entry) => entry.length > 0)
: undefined;
if (!message && !mediaUrl && (mediaUrls?.length ?? 0) === 0) {
respond(
false,
undefined,
errorShape(ErrorCodes.INVALID_REQUEST, "invalid send params: text or media is required"),
);
return;
}
const channelInput = typeof request.channel === "string" ? request.channel : undefined;
const normalizedChannel = channelInput ? normalizeChannelId(channelInput) : null;
if (channelInput && !normalizedChannel) {
@@ -132,7 +148,7 @@ export const sendHandlers: GatewayRequestHandlers = {
}
const outboundDeps = context.deps ? createOutboundSendDeps(context.deps) : undefined;
const mirrorPayloads = normalizeReplyPayloadsForDelivery([
{ text: message, mediaUrl: request.mediaUrl, mediaUrls },
{ text: message, mediaUrl, mediaUrls },
]);
const mirrorText = mirrorPayloads
.map((payload) => payload.text)
@@ -170,7 +186,7 @@ export const sendHandlers: GatewayRequestHandlers = {
channel: outboundChannel,
to: resolved.to,
accountId,
payloads: [{ text: message, mediaUrl: request.mediaUrl, mediaUrls }],
payloads: [{ text: message, mediaUrl, mediaUrls }],
gifPlayback: request.gifPlayback,
deps: outboundDeps,
mirror: providedSessionKey

View File

@@ -196,6 +196,73 @@ describe("deliverOutboundPayloads", () => {
);
});
// Blank lines at the start of a WhatsApp text payload must not reach the sender.
it("strips leading blank lines for WhatsApp text payloads", async () => {
  const whatsAppSender = vi.fn().mockResolvedValue({ messageId: "w1", toJid: "jid" });
  const whatsAppConfig: OpenClawConfig = {
    channels: { whatsapp: { textChunkLimit: 4000 } },
  };

  await deliverOutboundPayloads({
    cfg: whatsAppConfig,
    channel: "whatsapp",
    to: "+1555",
    payloads: [{ text: "\n\nHello from WhatsApp" }],
    deps: { sendWhatsApp: whatsAppSender },
  });

  expect(whatsAppSender).toHaveBeenCalledTimes(1);
  const expectedOptions = expect.objectContaining({ verbose: false });
  expect(whatsAppSender).toHaveBeenNthCalledWith(
    1,
    "+1555",
    "Hello from WhatsApp",
    expectedOptions,
  );
});
// A WhatsApp payload whose text is only whitespace and that has no media is
// dropped: the sender is never invoked and no results come back.
it("drops whitespace-only WhatsApp text payloads when no media is attached", async () => {
  const whatsAppSender = vi.fn().mockResolvedValue({ messageId: "w1", toJid: "jid" });
  const whatsAppConfig: OpenClawConfig = {
    channels: { whatsapp: { textChunkLimit: 4000 } },
  };

  const deliveryResults = await deliverOutboundPayloads({
    cfg: whatsAppConfig,
    channel: "whatsapp",
    to: "+1555",
    payloads: [{ text: " \n\t " }],
    deps: { sendWhatsApp: whatsAppSender },
  });

  expect(whatsAppSender).not.toHaveBeenCalled();
  expect(deliveryResults).toEqual([]);
});
// When media is attached, a whitespace-only caption is replaced by an empty
// string and the media is still sent.
it("keeps WhatsApp media payloads but clears whitespace-only captions", async () => {
  const whatsAppSender = vi.fn().mockResolvedValue({ messageId: "w1", toJid: "jid" });
  const whatsAppConfig: OpenClawConfig = {
    channels: { whatsapp: { textChunkLimit: 4000 } },
  };

  await deliverOutboundPayloads({
    cfg: whatsAppConfig,
    channel: "whatsapp",
    to: "+1555",
    payloads: [{ text: " \n\t ", mediaUrl: "https://example.com/photo.png" }],
    deps: { sendWhatsApp: whatsAppSender },
  });

  expect(whatsAppSender).toHaveBeenCalledTimes(1);
  const expectedOptions = expect.objectContaining({
    mediaUrl: "https://example.com/photo.png",
    verbose: false,
  });
  expect(whatsAppSender).toHaveBeenNthCalledWith(1, "+1555", "", expectedOptions);
});
it("preserves fenced blocks for markdown chunkers in newline mode", async () => {
const chunker = vi.fn((text: string) => (text ? [text] : []));
const sendText = vi.fn().mockImplementation(async ({ text }: { text: string }) => ({

View File

@@ -312,7 +312,31 @@ export async function deliverOutboundPayloads(params: {
})),
};
};
const normalizedPayloads = normalizeReplyPayloadsForDelivery(payloads);
/**
 * Normalizes a single reply payload for WhatsApp delivery.
 *
 * - Leading blank lines (optionally containing spaces/tabs) are removed from the text.
 * - If the remaining text is only whitespace and the payload has media
 *   (`mediaUrl` or a non-empty `mediaUrls`), the payload is kept with `text: ""`.
 * - If the remaining text is only whitespace and there is no media, `null` is
 *   returned so the caller can drop the payload.
 *
 * A non-string `text` is treated as an empty string. The input payload is not
 * mutated; a shallow copy is returned.
 */
const normalizeWhatsAppPayload = (payload: ReplyPayload): ReplyPayload | null => {
  const text = typeof payload.text === "string" ? payload.text : "";
  // Only whole blank lines at the very start are stripped; indentation on the
  // first non-blank line is preserved.
  const withoutLeadingBlankLines = text.replace(/^(?:[ \t]*\r?\n)+/, "");

  if (withoutLeadingBlankLines.trim()) {
    return { ...payload, text: withoutLeadingBlankLines };
  }

  const mediaCount = payload.mediaUrls?.length ?? 0;
  const carriesMedia = Boolean(payload.mediaUrl) || mediaCount > 0;
  return carriesMedia ? { ...payload, text: "" } : null;
};
const normalizedPayloads = normalizeReplyPayloadsForDelivery(payloads).flatMap((payload) => {
if (channel !== "whatsapp") {
return [payload];
}
const normalized = normalizeWhatsAppPayload(payload);
return normalized ? [normalized] : [];
});
for (const payload of normalizedPayloads) {
const payloadSummary: NormalizedOutboundPayload = {
text: payload.text ?? "",

View File

@@ -9,7 +9,11 @@ import { telegramPlugin } from "../../../extensions/telegram/src/channel.js";
import { whatsappPlugin } from "../../../extensions/whatsapp/src/channel.js";
import { jsonResult } from "../../agents/tools/common.js";
import { setActivePluginRegistry } from "../../plugins/runtime.js";
import { createIMessageTestPlugin, createTestRegistry } from "../../test-utils/channel-plugins.js";
import {
createIMessageTestPlugin,
createOutboundTestPlugin,
createTestRegistry,
} from "../../test-utils/channel-plugins.js";
import { loadWebMedia } from "../../web/media.js";
import { runMessageAction } from "./message-action-runner.js";
@@ -609,6 +613,152 @@ describe("runMessageAction sandboxed media validation", () => {
});
});
// Suite: a `caption` param is used as the message text when a media send has
// no `message` of its own.
describe("runMessageAction media caption behavior", () => {
  // Reset to an empty plugin registry after every case.
  afterEach(() => {
    setActivePluginRegistry(createTestRegistry([]));
  });

  it("promotes caption to message for media sends when message is empty", async () => {
    const sendMedia = vi.fn().mockResolvedValue({
      channel: "testchat",
      messageId: "m1",
      chatId: "c1",
    });
    const sendText = vi.fn().mockResolvedValue({
      channel: "testchat",
      messageId: "t1",
      chatId: "c1",
    });

    // Register a direct-delivery outbound plugin whose media sender is observable.
    const plugin = createOutboundTestPlugin({
      id: "testchat",
      outbound: { deliveryMode: "direct", sendText, sendMedia },
    });
    setActivePluginRegistry(
      createTestRegistry([{ pluginId: "testchat", source: "test", plugin }]),
    );

    const cfg = { channels: { testchat: { enabled: true } } } as OpenClawConfig;

    // Only `media` and `caption` are supplied; there is no `message` param.
    const result = await runMessageAction({
      cfg,
      action: "send",
      params: {
        channel: "testchat",
        target: "channel:abc",
        media: "https://example.com/cat.png",
        caption: "caption-only text",
      },
      dryRun: false,
    });

    expect(result.kind).toBe("send");
    const expectedSend = expect.objectContaining({
      text: "caption-only text",
      mediaUrl: "https://example.com/cat.png",
    });
    expect(sendMedia).toHaveBeenCalledWith(expectedSend);
  });
});
// Suite: a send that carries only a `card` (no text, no media) is accepted and
// routed to the plugin's own action handler.
describe("runMessageAction card-only send behavior", () => {
  // Plugin action handler stub: echoes the card and message it was given.
  const handleAction = vi.fn(async ({ params }: { params: Record<string, unknown> }) => {
    const echoed = {
      ok: true,
      card: params.card ?? null,
      message: params.message ?? null,
    };
    return jsonResult(echoed);
  });

  // Minimal channel plugin that only supports the "send" action.
  const cardPlugin: ChannelPlugin = {
    id: "cardchat",
    meta: {
      id: "cardchat",
      label: "Card Chat",
      selectionLabel: "Card Chat",
      docsPath: "/channels/cardchat",
      blurb: "Card-only send test plugin.",
    },
    capabilities: { chatTypes: ["direct"] },
    config: {
      listAccountIds: () => ["default"],
      resolveAccount: () => ({ enabled: true }),
      isConfigured: () => true,
    },
    actions: {
      listActions: () => ["send"],
      supportsAction: ({ action }) => action === "send",
      handleAction,
    },
  };

  beforeEach(() => {
    const registry = createTestRegistry([
      { pluginId: "cardchat", source: "test", plugin: cardPlugin },
    ]);
    setActivePluginRegistry(registry);
    handleAction.mockClear();
  });

  afterEach(() => {
    setActivePluginRegistry(createTestRegistry([]));
    vi.clearAllMocks();
  });

  it("allows card-only sends without text or media", async () => {
    const cfg = { channels: { cardchat: { enabled: true } } } as OpenClawConfig;
    const card = {
      type: "AdaptiveCard",
      version: "1.4",
      body: [{ type: "TextBlock", text: "Card-only payload" }],
    };

    const result = await runMessageAction({
      cfg,
      action: "send",
      params: { channel: "cardchat", target: "channel:test-card", card },
      dryRun: false,
    });

    expect(result.kind).toBe("send");
    expect(result.handledBy).toBe("plugin");
    expect(handleAction).toHaveBeenCalled();
    expect(result.payload).toMatchObject({ ok: true, card });
  });
});
describe("runMessageAction accountId defaults", () => {
const handleAction = vi.fn(async () => jsonResult({ ok: true }));
const accountPlugin: ChannelPlugin = {

View File

@@ -745,6 +745,7 @@ async function handleSendAction(ctx: ResolvedActionContext): Promise<MessageActi
readStringParam(params, "path", { trim: false }) ??
readStringParam(params, "filePath", { trim: false });
const hasCard = params.card != null && typeof params.card === "object";
const caption = readStringParam(params, "caption", { allowEmpty: true }) ?? "";
let message =
readStringParam(params, "message", {
required: !mediaHint && !hasCard,
@@ -753,6 +754,9 @@ async function handleSendAction(ctx: ResolvedActionContext): Promise<MessageActi
if (message.includes("\\n")) {
message = message.replaceAll("\\n", "\n");
}
if (!message.trim() && caption.trim()) {
message = caption;
}
const parsed = parseReplyDirectives(message);
const mergedMediaUrls: string[] = [];
@@ -804,6 +808,16 @@ async function handleSendAction(ctx: ResolvedActionContext): Promise<MessageActi
});
const mediaUrl = readStringParam(params, "media", { trim: false });
if (channel === "whatsapp") {
message = message.replace(/^(?:[ \t]*\r?\n)+/, "");
if (!message.trim()) {
message = "";
}
}
if (!message.trim() && !mediaUrl && mergedMediaUrls.length === 0 && !hasCard) {
throw new Error("send requires text or media");
}
params.message = message;
const gifPlayback = readBooleanParam(params, "gifPlayback") ?? false;
const bestEffort = readBooleanParam(params, "bestEffort");