mirror of
https://github.com/openclaw/openclaw.git
synced 2026-02-19 18:39:20 -05:00
fix(whatsapp): allow media-only sends and normalize leading blank payloads (#14408)
Co-authored-by: Tak Hoffman <781889+Takhoffman@users.noreply.github.com>
This commit is contained in:
4
.github/workflows/ci.yml
vendored
4
.github/workflows/ci.yml
vendored
@@ -84,6 +84,10 @@ jobs:
|
||||
esac
|
||||
|
||||
case "$path" in
|
||||
# Generated protocol models are already covered by protocol:check and
|
||||
# should not force the full native macOS lane.
|
||||
apps/macos/Sources/OpenClawProtocol/*|apps/shared/OpenClawKit/Sources/OpenClawProtocol/*)
|
||||
;;
|
||||
apps/macos/*|apps/ios/*|apps/shared/*|Swabble/*)
|
||||
run_macos=true
|
||||
;;
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
// Generated by scripts/protocol-gen-swift.ts — do not edit by hand
|
||||
// swiftlint:disable file_length
|
||||
import Foundation
|
||||
|
||||
public let GATEWAY_PROTOCOL_VERSION = 3
|
||||
@@ -383,7 +384,7 @@ public struct AgentEvent: Codable, Sendable {
|
||||
|
||||
public struct SendParams: Codable, Sendable {
|
||||
public let to: String
|
||||
public let message: String
|
||||
public let message: String?
|
||||
public let mediaurl: String?
|
||||
public let mediaurls: [String]?
|
||||
public let gifplayback: Bool?
|
||||
@@ -394,7 +395,7 @@ public struct SendParams: Codable, Sendable {
|
||||
|
||||
public init(
|
||||
to: String,
|
||||
message: String,
|
||||
message: String?,
|
||||
mediaurl: String?,
|
||||
mediaurls: [String]?,
|
||||
gifplayback: Bool?,
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
// Generated by scripts/protocol-gen-swift.ts — do not edit by hand
|
||||
// swiftlint:disable file_length
|
||||
import Foundation
|
||||
|
||||
public let GATEWAY_PROTOCOL_VERSION = 3
|
||||
@@ -383,7 +384,7 @@ public struct AgentEvent: Codable, Sendable {
|
||||
|
||||
public struct SendParams: Codable, Sendable {
|
||||
public let to: String
|
||||
public let message: String
|
||||
public let message: String?
|
||||
public let mediaurl: String?
|
||||
public let mediaurls: [String]?
|
||||
public let gifplayback: Bool?
|
||||
@@ -394,7 +395,7 @@ public struct SendParams: Codable, Sendable {
|
||||
|
||||
public init(
|
||||
to: String,
|
||||
message: String,
|
||||
message: String?,
|
||||
mediaurl: String?,
|
||||
mediaurls: [String]?,
|
||||
gifplayback: Bool?,
|
||||
|
||||
@@ -27,7 +27,7 @@ const outPaths = [
|
||||
),
|
||||
];
|
||||
|
||||
const header = `// Generated by scripts/protocol-gen-swift.ts — do not edit by hand\nimport Foundation\n\npublic let GATEWAY_PROTOCOL_VERSION = ${PROTOCOL_VERSION}\n\npublic enum ErrorCode: String, Codable, Sendable {\n${Object.values(
|
||||
const header = `// Generated by scripts/protocol-gen-swift.ts — do not edit by hand\n// swiftlint:disable file_length\nimport Foundation\n\npublic let GATEWAY_PROTOCOL_VERSION = ${PROTOCOL_VERSION}\n\npublic enum ErrorCode: String, Codable, Sendable {\n${Object.values(
|
||||
ErrorCodes,
|
||||
)
|
||||
.map((c) => ` case ${camelCase(c)} = "${c}"`)
|
||||
|
||||
@@ -15,7 +15,7 @@ export const AgentEventSchema = Type.Object(
|
||||
export const SendParamsSchema = Type.Object(
|
||||
{
|
||||
to: NonEmptyString,
|
||||
message: NonEmptyString,
|
||||
message: Type.Optional(Type.String()),
|
||||
mediaUrl: Type.Optional(Type.String()),
|
||||
mediaUrls: Type.Optional(Type.Array(Type.String())),
|
||||
gifPlayback: Type.Optional(Type.Boolean()),
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
import { describe, expect, it, vi } from "vitest";
|
||||
import { beforeEach, describe, expect, it, vi } from "vitest";
|
||||
import type { GatewayRequestContext } from "./types.js";
|
||||
import { sendHandlers } from "./send.js";
|
||||
|
||||
@@ -47,6 +47,67 @@ const makeContext = (): GatewayRequestContext =>
|
||||
}) as unknown as GatewayRequestContext;
|
||||
|
||||
describe("gateway send mirroring", () => {
|
||||
beforeEach(() => {
|
||||
vi.clearAllMocks();
|
||||
});
|
||||
|
||||
it("accepts media-only sends without message", async () => {
|
||||
mocks.deliverOutboundPayloads.mockResolvedValue([{ messageId: "m-media", channel: "slack" }]);
|
||||
|
||||
const respond = vi.fn();
|
||||
await sendHandlers.send({
|
||||
params: {
|
||||
to: "channel:C1",
|
||||
mediaUrl: "https://example.com/a.png",
|
||||
channel: "slack",
|
||||
idempotencyKey: "idem-media-only",
|
||||
},
|
||||
respond,
|
||||
context: makeContext(),
|
||||
req: { type: "req", id: "1", method: "send" },
|
||||
client: null,
|
||||
isWebchatConnect: () => false,
|
||||
});
|
||||
|
||||
expect(mocks.deliverOutboundPayloads).toHaveBeenCalledWith(
|
||||
expect.objectContaining({
|
||||
payloads: [{ text: "", mediaUrl: "https://example.com/a.png", mediaUrls: undefined }],
|
||||
}),
|
||||
);
|
||||
expect(respond).toHaveBeenCalledWith(
|
||||
true,
|
||||
expect.objectContaining({ messageId: "m-media" }),
|
||||
undefined,
|
||||
expect.objectContaining({ channel: "slack" }),
|
||||
);
|
||||
});
|
||||
|
||||
it("rejects empty sends when neither text nor media is present", async () => {
|
||||
const respond = vi.fn();
|
||||
await sendHandlers.send({
|
||||
params: {
|
||||
to: "channel:C1",
|
||||
message: " ",
|
||||
channel: "slack",
|
||||
idempotencyKey: "idem-empty",
|
||||
},
|
||||
respond,
|
||||
context: makeContext(),
|
||||
req: { type: "req", id: "1", method: "send" },
|
||||
client: null,
|
||||
isWebchatConnect: () => false,
|
||||
});
|
||||
|
||||
expect(mocks.deliverOutboundPayloads).not.toHaveBeenCalled();
|
||||
expect(respond).toHaveBeenCalledWith(
|
||||
false,
|
||||
undefined,
|
||||
expect.objectContaining({
|
||||
message: expect.stringContaining("text or media is required"),
|
||||
}),
|
||||
);
|
||||
});
|
||||
|
||||
it("does not mirror when delivery returns no results", async () => {
|
||||
mocks.deliverOutboundPayloads.mockResolvedValue([]);
|
||||
|
||||
|
||||
@@ -58,7 +58,7 @@ export const sendHandlers: GatewayRequestHandlers = {
|
||||
}
|
||||
const request = p as {
|
||||
to: string;
|
||||
message: string;
|
||||
message?: string;
|
||||
mediaUrl?: string;
|
||||
mediaUrls?: string[];
|
||||
gifPlayback?: boolean;
|
||||
@@ -85,8 +85,24 @@ export const sendHandlers: GatewayRequestHandlers = {
|
||||
return;
|
||||
}
|
||||
const to = request.to.trim();
|
||||
const message = request.message.trim();
|
||||
const mediaUrls = Array.isArray(request.mediaUrls) ? request.mediaUrls : undefined;
|
||||
const message = typeof request.message === "string" ? request.message.trim() : "";
|
||||
const mediaUrl =
|
||||
typeof request.mediaUrl === "string" && request.mediaUrl.trim().length > 0
|
||||
? request.mediaUrl.trim()
|
||||
: undefined;
|
||||
const mediaUrls = Array.isArray(request.mediaUrls)
|
||||
? request.mediaUrls
|
||||
.map((entry) => (typeof entry === "string" ? entry.trim() : ""))
|
||||
.filter((entry) => entry.length > 0)
|
||||
: undefined;
|
||||
if (!message && !mediaUrl && (mediaUrls?.length ?? 0) === 0) {
|
||||
respond(
|
||||
false,
|
||||
undefined,
|
||||
errorShape(ErrorCodes.INVALID_REQUEST, "invalid send params: text or media is required"),
|
||||
);
|
||||
return;
|
||||
}
|
||||
const channelInput = typeof request.channel === "string" ? request.channel : undefined;
|
||||
const normalizedChannel = channelInput ? normalizeChannelId(channelInput) : null;
|
||||
if (channelInput && !normalizedChannel) {
|
||||
@@ -132,7 +148,7 @@ export const sendHandlers: GatewayRequestHandlers = {
|
||||
}
|
||||
const outboundDeps = context.deps ? createOutboundSendDeps(context.deps) : undefined;
|
||||
const mirrorPayloads = normalizeReplyPayloadsForDelivery([
|
||||
{ text: message, mediaUrl: request.mediaUrl, mediaUrls },
|
||||
{ text: message, mediaUrl, mediaUrls },
|
||||
]);
|
||||
const mirrorText = mirrorPayloads
|
||||
.map((payload) => payload.text)
|
||||
@@ -170,7 +186,7 @@ export const sendHandlers: GatewayRequestHandlers = {
|
||||
channel: outboundChannel,
|
||||
to: resolved.to,
|
||||
accountId,
|
||||
payloads: [{ text: message, mediaUrl: request.mediaUrl, mediaUrls }],
|
||||
payloads: [{ text: message, mediaUrl, mediaUrls }],
|
||||
gifPlayback: request.gifPlayback,
|
||||
deps: outboundDeps,
|
||||
mirror: providedSessionKey
|
||||
|
||||
@@ -196,6 +196,73 @@ describe("deliverOutboundPayloads", () => {
|
||||
);
|
||||
});
|
||||
|
||||
it("strips leading blank lines for WhatsApp text payloads", async () => {
|
||||
const sendWhatsApp = vi.fn().mockResolvedValue({ messageId: "w1", toJid: "jid" });
|
||||
const cfg: OpenClawConfig = {
|
||||
channels: { whatsapp: { textChunkLimit: 4000 } },
|
||||
};
|
||||
|
||||
await deliverOutboundPayloads({
|
||||
cfg,
|
||||
channel: "whatsapp",
|
||||
to: "+1555",
|
||||
payloads: [{ text: "\n\nHello from WhatsApp" }],
|
||||
deps: { sendWhatsApp },
|
||||
});
|
||||
|
||||
expect(sendWhatsApp).toHaveBeenCalledTimes(1);
|
||||
expect(sendWhatsApp).toHaveBeenNthCalledWith(
|
||||
1,
|
||||
"+1555",
|
||||
"Hello from WhatsApp",
|
||||
expect.objectContaining({ verbose: false }),
|
||||
);
|
||||
});
|
||||
|
||||
it("drops whitespace-only WhatsApp text payloads when no media is attached", async () => {
|
||||
const sendWhatsApp = vi.fn().mockResolvedValue({ messageId: "w1", toJid: "jid" });
|
||||
const cfg: OpenClawConfig = {
|
||||
channels: { whatsapp: { textChunkLimit: 4000 } },
|
||||
};
|
||||
|
||||
const results = await deliverOutboundPayloads({
|
||||
cfg,
|
||||
channel: "whatsapp",
|
||||
to: "+1555",
|
||||
payloads: [{ text: " \n\t " }],
|
||||
deps: { sendWhatsApp },
|
||||
});
|
||||
|
||||
expect(sendWhatsApp).not.toHaveBeenCalled();
|
||||
expect(results).toEqual([]);
|
||||
});
|
||||
|
||||
it("keeps WhatsApp media payloads but clears whitespace-only captions", async () => {
|
||||
const sendWhatsApp = vi.fn().mockResolvedValue({ messageId: "w1", toJid: "jid" });
|
||||
const cfg: OpenClawConfig = {
|
||||
channels: { whatsapp: { textChunkLimit: 4000 } },
|
||||
};
|
||||
|
||||
await deliverOutboundPayloads({
|
||||
cfg,
|
||||
channel: "whatsapp",
|
||||
to: "+1555",
|
||||
payloads: [{ text: " \n\t ", mediaUrl: "https://example.com/photo.png" }],
|
||||
deps: { sendWhatsApp },
|
||||
});
|
||||
|
||||
expect(sendWhatsApp).toHaveBeenCalledTimes(1);
|
||||
expect(sendWhatsApp).toHaveBeenNthCalledWith(
|
||||
1,
|
||||
"+1555",
|
||||
"",
|
||||
expect.objectContaining({
|
||||
mediaUrl: "https://example.com/photo.png",
|
||||
verbose: false,
|
||||
}),
|
||||
);
|
||||
});
|
||||
|
||||
it("preserves fenced blocks for markdown chunkers in newline mode", async () => {
|
||||
const chunker = vi.fn((text: string) => (text ? [text] : []));
|
||||
const sendText = vi.fn().mockImplementation(async ({ text }: { text: string }) => ({
|
||||
|
||||
@@ -312,7 +312,31 @@ export async function deliverOutboundPayloads(params: {
|
||||
})),
|
||||
};
|
||||
};
|
||||
const normalizedPayloads = normalizeReplyPayloadsForDelivery(payloads);
|
||||
const normalizeWhatsAppPayload = (payload: ReplyPayload): ReplyPayload | null => {
|
||||
const hasMedia = Boolean(payload.mediaUrl) || (payload.mediaUrls?.length ?? 0) > 0;
|
||||
const rawText = typeof payload.text === "string" ? payload.text : "";
|
||||
const normalizedText = rawText.replace(/^(?:[ \t]*\r?\n)+/, "");
|
||||
if (!normalizedText.trim()) {
|
||||
if (!hasMedia) {
|
||||
return null;
|
||||
}
|
||||
return {
|
||||
...payload,
|
||||
text: "",
|
||||
};
|
||||
}
|
||||
return {
|
||||
...payload,
|
||||
text: normalizedText,
|
||||
};
|
||||
};
|
||||
const normalizedPayloads = normalizeReplyPayloadsForDelivery(payloads).flatMap((payload) => {
|
||||
if (channel !== "whatsapp") {
|
||||
return [payload];
|
||||
}
|
||||
const normalized = normalizeWhatsAppPayload(payload);
|
||||
return normalized ? [normalized] : [];
|
||||
});
|
||||
for (const payload of normalizedPayloads) {
|
||||
const payloadSummary: NormalizedOutboundPayload = {
|
||||
text: payload.text ?? "",
|
||||
|
||||
@@ -9,7 +9,11 @@ import { telegramPlugin } from "../../../extensions/telegram/src/channel.js";
|
||||
import { whatsappPlugin } from "../../../extensions/whatsapp/src/channel.js";
|
||||
import { jsonResult } from "../../agents/tools/common.js";
|
||||
import { setActivePluginRegistry } from "../../plugins/runtime.js";
|
||||
import { createIMessageTestPlugin, createTestRegistry } from "../../test-utils/channel-plugins.js";
|
||||
import {
|
||||
createIMessageTestPlugin,
|
||||
createOutboundTestPlugin,
|
||||
createTestRegistry,
|
||||
} from "../../test-utils/channel-plugins.js";
|
||||
import { loadWebMedia } from "../../web/media.js";
|
||||
import { runMessageAction } from "./message-action-runner.js";
|
||||
|
||||
@@ -609,6 +613,152 @@ describe("runMessageAction sandboxed media validation", () => {
|
||||
});
|
||||
});
|
||||
|
||||
describe("runMessageAction media caption behavior", () => {
|
||||
afterEach(() => {
|
||||
setActivePluginRegistry(createTestRegistry([]));
|
||||
});
|
||||
|
||||
it("promotes caption to message for media sends when message is empty", async () => {
|
||||
const sendMedia = vi.fn().mockResolvedValue({
|
||||
channel: "testchat",
|
||||
messageId: "m1",
|
||||
chatId: "c1",
|
||||
});
|
||||
setActivePluginRegistry(
|
||||
createTestRegistry([
|
||||
{
|
||||
pluginId: "testchat",
|
||||
source: "test",
|
||||
plugin: createOutboundTestPlugin({
|
||||
id: "testchat",
|
||||
outbound: {
|
||||
deliveryMode: "direct",
|
||||
sendText: vi.fn().mockResolvedValue({
|
||||
channel: "testchat",
|
||||
messageId: "t1",
|
||||
chatId: "c1",
|
||||
}),
|
||||
sendMedia,
|
||||
},
|
||||
}),
|
||||
},
|
||||
]),
|
||||
);
|
||||
const cfg = {
|
||||
channels: {
|
||||
testchat: {
|
||||
enabled: true,
|
||||
},
|
||||
},
|
||||
} as OpenClawConfig;
|
||||
|
||||
const result = await runMessageAction({
|
||||
cfg,
|
||||
action: "send",
|
||||
params: {
|
||||
channel: "testchat",
|
||||
target: "channel:abc",
|
||||
media: "https://example.com/cat.png",
|
||||
caption: "caption-only text",
|
||||
},
|
||||
dryRun: false,
|
||||
});
|
||||
|
||||
expect(result.kind).toBe("send");
|
||||
expect(sendMedia).toHaveBeenCalledWith(
|
||||
expect.objectContaining({
|
||||
text: "caption-only text",
|
||||
mediaUrl: "https://example.com/cat.png",
|
||||
}),
|
||||
);
|
||||
});
|
||||
});
|
||||
|
||||
describe("runMessageAction card-only send behavior", () => {
|
||||
const handleAction = vi.fn(async ({ params }: { params: Record<string, unknown> }) =>
|
||||
jsonResult({
|
||||
ok: true,
|
||||
card: params.card ?? null,
|
||||
message: params.message ?? null,
|
||||
}),
|
||||
);
|
||||
|
||||
const cardPlugin: ChannelPlugin = {
|
||||
id: "cardchat",
|
||||
meta: {
|
||||
id: "cardchat",
|
||||
label: "Card Chat",
|
||||
selectionLabel: "Card Chat",
|
||||
docsPath: "/channels/cardchat",
|
||||
blurb: "Card-only send test plugin.",
|
||||
},
|
||||
capabilities: { chatTypes: ["direct"] },
|
||||
config: {
|
||||
listAccountIds: () => ["default"],
|
||||
resolveAccount: () => ({ enabled: true }),
|
||||
isConfigured: () => true,
|
||||
},
|
||||
actions: {
|
||||
listActions: () => ["send"],
|
||||
supportsAction: ({ action }) => action === "send",
|
||||
handleAction,
|
||||
},
|
||||
};
|
||||
|
||||
beforeEach(() => {
|
||||
setActivePluginRegistry(
|
||||
createTestRegistry([
|
||||
{
|
||||
pluginId: "cardchat",
|
||||
source: "test",
|
||||
plugin: cardPlugin,
|
||||
},
|
||||
]),
|
||||
);
|
||||
handleAction.mockClear();
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
setActivePluginRegistry(createTestRegistry([]));
|
||||
vi.clearAllMocks();
|
||||
});
|
||||
|
||||
it("allows card-only sends without text or media", async () => {
|
||||
const cfg = {
|
||||
channels: {
|
||||
cardchat: {
|
||||
enabled: true,
|
||||
},
|
||||
},
|
||||
} as OpenClawConfig;
|
||||
|
||||
const card = {
|
||||
type: "AdaptiveCard",
|
||||
version: "1.4",
|
||||
body: [{ type: "TextBlock", text: "Card-only payload" }],
|
||||
};
|
||||
|
||||
const result = await runMessageAction({
|
||||
cfg,
|
||||
action: "send",
|
||||
params: {
|
||||
channel: "cardchat",
|
||||
target: "channel:test-card",
|
||||
card,
|
||||
},
|
||||
dryRun: false,
|
||||
});
|
||||
|
||||
expect(result.kind).toBe("send");
|
||||
expect(result.handledBy).toBe("plugin");
|
||||
expect(handleAction).toHaveBeenCalled();
|
||||
expect(result.payload).toMatchObject({
|
||||
ok: true,
|
||||
card,
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
describe("runMessageAction accountId defaults", () => {
|
||||
const handleAction = vi.fn(async () => jsonResult({ ok: true }));
|
||||
const accountPlugin: ChannelPlugin = {
|
||||
|
||||
@@ -745,6 +745,7 @@ async function handleSendAction(ctx: ResolvedActionContext): Promise<MessageActi
|
||||
readStringParam(params, "path", { trim: false }) ??
|
||||
readStringParam(params, "filePath", { trim: false });
|
||||
const hasCard = params.card != null && typeof params.card === "object";
|
||||
const caption = readStringParam(params, "caption", { allowEmpty: true }) ?? "";
|
||||
let message =
|
||||
readStringParam(params, "message", {
|
||||
required: !mediaHint && !hasCard,
|
||||
@@ -753,6 +754,9 @@ async function handleSendAction(ctx: ResolvedActionContext): Promise<MessageActi
|
||||
if (message.includes("\\n")) {
|
||||
message = message.replaceAll("\\n", "\n");
|
||||
}
|
||||
if (!message.trim() && caption.trim()) {
|
||||
message = caption;
|
||||
}
|
||||
|
||||
const parsed = parseReplyDirectives(message);
|
||||
const mergedMediaUrls: string[] = [];
|
||||
@@ -804,6 +808,16 @@ async function handleSendAction(ctx: ResolvedActionContext): Promise<MessageActi
|
||||
});
|
||||
|
||||
const mediaUrl = readStringParam(params, "media", { trim: false });
|
||||
if (channel === "whatsapp") {
|
||||
message = message.replace(/^(?:[ \t]*\r?\n)+/, "");
|
||||
if (!message.trim()) {
|
||||
message = "";
|
||||
}
|
||||
}
|
||||
if (!message.trim() && !mediaUrl && mergedMediaUrls.length === 0 && !hasCard) {
|
||||
throw new Error("send requires text or media");
|
||||
}
|
||||
params.message = message;
|
||||
const gifPlayback = readBooleanParam(params, "gifPlayback") ?? false;
|
||||
const bestEffort = readBooleanParam(params, "bestEffort");
|
||||
|
||||
|
||||
Reference in New Issue
Block a user