fix(voice-call): harden inbound policy

This commit is contained in:
Peter Steinberger
2026-02-03 09:33:25 -08:00
parent fc40ba8e7e
commit f8dfd034f5
13 changed files with 328 additions and 33 deletions

View File

@@ -33,6 +33,7 @@ Docs: https://docs.openclaw.ai
- Security: guard skill installer downloads with SSRF checks (block private/localhost URLs).
- Media understanding: apply SSRF guardrails to provider fetches; allow private baseUrl overrides explicitly.
- Tests: stub SSRF DNS pinning in web auto-reply + Gemini video coverage. (#6619) Thanks @joshp123.
- fix(voice-call): harden inbound allowlist; reject anonymous callers; require Telnyx publicKey for allowlist; token-gate Twilio media streams; cap webhook body size (thanks @simecek)
## 2026.2.1

View File

@@ -0,0 +1,19 @@
export function normalizePhoneNumber(input?: string): string {
if (!input) {
return "";
}
return input.replace(/\D/g, "");
}
export function isAllowlistedCaller(
normalizedFrom: string,
allowFrom: string[] | undefined,
): boolean {
if (!normalizedFrom) {
return false;
}
return (allowFrom ?? []).some((num) => {
const normalizedAllow = normalizePhoneNumber(num);
return normalizedAllow !== "" && normalizedAllow === normalizedFrom;
});
}

View File

@@ -148,6 +148,34 @@ describe("validateProviderConfig", () => {
"plugins.entries.voice-call.config.telnyx.apiKey is required (or set TELNYX_API_KEY env)",
);
});
it("fails validation when allowlist inbound policy lacks public key", () => {
const config = createBaseConfig("telnyx");
config.inboundPolicy = "allowlist";
config.telnyx = { apiKey: "KEY123", connectionId: "CONN456" };
const result = validateProviderConfig(config);
expect(result.valid).toBe(false);
expect(result.errors).toContain(
"plugins.entries.voice-call.config.telnyx.publicKey is required for inboundPolicy allowlist/pairing",
);
});
it("passes validation when allowlist inbound policy has public key", () => {
const config = createBaseConfig("telnyx");
config.inboundPolicy = "allowlist";
config.telnyx = {
apiKey: "KEY123",
connectionId: "CONN456",
publicKey: "public-key",
};
const result = validateProviderConfig(config);
expect(result.valid).toBe(true);
expect(result.errors).toEqual([]);
});
});
describe("plivo provider", () => {

View File

@@ -448,6 +448,14 @@ export function validateProviderConfig(config: VoiceCallConfig): {
"plugins.entries.voice-call.config.telnyx.connectionId is required (or set TELNYX_CONNECTION_ID env)",
);
}
if (
(config.inboundPolicy === "allowlist" || config.inboundPolicy === "pairing") &&
!config.telnyx?.publicKey
) {
errors.push(
"plugins.entries.voice-call.config.telnyx.publicKey is required for inboundPolicy allowlist/pairing",
);
}
}
if (config.provider === "twilio") {

View File

@@ -19,6 +19,7 @@ import { CallManager } from "./manager.js";
class FakeProvider implements VoiceCallProvider {
readonly name = "plivo" as const;
readonly playTtsCalls: PlayTtsInput[] = [];
readonly hangupCalls: HangupCallInput[] = [];
verifyWebhook(_ctx: WebhookContext): WebhookVerificationResult {
return { ok: true };
@@ -29,7 +30,9 @@ class FakeProvider implements VoiceCallProvider {
async initiateCall(_input: InitiateCallInput): Promise<InitiateCallResult> {
return { providerCallId: "request-uuid", status: "initiated" };
}
async hangupCall(_input: HangupCallInput): Promise<void> {}
async hangupCall(input: HangupCallInput): Promise<void> {
this.hangupCalls.push(input);
}
async playTts(input: PlayTtsInput): Promise<void> {
this.playTtsCalls.push(input);
}
@@ -102,4 +105,90 @@ describe("CallManager", () => {
expect(provider.playTtsCalls).toHaveLength(1);
expect(provider.playTtsCalls[0]?.text).toBe("Hello there");
});
it("rejects inbound calls with missing caller ID when allowlist enabled", () => {
const config = VoiceCallConfigSchema.parse({
enabled: true,
provider: "plivo",
fromNumber: "+15550000000",
inboundPolicy: "allowlist",
allowFrom: ["+15550001234"],
});
const storePath = path.join(os.tmpdir(), `openclaw-voice-call-test-${Date.now()}`);
const provider = new FakeProvider();
const manager = new CallManager(config, storePath);
manager.initialize(provider, "https://example.com/voice/webhook");
manager.processEvent({
id: "evt-allowlist-missing",
type: "call.initiated",
callId: "call-missing",
providerCallId: "provider-missing",
timestamp: Date.now(),
direction: "inbound",
to: "+15550000000",
});
expect(manager.getCallByProviderCallId("provider-missing")).toBeUndefined();
expect(provider.hangupCalls).toHaveLength(1);
expect(provider.hangupCalls[0]?.providerCallId).toBe("provider-missing");
});
it("rejects inbound calls that only match allowlist suffixes", () => {
const config = VoiceCallConfigSchema.parse({
enabled: true,
provider: "plivo",
fromNumber: "+15550000000",
inboundPolicy: "allowlist",
allowFrom: ["+15550001234"],
});
const storePath = path.join(os.tmpdir(), `openclaw-voice-call-test-${Date.now()}`);
const provider = new FakeProvider();
const manager = new CallManager(config, storePath);
manager.initialize(provider, "https://example.com/voice/webhook");
manager.processEvent({
id: "evt-allowlist-suffix",
type: "call.initiated",
callId: "call-suffix",
providerCallId: "provider-suffix",
timestamp: Date.now(),
direction: "inbound",
from: "+99915550001234",
to: "+15550000000",
});
expect(manager.getCallByProviderCallId("provider-suffix")).toBeUndefined();
expect(provider.hangupCalls).toHaveLength(1);
expect(provider.hangupCalls[0]?.providerCallId).toBe("provider-suffix");
});
it("accepts inbound calls that exactly match the allowlist", () => {
const config = VoiceCallConfigSchema.parse({
enabled: true,
provider: "plivo",
fromNumber: "+15550000000",
inboundPolicy: "allowlist",
allowFrom: ["+15550001234"],
});
const storePath = path.join(os.tmpdir(), `openclaw-voice-call-test-${Date.now()}`);
const manager = new CallManager(config, storePath);
manager.initialize(new FakeProvider(), "https://example.com/voice/webhook");
manager.processEvent({
id: "evt-allowlist-exact",
type: "call.initiated",
callId: "call-exact",
providerCallId: "provider-exact",
timestamp: Date.now(),
direction: "inbound",
from: "+15550001234",
to: "+15550000000",
});
expect(manager.getCallByProviderCallId("provider-exact")).toBeDefined();
});
});

View File

@@ -5,6 +5,7 @@ import os from "node:os";
import path from "node:path";
import type { CallMode, VoiceCallConfig } from "./config.js";
import type { VoiceCallProvider } from "./providers/base.js";
import { isAllowlistedCaller, normalizePhoneNumber } from "./allowlist.js";
import {
type CallId,
type CallRecord,
@@ -474,11 +475,12 @@ export class CallManager {
case "allowlist":
case "pairing": {
const normalized = from?.replace(/\D/g, "") || "";
const allowed = (allowFrom || []).some((num) => {
const normalizedAllow = num.replace(/\D/g, "");
return normalized.endsWith(normalizedAllow) || normalizedAllow.endsWith(normalized);
});
const normalized = normalizePhoneNumber(from);
if (!normalized) {
console.log("[voice-call] Inbound call rejected: missing caller ID");
return false;
}
const allowed = isAllowlistedCaller(normalized, allowFrom);
const status = allowed ? "accepted" : "rejected";
console.log(
`[voice-call] Inbound call ${status}: ${from} ${allowed ? "is in" : "not in"} allowlist`,
@@ -551,7 +553,7 @@ export class CallManager {
if (!call && event.direction === "inbound" && event.providerCallId) {
// Check if we should accept this inbound call
if (!this.shouldAcceptInbound(event.from)) {
// TODO: Could hang up the call here
void this.rejectInboundCall(event);
return;
}
@@ -653,6 +655,25 @@ export class CallManager {
this.persistCallRecord(call);
}
private async rejectInboundCall(event: NormalizedEvent): Promise<void> {
if (!this.provider || !event.providerCallId) {
return;
}
const callId = event.callId || event.providerCallId;
try {
await this.provider.hangupCall({
callId,
providerCallId: event.providerCallId,
reason: "hangup-bot",
});
} catch (err) {
console.warn(
`[voice-call] Failed to reject inbound call ${event.providerCallId}:`,
err instanceof Error ? err.message : err,
);
}
}
private maybeSpeakInitialMessageOnAnswered(call: CallRecord): void {
const initialMessage =
typeof call.metadata?.initialMessage === "string" ? call.metadata.initialMessage.trim() : "";

View File

@@ -1,6 +1,7 @@
import crypto from "node:crypto";
import type { CallRecord, CallState, NormalizedEvent } from "../types.js";
import type { CallManagerContext } from "./context.js";
import { isAllowlistedCaller, normalizePhoneNumber } from "../allowlist.js";
import { findCall } from "./lookup.js";
import { endCall } from "./outbound.js";
import { addTranscriptEntry, transitionState } from "./state.js";
@@ -29,11 +30,12 @@ function shouldAcceptInbound(
case "allowlist":
case "pairing": {
const normalized = from?.replace(/\D/g, "") || "";
const allowed = (allowFrom || []).some((num) => {
const normalizedAllow = num.replace(/\D/g, "");
return normalized.endsWith(normalizedAllow) || normalizedAllow.endsWith(normalized);
});
const normalized = normalizePhoneNumber(from);
if (!normalized) {
console.log("[voice-call] Inbound call rejected: missing caller ID");
return false;
}
const allowed = isAllowlistedCaller(normalized, allowFrom);
const status = allowed ? "accepted" : "rejected";
console.log(
`[voice-call] Inbound call ${status}: ${from} ${allowed ? "is in" : "not in"} allowlist`,

View File

@@ -21,6 +21,8 @@ import type {
export interface MediaStreamConfig {
/** STT provider for transcription */
sttProvider: OpenAIRealtimeSTTProvider;
/** Validate whether to accept a media stream for the given call ID */
shouldAcceptStream?: (params: { callId: string; streamSid: string; token?: string }) => boolean;
/** Callback when transcript is received */
onTranscript?: (callId: string, transcript: string) => void;
/** Callback for partial transcripts (streaming UI) */
@@ -87,6 +89,7 @@ export class MediaStreamHandler {
*/
private async handleConnection(ws: WebSocket, _request: IncomingMessage): Promise<void> {
let session: StreamSession | null = null;
const streamToken = this.getStreamToken(_request);
ws.on("message", async (data: Buffer) => {
try {
@@ -98,7 +101,7 @@ export class MediaStreamHandler {
break;
case "start":
session = await this.handleStart(ws, message);
session = await this.handleStart(ws, message, streamToken);
break;
case "media":
@@ -135,11 +138,28 @@ export class MediaStreamHandler {
/**
* Handle stream start event.
*/
private async handleStart(ws: WebSocket, message: TwilioMediaMessage): Promise<StreamSession> {
private async handleStart(
ws: WebSocket,
message: TwilioMediaMessage,
streamToken?: string,
): Promise<StreamSession | null> {
const streamSid = message.streamSid || "";
const callSid = message.start?.callSid || "";
console.log(`[MediaStream] Stream started: ${streamSid} (call: ${callSid})`);
if (!callSid) {
console.warn("[MediaStream] Missing callSid; closing stream");
ws.close(1008, "Missing callSid");
return null;
}
if (
this.config.shouldAcceptStream &&
!this.config.shouldAcceptStream({ callId: callSid, streamSid, token: streamToken })
) {
console.warn(`[MediaStream] Rejecting stream for unknown call: ${callSid}`);
ws.close(1008, "Unknown call");
return null;
}
// Create STT session
const sttSession = this.config.sttProvider.createSession();
@@ -189,6 +209,18 @@ export class MediaStreamHandler {
this.config.onDisconnect?.(session.callId);
}
private getStreamToken(request: IncomingMessage): string | undefined {
if (!request.url || !request.headers.host) {
return undefined;
}
try {
const url = new URL(request.url, `http://${request.headers.host}`);
return url.searchParams.get("token") ?? undefined;
} catch {
return undefined;
}
}
/**
* Get an active session with an open WebSocket, or undefined if unavailable.
*/

View File

@@ -21,15 +21,21 @@ import type { VoiceCallProvider } from "./base.js";
* Uses Telnyx Call Control API v2 for managing calls.
* @see https://developers.telnyx.com/docs/api/v2/call-control
*/
export interface TelnyxProviderOptions {
/** Allow unsigned webhooks when no public key is configured */
allowUnsignedWebhooks?: boolean;
}
export class TelnyxProvider implements VoiceCallProvider {
readonly name = "telnyx" as const;
private readonly apiKey: string;
private readonly connectionId: string;
private readonly publicKey: string | undefined;
private readonly options: TelnyxProviderOptions;
private readonly baseUrl = "https://api.telnyx.com/v2";
constructor(config: TelnyxConfig) {
constructor(config: TelnyxConfig, options: TelnyxProviderOptions = {}) {
if (!config.apiKey) {
throw new Error("Telnyx API key is required");
}
@@ -40,6 +46,7 @@ export class TelnyxProvider implements VoiceCallProvider {
this.apiKey = config.apiKey;
this.connectionId = config.connectionId;
this.publicKey = config.publicKey;
this.options = options;
}
/**
@@ -76,8 +83,14 @@ export class TelnyxProvider implements VoiceCallProvider {
*/
verifyWebhook(ctx: WebhookContext): WebhookVerificationResult {
if (!this.publicKey) {
// No public key configured, skip verification (not recommended for production)
return { ok: true };
if (this.options.allowUnsignedWebhooks) {
console.warn("[telnyx] Webhook verification skipped (no public key configured)");
return { ok: true, reason: "verification skipped (no public key configured)" };
}
return {
ok: false,
reason: "Missing telnyx.publicKey (configure to verify webhooks)",
};
}
const signature = ctx.headers["telnyx-signature-ed25519"];

View File

@@ -2,7 +2,7 @@ import { describe, expect, it } from "vitest";
import type { WebhookContext } from "../types.js";
import { TwilioProvider } from "./twilio.js";
const STREAM_URL = "wss://example.ngrok.app/voice/stream";
const STREAM_URL_PREFIX = "wss://example.ngrok.app/voice/stream?token=";
function createProvider(): TwilioProvider {
return new TwilioProvider(
@@ -24,13 +24,13 @@ function createContext(rawBody: string, query?: WebhookContext["query"]): Webhoo
describe("TwilioProvider", () => {
it("returns streaming TwiML for outbound conversation calls before in-progress", () => {
const provider = createProvider();
const ctx = createContext("CallStatus=initiated&Direction=outbound-api", {
const ctx = createContext("CallStatus=initiated&Direction=outbound-api&CallSid=CA123", {
callId: "call-1",
});
const result = provider.parseWebhookEvent(ctx);
expect(result.providerResponseBody).toContain(STREAM_URL);
expect(result.providerResponseBody).toContain(STREAM_URL_PREFIX);
expect(result.providerResponseBody).toContain("<Connect>");
});
@@ -50,11 +50,11 @@ describe("TwilioProvider", () => {
it("returns streaming TwiML for inbound calls", () => {
const provider = createProvider();
const ctx = createContext("CallStatus=ringing&Direction=inbound");
const ctx = createContext("CallStatus=ringing&Direction=inbound&CallSid=CA456");
const result = provider.parseWebhookEvent(ctx);
expect(result.providerResponseBody).toContain(STREAM_URL);
expect(result.providerResponseBody).toContain(STREAM_URL_PREFIX);
expect(result.providerResponseBody).toContain("<Connect>");
});
});

View File

@@ -60,6 +60,8 @@ export class TwilioProvider implements VoiceCallProvider {
/** Map of call SID to stream SID for media streams */
private callStreamMap = new Map<string, string>();
/** Per-call tokens for media stream authentication */
private streamAuthTokens = new Map<string, string>();
/** Storage for TwiML content (for notify mode with URL-based TwiML) */
private readonly twimlStorage = new Map<string, string>();
@@ -94,6 +96,7 @@ export class TwilioProvider implements VoiceCallProvider {
}
this.deleteStoredTwiml(callIdMatch[1]);
this.streamAuthTokens.delete(providerCallId);
}
constructor(config: TwilioConfig, options: TwilioProviderOptions = {}) {
@@ -138,6 +141,19 @@ export class TwilioProvider implements VoiceCallProvider {
this.callStreamMap.delete(callSid);
}
isValidStreamToken(callSid: string, token?: string): boolean {
const expected = this.streamAuthTokens.get(callSid);
if (!expected || !token) {
return false;
}
if (expected.length !== token.length) {
const dummy = Buffer.from(expected);
crypto.timingSafeEqual(dummy, dummy);
return false;
}
return crypto.timingSafeEqual(Buffer.from(expected), Buffer.from(token));
}
/**
* Clear TTS queue for a call (barge-in).
* Used when user starts speaking to interrupt current TTS playback.
@@ -271,11 +287,13 @@ export class TwilioProvider implements VoiceCallProvider {
case "busy":
case "no-answer":
case "failed":
this.streamAuthTokens.delete(callSid);
if (callIdOverride) {
this.deleteStoredTwiml(callIdOverride);
}
return { ...baseEvent, type: "call.ended", reason: callStatus };
case "canceled":
this.streamAuthTokens.delete(callSid);
if (callIdOverride) {
this.deleteStoredTwiml(callIdOverride);
}
@@ -308,6 +326,7 @@ export class TwilioProvider implements VoiceCallProvider {
const callStatus = params.get("CallStatus");
const direction = params.get("Direction");
const isOutbound = direction?.startsWith("outbound") ?? false;
const callSid = params.get("CallSid") || undefined;
const callIdFromQuery =
typeof ctx.query?.callId === "string" && ctx.query.callId.trim()
? ctx.query.callId.trim()
@@ -330,7 +349,7 @@ export class TwilioProvider implements VoiceCallProvider {
// Conversation mode: return streaming TwiML immediately for outbound calls.
if (isOutbound) {
const streamUrl = this.getStreamUrl();
const streamUrl = callSid ? this.getStreamUrlForCall(callSid) : null;
return streamUrl ? this.getStreamConnectXml(streamUrl) : TwilioProvider.PAUSE_TWIML;
}
}
@@ -343,7 +362,7 @@ export class TwilioProvider implements VoiceCallProvider {
// Handle subsequent webhook requests (status callbacks, etc.)
// For inbound calls, answer immediately with stream
if (direction === "inbound") {
const streamUrl = this.getStreamUrl();
const streamUrl = callSid ? this.getStreamUrlForCall(callSid) : null;
return streamUrl ? this.getStreamConnectXml(streamUrl) : TwilioProvider.PAUSE_TWIML;
}
@@ -352,7 +371,7 @@ export class TwilioProvider implements VoiceCallProvider {
return TwilioProvider.EMPTY_TWIML;
}
const streamUrl = this.getStreamUrl();
const streamUrl = callSid ? this.getStreamUrlForCall(callSid) : null;
return streamUrl ? this.getStreamConnectXml(streamUrl) : TwilioProvider.PAUSE_TWIML;
}
@@ -380,6 +399,27 @@ export class TwilioProvider implements VoiceCallProvider {
return `${wsOrigin}${path}`;
}
private getStreamAuthToken(callSid: string): string {
const existing = this.streamAuthTokens.get(callSid);
if (existing) {
return existing;
}
const token = crypto.randomBytes(16).toString("base64url");
this.streamAuthTokens.set(callSid, token);
return token;
}
private getStreamUrlForCall(callSid: string): string | null {
const baseUrl = this.getStreamUrl();
if (!baseUrl) {
return null;
}
const token = this.getStreamAuthToken(callSid);
const url = new URL(baseUrl);
url.searchParams.set("token", token);
return url.toString();
}
/**
* Generate TwiML to connect a call to a WebSocket media stream.
* This enables bidirectional audio streaming for real-time STT/TTS.
@@ -444,6 +484,7 @@ export class TwilioProvider implements VoiceCallProvider {
this.deleteStoredTwimlForProviderCall(input.providerCallId);
this.callWebhookUrls.delete(input.providerCallId);
this.streamAuthTokens.delete(input.providerCallId);
await this.apiRequest(
`/Calls/${input.providerCallId}.json`,

View File

@@ -48,11 +48,17 @@ function resolveProvider(config: VoiceCallConfig): VoiceCallProvider {
switch (config.provider) {
case "telnyx":
return new TelnyxProvider({
apiKey: config.telnyx?.apiKey,
connectionId: config.telnyx?.connectionId,
publicKey: config.telnyx?.publicKey,
});
return new TelnyxProvider(
{
apiKey: config.telnyx?.apiKey,
connectionId: config.telnyx?.connectionId,
publicKey: config.telnyx?.publicKey,
},
{
allowUnsignedWebhooks:
config.inboundPolicy === "open" || config.inboundPolicy === "disabled",
},
);
case "twilio":
return new TwilioProvider(
{

View File

@@ -11,6 +11,8 @@ import type { NormalizedEvent, WebhookContext } from "./types.js";
import { MediaStreamHandler } from "./media-stream.js";
import { OpenAIRealtimeSTTProvider } from "./providers/stt-openai-realtime.js";
const MAX_WEBHOOK_BODY_BYTES = 1024 * 1024;
/**
* HTTP server for receiving voice call webhooks from providers.
* Supports WebSocket upgrades for media streams when streaming is enabled.
@@ -69,6 +71,20 @@ export class VoiceCallWebhookServer {
const streamConfig: MediaStreamConfig = {
sttProvider,
shouldAcceptStream: ({ callId, token }) => {
const call = this.manager.getCallByProviderCallId(callId);
if (!call) {
return false;
}
if (this.provider.name === "twilio") {
const twilio = this.provider as TwilioProvider;
if (!twilio.isValidStreamToken(callId, token)) {
console.warn(`[voice-call] Rejecting media stream: invalid token for ${callId}`);
return false;
}
}
return true;
},
onTranscript: (providerCallId, transcript) => {
console.log(`[voice-call] Transcript for ${providerCallId}: ${transcript}`);
@@ -224,7 +240,17 @@ export class VoiceCallWebhookServer {
}
// Read body
const body = await this.readBody(req);
let body = "";
try {
body = await this.readBody(req, MAX_WEBHOOK_BODY_BYTES);
} catch (err) {
if (err instanceof Error && err.message === "PayloadTooLarge") {
res.statusCode = 413;
res.end("Payload Too Large");
return;
}
throw err;
}
// Build webhook context
const ctx: WebhookContext = {
@@ -272,10 +298,19 @@ export class VoiceCallWebhookServer {
/**
* Read request body as string.
*/
private readBody(req: http.IncomingMessage): Promise<string> {
private readBody(req: http.IncomingMessage, maxBytes: number): Promise<string> {
return new Promise((resolve, reject) => {
const chunks: Buffer[] = [];
req.on("data", (chunk) => chunks.push(chunk));
let totalBytes = 0;
req.on("data", (chunk: Buffer) => {
totalBytes += chunk.length;
if (totalBytes > maxBytes) {
req.destroy();
reject(new Error("PayloadTooLarge"));
return;
}
chunks.push(chunk);
});
req.on("end", () => resolve(Buffer.concat(chunks).toString("utf-8")));
req.on("error", reject);
});