From f8dfd034f5d9235c5485f492a9e4ccc114e97fdb Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Tue, 3 Feb 2026 09:33:25 -0800 Subject: [PATCH] fix(voice-call): harden inbound policy --- CHANGELOG.md | 1 + extensions/voice-call/src/allowlist.ts | 19 ++++ extensions/voice-call/src/config.test.ts | 28 ++++++ extensions/voice-call/src/config.ts | 8 ++ extensions/voice-call/src/manager.test.ts | 91 ++++++++++++++++++- extensions/voice-call/src/manager.ts | 33 +++++-- extensions/voice-call/src/manager/events.ts | 12 ++- extensions/voice-call/src/media-stream.ts | 36 +++++++- extensions/voice-call/src/providers/telnyx.ts | 19 +++- .../voice-call/src/providers/twilio.test.ts | 10 +- extensions/voice-call/src/providers/twilio.ts | 47 +++++++++- extensions/voice-call/src/runtime.ts | 16 +++- extensions/voice-call/src/webhook.ts | 41 ++++++++- 13 files changed, 328 insertions(+), 33 deletions(-) create mode 100644 extensions/voice-call/src/allowlist.ts diff --git a/CHANGELOG.md b/CHANGELOG.md index 0d9658a1e2..f0c5b00660 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -33,6 +33,7 @@ Docs: https://docs.openclaw.ai - Security: guard skill installer downloads with SSRF checks (block private/localhost URLs). - Media understanding: apply SSRF guardrails to provider fetches; allow private baseUrl overrides explicitly. - Tests: stub SSRF DNS pinning in web auto-reply + Gemini video coverage. (#6619) Thanks @joshp123. +- fix(voice-call): harden inbound allowlist; reject anonymous callers; require Telnyx publicKey for allowlist; token-gate Twilio media streams; cap webhook body size (thanks @simecek) ## 2026.2.1 diff --git a/extensions/voice-call/src/allowlist.ts b/extensions/voice-call/src/allowlist.ts new file mode 100644 index 0000000000..595bbe8489 --- /dev/null +++ b/extensions/voice-call/src/allowlist.ts @@ -0,0 +1,19 @@ +export function normalizePhoneNumber(input?: string): string { + if (!input) { + return ""; + } + return input.replace(/\D/g, ""); +} + +export function isAllowlistedCaller( + normalizedFrom: string, + allowFrom: string[] | undefined, +): boolean { + if (!normalizedFrom) { + return false; + } + return (allowFrom ?? []).some((num) => { + const normalizedAllow = normalizePhoneNumber(num); + return normalizedAllow !== "" && normalizedAllow === normalizedFrom; + }); +} diff --git a/extensions/voice-call/src/config.test.ts b/extensions/voice-call/src/config.test.ts index 68bfe18838..b5f261f9ef 100644 --- a/extensions/voice-call/src/config.test.ts +++ b/extensions/voice-call/src/config.test.ts @@ -148,6 +148,34 @@ describe("validateProviderConfig", () => { "plugins.entries.voice-call.config.telnyx.apiKey is required (or set TELNYX_API_KEY env)", ); }); + + it("fails validation when allowlist inbound policy lacks public key", () => { + const config = createBaseConfig("telnyx"); + config.inboundPolicy = "allowlist"; + config.telnyx = { apiKey: "KEY123", connectionId: "CONN456" }; + + const result = validateProviderConfig(config); + + expect(result.valid).toBe(false); + expect(result.errors).toContain( + "plugins.entries.voice-call.config.telnyx.publicKey is required for inboundPolicy allowlist/pairing", + ); + }); + + it("passes validation when allowlist inbound policy has public key", () => { + const config = createBaseConfig("telnyx"); + config.inboundPolicy = "allowlist"; + config.telnyx = { + apiKey: "KEY123", + connectionId: "CONN456", + publicKey: "public-key", + }; + + const result = validateProviderConfig(config); + + expect(result.valid).toBe(true); + expect(result.errors).toEqual([]); + }); }); describe("plivo provider", () => { diff --git a/extensions/voice-call/src/config.ts b/extensions/voice-call/src/config.ts index 3e2cf84705..80e7448347 100644 --- a/extensions/voice-call/src/config.ts +++ b/extensions/voice-call/src/config.ts @@ -448,6 +448,14 @@ export function validateProviderConfig(config: VoiceCallConfig): { "plugins.entries.voice-call.config.telnyx.connectionId is required (or set TELNYX_CONNECTION_ID env)", ); } + if ( + (config.inboundPolicy === "allowlist" || config.inboundPolicy === "pairing") && + !config.telnyx?.publicKey + ) { + errors.push( + "plugins.entries.voice-call.config.telnyx.publicKey is required for inboundPolicy allowlist/pairing", + ); + } } if (config.provider === "twilio") { diff --git a/extensions/voice-call/src/manager.test.ts b/extensions/voice-call/src/manager.test.ts index 88ea664852..9e40a6459d 100644 --- a/extensions/voice-call/src/manager.test.ts +++ b/extensions/voice-call/src/manager.test.ts @@ -19,6 +19,7 @@ import { CallManager } from "./manager.js"; class FakeProvider implements VoiceCallProvider { readonly name = "plivo" as const; readonly playTtsCalls: PlayTtsInput[] = []; + readonly hangupCalls: HangupCallInput[] = []; verifyWebhook(_ctx: WebhookContext): WebhookVerificationResult { return { ok: true }; @@ -29,7 +30,9 @@ class FakeProvider implements VoiceCallProvider { async initiateCall(_input: InitiateCallInput): Promise { return { providerCallId: "request-uuid", status: "initiated" }; } - async hangupCall(_input: HangupCallInput): Promise {} + async hangupCall(input: HangupCallInput): Promise { + this.hangupCalls.push(input); + } async playTts(input: PlayTtsInput): Promise { this.playTtsCalls.push(input); } @@ -102,4 +105,90 @@ describe("CallManager", () => { expect(provider.playTtsCalls).toHaveLength(1); expect(provider.playTtsCalls[0]?.text).toBe("Hello there"); }); + + it("rejects inbound calls with missing caller ID when allowlist enabled", () => { + const config = VoiceCallConfigSchema.parse({ + enabled: true, + provider: "plivo", + fromNumber: "+15550000000", + inboundPolicy: "allowlist", + allowFrom: ["+15550001234"], + }); + + const storePath = path.join(os.tmpdir(), `openclaw-voice-call-test-${Date.now()}`); + const provider = new FakeProvider(); + const manager = new CallManager(config, storePath); + manager.initialize(provider, "https://example.com/voice/webhook"); + + manager.processEvent({ + id: "evt-allowlist-missing", + type: "call.initiated", + callId: "call-missing", + providerCallId: "provider-missing", + timestamp: Date.now(), + direction: "inbound", + to: "+15550000000", + }); + + expect(manager.getCallByProviderCallId("provider-missing")).toBeUndefined(); + expect(provider.hangupCalls).toHaveLength(1); + expect(provider.hangupCalls[0]?.providerCallId).toBe("provider-missing"); + }); + + it("rejects inbound calls that only match allowlist suffixes", () => { + const config = VoiceCallConfigSchema.parse({ + enabled: true, + provider: "plivo", + fromNumber: "+15550000000", + inboundPolicy: "allowlist", + allowFrom: ["+15550001234"], + }); + + const storePath = path.join(os.tmpdir(), `openclaw-voice-call-test-${Date.now()}`); + const provider = new FakeProvider(); + const manager = new CallManager(config, storePath); + manager.initialize(provider, "https://example.com/voice/webhook"); + + manager.processEvent({ + id: "evt-allowlist-suffix", + type: "call.initiated", + callId: "call-suffix", + providerCallId: "provider-suffix", + timestamp: Date.now(), + direction: "inbound", + from: "+99915550001234", + to: "+15550000000", + }); + + expect(manager.getCallByProviderCallId("provider-suffix")).toBeUndefined(); + expect(provider.hangupCalls).toHaveLength(1); + expect(provider.hangupCalls[0]?.providerCallId).toBe("provider-suffix"); + }); + + it("accepts inbound calls that exactly match the allowlist", () => { + const config = VoiceCallConfigSchema.parse({ + enabled: true, + provider: "plivo", + fromNumber: "+15550000000", + inboundPolicy: "allowlist", + allowFrom: ["+15550001234"], + }); + + const storePath = path.join(os.tmpdir(), `openclaw-voice-call-test-${Date.now()}`); + const manager = new CallManager(config, storePath); + manager.initialize(new FakeProvider(), "https://example.com/voice/webhook"); + + manager.processEvent({ + id: "evt-allowlist-exact", + type: "call.initiated", + callId: "call-exact", + providerCallId: "provider-exact", + timestamp: Date.now(), + direction: "inbound", + from: "+15550001234", + to: "+15550000000", + }); + + expect(manager.getCallByProviderCallId("provider-exact")).toBeDefined(); + }); }); diff --git a/extensions/voice-call/src/manager.ts b/extensions/voice-call/src/manager.ts index 2851a6e8ce..0cfc9158ef 100644 --- a/extensions/voice-call/src/manager.ts +++ b/extensions/voice-call/src/manager.ts @@ -5,6 +5,7 @@ import os from "node:os"; import path from "node:path"; import type { CallMode, VoiceCallConfig } from "./config.js"; import type { VoiceCallProvider } from "./providers/base.js"; +import { isAllowlistedCaller, normalizePhoneNumber } from "./allowlist.js"; import { type CallId, type CallRecord, @@ -474,11 +475,12 @@ export class CallManager { case "allowlist": case "pairing": { - const normalized = from?.replace(/\D/g, "") || ""; - const allowed = (allowFrom || []).some((num) => { - const normalizedAllow = num.replace(/\D/g, ""); - return normalized.endsWith(normalizedAllow) || normalizedAllow.endsWith(normalized); - }); + const normalized = normalizePhoneNumber(from); + if (!normalized) { + console.log("[voice-call] Inbound call rejected: missing caller ID"); + return false; + } + const allowed = isAllowlistedCaller(normalized, allowFrom); const status = allowed ? "accepted" : "rejected"; console.log( `[voice-call] Inbound call ${status}: ${from} ${allowed ? "is in" : "not in"} allowlist`, @@ -551,7 +553,7 @@ export class CallManager { if (!call && event.direction === "inbound" && event.providerCallId) { // Check if we should accept this inbound call if (!this.shouldAcceptInbound(event.from)) { - // TODO: Could hang up the call here + void this.rejectInboundCall(event); return; } @@ -653,6 +655,25 @@ export class CallManager { this.persistCallRecord(call); } + private async rejectInboundCall(event: NormalizedEvent): Promise { + if (!this.provider || !event.providerCallId) { + return; + } + const callId = event.callId || event.providerCallId; + try { + await this.provider.hangupCall({ + callId, + providerCallId: event.providerCallId, + reason: "hangup-bot", + }); + } catch (err) { + console.warn( + `[voice-call] Failed to reject inbound call ${event.providerCallId}:`, + err instanceof Error ? err.message : err, + ); + } + } + private maybeSpeakInitialMessageOnAnswered(call: CallRecord): void { const initialMessage = typeof call.metadata?.initialMessage === "string" ? call.metadata.initialMessage.trim() : ""; diff --git a/extensions/voice-call/src/manager/events.ts b/extensions/voice-call/src/manager/events.ts index 7f131eb6d3..3ebc8423ef 100644 --- a/extensions/voice-call/src/manager/events.ts +++ b/extensions/voice-call/src/manager/events.ts @@ -1,6 +1,7 @@ import crypto from "node:crypto"; import type { CallRecord, CallState, NormalizedEvent } from "../types.js"; import type { CallManagerContext } from "./context.js"; +import { isAllowlistedCaller, normalizePhoneNumber } from "../allowlist.js"; import { findCall } from "./lookup.js"; import { endCall } from "./outbound.js"; import { addTranscriptEntry, transitionState } from "./state.js"; @@ -29,11 +30,12 @@ function shouldAcceptInbound( case "allowlist": case "pairing": { - const normalized = from?.replace(/\D/g, "") || ""; - const allowed = (allowFrom || []).some((num) => { - const normalizedAllow = num.replace(/\D/g, ""); - return normalized.endsWith(normalizedAllow) || normalizedAllow.endsWith(normalized); - }); + const normalized = normalizePhoneNumber(from); + if (!normalized) { + console.log("[voice-call] Inbound call rejected: missing caller ID"); + return false; + } + const allowed = isAllowlistedCaller(normalized, allowFrom); const status = allowed ? "accepted" : "rejected"; console.log( `[voice-call] Inbound call ${status}: ${from} ${allowed ? "is in" : "not in"} allowlist`, diff --git a/extensions/voice-call/src/media-stream.ts b/extensions/voice-call/src/media-stream.ts index 64fe69c3e8..2525019cd4 100644 --- a/extensions/voice-call/src/media-stream.ts +++ b/extensions/voice-call/src/media-stream.ts @@ -21,6 +21,8 @@ import type { export interface MediaStreamConfig { /** STT provider for transcription */ sttProvider: OpenAIRealtimeSTTProvider; + /** Validate whether to accept a media stream for the given call ID */ + shouldAcceptStream?: (params: { callId: string; streamSid: string; token?: string }) => boolean; /** Callback when transcript is received */ onTranscript?: (callId: string, transcript: string) => void; /** Callback for partial transcripts (streaming UI) */ @@ -87,6 +89,7 @@ export class MediaStreamHandler { */ private async handleConnection(ws: WebSocket, _request: IncomingMessage): Promise { let session: StreamSession | null = null; + const streamToken = this.getStreamToken(_request); ws.on("message", async (data: Buffer) => { try { @@ -98,7 +101,7 @@ export class MediaStreamHandler { break; case "start": - session = await this.handleStart(ws, message); + session = await this.handleStart(ws, message, streamToken); break; case "media": @@ -135,11 +138,28 @@ export class MediaStreamHandler { /** * Handle stream start event. */ - private async handleStart(ws: WebSocket, message: TwilioMediaMessage): Promise { + private async handleStart( + ws: WebSocket, + message: TwilioMediaMessage, + streamToken?: string, + ): Promise { const streamSid = message.streamSid || ""; const callSid = message.start?.callSid || ""; console.log(`[MediaStream] Stream started: ${streamSid} (call: ${callSid})`); + if (!callSid) { + console.warn("[MediaStream] Missing callSid; closing stream"); + ws.close(1008, "Missing callSid"); + return null; + } + if ( + this.config.shouldAcceptStream && + !this.config.shouldAcceptStream({ callId: callSid, streamSid, token: streamToken }) + ) { + console.warn(`[MediaStream] Rejecting stream for unknown call: ${callSid}`); + ws.close(1008, "Unknown call"); + return null; + } // Create STT session const sttSession = this.config.sttProvider.createSession(); @@ -189,6 +209,18 @@ export class MediaStreamHandler { this.config.onDisconnect?.(session.callId); } + private getStreamToken(request: IncomingMessage): string | undefined { + if (!request.url || !request.headers.host) { + return undefined; + } + try { + const url = new URL(request.url, `http://${request.headers.host}`); + return url.searchParams.get("token") ?? undefined; + } catch { + return undefined; + } + } + /** * Get an active session with an open WebSocket, or undefined if unavailable. */ diff --git a/extensions/voice-call/src/providers/telnyx.ts b/extensions/voice-call/src/providers/telnyx.ts index 14a4b76a4d..ef53f0b532 100644 --- a/extensions/voice-call/src/providers/telnyx.ts +++ b/extensions/voice-call/src/providers/telnyx.ts @@ -21,15 +21,21 @@ import type { VoiceCallProvider } from "./base.js"; * Uses Telnyx Call Control API v2 for managing calls. * @see https://developers.telnyx.com/docs/api/v2/call-control */ +export interface TelnyxProviderOptions { + /** Allow unsigned webhooks when no public key is configured */ + allowUnsignedWebhooks?: boolean; +} + export class TelnyxProvider implements VoiceCallProvider { readonly name = "telnyx" as const; private readonly apiKey: string; private readonly connectionId: string; private readonly publicKey: string | undefined; + private readonly options: TelnyxProviderOptions; private readonly baseUrl = "https://api.telnyx.com/v2"; - constructor(config: TelnyxConfig) { + constructor(config: TelnyxConfig, options: TelnyxProviderOptions = {}) { if (!config.apiKey) { throw new Error("Telnyx API key is required"); } @@ -40,6 +46,7 @@ export class TelnyxProvider implements VoiceCallProvider { this.apiKey = config.apiKey; this.connectionId = config.connectionId; this.publicKey = config.publicKey; + this.options = options; } /** @@ -76,8 +83,14 @@ export class TelnyxProvider implements VoiceCallProvider { */ verifyWebhook(ctx: WebhookContext): WebhookVerificationResult { if (!this.publicKey) { - // No public key configured, skip verification (not recommended for production) - return { ok: true }; + if (this.options.allowUnsignedWebhooks) { + console.warn("[telnyx] Webhook verification skipped (no public key configured)"); + return { ok: true, reason: "verification skipped (no public key configured)" }; + } + return { + ok: false, + reason: "Missing telnyx.publicKey (configure to verify webhooks)", + }; } const signature = ctx.headers["telnyx-signature-ed25519"]; diff --git a/extensions/voice-call/src/providers/twilio.test.ts b/extensions/voice-call/src/providers/twilio.test.ts index 98e5ddbb86..36b25005f0 100644 --- a/extensions/voice-call/src/providers/twilio.test.ts +++ b/extensions/voice-call/src/providers/twilio.test.ts @@ -2,7 +2,7 @@ import { describe, expect, it } from "vitest"; import type { WebhookContext } from "../types.js"; import { TwilioProvider } from "./twilio.js"; -const STREAM_URL = "wss://example.ngrok.app/voice/stream"; +const STREAM_URL_PREFIX = "wss://example.ngrok.app/voice/stream?token="; function createProvider(): TwilioProvider { return new TwilioProvider( @@ -24,13 +24,13 @@ function createContext(rawBody: string, query?: WebhookContext["query"]): Webhoo describe("TwilioProvider", () => { it("returns streaming TwiML for outbound conversation calls before in-progress", () => { const provider = createProvider(); - const ctx = createContext("CallStatus=initiated&Direction=outbound-api", { + const ctx = createContext("CallStatus=initiated&Direction=outbound-api&CallSid=CA123", { callId: "call-1", }); const result = provider.parseWebhookEvent(ctx); - expect(result.providerResponseBody).toContain(STREAM_URL); + expect(result.providerResponseBody).toContain(STREAM_URL_PREFIX); expect(result.providerResponseBody).toContain(""); }); @@ -50,11 +50,11 @@ describe("TwilioProvider", () => { it("returns streaming TwiML for inbound calls", () => { const provider = createProvider(); - const ctx = createContext("CallStatus=ringing&Direction=inbound"); + const ctx = createContext("CallStatus=ringing&Direction=inbound&CallSid=CA456"); const result = provider.parseWebhookEvent(ctx); - expect(result.providerResponseBody).toContain(STREAM_URL); + expect(result.providerResponseBody).toContain(STREAM_URL_PREFIX); expect(result.providerResponseBody).toContain(""); }); }); diff --git a/extensions/voice-call/src/providers/twilio.ts b/extensions/voice-call/src/providers/twilio.ts index b40ec5f4b9..aaa1eb389c 100644 --- a/extensions/voice-call/src/providers/twilio.ts +++ b/extensions/voice-call/src/providers/twilio.ts @@ -60,6 +60,8 @@ export class TwilioProvider implements VoiceCallProvider { /** Map of call SID to stream SID for media streams */ private callStreamMap = new Map(); + /** Per-call tokens for media stream authentication */ + private streamAuthTokens = new Map(); /** Storage for TwiML content (for notify mode with URL-based TwiML) */ private readonly twimlStorage = new Map(); @@ -94,6 +96,7 @@ export class TwilioProvider implements VoiceCallProvider { } this.deleteStoredTwiml(callIdMatch[1]); + this.streamAuthTokens.delete(providerCallId); } constructor(config: TwilioConfig, options: TwilioProviderOptions = {}) { @@ -138,6 +141,19 @@ export class TwilioProvider implements VoiceCallProvider { this.callStreamMap.delete(callSid); } + isValidStreamToken(callSid: string, token?: string): boolean { + const expected = this.streamAuthTokens.get(callSid); + if (!expected || !token) { + return false; + } + if (expected.length !== token.length) { + const dummy = Buffer.from(expected); + crypto.timingSafeEqual(dummy, dummy); + return false; + } + return crypto.timingSafeEqual(Buffer.from(expected), Buffer.from(token)); + } + /** * Clear TTS queue for a call (barge-in). * Used when user starts speaking to interrupt current TTS playback. @@ -271,11 +287,13 @@ export class TwilioProvider implements VoiceCallProvider { case "busy": case "no-answer": case "failed": + this.streamAuthTokens.delete(callSid); if (callIdOverride) { this.deleteStoredTwiml(callIdOverride); } return { ...baseEvent, type: "call.ended", reason: callStatus }; case "canceled": + this.streamAuthTokens.delete(callSid); if (callIdOverride) { this.deleteStoredTwiml(callIdOverride); } @@ -308,6 +326,7 @@ export class TwilioProvider implements VoiceCallProvider { const callStatus = params.get("CallStatus"); const direction = params.get("Direction"); const isOutbound = direction?.startsWith("outbound") ?? false; + const callSid = params.get("CallSid") || undefined; const callIdFromQuery = typeof ctx.query?.callId === "string" && ctx.query.callId.trim() ? ctx.query.callId.trim() @@ -330,7 +349,7 @@ export class TwilioProvider implements VoiceCallProvider { // Conversation mode: return streaming TwiML immediately for outbound calls. if (isOutbound) { - const streamUrl = this.getStreamUrl(); + const streamUrl = callSid ? this.getStreamUrlForCall(callSid) : null; return streamUrl ? this.getStreamConnectXml(streamUrl) : TwilioProvider.PAUSE_TWIML; } } @@ -343,7 +362,7 @@ export class TwilioProvider implements VoiceCallProvider { // Handle subsequent webhook requests (status callbacks, etc.) // For inbound calls, answer immediately with stream if (direction === "inbound") { - const streamUrl = this.getStreamUrl(); + const streamUrl = callSid ? this.getStreamUrlForCall(callSid) : null; return streamUrl ? this.getStreamConnectXml(streamUrl) : TwilioProvider.PAUSE_TWIML; } @@ -352,7 +371,7 @@ export class TwilioProvider implements VoiceCallProvider { return TwilioProvider.EMPTY_TWIML; } - const streamUrl = this.getStreamUrl(); + const streamUrl = callSid ? this.getStreamUrlForCall(callSid) : null; return streamUrl ? this.getStreamConnectXml(streamUrl) : TwilioProvider.PAUSE_TWIML; } @@ -380,6 +399,27 @@ export class TwilioProvider implements VoiceCallProvider { return `${wsOrigin}${path}`; } + private getStreamAuthToken(callSid: string): string { + const existing = this.streamAuthTokens.get(callSid); + if (existing) { + return existing; + } + const token = crypto.randomBytes(16).toString("base64url"); + this.streamAuthTokens.set(callSid, token); + return token; + } + + private getStreamUrlForCall(callSid: string): string | null { + const baseUrl = this.getStreamUrl(); + if (!baseUrl) { + return null; + } + const token = this.getStreamAuthToken(callSid); + const url = new URL(baseUrl); + url.searchParams.set("token", token); + return url.toString(); + } + /** * Generate TwiML to connect a call to a WebSocket media stream. * This enables bidirectional audio streaming for real-time STT/TTS. @@ -444,6 +484,7 @@ export class TwilioProvider implements VoiceCallProvider { this.deleteStoredTwimlForProviderCall(input.providerCallId); this.callWebhookUrls.delete(input.providerCallId); + this.streamAuthTokens.delete(input.providerCallId); await this.apiRequest( `/Calls/${input.providerCallId}.json`, diff --git a/extensions/voice-call/src/runtime.ts b/extensions/voice-call/src/runtime.ts index 93a2c756a5..046c4c208c 100644 --- a/extensions/voice-call/src/runtime.ts +++ b/extensions/voice-call/src/runtime.ts @@ -48,11 +48,17 @@ function resolveProvider(config: VoiceCallConfig): VoiceCallProvider { switch (config.provider) { case "telnyx": - return new TelnyxProvider({ - apiKey: config.telnyx?.apiKey, - connectionId: config.telnyx?.connectionId, - publicKey: config.telnyx?.publicKey, - }); + return new TelnyxProvider( + { + apiKey: config.telnyx?.apiKey, + connectionId: config.telnyx?.connectionId, + publicKey: config.telnyx?.publicKey, + }, + { + allowUnsignedWebhooks: + config.inboundPolicy === "open" || config.inboundPolicy === "disabled", + }, + ); case "twilio": return new TwilioProvider( { diff --git a/extensions/voice-call/src/webhook.ts b/extensions/voice-call/src/webhook.ts index 58a39c0f0d..f67ff23738 100644 --- a/extensions/voice-call/src/webhook.ts +++ b/extensions/voice-call/src/webhook.ts @@ -11,6 +11,8 @@ import type { NormalizedEvent, WebhookContext } from "./types.js"; import { MediaStreamHandler } from "./media-stream.js"; import { OpenAIRealtimeSTTProvider } from "./providers/stt-openai-realtime.js"; +const MAX_WEBHOOK_BODY_BYTES = 1024 * 1024; + /** * HTTP server for receiving voice call webhooks from providers. * Supports WebSocket upgrades for media streams when streaming is enabled. @@ -69,6 +71,20 @@ export class VoiceCallWebhookServer { const streamConfig: MediaStreamConfig = { sttProvider, + shouldAcceptStream: ({ callId, token }) => { + const call = this.manager.getCallByProviderCallId(callId); + if (!call) { + return false; + } + if (this.provider.name === "twilio") { + const twilio = this.provider as TwilioProvider; + if (!twilio.isValidStreamToken(callId, token)) { + console.warn(`[voice-call] Rejecting media stream: invalid token for ${callId}`); + return false; + } + } + return true; + }, onTranscript: (providerCallId, transcript) => { console.log(`[voice-call] Transcript for ${providerCallId}: ${transcript}`); @@ -224,7 +240,17 @@ export class VoiceCallWebhookServer { } // Read body - const body = await this.readBody(req); + let body = ""; + try { + body = await this.readBody(req, MAX_WEBHOOK_BODY_BYTES); + } catch (err) { + if (err instanceof Error && err.message === "PayloadTooLarge") { + res.statusCode = 413; + res.end("Payload Too Large"); + return; + } + throw err; + } // Build webhook context const ctx: WebhookContext = { @@ -272,10 +298,19 @@ export class VoiceCallWebhookServer { /** * Read request body as string. */ - private readBody(req: http.IncomingMessage): Promise { + private readBody(req: http.IncomingMessage, maxBytes: number): Promise { return new Promise((resolve, reject) => { const chunks: Buffer[] = []; - req.on("data", (chunk) => chunks.push(chunk)); + let totalBytes = 0; + req.on("data", (chunk: Buffer) => { + totalBytes += chunk.length; + if (totalBytes > maxBytes) { + req.destroy(); + reject(new Error("PayloadTooLarge")); + return; + } + chunks.push(chunk); + }); req.on("end", () => resolve(Buffer.concat(chunks).toString("utf-8"))); req.on("error", reject); });