mirror of
https://github.com/openclaw/openclaw.git
synced 2026-02-19 18:39:20 -05:00
fix(voice-call): harden inbound policy
This commit is contained in:
@@ -33,6 +33,7 @@ Docs: https://docs.openclaw.ai
|
||||
- Security: guard skill installer downloads with SSRF checks (block private/localhost URLs).
|
||||
- Media understanding: apply SSRF guardrails to provider fetches; allow private baseUrl overrides explicitly.
|
||||
- Tests: stub SSRF DNS pinning in web auto-reply + Gemini video coverage. (#6619) Thanks @joshp123.
|
||||
- fix(voice-call): harden inbound allowlist; reject anonymous callers; require Telnyx publicKey for allowlist; token-gate Twilio media streams; cap webhook body size (thanks @simecek)
|
||||
|
||||
## 2026.2.1
|
||||
|
||||
|
||||
19
extensions/voice-call/src/allowlist.ts
Normal file
19
extensions/voice-call/src/allowlist.ts
Normal file
@@ -0,0 +1,19 @@
|
||||
export function normalizePhoneNumber(input?: string): string {
|
||||
if (!input) {
|
||||
return "";
|
||||
}
|
||||
return input.replace(/\D/g, "");
|
||||
}
|
||||
|
||||
export function isAllowlistedCaller(
|
||||
normalizedFrom: string,
|
||||
allowFrom: string[] | undefined,
|
||||
): boolean {
|
||||
if (!normalizedFrom) {
|
||||
return false;
|
||||
}
|
||||
return (allowFrom ?? []).some((num) => {
|
||||
const normalizedAllow = normalizePhoneNumber(num);
|
||||
return normalizedAllow !== "" && normalizedAllow === normalizedFrom;
|
||||
});
|
||||
}
|
||||
@@ -148,6 +148,34 @@ describe("validateProviderConfig", () => {
|
||||
"plugins.entries.voice-call.config.telnyx.apiKey is required (or set TELNYX_API_KEY env)",
|
||||
);
|
||||
});
|
||||
|
||||
it("fails validation when allowlist inbound policy lacks public key", () => {
|
||||
const config = createBaseConfig("telnyx");
|
||||
config.inboundPolicy = "allowlist";
|
||||
config.telnyx = { apiKey: "KEY123", connectionId: "CONN456" };
|
||||
|
||||
const result = validateProviderConfig(config);
|
||||
|
||||
expect(result.valid).toBe(false);
|
||||
expect(result.errors).toContain(
|
||||
"plugins.entries.voice-call.config.telnyx.publicKey is required for inboundPolicy allowlist/pairing",
|
||||
);
|
||||
});
|
||||
|
||||
it("passes validation when allowlist inbound policy has public key", () => {
|
||||
const config = createBaseConfig("telnyx");
|
||||
config.inboundPolicy = "allowlist";
|
||||
config.telnyx = {
|
||||
apiKey: "KEY123",
|
||||
connectionId: "CONN456",
|
||||
publicKey: "public-key",
|
||||
};
|
||||
|
||||
const result = validateProviderConfig(config);
|
||||
|
||||
expect(result.valid).toBe(true);
|
||||
expect(result.errors).toEqual([]);
|
||||
});
|
||||
});
|
||||
|
||||
describe("plivo provider", () => {
|
||||
|
||||
@@ -448,6 +448,14 @@ export function validateProviderConfig(config: VoiceCallConfig): {
|
||||
"plugins.entries.voice-call.config.telnyx.connectionId is required (or set TELNYX_CONNECTION_ID env)",
|
||||
);
|
||||
}
|
||||
if (
|
||||
(config.inboundPolicy === "allowlist" || config.inboundPolicy === "pairing") &&
|
||||
!config.telnyx?.publicKey
|
||||
) {
|
||||
errors.push(
|
||||
"plugins.entries.voice-call.config.telnyx.publicKey is required for inboundPolicy allowlist/pairing",
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
if (config.provider === "twilio") {
|
||||
|
||||
@@ -19,6 +19,7 @@ import { CallManager } from "./manager.js";
|
||||
class FakeProvider implements VoiceCallProvider {
|
||||
readonly name = "plivo" as const;
|
||||
readonly playTtsCalls: PlayTtsInput[] = [];
|
||||
readonly hangupCalls: HangupCallInput[] = [];
|
||||
|
||||
verifyWebhook(_ctx: WebhookContext): WebhookVerificationResult {
|
||||
return { ok: true };
|
||||
@@ -29,7 +30,9 @@ class FakeProvider implements VoiceCallProvider {
|
||||
async initiateCall(_input: InitiateCallInput): Promise<InitiateCallResult> {
|
||||
return { providerCallId: "request-uuid", status: "initiated" };
|
||||
}
|
||||
async hangupCall(_input: HangupCallInput): Promise<void> {}
|
||||
async hangupCall(input: HangupCallInput): Promise<void> {
|
||||
this.hangupCalls.push(input);
|
||||
}
|
||||
async playTts(input: PlayTtsInput): Promise<void> {
|
||||
this.playTtsCalls.push(input);
|
||||
}
|
||||
@@ -102,4 +105,90 @@ describe("CallManager", () => {
|
||||
expect(provider.playTtsCalls).toHaveLength(1);
|
||||
expect(provider.playTtsCalls[0]?.text).toBe("Hello there");
|
||||
});
|
||||
|
||||
it("rejects inbound calls with missing caller ID when allowlist enabled", () => {
|
||||
const config = VoiceCallConfigSchema.parse({
|
||||
enabled: true,
|
||||
provider: "plivo",
|
||||
fromNumber: "+15550000000",
|
||||
inboundPolicy: "allowlist",
|
||||
allowFrom: ["+15550001234"],
|
||||
});
|
||||
|
||||
const storePath = path.join(os.tmpdir(), `openclaw-voice-call-test-${Date.now()}`);
|
||||
const provider = new FakeProvider();
|
||||
const manager = new CallManager(config, storePath);
|
||||
manager.initialize(provider, "https://example.com/voice/webhook");
|
||||
|
||||
manager.processEvent({
|
||||
id: "evt-allowlist-missing",
|
||||
type: "call.initiated",
|
||||
callId: "call-missing",
|
||||
providerCallId: "provider-missing",
|
||||
timestamp: Date.now(),
|
||||
direction: "inbound",
|
||||
to: "+15550000000",
|
||||
});
|
||||
|
||||
expect(manager.getCallByProviderCallId("provider-missing")).toBeUndefined();
|
||||
expect(provider.hangupCalls).toHaveLength(1);
|
||||
expect(provider.hangupCalls[0]?.providerCallId).toBe("provider-missing");
|
||||
});
|
||||
|
||||
it("rejects inbound calls that only match allowlist suffixes", () => {
|
||||
const config = VoiceCallConfigSchema.parse({
|
||||
enabled: true,
|
||||
provider: "plivo",
|
||||
fromNumber: "+15550000000",
|
||||
inboundPolicy: "allowlist",
|
||||
allowFrom: ["+15550001234"],
|
||||
});
|
||||
|
||||
const storePath = path.join(os.tmpdir(), `openclaw-voice-call-test-${Date.now()}`);
|
||||
const provider = new FakeProvider();
|
||||
const manager = new CallManager(config, storePath);
|
||||
manager.initialize(provider, "https://example.com/voice/webhook");
|
||||
|
||||
manager.processEvent({
|
||||
id: "evt-allowlist-suffix",
|
||||
type: "call.initiated",
|
||||
callId: "call-suffix",
|
||||
providerCallId: "provider-suffix",
|
||||
timestamp: Date.now(),
|
||||
direction: "inbound",
|
||||
from: "+99915550001234",
|
||||
to: "+15550000000",
|
||||
});
|
||||
|
||||
expect(manager.getCallByProviderCallId("provider-suffix")).toBeUndefined();
|
||||
expect(provider.hangupCalls).toHaveLength(1);
|
||||
expect(provider.hangupCalls[0]?.providerCallId).toBe("provider-suffix");
|
||||
});
|
||||
|
||||
it("accepts inbound calls that exactly match the allowlist", () => {
|
||||
const config = VoiceCallConfigSchema.parse({
|
||||
enabled: true,
|
||||
provider: "plivo",
|
||||
fromNumber: "+15550000000",
|
||||
inboundPolicy: "allowlist",
|
||||
allowFrom: ["+15550001234"],
|
||||
});
|
||||
|
||||
const storePath = path.join(os.tmpdir(), `openclaw-voice-call-test-${Date.now()}`);
|
||||
const manager = new CallManager(config, storePath);
|
||||
manager.initialize(new FakeProvider(), "https://example.com/voice/webhook");
|
||||
|
||||
manager.processEvent({
|
||||
id: "evt-allowlist-exact",
|
||||
type: "call.initiated",
|
||||
callId: "call-exact",
|
||||
providerCallId: "provider-exact",
|
||||
timestamp: Date.now(),
|
||||
direction: "inbound",
|
||||
from: "+15550001234",
|
||||
to: "+15550000000",
|
||||
});
|
||||
|
||||
expect(manager.getCallByProviderCallId("provider-exact")).toBeDefined();
|
||||
});
|
||||
});
|
||||
|
||||
@@ -5,6 +5,7 @@ import os from "node:os";
|
||||
import path from "node:path";
|
||||
import type { CallMode, VoiceCallConfig } from "./config.js";
|
||||
import type { VoiceCallProvider } from "./providers/base.js";
|
||||
import { isAllowlistedCaller, normalizePhoneNumber } from "./allowlist.js";
|
||||
import {
|
||||
type CallId,
|
||||
type CallRecord,
|
||||
@@ -474,11 +475,12 @@ export class CallManager {
|
||||
|
||||
case "allowlist":
|
||||
case "pairing": {
|
||||
const normalized = from?.replace(/\D/g, "") || "";
|
||||
const allowed = (allowFrom || []).some((num) => {
|
||||
const normalizedAllow = num.replace(/\D/g, "");
|
||||
return normalized.endsWith(normalizedAllow) || normalizedAllow.endsWith(normalized);
|
||||
});
|
||||
const normalized = normalizePhoneNumber(from);
|
||||
if (!normalized) {
|
||||
console.log("[voice-call] Inbound call rejected: missing caller ID");
|
||||
return false;
|
||||
}
|
||||
const allowed = isAllowlistedCaller(normalized, allowFrom);
|
||||
const status = allowed ? "accepted" : "rejected";
|
||||
console.log(
|
||||
`[voice-call] Inbound call ${status}: ${from} ${allowed ? "is in" : "not in"} allowlist`,
|
||||
@@ -551,7 +553,7 @@ export class CallManager {
|
||||
if (!call && event.direction === "inbound" && event.providerCallId) {
|
||||
// Check if we should accept this inbound call
|
||||
if (!this.shouldAcceptInbound(event.from)) {
|
||||
// TODO: Could hang up the call here
|
||||
void this.rejectInboundCall(event);
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -653,6 +655,25 @@ export class CallManager {
|
||||
this.persistCallRecord(call);
|
||||
}
|
||||
|
||||
private async rejectInboundCall(event: NormalizedEvent): Promise<void> {
|
||||
if (!this.provider || !event.providerCallId) {
|
||||
return;
|
||||
}
|
||||
const callId = event.callId || event.providerCallId;
|
||||
try {
|
||||
await this.provider.hangupCall({
|
||||
callId,
|
||||
providerCallId: event.providerCallId,
|
||||
reason: "hangup-bot",
|
||||
});
|
||||
} catch (err) {
|
||||
console.warn(
|
||||
`[voice-call] Failed to reject inbound call ${event.providerCallId}:`,
|
||||
err instanceof Error ? err.message : err,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
private maybeSpeakInitialMessageOnAnswered(call: CallRecord): void {
|
||||
const initialMessage =
|
||||
typeof call.metadata?.initialMessage === "string" ? call.metadata.initialMessage.trim() : "";
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
import crypto from "node:crypto";
|
||||
import type { CallRecord, CallState, NormalizedEvent } from "../types.js";
|
||||
import type { CallManagerContext } from "./context.js";
|
||||
import { isAllowlistedCaller, normalizePhoneNumber } from "../allowlist.js";
|
||||
import { findCall } from "./lookup.js";
|
||||
import { endCall } from "./outbound.js";
|
||||
import { addTranscriptEntry, transitionState } from "./state.js";
|
||||
@@ -29,11 +30,12 @@ function shouldAcceptInbound(
|
||||
|
||||
case "allowlist":
|
||||
case "pairing": {
|
||||
const normalized = from?.replace(/\D/g, "") || "";
|
||||
const allowed = (allowFrom || []).some((num) => {
|
||||
const normalizedAllow = num.replace(/\D/g, "");
|
||||
return normalized.endsWith(normalizedAllow) || normalizedAllow.endsWith(normalized);
|
||||
});
|
||||
const normalized = normalizePhoneNumber(from);
|
||||
if (!normalized) {
|
||||
console.log("[voice-call] Inbound call rejected: missing caller ID");
|
||||
return false;
|
||||
}
|
||||
const allowed = isAllowlistedCaller(normalized, allowFrom);
|
||||
const status = allowed ? "accepted" : "rejected";
|
||||
console.log(
|
||||
`[voice-call] Inbound call ${status}: ${from} ${allowed ? "is in" : "not in"} allowlist`,
|
||||
|
||||
@@ -21,6 +21,8 @@ import type {
|
||||
export interface MediaStreamConfig {
|
||||
/** STT provider for transcription */
|
||||
sttProvider: OpenAIRealtimeSTTProvider;
|
||||
/** Validate whether to accept a media stream for the given call ID */
|
||||
shouldAcceptStream?: (params: { callId: string; streamSid: string; token?: string }) => boolean;
|
||||
/** Callback when transcript is received */
|
||||
onTranscript?: (callId: string, transcript: string) => void;
|
||||
/** Callback for partial transcripts (streaming UI) */
|
||||
@@ -87,6 +89,7 @@ export class MediaStreamHandler {
|
||||
*/
|
||||
private async handleConnection(ws: WebSocket, _request: IncomingMessage): Promise<void> {
|
||||
let session: StreamSession | null = null;
|
||||
const streamToken = this.getStreamToken(_request);
|
||||
|
||||
ws.on("message", async (data: Buffer) => {
|
||||
try {
|
||||
@@ -98,7 +101,7 @@ export class MediaStreamHandler {
|
||||
break;
|
||||
|
||||
case "start":
|
||||
session = await this.handleStart(ws, message);
|
||||
session = await this.handleStart(ws, message, streamToken);
|
||||
break;
|
||||
|
||||
case "media":
|
||||
@@ -135,11 +138,28 @@ export class MediaStreamHandler {
|
||||
/**
|
||||
* Handle stream start event.
|
||||
*/
|
||||
private async handleStart(ws: WebSocket, message: TwilioMediaMessage): Promise<StreamSession> {
|
||||
private async handleStart(
|
||||
ws: WebSocket,
|
||||
message: TwilioMediaMessage,
|
||||
streamToken?: string,
|
||||
): Promise<StreamSession | null> {
|
||||
const streamSid = message.streamSid || "";
|
||||
const callSid = message.start?.callSid || "";
|
||||
|
||||
console.log(`[MediaStream] Stream started: ${streamSid} (call: ${callSid})`);
|
||||
if (!callSid) {
|
||||
console.warn("[MediaStream] Missing callSid; closing stream");
|
||||
ws.close(1008, "Missing callSid");
|
||||
return null;
|
||||
}
|
||||
if (
|
||||
this.config.shouldAcceptStream &&
|
||||
!this.config.shouldAcceptStream({ callId: callSid, streamSid, token: streamToken })
|
||||
) {
|
||||
console.warn(`[MediaStream] Rejecting stream for unknown call: ${callSid}`);
|
||||
ws.close(1008, "Unknown call");
|
||||
return null;
|
||||
}
|
||||
|
||||
// Create STT session
|
||||
const sttSession = this.config.sttProvider.createSession();
|
||||
@@ -189,6 +209,18 @@ export class MediaStreamHandler {
|
||||
this.config.onDisconnect?.(session.callId);
|
||||
}
|
||||
|
||||
private getStreamToken(request: IncomingMessage): string | undefined {
|
||||
if (!request.url || !request.headers.host) {
|
||||
return undefined;
|
||||
}
|
||||
try {
|
||||
const url = new URL(request.url, `http://${request.headers.host}`);
|
||||
return url.searchParams.get("token") ?? undefined;
|
||||
} catch {
|
||||
return undefined;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get an active session with an open WebSocket, or undefined if unavailable.
|
||||
*/
|
||||
|
||||
@@ -21,15 +21,21 @@ import type { VoiceCallProvider } from "./base.js";
|
||||
* Uses Telnyx Call Control API v2 for managing calls.
|
||||
* @see https://developers.telnyx.com/docs/api/v2/call-control
|
||||
*/
|
||||
export interface TelnyxProviderOptions {
|
||||
/** Allow unsigned webhooks when no public key is configured */
|
||||
allowUnsignedWebhooks?: boolean;
|
||||
}
|
||||
|
||||
export class TelnyxProvider implements VoiceCallProvider {
|
||||
readonly name = "telnyx" as const;
|
||||
|
||||
private readonly apiKey: string;
|
||||
private readonly connectionId: string;
|
||||
private readonly publicKey: string | undefined;
|
||||
private readonly options: TelnyxProviderOptions;
|
||||
private readonly baseUrl = "https://api.telnyx.com/v2";
|
||||
|
||||
constructor(config: TelnyxConfig) {
|
||||
constructor(config: TelnyxConfig, options: TelnyxProviderOptions = {}) {
|
||||
if (!config.apiKey) {
|
||||
throw new Error("Telnyx API key is required");
|
||||
}
|
||||
@@ -40,6 +46,7 @@ export class TelnyxProvider implements VoiceCallProvider {
|
||||
this.apiKey = config.apiKey;
|
||||
this.connectionId = config.connectionId;
|
||||
this.publicKey = config.publicKey;
|
||||
this.options = options;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -76,8 +83,14 @@ export class TelnyxProvider implements VoiceCallProvider {
|
||||
*/
|
||||
verifyWebhook(ctx: WebhookContext): WebhookVerificationResult {
|
||||
if (!this.publicKey) {
|
||||
// No public key configured, skip verification (not recommended for production)
|
||||
return { ok: true };
|
||||
if (this.options.allowUnsignedWebhooks) {
|
||||
console.warn("[telnyx] Webhook verification skipped (no public key configured)");
|
||||
return { ok: true, reason: "verification skipped (no public key configured)" };
|
||||
}
|
||||
return {
|
||||
ok: false,
|
||||
reason: "Missing telnyx.publicKey (configure to verify webhooks)",
|
||||
};
|
||||
}
|
||||
|
||||
const signature = ctx.headers["telnyx-signature-ed25519"];
|
||||
|
||||
@@ -2,7 +2,7 @@ import { describe, expect, it } from "vitest";
|
||||
import type { WebhookContext } from "../types.js";
|
||||
import { TwilioProvider } from "./twilio.js";
|
||||
|
||||
const STREAM_URL = "wss://example.ngrok.app/voice/stream";
|
||||
const STREAM_URL_PREFIX = "wss://example.ngrok.app/voice/stream?token=";
|
||||
|
||||
function createProvider(): TwilioProvider {
|
||||
return new TwilioProvider(
|
||||
@@ -24,13 +24,13 @@ function createContext(rawBody: string, query?: WebhookContext["query"]): Webhoo
|
||||
describe("TwilioProvider", () => {
|
||||
it("returns streaming TwiML for outbound conversation calls before in-progress", () => {
|
||||
const provider = createProvider();
|
||||
const ctx = createContext("CallStatus=initiated&Direction=outbound-api", {
|
||||
const ctx = createContext("CallStatus=initiated&Direction=outbound-api&CallSid=CA123", {
|
||||
callId: "call-1",
|
||||
});
|
||||
|
||||
const result = provider.parseWebhookEvent(ctx);
|
||||
|
||||
expect(result.providerResponseBody).toContain(STREAM_URL);
|
||||
expect(result.providerResponseBody).toContain(STREAM_URL_PREFIX);
|
||||
expect(result.providerResponseBody).toContain("<Connect>");
|
||||
});
|
||||
|
||||
@@ -50,11 +50,11 @@ describe("TwilioProvider", () => {
|
||||
|
||||
it("returns streaming TwiML for inbound calls", () => {
|
||||
const provider = createProvider();
|
||||
const ctx = createContext("CallStatus=ringing&Direction=inbound");
|
||||
const ctx = createContext("CallStatus=ringing&Direction=inbound&CallSid=CA456");
|
||||
|
||||
const result = provider.parseWebhookEvent(ctx);
|
||||
|
||||
expect(result.providerResponseBody).toContain(STREAM_URL);
|
||||
expect(result.providerResponseBody).toContain(STREAM_URL_PREFIX);
|
||||
expect(result.providerResponseBody).toContain("<Connect>");
|
||||
});
|
||||
});
|
||||
|
||||
@@ -60,6 +60,8 @@ export class TwilioProvider implements VoiceCallProvider {
|
||||
|
||||
/** Map of call SID to stream SID for media streams */
|
||||
private callStreamMap = new Map<string, string>();
|
||||
/** Per-call tokens for media stream authentication */
|
||||
private streamAuthTokens = new Map<string, string>();
|
||||
|
||||
/** Storage for TwiML content (for notify mode with URL-based TwiML) */
|
||||
private readonly twimlStorage = new Map<string, string>();
|
||||
@@ -94,6 +96,7 @@ export class TwilioProvider implements VoiceCallProvider {
|
||||
}
|
||||
|
||||
this.deleteStoredTwiml(callIdMatch[1]);
|
||||
this.streamAuthTokens.delete(providerCallId);
|
||||
}
|
||||
|
||||
constructor(config: TwilioConfig, options: TwilioProviderOptions = {}) {
|
||||
@@ -138,6 +141,19 @@ export class TwilioProvider implements VoiceCallProvider {
|
||||
this.callStreamMap.delete(callSid);
|
||||
}
|
||||
|
||||
isValidStreamToken(callSid: string, token?: string): boolean {
|
||||
const expected = this.streamAuthTokens.get(callSid);
|
||||
if (!expected || !token) {
|
||||
return false;
|
||||
}
|
||||
if (expected.length !== token.length) {
|
||||
const dummy = Buffer.from(expected);
|
||||
crypto.timingSafeEqual(dummy, dummy);
|
||||
return false;
|
||||
}
|
||||
return crypto.timingSafeEqual(Buffer.from(expected), Buffer.from(token));
|
||||
}
|
||||
|
||||
/**
|
||||
* Clear TTS queue for a call (barge-in).
|
||||
* Used when user starts speaking to interrupt current TTS playback.
|
||||
@@ -271,11 +287,13 @@ export class TwilioProvider implements VoiceCallProvider {
|
||||
case "busy":
|
||||
case "no-answer":
|
||||
case "failed":
|
||||
this.streamAuthTokens.delete(callSid);
|
||||
if (callIdOverride) {
|
||||
this.deleteStoredTwiml(callIdOverride);
|
||||
}
|
||||
return { ...baseEvent, type: "call.ended", reason: callStatus };
|
||||
case "canceled":
|
||||
this.streamAuthTokens.delete(callSid);
|
||||
if (callIdOverride) {
|
||||
this.deleteStoredTwiml(callIdOverride);
|
||||
}
|
||||
@@ -308,6 +326,7 @@ export class TwilioProvider implements VoiceCallProvider {
|
||||
const callStatus = params.get("CallStatus");
|
||||
const direction = params.get("Direction");
|
||||
const isOutbound = direction?.startsWith("outbound") ?? false;
|
||||
const callSid = params.get("CallSid") || undefined;
|
||||
const callIdFromQuery =
|
||||
typeof ctx.query?.callId === "string" && ctx.query.callId.trim()
|
||||
? ctx.query.callId.trim()
|
||||
@@ -330,7 +349,7 @@ export class TwilioProvider implements VoiceCallProvider {
|
||||
|
||||
// Conversation mode: return streaming TwiML immediately for outbound calls.
|
||||
if (isOutbound) {
|
||||
const streamUrl = this.getStreamUrl();
|
||||
const streamUrl = callSid ? this.getStreamUrlForCall(callSid) : null;
|
||||
return streamUrl ? this.getStreamConnectXml(streamUrl) : TwilioProvider.PAUSE_TWIML;
|
||||
}
|
||||
}
|
||||
@@ -343,7 +362,7 @@ export class TwilioProvider implements VoiceCallProvider {
|
||||
// Handle subsequent webhook requests (status callbacks, etc.)
|
||||
// For inbound calls, answer immediately with stream
|
||||
if (direction === "inbound") {
|
||||
const streamUrl = this.getStreamUrl();
|
||||
const streamUrl = callSid ? this.getStreamUrlForCall(callSid) : null;
|
||||
return streamUrl ? this.getStreamConnectXml(streamUrl) : TwilioProvider.PAUSE_TWIML;
|
||||
}
|
||||
|
||||
@@ -352,7 +371,7 @@ export class TwilioProvider implements VoiceCallProvider {
|
||||
return TwilioProvider.EMPTY_TWIML;
|
||||
}
|
||||
|
||||
const streamUrl = this.getStreamUrl();
|
||||
const streamUrl = callSid ? this.getStreamUrlForCall(callSid) : null;
|
||||
return streamUrl ? this.getStreamConnectXml(streamUrl) : TwilioProvider.PAUSE_TWIML;
|
||||
}
|
||||
|
||||
@@ -380,6 +399,27 @@ export class TwilioProvider implements VoiceCallProvider {
|
||||
return `${wsOrigin}${path}`;
|
||||
}
|
||||
|
||||
private getStreamAuthToken(callSid: string): string {
|
||||
const existing = this.streamAuthTokens.get(callSid);
|
||||
if (existing) {
|
||||
return existing;
|
||||
}
|
||||
const token = crypto.randomBytes(16).toString("base64url");
|
||||
this.streamAuthTokens.set(callSid, token);
|
||||
return token;
|
||||
}
|
||||
|
||||
private getStreamUrlForCall(callSid: string): string | null {
|
||||
const baseUrl = this.getStreamUrl();
|
||||
if (!baseUrl) {
|
||||
return null;
|
||||
}
|
||||
const token = this.getStreamAuthToken(callSid);
|
||||
const url = new URL(baseUrl);
|
||||
url.searchParams.set("token", token);
|
||||
return url.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* Generate TwiML to connect a call to a WebSocket media stream.
|
||||
* This enables bidirectional audio streaming for real-time STT/TTS.
|
||||
@@ -444,6 +484,7 @@ export class TwilioProvider implements VoiceCallProvider {
|
||||
this.deleteStoredTwimlForProviderCall(input.providerCallId);
|
||||
|
||||
this.callWebhookUrls.delete(input.providerCallId);
|
||||
this.streamAuthTokens.delete(input.providerCallId);
|
||||
|
||||
await this.apiRequest(
|
||||
`/Calls/${input.providerCallId}.json`,
|
||||
|
||||
@@ -48,11 +48,17 @@ function resolveProvider(config: VoiceCallConfig): VoiceCallProvider {
|
||||
|
||||
switch (config.provider) {
|
||||
case "telnyx":
|
||||
return new TelnyxProvider({
|
||||
apiKey: config.telnyx?.apiKey,
|
||||
connectionId: config.telnyx?.connectionId,
|
||||
publicKey: config.telnyx?.publicKey,
|
||||
});
|
||||
return new TelnyxProvider(
|
||||
{
|
||||
apiKey: config.telnyx?.apiKey,
|
||||
connectionId: config.telnyx?.connectionId,
|
||||
publicKey: config.telnyx?.publicKey,
|
||||
},
|
||||
{
|
||||
allowUnsignedWebhooks:
|
||||
config.inboundPolicy === "open" || config.inboundPolicy === "disabled",
|
||||
},
|
||||
);
|
||||
case "twilio":
|
||||
return new TwilioProvider(
|
||||
{
|
||||
|
||||
@@ -11,6 +11,8 @@ import type { NormalizedEvent, WebhookContext } from "./types.js";
|
||||
import { MediaStreamHandler } from "./media-stream.js";
|
||||
import { OpenAIRealtimeSTTProvider } from "./providers/stt-openai-realtime.js";
|
||||
|
||||
const MAX_WEBHOOK_BODY_BYTES = 1024 * 1024;
|
||||
|
||||
/**
|
||||
* HTTP server for receiving voice call webhooks from providers.
|
||||
* Supports WebSocket upgrades for media streams when streaming is enabled.
|
||||
@@ -69,6 +71,20 @@ export class VoiceCallWebhookServer {
|
||||
|
||||
const streamConfig: MediaStreamConfig = {
|
||||
sttProvider,
|
||||
shouldAcceptStream: ({ callId, token }) => {
|
||||
const call = this.manager.getCallByProviderCallId(callId);
|
||||
if (!call) {
|
||||
return false;
|
||||
}
|
||||
if (this.provider.name === "twilio") {
|
||||
const twilio = this.provider as TwilioProvider;
|
||||
if (!twilio.isValidStreamToken(callId, token)) {
|
||||
console.warn(`[voice-call] Rejecting media stream: invalid token for ${callId}`);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
},
|
||||
onTranscript: (providerCallId, transcript) => {
|
||||
console.log(`[voice-call] Transcript for ${providerCallId}: ${transcript}`);
|
||||
|
||||
@@ -224,7 +240,17 @@ export class VoiceCallWebhookServer {
|
||||
}
|
||||
|
||||
// Read body
|
||||
const body = await this.readBody(req);
|
||||
let body = "";
|
||||
try {
|
||||
body = await this.readBody(req, MAX_WEBHOOK_BODY_BYTES);
|
||||
} catch (err) {
|
||||
if (err instanceof Error && err.message === "PayloadTooLarge") {
|
||||
res.statusCode = 413;
|
||||
res.end("Payload Too Large");
|
||||
return;
|
||||
}
|
||||
throw err;
|
||||
}
|
||||
|
||||
// Build webhook context
|
||||
const ctx: WebhookContext = {
|
||||
@@ -272,10 +298,19 @@ export class VoiceCallWebhookServer {
|
||||
/**
|
||||
* Read request body as string.
|
||||
*/
|
||||
private readBody(req: http.IncomingMessage): Promise<string> {
|
||||
private readBody(req: http.IncomingMessage, maxBytes: number): Promise<string> {
|
||||
return new Promise((resolve, reject) => {
|
||||
const chunks: Buffer[] = [];
|
||||
req.on("data", (chunk) => chunks.push(chunk));
|
||||
let totalBytes = 0;
|
||||
req.on("data", (chunk: Buffer) => {
|
||||
totalBytes += chunk.length;
|
||||
if (totalBytes > maxBytes) {
|
||||
req.destroy();
|
||||
reject(new Error("PayloadTooLarge"));
|
||||
return;
|
||||
}
|
||||
chunks.push(chunk);
|
||||
});
|
||||
req.on("end", () => resolve(Buffer.concat(chunks).toString("utf-8")));
|
||||
req.on("error", reject);
|
||||
});
|
||||
|
||||
Reference in New Issue
Block a user