From ee2cfa5ca11421cc4c45f9b2643dc3dd5e6b4c24 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Mon, 19 Jan 2026 06:00:01 +0000 Subject: [PATCH] fix: harden voice-call webhooks (#1213) (thanks @andrew-kurin) --- CHANGELOG.md | 1 + extensions/voice-call/src/providers/twilio.ts | 15 ++++--- .../src/providers/twilio/api.test.ts | 39 +++++++++++++++++++ .../voice-call/src/providers/twilio/api.ts | 1 + extensions/voice-call/src/tunnel.ts | 3 +- .../voice-call/src/webhook-security.test.ts | 31 ++++++++++++++- extensions/voice-call/src/webhook-security.ts | 2 +- src/agents/system-prompt.ts | 2 +- 8 files changed, 85 insertions(+), 9 deletions(-) create mode 100644 extensions/voice-call/src/providers/twilio/api.test.ts diff --git a/CHANGELOG.md b/CHANGELOG.md index c80584414..21bfc163e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,6 +20,7 @@ Docs: https://docs.clawd.bot - TUI: show generic empty-state text for searchable pickers. (#1201) — thanks @vignesh07. - Doctor: canonicalize legacy session keys in session stores to prevent stale metadata. (#1169) - CLI: centralize CLI command registration to keep fast-path routing and program wiring in sync. (#1207) — thanks @gumadeiras. +- Voice call: keep Twilio notify callbacks out of streaming paths, normalize Tailscale serve paths, and honor ASCII signature ordering. (#1213) — thanks @andrew-kurin. ## 2026.1.18-5 diff --git a/extensions/voice-call/src/providers/twilio.ts b/extensions/voice-call/src/providers/twilio.ts index 06115d662..45d434d94 100644 --- a/extensions/voice-call/src/providers/twilio.ts +++ b/extensions/voice-call/src/providers/twilio.ts @@ -64,17 +64,21 @@ export class TwilioProvider implements VoiceCallProvider { /** Storage for TwiML content (for notify mode with URL-based TwiML) */ private readonly twimlStorage = new Map(); - /** Track notify-mode calls to avoid streaming on follow-up callbacks */ + /** Track notify-mode calls to avoid streaming on follow-up callbacks. */ private readonly notifyCalls = new Set(); /** * Delete stored TwiML for a given `callId`. * * We keep TwiML in-memory only long enough to satisfy the initial Twilio - * webhook request (notify mode). Subsequent webhooks should not reuse it. + * webhook request (notify mode). The notify guard is cleared separately. */ private deleteStoredTwiml(callId: string): void { this.twimlStorage.delete(callId); + } + + private clearNotifyState(callId: string): void { + this.twimlStorage.delete(callId); this.notifyCalls.delete(callId); } @@ -90,7 +94,7 @@ export class TwilioProvider implements VoiceCallProvider { const callIdMatch = webhookUrl.match(/callId=([^&]+)/); if (!callIdMatch) return; - this.deleteStoredTwiml(callIdMatch[1]); + this.clearNotifyState(callIdMatch[1]); } constructor(config: TwilioConfig, options: TwilioProviderOptions = {}) { @@ -260,12 +264,12 @@ export class TwilioProvider implements VoiceCallProvider { case "no-answer": case "failed": if (callIdOverride) { - this.deleteStoredTwiml(callIdOverride); + this.clearNotifyState(callIdOverride); } return { ...baseEvent, type: "call.ended", reason: callStatus }; case "canceled": if (callIdOverride) { - this.deleteStoredTwiml(callIdOverride); + this.clearNotifyState(callIdOverride); } return { ...baseEvent, type: "call.ended", reason: "hangup-bot" }; default: @@ -399,6 +403,7 @@ export class TwilioProvider implements VoiceCallProvider { // We now serve it from the webhook endpoint instead of sending inline if (input.inlineTwiml) { this.twimlStorage.set(input.callId, input.inlineTwiml); + // Keep notify-mode calls out of streaming TwiML for any follow-up callbacks. this.notifyCalls.add(input.callId); } diff --git a/extensions/voice-call/src/providers/twilio/api.test.ts b/extensions/voice-call/src/providers/twilio/api.test.ts new file mode 100644 index 000000000..545dd6ddd --- /dev/null +++ b/extensions/voice-call/src/providers/twilio/api.test.ts @@ -0,0 +1,39 @@ +import { afterEach, describe, expect, it, vi } from "vitest"; + +import { twilioApiRequest } from "./api.js"; + +describe("twilioApiRequest", () => { + afterEach(() => { + vi.unstubAllGlobals(); + vi.restoreAllMocks(); + }); + + it("encodes array params as repeated form fields", async () => { + const fetchMock = vi.fn().mockResolvedValue({ + ok: true, + text: async () => "", + }); + + vi.stubGlobal("fetch", fetchMock as unknown as typeof fetch); + + await twilioApiRequest({ + baseUrl: "https://api.example.com", + accountSid: "AC123", + authToken: "token", + endpoint: "/Calls.json", + body: { + To: "+15555550123", + StatusCallbackEvent: ["initiated", "completed"], + }, + }); + + const [, init] = fetchMock.mock.calls[0] ?? []; + const body = init?.body as URLSearchParams | undefined; + + expect(body?.getAll("StatusCallbackEvent")).toEqual([ + "initiated", + "completed", + ]); + expect(body?.get("To")).toBe("+15555550123"); + }); +}); diff --git a/extensions/voice-call/src/providers/twilio/api.ts b/extensions/voice-call/src/providers/twilio/api.ts index 9fcb202a8..a8bc74a2c 100644 --- a/extensions/voice-call/src/providers/twilio/api.ts +++ b/extensions/voice-call/src/providers/twilio/api.ts @@ -29,6 +29,7 @@ export async function twilioApiRequest(params: { Authorization: `Basic ${Buffer.from(`${params.accountSid}:${params.authToken}`).toString("base64")}`, "Content-Type": "application/x-www-form-urlencoded", }, + // URL-encoded body required by Twilio, with repeated keys for arrays. body: bodyParams, }); diff --git a/extensions/voice-call/src/tunnel.ts b/extensions/voice-call/src/tunnel.ts index 973c7b70f..652c206c6 100644 --- a/extensions/voice-call/src/tunnel.ts +++ b/extensions/voice-call/src/tunnel.ts @@ -231,7 +231,8 @@ export async function startTailscaleTunnel(config: { } const path = config.path.startsWith("/") ? config.path : `/${config.path}`; - const localUrl = `http://127.0.0.1:${config.port}${path}`; + // --set-path already mounts the path; keep the target base URL to avoid /path/path. + const localUrl = `http://127.0.0.1:${config.port}`; return new Promise((resolve, reject) => { const proc = spawn( diff --git a/extensions/voice-call/src/webhook-security.test.ts b/extensions/voice-call/src/webhook-security.test.ts index c31d7225a..f6d43a813 100644 --- a/extensions/voice-call/src/webhook-security.test.ts +++ b/extensions/voice-call/src/webhook-security.test.ts @@ -79,7 +79,7 @@ function twilioSignature(params: { let dataToSign = params.url; const sortedParams = Array.from( new URLSearchParams(params.postBody).entries(), - ).sort((a, b) => a[0].localeCompare(b[0])); + ).sort((a, b) => (a[0] < b[0] ? -1 : a[0] > b[0] ? 1 : 0)); for (const [key, value] of sortedParams) { dataToSign += key + value; @@ -205,4 +205,33 @@ describe("verifyTwilioWebhook", () => { expect(result.ok).toBe(true); }); + + it("sorts params using ASCII order", () => { + const authToken = "test-auth-token"; + const publicUrl = "https://example.com/voice/webhook"; + const postBody = "Z=first&a=second"; + + const signature = twilioSignature({ + authToken, + url: publicUrl, + postBody, + }); + + const result = verifyTwilioWebhook( + { + headers: { + host: "example.com", + "x-forwarded-proto": "https", + "x-twilio-signature": signature, + }, + rawBody: postBody, + url: "http://local/voice/webhook", + method: "POST", + }, + authToken, + { publicUrl }, + ); + + expect(result.ok).toBe(true); + }); }); diff --git a/extensions/voice-call/src/webhook-security.ts b/extensions/voice-call/src/webhook-security.ts index 79bd96099..70a4bada2 100644 --- a/extensions/voice-call/src/webhook-security.ts +++ b/extensions/voice-call/src/webhook-security.ts @@ -23,7 +23,7 @@ export function validateTwilioSignature( // Build the string to sign: URL + sorted params (key+value pairs) let dataToSign = url; - // Sort params alphabetically and append key+value + // Sort params by ASCII order (Twilio spec) and append key+value. const sortedParams = Array.from(params.entries()).sort((a, b) => a[0] < b[0] ? -1 : a[0] > b[0] ? 1 : 0, ); diff --git a/src/agents/system-prompt.ts b/src/agents/system-prompt.ts index 40ecd8607..d0c881ca9 100644 --- a/src/agents/system-prompt.ts +++ b/src/agents/system-prompt.ts @@ -553,7 +553,7 @@ export function buildAgentSystemPrompt(params: { } export function buildRuntimeLine( - runtimeInfo: { + runtimeInfo?: { agentId?: string; host?: string; os?: string;