From 70364631f47d029a8dec8e3dbedc499ac8d8cc4f Mon Sep 17 00:00:00 2001 From: Joao Lisboa Date: Fri, 5 Dec 2025 10:51:21 -0300 Subject: [PATCH] twilio: rewrite typing indicator with direct API calls and robust retry MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Twilio SDK's request method wasn't working for the typing indicator API. This rewrites the implementation to use direct fetch() calls with comprehensive error handling and retry logic. Key changes: - Switch from SDK to direct fetch() with Basic auth - Add time-based retry loop (30s budget) for MDR propagation delays - Add 10s per-request timeout via AbortController to prevent hangs - Add exponential backoff with jitter (1s-5s, +200ms jitter) - Track succeeded SIDs to skip retry on refresh calls - Add typed errors (NetworkError, FetchTimeoutError) for proper handling - Fail fast on non-retryable errors (401, 403, non-MDR 400) - Fire-and-forget pattern to avoid blocking reply flow 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- src/auto-reply/reply.ts | 11 +- src/index.core.test.ts | 20 ++- src/twilio/types.ts | 3 +- src/twilio/typing.ts | 327 +++++++++++++++++++++++++++++++---- src/twilio/update-webhook.ts | 5 +- src/twilio/webhook.ts | 22 ++- 6 files changed, 348 insertions(+), 40 deletions(-) diff --git a/src/auto-reply/reply.ts b/src/auto-reply/reply.ts index a931d852d..1c29eb853 100644 --- a/src/auto-reply/reply.ts +++ b/src/auto-reply/reply.ts @@ -702,6 +702,9 @@ type TwilioLikeClient = TwilioRequester & { body: string; }) => Promise; }; + // Twilio SDK exposes credentials via username/password for Basic auth + username?: string; + password?: string; }; export async function autoReplyIfConfigured( @@ -812,7 +815,13 @@ export async function autoReplyIfConfigured( const replyResult = await getReplyFromConfig( ctx, { - onReplyStart: () => sendTypingIndicator(client, runtime, message.sid), + onReplyStart: () => + client.username && client.password + ? sendTypingIndicator( + { accountSid: client.username, authToken: client.password }, + message.sid, + ) + : undefined, onPartialReply: partialSender, }, cfg, diff --git a/src/index.core.test.ts b/src/index.core.test.ts index 5793a3973..d5d5647f3 100644 --- a/src/index.core.test.ts +++ b/src/index.core.test.ts @@ -1907,14 +1907,22 @@ describe("twilio interactions", () => { }); it("sendTypingIndicator skips missing messageSid and sends when present", async () => { - const client = twilioFactory._createClient(); - await index.sendTypingIndicator(client, index.defaultRuntime, undefined); - expect(client.request).not.toHaveBeenCalled(); + const mockFetch = vi + .spyOn(global, "fetch") + .mockResolvedValue(new Response(JSON.stringify({ success: true }))); + const creds = { accountSid: "AC123", authToken: "token123" }; - await index.sendTypingIndicator(client, index.defaultRuntime, "SM123"); - expect(client.request).toHaveBeenCalledWith( - expect.objectContaining({ method: "post" }), + index.sendTypingIndicator(creds, undefined); + expect(mockFetch).not.toHaveBeenCalled(); + + index.sendTypingIndicator(creds, "SM123"); + // Wait for initial delay (3000ms) + some buffer for the fire-and-forget async + await new Promise((r) => setTimeout(r, 3200)); + expect(mockFetch).toHaveBeenCalledWith( + "https://messaging.twilio.com/v2/Indicators/Typing.json", + expect.objectContaining({ method: "POST" }), ); + mockFetch.mockRestore(); }); it("sendMessage wraps Twilio client and returns sid", async () => { diff --git a/src/twilio/types.ts b/src/twilio/types.ts index 9ea56ac7d..f8f47c912 100644 --- a/src/twilio/types.ts +++ b/src/twilio/types.ts @@ -1,8 +1,9 @@ export type TwilioRequestOptions = { method: "get" | "post"; uri: string; + headers?: Record; params?: Record; - form?: Record; + data?: Record; body?: unknown; contentType?: string; }; diff --git a/src/twilio/typing.ts b/src/twilio/typing.ts index adf234fa7..3b69ff5c9 100644 --- a/src/twilio/typing.ts +++ b/src/twilio/typing.ts @@ -1,43 +1,312 @@ -import { isVerbose, logVerbose, warn } from "../globals.js"; -import type { RuntimeEnv } from "../runtime.js"; +import { logVerbose } from "../globals.js"; -type TwilioRequestOptions = { - method: "get" | "post"; - uri: string; - params?: Record; - form?: Record; - body?: unknown; - contentType?: string; +// --- Configuration constants --- +const INITIAL_MDR_DELAY_MS = 3000; // MDR needs time to propagate after webhook +const MAX_TOTAL_RETRY_TIME_MS = 30_000; // 30 second total budget +const BASE_RETRY_DELAY_MS = 1000; +const MAX_RETRY_DELAY_MS = 5000; +const JITTER_MAX_MS = 200; +const CLEANUP_TIMEOUT_MS = 5 * 60 * 1000; // 5 minutes +const FETCH_TIMEOUT_MS = 10_000; // 10 second per-request timeout + +// --- Types --- +type TwilioCredentials = { + accountSid: string; + authToken: string; }; -type TwilioRequester = { - request: (options: TwilioRequestOptions) => Promise; +type TwilioErrorResponse = { + message?: string; + code?: number; + status?: number; }; -export async function sendTypingIndicator( - client: TwilioRequester, - runtime: RuntimeEnv, +// Track which message SIDs have successfully sent typing indicators +const succeededSids = new Set(); +// Track pending retry operations by Promise (prevents race conditions) +const pendingPromises = new Map>(); +// Track cleanup timers to prevent memory leaks and allow graceful shutdown +const cleanupTimers = new Map(); + +// --- Helper functions --- + +/** Safely parse a Twilio error response */ +function parseTwilioError(data: unknown): TwilioErrorResponse | null { + if (typeof data === "object" && data !== null && "message" in data) { + return data as TwilioErrorResponse; + } + return null; +} + +/** Check if the error indicates MDR is not ready yet (should keep retrying) */ +function isMdrNotReadyError(data: unknown): boolean { + const error = parseTwilioError(data); + const msg = error?.message?.toLowerCase() ?? ""; + return ( + msg.includes("mdr") && + (msg.includes("incomplete") || + msg.includes("not ready") || + msg.includes("invalid")) + ); +} + +/** Check if this is a permanent error we shouldn't retry */ +function isPermanentError(status: number): boolean { + // Auth errors or bad request (non-MDR) - don't waste time retrying + return status === 401 || status === 403; +} + +/** Custom error for fetch timeouts to distinguish from other errors */ +class FetchTimeoutError extends Error { + constructor(message: string) { + super(message); + this.name = "FetchTimeoutError"; + } +} + +/** Custom error for network/fetch failures that should be retried */ +class NetworkError extends Error { + constructor(message: string) { + super(message); + this.name = "NetworkError"; + } +} + +async function doTypingRequest( + auth: string, + messageSid: string, +): Promise<{ ok: boolean; status: number; data: unknown }> { + const body = new URLSearchParams({ + messageId: messageSid, + channel: "whatsapp", + }); + + // Add timeout to prevent hanging forever if Twilio doesn't respond + const controller = new AbortController(); + const timeoutId = setTimeout(() => controller.abort(), FETCH_TIMEOUT_MS); + + let response: Response; + try { + response = await fetch( + "https://messaging.twilio.com/v2/Indicators/Typing.json", + { + method: "POST", + headers: { + Authorization: `Basic ${auth}`, + "Content-Type": "application/x-www-form-urlencoded", + }, + body: body.toString(), + signal: controller.signal, + }, + ); + } catch (err) { + // Convert fetch errors to our typed errors for proper handling upstream + if (err instanceof Error && err.name === "AbortError") { + throw new FetchTimeoutError( + `Typing indicator request timed out after ${FETCH_TIMEOUT_MS}ms`, + ); + } + // Network errors (DNS, connection refused, etc.) + throw new NetworkError( + `Fetch failed: ${err instanceof Error ? err.message : String(err)}`, + ); + } finally { + clearTimeout(timeoutId); + } + + // Validate Content-Type before parsing to avoid JSON parse errors on HTML error pages + const contentType = response.headers.get("content-type"); + if (!contentType?.includes("application/json")) { + throw new NetworkError( + `Unexpected Content-Type: ${contentType} (status ${response.status})`, + ); + } + + const data = await response.json(); + return { ok: response.ok, status: response.status, data }; +} + +/** Calculate backoff delay with exponential growth capped at MAX_RETRY_DELAY_MS */ +function getBackoffDelay(attempt: number): number { + const exponentialDelay = BASE_RETRY_DELAY_MS * 2 ** (attempt - 1); + const cappedDelay = Math.min(exponentialDelay, MAX_RETRY_DELAY_MS); + const jitter = Math.random() * JITTER_MAX_MS; + return cappedDelay + jitter; +} + +/** Send typing indicator without retry (for refreshes after initial success) */ +async function sendTypingIndicatorRefresh( + auth: string, + messageSid: string, +): Promise { + try { + const result = await doTypingRequest(auth, messageSid); + logVerbose( + `Sent typing indicator for ${messageSid}, status: ${result.status}`, + ); + return result.ok; + } catch { + // Silently ignore errors on refresh calls + return false; + } +} + +/** Send typing indicator with time-based retry loop for MDR propagation delays */ +async function sendTypingIndicatorWithRetry( + auth: string, + messageSid: string, +): Promise { + logVerbose(`Typing indicator: starting MDR wait (${messageSid})`); + + // Wait for MDR to propagate before first attempt + await new Promise((r) => setTimeout(r, INITIAL_MDR_DELAY_MS)); + + const startTime = Date.now(); + let attempt = 0; + + while (Date.now() - startTime < MAX_TOTAL_RETRY_TIME_MS) { + attempt++; + try { + const result = await doTypingRequest(auth, messageSid); + + if (result.ok) { + // Clear any existing cleanup timer for this messageSid to avoid duplicates + const existingTimer = cleanupTimers.get(messageSid); + if (existingTimer) { + clearTimeout(existingTimer); + } + + succeededSids.add(messageSid); + const timer = setTimeout(() => { + succeededSids.delete(messageSid); + cleanupTimers.delete(messageSid); + }, CLEANUP_TIMEOUT_MS); + cleanupTimers.set(messageSid, timer); + + logVerbose( + `Typing indicator sent for ${messageSid} (attempt ${attempt})`, + ); + return true; + } + + // Permanent auth error - stop immediately + if (isPermanentError(result.status)) { + logVerbose( + `Typing indicator: permanent error ${result.status}, giving up`, + ); + return false; + } + + // MDR not ready or transient error - continue retrying silently + if (isMdrNotReadyError(result.data) || result.status >= 500) { + const delay = getBackoffDelay(attempt); + await new Promise((r) => setTimeout(r, delay)); + continue; + } + + // Non-MDR 400 error (e.g., invalid message SID) - don't retry + if (result.status === 400 && !isMdrNotReadyError(result.data)) { + const error = parseTwilioError(result.data); + logVerbose( + `Typing indicator: bad request (code ${error?.code ?? "unknown"}): ${error?.message ?? "unknown error"}`, + ); + return false; + } + + // Unknown 4xx error - retry a few times then give up + if (attempt >= 3) { + logVerbose( + `Typing indicator: unknown error after ${attempt} attempts, giving up`, + ); + return false; + } + + const delay = getBackoffDelay(attempt); + await new Promise((r) => setTimeout(r, delay)); + } catch (err) { + // Only retry on network/fetch errors, not programming bugs + if (err instanceof NetworkError || err instanceof FetchTimeoutError) { + logVerbose(`Typing indicator: ${err.message}, retrying...`); + const delay = getBackoffDelay(attempt); + await new Promise((r) => setTimeout(r, delay)); + continue; + } + // Unexpected error (programming bug) - fail fast, don't mask it + logVerbose( + `Typing indicator: unexpected error: ${err instanceof Error ? err.message : String(err)}`, + ); + return false; + } + } + + // Exhausted 30 second budget + logVerbose( + `Typing indicator: gave up after ${attempt} attempts over ${Math.round((Date.now() - startTime) / 1000)}s`, + ); + return false; +} + +export function sendTypingIndicator( + creds: TwilioCredentials, messageSid?: string, -) { +): void { // Best-effort WhatsApp typing indicator (public beta as of Nov 2025). + // Fire-and-forget - don't block the main reply flow. if (!messageSid) { logVerbose("Skipping typing indicator: missing MessageSid"); return; } - try { - await client.request({ - method: "post", - uri: "https://messaging.twilio.com/v2/Indicators/Typing.json", - form: { - messageId: messageSid, - channel: "whatsapp", - }, - }); - logVerbose(`Sent typing indicator for inbound ${messageSid}`); - } catch (err) { - if (isVerbose()) { - runtime.error(warn("Typing indicator failed (continuing without it)")); - runtime.error(err as Error); + + const auth = Buffer.from(`${creds.accountSid}:${creds.authToken}`).toString( + "base64", + ); + + // If we've already succeeded for this message, just send without retry + // Also check if a refresh is already pending to avoid duplicate concurrent refreshes + if (succeededSids.has(messageSid)) { + const refreshKey = `refresh:${messageSid}`; + // Atomically claim the slot before starting async work + if (pendingPromises.has(refreshKey)) { + return; } + // Set a sentinel promise immediately to prevent races, then replace with real one + const sentinel = Promise.resolve(false); + pendingPromises.set(refreshKey, sentinel); + + const refreshPromise = sendTypingIndicatorRefresh(auth, messageSid); + pendingPromises.set(refreshKey, refreshPromise); + void refreshPromise.finally(() => { + pendingPromises.delete(refreshKey); + }); + return; } + + // Atomically claim the slot before starting async work to prevent race conditions. + // Even though JS is single-threaded, this pattern makes the intent explicit and + // ensures correctness even if the event loop interleaves callbacks unexpectedly. + if (pendingPromises.has(messageSid)) { + return; + } + // Set a sentinel promise immediately to claim the slot + const sentinel = Promise.resolve(false); + pendingPromises.set(messageSid, sentinel); + + // Now start the actual work and replace the sentinel + const promise = sendTypingIndicatorWithRetry(auth, messageSid); + pendingPromises.set(messageSid, promise); + + // Clean up when done (success or failure) + void promise.finally(() => { + pendingPromises.delete(messageSid); + }); +} + +/** Clean up all typing indicator state for graceful shutdown */ +export function cleanupTypingIndicators(): void { + for (const timer of cleanupTimers.values()) { + clearTimeout(timer); + } + cleanupTimers.clear(); + succeededSids.clear(); + pendingPromises.clear(); } diff --git a/src/twilio/update-webhook.ts b/src/twilio/update-webhook.ts index 0d6dc2fc7..31ff8785b 100644 --- a/src/twilio/update-webhook.ts +++ b/src/twilio/update-webhook.ts @@ -120,7 +120,10 @@ export async function updateWebhook( await requester.request({ method: "post", uri: `https://messaging.twilio.com/v2/Channels/Senders/${senderSid}`, - form: { + headers: { + "Content-Type": "application/x-www-form-urlencoded", + }, + data: { "Webhook.CallbackUrl": url, "Webhook.CallbackMethod": method, }, diff --git a/src/twilio/webhook.ts b/src/twilio/webhook.ts index e82970d10..24208f51c 100644 --- a/src/twilio/webhook.ts +++ b/src/twilio/webhook.ts @@ -4,7 +4,7 @@ import chalk from "chalk"; import express, { type Request, type Response } from "express"; import { getReplyFromConfig, type ReplyPayload } from "../auto-reply/reply.js"; import { type EnvConfig, readEnv } from "../env.js"; -import { danger, success } from "../globals.js"; +import { danger, logVerbose, success } from "../globals.js"; import * as mediaHost from "../media/host.js"; import { attachMediaRoutes } from "../media/server.js"; import { saveMediaSource } from "../media/store.js"; @@ -83,7 +83,12 @@ export async function startWebhook( MediaType: mediaType, }, { - onReplyStart: () => sendTypingIndicator(client, runtime, MessageSid), + onReplyStart: () => { + const creds = getAuthTokenCreds(env); + if (creds) { + sendTypingIndicator(creds, MessageSid); + } + }, }, ); } @@ -160,3 +165,16 @@ function buildTwilioBasicAuth(env: EnvConfig) { "base64", ); } + +function getAuthTokenCreds( + env: EnvConfig, +): { accountSid: string; authToken: string } | null { + if ("authToken" in env.auth) { + return { accountSid: env.accountSid, authToken: env.auth.authToken }; + } + // Typing indicator requires authToken, API key auth is not supported + logVerbose( + "Skipping typing indicator: API key auth is not supported by Twilio's typing indicator API (requires authToken)", + ); + return null; +}