From 2ba4d4e66940b08970500d7bbcb4a5918fac1bc0 Mon Sep 17 00:00:00 2001
From: Franco Viotti <francov@telnyx.com>
Date: Wed, 28 Jan 2026 23:07:43 -0300
Subject: [PATCH 1/4] feat(tts): add Telnyx as TTS provider with WebSocket
 streaming

---
 src/config/types.tts.ts       |  10 ++-
 src/config/zod-schema.core.ts |  10 ++-
 src/tts/tts.test.ts           |  25 +++++++
 src/tts/tts.ts                | 123 +++++++++++++++++++++++++++++++++-
 4 files changed, 163 insertions(+), 5 deletions(-)

diff --git a/src/config/types.tts.ts b/src/config/types.tts.ts
index 4eb4989b9..c7824f177 100644
--- a/src/config/types.tts.ts
+++ b/src/config/types.tts.ts
@@ -1,4 +1,4 @@
-export type TtsProvider = "elevenlabs" | "openai" | "edge";
+export type TtsProvider = "elevenlabs" | "openai" | "edge" | "telnyx";
 
 export type TtsMode = "final" | "all";
 
@@ -73,6 +73,14 @@ export type TtsConfig = {
     proxy?: string;
     timeoutMs?: number;
   };
+  /** Telnyx TTS configuration. */
+  telnyx?: {
+    apiKey?: string;
+    /** Voice ID (e.g. "Telnyx.NaturalHD.astra", "Telnyx.Kokoro.af_heart"). */
+    voice?: string;
+    /** WebSocket inactivity timeout in seconds (default: 20). */
+    inactivityTimeout?: number;
+  };
   /** Optional path for local TTS user preferences JSON. */
   prefsPath?: string;
   /** Hard cap for text sent to TTS (chars). */
diff --git a/src/config/zod-schema.core.ts b/src/config/zod-schema.core.ts
index 4a8c80bcc..1d4a3f811 100644
--- a/src/config/zod-schema.core.ts
+++ b/src/config/zod-schema.core.ts
@@ -156,7 +156,7 @@ export const MarkdownConfigSchema = z
   .strict()
   .optional();
 
-export const TtsProviderSchema = z.enum(["elevenlabs", "openai", "edge"]);
+export const TtsProviderSchema = z.enum(["elevenlabs", "openai", "edge", "telnyx"]);
 export const TtsModeSchema = z.enum(["final", "all"]);
 export const TtsAutoSchema = z.enum(["off", "always", "inbound", "tagged"]);
 export const TtsConfigSchema = z
@@ -224,6 +224,14 @@ export const TtsConfigSchema = z
       })
       .strict()
       .optional(),
+    telnyx: z
+      .object({
+        apiKey: z.string().optional(),
+        voice: z.string().optional(),
+        inactivityTimeout: z.number().int().min(1).max(300).optional(),
+      })
+      .strict()
+      .optional(),
     prefsPath: z.string().optional(),
     maxTextLength: z.number().int().min(1).optional(),
     timeoutMs: z.number().int().min(1000).max(120000).optional(),
diff --git a/src/tts/tts.test.ts b/src/tts/tts.test.ts
index 8462cba01..c1c987a67 100644
--- a/src/tts/tts.test.ts
+++ b/src/tts/tts.test.ts
@@ -202,6 +202,14 @@ describe("tts", () => {
       expect(result.overrides.provider).toBe("edge");
     });
 
+    it("accepts telnyx as provider override", () => {
+      const policy = resolveModelOverridePolicy({ enabled: true });
+      const input = "Hello [[tts:provider=telnyx]] world"; // inline provider directive
+      const result = parseTtsDirectives(input, policy);
+
+      expect(result.overrides.provider).toBe("telnyx");
+    });
+
     it("keeps text intact when overrides are disabled", () => {
       const policy = resolveModelOverridePolicy({ enabled: false });
       const input = "Hello [[tts:voice=alloy]] world";
@@ -426,6 +434,7 @@ describe("tts", () => {
           OPENAI_API_KEY: undefined,
           ELEVENLABS_API_KEY: undefined,
           XI_API_KEY: undefined,
+          TELNYX_API_KEY: undefined,
         },
         () => {
           const config = resolveTtsConfig(baseCfg);
@@ -434,6 +443,22 @@ describe("tts", () => {
         },
       );
     });
+
+    it("prefers Telnyx when OpenAI and ElevenLabs are missing and Telnyx key exists", () => {
+      withEnv(
+        {
+          OPENAI_API_KEY: undefined,
+          ELEVENLABS_API_KEY: undefined,
+          XI_API_KEY: undefined,
+          TELNYX_API_KEY: "test-telnyx-key", // only provider key set in this env override
+        },
+        () => {
+          const config = resolveTtsConfig(baseCfg);
+          const provider = getTtsProvider(config, "/tmp/tts-prefs-telnyx.json");
+          expect(provider).toBe("telnyx"); // checked after openai/elevenlabs, before edge
+        },
+      );
+    });
   });
 
   describe("maybeApplyTtsToPayload", () => {
diff --git a/src/tts/tts.ts b/src/tts/tts.ts
index af3d7fda5..fec413cb1 100644
--- a/src/tts/tts.ts
+++ b/src/tts/tts.ts
@@ -13,6 +13,7 @@ import path from "node:path";
 
 import { completeSimple, type TextContent } from "@mariozechner/pi-ai";
 import { EdgeTTS } from "node-edge-tts";
+import WebSocket from "ws";
 
 import type { ReplyPayload } from "../auto-reply/types.js";
 import { normalizeChannelId } from "../channels/plugins/index.js";
@@ -51,6 +52,9 @@ const DEFAULT_OPENAI_VOICE = "alloy";
 const DEFAULT_EDGE_VOICE = "en-US-MichelleNeural";
 const DEFAULT_EDGE_LANG = "en-US";
 const DEFAULT_EDGE_OUTPUT_FORMAT = "audio-24khz-48kbitrate-mono-mp3";
+const DEFAULT_TELNYX_VOICE = "Telnyx.NaturalHD.astra";
+const DEFAULT_TELNYX_INACTIVITY_TIMEOUT = 20;
+const TELNYX_WS_URL = "wss://api.telnyx.com/v2/text-to-speech/speech";
 
 const DEFAULT_ELEVENLABS_VOICE_SETTINGS = {
   stability: 0.5,
@@ -65,6 +69,8 @@ const TELEGRAM_OUTPUT = {
   // ElevenLabs output formats use codec_sample_rate_bitrate naming.
   // Opus @ 48kHz/64kbps is a good voice-note tradeoff for Telegram.
   elevenlabs: "opus_48000_64",
+  // Telnyx outputs MP3 only (16kHz); not ideal for Telegram voice bubbles but works.
+  telnyx: "mp3_16000" as const,
   extension: ".opus",
   voiceCompatible: true,
 };
@@ -72,6 +78,7 @@ const TELEGRAM_OUTPUT = {
 const DEFAULT_OUTPUT = {
   openai: "mp3" as const,
   elevenlabs: "mp3_44100_128",
+  telnyx: "mp3_16000" as const,
   extension: ".mp3",
   voiceCompatible: false,
 };
@@ -124,6 +131,11 @@ export type ResolvedTtsConfig = {
     proxy?: string;
     timeoutMs?: number;
   };
+  telnyx: {
+    apiKey?: string;
+    voice: string;
+    inactivityTimeout: number;
+  };
   prefsPath?: string;
   maxTextLength: number;
   timeoutMs: number;
@@ -296,6 +308,11 @@ export function resolveTtsConfig(cfg: MoltbotConfig): ResolvedTtsConfig {
       proxy: raw.edge?.proxy?.trim() || undefined,
       timeoutMs: raw.edge?.timeoutMs,
     },
+    telnyx: {
+      apiKey: raw.telnyx?.apiKey,
+      voice: raw.telnyx?.voice?.trim() || DEFAULT_TELNYX_VOICE,
+      inactivityTimeout: raw.telnyx?.inactivityTimeout ?? DEFAULT_TELNYX_INACTIVITY_TIMEOUT,
+    },
     prefsPath: raw.prefsPath,
     maxTextLength: raw.maxTextLength ?? DEFAULT_MAX_TEXT_LENGTH,
     timeoutMs: raw.timeoutMs ?? DEFAULT_TIMEOUT_MS,
@@ -412,6 +429,7 @@ export function getTtsProvider(config: ResolvedTtsConfig, prefsPath: string): Tt
 
   if (resolveTtsApiKey(config, "openai")) return "openai";
   if (resolveTtsApiKey(config, "elevenlabs")) return "elevenlabs";
+  if (resolveTtsApiKey(config, "telnyx")) return "telnyx";
   return "edge";
 }
 
@@ -474,10 +492,13 @@ export function resolveTtsApiKey(
   if (provider === "openai") {
     return config.openai.apiKey || process.env.OPENAI_API_KEY;
   }
+  if (provider === "telnyx") {
+    return config.telnyx.apiKey || process.env.TELNYX_API_KEY;
+  }
   return undefined;
 }
 
-export const TTS_PROVIDERS = ["openai", "elevenlabs", "edge"] as const;
+export const TTS_PROVIDERS = ["openai", "elevenlabs", "telnyx", "edge"] as const;
 
 export function resolveTtsProviderOrder(primary: TtsProvider): TtsProvider[] {
   return [primary, ...TTS_PROVIDERS.filter((provider) => provider !== primary)];
@@ -485,6 +506,7 @@ export function resolveTtsProviderOrder(primary: TtsProvider): TtsProvider[] {
 
 export function isTtsProviderConfigured(config: ResolvedTtsConfig, provider: TtsProvider): boolean {
   if (provider === "edge") return config.edge.enabled;
+  // telnyx needs no special case: the generic API-key lookup below already covers it.
   return Boolean(resolveTtsApiKey(config, provider));
 }
 
@@ -587,7 +609,12 @@ function parseTtsDirectives(
         switch (key) {
           case "provider":
             if (!policy.allowProvider) break;
-            if (rawValue === "openai" || rawValue === "elevenlabs" || rawValue === "edge") {
+            if (
+              rawValue === "openai" ||
+              rawValue === "elevenlabs" ||
+              rawValue === "edge" ||
+              rawValue === "telnyx"
+            ) {
               overrides.provider = rawValue;
             } else {
               warnings.push(`unsupported provider "${rawValue}"`);
@@ -1068,6 +1095,79 @@ async function edgeTTS(params: {
   await tts.ttsPromise(text, outputPath);
 }
 
+/**
+ * Synthesizes `text` via the Telnyx WebSocket TTS endpoint and resolves with the joined audio.
+ * Rejects on timeout, socket error, an `{ error }` frame (assumed shape), or a close with no audio.
+ */
+async function telnyxTTS(params: {
+  text: string;
+  apiKey: string;
+  voice: string;
+  inactivityTimeout: number;
+  timeoutMs: number;
+}): Promise<Buffer> {
+  const { text, apiKey, voice, inactivityTimeout, timeoutMs } = params;
+
+  return new Promise((resolve, reject) => {
+    const url = new URL(TELNYX_WS_URL);
+    url.searchParams.set("voice", voice);
+    if (inactivityTimeout !== DEFAULT_TELNYX_INACTIVITY_TIMEOUT) {
+      url.searchParams.set("inactivity_timeout", String(inactivityTimeout));
+    }
+
+    const ws = new WebSocket(url.toString(), {
+      headers: { Authorization: `Bearer ${apiKey}` },
+    });
+
+    const audioChunks: Buffer[] = [];
+    let completed = false;
+
+    // Settle exactly once; every exit path clears the timer and tears the socket down.
+    const settle = (err?: Error) => {
+      if (completed) return;
+      completed = true;
+      clearTimeout(timeout);
+      // terminate(), not close(): do not wait on a closing handshake with a stalled peer.
+      ws.terminate();
+      if (err) {
+        reject(err);
+      } else if (audioChunks.length === 0) {
+        reject(new Error("Telnyx TTS returned no audio"));
+      } else {
+        resolve(Buffer.concat(audioChunks));
+      }
+    };
+
+    const timeout = setTimeout(() => settle(new Error("Telnyx TTS request timed out")), timeoutMs);
+
+    ws.on("open", () => {
+      // Send initialization frame (required first)
+      ws.send(JSON.stringify({ text: " " }));
+      // Send text frame
+      ws.send(JSON.stringify({ text }));
+      // Send stop frame to signal completion
+      ws.send(JSON.stringify({ text: "" }));
+    });
+
+    ws.on("message", (data: Buffer | string) => {
+      try {
+        const message = JSON.parse(data.toString()) as { audio?: string; error?: unknown };
+        // NOTE(review): assumes failures arrive as `{ error: string }` frames — confirm.
+        if (typeof message.error === "string") {
+          settle(new Error(`Telnyx TTS error: ${message.error}`));
+        } else if (message.audio) {
+          audioChunks.push(Buffer.from(message.audio, "base64"));
+        }
+      } catch {
+        // Ignore non-JSON messages
+      }
+    });
+
+    ws.on("close", () => settle());
+    ws.on("error", (err) => settle(new Error(`Telnyx TTS WebSocket error: ${err.message}`)));
+  });
+}
+
 export async function textToSpeech(params: {
   text: string;
   cfg: MoltbotConfig;
@@ -1195,6 +1295,14 @@ export async function textToSpeech(params: {
           voiceSettings,
           timeoutMs: config.timeoutMs,
         });
+      } else if (provider === "telnyx") {
+        audioBuffer = await telnyxTTS({
+          text: params.text,
+          apiKey,
+          voice: config.telnyx.voice,
+          inactivityTimeout: config.telnyx.inactivityTimeout,
+          timeoutMs: config.timeoutMs,
+        });
       } else {
         const openaiModelOverride = params.overrides?.openai?.model;
         const openaiVoiceOverride = params.overrides?.openai?.voice;
@@ -1220,7 +1328,12 @@ export async function textToSpeech(params: {
         audioPath,
         latencyMs,
         provider,
-        outputFormat: provider === "openai" ? output.openai : output.elevenlabs,
+        outputFormat:
+          provider === "openai"
+            ? output.openai
+            : provider === "telnyx"
+              ? output.telnyx
+              : output.elevenlabs,
         voiceCompatible: output.voiceCompatible,
       };
     } catch (err) {
@@ -1266,6 +1379,10 @@ export async function textToSpeechTelephony(params: {
         lastError = "edge: unsupported for telephony";
         continue;
       }
+      if (provider === "telnyx") {
+        lastError = "telnyx: unsupported for telephony (MP3 output only)";
+        continue;
+      }
 
       const apiKey = resolveTtsApiKey(config, provider);
       if (!apiKey) {

From 4ebeb160c73b05b37f68b8e64ecefcf5269812db Mon Sep 17 00:00:00 2001
From: Franco Viotti <francov@telnyx.com>
Date: Thu, 29 Jan 2026 08:08:33 -0300
Subject: [PATCH 2/4] docs(tts): add Telnyx as TTS provider

---
 docs/tts.md | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/docs/tts.md b/docs/tts.md
index c3899ac08..003579811 100644
--- a/docs/tts.md
+++ b/docs/tts.md
@@ -8,13 +8,14 @@ read_when:
 
 # Text-to-speech (TTS)
 
-Moltbot can convert outbound replies into audio using ElevenLabs, OpenAI, or Edge TTS.
+Moltbot can convert outbound replies into audio using ElevenLabs, OpenAI, Telnyx, or Edge TTS.
 It works anywhere Moltbot can send audio; Telegram gets a round voice-note bubble.
 
 ## Supported services
 
 - **ElevenLabs** (primary or fallback provider)
 - **OpenAI** (primary or fallback provider; also used for summaries)
+- **Telnyx** (primary or fallback provider; great quality, cheaper than ElevenLabs)
 - **Edge TTS** (primary or fallback provider; uses `node-edge-tts`, default when no API keys)
 
 ### Edge TTS notes
@@ -31,9 +32,10 @@ does not publish limits, so assume similar or lower limits. citeturn0searc
 
 ## Optional keys
 
-If you want OpenAI or ElevenLabs:
+If you want OpenAI, ElevenLabs, or Telnyx:
 - `ELEVENLABS_API_KEY` (or `XI_API_KEY`)
 - `OPENAI_API_KEY`
+- `TELNYX_API_KEY`
 
 Edge TTS does **not** require an API key. If no API keys are found, Moltbot defaults
 to Edge TTS (unless disabled via `messages.tts.edge.enabled=false`).
@@ -202,9 +204,9 @@ Then run:
   - `tagged` only sends audio when the reply includes `[[tts]]` tags.
 - `enabled`: legacy toggle (doctor migrates this to `auto`).
 - `mode`: `"final"` (default) or `"all"` (includes tool/block replies).
-- `provider`: `"elevenlabs"`, `"openai"`, or `"edge"` (fallback is automatic).
+- `provider`: `"elevenlabs"`, `"openai"`, `"telnyx"`, or `"edge"` (fallback is automatic).
 - If `provider` is **unset**, Moltbot prefers `openai` (if key), then `elevenlabs` (if key),
-  otherwise `edge`.
+  then `telnyx` (if key), otherwise `edge`.
 - `summaryModel`: optional cheap model for auto-summary; defaults to `agents.defaults.model.primary`.
   - Accepts `provider/model` or a configured model alias.
 - `modelOverrides`: allow the model to emit TTS directives (on by default).
@@ -250,7 +252,7 @@ Here you go.
 ```
 
 Available directive keys (when enabled):
-- `provider` (`openai` | `elevenlabs` | `edge`)
+- `provider` (`openai` | `elevenlabs` | `telnyx` | `edge`)
 - `voice` (OpenAI voice) or `voiceId` (ElevenLabs)
 - `model` (OpenAI TTS model or ElevenLabs model id)
 - `stability`, `similarityBoost`, `style`, `speed`, `useSpeakerBoost`

From ff47117510626fb7a88cfb54f9b249b2f68f1637 Mon Sep 17 00:00:00 2001
From: Franco Viotti <francov@telnyx.com>
Date: Thu, 29 Jan 2026 08:56:47 -0300
Subject: [PATCH 3/4] feat(tts): add Telnyx to /tts provider command

---
 src/auto-reply/reply/commands-tts.ts | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/src/auto-reply/reply/commands-tts.ts b/src/auto-reply/reply/commands-tts.ts
index 04b60a4e9..aca79d5c9 100644
--- a/src/auto-reply/reply/commands-tts.ts
+++ b/src/auto-reply/reply/commands-tts.ts
@@ -50,7 +50,8 @@ function ttsUsage(): ReplyPayload {
       `**Providers:**\n` +
       `• edge — Free, fast (default)\n` +
       `• openai — High quality (requires API key)\n` +
-      `• elevenlabs — Premium voices (requires API key)\n\n` +
+      `• elevenlabs — Premium voices (requires API key)\n` +
+      `• telnyx — Great quality, cheaper than ElevenLabs (requires API key)\n\n` +
       `**Text Limit (default: 1500, max: 4096):**\n` +
       `When text exceeds the limit:\n` +
       `• Summary ON: AI summarizes, then generates audio\n` +
@@ -151,6 +152,7 @@ export const handleTtsCommands: CommandHandler = async (params, allowTextCommand
     if (!args.trim()) {
       const hasOpenAI = Boolean(resolveTtsApiKey(config, "openai"));
       const hasElevenLabs = Boolean(resolveTtsApiKey(config, "elevenlabs"));
+      const hasTelnyx = Boolean(resolveTtsApiKey(config, "telnyx"));
       const hasEdge = isTtsProviderConfigured(config, "edge");
       return {
         shouldContinue: false,
@@ -160,18 +162,24 @@ export const handleTtsCommands: CommandHandler = async (params, allowTextCommand
             `Primary: ${currentProvider}\n` +
             `OpenAI key: ${hasOpenAI ? "✅" : "❌"}\n` +
             `ElevenLabs key: ${hasElevenLabs ? "✅" : "❌"}\n` +
+            `Telnyx key: ${hasTelnyx ? "✅" : "❌"}\n` +
             `Edge enabled: ${hasEdge ? "✅" : "❌"}\n` +
-            `Usage: /tts provider openai | elevenlabs | edge`,
+            `Usage: /tts provider openai | elevenlabs | telnyx | edge`,
         },
       };
     }
 
     const requested = args.trim().toLowerCase();
-    if (requested !== "openai" && requested !== "elevenlabs" && requested !== "edge") {
+    if (
+      requested !== "openai" &&
+      requested !== "elevenlabs" &&
+      requested !== "telnyx" &&
+      requested !== "edge"
+    ) {
       return { shouldContinue: false, reply: ttsUsage() };
     }
 
-    setTtsProvider(prefsPath, requested);
+    setTtsProvider(prefsPath, requested as "openai" | "elevenlabs" | "telnyx" | "edge");
     return {
       shouldContinue: false,
       reply: { text: `✅ TTS provider set to ${requested}.` },

From bf5f4e9b7aa63013afdba79b583bd1d674950d4a Mon Sep 17 00:00:00 2001
From: Franco Viotti <francov@telnyx.com>
Date: Thu, 29 Jan 2026 15:09:40 -0300
Subject: [PATCH 4/4] fix(tts): clarify Telnyx telephony limitation and remove
 dismissive comment

---
 src/tts/tts.ts | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/tts/tts.ts b/src/tts/tts.ts
index fec413cb1..c1473c671 100644
--- a/src/tts/tts.ts
+++ b/src/tts/tts.ts
@@ -69,7 +69,6 @@ const TELEGRAM_OUTPUT = {
   // ElevenLabs output formats use codec_sample_rate_bitrate naming.
   // Opus @ 48kHz/64kbps is a good voice-note tradeoff for Telegram.
   elevenlabs: "opus_48000_64",
-  // Telnyx outputs MP3 only (16kHz); not ideal for Telegram voice bubbles but works.
   telnyx: "mp3_16000" as const,
   extension: ".opus",
   voiceCompatible: true,
@@ -1380,7 +1379,7 @@ export async function textToSpeechTelephony(params: {
         continue;
       }
       if (provider === "telnyx") {
-        lastError = "telnyx: unsupported for telephony (MP3 output only)";
+        lastError = "telnyx: WebSocket API outputs MP3, telephony requires PCM";
         continue;
       }