From 2ba4d4e66940b08970500d7bbcb4a5918fac1bc0 Mon Sep 17 00:00:00 2001 From: Franco Viotti Date: Wed, 28 Jan 2026 23:07:43 -0300 Subject: [PATCH 1/4] feat(tts): add Telnyx as TTS provider with WebSocket streaming --- src/config/types.tts.ts | 10 ++- src/config/zod-schema.core.ts | 10 ++- src/tts/tts.test.ts | 25 +++++++ src/tts/tts.ts | 123 +++++++++++++++++++++++++++++++++- 4 files changed, 163 insertions(+), 5 deletions(-) diff --git a/src/config/types.tts.ts b/src/config/types.tts.ts index 4eb4989b9..c7824f177 100644 --- a/src/config/types.tts.ts +++ b/src/config/types.tts.ts @@ -1,4 +1,4 @@ -export type TtsProvider = "elevenlabs" | "openai" | "edge"; +export type TtsProvider = "elevenlabs" | "openai" | "edge" | "telnyx"; export type TtsMode = "final" | "all"; @@ -73,6 +73,14 @@ export type TtsConfig = { proxy?: string; timeoutMs?: number; }; + /** Telnyx TTS configuration. */ + telnyx?: { + apiKey?: string; + /** Voice ID (e.g. "Telnyx.NaturalHD.astra", "Telnyx.Kokoro.af_heart"). */ + voice?: string; + /** WebSocket inactivity timeout in seconds (default: 20). */ + inactivityTimeout?: number; + }; /** Optional path for local TTS user preferences JSON. */ prefsPath?: string; /** Hard cap for text sent to TTS (chars). */ diff --git a/src/config/zod-schema.core.ts b/src/config/zod-schema.core.ts index 4a8c80bcc..1d4a3f811 100644 --- a/src/config/zod-schema.core.ts +++ b/src/config/zod-schema.core.ts @@ -156,7 +156,7 @@ export const MarkdownConfigSchema = z .strict() .optional(); -export const TtsProviderSchema = z.enum(["elevenlabs", "openai", "edge"]); +export const TtsProviderSchema = z.enum(["elevenlabs", "openai", "edge", "telnyx"]); export const TtsModeSchema = z.enum(["final", "all"]); export const TtsAutoSchema = z.enum(["off", "always", "inbound", "tagged"]); export const TtsConfigSchema = z @@ -224,6 +224,14 @@ export const TtsConfigSchema = z }) .strict() .optional(), + telnyx: z + .object({ + apiKey: z.string().optional(), + voice: z.string().optional(), + inactivityTimeout: z.number().int().min(1).max(300).optional(), + }) + .strict() + .optional(), prefsPath: z.string().optional(), maxTextLength: z.number().int().min(1).optional(), timeoutMs: z.number().int().min(1000).max(120000).optional(), diff --git a/src/tts/tts.test.ts b/src/tts/tts.test.ts index 8462cba01..c1c987a67 100644 --- a/src/tts/tts.test.ts +++ b/src/tts/tts.test.ts @@ -202,6 +202,14 @@ describe("tts", () => { expect(result.overrides.provider).toBe("edge"); }); + it("accepts telnyx as provider override", () => { + const policy = resolveModelOverridePolicy({ enabled: true }); + const input = "Hello [[tts:provider=telnyx]] world"; + const result = parseTtsDirectives(input, policy); + + expect(result.overrides.provider).toBe("telnyx"); + }); + it("keeps text intact when overrides are disabled", () => { const policy = resolveModelOverridePolicy({ enabled: false }); const input = "Hello [[tts:voice=alloy]] world"; @@ -426,6 +434,7 @@ describe("tts", () => { OPENAI_API_KEY: undefined, ELEVENLABS_API_KEY: undefined, XI_API_KEY: undefined, + TELNYX_API_KEY: undefined, }, () => { const config = resolveTtsConfig(baseCfg); @@ -434,6 +443,22 @@ describe("tts", () => { }, ); }); + + it("prefers Telnyx when OpenAI and ElevenLabs are missing and Telnyx key exists", () => { + withEnv( + { + OPENAI_API_KEY: undefined, + ELEVENLABS_API_KEY: undefined, + XI_API_KEY: undefined, + TELNYX_API_KEY: "test-telnyx-key", + }, + () => { + const config = resolveTtsConfig(baseCfg); + const provider = getTtsProvider(config, "/tmp/tts-prefs-telnyx.json"); + expect(provider).toBe("telnyx"); + }, + ); + }); }); describe("maybeApplyTtsToPayload", () => { diff --git a/src/tts/tts.ts b/src/tts/tts.ts index af3d7fda5..fec413cb1 100644 --- a/src/tts/tts.ts +++ b/src/tts/tts.ts @@ -13,6 +13,7 @@ import path from "node:path"; import { completeSimple, type TextContent } from "@mariozechner/pi-ai"; import { EdgeTTS } from "node-edge-tts"; +import WebSocket from "ws"; import type { ReplyPayload } from "../auto-reply/types.js"; import { normalizeChannelId } from "../channels/plugins/index.js"; @@ -51,6 +52,9 @@ const DEFAULT_OPENAI_VOICE = "alloy"; const DEFAULT_EDGE_VOICE = "en-US-MichelleNeural"; const DEFAULT_EDGE_LANG = "en-US"; const DEFAULT_EDGE_OUTPUT_FORMAT = "audio-24khz-48kbitrate-mono-mp3"; +const DEFAULT_TELNYX_VOICE = "Telnyx.NaturalHD.astra"; +const DEFAULT_TELNYX_INACTIVITY_TIMEOUT = 20; +const TELNYX_WS_URL = "wss://api.telnyx.com/v2/text-to-speech/speech"; const DEFAULT_ELEVENLABS_VOICE_SETTINGS = { stability: 0.5, @@ -65,6 +69,8 @@ const TELEGRAM_OUTPUT = { // ElevenLabs output formats use codec_sample_rate_bitrate naming. // Opus @ 48kHz/64kbps is a good voice-note tradeoff for Telegram. elevenlabs: "opus_48000_64", + // Telnyx outputs MP3 only (16kHz); not ideal for Telegram voice bubbles but works. + telnyx: "mp3_16000" as const, extension: ".opus", voiceCompatible: true, }; @@ -72,6 +78,7 @@ const TELEGRAM_OUTPUT = { const DEFAULT_OUTPUT = { openai: "mp3" as const, elevenlabs: "mp3_44100_128", + telnyx: "mp3_16000" as const, extension: ".mp3", voiceCompatible: false, }; @@ -124,6 +131,11 @@ export type ResolvedTtsConfig = { proxy?: string; timeoutMs?: number; }; + telnyx: { + apiKey?: string; + voice: string; + inactivityTimeout: number; + }; prefsPath?: string; maxTextLength: number; timeoutMs: number; @@ -296,6 +308,11 @@ export function resolveTtsConfig(cfg: MoltbotConfig): ResolvedTtsConfig { proxy: raw.edge?.proxy?.trim() || undefined, timeoutMs: raw.edge?.timeoutMs, }, + telnyx: { + apiKey: raw.telnyx?.apiKey, + voice: raw.telnyx?.voice?.trim() || DEFAULT_TELNYX_VOICE, + inactivityTimeout: raw.telnyx?.inactivityTimeout ?? DEFAULT_TELNYX_INACTIVITY_TIMEOUT, + }, prefsPath: raw.prefsPath, maxTextLength: raw.maxTextLength ?? DEFAULT_MAX_TEXT_LENGTH, timeoutMs: raw.timeoutMs ?? DEFAULT_TIMEOUT_MS, @@ -412,6 +429,7 @@ export function getTtsProvider(config: ResolvedTtsConfig, prefsPath: string): Tt if (resolveTtsApiKey(config, "openai")) return "openai"; if (resolveTtsApiKey(config, "elevenlabs")) return "elevenlabs"; + if (resolveTtsApiKey(config, "telnyx")) return "telnyx"; return "edge"; } @@ -474,10 +492,13 @@ export function resolveTtsApiKey( if (provider === "openai") { return config.openai.apiKey || process.env.OPENAI_API_KEY; } + if (provider === "telnyx") { + return config.telnyx.apiKey || process.env.TELNYX_API_KEY; + } return undefined; } -export const TTS_PROVIDERS = ["openai", "elevenlabs", "edge"] as const; +export const TTS_PROVIDERS = ["openai", "elevenlabs", "telnyx", "edge"] as const; export function resolveTtsProviderOrder(primary: TtsProvider): TtsProvider[] { return [primary, ...TTS_PROVIDERS.filter((provider) => provider !== primary)]; @@ -485,6 +506,7 @@ export function resolveTtsProviderOrder(primary: TtsProvider): TtsProvider[] { export function isTtsProviderConfigured(config: ResolvedTtsConfig, provider: TtsProvider): boolean { if (provider === "edge") return config.edge.enabled; + if (provider === "telnyx") return Boolean(resolveTtsApiKey(config, "telnyx")); return Boolean(resolveTtsApiKey(config, provider)); } @@ -587,7 +609,12 @@ function parseTtsDirectives( switch (key) { case "provider": if (!policy.allowProvider) break; - if (rawValue === "openai" || rawValue === "elevenlabs" || rawValue === "edge") { + if ( + rawValue === "openai" || + rawValue === "elevenlabs" || + rawValue === "edge" || + rawValue === "telnyx" + ) { overrides.provider = rawValue; } else { warnings.push(`unsupported provider "${rawValue}"`); @@ -1068,6 +1095,79 @@ async function edgeTTS(params: { await tts.ttsPromise(text, outputPath); } +async function telnyxTTS(params: { + text: string; + apiKey: string; + voice: string; + inactivityTimeout: number; + timeoutMs: number; +}): Promise { + const { text, apiKey, voice, inactivityTimeout, timeoutMs } = params; + + return new Promise((resolve, reject) => { + const url = new URL(TELNYX_WS_URL); + url.searchParams.set("voice", voice); + if (inactivityTimeout !== DEFAULT_TELNYX_INACTIVITY_TIMEOUT) { + url.searchParams.set("inactivity_timeout", String(inactivityTimeout)); + } + + const ws = new WebSocket(url.toString(), { + headers: { Authorization: `Bearer ${apiKey}` }, + }); + + const audioChunks: Buffer[] = []; + let completed = false; + + const timeout = setTimeout(() => { + if (!completed) { + completed = true; + ws.close(); + reject(new Error("Telnyx TTS request timed out")); + } + }, timeoutMs); + + ws.on("open", () => { + // Send initialization frame (required first) + ws.send(JSON.stringify({ text: " " })); + // Send text frame + ws.send(JSON.stringify({ text })); + // Send stop frame to signal completion + ws.send(JSON.stringify({ text: "" })); + }); + + ws.on("message", (data: Buffer | string) => { + try { + const message = JSON.parse(data.toString()) as { audio?: string }; + if (message.audio) { + audioChunks.push(Buffer.from(message.audio, "base64")); + } + } catch { + // Ignore non-JSON messages + } + }); + + ws.on("close", () => { + clearTimeout(timeout); + if (!completed) { + completed = true; + if (audioChunks.length === 0) { + reject(new Error("Telnyx TTS returned no audio")); + } else { + resolve(Buffer.concat(audioChunks)); + } + } + }); + + ws.on("error", (err) => { + clearTimeout(timeout); + if (!completed) { + completed = true; + reject(new Error(`Telnyx TTS WebSocket error: ${err.message}`)); + } + }); + }); +} + export async function textToSpeech(params: { text: string; cfg: MoltbotConfig; @@ -1195,6 +1295,14 @@ export async function textToSpeech(params: { voiceSettings, timeoutMs: config.timeoutMs, }); + } else if (provider === "telnyx") { + audioBuffer = await telnyxTTS({ + text: params.text, + apiKey, + voice: config.telnyx.voice, + inactivityTimeout: config.telnyx.inactivityTimeout, + timeoutMs: config.timeoutMs, + }); } else { const openaiModelOverride = params.overrides?.openai?.model; const openaiVoiceOverride = params.overrides?.openai?.voice; @@ -1220,7 +1328,12 @@ export async function textToSpeech(params: { audioPath, latencyMs, provider, - outputFormat: provider === "openai" ? output.openai : output.elevenlabs, + outputFormat: + provider === "openai" + ? output.openai + : provider === "telnyx" + ? output.telnyx + : output.elevenlabs, voiceCompatible: output.voiceCompatible, }; } catch (err) { @@ -1266,6 +1379,10 @@ export async function textToSpeechTelephony(params: { lastError = "edge: unsupported for telephony"; continue; } + if (provider === "telnyx") { + lastError = "telnyx: unsupported for telephony (MP3 output only)"; + continue; + } const apiKey = resolveTtsApiKey(config, provider); if (!apiKey) { From 4ebeb160c73b05b37f68b8e64ecefcf5269812db Mon Sep 17 00:00:00 2001 From: Franco Viotti Date: Thu, 29 Jan 2026 08:08:33 -0300 Subject: [PATCH 2/4] docs(tts): add Telnyx as TTS provider --- docs/tts.md | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/docs/tts.md b/docs/tts.md index c3899ac08..003579811 100644 --- a/docs/tts.md +++ b/docs/tts.md @@ -8,13 +8,14 @@ read_when: # Text-to-speech (TTS) -Moltbot can convert outbound replies into audio using ElevenLabs, OpenAI, or Edge TTS. +Moltbot can convert outbound replies into audio using ElevenLabs, OpenAI, Telnyx, or Edge TTS. It works anywhere Moltbot can send audio; Telegram gets a round voice-note bubble. ## Supported services - **ElevenLabs** (primary or fallback provider) - **OpenAI** (primary or fallback provider; also used for summaries) +- **Telnyx** (primary or fallback provider; great quality, cheaper than ElevenLabs) - **Edge TTS** (primary or fallback provider; uses `node-edge-tts`, default when no API keys) ### Edge TTS notes @@ -31,9 +32,10 @@ does not publish limits, so assume similar or lower limits. citeturn0searc ## Optional keys -If you want OpenAI or ElevenLabs: +If you want OpenAI, ElevenLabs, or Telnyx: - `ELEVENLABS_API_KEY` (or `XI_API_KEY`) - `OPENAI_API_KEY` +- `TELNYX_API_KEY` Edge TTS does **not** require an API key. If no API keys are found, Moltbot defaults to Edge TTS (unless disabled via `messages.tts.edge.enabled=false`). @@ -202,9 +204,9 @@ Then run: - `tagged` only sends audio when the reply includes `[[tts]]` tags. - `enabled`: legacy toggle (doctor migrates this to `auto`). - `mode`: `"final"` (default) or `"all"` (includes tool/block replies). -- `provider`: `"elevenlabs"`, `"openai"`, or `"edge"` (fallback is automatic). +- `provider`: `"elevenlabs"`, `"openai"`, `"telnyx"`, or `"edge"` (fallback is automatic). - If `provider` is **unset**, Moltbot prefers `openai` (if key), then `elevenlabs` (if key), - otherwise `edge`. + then `telnyx` (if key), otherwise `edge`. - `summaryModel`: optional cheap model for auto-summary; defaults to `agents.defaults.model.primary`. - Accepts `provider/model` or a configured model alias. - `modelOverrides`: allow the model to emit TTS directives (on by default). @@ -250,7 +252,7 @@ Here you go. ``` Available directive keys (when enabled): -- `provider` (`openai` | `elevenlabs` | `edge`) +- `provider` (`openai` | `elevenlabs` | `telnyx` | `edge`) - `voice` (OpenAI voice) or `voiceId` (ElevenLabs) - `model` (OpenAI TTS model or ElevenLabs model id) - `stability`, `similarityBoost`, `style`, `speed`, `useSpeakerBoost` From ff47117510626fb7a88cfb54f9b249b2f68f1637 Mon Sep 17 00:00:00 2001 From: Franco Viotti Date: Thu, 29 Jan 2026 08:56:47 -0300 Subject: [PATCH 3/4] feat(tts): add Telnyx to /tts provider command --- src/auto-reply/reply/commands-tts.ts | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/src/auto-reply/reply/commands-tts.ts b/src/auto-reply/reply/commands-tts.ts index 04b60a4e9..aca79d5c9 100644 --- a/src/auto-reply/reply/commands-tts.ts +++ b/src/auto-reply/reply/commands-tts.ts @@ -50,7 +50,8 @@ function ttsUsage(): ReplyPayload { `**Providers:**\n` + `• edge — Free, fast (default)\n` + `• openai — High quality (requires API key)\n` + - `• elevenlabs — Premium voices (requires API key)\n\n` + + `• elevenlabs — Premium voices (requires API key)\n` + + `• telnyx — Great quality, cheaper than ElevenLabs (requires API key)\n\n` + `**Text Limit (default: 1500, max: 4096):**\n` + `When text exceeds the limit:\n` + `• Summary ON: AI summarizes, then generates audio\n` + @@ -151,6 +152,7 @@ export const handleTtsCommands: CommandHandler = async (params, allowTextCommand if (!args.trim()) { const hasOpenAI = Boolean(resolveTtsApiKey(config, "openai")); const hasElevenLabs = Boolean(resolveTtsApiKey(config, "elevenlabs")); + const hasTelnyx = Boolean(resolveTtsApiKey(config, "telnyx")); const hasEdge = isTtsProviderConfigured(config, "edge"); return { shouldContinue: false, @@ -160,18 +162,24 @@ export const handleTtsCommands: CommandHandler = async (params, allowTextCommand `Primary: ${currentProvider}\n` + `OpenAI key: ${hasOpenAI ? "✅" : "❌"}\n` + `ElevenLabs key: ${hasElevenLabs ? "✅" : "❌"}\n` + + `Telnyx key: ${hasTelnyx ? "✅" : "❌"}\n` + `Edge enabled: ${hasEdge ? "✅" : "❌"}\n` + - `Usage: /tts provider openai | elevenlabs | edge`, + `Usage: /tts provider openai | elevenlabs | telnyx | edge`, }, }; } const requested = args.trim().toLowerCase(); - if (requested !== "openai" && requested !== "elevenlabs" && requested !== "edge") { + if ( + requested !== "openai" && + requested !== "elevenlabs" && + requested !== "telnyx" && + requested !== "edge" + ) { return { shouldContinue: false, reply: ttsUsage() }; } - setTtsProvider(prefsPath, requested); + setTtsProvider(prefsPath, requested as "openai" | "elevenlabs" | "telnyx" | "edge"); return { shouldContinue: false, reply: { text: `✅ TTS provider set to ${requested}.` }, From bf5f4e9b7aa63013afdba79b583bd1d674950d4a Mon Sep 17 00:00:00 2001 From: Franco Viotti Date: Thu, 29 Jan 2026 15:09:40 -0300 Subject: [PATCH 4/4] fix(tts): clarify Telnyx telephony limitation and remove dismissive comment --- src/tts/tts.ts | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/tts/tts.ts b/src/tts/tts.ts index fec413cb1..c1473c671 100644 --- a/src/tts/tts.ts +++ b/src/tts/tts.ts @@ -69,7 +69,6 @@ const TELEGRAM_OUTPUT = { // ElevenLabs output formats use codec_sample_rate_bitrate naming. // Opus @ 48kHz/64kbps is a good voice-note tradeoff for Telegram. elevenlabs: "opus_48000_64", - // Telnyx outputs MP3 only (16kHz); not ideal for Telegram voice bubbles but works. telnyx: "mp3_16000" as const, extension: ".opus", voiceCompatible: true, @@ -1380,7 +1379,7 @@ export async function textToSpeechTelephony(params: { continue; } if (provider === "telnyx") { - lastError = "telnyx: unsupported for telephony (MP3 output only)"; + lastError = "telnyx: WebSocket API outputs MP3, telephony requires PCM"; continue; }