feat(tts): Convert Edge TTS MP3 to Opus for Telegram voice messages

With Edge TTS (the free provider), voice messages were delivered to
Telegram as MP3 files, which appear as audio files rather than native
voice notes. As a result, no caption/text could accompany the voice message.

This change adds an ffmpeg post-processing step that converts Edge TTS
MP3 output to Opus when the target channel is Telegram, giving Edge TTS
feature parity with the OpenAI and ElevenLabs providers.

Requirements and fallback behavior:
- ffmpeg must be installed with libopus support
- If ffmpeg is unavailable or the conversion fails, the original MP3 is
  returned unchanged

Fixes #2949
This commit is contained in:
Nero 2026-01-27 16:07:41 -06:00
parent 0b1c8db0ca
commit 7874f1db8d

View File

@ -1152,16 +1152,42 @@ export async function textToSpeech(params: {
}
}
// Convert Edge TTS MP3 to Opus for Telegram voice compatibility
let finalAudioPath = edgeResult.audioPath;
let finalVoiceCompatible = isVoiceCompatibleAudio({ fileName: edgeResult.audioPath });
if (channelId === "telegram" && !finalVoiceCompatible) {
try {
const opusPath = edgeResult.audioPath.replace(/\.[^.]+$/, ".opus");
const { execSync } = await import("node:child_process");
execSync(
`ffmpeg -i "${edgeResult.audioPath}" -c:a libopus -b:a 64k -application voip "${opusPath}" -y`,
{ stdio: "pipe" },
);
// Remove original MP3
try {
unlinkSync(edgeResult.audioPath);
} catch {
// ignore cleanup errors
}
finalAudioPath = opusPath;
finalVoiceCompatible = true;
logVerbose("TTS: Converted Edge MP3 to Opus for Telegram voice compatibility.");
} catch (convErr) {
const convError = convErr as Error;
logVerbose(`TTS: ffmpeg Opus conversion failed: ${convError.message}`);
// Fall back to original MP3
}
}
scheduleCleanup(tempDir);
const voiceCompatible = isVoiceCompatibleAudio({ fileName: edgeResult.audioPath });
return {
success: true,
audioPath: edgeResult.audioPath,
audioPath: finalAudioPath,
latencyMs: Date.now() - providerStart,
provider,
outputFormat: edgeResult.outputFormat,
voiceCompatible,
outputFormat: finalVoiceCompatible ? "opus" : edgeResult.outputFormat,
voiceCompatible: finalVoiceCompatible,
};
}