feat(tts): Convert Edge TTS MP3 to Opus for Telegram voice messages
When using Edge TTS (the free provider), Telegram was delivering voice messages as MP3 files, which appear as generic audio files rather than native voice notes. This meant no caption/text could accompany the voice message.

This change adds an ffmpeg post-processing step that converts the Edge TTS MP3 output to Opus when the target channel is Telegram, giving Edge TTS feature parity with the OpenAI and ElevenLabs providers.

Requirements:
- ffmpeg must be installed with libopus support.
- Falls back gracefully to the original MP3 if ffmpeg is unavailable or the conversion fails.

Fixes #2949
This commit is contained in:
parent
0b1c8db0ca
commit
7874f1db8d
@ -1152,16 +1152,42 @@ export async function textToSpeech(params: {
|
||||
}
|
||||
}
|
||||
|
||||
// Convert Edge TTS MP3 to Opus for Telegram voice compatibility
|
||||
let finalAudioPath = edgeResult.audioPath;
|
||||
let finalVoiceCompatible = isVoiceCompatibleAudio({ fileName: edgeResult.audioPath });
|
||||
if (channelId === "telegram" && !finalVoiceCompatible) {
|
||||
try {
|
||||
const opusPath = edgeResult.audioPath.replace(/\.[^.]+$/, ".opus");
|
||||
const { execSync } = await import("node:child_process");
|
||||
execSync(
|
||||
`ffmpeg -i "${edgeResult.audioPath}" -c:a libopus -b:a 64k -application voip "${opusPath}" -y`,
|
||||
{ stdio: "pipe" },
|
||||
);
|
||||
// Remove original MP3
|
||||
try {
|
||||
unlinkSync(edgeResult.audioPath);
|
||||
} catch {
|
||||
// ignore cleanup errors
|
||||
}
|
||||
finalAudioPath = opusPath;
|
||||
finalVoiceCompatible = true;
|
||||
logVerbose("TTS: Converted Edge MP3 to Opus for Telegram voice compatibility.");
|
||||
} catch (convErr) {
|
||||
const convError = convErr as Error;
|
||||
logVerbose(`TTS: ffmpeg Opus conversion failed: ${convError.message}`);
|
||||
// Fall back to original MP3
|
||||
}
|
||||
}
|
||||
|
||||
scheduleCleanup(tempDir);
|
||||
const voiceCompatible = isVoiceCompatibleAudio({ fileName: edgeResult.audioPath });
|
||||
|
||||
return {
|
||||
success: true,
|
||||
audioPath: edgeResult.audioPath,
|
||||
audioPath: finalAudioPath,
|
||||
latencyMs: Date.now() - providerStart,
|
||||
provider,
|
||||
outputFormat: edgeResult.outputFormat,
|
||||
voiceCompatible,
|
||||
outputFormat: finalVoiceCompatible ? "opus" : edgeResult.outputFormat,
|
||||
voiceCompatible: finalVoiceCompatible,
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
Loading…
Reference in New Issue
Block a user