diff --git a/src/tts/tts.ts b/src/tts/tts.ts
index 5fa06f8d4..41a829433 100644
--- a/src/tts/tts.ts
+++ b/src/tts/tts.ts
@@ -736,7 +736,15 @@ function parseTtsDirectives(
   };
 }
 
-export const OPENAI_TTS_MODELS = ["gpt-4o-mini-tts"] as const;
+export const OPENAI_TTS_MODELS = ["gpt-4o-mini-tts", "tts-1", "tts-1-hd"] as const;
+
+/**
+ * Custom OpenAI-compatible TTS endpoint (trailing slashes are stripped).
+ * When set to a non-default URL, model/voice validation is relaxed to allow non-OpenAI models.
+ * Example: OPENAI_TTS_BASE_URL=http://localhost:8880/v1
+ */
+const OPENAI_TTS_BASE_URL = (process.env.OPENAI_TTS_BASE_URL?.trim() || "https://api.openai.com/v1").replace(/\/+$/, "");
+const isCustomOpenAIEndpoint = OPENAI_TTS_BASE_URL !== "https://api.openai.com/v1";
 export const OPENAI_TTS_VOICES = [
   "alloy",
   "ash",
@@ -752,10 +760,14 @@ export const OPENAI_TTS_VOICES = [
 type OpenAiTtsVoice = (typeof OPENAI_TTS_VOICES)[number];
 
 function isValidOpenAIModel(model: string): boolean {
+  // Allow any model when using custom endpoint (e.g., Kokoro, LocalAI)
+  if (isCustomOpenAIEndpoint) return true;
   return OPENAI_TTS_MODELS.includes(model as (typeof OPENAI_TTS_MODELS)[number]);
 }
 
 function isValidOpenAIVoice(voice: string): voice is OpenAiTtsVoice {
+  // Allow any voice when using custom endpoint (e.g., Kokoro Chinese voices)
+  if (isCustomOpenAIEndpoint) return true;
   return OPENAI_TTS_VOICES.includes(voice as OpenAiTtsVoice);
 }
 
@@ -982,7 +994,7 @@ async function openaiTTS(params: {
   const timeout = setTimeout(() => controller.abort(), timeoutMs);
 
   try {
-    const response = await fetch("https://api.openai.com/v1/audio/speech", {
+    const response = await fetch(`${OPENAI_TTS_BASE_URL}/audio/speech`, {
       method: "POST",
       headers: {
         Authorization: `Bearer ${apiKey}`,