feat(tts): add speed parameter support for OpenAI TTS

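Add a configurable speech speed for OpenAI TTS via `messages.tts.openai.speed`. The config schema accepts values from 0.25 to 4.0; when unset, the speed defaults to 1.0. The resolved value is sent as the `speed` field of the OpenAI speech request for both regular and telephony synthesis.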

Config example:
```yaml
messages:
  tts:
    openai:
      speed: 1.25
```

Closes #4131
This commit is contained in:
HirokiKobayashi-R 2026-01-30 04:33:45 +09:00
parent 6372242da7
commit 483a05e4d6
3 changed files with 11 additions and 1 deletions

View File

@ -58,6 +58,8 @@ export type TtsConfig = {
apiKey?: string; apiKey?: string;
model?: string; model?: string;
voice?: string; voice?: string;
/** Speech speed (0.25 to 4.0, default 1.0). */
speed?: number;
}; };
/** Microsoft Edge (node-edge-tts) configuration. */ /** Microsoft Edge (node-edge-tts) configuration. */
edge?: { edge?: {

View File

@ -206,6 +206,7 @@ export const TtsConfigSchema = z
apiKey: z.string().optional(), apiKey: z.string().optional(),
model: z.string().optional(), model: z.string().optional(),
voice: z.string().optional(), voice: z.string().optional(),
speed: z.number().min(0.25).max(4).optional(),
}) })
.strict() .strict()
.optional(), .optional(),

View File

@ -48,6 +48,7 @@ const DEFAULT_ELEVENLABS_VOICE_ID = "pMsXgVXv3BLzUgSXRplE";
const DEFAULT_ELEVENLABS_MODEL_ID = "eleven_multilingual_v2"; const DEFAULT_ELEVENLABS_MODEL_ID = "eleven_multilingual_v2";
const DEFAULT_OPENAI_MODEL = "gpt-4o-mini-tts"; const DEFAULT_OPENAI_MODEL = "gpt-4o-mini-tts";
const DEFAULT_OPENAI_VOICE = "alloy"; const DEFAULT_OPENAI_VOICE = "alloy";
const DEFAULT_OPENAI_SPEED = 1.0;
const DEFAULT_EDGE_VOICE = "en-US-MichelleNeural"; const DEFAULT_EDGE_VOICE = "en-US-MichelleNeural";
const DEFAULT_EDGE_LANG = "en-US"; const DEFAULT_EDGE_LANG = "en-US";
const DEFAULT_EDGE_OUTPUT_FORMAT = "audio-24khz-48kbitrate-mono-mp3"; const DEFAULT_EDGE_OUTPUT_FORMAT = "audio-24khz-48kbitrate-mono-mp3";
@ -110,6 +111,7 @@ export type ResolvedTtsConfig = {
apiKey?: string; apiKey?: string;
model: string; model: string;
voice: string; voice: string;
speed: number;
}; };
edge: { edge: {
enabled: boolean; enabled: boolean;
@ -282,6 +284,7 @@ export function resolveTtsConfig(cfg: MoltbotConfig): ResolvedTtsConfig {
apiKey: raw.openai?.apiKey, apiKey: raw.openai?.apiKey,
model: raw.openai?.model ?? DEFAULT_OPENAI_MODEL, model: raw.openai?.model ?? DEFAULT_OPENAI_MODEL,
voice: raw.openai?.voice ?? DEFAULT_OPENAI_VOICE, voice: raw.openai?.voice ?? DEFAULT_OPENAI_VOICE,
speed: raw.openai?.speed ?? DEFAULT_OPENAI_SPEED,
}, },
edge: { edge: {
enabled: raw.edge?.enabled ?? true, enabled: raw.edge?.enabled ?? true,
@ -1003,10 +1006,11 @@ async function openaiTTS(params: {
apiKey: string; apiKey: string;
model: string; model: string;
voice: string; voice: string;
speed: number;
responseFormat: "mp3" | "opus" | "pcm"; responseFormat: "mp3" | "opus" | "pcm";
timeoutMs: number; timeoutMs: number;
}): Promise<Buffer> { }): Promise<Buffer> {
const { text, apiKey, model, voice, responseFormat, timeoutMs } = params; const { text, apiKey, model, voice, speed, responseFormat, timeoutMs } = params;
if (!isValidOpenAIModel(model)) { if (!isValidOpenAIModel(model)) {
throw new Error(`Invalid model: ${model}`); throw new Error(`Invalid model: ${model}`);
@ -1029,6 +1033,7 @@ async function openaiTTS(params: {
model, model,
input: text, input: text,
voice, voice,
speed,
response_format: responseFormat, response_format: responseFormat,
}), }),
signal: controller.signal, signal: controller.signal,
@ -1211,6 +1216,7 @@ export async function textToSpeech(params: {
apiKey, apiKey,
model: openaiModelOverride ?? config.openai.model, model: openaiModelOverride ?? config.openai.model,
voice: openaiVoiceOverride ?? config.openai.voice, voice: openaiVoiceOverride ?? config.openai.voice,
speed: config.openai.speed,
responseFormat: output.openai, responseFormat: output.openai,
timeoutMs: config.timeoutMs, timeoutMs: config.timeoutMs,
}); });
@ -1313,6 +1319,7 @@ export async function textToSpeechTelephony(params: {
apiKey, apiKey,
model: config.openai.model, model: config.openai.model,
voice: config.openai.voice, voice: config.openai.voice,
speed: config.openai.speed,
responseFormat: output.format, responseFormat: output.format,
timeoutMs: config.timeoutMs, timeoutMs: config.timeoutMs,
}); });