diff --git a/src/config/types.tts.ts b/src/config/types.tts.ts index 4eb4989b9..2d313091a 100644 --- a/src/config/types.tts.ts +++ b/src/config/types.tts.ts @@ -58,6 +58,8 @@ export type TtsConfig = { apiKey?: string; model?: string; voice?: string; + /** Speech speed (0.25 to 4.0, default 1.0). */ + speed?: number; }; /** Microsoft Edge (node-edge-tts) configuration. */ edge?: { diff --git a/src/config/zod-schema.core.ts b/src/config/zod-schema.core.ts index 4a8c80bcc..622f09d7f 100644 --- a/src/config/zod-schema.core.ts +++ b/src/config/zod-schema.core.ts @@ -206,6 +206,7 @@ export const TtsConfigSchema = z apiKey: z.string().optional(), model: z.string().optional(), voice: z.string().optional(), + speed: z.number().min(0.25).max(4).optional(), }) .strict() .optional(), diff --git a/src/tts/tts.ts b/src/tts/tts.ts index c4c9ce307..c024fa2a7 100644 --- a/src/tts/tts.ts +++ b/src/tts/tts.ts @@ -48,6 +48,7 @@ const DEFAULT_ELEVENLABS_VOICE_ID = "pMsXgVXv3BLzUgSXRplE"; const DEFAULT_ELEVENLABS_MODEL_ID = "eleven_multilingual_v2"; const DEFAULT_OPENAI_MODEL = "gpt-4o-mini-tts"; const DEFAULT_OPENAI_VOICE = "alloy"; +const DEFAULT_OPENAI_SPEED = 1.0; const DEFAULT_EDGE_VOICE = "en-US-MichelleNeural"; const DEFAULT_EDGE_LANG = "en-US"; const DEFAULT_EDGE_OUTPUT_FORMAT = "audio-24khz-48kbitrate-mono-mp3"; @@ -110,6 +111,7 @@ export type ResolvedTtsConfig = { apiKey?: string; model: string; voice: string; + speed: number; }; edge: { enabled: boolean; @@ -282,6 +284,7 @@ export function resolveTtsConfig(cfg: OpenClawConfig): ResolvedTtsConfig { apiKey: raw.openai?.apiKey, model: raw.openai?.model ?? DEFAULT_OPENAI_MODEL, voice: raw.openai?.voice ?? DEFAULT_OPENAI_VOICE, + speed: raw.openai?.speed ?? DEFAULT_OPENAI_SPEED, }, edge: { enabled: raw.edge?.enabled ?? true, @@ -1003,10 +1006,11 @@ async function openaiTTS(params: { apiKey: string; model: string; voice: string; + speed: number; responseFormat: "mp3" | "opus" | "pcm"; timeoutMs: number; }): Promise { - const { text, apiKey, model, voice, responseFormat, timeoutMs } = params; + const { text, apiKey, model, voice, speed, responseFormat, timeoutMs } = params; if (!isValidOpenAIModel(model)) { throw new Error(`Invalid model: ${model}`); @@ -1029,6 +1033,7 @@ async function openaiTTS(params: { model, input: text, voice, + speed, response_format: responseFormat, }), signal: controller.signal, @@ -1211,6 +1216,7 @@ export async function textToSpeech(params: { apiKey, model: openaiModelOverride ?? config.openai.model, voice: openaiVoiceOverride ?? config.openai.voice, + speed: config.openai.speed, responseFormat: output.openai, timeoutMs: config.timeoutMs, }); @@ -1313,6 +1319,7 @@ export async function textToSpeechTelephony(params: { apiKey, model: config.openai.model, voice: config.openai.voice, + speed: config.openai.speed, responseFormat: output.format, timeoutMs: config.timeoutMs, });