feat(tts): add speed parameter support for OpenAI TTS
Add configurable speech speed (0.25 to 4.0, default 1.0) for OpenAI TTS.
Config example:
```yaml
messages:
  tts:
    openai:
      speed: 1.25
```
Closes #4131
This commit is contained in:
parent
6372242da7
commit
483a05e4d6
@ -58,6 +58,8 @@ export type TtsConfig = {
|
|||||||
apiKey?: string;
|
apiKey?: string;
|
||||||
model?: string;
|
model?: string;
|
||||||
voice?: string;
|
voice?: string;
|
||||||
|
/** Speech speed (0.25 to 4.0, default 1.0). */
|
||||||
|
speed?: number;
|
||||||
};
|
};
|
||||||
/** Microsoft Edge (node-edge-tts) configuration. */
|
/** Microsoft Edge (node-edge-tts) configuration. */
|
||||||
edge?: {
|
edge?: {
|
||||||
|
|||||||
@ -206,6 +206,7 @@ export const TtsConfigSchema = z
|
|||||||
apiKey: z.string().optional(),
|
apiKey: z.string().optional(),
|
||||||
model: z.string().optional(),
|
model: z.string().optional(),
|
||||||
voice: z.string().optional(),
|
voice: z.string().optional(),
|
||||||
|
speed: z.number().min(0.25).max(4).optional(),
|
||||||
})
|
})
|
||||||
.strict()
|
.strict()
|
||||||
.optional(),
|
.optional(),
|
||||||
|
|||||||
@ -48,6 +48,7 @@ const DEFAULT_ELEVENLABS_VOICE_ID = "pMsXgVXv3BLzUgSXRplE";
|
|||||||
const DEFAULT_ELEVENLABS_MODEL_ID = "eleven_multilingual_v2";
|
const DEFAULT_ELEVENLABS_MODEL_ID = "eleven_multilingual_v2";
|
||||||
const DEFAULT_OPENAI_MODEL = "gpt-4o-mini-tts";
|
const DEFAULT_OPENAI_MODEL = "gpt-4o-mini-tts";
|
||||||
const DEFAULT_OPENAI_VOICE = "alloy";
|
const DEFAULT_OPENAI_VOICE = "alloy";
|
||||||
|
const DEFAULT_OPENAI_SPEED = 1.0;
|
||||||
const DEFAULT_EDGE_VOICE = "en-US-MichelleNeural";
|
const DEFAULT_EDGE_VOICE = "en-US-MichelleNeural";
|
||||||
const DEFAULT_EDGE_LANG = "en-US";
|
const DEFAULT_EDGE_LANG = "en-US";
|
||||||
const DEFAULT_EDGE_OUTPUT_FORMAT = "audio-24khz-48kbitrate-mono-mp3";
|
const DEFAULT_EDGE_OUTPUT_FORMAT = "audio-24khz-48kbitrate-mono-mp3";
|
||||||
@ -110,6 +111,7 @@ export type ResolvedTtsConfig = {
|
|||||||
apiKey?: string;
|
apiKey?: string;
|
||||||
model: string;
|
model: string;
|
||||||
voice: string;
|
voice: string;
|
||||||
|
speed: number;
|
||||||
};
|
};
|
||||||
edge: {
|
edge: {
|
||||||
enabled: boolean;
|
enabled: boolean;
|
||||||
@ -282,6 +284,7 @@ export function resolveTtsConfig(cfg: MoltbotConfig): ResolvedTtsConfig {
|
|||||||
apiKey: raw.openai?.apiKey,
|
apiKey: raw.openai?.apiKey,
|
||||||
model: raw.openai?.model ?? DEFAULT_OPENAI_MODEL,
|
model: raw.openai?.model ?? DEFAULT_OPENAI_MODEL,
|
||||||
voice: raw.openai?.voice ?? DEFAULT_OPENAI_VOICE,
|
voice: raw.openai?.voice ?? DEFAULT_OPENAI_VOICE,
|
||||||
|
speed: raw.openai?.speed ?? DEFAULT_OPENAI_SPEED,
|
||||||
},
|
},
|
||||||
edge: {
|
edge: {
|
||||||
enabled: raw.edge?.enabled ?? true,
|
enabled: raw.edge?.enabled ?? true,
|
||||||
@ -1003,10 +1006,11 @@ async function openaiTTS(params: {
|
|||||||
apiKey: string;
|
apiKey: string;
|
||||||
model: string;
|
model: string;
|
||||||
voice: string;
|
voice: string;
|
||||||
|
speed: number;
|
||||||
responseFormat: "mp3" | "opus" | "pcm";
|
responseFormat: "mp3" | "opus" | "pcm";
|
||||||
timeoutMs: number;
|
timeoutMs: number;
|
||||||
}): Promise<Buffer> {
|
}): Promise<Buffer> {
|
||||||
const { text, apiKey, model, voice, responseFormat, timeoutMs } = params;
|
const { text, apiKey, model, voice, speed, responseFormat, timeoutMs } = params;
|
||||||
|
|
||||||
if (!isValidOpenAIModel(model)) {
|
if (!isValidOpenAIModel(model)) {
|
||||||
throw new Error(`Invalid model: ${model}`);
|
throw new Error(`Invalid model: ${model}`);
|
||||||
@ -1029,6 +1033,7 @@ async function openaiTTS(params: {
|
|||||||
model,
|
model,
|
||||||
input: text,
|
input: text,
|
||||||
voice,
|
voice,
|
||||||
|
speed,
|
||||||
response_format: responseFormat,
|
response_format: responseFormat,
|
||||||
}),
|
}),
|
||||||
signal: controller.signal,
|
signal: controller.signal,
|
||||||
@ -1211,6 +1216,7 @@ export async function textToSpeech(params: {
|
|||||||
apiKey,
|
apiKey,
|
||||||
model: openaiModelOverride ?? config.openai.model,
|
model: openaiModelOverride ?? config.openai.model,
|
||||||
voice: openaiVoiceOverride ?? config.openai.voice,
|
voice: openaiVoiceOverride ?? config.openai.voice,
|
||||||
|
speed: config.openai.speed,
|
||||||
responseFormat: output.openai,
|
responseFormat: output.openai,
|
||||||
timeoutMs: config.timeoutMs,
|
timeoutMs: config.timeoutMs,
|
||||||
});
|
});
|
||||||
@ -1313,6 +1319,7 @@ export async function textToSpeechTelephony(params: {
|
|||||||
apiKey,
|
apiKey,
|
||||||
model: config.openai.model,
|
model: config.openai.model,
|
||||||
voice: config.openai.voice,
|
voice: config.openai.voice,
|
||||||
|
speed: config.openai.speed,
|
||||||
responseFormat: output.format,
|
responseFormat: output.format,
|
||||||
timeoutMs: config.timeoutMs,
|
timeoutMs: config.timeoutMs,
|
||||||
});
|
});
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user