This commit is contained in:
lpjhelder 2026-01-30 13:30:27 -03:00 committed by GitHub
commit 16c02f624a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 17 additions and 3 deletions

View File

@ -42,6 +42,8 @@ export type TtsConfig = {
baseUrl?: string;
voiceId?: string;
modelId?: string;
/** Output format (e.g. mp3_22050_32, mp3_44100_128, pcm_16000, pcm_22050, pcm_24000). */
outputFormat?: string;
seed?: number;
applyTextNormalization?: "auto" | "on" | "off";
languageCode?: string;

View File

@ -185,6 +185,7 @@ export const TtsConfigSchema = z
baseUrl: z.string().optional(),
voiceId: z.string().optional(),
modelId: z.string().optional(),
outputFormat: z.string().optional(),
seed: z.number().int().min(0).max(4294967295).optional(),
applyTextNormalization: z.enum(["auto", "on", "off"]).optional(),
languageCode: z.string().optional(),

View File

@ -95,6 +95,7 @@ export type ResolvedTtsConfig = {
baseUrl: string;
voiceId: string;
modelId: string;
outputFormat?: string;
seed?: number;
applyTextNormalization?: "auto" | "on" | "off";
languageCode?: string;
@ -262,6 +263,7 @@ export function resolveTtsConfig(cfg: OpenClawConfig): ResolvedTtsConfig {
baseUrl: raw.elevenlabs?.baseUrl?.trim() || DEFAULT_ELEVENLABS_BASE_URL,
voiceId: raw.elevenlabs?.voiceId ?? DEFAULT_ELEVENLABS_VOICE_ID,
modelId: raw.elevenlabs?.modelId ?? DEFAULT_ELEVENLABS_MODEL_ID,
outputFormat: raw.elevenlabs?.outputFormat?.trim() || undefined,
seed: raw.elevenlabs?.seed,
applyTextNormalization: raw.elevenlabs?.applyTextNormalization,
languageCode: raw.elevenlabs?.languageCode,
@ -1180,6 +1182,9 @@ export async function textToSpeech(params: {
}
let audioBuffer: Buffer;
let effectiveOutputFormat: string;
let effectiveExtension: string;
if (provider === "elevenlabs") {
const voiceIdOverride = params.overrides?.elevenlabs?.voiceId;
const modelIdOverride = params.overrides?.elevenlabs?.modelId;
@ -1190,13 +1195,17 @@ export async function textToSpeech(params: {
const seedOverride = params.overrides?.elevenlabs?.seed;
const normalizationOverride = params.overrides?.elevenlabs?.applyTextNormalization;
const languageOverride = params.overrides?.elevenlabs?.languageCode;
effectiveOutputFormat = config.elevenlabs.outputFormat ?? output.elevenlabs;
effectiveExtension = config.elevenlabs.outputFormat
? inferEdgeExtension(config.elevenlabs.outputFormat)
: output.extension;
audioBuffer = await elevenLabsTTS({
text: params.text,
apiKey,
baseUrl: config.elevenlabs.baseUrl,
voiceId: voiceIdOverride ?? config.elevenlabs.voiceId,
modelId: modelIdOverride ?? config.elevenlabs.modelId,
outputFormat: output.elevenlabs,
outputFormat: effectiveOutputFormat,
seed: seedOverride ?? config.elevenlabs.seed,
applyTextNormalization: normalizationOverride ?? config.elevenlabs.applyTextNormalization,
languageCode: languageOverride ?? config.elevenlabs.languageCode,
@ -1206,6 +1215,8 @@ export async function textToSpeech(params: {
} else {
const openaiModelOverride = params.overrides?.openai?.model;
const openaiVoiceOverride = params.overrides?.openai?.voice;
effectiveOutputFormat = output.openai;
effectiveExtension = output.extension;
audioBuffer = await openaiTTS({
text: params.text,
apiKey,
@ -1219,7 +1230,7 @@ export async function textToSpeech(params: {
const latencyMs = Date.now() - providerStart;
const tempDir = mkdtempSync(path.join(tmpdir(), "tts-"));
const audioPath = path.join(tempDir, `voice-${Date.now()}${output.extension}`);
const audioPath = path.join(tempDir, `voice-${Date.now()}${effectiveExtension}`);
writeFileSync(audioPath, audioBuffer);
scheduleCleanup(tempDir);
@ -1228,7 +1239,7 @@ export async function textToSpeech(params: {
audioPath,
latencyMs,
provider,
outputFormat: provider === "openai" ? output.openai : output.elevenlabs,
outputFormat: effectiveOutputFormat,
voiceCompatible: output.voiceCompatible,
};
} catch (err) {