Merge 5e58242997 into 09be5d45d5
This commit is contained in:
commit
16c02f624a
@ -42,6 +42,8 @@ export type TtsConfig = {
|
|||||||
baseUrl?: string;
|
baseUrl?: string;
|
||||||
voiceId?: string;
|
voiceId?: string;
|
||||||
modelId?: string;
|
modelId?: string;
|
||||||
|
/** Optional ElevenLabs output format override (e.g. mp3_22050_32, mp3_44100_128, pcm_16000, pcm_22050, pcm_24000). Blank or omitted values fall back to the default ElevenLabs format. */
|
||||||
|
outputFormat?: string;
|
||||||
seed?: number;
|
seed?: number;
|
||||||
applyTextNormalization?: "auto" | "on" | "off";
|
applyTextNormalization?: "auto" | "on" | "off";
|
||||||
languageCode?: string;
|
languageCode?: string;
|
||||||
|
|||||||
@ -185,6 +185,7 @@ export const TtsConfigSchema = z
|
|||||||
baseUrl: z.string().optional(),
|
baseUrl: z.string().optional(),
|
||||||
voiceId: z.string().optional(),
|
voiceId: z.string().optional(),
|
||||||
modelId: z.string().optional(),
|
modelId: z.string().optional(),
|
||||||
|
outputFormat: z.string().optional(),
|
||||||
seed: z.number().int().min(0).max(4294967295).optional(),
|
seed: z.number().int().min(0).max(4294967295).optional(),
|
||||||
applyTextNormalization: z.enum(["auto", "on", "off"]).optional(),
|
applyTextNormalization: z.enum(["auto", "on", "off"]).optional(),
|
||||||
languageCode: z.string().optional(),
|
languageCode: z.string().optional(),
|
||||||
|
|||||||
@ -95,6 +95,7 @@ export type ResolvedTtsConfig = {
|
|||||||
baseUrl: string;
|
baseUrl: string;
|
||||||
voiceId: string;
|
voiceId: string;
|
||||||
modelId: string;
|
modelId: string;
|
||||||
|
outputFormat?: string;
|
||||||
seed?: number;
|
seed?: number;
|
||||||
applyTextNormalization?: "auto" | "on" | "off";
|
applyTextNormalization?: "auto" | "on" | "off";
|
||||||
languageCode?: string;
|
languageCode?: string;
|
||||||
@ -262,6 +263,7 @@ export function resolveTtsConfig(cfg: OpenClawConfig): ResolvedTtsConfig {
|
|||||||
baseUrl: raw.elevenlabs?.baseUrl?.trim() || DEFAULT_ELEVENLABS_BASE_URL,
|
baseUrl: raw.elevenlabs?.baseUrl?.trim() || DEFAULT_ELEVENLABS_BASE_URL,
|
||||||
voiceId: raw.elevenlabs?.voiceId ?? DEFAULT_ELEVENLABS_VOICE_ID,
|
voiceId: raw.elevenlabs?.voiceId ?? DEFAULT_ELEVENLABS_VOICE_ID,
|
||||||
modelId: raw.elevenlabs?.modelId ?? DEFAULT_ELEVENLABS_MODEL_ID,
|
modelId: raw.elevenlabs?.modelId ?? DEFAULT_ELEVENLABS_MODEL_ID,
|
||||||
|
outputFormat: raw.elevenlabs?.outputFormat?.trim() || undefined,
|
||||||
seed: raw.elevenlabs?.seed,
|
seed: raw.elevenlabs?.seed,
|
||||||
applyTextNormalization: raw.elevenlabs?.applyTextNormalization,
|
applyTextNormalization: raw.elevenlabs?.applyTextNormalization,
|
||||||
languageCode: raw.elevenlabs?.languageCode,
|
languageCode: raw.elevenlabs?.languageCode,
|
||||||
@ -1180,6 +1182,9 @@ export async function textToSpeech(params: {
|
|||||||
}
|
}
|
||||||
|
|
||||||
let audioBuffer: Buffer;
|
let audioBuffer: Buffer;
|
||||||
|
let effectiveOutputFormat: string;
|
||||||
|
let effectiveExtension: string;
|
||||||
|
|
||||||
if (provider === "elevenlabs") {
|
if (provider === "elevenlabs") {
|
||||||
const voiceIdOverride = params.overrides?.elevenlabs?.voiceId;
|
const voiceIdOverride = params.overrides?.elevenlabs?.voiceId;
|
||||||
const modelIdOverride = params.overrides?.elevenlabs?.modelId;
|
const modelIdOverride = params.overrides?.elevenlabs?.modelId;
|
||||||
@ -1190,13 +1195,17 @@ export async function textToSpeech(params: {
|
|||||||
const seedOverride = params.overrides?.elevenlabs?.seed;
|
const seedOverride = params.overrides?.elevenlabs?.seed;
|
||||||
const normalizationOverride = params.overrides?.elevenlabs?.applyTextNormalization;
|
const normalizationOverride = params.overrides?.elevenlabs?.applyTextNormalization;
|
||||||
const languageOverride = params.overrides?.elevenlabs?.languageCode;
|
const languageOverride = params.overrides?.elevenlabs?.languageCode;
|
||||||
|
effectiveOutputFormat = config.elevenlabs.outputFormat ?? output.elevenlabs;
|
||||||
|
effectiveExtension = config.elevenlabs.outputFormat
|
||||||
|
? inferEdgeExtension(config.elevenlabs.outputFormat)
|
||||||
|
: output.extension;
|
||||||
audioBuffer = await elevenLabsTTS({
|
audioBuffer = await elevenLabsTTS({
|
||||||
text: params.text,
|
text: params.text,
|
||||||
apiKey,
|
apiKey,
|
||||||
baseUrl: config.elevenlabs.baseUrl,
|
baseUrl: config.elevenlabs.baseUrl,
|
||||||
voiceId: voiceIdOverride ?? config.elevenlabs.voiceId,
|
voiceId: voiceIdOverride ?? config.elevenlabs.voiceId,
|
||||||
modelId: modelIdOverride ?? config.elevenlabs.modelId,
|
modelId: modelIdOverride ?? config.elevenlabs.modelId,
|
||||||
outputFormat: output.elevenlabs,
|
outputFormat: effectiveOutputFormat,
|
||||||
seed: seedOverride ?? config.elevenlabs.seed,
|
seed: seedOverride ?? config.elevenlabs.seed,
|
||||||
applyTextNormalization: normalizationOverride ?? config.elevenlabs.applyTextNormalization,
|
applyTextNormalization: normalizationOverride ?? config.elevenlabs.applyTextNormalization,
|
||||||
languageCode: languageOverride ?? config.elevenlabs.languageCode,
|
languageCode: languageOverride ?? config.elevenlabs.languageCode,
|
||||||
@ -1206,6 +1215,8 @@ export async function textToSpeech(params: {
|
|||||||
} else {
|
} else {
|
||||||
const openaiModelOverride = params.overrides?.openai?.model;
|
const openaiModelOverride = params.overrides?.openai?.model;
|
||||||
const openaiVoiceOverride = params.overrides?.openai?.voice;
|
const openaiVoiceOverride = params.overrides?.openai?.voice;
|
||||||
|
effectiveOutputFormat = output.openai;
|
||||||
|
effectiveExtension = output.extension;
|
||||||
audioBuffer = await openaiTTS({
|
audioBuffer = await openaiTTS({
|
||||||
text: params.text,
|
text: params.text,
|
||||||
apiKey,
|
apiKey,
|
||||||
@ -1219,7 +1230,7 @@ export async function textToSpeech(params: {
|
|||||||
const latencyMs = Date.now() - providerStart;
|
const latencyMs = Date.now() - providerStart;
|
||||||
|
|
||||||
const tempDir = mkdtempSync(path.join(tmpdir(), "tts-"));
|
const tempDir = mkdtempSync(path.join(tmpdir(), "tts-"));
|
||||||
const audioPath = path.join(tempDir, `voice-${Date.now()}${output.extension}`);
|
const audioPath = path.join(tempDir, `voice-${Date.now()}${effectiveExtension}`);
|
||||||
writeFileSync(audioPath, audioBuffer);
|
writeFileSync(audioPath, audioBuffer);
|
||||||
scheduleCleanup(tempDir);
|
scheduleCleanup(tempDir);
|
||||||
|
|
||||||
@ -1228,7 +1239,7 @@ export async function textToSpeech(params: {
|
|||||||
audioPath,
|
audioPath,
|
||||||
latencyMs,
|
latencyMs,
|
||||||
provider,
|
provider,
|
||||||
outputFormat: provider === "openai" ? output.openai : output.elevenlabs,
|
outputFormat: effectiveOutputFormat,
|
||||||
voiceCompatible: output.voiceCompatible,
|
voiceCompatible: output.voiceCompatible,
|
||||||
};
|
};
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user