diff --git a/src/config/types.tts.ts b/src/config/types.tts.ts
index 4eb4989b9..e1a7b64a1 100644
--- a/src/config/types.tts.ts
+++ b/src/config/types.tts.ts
@@ -42,6 +42,8 @@ export type TtsConfig = {
     baseUrl?: string;
     voiceId?: string;
     modelId?: string;
+    /** Output format (e.g. mp3_22050_32, mp3_44100_128, pcm_16000, pcm_22050, pcm_24000). */
+    outputFormat?: string;
     seed?: number;
     applyTextNormalization?: "auto" | "on" | "off";
     languageCode?: string;
diff --git a/src/config/zod-schema.core.ts b/src/config/zod-schema.core.ts
index 4a8c80bcc..03306551c 100644
--- a/src/config/zod-schema.core.ts
+++ b/src/config/zod-schema.core.ts
@@ -185,6 +185,7 @@ export const TtsConfigSchema = z
         baseUrl: z.string().optional(),
         voiceId: z.string().optional(),
         modelId: z.string().optional(),
+        outputFormat: z.string().optional(),
         seed: z.number().int().min(0).max(4294967295).optional(),
         applyTextNormalization: z.enum(["auto", "on", "off"]).optional(),
         languageCode: z.string().optional(),
diff --git a/src/tts/tts.ts b/src/tts/tts.ts
index c4c9ce307..5f807dd27 100644
--- a/src/tts/tts.ts
+++ b/src/tts/tts.ts
@@ -95,6 +95,7 @@ export type ResolvedTtsConfig = {
     baseUrl: string;
     voiceId: string;
     modelId: string;
+    outputFormat?: string;
     seed?: number;
     applyTextNormalization?: "auto" | "on" | "off";
     languageCode?: string;
@@ -262,6 +263,7 @@ export function resolveTtsConfig(cfg: OpenClawConfig): ResolvedTtsConfig {
       baseUrl: raw.elevenlabs?.baseUrl?.trim() || DEFAULT_ELEVENLABS_BASE_URL,
       voiceId: raw.elevenlabs?.voiceId ?? DEFAULT_ELEVENLABS_VOICE_ID,
       modelId: raw.elevenlabs?.modelId ?? DEFAULT_ELEVENLABS_MODEL_ID,
+      outputFormat: raw.elevenlabs?.outputFormat?.trim() || undefined,
       seed: raw.elevenlabs?.seed,
       applyTextNormalization: raw.elevenlabs?.applyTextNormalization,
       languageCode: raw.elevenlabs?.languageCode,
@@ -1180,6 +1182,9 @@ export async function textToSpeech(params: {
       }
 
       let audioBuffer: Buffer;
+      let effectiveOutputFormat: string;
+      let effectiveExtension: string;
+
       if (provider === "elevenlabs") {
         const voiceIdOverride = params.overrides?.elevenlabs?.voiceId;
         const modelIdOverride = params.overrides?.elevenlabs?.modelId;
@@ -1190,13 +1195,17 @@ export async function textToSpeech(params: {
         const seedOverride = params.overrides?.elevenlabs?.seed;
         const normalizationOverride = params.overrides?.elevenlabs?.applyTextNormalization;
         const languageOverride = params.overrides?.elevenlabs?.languageCode;
+        effectiveOutputFormat = config.elevenlabs.outputFormat ?? output.elevenlabs;
+        effectiveExtension = config.elevenlabs.outputFormat
+          ? inferEdgeExtension(config.elevenlabs.outputFormat)
+          : output.extension;
         audioBuffer = await elevenLabsTTS({
           text: params.text,
           apiKey,
           baseUrl: config.elevenlabs.baseUrl,
           voiceId: voiceIdOverride ?? config.elevenlabs.voiceId,
           modelId: modelIdOverride ?? config.elevenlabs.modelId,
-          outputFormat: output.elevenlabs,
+          outputFormat: effectiveOutputFormat,
           seed: seedOverride ?? config.elevenlabs.seed,
           applyTextNormalization: normalizationOverride ?? config.elevenlabs.applyTextNormalization,
           languageCode: languageOverride ?? config.elevenlabs.languageCode,
@@ -1206,6 +1215,8 @@ export async function textToSpeech(params: {
       } else {
         const openaiModelOverride = params.overrides?.openai?.model;
         const openaiVoiceOverride = params.overrides?.openai?.voice;
+        effectiveOutputFormat = output.openai;
+        effectiveExtension = output.extension;
         audioBuffer = await openaiTTS({
           text: params.text,
           apiKey,
@@ -1219,7 +1230,7 @@ export async function textToSpeech(params: {
       const latencyMs = Date.now() - providerStart;
 
       const tempDir = mkdtempSync(path.join(tmpdir(), "tts-"));
-      const audioPath = path.join(tempDir, `voice-${Date.now()}${output.extension}`);
+      const audioPath = path.join(tempDir, `voice-${Date.now()}${effectiveExtension}`);
       writeFileSync(audioPath, audioBuffer);
       scheduleCleanup(tempDir);
 
@@ -1228,7 +1239,7 @@ export async function textToSpeech(params: {
         audioPath,
         latencyMs,
         provider,
-        outputFormat: provider === "openai" ? output.openai : output.elevenlabs,
+        outputFormat: effectiveOutputFormat,
         voiceCompatible: output.voiceCompatible,
       };
     } catch (err) {