Compare commits

...

5 Commits

Author SHA1 Message Date
Peter Steinberger
d649710627 fix: support custom TTS endpoints (#1701) (thanks @louzhixian) 2026-01-25 08:01:11 +00:00
zhixian
498c15555e test: update tests for expanded OpenAI TTS model list
- Accept tts-1 and tts-1-hd as valid models
- Update OPENAI_TTS_MODELS length expectation to 3
2026-01-25 07:59:20 +00:00
zhixian
25345a909a style: format code with oxfmt 2026-01-25 07:59:20 +00:00
zhixian
bfd8321b42 fix: strip trailing slashes from OPENAI_TTS_BASE_URL
Address review feedback: normalize the base URL by removing trailing
slashes to prevent double-slash paths like /v1//audio/speech which
cause 404 errors on some OpenAI-compatible servers.
2026-01-25 07:59:20 +00:00
zhixian
fbbed439bd feat(tts): support custom OpenAI-compatible TTS endpoints
Add OPENAI_TTS_BASE_URL environment variable to allow using self-hosted
or third-party OpenAI-compatible TTS services like Kokoro, LocalAI, or
OpenedAI-Speech.

Changes:
- Add OPENAI_TTS_BASE_URL env var (defaults to OpenAI official API)
- Relax model/voice validation when using custom endpoints
- Add tts-1 and tts-1-hd to the model allowlist

This enables users to:
- Use local TTS for privacy and cost savings
- Use models with better non-English language support (Chinese, Japanese)
- Reduce latency with local inference

Example usage:
  OPENAI_TTS_BASE_URL=http://localhost:8880/v1

Tested with Kokoro-FastAPI.
2026-01-25 07:59:20 +00:00
8 changed files with 266 additions and 33 deletions

View File

@ -9,6 +9,7 @@ Docs: https://docs.clawd.bot
- Venice (Venice AI): highlight provider guide + cross-links + expanded guidance. https://docs.clawd.bot/providers/venice
### Changes
- TTS: allow custom OpenAI-compatible TTS endpoints without API keys. (#1701) Thanks @louzhixian. https://docs.clawd.bot/tts
- TTS: add Edge TTS provider fallback, defaulting to keyless Edge with MP3 retry on format failures. (#1668) Thanks @steipete. https://docs.clawd.bot/tts
- Web search: add Brave freshness filter parameter for time-scoped results. (#1688) Thanks @JonUleis. https://docs.clawd.bot/tools/web
- TTS: add auto mode enum (off/always/inbound/tagged) with per-session `/tts` override. (#1667) Thanks @sebslight. https://docs.clawd.bot/tts

View File

@ -1538,6 +1538,7 @@ voice notes; other channels send MP3 audio.
},
openai: {
apiKey: "openai_api_key",
baseUrl: "https://api.openai.com/v1",
model: "gpt-4o-mini-tts",
voice: "alloy"
}
@ -1558,6 +1559,8 @@ Notes:
- `/tts limit` and `/tts summary` control per-user summarization settings.
- `apiKey` values fall back to `ELEVENLABS_API_KEY`/`XI_API_KEY` and `OPENAI_API_KEY`.
- `elevenlabs.baseUrl` overrides the ElevenLabs API base URL.
- `openai.baseUrl` overrides the OpenAI TTS base URL (defaults to `https://api.openai.com/v1`,
or `OPENAI_TTS_BASE_URL` when set). Custom endpoints do not require an API key.
- `elevenlabs.voiceSettings` supports `stability`/`similarityBoost`/`style` (0..1),
`useSpeakerBoost`, and `speed` (0.5..2.0).

View File

@ -33,11 +33,18 @@ does not publish limits, so assume similar or lower limits.
If you want OpenAI or ElevenLabs:
- `ELEVENLABS_API_KEY` (or `XI_API_KEY`)
- `OPENAI_API_KEY`
- `OPENAI_API_KEY` (optional for custom OpenAI-compatible endpoints)
Optional:
- `OPENAI_TTS_BASE_URL` (defaults to `https://api.openai.com/v1`)
Edge TTS does **not** require an API key. If no API keys are found, Clawdbot defaults
to Edge TTS (unless disabled via `messages.tts.edge.enabled=false`).
When `OPENAI_TTS_BASE_URL` points to a non-OpenAI endpoint, Clawdbot relaxes
OpenAI model/voice validation and does **not** require an API key. If your
endpoint needs authentication, set `messages.tts.openai.apiKey` or `OPENAI_API_KEY`.
If multiple providers are configured, the selected provider is used first and the others are fallback options.
Auto-summary uses the configured `summaryModel` (or `agents.defaults.model.primary`),
so that provider must also be authenticated if you enable summaries.
@ -115,6 +122,32 @@ Full schema is in [Gateway configuration](/gateway/configuration).
}
```
### Custom OpenAI-compatible endpoint
Set an endpoint in the environment:
```bash
export OPENAI_TTS_BASE_URL="http://localhost:8880/v1"
```
Then configure TTS as usual (API key optional for custom endpoints):
```json5
{
messages: {
tts: {
auto: "always",
provider: "openai",
openai: {
baseUrl: "http://localhost:8880/v1",
model: "kokoro",
voice: "zm_yunxia"
}
}
}
}
```
### Edge TTS primary (no API key)
```json5

View File

@ -56,6 +56,7 @@ export type TtsConfig = {
/** OpenAI configuration. */
openai?: {
apiKey?: string;
baseUrl?: string;
model?: string;
voice?: string;
};

View File

@ -204,6 +204,7 @@ export const TtsConfigSchema = z
openai: z
.object({
apiKey: z.string().optional(),
baseUrl: z.string().optional(),
model: z.string().optional(),
voice: z.string().optional(),
})

View File

@ -131,7 +131,7 @@ export const ttsHandlers: GatewayRequestHandlers = {
{
id: "openai",
name: "OpenAI",
configured: Boolean(resolveTtsApiKey(config, "openai")),
configured: isTtsProviderConfigured(config, "openai"),
models: [...OPENAI_TTS_MODELS],
voices: [...OPENAI_TTS_VOICES],
},

View File

@ -106,26 +106,36 @@ describe("tts", () => {
expect(isValidOpenAIVoice("alloy ")).toBe(false);
expect(isValidOpenAIVoice(" alloy")).toBe(false);
});
it("accepts custom voices for custom endpoints", () => {
expect(isValidOpenAIVoice("zm_yunxia", { baseUrl: "http://localhost:8880/v1" })).toBe(true);
});
});
describe("isValidOpenAIModel", () => {
it("accepts gpt-4o-mini-tts model", () => {
it("accepts supported models", () => {
expect(isValidOpenAIModel("gpt-4o-mini-tts")).toBe(true);
expect(isValidOpenAIModel("tts-1")).toBe(true);
expect(isValidOpenAIModel("tts-1-hd")).toBe(true);
});
it("rejects other models", () => {
expect(isValidOpenAIModel("tts-1")).toBe(false);
expect(isValidOpenAIModel("tts-1-hd")).toBe(false);
expect(isValidOpenAIModel("invalid")).toBe(false);
expect(isValidOpenAIModel("")).toBe(false);
expect(isValidOpenAIModel("gpt-4")).toBe(false);
});
it("accepts custom models for custom endpoints", () => {
expect(isValidOpenAIModel("kokoro", { baseUrl: "http://localhost:8880/v1" })).toBe(true);
});
});
describe("OPENAI_TTS_MODELS", () => {
it("contains only gpt-4o-mini-tts", () => {
it("contains supported models", () => {
expect(OPENAI_TTS_MODELS).toContain("gpt-4o-mini-tts");
expect(OPENAI_TTS_MODELS).toHaveLength(1);
expect(OPENAI_TTS_MODELS).toContain("tts-1");
expect(OPENAI_TTS_MODELS).toContain("tts-1-hd");
expect(OPENAI_TTS_MODELS).toHaveLength(3);
});
it("is a non-empty array", () => {
@ -200,6 +210,30 @@ describe("tts", () => {
expect(result.overrides.provider).toBe("edge");
});
it("routes custom OpenAI models when custom endpoints are used", () => {
const policy = resolveModelOverridePolicy({ enabled: true });
const input = "Hello [[tts:provider=openai model=kokoro voice=zm_yunxia]] world";
const result = parseTtsDirectives(input, policy, {
defaultProvider: "openai",
openaiBaseUrl: "http://localhost:8880/v1",
});
expect(result.overrides.openai?.model).toBe("kokoro");
expect(result.overrides.openai?.voice).toBe("zm_yunxia");
});
it("routes model overrides to ElevenLabs when provider is ElevenLabs", () => {
const policy = resolveModelOverridePolicy({ enabled: true });
const input = "Hello [[tts:provider=elevenlabs model=eleven_multilingual_v2]] world";
const result = parseTtsDirectives(input, policy, {
defaultProvider: "openai",
openaiBaseUrl: "http://localhost:8880/v1",
});
expect(result.overrides.elevenlabs?.modelId).toBe("eleven_multilingual_v2");
expect(result.overrides.openai?.model).toBeUndefined();
});
it("keeps text intact when overrides are disabled", () => {
const policy = resolveModelOverridePolicy({ enabled: false });
const input = "Hello [[tts:voice=alloy]] world";
@ -357,7 +391,12 @@ describe("tts", () => {
};
const restoreEnv = (snapshot: Record<string, string | undefined>) => {
const keys = ["OPENAI_API_KEY", "ELEVENLABS_API_KEY", "XI_API_KEY"] as const;
const keys = [
"OPENAI_API_KEY",
"OPENAI_TTS_BASE_URL",
"ELEVENLABS_API_KEY",
"XI_API_KEY",
] as const;
for (const key of keys) {
const value = snapshot[key];
if (value === undefined) {
@ -371,6 +410,7 @@ describe("tts", () => {
const withEnv = (env: Record<string, string | undefined>, run: () => void) => {
const snapshot = {
OPENAI_API_KEY: process.env.OPENAI_API_KEY,
OPENAI_TTS_BASE_URL: process.env.OPENAI_TTS_BASE_URL,
ELEVENLABS_API_KEY: process.env.ELEVENLABS_API_KEY,
XI_API_KEY: process.env.XI_API_KEY,
};
@ -392,6 +432,7 @@ describe("tts", () => {
withEnv(
{
OPENAI_API_KEY: "test-openai-key",
OPENAI_TTS_BASE_URL: undefined,
ELEVENLABS_API_KEY: undefined,
XI_API_KEY: undefined,
},
@ -407,6 +448,7 @@ describe("tts", () => {
withEnv(
{
OPENAI_API_KEY: undefined,
OPENAI_TTS_BASE_URL: undefined,
ELEVENLABS_API_KEY: "test-elevenlabs-key",
XI_API_KEY: undefined,
},
@ -422,6 +464,7 @@ describe("tts", () => {
withEnv(
{
OPENAI_API_KEY: undefined,
OPENAI_TTS_BASE_URL: undefined,
ELEVENLABS_API_KEY: undefined,
XI_API_KEY: undefined,
},
@ -432,6 +475,22 @@ describe("tts", () => {
},
);
});
it("prefers OpenAI when a custom endpoint is configured without API keys", () => {
withEnv(
{
OPENAI_API_KEY: undefined,
OPENAI_TTS_BASE_URL: "http://localhost:8880/v1",
ELEVENLABS_API_KEY: undefined,
XI_API_KEY: undefined,
},
() => {
const config = resolveTtsConfig(baseCfg);
const provider = getTtsProvider(config, "/tmp/tts-prefs-openai-custom.json");
expect(provider).toBe("openai");
},
);
});
});
describe("maybeApplyTtsToPayload", () => {
@ -495,6 +554,55 @@ describe("tts", () => {
process.env.CLAWDBOT_TTS_PREFS = prevPrefs;
});
it("allows custom OpenAI endpoints without API keys", async () => {
const prevPrefs = process.env.CLAWDBOT_TTS_PREFS;
const prevBaseUrl = process.env.OPENAI_TTS_BASE_URL;
const prevOpenAiKey = process.env.OPENAI_API_KEY;
process.env.CLAWDBOT_TTS_PREFS = `/tmp/tts-test-${Date.now()}.json`;
process.env.OPENAI_TTS_BASE_URL = "http://localhost:8880/v1";
delete process.env.OPENAI_API_KEY;
const originalFetch = globalThis.fetch;
const fetchMock = vi.fn(async () => ({
ok: true,
arrayBuffer: async () => new ArrayBuffer(1),
}));
globalThis.fetch = fetchMock as unknown as typeof fetch;
const cfg = {
agents: { defaults: { model: { primary: "openai/gpt-4o-mini" } } },
messages: {
tts: {
auto: "always",
provider: "openai",
openai: { model: "kokoro", voice: "zm_yunxia" },
},
},
};
const result = await maybeApplyTtsToPayload({
payload: { text: "Hello world" },
cfg,
kind: "final",
});
expect(result.mediaUrl).toBeDefined();
expect(fetchMock).toHaveBeenCalledTimes(1);
const [url, options] = fetchMock.mock.calls[0] ?? [];
expect(url).toBe("http://localhost:8880/v1/audio/speech");
expect(
(options as { headers?: Record<string, string> })?.headers?.Authorization,
).toBeUndefined();
globalThis.fetch = originalFetch;
process.env.OPENAI_TTS_BASE_URL = prevBaseUrl;
if (prevOpenAiKey === undefined) {
delete process.env.OPENAI_API_KEY;
} else {
process.env.OPENAI_API_KEY = prevOpenAiKey;
}
process.env.CLAWDBOT_TTS_PREFS = prevPrefs;
});
it("skips auto-TTS in tagged mode unless a tts tag is present", async () => {
const prevPrefs = process.env.CLAWDBOT_TTS_PREFS;
process.env.CLAWDBOT_TTS_PREFS = `/tmp/tts-test-${Date.now()}.json`;

View File

@ -48,6 +48,7 @@ const DEFAULT_ELEVENLABS_VOICE_ID = "pMsXgVXv3BLzUgSXRplE";
const DEFAULT_ELEVENLABS_MODEL_ID = "eleven_multilingual_v2";
const DEFAULT_OPENAI_MODEL = "gpt-4o-mini-tts";
const DEFAULT_OPENAI_VOICE = "alloy";
const DEFAULT_OPENAI_TTS_BASE_URL = "https://api.openai.com/v1";
const DEFAULT_EDGE_VOICE = "en-US-MichelleNeural";
const DEFAULT_EDGE_LANG = "en-US";
const DEFAULT_EDGE_OUTPUT_FORMAT = "audio-24khz-48kbitrate-mono-mp3";
@ -103,6 +104,7 @@ export type ResolvedTtsConfig = {
};
openai: {
apiKey?: string;
baseUrl: string;
model: string;
voice: string;
};
@ -234,6 +236,9 @@ export function resolveTtsConfig(cfg: ClawdbotConfig): ResolvedTtsConfig {
const raw: TtsConfig = cfg.messages?.tts ?? {};
const providerSource = raw.provider ? "config" : "default";
const edgeOutputFormat = raw.edge?.outputFormat?.trim();
const openaiBaseUrl = normalizeOpenAiTtsBaseUrl(
raw.openai?.baseUrl?.trim() || process.env.OPENAI_TTS_BASE_URL,
);
const auto = normalizeTtsAutoMode(raw.auto) ?? (raw.enabled ? "always" : "off");
return {
auto,
@ -265,6 +270,7 @@ export function resolveTtsConfig(cfg: ClawdbotConfig): ResolvedTtsConfig {
},
openai: {
apiKey: raw.openai?.apiKey,
baseUrl: openaiBaseUrl,
model: raw.openai?.model ?? DEFAULT_OPENAI_MODEL,
voice: raw.openai?.voice ?? DEFAULT_OPENAI_VOICE,
},
@ -395,7 +401,9 @@ export function getTtsProvider(config: ResolvedTtsConfig, prefsPath: string): Tt
if (prefs.tts?.provider) return prefs.tts.provider;
if (config.providerSource === "config") return config.provider;
if (resolveTtsApiKey(config, "openai")) return "openai";
if (resolveTtsApiKey(config, "openai") || isCustomOpenAiTtsEndpoint(config.openai.baseUrl)) {
return "openai";
}
if (resolveTtsApiKey(config, "elevenlabs")) return "elevenlabs";
return "edge";
}
@ -470,6 +478,12 @@ export function resolveTtsProviderOrder(primary: TtsProvider): TtsProvider[] {
/**
 * Whether a TTS provider has enough configuration to be usable.
 * Edge relies only on its enabled flag; OpenAI counts as configured when an
 * API key resolves OR a custom (non-default) base URL is set (custom
 * OpenAI-compatible endpoints do not require a key); every other provider
 * requires a resolvable API key.
 */
export function isTtsProviderConfigured(config: ResolvedTtsConfig, provider: TtsProvider): boolean {
  switch (provider) {
    case "edge":
      return config.edge.enabled;
    case "openai": {
      const hasKey = Boolean(resolveTtsApiKey(config, provider));
      return hasKey || isCustomOpenAiTtsEndpoint(config.openai.baseUrl);
    }
    default:
      return Boolean(resolveTtsApiKey(config, provider));
  }
}
@ -483,6 +497,16 @@ function normalizeElevenLabsBaseUrl(baseUrl: string): string {
return trimmed.replace(/\/+$/, "");
}
/**
 * Normalize an OpenAI TTS base URL: trim surrounding whitespace and strip
 * trailing slashes so request paths don't end up double-slashed (e.g.
 * `/v1//audio/speech`, which 404s on some OpenAI-compatible servers).
 * Empty/unset input falls back to the official OpenAI endpoint.
 */
function normalizeOpenAiTtsBaseUrl(baseUrl?: string): string {
  const candidate = (baseUrl ?? "").trim();
  if (candidate === "") return DEFAULT_OPENAI_TTS_BASE_URL;
  return candidate.replace(/\/+$/, "");
}
/**
 * True when the (normalized) base URL points somewhere other than the
 * official OpenAI API — i.e. a self-hosted or third-party
 * OpenAI-compatible endpoint.
 */
function isCustomOpenAiTtsEndpoint(baseUrl: string): boolean {
  const normalized = normalizeOpenAiTtsBaseUrl(baseUrl);
  return normalized !== DEFAULT_OPENAI_TTS_BASE_URL;
}
function requireInRange(value: number, min: number, max: number, label: string): void {
if (!Number.isFinite(value) || value < min || value > max) {
throw new Error(`${label} must be between ${min} and ${max}`);
@ -538,6 +562,7 @@ function parseNumberValue(value: string): number | undefined {
function parseTtsDirectives(
text: string,
policy: ResolvedTtsModelOverrides,
options: { defaultProvider?: TtsProvider; openaiBaseUrl?: string } = {},
): TtsDirectiveParseResult {
if (!policy.enabled) {
return { cleanedText: text, overrides: {}, warnings: [], hasDirective: false };
@ -561,6 +586,30 @@ function parseTtsDirectives(
cleanedText = cleanedText.replace(directiveRegex, (_match, body: string) => {
hasDirective = true;
const tokens = body.split(/\s+/).filter(Boolean);
let providerOverride: TtsProvider | undefined;
for (const token of tokens) {
const eqIndex = token.indexOf("=");
if (eqIndex === -1) continue;
const rawKey = token.slice(0, eqIndex).trim();
const rawValue = token.slice(eqIndex + 1).trim();
if (!rawKey || !rawValue) continue;
if (rawKey.toLowerCase() !== "provider") continue;
if (!policy.allowProvider) break;
if (rawValue === "openai" || rawValue === "elevenlabs" || rawValue === "edge") {
providerOverride = rawValue;
} else {
warnings.push(`unsupported provider "${rawValue}"`);
}
break;
}
if (providerOverride) {
overrides.provider = providerOverride;
}
const providerHint = overrides.provider ?? options.defaultProvider;
const openAiContext = { baseUrl: options.openaiBaseUrl };
for (const token of tokens) {
const eqIndex = token.indexOf("=");
if (eqIndex === -1) continue;
@ -571,18 +620,12 @@ function parseTtsDirectives(
try {
switch (key) {
case "provider":
if (!policy.allowProvider) break;
if (rawValue === "openai" || rawValue === "elevenlabs" || rawValue === "edge") {
overrides.provider = rawValue;
} else {
warnings.push(`unsupported provider "${rawValue}"`);
}
break;
case "voice":
case "openai_voice":
case "openaivoice":
if (!policy.allowVoice) break;
if (isValidOpenAIVoice(rawValue)) {
if (isValidOpenAIVoice(rawValue, openAiContext)) {
overrides.openai = { ...overrides.openai, voice: rawValue };
} else {
warnings.push(`invalid OpenAI voice "${rawValue}"`);
@ -602,17 +645,40 @@ function parseTtsDirectives(
case "model":
case "modelid":
case "model_id":
case "elevenlabs_model":
case "elevenlabsmodel":
case "openai_model":
case "openaimodel":
if (!policy.allowModelId) break;
if (isValidOpenAIModel(rawValue)) {
if (key === "openai_model" || key === "openaimodel") {
if (isValidOpenAIModel(rawValue, openAiContext)) {
overrides.openai = { ...overrides.openai, model: rawValue };
} else {
warnings.push(`invalid OpenAI model "${rawValue}"`);
}
break;
}
if (providerHint === "openai") {
if (isValidOpenAIModel(rawValue, openAiContext)) {
overrides.openai = { ...overrides.openai, model: rawValue };
} else {
warnings.push(`invalid OpenAI model "${rawValue}"`);
}
break;
}
if (providerHint === "elevenlabs") {
overrides.elevenlabs = { ...overrides.elevenlabs, modelId: rawValue };
break;
}
if (isValidOpenAIModel(rawValue, openAiContext)) {
overrides.openai = { ...overrides.openai, model: rawValue };
} else {
overrides.elevenlabs = { ...overrides.elevenlabs, modelId: rawValue };
}
break;
case "elevenlabs_model":
case "elevenlabsmodel":
if (!policy.allowModelId) break;
overrides.elevenlabs = { ...overrides.elevenlabs, modelId: rawValue };
break;
case "stability":
if (!policy.allowVoiceSettings) break;
{
@ -736,7 +802,7 @@ function parseTtsDirectives(
};
}
export const OPENAI_TTS_MODELS = ["gpt-4o-mini-tts"] as const;
/** Model allowlist enforced against the official OpenAI TTS API. */
export const OPENAI_TTS_MODELS = [
  "gpt-4o-mini-tts",
  "tts-1",
  "tts-1-hd",
] as const;
export const OPENAI_TTS_VOICES = [
"alloy",
"ash",
@ -750,12 +816,15 @@ export const OPENAI_TTS_VOICES = [
] as const;
type OpenAiTtsVoice = (typeof OPENAI_TTS_VOICES)[number];
type OpenAiTtsValidationContext = { baseUrl?: string };
function isValidOpenAIModel(model: string): boolean {
/**
 * Validate a model name for the OpenAI TTS provider. When the context
 * carries a custom (non-official) base URL, any model name is accepted;
 * the official API is restricted to the `OPENAI_TTS_MODELS` allowlist.
 */
function isValidOpenAIModel(model: string, context: OpenAiTtsValidationContext = {}): boolean {
  const { baseUrl } = context;
  if (baseUrl && isCustomOpenAiTtsEndpoint(baseUrl)) return true;
  return (OPENAI_TTS_MODELS as readonly string[]).includes(model);
}
function isValidOpenAIVoice(voice: string): voice is OpenAiTtsVoice {
/**
 * Validate a voice name for the OpenAI TTS provider. Custom endpoints
 * accept arbitrary voice names; the official API only accepts the known
 * `OPENAI_TTS_VOICES` entries.
 */
function isValidOpenAIVoice(voice: string, context: OpenAiTtsValidationContext = {}): boolean {
  const { baseUrl } = context;
  if (baseUrl && isCustomOpenAiTtsEndpoint(baseUrl)) return true;
  return (OPENAI_TTS_VOICES as readonly string[]).includes(voice);
}
@ -963,18 +1032,21 @@ async function elevenLabsTTS(params: {
async function openaiTTS(params: {
text: string;
apiKey: string;
apiKey?: string;
baseUrl: string;
model: string;
voice: string;
responseFormat: "mp3" | "opus";
timeoutMs: number;
}): Promise<Buffer> {
const { text, apiKey, model, voice, responseFormat, timeoutMs } = params;
const { text, model, voice, responseFormat, timeoutMs } = params;
const apiKey = params.apiKey?.trim();
const baseUrl = normalizeOpenAiTtsBaseUrl(params.baseUrl);
if (!isValidOpenAIModel(model)) {
if (!isValidOpenAIModel(model, { baseUrl })) {
throw new Error(`Invalid model: ${model}`);
}
if (!isValidOpenAIVoice(voice)) {
if (!isValidOpenAIVoice(voice, { baseUrl })) {
throw new Error(`Invalid voice: ${voice}`);
}
@ -982,12 +1054,15 @@ async function openaiTTS(params: {
const timeout = setTimeout(() => controller.abort(), timeoutMs);
try {
const response = await fetch("https://api.openai.com/v1/audio/speech", {
const headers: Record<string, string> = {
"Content-Type": "application/json",
};
if (apiKey) {
headers.Authorization = `Bearer ${apiKey}`;
}
const response = await fetch(`${baseUrl}/audio/speech`, {
method: "POST",
headers: {
Authorization: `Bearer ${apiKey}`,
"Content-Type": "application/json",
},
headers,
body: JSON.stringify({
model,
input: text,
@ -1137,13 +1212,19 @@ export async function textToSpeech(params: {
}
const apiKey = resolveTtsApiKey(config, provider);
if (!apiKey) {
const allowMissingKey =
provider === "openai" && isCustomOpenAiTtsEndpoint(config.openai.baseUrl);
if (!apiKey && !allowMissingKey) {
lastError = `No API key for ${provider}`;
continue;
}
let audioBuffer: Buffer;
if (provider === "elevenlabs") {
if (!apiKey) {
lastError = "No API key for elevenlabs";
continue;
}
const voiceIdOverride = params.overrides?.elevenlabs?.voiceId;
const modelIdOverride = params.overrides?.elevenlabs?.modelId;
const voiceSettings = {
@ -1172,6 +1253,7 @@ export async function textToSpeech(params: {
audioBuffer = await openaiTTS({
text: params.text,
apiKey,
baseUrl: config.openai.baseUrl,
model: openaiModelOverride ?? config.openai.model,
voice: openaiVoiceOverride ?? config.openai.voice,
responseFormat: output.openai,
@ -1227,8 +1309,12 @@ export async function maybeApplyTtsToPayload(params: {
});
if (autoMode === "off") return params.payload;
const defaultProvider = getTtsProvider(config, prefsPath);
const text = params.payload.text ?? "";
const directives = parseTtsDirectives(text, config.modelOverrides);
const directives = parseTtsDirectives(text, config.modelOverrides, {
defaultProvider,
openaiBaseUrl: config.openai.baseUrl,
});
if (directives.warnings.length > 0) {
logVerbose(`TTS: ignored directive overrides (${directives.warnings.join("; ")})`);
}