diff --git a/CHANGELOG.md b/CHANGELOG.md index 604acc8c1..539e2b92e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ Docs: https://docs.clawd.bot ### Changes - TTS: add Edge TTS provider fallback, defaulting to keyless Edge with MP3 retry on format failures. (#1668) Thanks @steipete. https://docs.clawd.bot/tts +- TTS: add auto mode enum (off/always/inbound/tagged) with per-session `/tts` override. (#1667) Thanks @sebslight. https://docs.clawd.bot/tts - Docs: expand FAQ (migration, scheduling, concurrency, model recommendations, OpenAI subscription auth, Pi sizing, hackable install, docs SSL workaround). - Docs: add verbose installer troubleshooting guidance. - Docs: update Fly.io guide notes. diff --git a/src/auto-reply/reply/commands-tts.ts b/src/auto-reply/reply/commands-tts.ts index 0d963fbea..5c65fb94c 100644 --- a/src/auto-reply/reply/commands-tts.ts +++ b/src/auto-reply/reply/commands-tts.ts @@ -76,13 +76,16 @@ export const handleTtsCommands: CommandHandler = async (params, allowTextCommand action === "on" ? "always" : action === "off" ? "off" : action, ); if (requestedAuto) { - if (params.sessionEntry && params.sessionStore && params.sessionKey) { - params.sessionEntry.ttsAuto = requestedAuto; - params.sessionEntry.updatedAt = Date.now(); - params.sessionStore[params.sessionKey] = params.sessionEntry; + const entry = params.sessionEntry; + const sessionKey = params.sessionKey; + const store = params.sessionStore; + if (entry && store && sessionKey) { + entry.ttsAuto = requestedAuto; + entry.updatedAt = Date.now(); + store[sessionKey] = entry; if (params.storePath) { await updateSessionStore(params.storePath, (store) => { - store[params.sessionKey] = params.sessionEntry; + store[sessionKey] = entry; }); } } diff --git a/src/auto-reply/reply/session.ts b/src/auto-reply/reply/session.ts index 40133d9a2..45f37afdb 100644 --- a/src/auto-reply/reply/session.ts +++ b/src/auto-reply/reply/session.ts @@ -5,6 +5,7 @@ import path from "node:path"; import { CURRENT_SESSION_VERSION, SessionManager } from "@mariozechner/pi-coding-agent"; import { resolveSessionAgentId } from "../../agents/agent-scope.js"; import type { ClawdbotConfig } from "../../config/config.js"; +import type { TtsAutoMode } from "../../config/types.tts.js"; import { DEFAULT_RESET_TRIGGERS, deriveSessionMetaPatch, @@ -128,7 +129,7 @@ export async function initSessionState(params: { let persistedThinking: string | undefined; let persistedVerbose: string | undefined; let persistedReasoning: string | undefined; - let persistedTtsAuto: string | undefined; + let persistedTtsAuto: TtsAutoMode | undefined; let persistedModelOverride: string | undefined; let persistedProviderOverride: string | undefined; diff --git a/src/config/legacy.migrations.part-3.ts b/src/config/legacy.migrations.part-3.ts index 13c0168e7..9db9e3ede 100644 --- a/src/config/legacy.migrations.part-3.ts +++ b/src/config/legacy.migrations.part-3.ts @@ -57,7 +57,7 @@ export const LEGACY_CONFIG_MIGRATIONS_PART_3: LegacyConfigMigration[] = [ if (typeof tts.enabled !== "boolean") return; tts.auto = tts.enabled ? "always" : "off"; delete tts.enabled; - changes.push(`Moved messages.tts.enabled → messages.tts.auto (${tts.auto}).`); + changes.push(`Moved messages.tts.enabled → messages.tts.auto (${String(tts.auto)}).`); }, }, { diff --git a/src/discord/monitor/message-handler.process.ts b/src/discord/monitor/message-handler.process.ts index 2cff0da44..c5b632c16 100644 --- a/src/discord/monitor/message-handler.process.ts +++ b/src/discord/monitor/message-handler.process.ts @@ -134,8 +134,9 @@ export async function processDiscordMessage(ctx: DiscordMessagePreflightContext) threadParentType === ChannelType.GuildForum || threadParentType === ChannelType.GuildMedia; const forumParentSlug = isForumParent && threadParentName ? normalizeDiscordSlug(threadParentName) : ""; + const threadChannelId = threadChannel?.id; const isForumStarter = - Boolean(threadChannel && isForumParent && forumParentSlug) && message.id === threadChannel.id; + Boolean(threadChannelId && isForumParent && forumParentSlug) && message.id === threadChannelId; const forumContextLine = isForumStarter ? `[Forum parent: #${forumParentSlug}]` : null; const groupChannel = isGuildMessage && displayChannelSlug ? `#${displayChannelSlug}` : undefined; const groupSubject = isDirectMessage ? undefined : groupChannel; diff --git a/src/tts/tts.ts b/src/tts/tts.ts index e084871ea..5fa06f8d4 100644 --- a/src/tts/tts.ts +++ b/src/tts/tts.ts @@ -306,7 +306,7 @@ function resolveTtsAutoModeFromPrefs(prefs: TtsUserPrefs): TtsAutoMode | undefin export function resolveTtsAutoMode(params: { config: ResolvedTtsConfig; prefsPath: string; - sessionAuto?: TtsAutoMode | string; + sessionAuto?: string; }): TtsAutoMode { const sessionAuto = normalizeTtsAutoMode(params.sessionAuto); if (sessionAuto) return sessionAuto; @@ -372,7 +372,7 @@ function updatePrefs(prefsPath: string, update: (prefs: TtsUserPrefs) => void): export function isTtsEnabled( config: ResolvedTtsConfig, prefsPath: string, - sessionAuto?: TtsAutoMode | string, + sessionAuto?: string, ): boolean { return resolveTtsAutoMode({ config, prefsPath, sessionAuto }) !== "off"; } @@ -1216,7 +1216,7 @@ export async function maybeApplyTtsToPayload(params: { channel?: string; kind?: "tool" | "block" | "final"; inboundAudio?: boolean; - ttsAuto?: TtsAutoMode | string; + ttsAuto?: string; }): Promise { const config = resolveTtsConfig(params.cfg); const prefsPath = resolveTtsPrefsPath(config); diff --git a/src/types/node-edge-tts.d.ts b/src/types/node-edge-tts.d.ts new file mode 100644 index 000000000..eaaaa9cdf --- /dev/null +++ b/src/types/node-edge-tts.d.ts @@ -0,0 +1,18 @@ +declare module "node-edge-tts" { + export type EdgeTTSOptions = { + voice?: string; + lang?: string; + outputFormat?: string; + saveSubtitles?: boolean; + proxy?: string; + rate?: string; + pitch?: string; + volume?: string; + timeout?: number; + }; + + export class EdgeTTS { + constructor(options?: EdgeTTSOptions); + ttsPromise(text: string, outputPath: string): Promise; + } +} diff --git a/src/web/inbound.media.test.ts b/src/web/inbound.media.test.ts index fcd53a68b..de23f10a9 100644 --- a/src/web/inbound.media.test.ts +++ b/src/web/inbound.media.test.ts @@ -127,9 +127,9 @@ describe("web inbound media saves with extension", () => { realSock.ev.emit("messages.upsert", upsert); // Allow a brief window for the async handler to fire on slower hosts. - for (let i = 0; i < 10; i++) { + for (let i = 0; i < 50; i++) { if (onMessage.mock.calls.length > 0) break; - await new Promise((resolve) => setTimeout(resolve, 5)); + await new Promise((resolve) => setTimeout(resolve, 10)); } expect(onMessage).toHaveBeenCalledTimes(1); @@ -178,9 +178,9 @@ describe("web inbound media saves with extension", () => { realSock.ev.emit("messages.upsert", upsert); - for (let i = 0; i < 10; i++) { + for (let i = 0; i < 50; i++) { if (onMessage.mock.calls.length > 0) break; - await new Promise((resolve) => setTimeout(resolve, 5)); + await new Promise((resolve) => setTimeout(resolve, 10)); } expect(onMessage).toHaveBeenCalledTimes(1); @@ -218,9 +218,9 @@ describe("web inbound media saves with extension", () => { realSock.ev.emit("messages.upsert", upsert); - for (let i = 0; i < 10; i++) { + for (let i = 0; i < 50; i++) { if (onMessage.mock.calls.length > 0) break; - await new Promise((resolve) => setTimeout(resolve, 5)); + await new Promise((resolve) => setTimeout(resolve, 10)); } expect(onMessage).toHaveBeenCalledTimes(1);