From 65c3718c96de3c3b65b6e1d13645e71303569e1d Mon Sep 17 00:00:00 2001 From: Muhammed Mukhthar CM Date: Tue, 27 Jan 2026 10:11:04 +0000 Subject: [PATCH] telegram-user: support voice-note media --- .../telegram-user/src/monitor/handler.ts | 1 + extensions/telegram-user/src/send.test.ts | 110 ++++++++++++++++++ extensions/telegram-user/src/send.ts | 49 +++++++- 3 files changed, 155 insertions(+), 5 deletions(-) create mode 100644 extensions/telegram-user/src/send.test.ts diff --git a/extensions/telegram-user/src/monitor/handler.ts b/extensions/telegram-user/src/monitor/handler.ts index 748bba880..acbb2d7fd 100644 --- a/extensions/telegram-user/src/monitor/handler.ts +++ b/extensions/telegram-user/src/monitor/handler.ts @@ -663,6 +663,7 @@ export function createTelegramUserMessageHandler(params: TelegramUserHandlerPara replyToId, threadId, mediaUrl, + audioAsVoice: payload.audioAsVoice === true, maxBytes: mediaMaxMb * 1024 * 1024, }); } catch (err) { diff --git a/extensions/telegram-user/src/send.test.ts b/extensions/telegram-user/src/send.test.ts new file mode 100644 index 000000000..ea47c58e0 --- /dev/null +++ b/extensions/telegram-user/src/send.test.ts @@ -0,0 +1,110 @@ +import { beforeEach, describe, expect, it, vi } from "vitest"; + +const loadWebMedia = vi.fn(); + +vi.mock("./runtime.js", () => { + return { + getTelegramUserRuntime: () => ({ + config: { loadConfig: () => ({}) }, + media: { + loadWebMedia: (...args: unknown[]) => loadWebMedia(...args), + }, + }), + }; +}); + +const inputMediaAuto = vi.fn((file: unknown, params: unknown) => ({ + type: "auto", + file, + ...(params && typeof params === "object" ? params : {}), +})); +const inputMediaVoice = vi.fn((file: unknown, params: unknown) => ({ + type: "voice", + file, + ...(params && typeof params === "object" ? params : {}), +})); + +vi.mock("@mtcute/core", () => { + return { + InputMedia: { + auto: (...args: unknown[]) => inputMediaAuto(...args), + voice: (...args: unknown[]) => inputMediaVoice(...args), + poll: () => ({ type: "poll" }), + }, + }; +}); + +describe("telegram-user send", () => { + beforeEach(() => { + loadWebMedia.mockReset(); + inputMediaAuto.mockClear(); + inputMediaVoice.mockClear(); + }); + + it("sends audio media as voice note when audioAsVoice is set", async () => { + loadWebMedia.mockResolvedValue({ + buffer: Buffer.from("voice"), + contentType: "audio/ogg", + fileName: "note.ogg", + }); + + const sendMedia = vi.fn(async () => ({ id: 123 })); + const { sendMediaTelegramUser } = await import("./send.js"); + await sendMediaTelegramUser("telegram-user:123", "hi", { + client: { sendMedia } as unknown as import("@mtcute/node").TelegramClient, + mediaUrl: "https://example.com/note.ogg", + audioAsVoice: true, + }); + + expect(inputMediaVoice).toHaveBeenCalledTimes(1); + expect(sendMedia).toHaveBeenCalledTimes(1); + const [, media] = sendMedia.mock.calls[0] ?? []; + expect(media).toMatchObject({ type: "voice" }); + }); + + it("falls back to normal media when audioAsVoice is set but media is not voice-compatible", async () => { + loadWebMedia.mockResolvedValue({ + buffer: Buffer.from("img"), + contentType: "image/png", + fileName: "image.png", + }); + + const sendMedia = vi.fn(async () => ({ id: 123 })); + const { sendMediaTelegramUser } = await import("./send.js"); + await sendMediaTelegramUser("telegram-user:123", "hi", { + client: { sendMedia } as unknown as import("@mtcute/node").TelegramClient, + mediaUrl: "https://example.com/image.png", + audioAsVoice: true, + }); + + expect(inputMediaVoice).toHaveBeenCalledTimes(0); + expect(inputMediaAuto).toHaveBeenCalledTimes(1); + }); + + it("falls back to auto when voice messages are forbidden", async () => { + loadWebMedia.mockResolvedValue({ + buffer: Buffer.from("voice"), + contentType: "audio/ogg", + fileName: "note.ogg", + }); + + const sendMedia = vi.fn(async (_to: unknown, media: unknown) => { + if (media && typeof media === "object" && (media as { type?: string }).type === "voice") { + throw new Error("VOICE_MESSAGES_FORBIDDEN"); + } + return { id: 123 }; + }); + + const { sendMediaTelegramUser } = await import("./send.js"); + await sendMediaTelegramUser("telegram-user:123", "hi", { + client: { sendMedia } as unknown as import("@mtcute/node").TelegramClient, + mediaUrl: "https://example.com/note.ogg", + audioAsVoice: true, + }); + + expect(inputMediaVoice).toHaveBeenCalledTimes(1); + expect(inputMediaAuto).toHaveBeenCalledTimes(1); + expect(sendMedia).toHaveBeenCalledTimes(2); + }); +}); + diff --git a/extensions/telegram-user/src/send.ts b/extensions/telegram-user/src/send.ts index 2afbd4645..5d55b0354 100644 --- a/extensions/telegram-user/src/send.ts +++ b/extensions/telegram-user/src/send.ts @@ -39,6 +39,7 @@ export type TelegramUserSendOpts = { replyToId?: number; threadId?: string | number | null; mediaUrl?: string; + audioAsVoice?: boolean; }; function normalizeTarget(raw: string): string { @@ -69,6 +70,24 @@ function resolveTargetAndThread(raw: string, threadId?: string | number | null) return { target, threadId: parsedThreadId }; } +function isVoiceMessagesForbidden(err: unknown): boolean { + const message = err instanceof Error ? err.message : String(err); + return /VOICE_MESSAGES_FORBIDDEN/i.test(message); +} + +function shouldSendAsVoice(params: { + wantsVoice: boolean; + contentType?: string | null; + fileName?: string | null; +}): boolean { + if (!params.wantsVoice) return false; + const contentType = params.contentType?.toLowerCase() ?? ""; + const fileName = params.fileName?.toLowerCase() ?? ""; + if (/(^|\/)(ogg|opus)(;|$)/.test(contentType)) return true; + if (/\.(ogg|opus|oga)$/.test(fileName)) return true; + return false; +} + export function normalizeTelegramUserMessagingTarget(raw: string): string { return normalizeTarget(raw); } @@ -192,11 +211,24 @@ export async function sendMediaTelegramUser( const resolved = resolveTargetAndThread(to, opts.threadId); const target = resolveTelegramUserPeer(resolved.target); const media = await getTelegramUserRuntime().media.loadWebMedia(opts.mediaUrl, opts.maxBytes); - const input = InputMedia.auto(media.buffer, { - fileName: media.fileName ?? undefined, - fileMime: media.contentType, - caption: text, + const wantsVoice = shouldSendAsVoice({ + wantsVoice: opts.audioAsVoice === true, + contentType: media.contentType, + fileName: media.fileName, }); + const buildAuto = () => + InputMedia.auto(media.buffer, { + fileName: media.fileName ?? undefined, + fileMime: media.contentType, + caption: text, + }); + const buildVoice = () => + InputMedia.voice(media.buffer, { + fileName: media.fileName ?? undefined, + fileMime: media.contentType, + caption: text, + }); + const input = wantsVoice ? buildVoice() : buildAuto(); let message: Awaited> | null = null; try { message = await client.sendMedia(target, input, { @@ -204,7 +236,14 @@ export async function sendMediaTelegramUser( ...(resolved.threadId ? { threadId: resolved.threadId } : {}), }); } catch (err) { - if (!isDestroyedClientError(err)) throw err; + if (wantsVoice && isVoiceMessagesForbidden(err)) { + message = await client.sendMedia(target, buildAuto(), { + ...(opts.replyToId ? { replyTo: opts.replyToId } : {}), + ...(resolved.threadId ? { threadId: resolved.threadId } : {}), + }); + } else if (!isDestroyedClientError(err)) { + throw err; + } } if (!message) { return { messageId: "", chatId: String(target) };