diff --git a/CHANGELOG.md b/CHANGELOG.md index ff9b73b9a..130b363d7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,6 +24,7 @@ - Google: merge consecutive messages to satisfy strict role alternation for Google provider models. Thanks @Asleep123 for PR #266. - Postinstall: handle targetDir symlinks in the install script. Thanks @obviyus for PR #272. - WhatsApp/Telegram: add groupPolicy handling for group messages and normalize allowFrom matching (tg/telegram prefixes). Thanks @mneves75. +- Telegram: warn users when inbound media exceeds the 5MB limit. Thanks @jarvis-medmatic. - Auto-reply: add configurable ack reactions for inbound messages (default 👀 or `identity.emoji`) with scope controls. Thanks @obviyus for PR #178. - Polls: unify WhatsApp + Discord poll sends via the gateway + CLI (`clawdbot poll`). (#123) — thanks @dbhurley - Onboarding: resolve CLI entrypoint when running via `npx` so gateway daemon install works without a build step. diff --git a/apps/macos/Sources/ClawdbotProtocol/GatewayModels.swift b/apps/macos/Sources/ClawdbotProtocol/GatewayModels.swift index 85ee13fdb..77d74cf85 100644 --- a/apps/macos/Sources/ClawdbotProtocol/GatewayModels.swift +++ b/apps/macos/Sources/ClawdbotProtocol/GatewayModels.swift @@ -655,23 +655,27 @@ public struct SessionsListParams: Codable, Sendable { public let activeminutes: Int? public let includeglobal: Bool? public let includeunknown: Bool? + public let spawnedby: String? public init( limit: Int?, activeminutes: Int?, includeglobal: Bool?, - includeunknown: Bool? + includeunknown: Bool?, + spawnedby: String? 
) { self.limit = limit self.activeminutes = activeminutes self.includeglobal = includeglobal self.includeunknown = includeunknown + self.spawnedby = spawnedby } private enum CodingKeys: String, CodingKey { case limit case activeminutes = "activeMinutes" case includeglobal = "includeGlobal" case includeunknown = "includeUnknown" + case spawnedby = "spawnedBy" } } @@ -681,6 +685,7 @@ public struct SessionsPatchParams: Codable, Sendable { public let verboselevel: AnyCodable? public let elevatedlevel: AnyCodable? public let model: AnyCodable? + public let spawnedby: AnyCodable? public let sendpolicy: AnyCodable? public let groupactivation: AnyCodable? @@ -690,6 +695,7 @@ public struct SessionsPatchParams: Codable, Sendable { verboselevel: AnyCodable?, elevatedlevel: AnyCodable?, model: AnyCodable?, + spawnedby: AnyCodable?, sendpolicy: AnyCodable?, groupactivation: AnyCodable? ) { @@ -698,6 +704,7 @@ public struct SessionsPatchParams: Codable, Sendable { self.verboselevel = verboselevel self.elevatedlevel = elevatedlevel self.model = model + self.spawnedby = spawnedby self.sendpolicy = sendpolicy self.groupactivation = groupactivation } @@ -707,6 +714,7 @@ public struct SessionsPatchParams: Codable, Sendable { case verboselevel = "verboseLevel" case elevatedlevel = "elevatedLevel" case model + case spawnedby = "spawnedBy" case sendpolicy = "sendPolicy" case groupactivation = "groupActivation" } diff --git a/docs/configuration.md b/docs/configuration.md index d5a3a909c..febcbd17e 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -340,7 +340,6 @@ Set `telegram.enabled: false` to disable automatic startup. 
botToken: "your-bot-token", requireMention: true, allowFrom: ["123456789"], - mediaMaxMb: 5, proxy: "socks5://localhost:9050", webhookUrl: "https://example.com/telegram-webhook", webhookSecret: "secret", @@ -615,7 +614,6 @@ If you configure the same alias name (case-insensitive) yourself, your value win verboseDefault: "off", elevatedDefault: "on", timeoutSeconds: 600, - mediaMaxMb: 5, heartbeat: { every: "30m", target: "last" diff --git a/docs/grammy.md b/docs/grammy.md index 7e0c3366a..864053e46 100644 --- a/docs/grammy.md +++ b/docs/grammy.md @@ -18,7 +18,7 @@ Updated: 2025-12-07 - **Proxy:** optional `telegram.proxy` uses `undici.ProxyAgent` through grammY’s `client.baseFetch`. - **Webhook support:** `webhook-set.ts` wraps `setWebhook/deleteWebhook`; `webhook.ts` hosts the callback with health + graceful shutdown. Gateway enables webhook mode when `telegram.webhookUrl` is set (otherwise it long-polls). - **Sessions:** direct chats map to `main`; groups map to `telegram:group:`; replies route back to the same surface. -- **Config knobs:** `telegram.botToken`, `telegram.groups` (allowlist + mention defaults), `telegram.allowFrom`, `telegram.mediaMaxMb`, `telegram.proxy`, `telegram.webhookSecret`, `telegram.webhookUrl`. +- **Config knobs:** `telegram.botToken`, `telegram.groups` (allowlist + mention defaults), `telegram.allowFrom`, `telegram.proxy`, `telegram.webhookSecret`, `telegram.webhookUrl`. - **Tests:** grammy mocks cover DM + group mention gating and outbound send; more media/webhook fixtures still welcome. Open questions diff --git a/docs/telegram.md b/docs/telegram.md index df53c8e5e..d3027010c 100644 --- a/docs/telegram.md +++ b/docs/telegram.md @@ -33,13 +33,14 @@ Status: ready for bot-mode use with grammY (long-polling by default; webhook sup - Sees only messages sent after it’s added to a chat; no pre-history access. - Cannot DM users first; they must initiate. Channels are receive-only unless the bot is an admin poster. 
- File size caps follow Telegram Bot API (up to 2 GB for documents; smaller for some media types). +- Inbound media saved to disk is capped at 5MB (hard limit). - Typing indicators (`sendChatAction`) supported; native replies are **off by default** and enabled via `telegram.replyToMode` + reply tags. ## Planned implementation details - Library: grammY is the only client for send + gateway (fetch fallback removed); grammY throttler is enabled by default to stay under Bot API limits. - Inbound normalization: maps Bot API updates to `MsgContext` with `Surface: "telegram"`, `ChatType: direct|group`, `SenderName`, `MediaPath`/`MediaType` when attachments arrive, `Timestamp`, and reply-to metadata (`ReplyToId`, `ReplyToBody`, `ReplyToSender`) when the user replies; reply context is appended to `Body` as a `[Replying to ...]` block (includes `id:` when available); groups require @bot mention or a `routing.groupChat.mentionPatterns` match by default (override per chat in config). - Outbound: text and media (photo/video/audio/document) with optional caption; chunked to limits. Typing cue sent best-effort. -- Config: `TELEGRAM_BOT_TOKEN` env or `telegram.botToken` required; `telegram.groups` (group allowlist + mention defaults), `telegram.allowFrom`, `telegram.groupAllowFrom`, `telegram.groupPolicy`, `telegram.mediaMaxMb`, `telegram.replyToMode`, `telegram.proxy`, `telegram.webhookSecret`, `telegram.webhookUrl`, `telegram.webhookPath` supported. +- Config: `TELEGRAM_BOT_TOKEN` env or `telegram.botToken` required; `telegram.groups` (group allowlist + mention defaults), `telegram.allowFrom`, `telegram.groupAllowFrom`, `telegram.groupPolicy`, `telegram.replyToMode`, `telegram.proxy`, `telegram.webhookSecret`, `telegram.webhookUrl`, `telegram.webhookPath` supported. - Ack reactions are controlled globally via `messages.ackReaction` + `messages.ackReactionScope`. 
- Mention gating precedence (most specific wins): `telegram.groups..requireMention` → `telegram.groups."*".requireMention` → default `true`. @@ -57,7 +58,6 @@ Example config: allowFrom: ["123456789"], // direct chat ids allowed (or "*") groupPolicy: "allowlist", groupAllowFrom: ["tg:123456789", "@alice"], - mediaMaxMb: 5, proxy: "socks5://localhost:9050", webhookSecret: "mysecret", webhookPath: "/telegram-webhook", diff --git a/src/config/types.ts b/src/config/types.ts index 7d7bb92b5..426439886 100644 --- a/src/config/types.ts +++ b/src/config/types.ts @@ -247,7 +247,6 @@ export type TelegramConfig = { groupPolicy?: GroupPolicy; /** Outbound text chunk size (chars). Default: 4000. */ textChunkLimit?: number; - mediaMaxMb?: number; proxy?: string; webhookUrl?: string; webhookSecret?: string; diff --git a/src/config/zod-schema.ts b/src/config/zod-schema.ts index 6039afb70..f1daf78bb 100644 --- a/src/config/zod-schema.ts +++ b/src/config/zod-schema.ts @@ -516,7 +516,6 @@ export const ClawdbotSchema = z.object({ }) .optional(), timeoutSeconds: z.number().int().positive().optional(), - mediaMaxMb: z.number().positive().optional(), typingIntervalSeconds: z.number().int().positive().optional(), heartbeat: HeartbeatSchema, maxConcurrent: z.number().int().positive().optional(), @@ -728,7 +727,6 @@ export const ClawdbotSchema = z.object({ groupAllowFrom: z.array(z.union([z.string(), z.number()])).optional(), groupPolicy: GroupPolicySchema.optional().default("open"), textChunkLimit: z.number().int().positive().optional(), - mediaMaxMb: z.number().positive().optional(), proxy: z.string().optional(), webhookUrl: z.string().optional(), webhookSecret: z.string().optional(), @@ -749,7 +747,6 @@ export const ClawdbotSchema = z.object({ ephemeral: z.boolean().optional(), }) .optional(), - mediaMaxMb: z.number().positive().optional(), historyLimit: z.number().int().min(0).optional(), actions: z .object({ @@ -814,7 +811,6 @@ export const ClawdbotSchema = z.object({ appToken: 
z.string().optional(), groupPolicy: GroupPolicySchema.optional().default("open"), textChunkLimit: z.number().int().positive().optional(), - mediaMaxMb: z.number().positive().optional(), reactionNotifications: z .enum(["off", "own", "all", "allowlist"]) .optional(), @@ -879,7 +875,6 @@ export const ClawdbotSchema = z.object({ groupAllowFrom: z.array(z.union([z.string(), z.number()])).optional(), groupPolicy: GroupPolicySchema.optional().default("open"), textChunkLimit: z.number().int().positive().optional(), - mediaMaxMb: z.number().positive().optional(), }) .optional(), imessage: z @@ -895,7 +890,6 @@ export const ClawdbotSchema = z.object({ groupAllowFrom: z.array(z.union([z.string(), z.number()])).optional(), groupPolicy: GroupPolicySchema.optional().default("open"), includeAttachments: z.boolean().optional(), - mediaMaxMb: z.number().positive().optional(), textChunkLimit: z.number().int().positive().optional(), groups: z .record( diff --git a/src/media/store.test.ts b/src/media/store.test.ts index 20448cfe7..d7da309e3 100644 --- a/src/media/store.test.ts +++ b/src/media/store.test.ts @@ -52,6 +52,9 @@ describe("media store", () => { await expect(store.saveMediaBuffer(huge)).rejects.toThrow( "Media exceeds 5MB limit", ); + await expect(store.saveMediaBuffer(huge)).rejects.toBeInstanceOf( + store.MediaTooLargeError, + ); }); it("copies local files and cleans old media", async () => { diff --git a/src/media/store.ts b/src/media/store.ts index 4bd99cca7..a23dabe1c 100644 --- a/src/media/store.ts +++ b/src/media/store.ts @@ -9,6 +9,18 @@ import { detectMime, extensionForMime } from "./mime.js"; const MEDIA_DIR = path.join(CONFIG_DIR, "media"); const MAX_BYTES = 5 * 1024 * 1024; // 5MB default + +export class MediaTooLargeError extends Error { + maxBytes: number; + + constructor(maxBytes: number) { + const limitMb = (maxBytes / (1024 * 1024)).toFixed(0); + super(`Media exceeds ${limitMb}MB limit`); + this.name = "MediaTooLargeError"; + this.maxBytes = maxBytes; + } +} 
+ const DEFAULT_TTL_MS = 2 * 60 * 1000; // 2 minutes export function getMediaDir() { @@ -144,7 +156,7 @@ export async function saveMediaSource( throw new Error("Media path is not a file"); } if (stat.size > MAX_BYTES) { - throw new Error("Media exceeds 5MB limit"); + throw new MediaTooLargeError(MAX_BYTES); } const buffer = await fs.readFile(source); const mime = await detectMime({ buffer, filePath: source }); @@ -162,9 +174,7 @@ export async function saveMediaBuffer( maxBytes = MAX_BYTES, ): Promise { if (buffer.byteLength > maxBytes) { - throw new Error( - `Media exceeds ${(maxBytes / (1024 * 1024)).toFixed(0)}MB limit`, - ); + throw new MediaTooLargeError(maxBytes); } const dir = path.join(MEDIA_DIR, subdir); await fs.mkdir(dir, { recursive: true }); diff --git a/src/telegram/bot.media.test.ts b/src/telegram/bot.media.test.ts index 09ca06c20..64b4f248f 100644 --- a/src/telegram/bot.media.test.ts +++ b/src/telegram/bot.media.test.ts @@ -4,15 +4,18 @@ const useSpy = vi.fn(); const onSpy = vi.fn(); const stopSpy = vi.fn(); const sendChatActionSpy = vi.fn(); +const sendMessageSpy = vi.fn().mockResolvedValue({}); type ApiStub = { config: { use: (arg: unknown) => void }; sendChatAction: typeof sendChatActionSpy; + sendMessage: typeof sendMessageSpy; }; const apiStub: ApiStub = { config: { use: useSpy }, sendChatAction: sendChatActionSpy, + sendMessage: sendMessageSpy, }; vi.mock("grammy", () => ({ @@ -31,6 +34,16 @@ vi.mock("@grammyjs/transformer-throttler", () => ({ apiThrottler: () => throttlerSpy(), })); +const saveMediaBufferSpy = vi.fn(); +vi.mock("../media/store.js", async (importOriginal) => { + const actual = await importOriginal(); + saveMediaBufferSpy.mockImplementation(actual.saveMediaBuffer); + return { + ...actual, + saveMediaBuffer: saveMediaBufferSpy, + }; +}); + vi.mock("../config/config.js", async (importOriginal) => { const actual = await importOriginal(); return { @@ -108,6 +121,71 @@ describe("telegram inbound media", () => { 
fetchSpy.mockRestore(); }); + it("notifies when media exceeds size limit", async () => { + const { createTelegramBot } = await import("./bot.js"); + const replyModule = await import("../auto-reply/reply.js"); + const storeModule = await import("../media/store.js"); + const replySpy = replyModule.__replySpy as unknown as ReturnType< + typeof vi.fn + >; + + onSpy.mockReset(); + replySpy.mockReset(); + sendChatActionSpy.mockReset(); + sendMessageSpy.mockClear(); + saveMediaBufferSpy.mockClear(); + + saveMediaBufferSpy.mockRejectedValueOnce( + new storeModule.MediaTooLargeError(5 * 1024 * 1024), + ); + + const runtimeError = vi.fn(); + createTelegramBot({ + token: "tok", + runtime: { + log: vi.fn(), + error: runtimeError, + exit: () => { + throw new Error("exit"); + }, + }, + }); + const handler = onSpy.mock.calls[0]?.[1] as ( + ctx: Record, + ) => Promise; + + const fetchSpy = vi + .spyOn(globalThis, "fetch" as never) + .mockResolvedValueOnce({ + ok: true, + status: 200, + statusText: "OK", + headers: { get: () => "image/jpeg" }, + arrayBuffer: async () => + new Uint8Array([0xff, 0xd8, 0xff, 0x00]).buffer, + } as Response); + + await handler({ + message: { + message_id: 4, + chat: { id: 1234, type: "private" }, + photo: [{ file_id: "fid" }], + }, + me: { username: "clawdbot_bot" }, + getFile: async () => ({ file_path: "photos/too-big.jpg" }), + }); + + expect(sendMessageSpy).toHaveBeenCalledWith( + 1234, + "⚠️ File too large. 
Maximum size is 5MB.", + { reply_to_message_id: 4 }, + ); + expect(replySpy).not.toHaveBeenCalled(); + expect(runtimeError).not.toHaveBeenCalled(); + + fetchSpy.mockRestore(); + }); + it("prefers proxyFetch over global fetch", async () => { const { createTelegramBot } = await import("./bot.js"); diff --git a/src/telegram/bot.ts b/src/telegram/bot.ts index f6b7317f5..7d04eacc7 100644 --- a/src/telegram/bot.ts +++ b/src/telegram/bot.ts @@ -27,7 +27,7 @@ import { formatErrorMessage } from "../infra/errors.js"; import { getChildLogger } from "../logging.js"; import { mediaKindFromMime } from "../media/constants.js"; import { detectMime, isGifMedia } from "../media/mime.js"; -import { saveMediaBuffer } from "../media/store.js"; +import { MediaTooLargeError, saveMediaBuffer } from "../media/store.js"; import { formatLocationText, type NormalizedLocation, @@ -43,6 +43,8 @@ const PARSE_ERR_RE = // with a shared media_group_id. We buffer them and process as a single message after a short delay. const MEDIA_GROUP_TIMEOUT_MS = 500; +const TELEGRAM_MEDIA_MAX_BYTES = 5 * 1024 * 1024; + type TelegramMessage = Message.CommonMessage; type MediaGroupEntry = { @@ -87,7 +89,6 @@ export type TelegramBotOptions = { requireMention?: boolean; allowFrom?: Array; groupAllowFrom?: Array; - mediaMaxMb?: number; replyToMode?: ReplyToMode; proxyFetch?: typeof fetch; }; @@ -155,8 +156,7 @@ export function createTelegramBot(opts: TelegramBotOptions) { const replyToMode = opts.replyToMode ?? cfg.telegram?.replyToMode ?? "off"; const ackReaction = (cfg.messages?.ackReaction ?? "").trim(); const ackReactionScope = cfg.messages?.ackReactionScope ?? "group-mentions"; - const mediaMaxBytes = - (opts.mediaMaxMb ?? cfg.telegram?.mediaMaxMb ?? 
5) * 1024 * 1024; + const mediaMaxBytes = TELEGRAM_MEDIA_MAX_BYTES; const logger = getChildLogger({ module: "telegram-auto-reply" }); const mentionRegexes = buildMentionRegexes(cfg); const resolveGroupPolicy = (chatId: string | number) => @@ -174,6 +174,22 @@ export function createTelegramBot(opts: TelegramBotOptions) { overrideOrder: "after-config", }); + const notifyMediaTooLarge = async ( + chatId: string | number, + messageId: number | undefined, + err: MediaTooLargeError, + ) => { + const limitMb = Math.max(1, Math.floor(err.maxBytes / (1024 * 1024))); + await bot.api + .sendMessage( + chatId, + `⚠️ File too large. Maximum size is ${limitMb}MB.`, + messageId ? { reply_to_message_id: messageId } : undefined, + ) + .catch(() => {}); + logger.warn({ chatId, error: err.message }, "media exceeds size limit"); + }; + const processMessage = async ( primaryCtx: TelegramContext, allMedia: Array<{ path: string; contentType?: string }>, @@ -492,17 +508,8 @@ export function createTelegramBot(opts: TelegramBotOptions) { opts.proxyFetch, ); } catch (mediaErr) { - const errMsg = String(mediaErr); - if (errMsg.includes("exceeds") && errMsg.includes("MB limit")) { - const limitMb = Math.round(mediaMaxBytes / (1024 * 1024)); - await bot.api - .sendMessage( - chatId, - `⚠️ File too large. 
Maximum size is ${limitMb}MB.`, - { reply_to_message_id: msg.message_id }, - ) - .catch(() => {}); - logger.warn({ chatId, error: errMsg }, "media exceeds size limit"); + if (mediaErr instanceof MediaTooLargeError) { + await notifyMediaTooLarge(chatId, msg.message_id, mediaErr); return; } throw mediaErr; @@ -527,12 +534,25 @@ export function createTelegramBot(opts: TelegramBotOptions) { const allMedia: Array<{ path: string; contentType?: string }> = []; for (const { ctx } of entry.messages) { - const media = await resolveMedia( - ctx, - mediaMaxBytes, - opts.token, - opts.proxyFetch, - ); + let media: Awaited<ReturnType<typeof resolveMedia>> = null; + try { + media = await resolveMedia( + ctx, + mediaMaxBytes, + opts.token, + opts.proxyFetch, + ); + } catch (mediaErr) { + if (mediaErr instanceof MediaTooLargeError) { + await notifyMediaTooLarge( + ctx.message.chat.id, + ctx.message.message_id, + mediaErr, + ); + return; + } + throw mediaErr; + } if (media) { allMedia.push({ path: media.path, contentType: media.contentType }); }