fix(telegram): warn on oversized media

This commit is contained in:
Peter Steinberger 2026-01-06 16:16:14 +01:00
parent aae5926db9
commit 9254f4b738
11 changed files with 149 additions and 38 deletions

View File

@ -24,6 +24,7 @@
- Google: merge consecutive messages to satisfy strict role alternation for Google provider models. Thanks @Asleep123 for PR #266.
- Postinstall: handle targetDir symlinks in the install script. Thanks @obviyus for PR #272.
- WhatsApp/Telegram: add groupPolicy handling for group messages and normalize allowFrom matching (tg/telegram prefixes). Thanks @mneves75.
- Telegram: warn users when inbound media exceeds the 5MB limit. Thanks @jarvis-medmatic.
- Auto-reply: add configurable ack reactions for inbound messages (default 👀 or `identity.emoji`) with scope controls. Thanks @obviyus for PR #178.
- Polls: unify WhatsApp + Discord poll sends via the gateway + CLI (`clawdbot poll`). (#123) — thanks @dbhurley
- Onboarding: resolve CLI entrypoint when running via `npx` so gateway daemon install works without a build step.

View File

@ -655,23 +655,27 @@ public struct SessionsListParams: Codable, Sendable {
public let activeminutes: Int?
public let includeglobal: Bool?
public let includeunknown: Bool?
public let spawnedby: String?
public init(
limit: Int?,
activeminutes: Int?,
includeglobal: Bool?,
includeunknown: Bool?
includeunknown: Bool?,
spawnedby: String?
) {
self.limit = limit
self.activeminutes = activeminutes
self.includeglobal = includeglobal
self.includeunknown = includeunknown
self.spawnedby = spawnedby
}
private enum CodingKeys: String, CodingKey {
case limit
case activeminutes = "activeMinutes"
case includeglobal = "includeGlobal"
case includeunknown = "includeUnknown"
case spawnedby = "spawnedBy"
}
}
@ -681,6 +685,7 @@ public struct SessionsPatchParams: Codable, Sendable {
public let verboselevel: AnyCodable?
public let elevatedlevel: AnyCodable?
public let model: AnyCodable?
public let spawnedby: AnyCodable?
public let sendpolicy: AnyCodable?
public let groupactivation: AnyCodable?
@ -690,6 +695,7 @@ public struct SessionsPatchParams: Codable, Sendable {
verboselevel: AnyCodable?,
elevatedlevel: AnyCodable?,
model: AnyCodable?,
spawnedby: AnyCodable?,
sendpolicy: AnyCodable?,
groupactivation: AnyCodable?
) {
@ -698,6 +704,7 @@ public struct SessionsPatchParams: Codable, Sendable {
self.verboselevel = verboselevel
self.elevatedlevel = elevatedlevel
self.model = model
self.spawnedby = spawnedby
self.sendpolicy = sendpolicy
self.groupactivation = groupactivation
}
@ -707,6 +714,7 @@ public struct SessionsPatchParams: Codable, Sendable {
case verboselevel = "verboseLevel"
case elevatedlevel = "elevatedLevel"
case model
case spawnedby = "spawnedBy"
case sendpolicy = "sendPolicy"
case groupactivation = "groupActivation"
}

View File

@ -340,7 +340,6 @@ Set `telegram.enabled: false` to disable automatic startup.
botToken: "your-bot-token",
requireMention: true,
allowFrom: ["123456789"],
mediaMaxMb: 5,
proxy: "socks5://localhost:9050",
webhookUrl: "https://example.com/telegram-webhook",
webhookSecret: "secret",
@ -615,7 +614,6 @@ If you configure the same alias name (case-insensitive) yourself, your value win
verboseDefault: "off",
elevatedDefault: "on",
timeoutSeconds: 600,
mediaMaxMb: 5,
heartbeat: {
every: "30m",
target: "last"

View File

@ -18,7 +18,7 @@ Updated: 2025-12-07
- **Proxy:** optional `telegram.proxy` uses `undici.ProxyAgent` through grammY's `client.baseFetch`.
- **Webhook support:** `webhook-set.ts` wraps `setWebhook/deleteWebhook`; `webhook.ts` hosts the callback with health + graceful shutdown. Gateway enables webhook mode when `telegram.webhookUrl` is set (otherwise it long-polls).
- **Sessions:** direct chats map to `main`; groups map to `telegram:group:<chatId>`; replies route back to the same surface.
- **Config knobs:** `telegram.botToken`, `telegram.groups` (allowlist + mention defaults), `telegram.allowFrom`, `telegram.mediaMaxMb`, `telegram.proxy`, `telegram.webhookSecret`, `telegram.webhookUrl`.
- **Config knobs:** `telegram.botToken`, `telegram.groups` (allowlist + mention defaults), `telegram.allowFrom`, `telegram.proxy`, `telegram.webhookSecret`, `telegram.webhookUrl`.
- **Tests:** grammy mocks cover DM + group mention gating and outbound send; more media/webhook fixtures still welcome.
Open questions

View File

@ -33,13 +33,14 @@ Status: ready for bot-mode use with grammY (long-polling by default; webhook sup
- Sees only messages sent after it's added to a chat; no pre-history access.
- Cannot DM users first; they must initiate. Channels are receive-only unless the bot is an admin poster.
- File size caps follow Telegram Bot API (up to 2 GB for documents; smaller for some media types).
- Inbound media saved to disk is capped at 5MB (hard limit).
- Typing indicators (`sendChatAction`) supported; native replies are **off by default** and enabled via `telegram.replyToMode` + reply tags.
## Planned implementation details
- Library: grammY is the only client for send + gateway (fetch fallback removed); grammY throttler is enabled by default to stay under Bot API limits.
- Inbound normalization: maps Bot API updates to `MsgContext` with `Surface: "telegram"`, `ChatType: direct|group`, `SenderName`, `MediaPath`/`MediaType` when attachments arrive, `Timestamp`, and reply-to metadata (`ReplyToId`, `ReplyToBody`, `ReplyToSender`) when the user replies; reply context is appended to `Body` as a `[Replying to ...]` block (includes `id:` when available); groups require @bot mention or a `routing.groupChat.mentionPatterns` match by default (override per chat in config).
- Outbound: text and media (photo/video/audio/document) with optional caption; chunked to limits. Typing cue sent best-effort.
- Config: `TELEGRAM_BOT_TOKEN` env or `telegram.botToken` required; `telegram.groups` (group allowlist + mention defaults), `telegram.allowFrom`, `telegram.groupAllowFrom`, `telegram.groupPolicy`, `telegram.mediaMaxMb`, `telegram.replyToMode`, `telegram.proxy`, `telegram.webhookSecret`, `telegram.webhookUrl`, `telegram.webhookPath` supported.
- Config: `TELEGRAM_BOT_TOKEN` env or `telegram.botToken` required; `telegram.groups` (group allowlist + mention defaults), `telegram.allowFrom`, `telegram.groupAllowFrom`, `telegram.groupPolicy`, `telegram.replyToMode`, `telegram.proxy`, `telegram.webhookSecret`, `telegram.webhookUrl`, `telegram.webhookPath` supported.
- Ack reactions are controlled globally via `messages.ackReaction` + `messages.ackReactionScope`.
- Mention gating precedence (most specific wins): `telegram.groups.<chatId>.requireMention` → `telegram.groups."*".requireMention` → default `true`.
@ -57,7 +58,6 @@ Example config:
allowFrom: ["123456789"], // direct chat ids allowed (or "*")
groupPolicy: "allowlist",
groupAllowFrom: ["tg:123456789", "@alice"],
mediaMaxMb: 5,
proxy: "socks5://localhost:9050",
webhookSecret: "mysecret",
webhookPath: "/telegram-webhook",

View File

@ -247,7 +247,6 @@ export type TelegramConfig = {
groupPolicy?: GroupPolicy;
/** Outbound text chunk size (chars). Default: 4000. */
textChunkLimit?: number;
mediaMaxMb?: number;
proxy?: string;
webhookUrl?: string;
webhookSecret?: string;

View File

@ -516,7 +516,6 @@ export const ClawdbotSchema = z.object({
})
.optional(),
timeoutSeconds: z.number().int().positive().optional(),
mediaMaxMb: z.number().positive().optional(),
typingIntervalSeconds: z.number().int().positive().optional(),
heartbeat: HeartbeatSchema,
maxConcurrent: z.number().int().positive().optional(),
@ -728,7 +727,6 @@ export const ClawdbotSchema = z.object({
groupAllowFrom: z.array(z.union([z.string(), z.number()])).optional(),
groupPolicy: GroupPolicySchema.optional().default("open"),
textChunkLimit: z.number().int().positive().optional(),
mediaMaxMb: z.number().positive().optional(),
proxy: z.string().optional(),
webhookUrl: z.string().optional(),
webhookSecret: z.string().optional(),
@ -749,7 +747,6 @@ export const ClawdbotSchema = z.object({
ephemeral: z.boolean().optional(),
})
.optional(),
mediaMaxMb: z.number().positive().optional(),
historyLimit: z.number().int().min(0).optional(),
actions: z
.object({
@ -814,7 +811,6 @@ export const ClawdbotSchema = z.object({
appToken: z.string().optional(),
groupPolicy: GroupPolicySchema.optional().default("open"),
textChunkLimit: z.number().int().positive().optional(),
mediaMaxMb: z.number().positive().optional(),
reactionNotifications: z
.enum(["off", "own", "all", "allowlist"])
.optional(),
@ -879,7 +875,6 @@ export const ClawdbotSchema = z.object({
groupAllowFrom: z.array(z.union([z.string(), z.number()])).optional(),
groupPolicy: GroupPolicySchema.optional().default("open"),
textChunkLimit: z.number().int().positive().optional(),
mediaMaxMb: z.number().positive().optional(),
})
.optional(),
imessage: z
@ -895,7 +890,6 @@ export const ClawdbotSchema = z.object({
groupAllowFrom: z.array(z.union([z.string(), z.number()])).optional(),
groupPolicy: GroupPolicySchema.optional().default("open"),
includeAttachments: z.boolean().optional(),
mediaMaxMb: z.number().positive().optional(),
textChunkLimit: z.number().int().positive().optional(),
groups: z
.record(

View File

@ -52,6 +52,9 @@ describe("media store", () => {
await expect(store.saveMediaBuffer(huge)).rejects.toThrow(
"Media exceeds 5MB limit",
);
await expect(store.saveMediaBuffer(huge)).rejects.toBeInstanceOf(
store.MediaTooLargeError,
);
});
it("copies local files and cleans old media", async () => {

View File

@ -9,6 +9,18 @@ import { detectMime, extensionForMime } from "./mime.js";
const MEDIA_DIR = path.join(CONFIG_DIR, "media");
const MAX_BYTES = 5 * 1024 * 1024; // 5MB default
/**
 * Error thrown when a media payload is larger than the allowed byte budget.
 *
 * The message reports the limit in megabytes (rounded to a whole number via
 * `toFixed(0)`), and the raw byte limit is kept on `maxBytes` so callers can
 * build their own user-facing text.
 */
export class MediaTooLargeError extends Error {
  /** The byte limit that the rejected media exceeded. */
  maxBytes: number;

  /**
   * @param maxBytes - The size limit, in bytes, that was exceeded.
   */
  constructor(maxBytes: number) {
    const bytesPerMb = 1024 * 1024;
    const wholeMb = (maxBytes / bytesPerMb).toFixed(0);
    super(`Media exceeds ${wholeMb}MB limit`);
    this.maxBytes = maxBytes;
    // A distinct name lets logs distinguish this from generic Error instances.
    this.name = "MediaTooLargeError";
  }
}
const DEFAULT_TTL_MS = 2 * 60 * 1000; // 2 minutes
export function getMediaDir() {
@ -144,7 +156,7 @@ export async function saveMediaSource(
throw new Error("Media path is not a file");
}
if (stat.size > MAX_BYTES) {
throw new Error("Media exceeds 5MB limit");
throw new MediaTooLargeError(MAX_BYTES);
}
const buffer = await fs.readFile(source);
const mime = await detectMime({ buffer, filePath: source });
@ -162,9 +174,7 @@ export async function saveMediaBuffer(
maxBytes = MAX_BYTES,
): Promise<SavedMedia> {
if (buffer.byteLength > maxBytes) {
throw new Error(
`Media exceeds ${(maxBytes / (1024 * 1024)).toFixed(0)}MB limit`,
);
throw new MediaTooLargeError(maxBytes);
}
const dir = path.join(MEDIA_DIR, subdir);
await fs.mkdir(dir, { recursive: true });

View File

@ -4,15 +4,18 @@ const useSpy = vi.fn();
const onSpy = vi.fn();
const stopSpy = vi.fn();
const sendChatActionSpy = vi.fn();
const sendMessageSpy = vi.fn().mockResolvedValue({});
type ApiStub = {
config: { use: (arg: unknown) => void };
sendChatAction: typeof sendChatActionSpy;
sendMessage: typeof sendMessageSpy;
};
const apiStub: ApiStub = {
config: { use: useSpy },
sendChatAction: sendChatActionSpy,
sendMessage: sendMessageSpy,
};
vi.mock("grammy", () => ({
@ -31,6 +34,16 @@ vi.mock("@grammyjs/transformer-throttler", () => ({
apiThrottler: () => throttlerSpy(),
}));
// Spy that stands in for `saveMediaBuffer` from the media store module.
const saveMediaBufferSpy = vi.fn();
// Partially mock the media store: every export stays real except
// `saveMediaBuffer`, which is routed through the spy. The spy delegates to the
// real implementation by default, so a test only changes behavior when it
// installs an override (e.g. `mockRejectedValueOnce`).
vi.mock("../media/store.js", async (importOriginal) => {
const actual = await importOriginal<typeof import("../media/store.js")>();
saveMediaBufferSpy.mockImplementation(actual.saveMediaBuffer);
return {
...actual,
saveMediaBuffer: saveMediaBufferSpy,
};
});
vi.mock("../config/config.js", async (importOriginal) => {
const actual = await importOriginal<typeof import("../config/config.js")>();
return {
@ -108,6 +121,71 @@ describe("telegram inbound media", () => {
fetchSpy.mockRestore();
});
// Verifies the oversize path: when saving inbound media rejects with
// MediaTooLargeError, the bot sends a "file too large" message in reply to the
// offending message, does not invoke the auto-reply, and does not report a
// runtime error.
it("notifies when media exceeds size limit", async () => {
const { createTelegramBot } = await import("./bot.js");
const replyModule = await import("../auto-reply/reply.js");
const storeModule = await import("../media/store.js");
const replySpy = replyModule.__replySpy as unknown as ReturnType<
typeof vi.fn
>;
// Start from a clean slate so calls recorded by earlier tests do not leak in.
onSpy.mockReset();
replySpy.mockReset();
sendChatActionSpy.mockReset();
sendMessageSpy.mockClear();
saveMediaBufferSpy.mockClear();
// Make only the next save attempt fail with the 5MB oversize error.
saveMediaBufferSpy.mockRejectedValueOnce(
new storeModule.MediaTooLargeError(5 * 1024 * 1024),
);
const runtimeError = vi.fn();
createTelegramBot({
token: "tok",
runtime: {
log: vi.fn(),
error: runtimeError,
exit: () => {
throw new Error("exit");
},
},
});
// Second argument of the first recorded `onSpy` call — presumably the message
// handler the bot registered (the grammy mock wiring is outside this view).
const handler = onSpy.mock.calls[0]?.[1] as (
ctx: Record<string, unknown>,
) => Promise<void>;
// Stub the download so it succeeds with a tiny payload reported as image/jpeg;
// the size failure comes from the mocked save, not from the fetched bytes.
const fetchSpy = vi
.spyOn(globalThis, "fetch" as never)
.mockResolvedValueOnce({
ok: true,
status: 200,
statusText: "OK",
headers: { get: () => "image/jpeg" },
arrayBuffer: async () =>
new Uint8Array([0xff, 0xd8, 0xff, 0x00]).buffer,
} as Response);
// Deliver a private-chat photo message to the handler.
await handler({
message: {
message_id: 4,
chat: { id: 1234, type: "private" },
photo: [{ file_id: "fid" }],
},
me: { username: "clawdbot_bot" },
getFile: async () => ({ file_path: "photos/too-big.jpg" }),
});
// The warning is sent to the originating chat as a reply to message 4.
expect(sendMessageSpy).toHaveBeenCalledWith(
1234,
"⚠️ File too large. Maximum size is 5MB.",
{ reply_to_message_id: 4 },
);
// The oversize case ends the handler early: no auto-reply, no runtime error.
expect(replySpy).not.toHaveBeenCalled();
expect(runtimeError).not.toHaveBeenCalled();
fetchSpy.mockRestore();
});
it("prefers proxyFetch over global fetch", async () => {
const { createTelegramBot } = await import("./bot.js");

View File

@ -27,7 +27,7 @@ import { formatErrorMessage } from "../infra/errors.js";
import { getChildLogger } from "../logging.js";
import { mediaKindFromMime } from "../media/constants.js";
import { detectMime, isGifMedia } from "../media/mime.js";
import { saveMediaBuffer } from "../media/store.js";
import { MediaTooLargeError, saveMediaBuffer } from "../media/store.js";
import {
formatLocationText,
type NormalizedLocation,
@ -43,6 +43,8 @@ const PARSE_ERR_RE =
// with a shared media_group_id. We buffer them and process as a single message after a short delay.
const MEDIA_GROUP_TIMEOUT_MS = 500;
const TELEGRAM_MEDIA_MAX_BYTES = 5 * 1024 * 1024;
type TelegramMessage = Message.CommonMessage;
type MediaGroupEntry = {
@ -87,7 +89,6 @@ export type TelegramBotOptions = {
requireMention?: boolean;
allowFrom?: Array<string | number>;
groupAllowFrom?: Array<string | number>;
mediaMaxMb?: number;
replyToMode?: ReplyToMode;
proxyFetch?: typeof fetch;
};
@ -155,8 +156,7 @@ export function createTelegramBot(opts: TelegramBotOptions) {
const replyToMode = opts.replyToMode ?? cfg.telegram?.replyToMode ?? "off";
const ackReaction = (cfg.messages?.ackReaction ?? "").trim();
const ackReactionScope = cfg.messages?.ackReactionScope ?? "group-mentions";
const mediaMaxBytes =
(opts.mediaMaxMb ?? cfg.telegram?.mediaMaxMb ?? 5) * 1024 * 1024;
const mediaMaxBytes = TELEGRAM_MEDIA_MAX_BYTES;
const logger = getChildLogger({ module: "telegram-auto-reply" });
const mentionRegexes = buildMentionRegexes(cfg);
const resolveGroupPolicy = (chatId: string | number) =>
@ -174,6 +174,22 @@ export function createTelegramBot(opts: TelegramBotOptions) {
overrideOrder: "after-config",
});
/**
 * Tells the chat that an inbound file was rejected for being too large, then
 * logs a warning.
 *
 * The Telegram send is best-effort: a rejected `sendMessage` promise is
 * swallowed so a failed notification never masks the original size error
 * handling. The limit shown to the user is derived from `err.maxBytes`,
 * floored to whole megabytes and never reported as less than 1MB.
 *
 * @param chatId - Chat to notify.
 * @param messageId - Message to reply to; when falsy the warning is sent without reply options.
 * @param err - The size error raised while saving the media.
 */
const notifyMediaTooLarge = async (
  chatId: string | number,
  messageId: number | undefined,
  err: MediaTooLargeError,
) => {
  const bytesPerMb = 1024 * 1024;
  const limitMb = Math.max(1, Math.floor(err.maxBytes / bytesPerMb));
  const warningText = `⚠️ File too large. Maximum size is ${limitMb}MB.`;
  const replyOptions = messageId
    ? { reply_to_message_id: messageId }
    : undefined;

  // Best-effort: ignore delivery failures and still record the warning below.
  await bot.api.sendMessage(chatId, warningText, replyOptions).catch(() => {});

  logger.warn({ chatId, error: err.message }, "media exceeds size limit");
};
const processMessage = async (
primaryCtx: TelegramContext,
allMedia: Array<{ path: string; contentType?: string }>,
@ -492,17 +508,8 @@ export function createTelegramBot(opts: TelegramBotOptions) {
opts.proxyFetch,
);
} catch (mediaErr) {
const errMsg = String(mediaErr);
if (errMsg.includes("exceeds") && errMsg.includes("MB limit")) {
const limitMb = Math.round(mediaMaxBytes / (1024 * 1024));
await bot.api
.sendMessage(
chatId,
`⚠️ File too large. Maximum size is ${limitMb}MB.`,
{ reply_to_message_id: msg.message_id },
)
.catch(() => {});
logger.warn({ chatId, error: errMsg }, "media exceeds size limit");
if (mediaErr instanceof MediaTooLargeError) {
await notifyMediaTooLarge(chatId, msg.message_id, mediaErr);
return;
}
throw mediaErr;
@ -527,12 +534,25 @@ export function createTelegramBot(opts: TelegramBotOptions) {
const allMedia: Array<{ path: string; contentType?: string }> = [];
for (const { ctx } of entry.messages) {
const media = await resolveMedia(
ctx,
mediaMaxBytes,
opts.token,
opts.proxyFetch,
);
let media: Awaited<ReturnType<typeof resolveMedia>> = null;
try {
media = await resolveMedia(
ctx,
mediaMaxBytes,
opts.token,
opts.proxyFetch,
);
} catch (mediaErr) {
if (mediaErr instanceof MediaTooLargeError) {
await notifyMediaTooLarge(
ctx.message.chat.id,
ctx.message.message_id,
mediaErr,
);
return;
}
throw mediaErr;
}
if (media) {
allMedia.push({ path: media.path, contentType: media.contentType });
}