diff --git a/CHANGELOG.md b/CHANGELOG.md index 4baf10ea1..59ddcd117 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,9 @@ Docs: https://docs.clawd.bot ### Changes - macOS: strip prerelease/build suffixes when parsing gateway semver patches. (#1110) — thanks @zerone0x. +### Fixes +- Matrix: send voice/image-specific media payloads and keep legacy poll parsing. (#1088) — thanks @sibbl. + ## 2026.1.16-2 ### Changes diff --git a/extensions/matrix/src/matrix/monitor/replies.ts b/extensions/matrix/src/matrix/monitor/replies.ts index e347df1c9..bb8c205ea 100644 --- a/extensions/matrix/src/matrix/monitor/replies.ts +++ b/extensions/matrix/src/matrix/monitor/replies.ts @@ -2,7 +2,7 @@ import type { MatrixClient } from "matrix-js-sdk"; import { chunkMarkdownText } from "../../../../../src/auto-reply/chunk.js"; import type { ReplyPayload } from "../../../../../src/auto-reply/types.js"; -import { danger } from "../../../../../src/globals.js"; +import { danger, logVerbose } from "../../../../../src/globals.js"; import type { RuntimeEnv } from "../../../../../src/runtime.js"; import { sendMessageMatrix } from "../send.js"; @@ -18,7 +18,12 @@ export async function deliverMatrixReplies(params: { const chunkLimit = Math.min(params.textLimit, 4000); let hasReplied = false; for (const reply of params.replies) { - if (!reply?.text && !reply?.mediaUrl && !(reply?.mediaUrls?.length ?? 0)) { + const hasMedia = Boolean(reply?.mediaUrl) || (reply?.mediaUrls?.length ?? 0) > 0; + if (!reply?.text && !hasMedia) { + if (reply?.audioAsVoice) { + logVerbose("matrix reply has audioAsVoice without media/text; skipping"); + continue; + } params.runtime.error?.(danger("matrix reply missing text/media")); continue; } @@ -57,6 +62,7 @@ export async function deliverMatrixReplies(params: { mediaUrl, replyToId: shouldIncludeReply(replyToId) ? replyToId : undefined, threadId: params.threadId, + audioAsVoice: reply.audioAsVoice, }); if (shouldIncludeReply(replyToId)) { hasReplied = true; diff --git a/extensions/matrix/src/matrix/poll-types.test.ts b/extensions/matrix/src/matrix/poll-types.test.ts new file mode 100644 index 000000000..f2d885622 --- /dev/null +++ b/extensions/matrix/src/matrix/poll-types.test.ts @@ -0,0 +1,22 @@ +import { describe, expect, it } from "vitest"; + +import { parsePollStartContent } from "./poll-types.js"; + +describe("parsePollStartContent", () => { + it("parses legacy m.poll payloads", () => { + const summary = parsePollStartContent({ + "m.poll": { + question: { "m.text": "Lunch?" }, + kind: "m.poll.disclosed", + max_selections: 1, + answers: [ + { id: "answer1", "m.text": "Yes" }, + { id: "answer2", "m.text": "No" }, + ], + }, + }); + + expect(summary?.question).toBe("Lunch?"); + expect(summary?.answers).toEqual(["Yes", "No"]); + }); +}); diff --git a/extensions/matrix/src/matrix/poll-types.ts b/extensions/matrix/src/matrix/poll-types.ts index 68ddc9412..d25c4e686 100644 --- a/extensions/matrix/src/matrix/poll-types.ts +++ b/extensions/matrix/src/matrix/poll-types.ts @@ -7,15 +7,17 @@ * - m.poll.end - Closes a poll */ +import type { TimelineEvents } from "matrix-js-sdk/lib/@types/event.js"; +import type { ExtensibleAnyMessageEventContent } from "matrix-js-sdk/lib/@types/extensible_events.js"; import type { PollInput } from "../../../../src/polls.js"; -export const M_POLL_START = "m.poll.start"; -export const M_POLL_RESPONSE = "m.poll.response"; -export const M_POLL_END = "m.poll.end"; +export const M_POLL_START = "m.poll.start" as const; +export const M_POLL_RESPONSE = "m.poll.response" as const; +export const M_POLL_END = "m.poll.end" as const; -export const ORG_POLL_START = "org.matrix.msc3381.poll.start"; -export const ORG_POLL_RESPONSE = "org.matrix.msc3381.poll.response"; -export const ORG_POLL_END = "org.matrix.msc3381.poll.end"; +export const ORG_POLL_START = "org.matrix.msc3381.poll.start" as const; +export const ORG_POLL_RESPONSE = "org.matrix.msc3381.poll.response" as const; +export const ORG_POLL_END = "org.matrix.msc3381.poll.end" as const; export const POLL_EVENT_TYPES = [ M_POLL_START, @@ -32,9 +34,7 @@ export const POLL_END_TYPES = [M_POLL_END, ORG_POLL_END]; export type PollKind = "m.poll.disclosed" | "m.poll.undisclosed"; -export type TextContent = { - "m.text"?: string; - "org.matrix.msc1767.text"?: string; +export type TextContent = ExtensibleAnyMessageEventContent & { body?: string; }; @@ -42,25 +42,19 @@ export type PollAnswer = { id: string; } & TextContent; -export type PollStartContent = { - "m.poll"?: { - question: TextContent; - kind?: PollKind; - max_selections?: number; - answers: PollAnswer[]; - }; - "org.matrix.msc3381.poll.start"?: { - question: TextContent; - kind?: PollKind; - max_selections?: number; - answers: PollAnswer[]; - }; - "m.relates_to"?: { - rel_type: "m.reference"; - event_id: string; - }; +export type PollStartSubtype = { + question: TextContent; + kind?: PollKind; + max_selections?: number; + answers: PollAnswer[]; }; +export type LegacyPollStartContent = { + "m.poll"?: PollStartSubtype; +}; + +export type PollStartContent = TimelineEvents[typeof M_POLL_START] | LegacyPollStartContent; + export type PollSummary = { eventId: string; roomId: string; @@ -82,7 +76,9 @@ export function getTextContent(text?: TextContent): string { } export function parsePollStartContent(content: PollStartContent): PollSummary | null { - const poll = content["m.poll"] ?? content["org.matrix.msc3381.poll.start"]; + const poll = (content as Record)[M_POLL_START] + ?? (content as Record)[ORG_POLL_START] + ?? (content as Record)["m.poll"]; if (!poll) return null; const question = getTextContent(poll.question); @@ -121,6 +117,11 @@ function buildTextContent(body: string): TextContent { }; } +function buildPollFallbackText(question: string, answers: string[]): string { + if (answers.length === 0) return question; + return `${question}\n${answers.map((answer, idx) => `${idx + 1}. ${answer}`).join("\n")}`; +} + export function buildPollStartContent(poll: PollInput): PollStartContent { const question = poll.question.trim(); const answers = poll.options @@ -132,13 +133,19 @@ export function buildPollStartContent(poll: PollInput): PollStartContent { })); const maxSelections = poll.multiple ? Math.max(1, answers.length) : 1; + const fallbackText = buildPollFallbackText( + question, + answers.map((answer) => getTextContent(answer)), + ); return { - "m.poll": { + [M_POLL_START]: { question: buildTextContent(question), kind: poll.multiple ? "m.poll.undisclosed" : "m.poll.disclosed", max_selections: maxSelections, answers, }, + "m.text": fallbackText, + "org.matrix.msc1767.text": fallbackText, }; } diff --git a/extensions/matrix/src/matrix/send.test.ts b/extensions/matrix/src/matrix/send.test.ts index 1d9c746e4..71ba3c79f 100644 --- a/extensions/matrix/src/matrix/send.test.ts +++ b/extensions/matrix/src/matrix/send.test.ts @@ -31,6 +31,11 @@ vi.mock("../../../../src/web/media.js", () => ({ }), })); +vi.mock("../../../../src/media/image-ops.js", () => ({ + getImageMetadata: vi.fn().mockResolvedValue(null), + resizeToJpeg: vi.fn(), +})); + let sendMessageMatrix: typeof import("./send.js").sendMessageMatrix; const makeClient = () => { @@ -65,13 +70,13 @@ describe("sendMessageMatrix media", () => { const uploadArg = uploadContent.mock.calls[0]?.[0]; expect(Buffer.isBuffer(uploadArg)).toBe(true); - const content = sendMessage.mock.calls[0]?.[2] as { + const content = sendMessage.mock.calls[0]?.[1] as { url?: string; msgtype?: string; format?: string; formatted_body?: string; }; - expect(content.msgtype).toBe("m.file"); + expect(content.msgtype).toBe("m.image"); expect(content.format).toBe("org.matrix.custom.html"); expect(content.formatted_body).toContain("caption"); expect(content.url).toBe("mxc://example/file"); diff --git a/extensions/matrix/src/matrix/send.ts b/extensions/matrix/src/matrix/send.ts index 47b566eb3..7e1e2b2fe 100644 --- a/extensions/matrix/src/matrix/send.ts +++ b/extensions/matrix/src/matrix/send.ts @@ -1,12 +1,15 @@ import type { AccountDataEvents, MatrixClient } from "matrix-js-sdk"; import { EventType, MsgType, RelationType } from "matrix-js-sdk"; import type { - ReactionEventContent, RoomMessageEventContent, + ReactionEventContent, } from "matrix-js-sdk/lib/@types/events.js"; import { chunkMarkdownText, resolveTextChunkLimit } from "../../../../src/auto-reply/chunk.js"; import { loadConfig } from "../../../../src/config/config.js"; +import { isVoiceCompatibleAudio } from "../../../../src/media/audio.js"; +import { mediaKindFromMime } from "../../../../src/media/constants.js"; +import { getImageMetadata, resizeToJpeg } from "../../../../src/media/image-ops.js"; import type { PollInput } from "../../../../src/polls.js"; import { loadWebMedia } from "../../../../src/web/media.js"; import { getActiveMatrixClient } from "./active-client.js"; @@ -47,6 +50,8 @@ export type MatrixSendOpts = { replyToId?: string; threadId?: string | number | null; timeoutMs?: number; + /** Send audio as voice message (voice bubble) instead of audio file. Defaults to false. */ + audioAsVoice?: boolean; }; function ensureNodeRuntime() { @@ -71,6 +76,12 @@ function normalizeTarget(raw: string): string { return trimmed; } +function normalizeThreadId(raw?: string | number | null): string | null { + if (raw === undefined || raw === null) return null; + const trimmed = String(raw).trim(); + return trimmed ? trimmed : null; +} + async function resolveDirectRoomId(client: MatrixClient, userId: string): Promise { const trimmed = userId.trim(); if (!trimmed.startsWith("@")) { @@ -119,6 +130,18 @@ export async function resolveMatrixRoomId( return target; } +type MatrixImageInfo = { + w?: number; + h?: number; + thumbnail_url?: string; + thumbnail_info?: { + w: number; + h: number; + mimetype: string; + size: number; + }; +}; + function buildMediaContent(params: { msgtype: MsgType.Image | MsgType.Audio | MsgType.Video | MsgType.File; body: string; @@ -127,8 +150,24 @@ function buildMediaContent(params: { mimetype?: string; size: number; relation?: MatrixReplyRelation; + isVoice?: boolean; + durationMs?: number; + imageInfo?: MatrixImageInfo; }): RoomMessageEventContent { - const info = { mimetype: params.mimetype, size: params.size }; + const info: Record = { mimetype: params.mimetype, size: params.size }; + if (params.durationMs !== undefined) { + info.duration = params.durationMs; + } + if (params.imageInfo) { + if (params.imageInfo.w) info.w = params.imageInfo.w; + if (params.imageInfo.h) info.h = params.imageInfo.h; + if (params.imageInfo.thumbnail_url) { + info.thumbnail_url = params.imageInfo.thumbnail_url; + if (params.imageInfo.thumbnail_info) { + info.thumbnail_info = params.imageInfo.thumbnail_info; + } + } + } const base: MatrixMessageContent = { msgtype: params.msgtype, body: params.body, @@ -136,6 +175,12 @@ function buildMediaContent(params: { info, url: params.url, }; + if (params.isVoice) { + base["org.matrix.msc3245.voice"] = {}; + base["org.matrix.msc1767.audio"] = { + duration: params.durationMs, + }; + } if (params.relation) { base["m.relates_to"] = params.relation; } @@ -171,6 +216,75 @@ function buildReplyRelation(replyToId?: string): MatrixReplyRelation | undefined return { "m.in_reply_to": { event_id: trimmed } }; } +function resolveMatrixMsgType( + contentType?: string, + fileName?: string, +): MsgType.Image | MsgType.Audio | MsgType.Video | MsgType.File { + const kind = mediaKindFromMime(contentType ?? ""); + switch (kind) { + case "image": + return MsgType.Image; + case "audio": + return MsgType.Audio; + case "video": + return MsgType.Video; + default: + return MsgType.File; + } +} + +function resolveMatrixVoiceDecision(opts: { + wantsVoice: boolean; + contentType?: string; + fileName?: string; +}): { useVoice: boolean } { + if (!opts.wantsVoice) return { useVoice: false }; + if (isVoiceCompatibleAudio({ contentType: opts.contentType, fileName: opts.fileName })) { + return { useVoice: true }; + } + return { useVoice: false }; +} + +const THUMBNAIL_MAX_SIDE = 800; +const THUMBNAIL_QUALITY = 80; + +async function prepareImageInfo(params: { + buffer: Buffer; + client: MatrixClient; +}): Promise { + const meta = await getImageMetadata(params.buffer).catch(() => null); + if (!meta) return undefined; + const imageInfo: MatrixImageInfo = { w: meta.width, h: meta.height }; + const maxDim = Math.max(meta.width, meta.height); + if (maxDim > THUMBNAIL_MAX_SIDE) { + try { + const thumbBuffer = await resizeToJpeg({ + buffer: params.buffer, + maxSide: THUMBNAIL_MAX_SIDE, + quality: THUMBNAIL_QUALITY, + withoutEnlargement: true, + }); + const thumbMeta = await getImageMetadata(thumbBuffer).catch(() => null); + const thumbUri = await params.client.uploadContent(thumbBuffer as MatrixUploadContent, { + type: "image/jpeg", + name: "thumbnail.jpg", + }); + imageInfo.thumbnail_url = thumbUri.content_uri; + if (thumbMeta) { + imageInfo.thumbnail_info = { + w: thumbMeta.width, + h: thumbMeta.height, + mimetype: "image/jpeg", + size: thumbBuffer.byteLength, + }; + } + } catch { + // Thumbnail generation failed, continue without it + } + } + return imageInfo; +} + async function uploadFile( client: MatrixClient, file: MatrixUploadContent | Buffer, @@ -238,14 +352,10 @@ export async function sendMessageMatrix( const textLimit = resolveTextChunkLimit(cfg, "matrix"); const chunkLimit = Math.min(textLimit, MATRIX_TEXT_LIMIT); const chunks = chunkMarkdownText(trimmedMessage, chunkLimit); - const rawThreadId = opts.threadId; - const threadId = - rawThreadId !== undefined && rawThreadId !== null - ? String(rawThreadId).trim() - : null; + const threadId = normalizeThreadId(opts.threadId); const relation = threadId ? undefined : buildReplyRelation(opts.replyToId); const sendContent = (content: RoomMessageEventContent) => - client.sendMessage(roomId, threadId ?? undefined, content); + threadId ? client.sendMessage(roomId, threadId, content) : client.sendMessage(roomId, content); let lastMessageId = ""; if (opts.mediaUrl) { @@ -255,9 +365,17 @@ export async function sendMessageMatrix( contentType: media.contentType, filename: media.fileName, }); - const msgtype = MsgType.File; + const baseMsgType = resolveMatrixMsgType(media.contentType, media.fileName); + const { useVoice } = resolveMatrixVoiceDecision({ + wantsVoice: opts.audioAsVoice === true, + contentType: media.contentType, + fileName: media.fileName, + }); + const msgtype = useVoice ? MsgType.Audio : baseMsgType; + const isImage = msgtype === MsgType.Image; + const imageInfo = isImage ? await prepareImageInfo({ buffer: media.buffer, client }) : undefined; const [firstChunk, ...rest] = chunks; - const body = firstChunk ?? media.fileName ?? "(file)"; + const body = useVoice ? "Voice message" : (firstChunk ?? media.fileName ?? "(file)"); const content = buildMediaContent({ msgtype, body, @@ -266,10 +384,13 @@ export async function sendMessageMatrix( mimetype: media.contentType, size: media.buffer.byteLength, relation, + isVoice: useVoice, + imageInfo, }); const response = await sendContent(content); lastMessageId = response.event_id ?? lastMessageId; - for (const chunk of rest) { + const textChunks = useVoice ? chunks : rest; + for (const chunk of textChunks) { const text = chunk.trim(); if (!text) continue; const followup = buildTextContent(text); @@ -316,17 +437,19 @@ export async function sendPollMatrix( try { const roomId = await resolveMatrixRoomId(client, to); const pollContent = buildPollStartContent(poll); - const rawThreadId = opts.threadId; - const threadId = - rawThreadId !== undefined && rawThreadId !== null - ? String(rawThreadId).trim() - : null; - const response = await client.sendEvent( - roomId, - threadId ?? undefined, - M_POLL_START as EventType.RoomMessage, - pollContent as unknown as RoomMessageEventContent, - ); + const threadId = normalizeThreadId(opts.threadId); + const response = threadId + ? await client.sendEvent( + roomId, + threadId, + M_POLL_START, + pollContent, + ) + : await client.sendEvent( + roomId, + M_POLL_START, + pollContent, + ); return { eventId: response.event_id ?? "unknown",