Merge 586cf569fd into 09be5d45d5
This commit is contained in:
commit
80d912f9c7
@ -162,6 +162,48 @@ function looksLikeUtf8Text(buffer?: Buffer): boolean {
|
||||
return printable / total > 0.85;
|
||||
}
|
||||
|
||||
/**
 * Reports whether a buffer begins with the file signature ("magic bytes")
 * of a known binary audio container.
 *
 * Why this exists: OGG files (e.g. Telegram voice messages, which are OGG
 * Opus) carry Vorbis/Opus comment metadata made of printable strings, so
 * their headers can satisfy looksLikeUtf8Text() and be mistaken for text.
 * A leading signature identifies the format regardless of the declared
 * MIME type or the file extension.
 *
 * Recognized signatures:
 *  - "OggS": OGG capture pattern at byte offset 0
 *    (RFC 3533, Section 6: https://datatracker.ietf.org/doc/html/rfc3533).
 *    Covers OGG Vorbis, OGG Opus, OGG Theora, etc.
 *  - "ID3": ID3v2 tag identifier at the start of an MP3 file
 *    (Section 3.1: https://id3.org/id3v2.4.0-structure). ID3v2 tags may
 *    hold large amounts of ASCII text such as lyrics and comments.
 *
 * @param buffer - Leading bytes of the attachment; may be absent.
 * @returns `true` when the buffer holds at least four bytes and starts with
 *   one of the signatures above; `false` otherwise (including when the
 *   buffer is missing or shorter than four bytes).
 * @see https://github.com/moltbot/moltbot/issues/1989
 */
function hasBinaryAudioMagic(buffer?: Buffer): boolean {
  if (!buffer) {
    return false;
  }
  // The four-byte minimum applies to every signature, including the
  // three-byte "ID3" one.
  if (buffer.length < 4) {
    return false;
  }

  const bytes: Buffer = buffer;
  const signatures: ReadonlyArray<readonly number[]> = [
    [0x4f, 0x67, 0x67, 0x53], // "OggS"
    [0x49, 0x44, 0x33], // "ID3"
  ];

  for (const signature of signatures) {
    const matches = signature.every((expected, offset) => bytes[offset] === expected);
    if (matches) {
      return true;
    }
  }
  return false;
}
|
||||
|
||||
function decodeTextSample(buffer?: Buffer): string {
|
||||
if (!buffer || buffer.length === 0) return "";
|
||||
const sample = buffer.subarray(0, Math.min(buffer.length, 8192));
|
||||
@ -216,7 +258,7 @@ async function extractFileBlocks(params: {
|
||||
}
|
||||
const forcedTextMime = resolveTextMimeFromName(attachment.path ?? attachment.url ?? "");
|
||||
const kind = forcedTextMime ? "document" : resolveAttachmentKind(attachment);
|
||||
if (!forcedTextMime && (kind === "image" || kind === "video")) {
|
||||
if (!forcedTextMime && (kind === "image" || kind === "audio" || kind === "video")) {
|
||||
continue;
|
||||
}
|
||||
if (!limits.allowUrl && attachment.url && !attachment.path) {
|
||||
@ -242,7 +284,11 @@ async function extractFileBlocks(params: {
|
||||
const forcedTextMimeResolved = forcedTextMime ?? resolveTextMimeFromName(nameHint ?? "");
|
||||
const utf16Charset = resolveUtf16Charset(bufferResult?.buffer);
|
||||
const textSample = decodeTextSample(bufferResult?.buffer);
|
||||
const textLike = Boolean(utf16Charset) || looksLikeUtf8Text(bufferResult?.buffer);
|
||||
// Check if content looks like text, but exclude files with known binary audio magic bytes
|
||||
// OGG files can pass looksLikeUtf8Text() due to ASCII-heavy headers (>85% printable)
|
||||
const textLike =
|
||||
(Boolean(utf16Charset) || looksLikeUtf8Text(bufferResult?.buffer)) &&
|
||||
!hasBinaryAudioMagic(bufferResult?.buffer);
|
||||
if (!forcedTextMimeResolved && kind === "audio" && !textLike) {
|
||||
continue;
|
||||
}
|
||||
|
||||
Loading…
Reference in New Issue
Block a user