Merge 463c0788bc into 09be5d45d5
This commit is contained in:
commit
d9016921bd
@ -547,6 +547,73 @@ describe("applyMediaUnderstanding", () => {
|
|||||||
expect(ctx.Body).toContain("a\tb\tc");
|
expect(ctx.Body).toContain("a\tb\tc");
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it("does not treat OGG audio as text even with ASCII-heavy headers (issue #1989)", async () => {
  const { applyMediaUnderstanding } = await loadApply();

  // Build a fake OGG file: the "OggS" capture pattern followed by a mostly
  // printable payload, so that only the magic-byte check can keep the
  // content from being classified as text.
  // Real OGG Opus files (Telegram voice) have similar structure.
  const tmpRoot = await fs.mkdtemp(path.join(os.tmpdir(), "moltbot-media-"));
  const voiceFile = path.join(tmpRoot, "voice.ogg");
  const payload = Buffer.concat([
    Buffer.from([0x4f, 0x67, 0x67, 0x53]), // "OggS"
    Buffer.from("ENCODER=test\tVERSION=1\nTITLE=hello\t"),
  ]);
  await fs.writeFile(voiceFile, payload);

  const message: MsgContext = {
    Body: "<media:audio>",
    MediaPath: voiceFile,
    MediaType: "audio/ogg",
  };
  // Every media tool is switched off in this configuration.
  const settings: MoltbotConfig = {
    tools: {
      media: {
        audio: { enabled: false },
        image: { enabled: false },
        video: { enabled: false },
      },
    },
  };

  const { appliedFile } = await applyMediaUnderstanding({ ctx: message, cfg: settings });

  // The OGG attachment must not be handled as a text file: no file was
  // applied and no <file block was added to the message body.
  expect(appliedFile).toBe(false);
  expect(message.Body).not.toContain("<file");
});
|
||||||
|
|
||||||
|
it("does not treat MP3 with ID3 tag as text even with ASCII-heavy metadata", async () => {
  const { applyMediaUnderstanding } = await loadApply();

  // Build a fake MP3 file: the "ID3" tag identifier followed by a mostly
  // printable payload. ID3 tags can contain lyrics, comments, and other text
  // that passes looksLikeUtf8Text().
  const tmpRoot = await fs.mkdtemp(path.join(os.tmpdir(), "moltbot-media-"));
  const songFile = path.join(tmpRoot, "song.mp3");
  const payload = Buffer.concat([
    Buffer.from([0x49, 0x44, 0x33]), // "ID3"
    Buffer.from("TIT2=Song Title\tTPE1=Artist Name\nTALB=Album\t"),
  ]);
  await fs.writeFile(songFile, payload);

  const message: MsgContext = {
    Body: "<media:audio>",
    MediaPath: songFile,
    MediaType: "audio/mpeg",
  };
  // Every media tool is switched off in this configuration.
  const settings: MoltbotConfig = {
    tools: {
      media: {
        audio: { enabled: false },
        image: { enabled: false },
        video: { enabled: false },
      },
    },
  };

  const { appliedFile } = await applyMediaUnderstanding({ ctx: message, cfg: settings });

  // The MP3 attachment must not be handled as a text file: no file was
  // applied and no <file block was added to the message body.
  expect(appliedFile).toBe(false);
  expect(message.Body).not.toContain("<file");
});
|
||||||
|
|
||||||
it("escapes XML special characters in filenames to prevent injection", async () => {
|
it("escapes XML special characters in filenames to prevent injection", async () => {
|
||||||
const { applyMediaUnderstanding } = await loadApply();
|
const { applyMediaUnderstanding } = await loadApply();
|
||||||
const dir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-media-"));
|
const dir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-media-"));
|
||||||
|
|||||||
@ -162,6 +162,48 @@ function looksLikeUtf8Text(buffer?: Buffer): boolean {
|
|||||||
return printable / total > 0.85;
|
return printable / total > 0.85;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Detects binary audio/video container formats by magic bytes (file signatures).
 *
 * OGG files (used by Telegram voice messages as OGG Opus) have ASCII-heavy
 * headers that can pass looksLikeUtf8Text() due to Vorbis/Opus comment metadata
 * containing printable strings. This causes them to be misidentified as text.
 *
 * Magic bytes provide format detection independent of MIME type or file
 * extension, following established file format specifications.
 *
 * Each signature is compared against exactly as many leading bytes as it
 * defines, so a buffer only has to be as long as the signature being tested
 * (3 bytes are enough to recognise "ID3", 4 bytes for "OggS").
 *
 * References:
 * - OGG container: RFC 3533 (https://datatracker.ietf.org/doc/html/rfc3533)
 *   Section 6: "OggS" capture pattern at byte offset 0
 * - MP3 ID3v2: id3.org spec (https://id3.org/id3v2.4.0-structure)
 *   Section 3.1: "ID3" identifier at file start
 *
 * @param buffer - Leading bytes of the file; may be omitted or empty.
 * @returns `true` when the buffer starts with one of the known signatures,
 *   `false` otherwise (including when no buffer is supplied).
 * @see https://github.com/moltbot/moltbot/issues/1989
 */
function hasBinaryAudioMagic(buffer?: Buffer): boolean {
  if (!buffer || buffer.length === 0) return false;

  const signatures: ReadonlyArray<readonly number[]> = [
    // OGG container format: "OggS" signature (RFC 3533 Section 6).
    // Covers OGG Vorbis, OGG Opus (Telegram voice), OGG Theora, etc.
    [0x4f, 0x67, 0x67, 0x53],
    // MP3 with ID3v2 tag: "ID3" signature (id3.org spec Section 3.1).
    // ID3v2 tags can contain large amounts of ASCII text (lyrics, comments).
    [0x49, 0x44, 0x33],
  ];

  return signatures.some(
    (signature) =>
      // Length is checked per signature so a short signature is not rejected
      // merely because the buffer is shorter than the longest one.
      buffer.length >= signature.length &&
      signature.every((byte, offset) => buffer[offset] === byte),
  );
}
|
||||||
|
|
||||||
function decodeTextSample(buffer?: Buffer): string {
|
function decodeTextSample(buffer?: Buffer): string {
|
||||||
if (!buffer || buffer.length === 0) return "";
|
if (!buffer || buffer.length === 0) return "";
|
||||||
const sample = buffer.subarray(0, Math.min(buffer.length, 8192));
|
const sample = buffer.subarray(0, Math.min(buffer.length, 8192));
|
||||||
@ -242,7 +284,11 @@ async function extractFileBlocks(params: {
|
|||||||
const forcedTextMimeResolved = forcedTextMime ?? resolveTextMimeFromName(nameHint ?? "");
|
const forcedTextMimeResolved = forcedTextMime ?? resolveTextMimeFromName(nameHint ?? "");
|
||||||
const utf16Charset = resolveUtf16Charset(bufferResult?.buffer);
|
const utf16Charset = resolveUtf16Charset(bufferResult?.buffer);
|
||||||
const textSample = decodeTextSample(bufferResult?.buffer);
|
const textSample = decodeTextSample(bufferResult?.buffer);
|
||||||
const textLike = Boolean(utf16Charset) || looksLikeUtf8Text(bufferResult?.buffer);
|
// Check if content looks like text, but exclude files with known binary audio magic bytes
|
||||||
|
// OGG files can pass looksLikeUtf8Text() due to ASCII-heavy headers (>85% printable)
|
||||||
|
const textLike =
|
||||||
|
(Boolean(utf16Charset) || looksLikeUtf8Text(bufferResult?.buffer)) &&
|
||||||
|
!hasBinaryAudioMagic(bufferResult?.buffer);
|
||||||
if (!forcedTextMimeResolved && kind === "audio" && !textLike) {
|
if (!forcedTextMimeResolved && kind === "audio" && !textLike) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user