fix(media): skip audio files in extractFileBlocks text extraction

Audio files (especially OGG/Opus voice messages from Telegram) were being misidentified as text by looksLikeUtf8Text() because OGG headers contain more than 85% printable ASCII. As a result, guessDelimitedMime() classified them as text/tab-separated-values, and their raw binary content was injected into the model context. Add audio to the skip list alongside image and video in extractFileBlocks() so that audio attachments are routed to the transcription pipeline instead of being treated as text files. Fixes #1989
2026-01-29 23:45:40 +01:00 · 2026-01-29 23:45:40 +01:00 · 67bbacb3b7
commit 67bbacb3b7
parent 4583f88626
1 changed file with 1 addition and 1 deletion
--- a/src/media-understanding/apply.ts
+++ b/src/media-understanding/apply.ts
@@ -216,7 +216,7 @@ async function extractFileBlocks(params: {
    }
    const forcedTextMime = resolveTextMimeFromName(attachment.path ?? attachment.url ?? "");
    const kind = forcedTextMime ? "document" : resolveAttachmentKind(attachment);
-    if (!forcedTextMime && (kind === "image" || kind === "video")) {
+    if (!forcedTextMime && (kind === "image" || kind === "audio" || kind === "video")) {
      continue;
    }
    if (!limits.allowUrl && attachment.url && !attachment.path) {