From da6f3e21a5965ff3e9a65be2cb91167e37ea26e9 Mon Sep 17 00:00:00 2001 From: Yoshihiro Takahara Date: Fri, 30 Jan 2026 08:19:25 +0000 Subject: [PATCH 1/2] fix(slack): reject HTML responses when downloading media Slack sometimes returns HTML login pages instead of binary media when authentication fails or URLs expire. This change detects HTML responses by checking content-type header and buffer content, then skips to the next available file URL. --- src/slack/monitor/media.test.ts | 85 +++++++++++++++++++++++++++++++++ src/slack/monitor/media.ts | 26 ++++++++++ 2 files changed, 111 insertions(+) diff --git a/src/slack/monitor/media.test.ts b/src/slack/monitor/media.test.ts index bfe70f005..822e38a9d 100644 --- a/src/slack/monitor/media.test.ts +++ b/src/slack/monitor/media.test.ts @@ -242,6 +242,91 @@ describe("resolveSlackMedia", () => { expect(mockFetch).not.toHaveBeenCalled(); }); + it("rejects HTML response (auth failure) and returns null", async () => { + const { resolveSlackMedia } = await import("./media.js"); + + // Simulate Slack returning an HTML login page instead of the image + const htmlContent = ` + +Slack + + + +`; + const htmlResponse = new Response(htmlContent, { + status: 200, + headers: { "content-type": "text/html; charset=utf-8" }, + }); + mockFetch.mockResolvedValueOnce(htmlResponse); + + const result = await resolveSlackMedia({ + files: [{ url_private_download: "https://files.slack.com/test.png", name: "test.png" }], + token: "invalid-or-expired-token", + maxBytes: 10 * 1024 * 1024, + }); + + // Should reject HTML and return null + expect(result).toBeNull(); + }); + + it("rejects HTML response detected by buffer content even if content-type header is missing", async () => { + const { resolveSlackMedia } = await import("./media.js"); + + // HTML content but no content-type header (edge case) + const htmlContent = `Slack login page`; + const htmlResponse = new Response(htmlContent, { + status: 200, + // No content-type header + }); + mockFetch.mockResolvedValueOnce(htmlResponse); + + const result = await resolveSlackMedia({ + files: [{ url_private: "https://files.slack.com/test.jpg", name: "test.jpg" }], + token: "xoxb-test-token", + maxBytes: 10 * 1024 * 1024, + }); + + expect(result).toBeNull(); + }); + + it("falls through to next file when first returns HTML", async () => { + // Mock the store module + vi.doMock("../../media/store.js", () => ({ + saveMediaBuffer: vi.fn().mockResolvedValue({ + path: "/tmp/test.jpg", + contentType: "image/jpeg", + }), + })); + + const { resolveSlackMedia } = await import("./media.js"); + + // First file: HTML (auth failure) + const htmlResponse = new Response("login", { + status: 200, + headers: { "content-type": "text/html" }, + }); + // Second file: actual image + const imageResponse = new Response(Buffer.from("image data"), { + status: 200, + headers: { "content-type": "image/jpeg" }, + }); + + mockFetch.mockResolvedValueOnce(htmlResponse).mockResolvedValueOnce(imageResponse); + + const result = await resolveSlackMedia({ + files: [ + { url_private: "https://files.slack.com/first.png", name: "first.png" }, + { url_private: "https://files.slack.com/second.jpg", name: "second.jpg" }, + ], + token: "xoxb-test-token", + maxBytes: 10 * 1024 * 1024, + }); + + // Should skip HTML and succeed with the second file + expect(result).not.toBeNull(); + expect(mockFetch).toHaveBeenCalledTimes(2); + }); + it("falls through to next file when first file returns error", async () => { // Mock the store module vi.doMock("../../media/store.js", () => ({ diff --git a/src/slack/monitor/media.ts b/src/slack/monitor/media.ts index 2674e2d50..0770fb43f 100644 --- a/src/slack/monitor/media.ts +++ b/src/slack/monitor/media.ts @@ -3,8 +3,23 @@ import type { WebClient as SlackWebClient } from "@slack/web-api"; import type { FetchLike } from "../../media/fetch.js"; import { fetchRemoteMedia } from "../../media/fetch.js"; import { saveMediaBuffer } from "../../media/store.js"; +import { logWarn } from "../../logger.js"; import type { SlackFile } from "../types.js"; +/** + * Detects if buffer content looks like HTML (login page / error page). + * Slack sometimes returns HTML login pages when auth fails instead of binary media. + */ +function looksLikeHtml(buffer: Buffer): boolean { + const head = buffer.subarray(0, 512).toString("utf-8").trim().toLowerCase(); + return ( + head.startsWith(" params.maxBytes) continue; + + // Guard: reject if we received HTML instead of expected media. + // This happens when Slack auth fails and returns a login page. + const detectedMime = fetched.contentType?.split(";")[0]?.trim().toLowerCase(); + if (detectedMime === "text/html" || looksLikeHtml(fetched.buffer)) { + const fileId = file.name ?? file.id ?? "unknown"; + logWarn( + `slack: received HTML instead of media for file ${fileId}; possible auth failure or expired URL`, + ); + continue; + } const saved = await saveMediaBuffer( fetched.buffer, fetched.contentType ?? file.mimetype, From b305640e69a30aad0f856007c4529da6cafc4dbc Mon Sep 17 00:00:00 2001 From: Yoshihiro Takahara Date: Fri, 30 Jan 2026 13:52:46 +0000 Subject: [PATCH 2/2] fix: allow genuine HTML file downloads Address Codex review feedback: only reject HTML responses when the file metadata indicates a non-HTML type. Genuine HTML files (with mimetype text/html or .html extension) are now allowed through. - Check file.mimetype and file.name extension before rejecting HTML - Add tests for legitimate HTML file downloads --- src/slack/monitor/media.test.ts | 70 +++++++++++++++++++++++++++++++++ src/slack/monitor/media.ts | 6 ++- 2 files changed, 75 insertions(+), 1 deletion(-) diff --git a/src/slack/monitor/media.test.ts b/src/slack/monitor/media.test.ts index 822e38a9d..9a49b249e 100644 --- a/src/slack/monitor/media.test.ts +++ b/src/slack/monitor/media.test.ts @@ -289,6 +289,76 @@ describe("resolveSlackMedia", () => { expect(result).toBeNull(); }); + it("allows genuine HTML file when mimetype indicates text/html", async () => { + // Mock the store module + vi.doMock("../../media/store.js", () => ({ + saveMediaBuffer: vi.fn().mockResolvedValue({ + path: "/tmp/test.html", + contentType: "text/html", + }), + })); + + const { resolveSlackMedia } = await import("./media.js"); + + // A genuine HTML file shared by a user + const htmlContent = `User's HTML page`; + const htmlResponse = new Response(htmlContent, { + status: 200, + headers: { "content-type": "text/html; charset=utf-8" }, + }); + mockFetch.mockResolvedValueOnce(htmlResponse); + + const result = await resolveSlackMedia({ + files: [ + { + url_private: "https://files.slack.com/page.html", + name: "page.html", + mimetype: "text/html", + }, + ], + token: "xoxb-test-token", + maxBytes: 10 * 1024 * 1024, + }); + + // Should allow genuine HTML files + expect(result).not.toBeNull(); + expect(result?.contentType).toBe("text/html"); + }); + + it("allows HTML file based on .html extension even without mimetype", async () => { + // Mock the store module + vi.doMock("../../media/store.js", () => ({ + saveMediaBuffer: vi.fn().mockResolvedValue({ + path: "/tmp/test.html", + contentType: "text/html", + }), + })); + + const { resolveSlackMedia } = await import("./media.js"); + + const htmlContent = `User's HTML snippet`; + const htmlResponse = new Response(htmlContent, { + status: 200, + headers: { "content-type": "text/html" }, + }); + mockFetch.mockResolvedValueOnce(htmlResponse); + + const result = await resolveSlackMedia({ + files: [ + { + url_private: "https://files.slack.com/snippet.html", + name: "snippet.html", + // No mimetype set + }, + ], + token: "xoxb-test-token", + maxBytes: 10 * 1024 * 1024, + }); + + // Should allow based on .html extension + expect(result).not.toBeNull(); + }); + it("falls through to next file when first returns HTML", async () => { // Mock the store module vi.doMock("../../media/store.js", () => ({ diff --git a/src/slack/monitor/media.ts b/src/slack/monitor/media.ts index 0770fb43f..743ee50a9 100644 --- a/src/slack/monitor/media.ts +++ b/src/slack/monitor/media.ts @@ -82,8 +82,12 @@ export async function resolveSlackMedia(params: { // Guard: reject if we received HTML instead of expected media. // This happens when Slack auth fails and returns a login page. + // Skip this check if the file metadata indicates it's genuinely an HTML file. const detectedMime = fetched.contentType?.split(";")[0]?.trim().toLowerCase(); - if (detectedMime === "text/html" || looksLikeHtml(fetched.buffer)) { + const expectedMime = file.mimetype?.split(";")[0]?.trim().toLowerCase(); + const isExpectedHtml = + expectedMime === "text/html" || file.name?.toLowerCase().endsWith(".html"); + if (!isExpectedHtml && (detectedMime === "text/html" || looksLikeHtml(fetched.buffer))) { const fileId = file.name ?? file.id ?? "unknown"; logWarn( `slack: received HTML instead of media for file ${fileId}; possible auth failure or expired URL`,