/**
 * Unit tests for `sanitizeEmailBody`.
 *
 * The suite feeds HTML (and plain-text) email bodies to the sanitizer and
 * asserts on the plain text that comes back: tag/comment/style/script
 * stripping, newline conversion, entity decoding, data-URI and tracking-pixel
 * removal, link handling, footer/boilerplate removal and whitespace collapsing.
 *
 * NOTE(review): the HTML fixtures below were restored after the markup had been
 * stripped out of the string literals (which also left raw line breaks inside
 * quoted strings). The visible text and every assertion are unchanged, but the
 * exact tags/attributes are a reconstruction - confirm against the intended
 * inputs.
 */
import { describe, expect, it } from "vitest";
import { sanitizeEmailBody } from "./sanitize-email-body.js";

describe("sanitizeEmailBody", () => {
  it("returns empty string for falsy input", () => {
    expect(sanitizeEmailBody("")).toBe("");
    // Deliberately bypass the type checker to exercise the runtime guard.
    expect(sanitizeEmailBody(null as unknown as string)).toBe("");
    expect(sanitizeEmailBody(undefined as unknown as string)).toBe("");
  });

  it("passes through plain text unchanged", () => {
    expect(sanitizeEmailBody("Hello world")).toBe("Hello world");
  });

  // --- HTML stripping ---

  it("strips basic HTML tags", () => {
    expect(sanitizeEmailBody("<p>Hello <b>world</b></p>")).toBe("Hello world");
  });

  it("removes style blocks and their contents", () => {
    const html =
      '<style type="text/css">.foo { color: red; }</style><p>Content</p>';
    expect(sanitizeEmailBody(html)).toBe("Content");
  });

  it("removes script blocks", () => {
    const html = "<script>alert('xss')</script><p>Safe</p>";
    expect(sanitizeEmailBody(html)).toBe("Safe");
  });

  it("removes HTML comments", () => {
    const html = "<!-- hidden comment --><p>Visible</p>";
    expect(sanitizeEmailBody(html)).toBe("Visible");
  });

  // --- Newline conversion ---

  it("converts <br> to newlines", () => {
    expect(sanitizeEmailBody("Line 1<br>Line 2<br>Line 3")).toBe(
      "Line 1\nLine 2\nLine 3",
    );
  });

  it("converts block-level closing tags to newlines", () => {
    const html = "<div>Block 1</div><div>Block 2</div>";
    const result = sanitizeEmailBody(html);
    expect(result).toContain("Block 1");
    expect(result).toContain("Block 2");
    expect(result).toMatch(/Block 1\n+Block 2/);
  });

  // --- HTML entities ---

  it("decodes named HTML entities", () => {
    // The trailing &nbsp; decodes to whitespace, which is trimmed off the end.
    expect(sanitizeEmailBody("&amp; &lt; &gt; &quot; &nbsp;")).toBe('& < > "');
  });

  it("decodes numeric decimal entities", () => {
    expect(sanitizeEmailBody("&#65;&#66;&#67;")).toBe("ABC");
  });

  it("decodes numeric hex entities", () => {
    expect(sanitizeEmailBody("&#x41;&#x42;&#x43;")).toBe("ABC");
  });

  it("decodes typographic entities", () => {
    expect(sanitizeEmailBody("&ldquo;Hello&rdquo; &mdash; world")).toBe(
      "\u201CHello\u201D — world",
    );
  });

  // --- Data URIs / base64 ---

  it("removes base64 data URIs", () => {
    const html =
      '<img src="data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAAB" alt="pic"><p>Text</p>';
    expect(sanitizeEmailBody(html)).toBe("Text");
  });

  it("removes inline base64 content", () => {
    const html = "data:image/gif;base64,R0lGODlhAQABAIAAAAAAAP some text after";
    const result = sanitizeEmailBody(html);
    expect(result).not.toContain("base64");
    expect(result).toContain("some text after");
  });

  // --- Tracking pixels ---

  it("removes 1x1 tracking pixel images", () => {
    const html =
      '<img src="https://tracker.example.com/pixel.gif" width="1" height="1"><p>Content</p>';
    expect(sanitizeEmailBody(html)).toBe("Content");
  });

  it("removes display:none images", () => {
    const html =
      '<img src="https://example.com/pixel.gif" style="display:none"><p>Content</p>';
    expect(sanitizeEmailBody(html)).toBe("Content");
  });

  // --- Links ---

  it("keeps link text, drops tracking hrefs", () => {
    const html =
      '<a href="https://tracker.example.com/click?id=123">Click here</a>';
    // NOTE(review): the title says the link text is kept, yet the expectation
    // is an empty string. Confirm which of the two reflects intended behavior.
    expect(sanitizeEmailBody(html)).toBe("");
  });

  it("keeps useful link text", () => {
    const html = '<a href="https://example.com/article">Read more</a>';
    expect(sanitizeEmailBody(html)).toBe("Read more");
  });

  // --- Footer patterns ---

  it("removes unsubscribe text", () => {
    const html = "<p>Real content</p><p>Unsubscribe from this mailing list</p>";
    const result = sanitizeEmailBody(html);
    expect(result).toContain("Real content");
    expect(result).not.toMatch(/unsubscribe/i);
  });

  it("removes 'sent from my iPhone'", () => {
    const html = "<p>Hey!</p><p>Sent from my iPhone</p>";
    const result = sanitizeEmailBody(html);
    expect(result).toContain("Hey!");
    expect(result).not.toMatch(/sent from my iphone/i);
  });

  it("removes 'Get Outlook for iOS'", () => {
    const html = "<p>Meeting at 3</p><p>Get Outlook for iOS</p>";
    const result = sanitizeEmailBody(html);
    expect(result).toContain("Meeting at 3");
    expect(result).not.toMatch(/get outlook/i);
  });

  it("removes confidentiality notices", () => {
    const html =
      "<p>Actual content</p><p>Confidentiality notice: This email is intended solely for the use of the individual to whom it is addressed.</p>";
    const result = sanitizeEmailBody(html);
    expect(result).toContain("Actual content");
    expect(result).not.toMatch(/confidentiality notice/i);
  });

  it("removes copyright notices", () => {
    const html = "<p>Content</p><p>© 2024 Acme Corp. All rights reserved.</p>";
    const result = sanitizeEmailBody(html);
    expect(result).toContain("Content");
    expect(result).not.toMatch(/all rights reserved/i);
  });

  it("removes 'you are receiving this email because'", () => {
    const html =
      "<p>Newsletter</p><p>You are receiving this email because you signed up on our website.</p>";
    const result = sanitizeEmailBody(html);
    expect(result).toContain("Newsletter");
    expect(result).not.toMatch(/you are receiving/i);
  });

  it("removes privacy policy / terms of service", () => {
    const html = "<p>Content</p><p>Privacy Policy | Terms of Service</p>";
    const result = sanitizeEmailBody(html);
    expect(result).toContain("Content");
    expect(result).not.toMatch(/privacy policy/i);
  });

  // --- Whitespace collapsing ---

  it("collapses excessive blank lines to max 2 newlines", () => {
    const html = "<p>A</p>\n\n\n\n\n<p>B</p>";
    const result = sanitizeEmailBody(html);
    expect(result).not.toMatch(/\n{3,}/);
    expect(result).toContain("A");
    expect(result).toContain("B");
  });

  it("trims each line and removes leading/trailing whitespace", () => {
    const html = "<p>  Hello  </p><p>  World  </p>";
    const result = sanitizeEmailBody(html);
    expect(result).not.toMatch(/^\s/);
    expect(result).not.toMatch(/\s$/);
    for (const line of result.split("\n")) {
      expect(line).toBe(line.trim());
    }
  });

  it("collapses multiple spaces to single space", () => {
    expect(sanitizeEmailBody("Hello    world     here")).toBe(
      "Hello world here",
    );
  });

  // --- Real-world-ish email ---

  it("handles a typical marketing email", () => {
    // Combines the individual cases above: a style block, a logo image, body
    // copy, a call-to-action link, a 1x1 tracking pixel and footer boilerplate.
    const html = `
      <style>body { font-family: Arial, sans-serif; }</style>
      <img src="https://example.com/logo.png" alt="Logo">
      <h1>Big Sale!</h1>
      <p>Save 50% on everything this weekend.</p>
      <p>Use code: <strong>SAVE50</strong></p>
      <a href="https://example.com/sale">Shop Now</a>
      <img src="https://tracker.example.com/open.gif" width="1" height="1">
      <p>Unsubscribe from this mailing list</p>
      <p>© 2024 Acme Corp. All rights reserved.</p>
      <p>Privacy Policy | Terms of Service</p>
    `;
    const result = sanitizeEmailBody(html);

    // Should keep
    expect(result).toContain("Big Sale!");
    expect(result).toContain("Save 50% on everything this weekend.");
    expect(result).toContain("SAVE50");
    expect(result).toContain("Shop Now");

    // Should remove
    expect(result).not.toContain("<");
    expect(result).not.toContain("style");
    expect(result).not.toMatch(/tracker/);
    expect(result).not.toMatch(/unsubscribe/i);
    expect(result).not.toMatch(/all rights reserved/i);
    expect(result).not.toMatch(/privacy policy/i);
  });
});