// openclaw/src/auto-reply/chunk.test.ts

import { describe, expect, it } from "vitest";

import {
  chunkMarkdownText,
  chunkText,
  resolveTextChunkLimit,
} from "./chunk.js";

/**
 * Asserts that no chunk ends while a Markdown code fence is still open.
 *
 * A fence line is one with up to three leading spaces followed by three or
 * more backticks or tildes. The first fence line seen opens a fence; a later
 * fence line closes it only when it uses the same marker character and is at
 * least as long as the opener. Every chunk is checked independently.
 *
 * @param chunks - Chunker output to inspect.
 */
function expectFencesBalanced(chunks: string[]) {
  type OpenFence = { markerChar: string; markerLen: number };
  const fenceLine = /^( {0,3})(`{3,}|~{3,})(.*)$/;

  chunks.forEach((chunk) => {
    // Fold over the lines, carrying the currently open fence (or null).
    const unclosed = chunk
      .split("\n")
      .reduce<OpenFence | null>((current, line) => {
        const marker = fenceLine.exec(line)?.[2];
        if (marker === undefined) {
          return current;
        }
        if (current === null) {
          return { markerChar: marker[0], markerLen: marker.length };
        }
        const closesCurrent =
          current.markerChar === marker[0] &&
          marker.length >= current.markerLen;
        // A non-matching fence line inside an open fence is just content.
        return closesCurrent ? null : current;
      }, null);

    expect(unclosed).toBe(null);
  });
}

/** One table-driven chunking scenario consumed by `runChunkCases`. */
type ChunkCase = {
  /** Title the generated test is registered under. */
  name: string;
  /** Input text handed to the chunker. */
  text: string;
  /** Limit passed as the chunker's second argument. */
  limit: number;
  /** Exact chunks the chunker must return (compared with `toEqual`). */
  expected: string[];
};

/**
 * Registers one test per case, each asserting that the chunker returns the
 * case's expected chunks for its text and limit.
 *
 * @param chunker - Function under test; called with the case's text and limit.
 * @param cases - Scenarios to register, in order.
 */
function runChunkCases(
  chunker: (text: string, limit: number) => string[],
  cases: ChunkCase[],
) {
  cases.forEach((testCase) => {
    const { name, text, limit, expected } = testCase;
    const assertExpectedChunks = () => {
      const actual = chunker(text, limit);
      expect(actual).toEqual(expected);
    };
    it(name, assertExpectedChunks);
  });
}

// Shared scenarios for text containing parentheses. All of them are run
// against chunkMarkdownText; only the first is run against chunkText.
const parentheticalCases: ChunkCase[] = [
  {
    name: "keeps parenthetical phrases together",
    text: "Heads up now (Though now I'm curious)ok",
    limit: 35,
    expected: ["Heads up now", "(Though now I'm curious)ok"],
  },
  {
    name: "handles nested parentheses",
    text: "Hello (outer (inner) end) world",
    limit: 26,
    expected: ["Hello (outer (inner) end)", "world"],
  },
  {
    // A stray ")" with no opener must not affect where the text is split.
    name: "ignores unmatched closing parentheses",
    text: "Hello) world (ok)",
    limit: 12,
    expected: ["Hello)", "world (ok)"],
  },
];

// Plain-text chunking: no split under the limit, then newline, whitespace and
// hard-break fallbacks once the text exceeds it.
describe("chunkText", () => {
  it("keeps multi-line text in one chunk when under limit", () => {
    const text = "Line one\n\nLine two\n\nLine three";
    const chunks = chunkText(text, 1600);
    expect(chunks).toEqual([text]);
  });

  it("splits only when text exceeds the limit", () => {
    const part = "a".repeat(20);
    const text = part.repeat(5); // 100 chars
    const chunks = chunkText(text, 60);
    expect(chunks.length).toBe(2);
    expect(chunks[0].length).toBe(60);
    expect(chunks[1].length).toBe(40);
    // Nothing may be dropped or duplicated by the hard split.
    expect(chunks.join("")).toBe(text);
  });

  it("prefers breaking at a newline before the limit", () => {
    const text = `paragraph one line\n\nparagraph two starts here and continues`;
    const chunks = chunkText(text, 40);
    expect(chunks).toEqual([
      "paragraph one line",
      "paragraph two starts here and continues",
    ]);
  });

  it("otherwise breaks at the last whitespace under the limit", () => {
    const text =
      "This is a message that should break nicely near a word boundary.";
    const limit = 30;
    const chunks = chunkText(text, limit);
    // The input is longer than the limit, so a split must happen; it may
    // yield more than two chunks, so bound every chunk rather than only the
    // first two.
    expect(chunks.length).toBeGreaterThan(1);
    for (const chunk of chunks) {
      expect(chunk.length).toBeLessThanOrEqual(limit);
    }
    // Whitespace at the break points may be dropped, so compare the words
    // after collapsing runs of whitespace.
    expect(chunks.join(" ").replace(/\s+/g, " ").trim()).toBe(
      text.replace(/\s+/g, " ").trim(),
    );
  });

  it("falls back to a hard break when no whitespace is present", () => {
    const text = "Supercalifragilisticexpialidocious"; // 34 chars
    const chunks = chunkText(text, 10);
    expect(chunks).toEqual(["Supercalif", "ragilistic", "expialidoc", "ious"]);
  });

  // Only the first shared parenthetical scenario is exercised for chunkText.
  runChunkCases(chunkText, [parentheticalCases[0]]);
});

// Limit resolution: defaults, explicit fallback, and provider/account
// overrides taken from the config object.
describe("resolveTextChunkLimit", () => {
  it("uses per-provider defaults", () => {
    const providers = [
      "whatsapp",
      "telegram",
      "slack",
      "signal",
      "imessage",
      "discord",
    ] as const;
    for (const provider of providers) {
      expect(resolveTextChunkLimit(undefined, provider)).toBe(4000);
    }

    // An explicit fallback limit replaces the default when nothing is configured.
    const withFallback = resolveTextChunkLimit(undefined, "discord", undefined, {
      fallbackLimit: 2000,
    });
    expect(withFallback).toBe(2000);
  });

  it("supports provider overrides", () => {
    const config = { telegram: { textChunkLimit: 1234 } };
    const whatsappLimit = resolveTextChunkLimit(config, "whatsapp");
    expect(whatsappLimit).toBe(4000);
    const telegramLimit = resolveTextChunkLimit(config, "telegram");
    expect(telegramLimit).toBe(1234);
  });

  it("prefers account overrides when provided", () => {
    const accounts = {
      default: { textChunkLimit: 1234 },
      primary: { textChunkLimit: 777 },
    };
    const config = { telegram: { textChunkLimit: 2000, accounts } };

    const primaryLimit = resolveTextChunkLimit(config, "telegram", "primary");
    expect(primaryLimit).toBe(777);
    const defaultLimit = resolveTextChunkLimit(config, "telegram", "default");
    expect(defaultLimit).toBe(1234);
  });

  it("uses the matching provider override", () => {
    const config = {
      discord: { textChunkLimit: 111 },
      slack: { textChunkLimit: 222 },
    };
    const discordLimit = resolveTextChunkLimit(config, "discord");
    expect(discordLimit).toBe(111);
    const slackLimit = resolveTextChunkLimit(config, "slack");
    expect(slackLimit).toBe(222);
    // A provider without an override still gets the default.
    const telegramLimit = resolveTextChunkLimit(config, "telegram");
    expect(telegramLimit).toBe(4000);
  });
});

// Markdown-aware chunking: fenced code blocks must stay balanced in every
// chunk, and parenthetical phrases are kept together where the limit allows.
describe("chunkMarkdownText", () => {
  it("keeps fenced blocks intact when a safe break exists", () => {
    const fencedBlock = "```bash\nline1\nline2\n```";
    const text = ["p".repeat(60), fencedBlock, "s".repeat(60)].join("\n\n");

    const chunks = chunkMarkdownText(text, 40);
    const hasIntactFence = chunks.some(
      (chunk) => chunk.trimEnd() === fencedBlock,
    );
    expect(hasIntactFence).toBe(true);
    expectFencesBalanced(chunks);
  });

  // Each scenario is a single fenced block whose body is far longer than the
  // limit, so the chunker has to split inside the fence. Every resulting chunk
  // must start with the opening fence line and end with the closing fence.
  const forcedSplitCases = [
    {
      name: "reopens fenced blocks when forced to split inside them",
      opener: "```txt",
      body: "a".repeat(500),
      closer: "```",
      limit: 120,
    },
    {
      name: "supports tilde fences",
      opener: "~~~sh",
      body: "x".repeat(600),
      closer: "~~~",
      limit: 140,
    },
    {
      name: "supports longer fence markers for close",
      opener: "````md",
      body: "y".repeat(600),
      closer: "````",
      limit: 140,
    },
    {
      name: "preserves indentation for indented fences",
      opener: "  ```js",
      body: `  ${"z".repeat(600)}`,
      closer: "  ```",
      limit: 160,
    },
  ];

  for (const { name, opener, body, closer, limit } of forcedSplitCases) {
    it(name, () => {
      const text = [opener, body, closer].join("\n");
      const chunks = chunkMarkdownText(text, limit);
      expect(chunks.length).toBeGreaterThan(1);
      for (const chunk of chunks) {
        expect(chunk.length).toBeLessThanOrEqual(limit);
        expect(chunk.startsWith(`${opener}\n`)).toBe(true);
        expect(chunk.trimEnd().endsWith(closer)).toBe(true);
      }
      expectFencesBalanced(chunks);
    });
  }

  it("never produces an empty fenced chunk when splitting", () => {
    const fenceLine = /^( {0,3})(`{3,}|~{3,})(.*)$/;
    const text = ["```txt", "a".repeat(300), "```"].join("\n");

    for (const chunk of chunkMarkdownText(text, 60)) {
      // Drop the fence lines; whatever remains is the chunk's real content.
      const content = chunk
        .split("\n")
        .filter((line) => !fenceLine.test(line))
        .join("\n");
      expect(content.trim()).not.toBe("");
    }
  });

  runChunkCases(chunkMarkdownText, parentheticalCases);

  it("hard-breaks when a parenthetical exceeds the limit", () => {
    const text = "(" + "a".repeat(80) + ")";
    const chunks = chunkMarkdownText(text, 20);
    const [firstChunk] = chunks;
    expect(firstChunk?.length).toBe(20);
    // Hard breaks must not drop or insert characters.
    expect(chunks.join("")).toBe(text);
  });
});