fix(telegram): prevent message truncation when mentioning think tags

When users discuss <think> tags in prose (e.g., "Use the <think> tag to
enable thinking"), the text was being truncated because the unclosed tag
caused all trailing content to be dropped.

This fix adds a heuristic to differentiate between:
1. Model thinking output (tag at start) - strip as before
2. Prose mentioning tags (content before tag) - preserve the text

Fixes #3952

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
zerone0x 2026-01-29 22:52:07 +08:00
parent 5f4715acfc
commit 08fe479313
2 changed files with 182 additions and 1 deletions

View File

@ -0,0 +1,147 @@
import { describe, expect, it } from "vitest";
import { stripReasoningTagsFromText } from "./reasoning-tags.js";
/**
 * Specification for `stripReasoningTagsFromText`.
 *
 * Each case feeds one raw string (optionally with an options object) to the
 * function and pins the exact string that must come back. The suite covers
 * paired reasoning tags, unclosed tags (issue #3952), preserve mode, the trim
 * option, `<final>` tags, and tag spelling variants.
 */
describe("stripReasoningTagsFromText", () => {
  type StripOptions = Parameters<typeof stripReasoningTagsFromText>[1];

  /**
   * Runs the function under test on `raw` and asserts the output is exactly
   * `expected`. The options argument is only forwarded when one was supplied,
   * so default-option cases exercise the single-argument call form.
   */
  const expectStripped = (raw: string, expected: string, options?: StripOptions): void => {
    const actual =
      options === undefined
        ? stripReasoningTagsFromText(raw)
        : stripReasoningTagsFromText(raw, options);
    expect(actual).toBe(expected);
  };

  describe("basic functionality", () => {
    it("returns empty string as-is", () => {
      expectStripped("", "");
    });

    it("returns text without think tags as-is", () => {
      expectStripped("Hello world", "Hello world");
    });

    it("strips complete <think>...</think> blocks", () => {
      expectStripped("Before <think>reasoning content</think> After", "Before After");
    });

    it("strips complete <thinking>...</thinking> blocks", () => {
      expectStripped("Before <thinking>reasoning content</thinking> After", "Before After");
    });

    it("strips multiple think blocks", () => {
      expectStripped("A <think>x</think> B <think>y</think> C", "A B C");
    });

    it("strips nested content (flat parse)", () => {
      // The first closing tag ends the hidden region, so " more" is kept.
      expectStripped("Start <think>outer <think>inner</think> more</think> End", "Start more End");
    });
  });

  describe("unclosed tag handling - issue #3952", () => {
    it("preserves content when <think> appears in prose (not at start)", () => {
      // Text precedes the unclosed tag, so the whole message is expected back verbatim.
      const prose = "Use the <think> tag to enable thinking. Then do X.";
      expectStripped(prose, prose);
    });

    it("preserves content when <thinking> appears in prose", () => {
      const prose = "The <thinking> mode is useful. Try it!";
      expectStripped(prose, prose);
    });

    it("preserves code block containing <think> tag", () => {
      const snippet = "Example:\n```\n<think>\nprint('hello')\n```\nMore text";
      expectStripped(snippet, snippet);
    });

    it("strips paired tags even when discussing them", () => {
      // An open tag followed by a close tag counts as a pair; what sits between them is removed.
      expectStripped(
        "You can use <think> and </think> to wrap reasoning.",
        "You can use to wrap reasoning.",
      );
    });

    it("preserves multiple unclosed tags when in prose", () => {
      const prose = "First <think> then <thinking> more text";
      expectStripped(prose, prose);
    });

    it("strips unclosed tag at start (model thinking output)", () => {
      // Nothing precedes the tag: treated as interrupted model reasoning and dropped entirely.
      expectStripped("<think>Pensando sobre el problema...", "");
    });

    it("strips unclosed tag at start with whitespace", () => {
      // Leading whitespace alone does not count as content before the tag.
      expectStripped(" <think>Some thinking content...", "");
    });
  });

  describe("preserve mode", () => {
    it("strips thinking tags and preserves trailing content in preserve mode", () => {
      // With an unclosed tag, preserve mode removes the tag itself but keeps the text after it.
      expectStripped("Before <think>content without close", "Before content without close", {
        mode: "preserve",
      });
    });

    it("strips properly paired tags in preserve mode", () => {
      expectStripped("Before <think>hidden</think> After", "Before After", { mode: "preserve" });
    });
  });

  describe("trim options", () => {
    it("trims both ends by default", () => {
      expectStripped(" <think>x</think> result ", "result");
    });

    it("respects trim: none", () => {
      expectStripped(" <think>x</think> result ", " result ", { trim: "none" });
    });

    it("respects trim: start", () => {
      // Only leading whitespace is removed; the trailing space must survive.
      expectStripped(" <think>x</think> result ", "result ", { trim: "start" });
    });
  });

  describe("final tag handling", () => {
    it("strips <final> tags", () => {
      expectStripped("Before <final>answer</final> After", "Before answer After");
    });

    it("strips only the tags, not content inside <final>", () => {
      expectStripped("<final>The answer is 42</final>", "The answer is 42");
    });
  });

  describe("edge cases", () => {
    it("handles whitespace in tags", () => {
      expectStripped("Before < think >content< / think > After", "Before After");
    });

    it("handles case variations", () => {
      expectStripped("Before <THINK>content</THINK> After", "Before After");
    });

    it("handles <thought> variant", () => {
      expectStripped("Before <thought>content</thought> After", "Before After");
    });

    it("handles <antthinking> variant", () => {
      expectStripped("Before <antthinking>content</antthinking> After", "Before After");
    });
  });
});

View File

@ -32,12 +32,46 @@ export function stripReasoningTagsFromText(
FINAL_TAG_RE.lastIndex = 0;
}
// Collect all tag matches to check for proper pairing
THINKING_TAG_RE.lastIndex = 0;
const matches = [...cleaned.matchAll(THINKING_TAG_RE)];
// Check if tags are properly paired (every open tag has a matching close)
let openCount = 0;
let firstOpenIdx: number | null = null;
for (const match of matches) {
const isClose = match[1] === "/";
if (isClose) {
if (openCount > 0) {
openCount--;
}
} else {
if (firstOpenIdx === null) {
firstOpenIdx = match.index ?? 0;
}
openCount++;
}
}
// If we end with unclosed tags and there's content before the first tag,
// treat as literal text (user prose mentioning tags). This prevents data loss
// when users discuss <think> tags in prose or code.
// But if the first open tag is at the start (after trimming), treat as model
// thinking output that was interrupted and strip it.
if (openCount > 0 && mode !== "preserve") {
const textBeforeFirstTag = firstOpenIdx !== null ? cleaned.slice(0, firstOpenIdx).trim() : "";
if (textBeforeFirstTag.length > 0) {
// Has content before the tag - likely prose mentioning the tag
return applyTrim(cleaned, trimMode);
}
// Tag at start - likely model thinking output, continue with stripping
}
let result = "";
let lastIndex = 0;
let inThinking = false;
for (const match of cleaned.matchAll(THINKING_TAG_RE)) {
for (const match of matches) {
const idx = match.index ?? 0;
const isClose = match[1] === "/";