fix(telegram): prevent message truncation when mentioning think tags
When users discussed <think> tags in prose (e.g., "Use the <think> tag to enable thinking"), the message was truncated because the unclosed tag caused all trailing content to be dropped.

This fix adds a heuristic to differentiate between:
1. Model thinking output (unclosed tag at the start of the text) - stripped as before
2. Prose mentioning tags (content precedes the first unclosed tag) - the text is preserved

Fixes #3952

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
parent
5f4715acfc
commit
08fe479313
147
src/shared/text/reasoning-tags.test.ts
Normal file
147
src/shared/text/reasoning-tags.test.ts
Normal file
@ -0,0 +1,147 @@
|
||||
import { describe, expect, it } from "vitest";
|
||||
|
||||
import { stripReasoningTagsFromText } from "./reasoning-tags.js";
|
||||
|
||||
describe("stripReasoningTagsFromText", () => {
|
||||
describe("basic functionality", () => {
  // Each row is [test title, raw input, expected output]; every case calls
  // stripReasoningTagsFromText with no options argument.
  const scenarios: ReadonlyArray<readonly [string, string, string]> = [
    ["returns empty string as-is", "", ""],
    ["returns text without think tags as-is", "Hello world", "Hello world"],
    [
      "strips complete <think>...</think> blocks",
      "Before <think>reasoning content</think> After",
      "Before After",
    ],
    [
      "strips complete <thinking>...</thinking> blocks",
      "Before <thinking>reasoning content</thinking> After",
      "Before After",
    ],
    ["strips multiple think blocks", "A <think>x</think> B <think>y</think> C", "A B C"],
    [
      "strips nested content (flat parse)",
      "Start <think>outer <think>inner</think> more</think> End",
      "Start more End",
    ],
  ];

  // Register one test per row, in table order, under the row's own title.
  for (const [title, raw, expected] of scenarios) {
    it(title, () => {
      expect(stripReasoningTagsFromText(raw)).toBe(expected);
    });
  }
});
|
||||
|
||||
describe("unclosed tag handling - issue #3952", () => {
  // Each row is [test title, raw input, expected output] with default options.
  // Rows whose unclosed tag follows other text expect the input back unchanged;
  // rows whose unclosed tag opens the text expect an empty string.
  const scenarios: ReadonlyArray<readonly [string, string, string]> = [
    [
      "preserves content when <think> appears in prose (not at start)",
      "Use the <think> tag to enable thinking. Then do X.",
      "Use the <think> tag to enable thinking. Then do X.",
    ],
    [
      "preserves content when <thinking> appears in prose",
      "The <thinking> mode is useful. Try it!",
      "The <thinking> mode is useful. Try it!",
    ],
    [
      "preserves code block containing <think> tag",
      "Example:\n```\n<think>\nprint('hello')\n```\nMore text",
      "Example:\n```\n<think>\nprint('hello')\n```\nMore text",
    ],
    [
      // A matched open/close pair is still removed together with what it wraps.
      "strips paired tags even when discussing them",
      "You can use <think> and </think> to wrap reasoning.",
      "You can use to wrap reasoning.",
    ],
    [
      "preserves multiple unclosed tags when in prose",
      "First <think> then <thinking> more text",
      "First <think> then <thinking> more text",
    ],
    [
      // Unclosed tag as the very first thing: handled as interrupted model thinking.
      "strips unclosed tag at start (model thinking output)",
      "<think>Pensando sobre el problema...",
      "",
    ],
    [
      "strips unclosed tag at start with whitespace",
      " <think>Some thinking content...",
      "",
    ],
  ];

  // Register one test per row, in table order, under the row's own title.
  for (const [title, raw, expected] of scenarios) {
    it(title, () => {
      expect(stripReasoningTagsFromText(raw)).toBe(expected);
    });
  }
});
|
||||
|
||||
describe("preserve mode", () => {
  // Each row is [test title, raw input, expected output]; every case is run
  // with { mode: "preserve" }.
  const scenarios: ReadonlyArray<readonly [string, string, string]> = [
    [
      // Unclosed tag: the tag itself is removed and the text after it is kept.
      "strips thinking tags and preserves trailing content in preserve mode",
      "Before <think>content without close",
      "Before content without close",
    ],
    [
      "strips properly paired tags in preserve mode",
      "Before <think>hidden</think> After",
      "Before After",
    ],
  ];

  for (const [title, raw, expected] of scenarios) {
    it(title, () => {
      const actual = stripReasoningTagsFromText(raw, { mode: "preserve" });
      expect(actual).toBe(expected);
    });
  }
});
|
||||
|
||||
describe("trim options", () => {
  // All three cases share one input: a paired think block with padding on
  // both sides of the remaining text.
  const padded = " <think>x</think> result ";

  it("trims both ends by default", () => {
    // No options argument is passed here, so the default trim mode applies.
    const actual = stripReasoningTagsFromText(padded);
    expect(actual).toBe("result");
  });

  it("respects trim: none", () => {
    const actual = stripReasoningTagsFromText(padded, { trim: "none" });
    expect(actual).toBe(" result ");
  });

  it("respects trim: start", () => {
    // Only the leading side is trimmed; the trailing padding is kept.
    const actual = stripReasoningTagsFromText(padded, { trim: "start" });
    expect(actual).toBe("result ");
  });
});
|
||||
|
||||
describe("final tag handling", () => {
  // Each row is [test title, raw input, expected output] with default options.
  // In both rows the <final> markers are removed and the wrapped text is kept.
  const scenarios: ReadonlyArray<readonly [string, string, string]> = [
    ["strips <final> tags", "Before <final>answer</final> After", "Before answer After"],
    [
      "strips only the tags, not content inside <final>",
      "<final>The answer is 42</final>",
      "The answer is 42",
    ],
  ];

  for (const [title, raw, expected] of scenarios) {
    it(title, () => {
      expect(stripReasoningTagsFromText(raw)).toBe(expected);
    });
  }
});
|
||||
|
||||
describe("edge cases", () => {
  // Every tag spelling below wraps the same text, so all rows share one
  // expected result.
  const expected = "Before After";

  // Each row is [test title, raw input] with default options.
  const variants: ReadonlyArray<readonly [string, string]> = [
    ["handles whitespace in tags", "Before < think >content< / think > After"],
    ["handles case variations", "Before <THINK>content</THINK> After"],
    ["handles <thought> variant", "Before <thought>content</thought> After"],
    ["handles <antthinking> variant", "Before <antthinking>content</antthinking> After"],
  ];

  for (const [title, raw] of variants) {
    it(title, () => {
      expect(stripReasoningTagsFromText(raw)).toBe(expected);
    });
  }
});
|
||||
});
|
||||
@ -32,12 +32,46 @@ export function stripReasoningTagsFromText(
|
||||
FINAL_TAG_RE.lastIndex = 0;
|
||||
}
|
||||
|
||||
// Collect all tag matches to check for proper pairing
|
||||
THINKING_TAG_RE.lastIndex = 0;
|
||||
const matches = [...cleaned.matchAll(THINKING_TAG_RE)];
|
||||
|
||||
// Check if tags are properly paired (every open tag has a matching close)
|
||||
let openCount = 0;
|
||||
let firstOpenIdx: number | null = null;
|
||||
for (const match of matches) {
|
||||
const isClose = match[1] === "/";
|
||||
if (isClose) {
|
||||
if (openCount > 0) {
|
||||
openCount--;
|
||||
}
|
||||
} else {
|
||||
if (firstOpenIdx === null) {
|
||||
firstOpenIdx = match.index ?? 0;
|
||||
}
|
||||
openCount++;
|
||||
}
|
||||
}
|
||||
|
||||
// If we end with unclosed tags and there's content before the first tag,
|
||||
// treat as literal text (user prose mentioning tags). This prevents data loss
|
||||
// when users discuss <think> tags in prose or code.
|
||||
// But if the first open tag is at the start (after trimming), treat as model
|
||||
// thinking output that was interrupted and strip it.
|
||||
if (openCount > 0 && mode !== "preserve") {
|
||||
const textBeforeFirstTag = firstOpenIdx !== null ? cleaned.slice(0, firstOpenIdx).trim() : "";
|
||||
if (textBeforeFirstTag.length > 0) {
|
||||
// Has content before the tag - likely prose mentioning the tag
|
||||
return applyTrim(cleaned, trimMode);
|
||||
}
|
||||
// Tag at start - likely model thinking output, continue with stripping
|
||||
}
|
||||
|
||||
let result = "";
|
||||
let lastIndex = 0;
|
||||
let inThinking = false;
|
||||
|
||||
for (const match of cleaned.matchAll(THINKING_TAG_RE)) {
|
||||
for (const match of matches) {
|
||||
const idx = match.index ?? 0;
|
||||
const isClose = match[1] === "/";
|
||||
|
||||
|
||||
Loading…
Reference in New Issue
Block a user