diff --git a/src/shared/text/reasoning-tags.test.ts b/src/shared/text/reasoning-tags.test.ts
new file mode 100644
index 000000000..932f38449
--- /dev/null
+++ b/src/shared/text/reasoning-tags.test.ts
@@ -0,0 +1,161 @@
+import { describe, expect, it } from "vitest";
+
+import { stripReasoningTagsFromText } from "./reasoning-tags.js";
+
+// Spec for stripReasoningTagsFromText: paired reasoning blocks are removed with
+// their content, an unclosed opener is stripped only when it starts the text,
+// and <final> wrappers are removed while their content is kept.
+//
+// Each fixture spells its tag out literally so input and expectation visibly
+// differ. Expectations assume only the tag span is removed and neighbouring
+// whitespace is kept (hence the double spaces); the `trim` option governs the
+// outer ends.
+// NOTE(review): tag literals and inner whitespace were reconstructed from the
+// test titles - run the suite against ./reasoning-tags.js to confirm.
+describe("stripReasoningTagsFromText", () => {
+  describe("basic functionality", () => {
+    it("returns empty string as-is", () => {
+      expect(stripReasoningTagsFromText("")).toBe("");
+    });
+
+    it("returns text without think tags as-is", () => {
+      expect(stripReasoningTagsFromText("Hello world")).toBe("Hello world");
+    });
+
+    it("strips complete <think>...</think> blocks", () => {
+      const input = "Before <think>reasoning content</think> After";
+      expect(stripReasoningTagsFromText(input)).toBe("Before  After");
+    });
+
+    it("strips complete <thinking>...</thinking> blocks", () => {
+      const input = "Before <thinking>reasoning content</thinking> After";
+      expect(stripReasoningTagsFromText(input)).toBe("Before  After");
+    });
+
+    it("strips multiple think blocks", () => {
+      const input = "A <think>x</think> B <think>y</think> C";
+      expect(stripReasoningTagsFromText(input)).toBe("A  B  C");
+    });
+
+    it("strips nested content (flat parse)", () => {
+      // Flat parse: the first closer ends the block, so " more" survives.
+      const input = "Start <think>outer <think>inner</think> more</think> End";
+      expect(stripReasoningTagsFromText(input)).toBe("Start  more End");
+    });
+  });
+
+  describe("unclosed tag handling - issue #3952", () => {
+    it("preserves content when <think> appears in prose (not at start)", () => {
+      const input = "Use the <think> tag to enable thinking. Then do X.";
+      // Content before tag - treat as prose mentioning the tag
+      expect(stripReasoningTagsFromText(input)).toBe(
+        "Use the <think> tag to enable thinking. Then do X.",
+      );
+    });
+
+    it("preserves content when <thinking> appears in prose", () => {
+      const input = "The <thinking> mode is useful. Try it!";
+      expect(stripReasoningTagsFromText(input)).toBe("The <thinking> mode is useful. Try it!");
+    });
+
+    it("preserves code block containing <think> tag", () => {
+      const input = "Example:\n```\n<think>\nprint('hello')\n```\nMore text";
+      expect(stripReasoningTagsFromText(input)).toBe(
+        "Example:\n```\n<think>\nprint('hello')\n```\nMore text",
+      );
+    });
+
+    it("strips paired tags even when discussing them", () => {
+      const input = "You can use <think> and </think> to wrap reasoning.";
+      // Tags are properly paired, so content between is stripped
+      expect(stripReasoningTagsFromText(input)).toBe("You can use  to wrap reasoning.");
+    });
+
+    it("preserves multiple unclosed tags when in prose", () => {
+      const input = "First <think> then <thinking> more text";
+      expect(stripReasoningTagsFromText(input)).toBe("First <think> then <thinking> more text");
+    });
+
+    it("strips unclosed tag at start (model thinking output)", () => {
+      // When <think> is at the very start, treat as model thinking that was cut off
+      const input = "<think>Pensando sobre el problema...";
+      expect(stripReasoningTagsFromText(input)).toBe("");
+    });
+
+    it("strips unclosed tag at start with whitespace", () => {
+      const input = "  <think>Some thinking content...";
+      expect(stripReasoningTagsFromText(input)).toBe("");
+    });
+  });
+
+  describe("preserve mode", () => {
+    it("strips thinking tags and preserves trailing content in preserve mode", () => {
+      // Preserve mode skips the prose heuristic: the unclosed opener itself is
+      // removed, but the text that follows it is kept instead of being dropped.
+      const input = "Before <think>content without close";
+      const result = stripReasoningTagsFromText(input, { mode: "preserve" });
+      expect(result).toBe("Before content without close");
+    });
+
+    it("strips properly paired tags in preserve mode", () => {
+      const input = "Before <think>hidden</think> After";
+      const result = stripReasoningTagsFromText(input, { mode: "preserve" });
+      expect(result).toBe("Before  After");
+    });
+  });
+
+  describe("trim options", () => {
+    it("trims both ends by default", () => {
+      const input = "  <think>x</think>  result  ";
+      expect(stripReasoningTagsFromText(input)).toBe("result");
+    });
+
+    it("respects trim: none", () => {
+      const input = "  <think>x</think>  result  ";
+      // Nothing is trimmed: both leading runs (2 + 2 spaces) and the trailing run stay
+      expect(stripReasoningTagsFromText(input, { trim: "none" })).toBe("    result  ");
+    });
+
+    it("respects trim: start", () => {
+      const input = "  <think>x</think>  result  ";
+      // trim: start only trims from the start, not end
+      expect(stripReasoningTagsFromText(input, { trim: "start" })).toBe("result  ");
+    });
+  });
+
+  describe("final tag handling", () => {
+    it("strips <final> tags", () => {
+      const input = "Before <final>answer</final> After";
+      expect(stripReasoningTagsFromText(input)).toBe("Before answer After");
+    });
+
+    it("strips only the tags, not content inside <final>", () => {
+      const input = "<final>The answer is 42</final>";
+      expect(stripReasoningTagsFromText(input)).toBe("The answer is 42");
+    });
+  });
+
+  describe("edge cases", () => {
+    it("handles whitespace in tags", () => {
+      const input = "Before < think >content< / think > After";
+      expect(stripReasoningTagsFromText(input)).toBe("Before  After");
+    });
+
+    it("handles case variations", () => {
+      const input = "Before <THINK>content</THINK> After";
+      expect(stripReasoningTagsFromText(input)).toBe("Before  After");
+    });
+
+    it("handles <thought> variant", () => {
+      // NOTE(review): variant name assumed to be <thought> - confirm against THINKING_TAG_RE
+      const input = "Before <thought>content</thought> After";
+      expect(stripReasoningTagsFromText(input)).toBe("Before  After");
+    });
+
+    it("handles <antthinking> variant", () => {
+      // NOTE(review): variant name assumed to be <antthinking> - confirm against THINKING_TAG_RE
+      const input = "Before <antthinking>content</antthinking> After";
+      expect(stripReasoningTagsFromText(input)).toBe("Before  After");
+    });
+  });
+});
diff --git a/src/shared/text/reasoning-tags.ts b/src/shared/text/reasoning-tags.ts index 822138e55..ab37f2897 100644 --- a/src/shared/text/reasoning-tags.ts +++ b/src/shared/text/reasoning-tags.ts @@ -32,12 +32,46 @@ export function stripReasoningTagsFromText( FINAL_TAG_RE.lastIndex = 0; } + // Collect all tag matches to check for proper pairing THINKING_TAG_RE.lastIndex = 0; + const matches = [...cleaned.matchAll(THINKING_TAG_RE)]; + + // Check if tags are properly paired (every open tag has a matching close) + let openCount = 0; + let firstOpenIdx: number | null = null; + for (const match of 
matches) { + const isClose = match[1] === "/"; + if (isClose) { + if (openCount > 0) { + openCount--; + } + } else { + if (firstOpenIdx === null) { + firstOpenIdx = match.index ?? 0; + } + openCount++; + } + } + + // If we end with unclosed tags and there's content before the first tag, + // treat as literal text (user prose mentioning tags). This prevents data loss + // when users discuss tags in prose or code. + // But if the first open tag is at the start (after trimming), treat as model + // thinking output that was interrupted and strip it. + if (openCount > 0 && mode !== "preserve") { + const textBeforeFirstTag = firstOpenIdx !== null ? cleaned.slice(0, firstOpenIdx).trim() : ""; + if (textBeforeFirstTag.length > 0) { + // Has content before the tag - likely prose mentioning the tag + return applyTrim(cleaned, trimMode); + } + // Tag at start - likely model thinking output, continue with stripping + } + let result = ""; let lastIndex = 0; let inThinking = false; - for (const match of cleaned.matchAll(THINKING_TAG_RE)) { + for (const match of matches) { const idx = match.index ?? 0; const isClose = match[1] === "/";