From ccf00e10cb886138408320585c3cef3788191869 Mon Sep 17 00:00:00 2001 From: Nathan Hangen Date: Wed, 28 Jan 2026 19:18:14 -0500 Subject: [PATCH 1/3] fix(session): refine sanitization logic and add tests Addresses review feedback: parse valid JSON strings, add type guards, add logging, handle arguments alias. --- src/agents/session-transcript-repair.test.ts | 77 ++++++++++++++++++++ src/agents/session-transcript-repair.ts | 72 ++++++++++++++++++ 2 files changed, 149 insertions(+) diff --git a/src/agents/session-transcript-repair.test.ts b/src/agents/session-transcript-repair.test.ts index ccc63ec7f..96d42ae1a 100644 --- a/src/agents/session-transcript-repair.test.ts +++ b/src/agents/session-transcript-repair.test.ts @@ -110,3 +110,80 @@ describe("sanitizeToolUseResultPairing", () => { expect(out.map((m) => m.role)).toEqual(["user", "assistant"]); }); }); + +import { sanitizeToolUseArgs } from "./session-transcript-repair.js"; + +describe("sanitizeToolUseArgs", () => { + it("preserves valid objects in input/arguments", () => { + const input = [ + { + role: "assistant", + content: [ + { type: "toolUse", id: "1", name: "tool", input: { key: "value" } }, + ], + }, + ] as any; + const out = sanitizeToolUseArgs(input); + expect((out[0].content[0] as any).input).toEqual({ key: "value" }); + expect(out).toBe(input); // No change, referentially equal + }); + + it("parses valid JSON strings in input", () => { + const input = [ + { + role: "assistant", + content: [ + { type: "toolUse", id: "1", name: "tool", input: '{"key": "value"}' }, + ], + }, + ] as any; + const out = sanitizeToolUseArgs(input); + expect((out[0].content[0] as any).input).toEqual({ key: "value" }); + expect(out).not.toBe(input); // Changed + }); + + it("sanitizes invalid JSON strings in input", () => { + const input = [ + { + role: "assistant", + content: [ + { type: "toolUse", id: "1", name: "tool", input: '{ bad json }' }, + ], + }, + ] as any; + const out = sanitizeToolUseArgs(input); + const block = out[0].content[0] as any; + expect(block.input).toEqual({}); + expect(block._sanitized).toBe(true); + expect(block._originalInput).toBe('{ bad json }'); + }); + + it("handles 'arguments' alias", () => { + const input = [ + { + role: "assistant", + content: [ + { type: "toolCall", id: "1", name: "tool", arguments: '{"key": "val"}' }, + ], + }, + ] as any; + const out = sanitizeToolUseArgs(input); + const block = out[0].content[0] as any; + expect(block.arguments).toEqual({ key: "val" }); + }); + + it("sanitizes invalid JSON in 'arguments' alias", () => { + const input = [ + { + role: "assistant", + content: [ + { type: "toolCall", id: "1", name: "tool", arguments: 'bad' }, + ], + }, + ] as any; + const out = sanitizeToolUseArgs(input); + const block = out[0].content[0] as any; + expect(block.arguments).toEqual({}); + expect(block._sanitized).toBe(true); + }); +}); diff --git a/src/agents/session-transcript-repair.ts b/src/agents/session-transcript-repair.ts index d680beb4d..e5e4786ff 100644 --- a/src/agents/session-transcript-repair.ts +++ b/src/agents/session-transcript-repair.ts @@ -56,6 +56,78 @@ function makeMissingToolResult(params: { export { makeMissingToolResult }; +export function sanitizeToolUseArgs(messages: AgentMessage[]): AgentMessage[] { + // Creates new message objects only when sanitization is needed; otherwise + // returns the original messages to avoid unnecessary copying, while guarding + // against corrupt JSON in tool arguments that could break the session. + const out: AgentMessage[] = []; + let changed = false; + + for (const msg of messages) { + if (msg.role !== "assistant" || !Array.isArray(msg.content)) { + out.push(msg); + continue; + } + + const content = msg.content; + const nextContent: any[] = []; + let msgChanged = false; + + for (const block of content) { + const anyBlock = block as any; + if ( + anyBlock && + typeof anyBlock === "object" && + (anyBlock.type === "toolUse" || anyBlock.type === "toolCall" || anyBlock.type === "functionCall") + ) { + const toolBlock = block as any; + // Handle both 'input' and 'arguments' fields (some providers use arguments) + const inputField = "input" in toolBlock ? "input" : "arguments" in toolBlock ? "arguments" : null; + + if (inputField && typeof toolBlock[inputField] === "string") { + try { + // Consistency: Always parse valid JSON strings into objects + const parsed = JSON.parse(toolBlock[inputField]); + nextContent.push({ + ...toolBlock, + [inputField]: parsed, + }); + msgChanged = true; + } catch { + // Invalid JSON found in tool args. + // Replace with empty object to prevent downstream crashes. + console.warn( + `[SessionRepair] Sanitized malformed JSON in tool use '${toolBlock.name || "unknown"}'. Original: ${toolBlock[inputField]}` + ); + nextContent.push({ + ...toolBlock, + [inputField]: {}, + _sanitized: true, + _originalInput: toolBlock[inputField], + }); + msgChanged = true; + } + } else { + nextContent.push(block); + } + } else { + nextContent.push(block); + } + } + + if (msgChanged) { + out.push({ + ...msg, + content: nextContent, + } as AgentMessage); + changed = true; + } else { + out.push(msg); + } + } + + return changed ? out : messages; +} export function sanitizeToolUseResultPairing(messages: AgentMessage[]): AgentMessage[] { return repairToolUseResultPairing(messages).messages; } From dc2c40468d6c0eca487941c05dd12588b4ecdee1 Mon Sep 17 00:00:00 2001 From: Nathan Hangen Date: Thu, 29 Jan 2026 10:09:47 -0500 Subject: [PATCH 2/3] style: fix formatting in session transcript repair --- src/agents/session-transcript-repair.test.ts | 22 ++++++-------------- src/agents/session-transcript-repair.ts | 9 +++++--- 2 files changed, 12 insertions(+), 19 deletions(-) diff --git a/src/agents/session-transcript-repair.test.ts b/src/agents/session-transcript-repair.test.ts index 96d42ae1a..5eaff31a1 100644 --- a/src/agents/session-transcript-repair.test.ts +++ b/src/agents/session-transcript-repair.test.ts @@ -118,9 +118,7 @@ describe("sanitizeToolUseArgs", () => { const input = [ { role: "assistant", - content: [ - { type: "toolUse", id: "1", name: "tool", input: { key: "value" } }, - ], + content: [{ type: "toolUse", id: "1", name: "tool", input: { key: "value" } }], }, ] as any; const out = sanitizeToolUseArgs(input); @@ -132,9 +130,7 @@ describe("sanitizeToolUseArgs", () => { const input = [ { role: "assistant", - content: [ - { type: "toolUse", id: "1", name: "tool", input: '{"key": "value"}' }, - ], + content: [{ type: "toolUse", id: "1", name: "tool", input: '{"key": "value"}' }], }, ] as any; const out = sanitizeToolUseArgs(input); @@ -146,25 +142,21 @@ describe("sanitizeToolUseArgs", () => { const input = [ { role: "assistant", - content: [ - { type: "toolUse", id: "1", name: "tool", input: '{ bad json }' }, - ], + content: [{ type: "toolUse", id: "1", name: "tool", input: "{ bad json }" }], }, ] as any; const out = sanitizeToolUseArgs(input); const block = out[0].content[0] as any; expect(block.input).toEqual({}); expect(block._sanitized).toBe(true); - expect(block._originalInput).toBe('{ bad json }'); + expect(block._originalInput).toBe("{ bad json }"); }); it("handles 'arguments' alias", () => { const input = [ { role: "assistant", - content: [ - { type: "toolCall", id: "1", name: "tool", arguments: '{"key": "val"}' }, - ], + content: [{ type: "toolCall", id: "1", name: "tool", arguments: '{"key": "val"}' }], }, ] as any; const out = sanitizeToolUseArgs(input); @@ -176,9 +168,7 @@ describe("sanitizeToolUseArgs", () => { const input = [ { role: "assistant", - content: [ - { type: "toolCall", id: "1", name: "tool", arguments: 'bad' }, - ], + content: [{ type: "toolCall", id: "1", name: "tool", arguments: "bad" }], }, ] as any; const out = sanitizeToolUseArgs(input); diff --git a/src/agents/session-transcript-repair.ts b/src/agents/session-transcript-repair.ts index e5e4786ff..61bcb8f8d 100644 --- a/src/agents/session-transcript-repair.ts +++ b/src/agents/session-transcript-repair.ts @@ -78,11 +78,14 @@ export function sanitizeToolUseArgs(messages: AgentMessage[]): AgentMessage[] { if ( anyBlock && typeof anyBlock === "object" && - (anyBlock.type === "toolUse" || anyBlock.type === "toolCall" || anyBlock.type === "functionCall") + (anyBlock.type === "toolUse" || + anyBlock.type === "toolCall" || + anyBlock.type === "functionCall") ) { const toolBlock = block as any; // Handle both 'input' and 'arguments' fields (some providers use arguments) - const inputField = "input" in toolBlock ? "input" : "arguments" in toolBlock ? "arguments" : null; + const inputField = + "input" in toolBlock ? "input" : "arguments" in toolBlock ? "arguments" : null; if (inputField && typeof toolBlock[inputField] === "string") { try { @@ -97,7 +100,7 @@ export function sanitizeToolUseArgs(messages: AgentMessage[]): AgentMessage[] { // Invalid JSON found in tool args. // Replace with empty object to prevent downstream crashes. console.warn( - `[SessionRepair] Sanitized malformed JSON in tool use '${toolBlock.name || "unknown"}'. Original: ${toolBlock[inputField]}` + `[SessionRepair] Sanitized malformed JSON in tool use '${toolBlock.name || "unknown"}'. Original: ${toolBlock[inputField]}`, ); nextContent.push({ ...toolBlock, From f041a95006ee65368a22dc3940cdf106cff63b37 Mon Sep 17 00:00:00 2001 From: Nathan Hangen Date: Thu, 29 Jan 2026 11:12:11 -0500 Subject: [PATCH 3/3] refactor(session): improve sanitization observability and add type-safe tests --- src/agents/session-transcript-repair.test.ts | 173 +++++++++++++------ src/agents/session-transcript-repair.ts | 24 ++- 2 files changed, 140 insertions(+), 57 deletions(-) diff --git a/src/agents/session-transcript-repair.test.ts b/src/agents/session-transcript-repair.test.ts index 5eaff31a1..0b79b2c3d 100644 --- a/src/agents/session-transcript-repair.test.ts +++ b/src/agents/session-transcript-repair.test.ts @@ -1,26 +1,33 @@ import type { AgentMessage } from "@mariozechner/pi-agent-core"; import { describe, expect, it } from "vitest"; -import { sanitizeToolUseResultPairing } from "./session-transcript-repair.js"; +import { sanitizeToolUseArgs, sanitizeToolUseResultPairing } from "./session-transcript-repair.js"; + +const now = Date.now(); describe("sanitizeToolUseResultPairing", () => { it("moves tool results directly after tool calls and inserts missing results", () => { - const input = [ + const input: AgentMessage[] = [ { role: "assistant", content: [ { type: "toolCall", id: "call_1", name: "read", arguments: {} }, { type: "toolCall", id: "call_2", name: "exec", arguments: {} }, ], + timestamp: now, + api: "openai", + provider: "openai", + model: "gpt-4", }, - { role: "user", content: "user message that should come after tool use" }, + { role: "user", content: "user message that should come after tool use", timestamp: now }, { role: "toolResult", toolCallId: "call_2", toolName: "exec", content: [{ type: "text", text: "ok" }], isError: false, + timestamp: now, }, - ] satisfies AgentMessage[]; + ]; const out = sanitizeToolUseResultPairing(input); expect(out[0]?.role).toBe("assistant"); @@ -32,10 +39,14 @@ describe("sanitizeToolUseResultPairing", () => { }); it("drops duplicate tool results for the same id within a span", () => { - const input = [ + const input: AgentMessage[] = [ { role: "assistant", content: [{ type: "toolCall", id: "call_1", name: "read", arguments: {} }], + timestamp: now, + api: "openai", + provider: "openai", + model: "gpt-4", }, { role: "toolResult", @@ -43,6 +54,7 @@ describe("sanitizeToolUseResultPairing", () => { toolName: "read", content: [{ type: "text", text: "first" }], isError: false, + timestamp: now, }, { role: "toolResult", @@ -50,19 +62,24 @@ describe("sanitizeToolUseResultPairing", () => { toolName: "read", content: [{ type: "text", text: "second" }], isError: false, + timestamp: now, }, - { role: "user", content: "ok" }, - ] satisfies AgentMessage[]; + { role: "user", content: "ok", timestamp: now }, + ]; const out = sanitizeToolUseResultPairing(input); expect(out.filter((m) => m.role === "toolResult")).toHaveLength(1); }); it("drops duplicate tool results for the same id across the transcript", () => { - const input = [ + const input: AgentMessage[] = [ { role: "assistant", content: [{ type: "toolCall", id: "call_1", name: "read", arguments: {} }], + timestamp: now, + api: "openai", + provider: "openai", + model: "gpt-4", }, { role: "toolResult", @@ -70,16 +87,25 @@ describe("sanitizeToolUseResultPairing", () => { toolName: "read", content: [{ type: "text", text: "first" }], isError: false, + timestamp: now, + }, + { + role: "assistant", + content: [{ type: "text", text: "ok" }], + timestamp: now, + api: "openai", + provider: "openai", + model: "gpt-4", }, - { role: "assistant", content: [{ type: "text", text: "ok" }] }, { role: "toolResult", toolCallId: "call_1", toolName: "read", content: [{ type: "text", text: "second (duplicate)" }], isError: false, + timestamp: now, }, - ] satisfies AgentMessage[]; + ]; const out = sanitizeToolUseResultPairing(input); const results = out.filter((m) => m.role === "toolResult") as Array<{ @@ -90,20 +116,25 @@ describe("sanitizeToolUseResultPairing", () => { }); it("drops orphan tool results that do not match any tool call", () => { - const input = [ - { role: "user", content: "hello" }, + const input: AgentMessage[] = [ + { role: "user", content: "hello", timestamp: now }, { role: "toolResult", toolCallId: "call_orphan", toolName: "read", content: [{ type: "text", text: "orphan" }], isError: false, + timestamp: now, }, { role: "assistant", content: [{ type: "text", text: "ok" }], + timestamp: now, + api: "openai", + provider: "openai", + model: "gpt-4", }, - ] satisfies AgentMessage[]; + ]; const out = sanitizeToolUseResultPairing(input); expect(out.some((m) => m.role === "toolResult")).toBe(false); @@ -111,69 +142,105 @@ describe("sanitizeToolUseResultPairing", () => { }); }); -import { sanitizeToolUseArgs } from "./session-transcript-repair.js"; - describe("sanitizeToolUseArgs", () => { - it("preserves valid objects in input/arguments", () => { - const input = [ + it("preserves valid JSON strings in input fields", () => { + const input: AgentMessage[] = [ { role: "assistant", - content: [{ type: "toolUse", id: "1", name: "tool", input: { key: "value" } }], + content: [ + { type: "toolCall", id: "call_1", name: "read", input: '{"path":"foo.txt"}' } as any, + ], + timestamp: now, + api: "openai", + provider: "openai", + model: "gpt-4", }, - ] as any; - const out = sanitizeToolUseArgs(input); - expect((out[0].content[0] as any).input).toEqual({ key: "value" }); - expect(out).toBe(input); // No change, referentially equal + ]; + + const result = sanitizeToolUseArgs(input); + expect(result.changed).toBe(true); + const tool = (result.messages[0] as any).content[0]; + expect(tool.input).toEqual({ path: "foo.txt" }); + expect(result.sanitizedCount).toBe(0); }); - it("parses valid JSON strings in input", () => { - const input = [ + it("replaces invalid JSON strings with {} and sets metadata", () => { + const input: AgentMessage[] = [ { role: "assistant", - content: [{ type: "toolUse", id: "1", name: "tool", input: '{"key": "value"}' }], + content: [{ type: "toolCall", id: "call_1", name: "read", input: "{ invalid json" } as any], + timestamp: now, + api: "openai", + provider: "openai", + model: "gpt-4", }, - ] as any; - const out = sanitizeToolUseArgs(input); - expect((out[0].content[0] as any).input).toEqual({ key: "value" }); - expect(out).not.toBe(input); // Changed + ]; + + const result = sanitizeToolUseArgs(input); + expect(result.changed).toBe(true); + expect(result.sanitizedCount).toBe(1); + const tool = (result.messages[0] as any).content[0]; + expect(tool.input).toEqual({}); + expect(tool._sanitized).toBe(true); + expect(tool._originalInput).toBe("{ invalid json"); }); - it("sanitizes invalid JSON strings in input", () => { - const input = [ + it("preserves already-parsed object values in input fields", () => { + const input: AgentMessage[] = [ { role: "assistant", - content: [{ type: "toolUse", id: "1", name: "tool", input: "{ bad json }" }], + content: [ + { type: "toolCall", id: "call_1", name: "read", input: { path: "bar.txt" } } as any, + ], + timestamp: now, + api: "openai", + provider: "openai", + model: "gpt-4", }, - ] as any; - const out = sanitizeToolUseArgs(input); - const block = out[0].content[0] as any; - expect(block.input).toEqual({}); - expect(block._sanitized).toBe(true); - expect(block._originalInput).toBe("{ bad json }"); + ]; + + const result = sanitizeToolUseArgs(input); + expect(result.changed).toBe(false); + expect(result.messages).toBe(input); + const tool = (result.messages[0] as any).content[0]; + expect(tool.input).toEqual({ path: "bar.txt" }); }); - it("handles 'arguments' alias", () => { - const input = [ + it("handles the 'arguments' alias used by some providers", () => { + const input: AgentMessage[] = [ { role: "assistant", - content: [{ type: "toolCall", id: "1", name: "tool", arguments: '{"key": "val"}' }], + content: [ + { type: "toolCall", id: "call_1", name: "read", arguments: '{"path":"baz.txt"}' } as any, + ], + timestamp: now, + api: "openai", + provider: "openai", + model: "gpt-4", }, - ] as any; - const out = sanitizeToolUseArgs(input); - const block = out[0].content[0] as any; - expect(block.arguments).toEqual({ key: "val" }); + ]; + + const result = sanitizeToolUseArgs(input); + expect(result.changed).toBe(true); + const tool = (result.messages[0] as any).content[0]; + expect(tool.arguments).toEqual({ path: "baz.txt" }); }); - it("sanitizes invalid JSON in 'arguments' alias", () => { - const input = [ + it("leaves messages without tool blocks unchanged", () => { + const input: AgentMessage[] = [ + { role: "user", content: "hello", timestamp: now }, { role: "assistant", - content: [{ type: "toolCall", id: "1", name: "tool", arguments: "bad" }], + content: [{ type: "text", text: "hi" }], + timestamp: now, + api: "openai", + provider: "openai", + model: "gpt-4", }, - ] as any; - const out = sanitizeToolUseArgs(input); - const block = out[0].content[0] as any; - expect(block.arguments).toEqual({}); - expect(block._sanitized).toBe(true); + ]; + + const result = sanitizeToolUseArgs(input); + expect(result.changed).toBe(false); + expect(result.messages).toBe(input); }); }); diff --git a/src/agents/session-transcript-repair.ts b/src/agents/session-transcript-repair.ts index 61bcb8f8d..76c08f37f 100644 --- a/src/agents/session-transcript-repair.ts +++ b/src/agents/session-transcript-repair.ts @@ -56,12 +56,19 @@ function makeMissingToolResult(params: { export { makeMissingToolResult }; -export function sanitizeToolUseArgs(messages: AgentMessage[]): AgentMessage[] { +export type ToolUseSanitizationReport = { + messages: AgentMessage[]; + sanitizedCount: number; + changed: boolean; +}; + +export function sanitizeToolUseArgs(messages: AgentMessage[]): ToolUseSanitizationReport { // Creates new message objects only when sanitization is needed; otherwise // returns the original messages to avoid unnecessary copying, while guarding // against corrupt JSON in tool arguments that could break the session. const out: AgentMessage[] = []; let changed = false; + let sanitizedCount = 0; for (const msg of messages) { if (msg.role !== "assistant" || !Array.isArray(msg.content)) { @@ -99,8 +106,11 @@ export function sanitizeToolUseArgs(messages: AgentMessage[]): AgentMessage[] { } catch { // Invalid JSON found in tool args. // Replace with empty object to prevent downstream crashes. + sanitizedCount += 1; + const original = String(toolBlock[inputField]); + const sample = original.length > 100 ? `${original.slice(0, 100)}...` : original; console.warn( - `[SessionRepair] Sanitized malformed JSON in tool use '${toolBlock.name || "unknown"}'. Original: ${toolBlock[inputField]}`, + `[SessionRepair] Sanitized malformed JSON in tool use '${toolBlock.name || "unknown"}'. Original: ${sample}`, ); nextContent.push({ ...toolBlock, @@ -129,10 +139,16 @@ export function sanitizeToolUseArgs(messages: AgentMessage[]): AgentMessage[] { } } - return changed ? out : messages; + return { + messages: changed ? out : messages, + sanitizedCount, + changed, + }; } + export function sanitizeToolUseResultPairing(messages: AgentMessage[]): AgentMessage[] { - return repairToolUseResultPairing(messages).messages; + const sanitized = sanitizeToolUseArgs(messages); + return repairToolUseResultPairing(sanitized.messages).messages; } export type ToolUseRepairReport = {