From c90cda794644e15dd2dfa8d1c03f079199b92c24 Mon Sep 17 00:00:00 2001 From: kiranjd Date: Thu, 29 Jan 2026 02:15:51 +0530 Subject: [PATCH] fix(sessions): truncate at incomplete tool calls instead of inserting synthetic results When session history contains an assistant message with tool calls but missing results (e.g., due to branching, interruption, or crash), the API rejects the request. Previously, we tried to repair this by inserting synthetic error results, but this approach was fragile and could lead to confusing conversation states. New approach: truncate history before the first incomplete tool call sequence. This is simpler and always produces valid history: - Agent continues working (just with less context) - No confusing synthetic error results - User may need to repeat their last request The truncation is logged with details about which tool call IDs were missing, helping track and diagnose the root cause. Co-Authored-By: Claude Opus 4.5 --- src/agents/pi-embedded-runner/google.ts | 21 +++- src/agents/session-transcript-repair.test.ts | 50 +++++++- src/agents/session-transcript-repair.ts | 124 +++++++++++++++---- 3 files changed, 166 insertions(+), 29 deletions(-) diff --git a/src/agents/pi-embedded-runner/google.ts b/src/agents/pi-embedded-runner/google.ts index 7b26d0d04..8c96037f4 100644 --- a/src/agents/pi-embedded-runner/google.ts +++ b/src/agents/pi-embedded-runner/google.ts @@ -12,7 +12,7 @@ import { sanitizeGoogleTurnOrdering, sanitizeSessionMessagesImages, } from "../pi-embedded-helpers.js"; -import { sanitizeToolUseResultPairing } from "../session-transcript-repair.js"; +import { repairToolUseResultPairing } from "../session-transcript-repair.js"; import { log } from "./logger.js"; import { describeUnknownError } from "./utils.js"; import { cleanToolSchemaForGemini } from "../pi-tools.schema.js"; @@ -332,9 +332,22 @@ export async function sanitizeSessionHistory(params: { const sanitizedThinking = policy.normalizeAntigravityThinkingBlocks ? sanitizeAntigravityThinkingBlocks(sanitizedImages) : sanitizedImages; - const repairedTools = policy.repairToolUseResultPairing - ? sanitizeToolUseResultPairing(sanitizedThinking) - : sanitizedThinking; + + let repairedTools = sanitizedThinking; + if (policy.repairToolUseResultPairing) { + const repairReport = repairToolUseResultPairing(sanitizedThinking); + repairedTools = repairReport.messages; + + // Log when session recovery truncation occurs - this helps track the issue + if (repairReport.truncation) { + log.warn( + `Session recovery: truncated ${repairReport.truncation.messagesDropped} messages ` + + `due to incomplete tool call sequence. ` + + `Missing tool results: [${repairReport.truncation.missingToolCallIds.join(", ")}]. ` + + `sessionId=${params.sessionId}`, + ); + } + } const isOpenAIResponsesApi = params.modelApi === "openai-responses" || params.modelApi === "openai-codex-responses"; diff --git a/src/agents/session-transcript-repair.test.ts b/src/agents/session-transcript-repair.test.ts index ccc63ec7f..59f9b41d9 100644 --- a/src/agents/session-transcript-repair.test.ts +++ b/src/agents/session-transcript-repair.test.ts @@ -1,9 +1,48 @@ import type { AgentMessage } from "@mariozechner/pi-agent-core"; import { describe, expect, it } from "vitest"; -import { sanitizeToolUseResultPairing } from "./session-transcript-repair.js"; +import { + repairToolUseResultPairing, + sanitizeToolUseResultPairing, +} from "./session-transcript-repair.js"; describe("sanitizeToolUseResultPairing", () => { - it("moves tool results directly after tool calls and inserts missing results", () => { + it("truncates at assistant with incomplete tool calls (missing results)", () => { + // When an assistant has tool calls but some results are missing, + // we truncate BEFORE that assistant to produce valid history. + // This is safer than inserting synthetic error results. + const input = [ + { role: "user", content: "hello" }, + { + role: "assistant", + content: [ + { type: "toolCall", id: "call_1", name: "read", arguments: {} }, + { type: "toolCall", id: "call_2", name: "exec", arguments: {} }, + ], + }, + { role: "user", content: "user message that should come after tool use" }, + { + role: "toolResult", + toolCallId: "call_2", + toolName: "exec", + content: [{ type: "text", text: "ok" }], + isError: false, + }, + ] satisfies AgentMessage[]; + + const report = repairToolUseResultPairing(input); + + // Truncates before the assistant with incomplete tool calls + expect(report.messages).toHaveLength(1); + expect(report.messages[0]?.role).toBe("user"); + + // Report shows truncation details + expect(report.truncation).toBeDefined(); + expect(report.truncation?.truncatedAtIndex).toBe(1); + expect(report.truncation?.missingToolCallIds).toEqual(["call_1"]); + expect(report.truncation?.messagesDropped).toBe(3); + }); + + it("moves tool results directly after tool calls when all results exist", () => { const input = [ { role: "assistant", @@ -13,6 +52,13 @@ describe("sanitizeToolUseResultPairing", () => { ], }, { role: "user", content: "user message that should come after tool use" }, + { + role: "toolResult", + toolCallId: "call_1", + toolName: "read", + content: [{ type: "text", text: "file contents" }], + isError: false, + }, { role: "toolResult", toolCallId: "call_2", diff --git a/src/agents/session-transcript-repair.ts b/src/agents/session-transcript-repair.ts index d680beb4d..c98f74e3b 100644 --- a/src/agents/session-transcript-repair.ts +++ b/src/agents/session-transcript-repair.ts @@ -66,22 +66,107 @@ export type ToolUseRepairReport = { droppedDuplicateCount: number; droppedOrphanCount: number; moved: boolean; + /** If truncation occurred, details about what was dropped. */ + truncation?: { + /** Index in original messages where truncation happened. */ + truncatedAtIndex: number; + /** Tool call IDs that were missing results. */ + missingToolCallIds: string[]; + /** Number of messages dropped. */ + messagesDropped: number; + }; }; +/** + * Find all tool result IDs that exist anywhere in the message array. + */ +function indexAllToolResultIds(messages: AgentMessage[]): Set { + const ids = new Set(); + for (const msg of messages) { + if (!msg || typeof msg !== "object") continue; + if ((msg as { role?: unknown }).role === "toolResult") { + const id = extractToolResultId(msg as Extract); + if (id) ids.add(id); + } + } + return ids; +} + +/** + * Find the first assistant message with tool calls that has ANY missing results. + * Returns the index of that assistant message, or -1 if all are complete. + */ +function findFirstIncompleteToolCallIndex(messages: AgentMessage[]): { + index: number; + missingIds: string[]; +} | null { + const allResultIds = indexAllToolResultIds(messages); + + for (let i = 0; i < messages.length; i++) { + const msg = messages[i]; + if (!msg || typeof msg !== "object") continue; + if ((msg as { role?: unknown }).role !== "assistant") continue; + + const assistant = msg as Extract; + const toolCalls = extractToolCallsFromAssistant(assistant); + if (toolCalls.length === 0) continue; + + const missingIds: string[] = []; + for (const call of toolCalls) { + if (!allResultIds.has(call.id)) { + missingIds.push(call.id); + } + } + + if (missingIds.length > 0) { + return { index: i, missingIds }; + } + } + + return null; +} + export function repairToolUseResultPairing(messages: AgentMessage[]): ToolUseRepairReport { - // Anthropic (and Cloud Code Assist) reject transcripts where assistant tool calls are not - // immediately followed by matching tool results. Session files can end up with results - // displaced (e.g. after user turns) or duplicated. Repair by: - // - moving matching toolResult messages directly after their assistant toolCall turn - // - inserting synthetic error toolResults for missing ids - // - dropping duplicate toolResults for the same id (anywhere in the transcript) + // Anthropic (and similar APIs) reject transcripts where assistant tool calls are not + // followed by matching tool results. This can happen when: + // - Session branching separates tool calls from their results + // - Interruptions (crash, network, user edit) occur mid-tool-execution + // - Compaction/pruning breaks the pairing + // + // Strategy: TRUNCATE at the first incomplete tool call sequence. + // This is simpler and safer than trying to repair with synthetic results: + // - Always produces valid history (just shorter) + // - No confusing synthetic error results in conversation + // - Agent continues working, may just need to redo some work + // + // After truncation, we still: + // - Move displaced tool results to correct positions + // - Drop duplicate tool results + // - Drop orphaned tool results (results without matching calls) + + // First pass: check if truncation is needed + const incomplete = findFirstIncompleteToolCallIndex(messages); + let workingMessages = messages; + let truncation: ToolUseRepairReport["truncation"]; + + if (incomplete) { + // Truncate before the incomplete assistant message + workingMessages = messages.slice(0, incomplete.index); + truncation = { + truncatedAtIndex: incomplete.index, + missingToolCallIds: incomplete.missingIds, + messagesDropped: messages.length - incomplete.index, + }; + } + + // Second pass: repair ordering and duplicates in the (possibly truncated) messages const out: AgentMessage[] = []; const added: Array> = []; const seenToolResultIds = new Set(); let droppedDuplicateCount = 0; let droppedOrphanCount = 0; let moved = false; - let changed = false; + let changed = truncation !== undefined; const pushToolResult = (msg: Extract) => { const id = extractToolResultId(msg); @@ -94,8 +179,8 @@ export function repairToolUseResultPairing(messages: AgentMessage[]): ToolUseRep out.push(msg); }; - for (let i = 0; i < messages.length; i += 1) { - const msg = messages[i] as AgentMessage; + for (let i = 0; i < workingMessages.length; i += 1) { + const msg = workingMessages[i] as AgentMessage; if (!msg || typeof msg !== "object") { out.push(msg); continue; @@ -104,8 +189,7 @@ export function repairToolUseResultPairing(messages: AgentMessage[]): ToolUseRep const role = (msg as { role?: unknown }).role; if (role !== "assistant") { // Tool results must only appear directly after the matching assistant tool call turn. - // Any "free-floating" toolResult entries in session history can make strict providers - // (Anthropic-compatible APIs, MiniMax, Cloud Code Assist) reject the entire request. + // Any "free-floating" toolResult entries can make strict providers reject the request. if (role !== "toolResult") { out.push(msg); } else { @@ -123,13 +207,12 @@ export function repairToolUseResultPairing(messages: AgentMessage[]): ToolUseRep } const toolCallIds = new Set(toolCalls.map((t) => t.id)); - const spanResultsById = new Map>(); const remainder: AgentMessage[] = []; let j = i + 1; - for (; j < messages.length; j += 1) { - const next = messages[j] as AgentMessage; + for (; j < workingMessages.length; j += 1) { + const next = workingMessages[j] as AgentMessage; if (!next || typeof next !== "object") { remainder.push(next); continue; @@ -170,19 +253,13 @@ export function repairToolUseResultPairing(messages: AgentMessage[]): ToolUseRep changed = true; } + // All tool calls should have results (we truncated incomplete ones above) for (const call of toolCalls) { const existing = spanResultsById.get(call.id); if (existing) { pushToolResult(existing); - } else { - const missing = makeMissingToolResult({ - toolCallId: call.id, - toolName: call.name, - }); - added.push(missing); - changed = true; - pushToolResult(missing); } + // No synthetic results - we truncated incomplete sequences } for (const rem of remainder) { @@ -197,10 +274,11 @@ export function repairToolUseResultPairing(messages: AgentMessage[]): ToolUseRep const changedOrMoved = changed || moved; return { - messages: changedOrMoved ? out : messages, + messages: changedOrMoved ? out : workingMessages, added, droppedDuplicateCount, droppedOrphanCount, moved: changedOrMoved, + truncation, }; }