diff --git a/src/agents/pi-embedded-runner/google.ts b/src/agents/pi-embedded-runner/google.ts index 7b26d0d04..8c96037f4 100644 --- a/src/agents/pi-embedded-runner/google.ts +++ b/src/agents/pi-embedded-runner/google.ts @@ -12,7 +12,7 @@ import { sanitizeGoogleTurnOrdering, sanitizeSessionMessagesImages, } from "../pi-embedded-helpers.js"; -import { sanitizeToolUseResultPairing } from "../session-transcript-repair.js"; +import { repairToolUseResultPairing } from "../session-transcript-repair.js"; import { log } from "./logger.js"; import { describeUnknownError } from "./utils.js"; import { cleanToolSchemaForGemini } from "../pi-tools.schema.js"; @@ -332,9 +332,22 @@ export async function sanitizeSessionHistory(params: { const sanitizedThinking = policy.normalizeAntigravityThinkingBlocks ? sanitizeAntigravityThinkingBlocks(sanitizedImages) : sanitizedImages; - const repairedTools = policy.repairToolUseResultPairing - ? sanitizeToolUseResultPairing(sanitizedThinking) - : sanitizedThinking; + + let repairedTools = sanitizedThinking; + if (policy.repairToolUseResultPairing) { + const repairReport = repairToolUseResultPairing(sanitizedThinking); + repairedTools = repairReport.messages; + + // Log when session recovery truncation occurs - this helps track the issue + if (repairReport.truncation) { + log.warn( + `Session recovery: truncated ${repairReport.truncation.messagesDropped} messages ` + + `due to incomplete tool call sequence. ` + + `Missing tool results: [${repairReport.truncation.missingToolCallIds.join(", ")}]. 
` + + `sessionId=${params.sessionId}`, + ); + } + } const isOpenAIResponsesApi = params.modelApi === "openai-responses" || params.modelApi === "openai-codex-responses"; diff --git a/src/agents/session-transcript-repair.test.ts b/src/agents/session-transcript-repair.test.ts index ccc63ec7f..59f9b41d9 100644 --- a/src/agents/session-transcript-repair.test.ts +++ b/src/agents/session-transcript-repair.test.ts @@ -1,9 +1,48 @@ import type { AgentMessage } from "@mariozechner/pi-agent-core"; import { describe, expect, it } from "vitest"; -import { sanitizeToolUseResultPairing } from "./session-transcript-repair.js"; +import { + repairToolUseResultPairing, + sanitizeToolUseResultPairing, +} from "./session-transcript-repair.js"; describe("sanitizeToolUseResultPairing", () => { - it("moves tool results directly after tool calls and inserts missing results", () => { + it("truncates at assistant with incomplete tool calls (missing results)", () => { + // When an assistant has tool calls but some results are missing, + // we truncate BEFORE that assistant to produce valid history. + // This is safer than inserting synthetic error results. 
+ const input = [ + { role: "user", content: "hello" }, + { + role: "assistant", + content: [ + { type: "toolCall", id: "call_1", name: "read", arguments: {} }, + { type: "toolCall", id: "call_2", name: "exec", arguments: {} }, + ], + }, + { role: "user", content: "user message that should come after tool use" }, + { + role: "toolResult", + toolCallId: "call_2", + toolName: "exec", + content: [{ type: "text", text: "ok" }], + isError: false, + }, + ] satisfies AgentMessage[]; + + const report = repairToolUseResultPairing(input); + + // Truncates before the assistant with incomplete tool calls + expect(report.messages).toHaveLength(1); + expect(report.messages[0]?.role).toBe("user"); + + // Report shows truncation details + expect(report.truncation).toBeDefined(); + expect(report.truncation?.truncatedAtIndex).toBe(1); + expect(report.truncation?.missingToolCallIds).toEqual(["call_1"]); + expect(report.truncation?.messagesDropped).toBe(3); + }); + + it("moves tool results directly after tool calls when all results exist", () => { const input = [ { role: "assistant", @@ -13,6 +52,13 @@ describe("sanitizeToolUseResultPairing", () => { ], }, { role: "user", content: "user message that should come after tool use" }, + { + role: "toolResult", + toolCallId: "call_1", + toolName: "read", + content: [{ type: "text", text: "file contents" }], + isError: false, + }, { role: "toolResult", toolCallId: "call_2", diff --git a/src/agents/session-transcript-repair.ts b/src/agents/session-transcript-repair.ts index d680beb4d..c98f74e3b 100644 --- a/src/agents/session-transcript-repair.ts +++ b/src/agents/session-transcript-repair.ts @@ -66,22 +66,107 @@ export type ToolUseRepairReport = { droppedDuplicateCount: number; droppedOrphanCount: number; moved: boolean; + /** If truncation occurred, details about what was dropped. */ + truncation?: { + /** Index in original messages where truncation happened. */ + truncatedAtIndex: number; + /** Tool call IDs that were missing results. 
*/ + missingToolCallIds: string[]; + /** Number of messages dropped: the incomplete assistant message and everything after it. */ + messagesDropped: number; + }; }; +/** + * Find all tool result IDs that exist anywhere in the message array. + */ +function indexAllToolResultIds(messages: AgentMessage[]): Set { + const ids = new Set(); + for (const msg of messages) { + if (!msg || typeof msg !== "object") continue; + if ((msg as { role?: unknown }).role === "toolResult") { + const id = extractToolResultId(msg as Extract); + if (id) ids.add(id); + } + } + return ids; +} + +/** + * Find the first assistant message with tool calls that has ANY missing results; a result counts as present if it appears anywhere in the array, before or after the call. + * Returns that message's index together with the call IDs lacking results, or null if all are complete. + */ +function findFirstIncompleteToolCallIndex(messages: AgentMessage[]): { + index: number; + missingIds: string[]; +} | null { + const allResultIds = indexAllToolResultIds(messages); + + for (let i = 0; i < messages.length; i++) { + const msg = messages[i]; + if (!msg || typeof msg !== "object") continue; + if ((msg as { role?: unknown }).role !== "assistant") continue; + + const assistant = msg as Extract; + const toolCalls = extractToolCallsFromAssistant(assistant); + if (toolCalls.length === 0) continue; + + const missingIds: string[] = []; + for (const call of toolCalls) { + if (!allResultIds.has(call.id)) { + missingIds.push(call.id); + } + } + + if (missingIds.length > 0) { + return { index: i, missingIds }; + } + } + + return null; +} + export function repairToolUseResultPairing(messages: AgentMessage[]): ToolUseRepairReport { - // Anthropic (and Cloud Code Assist) reject transcripts where assistant tool calls are not - // immediately followed by matching tool results. Session files can end up with results - // displaced (e.g. after user turns) or duplicated. 
Repair by: - // - moving matching toolResult messages directly after their assistant toolCall turn - // - inserting synthetic error toolResults for missing ids - // - dropping duplicate toolResults for the same id (anywhere in the transcript) + // Anthropic (and similar APIs) reject transcripts where assistant tool calls are not + // followed by matching tool results. This can happen when: + // - Session branching separates tool calls from their results + // - Interruptions (crash, network, user edit) occur mid-tool-execution + // - Compaction/pruning breaks the pairing + // + // Strategy: TRUNCATE at the first incomplete tool call sequence. + // This is simpler and safer than trying to repair with synthetic results: + // - Always produces valid history (just shorter) + // - No confusing synthetic error results in conversation + // - Agent continues working, may just need to redo some work + // + // After truncation, we still: + // - Move displaced tool results to correct positions + // - Drop duplicate tool results + // - Drop orphaned tool results (results without matching calls) + + // First pass: check if truncation is needed + const incomplete = findFirstIncompleteToolCallIndex(messages); + let workingMessages = messages; + let truncation: ToolUseRepairReport["truncation"]; + + if (incomplete) { + // Truncate before the incomplete assistant message + workingMessages = messages.slice(0, incomplete.index); + truncation = { + truncatedAtIndex: incomplete.index, + missingToolCallIds: incomplete.missingIds, + messagesDropped: messages.length - incomplete.index, + }; + } + + // Second pass: repair ordering and duplicates in the (possibly truncated) messages const out: AgentMessage[] = []; const added: Array> = []; const seenToolResultIds = new Set(); let droppedDuplicateCount = 0; let droppedOrphanCount = 0; let moved = false; - let changed = false; + let changed = truncation !== undefined; const pushToolResult = (msg: Extract) => { const id = 
extractToolResultId(msg); @@ -94,8 +179,8 @@ export function repairToolUseResultPairing(messages: AgentMessage[]): ToolUseRep out.push(msg); }; - for (let i = 0; i < messages.length; i += 1) { - const msg = messages[i] as AgentMessage; + for (let i = 0; i < workingMessages.length; i += 1) { + const msg = workingMessages[i] as AgentMessage; if (!msg || typeof msg !== "object") { out.push(msg); continue; @@ -104,8 +189,7 @@ export function repairToolUseResultPairing(messages: AgentMessage[]): ToolUseRep const role = (msg as { role?: unknown }).role; if (role !== "assistant") { // Tool results must only appear directly after the matching assistant tool call turn. - // Any "free-floating" toolResult entries in session history can make strict providers - // (Anthropic-compatible APIs, MiniMax, Cloud Code Assist) reject the entire request. + // Any "free-floating" toolResult entries can make strict providers reject the request. if (role !== "toolResult") { out.push(msg); } else { @@ -123,13 +207,12 @@ export function repairToolUseResultPairing(messages: AgentMessage[]): ToolUseRep } const toolCallIds = new Set(toolCalls.map((t) => t.id)); - const spanResultsById = new Map>(); const remainder: AgentMessage[] = []; let j = i + 1; - for (; j < messages.length; j += 1) { - const next = messages[j] as AgentMessage; + for (; j < workingMessages.length; j += 1) { + const next = workingMessages[j] as AgentMessage; if (!next || typeof next !== "object") { remainder.push(next); continue; @@ -170,19 +253,13 @@ export function repairToolUseResultPairing(messages: AgentMessage[]): ToolUseRep changed = true; } + // NOTE(review): results are not guaranteed here - completeness was checked against the whole original array, so a result located after the truncation point is missing from workingMessages and its call is emitted unpaired for (const call of toolCalls) { const existing = spanResultsById.get(call.id); if (existing) { pushToolResult(existing); - } else { - const missing = makeMissingToolResult({ - toolCallId: call.id, - toolName: call.name, - }); - added.push(missing); - changed = true; - pushToolResult(missing); } + // No 
synthetic results - we truncated incomplete sequences } for (const rem of remainder) { @@ -197,10 +274,11 @@ export function repairToolUseResultPairing(messages: AgentMessage[]): ToolUseRep const changedOrMoved = changed || moved; return { - messages: changedOrMoved ? out : messages, + messages: changedOrMoved ? out : workingMessages, added, droppedDuplicateCount, droppedOrphanCount, moved: changedOrMoved, + truncation, }; }