Merge c90cda7946 into 4583f88626

2026-01-29 19:00:19 +00:00 · 2026-01-29 19:00:19 +00:00 · 499b293d44
commit 499b293d44
parent 4583f88626 c90cda7946
3 changed files with 166 additions and 29 deletions
--- a/src/agents/pi-embedded-runner/google.ts
+++ b/src/agents/pi-embedded-runner/google.ts
@ -12,7 +12,7 @@ import {
  sanitizeGoogleTurnOrdering,
  sanitizeSessionMessagesImages,
 } from "../pi-embedded-helpers.js";
-import { sanitizeToolUseResultPairing } from "../session-transcript-repair.js";
+import { repairToolUseResultPairing } from "../session-transcript-repair.js";
 import { log } from "./logger.js";
 import { describeUnknownError } from "./utils.js";
 import { cleanToolSchemaForGemini } from "../pi-tools.schema.js";
@ -332,9 +332,22 @@ export async function sanitizeSessionHistory(params: {
  const sanitizedThinking = policy.normalizeAntigravityThinkingBlocks
    ? sanitizeAntigravityThinkingBlocks(sanitizedImages)
    : sanitizedImages;
-  const repairedTools = policy.repairToolUseResultPairing
-    ? sanitizeToolUseResultPairing(sanitizedThinking)
-    : sanitizedThinking;
+
+  let repairedTools = sanitizedThinking;
+  if (policy.repairToolUseResultPairing) {
+    const repairReport = repairToolUseResultPairing(sanitizedThinking);
+    repairedTools = repairReport.messages;
+
+    // Log when session recovery truncates history so the dropped messages can be traced.
+    if (repairReport.truncation) {
+      log.warn(
+        `Session recovery: truncated ${repairReport.truncation.messagesDropped} messages ` +
+          `due to incomplete tool call sequence. ` +
+          `Missing tool results: [${repairReport.truncation.missingToolCallIds.join(", ")}]. ` +
+          `sessionId=${params.sessionId}`,
+      );
+    }
+  }

  const isOpenAIResponsesApi =
    params.modelApi === "openai-responses" || params.modelApi === "openai-codex-responses";
--- a/src/agents/session-transcript-repair.test.ts
+++ b/src/agents/session-transcript-repair.test.ts
@ -1,9 +1,48 @@
 import type { AgentMessage } from "@mariozechner/pi-agent-core";
 import { describe, expect, it } from "vitest";
-import { sanitizeToolUseResultPairing } from "./session-transcript-repair.js";
+import {
+  repairToolUseResultPairing,
+  sanitizeToolUseResultPairing,
+} from "./session-transcript-repair.js";

 describe("sanitizeToolUseResultPairing", () => {
-  it("moves tool results directly after tool calls and inserts missing results", () => {
+  it("truncates at assistant with incomplete tool calls (missing results)", () => {
+    // When an assistant has tool calls but some results are missing,
+    // we truncate BEFORE that assistant to produce valid history.
+    // This is safer than inserting synthetic error results.
+    const input = [
+      { role: "user", content: "hello" },
+      {
+        role: "assistant",
+        content: [
+          { type: "toolCall", id: "call_1", name: "read", arguments: {} },
+          { type: "toolCall", id: "call_2", name: "exec", arguments: {} },
+        ],
+      },
+      { role: "user", content: "user message that should come after tool use" },
+      {
+        role: "toolResult",
+        toolCallId: "call_2",
+        toolName: "exec",
+        content: [{ type: "text", text: "ok" }],
+        isError: false,
+      },
+    ] satisfies AgentMessage[];
+
+    const report = repairToolUseResultPairing(input);
+
+    // Truncates before the assistant with incomplete tool calls
+    expect(report.messages).toHaveLength(1);
+    expect(report.messages[0]?.role).toBe("user");
+
+    // Report shows truncation details
+    expect(report.truncation).toBeDefined();
+    expect(report.truncation?.truncatedAtIndex).toBe(1);
+    expect(report.truncation?.missingToolCallIds).toEqual(["call_1"]);
+    expect(report.truncation?.messagesDropped).toBe(3);
+  });
+
+  it("moves tool results directly after tool calls when all results exist", () => {
    const input = [
      {
        role: "assistant",
@ -13,6 +52,13 @@ describe("sanitizeToolUseResultPairing", () => {
        ],
      },
      { role: "user", content: "user message that should come after tool use" },
+      {
+        role: "toolResult",
+        toolCallId: "call_1",
+        toolName: "read",
+        content: [{ type: "text", text: "file contents" }],
+        isError: false,
+      },
      {
        role: "toolResult",
        toolCallId: "call_2",
--- a/src/agents/session-transcript-repair.ts
+++ b/src/agents/session-transcript-repair.ts
@ -66,22 +66,107 @@ export type ToolUseRepairReport = {
  droppedDuplicateCount: number;
  droppedOrphanCount: number;
  moved: boolean;
+  /** If truncation occurred, details about what was dropped. */
+  truncation?: {
+    /** Index in original messages where truncation happened. */
+    truncatedAtIndex: number;
+    /** Tool call IDs that were missing results. */
+    missingToolCallIds: string[];
+    /** Number of messages dropped. */
+    messagesDropped: number;
+  };
 };

+/**
+ * Collect the IDs of all toolResult messages, skipping non-object entries and results with no ID.
+ */
+function indexAllToolResultIds(messages: AgentMessage[]): Set<string> {
+  const ids = new Set<string>();
+  for (const msg of messages) {
+    if (!msg || typeof msg !== "object") continue;
+    if ((msg as { role?: unknown }).role === "toolResult") {
+      const id = extractToolResultId(msg as Extract<AgentMessage, { role: "toolResult" }>);
+      if (id) ids.add(id); // results with no extractable ID are ignored
+    }
+  }
+  return ids;
+}
+
+/**
+ * Find the first assistant message whose tool calls include an ID with no toolResult anywhere
+ * in `messages`. Returns its index and the missing call IDs, or null if none is incomplete.
+ */
+function findFirstIncompleteToolCallIndex(messages: AgentMessage[]): {
+  index: number;
+  missingIds: string[];
+} | null {
+  const allResultIds = indexAllToolResultIds(messages);
+
+  for (let i = 0; i < messages.length; i++) {
+    const msg = messages[i];
+    if (!msg || typeof msg !== "object") continue;
+    if ((msg as { role?: unknown }).role !== "assistant") continue;
+
+    const assistant = msg as Extract<AgentMessage, { role: "assistant" }>;
+    const toolCalls = extractToolCallsFromAssistant(assistant);
+    if (toolCalls.length === 0) continue; // no tool calls, nothing to pair
+
+    const missingIds: string[] = [];
+    for (const call of toolCalls) {
+      if (!allResultIds.has(call.id)) {
+        missingIds.push(call.id);
+      }
+    }
+
+    if (missingIds.length > 0) {
+      return { index: i, missingIds }; // stop at the first incomplete assistant turn
+    }
+  }
+
+  return null;
+}
+
 export function repairToolUseResultPairing(messages: AgentMessage[]): ToolUseRepairReport {
-  // Anthropic (and Cloud Code Assist) reject transcripts where assistant tool calls are not
-  // immediately followed by matching tool results. Session files can end up with results
-  // displaced (e.g. after user turns) or duplicated. Repair by:
-  // - moving matching toolResult messages directly after their assistant toolCall turn
-  // - inserting synthetic error toolResults for missing ids
-  // - dropping duplicate toolResults for the same id (anywhere in the transcript)
+  // Anthropic (and similar APIs) reject transcripts where assistant tool calls are not
+  // followed by matching tool results. This can happen when:
+  // - Session branching separates tool calls from their results
+  // - Interruptions (crash, network, user edit) occur mid-tool-execution
+  // - Compaction/pruning breaks the pairing
+  //
+  // Strategy: TRUNCATE at the first incomplete tool call sequence.
+  // This is simpler and safer than trying to repair with synthetic results:
+  // - Always produces valid history (just shorter)
+  // - No confusing synthetic error results in conversation
+  // - Agent continues working, may just need to redo some work
+  //
+  // After truncation, we still:
+  // - Move displaced tool results to correct positions
+  // - Drop duplicate tool results
+  // - Drop orphaned tool results (results without matching calls)
+
+  // First pass: check if truncation is needed
+  const incomplete = findFirstIncompleteToolCallIndex(messages);
+  let workingMessages = messages;
+  let truncation: ToolUseRepairReport["truncation"];
+
+  if (incomplete) {
+    // Truncate before the incomplete assistant message
+    workingMessages = messages.slice(0, incomplete.index);
+    truncation = {
+      truncatedAtIndex: incomplete.index,
+      missingToolCallIds: incomplete.missingIds,
+      messagesDropped: messages.length - incomplete.index,
+    };
+  }
+
+  // Second pass: repair ordering and duplicates in the (possibly truncated) messages
  const out: AgentMessage[] = [];
  const added: Array<Extract<AgentMessage, { role: "toolResult" }>> = [];
  const seenToolResultIds = new Set<string>();
  let droppedDuplicateCount = 0;
  let droppedOrphanCount = 0;
  let moved = false;
-  let changed = false;
+  let changed = truncation !== undefined;

  const pushToolResult = (msg: Extract<AgentMessage, { role: "toolResult" }>) => {
    const id = extractToolResultId(msg);
@ -94,8 +179,8 @@ export function repairToolUseResultPairing(messages: AgentMessage[]): ToolUseRep
    out.push(msg);
  };

-  for (let i = 0; i < messages.length; i += 1) {
-    const msg = messages[i] as AgentMessage;
+  for (let i = 0; i < workingMessages.length; i += 1) {
+    const msg = workingMessages[i] as AgentMessage;
    if (!msg || typeof msg !== "object") {
      out.push(msg);
      continue;
@ -104,8 +189,7 @@ export function repairToolUseResultPairing(messages: AgentMessage[]): ToolUseRep
    const role = (msg as { role?: unknown }).role;
    if (role !== "assistant") {
      // Tool results must only appear directly after the matching assistant tool call turn.
-      // Any "free-floating" toolResult entries in session history can make strict providers
-      // (Anthropic-compatible APIs, MiniMax, Cloud Code Assist) reject the entire request.
+      // Any "free-floating" toolResult entries can make strict providers reject the request.
      if (role !== "toolResult") {
        out.push(msg);
      } else {
@ -123,13 +207,12 @@ export function repairToolUseResultPairing(messages: AgentMessage[]): ToolUseRep
    }

    const toolCallIds = new Set(toolCalls.map((t) => t.id));
-
    const spanResultsById = new Map<string, Extract<AgentMessage, { role: "toolResult" }>>();
    const remainder: AgentMessage[] = [];

    let j = i + 1;
-    for (; j < messages.length; j += 1) {
-      const next = messages[j] as AgentMessage;
+    for (; j < workingMessages.length; j += 1) {
+      const next = workingMessages[j] as AgentMessage;
      if (!next || typeof next !== "object") {
        remainder.push(next);
        continue;
@ -170,19 +253,13 @@ export function repairToolUseResultPairing(messages: AgentMessage[]): ToolUseRep
      changed = true;
    }

+    // All tool calls should have results (we truncated incomplete ones above)
    for (const call of toolCalls) {
      const existing = spanResultsById.get(call.id);
      if (existing) {
        pushToolResult(existing);
-      } else {
-        const missing = makeMissingToolResult({
-          toolCallId: call.id,
-          toolName: call.name,
-        });
-        added.push(missing);
-        changed = true;
-        pushToolResult(missing);
      }
+      // No synthetic results - we truncated incomplete sequences
    }

    for (const rem of remainder) {
@ -197,10 +274,11 @@ export function repairToolUseResultPairing(messages: AgentMessage[]): ToolUseRep

  const changedOrMoved = changed || moved;
  return {
-    messages: changedOrMoved ? out : messages,
+    messages: changedOrMoved ? out : workingMessages,
    added,
    droppedDuplicateCount,
    droppedOrphanCount,
    moved: changedOrMoved,
+    truncation,
  };
 }