Merge c90cda7946 into 4583f88626

2026-01-29 19:00:19 +00:00 · 2026-01-29 19:00:19 +00:00 · 499b293d44
commit 499b293d44
parent 4583f88626 c90cda7946
3 changed files with 166 additions and 29 deletions
--- a/src/agents/pi-embedded-runner/google.ts
+++ b/src/agents/pi-embedded-runner/google.ts
@ -12,7 +12,7 @@ import {
  sanitizeGoogleTurnOrdering,
  sanitizeSessionMessagesImages,
 } from "../pi-embedded-helpers.js";
-import { sanitizeToolUseResultPairing } from "../session-transcript-repair.js";
+import { repairToolUseResultPairing } from "../session-transcript-repair.js";
 import { log } from "./logger.js";
 import { describeUnknownError } from "./utils.js";
 import { cleanToolSchemaForGemini } from "../pi-tools.schema.js";
@ -332,9 +332,22 @@ export async function sanitizeSessionHistory(params: {
  const sanitizedThinking = policy.normalizeAntigravityThinkingBlocks
    ? sanitizeAntigravityThinkingBlocks(sanitizedImages)
    : sanitizedImages;
-  const repairedTools = policy.repairToolUseResultPairing
+
-    ? sanitizeToolUseResultPairing(sanitizedThinking)
+  let repairedTools = sanitizedThinking;
-    : sanitizedThinking;
+  if (policy.repairToolUseResultPairing) {
    const repairReport = repairToolUseResultPairing(sanitizedThinking);
    repairedTools = repairReport.messages;
    // Log when session recovery truncation occurs - this helps track the issue
    if (repairReport.truncation) {
      log.warn(
        `Session recovery: truncated ${repairReport.truncation.messagesDropped} messages ` +
          `due to incomplete tool call sequence. ` +
          `Missing tool results: [${repairReport.truncation.missingToolCallIds.join(", ")}]. ` +
          `sessionId=${params.sessionId}`,
      );
    }
  }
  const isOpenAIResponsesApi =
    params.modelApi === "openai-responses" || params.modelApi === "openai-codex-responses";
--- a/src/agents/session-transcript-repair.test.ts
+++ b/src/agents/session-transcript-repair.test.ts
@ -1,9 +1,48 @@
 import type { AgentMessage } from "@mariozechner/pi-agent-core";
 import { describe, expect, it } from "vitest";
-import { sanitizeToolUseResultPairing } from "./session-transcript-repair.js";
+import {
  repairToolUseResultPairing,
  sanitizeToolUseResultPairing,
 } from "./session-transcript-repair.js";
 describe("sanitizeToolUseResultPairing", () => {
-  it("moves tool results directly after tool calls and inserts missing results", () => {
+  it("truncates at assistant with incomplete tool calls (missing results)", () => {
    // When an assistant has tool calls but some results are missing,
    // we truncate BEFORE that assistant to produce valid history.
    // This is safer than inserting synthetic error results.
    const input = [
      { role: "user", content: "hello" },
      {
        role: "assistant",
        content: [
          { type: "toolCall", id: "call_1", name: "read", arguments: {} },
          { type: "toolCall", id: "call_2", name: "exec", arguments: {} },
        ],
      },
      { role: "user", content: "user message that should come after tool use" },
      {
        role: "toolResult",
        toolCallId: "call_2",
        toolName: "exec",
        content: [{ type: "text", text: "ok" }],
        isError: false,
      },
    ] satisfies AgentMessage[];
    const report = repairToolUseResultPairing(input);
    // Truncates before the assistant with incomplete tool calls
    expect(report.messages).toHaveLength(1);
    expect(report.messages[0]?.role).toBe("user");
    // Report shows truncation details
    expect(report.truncation).toBeDefined();
    expect(report.truncation?.truncatedAtIndex).toBe(1);
    expect(report.truncation?.missingToolCallIds).toEqual(["call_1"]);
    expect(report.truncation?.messagesDropped).toBe(3);
  });
  it("moves tool results directly after tool calls when all results exist", () => {
    const input = [
      {
        role: "assistant",
@ -13,6 +52,13 @@ describe("sanitizeToolUseResultPairing", () => {
        ],
      },
      { role: "user", content: "user message that should come after tool use" },
      {
        role: "toolResult",
        toolCallId: "call_1",
        toolName: "read",
        content: [{ type: "text", text: "file contents" }],
        isError: false,
      },
      {
        role: "toolResult",
        toolCallId: "call_2",
--- a/src/agents/session-transcript-repair.ts
+++ b/src/agents/session-transcript-repair.ts
@ -66,22 +66,107 @@ export type ToolUseRepairReport = {
  droppedDuplicateCount: number;
  droppedOrphanCount: number;
  moved: boolean;
  /** If truncation occurred, details about what was dropped. */
  truncation?: {
    /** Index in original messages where truncation happened. */
    truncatedAtIndex: number;
    /** Tool call IDs that were missing results. */
    missingToolCallIds: string[];
    /** Number of messages dropped. */
    messagesDropped: number;
  };
 };
 /**
 * Find all tool result IDs that exist anywhere in the message array.
 */
 function indexAllToolResultIds(messages: AgentMessage[]): Set<string> {
  const ids = new Set<string>();
  for (const msg of messages) {
    if (!msg || typeof msg !== "object") continue;
    if ((msg as { role?: unknown }).role === "toolResult") {
      const id = extractToolResultId(msg as Extract<AgentMessage, { role: "toolResult" }>);
      if (id) ids.add(id);
    }
  }
  return ids;
 }
 /**
 * Find the first assistant message with tool calls that has ANY missing results.
 * Returns the index of that assistant message, or -1 if all are complete.
 */
 function findFirstIncompleteToolCallIndex(messages: AgentMessage[]): {
  index: number;
  missingIds: string[];
 } | null {
  const allResultIds = indexAllToolResultIds(messages);
  for (let i = 0; i < messages.length; i++) {
    const msg = messages[i];
    if (!msg || typeof msg !== "object") continue;
    if ((msg as { role?: unknown }).role !== "assistant") continue;
    const assistant = msg as Extract<AgentMessage, { role: "assistant" }>;
    const toolCalls = extractToolCallsFromAssistant(assistant);
    if (toolCalls.length === 0) continue;
    const missingIds: string[] = [];
    for (const call of toolCalls) {
      if (!allResultIds.has(call.id)) {
        missingIds.push(call.id);
      }
    }
    if (missingIds.length > 0) {
      return { index: i, missingIds };
    }
  }
  return null;
 }
 export function repairToolUseResultPairing(messages: AgentMessage[]): ToolUseRepairReport {
-  // Anthropic (and Cloud Code Assist) reject transcripts where assistant tool calls are not
+  // Anthropic (and similar APIs) reject transcripts where assistant tool calls are not
-  // immediately followed by matching tool results. Session files can end up with results
+  // followed by matching tool results. This can happen when:
-  // displaced (e.g. after user turns) or duplicated. Repair by:
+  // - Session branching separates tool calls from their results
-  // - moving matching toolResult messages directly after their assistant toolCall turn
+  // - Interruptions (crash, network, user edit) occur mid-tool-execution
-  // - inserting synthetic error toolResults for missing ids
+  // - Compaction/pruning breaks the pairing
-  // - dropping duplicate toolResults for the same id (anywhere in the transcript)
+  //
  // Strategy: TRUNCATE at the first incomplete tool call sequence.
  // This is simpler and safer than trying to repair with synthetic results:
  // - Always produces valid history (just shorter)
  // - No confusing synthetic error results in conversation
  // - Agent continues working, may just need to redo some work
  //
  // After truncation, we still:
  // - Move displaced tool results to correct positions
  // - Drop duplicate tool results
  // - Drop orphaned tool results (results without matching calls)
  // First pass: check if truncation is needed
  const incomplete = findFirstIncompleteToolCallIndex(messages);
  let workingMessages = messages;
  let truncation: ToolUseRepairReport["truncation"];
  if (incomplete) {
    // Truncate before the incomplete assistant message
    workingMessages = messages.slice(0, incomplete.index);
    truncation = {
      truncatedAtIndex: incomplete.index,
      missingToolCallIds: incomplete.missingIds,
      messagesDropped: messages.length - incomplete.index,
    };
  }
  // Second pass: repair ordering and duplicates in the (possibly truncated) messages
  const out: AgentMessage[] = [];
  const added: Array<Extract<AgentMessage, { role: "toolResult" }>> = [];
  const seenToolResultIds = new Set<string>();
  let droppedDuplicateCount = 0;
  let droppedOrphanCount = 0;
  let moved = false;
-  let changed = false;
+  let changed = truncation !== undefined;
  const pushToolResult = (msg: Extract<AgentMessage, { role: "toolResult" }>) => {
    const id = extractToolResultId(msg);
@ -94,8 +179,8 @@ export function repairToolUseResultPairing(messages: AgentMessage[]): ToolUseRep
    out.push(msg);
  };
-  for (let i = 0; i < messages.length; i += 1) {
+  for (let i = 0; i < workingMessages.length; i += 1) {
-    const msg = messages[i] as AgentMessage;
+    const msg = workingMessages[i] as AgentMessage;
    if (!msg || typeof msg !== "object") {
      out.push(msg);
      continue;
@ -104,8 +189,7 @@ export function repairToolUseResultPairing(messages: AgentMessage[]): ToolUseRep
    const role = (msg as { role?: unknown }).role;
    if (role !== "assistant") {
      // Tool results must only appear directly after the matching assistant tool call turn.
-      // Any "free-floating" toolResult entries in session history can make strict providers
+      // Any "free-floating" toolResult entries can make strict providers reject the request.
      // (Anthropic-compatible APIs, MiniMax, Cloud Code Assist) reject the entire request.
      if (role !== "toolResult") {
        out.push(msg);
      } else {
@ -123,13 +207,12 @@ export function repairToolUseResultPairing(messages: AgentMessage[]): ToolUseRep
    }
    const toolCallIds = new Set(toolCalls.map((t) => t.id));
    const spanResultsById = new Map<string, Extract<AgentMessage, { role: "toolResult" }>>();
    const remainder: AgentMessage[] = [];
    let j = i + 1;
-    for (; j < messages.length; j += 1) {
+    for (; j < workingMessages.length; j += 1) {
-      const next = messages[j] as AgentMessage;
+      const next = workingMessages[j] as AgentMessage;
      if (!next || typeof next !== "object") {
        remainder.push(next);
        continue;
@ -170,19 +253,13 @@ export function repairToolUseResultPairing(messages: AgentMessage[]): ToolUseRep
      changed = true;
    }
    // All tool calls should have results (we truncated incomplete ones above)
    for (const call of toolCalls) {
      const existing = spanResultsById.get(call.id);
      if (existing) {
        pushToolResult(existing);
      } else {
        const missing = makeMissingToolResult({
          toolCallId: call.id,
          toolName: call.name,
        });
        added.push(missing);
        changed = true;
        pushToolResult(missing);
      }
      // No synthetic results - we truncated incomplete sequences
    }
    for (const rem of remainder) {
@ -197,10 +274,11 @@ export function repairToolUseResultPairing(messages: AgentMessage[]): ToolUseRep
  const changedOrMoved = changed || moved;
  return {
-    messages: changedOrMoved ? out : messages,
+    messages: changedOrMoved ? out : workingMessages,
    added,
    droppedDuplicateCount,
    droppedOrphanCount,
    moved: changedOrMoved,
    truncation,
  };
 }