Merge 155eca6b3e into da71eaebd2

2026-01-30 11:55:33 +00:00 · 2026-01-30 11:55:33 +00:00 · b9e818de2d
commit b9e818de2d
parent da71eaebd2 155eca6b3e
2 changed files with 152 additions and 3 deletions
--- a/src/agents/pi-embedded-runner/run/attempt.ts
+++ b/src/agents/pi-embedded-runner/run/attempt.ts
@ -52,6 +52,7 @@ import {
 import { DEFAULT_BOOTSTRAP_FILENAME } from "../../workspace.js";
 import { buildSystemPromptReport } from "../../system-prompt-report.js";
 import { resolveDefaultModelForAgent } from "../../model-selection.js";
+import { sanitizeToolUseResultPairing } from "../../session-transcript-repair.js";

 import { isAbortError } from "../abort.js";
 import { buildEmbeddedExtensionPaths } from "../extensions.js";
@ -535,9 +536,11 @@ export async function runEmbeddedAttempt(
          validated,
          getDmHistoryLimitFromSessionKey(params.sessionKey, params.config),
        );
-        cacheTrace?.recordStage("session:limited", { messages: limited });
-        if (limited.length > 0) {
-          activeSession.agent.replaceMessages(limited);
+        // Fix: Repair tool_use/tool_result pairings AFTER truncation (issue #4367)
+        const repaired = sanitizeToolUseResultPairing(limited);
+        cacheTrace?.recordStage("session:limited", { messages: repaired });
+        if (repaired.length > 0) {
+          activeSession.agent.replaceMessages(repaired);
        }
      } catch (err) {
        sessionManager.flushPendingToolResults?.();
--- a/src/agents/session-transcript-repair.test.ts
+++ b/src/agents/session-transcript-repair.test.ts
@ -109,4 +109,150 @@ describe("sanitizeToolUseResultPairing", () => {
    expect(out.some((m) => m.role === "toolResult")).toBe(false);
    expect(out.map((m) => m.role)).toEqual(["user", "assistant"]);
  });
+
+  // Tests for PR #4387: tool_use_id mismatch fix after history truncation
+  describe("after history truncation", () => {
+    it("repairs orphaned tool_use by inserting synthetic error result", () => {
+      // Simulates truncation that removed the tool_result but kept the assistant tool_use
+      const truncatedHistory = [
+        {
+          role: "assistant",
+          content: [{ type: "toolCall", id: "call_1", name: "read", arguments: {} }],
+        },
+        { role: "user", content: "what did you find?" },
+      ] satisfies AgentMessage[];
+
+      const out = sanitizeToolUseResultPairing(truncatedHistory);
+
+      // Should have inserted a synthetic error result after the assistant message
+      expect(out).toHaveLength(3);
+      expect(out[0]?.role).toBe("assistant");
+      expect(out[1]?.role).toBe("toolResult");
+      expect((out[1] as { toolCallId?: string }).toolCallId).toBe("call_1");
+      expect((out[1] as { isError?: boolean }).isError).toBe(true);
+      expect((out[1] as { content?: Array<{ text?: string }> }).content?.[0]?.text).toContain(
+        "missing tool result",
+      );
+      expect(out[2]?.role).toBe("user");
+    });
+
+    it("drops orphaned tool_result that no longer has matching tool_use", () => {
+      // Simulates truncation that removed the assistant tool_use but kept the tool_result
+      const truncatedHistory = [
+        { role: "user", content: "please read the file" },
+        {
+          role: "toolResult",
+          toolCallId: "call_orphan",
+          toolName: "read",
+          content: [{ type: "text", text: "file contents" }],
+          isError: false,
+        },
+        {
+          role: "assistant",
+          content: [{ type: "text", text: "Here's what I found..." }],
+        },
+      ] satisfies AgentMessage[];
+
+      const out = sanitizeToolUseResultPairing(truncatedHistory);
+
+      // Orphaned tool_result should be dropped
+      expect(out).toHaveLength(2);
+      expect(out.map((m) => m.role)).toEqual(["user", "assistant"]);
+      expect(out.some((m) => m.role === "toolResult")).toBe(false);
+    });
+
+    it("passes through normal history unchanged", () => {
+      // Well-formed history with proper tool_use/tool_result pairing
+      const normalHistory = [
+        { role: "user", content: "please read the file" },
+        {
+          role: "assistant",
+          content: [{ type: "toolCall", id: "call_1", name: "read", arguments: {} }],
+        },
+        {
+          role: "toolResult",
+          toolCallId: "call_1",
+          toolName: "read",
+          content: [{ type: "text", text: "file contents" }],
+          isError: false,
+        },
+        {
+          role: "assistant",
+          content: [{ type: "text", text: "Here's what I found..." }],
+        },
+        { role: "user", content: "thanks!" },
+      ] satisfies AgentMessage[];
+
+      const out = sanitizeToolUseResultPairing(normalHistory);
+
+      // Should return the same array reference (no modifications)
+      expect(out).toBe(normalHistory);
+      expect(out).toHaveLength(5);
+      expect(out.map((m) => m.role)).toEqual([
+        "user",
+        "assistant",
+        "toolResult",
+        "assistant",
+        "user",
+      ]);
+    });
+
+    it("handles multiple orphaned tool_use blocks after truncation", () => {
+      const truncatedHistory = [
+        {
+          role: "assistant",
+          content: [
+            { type: "toolCall", id: "call_1", name: "read", arguments: {} },
+            { type: "toolCall", id: "call_2", name: "exec", arguments: {} },
+          ],
+        },
+        { role: "user", content: "what happened?" },
+      ] satisfies AgentMessage[];
+
+      const out = sanitizeToolUseResultPairing(truncatedHistory);
+
+      // Should insert synthetic results for both orphaned tool calls
+      expect(out).toHaveLength(4);
+      expect(out[0]?.role).toBe("assistant");
+      expect(out[1]?.role).toBe("toolResult");
+      expect((out[1] as { toolCallId?: string }).toolCallId).toBe("call_1");
+      expect(out[2]?.role).toBe("toolResult");
+      expect((out[2] as { toolCallId?: string }).toolCallId).toBe("call_2");
+      expect(out[3]?.role).toBe("user");
+    });
+
+    it("repairs mixed scenario: some results present, some missing after truncation", () => {
+      const truncatedHistory = [
+        {
+          role: "assistant",
+          content: [
+            { type: "toolCall", id: "call_1", name: "read", arguments: {} },
+            { type: "toolCall", id: "call_2", name: "exec", arguments: {} },
+          ],
+        },
+        {
+          role: "toolResult",
+          toolCallId: "call_1",
+          toolName: "read",
+          content: [{ type: "text", text: "contents" }],
+          isError: false,
+        },
+        // call_2's result was truncated away
+        { role: "user", content: "ok" },
+      ] satisfies AgentMessage[];
+
+      const out = sanitizeToolUseResultPairing(truncatedHistory);
+
+      // Should keep existing result and insert synthetic result for missing one
+      expect(out).toHaveLength(4);
+      expect(out[0]?.role).toBe("assistant");
+      expect(out[1]?.role).toBe("toolResult");
+      expect((out[1] as { toolCallId?: string }).toolCallId).toBe("call_1");
+      expect((out[1] as { isError?: boolean }).isError).toBe(false);
+      expect(out[2]?.role).toBe("toolResult");
+      expect((out[2] as { toolCallId?: string }).toolCallId).toBe("call_2");
+      expect((out[2] as { isError?: boolean }).isError).toBe(true);
+      expect(out[3]?.role).toBe("user");
+    });
+  });
 });