fix(sessions): truncate at incomplete tool calls instead of inserting synthetic results
When session history contains an assistant message with tool calls but missing results (e.g., due to branching, interruption, or crash), the API rejects the request. Previously, we tried to repair this by inserting synthetic error results, but this approach was fragile and could lead to confusing conversation states. New approach: truncate history before the first incomplete tool call sequence. This is simpler and always produces valid history: - Agent continues working (just with less context) - No confusing synthetic error results - User may need to repeat their last request The truncation is logged with details about which tool call IDs were missing, helping track and diagnose the root cause. Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
parent
109ac1c549
commit
c90cda7946
@ -12,7 +12,7 @@ import {
|
|||||||
sanitizeGoogleTurnOrdering,
|
sanitizeGoogleTurnOrdering,
|
||||||
sanitizeSessionMessagesImages,
|
sanitizeSessionMessagesImages,
|
||||||
} from "../pi-embedded-helpers.js";
|
} from "../pi-embedded-helpers.js";
|
||||||
import { sanitizeToolUseResultPairing } from "../session-transcript-repair.js";
|
import { repairToolUseResultPairing } from "../session-transcript-repair.js";
|
||||||
import { log } from "./logger.js";
|
import { log } from "./logger.js";
|
||||||
import { describeUnknownError } from "./utils.js";
|
import { describeUnknownError } from "./utils.js";
|
||||||
import { cleanToolSchemaForGemini } from "../pi-tools.schema.js";
|
import { cleanToolSchemaForGemini } from "../pi-tools.schema.js";
|
||||||
@ -332,9 +332,22 @@ export async function sanitizeSessionHistory(params: {
|
|||||||
const sanitizedThinking = policy.normalizeAntigravityThinkingBlocks
|
const sanitizedThinking = policy.normalizeAntigravityThinkingBlocks
|
||||||
? sanitizeAntigravityThinkingBlocks(sanitizedImages)
|
? sanitizeAntigravityThinkingBlocks(sanitizedImages)
|
||||||
: sanitizedImages;
|
: sanitizedImages;
|
||||||
const repairedTools = policy.repairToolUseResultPairing
|
|
||||||
? sanitizeToolUseResultPairing(sanitizedThinking)
|
let repairedTools = sanitizedThinking;
|
||||||
: sanitizedThinking;
|
if (policy.repairToolUseResultPairing) {
|
||||||
|
const repairReport = repairToolUseResultPairing(sanitizedThinking);
|
||||||
|
repairedTools = repairReport.messages;
|
||||||
|
|
||||||
|
// Log when session recovery truncation occurs - this helps track the issue
|
||||||
|
if (repairReport.truncation) {
|
||||||
|
log.warn(
|
||||||
|
`Session recovery: truncated ${repairReport.truncation.messagesDropped} messages ` +
|
||||||
|
`due to incomplete tool call sequence. ` +
|
||||||
|
`Missing tool results: [${repairReport.truncation.missingToolCallIds.join(", ")}]. ` +
|
||||||
|
`sessionId=${params.sessionId}`,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
const isOpenAIResponsesApi =
|
const isOpenAIResponsesApi =
|
||||||
params.modelApi === "openai-responses" || params.modelApi === "openai-codex-responses";
|
params.modelApi === "openai-responses" || params.modelApi === "openai-codex-responses";
|
||||||
|
|||||||
@ -1,9 +1,48 @@
|
|||||||
import type { AgentMessage } from "@mariozechner/pi-agent-core";
|
import type { AgentMessage } from "@mariozechner/pi-agent-core";
|
||||||
import { describe, expect, it } from "vitest";
|
import { describe, expect, it } from "vitest";
|
||||||
import { sanitizeToolUseResultPairing } from "./session-transcript-repair.js";
|
import {
|
||||||
|
repairToolUseResultPairing,
|
||||||
|
sanitizeToolUseResultPairing,
|
||||||
|
} from "./session-transcript-repair.js";
|
||||||
|
|
||||||
describe("sanitizeToolUseResultPairing", () => {
|
describe("sanitizeToolUseResultPairing", () => {
|
||||||
it("moves tool results directly after tool calls and inserts missing results", () => {
|
it("truncates at assistant with incomplete tool calls (missing results)", () => {
|
||||||
|
// When an assistant has tool calls but some results are missing,
|
||||||
|
// we truncate BEFORE that assistant to produce valid history.
|
||||||
|
// This is safer than inserting synthetic error results.
|
||||||
|
const input = [
|
||||||
|
{ role: "user", content: "hello" },
|
||||||
|
{
|
||||||
|
role: "assistant",
|
||||||
|
content: [
|
||||||
|
{ type: "toolCall", id: "call_1", name: "read", arguments: {} },
|
||||||
|
{ type: "toolCall", id: "call_2", name: "exec", arguments: {} },
|
||||||
|
],
|
||||||
|
},
|
||||||
|
{ role: "user", content: "user message that should come after tool use" },
|
||||||
|
{
|
||||||
|
role: "toolResult",
|
||||||
|
toolCallId: "call_2",
|
||||||
|
toolName: "exec",
|
||||||
|
content: [{ type: "text", text: "ok" }],
|
||||||
|
isError: false,
|
||||||
|
},
|
||||||
|
] satisfies AgentMessage[];
|
||||||
|
|
||||||
|
const report = repairToolUseResultPairing(input);
|
||||||
|
|
||||||
|
// Truncates before the assistant with incomplete tool calls
|
||||||
|
expect(report.messages).toHaveLength(1);
|
||||||
|
expect(report.messages[0]?.role).toBe("user");
|
||||||
|
|
||||||
|
// Report shows truncation details
|
||||||
|
expect(report.truncation).toBeDefined();
|
||||||
|
expect(report.truncation?.truncatedAtIndex).toBe(1);
|
||||||
|
expect(report.truncation?.missingToolCallIds).toEqual(["call_1"]);
|
||||||
|
expect(report.truncation?.messagesDropped).toBe(3);
|
||||||
|
});
|
||||||
|
|
||||||
|
it("moves tool results directly after tool calls when all results exist", () => {
|
||||||
const input = [
|
const input = [
|
||||||
{
|
{
|
||||||
role: "assistant",
|
role: "assistant",
|
||||||
@ -13,6 +52,13 @@ describe("sanitizeToolUseResultPairing", () => {
|
|||||||
],
|
],
|
||||||
},
|
},
|
||||||
{ role: "user", content: "user message that should come after tool use" },
|
{ role: "user", content: "user message that should come after tool use" },
|
||||||
|
{
|
||||||
|
role: "toolResult",
|
||||||
|
toolCallId: "call_1",
|
||||||
|
toolName: "read",
|
||||||
|
content: [{ type: "text", text: "file contents" }],
|
||||||
|
isError: false,
|
||||||
|
},
|
||||||
{
|
{
|
||||||
role: "toolResult",
|
role: "toolResult",
|
||||||
toolCallId: "call_2",
|
toolCallId: "call_2",
|
||||||
|
|||||||
@ -66,22 +66,107 @@ export type ToolUseRepairReport = {
|
|||||||
droppedDuplicateCount: number;
|
droppedDuplicateCount: number;
|
||||||
droppedOrphanCount: number;
|
droppedOrphanCount: number;
|
||||||
moved: boolean;
|
moved: boolean;
|
||||||
|
/** If truncation occurred, details about what was dropped. */
|
||||||
|
truncation?: {
|
||||||
|
/** Index in original messages where truncation happened. */
|
||||||
|
truncatedAtIndex: number;
|
||||||
|
/** Tool call IDs that were missing results. */
|
||||||
|
missingToolCallIds: string[];
|
||||||
|
/** Number of messages dropped. */
|
||||||
|
messagesDropped: number;
|
||||||
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Find all tool result IDs that exist anywhere in the message array.
|
||||||
|
*/
|
||||||
|
function indexAllToolResultIds(messages: AgentMessage[]): Set<string> {
|
||||||
|
const ids = new Set<string>();
|
||||||
|
for (const msg of messages) {
|
||||||
|
if (!msg || typeof msg !== "object") continue;
|
||||||
|
if ((msg as { role?: unknown }).role === "toolResult") {
|
||||||
|
const id = extractToolResultId(msg as Extract<AgentMessage, { role: "toolResult" }>);
|
||||||
|
if (id) ids.add(id);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return ids;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Find the first assistant message with tool calls that has ANY missing results.
|
||||||
|
* Returns the index of that assistant message, or -1 if all are complete.
|
||||||
|
*/
|
||||||
|
function findFirstIncompleteToolCallIndex(messages: AgentMessage[]): {
|
||||||
|
index: number;
|
||||||
|
missingIds: string[];
|
||||||
|
} | null {
|
||||||
|
const allResultIds = indexAllToolResultIds(messages);
|
||||||
|
|
||||||
|
for (let i = 0; i < messages.length; i++) {
|
||||||
|
const msg = messages[i];
|
||||||
|
if (!msg || typeof msg !== "object") continue;
|
||||||
|
if ((msg as { role?: unknown }).role !== "assistant") continue;
|
||||||
|
|
||||||
|
const assistant = msg as Extract<AgentMessage, { role: "assistant" }>;
|
||||||
|
const toolCalls = extractToolCallsFromAssistant(assistant);
|
||||||
|
if (toolCalls.length === 0) continue;
|
||||||
|
|
||||||
|
const missingIds: string[] = [];
|
||||||
|
for (const call of toolCalls) {
|
||||||
|
if (!allResultIds.has(call.id)) {
|
||||||
|
missingIds.push(call.id);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (missingIds.length > 0) {
|
||||||
|
return { index: i, missingIds };
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
export function repairToolUseResultPairing(messages: AgentMessage[]): ToolUseRepairReport {
|
export function repairToolUseResultPairing(messages: AgentMessage[]): ToolUseRepairReport {
|
||||||
// Anthropic (and Cloud Code Assist) reject transcripts where assistant tool calls are not
|
// Anthropic (and similar APIs) reject transcripts where assistant tool calls are not
|
||||||
// immediately followed by matching tool results. Session files can end up with results
|
// followed by matching tool results. This can happen when:
|
||||||
// displaced (e.g. after user turns) or duplicated. Repair by:
|
// - Session branching separates tool calls from their results
|
||||||
// - moving matching toolResult messages directly after their assistant toolCall turn
|
// - Interruptions (crash, network, user edit) occur mid-tool-execution
|
||||||
// - inserting synthetic error toolResults for missing ids
|
// - Compaction/pruning breaks the pairing
|
||||||
// - dropping duplicate toolResults for the same id (anywhere in the transcript)
|
//
|
||||||
|
// Strategy: TRUNCATE at the first incomplete tool call sequence.
|
||||||
|
// This is simpler and safer than trying to repair with synthetic results:
|
||||||
|
// - Always produces valid history (just shorter)
|
||||||
|
// - No confusing synthetic error results in conversation
|
||||||
|
// - Agent continues working, may just need to redo some work
|
||||||
|
//
|
||||||
|
// After truncation, we still:
|
||||||
|
// - Move displaced tool results to correct positions
|
||||||
|
// - Drop duplicate tool results
|
||||||
|
// - Drop orphaned tool results (results without matching calls)
|
||||||
|
|
||||||
|
// First pass: check if truncation is needed
|
||||||
|
const incomplete = findFirstIncompleteToolCallIndex(messages);
|
||||||
|
let workingMessages = messages;
|
||||||
|
let truncation: ToolUseRepairReport["truncation"];
|
||||||
|
|
||||||
|
if (incomplete) {
|
||||||
|
// Truncate before the incomplete assistant message
|
||||||
|
workingMessages = messages.slice(0, incomplete.index);
|
||||||
|
truncation = {
|
||||||
|
truncatedAtIndex: incomplete.index,
|
||||||
|
missingToolCallIds: incomplete.missingIds,
|
||||||
|
messagesDropped: messages.length - incomplete.index,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
// Second pass: repair ordering and duplicates in the (possibly truncated) messages
|
||||||
const out: AgentMessage[] = [];
|
const out: AgentMessage[] = [];
|
||||||
const added: Array<Extract<AgentMessage, { role: "toolResult" }>> = [];
|
const added: Array<Extract<AgentMessage, { role: "toolResult" }>> = [];
|
||||||
const seenToolResultIds = new Set<string>();
|
const seenToolResultIds = new Set<string>();
|
||||||
let droppedDuplicateCount = 0;
|
let droppedDuplicateCount = 0;
|
||||||
let droppedOrphanCount = 0;
|
let droppedOrphanCount = 0;
|
||||||
let moved = false;
|
let moved = false;
|
||||||
let changed = false;
|
let changed = truncation !== undefined;
|
||||||
|
|
||||||
const pushToolResult = (msg: Extract<AgentMessage, { role: "toolResult" }>) => {
|
const pushToolResult = (msg: Extract<AgentMessage, { role: "toolResult" }>) => {
|
||||||
const id = extractToolResultId(msg);
|
const id = extractToolResultId(msg);
|
||||||
@ -94,8 +179,8 @@ export function repairToolUseResultPairing(messages: AgentMessage[]): ToolUseRep
|
|||||||
out.push(msg);
|
out.push(msg);
|
||||||
};
|
};
|
||||||
|
|
||||||
for (let i = 0; i < messages.length; i += 1) {
|
for (let i = 0; i < workingMessages.length; i += 1) {
|
||||||
const msg = messages[i] as AgentMessage;
|
const msg = workingMessages[i] as AgentMessage;
|
||||||
if (!msg || typeof msg !== "object") {
|
if (!msg || typeof msg !== "object") {
|
||||||
out.push(msg);
|
out.push(msg);
|
||||||
continue;
|
continue;
|
||||||
@ -104,8 +189,7 @@ export function repairToolUseResultPairing(messages: AgentMessage[]): ToolUseRep
|
|||||||
const role = (msg as { role?: unknown }).role;
|
const role = (msg as { role?: unknown }).role;
|
||||||
if (role !== "assistant") {
|
if (role !== "assistant") {
|
||||||
// Tool results must only appear directly after the matching assistant tool call turn.
|
// Tool results must only appear directly after the matching assistant tool call turn.
|
||||||
// Any "free-floating" toolResult entries in session history can make strict providers
|
// Any "free-floating" toolResult entries can make strict providers reject the request.
|
||||||
// (Anthropic-compatible APIs, MiniMax, Cloud Code Assist) reject the entire request.
|
|
||||||
if (role !== "toolResult") {
|
if (role !== "toolResult") {
|
||||||
out.push(msg);
|
out.push(msg);
|
||||||
} else {
|
} else {
|
||||||
@ -123,13 +207,12 @@ export function repairToolUseResultPairing(messages: AgentMessage[]): ToolUseRep
|
|||||||
}
|
}
|
||||||
|
|
||||||
const toolCallIds = new Set(toolCalls.map((t) => t.id));
|
const toolCallIds = new Set(toolCalls.map((t) => t.id));
|
||||||
|
|
||||||
const spanResultsById = new Map<string, Extract<AgentMessage, { role: "toolResult" }>>();
|
const spanResultsById = new Map<string, Extract<AgentMessage, { role: "toolResult" }>>();
|
||||||
const remainder: AgentMessage[] = [];
|
const remainder: AgentMessage[] = [];
|
||||||
|
|
||||||
let j = i + 1;
|
let j = i + 1;
|
||||||
for (; j < messages.length; j += 1) {
|
for (; j < workingMessages.length; j += 1) {
|
||||||
const next = messages[j] as AgentMessage;
|
const next = workingMessages[j] as AgentMessage;
|
||||||
if (!next || typeof next !== "object") {
|
if (!next || typeof next !== "object") {
|
||||||
remainder.push(next);
|
remainder.push(next);
|
||||||
continue;
|
continue;
|
||||||
@ -170,19 +253,13 @@ export function repairToolUseResultPairing(messages: AgentMessage[]): ToolUseRep
|
|||||||
changed = true;
|
changed = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// All tool calls should have results (we truncated incomplete ones above)
|
||||||
for (const call of toolCalls) {
|
for (const call of toolCalls) {
|
||||||
const existing = spanResultsById.get(call.id);
|
const existing = spanResultsById.get(call.id);
|
||||||
if (existing) {
|
if (existing) {
|
||||||
pushToolResult(existing);
|
pushToolResult(existing);
|
||||||
} else {
|
|
||||||
const missing = makeMissingToolResult({
|
|
||||||
toolCallId: call.id,
|
|
||||||
toolName: call.name,
|
|
||||||
});
|
|
||||||
added.push(missing);
|
|
||||||
changed = true;
|
|
||||||
pushToolResult(missing);
|
|
||||||
}
|
}
|
||||||
|
// No synthetic results - we truncated incomplete sequences
|
||||||
}
|
}
|
||||||
|
|
||||||
for (const rem of remainder) {
|
for (const rem of remainder) {
|
||||||
@ -197,10 +274,11 @@ export function repairToolUseResultPairing(messages: AgentMessage[]): ToolUseRep
|
|||||||
|
|
||||||
const changedOrMoved = changed || moved;
|
const changedOrMoved = changed || moved;
|
||||||
return {
|
return {
|
||||||
messages: changedOrMoved ? out : messages,
|
messages: changedOrMoved ? out : workingMessages,
|
||||||
added,
|
added,
|
||||||
droppedDuplicateCount,
|
droppedDuplicateCount,
|
||||||
droppedOrphanCount,
|
droppedOrphanCount,
|
||||||
moved: changedOrMoved,
|
moved: changedOrMoved,
|
||||||
|
truncation,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user