Merge c90cda7946 into 4583f88626
This commit is contained in:
commit
499b293d44
@ -12,7 +12,7 @@ import {
|
|||||||
sanitizeGoogleTurnOrdering,
|
sanitizeGoogleTurnOrdering,
|
||||||
sanitizeSessionMessagesImages,
|
sanitizeSessionMessagesImages,
|
||||||
} from "../pi-embedded-helpers.js";
|
} from "../pi-embedded-helpers.js";
|
||||||
import { sanitizeToolUseResultPairing } from "../session-transcript-repair.js";
|
import { repairToolUseResultPairing } from "../session-transcript-repair.js";
|
||||||
import { log } from "./logger.js";
|
import { log } from "./logger.js";
|
||||||
import { describeUnknownError } from "./utils.js";
|
import { describeUnknownError } from "./utils.js";
|
||||||
import { cleanToolSchemaForGemini } from "../pi-tools.schema.js";
|
import { cleanToolSchemaForGemini } from "../pi-tools.schema.js";
|
||||||
@ -332,9 +332,22 @@ export async function sanitizeSessionHistory(params: {
|
|||||||
const sanitizedThinking = policy.normalizeAntigravityThinkingBlocks
|
const sanitizedThinking = policy.normalizeAntigravityThinkingBlocks
|
||||||
? sanitizeAntigravityThinkingBlocks(sanitizedImages)
|
? sanitizeAntigravityThinkingBlocks(sanitizedImages)
|
||||||
: sanitizedImages;
|
: sanitizedImages;
|
||||||
const repairedTools = policy.repairToolUseResultPairing
|
|
||||||
? sanitizeToolUseResultPairing(sanitizedThinking)
|
let repairedTools = sanitizedThinking;
|
||||||
: sanitizedThinking;
|
if (policy.repairToolUseResultPairing) {
|
||||||
|
const repairReport = repairToolUseResultPairing(sanitizedThinking);
|
||||||
|
repairedTools = repairReport.messages;
|
||||||
|
|
||||||
|
// Log when session recovery truncation occurs - this helps track the issue
|
||||||
|
if (repairReport.truncation) {
|
||||||
|
log.warn(
|
||||||
|
`Session recovery: truncated ${repairReport.truncation.messagesDropped} messages ` +
|
||||||
|
`due to incomplete tool call sequence. ` +
|
||||||
|
`Missing tool results: [${repairReport.truncation.missingToolCallIds.join(", ")}]. ` +
|
||||||
|
`sessionId=${params.sessionId}`,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
const isOpenAIResponsesApi =
|
const isOpenAIResponsesApi =
|
||||||
params.modelApi === "openai-responses" || params.modelApi === "openai-codex-responses";
|
params.modelApi === "openai-responses" || params.modelApi === "openai-codex-responses";
|
||||||
|
|||||||
@ -1,9 +1,48 @@
|
|||||||
import type { AgentMessage } from "@mariozechner/pi-agent-core";
|
import type { AgentMessage } from "@mariozechner/pi-agent-core";
|
||||||
import { describe, expect, it } from "vitest";
|
import { describe, expect, it } from "vitest";
|
||||||
import { sanitizeToolUseResultPairing } from "./session-transcript-repair.js";
|
import {
|
||||||
|
repairToolUseResultPairing,
|
||||||
|
sanitizeToolUseResultPairing,
|
||||||
|
} from "./session-transcript-repair.js";
|
||||||
|
|
||||||
describe("sanitizeToolUseResultPairing", () => {
|
describe("sanitizeToolUseResultPairing", () => {
|
||||||
it("moves tool results directly after tool calls and inserts missing results", () => {
|
it("truncates at assistant with incomplete tool calls (missing results)", () => {
|
||||||
|
// When an assistant has tool calls but some results are missing,
|
||||||
|
// we truncate BEFORE that assistant to produce valid history.
|
||||||
|
// This is safer than inserting synthetic error results.
|
||||||
|
const input = [
|
||||||
|
{ role: "user", content: "hello" },
|
||||||
|
{
|
||||||
|
role: "assistant",
|
||||||
|
content: [
|
||||||
|
{ type: "toolCall", id: "call_1", name: "read", arguments: {} },
|
||||||
|
{ type: "toolCall", id: "call_2", name: "exec", arguments: {} },
|
||||||
|
],
|
||||||
|
},
|
||||||
|
{ role: "user", content: "user message that should come after tool use" },
|
||||||
|
{
|
||||||
|
role: "toolResult",
|
||||||
|
toolCallId: "call_2",
|
||||||
|
toolName: "exec",
|
||||||
|
content: [{ type: "text", text: "ok" }],
|
||||||
|
isError: false,
|
||||||
|
},
|
||||||
|
] satisfies AgentMessage[];
|
||||||
|
|
||||||
|
const report = repairToolUseResultPairing(input);
|
||||||
|
|
||||||
|
// Truncates before the assistant with incomplete tool calls
|
||||||
|
expect(report.messages).toHaveLength(1);
|
||||||
|
expect(report.messages[0]?.role).toBe("user");
|
||||||
|
|
||||||
|
// Report shows truncation details
|
||||||
|
expect(report.truncation).toBeDefined();
|
||||||
|
expect(report.truncation?.truncatedAtIndex).toBe(1);
|
||||||
|
expect(report.truncation?.missingToolCallIds).toEqual(["call_1"]);
|
||||||
|
expect(report.truncation?.messagesDropped).toBe(3);
|
||||||
|
});
|
||||||
|
|
||||||
|
it("moves tool results directly after tool calls when all results exist", () => {
|
||||||
const input = [
|
const input = [
|
||||||
{
|
{
|
||||||
role: "assistant",
|
role: "assistant",
|
||||||
@ -13,6 +52,13 @@ describe("sanitizeToolUseResultPairing", () => {
|
|||||||
],
|
],
|
||||||
},
|
},
|
||||||
{ role: "user", content: "user message that should come after tool use" },
|
{ role: "user", content: "user message that should come after tool use" },
|
||||||
|
{
|
||||||
|
role: "toolResult",
|
||||||
|
toolCallId: "call_1",
|
||||||
|
toolName: "read",
|
||||||
|
content: [{ type: "text", text: "file contents" }],
|
||||||
|
isError: false,
|
||||||
|
},
|
||||||
{
|
{
|
||||||
role: "toolResult",
|
role: "toolResult",
|
||||||
toolCallId: "call_2",
|
toolCallId: "call_2",
|
||||||
|
|||||||
@ -66,22 +66,107 @@ export type ToolUseRepairReport = {
|
|||||||
droppedDuplicateCount: number;
|
droppedDuplicateCount: number;
|
||||||
droppedOrphanCount: number;
|
droppedOrphanCount: number;
|
||||||
moved: boolean;
|
moved: boolean;
|
||||||
|
/** If truncation occurred, details about what was dropped. */
|
||||||
|
truncation?: {
|
||||||
|
/** Index in original messages where truncation happened. */
|
||||||
|
truncatedAtIndex: number;
|
||||||
|
/** Tool call IDs that were missing results. */
|
||||||
|
missingToolCallIds: string[];
|
||||||
|
/** Number of messages dropped. */
|
||||||
|
messagesDropped: number;
|
||||||
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Find all tool result IDs that exist anywhere in the message array.
|
||||||
|
*/
|
||||||
|
function indexAllToolResultIds(messages: AgentMessage[]): Set<string> {
|
||||||
|
const ids = new Set<string>();
|
||||||
|
for (const msg of messages) {
|
||||||
|
if (!msg || typeof msg !== "object") continue;
|
||||||
|
if ((msg as { role?: unknown }).role === "toolResult") {
|
||||||
|
const id = extractToolResultId(msg as Extract<AgentMessage, { role: "toolResult" }>);
|
||||||
|
if (id) ids.add(id);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return ids;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Find the first assistant message with tool calls that has ANY missing results.
|
||||||
|
* Returns the index of that assistant message, or -1 if all are complete.
|
||||||
|
*/
|
||||||
|
function findFirstIncompleteToolCallIndex(messages: AgentMessage[]): {
|
||||||
|
index: number;
|
||||||
|
missingIds: string[];
|
||||||
|
} | null {
|
||||||
|
const allResultIds = indexAllToolResultIds(messages);
|
||||||
|
|
||||||
|
for (let i = 0; i < messages.length; i++) {
|
||||||
|
const msg = messages[i];
|
||||||
|
if (!msg || typeof msg !== "object") continue;
|
||||||
|
if ((msg as { role?: unknown }).role !== "assistant") continue;
|
||||||
|
|
||||||
|
const assistant = msg as Extract<AgentMessage, { role: "assistant" }>;
|
||||||
|
const toolCalls = extractToolCallsFromAssistant(assistant);
|
||||||
|
if (toolCalls.length === 0) continue;
|
||||||
|
|
||||||
|
const missingIds: string[] = [];
|
||||||
|
for (const call of toolCalls) {
|
||||||
|
if (!allResultIds.has(call.id)) {
|
||||||
|
missingIds.push(call.id);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (missingIds.length > 0) {
|
||||||
|
return { index: i, missingIds };
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
export function repairToolUseResultPairing(messages: AgentMessage[]): ToolUseRepairReport {
|
export function repairToolUseResultPairing(messages: AgentMessage[]): ToolUseRepairReport {
|
||||||
// Anthropic (and Cloud Code Assist) reject transcripts where assistant tool calls are not
|
// Anthropic (and similar APIs) reject transcripts where assistant tool calls are not
|
||||||
// immediately followed by matching tool results. Session files can end up with results
|
// followed by matching tool results. This can happen when:
|
||||||
// displaced (e.g. after user turns) or duplicated. Repair by:
|
// - Session branching separates tool calls from their results
|
||||||
// - moving matching toolResult messages directly after their assistant toolCall turn
|
// - Interruptions (crash, network, user edit) occur mid-tool-execution
|
||||||
// - inserting synthetic error toolResults for missing ids
|
// - Compaction/pruning breaks the pairing
|
||||||
// - dropping duplicate toolResults for the same id (anywhere in the transcript)
|
//
|
||||||
|
// Strategy: TRUNCATE at the first incomplete tool call sequence.
|
||||||
|
// This is simpler and safer than trying to repair with synthetic results:
|
||||||
|
// - Always produces valid history (just shorter)
|
||||||
|
// - No confusing synthetic error results in conversation
|
||||||
|
// - Agent continues working, may just need to redo some work
|
||||||
|
//
|
||||||
|
// After truncation, we still:
|
||||||
|
// - Move displaced tool results to correct positions
|
||||||
|
// - Drop duplicate tool results
|
||||||
|
// - Drop orphaned tool results (results without matching calls)
|
||||||
|
|
||||||
|
// First pass: check if truncation is needed
|
||||||
|
const incomplete = findFirstIncompleteToolCallIndex(messages);
|
||||||
|
let workingMessages = messages;
|
||||||
|
let truncation: ToolUseRepairReport["truncation"];
|
||||||
|
|
||||||
|
if (incomplete) {
|
||||||
|
// Truncate before the incomplete assistant message
|
||||||
|
workingMessages = messages.slice(0, incomplete.index);
|
||||||
|
truncation = {
|
||||||
|
truncatedAtIndex: incomplete.index,
|
||||||
|
missingToolCallIds: incomplete.missingIds,
|
||||||
|
messagesDropped: messages.length - incomplete.index,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
// Second pass: repair ordering and duplicates in the (possibly truncated) messages
|
||||||
const out: AgentMessage[] = [];
|
const out: AgentMessage[] = [];
|
||||||
const added: Array<Extract<AgentMessage, { role: "toolResult" }>> = [];
|
const added: Array<Extract<AgentMessage, { role: "toolResult" }>> = [];
|
||||||
const seenToolResultIds = new Set<string>();
|
const seenToolResultIds = new Set<string>();
|
||||||
let droppedDuplicateCount = 0;
|
let droppedDuplicateCount = 0;
|
||||||
let droppedOrphanCount = 0;
|
let droppedOrphanCount = 0;
|
||||||
let moved = false;
|
let moved = false;
|
||||||
let changed = false;
|
let changed = truncation !== undefined;
|
||||||
|
|
||||||
const pushToolResult = (msg: Extract<AgentMessage, { role: "toolResult" }>) => {
|
const pushToolResult = (msg: Extract<AgentMessage, { role: "toolResult" }>) => {
|
||||||
const id = extractToolResultId(msg);
|
const id = extractToolResultId(msg);
|
||||||
@ -94,8 +179,8 @@ export function repairToolUseResultPairing(messages: AgentMessage[]): ToolUseRep
|
|||||||
out.push(msg);
|
out.push(msg);
|
||||||
};
|
};
|
||||||
|
|
||||||
for (let i = 0; i < messages.length; i += 1) {
|
for (let i = 0; i < workingMessages.length; i += 1) {
|
||||||
const msg = messages[i] as AgentMessage;
|
const msg = workingMessages[i] as AgentMessage;
|
||||||
if (!msg || typeof msg !== "object") {
|
if (!msg || typeof msg !== "object") {
|
||||||
out.push(msg);
|
out.push(msg);
|
||||||
continue;
|
continue;
|
||||||
@ -104,8 +189,7 @@ export function repairToolUseResultPairing(messages: AgentMessage[]): ToolUseRep
|
|||||||
const role = (msg as { role?: unknown }).role;
|
const role = (msg as { role?: unknown }).role;
|
||||||
if (role !== "assistant") {
|
if (role !== "assistant") {
|
||||||
// Tool results must only appear directly after the matching assistant tool call turn.
|
// Tool results must only appear directly after the matching assistant tool call turn.
|
||||||
// Any "free-floating" toolResult entries in session history can make strict providers
|
// Any "free-floating" toolResult entries can make strict providers reject the request.
|
||||||
// (Anthropic-compatible APIs, MiniMax, Cloud Code Assist) reject the entire request.
|
|
||||||
if (role !== "toolResult") {
|
if (role !== "toolResult") {
|
||||||
out.push(msg);
|
out.push(msg);
|
||||||
} else {
|
} else {
|
||||||
@ -123,13 +207,12 @@ export function repairToolUseResultPairing(messages: AgentMessage[]): ToolUseRep
|
|||||||
}
|
}
|
||||||
|
|
||||||
const toolCallIds = new Set(toolCalls.map((t) => t.id));
|
const toolCallIds = new Set(toolCalls.map((t) => t.id));
|
||||||
|
|
||||||
const spanResultsById = new Map<string, Extract<AgentMessage, { role: "toolResult" }>>();
|
const spanResultsById = new Map<string, Extract<AgentMessage, { role: "toolResult" }>>();
|
||||||
const remainder: AgentMessage[] = [];
|
const remainder: AgentMessage[] = [];
|
||||||
|
|
||||||
let j = i + 1;
|
let j = i + 1;
|
||||||
for (; j < messages.length; j += 1) {
|
for (; j < workingMessages.length; j += 1) {
|
||||||
const next = messages[j] as AgentMessage;
|
const next = workingMessages[j] as AgentMessage;
|
||||||
if (!next || typeof next !== "object") {
|
if (!next || typeof next !== "object") {
|
||||||
remainder.push(next);
|
remainder.push(next);
|
||||||
continue;
|
continue;
|
||||||
@ -170,19 +253,13 @@ export function repairToolUseResultPairing(messages: AgentMessage[]): ToolUseRep
|
|||||||
changed = true;
|
changed = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// All tool calls should have results (we truncated incomplete ones above)
|
||||||
for (const call of toolCalls) {
|
for (const call of toolCalls) {
|
||||||
const existing = spanResultsById.get(call.id);
|
const existing = spanResultsById.get(call.id);
|
||||||
if (existing) {
|
if (existing) {
|
||||||
pushToolResult(existing);
|
pushToolResult(existing);
|
||||||
} else {
|
|
||||||
const missing = makeMissingToolResult({
|
|
||||||
toolCallId: call.id,
|
|
||||||
toolName: call.name,
|
|
||||||
});
|
|
||||||
added.push(missing);
|
|
||||||
changed = true;
|
|
||||||
pushToolResult(missing);
|
|
||||||
}
|
}
|
||||||
|
// No synthetic results - we truncated incomplete sequences
|
||||||
}
|
}
|
||||||
|
|
||||||
for (const rem of remainder) {
|
for (const rem of remainder) {
|
||||||
@ -197,10 +274,11 @@ export function repairToolUseResultPairing(messages: AgentMessage[]): ToolUseRep
|
|||||||
|
|
||||||
const changedOrMoved = changed || moved;
|
const changedOrMoved = changed || moved;
|
||||||
return {
|
return {
|
||||||
messages: changedOrMoved ? out : messages,
|
messages: changedOrMoved ? out : workingMessages,
|
||||||
added,
|
added,
|
||||||
droppedDuplicateCount,
|
droppedDuplicateCount,
|
||||||
droppedOrphanCount,
|
droppedOrphanCount,
|
||||||
moved: changedOrMoved,
|
moved: changedOrMoved,
|
||||||
|
truncation,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user