Merge 155eca6b3e into da71eaebd2
This commit is contained in:
commit
b9e818de2d
@ -52,6 +52,7 @@ import {
|
|||||||
import { DEFAULT_BOOTSTRAP_FILENAME } from "../../workspace.js";
|
import { DEFAULT_BOOTSTRAP_FILENAME } from "../../workspace.js";
|
||||||
import { buildSystemPromptReport } from "../../system-prompt-report.js";
|
import { buildSystemPromptReport } from "../../system-prompt-report.js";
|
||||||
import { resolveDefaultModelForAgent } from "../../model-selection.js";
|
import { resolveDefaultModelForAgent } from "../../model-selection.js";
|
||||||
|
import { sanitizeToolUseResultPairing } from "../../session-transcript-repair.js";
|
||||||
|
|
||||||
import { isAbortError } from "../abort.js";
|
import { isAbortError } from "../abort.js";
|
||||||
import { buildEmbeddedExtensionPaths } from "../extensions.js";
|
import { buildEmbeddedExtensionPaths } from "../extensions.js";
|
||||||
@ -535,9 +536,11 @@ export async function runEmbeddedAttempt(
|
|||||||
validated,
|
validated,
|
||||||
getDmHistoryLimitFromSessionKey(params.sessionKey, params.config),
|
getDmHistoryLimitFromSessionKey(params.sessionKey, params.config),
|
||||||
);
|
);
|
||||||
cacheTrace?.recordStage("session:limited", { messages: limited });
|
// Fix: Repair tool_use/tool_result pairings AFTER truncation (issue #4367)
|
||||||
if (limited.length > 0) {
|
const repaired = sanitizeToolUseResultPairing(limited);
|
||||||
activeSession.agent.replaceMessages(limited);
|
cacheTrace?.recordStage("session:limited", { messages: repaired });
|
||||||
|
if (repaired.length > 0) {
|
||||||
|
activeSession.agent.replaceMessages(repaired);
|
||||||
}
|
}
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
sessionManager.flushPendingToolResults?.();
|
sessionManager.flushPendingToolResults?.();
|
||||||
|
|||||||
@ -109,4 +109,150 @@ describe("sanitizeToolUseResultPairing", () => {
|
|||||||
expect(out.some((m) => m.role === "toolResult")).toBe(false);
|
expect(out.some((m) => m.role === "toolResult")).toBe(false);
|
||||||
expect(out.map((m) => m.role)).toEqual(["user", "assistant"]);
|
expect(out.map((m) => m.role)).toEqual(["user", "assistant"]);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// Tests for PR #4387: tool_use_id mismatch fix after history truncation
|
||||||
|
describe("after history truncation", () => {
|
||||||
|
it("repairs orphaned tool_use by inserting synthetic error result", () => {
|
||||||
|
// Simulates truncation that removed the tool_result but kept the assistant tool_use
|
||||||
|
const truncatedHistory = [
|
||||||
|
{
|
||||||
|
role: "assistant",
|
||||||
|
content: [{ type: "toolCall", id: "call_1", name: "read", arguments: {} }],
|
||||||
|
},
|
||||||
|
{ role: "user", content: "what did you find?" },
|
||||||
|
] satisfies AgentMessage[];
|
||||||
|
|
||||||
|
const out = sanitizeToolUseResultPairing(truncatedHistory);
|
||||||
|
|
||||||
|
// Should have inserted a synthetic error result after the assistant message
|
||||||
|
expect(out).toHaveLength(3);
|
||||||
|
expect(out[0]?.role).toBe("assistant");
|
||||||
|
expect(out[1]?.role).toBe("toolResult");
|
||||||
|
expect((out[1] as { toolCallId?: string }).toolCallId).toBe("call_1");
|
||||||
|
expect((out[1] as { isError?: boolean }).isError).toBe(true);
|
||||||
|
expect((out[1] as { content?: Array<{ text?: string }> }).content?.[0]?.text).toContain(
|
||||||
|
"missing tool result",
|
||||||
|
);
|
||||||
|
expect(out[2]?.role).toBe("user");
|
||||||
|
});
|
||||||
|
|
||||||
|
it("drops orphaned tool_result that no longer has matching tool_use", () => {
|
||||||
|
// Simulates truncation that removed the assistant tool_use but kept the tool_result
|
||||||
|
const truncatedHistory = [
|
||||||
|
{ role: "user", content: "please read the file" },
|
||||||
|
{
|
||||||
|
role: "toolResult",
|
||||||
|
toolCallId: "call_orphan",
|
||||||
|
toolName: "read",
|
||||||
|
content: [{ type: "text", text: "file contents" }],
|
||||||
|
isError: false,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
role: "assistant",
|
||||||
|
content: [{ type: "text", text: "Here's what I found..." }],
|
||||||
|
},
|
||||||
|
] satisfies AgentMessage[];
|
||||||
|
|
||||||
|
const out = sanitizeToolUseResultPairing(truncatedHistory);
|
||||||
|
|
||||||
|
// Orphaned tool_result should be dropped
|
||||||
|
expect(out).toHaveLength(2);
|
||||||
|
expect(out.map((m) => m.role)).toEqual(["user", "assistant"]);
|
||||||
|
expect(out.some((m) => m.role === "toolResult")).toBe(false);
|
||||||
|
});
|
||||||
|
|
||||||
|
it("passes through normal history unchanged", () => {
|
||||||
|
// Well-formed history with proper tool_use/tool_result pairing
|
||||||
|
const normalHistory = [
|
||||||
|
{ role: "user", content: "please read the file" },
|
||||||
|
{
|
||||||
|
role: "assistant",
|
||||||
|
content: [{ type: "toolCall", id: "call_1", name: "read", arguments: {} }],
|
||||||
|
},
|
||||||
|
{
|
||||||
|
role: "toolResult",
|
||||||
|
toolCallId: "call_1",
|
||||||
|
toolName: "read",
|
||||||
|
content: [{ type: "text", text: "file contents" }],
|
||||||
|
isError: false,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
role: "assistant",
|
||||||
|
content: [{ type: "text", text: "Here's what I found..." }],
|
||||||
|
},
|
||||||
|
{ role: "user", content: "thanks!" },
|
||||||
|
] satisfies AgentMessage[];
|
||||||
|
|
||||||
|
const out = sanitizeToolUseResultPairing(normalHistory);
|
||||||
|
|
||||||
|
// Should return the same array reference (no modifications)
|
||||||
|
expect(out).toBe(normalHistory);
|
||||||
|
expect(out).toHaveLength(5);
|
||||||
|
expect(out.map((m) => m.role)).toEqual([
|
||||||
|
"user",
|
||||||
|
"assistant",
|
||||||
|
"toolResult",
|
||||||
|
"assistant",
|
||||||
|
"user",
|
||||||
|
]);
|
||||||
|
});
|
||||||
|
|
||||||
|
it("handles multiple orphaned tool_use blocks after truncation", () => {
|
||||||
|
const truncatedHistory = [
|
||||||
|
{
|
||||||
|
role: "assistant",
|
||||||
|
content: [
|
||||||
|
{ type: "toolCall", id: "call_1", name: "read", arguments: {} },
|
||||||
|
{ type: "toolCall", id: "call_2", name: "exec", arguments: {} },
|
||||||
|
],
|
||||||
|
},
|
||||||
|
{ role: "user", content: "what happened?" },
|
||||||
|
] satisfies AgentMessage[];
|
||||||
|
|
||||||
|
const out = sanitizeToolUseResultPairing(truncatedHistory);
|
||||||
|
|
||||||
|
// Should insert synthetic results for both orphaned tool calls
|
||||||
|
expect(out).toHaveLength(4);
|
||||||
|
expect(out[0]?.role).toBe("assistant");
|
||||||
|
expect(out[1]?.role).toBe("toolResult");
|
||||||
|
expect((out[1] as { toolCallId?: string }).toolCallId).toBe("call_1");
|
||||||
|
expect(out[2]?.role).toBe("toolResult");
|
||||||
|
expect((out[2] as { toolCallId?: string }).toolCallId).toBe("call_2");
|
||||||
|
expect(out[3]?.role).toBe("user");
|
||||||
|
});
|
||||||
|
|
||||||
|
it("repairs mixed scenario: some results present, some missing after truncation", () => {
|
||||||
|
const truncatedHistory = [
|
||||||
|
{
|
||||||
|
role: "assistant",
|
||||||
|
content: [
|
||||||
|
{ type: "toolCall", id: "call_1", name: "read", arguments: {} },
|
||||||
|
{ type: "toolCall", id: "call_2", name: "exec", arguments: {} },
|
||||||
|
],
|
||||||
|
},
|
||||||
|
{
|
||||||
|
role: "toolResult",
|
||||||
|
toolCallId: "call_1",
|
||||||
|
toolName: "read",
|
||||||
|
content: [{ type: "text", text: "contents" }],
|
||||||
|
isError: false,
|
||||||
|
},
|
||||||
|
// call_2's result was truncated away
|
||||||
|
{ role: "user", content: "ok" },
|
||||||
|
] satisfies AgentMessage[];
|
||||||
|
|
||||||
|
const out = sanitizeToolUseResultPairing(truncatedHistory);
|
||||||
|
|
||||||
|
// Should keep existing result and insert synthetic result for missing one
|
||||||
|
expect(out).toHaveLength(4);
|
||||||
|
expect(out[0]?.role).toBe("assistant");
|
||||||
|
expect(out[1]?.role).toBe("toolResult");
|
||||||
|
expect((out[1] as { toolCallId?: string }).toolCallId).toBe("call_1");
|
||||||
|
expect((out[1] as { isError?: boolean }).isError).toBe(false);
|
||||||
|
expect(out[2]?.role).toBe("toolResult");
|
||||||
|
expect((out[2] as { toolCallId?: string }).toolCallId).toBe("call_2");
|
||||||
|
expect((out[2] as { isError?: boolean }).isError).toBe(true);
|
||||||
|
expect(out[3]?.role).toBe("user");
|
||||||
|
});
|
||||||
|
});
|
||||||
});
|
});
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user