fix: repair tool_use/tool_result pairing for Claude on any provider
When using Claude models via non-Anthropic providers (github-copilot, openrouter, amazon-bedrock, etc.), the repairToolUseResultPairing sanitizer was not running, causing 400 errors when sessions had orphaned tool_use blocks without a matching tool_result.

Added an isClaudeModel() helper that detects Claude by modelId, and wired it into:
- repairToolUseResultPairing (fixes the 400 error)
- validateAnthropicTurns (Claude needs Anthropic-style turn validation)
- allowSyntheticToolResults (allows inserting synthetic results for missing tool_result blocks)

Added a comprehensive test suite covering:
- Direct Anthropic provider
- Claude via github-copilot, openrouter, opencode, amazon-bedrock
- Non-Claude models (GPT, Llama, Gemini)
- Edge cases (null/empty modelId)
- Case-insensitive detection

Fixes: '400 messages.220: tool_use ids were found without tool_result blocks'
This commit is contained in:
parent
3f83afe4a6
commit
3f6fcabd9d
238
src/agents/transcript-policy.test.ts
Normal file
238
src/agents/transcript-policy.test.ts
Normal file
@ -0,0 +1,238 @@
|
||||
import { describe, expect, it } from "vitest";
|
||||
|
||||
import { resolveTranscriptPolicy } from "./transcript-policy.js";
|
||||
|
||||
/** Shape of the policy object produced by `resolveTranscriptPolicy`. */
type TranscriptPolicy = ReturnType<typeof resolveTranscriptPolicy>;

/**
 * Asserts that the three sanitizers this suite groups as "Claude-specific"
 * (tool_use/tool_result pairing repair, Anthropic turn validation, and
 * synthetic tool results) are all switched on, or all switched off.
 *
 * @param policy - Policy returned by `resolveTranscriptPolicy`.
 * @param enabled - Expected value of all three flags.
 */
function expectClaudeSanitizers(policy: TranscriptPolicy, enabled: boolean): void {
  expect(policy.repairToolUseResultPairing).toBe(enabled);
  expect(policy.validateAnthropicTurns).toBe(enabled);
  expect(policy.allowSyntheticToolResults).toBe(enabled);
}

describe("resolveTranscriptPolicy", () => {
  describe("Anthropic provider (direct)", () => {
    it("enables Claude-specific sanitizers for anthropic provider", () => {
      const policy = resolveTranscriptPolicy({
        provider: "anthropic",
        modelId: "claude-sonnet-4-20250514",
      });

      expectClaudeSanitizers(policy, true);
    });

    it("enables Claude-specific sanitizers for anthropic-messages API", () => {
      const policy = resolveTranscriptPolicy({
        modelApi: "anthropic-messages",
        modelId: "claude-3-opus-20240229",
      });

      expectClaudeSanitizers(policy, true);
    });
  });

  describe("Claude models via non-Anthropic providers", () => {
    // One row per provider; each uses the modelId spelling that provider exposes.
    it.each([
      { provider: "github-copilot", modelId: "claude-sonnet-4" },
      { provider: "openrouter", modelId: "anthropic/claude-3.5-sonnet" },
      { provider: "opencode", modelId: "claude-3-haiku-20240307" },
      { provider: "amazon-bedrock", modelId: "anthropic.claude-3-sonnet-20240229-v1:0" },
    ])(
      "enables Claude-specific sanitizers for $provider with Claude model",
      ({ provider, modelId }) => {
        const policy = resolveTranscriptPolicy({ provider, modelId });

        expectClaudeSanitizers(policy, true);
      },
    );

    it("handles case-insensitive Claude model detection", () => {
      const policy = resolveTranscriptPolicy({
        provider: "openrouter",
        modelId: "CLAUDE-3-OPUS",
      });

      expectClaudeSanitizers(policy, true);
    });
  });

  describe("OpenAI short-circuit", () => {
    it("disables Claude-specific sanitizers for OpenAI provider even with Claude in modelId", () => {
      // Edge case: OpenAI provider should short-circuit regardless of modelId
      const policy = resolveTranscriptPolicy({
        provider: "openai",
        modelId: "gpt-4-claude-variant", // hypothetical edge case
      });

      expectClaudeSanitizers(policy, false);
    });

    it("disables Claude-specific sanitizers for openai-codex provider", () => {
      const policy = resolveTranscriptPolicy({
        provider: "openai-codex",
        modelId: "codex-davinci",
      });

      expectClaudeSanitizers(policy, false);
    });
  });

  describe("Google models", () => {
    it("enables repairToolUseResultPairing and allowSyntheticToolResults for Google", () => {
      const policy = resolveTranscriptPolicy({
        modelApi: "google-gemini-cli",
        modelId: "gemini-2.0-flash",
      });

      // Google shares pairing repair and synthetic results with Claude,
      // but uses Gemini turn validation instead of Anthropic's.
      expect(policy.repairToolUseResultPairing).toBe(true);
      expect(policy.validateAnthropicTurns).toBe(false);
      expect(policy.allowSyntheticToolResults).toBe(true);
      expect(policy.validateGeminiTurns).toBe(true);
      expect(policy.applyGoogleTurnOrdering).toBe(true);
    });
  });

  describe("non-Claude models via third-party providers", () => {
    it("does not enable Claude-specific sanitizers for GPT models on openrouter", () => {
      const policy = resolveTranscriptPolicy({
        provider: "openrouter",
        modelId: "openai/gpt-4-turbo",
      });

      expectClaudeSanitizers(policy, false);
    });

    it("does not enable Claude-specific sanitizers for Llama models on github-copilot", () => {
      const policy = resolveTranscriptPolicy({
        provider: "github-copilot",
        modelId: "llama-3.1-70b",
      });

      expectClaudeSanitizers(policy, false);
    });

    it("does not enable Claude sanitizers for Gemini models on openrouter (but enables Gemini-specific)", () => {
      const policy = resolveTranscriptPolicy({
        provider: "openrouter",
        modelId: "google/gemini-pro",
      });

      // Gemini on openrouter gets special thought signature handling but not Claude sanitizers
      expectClaudeSanitizers(policy, false);
      expect(policy.sanitizeThoughtSignatures).toEqual({
        allowBase64Only: true,
        includeCamelCase: true,
      });
    });
  });

  describe("Mistral models", () => {
    it("enables Mistral-specific sanitizers", () => {
      const policy = resolveTranscriptPolicy({
        provider: "mistral",
        modelId: "mistral-large",
      });

      expect(policy.sanitizeToolCallIds).toBe(true);
      expect(policy.toolCallIdMode).toBe("strict9");
      expect(policy.sanitizeMode).toBe("full");
    });

    it("detects Mistral models by modelId hint", () => {
      const policy = resolveTranscriptPolicy({
        provider: "openrouter",
        modelId: "mistralai/mixtral-8x7b",
      });

      expect(policy.sanitizeToolCallIds).toBe(true);
      expect(policy.toolCallIdMode).toBe("strict9");
    });
  });

  describe("edge cases", () => {
    // Exercise both nullish spellings the parameter type allows.
    it.each([null, undefined])("handles %s modelId gracefully", (modelId) => {
      const policy = resolveTranscriptPolicy({
        provider: "github-copilot",
        modelId,
      });

      expectClaudeSanitizers(policy, false);
    });

    it("handles empty string modelId gracefully", () => {
      const policy = resolveTranscriptPolicy({
        provider: "openrouter",
        modelId: "",
      });

      expectClaudeSanitizers(policy, false);
    });

    it("handles undefined params gracefully", () => {
      const policy = resolveTranscriptPolicy({});

      expectClaudeSanitizers(policy, false);
    });
  });

  describe("google-antigravity Claude models", () => {
    it("enables antigravity-specific settings for Claude via google-antigravity", () => {
      const policy = resolveTranscriptPolicy({
        modelApi: "google-antigravity",
        provider: "google-antigravity",
        modelId: "claude-3-5-sonnet",
      });

      expect(policy.preserveSignatures).toBe(true);
      expect(policy.normalizeAntigravityThinkingBlocks).toBe(true);
      // Also enables Claude-specific sanitizers
      expectClaudeSanitizers(policy, true);
    });
  });
});
|
||||
@ -62,6 +62,16 @@ function isMistralModel(params: { provider?: string | null; modelId?: string | n
|
||||
return MISTRAL_MODEL_HINTS.some((hint) => modelId.includes(hint));
|
||||
}
|
||||
|
||||
/**
 * Reports whether a model identifier names a Claude model.
 *
 * Detection is a case-insensitive substring match on "claude", so it also
 * recognises provider-prefixed identifiers such as
 * "anthropic/claude-3.5-sonnet" or "anthropic.claude-3-sonnet-20240229-v1:0"
 * used by non-Anthropic providers (github-copilot, openrouter, etc.).
 * Because the match is positional-agnostic, any identifier that merely
 * contains "claude" is reported as Claude as well.
 *
 * @param modelId - Model identifier; may be absent, `null`, or empty.
 * @returns `true` when `modelId` contains "claude" in any letter case;
 *   `false` for `undefined`, `null`, or the empty string.
 */
function isClaudeModel(modelId?: string | null): boolean {
  // Covers undefined, null and "" in a single guard.
  if (!modelId) return false;
  return modelId.toLowerCase().includes("claude");
}
|
||||
|
||||
export function resolveTranscriptPolicy(params: {
|
||||
modelApi?: string | null;
|
||||
provider?: string | null;
|
||||
@ -81,6 +91,8 @@ export function resolveTranscriptPolicy(params: {
|
||||
provider,
|
||||
modelId,
|
||||
});
|
||||
// Detect Claude models by modelId (catches github-copilot, openrouter, etc.)
|
||||
const isClaude = isClaudeModel(modelId);
|
||||
|
||||
const needsNonImageSanitize = isGoogle || isAnthropic || isMistral || isOpenRouterGemini;
|
||||
|
||||
@ -90,7 +102,7 @@ export function resolveTranscriptPolicy(params: {
|
||||
: sanitizeToolCallIds
|
||||
? "strict"
|
||||
: undefined;
|
||||
const repairToolUseResultPairing = isGoogle || isAnthropic;
|
||||
const repairToolUseResultPairing = isGoogle || isAnthropic || isClaude;
|
||||
const sanitizeThoughtSignatures = isOpenRouterGemini
|
||||
? { allowBase64Only: true, includeCamelCase: true }
|
||||
: undefined;
|
||||
@ -106,7 +118,7 @@ export function resolveTranscriptPolicy(params: {
|
||||
normalizeAntigravityThinkingBlocks,
|
||||
applyGoogleTurnOrdering: !isOpenAi && isGoogle,
|
||||
validateGeminiTurns: !isOpenAi && isGoogle,
|
||||
validateAnthropicTurns: !isOpenAi && isAnthropic,
|
||||
allowSyntheticToolResults: !isOpenAi && (isGoogle || isAnthropic),
|
||||
validateAnthropicTurns: !isOpenAi && (isAnthropic || isClaude),
|
||||
allowSyntheticToolResults: !isOpenAi && (isGoogle || isAnthropic || isClaude),
|
||||
};
|
||||
}
|
||||
|
||||
Loading…
Reference in New Issue
Block a user