fix(litellm): use anthropic-messages API for Claude models

- Set api: 'anthropic-messages' for claude-* models through LiteLLM
- Add LiteLLM to resolveCacheControlTtl for cache parameter passthrough
- Enables Anthropic prompt caching (cache control TTL) for Claude models routed through LiteLLM, for cost savings

This fixes the missing cache support by ensuring:
1. Claude models use the correct API format (anthropic-messages)
2. The cache control TTL ("5m" or "1h") is resolved and passed through for claude-* models on LiteLLM
This commit is contained in:
Charles-Henri ROBICHE 2026-01-29 00:01:57 +01:00
parent c50d5c7f34
commit 920fe168de
No known key found for this signature in database
2 changed files with 7 additions and 0 deletions

View File

@ -22,9 +22,14 @@ export type LitellmModelEntry = {
};
export function buildLitellmModelDefinition(entry: LitellmModelEntry): ModelDefinitionConfig {
// Detect Claude models and use Anthropic Messages API for proper cache control support
const isClaude = entry.id.toLowerCase().startsWith("claude-");
return {
id: entry.id,
name: entry.name,
// Claude models through LiteLLM should use anthropic-messages API for cache control
...(isClaude ? { api: "anthropic-messages" as const } : {}),
reasoning: entry.reasoning ?? false,
input: entry.input ? [...entry.input] : ["text"],
cost: LITELLM_DEFAULT_COST,

View File

@ -32,6 +32,8 @@ function resolveCacheControlTtl(
if (raw !== "5m" && raw !== "1h") return undefined;
if (provider === "anthropic") return raw;
if (provider === "openrouter" && modelId.startsWith("anthropic/")) return raw;
// LiteLLM proxying to Anthropic models (claude-*)
if (provider === "litellm" && modelId.startsWith("claude-")) return raw;
return undefined;
}