fix(litellm): complete prompt caching support for Claude models

This commit completes the LiteLLM prompt caching implementation by:

1. Applying cacheControlTtl defaults to litellm/claude-* models in addition
   to anthropic/* models. Previously, only direct Anthropic models received
   the default 1h cache TTL, causing LiteLLM Claude models to skip caching.

2. Setting api: "anthropic-messages" for Claude models during onboarding.
   LiteLLM was using the openai-completions API, which doesn't support
   Anthropic's cache control headers. The anthropic-messages API is required
   for proper prompt caching functionality.

Result: ~90% cost reduction for LiteLLM Claude usage (from $0.47 to $0.05 per
message in ~94K-token conversations), matching direct Anthropic API costs.

Co-Authored-By: Claude (claude-sonnet-4-5) <noreply@anthropic.com>
This commit is contained in:
Charles-Henri ROBICHE 2026-01-29 00:26:44 +01:00
parent 920fe168de
commit 2e0dd2ce72
No known key found for this signature in database
2 changed files with 28 additions and 12 deletions

View File

@ -436,9 +436,13 @@ export function applyLitellmProviderConfig(
const providers = { ...cfg.models?.providers }; const providers = { ...cfg.models?.providers };
const existingProvider = providers.litellm; const existingProvider = providers.litellm;
const existingModels = Array.isArray(existingProvider?.models) ? existingProvider.models : []; const existingModels = Array.isArray(existingProvider?.models) ? existingProvider.models : [];
// Detect Claude models and use Anthropic Messages API for proper cache control support
const isClaude = params.modelId.toLowerCase().startsWith("claude-");
const newModel = { const newModel = {
id: params.modelId, id: params.modelId,
name: params.modelName ?? params.modelId, name: params.modelName ?? params.modelId,
// Claude models through LiteLLM should use anthropic-messages API for cache control
...(isClaude ? { api: "anthropic-messages" as const } : {}),
reasoning: false, reasoning: false,
input: ["text"] as ("text" | "image")[], input: ["text"] as ("text" | "image")[],
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },

View File

@ -323,7 +323,12 @@ export function applyContextPruningDefaults(cfg: MoltbotConfig): MoltbotConfig {
for (const [key, entry] of Object.entries(nextModels)) { for (const [key, entry] of Object.entries(nextModels)) {
const parsed = parseModelRef(key, "anthropic"); const parsed = parseModelRef(key, "anthropic");
if (!parsed || parsed.provider !== "anthropic") continue; if (!parsed) continue;
// Apply cache control to Anthropic models and LiteLLM Claude models
const isAnthropicProvider = parsed.provider === "anthropic";
const isLitellmClaude =
parsed.provider === "litellm" && parsed.model.toLowerCase().startsWith("claude-");
if (!isAnthropicProvider && !isLitellmClaude) continue;
const current = entry ?? {}; const current = entry ?? {};
const params = (current as { params?: Record<string, unknown> }).params ?? {}; const params = (current as { params?: Record<string, unknown> }).params ?? {};
if (typeof params.cacheControlTtl === "string") continue; if (typeof params.cacheControlTtl === "string") continue;
@ -337,7 +342,13 @@ export function applyContextPruningDefaults(cfg: MoltbotConfig): MoltbotConfig {
const primary = resolvePrimaryModelRef(defaults.model?.primary ?? undefined); const primary = resolvePrimaryModelRef(defaults.model?.primary ?? undefined);
if (primary) { if (primary) {
const parsedPrimary = parseModelRef(primary, "anthropic"); const parsedPrimary = parseModelRef(primary, "anthropic");
if (parsedPrimary?.provider === "anthropic") { if (parsedPrimary) {
// Apply cache control to Anthropic models and LiteLLM Claude models
const isAnthropicProvider = parsedPrimary.provider === "anthropic";
const isLitellmClaude =
parsedPrimary.provider === "litellm" &&
parsedPrimary.model.toLowerCase().startsWith("claude-");
if (isAnthropicProvider || isLitellmClaude) {
const key = `${parsedPrimary.provider}/${parsedPrimary.model}`; const key = `${parsedPrimary.provider}/${parsedPrimary.model}`;
const entry = nextModels[key]; const entry = nextModels[key];
const current = entry ?? {}; const current = entry ?? {};
@ -351,6 +362,7 @@ export function applyContextPruningDefaults(cfg: MoltbotConfig): MoltbotConfig {
} }
} }
} }
}
if (modelsMutated) { if (modelsMutated) {
nextDefaults.models = nextModels; nextDefaults.models = nextModels;