fix(litellm): complete prompt caching support for Claude models
This commit completes the LiteLLM prompt caching implementation by:

1. Applying the cacheControlTtl default to litellm/claude-* models in addition to anthropic/* models. Previously, only direct Anthropic models received the default 1h cache TTL, so LiteLLM Claude models skipped caching.
2. Setting api: "anthropic-messages" for Claude models during onboarding. LiteLLM was using the openai-completions API, which doesn't support Anthropic's cache control headers; the anthropic-messages API is required for prompt caching to work.

Result: roughly 90% cost reduction for LiteLLM Claude usage (from $0.47 to $0.05 per message with ~94K-token conversations), matching direct Anthropic API costs.

Co-Authored-By: Claude (claude-sonnet-4-5) <noreply@anthropic.com>
This commit is contained in:
parent
920fe168de
commit
2e0dd2ce72
@ -436,9 +436,13 @@ export function applyLitellmProviderConfig(
|
|||||||
const providers = { ...cfg.models?.providers };
|
const providers = { ...cfg.models?.providers };
|
||||||
const existingProvider = providers.litellm;
|
const existingProvider = providers.litellm;
|
||||||
const existingModels = Array.isArray(existingProvider?.models) ? existingProvider.models : [];
|
const existingModels = Array.isArray(existingProvider?.models) ? existingProvider.models : [];
|
||||||
|
// Detect Claude models and use Anthropic Messages API for proper cache control support
|
||||||
|
const isClaude = params.modelId.toLowerCase().startsWith("claude-");
|
||||||
const newModel = {
|
const newModel = {
|
||||||
id: params.modelId,
|
id: params.modelId,
|
||||||
name: params.modelName ?? params.modelId,
|
name: params.modelName ?? params.modelId,
|
||||||
|
// Claude models through LiteLLM should use anthropic-messages API for cache control
|
||||||
|
...(isClaude ? { api: "anthropic-messages" as const } : {}),
|
||||||
reasoning: false,
|
reasoning: false,
|
||||||
input: ["text"] as ("text" | "image")[],
|
input: ["text"] as ("text" | "image")[],
|
||||||
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
|
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
|
||||||
|
|||||||
@ -323,7 +323,12 @@ export function applyContextPruningDefaults(cfg: MoltbotConfig): MoltbotConfig {
|
|||||||
|
|
||||||
for (const [key, entry] of Object.entries(nextModels)) {
|
for (const [key, entry] of Object.entries(nextModels)) {
|
||||||
const parsed = parseModelRef(key, "anthropic");
|
const parsed = parseModelRef(key, "anthropic");
|
||||||
if (!parsed || parsed.provider !== "anthropic") continue;
|
if (!parsed) continue;
|
||||||
|
// Apply cache control to Anthropic models and LiteLLM Claude models
|
||||||
|
const isAnthropicProvider = parsed.provider === "anthropic";
|
||||||
|
const isLitellmClaude =
|
||||||
|
parsed.provider === "litellm" && parsed.model.toLowerCase().startsWith("claude-");
|
||||||
|
if (!isAnthropicProvider && !isLitellmClaude) continue;
|
||||||
const current = entry ?? {};
|
const current = entry ?? {};
|
||||||
const params = (current as { params?: Record<string, unknown> }).params ?? {};
|
const params = (current as { params?: Record<string, unknown> }).params ?? {};
|
||||||
if (typeof params.cacheControlTtl === "string") continue;
|
if (typeof params.cacheControlTtl === "string") continue;
|
||||||
@ -337,17 +342,24 @@ export function applyContextPruningDefaults(cfg: MoltbotConfig): MoltbotConfig {
|
|||||||
const primary = resolvePrimaryModelRef(defaults.model?.primary ?? undefined);
|
const primary = resolvePrimaryModelRef(defaults.model?.primary ?? undefined);
|
||||||
if (primary) {
|
if (primary) {
|
||||||
const parsedPrimary = parseModelRef(primary, "anthropic");
|
const parsedPrimary = parseModelRef(primary, "anthropic");
|
||||||
if (parsedPrimary?.provider === "anthropic") {
|
if (parsedPrimary) {
|
||||||
const key = `${parsedPrimary.provider}/${parsedPrimary.model}`;
|
// Apply cache control to Anthropic models and LiteLLM Claude models
|
||||||
const entry = nextModels[key];
|
const isAnthropicProvider = parsedPrimary.provider === "anthropic";
|
||||||
const current = entry ?? {};
|
const isLitellmClaude =
|
||||||
const params = (current as { params?: Record<string, unknown> }).params ?? {};
|
parsedPrimary.provider === "litellm" &&
|
||||||
if (typeof params.cacheControlTtl !== "string") {
|
parsedPrimary.model.toLowerCase().startsWith("claude-");
|
||||||
nextModels[key] = {
|
if (isAnthropicProvider || isLitellmClaude) {
|
||||||
...(current as Record<string, unknown>),
|
const key = `${parsedPrimary.provider}/${parsedPrimary.model}`;
|
||||||
params: { ...params, cacheControlTtl: "1h" },
|
const entry = nextModels[key];
|
||||||
};
|
const current = entry ?? {};
|
||||||
modelsMutated = true;
|
const params = (current as { params?: Record<string, unknown> }).params ?? {};
|
||||||
|
if (typeof params.cacheControlTtl !== "string") {
|
||||||
|
nextModels[key] = {
|
||||||
|
...(current as Record<string, unknown>),
|
||||||
|
params: { ...params, cacheControlTtl: "1h" },
|
||||||
|
};
|
||||||
|
modelsMutated = true;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user