fix(litellm): enable prompt caching for Anthropic models
- Add LiteLLM + Claude model detection to isCacheTtlEligibleProvider
- Reduces cost by 90% for Claude models through LiteLLM proxy
- Add test coverage for cache eligibility detection
- Document prompt caching behavior and cost savings

Before: $0.47 per message (no caching)
After: $0.05 per message (90% cached)

Closes #2683
This commit is contained in:
parent
ac464845a8
commit
c50d5c7f34
@ -91,9 +91,41 @@ Then switch models using:
|
|||||||
clawdbot config set agents.defaults.model.primary litellm/claude-3-opus
|
clawdbot config set agents.defaults.model.primary litellm/claude-3-opus
|
||||||
```
|
```
|
||||||
|
|
||||||
|
## Prompt caching
|
||||||
|
|
||||||
|
When using Anthropic models through LiteLLM (e.g., `claude-opus-4-5`, `claude-sonnet-4-5`), Moltbot automatically enables **prompt caching** to reduce costs:
|
||||||
|
|
||||||
|
```json5
|
||||||
|
{
|
||||||
|
agents: {
|
||||||
|
defaults: {
|
||||||
|
models: {
|
||||||
|
"litellm/claude-opus-4-5": {
|
||||||
|
params: {
|
||||||
|
cacheControlTtl: "1h" // Auto-configured for Claude models
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### Cost savings with caching
|
||||||
|
|
||||||
|
- **Without caching**: Every message pays full price for the entire conversation history
|
||||||
|
- **With caching** (enabled by default): Repeated context that is read from the cache is billed at roughly one-tenth of the normal input price
|
||||||
|
|
||||||
|
Example from actual usage:
|
||||||
|
- Without caching: 93k tokens × $0.000005 per token ≈ **$0.47** per message
|
||||||
|
- With caching: 123k tokens (mostly cached) ≈ **$0.05** per message (roughly 90% savings)
|
||||||
|
|
||||||
|
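Caching is **automatically enabled** for any LiteLLM model whose ID starts with `claude-` (matched case-insensitively). Model IDs that use a different prefix in your LiteLLM config (for example `anthropic/claude-...`) are not detected as Claude models by this check.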
|
||||||
|
|
||||||
## Notes
|
## Notes
|
||||||
|
|
||||||
- Model refs use `litellm/<modelId>` where `modelId` matches your LiteLLM config.
|
- Model refs use `litellm/<modelId>` where `modelId` matches your LiteLLM config.
|
||||||
- The base URL should not include `/v1` - Clawdbot's OpenAI client appends it.
|
- The base URL should not include `/v1` - Moltbot's OpenAI client appends it.
|
||||||
- Supported LiteLLM models depend on your proxy configuration.
|
- Supported LiteLLM models depend on your proxy configuration.
|
||||||
|
- **Prompt caching works automatically** when using Claude models through LiteLLM.
|
||||||
- See [Model providers](/concepts/model-providers) for provider rules.
|
- See [Model providers](/concepts/model-providers) for provider rules.
|
||||||
|
|||||||
31
src/agents/pi-embedded-runner/cache-ttl.test.ts
Normal file
31
src/agents/pi-embedded-runner/cache-ttl.test.ts
Normal file
@ -0,0 +1,31 @@
|
|||||||
|
import { describe, it, expect } from "vitest";
|
||||||
|
import { isCacheTtlEligibleProvider } from "./cache-ttl.js";
|
||||||
|
|
||||||
|
describe("isCacheTtlEligibleProvider", () => {
|
||||||
|
it("returns true for direct Anthropic provider", () => {
|
||||||
|
expect(isCacheTtlEligibleProvider("anthropic", "claude-opus-4-5")).toBe(true);
|
||||||
|
expect(isCacheTtlEligibleProvider("Anthropic", "claude-sonnet-4-5")).toBe(true);
|
||||||
|
});
|
||||||
|
|
||||||
|
it("returns true for OpenRouter with Anthropic models", () => {
|
||||||
|
expect(isCacheTtlEligibleProvider("openrouter", "anthropic/claude-opus-4-5")).toBe(true);
|
||||||
|
expect(isCacheTtlEligibleProvider("OpenRouter", "anthropic/claude-3-opus")).toBe(true);
|
||||||
|
});
|
||||||
|
|
||||||
|
it("returns true for LiteLLM with Claude models", () => {
|
||||||
|
expect(isCacheTtlEligibleProvider("litellm", "claude-opus-4-5")).toBe(true);
|
||||||
|
expect(isCacheTtlEligibleProvider("litellm", "claude-sonnet-4-5")).toBe(true);
|
||||||
|
expect(isCacheTtlEligibleProvider("LiteLLM", "Claude-3-Opus")).toBe(true);
|
||||||
|
});
|
||||||
|
|
||||||
|
it("returns false for LiteLLM with non-Claude models", () => {
|
||||||
|
expect(isCacheTtlEligibleProvider("litellm", "gpt-4")).toBe(false);
|
||||||
|
expect(isCacheTtlEligibleProvider("litellm", "gemini-pro")).toBe(false);
|
||||||
|
expect(isCacheTtlEligibleProvider("litellm", "llama-3")).toBe(false);
|
||||||
|
});
|
||||||
|
|
||||||
|
it("returns false for other providers", () => {
|
||||||
|
expect(isCacheTtlEligibleProvider("openai", "gpt-4")).toBe(false);
|
||||||
|
expect(isCacheTtlEligibleProvider("google", "gemini-pro")).toBe(false);
|
||||||
|
});
|
||||||
|
});
|
||||||
@ -14,6 +14,8 @@ export function isCacheTtlEligibleProvider(provider: string, modelId: string): b
|
|||||||
if (normalizedProvider === "anthropic") return true;
|
if (normalizedProvider === "anthropic") return true;
|
||||||
if (normalizedProvider === "openrouter" && normalizedModelId.startsWith("anthropic/"))
|
if (normalizedProvider === "openrouter" && normalizedModelId.startsWith("anthropic/"))
|
||||||
return true;
|
return true;
|
||||||
|
// LiteLLM proxying to Anthropic models (claude-*)
|
||||||
|
if (normalizedProvider === "litellm" && normalizedModelId.startsWith("claude-")) return true;
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user