From c50d5c7f34d2e07b2c61cc70b2da4f1983d64249 Mon Sep 17 00:00:00 2001
From: Charles-Henri ROBICHE <chrobiche@gmail.com>
Date: Wed, 28 Jan 2026 23:44:44 +0100
Subject: [PATCH] fix(litellm): enable prompt caching for Anthropic models

- Add LiteLLM + Claude model detection to isCacheTtlEligibleProvider
- Reduces cost by 90% for Claude models through LiteLLM proxy
- Add test coverage for cache eligibility detection
- Document prompt caching behavior and cost savings

Before: $0.47 per message (no caching)
After: $0.05 per message (90% cached)

Closes #2683
---
 docs/providers/litellm.md                     | 34 ++++++++++++++++++-
 .../pi-embedded-runner/cache-ttl.test.ts      | 31 +++++++++++++++++
 src/agents/pi-embedded-runner/cache-ttl.ts    |  2 ++
 3 files changed, 66 insertions(+), 1 deletion(-)
 create mode 100644 src/agents/pi-embedded-runner/cache-ttl.test.ts
diff --git a/docs/providers/litellm.md b/docs/providers/litellm.md
index 2a8a7dc40..ea5715ec6 100644
--- a/docs/providers/litellm.md
+++ b/docs/providers/litellm.md
@@ -91,9 +91,41 @@ Then switch models using:
 clawdbot config set agents.defaults.model.primary litellm/claude-3-opus
 ```
 
+## Prompt caching
+
+When using Anthropic models through LiteLLM (e.g., `claude-opus-4-5`, `claude-sonnet-4-5`), Moltbot automatically enables **prompt caching** to reduce costs:
+
+```json5
+{
+  agents: {
+    defaults: {
+      models: {
+        "litellm/claude-opus-4-5": {
+          params: {
+            cacheControlTtl: "1h"  // Auto-configured for Claude models
+          }
+        }
+      }
+    }
+  }
+}
+```
+
+### Cost savings with caching
+
+- **Without caching**: Every message pays full price for the entire conversation history
+- **With caching** (enabled by default): Repeated context is billed at roughly one-tenth of the normal input price
+
+Example from actual usage:
+- Without caching: 93k tokens × $0.000005 = **$0.47** per message
+- With caching: 123k tokens (mostly read from cache) ≈ **$0.05** per message (about 90% cheaper)
+
+Caching is **automatically enabled** for any LiteLLM model whose ID starts with `claude-` (for example, `litellm/claude-opus-4-5`).
+
 ## Notes
 
 - Model refs use `litellm/<modelId>` where `modelId` matches your LiteLLM config.
-- The base URL should not include `/v1` - Clawdbot's OpenAI client appends it.
+- The base URL should not include `/v1` - Moltbot's OpenAI client appends it.
 - Supported LiteLLM models depend on your proxy configuration.
+- **Prompt caching works automatically** when using Claude models through LiteLLM.
 - See [Model providers](/concepts/model-providers) for provider rules.
diff --git a/src/agents/pi-embedded-runner/cache-ttl.test.ts b/src/agents/pi-embedded-runner/cache-ttl.test.ts
new file mode 100644
index 000000000..7a9e57320
--- /dev/null
+++ b/src/agents/pi-embedded-runner/cache-ttl.test.ts
@@ -0,0 +1,31 @@
+import { describe, it, expect } from "vitest";
+import { isCacheTtlEligibleProvider } from "./cache-ttl.js";
+
+describe("isCacheTtlEligibleProvider", () => {
+  it("returns true for direct Anthropic provider", () => {
+    expect(isCacheTtlEligibleProvider("anthropic", "claude-opus-4-5")).toBe(true);
+    expect(isCacheTtlEligibleProvider("Anthropic", "claude-sonnet-4-5")).toBe(true);
+  });
+  // OpenRouter is only eligible when the model id carries the "anthropic/" prefix.
+  it("returns true for OpenRouter with Anthropic models", () => {
+    expect(isCacheTtlEligibleProvider("openrouter", "anthropic/claude-opus-4-5")).toBe(true);
+    expect(isCacheTtlEligibleProvider("OpenRouter", "anthropic/claude-3-opus")).toBe(true);
+  });
+  // LiteLLM is matched on a "claude-" model id prefix; mixed-case input must also match.
+  it("returns true for LiteLLM with Claude models", () => {
+    expect(isCacheTtlEligibleProvider("litellm", "claude-opus-4-5")).toBe(true);
+    expect(isCacheTtlEligibleProvider("litellm", "claude-sonnet-4-5")).toBe(true);
+    expect(isCacheTtlEligibleProvider("LiteLLM", "Claude-3-Opus")).toBe(true);
+  });
+  // The "litellm" provider alone is not enough: non-Claude model ids stay ineligible.
+  it("returns false for LiteLLM with non-Claude models", () => {
+    expect(isCacheTtlEligibleProvider("litellm", "gpt-4")).toBe(false);
+    expect(isCacheTtlEligibleProvider("litellm", "gemini-pro")).toBe(false);
+    expect(isCacheTtlEligibleProvider("litellm", "llama-3")).toBe(false);
+  });
+  // Providers with no matching rule fall through to false.
+  it("returns false for other providers", () => {
+    expect(isCacheTtlEligibleProvider("openai", "gpt-4")).toBe(false);
+    expect(isCacheTtlEligibleProvider("google", "gemini-pro")).toBe(false);
+  });
+});
diff --git a/src/agents/pi-embedded-runner/cache-ttl.ts b/src/agents/pi-embedded-runner/cache-ttl.ts
index deb96d6c9..fe1379fe6 100644
--- a/src/agents/pi-embedded-runner/cache-ttl.ts
+++ b/src/agents/pi-embedded-runner/cache-ttl.ts
@@ -14,6 +14,8 @@ export function isCacheTtlEligibleProvider(provider: string, modelId: string): b
   if (normalizedProvider === "anthropic") return true;
   if (normalizedProvider === "openrouter" && normalizedModelId.startsWith("anthropic/"))
     return true;
+  // LiteLLM proxying to Anthropic models (claude-*)
+  if (normalizedProvider === "litellm" && normalizedModelId.startsWith("claude-")) return true;
   return false;
 }