Merge 81518603e4 into da71eaebd2

2026-01-30 11:59:48 +00:00 · 2026-01-30 11:59:48 +00:00 · 34e56da345
commit 34e56da345
parent da71eaebd2 81518603e4
21 changed files with 820 additions and 84 deletions
--- a/docs/docs.json
+++ b/docs/docs.json
@ -1030,6 +1030,7 @@
          "providers/vercel-ai-gateway",
          "providers/openrouter",
          "providers/synthetic",
+          "providers/litellm",
          "providers/opencode",
          "providers/glm",
          "providers/zai"
--- a/docs/providers/litellm.md
+++ b/docs/providers/litellm.md
@ -0,0 +1,131 @@
+---
+summary: "Use LiteLLM as an OpenAI-compatible proxy in OpenClaw"
+read_when:
+  - You want to use LiteLLM as a model provider
+  - You need to connect to a self-hosted LiteLLM proxy
+  - You want to use any model through an OpenAI-compatible API
+---
+# LiteLLM
+
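+LiteLLM is an OpenAI-compatible proxy that supports 100+ LLM APIs. OpenClaw
+registers it as the `litellm` provider and uses the OpenAI Completions API (`openai-completions`); `claude-*` models added by onboarding are configured to use the Anthropic Messages API (`anthropic-messages`) instead.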
+
+## Quick setup
+
+1) Set up your LiteLLM proxy (see [LiteLLM docs](https://docs.litellm.ai/))
+2) Set environment variables (optional):
+   - `LITELLM_API_KEY` - your LiteLLM API key
+   - `LITELLM_BASE_URL` - your LiteLLM endpoint (default: `http://localhost:4000`)
+   - `LITELLM_MODEL` - default model name (default: `gpt-4`)
+3) Run onboarding:
+
+```bash
+openclaw onboard --auth-choice litellm-api-key
+```
+
+The wizard will prompt for:
+- Base URL (your LiteLLM proxy endpoint)
+- API key
+- Model (selected from the list fetched from your LiteLLM proxy)
+
+## Config example
+
+```json5
+{
+  env: { LITELLM_API_KEY: "sk-..." },
+  agents: {
+    defaults: {
+      model: { primary: "litellm/gpt-4" },
+      models: { "litellm/gpt-4": { alias: "GPT-4" } }
+    }
+  },
+  models: {
+    mode: "merge",
+    providers: {
+      litellm: {
+        baseUrl: "http://localhost:4000",
+        apiKey: "${LITELLM_API_KEY}",
+        api: "openai-completions",
+        models: [
+          {
+            id: "gpt-4",
+            name: "GPT-4",
+            reasoning: false,
+            input: ["text"],
+            contextWindow: 128000,
+            maxTokens: 8192
+          }
+        ]
+      }
+    }
+  }
+}
+```
+
+## Multiple models
+
+Add additional models to your config as needed:
+
+```json5
+{
+  models: {
+    providers: {
+      litellm: {
+        baseUrl: "http://localhost:4000",
+        apiKey: "${LITELLM_API_KEY}",
+        api: "openai-completions",
+        models: [
+          { id: "gpt-4", name: "GPT-4", contextWindow: 128000, maxTokens: 8192 },
+          { id: "claude-3-opus", name: "Claude Opus", contextWindow: 200000, maxTokens: 4096 },
+          { id: "gemini-pro", name: "Gemini Pro", contextWindow: 32000, maxTokens: 8192 }
+        ]
+      }
+    }
+  }
+}
+```
+
+Then switch models using:
+
+```bash
+openclaw config set agents.defaults.model.primary litellm/claude-3-opus
+```
+
+## Prompt caching
+
+When using Anthropic models through LiteLLM (e.g., `claude-opus-4-5`, `claude-sonnet-4-5`), OpenClaw automatically enables **prompt caching** to reduce costs:
+
+```json5
+{
+  agents: {
+    defaults: {
+      models: {
+        "litellm/claude-opus-4-5": {
+          params: {
+            cacheControlTtl: "1h"  // Auto-configured for Claude models
+          }
+        }
+      }
+    }
+  }
+}
+```
+
+### Cost savings with caching
+
+- **Without caching**: Every message pays full price for the entire conversation history
+- **With caching** (enabled by default): Repeated context costs 10x less
+
+Example from actual usage:
+- Without caching: 93k tokens × $0.000005 = **$0.47** per message
+- With caching: 123k tokens (mostly cached) = **$0.05** per message (90% savings!)
+
+Caching is **automatically enabled** for all `claude-*` models through LiteLLM.
+
+## Notes
+
+- Model refs use `litellm/<modelId>` where `modelId` matches your LiteLLM config.
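+- The base URL should not include `/v1` - OpenClaw's OpenAI client appends it. Onboarding's model discovery requests `/v1/models` and `/model/info` at the server root, so any path prefix in the base URL is ignored for discovery.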
+- Supported LiteLLM models depend on your proxy configuration.
+- **Prompt caching works automatically** when using Claude models through LiteLLM.
+- See [Model providers](/concepts/model-providers) for provider rules.
--- a/src/agents/litellm-models.ts
+++ b/src/agents/litellm-models.ts
@ -0,0 +1,50 @@
+import type { ModelDefinitionConfig } from "../config/types.js";
+
+// LiteLLM is a proxy in front of many upstream models, so both the base URL and the
+// model are user-configurable. The values below are only starting points for onboarding.
+export const LITELLM_DEFAULT_BASE_URL = "http://localhost:4000";
+export const LITELLM_DEFAULT_MODEL_ID = "gpt-4";
+export const LITELLM_DEFAULT_MODEL_REF = `litellm/${LITELLM_DEFAULT_MODEL_ID}`;
+export const LITELLM_DEFAULT_COST = { // all-zero placeholder; buildLitellmModelDefinition assigns this same object (not a copy) to every model
+  input: 0,
+  output: 0,
+  cacheRead: 0,
+  cacheWrite: 0,
+};
+
+export type LitellmModelEntry = { // input shape for buildLitellmModelDefinition; optional fields are defaulted there
+  id: string; // model ID; a case-insensitive "claude-" prefix switches the model to anthropic-messages
+  name: string;
+  reasoning?: boolean; // defaults to false
+  input?: readonly ("text" | "image")[]; // defaults to ["text"]
+  contextWindow?: number; // defaults to 128000
+  maxTokens?: number; // defaults to 8192
+};
+
+export function buildLitellmModelDefinition(entry: LitellmModelEntry): ModelDefinitionConfig {
+  // Claude detection: case-insensitive "claude-" prefix match on the model ID
+  const isClaude = entry.id.toLowerCase().startsWith("claude-");
+
+  return {
+    id: entry.id,
+    name: entry.name,
+    // Only Claude models get a per-model api override (anthropic-messages, for cache control); others set none here
+    ...(isClaude ? { api: "anthropic-messages" as const } : {}),
+    reasoning: entry.reasoning ?? false,
+    input: entry.input ? [...entry.input] : ["text"], // copied so the caller's readonly array is not shared
+    cost: LITELLM_DEFAULT_COST, // shared object reference: every definition points at the same cost object
+    contextWindow: entry.contextWindow ?? 128000,
+    maxTokens: entry.maxTokens ?? 8192,
+    // LiteLLM proxies to various providers that may not support the OpenAI Responses API
+    // `store` parameter. Disable it by default to avoid "Extra inputs are not permitted" errors.
+    compat: { supportsStore: false },
+  };
+}
+
+/**
+ * Builds the `litellm/<modelId>` reference for a model served by the LiteLLM proxy.
+ * The ID is used verbatim (no validation); an ID that already starts with `litellm/` is prefixed again.
+ */
+export function litellmModelRef(modelId: string): string {
+  return `litellm/${modelId}`;
+}
--- a/src/agents/model-auth.ts
+++ b/src/agents/model-auth.ts
@ -286,6 +286,7 @@ export function resolveEnvApiKey(provider: string): EnvApiKeyResult | null {
    venice: "VENICE_API_KEY",
    mistral: "MISTRAL_API_KEY",
    opencode: "OPENCODE_API_KEY",
+    litellm: "LITELLM_API_KEY",
  };
  const envVar = envMap[normalized];
  if (!envVar) return null;
--- a/src/agents/pi-embedded-runner/cache-ttl.test.ts
+++ b/src/agents/pi-embedded-runner/cache-ttl.test.ts
@ -0,0 +1,31 @@
+import { describe, it, expect } from "vitest";
+import { isCacheTtlEligibleProvider } from "./cache-ttl.js";
+
+describe("isCacheTtlEligibleProvider", () => {
+  it("returns true for direct Anthropic provider", () => {
+    expect(isCacheTtlEligibleProvider("anthropic", "claude-opus-4-5")).toBe(true);
+    expect(isCacheTtlEligibleProvider("Anthropic", "claude-sonnet-4-5")).toBe(true); // provider match is case-insensitive
+  });
+
+  it("returns true for OpenRouter with Anthropic models", () => {
+    expect(isCacheTtlEligibleProvider("openrouter", "anthropic/claude-opus-4-5")).toBe(true);
+    expect(isCacheTtlEligibleProvider("OpenRouter", "anthropic/claude-3-opus")).toBe(true);
+  });
+
+  it("returns true for LiteLLM with Claude models", () => {
+    expect(isCacheTtlEligibleProvider("litellm", "claude-opus-4-5")).toBe(true);
+    expect(isCacheTtlEligibleProvider("litellm", "claude-sonnet-4-5")).toBe(true);
+    expect(isCacheTtlEligibleProvider("LiteLLM", "Claude-3-Opus")).toBe(true); // provider and model ID are both matched case-insensitively
+  });
+
+  it("returns false for LiteLLM with non-Claude models", () => {
+    expect(isCacheTtlEligibleProvider("litellm", "gpt-4")).toBe(false);
+    expect(isCacheTtlEligibleProvider("litellm", "gemini-pro")).toBe(false);
+    expect(isCacheTtlEligibleProvider("litellm", "llama-3")).toBe(false);
+  });
+
+  it("returns false for other providers", () => {
+    expect(isCacheTtlEligibleProvider("openai", "gpt-4")).toBe(false);
+    expect(isCacheTtlEligibleProvider("google", "gemini-pro")).toBe(false);
+  });
+});
--- a/src/agents/pi-embedded-runner/cache-ttl.ts
+++ b/src/agents/pi-embedded-runner/cache-ttl.ts
@ -14,6 +14,8 @@ export function isCacheTtlEligibleProvider(provider: string, modelId: string): b
  if (normalizedProvider === "anthropic") return true;
  if (normalizedProvider === "openrouter" && normalizedModelId.startsWith("anthropic/"))
    return true;
+  // LiteLLM proxying to Anthropic models (claude-*)
+  if (normalizedProvider === "litellm" && normalizedModelId.startsWith("claude-")) return true;
  return false;
 }

--- a/src/agents/pi-embedded-runner/extra-params.ts
+++ b/src/agents/pi-embedded-runner/extra-params.ts
@ -32,6 +32,8 @@ function resolveCacheControlTtl(
  if (raw !== "5m" && raw !== "1h") return undefined;
  if (provider === "anthropic") return raw;
  if (provider === "openrouter" && modelId.startsWith("anthropic/")) return raw;
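+  // LiteLLM proxying to Anthropic models (claude-*). NOTE(review): `provider`/`modelId` look un-normalized here, while isCacheTtlEligibleProvider lowercases both - confirm mixed-case IDs are handled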
+  if (provider === "litellm" && modelId.startsWith("claude-")) return raw;
  return undefined;
 }

--- a/src/agents/pi-embedded-runner/model.ts
+++ b/src/agents/pi-embedded-runner/model.ts
@ -77,17 +77,25 @@ export function resolveModel(
    }
    const providerCfg = providers[provider];
    if (providerCfg || modelId.startsWith("mock-")) {
+      // Find the matching model definition from provider config to get compat settings
+      const modelDef = providerCfg?.models?.find((m) => m.id === modelId);
      const fallbackModel: Model<Api> = normalizeModelCompat({
        id: modelId,
-        name: modelId,
-        api: providerCfg?.api ?? "openai-responses",
+        name: modelDef?.name ?? modelId,
+        api: modelDef?.api ?? providerCfg?.api ?? "openai-responses",
        provider,
        baseUrl: providerCfg?.baseUrl,
-        reasoning: false,
-        input: ["text"],
-        cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
-        contextWindow: providerCfg?.models?.[0]?.contextWindow ?? DEFAULT_CONTEXT_TOKENS,
-        maxTokens: providerCfg?.models?.[0]?.maxTokens ?? DEFAULT_CONTEXT_TOKENS,
+        reasoning: modelDef?.reasoning ?? false,
+        input: modelDef?.input ?? ["text"],
+        cost: modelDef?.cost ?? { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
+        contextWindow:
+          modelDef?.contextWindow ??
+          providerCfg?.models?.[0]?.contextWindow ??
+          DEFAULT_CONTEXT_TOKENS,
+        maxTokens:
+          modelDef?.maxTokens ?? providerCfg?.models?.[0]?.maxTokens ?? DEFAULT_CONTEXT_TOKENS,
+        // Preserve compat settings for provider-specific quirks (e.g., supportsStore for LiteLLM)
+        compat: modelDef?.compat,
      } as Model<Api>);
      return { model: fallbackModel, authStorage, modelRegistry };
    }
--- a/src/cli/program/register.onboard.ts
+++ b/src/cli/program/register.onboard.ts
@ -52,7 +52,7 @@ export function registerOnboardCommand(program: Command) {
    .option("--mode <mode>", "Wizard mode: local|remote")
    .option(
      "--auth-choice <choice>",
-      "Auth: setup-token|token|chutes|openai-codex|openai-api-key|openrouter-api-key|ai-gateway-api-key|moonshot-api-key|kimi-code-api-key|synthetic-api-key|venice-api-key|gemini-api-key|zai-api-key|xiaomi-api-key|apiKey|minimax-api|minimax-api-lightning|opencode-zen|skip",
+      "Auth: setup-token|token|chutes|openai-codex|openai-api-key|openrouter-api-key|ai-gateway-api-key|moonshot-api-key|kimi-code-api-key|synthetic-api-key|venice-api-key|litellm-api-key|gemini-api-key|zai-api-key|xiaomi-api-key|apiKey|minimax-api|minimax-api-lightning|opencode-zen|skip",
    )
    .option(
      "--token-provider <id>",
@ -77,6 +77,9 @@ export function registerOnboardCommand(program: Command) {
    .option("--synthetic-api-key <key>", "Synthetic API key")
    .option("--venice-api-key <key>", "Venice API key")
    .option("--opencode-zen-api-key <key>", "OpenCode Zen API key")
+    .option("--litellm-api-key <key>", "LiteLLM API key")
+    .option("--litellm-base-url <url>", "LiteLLM base URL (default: http://localhost:4000)")
+    .option("--litellm-model <model>", "LiteLLM model name")
    .option("--gateway-port <port>", "Gateway port")
    .option("--gateway-bind <mode>", "Gateway bind: loopback|tailnet|lan|auto|custom")
    .option("--gateway-auth <mode>", "Gateway auth: token|password")
--- a/src/commands/agents.commands.add.ts
+++ b/src/commands/agents.commands.add.ts
@ -254,27 +254,41 @@ export async function agentsAddCommand(
      const authStore = ensureAuthProfileStore(agentDir, {
        allowKeychainPrompt: false,
      });
-      const authChoice = await promptAuthChoiceGrouped({
-        prompter,
-        store: authStore,
-        includeSkip: true,
-      });

-      const authResult = await applyAuthChoice({
-        authChoice,
-        config: nextConfig,
-        prompter,
-        runtime,
-        agentDir,
-        setDefaultModel: false,
-        agentId,
-      });
-      nextConfig = authResult.config;
-      if (authResult.agentModelOverride) {
-        nextConfig = applyAgentConfig(nextConfig, {
-          agentId,
-          model: authResult.agentModelOverride,
+      // Loop to allow retrying auth choice if user cancels during configuration
+      while (true) {
+        const authChoice = await promptAuthChoiceGrouped({
+          prompter,
+          store: authStore,
+          includeSkip: true,
        });
+
+        try {
+          const authResult = await applyAuthChoice({
+            authChoice,
+            config: nextConfig,
+            prompter,
+            runtime,
+            agentDir,
+            setDefaultModel: false,
+            agentId,
+          });
+          nextConfig = authResult.config;
+          if (authResult.agentModelOverride) {
+            nextConfig = applyAgentConfig(nextConfig, {
+              agentId,
+              model: authResult.agentModelOverride,
+            });
+          }
+          break; // Success - exit the loop
+        } catch (error) {
+          // If user cancelled to go back to auth selection, loop again
+          if (error instanceof Error && error.message === "AUTH_CHOICE_CANCELLED") {
+            continue;
+          }
+          // Re-throw other errors
+          throw error;
+        }
      }
    }

--- a/src/commands/auth-choice-options.ts
+++ b/src/commands/auth-choice-options.ts
@ -21,7 +21,8 @@ export type AuthChoiceGroupId =
  | "minimax"
  | "synthetic"
  | "venice"
-  | "qwen";
+  | "qwen"
+  | "litellm";

 export type AuthChoiceGroup = {
  value: AuthChoiceGroupId;
@ -120,6 +121,12 @@ const AUTH_CHOICE_GROUP_DEFS: {
    hint: "API key",
    choices: ["opencode-zen"],
  },
+  {
+    value: "litellm",
+    label: "LiteLLM",
+    hint: "OpenAI-compatible proxy (self-hosted)",
+    choices: ["litellm-api-key"],
+  },
 ];

 export function buildAuthChoiceOptions(params: {
@ -194,6 +201,11 @@ export function buildAuthChoiceOptions(params: {
    label: "MiniMax M2.1 Lightning",
    hint: "Faster, higher output cost",
  });
+  options.push({
+    value: "litellm-api-key",
+    label: "LiteLLM API key",
+    hint: "OpenAI-compatible proxy (any model)",
+  });
  if (params.includeSkip) {
    options.push({ value: "skip", label: "Skip for now" });
  }
--- a/src/commands/auth-choice.apply.api-providers.ts
+++ b/src/commands/auth-choice.apply.api-providers.ts
@ -15,6 +15,8 @@ import {
  applyAuthProfileConfig,
  applyKimiCodeConfig,
  applyKimiCodeProviderConfig,
+  applyLitellmConfig,
+  applyLitellmProviderConfig,
  applyMoonshotConfig,
  applyMoonshotProviderConfig,
  applyOpencodeZenConfig,
@ -39,6 +41,7 @@ import {
  XIAOMI_DEFAULT_MODEL_REF,
  setGeminiApiKey,
  setKimiCodeApiKey,
+  setLitellmApiKey,
  setMoonshotApiKey,
  setOpencodeZenApiKey,
  setOpenrouterApiKey,
@ -91,6 +94,8 @@ export async function applyAuthChoiceApiProviders(
      authChoice = "venice-api-key";
    } else if (params.opts.tokenProvider === "opencode") {
      authChoice = "opencode-zen";
+    } else if (params.opts.tokenProvider === "litellm") {
+      authChoice = "litellm-api-key";
    }
  }

@ -633,5 +638,296 @@ export async function applyAuthChoiceApiProviders(
    return { config: nextConfig, agentModelOverride };
  }

+  if (authChoice === "litellm-api-key") {
+    let hasCredential = false;
+    let apiKey: string | undefined;
+
+    // Check for pre-provided API key via CLI options (--litellm-api-key or --token with --token-provider litellm)
+    if (!hasCredential && params.opts?.litellmApiKey) {
+      apiKey = normalizeApiKeyInput(params.opts.litellmApiKey);
+      await setLitellmApiKey(apiKey, params.agentDir);
+      hasCredential = true;
+    }
+    if (!hasCredential && params.opts?.token && params.opts?.tokenProvider === "litellm") {
+      apiKey = normalizeApiKeyInput(params.opts.token);
+      await setLitellmApiKey(apiKey, params.agentDir);
+      hasCredential = true;
+    }
+
+    if (!hasCredential) {
+      await params.prompter.note(
+        [
+          "LiteLLM is an OpenAI-compatible proxy that supports many models.",
+          "You'll need to provide:",
+          "  1. Base URL (e.g., http://localhost:4000)",
+          "  2. API key",
+          "  3. Model selection (fetched from your LiteLLM instance)",
+        ].join("\n"),
+        "LiteLLM",
+      );
+    }
+
+    // Check for existing env key
+    const envKey = resolveEnvApiKey("litellm");
+    if (!hasCredential && envKey) {
+      const useExisting = await params.prompter.confirm({
+        message: `Use existing LITELLM_API_KEY (${envKey.source}, ${formatApiKeyPreview(envKey.apiKey)})?`,
+        initialValue: true,
+      });
+      if (useExisting) {
+        apiKey = envKey.apiKey;
+        await setLitellmApiKey(apiKey, params.agentDir);
+        hasCredential = true;
+      }
+    }
+
+    // Helper function to prompt for API key
+    const promptForApiKey = async () => {
+      const key = await params.prompter.text({
+        message: "Enter LiteLLM API key",
+        validate: validateApiKeyInput,
+      });
+      return normalizeApiKeyInput(String(key));
+    };
+
+    // Helper function to prompt for base URL
+    const promptForBaseUrl = async () => {
+      const defaultBaseUrl = process.env.LITELLM_BASE_URL ?? "http://localhost:4000";
+      const baseUrl = await params.prompter.text({
+        message: "Enter LiteLLM base URL",
+        initialValue: defaultBaseUrl,
+        placeholder: defaultBaseUrl,
+        validate: (value) => {
+          if (!value?.trim()) return "Base URL is required";
+          try {
+            new URL(value);
+            return undefined;
+          } catch {
+            return "Invalid URL format";
+          }
+        },
+      });
+      return String(baseUrl).trim();
+    };
+
+    if (!hasCredential) {
+      apiKey = await promptForApiKey();
+      await setLitellmApiKey(apiKey, params.agentDir);
+    }
+
+    // Check for pre-provided base URL via CLI option (--litellm-base-url)
+    let normalizedBaseUrl: string;
+    if (params.opts?.litellmBaseUrl) {
+      normalizedBaseUrl = params.opts.litellmBaseUrl.trim();
+    } else {
+      normalizedBaseUrl = await promptForBaseUrl();
+    }
+
+    // Try to fetch available models from LiteLLM
+    type LitellmModelInfo = { id: string; maxInputTokens?: number; maxOutputTokens?: number };
+    let availableModels: LitellmModelInfo[] = [];
+    const authHeaders: Record<string, string> = apiKey ? { Authorization: `Bearer ${apiKey}` } : {};
+
+    // First fetch model list from /v1/models
+    try {
+      const modelsUrl = new URL("/v1/models", normalizedBaseUrl).toString();
+      const response = await fetch(modelsUrl, {
+        headers: authHeaders,
+        signal: AbortSignal.timeout(10000),
+      });
+      if (response.ok) {
+        const data = (await response.json()) as {
+          data?: Array<{ id: string }>;
+        };
+        if (data.data && Array.isArray(data.data)) {
+          availableModels = data.data.map((m) => ({ id: m.id }));
+        }
+      }
+    } catch {
+      // Fetching models failed - leave the list empty; handled below (CLI-provided model, or the retry/cancel prompt)
+    }
+
+    // Then fetch detailed model info from /model/info (LiteLLM-specific endpoint)
+    // This provides context window and max tokens info
+    type ModelInfoEntry = {
+      model_name: string;
+      model_info?: {
+        max_input_tokens?: number;
+        max_tokens?: number;
+        max_output_tokens?: number;
+      };
+    };
+    const modelInfoMap = new Map<string, { maxInputTokens?: number; maxOutputTokens?: number }>();
+    try {
+      const modelInfoUrl = new URL("/model/info", normalizedBaseUrl).toString();
+      const response = await fetch(modelInfoUrl, {
+        headers: authHeaders,
+        signal: AbortSignal.timeout(10000),
+      });
+      if (response.ok) {
+        const data = (await response.json()) as { data?: ModelInfoEntry[] };
+        if (data.data && Array.isArray(data.data)) {
+          for (const entry of data.data) {
+            if (entry.model_name && entry.model_info) {
+              modelInfoMap.set(entry.model_name, {
+                maxInputTokens: entry.model_info.max_input_tokens,
+                maxOutputTokens: entry.model_info.max_output_tokens ?? entry.model_info.max_tokens,
+              });
+            }
+          }
+        }
+      }
+    } catch {
+      // Model info fetch failed - context window / max tokens stay undefined (applyLitellmProviderConfig falls back to its defaults)
+    }
+
+    // Merge model info into available models
+    availableModels = availableModels.map((m) => {
+      const info = modelInfoMap.get(m.id);
+      return {
+        id: m.id,
+        maxInputTokens: info?.maxInputTokens,
+        maxOutputTokens: info?.maxOutputTokens,
+      };
+    });
+
+    let normalizedModelId: string;
+    let contextWindow: number | undefined;
+    let maxTokens: number | undefined;
+
+    // Check for pre-provided model via CLI option (--litellm-model)
+    if (params.opts?.litellmModel) {
+      normalizedModelId = params.opts.litellmModel.trim();
+      // Try to get context info from model info map
+      const modelInfo = availableModels.find((m) => m.id === normalizedModelId);
+      if (modelInfo?.maxInputTokens) {
+        contextWindow = modelInfo.maxInputTokens;
+      }
+      if (modelInfo?.maxOutputTokens) {
+        maxTokens = modelInfo.maxOutputTokens;
+      }
+    } else if (availableModels.length > 0) {
+      // Let user select from available models
+      type SelectOption = { value: string; label: string; hint?: string };
+      const modelOptions: SelectOption[] = availableModels.map((m) => ({
+        value: m.id,
+        label: m.id,
+        hint: m.maxInputTokens ? `${Math.round(m.maxInputTokens / 1000)}k context` : undefined,
+      }));
+
+      const selectedModel = await params.prompter.select({
+        message: `Select model (${availableModels.length} available)`,
+        options: modelOptions,
+      });
+
+      normalizedModelId = String(selectedModel);
+      const modelInfo = availableModels.find((m) => m.id === normalizedModelId);
+      if (modelInfo?.maxInputTokens) {
+        contextWindow = modelInfo.maxInputTokens;
+      }
+      if (modelInfo?.maxOutputTokens) {
+        maxTokens = modelInfo.maxOutputTokens;
+      }
+    } else {
+      // No models available from LiteLLM - offer to retry (API key / base URL) or go back to auth selection
+      await params.prompter.note(
+        [
+          "Could not fetch models from LiteLLM server.",
+          `Server: ${normalizedBaseUrl}`,
+          "",
+          "This could be due to:",
+          "  • Invalid API key",
+          "  • Server not accessible",
+          "  • Network connectivity issues",
+        ].join("\n"),
+        "Model fetch failed",
+      );
+
+      const action = await params.prompter.select({
+        message: "How would you like to proceed?",
+        options: [
+          { value: "retry-apikey", label: "Re-enter API key" },
+          { value: "retry-baseurl", label: "Re-enter base URL" },
+          { value: "cancel", label: "Go back to auth method selection" },
+        ],
+      });
+
+      if (action === "cancel") {
+        // Throw an error with a specific message that signals to restart auth selection
+        // The caller should catch this and re-prompt for auth choice
+        throw new Error("AUTH_CHOICE_CANCELLED");
+      }
+
+      if (action === "retry-apikey") {
+        // Re-prompt for API key and retry the entire flow
+        // Clear the CLI-provided options to force prompting
+        const newParams = {
+          ...params,
+          authChoice: "litellm-api-key" as const,
+          opts: {
+            ...params.opts,
+            litellmApiKey: undefined, // Clear the CLI-provided API key so we can prompt
+            token: undefined, // Also clear token if it was used
+          },
+        };
+        return await applyAuthChoiceApiProviders(newParams);
+      }
+
+      if (action === "retry-baseurl") {
+        // Re-prompt for base URL and retry the entire flow
+        // This ensures we go through the full fetch process again with the new URL
+        const newParams = {
+          ...params,
+          authChoice: "litellm-api-key" as const,
+          opts: {
+            ...params.opts,
+            litellmBaseUrl: undefined, // Clear the CLI-provided URL so we can prompt
+          },
+        };
+        return await applyAuthChoiceApiProviders(newParams);
+      }
+
+      // This should never be reached, but throw error as fallback
+      throw new Error("Failed to configure LiteLLM provider");
+    }
+
+    // Strip litellm/ prefix if the API returned it (avoid litellm/litellm/model)
+    if (normalizedModelId.startsWith("litellm/")) {
+      normalizedModelId = normalizedModelId.slice("litellm/".length);
+    }
+
+    const modelRef = `litellm/${normalizedModelId}`;
+
+    nextConfig = applyAuthProfileConfig(nextConfig, {
+      profileId: "litellm:default",
+      provider: "litellm",
+      mode: "api_key",
+    });
+
+    if (params.setDefaultModel) {
+      nextConfig = applyLitellmConfig(nextConfig, {
+        baseUrl: normalizedBaseUrl,
+        modelId: normalizedModelId,
+        contextWindow,
+        maxTokens,
+      });
+      await params.prompter.note(
+        `Default model set to ${modelRef}${contextWindow ? ` (${Math.round(contextWindow / 1000)}k context)` : ""}`,
+        "Model configured",
+      );
+    } else {
+      nextConfig = applyLitellmProviderConfig(nextConfig, {
+        baseUrl: normalizedBaseUrl,
+        modelId: normalizedModelId,
+        contextWindow,
+        maxTokens,
+      });
+      agentModelOverride = modelRef;
+      await noteAgentModel(modelRef);
+    }
+
+    return { config: nextConfig, agentModelOverride };
+  }
+
  return null;
 }
--- a/src/commands/auth-choice.apply.ts
+++ b/src/commands/auth-choice.apply.ts
@ -24,6 +24,12 @@ export type ApplyAuthChoiceParams = {
  opts?: {
    tokenProvider?: string;
    token?: string;
+    // LiteLLM-specific options
+    litellmApiKey?: string;
+    litellmBaseUrl?: string;
+    litellmModel?: string;
+    // Non-interactive mode flag
+    nonInteractive?: boolean;
  };
 };

--- a/src/commands/configure.gateway-auth.ts
+++ b/src/commands/configure.gateway-auth.ts
@ -4,6 +4,7 @@ import type { RuntimeEnv } from "../runtime.js";
 import type { WizardPrompter } from "../wizard/prompts.js";
 import { applyAuthChoice, resolvePreferredProviderForAuthChoice } from "./auth-choice.js";
 import { promptAuthChoiceGrouped } from "./auth-choice-prompt.js";
+import type { AuthChoice } from "./onboard-types.js";
 import {
  applyModelAllowlist,
  applyModelFallbacksFromSelection,
@ -41,34 +42,50 @@ export async function promptAuthConfig(
  runtime: RuntimeEnv,
  prompter: WizardPrompter,
 ): Promise<OpenClawConfig> {
-  const authChoice = await promptAuthChoiceGrouped({
-    prompter,
-    store: ensureAuthProfileStore(undefined, {
-      allowKeychainPrompt: false,
-    }),
-    includeSkip: true,
-  });
-
  let next = cfg;
-  if (authChoice !== "skip") {
-    const applied = await applyAuthChoice({
-      authChoice,
-      config: next,
+  let authChoice: AuthChoice;
+
+  // Loop to allow retrying auth choice if user cancels during configuration
+  while (true) {
+    authChoice = await promptAuthChoiceGrouped({
      prompter,
-      runtime,
-      setDefaultModel: true,
+      store: ensureAuthProfileStore(undefined, {
+        allowKeychainPrompt: false,
+      }),
+      includeSkip: true,
    });
-    next = applied.config;
-  } else {
-    const modelSelection = await promptDefaultModel({
-      config: next,
-      prompter,
-      allowKeep: true,
-      ignoreAllowlist: true,
-      preferredProvider: resolvePreferredProviderForAuthChoice(authChoice),
-    });
-    if (modelSelection.model) {
-      next = applyPrimaryModel(next, modelSelection.model);
+
+    if (authChoice !== "skip") {
+      try {
+        const applied = await applyAuthChoice({
+          authChoice,
+          config: next,
+          prompter,
+          runtime,
+          setDefaultModel: true,
+        });
+        next = applied.config;
+        break; // Success - exit the loop
+      } catch (error) {
+        // If user cancelled to go back to auth selection, loop again
+        if (error instanceof Error && error.message === "AUTH_CHOICE_CANCELLED") {
+          continue;
+        }
+        // Re-throw other errors
+        throw error;
+      }
+    } else {
+      const modelSelection = await promptDefaultModel({
+        config: next,
+        prompter,
+        allowKeep: true,
+        ignoreAllowlist: true,
+        preferredProvider: resolvePreferredProviderForAuthChoice(authChoice),
+      });
+      if (modelSelection.model) {
+        next = applyPrimaryModel(next, modelSelection.model);
+      }
+      break; // Skip selected - exit the loop
    }
  }

--- a/src/commands/onboard-auth.config-core.ts
+++ b/src/commands/onboard-auth.config-core.ts
@ -484,6 +484,115 @@ export function applyVeniceConfig(cfg: OpenClawConfig): OpenClawConfig {
  };
 }

+/**
+ * Register (or refresh) the `litellm` provider without changing the default model.
+ *
+ * LiteLLM is a proxy in front of many upstream models, so the endpoint and the
+ * model are supplied by the caller instead of being hard-coded.
+ *
+ * @param cfg - Config to derive the result from; it is copied, not mutated.
+ * @param params - LiteLLM settings:
+ *   - `baseUrl`: endpoint written to `models.providers.litellm.baseUrl`.
+ *   - `modelId`: model id, referenced as `litellm/<modelId>`.
+ *   - `modelName`: display name / alias; falls back to `modelId`.
+ *   - `contextWindow`: used for a newly added model entry (default 128000).
+ *   - `maxTokens`: used for a newly added model entry (default 8192).
+ * @returns A new config containing the provider entry and the model alias.
+ */
+export function applyLitellmProviderConfig(
+  cfg: OpenClawConfig,
+  params: {
+    baseUrl: string;
+    modelId: string;
+    modelName?: string;
+    contextWindow?: number;
+    maxTokens?: number;
+  },
+): OpenClawConfig {
+  const modelRef = `litellm/${params.modelId}`;
+  const models = { ...cfg.agents?.defaults?.models };
+  models[modelRef] = {
+    ...models[modelRef],
+    // An alias that is already configured wins over the derived one.
+    alias: models[modelRef]?.alias ?? params.modelName ?? params.modelId,
+  };
+
+  const providers = { ...cfg.models?.providers };
+  const existingProvider = providers.litellm;
+  const existingModels = Array.isArray(existingProvider?.models) ? existingProvider.models : [];
+  // Detect Claude models and use Anthropic Messages API for proper cache control support
+  const isClaude = params.modelId.toLowerCase().startsWith("claude-");
+  const newModel = {
+    id: params.modelId,
+    name: params.modelName ?? params.modelId,
+    // Claude models through LiteLLM should use anthropic-messages API for cache control
+    ...(isClaude ? { api: "anthropic-messages" as const } : {}),
+    reasoning: false,
+    input: ["text"] as ("text" | "image")[],
+    cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
+    contextWindow: params.contextWindow ?? 128000,
+    maxTokens: params.maxTokens ?? 8192,
+    // LiteLLM proxies to various providers that may not support the OpenAI Responses API
+    // `store` parameter. Disable it to avoid "Extra inputs are not permitted" errors.
+    compat: { supportsStore: false },
+  };
+  const hasModel = existingModels.some((model) => model.id === params.modelId);
+  // A model that is already registered is kept as-is; otherwise the new entry is
+  // appended, so the resulting list always contains at least one model.
+  const mergedModels = hasModel ? existingModels : [...existingModels, newModel];
+  const { apiKey: existingApiKey, ...existingProviderRest } = (existingProvider ?? {}) as Record<
+    string,
+    unknown
+  > as { apiKey?: string };
+  const resolvedApiKey = typeof existingApiKey === "string" ? existingApiKey : undefined;
+  // Blank or whitespace-only keys are dropped rather than written back.
+  const normalizedApiKey = resolvedApiKey?.trim();
+  providers.litellm = {
+    ...existingProviderRest,
+    baseUrl: params.baseUrl,
+    api: "openai-completions",
+    ...(normalizedApiKey ? { apiKey: normalizedApiKey } : {}),
+    models: mergedModels,
+  };
+
+  return {
+    ...cfg,
+    agents: {
+      ...cfg.agents,
+      defaults: {
+        ...cfg.agents?.defaults,
+        models,
+      },
+    },
+    models: {
+      // Spread first so settings other than `mode` / `providers` are not lost.
+      ...cfg.models,
+      mode: cfg.models?.mode ?? "merge",
+      providers,
+    },
+  };
+}
+
+/**
+ * Apply the LiteLLM provider configuration AND make `litellm/<modelId>` the
+ * primary default model. Use this when LiteLLM is the main provider choice
+ * during onboarding.
+ *
+ * Fallbacks already present on `agents.defaults.model` are carried over; every
+ * other key of that object is replaced.
+ *
+ * @param cfg - Config to derive the result from.
+ * @param params - Same LiteLLM settings accepted by `applyLitellmProviderConfig`.
+ * @returns A new config with the provider registered and the primary model set.
+ */
+export function applyLitellmConfig(
+  cfg: OpenClawConfig,
+  params: {
+    baseUrl: string;
+    modelId: string;
+    modelName?: string;
+    contextWindow?: number;
+    maxTokens?: number;
+  },
+): OpenClawConfig {
+  const next = applyLitellmProviderConfig(cfg, params);
+  const modelRef = `litellm/${params.modelId}`;
+  const existingModel: unknown = next.agents?.defaults?.model;
+  // The `in` operator throws a TypeError on primitives (e.g. a plain string
+  // model value), so only probe values that are real objects.
+  const hasFallbacks =
+    typeof existingModel === "object" &&
+    existingModel !== null &&
+    "fallbacks" in (existingModel as Record<string, unknown>);
+  return {
+    ...next,
+    agents: {
+      ...next.agents,
+      defaults: {
+        ...next.agents?.defaults,
+        model: {
+          ...(hasFallbacks
+            ? {
+                fallbacks: (existingModel as { fallbacks?: string[] }).fallbacks,
+              }
+            : undefined),
+          primary: modelRef,
+        },
+      },
+    },
+  };
+}
+
 export function applyAuthProfileConfig(
  cfg: OpenClawConfig,
  params: {
--- a/src/commands/onboard-auth.credentials.ts
+++ b/src/commands/onboard-auth.credentials.ts
@ -177,3 +177,17 @@ export async function setOpencodeZenApiKey(key: string, agentDir?: string) {
    agentDir: resolveAuthAgentDir(agentDir),
  });
 }
+
+/**
+ * Model reference string `litellm/gpt-4`.
+ * NOTE(review): presumably the fallback used when no LiteLLM model is chosen — confirm against callers.
+ */
+export const LITELLM_DEFAULT_MODEL_REF = "litellm/gpt-4";
+
+/**
+ * Upsert the `litellm:default` auth profile with an API-key credential.
+ *
+ * @param key - LiteLLM API key stored on the credential.
+ * @param agentDir - Optional agent directory; passed through `resolveAuthAgentDir`.
+ */
+export async function setLitellmApiKey(key: string, agentDir?: string) {
+  const resolvedAgentDir = resolveAuthAgentDir(agentDir);
+  upsertAuthProfile({
+    profileId: "litellm:default",
+    credential: { type: "api_key", provider: "litellm", key },
+    agentDir: resolvedAgentDir,
+  });
+}
--- a/src/commands/onboard-auth.ts
+++ b/src/commands/onboard-auth.ts
@ -7,6 +7,8 @@ export {
  applyAuthProfileConfig,
  applyKimiCodeConfig,
  applyKimiCodeProviderConfig,
+  applyLitellmConfig,
+  applyLitellmProviderConfig,
  applyMoonshotConfig,
  applyMoonshotProviderConfig,
  applyOpenrouterConfig,
@ -35,10 +37,12 @@ export {
  applyOpencodeZenProviderConfig,
 } from "./onboard-auth.config-opencode.js";
 export {
+  LITELLM_DEFAULT_MODEL_REF,
  OPENROUTER_DEFAULT_MODEL_REF,
  setAnthropicApiKey,
  setGeminiApiKey,
  setKimiCodeApiKey,
+  setLitellmApiKey,
  setMinimaxApiKey,
  setMoonshotApiKey,
  setOpencodeZenApiKey,
--- a/src/commands/onboard-types.ts
+++ b/src/commands/onboard-types.ts
@ -17,6 +17,7 @@ export type AuthChoice =
  | "kimi-code-api-key"
  | "synthetic-api-key"
  | "venice-api-key"
+  | "litellm-api-key"
  | "codex-cli"
  | "apiKey"
  | "gemini-api-key"
@ -73,6 +74,9 @@ export type OnboardOptions = {
  syntheticApiKey?: string;
  veniceApiKey?: string;
  opencodeZenApiKey?: string;
+  litellmApiKey?: string;
+  litellmBaseUrl?: string;
+  litellmModel?: string;
  gatewayPort?: number;
  gatewayBind?: GatewayBind;
  gatewayAuth?: GatewayAuthChoice;
--- a/src/config/defaults.ts
+++ b/src/config/defaults.ts
@ -323,7 +323,12 @@ export function applyContextPruningDefaults(cfg: OpenClawConfig): OpenClawConfig

    for (const [key, entry] of Object.entries(nextModels)) {
      const parsed = parseModelRef(key, "anthropic");
-      if (!parsed || parsed.provider !== "anthropic") continue;
+      if (!parsed) continue;
+      // Apply cache control to Anthropic models and LiteLLM Claude models
+      const isAnthropicProvider = parsed.provider === "anthropic";
+      const isLitellmClaude =
+        parsed.provider === "litellm" && parsed.model.toLowerCase().startsWith("claude-");
+      if (!isAnthropicProvider && !isLitellmClaude) continue;
      const current = entry ?? {};
      const params = (current as { params?: Record<string, unknown> }).params ?? {};
      if (typeof params.cacheControlTtl === "string") continue;
@ -337,17 +342,24 @@ export function applyContextPruningDefaults(cfg: OpenClawConfig): OpenClawConfig
    const primary = resolvePrimaryModelRef(defaults.model?.primary ?? undefined);
    if (primary) {
      const parsedPrimary = parseModelRef(primary, "anthropic");
-      if (parsedPrimary?.provider === "anthropic") {
-        const key = `${parsedPrimary.provider}/${parsedPrimary.model}`;
-        const entry = nextModels[key];
-        const current = entry ?? {};
-        const params = (current as { params?: Record<string, unknown> }).params ?? {};
-        if (typeof params.cacheControlTtl !== "string") {
-          nextModels[key] = {
-            ...(current as Record<string, unknown>),
-            params: { ...params, cacheControlTtl: "1h" },
-          };
-          modelsMutated = true;
+      if (parsedPrimary) {
+        // Apply cache control to Anthropic models and LiteLLM Claude models
+        const isAnthropicProvider = parsedPrimary.provider === "anthropic";
+        const isLitellmClaude =
+          parsedPrimary.provider === "litellm" &&
+          parsedPrimary.model.toLowerCase().startsWith("claude-");
+        if (isAnthropicProvider || isLitellmClaude) {
+          const key = `${parsedPrimary.provider}/${parsedPrimary.model}`;
+          const entry = nextModels[key];
+          const current = entry ?? {};
+          const params = (current as { params?: Record<string, unknown> }).params ?? {};
+          if (typeof params.cacheControlTtl !== "string") {
+            nextModels[key] = {
+              ...(current as Record<string, unknown>),
+              params: { ...params, cacheControlTtl: "1h" },
+            };
+            modelsMutated = true;
+          }
        }
      }
    }
--- a/src/config/io.ts
+++ b/src/config/io.ts
@ -48,6 +48,7 @@ const SHELL_ENV_EXPECTED_KEYS = [
  "AI_GATEWAY_API_KEY",
  "MINIMAX_API_KEY",
  "SYNTHETIC_API_KEY",
+  "LITELLM_API_KEY",
  "ELEVENLABS_API_KEY",
  "TELEGRAM_BOT_TOKEN",
  "DISCORD_BOT_TOKEN",
--- a/src/wizard/onboarding.ts
+++ b/src/wizard/onboarding.ts
@ -21,6 +21,7 @@ import { promptRemoteGatewayConfig } from "../commands/onboard-remote.js";
 import { setupSkills } from "../commands/onboard-skills.js";
 import { setupInternalHooks } from "../commands/onboard-hooks.js";
 import type {
+  AuthChoice,
  GatewayAuthChoice,
  OnboardMode,
  OnboardOptions,
@ -354,26 +355,43 @@ export async function runOnboardingWizard(
    allowKeychainPrompt: false,
  });
  const authChoiceFromPrompt = opts.authChoice === undefined;
-  const authChoice =
-    opts.authChoice ??
-    (await promptAuthChoiceGrouped({
-      prompter,
-      store: authStore,
-      includeSkip: true,
-    }));
+  let authChoice: AuthChoice;

-  const authResult = await applyAuthChoice({
-    authChoice,
-    config: nextConfig,
-    prompter,
-    runtime,
-    setDefaultModel: true,
-    opts: {
-      tokenProvider: opts.tokenProvider,
-      token: opts.authChoice === "apiKey" && opts.token ? opts.token : undefined,
-    },
-  });
-  nextConfig = authResult.config;
+  // Loop to allow retrying auth choice if user cancels during configuration
+  while (true) {
+    authChoice =
+      opts.authChoice ??
+      (await promptAuthChoiceGrouped({
+        prompter,
+        store: authStore,
+        includeSkip: true,
+      }));
+
+    try {
+      const authResult = await applyAuthChoice({
+        authChoice,
+        config: nextConfig,
+        prompter,
+        runtime,
+        setDefaultModel: true,
+        opts: {
+          tokenProvider: opts.tokenProvider,
+          token: opts.authChoice === "apiKey" && opts.token ? opts.token : undefined,
+        },
+      });
+      nextConfig = authResult.config;
+      break; // Success - exit the loop
+    } catch (error) {
+      // If user cancelled to go back to auth selection, loop again
+      if (error instanceof Error && error.message === "AUTH_CHOICE_CANCELLED") {
+        // Clear opts.authChoice so we prompt again
+        opts.authChoice = undefined;
+        continue;
+      }
+      // Re-throw other errors
+      throw error;
+    }
+  }

  if (authChoiceFromPrompt) {
    const modelSelection = await promptDefaultModel({