Merge 81518603e4 into da71eaebd2

2026-01-30 11:59:48 +00:00 · 2026-01-30 11:59:48 +00:00 · 34e56da345
commit 34e56da345
parent da71eaebd2 81518603e4
21 changed files with 820 additions and 84 deletions
--- a/docs/docs.json
+++ b/docs/docs.json
@ -1030,6 +1030,7 @@
          "providers/vercel-ai-gateway",
          "providers/openrouter",
          "providers/synthetic",
          "providers/litellm",
          "providers/opencode",
          "providers/glm",
          "providers/zai"
--- a/docs/providers/litellm.md
+++ b/docs/providers/litellm.md
@ -0,0 +1,131 @@
 ---
 summary: "Use LiteLLM as an OpenAI-compatible proxy in OpenClaw"
 read_when:
  - You want to use LiteLLM as a model provider
  - You need to connect to a self-hosted LiteLLM proxy
  - You want to use any model through an OpenAI-compatible API
 ---
 # LiteLLM
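 LiteLLM is an OpenAI-compatible proxy that supports 100+ LLM APIs. OpenClaw
 registers it as the `litellm` provider and uses the OpenAI Completions API
 (`api: "openai-completions"`) by default; models whose id starts with `claude-`
 are configured with the Anthropic Messages API (`api: "anthropic-messages"`).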
 ## Quick setup
 1) Set up your LiteLLM proxy (see [LiteLLM docs](https://docs.litellm.ai/))
 2) Set environment variables (optional):
   - `LITELLM_API_KEY` - your LiteLLM API key
   - `LITELLM_BASE_URL` - your LiteLLM endpoint (default: `http://localhost:4000`)
   - `LITELLM_MODEL` - default model name (default: `gpt-4`)
 3) Run onboarding:
 ```bash
 openclaw onboard --auth-choice litellm-api-key
 ```
 The wizard will prompt for:
 - Base URL (your LiteLLM proxy endpoint)
 - API key
 - Model (selected from the list your LiteLLM proxy reports; pass `--litellm-model` to preset it)
 ## Config example
 ```json5
 {
  env: { LITELLM_API_KEY: "sk-..." },
  agents: {
    defaults: {
      model: { primary: "litellm/gpt-4" },
      models: { "litellm/gpt-4": { alias: "GPT-4" } }
    }
  },
  models: {
    mode: "merge",
    providers: {
      litellm: {
        baseUrl: "http://localhost:4000",
        apiKey: "${LITELLM_API_KEY}",
        api: "openai-completions",
        models: [
          {
            id: "gpt-4",
            name: "GPT-4",
            reasoning: false,
            input: ["text"],
            contextWindow: 128000,
            maxTokens: 8192
          }
        ]
      }
    }
  }
 }
 ```
 ## Multiple models
 Add additional models to your config as needed:
 ```json5
 {
  models: {
    providers: {
      litellm: {
        baseUrl: "http://localhost:4000",
        apiKey: "${LITELLM_API_KEY}",
        api: "openai-completions",
        models: [
          { id: "gpt-4", name: "GPT-4", contextWindow: 128000, maxTokens: 8192 },
          { id: "claude-3-opus", name: "Claude Opus", contextWindow: 200000, maxTokens: 4096 },
          { id: "gemini-pro", name: "Gemini Pro", contextWindow: 32000, maxTokens: 8192 }
        ]
      }
    }
  }
 }
 ```
 Then switch models using:
 ```bash
 openclaw config set agents.defaults.model.primary litellm/claude-3-opus
 ```
 ## Prompt caching
 When using Anthropic models through LiteLLM (e.g., `claude-opus-4-5`, `claude-sonnet-4-5`), OpenClaw automatically enables **prompt caching** to reduce costs:
 ```json5
 {
  agents: {
    defaults: {
      models: {
        "litellm/claude-opus-4-5": {
          params: {
            cacheControlTtl: "1h"  // Auto-configured for Claude models
          }
        }
      }
    }
  }
 }
 ```
 ### Cost savings with caching
 - **Without caching**: Every message pays full price for the entire conversation history
 - **With caching** (enabled by default): Repeated context costs 10x less
 Example from actual usage:
 - Without caching: 93k tokens × $0.000005 = **$0.47** per message
 - With caching: 123k tokens (mostly cached) = **$0.05** per message (90% savings!)
 Caching is **automatically enabled** for all `claude-*` models through LiteLLM.
 ## Notes
 - Model refs use `litellm/<modelId>` where `modelId` matches your LiteLLM config.
 - The base URL should not include `/v1` - OpenClaw's OpenAI client appends it.
 - Supported LiteLLM models depend on your proxy configuration.
 - **Prompt caching works automatically** when using Claude models through LiteLLM.
 - See [Model providers](/concepts/model-providers) for provider rules.
--- a/src/agents/litellm-models.ts
+++ b/src/agents/litellm-models.ts
@ -0,0 +1,50 @@
 import type { ModelDefinitionConfig } from "../config/types.js";
 // Onboarding defaults for the LiteLLM provider. LiteLLM is a proxy in front of
 // many models, so both the base URL and the model are user-configurable.
 /** Default LiteLLM proxy base URL. */
 export const LITELLM_DEFAULT_BASE_URL = "http://localhost:4000";
 /** Default model id (as known to the LiteLLM proxy). */
 export const LITELLM_DEFAULT_MODEL_ID = "gpt-4";
 /** Provider-qualified reference (`litellm/<modelId>`) for the default model. */
 export const LITELLM_DEFAULT_MODEL_REF = "litellm/" + LITELLM_DEFAULT_MODEL_ID;
 /** All-zero cost table used for LiteLLM model definitions. */
 export const LITELLM_DEFAULT_COST = { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 };
 /** Minimal description of a model served by a LiteLLM proxy; the input of `buildLitellmModelDefinition`. */
 export type LitellmModelEntry = {
  // Model id; an id starting with "claude-" (case-insensitive) gets the "anthropic-messages" API override.
  id: string;
  // Display name copied verbatim into the model definition.
  name: string;
  // Defaults to false when omitted.
  reasoning?: boolean;
  // Accepted input kinds; defaults to ["text"] when omitted.
  input?: readonly ("text" | "image")[];
  // Defaults to 128000 when omitted.
  contextWindow?: number;
  // Defaults to 8192 when omitted.
  maxTokens?: number;
 };
 /**
 * Expands a LiteLLM model entry into a full model definition.
 *
 * Omitted optional fields fall back to: `reasoning` false, `input` ["text"],
 * `contextWindow` 128000 and `maxTokens` 8192. The cost table is always the
 * LiteLLM default, and `compat.supportsStore` is always false.
 *
 * @param entry - Model id/name plus optional capability overrides.
 * @returns A new definition. `input` and `cost` are fresh copies, so mutating
 *   the result affects neither `entry` nor `LITELLM_DEFAULT_COST`.
 */
 export function buildLitellmModelDefinition(entry: LitellmModelEntry): ModelDefinitionConfig {
  // Ids starting with "claude-" (case-insensitive) get a per-model API override so that
  // cache control is sent through the Anthropic Messages API.
  const isClaude = entry.id.toLowerCase().startsWith("claude-");
  return {
    id: entry.id,
    name: entry.name,
    // Non-Claude models carry no `api` key at all and inherit the provider-level API.
    ...(isClaude ? { api: "anthropic-messages" as const } : {}),
    reasoning: entry.reasoning ?? false,
    input: entry.input ? [...entry.input] : ["text"],
    // Copy rather than alias the shared default: a caller editing one model's cost
    // must not silently change the default or every other LiteLLM model.
    cost: { ...LITELLM_DEFAULT_COST },
    contextWindow: entry.contextWindow ?? 128000,
    maxTokens: entry.maxTokens ?? 8192,
    // LiteLLM proxies to various providers that may not support the OpenAI Responses API
    // `store` parameter. Disable it by default to avoid "Extra inputs are not permitted" errors.
    compat: { supportsStore: false },
  };
 }
 /**
 * Builds the provider-qualified reference (`litellm/<modelId>`) for a LiteLLM model.
 * The id is used as given; any model id known to the LiteLLM proxy is acceptable.
 */
 export function litellmModelRef(modelId: string): string {
  const providerPrefix = "litellm/";
  return providerPrefix + modelId;
 }
--- a/src/agents/model-auth.ts
+++ b/src/agents/model-auth.ts
@ -286,6 +286,7 @@ export function resolveEnvApiKey(provider: string): EnvApiKeyResult | null {
    venice: "VENICE_API_KEY",
    mistral: "MISTRAL_API_KEY",
    opencode: "OPENCODE_API_KEY",
    litellm: "LITELLM_API_KEY",
  };
  const envVar = envMap[normalized];
  if (!envVar) return null;
--- a/src/agents/pi-embedded-runner/cache-ttl.test.ts
+++ b/src/agents/pi-embedded-runner/cache-ttl.test.ts
@ -0,0 +1,31 @@
 import { describe, it, expect } from "vitest";
 import { isCacheTtlEligibleProvider } from "./cache-ttl.js";
 // Table-driven checks of which (provider, modelId) pairs are cache-TTL eligible.
 describe("isCacheTtlEligibleProvider", () => {
  // Asserts every (provider, modelId) pair in `cases` yields `expected`, in order.
  const expectAll = (cases: ReadonlyArray<readonly [string, string]>, expected: boolean) => {
    for (const [provider, modelId] of cases) {
      expect(isCacheTtlEligibleProvider(provider, modelId)).toBe(expected);
    }
  };
  it("returns true for direct Anthropic provider", () => {
    expectAll(
      [
        ["anthropic", "claude-opus-4-5"],
        ["Anthropic", "claude-sonnet-4-5"],
      ],
      true,
    );
  });
  it("returns true for OpenRouter with Anthropic models", () => {
    expectAll(
      [
        ["openrouter", "anthropic/claude-opus-4-5"],
        ["OpenRouter", "anthropic/claude-3-opus"],
      ],
      true,
    );
  });
  it("returns true for LiteLLM with Claude models", () => {
    expectAll(
      [
        ["litellm", "claude-opus-4-5"],
        ["litellm", "claude-sonnet-4-5"],
        ["LiteLLM", "Claude-3-Opus"],
      ],
      true,
    );
  });
  it("returns false for LiteLLM with non-Claude models", () => {
    expectAll(
      [
        ["litellm", "gpt-4"],
        ["litellm", "gemini-pro"],
        ["litellm", "llama-3"],
      ],
      false,
    );
  });
  it("returns false for other providers", () => {
    expectAll(
      [
        ["openai", "gpt-4"],
        ["google", "gemini-pro"],
      ],
      false,
    );
  });
 });
--- a/src/agents/pi-embedded-runner/cache-ttl.ts
+++ b/src/agents/pi-embedded-runner/cache-ttl.ts
@ -14,6 +14,8 @@ export function isCacheTtlEligibleProvider(provider: string, modelId: string): b
  if (normalizedProvider === "anthropic") return true;
  if (normalizedProvider === "openrouter" && normalizedModelId.startsWith("anthropic/"))
    return true;
  // LiteLLM proxying to Anthropic models (claude-*)
  if (normalizedProvider === "litellm" && normalizedModelId.startsWith("claude-")) return true;
  return false;
 }
--- a/src/agents/pi-embedded-runner/extra-params.ts
+++ b/src/agents/pi-embedded-runner/extra-params.ts
@ -32,6 +32,8 @@ function resolveCacheControlTtl(
  if (raw !== "5m" && raw !== "1h") return undefined;
  if (provider === "anthropic") return raw;
  if (provider === "openrouter" && modelId.startsWith("anthropic/")) return raw;
  // LiteLLM proxying to Anthropic models (claude-*)
  if (provider === "litellm" && modelId.startsWith("claude-")) return raw;
  return undefined;
 }
--- a/src/agents/pi-embedded-runner/model.ts
+++ b/src/agents/pi-embedded-runner/model.ts
@ -77,17 +77,25 @@ export function resolveModel(
    }
    const providerCfg = providers[provider];
    if (providerCfg || modelId.startsWith("mock-")) {
      // Find the matching model definition from provider config to get compat settings
      const modelDef = providerCfg?.models?.find((m) => m.id === modelId);
      const fallbackModel: Model<Api> = normalizeModelCompat({
        id: modelId,
-        name: modelId,
+        name: modelDef?.name ?? modelId,
-        api: providerCfg?.api ?? "openai-responses",
+        api: modelDef?.api ?? providerCfg?.api ?? "openai-responses",
        provider,
        baseUrl: providerCfg?.baseUrl,
-        reasoning: false,
+        reasoning: modelDef?.reasoning ?? false,
-        input: ["text"],
+        input: modelDef?.input ?? ["text"],
-        cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
+        cost: modelDef?.cost ?? { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
-        contextWindow: providerCfg?.models?.[0]?.contextWindow ?? DEFAULT_CONTEXT_TOKENS,
+        contextWindow:
-        maxTokens: providerCfg?.models?.[0]?.maxTokens ?? DEFAULT_CONTEXT_TOKENS,
+          modelDef?.contextWindow ??
          providerCfg?.models?.[0]?.contextWindow ??
          DEFAULT_CONTEXT_TOKENS,
        maxTokens:
          modelDef?.maxTokens ?? providerCfg?.models?.[0]?.maxTokens ?? DEFAULT_CONTEXT_TOKENS,
        // Preserve compat settings for provider-specific quirks (e.g., supportsStore for LiteLLM)
        compat: modelDef?.compat,
      } as Model<Api>);
      return { model: fallbackModel, authStorage, modelRegistry };
    }
--- a/src/cli/program/register.onboard.ts
+++ b/src/cli/program/register.onboard.ts
@ -52,7 +52,7 @@ export function registerOnboardCommand(program: Command) {
    .option("--mode <mode>", "Wizard mode: local|remote")
    .option(
      "--auth-choice <choice>",
-      "Auth: setup-token|token|chutes|openai-codex|openai-api-key|openrouter-api-key|ai-gateway-api-key|moonshot-api-key|kimi-code-api-key|synthetic-api-key|venice-api-key|gemini-api-key|zai-api-key|xiaomi-api-key|apiKey|minimax-api|minimax-api-lightning|opencode-zen|skip",
+      "Auth: setup-token|token|chutes|openai-codex|openai-api-key|openrouter-api-key|ai-gateway-api-key|moonshot-api-key|kimi-code-api-key|synthetic-api-key|venice-api-key|litellm-api-key|gemini-api-key|zai-api-key|xiaomi-api-key|apiKey|minimax-api|minimax-api-lightning|opencode-zen|skip",
    )
    .option(
      "--token-provider <id>",
@ -77,6 +77,9 @@ export function registerOnboardCommand(program: Command) {
    .option("--synthetic-api-key <key>", "Synthetic API key")
    .option("--venice-api-key <key>", "Venice API key")
    .option("--opencode-zen-api-key <key>", "OpenCode Zen API key")
    .option("--litellm-api-key <key>", "LiteLLM API key")
    .option("--litellm-base-url <url>", "LiteLLM base URL (default: http://localhost:4000)")
    .option("--litellm-model <model>", "LiteLLM model name")
    .option("--gateway-port <port>", "Gateway port")
    .option("--gateway-bind <mode>", "Gateway bind: loopback|tailnet|lan|auto|custom")
    .option("--gateway-auth <mode>", "Gateway auth: token|password")
--- a/src/commands/agents.commands.add.ts
+++ b/src/commands/agents.commands.add.ts
@ -254,27 +254,41 @@ export async function agentsAddCommand(
      const authStore = ensureAuthProfileStore(agentDir, {
        allowKeychainPrompt: false,
      });
      const authChoice = await promptAuthChoiceGrouped({
        prompter,
        store: authStore,
        includeSkip: true,
      });
-      const authResult = await applyAuthChoice({
+      // Loop to allow retrying auth choice if user cancels during configuration
-        authChoice,
+      while (true) {
-        config: nextConfig,
+        const authChoice = await promptAuthChoiceGrouped({
-        prompter,
+          prompter,
-        runtime,
+          store: authStore,
-        agentDir,
+          includeSkip: true,
        setDefaultModel: false,
        agentId,
      });
      nextConfig = authResult.config;
      if (authResult.agentModelOverride) {
        nextConfig = applyAgentConfig(nextConfig, {
          agentId,
          model: authResult.agentModelOverride,
        });
        try {
          const authResult = await applyAuthChoice({
            authChoice,
            config: nextConfig,
            prompter,
            runtime,
            agentDir,
            setDefaultModel: false,
            agentId,
          });
          nextConfig = authResult.config;
          if (authResult.agentModelOverride) {
            nextConfig = applyAgentConfig(nextConfig, {
              agentId,
              model: authResult.agentModelOverride,
            });
          }
          break; // Success - exit the loop
        } catch (error) {
          // If user cancelled to go back to auth selection, loop again
          if (error instanceof Error && error.message === "AUTH_CHOICE_CANCELLED") {
            continue;
          }
          // Re-throw other errors
          throw error;
        }
      }
    }
--- a/src/commands/auth-choice-options.ts
+++ b/src/commands/auth-choice-options.ts
@ -21,7 +21,8 @@ export type AuthChoiceGroupId =
  | "minimax"
  | "synthetic"
  | "venice"
-  | "qwen";
+  | "qwen"
  | "litellm";
 export type AuthChoiceGroup = {
  value: AuthChoiceGroupId;
@ -120,6 +121,12 @@ const AUTH_CHOICE_GROUP_DEFS: {
    hint: "API key",
    choices: ["opencode-zen"],
  },
  {
    value: "litellm",
    label: "LiteLLM",
    hint: "OpenAI-compatible proxy (self-hosted)",
    choices: ["litellm-api-key"],
  },
 ];
 export function buildAuthChoiceOptions(params: {
@ -194,6 +201,11 @@ export function buildAuthChoiceOptions(params: {
    label: "MiniMax M2.1 Lightning",
    hint: "Faster, higher output cost",
  });
  options.push({
    value: "litellm-api-key",
    label: "LiteLLM API key",
    hint: "OpenAI-compatible proxy (any model)",
  });
  if (params.includeSkip) {
    options.push({ value: "skip", label: "Skip for now" });
  }
--- a/src/commands/auth-choice.apply.api-providers.ts
+++ b/src/commands/auth-choice.apply.api-providers.ts
@ -15,6 +15,8 @@ import {
  applyAuthProfileConfig,
  applyKimiCodeConfig,
  applyKimiCodeProviderConfig,
  applyLitellmConfig,
  applyLitellmProviderConfig,
  applyMoonshotConfig,
  applyMoonshotProviderConfig,
  applyOpencodeZenConfig,
@ -39,6 +41,7 @@ import {
  XIAOMI_DEFAULT_MODEL_REF,
  setGeminiApiKey,
  setKimiCodeApiKey,
  setLitellmApiKey,
  setMoonshotApiKey,
  setOpencodeZenApiKey,
  setOpenrouterApiKey,
@ -91,6 +94,8 @@ export async function applyAuthChoiceApiProviders(
      authChoice = "venice-api-key";
    } else if (params.opts.tokenProvider === "opencode") {
      authChoice = "opencode-zen";
    } else if (params.opts.tokenProvider === "litellm") {
      authChoice = "litellm-api-key";
    }
  }
@ -633,5 +638,296 @@ export async function applyAuthChoiceApiProviders(
    return { config: nextConfig, agentModelOverride };
  }
  if (authChoice === "litellm-api-key") {
    let hasCredential = false;
    let apiKey: string | undefined;
    // Check for pre-provided API key via CLI options (--litellm-api-key or --token with --token-provider litellm)
    if (!hasCredential && params.opts?.litellmApiKey) {
      apiKey = normalizeApiKeyInput(params.opts.litellmApiKey);
      await setLitellmApiKey(apiKey, params.agentDir);
      hasCredential = true;
    }
    if (!hasCredential && params.opts?.token && params.opts?.tokenProvider === "litellm") {
      apiKey = normalizeApiKeyInput(params.opts.token);
      await setLitellmApiKey(apiKey, params.agentDir);
      hasCredential = true;
    }
    if (!hasCredential) {
      await params.prompter.note(
        [
          "LiteLLM is an OpenAI-compatible proxy that supports many models.",
          "You'll need to provide:",
          "  1. Base URL (e.g., http://localhost:4000)",
          "  2. API key",
          "  3. Model selection (fetched from your LiteLLM instance)",
        ].join("\n"),
        "LiteLLM",
      );
    }
    // Check for existing env key
    const envKey = resolveEnvApiKey("litellm");
    if (!hasCredential && envKey) {
      const useExisting = await params.prompter.confirm({
        message: `Use existing LITELLM_API_KEY (${envKey.source}, ${formatApiKeyPreview(envKey.apiKey)})?`,
        initialValue: true,
      });
      if (useExisting) {
        apiKey = envKey.apiKey;
        await setLitellmApiKey(apiKey, params.agentDir);
        hasCredential = true;
      }
    }
    // Helper function to prompt for API key
    const promptForApiKey = async () => {
      const key = await params.prompter.text({
        message: "Enter LiteLLM API key",
        validate: validateApiKeyInput,
      });
      return normalizeApiKeyInput(String(key));
    };
    // Helper function to prompt for base URL
    const promptForBaseUrl = async () => {
      const defaultBaseUrl = process.env.LITELLM_BASE_URL ?? "http://localhost:4000";
      const baseUrl = await params.prompter.text({
        message: "Enter LiteLLM base URL",
        initialValue: defaultBaseUrl,
        placeholder: defaultBaseUrl,
        validate: (value) => {
          if (!value?.trim()) return "Base URL is required";
          try {
            new URL(value);
            return undefined;
          } catch {
            return "Invalid URL format";
          }
        },
      });
      return String(baseUrl).trim();
    };
    if (!hasCredential) {
      apiKey = await promptForApiKey();
      await setLitellmApiKey(apiKey, params.agentDir);
    }
    // Check for pre-provided base URL via CLI option (--litellm-base-url)
    let normalizedBaseUrl: string;
    if (params.opts?.litellmBaseUrl) {
      normalizedBaseUrl = params.opts.litellmBaseUrl.trim();
    } else {
      normalizedBaseUrl = await promptForBaseUrl();
    }
    // Try to fetch available models from LiteLLM
    type LitellmModelInfo = { id: string; maxInputTokens?: number; maxOutputTokens?: number };
    let availableModels: LitellmModelInfo[] = [];
    const authHeaders: Record<string, string> = apiKey ? { Authorization: `Bearer ${apiKey}` } : {};
    // First fetch model list from /v1/models
    try {
      const modelsUrl = new URL("/v1/models", normalizedBaseUrl).toString();
      const response = await fetch(modelsUrl, {
        headers: authHeaders,
        signal: AbortSignal.timeout(10000),
      });
      if (response.ok) {
        const data = (await response.json()) as {
          data?: Array<{ id: string }>;
        };
        if (data.data && Array.isArray(data.data)) {
          availableModels = data.data.map((m) => ({ id: m.id }));
        }
      }
    } catch {
      // Fetching models failed - will fall back to manual entry
    }
    // Then fetch detailed model info from /model/info (LiteLLM-specific endpoint)
    // This provides context window and max tokens info
    type ModelInfoEntry = {
      model_name: string;
      model_info?: {
        max_input_tokens?: number;
        max_tokens?: number;
        max_output_tokens?: number;
      };
    };
    const modelInfoMap = new Map<string, { maxInputTokens?: number; maxOutputTokens?: number }>();
    try {
      const modelInfoUrl = new URL("/model/info", normalizedBaseUrl).toString();
      const response = await fetch(modelInfoUrl, {
        headers: authHeaders,
        signal: AbortSignal.timeout(10000),
      });
      if (response.ok) {
        const data = (await response.json()) as { data?: ModelInfoEntry[] };
        if (data.data && Array.isArray(data.data)) {
          for (const entry of data.data) {
            if (entry.model_name && entry.model_info) {
              modelInfoMap.set(entry.model_name, {
                maxInputTokens: entry.model_info.max_input_tokens,
                maxOutputTokens: entry.model_info.max_output_tokens ?? entry.model_info.max_tokens,
              });
            }
          }
        }
      }
    } catch {
      // Model info fetch failed - context window will need manual entry
    }
    // Merge model info into available models
    availableModels = availableModels.map((m) => {
      const info = modelInfoMap.get(m.id);
      return {
        id: m.id,
        maxInputTokens: info?.maxInputTokens,
        maxOutputTokens: info?.maxOutputTokens,
      };
    });
    let normalizedModelId: string;
    let contextWindow: number | undefined;
    let maxTokens: number | undefined;
    // Check for pre-provided model via CLI option (--litellm-model)
    if (params.opts?.litellmModel) {
      normalizedModelId = params.opts.litellmModel.trim();
      // Try to get context info from model info map
      const modelInfo = availableModels.find((m) => m.id === normalizedModelId);
      if (modelInfo?.maxInputTokens) {
        contextWindow = modelInfo.maxInputTokens;
      }
      if (modelInfo?.maxOutputTokens) {
        maxTokens = modelInfo.maxOutputTokens;
      }
    } else if (availableModels.length > 0) {
      // Let user select from available models
      type SelectOption = { value: string; label: string; hint?: string };
      const modelOptions: SelectOption[] = availableModels.map((m) => ({
        value: m.id,
        label: m.id,
        hint: m.maxInputTokens ? `${Math.round(m.maxInputTokens / 1000)}k context` : undefined,
      }));
      const selectedModel = await params.prompter.select({
        message: `Select model (${availableModels.length} available)`,
        options: modelOptions,
      });
      normalizedModelId = String(selectedModel);
      const modelInfo = availableModels.find((m) => m.id === normalizedModelId);
      if (modelInfo?.maxInputTokens) {
        contextWindow = modelInfo.maxInputTokens;
      }
      if (modelInfo?.maxOutputTokens) {
        maxTokens = modelInfo.maxOutputTokens;
      }
    } else {
      // No models available from LiteLLM - offer manual entry or retry
      await params.prompter.note(
        [
          "Could not fetch models from LiteLLM server.",
          `Server: ${normalizedBaseUrl}`,
          "",
          "This could be due to:",
          "  • Invalid API key",
          "  • Server not accessible",
          "  • Network connectivity issues",
        ].join("\n"),
        "Model fetch failed",
      );
      const action = await params.prompter.select({
        message: "How would you like to proceed?",
        options: [
          { value: "retry-apikey", label: "Re-enter API key" },
          { value: "retry-baseurl", label: "Re-enter base URL" },
          { value: "cancel", label: "Go back to auth method selection" },
        ],
      });
      if (action === "cancel") {
        // Throw an error with a specific message that signals to restart auth selection
        // The caller should catch this and re-prompt for auth choice
        throw new Error("AUTH_CHOICE_CANCELLED");
      }
      if (action === "retry-apikey") {
        // Re-prompt for API key and retry the entire flow
        // Clear the CLI-provided options to force prompting
        const newParams = {
          ...params,
          authChoice: "litellm-api-key" as const,
          opts: {
            ...params.opts,
            litellmApiKey: undefined, // Clear the CLI-provided API key so we can prompt
            token: undefined, // Also clear token if it was used
          },
        };
        return await applyAuthChoiceApiProviders(newParams);
      }
      if (action === "retry-baseurl") {
        // Re-prompt for base URL and retry the entire flow
        // This ensures we go through the full fetch process again with the new URL
        const newParams = {
          ...params,
          authChoice: "litellm-api-key" as const,
          opts: {
            ...params.opts,
            litellmBaseUrl: undefined, // Clear the CLI-provided URL so we can prompt
          },
        };
        return await applyAuthChoiceApiProviders(newParams);
      }
      // This should never be reached, but throw error as fallback
      throw new Error("Failed to configure LiteLLM provider");
    }
    // Strip litellm/ prefix if the API returned it (avoid litellm/litellm/model)
    if (normalizedModelId.startsWith("litellm/")) {
      normalizedModelId = normalizedModelId.slice("litellm/".length);
    }
    const modelRef = `litellm/${normalizedModelId}`;
    nextConfig = applyAuthProfileConfig(nextConfig, {
      profileId: "litellm:default",
      provider: "litellm",
      mode: "api_key",
    });
    if (params.setDefaultModel) {
      nextConfig = applyLitellmConfig(nextConfig, {
        baseUrl: normalizedBaseUrl,
        modelId: normalizedModelId,
        contextWindow,
        maxTokens,
      });
      await params.prompter.note(
        `Default model set to ${modelRef}${contextWindow ? ` (${Math.round(contextWindow / 1000)}k context)` : ""}`,
        "Model configured",
      );
    } else {
      nextConfig = applyLitellmProviderConfig(nextConfig, {
        baseUrl: normalizedBaseUrl,
        modelId: normalizedModelId,
        contextWindow,
        maxTokens,
      });
      agentModelOverride = modelRef;
      await noteAgentModel(modelRef);
    }
    return { config: nextConfig, agentModelOverride };
  }
  return null;
 }
--- a/src/commands/auth-choice.apply.ts
+++ b/src/commands/auth-choice.apply.ts
@ -24,6 +24,12 @@ export type ApplyAuthChoiceParams = {
  opts?: {
    tokenProvider?: string;
    token?: string;
    // LiteLLM-specific options
    litellmApiKey?: string;
    litellmBaseUrl?: string;
    litellmModel?: string;
    // Non-interactive mode flag
    nonInteractive?: boolean;
  };
 };
--- a/src/commands/configure.gateway-auth.ts
+++ b/src/commands/configure.gateway-auth.ts
@ -4,6 +4,7 @@ import type { RuntimeEnv } from "../runtime.js";
 import type { WizardPrompter } from "../wizard/prompts.js";
 import { applyAuthChoice, resolvePreferredProviderForAuthChoice } from "./auth-choice.js";
 import { promptAuthChoiceGrouped } from "./auth-choice-prompt.js";
 import type { AuthChoice } from "./onboard-types.js";
 import {
  applyModelAllowlist,
  applyModelFallbacksFromSelection,
@ -41,34 +42,50 @@ export async function promptAuthConfig(
  runtime: RuntimeEnv,
  prompter: WizardPrompter,
 ): Promise<OpenClawConfig> {
  const authChoice = await promptAuthChoiceGrouped({
    prompter,
    store: ensureAuthProfileStore(undefined, {
      allowKeychainPrompt: false,
    }),
    includeSkip: true,
  });
  let next = cfg;
-  if (authChoice !== "skip") {
+  let authChoice: AuthChoice;
-    const applied = await applyAuthChoice({
+
-      authChoice,
+  // Loop to allow retrying auth choice if user cancels during configuration
-      config: next,
+  while (true) {
    authChoice = await promptAuthChoiceGrouped({
      prompter,
-      runtime,
+      store: ensureAuthProfileStore(undefined, {
-      setDefaultModel: true,
+        allowKeychainPrompt: false,
      }),
      includeSkip: true,
    });
-    next = applied.config;
+
-  } else {
+    if (authChoice !== "skip") {
-    const modelSelection = await promptDefaultModel({
+      try {
-      config: next,
+        const applied = await applyAuthChoice({
-      prompter,
+          authChoice,
-      allowKeep: true,
+          config: next,
-      ignoreAllowlist: true,
+          prompter,
-      preferredProvider: resolvePreferredProviderForAuthChoice(authChoice),
+          runtime,
-    });
+          setDefaultModel: true,
-    if (modelSelection.model) {
+        });
-      next = applyPrimaryModel(next, modelSelection.model);
+        next = applied.config;
        break; // Success - exit the loop
      } catch (error) {
        // If user cancelled to go back to auth selection, loop again
        if (error instanceof Error && error.message === "AUTH_CHOICE_CANCELLED") {
          continue;
        }
        // Re-throw other errors
        throw error;
      }
    } else {
      const modelSelection = await promptDefaultModel({
        config: next,
        prompter,
        allowKeep: true,
        ignoreAllowlist: true,
        preferredProvider: resolvePreferredProviderForAuthChoice(authChoice),
      });
      if (modelSelection.model) {
        next = applyPrimaryModel(next, modelSelection.model);
      }
      break; // Skip selected - exit the loop
    }
  }
--- a/src/commands/onboard-auth.config-core.ts
+++ b/src/commands/onboard-auth.config-core.ts
@ -484,6 +484,115 @@ export function applyVeniceConfig(cfg: OpenClawConfig): OpenClawConfig {
  };
 }
 /**
 * Apply LiteLLM provider configuration without changing the default model.
 * LiteLLM is a flexible proxy that supports many models, so base URL and model
 * are user-configurable.
 *
 * Effects on the returned config (the input `cfg` is not mutated):
 * - `agents.defaults.models["litellm/<modelId>"]` is created or kept; an existing
 *   alias wins, otherwise `modelName`, otherwise `modelId`.
 * - `models.providers.litellm` keeps its other existing fields, but `baseUrl` is
 *   set to `params.baseUrl` and `api` to "openai-completions". An existing string
 *   `apiKey` is kept after trimming; a blank or non-string one is dropped.
 * - A model definition is appended only when no existing entry has the same id.
 *   An existing entry is left untouched, so `contextWindow`/`maxTokens` passed
 *   here do not overwrite it.
 * - `models.mode` defaults to "merge" when unset.
 *
 * @param cfg - Current configuration.
 * @param params - Proxy base URL, model id, and optional display name / limits
 *   (`contextWindow` defaults to 128000, `maxTokens` to 8192).
 * @returns A new configuration object.
 */
 export function applyLitellmProviderConfig(
  cfg: OpenClawConfig,
  params: {
    baseUrl: string;
    modelId: string;
    modelName?: string;
    contextWindow?: number;
    maxTokens?: number;
  },
 ): OpenClawConfig {
  const modelRef = `litellm/${params.modelId}`;
  const models = { ...cfg.agents?.defaults?.models };
  models[modelRef] = {
    ...models[modelRef],
    alias: models[modelRef]?.alias ?? params.modelName ?? params.modelId,
  };
  const providers = { ...cfg.models?.providers };
  const existingProvider = providers.litellm;
  const existingModels = Array.isArray(existingProvider?.models) ? existingProvider.models : [];
  // Detect Claude models and use Anthropic Messages API for proper cache control support
  const isClaude = params.modelId.toLowerCase().startsWith("claude-");
  const newModel = {
    id: params.modelId,
    name: params.modelName ?? params.modelId,
    // Claude models through LiteLLM should use anthropic-messages API for cache control
    ...(isClaude ? { api: "anthropic-messages" as const } : {}),
    reasoning: false,
    input: ["text"] as ("text" | "image")[],
    cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
    contextWindow: params.contextWindow ?? 128000,
    maxTokens: params.maxTokens ?? 8192,
    // LiteLLM proxies to various providers that may not support the OpenAI Responses API
    // `store` parameter. Disable it to avoid "Extra inputs are not permitted" errors.
    compat: { supportsStore: false },
  };
  const hasModel = existingModels.some((model) => model.id === params.modelId);
  // Both branches are non-empty (a match implies at least one existing entry; otherwise
  // newModel is appended), so no further "empty list" fallback is needed.
  const mergedModels = hasModel ? existingModels : [...existingModels, newModel];
  // Split apiKey off so it can be validated separately from the pass-through fields.
  const { apiKey: existingApiKey, ...existingProviderRest } = (existingProvider ?? {}) as Record<
    string,
    unknown
  > as { apiKey?: string };
  // Runtime check: the cast above does not guarantee the stored value is a string.
  const normalizedApiKey = typeof existingApiKey === "string" ? existingApiKey.trim() : undefined;
  providers.litellm = {
    ...existingProviderRest,
    baseUrl: params.baseUrl,
    api: "openai-completions",
    ...(normalizedApiKey ? { apiKey: normalizedApiKey } : {}),
    models: mergedModels,
  };
  return {
    ...cfg,
    agents: {
      ...cfg.agents,
      defaults: {
        ...cfg.agents?.defaults,
        models,
      },
    },
    models: {
      mode: cfg.models?.mode ?? "merge",
      providers,
    },
  };
 }
 /**
 * Apply LiteLLM provider configuration AND set LiteLLM as the default model.
 * Use this when LiteLLM is the primary provider choice during onboarding.
 *
 * Delegates provider/model registration to `applyLitellmProviderConfig`, then
 * replaces `agents.defaults.model` with `{ primary: "litellm/<modelId>" }`.
 * If the previous `model` value is an object with a `fallbacks` key, that key is
 * carried over; every other previous field of `model` is discarded.
 *
 * @param cfg - Current configuration.
 * @param params - Same parameters as `applyLitellmProviderConfig`.
 * @returns A new configuration object.
 */
 export function applyLitellmConfig(
  cfg: OpenClawConfig,
  params: {
    baseUrl: string;
    modelId: string;
    modelName?: string;
    contextWindow?: number;
    maxTokens?: number;
  },
 ): OpenClawConfig {
  const next = applyLitellmProviderConfig(cfg, params);
  const modelRef = `litellm/${params.modelId}`;
  const existingModel: unknown = next.agents?.defaults?.model;
  // The `in` operator throws a TypeError on primitives, so only probe real objects.
  // NOTE(review): guards against a string-valued `model` in hand-edited config — confirm
  // whether the config schema can actually produce one.
  const carriedFallbacks =
    typeof existingModel === "object" && existingModel !== null && "fallbacks" in existingModel
      ? { fallbacks: (existingModel as { fallbacks?: string[] }).fallbacks }
      : undefined;
  return {
    ...next,
    agents: {
      ...next.agents,
      defaults: {
        ...next.agents?.defaults,
        model: {
          // Spreading `undefined` is a no-op, so `model` is just `{ primary }` without fallbacks.
          ...carriedFallbacks,
          primary: modelRef,
        },
      },
    },
  };
 }
 export function applyAuthProfileConfig(
  cfg: OpenClawConfig,
  params: {
--- a/src/commands/onboard-auth.credentials.ts
+++ b/src/commands/onboard-auth.credentials.ts
@ -177,3 +177,17 @@ export async function setOpencodeZenApiKey(key: string, agentDir?: string) {
    agentDir: resolveAuthAgentDir(agentDir),
  });
 }
 /**
 * Model reference exported as the LiteLLM default, written in
 * `litellm/<model id>` form (here the model id is `gpt-4`).
 * NOTE(review): consumers of this constant are outside this view — confirm usage.
 */
 export const LITELLM_DEFAULT_MODEL_REF = "litellm/gpt-4";
 /**
 * Upsert the `litellm:default` auth profile with the given API key.
 *
 * The key is passed through unchanged as an `api_key` credential for the
 * `litellm` provider. The result of `upsertAuthProfile` is not awaited.
 *
 * @param key - LiteLLM API key to record.
 * @param agentDir - Optional agent directory; run through `resolveAuthAgentDir` before use.
 */
 export async function setLitellmApiKey(key: string, agentDir?: string) {
  const targetDir = resolveAuthAgentDir(agentDir);
  // `as const` keeps the literal `type`/`provider` values from widening to string.
  const credential = { type: "api_key", provider: "litellm", key } as const;
  upsertAuthProfile({ profileId: "litellm:default", credential, agentDir: targetDir });
 }
--- a/src/commands/onboard-auth.ts
+++ b/src/commands/onboard-auth.ts
@ -7,6 +7,8 @@ export {
  applyAuthProfileConfig,
  applyKimiCodeConfig,
  applyKimiCodeProviderConfig,
  applyLitellmConfig,
  applyLitellmProviderConfig,
  applyMoonshotConfig,
  applyMoonshotProviderConfig,
  applyOpenrouterConfig,
@ -35,10 +37,12 @@ export {
  applyOpencodeZenProviderConfig,
 } from "./onboard-auth.config-opencode.js";
 export {
  LITELLM_DEFAULT_MODEL_REF,
  OPENROUTER_DEFAULT_MODEL_REF,
  setAnthropicApiKey,
  setGeminiApiKey,
  setKimiCodeApiKey,
  setLitellmApiKey,
  setMinimaxApiKey,
  setMoonshotApiKey,
  setOpencodeZenApiKey,
--- a/src/commands/onboard-types.ts
+++ b/src/commands/onboard-types.ts
@ -17,6 +17,7 @@ export type AuthChoice =
  | "kimi-code-api-key"
  | "synthetic-api-key"
  | "venice-api-key"
  | "litellm-api-key"
  | "codex-cli"
  | "apiKey"
  | "gemini-api-key"
@ -73,6 +74,9 @@ export type OnboardOptions = {
  syntheticApiKey?: string;
  veniceApiKey?: string;
  opencodeZenApiKey?: string;
  litellmApiKey?: string;
  litellmBaseUrl?: string;
  litellmModel?: string;
  gatewayPort?: number;
  gatewayBind?: GatewayBind;
  gatewayAuth?: GatewayAuthChoice;
--- a/src/config/defaults.ts
+++ b/src/config/defaults.ts
@ -323,7 +323,12 @@ export function applyContextPruningDefaults(cfg: OpenClawConfig): OpenClawConfig
    for (const [key, entry] of Object.entries(nextModels)) {
      const parsed = parseModelRef(key, "anthropic");
-      if (!parsed || parsed.provider !== "anthropic") continue;
+      if (!parsed) continue;
      // Apply cache control to Anthropic models and LiteLLM Claude models
      const isAnthropicProvider = parsed.provider === "anthropic";
      const isLitellmClaude =
        parsed.provider === "litellm" && parsed.model.toLowerCase().startsWith("claude-");
      if (!isAnthropicProvider && !isLitellmClaude) continue;
      const current = entry ?? {};
      const params = (current as { params?: Record<string, unknown> }).params ?? {};
      if (typeof params.cacheControlTtl === "string") continue;
@ -337,17 +342,24 @@ export function applyContextPruningDefaults(cfg: OpenClawConfig): OpenClawConfig
    const primary = resolvePrimaryModelRef(defaults.model?.primary ?? undefined);
    if (primary) {
      const parsedPrimary = parseModelRef(primary, "anthropic");
-      if (parsedPrimary?.provider === "anthropic") {
+      if (parsedPrimary) {
-        const key = `${parsedPrimary.provider}/${parsedPrimary.model}`;
+        // Apply cache control to Anthropic models and LiteLLM Claude models
-        const entry = nextModels[key];
+        const isAnthropicProvider = parsedPrimary.provider === "anthropic";
-        const current = entry ?? {};
+        const isLitellmClaude =
-        const params = (current as { params?: Record<string, unknown> }).params ?? {};
+          parsedPrimary.provider === "litellm" &&
-        if (typeof params.cacheControlTtl !== "string") {
+          parsedPrimary.model.toLowerCase().startsWith("claude-");
-          nextModels[key] = {
+        if (isAnthropicProvider || isLitellmClaude) {
-            ...(current as Record<string, unknown>),
+          const key = `${parsedPrimary.provider}/${parsedPrimary.model}`;
-            params: { ...params, cacheControlTtl: "1h" },
+          const entry = nextModels[key];
-          };
+          const current = entry ?? {};
-          modelsMutated = true;
+          const params = (current as { params?: Record<string, unknown> }).params ?? {};
          if (typeof params.cacheControlTtl !== "string") {
            nextModels[key] = {
              ...(current as Record<string, unknown>),
              params: { ...params, cacheControlTtl: "1h" },
            };
            modelsMutated = true;
          }
        }
      }
    }
--- a/src/config/io.ts
+++ b/src/config/io.ts
@ -48,6 +48,7 @@ const SHELL_ENV_EXPECTED_KEYS = [
  "AI_GATEWAY_API_KEY",
  "MINIMAX_API_KEY",
  "SYNTHETIC_API_KEY",
  "LITELLM_API_KEY",
  "ELEVENLABS_API_KEY",
  "TELEGRAM_BOT_TOKEN",
  "DISCORD_BOT_TOKEN",
--- a/src/wizard/onboarding.ts
+++ b/src/wizard/onboarding.ts
@ -21,6 +21,7 @@ import { promptRemoteGatewayConfig } from "../commands/onboard-remote.js";
 import { setupSkills } from "../commands/onboard-skills.js";
 import { setupInternalHooks } from "../commands/onboard-hooks.js";
 import type {
  AuthChoice,
  GatewayAuthChoice,
  OnboardMode,
  OnboardOptions,
@ -354,26 +355,43 @@ export async function runOnboardingWizard(
    allowKeychainPrompt: false,
  });
  const authChoiceFromPrompt = opts.authChoice === undefined;
-  const authChoice =
+  let authChoice: AuthChoice;
    opts.authChoice ??
    (await promptAuthChoiceGrouped({
      prompter,
      store: authStore,
      includeSkip: true,
    }));
-  const authResult = await applyAuthChoice({
+  // Loop to allow retrying auth choice if user cancels during configuration
-    authChoice,
+  while (true) {
-    config: nextConfig,
+    authChoice =
-    prompter,
+      opts.authChoice ??
-    runtime,
+      (await promptAuthChoiceGrouped({
-    setDefaultModel: true,
+        prompter,
-    opts: {
+        store: authStore,
-      tokenProvider: opts.tokenProvider,
+        includeSkip: true,
-      token: opts.authChoice === "apiKey" && opts.token ? opts.token : undefined,
+      }));
-    },
+
-  });
+    try {
-  nextConfig = authResult.config;
+      const authResult = await applyAuthChoice({
        authChoice,
        config: nextConfig,
        prompter,
        runtime,
        setDefaultModel: true,
        opts: {
          tokenProvider: opts.tokenProvider,
          token: opts.authChoice === "apiKey" && opts.token ? opts.token : undefined,
        },
      });
      nextConfig = authResult.config;
      break; // Success - exit the loop
    } catch (error) {
      // If user cancelled to go back to auth selection, loop again
      if (error instanceof Error && error.message === "AUTH_CHOICE_CANCELLED") {
        // Clear opts.authChoice so we prompt again
        opts.authChoice = undefined;
        continue;
      }
      // Re-throw other errors
      throw error;
    }
  }
  if (authChoiceFromPrompt) {
    const modelSelection = await promptDefaultModel({