diff --git a/docs/docs.json b/docs/docs.json index a676004f6..a033160a6 100644 --- a/docs/docs.json +++ b/docs/docs.json @@ -1030,6 +1030,7 @@ "providers/vercel-ai-gateway", "providers/openrouter", "providers/synthetic", + "providers/litellm", "providers/opencode", "providers/glm", "providers/zai" diff --git a/docs/providers/litellm.md b/docs/providers/litellm.md new file mode 100644 index 000000000..c0392a18c --- /dev/null +++ b/docs/providers/litellm.md @@ -0,0 +1,131 @@ +--- +summary: "Use LiteLLM as an OpenAI-compatible proxy in OpenClaw" +read_when: + - You want to use LiteLLM as a model provider + - You need to connect to a self-hosted LiteLLM proxy + - You want to use any model through an OpenAI-compatible API +--- +# LiteLLM + +LiteLLM is an OpenAI-compatible proxy that supports 100+ LLM APIs. OpenClaw +registers it as the `litellm` provider and uses the OpenAI Completions API. + +## Quick setup + +1) Set up your LiteLLM proxy (see [LiteLLM docs](https://docs.litellm.ai/)) +2) Set environment variables (optional): + - `LITELLM_API_KEY` - your LiteLLM API key + - `LITELLM_BASE_URL` - your LiteLLM endpoint (default: `http://localhost:4000`) + - `LITELLM_MODEL` - default model name (default: `gpt-4`) +3) Run onboarding: + +```bash +openclaw onboard --auth-choice litellm-api-key +``` + +The wizard will prompt for: +- Base URL (your LiteLLM proxy endpoint) +- API key +- Model name (as configured in your LiteLLM proxy) + +## Config example + +```json5 +{ + env: { LITELLM_API_KEY: "sk-..." 
}, + agents: { + defaults: { + model: { primary: "litellm/gpt-4" }, + models: { "litellm/gpt-4": { alias: "GPT-4" } } + } + }, + models: { + mode: "merge", + providers: { + litellm: { + baseUrl: "http://localhost:4000", + apiKey: "${LITELLM_API_KEY}", + api: "openai-completions", + models: [ + { + id: "gpt-4", + name: "GPT-4", + reasoning: false, + input: ["text"], + contextWindow: 128000, + maxTokens: 8192 + } + ] + } + } + } +} +``` + +## Multiple models + +Add additional models to your config as needed: + +```json5 +{ + models: { + providers: { + litellm: { + baseUrl: "http://localhost:4000", + apiKey: "${LITELLM_API_KEY}", + api: "openai-completions", + models: [ + { id: "gpt-4", name: "GPT-4", contextWindow: 128000, maxTokens: 8192 }, + { id: "claude-3-opus", name: "Claude Opus", contextWindow: 200000, maxTokens: 4096 }, + { id: "gemini-pro", name: "Gemini Pro", contextWindow: 32000, maxTokens: 8192 } + ] + } + } + } +} +``` + +Then switch models using: + +```bash +openclaw config set agents.defaults.model.primary litellm/claude-3-opus +``` + +## Prompt caching + +When using Anthropic models through LiteLLM (e.g., `claude-opus-4-5`, `claude-sonnet-4-5`), OpenClaw automatically enables **prompt caching** to reduce costs: + +```json5 +{ + agents: { + defaults: { + models: { + "litellm/claude-opus-4-5": { + params: { + cacheControlTtl: "1h" // Auto-configured for Claude models + } + } + } + } + } +} +``` + +### Cost savings with caching + +- **Without caching**: Every message pays full price for the entire conversation history +- **With caching** (enabled by default): Repeated context costs 10x less + +Example from actual usage: +- Without caching: 93k tokens × $0.000005 = **$0.47** per message +- With caching: 123k tokens (mostly cached) = **$0.05** per message (90% savings!) + +Caching is **automatically enabled** for all `claude-*` models through LiteLLM. + +## Notes + +- Model refs use `litellm/<modelId>` where `modelId` matches your LiteLLM config. 
+- The base URL should not include `/v1` - OpenClaw's OpenAI client appends it. +- Supported LiteLLM models depend on your proxy configuration. +- **Prompt caching works automatically** when using Claude models through LiteLLM. +- See [Model providers](/concepts/model-providers) for provider rules. diff --git a/src/agents/litellm-models.ts b/src/agents/litellm-models.ts new file mode 100644 index 000000000..62a9e13ef --- /dev/null +++ b/src/agents/litellm-models.ts @@ -0,0 +1,50 @@ +import type { ModelDefinitionConfig } from "../config/types.js"; + +// LiteLLM is a proxy that supports many models, so the base URL and model +// are user-configurable. We provide sensible defaults for onboarding. +export const LITELLM_DEFAULT_BASE_URL = "http://localhost:4000"; +export const LITELLM_DEFAULT_MODEL_ID = "gpt-4"; +export const LITELLM_DEFAULT_MODEL_REF = `litellm/${LITELLM_DEFAULT_MODEL_ID}`; +export const LITELLM_DEFAULT_COST = { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, +}; + +export type LitellmModelEntry = { + id: string; + name: string; + reasoning?: boolean; + input?: readonly ("text" | "image")[]; + contextWindow?: number; + maxTokens?: number; +}; + +export function buildLitellmModelDefinition(entry: LitellmModelEntry): ModelDefinitionConfig { + // Detect Claude models and use Anthropic Messages API for proper cache control support + const isClaude = entry.id.toLowerCase().startsWith("claude-"); + + return { + id: entry.id, + name: entry.name, + // Claude models through LiteLLM should use anthropic-messages API for cache control + ...(isClaude ? { api: "anthropic-messages" as const } : {}), + reasoning: entry.reasoning ?? false, + input: entry.input ? [...entry.input] : ["text"], + cost: LITELLM_DEFAULT_COST, + contextWindow: entry.contextWindow ?? 128000, + maxTokens: entry.maxTokens ?? 8192, + // LiteLLM proxies to various providers that may not support the OpenAI Responses API + // `store` parameter. 
Disable it by default to avoid "Extra inputs are not permitted" errors. + compat: { supportsStore: false }, + }; +} + +/** + * Creates a model reference for a LiteLLM model. + * The model ID can be any model supported by the LiteLLM proxy. + */ +export function litellmModelRef(modelId: string): string { + return `litellm/${modelId}`; +} diff --git a/src/agents/model-auth.ts b/src/agents/model-auth.ts index 1445b53f7..3de5c3a1e 100644 --- a/src/agents/model-auth.ts +++ b/src/agents/model-auth.ts @@ -286,6 +286,7 @@ export function resolveEnvApiKey(provider: string): EnvApiKeyResult | null { venice: "VENICE_API_KEY", mistral: "MISTRAL_API_KEY", opencode: "OPENCODE_API_KEY", + litellm: "LITELLM_API_KEY", }; const envVar = envMap[normalized]; if (!envVar) return null; diff --git a/src/agents/pi-embedded-runner/cache-ttl.test.ts b/src/agents/pi-embedded-runner/cache-ttl.test.ts new file mode 100644 index 000000000..7a9e57320 --- /dev/null +++ b/src/agents/pi-embedded-runner/cache-ttl.test.ts @@ -0,0 +1,31 @@ +import { describe, it, expect } from "vitest"; +import { isCacheTtlEligibleProvider } from "./cache-ttl.js"; + +describe("isCacheTtlEligibleProvider", () => { + it("returns true for direct Anthropic provider", () => { + expect(isCacheTtlEligibleProvider("anthropic", "claude-opus-4-5")).toBe(true); + expect(isCacheTtlEligibleProvider("Anthropic", "claude-sonnet-4-5")).toBe(true); + }); + + it("returns true for OpenRouter with Anthropic models", () => { + expect(isCacheTtlEligibleProvider("openrouter", "anthropic/claude-opus-4-5")).toBe(true); + expect(isCacheTtlEligibleProvider("OpenRouter", "anthropic/claude-3-opus")).toBe(true); + }); + + it("returns true for LiteLLM with Claude models", () => { + expect(isCacheTtlEligibleProvider("litellm", "claude-opus-4-5")).toBe(true); + expect(isCacheTtlEligibleProvider("litellm", "claude-sonnet-4-5")).toBe(true); + expect(isCacheTtlEligibleProvider("LiteLLM", "Claude-3-Opus")).toBe(true); + }); + + it("returns false for 
LiteLLM with non-Claude models", () => { + expect(isCacheTtlEligibleProvider("litellm", "gpt-4")).toBe(false); + expect(isCacheTtlEligibleProvider("litellm", "gemini-pro")).toBe(false); + expect(isCacheTtlEligibleProvider("litellm", "llama-3")).toBe(false); + }); + + it("returns false for other providers", () => { + expect(isCacheTtlEligibleProvider("openai", "gpt-4")).toBe(false); + expect(isCacheTtlEligibleProvider("google", "gemini-pro")).toBe(false); + }); +}); diff --git a/src/agents/pi-embedded-runner/cache-ttl.ts b/src/agents/pi-embedded-runner/cache-ttl.ts index c727bf439..f88385182 100644 --- a/src/agents/pi-embedded-runner/cache-ttl.ts +++ b/src/agents/pi-embedded-runner/cache-ttl.ts @@ -14,6 +14,8 @@ export function isCacheTtlEligibleProvider(provider: string, modelId: string): b if (normalizedProvider === "anthropic") return true; if (normalizedProvider === "openrouter" && normalizedModelId.startsWith("anthropic/")) return true; + // LiteLLM proxying to Anthropic models (claude-*) + if (normalizedProvider === "litellm" && normalizedModelId.startsWith("claude-")) return true; return false; } diff --git a/src/agents/pi-embedded-runner/extra-params.ts b/src/agents/pi-embedded-runner/extra-params.ts index 6599b9fd6..3b6197356 100644 --- a/src/agents/pi-embedded-runner/extra-params.ts +++ b/src/agents/pi-embedded-runner/extra-params.ts @@ -32,6 +32,8 @@ function resolveCacheControlTtl( if (raw !== "5m" && raw !== "1h") return undefined; if (provider === "anthropic") return raw; if (provider === "openrouter" && modelId.startsWith("anthropic/")) return raw; + // LiteLLM proxying to Anthropic models (claude-*) + if (provider === "litellm" && modelId.startsWith("claude-")) return raw; return undefined; } diff --git a/src/agents/pi-embedded-runner/model.ts b/src/agents/pi-embedded-runner/model.ts index 9dd94cd43..eccef9f12 100644 --- a/src/agents/pi-embedded-runner/model.ts +++ b/src/agents/pi-embedded-runner/model.ts @@ -77,17 +77,25 @@ export function 
resolveModel( } const providerCfg = providers[provider]; if (providerCfg || modelId.startsWith("mock-")) { + // Find the matching model definition from provider config to get compat settings + const modelDef = providerCfg?.models?.find((m) => m.id === modelId); const fallbackModel: Model = normalizeModelCompat({ id: modelId, - name: modelId, - api: providerCfg?.api ?? "openai-responses", + name: modelDef?.name ?? modelId, + api: modelDef?.api ?? providerCfg?.api ?? "openai-responses", provider, baseUrl: providerCfg?.baseUrl, - reasoning: false, - input: ["text"], - cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, - contextWindow: providerCfg?.models?.[0]?.contextWindow ?? DEFAULT_CONTEXT_TOKENS, - maxTokens: providerCfg?.models?.[0]?.maxTokens ?? DEFAULT_CONTEXT_TOKENS, + reasoning: modelDef?.reasoning ?? false, + input: modelDef?.input ?? ["text"], + cost: modelDef?.cost ?? { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, + contextWindow: + modelDef?.contextWindow ?? + providerCfg?.models?.[0]?.contextWindow ?? + DEFAULT_CONTEXT_TOKENS, + maxTokens: + modelDef?.maxTokens ?? providerCfg?.models?.[0]?.maxTokens ?? 
DEFAULT_CONTEXT_TOKENS, + // Preserve compat settings for provider-specific quirks (e.g., supportsStore for LiteLLM) + compat: modelDef?.compat, } as Model); return { model: fallbackModel, authStorage, modelRegistry }; } diff --git a/src/cli/program/register.onboard.ts b/src/cli/program/register.onboard.ts index 3f81a5ee8..be735a323 100644 --- a/src/cli/program/register.onboard.ts +++ b/src/cli/program/register.onboard.ts @@ -52,7 +52,7 @@ export function registerOnboardCommand(program: Command) { .option("--mode ", "Wizard mode: local|remote") .option( "--auth-choice ", - "Auth: setup-token|token|chutes|openai-codex|openai-api-key|openrouter-api-key|ai-gateway-api-key|moonshot-api-key|kimi-code-api-key|synthetic-api-key|venice-api-key|gemini-api-key|zai-api-key|xiaomi-api-key|apiKey|minimax-api|minimax-api-lightning|opencode-zen|skip", + "Auth: setup-token|token|chutes|openai-codex|openai-api-key|openrouter-api-key|ai-gateway-api-key|moonshot-api-key|kimi-code-api-key|synthetic-api-key|venice-api-key|litellm-api-key|gemini-api-key|zai-api-key|xiaomi-api-key|apiKey|minimax-api|minimax-api-lightning|opencode-zen|skip", ) .option( "--token-provider ", @@ -77,6 +77,9 @@ export function registerOnboardCommand(program: Command) { .option("--synthetic-api-key ", "Synthetic API key") .option("--venice-api-key ", "Venice API key") .option("--opencode-zen-api-key ", "OpenCode Zen API key") + .option("--litellm-api-key ", "LiteLLM API key") + .option("--litellm-base-url ", "LiteLLM base URL (default: http://localhost:4000)") + .option("--litellm-model ", "LiteLLM model name") .option("--gateway-port ", "Gateway port") .option("--gateway-bind ", "Gateway bind: loopback|tailnet|lan|auto|custom") .option("--gateway-auth ", "Gateway auth: token|password") diff --git a/src/commands/agents.commands.add.ts b/src/commands/agents.commands.add.ts index e98d1e5b3..7be8ff307 100644 --- a/src/commands/agents.commands.add.ts +++ b/src/commands/agents.commands.add.ts @@ -254,27 +254,41 @@ 
export async function agentsAddCommand( const authStore = ensureAuthProfileStore(agentDir, { allowKeychainPrompt: false, }); - const authChoice = await promptAuthChoiceGrouped({ - prompter, - store: authStore, - includeSkip: true, - }); - const authResult = await applyAuthChoice({ - authChoice, - config: nextConfig, - prompter, - runtime, - agentDir, - setDefaultModel: false, - agentId, - }); - nextConfig = authResult.config; - if (authResult.agentModelOverride) { - nextConfig = applyAgentConfig(nextConfig, { - agentId, - model: authResult.agentModelOverride, + // Loop to allow retrying auth choice if user cancels during configuration + while (true) { + const authChoice = await promptAuthChoiceGrouped({ + prompter, + store: authStore, + includeSkip: true, }); + + try { + const authResult = await applyAuthChoice({ + authChoice, + config: nextConfig, + prompter, + runtime, + agentDir, + setDefaultModel: false, + agentId, + }); + nextConfig = authResult.config; + if (authResult.agentModelOverride) { + nextConfig = applyAgentConfig(nextConfig, { + agentId, + model: authResult.agentModelOverride, + }); + } + break; // Success - exit the loop + } catch (error) { + // If user cancelled to go back to auth selection, loop again + if (error instanceof Error && error.message === "AUTH_CHOICE_CANCELLED") { + continue; + } + // Re-throw other errors + throw error; + } } } diff --git a/src/commands/auth-choice-options.ts b/src/commands/auth-choice-options.ts index 5acddf4e3..8881546f3 100644 --- a/src/commands/auth-choice-options.ts +++ b/src/commands/auth-choice-options.ts @@ -21,7 +21,8 @@ export type AuthChoiceGroupId = | "minimax" | "synthetic" | "venice" - | "qwen"; + | "qwen" + | "litellm"; export type AuthChoiceGroup = { value: AuthChoiceGroupId; @@ -120,6 +121,12 @@ const AUTH_CHOICE_GROUP_DEFS: { hint: "API key", choices: ["opencode-zen"], }, + { + value: "litellm", + label: "LiteLLM", + hint: "OpenAI-compatible proxy (self-hosted)", + choices: ["litellm-api-key"], + }, 
]; export function buildAuthChoiceOptions(params: { @@ -194,6 +201,11 @@ export function buildAuthChoiceOptions(params: { label: "MiniMax M2.1 Lightning", hint: "Faster, higher output cost", }); + options.push({ + value: "litellm-api-key", + label: "LiteLLM API key", + hint: "OpenAI-compatible proxy (any model)", + }); if (params.includeSkip) { options.push({ value: "skip", label: "Skip for now" }); } diff --git a/src/commands/auth-choice.apply.api-providers.ts b/src/commands/auth-choice.apply.api-providers.ts index fa4fc77e7..b21cadfba 100644 --- a/src/commands/auth-choice.apply.api-providers.ts +++ b/src/commands/auth-choice.apply.api-providers.ts @@ -15,6 +15,8 @@ import { applyAuthProfileConfig, applyKimiCodeConfig, applyKimiCodeProviderConfig, + applyLitellmConfig, + applyLitellmProviderConfig, applyMoonshotConfig, applyMoonshotProviderConfig, applyOpencodeZenConfig, @@ -39,6 +41,7 @@ import { XIAOMI_DEFAULT_MODEL_REF, setGeminiApiKey, setKimiCodeApiKey, + setLitellmApiKey, setMoonshotApiKey, setOpencodeZenApiKey, setOpenrouterApiKey, @@ -91,6 +94,8 @@ export async function applyAuthChoiceApiProviders( authChoice = "venice-api-key"; } else if (params.opts.tokenProvider === "opencode") { authChoice = "opencode-zen"; + } else if (params.opts.tokenProvider === "litellm") { + authChoice = "litellm-api-key"; } } @@ -633,5 +638,296 @@ export async function applyAuthChoiceApiProviders( return { config: nextConfig, agentModelOverride }; } + if (authChoice === "litellm-api-key") { + let hasCredential = false; + let apiKey: string | undefined; + + // Check for pre-provided API key via CLI options (--litellm-api-key or --token with --token-provider litellm) + if (!hasCredential && params.opts?.litellmApiKey) { + apiKey = normalizeApiKeyInput(params.opts.litellmApiKey); + await setLitellmApiKey(apiKey, params.agentDir); + hasCredential = true; + } + if (!hasCredential && params.opts?.token && params.opts?.tokenProvider === "litellm") { + apiKey = 
normalizeApiKeyInput(params.opts.token); + await setLitellmApiKey(apiKey, params.agentDir); + hasCredential = true; + } + + if (!hasCredential) { + await params.prompter.note( + [ + "LiteLLM is an OpenAI-compatible proxy that supports many models.", + "You'll need to provide:", + " 1. Base URL (e.g., http://localhost:4000)", + " 2. API key", + " 3. Model selection (fetched from your LiteLLM instance)", + ].join("\n"), + "LiteLLM", + ); + } + + // Check for existing env key + const envKey = resolveEnvApiKey("litellm"); + if (!hasCredential && envKey) { + const useExisting = await params.prompter.confirm({ + message: `Use existing LITELLM_API_KEY (${envKey.source}, ${formatApiKeyPreview(envKey.apiKey)})?`, + initialValue: true, + }); + if (useExisting) { + apiKey = envKey.apiKey; + await setLitellmApiKey(apiKey, params.agentDir); + hasCredential = true; + } + } + + // Helper function to prompt for API key + const promptForApiKey = async () => { + const key = await params.prompter.text({ + message: "Enter LiteLLM API key", + validate: validateApiKeyInput, + }); + return normalizeApiKeyInput(String(key)); + }; + + // Helper function to prompt for base URL + const promptForBaseUrl = async () => { + const defaultBaseUrl = process.env.LITELLM_BASE_URL ?? 
"http://localhost:4000"; + const baseUrl = await params.prompter.text({ + message: "Enter LiteLLM base URL", + initialValue: defaultBaseUrl, + placeholder: defaultBaseUrl, + validate: (value) => { + if (!value?.trim()) return "Base URL is required"; + try { + new URL(value); + return undefined; + } catch { + return "Invalid URL format"; + } + }, + }); + return String(baseUrl).trim(); + }; + + if (!hasCredential) { + apiKey = await promptForApiKey(); + await setLitellmApiKey(apiKey, params.agentDir); + } + + // Check for pre-provided base URL via CLI option (--litellm-base-url) + let normalizedBaseUrl: string; + if (params.opts?.litellmBaseUrl) { + normalizedBaseUrl = params.opts.litellmBaseUrl.trim(); + } else { + normalizedBaseUrl = await promptForBaseUrl(); + } + + // Try to fetch available models from LiteLLM + type LitellmModelInfo = { id: string; maxInputTokens?: number; maxOutputTokens?: number }; + let availableModels: LitellmModelInfo[] = []; + const authHeaders: Record = apiKey ? 
{ Authorization: `Bearer ${apiKey}` } : {}; + + // First fetch model list from /v1/models + try { + const modelsUrl = new URL("/v1/models", normalizedBaseUrl).toString(); + const response = await fetch(modelsUrl, { + headers: authHeaders, + signal: AbortSignal.timeout(10000), + }); + if (response.ok) { + const data = (await response.json()) as { + data?: Array<{ id: string }>; + }; + if (data.data && Array.isArray(data.data)) { + availableModels = data.data.map((m) => ({ id: m.id })); + } + } + } catch { + // Fetching models failed - will fall back to manual entry + } + + // Then fetch detailed model info from /model/info (LiteLLM-specific endpoint) + // This provides context window and max tokens info + type ModelInfoEntry = { + model_name: string; + model_info?: { + max_input_tokens?: number; + max_tokens?: number; + max_output_tokens?: number; + }; + }; + const modelInfoMap = new Map(); + try { + const modelInfoUrl = new URL("/model/info", normalizedBaseUrl).toString(); + const response = await fetch(modelInfoUrl, { + headers: authHeaders, + signal: AbortSignal.timeout(10000), + }); + if (response.ok) { + const data = (await response.json()) as { data?: ModelInfoEntry[] }; + if (data.data && Array.isArray(data.data)) { + for (const entry of data.data) { + if (entry.model_name && entry.model_info) { + modelInfoMap.set(entry.model_name, { + maxInputTokens: entry.model_info.max_input_tokens, + maxOutputTokens: entry.model_info.max_output_tokens ?? 
entry.model_info.max_tokens, + }); + } + } + } + } + } catch { + // Model info fetch failed - context window will need manual entry + } + + // Merge model info into available models + availableModels = availableModels.map((m) => { + const info = modelInfoMap.get(m.id); + return { + id: m.id, + maxInputTokens: info?.maxInputTokens, + maxOutputTokens: info?.maxOutputTokens, + }; + }); + + let normalizedModelId: string; + let contextWindow: number | undefined; + let maxTokens: number | undefined; + + // Check for pre-provided model via CLI option (--litellm-model) + if (params.opts?.litellmModel) { + normalizedModelId = params.opts.litellmModel.trim(); + // Try to get context info from model info map + const modelInfo = availableModels.find((m) => m.id === normalizedModelId); + if (modelInfo?.maxInputTokens) { + contextWindow = modelInfo.maxInputTokens; + } + if (modelInfo?.maxOutputTokens) { + maxTokens = modelInfo.maxOutputTokens; + } + } else if (availableModels.length > 0) { + // Let user select from available models + type SelectOption = { value: string; label: string; hint?: string }; + const modelOptions: SelectOption[] = availableModels.map((m) => ({ + value: m.id, + label: m.id, + hint: m.maxInputTokens ? 
`${Math.round(m.maxInputTokens / 1000)}k context` : undefined, + })); + + const selectedModel = await params.prompter.select({ + message: `Select model (${availableModels.length} available)`, + options: modelOptions, + }); + + normalizedModelId = String(selectedModel); + const modelInfo = availableModels.find((m) => m.id === normalizedModelId); + if (modelInfo?.maxInputTokens) { + contextWindow = modelInfo.maxInputTokens; + } + if (modelInfo?.maxOutputTokens) { + maxTokens = modelInfo.maxOutputTokens; + } + } else { + // No models available from LiteLLM - offer manual entry or retry + await params.prompter.note( + [ + "Could not fetch models from LiteLLM server.", + `Server: ${normalizedBaseUrl}`, + "", + "This could be due to:", + " • Invalid API key", + " • Server not accessible", + " • Network connectivity issues", + ].join("\n"), + "Model fetch failed", + ); + + const action = await params.prompter.select({ + message: "How would you like to proceed?", + options: [ + { value: "retry-apikey", label: "Re-enter API key" }, + { value: "retry-baseurl", label: "Re-enter base URL" }, + { value: "cancel", label: "Go back to auth method selection" }, + ], + }); + + if (action === "cancel") { + // Throw an error with a specific message that signals to restart auth selection + // The caller should catch this and re-prompt for auth choice + throw new Error("AUTH_CHOICE_CANCELLED"); + } + + if (action === "retry-apikey") { + // Re-prompt for API key and retry the entire flow + // Clear the CLI-provided options to force prompting + const newParams = { + ...params, + authChoice: "litellm-api-key" as const, + opts: { + ...params.opts, + litellmApiKey: undefined, // Clear the CLI-provided API key so we can prompt + token: undefined, // Also clear token if it was used + }, + }; + return await applyAuthChoiceApiProviders(newParams); + } + + if (action === "retry-baseurl") { + // Re-prompt for base URL and retry the entire flow + // This ensures we go through the full fetch process 
again with the new URL + const newParams = { + ...params, + authChoice: "litellm-api-key" as const, + opts: { + ...params.opts, + litellmBaseUrl: undefined, // Clear the CLI-provided URL so we can prompt + }, + }; + return await applyAuthChoiceApiProviders(newParams); + } + + // This should never be reached, but throw error as fallback + throw new Error("Failed to configure LiteLLM provider"); + } + + // Strip litellm/ prefix if the API returned it (avoid litellm/litellm/model) + if (normalizedModelId.startsWith("litellm/")) { + normalizedModelId = normalizedModelId.slice("litellm/".length); + } + + const modelRef = `litellm/${normalizedModelId}`; + + nextConfig = applyAuthProfileConfig(nextConfig, { + profileId: "litellm:default", + provider: "litellm", + mode: "api_key", + }); + + if (params.setDefaultModel) { + nextConfig = applyLitellmConfig(nextConfig, { + baseUrl: normalizedBaseUrl, + modelId: normalizedModelId, + contextWindow, + maxTokens, + }); + await params.prompter.note( + `Default model set to ${modelRef}${contextWindow ? 
` (${Math.round(contextWindow / 1000)}k context)` : ""}`, + "Model configured", + ); + } else { + nextConfig = applyLitellmProviderConfig(nextConfig, { + baseUrl: normalizedBaseUrl, + modelId: normalizedModelId, + contextWindow, + maxTokens, + }); + agentModelOverride = modelRef; + await noteAgentModel(modelRef); + } + + return { config: nextConfig, agentModelOverride }; + } + return null; } diff --git a/src/commands/auth-choice.apply.ts b/src/commands/auth-choice.apply.ts index c36a3981a..55f85067f 100644 --- a/src/commands/auth-choice.apply.ts +++ b/src/commands/auth-choice.apply.ts @@ -24,6 +24,12 @@ export type ApplyAuthChoiceParams = { opts?: { tokenProvider?: string; token?: string; + // LiteLLM-specific options + litellmApiKey?: string; + litellmBaseUrl?: string; + litellmModel?: string; + // Non-interactive mode flag + nonInteractive?: boolean; }; }; diff --git a/src/commands/configure.gateway-auth.ts b/src/commands/configure.gateway-auth.ts index 508ed6cf0..15fda6e6a 100644 --- a/src/commands/configure.gateway-auth.ts +++ b/src/commands/configure.gateway-auth.ts @@ -4,6 +4,7 @@ import type { RuntimeEnv } from "../runtime.js"; import type { WizardPrompter } from "../wizard/prompts.js"; import { applyAuthChoice, resolvePreferredProviderForAuthChoice } from "./auth-choice.js"; import { promptAuthChoiceGrouped } from "./auth-choice-prompt.js"; +import type { AuthChoice } from "./onboard-types.js"; import { applyModelAllowlist, applyModelFallbacksFromSelection, @@ -41,34 +42,50 @@ export async function promptAuthConfig( runtime: RuntimeEnv, prompter: WizardPrompter, ): Promise { - const authChoice = await promptAuthChoiceGrouped({ - prompter, - store: ensureAuthProfileStore(undefined, { - allowKeychainPrompt: false, - }), - includeSkip: true, - }); - let next = cfg; - if (authChoice !== "skip") { - const applied = await applyAuthChoice({ - authChoice, - config: next, + let authChoice: AuthChoice; + + // Loop to allow retrying auth choice if user cancels during 
configuration + while (true) { + authChoice = await promptAuthChoiceGrouped({ prompter, - runtime, - setDefaultModel: true, + store: ensureAuthProfileStore(undefined, { + allowKeychainPrompt: false, + }), + includeSkip: true, }); - next = applied.config; - } else { - const modelSelection = await promptDefaultModel({ - config: next, - prompter, - allowKeep: true, - ignoreAllowlist: true, - preferredProvider: resolvePreferredProviderForAuthChoice(authChoice), - }); - if (modelSelection.model) { - next = applyPrimaryModel(next, modelSelection.model); + + if (authChoice !== "skip") { + try { + const applied = await applyAuthChoice({ + authChoice, + config: next, + prompter, + runtime, + setDefaultModel: true, + }); + next = applied.config; + break; // Success - exit the loop + } catch (error) { + // If user cancelled to go back to auth selection, loop again + if (error instanceof Error && error.message === "AUTH_CHOICE_CANCELLED") { + continue; + } + // Re-throw other errors + throw error; + } + } else { + const modelSelection = await promptDefaultModel({ + config: next, + prompter, + allowKeep: true, + ignoreAllowlist: true, + preferredProvider: resolvePreferredProviderForAuthChoice(authChoice), + }); + if (modelSelection.model) { + next = applyPrimaryModel(next, modelSelection.model); + } + break; // Skip selected - exit the loop } } diff --git a/src/commands/onboard-auth.config-core.ts b/src/commands/onboard-auth.config-core.ts index c94eeb51b..bc166b03f 100644 --- a/src/commands/onboard-auth.config-core.ts +++ b/src/commands/onboard-auth.config-core.ts @@ -484,6 +484,115 @@ export function applyVeniceConfig(cfg: OpenClawConfig): OpenClawConfig { }; } +/** + * Apply LiteLLM provider configuration without changing the default model. + * LiteLLM is a flexible proxy that supports many models, so base URL and model + * are user-configurable. 
+ */ +export function applyLitellmProviderConfig( + cfg: OpenClawConfig, + params: { + baseUrl: string; + modelId: string; + modelName?: string; + contextWindow?: number; + maxTokens?: number; + }, +): OpenClawConfig { + const modelRef = `litellm/${params.modelId}`; + const models = { ...cfg.agents?.defaults?.models }; + models[modelRef] = { + ...models[modelRef], + alias: models[modelRef]?.alias ?? params.modelName ?? params.modelId, + }; + + const providers = { ...cfg.models?.providers }; + const existingProvider = providers.litellm; + const existingModels = Array.isArray(existingProvider?.models) ? existingProvider.models : []; + // Detect Claude models and use Anthropic Messages API for proper cache control support + const isClaude = params.modelId.toLowerCase().startsWith("claude-"); + const newModel = { + id: params.modelId, + name: params.modelName ?? params.modelId, + // Claude models through LiteLLM should use anthropic-messages API for cache control + ...(isClaude ? { api: "anthropic-messages" as const } : {}), + reasoning: false, + input: ["text"] as ("text" | "image")[], + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, + contextWindow: params.contextWindow ?? 128000, + maxTokens: params.maxTokens ?? 8192, + // LiteLLM proxies to various providers that may not support the OpenAI Responses API + // `store` parameter. Disable it to avoid "Extra inputs are not permitted" errors. + compat: { supportsStore: false }, + }; + const hasModel = existingModels.some((model) => model.id === params.modelId); + const mergedModels = hasModel ? existingModels : [...existingModels, newModel]; + const { apiKey: existingApiKey, ...existingProviderRest } = (existingProvider ?? {}) as Record< + string, + unknown + > as { apiKey?: string }; + const resolvedApiKey = typeof existingApiKey === "string" ? 
existingApiKey : undefined; + const normalizedApiKey = resolvedApiKey?.trim(); + providers.litellm = { + ...existingProviderRest, + baseUrl: params.baseUrl, + api: "openai-completions", + ...(normalizedApiKey ? { apiKey: normalizedApiKey } : {}), + models: mergedModels.length > 0 ? mergedModels : [newModel], + }; + + return { + ...cfg, + agents: { + ...cfg.agents, + defaults: { + ...cfg.agents?.defaults, + models, + }, + }, + models: { + mode: cfg.models?.mode ?? "merge", + providers, + }, + }; +} + +/** + * Apply LiteLLM provider configuration AND set LiteLLM as the default model. + * Use this when LiteLLM is the primary provider choice during onboarding. + */ +export function applyLitellmConfig( + cfg: OpenClawConfig, + params: { + baseUrl: string; + modelId: string; + modelName?: string; + contextWindow?: number; + maxTokens?: number; + }, +): OpenClawConfig { + const next = applyLitellmProviderConfig(cfg, params); + const modelRef = `litellm/${params.modelId}`; + const existingModel = next.agents?.defaults?.model; + return { + ...next, + agents: { + ...next.agents, + defaults: { + ...next.agents?.defaults, + model: { + ...(existingModel && "fallbacks" in (existingModel as Record) + ? 
{ + fallbacks: (existingModel as { fallbacks?: string[] }).fallbacks, + } + : undefined), + primary: modelRef, + }, + }, + }, + }; +} + export function applyAuthProfileConfig( cfg: OpenClawConfig, params: { diff --git a/src/commands/onboard-auth.credentials.ts b/src/commands/onboard-auth.credentials.ts index fbf6dbfb9..c32f0a728 100644 --- a/src/commands/onboard-auth.credentials.ts +++ b/src/commands/onboard-auth.credentials.ts @@ -177,3 +177,17 @@ export async function setOpencodeZenApiKey(key: string, agentDir?: string) { agentDir: resolveAuthAgentDir(agentDir), }); } + +export const LITELLM_DEFAULT_MODEL_REF = "litellm/gpt-4"; + +export async function setLitellmApiKey(key: string, agentDir?: string) { + upsertAuthProfile({ + profileId: "litellm:default", + credential: { + type: "api_key", + provider: "litellm", + key, + }, + agentDir: resolveAuthAgentDir(agentDir), + }); +} diff --git a/src/commands/onboard-auth.ts b/src/commands/onboard-auth.ts index 612b24865..9a006b351 100644 --- a/src/commands/onboard-auth.ts +++ b/src/commands/onboard-auth.ts @@ -7,6 +7,8 @@ export { applyAuthProfileConfig, applyKimiCodeConfig, applyKimiCodeProviderConfig, + applyLitellmConfig, + applyLitellmProviderConfig, applyMoonshotConfig, applyMoonshotProviderConfig, applyOpenrouterConfig, @@ -35,10 +37,12 @@ export { applyOpencodeZenProviderConfig, } from "./onboard-auth.config-opencode.js"; export { + LITELLM_DEFAULT_MODEL_REF, OPENROUTER_DEFAULT_MODEL_REF, setAnthropicApiKey, setGeminiApiKey, setKimiCodeApiKey, + setLitellmApiKey, setMinimaxApiKey, setMoonshotApiKey, setOpencodeZenApiKey, diff --git a/src/commands/onboard-types.ts b/src/commands/onboard-types.ts index f4154bc6d..b9dab483f 100644 --- a/src/commands/onboard-types.ts +++ b/src/commands/onboard-types.ts @@ -17,6 +17,7 @@ export type AuthChoice = | "kimi-code-api-key" | "synthetic-api-key" | "venice-api-key" + | "litellm-api-key" | "codex-cli" | "apiKey" | "gemini-api-key" @@ -73,6 +74,9 @@ export type OnboardOptions = 
{ syntheticApiKey?: string; veniceApiKey?: string; opencodeZenApiKey?: string; + litellmApiKey?: string; + litellmBaseUrl?: string; + litellmModel?: string; gatewayPort?: number; gatewayBind?: GatewayBind; gatewayAuth?: GatewayAuthChoice; diff --git a/src/config/defaults.ts b/src/config/defaults.ts index d68be922f..11c841b04 100644 --- a/src/config/defaults.ts +++ b/src/config/defaults.ts @@ -323,7 +323,12 @@ export function applyContextPruningDefaults(cfg: OpenClawConfig): OpenClawConfig for (const [key, entry] of Object.entries(nextModels)) { const parsed = parseModelRef(key, "anthropic"); - if (!parsed || parsed.provider !== "anthropic") continue; + if (!parsed) continue; + // Apply cache control to Anthropic models and LiteLLM Claude models + const isAnthropicProvider = parsed.provider === "anthropic"; + const isLitellmClaude = + parsed.provider === "litellm" && parsed.model.toLowerCase().startsWith("claude-"); + if (!isAnthropicProvider && !isLitellmClaude) continue; const current = entry ?? {}; const params = (current as { params?: Record<string, unknown> }).params ?? {}; if (typeof params.cacheControlTtl === "string") continue; @@ -337,17 +342,24 @@ export function applyContextPruningDefaults(cfg: OpenClawConfig): OpenClawConfig const primary = resolvePrimaryModelRef(defaults.model?.primary ?? undefined); if (primary) { const parsedPrimary = parseModelRef(primary, "anthropic"); - if (parsedPrimary?.provider === "anthropic") { - const key = `${parsedPrimary.provider}/${parsedPrimary.model}`; - const entry = nextModels[key]; - const current = entry ?? {}; - const params = (current as { params?: Record<string, unknown> }).params ?? 
{}; - if (typeof params.cacheControlTtl !== "string") { - nextModels[key] = { - ...(current as Record<string, unknown>), - params: { ...params, cacheControlTtl: "1h" }, - }; - modelsMutated = true; + if (parsedPrimary) { + // Apply cache control to Anthropic models and LiteLLM Claude models + const isAnthropicProvider = parsedPrimary.provider === "anthropic"; + const isLitellmClaude = + parsedPrimary.provider === "litellm" && + parsedPrimary.model.toLowerCase().startsWith("claude-"); + if (isAnthropicProvider || isLitellmClaude) { + const key = `${parsedPrimary.provider}/${parsedPrimary.model}`; + const entry = nextModels[key]; + const current = entry ?? {}; + const params = (current as { params?: Record<string, unknown> }).params ?? {}; + if (typeof params.cacheControlTtl !== "string") { + nextModels[key] = { + ...(current as Record<string, unknown>), + params: { ...params, cacheControlTtl: "1h" }, + }; + modelsMutated = true; + } } } } diff --git a/src/config/io.ts b/src/config/io.ts index ac32bed5b..e6e45f991 100644 --- a/src/config/io.ts +++ b/src/config/io.ts @@ -48,6 +48,7 @@ const SHELL_ENV_EXPECTED_KEYS = [ "AI_GATEWAY_API_KEY", "MINIMAX_API_KEY", "SYNTHETIC_API_KEY", + "LITELLM_API_KEY", "ELEVENLABS_API_KEY", "TELEGRAM_BOT_TOKEN", "DISCORD_BOT_TOKEN", diff --git a/src/wizard/onboarding.ts b/src/wizard/onboarding.ts index ef2e349c6..32e1857fe 100644 --- a/src/wizard/onboarding.ts +++ b/src/wizard/onboarding.ts @@ -21,6 +21,7 @@ import { promptRemoteGatewayConfig } from "../commands/onboard-remote.js"; import { setupSkills } from "../commands/onboard-skills.js"; import { setupInternalHooks } from "../commands/onboard-hooks.js"; import type { + AuthChoice, GatewayAuthChoice, OnboardMode, OnboardOptions, @@ -354,26 +355,43 @@ export async function runOnboardingWizard( allowKeychainPrompt: false, }); const authChoiceFromPrompt = opts.authChoice === undefined; - const authChoice = - opts.authChoice ?? 
- (await promptAuthChoiceGrouped({ - prompter, - store: authStore, - includeSkip: true, - })); + let authChoice: AuthChoice; - const authResult = await applyAuthChoice({ - authChoice, - config: nextConfig, - prompter, - runtime, - setDefaultModel: true, - opts: { - tokenProvider: opts.tokenProvider, - token: opts.authChoice === "apiKey" && opts.token ? opts.token : undefined, - }, - }); - nextConfig = authResult.config; + // Loop to allow retrying auth choice if user cancels during configuration + while (true) { + authChoice = + opts.authChoice ?? + (await promptAuthChoiceGrouped({ + prompter, + store: authStore, + includeSkip: true, + })); + + try { + const authResult = await applyAuthChoice({ + authChoice, + config: nextConfig, + prompter, + runtime, + setDefaultModel: true, + opts: { + tokenProvider: opts.tokenProvider, + token: opts.authChoice === "apiKey" && opts.token ? opts.token : undefined, + }, + }); + nextConfig = authResult.config; + break; // Success - exit the loop + } catch (error) { + // If user cancelled to go back to auth selection, loop again + if (error instanceof Error && error.message === "AUTH_CHOICE_CANCELLED") { + // Clear opts.authChoice so we prompt again + opts.authChoice = undefined; + continue; + } + // Re-throw other errors + throw error; + } + } if (authChoiceFromPrompt) { const modelSelection = await promptDefaultModel({