From e31ff452ae30ba0eb8016165e0db4af705d22ae6 Mon Sep 17 00:00:00 2001 From: kkkamur07 Date: Mon, 26 Jan 2026 23:57:04 +0100 Subject: [PATCH] added cerebras as a model provider. --- docs/concepts/model-providers.md | 8 +- docs/gateway/configuration.md | 33 +++++++- docs/providers/cerebras.md | 49 ++++++++++++ docs/providers/index.md | 1 + src/agents/models-config.providers.ts | 80 +++++++++++++++++++ src/cli/program/register.onboard.ts | 1 + src/commands/auth-choice-options.ts | 12 +++ .../auth-choice.apply.api-providers.ts | 65 +++++++++++++++ src/commands/onboard-auth.config-core.ts | 61 ++++++++++++++ src/commands/onboard-auth.credentials.ts | 13 +++ src/commands/onboard-auth.ts | 4 + .../local/auth-choice.ts | 21 +++++ src/commands/onboard-types.ts | 2 + 13 files changed, 346 insertions(+), 4 deletions(-) create mode 100644 docs/providers/cerebras.md diff --git a/docs/concepts/model-providers.md b/docs/concepts/model-providers.md index 46dc4f749..a4f1c693f 100644 --- a/docs/concepts/model-providers.md +++ b/docs/concepts/model-providers.md @@ -114,8 +114,12 @@ Clawdbot ships with the pi‑ai catalog. These providers require **no** - xAI: `xai` (`XAI_API_KEY`) - Groq: `groq` (`GROQ_API_KEY`) - Cerebras: `cerebras` (`CEREBRAS_API_KEY`) - - GLM models on Cerebras use ids `zai-glm-4.7` and `zai-glm-4.6`. - - OpenAI-compatible base URL: `https://api.cerebras.ai/v1`. 
+ - Ultra-fast inference with custom AI accelerator chips + - Native models: `llama3.1-8b`, `llama-3.3-70b`, `gpt-oss-120b`, `qwen-3-32b`, `qwen-3-235b-a22b-instruct-2507`, `zai-glm-4.7` + - Example: `cerebras/llama3.1-8b` + - OpenAI-compatible base URL: `https://api.cerebras.ai/v1` + - CLI: `clawdbot onboard --auth-choice cerebras-api-key` + - See [/providers/cerebras](/providers/cerebras) for full setup - Mistral: `mistral` (`MISTRAL_API_KEY`) - GitHub Copilot: `github-copilot` (`COPILOT_GITHUB_TOKEN` / `GH_TOKEN` / `GITHUB_TOKEN`) diff --git a/docs/gateway/configuration.md b/docs/gateway/configuration.md index eaba866b1..c0ef9aa02 100644 --- a/docs/gateway/configuration.md +++ b/docs/gateway/configuration.md @@ -2556,9 +2556,35 @@ Notes: - Available model: `MiniMax-M2.1` (default). - Update pricing in `models.json` if you need exact cost tracking. -### Cerebras (GLM 4.6 / 4.7) +### Cerebras (Llama + GLM models) -Use Cerebras via their OpenAI-compatible endpoint: +Cerebras provides ultra-fast inference with Llama models and GLM models via their OpenAI-compatible endpoint. + +**Quick setup with native Llama models:** + +```bash +clawdbot onboard --auth-choice cerebras-api-key +``` + +**Config snippet (Llama models):** + +```json5 +{ + env: { CEREBRAS_API_KEY: "..." }, + agents: { + defaults: { + model: { primary: "cerebras/llama3.1-8b" }, + models: { + "cerebras/llama3.1-8b": { alias: "Llama 3.1 8B" }, + "cerebras/llama3.1-70b": { alias: "Llama 3.1 70B" }, + "cerebras/llama-3.3-70b": { alias: "Llama 3.3 70B" } + } + } + } +} +``` + +**Advanced: GLM models via Cerebras:** ```json5 { @@ -2593,8 +2619,11 @@ Use Cerebras via their OpenAI-compatible endpoint: ``` Notes: +- Native Llama models: `cerebras/llama3.1-8b`, `cerebras/llama3.1-70b`, `cerebras/llama-3.3-70b` - Use `cerebras/zai-glm-4.7` for Cerebras; use `zai/glm-4.7` for Z.AI direct. - Set `CEREBRAS_API_KEY` in the environment or config. +- Get your API key at [cloud.cerebras.ai](https://cloud.cerebras.ai/). 
+- See [/providers/cerebras](/providers/cerebras) for more details. Notes: - Supported APIs: `openai-completions`, `openai-responses`, `anthropic-messages`, diff --git a/docs/providers/cerebras.md b/docs/providers/cerebras.md new file mode 100644 index 000000000..0513b8c09 --- /dev/null +++ b/docs/providers/cerebras.md @@ -0,0 +1,49 @@ +--- +summary: "Use Cerebras ultra-fast inference for LLaMA, Qwen, GLM models via OpenAI-compatible API" +read_when: + - You want to use Cerebras inference + - You need ultra-fast model responses +--- +# Cerebras + +Cerebras provides **ultra-fast inference** using their custom AI accelerator chips, delivering industry-leading speed for popular open-source models through an OpenAI-compatible API. + +## CLI setup + +```bash +clawdbot onboard --auth-choice cerebras-api-key +# or non-interactive +clawdbot onboard --cerebras-api-key "$CEREBRAS_API_KEY" +``` + +## Config snippet + +```json5 +{ + env: { CEREBRAS_API_KEY: "csk-..." }, + agents: { + defaults: { + model: { primary: "cerebras/llama3.1-8b" } + } + } +} +``` + +## Available models + +All models run at FP16 or FP16/FP8 precision: + +- `cerebras/llama3.1-8b` - LLaMA 3.1 8B (FP16) +- `cerebras/llama-3.3-70b` - LLaMA 3.3 70B (FP16) +- `cerebras/gpt-oss-120b` - GPT OSS 120B (FP16/FP8) +- `cerebras/qwen-3-32b` - Qwen 3 32B (FP16) +- `cerebras/qwen-3-235b-a22b-instruct-2507` - Qwen 3 235B (FP16/FP8) +- `cerebras/zai-glm-4.7` - GLM 4.7 (FP16/FP8) + +## Notes + +- Base URL: `https://api.cerebras.ai/v1` +- OpenAI-compatible API (drop-in replacement) +- Model refs use `cerebras/<model-id>` format +- Get API key at: https://cloud.cerebras.ai/ +- For more model options, see [/concepts/model-providers](/concepts/model-providers) diff --git a/docs/providers/index.md b/docs/providers/index.md index b4779d201..dca8be750 100644 --- a/docs/providers/index.md +++ b/docs/providers/index.md @@ -45,6 +45,7 @@ See [Venice AI](/providers/venice). 
- [GLM models](/providers/glm) - [MiniMax](/providers/minimax) - [Venius (Venice AI, privacy-focused)](/providers/venice) +- [Cerebras (ultra-fast inference, Llama/Qwen/GLM)](/providers/cerebras) - [Ollama (local models)](/providers/ollama) ## Transcription providers diff --git a/src/agents/models-config.providers.ts b/src/agents/models-config.providers.ts index 996f09dd0..57c47b856 100644 --- a/src/agents/models-config.providers.ts +++ b/src/agents/models-config.providers.ts @@ -75,6 +75,16 @@ const OLLAMA_DEFAULT_COST = { cacheWrite: 0, }; +const CEREBRAS_BASE_URL = "https://api.cerebras.ai/v1"; +const CEREBRAS_DEFAULT_CONTEXT_WINDOW = 128000; +const CEREBRAS_DEFAULT_MAX_TOKENS = 8192; +const CEREBRAS_DEFAULT_COST = { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, +}; + interface OllamaModel { name: string; modified_at: string; @@ -359,6 +369,69 @@ async function buildOllamaProvider(): Promise { }; } +function buildCerebrasProvider(): ProviderConfig { + return { + baseUrl: CEREBRAS_BASE_URL, + api: "openai-completions", + models: [ + { + id: "llama3.1-8b", + name: "Llama 3.1 8B", + reasoning: false, + input: ["text"], + cost: CEREBRAS_DEFAULT_COST, + contextWindow: CEREBRAS_DEFAULT_CONTEXT_WINDOW, + maxTokens: CEREBRAS_DEFAULT_MAX_TOKENS, + }, + { + id: "llama-3.3-70b", + name: "Llama 3.3 70B", + reasoning: false, + input: ["text"], + cost: CEREBRAS_DEFAULT_COST, + contextWindow: CEREBRAS_DEFAULT_CONTEXT_WINDOW, + maxTokens: CEREBRAS_DEFAULT_MAX_TOKENS, + }, + { + id: "gpt-oss-120b", + name: "GPT OSS 120B", + reasoning: false, + input: ["text"], + cost: CEREBRAS_DEFAULT_COST, + contextWindow: CEREBRAS_DEFAULT_CONTEXT_WINDOW, + maxTokens: CEREBRAS_DEFAULT_MAX_TOKENS, + }, + { + id: "qwen-3-32b", + name: "Qwen 3 32B", + reasoning: false, + input: ["text"], + cost: CEREBRAS_DEFAULT_COST, + contextWindow: CEREBRAS_DEFAULT_CONTEXT_WINDOW, + maxTokens: CEREBRAS_DEFAULT_MAX_TOKENS, + }, + { + id: "qwen-3-235b-a22b-instruct-2507", + name: "Qwen 3 235B A22B 
Instruct", + reasoning: false, + input: ["text"], + cost: CEREBRAS_DEFAULT_COST, + contextWindow: CEREBRAS_DEFAULT_CONTEXT_WINDOW, + maxTokens: CEREBRAS_DEFAULT_MAX_TOKENS, + }, + { + id: "zai-glm-4.7", + name: "GLM 4.7", + reasoning: false, + input: ["text"], + cost: CEREBRAS_DEFAULT_COST, + contextWindow: CEREBRAS_DEFAULT_CONTEXT_WINDOW, + maxTokens: CEREBRAS_DEFAULT_MAX_TOKENS, + }, + ], + }; +} + export async function resolveImplicitProviders(params: { agentDir: string; }): Promise { @@ -418,6 +491,13 @@ export async function resolveImplicitProviders(params: { providers.ollama = { ...(await buildOllamaProvider()), apiKey: ollamaKey }; } + const cerebrasKey = + resolveEnvApiKeyVarName("cerebras") ?? + resolveApiKeyFromProfiles({ provider: "cerebras", store: authStore }); + if (cerebrasKey) { + providers.cerebras = { ...buildCerebrasProvider(), apiKey: cerebrasKey }; + } + return providers; } diff --git a/src/cli/program/register.onboard.ts b/src/cli/program/register.onboard.ts index eac6a60df..65f68e9a0 100644 --- a/src/cli/program/register.onboard.ts +++ b/src/cli/program/register.onboard.ts @@ -76,6 +76,7 @@ export function registerOnboardCommand(program: Command) { .option("--synthetic-api-key ", "Synthetic API key") .option("--venice-api-key ", "Venice API key") .option("--opencode-zen-api-key ", "OpenCode Zen API key") + .option("--cerebras-api-key ", "Cerebras API key") .option("--gateway-port ", "Gateway port") .option("--gateway-bind ", "Gateway bind: loopback|tailnet|lan|auto|custom") .option("--gateway-auth ", "Gateway auth: token|password") diff --git a/src/commands/auth-choice-options.ts b/src/commands/auth-choice-options.ts index 6b49ff17b..93351efbc 100644 --- a/src/commands/auth-choice-options.ts +++ b/src/commands/auth-choice-options.ts @@ -20,6 +20,7 @@ export type AuthChoiceGroupId = | "minimax" | "synthetic" | "venice" + | "cerebras" | "qwen"; export type AuthChoiceGroup = { @@ -71,6 +72,12 @@ const AUTH_CHOICE_GROUP_DEFS: { hint: 
"Privacy-focused (uncensored models)", choices: ["venice-api-key"], }, + { + value: "cerebras", + label: "Cerebras", + hint: "Ultra-fast inference (Llama/Qwen/GLM)", + choices: ["cerebras-api-key"], + }, { value: "google", label: "Google", @@ -147,6 +154,11 @@ export function buildAuthChoiceOptions(params: { label: "Venice AI API key", hint: "Privacy-focused inference (uncensored models)", }); + options.push({ + value: "cerebras-api-key", + label: "Cerebras API key", + hint: "Ultra-fast inference (Llama/Qwen/GLM)", + }); options.push({ value: "github-copilot", label: "GitHub Copilot (GitHub device login)", diff --git a/src/commands/auth-choice.apply.api-providers.ts b/src/commands/auth-choice.apply.api-providers.ts index 8be02008b..2c4a88dcf 100644 --- a/src/commands/auth-choice.apply.api-providers.ts +++ b/src/commands/auth-choice.apply.api-providers.ts @@ -13,6 +13,8 @@ import { } from "./google-gemini-model-default.js"; import { applyAuthProfileConfig, + applyCerebrasConfig, + applyCerebrasProviderConfig, applyKimiCodeConfig, applyKimiCodeProviderConfig, applyMoonshotConfig, @@ -28,12 +30,14 @@ import { applyVercelAiGatewayConfig, applyVercelAiGatewayProviderConfig, applyZaiConfig, + CEREBRAS_DEFAULT_MODEL_REF, KIMI_CODE_MODEL_REF, MOONSHOT_DEFAULT_MODEL_REF, OPENROUTER_DEFAULT_MODEL_REF, SYNTHETIC_DEFAULT_MODEL_REF, VENICE_DEFAULT_MODEL_REF, VERCEL_AI_GATEWAY_DEFAULT_MODEL_REF, + setCerebrasApiKey, setGeminiApiKey, setKimiCodeApiKey, setMoonshotApiKey, @@ -85,6 +89,8 @@ export async function applyAuthChoiceApiProviders( authChoice = "venice-api-key"; } else if (params.opts.tokenProvider === "opencode") { authChoice = "opencode-zen"; + } else if (params.opts.tokenProvider === "cerebras") { + authChoice = "cerebras-api-key"; } } @@ -522,6 +528,65 @@ export async function applyAuthChoiceApiProviders( return { config: nextConfig, agentModelOverride }; } + if (authChoice === "cerebras-api-key") { + let hasCredential = false; + + if (!hasCredential && 
params.opts?.token && params.opts?.tokenProvider === "cerebras") { + await setCerebrasApiKey(normalizeApiKeyInput(params.opts.token), params.agentDir); + hasCredential = true; + } + + if (!hasCredential) { + await params.prompter.note( + [ + "Cerebras provides ultra-fast inference with their custom AI chips.", + "Get your API key at: https://cloud.cerebras.ai/", + "Available models: llama3.1-8b, llama-3.3-70b, gpt-oss-120b, qwen-3-32b, qwen-3-235b, zai-glm-4.7", + ].join("\n"), + "Cerebras", + ); + } + + const envKey = resolveEnvApiKey("cerebras"); + if (envKey) { + const useExisting = await params.prompter.confirm({ + message: `Use existing CEREBRAS_API_KEY (${envKey.source}, ${formatApiKeyPreview(envKey.apiKey)})?`, + initialValue: true, + }); + if (useExisting) { + await setCerebrasApiKey(envKey.apiKey, params.agentDir); + hasCredential = true; + } + } + if (!hasCredential) { + const key = await params.prompter.text({ + message: "Enter Cerebras API key", + validate: validateApiKeyInput, + }); + await setCerebrasApiKey(normalizeApiKeyInput(String(key)), params.agentDir); + } + nextConfig = applyAuthProfileConfig(nextConfig, { + profileId: "cerebras:default", + provider: "cerebras", + mode: "api_key", + }); + { + const applied = await applyDefaultModelChoice({ + config: nextConfig, + setDefaultModel: params.setDefaultModel, + defaultModel: CEREBRAS_DEFAULT_MODEL_REF, + applyDefaultConfig: applyCerebrasConfig, + applyProviderConfig: applyCerebrasProviderConfig, + noteDefault: CEREBRAS_DEFAULT_MODEL_REF, + noteAgentModel, + prompter: params.prompter, + }); + nextConfig = applied.config; + agentModelOverride = applied.agentModelOverride ?? 
agentModelOverride; + } + return { config: nextConfig, agentModelOverride }; + } + if (authChoice === "opencode-zen") { let hasCredential = false; if (!hasCredential && params.opts?.token && params.opts?.tokenProvider === "opencode") { diff --git a/src/commands/onboard-auth.config-core.ts b/src/commands/onboard-auth.config-core.ts index 0d3a8523a..b208b5cc8 100644 --- a/src/commands/onboard-auth.config-core.ts +++ b/src/commands/onboard-auth.config-core.ts @@ -459,3 +459,64 @@ export function applyAuthProfileConfig( }, }; } + +export function applyCerebrasProviderConfig(cfg: ClawdbotConfig): ClawdbotConfig { + const models = { ...cfg.agents?.defaults?.models }; + models["cerebras/llama3.1-8b"] = { + ...models["cerebras/llama3.1-8b"], + alias: models["cerebras/llama3.1-8b"]?.alias ?? "Llama 3.1 8B", + }; + models["cerebras/llama-3.3-70b"] = { + ...models["cerebras/llama-3.3-70b"], + alias: models["cerebras/llama-3.3-70b"]?.alias ?? "Llama 3.3 70B", + }; + models["cerebras/gpt-oss-120b"] = { + ...models["cerebras/gpt-oss-120b"], + alias: models["cerebras/gpt-oss-120b"]?.alias ?? "GPT OSS 120B", + }; + models["cerebras/qwen-3-32b"] = { + ...models["cerebras/qwen-3-32b"], + alias: models["cerebras/qwen-3-32b"]?.alias ?? "Qwen 3 32B", + }; + models["cerebras/qwen-3-235b-a22b-instruct-2507"] = { + ...models["cerebras/qwen-3-235b-a22b-instruct-2507"], + alias: models["cerebras/qwen-3-235b-a22b-instruct-2507"]?.alias ?? "Qwen 3 235B", + }; + models["cerebras/zai-glm-4.7"] = { + ...models["cerebras/zai-glm-4.7"], + alias: models["cerebras/zai-glm-4.7"]?.alias ?? 
"GLM 4.7", + }; + + return { + ...cfg, + agents: { + ...cfg.agents, + defaults: { + ...cfg.agents?.defaults, + models, + }, + }, + }; +} + +export function applyCerebrasConfig(cfg: ClawdbotConfig): ClawdbotConfig { + const next = applyCerebrasProviderConfig(cfg); + const existingModel = next.agents?.defaults?.model; + return { + ...next, + agents: { + ...next.agents, + defaults: { + ...next.agents?.defaults, + model: { + ...(existingModel && "fallbacks" in (existingModel as Record) + ? { + fallbacks: (existingModel as { fallbacks?: string[] }).fallbacks, + } + : undefined), + primary: "cerebras/llama3.1-8b", + }, + }, + }, + }; +} diff --git a/src/commands/onboard-auth.credentials.ts b/src/commands/onboard-auth.credentials.ts index 0c7dff409..af68726ff 100644 --- a/src/commands/onboard-auth.credentials.ts +++ b/src/commands/onboard-auth.credentials.ts @@ -115,6 +115,7 @@ export async function setVeniceApiKey(key: string, agentDir?: string) { export const ZAI_DEFAULT_MODEL_REF = "zai/glm-4.7"; export const OPENROUTER_DEFAULT_MODEL_REF = "openrouter/auto"; export const VERCEL_AI_GATEWAY_DEFAULT_MODEL_REF = "vercel-ai-gateway/anthropic/claude-opus-4.5"; +export const CEREBRAS_DEFAULT_MODEL_REF = "cerebras/llama3.1-8b"; export async function setZaiApiKey(key: string, agentDir?: string) { // Write to resolved agent dir so gateway finds credentials on startup. 
@@ -164,3 +165,15 @@ export async function setOpencodeZenApiKey(key: string, agentDir?: string) { agentDir: resolveAuthAgentDir(agentDir), }); } + +export async function setCerebrasApiKey(key: string, agentDir?: string) { + upsertAuthProfile({ + profileId: "cerebras:default", + credential: { + type: "api_key", + provider: "cerebras", + key, + }, + agentDir: resolveAuthAgentDir(agentDir), + }); +} diff --git a/src/commands/onboard-auth.ts b/src/commands/onboard-auth.ts index b122d89cf..ce2ee0e4d 100644 --- a/src/commands/onboard-auth.ts +++ b/src/commands/onboard-auth.ts @@ -5,6 +5,8 @@ export { export { VENICE_DEFAULT_MODEL_ID, VENICE_DEFAULT_MODEL_REF } from "../agents/venice-models.js"; export { applyAuthProfileConfig, + applyCerebrasConfig, + applyCerebrasProviderConfig, applyKimiCodeConfig, applyKimiCodeProviderConfig, applyMoonshotConfig, @@ -34,7 +36,9 @@ export { } from "./onboard-auth.config-opencode.js"; export { OPENROUTER_DEFAULT_MODEL_REF, + CEREBRAS_DEFAULT_MODEL_REF, setAnthropicApiKey, + setCerebrasApiKey, setGeminiApiKey, setKimiCodeApiKey, setMinimaxApiKey, diff --git a/src/commands/onboard-non-interactive/local/auth-choice.ts b/src/commands/onboard-non-interactive/local/auth-choice.ts index c5558596a..128ee0749 100644 --- a/src/commands/onboard-non-interactive/local/auth-choice.ts +++ b/src/commands/onboard-non-interactive/local/auth-choice.ts @@ -8,6 +8,7 @@ import { buildTokenProfileId, validateAnthropicSetupToken } from "../../auth-tok import { applyGoogleGeminiModelDefault } from "../../google-gemini-model-default.js"; import { applyAuthProfileConfig, + applyCerebrasConfig, applyKimiCodeConfig, applyMinimaxApiConfig, applyMinimaxConfig, @@ -19,6 +20,7 @@ import { applyVercelAiGatewayConfig, applyZaiConfig, setAnthropicApiKey, + setCerebrasApiKey, setGeminiApiKey, setKimiCodeApiKey, setMinimaxApiKey, @@ -309,6 +311,25 @@ export async function applyNonInteractiveAuthChoice(params: { return applyVeniceConfig(nextConfig); } + if (authChoice === 
"cerebras-api-key") { + const resolved = await resolveNonInteractiveApiKey({ + provider: "cerebras", + cfg: baseConfig, + flagValue: opts.cerebrasApiKey, + flagName: "--cerebras-api-key", + envVar: "CEREBRAS_API_KEY", + runtime, + }); + if (!resolved) return null; + if (resolved.source !== "profile") await setCerebrasApiKey(resolved.key); + nextConfig = applyAuthProfileConfig(nextConfig, { + profileId: "cerebras:default", + provider: "cerebras", + mode: "api_key", + }); + return applyCerebrasConfig(nextConfig); + } + if ( authChoice === "minimax-cloud" || authChoice === "minimax-api" || diff --git a/src/commands/onboard-types.ts b/src/commands/onboard-types.ts index aa1d9afe0..ca28add66 100644 --- a/src/commands/onboard-types.ts +++ b/src/commands/onboard-types.ts @@ -17,6 +17,7 @@ export type AuthChoice = | "kimi-code-api-key" | "synthetic-api-key" | "venice-api-key" + | "cerebras-api-key" | "codex-cli" | "apiKey" | "gemini-api-key" @@ -71,6 +72,7 @@ export type OnboardOptions = { syntheticApiKey?: string; veniceApiKey?: string; opencodeZenApiKey?: string; + cerebrasApiKey?: string; gatewayPort?: number; gatewayBind?: GatewayBind; gatewayAuth?: GatewayAuthChoice;