Added Cerebras as a model provider.

2026-01-26 23:57:04 +01:00 · 2026-01-26 23:57:04 +01:00 · e31ff452ae
commit e31ff452ae
parent 820ab8765a
13 changed files with 346 additions and 4 deletions
--- a/docs/concepts/model-providers.md
+++ b/docs/concepts/model-providers.md
@ -114,8 +114,12 @@ Clawdbot ships with the pi‑ai catalog. These providers require **no**
 - xAI: `xai` (`XAI_API_KEY`)
 - Groq: `groq` (`GROQ_API_KEY`)
 - Cerebras: `cerebras` (`CEREBRAS_API_KEY`)
-  - GLM models on Cerebras use ids `zai-glm-4.7` and `zai-glm-4.6`.
+  - Ultra-fast inference with custom AI accelerator chips
-  - OpenAI-compatible base URL: `https://api.cerebras.ai/v1`.
+  - Native models: `llama3.1-8b`, `llama-3.3-70b`, `gpt-oss-120b`, `qwen-3-32b`, `qwen-3-235b-a22b-instruct-2507`, `zai-glm-4.7`
  - Example: `cerebras/llama3.1-8b`
  - OpenAI-compatible base URL: `https://api.cerebras.ai/v1`
  - CLI: `clawdbot onboard --auth-choice cerebras-api-key`
  - See [/providers/cerebras](/providers/cerebras) for full setup
 - Mistral: `mistral` (`MISTRAL_API_KEY`)
 - GitHub Copilot: `github-copilot` (`COPILOT_GITHUB_TOKEN` / `GH_TOKEN` / `GITHUB_TOKEN`)
--- a/docs/gateway/configuration.md
+++ b/docs/gateway/configuration.md
@ -2556,9 +2556,35 @@ Notes:
 - Available model: `MiniMax-M2.1` (default).
 - Update pricing in `models.json` if you need exact cost tracking.
-### Cerebras (GLM 4.6 / 4.7)
+### Cerebras (Llama + GLM models)
-Use Cerebras via their OpenAI-compatible endpoint:
+Cerebras provides ultra-fast inference with Llama models and GLM models via their OpenAI-compatible endpoint.
 **Quick setup with native Llama models:**
 ```bash
 clawdbot onboard --auth-choice cerebras-api-key
 ```
 **Config snippet (Llama models):**
 ```json5
 {
  env: { CEREBRAS_API_KEY: "..." },
  agents: {
    defaults: {
      model: { primary: "cerebras/llama3.1-8b" },
      models: {
         "cerebras/llama3.1-8b": { alias: "Llama 3.1 8B" },
         "cerebras/llama-3.3-70b": { alias: "Llama 3.3 70B" }
      }
    }
  }
 }
 ```
 **Advanced: GLM models via Cerebras:**
 ```json5
 {
@ -2593,8 +2619,11 @@ Use Cerebras via their OpenAI-compatible endpoint:
 ```
 Notes:
 - Native Llama models: `cerebras/llama3.1-8b`, `cerebras/llama-3.3-70b`
 - Use `cerebras/zai-glm-4.7` for Cerebras; use `zai/glm-4.7` for Z.AI direct.
 - Set `CEREBRAS_API_KEY` in the environment or config.
 - Get your API key at [cloud.cerebras.ai](https://cloud.cerebras.ai/).
 - See [/providers/cerebras](/providers/cerebras) for more details.
 Notes:
 - Supported APIs: `openai-completions`, `openai-responses`, `anthropic-messages`,
--- a/docs/providers/cerebras.md
+++ b/docs/providers/cerebras.md
@ -0,0 +1,49 @@
 ---
 summary: "Use Cerebras ultra-fast inference for Llama, Qwen, and GLM models via an OpenAI-compatible API"
 read_when:
  - You want to use Cerebras inference
  - You need ultra-fast model responses
 ---
 # Cerebras
 Cerebras provides **ultra-fast inference** using their custom AI accelerator chips, delivering industry-leading speed for popular open-source models through an OpenAI-compatible API.
 ## CLI setup
 ```bash
 clawdbot onboard --auth-choice cerebras-api-key
 # or non-interactive
 clawdbot onboard --cerebras-api-key "$CEREBRAS_API_KEY"
 ```
 ## Config snippet
 ```json5
 {
  env: { CEREBRAS_API_KEY: "csk-..." },
  agents: {
    defaults: {
      model: { primary: "cerebras/llama3.1-8b" }
    }
  }
 }
 ```
 ## Available models
 All models run at FP16 or FP16/FP8 precision:
 - `cerebras/llama3.1-8b` - Llama 3.1 8B (FP16)
 - `cerebras/llama-3.3-70b` - Llama 3.3 70B (FP16)
 - `cerebras/gpt-oss-120b` - GPT OSS 120B (FP16/FP8)
 - `cerebras/qwen-3-32b` - Qwen 3 32B (FP16)
 - `cerebras/qwen-3-235b-a22b-instruct-2507` - Qwen 3 235B (FP16/FP8)
 - `cerebras/zai-glm-4.7` - GLM 4.7 (FP16/FP8)
 ## Notes
 - Base URL: `https://api.cerebras.ai/v1`
 - OpenAI-compatible API (drop-in replacement)
 - Model refs use `cerebras/<model>` format
 - Get API key at: https://cloud.cerebras.ai/
 - For more model options, see [/concepts/model-providers](/concepts/model-providers)
--- a/docs/providers/index.md
+++ b/docs/providers/index.md
@ -45,6 +45,7 @@ See [Venice AI](/providers/venice).
 - [GLM models](/providers/glm)
 - [MiniMax](/providers/minimax)
 - [Venius (Venice AI, privacy-focused)](/providers/venice)
 - [Cerebras (ultra-fast inference, Llama/Qwen/GLM)](/providers/cerebras)
 - [Ollama (local models)](/providers/ollama)
 ## Transcription providers
--- a/src/agents/models-config.providers.ts
+++ b/src/agents/models-config.providers.ts
@ -75,6 +75,16 @@ const OLLAMA_DEFAULT_COST = {
  cacheWrite: 0,
 };
 // Base URL used for the Cerebras provider entry built by buildCerebrasProvider().
 const CEREBRAS_BASE_URL = "https://api.cerebras.ai/v1";
 // Context window applied to every model in the Cerebras catalog.
 // NOTE(review): one value is shared by all models; confirm against per-model limits.
 const CEREBRAS_DEFAULT_CONTEXT_WINDOW = 128000;
 // Max output tokens applied to every model in the Cerebras catalog.
 // NOTE(review): shared by all models; confirm against per-model limits.
 const CEREBRAS_DEFAULT_MAX_TOKENS = 8192;
 // Cost table shared (same object reference) by every Cerebras model entry.
 // All rates are zero; presumably a placeholder rather than real pricing, TODO confirm.
 const CEREBRAS_DEFAULT_COST = {
  input: 0,
  output: 0,
  cacheRead: 0,
  cacheWrite: 0,
 };
 interface OllamaModel {
  name: string;
  modified_at: string;
@ -359,6 +369,69 @@ async function buildOllamaProvider(): Promise<ProviderConfig> {
  };
 }
 /**
  * Assembles the implicit provider entry for Cerebras.
  *
  * Every catalog entry shares the same capability flags, cost table, context
  * window and output-token limit; only the model id and display name differ,
  * so the model list is generated from an id/name table.
  *
  * @returns Provider config targeting `CEREBRAS_BASE_URL` through the
  *   `openai-completions` API. No API key is set here.
  */
 function buildCerebrasProvider(): ProviderConfig {
  // [model id, display name], in the order the models are listed.
  const catalog: ReadonlyArray<readonly [string, string]> = [
    ["llama3.1-8b", "Llama 3.1 8B"],
    ["llama-3.3-70b", "Llama 3.3 70B"],
    ["gpt-oss-120b", "GPT OSS 120B"],
    ["qwen-3-32b", "Qwen 3 32B"],
    ["qwen-3-235b-a22b-instruct-2507", "Qwen 3 235B A22B Instruct"],
    ["zai-glm-4.7", "GLM 4.7"],
  ];

  return {
    baseUrl: CEREBRAS_BASE_URL,
    api: "openai-completions",
    // Mapped inline so the literals stay contextually typed by ProviderConfig.
    models: catalog.map(([id, name]) => ({
      id,
      name,
      reasoning: false,
      input: ["text"],
      cost: CEREBRAS_DEFAULT_COST,
      contextWindow: CEREBRAS_DEFAULT_CONTEXT_WINDOW,
      maxTokens: CEREBRAS_DEFAULT_MAX_TOKENS,
    })),
  };
 }
 export async function resolveImplicitProviders(params: {
  agentDir: string;
 }): Promise<ModelsConfig["providers"]> {
@ -418,6 +491,13 @@ export async function resolveImplicitProviders(params: {
    providers.ollama = { ...(await buildOllamaProvider()), apiKey: ollamaKey };
  }
  const cerebrasKey =
    resolveEnvApiKeyVarName("cerebras") ??
    resolveApiKeyFromProfiles({ provider: "cerebras", store: authStore });
  if (cerebrasKey) {
    providers.cerebras = { ...buildCerebrasProvider(), apiKey: cerebrasKey };
  }
  return providers;
 }
--- a/src/cli/program/register.onboard.ts
+++ b/src/cli/program/register.onboard.ts
@ -76,6 +76,7 @@ export function registerOnboardCommand(program: Command) {
    .option("--synthetic-api-key <key>", "Synthetic API key")
    .option("--venice-api-key <key>", "Venice API key")
    .option("--opencode-zen-api-key <key>", "OpenCode Zen API key")
    .option("--cerebras-api-key <key>", "Cerebras API key")
    .option("--gateway-port <port>", "Gateway port")
    .option("--gateway-bind <mode>", "Gateway bind: loopback|tailnet|lan|auto|custom")
    .option("--gateway-auth <mode>", "Gateway auth: token|password")
--- a/src/commands/auth-choice-options.ts
+++ b/src/commands/auth-choice-options.ts
@ -20,6 +20,7 @@ export type AuthChoiceGroupId =
  | "minimax"
  | "synthetic"
  | "venice"
  | "cerebras"
  | "qwen";
 export type AuthChoiceGroup = {
@ -71,6 +72,12 @@ const AUTH_CHOICE_GROUP_DEFS: {
    hint: "Privacy-focused (uncensored models)",
    choices: ["venice-api-key"],
  },
  {
    value: "cerebras",
    label: "Cerebras",
    hint: "Ultra-fast inference (Llama/Qwen/GLM)",
    choices: ["cerebras-api-key"],
  },
  {
    value: "google",
    label: "Google",
@ -147,6 +154,11 @@ export function buildAuthChoiceOptions(params: {
    label: "Venice AI API key",
    hint: "Privacy-focused inference (uncensored models)",
  });
  options.push({
    value: "cerebras-api-key",
    label: "Cerebras API key",
    hint: "Ultra-fast inference (Llama/Qwen/GLM)",
  });
  options.push({
    value: "github-copilot",
    label: "GitHub Copilot (GitHub device login)",
--- a/src/commands/auth-choice.apply.api-providers.ts
+++ b/src/commands/auth-choice.apply.api-providers.ts
@ -13,6 +13,8 @@ import {
 } from "./google-gemini-model-default.js";
 import {
  applyAuthProfileConfig,
  applyCerebrasConfig,
  applyCerebrasProviderConfig,
  applyKimiCodeConfig,
  applyKimiCodeProviderConfig,
  applyMoonshotConfig,
@ -28,12 +30,14 @@ import {
  applyVercelAiGatewayConfig,
  applyVercelAiGatewayProviderConfig,
  applyZaiConfig,
  CEREBRAS_DEFAULT_MODEL_REF,
  KIMI_CODE_MODEL_REF,
  MOONSHOT_DEFAULT_MODEL_REF,
  OPENROUTER_DEFAULT_MODEL_REF,
  SYNTHETIC_DEFAULT_MODEL_REF,
  VENICE_DEFAULT_MODEL_REF,
  VERCEL_AI_GATEWAY_DEFAULT_MODEL_REF,
  setCerebrasApiKey,
  setGeminiApiKey,
  setKimiCodeApiKey,
  setMoonshotApiKey,
@ -85,6 +89,8 @@ export async function applyAuthChoiceApiProviders(
      authChoice = "venice-api-key";
    } else if (params.opts.tokenProvider === "opencode") {
      authChoice = "opencode-zen";
    } else if (params.opts.tokenProvider === "cerebras") {
      authChoice = "cerebras-api-key";
    }
  }
@ -522,6 +528,65 @@ export async function applyAuthChoiceApiProviders(
    return { config: nextConfig, agentModelOverride };
  }
  if (authChoice === "cerebras-api-key") {
    let hasCredential = false;
    if (!hasCredential && params.opts?.token && params.opts?.tokenProvider === "cerebras") {
      await setCerebrasApiKey(normalizeApiKeyInput(params.opts.token), params.agentDir);
      hasCredential = true;
    }
    if (!hasCredential) {
      await params.prompter.note(
        [
          "Cerebras provides ultra-fast inference with their custom AI chips.",
          "Get your API key at: https://cloud.cerebras.ai/",
          "Available models: llama3.1-8b, llama-3.3-70b, gpt-oss-120b, qwen-3-32b, qwen-3-235b, zai-glm-4.7",
        ].join("\n"),
        "Cerebras",
      );
    }
    const envKey = resolveEnvApiKey("cerebras");
    if (envKey) {
      const useExisting = await params.prompter.confirm({
        message: `Use existing CEREBRAS_API_KEY (${envKey.source}, ${formatApiKeyPreview(envKey.apiKey)})?`,
        initialValue: true,
      });
      if (useExisting) {
        await setCerebrasApiKey(envKey.apiKey, params.agentDir);
        hasCredential = true;
      }
    }
    if (!hasCredential) {
      const key = await params.prompter.text({
        message: "Enter Cerebras API key",
        validate: validateApiKeyInput,
      });
      await setCerebrasApiKey(normalizeApiKeyInput(String(key)), params.agentDir);
    }
    nextConfig = applyAuthProfileConfig(nextConfig, {
      profileId: "cerebras:default",
      provider: "cerebras",
      mode: "api_key",
    });
    {
      const applied = await applyDefaultModelChoice({
        config: nextConfig,
        setDefaultModel: params.setDefaultModel,
        defaultModel: CEREBRAS_DEFAULT_MODEL_REF,
        applyDefaultConfig: applyCerebrasConfig,
        applyProviderConfig: applyCerebrasProviderConfig,
        noteDefault: CEREBRAS_DEFAULT_MODEL_REF,
        noteAgentModel,
        prompter: params.prompter,
      });
      nextConfig = applied.config;
      agentModelOverride = applied.agentModelOverride ?? agentModelOverride;
    }
    return { config: nextConfig, agentModelOverride };
  }
  if (authChoice === "opencode-zen") {
    let hasCredential = false;
    if (!hasCredential && params.opts?.token && params.opts?.tokenProvider === "opencode") {
--- a/src/commands/onboard-auth.config-core.ts
+++ b/src/commands/onboard-auth.config-core.ts
@ -459,3 +459,64 @@ export function applyAuthProfileConfig(
    },
  };
 }
 /**
  * Registers the Cerebras models under `agents.defaults.models` without
  * changing the primary model selection.
  *
  * Each model entry keeps whatever settings it already has; an alias is only
  * filled in when the entry has none (`null`/`undefined`).
  *
  * @param cfg - Configuration to extend; it is not mutated.
  * @returns A copy of `cfg` whose `agents.defaults.models` includes the
  *   Cerebras model refs.
  */
 export function applyCerebrasProviderConfig(cfg: ClawdbotConfig): ClawdbotConfig {
  // [model ref, alias used when the entry has no alias yet]
  const aliasDefaults: ReadonlyArray<readonly [string, string]> = [
    ["cerebras/llama3.1-8b", "Llama 3.1 8B"],
    ["cerebras/llama-3.3-70b", "Llama 3.3 70B"],
    ["cerebras/gpt-oss-120b", "GPT OSS 120B"],
    ["cerebras/qwen-3-32b", "Qwen 3 32B"],
    ["cerebras/qwen-3-235b-a22b-instruct-2507", "Qwen 3 235B"],
    ["cerebras/zai-glm-4.7", "GLM 4.7"],
  ];

  // Shallow copy so the caller's models map is left untouched.
  const models = { ...cfg.agents?.defaults?.models };
  for (const [modelRef, fallbackAlias] of aliasDefaults) {
    const current = models[modelRef];
    models[modelRef] = { ...current, alias: current?.alias ?? fallbackAlias };
  }

  const defaults = { ...cfg.agents?.defaults, models };
  const agents = { ...cfg.agents, defaults };
  return { ...cfg, agents };
 }
 /**
  * Registers the Cerebras models and makes `cerebras/llama3.1-8b` the primary
  * default model.
  *
  * If the existing `agents.defaults.model` has a `fallbacks` key, that value is
  * carried over; any other fields on the existing model setting are dropped.
  *
  * @param cfg - Configuration to extend; it is not mutated.
  * @returns A copy of `cfg` with the Cerebras models registered and the
  *   primary model replaced.
  */
 export function applyCerebrasConfig(cfg: ClawdbotConfig): ClawdbotConfig {
  const withProvider = applyCerebrasProviderConfig(cfg);
  const currentModel = withProvider.agents?.defaults?.model;

  // Only the fallbacks list survives from the previous model selection.
  const hasFallbacksKey =
    currentModel && "fallbacks" in (currentModel as Record<string, unknown>);
  const carriedOver = hasFallbacksKey
    ? { fallbacks: (currentModel as { fallbacks?: string[] }).fallbacks }
    : undefined;

  const defaults = {
    ...withProvider.agents?.defaults,
    model: {
      ...carriedOver,
      primary: "cerebras/llama3.1-8b",
    },
  };
  return {
    ...withProvider,
    agents: { ...withProvider.agents, defaults },
  };
 }
--- a/src/commands/onboard-auth.credentials.ts
+++ b/src/commands/onboard-auth.credentials.ts
@ -115,6 +115,7 @@ export async function setVeniceApiKey(key: string, agentDir?: string) {
 export const ZAI_DEFAULT_MODEL_REF = "zai/glm-4.7";
 export const OPENROUTER_DEFAULT_MODEL_REF = "openrouter/auto";
 export const VERCEL_AI_GATEWAY_DEFAULT_MODEL_REF = "vercel-ai-gateway/anthropic/claude-opus-4.5";
 export const CEREBRAS_DEFAULT_MODEL_REF = "cerebras/llama3.1-8b";
 export async function setZaiApiKey(key: string, agentDir?: string) {
  // Write to resolved agent dir so gateway finds credentials on startup.
@ -164,3 +165,15 @@ export async function setOpencodeZenApiKey(key: string, agentDir?: string) {
    agentDir: resolveAuthAgentDir(agentDir),
  });
 }
 /**
  * Stores a Cerebras API key as the `cerebras:default` auth profile.
  *
  * @param key - API key to store.
  * @param agentDir - Optional agent directory; it is passed through
  *   `resolveAuthAgentDir` to obtain the directory handed to the profile store.
  */
 export async function setCerebrasApiKey(key: string, agentDir?: string) {
  const targetDir = resolveAuthAgentDir(agentDir);
  upsertAuthProfile({
    profileId: "cerebras:default",
    credential: { type: "api_key", provider: "cerebras", key },
    agentDir: targetDir,
  });
 }
--- a/src/commands/onboard-auth.ts
+++ b/src/commands/onboard-auth.ts
@ -5,6 +5,8 @@ export {
 export { VENICE_DEFAULT_MODEL_ID, VENICE_DEFAULT_MODEL_REF } from "../agents/venice-models.js";
 export {
  applyAuthProfileConfig,
  applyCerebrasConfig,
  applyCerebrasProviderConfig,
  applyKimiCodeConfig,
  applyKimiCodeProviderConfig,
  applyMoonshotConfig,
@ -34,7 +36,9 @@ export {
 } from "./onboard-auth.config-opencode.js";
 export {
  OPENROUTER_DEFAULT_MODEL_REF,
  CEREBRAS_DEFAULT_MODEL_REF,
  setAnthropicApiKey,
  setCerebrasApiKey,
  setGeminiApiKey,
  setKimiCodeApiKey,
  setMinimaxApiKey,
--- a/src/commands/onboard-non-interactive/local/auth-choice.ts
+++ b/src/commands/onboard-non-interactive/local/auth-choice.ts
@ -8,6 +8,7 @@ import { buildTokenProfileId, validateAnthropicSetupToken } from "../../auth-tok
 import { applyGoogleGeminiModelDefault } from "../../google-gemini-model-default.js";
 import {
  applyAuthProfileConfig,
  applyCerebrasConfig,
  applyKimiCodeConfig,
  applyMinimaxApiConfig,
  applyMinimaxConfig,
@ -19,6 +20,7 @@ import {
  applyVercelAiGatewayConfig,
  applyZaiConfig,
  setAnthropicApiKey,
  setCerebrasApiKey,
  setGeminiApiKey,
  setKimiCodeApiKey,
  setMinimaxApiKey,
@ -309,6 +311,25 @@ export async function applyNonInteractiveAuthChoice(params: {
    return applyVeniceConfig(nextConfig);
  }
  if (authChoice === "cerebras-api-key") {
    const resolved = await resolveNonInteractiveApiKey({
      provider: "cerebras",
      cfg: baseConfig,
      flagValue: opts.cerebrasApiKey,
      flagName: "--cerebras-api-key",
      envVar: "CEREBRAS_API_KEY",
      runtime,
    });
    if (!resolved) return null;
    if (resolved.source !== "profile") await setCerebrasApiKey(resolved.key);
    nextConfig = applyAuthProfileConfig(nextConfig, {
      profileId: "cerebras:default",
      provider: "cerebras",
      mode: "api_key",
    });
    return applyCerebrasConfig(nextConfig);
  }
  if (
    authChoice === "minimax-cloud" ||
    authChoice === "minimax-api" ||
--- a/src/commands/onboard-types.ts
+++ b/src/commands/onboard-types.ts
@ -17,6 +17,7 @@ export type AuthChoice =
  | "kimi-code-api-key"
  | "synthetic-api-key"
  | "venice-api-key"
  | "cerebras-api-key"
  | "codex-cli"
  | "apiKey"
  | "gemini-api-key"
@ -71,6 +72,7 @@ export type OnboardOptions = {
  syntheticApiKey?: string;
  veniceApiKey?: string;
  opencodeZenApiKey?: string;
  cerebrasApiKey?: string;
  gatewayPort?: number;
  gatewayBind?: GatewayBind;
  gatewayAuth?: GatewayAuthChoice;