added cerebras as a model provider.

This commit is contained in:
kkkamur07 2026-01-26 23:57:04 +01:00
parent 820ab8765a
commit e31ff452ae
13 changed files with 346 additions and 4 deletions

View File

@ -114,8 +114,12 @@ Clawdbot ships with the piai catalog. These providers require **no**
- xAI: `xai` (`XAI_API_KEY`) - xAI: `xai` (`XAI_API_KEY`)
- Groq: `groq` (`GROQ_API_KEY`) - Groq: `groq` (`GROQ_API_KEY`)
- Cerebras: `cerebras` (`CEREBRAS_API_KEY`) - Cerebras: `cerebras` (`CEREBRAS_API_KEY`)
- GLM models on Cerebras use ids `zai-glm-4.7` and `zai-glm-4.6`. - Ultra-fast inference with custom AI accelerator chips
- OpenAI-compatible base URL: `https://api.cerebras.ai/v1`. - Native models: `llama3.1-8b`, `llama-3.3-70b`, `gpt-oss-120b`, `qwen-3-32b`, `qwen-3-235b-a22b-instruct-2507`, `zai-glm-4.7`
- Example: `cerebras/llama3.1-8b`
- OpenAI-compatible base URL: `https://api.cerebras.ai/v1`
- CLI: `clawdbot onboard --auth-choice cerebras-api-key`
- See [/providers/cerebras](/providers/cerebras) for full setup
- Mistral: `mistral` (`MISTRAL_API_KEY`) - Mistral: `mistral` (`MISTRAL_API_KEY`)
- GitHub Copilot: `github-copilot` (`COPILOT_GITHUB_TOKEN` / `GH_TOKEN` / `GITHUB_TOKEN`) - GitHub Copilot: `github-copilot` (`COPILOT_GITHUB_TOKEN` / `GH_TOKEN` / `GITHUB_TOKEN`)

View File

@ -2556,9 +2556,35 @@ Notes:
- Available model: `MiniMax-M2.1` (default). - Available model: `MiniMax-M2.1` (default).
- Update pricing in `models.json` if you need exact cost tracking. - Update pricing in `models.json` if you need exact cost tracking.
### Cerebras (GLM 4.6 / 4.7) ### Cerebras (Llama + GLM models)
Use Cerebras via their OpenAI-compatible endpoint: Cerebras provides ultra-fast inference with Llama models and GLM models via their OpenAI-compatible endpoint.
**Quick setup with native Llama models:**
```bash
clawdbot onboard --auth-choice cerebras-api-key
```
**Config snippet (Llama models):**
```json5
{
env: { CEREBRAS_API_KEY: "..." },
agents: {
defaults: {
model: { primary: "cerebras/llama3.1-8b" },
models: {
"cerebras/llama3.1-8b": { alias: "Llama 3.1 8B" },
"cerebras/llama-3.3-70b": { alias: "Llama 3.3 70B" }
}
}
}
}
```
**Advanced: GLM models via Cerebras:**
```json5 ```json5
{ {
@ -2593,8 +2619,11 @@ Use Cerebras via their OpenAI-compatible endpoint:
``` ```
Notes: Notes:
- Native Llama models: `cerebras/llama3.1-8b`, `cerebras/llama-3.3-70b`
- Use `cerebras/zai-glm-4.7` for Cerebras; use `zai/glm-4.7` for Z.AI direct. - Use `cerebras/zai-glm-4.7` for Cerebras; use `zai/glm-4.7` for Z.AI direct.
- Set `CEREBRAS_API_KEY` in the environment or config. - Set `CEREBRAS_API_KEY` in the environment or config.
- Get your API key at [cloud.cerebras.ai](https://cloud.cerebras.ai/).
- See [/providers/cerebras](/providers/cerebras) for more details.
Notes: Notes:
- Supported APIs: `openai-completions`, `openai-responses`, `anthropic-messages`, - Supported APIs: `openai-completions`, `openai-responses`, `anthropic-messages`,

View File

@ -0,0 +1,49 @@
---
summary: "Use Cerebras ultra-fast inference for LLaMA, Qwen, GLM models via OpenAI-compatible API"
read_when:
- You want to use Cerebras inference
- You need ultra-fast model responses
---
# Cerebras
Cerebras provides **ultra-fast inference** using their custom AI accelerator chips, delivering industry-leading speed for popular open-source models through an OpenAI-compatible API.
## CLI setup
```bash
clawdbot onboard --auth-choice cerebras-api-key
# or non-interactive
clawdbot onboard --cerebras-api-key "$CEREBRAS_API_KEY"
```
## Config snippet
```json5
{
env: { CEREBRAS_API_KEY: "csk-..." },
agents: {
defaults: {
model: { primary: "cerebras/llama3.1-8b" }
}
}
}
```
## Available models
All models run at FP16 or FP16/FP8 precision:
- `cerebras/llama3.1-8b` - LLaMA 3.1 8B (FP16)
- `cerebras/llama-3.3-70b` - LLaMA 3.3 70B (FP16)
- `cerebras/gpt-oss-120b` - GPT OSS 120B (FP16/FP8)
- `cerebras/qwen-3-32b` - Qwen 3 32B (FP16)
- `cerebras/qwen-3-235b-a22b-instruct-2507` - Qwen 3 235B (FP16/FP8)
- `cerebras/zai-glm-4.7` - GLM 4.7 (FP16/FP8)
## Notes
- Base URL: `https://api.cerebras.ai/v1`
- OpenAI-compatible API (drop-in replacement)
- Model refs use `cerebras/<model>` format
- Get API key at: https://cloud.cerebras.ai/
- For more model options, see [/concepts/model-providers](/concepts/model-providers)

View File

@ -45,6 +45,7 @@ See [Venice AI](/providers/venice).
- [GLM models](/providers/glm) - [GLM models](/providers/glm)
- [MiniMax](/providers/minimax) - [MiniMax](/providers/minimax)
- [Venius (Venice AI, privacy-focused)](/providers/venice) - [Venius (Venice AI, privacy-focused)](/providers/venice)
- [Cerebras (ultra-fast inference, Llama/Qwen/GLM)](/providers/cerebras)
- [Ollama (local models)](/providers/ollama) - [Ollama (local models)](/providers/ollama)
## Transcription providers ## Transcription providers

View File

@ -75,6 +75,16 @@ const OLLAMA_DEFAULT_COST = {
cacheWrite: 0, cacheWrite: 0,
}; };
/** Base URL of the Cerebras OpenAI-compatible endpoint. */
const CEREBRAS_BASE_URL = "https://api.cerebras.ai/v1";

/** Context window applied to every built-in Cerebras model entry. */
const CEREBRAS_DEFAULT_CONTEXT_WINDOW = 128_000;

/** Max-token limit applied to every built-in Cerebras model entry. */
const CEREBRAS_DEFAULT_MAX_TOKENS = 8_192;

/** All-zero cost table shared by every built-in Cerebras model entry. */
const CEREBRAS_DEFAULT_COST = { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 };
interface OllamaModel { interface OllamaModel {
name: string; name: string;
modified_at: string; modified_at: string;
@ -359,6 +369,69 @@ async function buildOllamaProvider(): Promise<ProviderConfig> {
}; };
} }
/**
 * Builds the static provider entry for Cerebras.
 *
 * The model list is hard-coded here. Every entry is declared as a text-only,
 * non-reasoning model and shares the module-level Cerebras defaults for cost,
 * context window, and max tokens.
 *
 * @returns A provider config pointing at the Cerebras base URL through the
 *   `openai-completions` API. No `apiKey` is set by this function.
 */
function buildCerebrasProvider(): ProviderConfig {
  // [model id, display name] pairs, in catalog order.
  const catalog: ReadonlyArray<readonly [string, string]> = [
    ["llama3.1-8b", "Llama 3.1 8B"],
    ["llama-3.3-70b", "Llama 3.3 70B"],
    ["gpt-oss-120b", "GPT OSS 120B"],
    ["qwen-3-32b", "Qwen 3 32B"],
    ["qwen-3-235b-a22b-instruct-2507", "Qwen 3 235B A22B Instruct"],
    ["zai-glm-4.7", "GLM 4.7"],
  ];

  return {
    baseUrl: CEREBRAS_BASE_URL,
    api: "openai-completions",
    models: catalog.map(([id, name]) => ({
      id,
      name,
      reasoning: false,
      // `as const` keeps the literal type that the inline object literals had.
      input: ["text" as const],
      cost: CEREBRAS_DEFAULT_COST,
      contextWindow: CEREBRAS_DEFAULT_CONTEXT_WINDOW,
      maxTokens: CEREBRAS_DEFAULT_MAX_TOKENS,
    })),
  };
}
export async function resolveImplicitProviders(params: { export async function resolveImplicitProviders(params: {
agentDir: string; agentDir: string;
}): Promise<ModelsConfig["providers"]> { }): Promise<ModelsConfig["providers"]> {
@ -418,6 +491,13 @@ export async function resolveImplicitProviders(params: {
providers.ollama = { ...(await buildOllamaProvider()), apiKey: ollamaKey }; providers.ollama = { ...(await buildOllamaProvider()), apiKey: ollamaKey };
} }
const cerebrasKey =
resolveEnvApiKeyVarName("cerebras") ??
resolveApiKeyFromProfiles({ provider: "cerebras", store: authStore });
if (cerebrasKey) {
providers.cerebras = { ...buildCerebrasProvider(), apiKey: cerebrasKey };
}
return providers; return providers;
} }

View File

@ -76,6 +76,7 @@ export function registerOnboardCommand(program: Command) {
.option("--synthetic-api-key <key>", "Synthetic API key") .option("--synthetic-api-key <key>", "Synthetic API key")
.option("--venice-api-key <key>", "Venice API key") .option("--venice-api-key <key>", "Venice API key")
.option("--opencode-zen-api-key <key>", "OpenCode Zen API key") .option("--opencode-zen-api-key <key>", "OpenCode Zen API key")
.option("--cerebras-api-key <key>", "Cerebras API key")
.option("--gateway-port <port>", "Gateway port") .option("--gateway-port <port>", "Gateway port")
.option("--gateway-bind <mode>", "Gateway bind: loopback|tailnet|lan|auto|custom") .option("--gateway-bind <mode>", "Gateway bind: loopback|tailnet|lan|auto|custom")
.option("--gateway-auth <mode>", "Gateway auth: token|password") .option("--gateway-auth <mode>", "Gateway auth: token|password")

View File

@ -20,6 +20,7 @@ export type AuthChoiceGroupId =
| "minimax" | "minimax"
| "synthetic" | "synthetic"
| "venice" | "venice"
| "cerebras"
| "qwen"; | "qwen";
export type AuthChoiceGroup = { export type AuthChoiceGroup = {
@ -71,6 +72,12 @@ const AUTH_CHOICE_GROUP_DEFS: {
hint: "Privacy-focused (uncensored models)", hint: "Privacy-focused (uncensored models)",
choices: ["venice-api-key"], choices: ["venice-api-key"],
}, },
{
value: "cerebras",
label: "Cerebras",
hint: "Ultra-fast inference (Llama/Qwen/GLM)",
choices: ["cerebras-api-key"],
},
{ {
value: "google", value: "google",
label: "Google", label: "Google",
@ -147,6 +154,11 @@ export function buildAuthChoiceOptions(params: {
label: "Venice AI API key", label: "Venice AI API key",
hint: "Privacy-focused inference (uncensored models)", hint: "Privacy-focused inference (uncensored models)",
}); });
options.push({
value: "cerebras-api-key",
label: "Cerebras API key",
hint: "Ultra-fast inference (Llama/Qwen/GLM)",
});
options.push({ options.push({
value: "github-copilot", value: "github-copilot",
label: "GitHub Copilot (GitHub device login)", label: "GitHub Copilot (GitHub device login)",

View File

@ -13,6 +13,8 @@ import {
} from "./google-gemini-model-default.js"; } from "./google-gemini-model-default.js";
import { import {
applyAuthProfileConfig, applyAuthProfileConfig,
applyCerebrasConfig,
applyCerebrasProviderConfig,
applyKimiCodeConfig, applyKimiCodeConfig,
applyKimiCodeProviderConfig, applyKimiCodeProviderConfig,
applyMoonshotConfig, applyMoonshotConfig,
@ -28,12 +30,14 @@ import {
applyVercelAiGatewayConfig, applyVercelAiGatewayConfig,
applyVercelAiGatewayProviderConfig, applyVercelAiGatewayProviderConfig,
applyZaiConfig, applyZaiConfig,
CEREBRAS_DEFAULT_MODEL_REF,
KIMI_CODE_MODEL_REF, KIMI_CODE_MODEL_REF,
MOONSHOT_DEFAULT_MODEL_REF, MOONSHOT_DEFAULT_MODEL_REF,
OPENROUTER_DEFAULT_MODEL_REF, OPENROUTER_DEFAULT_MODEL_REF,
SYNTHETIC_DEFAULT_MODEL_REF, SYNTHETIC_DEFAULT_MODEL_REF,
VENICE_DEFAULT_MODEL_REF, VENICE_DEFAULT_MODEL_REF,
VERCEL_AI_GATEWAY_DEFAULT_MODEL_REF, VERCEL_AI_GATEWAY_DEFAULT_MODEL_REF,
setCerebrasApiKey,
setGeminiApiKey, setGeminiApiKey,
setKimiCodeApiKey, setKimiCodeApiKey,
setMoonshotApiKey, setMoonshotApiKey,
@ -85,6 +89,8 @@ export async function applyAuthChoiceApiProviders(
authChoice = "venice-api-key"; authChoice = "venice-api-key";
} else if (params.opts.tokenProvider === "opencode") { } else if (params.opts.tokenProvider === "opencode") {
authChoice = "opencode-zen"; authChoice = "opencode-zen";
} else if (params.opts.tokenProvider === "cerebras") {
authChoice = "cerebras-api-key";
} }
} }
@ -522,6 +528,65 @@ export async function applyAuthChoiceApiProviders(
return { config: nextConfig, agentModelOverride }; return { config: nextConfig, agentModelOverride };
} }
if (authChoice === "cerebras-api-key") {
let hasCredential = false;
if (!hasCredential && params.opts?.token && params.opts?.tokenProvider === "cerebras") {
await setCerebrasApiKey(normalizeApiKeyInput(params.opts.token), params.agentDir);
hasCredential = true;
}
if (!hasCredential) {
await params.prompter.note(
[
"Cerebras provides ultra-fast inference with their custom AI chips.",
"Get your API key at: https://cloud.cerebras.ai/",
"Available models: llama3.1-8b, llama-3.3-70b, gpt-oss-120b, qwen-3-32b, qwen-3-235b, zai-glm-4.7",
].join("\n"),
"Cerebras",
);
}
const envKey = resolveEnvApiKey("cerebras");
if (envKey) {
const useExisting = await params.prompter.confirm({
message: `Use existing CEREBRAS_API_KEY (${envKey.source}, ${formatApiKeyPreview(envKey.apiKey)})?`,
initialValue: true,
});
if (useExisting) {
await setCerebrasApiKey(envKey.apiKey, params.agentDir);
hasCredential = true;
}
}
if (!hasCredential) {
const key = await params.prompter.text({
message: "Enter Cerebras API key",
validate: validateApiKeyInput,
});
await setCerebrasApiKey(normalizeApiKeyInput(String(key)), params.agentDir);
}
nextConfig = applyAuthProfileConfig(nextConfig, {
profileId: "cerebras:default",
provider: "cerebras",
mode: "api_key",
});
{
const applied = await applyDefaultModelChoice({
config: nextConfig,
setDefaultModel: params.setDefaultModel,
defaultModel: CEREBRAS_DEFAULT_MODEL_REF,
applyDefaultConfig: applyCerebrasConfig,
applyProviderConfig: applyCerebrasProviderConfig,
noteDefault: CEREBRAS_DEFAULT_MODEL_REF,
noteAgentModel,
prompter: params.prompter,
});
nextConfig = applied.config;
agentModelOverride = applied.agentModelOverride ?? agentModelOverride;
}
return { config: nextConfig, agentModelOverride };
}
if (authChoice === "opencode-zen") { if (authChoice === "opencode-zen") {
let hasCredential = false; let hasCredential = false;
if (!hasCredential && params.opts?.token && params.opts?.tokenProvider === "opencode") { if (!hasCredential && params.opts?.token && params.opts?.tokenProvider === "opencode") {

View File

@ -459,3 +459,64 @@ export function applyAuthProfileConfig(
}, },
}; };
} }
/**
 * Registers the built-in Cerebras model refs under `agents.defaults.models`.
 *
 * For each ref, any existing entry is kept as-is and only a missing (nullish)
 * `alias` is filled with the default display name. The input config is not
 * mutated; a new config object is returned.
 *
 * @param cfg - Config to extend.
 * @returns A copy of `cfg` whose `agents.defaults.models` contains all
 *   Cerebras model refs.
 */
export function applyCerebrasProviderConfig(cfg: ClawdbotConfig): ClawdbotConfig {
  // [model ref, alias used when the entry has none], in registration order.
  const defaultAliases: ReadonlyArray<readonly [string, string]> = [
    ["cerebras/llama3.1-8b", "Llama 3.1 8B"],
    ["cerebras/llama-3.3-70b", "Llama 3.3 70B"],
    ["cerebras/gpt-oss-120b", "GPT OSS 120B"],
    ["cerebras/qwen-3-32b", "Qwen 3 32B"],
    ["cerebras/qwen-3-235b-a22b-instruct-2507", "Qwen 3 235B"],
    ["cerebras/zai-glm-4.7", "GLM 4.7"],
  ];

  const models = { ...cfg.agents?.defaults?.models };
  for (const [ref, fallbackAlias] of defaultAliases) {
    const existing = models[ref];
    models[ref] = { ...existing, alias: existing?.alias ?? fallbackAlias };
  }

  const defaults = { ...cfg.agents?.defaults, models };
  const agents = { ...cfg.agents, defaults };
  return { ...cfg, agents };
}
/**
 * Applies the Cerebras provider config and makes `cerebras/llama3.1-8b` the
 * primary default model.
 *
 * Of the previous `agents.defaults.model` value, only `fallbacks` is carried
 * over (and only when that key is present); everything else on it is replaced.
 *
 * @param cfg - Config to extend.
 * @returns A new config with Cerebras models registered and the primary
 *   default model set.
 */
export function applyCerebrasConfig(cfg: ClawdbotConfig): ClawdbotConfig {
  const withProvider = applyCerebrasProviderConfig(cfg);
  const priorModel = withProvider.agents?.defaults?.model;

  // Keep an existing fallback chain so switching the primary does not drop it.
  const carriedOver =
    priorModel && "fallbacks" in (priorModel as Record<string, unknown>)
      ? { fallbacks: (priorModel as { fallbacks?: string[] }).fallbacks }
      : undefined;

  return {
    ...withProvider,
    agents: {
      ...withProvider.agents,
      defaults: {
        ...withProvider.agents?.defaults,
        model: { ...carriedOver, primary: "cerebras/llama3.1-8b" },
      },
    },
  };
}

View File

@ -115,6 +115,7 @@ export async function setVeniceApiKey(key: string, agentDir?: string) {
export const ZAI_DEFAULT_MODEL_REF = "zai/glm-4.7"; export const ZAI_DEFAULT_MODEL_REF = "zai/glm-4.7";
export const OPENROUTER_DEFAULT_MODEL_REF = "openrouter/auto"; export const OPENROUTER_DEFAULT_MODEL_REF = "openrouter/auto";
export const VERCEL_AI_GATEWAY_DEFAULT_MODEL_REF = "vercel-ai-gateway/anthropic/claude-opus-4.5"; export const VERCEL_AI_GATEWAY_DEFAULT_MODEL_REF = "vercel-ai-gateway/anthropic/claude-opus-4.5";
export const CEREBRAS_DEFAULT_MODEL_REF = "cerebras/llama3.1-8b";
export async function setZaiApiKey(key: string, agentDir?: string) { export async function setZaiApiKey(key: string, agentDir?: string) {
// Write to resolved agent dir so gateway finds credentials on startup. // Write to resolved agent dir so gateway finds credentials on startup.
@ -164,3 +165,15 @@ export async function setOpencodeZenApiKey(key: string, agentDir?: string) {
agentDir: resolveAuthAgentDir(agentDir), agentDir: resolveAuthAgentDir(agentDir),
}); });
} }
/**
 * Stores a Cerebras API key as the `cerebras:default` auth profile.
 *
 * @param key - API key to store.
 * @param agentDir - Optional agent directory; passed through
 *   `resolveAuthAgentDir` before the profile is written.
 */
export async function setCerebrasApiKey(key: string, agentDir?: string) {
  const credential = { type: "api_key", provider: "cerebras", key } as const;
  upsertAuthProfile({
    profileId: "cerebras:default",
    credential,
    agentDir: resolveAuthAgentDir(agentDir),
  });
}

View File

@ -5,6 +5,8 @@ export {
export { VENICE_DEFAULT_MODEL_ID, VENICE_DEFAULT_MODEL_REF } from "../agents/venice-models.js"; export { VENICE_DEFAULT_MODEL_ID, VENICE_DEFAULT_MODEL_REF } from "../agents/venice-models.js";
export { export {
applyAuthProfileConfig, applyAuthProfileConfig,
applyCerebrasConfig,
applyCerebrasProviderConfig,
applyKimiCodeConfig, applyKimiCodeConfig,
applyKimiCodeProviderConfig, applyKimiCodeProviderConfig,
applyMoonshotConfig, applyMoonshotConfig,
@ -34,7 +36,9 @@ export {
} from "./onboard-auth.config-opencode.js"; } from "./onboard-auth.config-opencode.js";
export { export {
OPENROUTER_DEFAULT_MODEL_REF, OPENROUTER_DEFAULT_MODEL_REF,
CEREBRAS_DEFAULT_MODEL_REF,
setAnthropicApiKey, setAnthropicApiKey,
setCerebrasApiKey,
setGeminiApiKey, setGeminiApiKey,
setKimiCodeApiKey, setKimiCodeApiKey,
setMinimaxApiKey, setMinimaxApiKey,

View File

@ -8,6 +8,7 @@ import { buildTokenProfileId, validateAnthropicSetupToken } from "../../auth-tok
import { applyGoogleGeminiModelDefault } from "../../google-gemini-model-default.js"; import { applyGoogleGeminiModelDefault } from "../../google-gemini-model-default.js";
import { import {
applyAuthProfileConfig, applyAuthProfileConfig,
applyCerebrasConfig,
applyKimiCodeConfig, applyKimiCodeConfig,
applyMinimaxApiConfig, applyMinimaxApiConfig,
applyMinimaxConfig, applyMinimaxConfig,
@ -19,6 +20,7 @@ import {
applyVercelAiGatewayConfig, applyVercelAiGatewayConfig,
applyZaiConfig, applyZaiConfig,
setAnthropicApiKey, setAnthropicApiKey,
setCerebrasApiKey,
setGeminiApiKey, setGeminiApiKey,
setKimiCodeApiKey, setKimiCodeApiKey,
setMinimaxApiKey, setMinimaxApiKey,
@ -309,6 +311,25 @@ export async function applyNonInteractiveAuthChoice(params: {
return applyVeniceConfig(nextConfig); return applyVeniceConfig(nextConfig);
} }
if (authChoice === "cerebras-api-key") {
const resolved = await resolveNonInteractiveApiKey({
provider: "cerebras",
cfg: baseConfig,
flagValue: opts.cerebrasApiKey,
flagName: "--cerebras-api-key",
envVar: "CEREBRAS_API_KEY",
runtime,
});
if (!resolved) return null;
if (resolved.source !== "profile") await setCerebrasApiKey(resolved.key);
nextConfig = applyAuthProfileConfig(nextConfig, {
profileId: "cerebras:default",
provider: "cerebras",
mode: "api_key",
});
return applyCerebrasConfig(nextConfig);
}
if ( if (
authChoice === "minimax-cloud" || authChoice === "minimax-cloud" ||
authChoice === "minimax-api" || authChoice === "minimax-api" ||

View File

@ -17,6 +17,7 @@ export type AuthChoice =
| "kimi-code-api-key" | "kimi-code-api-key"
| "synthetic-api-key" | "synthetic-api-key"
| "venice-api-key" | "venice-api-key"
| "cerebras-api-key"
| "codex-cli" | "codex-cli"
| "apiKey" | "apiKey"
| "gemini-api-key" | "gemini-api-key"
@ -71,6 +72,7 @@ export type OnboardOptions = {
syntheticApiKey?: string; syntheticApiKey?: string;
veniceApiKey?: string; veniceApiKey?: string;
opencodeZenApiKey?: string; opencodeZenApiKey?: string;
cerebrasApiKey?: string;
gatewayPort?: number; gatewayPort?: number;
gatewayBind?: GatewayBind; gatewayBind?: GatewayBind;
gatewayAuth?: GatewayAuthChoice; gatewayAuth?: GatewayAuthChoice;