diff --git a/docs/concepts/model-providers.md b/docs/concepts/model-providers.md index 9dbb984fc..23b002a08 100644 --- a/docs/concepts/model-providers.md +++ b/docs/concepts/model-providers.md @@ -239,6 +239,46 @@ MiniMax is configured via `models.providers` because it uses custom endpoints: See [/providers/minimax](/providers/minimax) for setup details, model options, and config snippets. +### Nebius Token Factory + +Nebius provides OpenAI-compatible inference for frontier and open models: + +- Provider: `nebius` +- Auth: `NEBIUS_API_KEY` +- Example model: `nebius/Qwen/Qwen3-32B-fast` +- CLI: `moltbot onboard --auth-choice nebius-api-key` + +```json5 +{ + agents: { + defaults: { model: { primary: "nebius/Qwen/Qwen3-32B-fast" } } + }, + models: { + mode: "merge", + providers: { + nebius: { + baseUrl: "https://api.tokenfactory.nebius.com/v1", + apiKey: "${NEBIUS_API_KEY}", + api: "openai-completions", + models: [ + { + id: "Qwen/Qwen3-32B-fast", + name: "Qwen3 32B Fast", + reasoning: false, + input: ["text"], + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, + contextWindow: 128000, + maxTokens: 8192 + } + ] + } + } + } +} +``` + +See [/providers/nebius](/providers/nebius) for setup details and available models. + ### Ollama Ollama is a local LLM runtime that provides an OpenAI-compatible API: diff --git a/docs/gateway/configuration.md b/docs/gateway/configuration.md index 1d270974d..e449493c0 100644 --- a/docs/gateway/configuration.md +++ b/docs/gateway/configuration.md @@ -2472,6 +2472,48 @@ Notes: - Set `KIMICODE_API_KEY` in the environment or use `moltbot onboard --auth-choice kimi-code-api-key`. - Model ref: `kimi-code/kimi-for-coding`. +### Nebius Token Factory + +Use Nebius's OpenAI-compatible endpoint: + +```json5 +{ + env: { NEBIUS_API_KEY: "sk-..." 
}, + agents: { + defaults: { + model: { primary: "nebius/Qwen/Qwen3-32B-fast" }, + models: { "nebius/Qwen/Qwen3-32B-fast": { alias: "Qwen3 32B" } } + } + }, + models: { + mode: "merge", + providers: { + nebius: { + baseUrl: "https://api.tokenfactory.nebius.com/v1", + apiKey: "${NEBIUS_API_KEY}", + api: "openai-completions", + models: [ + { + id: "Qwen/Qwen3-32B-fast", + name: "Qwen3 32B Fast", + reasoning: false, + input: ["text"], + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, + contextWindow: 128000, + maxTokens: 8192 + } + ] + } + } + } +} +``` + +Notes: +- Set `NEBIUS_API_KEY` in the environment or use `moltbot onboard --auth-choice nebius-api-key`. +- Model ref: `nebius/Qwen/Qwen3-32B-fast`. +- Available models include Qwen, DeepSeek, Llama, and GLM families. + ### Synthetic (Anthropic-compatible) Use Synthetic's Anthropic-compatible endpoint: diff --git a/docs/providers/index.md b/docs/providers/index.md index a63a642cc..59696b365 100644 --- a/docs/providers/index.md +++ b/docs/providers/index.md @@ -43,6 +43,7 @@ See [Venice AI](/providers/venice). - [Amazon Bedrock](/bedrock) - [Z.AI](/providers/zai) - [Xiaomi](/providers/xiaomi) +- [Nebius Token Factory](/providers/nebius) - [GLM models](/providers/glm) - [MiniMax](/providers/minimax) - [Venius (Venice AI, privacy-focused)](/providers/venice) diff --git a/docs/providers/nebius.md b/docs/providers/nebius.md new file mode 100644 index 000000000..02d62c109 --- /dev/null +++ b/docs/providers/nebius.md @@ -0,0 +1,65 @@ +--- +summary: "Use Nebius Token Factory with Moltbot" +read_when: + - You want Nebius models in Moltbot + - You need NEBIUS_API_KEY setup +--- +# Nebius Token Factory + +Nebius Token Factory provides OpenAI-compatible inference for frontier and open models. It offers a REST API compatible with OpenAI format and uses API keys for authentication. Create your API key in the [Nebius Token Factory console](https://tokenfactory.nebius.com/). 
Moltbot uses the `nebius` provider with a Nebius API key. + +## Model overview + +- **Qwen3 32B Fast**: 128000-token context window, fast inference. +- **DeepSeek V3 Fast**: Latest DeepSeek model with fast inference. +- **DeepSeek R1 Fast**: Reasoning model with chain-of-thought. +- **Llama 3.3 70B**: Strong general-purpose model with fast variant. +- **Qwen2.5 VL 72B**: Vision-language model. +- **Qwen2.5 Coder 7B Fast**: Code-specialized model. +- **GLM models**: GLM 4.7 and GLM 4.5 from Z.AI. +- Base URL: `https://api.tokenfactory.nebius.com/v1` +- Authorization: `Bearer $NEBIUS_API_KEY` + +## CLI setup + +```bash +moltbot onboard --auth-choice nebius-api-key +# or non-interactive +moltbot onboard --auth-choice nebius-api-key --nebius-api-key "$NEBIUS_API_KEY" +``` + +## Config snippet + +```json5 +{ + env: { NEBIUS_API_KEY: "your-key" }, + agents: { defaults: { model: { primary: "nebius/Qwen/Qwen3-32B-fast" } } }, + models: { + mode: "merge", + providers: { + nebius: { + baseUrl: "https://api.tokenfactory.nebius.com/v1", + api: "openai-completions", + apiKey: "NEBIUS_API_KEY", + models: [ + { + id: "Qwen/Qwen3-32B-fast", + name: "Qwen3 32B Fast", + reasoning: false, + input: ["text"], + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, + contextWindow: 128000, + maxTokens: 8192 + } + ] + } + } + } +} +``` + +## Notes + +- Default model ref: `nebius/Qwen/Qwen3-32B-fast`. +- The provider is injected automatically when `NEBIUS_API_KEY` is set (or an auth profile exists). +- See [/concepts/model-providers](/concepts/model-providers) for provider rules. 
\ No newline at end of file diff --git a/src/agents/models-config.providers.ts b/src/agents/models-config.providers.ts index f38ad46c7..75a4d3173 100644 --- a/src/agents/models-config.providers.ts +++ b/src/agents/models-config.providers.ts @@ -41,6 +41,17 @@ const XIAOMI_DEFAULT_COST = { cacheWrite: 0, }; +const NEBIUS_BASE_URL = "https://api.tokenfactory.nebius.com/v1"; +export const NEBIUS_DEFAULT_MODEL_ID = "Qwen/Qwen3-32B-fast"; +const NEBIUS_DEFAULT_CONTEXT_WINDOW = 128000; +const NEBIUS_DEFAULT_MAX_TOKENS = 8192; +const NEBIUS_DEFAULT_COST = { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, +}; + const MOONSHOT_BASE_URL = "https://api.moonshot.ai/v1"; const MOONSHOT_DEFAULT_MODEL_ID = "kimi-k2.5"; const MOONSHOT_DEFAULT_CONTEXT_WINDOW = 256000; @@ -370,6 +381,105 @@ export function buildXiaomiProvider(): ProviderConfig { }; } +export function buildNebiusProvider(): ProviderConfig { + return { + baseUrl: NEBIUS_BASE_URL, + api: "openai-completions", + models: [ + { + id: NEBIUS_DEFAULT_MODEL_ID, + name: "Qwen3 32B Fast", + reasoning: false, + input: ["text"], + cost: NEBIUS_DEFAULT_COST, + contextWindow: NEBIUS_DEFAULT_CONTEXT_WINDOW, + maxTokens: NEBIUS_DEFAULT_MAX_TOKENS, + }, + { + id: "meta-llama/Meta-Llama-3.1-8B-Instruct-fast", + name: "Llama 3.1 8B Fast", + reasoning: false, + input: ["text"], + cost: NEBIUS_DEFAULT_COST, + contextWindow: NEBIUS_DEFAULT_CONTEXT_WINDOW, + maxTokens: NEBIUS_DEFAULT_MAX_TOKENS, + }, + { + id: "meta-llama/Llama-3.3-70B-Instruct", + name: "Llama 3.3 70B", + reasoning: false, + input: ["text"], + cost: NEBIUS_DEFAULT_COST, + contextWindow: NEBIUS_DEFAULT_CONTEXT_WINDOW, + maxTokens: NEBIUS_DEFAULT_MAX_TOKENS, + }, + { + id: "meta-llama/Llama-3.3-70B-Instruct-fast", + name: "Llama 3.3 70B Fast", + reasoning: false, + input: ["text"], + cost: NEBIUS_DEFAULT_COST, + contextWindow: NEBIUS_DEFAULT_CONTEXT_WINDOW, + maxTokens: NEBIUS_DEFAULT_MAX_TOKENS, + }, + { + id: "deepseek-ai/DeepSeek-V3-0324-fast", + name: "DeepSeek V3 
Fast", + reasoning: false, + input: ["text"], + cost: NEBIUS_DEFAULT_COST, + contextWindow: NEBIUS_DEFAULT_CONTEXT_WINDOW, + maxTokens: NEBIUS_DEFAULT_MAX_TOKENS, + }, + { + id: "deepseek-ai/DeepSeek-R1-0528-fast", + name: "DeepSeek R1 Fast", + reasoning: true, + input: ["text"], + cost: NEBIUS_DEFAULT_COST, + contextWindow: NEBIUS_DEFAULT_CONTEXT_WINDOW, + maxTokens: NEBIUS_DEFAULT_MAX_TOKENS, + }, + { + id: "Qwen/Qwen2.5-VL-72B-Instruct", + name: "Qwen2.5 VL 72B", + reasoning: false, + input: ["text", "image"], + cost: NEBIUS_DEFAULT_COST, + contextWindow: NEBIUS_DEFAULT_CONTEXT_WINDOW, + maxTokens: NEBIUS_DEFAULT_MAX_TOKENS, + }, + { + id: "Qwen/Qwen2.5-Coder-7B-fast", + name: "Qwen2.5 Coder 7B Fast", + reasoning: false, + input: ["text"], + cost: NEBIUS_DEFAULT_COST, + contextWindow: NEBIUS_DEFAULT_CONTEXT_WINDOW, + maxTokens: NEBIUS_DEFAULT_MAX_TOKENS, + }, + { + id: "zai-org/GLM-4.7-FP8", + name: "GLM 4.7 FP8", + reasoning: false, + input: ["text"], + cost: NEBIUS_DEFAULT_COST, + contextWindow: NEBIUS_DEFAULT_CONTEXT_WINDOW, + maxTokens: NEBIUS_DEFAULT_MAX_TOKENS, + }, + { + id: "zai-org/GLM-4.5", + name: "GLM 4.5", + reasoning: false, + input: ["text"], + cost: NEBIUS_DEFAULT_COST, + contextWindow: NEBIUS_DEFAULT_CONTEXT_WINDOW, + maxTokens: NEBIUS_DEFAULT_MAX_TOKENS, + }, + ], + }; +} + async function buildVeniceProvider(): Promise<ProviderConfig> { const models = await discoverVeniceModels(); return { @@ -446,6 +556,14 @@ export async function resolveImplicitProviders(params: { providers.xiaomi = { ...buildXiaomiProvider(), apiKey: xiaomiKey }; } + const nebiusKey = + resolveEnvApiKeyVarName("nebius") ?? + resolveApiKeyFromProfiles({ provider: "nebius", store: authStore }); + + if (nebiusKey) { + providers.nebius = { ...buildNebiusProvider(), apiKey: nebiusKey }; + } + // Ollama provider - only add if explicitly configured const ollamaKey = resolveEnvApiKeyVarName("ollama") ?? 
diff --git a/src/commands/auth-choice-options.ts b/src/commands/auth-choice-options.ts index 5acddf4e3..0e0a1fc43 100644 --- a/src/commands/auth-choice-options.ts +++ b/src/commands/auth-choice-options.ts @@ -17,6 +17,7 @@ export type AuthChoiceGroupId = | "moonshot" | "zai" | "xiaomi" + | "nebius" | "opencode-zen" | "minimax" | "synthetic" @@ -114,6 +115,12 @@ const AUTH_CHOICE_GROUP_DEFS: { hint: "API key", choices: ["xiaomi-api-key"], }, + { + value: "nebius", + label: "Nebius", + hint: "Nebius Token Factory", + choices: ["nebius-api-key"], + }, { value: "opencode-zen", label: "OpenCode Zen", @@ -176,6 +183,11 @@ export function buildAuthChoiceOptions(params: { label: "Xiaomi API key", }); options.push({ value: "qwen-portal", label: "Qwen OAuth" }); + options.push({ + value: "nebius-api-key", + label: "Nebius API key", + hint: "Nebius Token Factory", + }); options.push({ value: "copilot-proxy", label: "Copilot Proxy (local)", diff --git a/src/commands/auth-choice.apply.api-providers.ts b/src/commands/auth-choice.apply.api-providers.ts index fa4fc77e7..24af29c8e 100644 --- a/src/commands/auth-choice.apply.api-providers.ts +++ b/src/commands/auth-choice.apply.api-providers.ts @@ -17,6 +17,8 @@ import { applyKimiCodeProviderConfig, applyMoonshotConfig, applyMoonshotProviderConfig, + applyNebiusConfig, + applyNebiusProviderConfig, applyOpencodeZenConfig, applyOpencodeZenProviderConfig, applyOpenrouterConfig, @@ -32,6 +34,7 @@ import { applyZaiConfig, KIMI_CODE_MODEL_REF, MOONSHOT_DEFAULT_MODEL_REF, + NEBIUS_DEFAULT_MODEL_REF, OPENROUTER_DEFAULT_MODEL_REF, SYNTHETIC_DEFAULT_MODEL_REF, VENICE_DEFAULT_MODEL_REF, @@ -40,6 +43,7 @@ import { setGeminiApiKey, setKimiCodeApiKey, setMoonshotApiKey, + setNebiusApiKey, setOpencodeZenApiKey, setOpenrouterApiKey, setSyntheticApiKey, @@ -85,6 +89,8 @@ export async function applyAuthChoiceApiProviders( authChoice = "zai-api-key"; } else if (params.opts.tokenProvider === "xiaomi") { authChoice = "xiaomi-api-key"; + } else if 
(params.opts.tokenProvider === "nebius") { + authChoice = "nebius-api-key"; } else if (params.opts.tokenProvider === "synthetic") { authChoice = "synthetic-api-key"; } else if (params.opts.tokenProvider === "venice") { @@ -485,6 +491,69 @@ export async function applyAuthChoiceApiProviders( return { config: nextConfig, agentModelOverride }; } + if (authChoice === "nebius-api-key") { + let hasCredential = false; + + if (!hasCredential && params.opts?.token && params.opts?.tokenProvider === "nebius") { + await setNebiusApiKey(normalizeApiKeyInput(params.opts.token), params.agentDir); + hasCredential = true; + } + + if (!hasCredential) { + await params.prompter.note( + [ + "Nebius provides OpenAI-compatible inference for frontier and open models.", + "Get your API key at: https://tokenfactory.nebius.com/", + "Available models: Qwen, DeepSeek, Llama, and GLM families", + ].join("\n"), + "Nebius", + ); + } + + const envKey = resolveEnvApiKey("nebius"); + if (!hasCredential && envKey) { + const useExisting = await params.prompter.confirm({ + message: `Use existing NEBIUS_API_KEY (${envKey.source}, ${formatApiKeyPreview(envKey.apiKey)})?`, + initialValue: true, + }); + if (useExisting) { + await setNebiusApiKey(envKey.apiKey, params.agentDir); + hasCredential = true; + } + } + + if (!hasCredential) { + const key = await params.prompter.text({ + message: "Enter Nebius API key", + validate: validateApiKeyInput, + }); + await setNebiusApiKey(normalizeApiKeyInput(String(key)), params.agentDir); + } + + nextConfig = applyAuthProfileConfig(nextConfig, { + profileId: "nebius:default", + provider: "nebius", + mode: "api_key", + }); + + { + const applied = await applyDefaultModelChoice({ + config: nextConfig, + setDefaultModel: params.setDefaultModel, + defaultModel: NEBIUS_DEFAULT_MODEL_REF, + applyDefaultConfig: applyNebiusConfig, + applyProviderConfig: applyNebiusProviderConfig, + noteDefault: NEBIUS_DEFAULT_MODEL_REF, + noteAgentModel, + prompter: params.prompter, + }); + nextConfig = applied.config; + 
agentModelOverride = applied.agentModelOverride ?? agentModelOverride; + } + + return { config: nextConfig, agentModelOverride }; + } + if (authChoice === "synthetic-api-key") { if (params.opts?.token && params.opts?.tokenProvider === "synthetic") { await setSyntheticApiKey(String(params.opts.token).trim(), params.agentDir); diff --git a/src/commands/onboard-auth.config-core.ts b/src/commands/onboard-auth.config-core.ts index 222f0a5c6..3f794db6f 100644 --- a/src/commands/onboard-auth.config-core.ts +++ b/src/commands/onboard-auth.config-core.ts @@ -1,4 +1,9 @@ -import { buildXiaomiProvider, XIAOMI_DEFAULT_MODEL_ID } from "../agents/models-config.providers.js"; +import { + buildNebiusProvider, + buildXiaomiProvider, + NEBIUS_DEFAULT_MODEL_ID, + XIAOMI_DEFAULT_MODEL_ID, +} from "../agents/models-config.providers.js"; import { buildSyntheticModelDefinition, SYNTHETIC_BASE_URL, @@ -13,6 +18,7 @@ import { } from "../agents/venice-models.js"; import type { MoltbotConfig } from "../config/config.js"; import { + NEBIUS_DEFAULT_MODEL_REF, OPENROUTER_DEFAULT_MODEL_REF, VERCEL_AI_GATEWAY_DEFAULT_MODEL_REF, XIAOMI_DEFAULT_MODEL_REF, @@ -409,6 +415,77 @@ export function applyXiaomiConfig(cfg: MoltbotConfig): MoltbotConfig { }; } +export function applyNebiusProviderConfig(cfg: MoltbotConfig): MoltbotConfig { + const models = { ...cfg.agents?.defaults?.models }; + models[NEBIUS_DEFAULT_MODEL_REF] = { + ...models[NEBIUS_DEFAULT_MODEL_REF], + alias: models[NEBIUS_DEFAULT_MODEL_REF]?.alias ?? "Qwen3 32B", + }; + + const providers = { ...cfg.models?.providers }; + const existingProvider = providers.nebius; + const defaultProvider = buildNebiusProvider(); + const existingModels = Array.isArray(existingProvider?.models) ? existingProvider.models : []; + const defaultModels = defaultProvider.models ?? []; + const hasDefaultModel = existingModels.some((model) => model.id === NEBIUS_DEFAULT_MODEL_ID); + const mergedModels = + existingModels.length > 0 + ? hasDefaultModel + ? 
existingModels + : [...existingModels, ...defaultModels.filter((candidate) => !existingModels.some((model) => model.id === candidate.id))] + : defaultModels; + const { apiKey: existingApiKey, ...existingProviderRest } = (existingProvider ?? {}) as Record< + string, + unknown + > as { apiKey?: string }; + const resolvedApiKey = typeof existingApiKey === "string" ? existingApiKey : undefined; + const normalizedApiKey = resolvedApiKey?.trim(); + providers.nebius = { + ...existingProviderRest, + baseUrl: defaultProvider.baseUrl, + api: defaultProvider.api, + ...(normalizedApiKey ? { apiKey: normalizedApiKey } : {}), + models: mergedModels.length > 0 ? mergedModels : defaultProvider.models, + }; + + return { + ...cfg, + agents: { + ...cfg.agents, + defaults: { + ...cfg.agents?.defaults, + models, + }, + }, + models: { + mode: cfg.models?.mode ?? "merge", + providers, + }, + }; +} + +export function applyNebiusConfig(cfg: MoltbotConfig): MoltbotConfig { + const next = applyNebiusProviderConfig(cfg); + const existingModel = next.agents?.defaults?.model; + return { + ...next, + agents: { + ...next.agents, + defaults: { + ...next.agents?.defaults, + model: { + ...(existingModel && "fallbacks" in (existingModel as Record<string, unknown>) + ? { + fallbacks: (existingModel as { fallbacks?: string[] }).fallbacks, + } + : undefined), + primary: NEBIUS_DEFAULT_MODEL_REF, + }, + }, + }, + }; +} + /** * Apply Venice provider configuration without changing the default model. * Registers Venice models and sets up the provider, but preserves existing model selection. 
diff --git a/src/commands/onboard-auth.credentials.ts b/src/commands/onboard-auth.credentials.ts index 053026162..a6692d316 100644 --- a/src/commands/onboard-auth.credentials.ts +++ b/src/commands/onboard-auth.credentials.ts @@ -116,6 +116,20 @@ export const ZAI_DEFAULT_MODEL_REF = "zai/glm-4.7"; export const XIAOMI_DEFAULT_MODEL_REF = "xiaomi/mimo-v2-flash"; export const OPENROUTER_DEFAULT_MODEL_REF = "openrouter/auto"; export const VERCEL_AI_GATEWAY_DEFAULT_MODEL_REF = "vercel-ai-gateway/anthropic/claude-opus-4.5"; +export const NEBIUS_DEFAULT_MODEL_REF = "nebius/Qwen/Qwen3-32B-fast"; + +export async function setNebiusApiKey(key: string, agentDir?: string) { + // Write to resolved agent dir so gateway finds credentials on startup. + upsertAuthProfile({ + profileId: "nebius:default", + credential: { + type: "api_key", + provider: "nebius", + key, + }, + agentDir: resolveAuthAgentDir(agentDir), + }); +} export async function setZaiApiKey(key: string, agentDir?: string) { // Write to resolved agent dir so gateway finds credentials on startup. diff --git a/src/commands/onboard-auth.ts b/src/commands/onboard-auth.ts index 612b24865..b4c193ee7 100644 --- a/src/commands/onboard-auth.ts +++ b/src/commands/onboard-auth.ts @@ -9,6 +9,8 @@ export { applyKimiCodeProviderConfig, applyMoonshotConfig, applyMoonshotProviderConfig, + applyNebiusConfig, + applyNebiusProviderConfig, applyOpenrouterConfig, applyOpenrouterProviderConfig, applySyntheticConfig, @@ -35,12 +37,14 @@ export { applyOpencodeZenProviderConfig, } from "./onboard-auth.config-opencode.js"; export { + NEBIUS_DEFAULT_MODEL_REF, OPENROUTER_DEFAULT_MODEL_REF, setAnthropicApiKey, setGeminiApiKey, setKimiCodeApiKey, setMinimaxApiKey, setMoonshotApiKey, + setNebiusApiKey, setOpencodeZenApiKey, setOpenrouterApiKey, setSyntheticApiKey,