feat: add Nebius Token Factory provider

2026-01-30 07:04:51 +05:30 · 2026-01-30 07:04:51 +05:30 · 84a1994890
commit 84a1994890
parent 4583f88626
10 changed files with 443 additions and 1 deletions
--- a/docs/concepts/model-providers.md
+++ b/docs/concepts/model-providers.md
@ -239,6 +239,46 @@ MiniMax is configured via `models.providers` because it uses custom endpoints:

 See [/providers/minimax](/providers/minimax) for setup details, model options, and config snippets.

+### Nebius Token Factory
+
+Nebius provides OpenAI-compatible inference for frontier and open models:
+
+- Provider: `nebius`
+- Auth: `NEBIUS_API_KEY`
+- Example model: `nebius/Qwen/Qwen3-32B-fast`
+- CLI: `moltbot onboard --auth-choice nebius-api-key`
+
+```json5
+{
+  agents: {
+    defaults: { model: { primary: "nebius/Qwen/Qwen3-32B-fast" } }
+  },
+  models: {
+    mode: "merge",
+    providers: {
+      nebius: {
+        baseUrl: "https://api.tokenfactory.nebius.com/v1",
+        apiKey: "${NEBIUS_API_KEY}",
+        api: "openai-completions",
+        models: [
+          {
+            id: "Qwen/Qwen3-32B-fast",
+            name: "Qwen3 32B Fast",
+            reasoning: false,
+            input: ["text"],
+            cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
+            contextWindow: 128000,
+            maxTokens: 8192
+          }
+        ]
+      }
+    }
+  }
+}
+```
+
+See [/providers/nebius](/providers/nebius) for setup details and available models.
+
 ### Ollama

 Ollama is a local LLM runtime that provides an OpenAI-compatible API:
--- a/docs/gateway/configuration.md
+++ b/docs/gateway/configuration.md
@ -2472,6 +2472,48 @@ Notes:
 - Set `KIMICODE_API_KEY` in the environment or use `moltbot onboard --auth-choice kimi-code-api-key`.
 - Model ref: `kimi-code/kimi-for-coding`.

+### Nebius Token Factory
+
+Use Nebius's OpenAI-compatible endpoint:
+
+```json5
+{
+  env: { NEBIUS_API_KEY: "sk-..." },
+  agents: {
+    defaults: {
+      model: { primary: "nebius/Qwen/Qwen3-32B-fast" },
+      models: { "nebius/Qwen/Qwen3-32B-fast": { alias: "Qwen3 32B" } }
+    }
+  },
+  models: {
+    mode: "merge",
+    providers: {
+      nebius: {
+        baseUrl: "https://api.tokenfactory.nebius.com/v1",
+        apiKey: "${NEBIUS_API_KEY}",
+        api: "openai-completions",
+        models: [
+          {
+            id: "Qwen/Qwen3-32B-fast",
+            name: "Qwen3 32B Fast",
+            reasoning: false,
+            input: ["text"],
+            cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
+            contextWindow: 128000,
+            maxTokens: 8192
+          }
+        ]
+      }
+    }
+  }
+}
+```
+
+Notes:
+- Set `NEBIUS_API_KEY` in the environment or use `moltbot onboard --auth-choice nebius-api-key`.
+- Model ref: `nebius/Qwen/Qwen3-32B-fast`.
+- Available models include Qwen, DeepSeek, Llama, and GLM families.
+
 ### Synthetic (Anthropic-compatible)

 Use Synthetic's Anthropic-compatible endpoint:
--- a/docs/providers/index.md
+++ b/docs/providers/index.md
@ -43,6 +43,7 @@ See [Venice AI](/providers/venice).
 - [Amazon Bedrock](/bedrock)
 - [Z.AI](/providers/zai)
 - [Xiaomi](/providers/xiaomi)
+- [Nebius Token Factory](/providers/nebius)
 - [GLM models](/providers/glm)
 - [MiniMax](/providers/minimax)
 - [Venice (Venice AI, privacy-focused)](/providers/venice)
--- a/docs/providers/nebius.md
+++ b/docs/providers/nebius.md
@ -0,0 +1,65 @@
+---
+summary: "Use Nebius Token Factory with Moltbot"
+read_when:
+  - You want Nebius models in Moltbot
+  - You need NEBIUS_API_KEY setup
+---
+# Nebius Token Factory
+
+Nebius Token Factory provides OpenAI-compatible inference for frontier and open models. It offers a REST API compatible with OpenAI format and uses API keys for authentication. Create your API key in the [Nebius Token Factory console](https://tokenfactory.nebius.com/). Moltbot uses the `nebius` provider with a Nebius API key.
+
+## Model overview
+
+- **Qwen3 32B Fast**: 128000-token context window, fast inference.
+- **DeepSeek V3 Fast**: Latest DeepSeek model with fast inference.
+- **DeepSeek R1 Fast**: Reasoning model with chain-of-thought.
+- **Llama 3.3 70B**: Strong general-purpose model with fast variant.
+- **Qwen2.5 VL 72B**: Vision-language model.
+- **Qwen2.5 Coder 7B Fast**: Code-specialized model.
+- **GLM models**: GLM 4.7 and GLM 4.5 from Z.AI.
+- Base URL: `https://api.tokenfactory.nebius.com/v1`
+- Authorization: `Bearer $NEBIUS_API_KEY`
+
+## CLI setup
+
+```bash
+moltbot onboard --auth-choice nebius-api-key
+# or non-interactive
+moltbot onboard --auth-choice nebius-api-key --nebius-api-key "$NEBIUS_API_KEY"
+```
+
+## Config snippet
+
+```json5
+{
+  env: { NEBIUS_API_KEY: "your-key" },
+  agents: { defaults: { model: { primary: "nebius/Qwen/Qwen3-32B-fast" } } },
+  models: {
+    mode: "merge",
+    providers: {
+      nebius: {
+        baseUrl: "https://api.tokenfactory.nebius.com/v1",
+        api: "openai-completions",
+        apiKey: "NEBIUS_API_KEY",
+        models: [
+          {
+            id: "Qwen/Qwen3-32B-fast",
+            name: "Qwen3 32B Fast",
+            reasoning: false,
+            input: ["text"],
+            cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
+            contextWindow: 128000,
+            maxTokens: 8192
+          }
+        ]
+      }
+    }
+  }
+}
+```
+
+## Notes
+
+- Default model ref: `nebius/Qwen/Qwen3-32B-fast`.
+- The provider is injected automatically when `NEBIUS_API_KEY` is set (or an auth profile exists).
+- See [/concepts/model-providers](/concepts/model-providers) for provider rules.
--- a/src/agents/models-config.providers.ts
+++ b/src/agents/models-config.providers.ts
@ -41,6 +41,17 @@ const XIAOMI_DEFAULT_COST = {
  cacheWrite: 0,
 };

+// Nebius Token Factory defaults shared by every model entry built in this module.
+const NEBIUS_BASE_URL = "https://api.tokenfactory.nebius.com/v1";
+// Exported because onboard-auth.config-core.ts imports it from this module.
+export const NEBIUS_DEFAULT_MODEL_ID = "Qwen/Qwen3-32B-fast";
+const NEBIUS_DEFAULT_CONTEXT_WINDOW = 128000;
+const NEBIUS_DEFAULT_MAX_TOKENS = 8192;
+// All cost fields default to zero.
+const NEBIUS_DEFAULT_COST = {
+  input: 0,
+  output: 0,
+  cacheRead: 0,
+  cacheWrite: 0,
+};
+
 const MOONSHOT_BASE_URL = "https://api.moonshot.ai/v1";
 const MOONSHOT_DEFAULT_MODEL_ID = "kimi-k2.5";
 const MOONSHOT_DEFAULT_CONTEXT_WINDOW = 256000;
@ -370,6 +381,105 @@ export function buildXiaomiProvider(): ProviderConfig {
  };
 }

+/**
+ * Build the built-in Nebius Token Factory provider definition.
+ *
+ * The returned config carries no `apiKey`. Every catalog entry shares the same default
+ * cost, context window, and max-token settings.
+ *
+ * @returns Provider config pointing at the Nebius OpenAI-compatible endpoint.
+ */
+export function buildNebiusProvider(): ProviderConfig {
+  // Each row: [model id, display name, reasoning flag, accepted input kinds].
+  const catalog: Array<[string, string, boolean, Array<"text" | "image">]> = [
+    ["Qwen/Qwen3-32B-fast", "Qwen3 32B Fast", false, ["text"]],
+    ["meta-llama/Meta-Llama-3.1-8B-Instruct-fast", "Llama 3.1 8B Fast", false, ["text"]],
+    ["meta-llama/Llama-3.3-70B-Instruct", "Llama 3.3 70B", false, ["text"]],
+    ["meta-llama/Llama-3.3-70B-Instruct-fast", "Llama 3.3 70B Fast", false, ["text"]],
+    ["deepseek-ai/DeepSeek-V3-0324-fast", "DeepSeek V3 Fast", false, ["text"]],
+    ["deepseek-ai/DeepSeek-R1-0528-fast", "DeepSeek R1 Fast", true, ["text"]],
+    ["Qwen/Qwen2.5-VL-72B-Instruct", "Qwen2.5 VL 72B", false, ["text", "image"]],
+    ["Qwen/Qwen2.5-Coder-7B-fast", "Qwen2.5 Coder 7B Fast", false, ["text"]],
+    ["zai-org/GLM-4.7-FP8", "GLM 4.7 FP8", false, ["text"]],
+    ["zai-org/GLM-4.5", "GLM 4.5", false, ["text"]],
+  ];
+
+  return {
+    baseUrl: NEBIUS_BASE_URL,
+    api: "openai-completions",
+    models: catalog.map(([id, name, reasoning, input]) => ({
+      id,
+      name,
+      reasoning,
+      input,
+      cost: NEBIUS_DEFAULT_COST,
+      contextWindow: NEBIUS_DEFAULT_CONTEXT_WINDOW,
+      maxTokens: NEBIUS_DEFAULT_MAX_TOKENS,
+    })),
+  };
+}
+
 async function buildVeniceProvider(): Promise<ProviderConfig> {
  const models = await discoverVeniceModels();
  return {
@ -446,6 +556,14 @@ export async function resolveImplicitProviders(params: {
    providers.xiaomi = { ...buildXiaomiProvider(), apiKey: xiaomiKey };
  }

+  const nebiusKey =
+    resolveEnvApiKeyVarName("nebius") ??
+    resolveApiKeyFromProfiles({ provider: "nebius", store: authStore });
+
+  if (nebiusKey) {
+    providers.nebius = { ...buildNebiusProvider(), apiKey: nebiusKey };
+  }
+
  // Ollama provider - only add if explicitly configured
  const ollamaKey =
    resolveEnvApiKeyVarName("ollama") ??
--- a/src/commands/auth-choice-options.ts
+++ b/src/commands/auth-choice-options.ts
@ -17,6 +17,7 @@ export type AuthChoiceGroupId =
  | "moonshot"
  | "zai"
  | "xiaomi"
+  | "nebius"
  | "opencode-zen"
  | "minimax"
  | "synthetic"
@ -114,6 +115,12 @@ const AUTH_CHOICE_GROUP_DEFS: {
    hint: "API key",
    choices: ["xiaomi-api-key"],
  },
+  {
+    value: "nebius",
+    label: "Nebius",
+    hint: "Nebius Token Factory",
+    choices: ["nebius-api-key"],
+  },
  {
    value: "opencode-zen",
    label: "OpenCode Zen",
@ -176,6 +183,11 @@ export function buildAuthChoiceOptions(params: {
    label: "Xiaomi API key",
  });
  options.push({ value: "qwen-portal", label: "Qwen OAuth" });
+  options.push({
+    value: "nebius-api-key",
+    label: "Nebius API key",
+    hint: "Nebius Token Factory",
+  });
  options.push({
    value: "copilot-proxy",
    label: "Copilot Proxy (local)",
--- a/src/commands/auth-choice.apply.api-providers.ts
+++ b/src/commands/auth-choice.apply.api-providers.ts
@ -17,6 +17,8 @@ import {
  applyKimiCodeProviderConfig,
  applyMoonshotConfig,
  applyMoonshotProviderConfig,
+  applyNebiusConfig,
+  applyNebiusProviderConfig,
  applyOpencodeZenConfig,
  applyOpencodeZenProviderConfig,
  applyOpenrouterConfig,
@ -32,6 +34,7 @@ import {
  applyZaiConfig,
  KIMI_CODE_MODEL_REF,
  MOONSHOT_DEFAULT_MODEL_REF,
+  NEBIUS_DEFAULT_MODEL_REF,
  OPENROUTER_DEFAULT_MODEL_REF,
  SYNTHETIC_DEFAULT_MODEL_REF,
  VENICE_DEFAULT_MODEL_REF,
@ -40,6 +43,7 @@ import {
  setGeminiApiKey,
  setKimiCodeApiKey,
  setMoonshotApiKey,
+  setNebiusApiKey,
  setOpencodeZenApiKey,
  setOpenrouterApiKey,
  setSyntheticApiKey,
@ -85,6 +89,8 @@ export async function applyAuthChoiceApiProviders(
      authChoice = "zai-api-key";
    } else if (params.opts.tokenProvider === "xiaomi") {
      authChoice = "xiaomi-api-key";
+    } else if (params.opts.tokenProvider === "nebius") {
+      authChoice = "nebius-api-key";
    } else if (params.opts.tokenProvider === "synthetic") {
      authChoice = "synthetic-api-key";
    } else if (params.opts.tokenProvider === "venice") {
@ -485,6 +491,69 @@ export async function applyAuthChoiceApiProviders(
    return { config: nextConfig, agentModelOverride };
  }

+  // Nebius Token Factory: store an API key, register the auth profile, then apply the
+  // provider config (and, via applyDefaultModelChoice, the default model).
+  if (authChoice === "nebius-api-key") {
+    let hasCredential = false;
+
+    // A token passed explicitly for this provider is stored as-is and skips all prompts.
+    if (params.opts?.token && params.opts?.tokenProvider === "nebius") {
+      await setNebiusApiKey(normalizeApiKeyInput(params.opts.token), params.agentDir);
+      hasCredential = true;
+    }
+
+    if (!hasCredential) {
+      await params.prompter.note(
+        [
+          "Nebius provides OpenAI-compatible inference for frontier and open models.",
+          "Get your API key at: https://tokenfactory.nebius.com/",
+          "Available models include Qwen, DeepSeek, Llama, and GLM families.",
+        ].join("\n"),
+        "Nebius",
+      );
+
+      // Only offer the environment key when no token was supplied, so an explicit
+      // token is never overwritten by a confirm prompt.
+      const envKey = resolveEnvApiKey("nebius");
+      if (envKey) {
+        const useExisting = await params.prompter.confirm({
+          message: `Use existing NEBIUS_API_KEY (${envKey.source}, ${formatApiKeyPreview(envKey.apiKey)})?`,
+          initialValue: true,
+        });
+        if (useExisting) {
+          await setNebiusApiKey(envKey.apiKey, params.agentDir);
+          hasCredential = true;
+        }
+      }
+    }
+
+    // Last resort: ask for the key interactively.
+    if (!hasCredential) {
+      const key = await params.prompter.text({
+        message: "Enter Nebius API key",
+        validate: validateApiKeyInput,
+      });
+      await setNebiusApiKey(normalizeApiKeyInput(String(key)), params.agentDir);
+    }
+
+    nextConfig = applyAuthProfileConfig(nextConfig, {
+      profileId: "nebius:default",
+      provider: "nebius",
+      mode: "api_key",
+    });
+
+    {
+      const applied = await applyDefaultModelChoice({
+        config: nextConfig,
+        setDefaultModel: params.setDefaultModel,
+        defaultModel: NEBIUS_DEFAULT_MODEL_REF,
+        applyDefaultConfig: applyNebiusConfig,
+        applyProviderConfig: applyNebiusProviderConfig,
+        noteDefault: NEBIUS_DEFAULT_MODEL_REF,
+        noteAgentModel,
+        prompter: params.prompter,
+      });
+      nextConfig = applied.config;
+      agentModelOverride = applied.agentModelOverride ?? agentModelOverride;
+    }
+
+    return { config: nextConfig, agentModelOverride };
+  }
+
  if (authChoice === "synthetic-api-key") {
    if (params.opts?.token && params.opts?.tokenProvider === "synthetic") {
      await setSyntheticApiKey(String(params.opts.token).trim(), params.agentDir);
--- a/src/commands/onboard-auth.config-core.ts
+++ b/src/commands/onboard-auth.config-core.ts
@ -1,4 +1,9 @@
-import { buildXiaomiProvider, XIAOMI_DEFAULT_MODEL_ID } from "../agents/models-config.providers.js";
+import {
+  buildNebiusProvider,
+  buildXiaomiProvider,
+  NEBIUS_DEFAULT_MODEL_ID,
+  XIAOMI_DEFAULT_MODEL_ID,
+} from "../agents/models-config.providers.js";
 import {
  buildSyntheticModelDefinition,
  SYNTHETIC_BASE_URL,
@ -13,6 +18,7 @@ import {
 } from "../agents/venice-models.js";
 import type { MoltbotConfig } from "../config/config.js";
 import {
+  NEBIUS_DEFAULT_MODEL_REF,
  OPENROUTER_DEFAULT_MODEL_REF,
  VERCEL_AI_GATEWAY_DEFAULT_MODEL_REF,
  XIAOMI_DEFAULT_MODEL_REF,
@ -409,6 +415,77 @@ export function applyXiaomiConfig(cfg: MoltbotConfig): MoltbotConfig {
  };
 }

+/**
+ * Register the Nebius provider and the default-model alias without changing the
+ * primary model selection.
+ *
+ * - Keeps an existing alias for the default model ref; otherwise sets "Qwen3 32B".
+ * - Keeps other fields of an existing `models.providers.nebius` entry and a non-empty
+ *   string `apiKey` (trimmed), but always resets `baseUrl` and `api` to the built-in values.
+ * - If the existing provider already lists the default model, its model list is kept
+ *   unchanged; otherwise built-in models whose ids are not yet present are appended.
+ *
+ * @param cfg Current config; not mutated.
+ * @returns A new config with the Nebius provider applied.
+ */
+export function applyNebiusProviderConfig(cfg: MoltbotConfig): MoltbotConfig {
+  const models = { ...cfg.agents?.defaults?.models };
+  models[NEBIUS_DEFAULT_MODEL_REF] = {
+    ...models[NEBIUS_DEFAULT_MODEL_REF],
+    alias: models[NEBIUS_DEFAULT_MODEL_REF]?.alias ?? "Qwen3 32B",
+  };
+
+  const providers = { ...cfg.models?.providers };
+  const existingProvider = providers.nebius;
+  const defaultProvider = buildNebiusProvider();
+  const existingModels = Array.isArray(existingProvider?.models) ? existingProvider.models : [];
+  const defaultModels = defaultProvider.models ?? [];
+  const hasDefaultModel = existingModels.some((model) => model.id === NEBIUS_DEFAULT_MODEL_ID);
+  // Append only built-in entries the user has not already defined, so model ids never repeat.
+  const existingIds = new Set(existingModels.map((model) => model.id));
+  const missingDefaults = defaultModels.filter((model) => !existingIds.has(model.id));
+  const mergedModels =
+    existingModels.length > 0
+      ? hasDefaultModel
+        ? existingModels
+        : [...existingModels, ...missingDefaults]
+      : defaultModels;
+  // Pull apiKey out separately so it can be validated and trimmed before being re-added.
+  const { apiKey: existingApiKey, ...existingProviderRest } = (existingProvider ?? {}) as Record<
+    string,
+    unknown
+  > as { apiKey?: string };
+  const resolvedApiKey = typeof existingApiKey === "string" ? existingApiKey : undefined;
+  const normalizedApiKey = resolvedApiKey?.trim();
+  providers.nebius = {
+    ...existingProviderRest,
+    baseUrl: defaultProvider.baseUrl,
+    api: defaultProvider.api,
+    ...(normalizedApiKey ? { apiKey: normalizedApiKey } : {}),
+    models: mergedModels.length > 0 ? mergedModels : defaultProvider.models,
+  };
+
+  return {
+    ...cfg,
+    agents: {
+      ...cfg.agents,
+      defaults: {
+        ...cfg.agents?.defaults,
+        models,
+      },
+    },
+    models: {
+      mode: cfg.models?.mode ?? "merge",
+      providers,
+    },
+  };
+}
+
+/**
+ * Apply the Nebius provider config and make the Nebius default model the primary model.
+ *
+ * If the current `agents.defaults.model` has a `fallbacks` property it is carried over;
+ * every other field of that model entry is replaced.
+ *
+ * @param cfg Current config; not mutated.
+ * @returns A new config whose primary model is the Nebius default model ref.
+ */
+export function applyNebiusConfig(cfg: MoltbotConfig): MoltbotConfig {
+  const withProvider = applyNebiusProviderConfig(cfg);
+  const currentModel = withProvider.agents?.defaults?.model;
+  // Spreading `undefined` below is a no-op, so fallbacks only appear when already present.
+  const carriedFallbacks =
+    currentModel && "fallbacks" in (currentModel as Record<string, unknown>)
+      ? {
+          fallbacks: (currentModel as { fallbacks?: string[] }).fallbacks,
+        }
+      : undefined;
+
+  const defaults = {
+    ...withProvider.agents?.defaults,
+    model: {
+      ...carriedFallbacks,
+      primary: NEBIUS_DEFAULT_MODEL_REF,
+    },
+  };
+
+  return {
+    ...withProvider,
+    agents: {
+      ...withProvider.agents,
+      defaults,
+    },
+  };
+}
+
 /**
 * Apply Venice provider configuration without changing the default model.
 * Registers Venice models and sets up the provider, but preserves existing model selection.
--- a/src/commands/onboard-auth.credentials.ts
+++ b/src/commands/onboard-auth.credentials.ts
@ -116,6 +116,20 @@ export const ZAI_DEFAULT_MODEL_REF = "zai/glm-4.7";
 export const XIAOMI_DEFAULT_MODEL_REF = "xiaomi/mimo-v2-flash";
 export const OPENROUTER_DEFAULT_MODEL_REF = "openrouter/auto";
 export const VERCEL_AI_GATEWAY_DEFAULT_MODEL_REF = "vercel-ai-gateway/anthropic/claude-opus-4.5";
+export const NEBIUS_DEFAULT_MODEL_REF = "nebius/Qwen/Qwen3-32B-fast";
+
+/**
+ * Store a Nebius API key under the `nebius:default` auth profile.
+ *
+ * @param key API key to store.
+ * @param agentDir Optional agent directory; passed through `resolveAuthAgentDir`.
+ */
+export async function setNebiusApiKey(key: string, agentDir?: string) {
+  // Write to resolved agent dir so gateway finds credentials on startup.
+  const targetDir = resolveAuthAgentDir(agentDir);
+  upsertAuthProfile({
+    profileId: "nebius:default",
+    credential: { type: "api_key", provider: "nebius", key },
+    agentDir: targetDir,
+  });
+}

 export async function setZaiApiKey(key: string, agentDir?: string) {
  // Write to resolved agent dir so gateway finds credentials on startup.
--- a/src/commands/onboard-auth.ts
+++ b/src/commands/onboard-auth.ts
@ -9,6 +9,8 @@ export {
  applyKimiCodeProviderConfig,
  applyMoonshotConfig,
  applyMoonshotProviderConfig,
+  applyNebiusConfig,
+  applyNebiusProviderConfig,
  applyOpenrouterConfig,
  applyOpenrouterProviderConfig,
  applySyntheticConfig,
@ -35,12 +37,14 @@ export {
  applyOpencodeZenProviderConfig,
 } from "./onboard-auth.config-opencode.js";
 export {
+  NEBIUS_DEFAULT_MODEL_REF,
  OPENROUTER_DEFAULT_MODEL_REF,
  setAnthropicApiKey,
  setGeminiApiKey,
  setKimiCodeApiKey,
  setMinimaxApiKey,
  setMoonshotApiKey,
+  setNebiusApiKey,
  setOpencodeZenApiKey,
  setOpenrouterApiKey,
  setSyntheticApiKey,