Add Kimi K2 Thinking and DeepSeek V3.2 models; make GLM 4.7 FP8 the Nebius default

2026-01-30 08:08:25 +05:30 · 2026-01-30 08:08:25 +05:30 · 8fc1bb784c
commit 8fc1bb784c
parent 8da5d9fb3d
5 changed files with 49 additions and 30 deletions
--- a/docs/concepts/model-providers.md
+++ b/docs/concepts/model-providers.md
@ -245,13 +245,13 @@ Nebius provides OpenAI-compatible inference for frontier and open models:

 - Provider: `nebius`
 - Auth: `NEBIUS_API_KEY`
- Example model: `nebius/Qwen/Qwen3-32B-fast`
+- Example model: `nebius/zai-org/GLM-4.7-FP8` (default)
 - CLI: `moltbot onboard --auth-choice nebius-api-key`

 ```json5
 {
  agents: {
-    defaults: { model: { primary: "nebius/Qwen/Qwen3-32B-fast" } }
+    defaults: { model: { primary: "nebius/zai-org/GLM-4.7-FP8" } }
  },
  models: {
    mode: "merge",
@ -262,8 +262,8 @@ Nebius provides OpenAI-compatible inference for frontier and open models:
        api: "openai-completions",
        models: [
          {
-            id: "Qwen/Qwen3-32B-fast",
-            name: "Qwen3 32B Fast",
+            id: "zai-org/GLM-4.7-FP8",
+            name: "GLM 4.7 FP8",
            reasoning: false,
            input: ["text"],
            cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
--- a/docs/gateway/configuration.md
+++ b/docs/gateway/configuration.md
@ -2481,8 +2481,8 @@ Use Nebius's OpenAI-compatible endpoint:
  env: { NEBIUS_API_KEY: "sk-..." },
  agents: {
    defaults: {
-      model: { primary: "nebius/Qwen/Qwen3-32B-fast" },
-      models: { "nebius/Qwen/Qwen3-32B-fast": { alias: "Qwen3 32B" } }
+      model: { primary: "nebius/zai-org/GLM-4.7-FP8" },
+      models: { "nebius/zai-org/GLM-4.7-FP8": { alias: "GLM 4.7" } }
    }
  },
  models: {
@ -2494,8 +2494,8 @@ Use Nebius's OpenAI-compatible endpoint:
        api: "openai-completions",
        models: [
          {
-            id: "Qwen/Qwen3-32B-fast",
-            name: "Qwen3 32B Fast",
+            id: "zai-org/GLM-4.7-FP8",
+            name: "GLM 4.7 FP8",
            reasoning: false,
            input: ["text"],
            cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
@ -2511,7 +2511,7 @@ Use Nebius's OpenAI-compatible endpoint:

 Notes:
 - Set `NEBIUS_API_KEY` in the environment or use `moltbot onboard --auth-choice nebius-api-key`.
- Model ref: `nebius/Qwen/Qwen3-32B-fast`.
+- Model ref: `nebius/zai-org/GLM-4.7-FP8` (default).
 - Available models include Qwen, DeepSeek, Llama, and GLM families.

 ### Synthetic (Anthropic-compatible)
--- a/docs/providers/nebius.md
+++ b/docs/providers/nebius.md
@ -10,13 +10,14 @@ Nebius Token Factory provides OpenAI-compatible inference for frontier and open

 ## Model overview

+- **GLM 4.7 FP8** (default): GLM model from Z.AI, 202752-token context window.
 - **Qwen3 32B Fast**: 128000-token context window, fast inference.
 - **DeepSeek V3 Fast**: Latest DeepSeek model with fast inference.
 - **DeepSeek R1 Fast**: Reasoning model with chain-of-thought.
 - **Llama 3.3 70B**: Strong general-purpose model with fast variant.
 - **Qwen2.5 VL 72B**: Vision-language model.
 - **Qwen2.5 Coder 7B Fast**: Code-specialized model.
- **GLM models**: GLM 4.7 and GLM 4.5 from Z.AI.
+- **GLM 4.5**: Additional GLM model from Z.AI.
 - Base URL: `https://api.tokenfactory.nebius.com/v1`
 - Authorization: `Bearer $NEBIUS_API_KEY`

@ -33,7 +34,7 @@ moltbot onboard --auth-choice nebius-api-key --nebius-api-key "$NEBIUS_API_KEY"
 ```json5
 {
  env: { NEBIUS_API_KEY: "your-key" },
-  agents: { defaults: { model: { primary: "nebius/Qwen/Qwen3-32B-fast" } } },
+  agents: { defaults: { model: { primary: "nebius/zai-org/GLM-4.7-FP8" } } },
  models: {
    mode: "merge",
    providers: {
@ -43,8 +44,8 @@ moltbot onboard --auth-choice nebius-api-key --nebius-api-key "$NEBIUS_API_KEY"
        apiKey: "NEBIUS_API_KEY",
        models: [
          {
-            id: "Qwen/Qwen3-32B-fast",
-            name: "Qwen3 32B Fast",
+            id: "zai-org/GLM-4.7-FP8",
+            name: "GLM 4.7 FP8",
            reasoning: false,
            input: ["text"],
            cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
@ -60,6 +61,6 @@ moltbot onboard --auth-choice nebius-api-key --nebius-api-key "$NEBIUS_API_KEY"

 ## Notes

- Default model ref: `nebius/Qwen/Qwen3-32B-fast`.
+- Default model ref: `nebius/zai-org/GLM-4.7-FP8`.
 - The provider is injected automatically when `NEBIUS_API_KEY` is set (or an auth profile exists).
 - See [/concepts/model-providers](/concepts/model-providers) for provider rules.
--- a/src/agents/models-config.providers.ts
+++ b/src/agents/models-config.providers.ts
@ -42,8 +42,8 @@ const XIAOMI_DEFAULT_COST = {
 };

 const NEBIUS_BASE_URL = "https://api.tokenfactory.nebius.com/v1";
-export const NEBIUS_DEFAULT_MODEL_ID = "Qwen/Qwen3-32B-fast";
-const NEBIUS_DEFAULT_CONTEXT_WINDOW = 128000;
+export const NEBIUS_DEFAULT_MODEL_ID = "zai-org/GLM-4.7-FP8";
+const NEBIUS_DEFAULT_CONTEXT_WINDOW = 202752;
 const NEBIUS_DEFAULT_MAX_TOKENS = 8192;
 const NEBIUS_DEFAULT_COST = {
  input: 0,
@ -389,10 +389,10 @@ export function buildNebiusProvider(): ProviderConfig {
      {
        id: "Qwen/Qwen3-32B-fast",
        name: "Qwen3 32B Fast",
-        reasoning: false,
+        reasoning: true,
        input: ["text"],
        cost: NEBIUS_DEFAULT_COST,
-        contextWindow: NEBIUS_DEFAULT_CONTEXT_WINDOW,
+        contextWindow: 40960,
        maxTokens: NEBIUS_DEFAULT_MAX_TOKENS,
      },
      {
@ -401,7 +401,7 @@ export function buildNebiusProvider(): ProviderConfig {
        reasoning: false,
        input: ["text"],
        cost: NEBIUS_DEFAULT_COST,
-        contextWindow: NEBIUS_DEFAULT_CONTEXT_WINDOW,
+        contextWindow: 131072,
        maxTokens: NEBIUS_DEFAULT_MAX_TOKENS,
      },
      {
@ -410,7 +410,7 @@ export function buildNebiusProvider(): ProviderConfig {
        reasoning: false,
        input: ["text"],
        cost: NEBIUS_DEFAULT_COST,
-        contextWindow: NEBIUS_DEFAULT_CONTEXT_WINDOW,
+        contextWindow: 131072,
        maxTokens: NEBIUS_DEFAULT_MAX_TOKENS,
      },
      {
@ -419,7 +419,7 @@ export function buildNebiusProvider(): ProviderConfig {
        reasoning: false,
        input: ["text"],
        cost: NEBIUS_DEFAULT_COST,
-        contextWindow: NEBIUS_DEFAULT_CONTEXT_WINDOW,
+        contextWindow: 131072,
        maxTokens: NEBIUS_DEFAULT_MAX_TOKENS,
      },
      {
@ -428,7 +428,7 @@ export function buildNebiusProvider(): ProviderConfig {
        reasoning: false,
        input: ["text"],
        cost: NEBIUS_DEFAULT_COST,
-        contextWindow: NEBIUS_DEFAULT_CONTEXT_WINDOW,
+        contextWindow: 32768,
        maxTokens: NEBIUS_DEFAULT_MAX_TOKENS,
      },
      {
@ -437,7 +437,7 @@ export function buildNebiusProvider(): ProviderConfig {
        reasoning: true,
        input: ["text"],
        cost: NEBIUS_DEFAULT_COST,
-        contextWindow: NEBIUS_DEFAULT_CONTEXT_WINDOW,
+        contextWindow: 32768,
        maxTokens: NEBIUS_DEFAULT_MAX_TOKENS,
      },
      {
@ -446,7 +446,7 @@ export function buildNebiusProvider(): ProviderConfig {
        reasoning: false,
        input: ["text", "image"],
        cost: NEBIUS_DEFAULT_COST,
-        contextWindow: NEBIUS_DEFAULT_CONTEXT_WINDOW,
+        contextWindow: 32000,
        maxTokens: NEBIUS_DEFAULT_MAX_TOKENS,
      },
      {
@ -455,25 +455,43 @@ export function buildNebiusProvider(): ProviderConfig {
        reasoning: false,
        input: ["text"],
        cost: NEBIUS_DEFAULT_COST,
-        contextWindow: NEBIUS_DEFAULT_CONTEXT_WINDOW,
+        contextWindow: 32768,
        maxTokens: NEBIUS_DEFAULT_MAX_TOKENS,
      },
      {
        id: "zai-org/GLM-4.7-FP8",
        name: "GLM 4.7 FP8",
-        reasoning: false,
+        reasoning: true,
        input: ["text"],
        cost: NEBIUS_DEFAULT_COST,
-        contextWindow: NEBIUS_DEFAULT_CONTEXT_WINDOW,
+        contextWindow: 202752,
        maxTokens: NEBIUS_DEFAULT_MAX_TOKENS,
      },
      {
        id: "zai-org/GLM-4.5",
        name: "GLM 4.5",
-        reasoning: false,
+        reasoning: true,
        input: ["text"],
        cost: NEBIUS_DEFAULT_COST,
-        contextWindow: NEBIUS_DEFAULT_CONTEXT_WINDOW,
+        contextWindow: 131072,
+        maxTokens: NEBIUS_DEFAULT_MAX_TOKENS,
+      },
+      {
+        id: "deepseek-ai/DeepSeek-V3.2",
+        name: "DeepSeek V3.2",
+        reasoning: true,
+        input: ["text"],
+        cost: NEBIUS_DEFAULT_COST,
+        contextWindow: 163000,
+        maxTokens: NEBIUS_DEFAULT_MAX_TOKENS,
+      },
+      {
+        id: "moonshotai/Kimi-K2-Thinking",
+        name: "Kimi K2 Thinking",
+        reasoning: true,
+        input: ["text"],
+        cost: NEBIUS_DEFAULT_COST,
+        contextWindow: 262144,
        maxTokens: NEBIUS_DEFAULT_MAX_TOKENS,
      },
    ],
--- a/src/commands/onboard-auth.credentials.ts
+++ b/src/commands/onboard-auth.credentials.ts
@ -116,7 +116,7 @@ export const ZAI_DEFAULT_MODEL_REF = "zai/glm-4.7";
 export const XIAOMI_DEFAULT_MODEL_REF = "xiaomi/mimo-v2-flash";
 export const OPENROUTER_DEFAULT_MODEL_REF = "openrouter/auto";
 export const VERCEL_AI_GATEWAY_DEFAULT_MODEL_REF = "vercel-ai-gateway/anthropic/claude-opus-4.5";
-export const NEBIUS_DEFAULT_MODEL_REF = "nebius/Qwen/Qwen3-32B-fast";
+export const NEBIUS_DEFAULT_MODEL_REF = "nebius/zai-org/GLM-4.7-FP8";

 export async function setNebiusApiKey(key: string, agentDir?: string) {
  // Write to resolved agent dir so gateway finds credentials on startup.