From 8fc1bb784c6d46123a44f9c39e853b9dddc30387 Mon Sep 17 00:00:00 2001
From: kiranchilledout <citadelforsomerefuge@gmail.com>
Date: Fri, 30 Jan 2026 08:08:25 +0530
Subject: [PATCH] Add Kimi K2 Thinking and DeepSeek V3.2 to Nebius; default to GLM 4.7 FP8

---
 docs/concepts/model-providers.md         |  8 ++--
 docs/gateway/configuration.md            | 10 ++---
 docs/providers/nebius.md                 | 11 +++---
 src/agents/models-config.providers.ts    | 48 ++++++++++++++++--------
 src/commands/onboard-auth.credentials.ts |  2 +-
 5 files changed, 49 insertions(+), 30 deletions(-)

diff --git a/docs/concepts/model-providers.md b/docs/concepts/model-providers.md
index 23b002a08..69f889b2e 100644
--- a/docs/concepts/model-providers.md
+++ b/docs/concepts/model-providers.md
@@ -245,13 +245,13 @@ Nebius provides OpenAI-compatible inference for frontier and open models:
 
 - Provider: `nebius`
 - Auth: `NEBIUS_API_KEY`
-- Example model: `nebius/Qwen/Qwen3-32B-fast`
+- Example model: `nebius/zai-org/GLM-4.7-FP8` (default)
 - CLI: `moltbot onboard --auth-choice nebius-api-key`
 
 ```json5
 {
   agents: {
-    defaults: { model: { primary: "nebius/Qwen/Qwen3-32B-fast" } }
+    defaults: { model: { primary: "nebius/zai-org/GLM-4.7-FP8" } }
   },
   models: {
     mode: "merge",
@@ -262,8 +262,8 @@ Nebius provides OpenAI-compatible inference for frontier and open models:
         api: "openai-completions",
         models: [
           {
-            id: "Qwen/Qwen3-32B-fast",
-            name: "Qwen3 32B Fast",
+            id: "zai-org/GLM-4.7-FP8",
+            name: "GLM 4.7 FP8",
             reasoning: false,
             input: ["text"],
             cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
diff --git a/docs/gateway/configuration.md b/docs/gateway/configuration.md
index e449493c0..ac42d3c3c 100644
--- a/docs/gateway/configuration.md
+++ b/docs/gateway/configuration.md
@@ -2481,8 +2481,8 @@ Use Nebius's OpenAI-compatible endpoint:
   env: { NEBIUS_API_KEY: "sk-..." },
   agents: {
     defaults: {
-      model: { primary: "nebius/Qwen/Qwen3-32B-fast" },
-      models: { "nebius/Qwen/Qwen3-32B-fast": { alias: "Qwen3 32B" } }
+      model: { primary: "nebius/zai-org/GLM-4.7-FP8" },
+      models: { "nebius/zai-org/GLM-4.7-FP8": { alias: "GLM 4.7" } }
     }
   },
   models: {
@@ -2494,8 +2494,8 @@ Use Nebius's OpenAI-compatible endpoint:
         api: "openai-completions",
         models: [
           {
-            id: "Qwen/Qwen3-32B-fast",
-            name: "Qwen3 32B Fast",
+            id: "zai-org/GLM-4.7-FP8",
+            name: "GLM 4.7 FP8",
             reasoning: false,
             input: ["text"],
             cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
@@ -2511,7 +2511,7 @@ Use Nebius's OpenAI-compatible endpoint:
 
 Notes:
 - Set `NEBIUS_API_KEY` in the environment or use `moltbot onboard --auth-choice nebius-api-key`.
-- Model ref: `nebius/Qwen/Qwen3-32B-fast`.
+- Model ref: `nebius/zai-org/GLM-4.7-FP8` (default).
 - Available models include Qwen, DeepSeek, Llama, and GLM families.
 
 ### Synthetic (Anthropic-compatible)
diff --git a/docs/providers/nebius.md b/docs/providers/nebius.md
index 02d62c109..161dc4a39 100644
--- a/docs/providers/nebius.md
+++ b/docs/providers/nebius.md
@@ -10,13 +10,14 @@ Nebius Token Factory provides OpenAI-compatible inference for frontier and open
 
 ## Model overview
 
+- **GLM 4.7 FP8** (default): GLM model from Z.AI, 202752-token context window.
 - **Qwen3 32B Fast**: 128000-token context window, fast inference.
 - **DeepSeek V3 Fast**: Latest DeepSeek model with fast inference.
 - **DeepSeek R1 Fast**: Reasoning model with chain-of-thought.
 - **Llama 3.3 70B**: Strong general-purpose model with fast variant.
 - **Qwen2.5 VL 72B**: Vision-language model.
 - **Qwen2.5 Coder 7B Fast**: Code-specialized model.
-- **GLM models**: GLM 4.7 and GLM 4.5 from Z.AI.
+- **GLM 4.5**: Additional GLM model from Z.AI.
 - Base URL: `https://api.tokenfactory.nebius.com/v1`
 - Authorization: `Bearer $NEBIUS_API_KEY`
 
@@ -33,7 +34,7 @@ moltbot onboard --auth-choice nebius-api-key --nebius-api-key "$NEBIUS_API_KEY"
 ```json5
 {
   env: { NEBIUS_API_KEY: "your-key" },
-  agents: { defaults: { model: { primary: "nebius/Qwen/Qwen3-32B-fast" } } },
+  agents: { defaults: { model: { primary: "nebius/zai-org/GLM-4.7-FP8" } } },
   models: {
     mode: "merge",
     providers: {
@@ -43,8 +44,8 @@ moltbot onboard --auth-choice nebius-api-key --nebius-api-key "$NEBIUS_API_KEY"
         apiKey: "NEBIUS_API_KEY",
         models: [
           {
-            id: "Qwen/Qwen3-32B-fast",
-            name: "Qwen3 32B Fast",
+            id: "zai-org/GLM-4.7-FP8",
+            name: "GLM 4.7 FP8",
             reasoning: false,
             input: ["text"],
             cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
@@ -60,6 +61,6 @@ moltbot onboard --auth-choice nebius-api-key --nebius-api-key "$NEBIUS_API_KEY"
 
 ## Notes
 
-- Default model ref: `nebius/Qwen/Qwen3-32B-fast`.
+- Default model ref: `nebius/zai-org/GLM-4.7-FP8`.
 - The provider is injected automatically when `NEBIUS_API_KEY` is set (or an auth profile exists).
 - See [/concepts/model-providers](/concepts/model-providers) for provider rules.
\ No newline at end of file
diff --git a/src/agents/models-config.providers.ts b/src/agents/models-config.providers.ts
index 426ae73a3..badf10e11 100644
--- a/src/agents/models-config.providers.ts
+++ b/src/agents/models-config.providers.ts
@@ -42,8 +42,8 @@ const XIAOMI_DEFAULT_COST = {
 };
 
 const NEBIUS_BASE_URL = "https://api.tokenfactory.nebius.com/v1";
-export const NEBIUS_DEFAULT_MODEL_ID = "Qwen/Qwen3-32B-fast";
-const NEBIUS_DEFAULT_CONTEXT_WINDOW = 128000;
+export const NEBIUS_DEFAULT_MODEL_ID = "zai-org/GLM-4.7-FP8";
+const NEBIUS_DEFAULT_CONTEXT_WINDOW = 202752;
 const NEBIUS_DEFAULT_MAX_TOKENS = 8192;
 const NEBIUS_DEFAULT_COST = {
   input: 0,
@@ -389,10 +389,10 @@ export function buildNebiusProvider(): ProviderConfig {
       {
         id: "Qwen/Qwen3-32B-fast",
         name: "Qwen3 32B Fast",
-        reasoning: false,
+        reasoning: true,
         input: ["text"],
         cost: NEBIUS_DEFAULT_COST,
-        contextWindow: NEBIUS_DEFAULT_CONTEXT_WINDOW,
+        contextWindow: 40960,
         maxTokens: NEBIUS_DEFAULT_MAX_TOKENS,
       },
       {
@@ -401,7 +401,7 @@ export function buildNebiusProvider(): ProviderConfig {
         reasoning: false,
         input: ["text"],
         cost: NEBIUS_DEFAULT_COST,
-        contextWindow: NEBIUS_DEFAULT_CONTEXT_WINDOW,
+        contextWindow: 131072,
         maxTokens: NEBIUS_DEFAULT_MAX_TOKENS,
       },
       {
@@ -410,7 +410,7 @@ export function buildNebiusProvider(): ProviderConfig {
         reasoning: false,
         input: ["text"],
         cost: NEBIUS_DEFAULT_COST,
-        contextWindow: NEBIUS_DEFAULT_CONTEXT_WINDOW,
+        contextWindow: 131072,
         maxTokens: NEBIUS_DEFAULT_MAX_TOKENS,
       },
       {
@@ -419,7 +419,7 @@ export function buildNebiusProvider(): ProviderConfig {
         reasoning: false,
         input: ["text"],
         cost: NEBIUS_DEFAULT_COST,
-        contextWindow: NEBIUS_DEFAULT_CONTEXT_WINDOW,
+        contextWindow: 131072,
         maxTokens: NEBIUS_DEFAULT_MAX_TOKENS,
       },
       {
@@ -428,7 +428,7 @@ export function buildNebiusProvider(): ProviderConfig {
         reasoning: false,
         input: ["text"],
         cost: NEBIUS_DEFAULT_COST,
-        contextWindow: NEBIUS_DEFAULT_CONTEXT_WINDOW,
+        contextWindow: 32768,
         maxTokens: NEBIUS_DEFAULT_MAX_TOKENS,
       },
       {
@@ -437,7 +437,7 @@ export function buildNebiusProvider(): ProviderConfig {
         reasoning: true,
         input: ["text"],
         cost: NEBIUS_DEFAULT_COST,
-        contextWindow: NEBIUS_DEFAULT_CONTEXT_WINDOW,
+        contextWindow: 32768,
         maxTokens: NEBIUS_DEFAULT_MAX_TOKENS,
       },
       {
@@ -446,7 +446,7 @@ export function buildNebiusProvider(): ProviderConfig {
         reasoning: false,
         input: ["text", "image"],
         cost: NEBIUS_DEFAULT_COST,
-        contextWindow: NEBIUS_DEFAULT_CONTEXT_WINDOW,
+        contextWindow: 32000,
         maxTokens: NEBIUS_DEFAULT_MAX_TOKENS,
       },
       {
@@ -455,25 +455,43 @@ export function buildNebiusProvider(): ProviderConfig {
         reasoning: false,
         input: ["text"],
         cost: NEBIUS_DEFAULT_COST,
-        contextWindow: NEBIUS_DEFAULT_CONTEXT_WINDOW,
+        contextWindow: 32768,
         maxTokens: NEBIUS_DEFAULT_MAX_TOKENS,
       },
       {
         id: "zai-org/GLM-4.7-FP8",
         name: "GLM 4.7 FP8",
-        reasoning: false,
+        reasoning: true,
         input: ["text"],
         cost: NEBIUS_DEFAULT_COST,
-        contextWindow: NEBIUS_DEFAULT_CONTEXT_WINDOW,
+        contextWindow: 202752,
         maxTokens: NEBIUS_DEFAULT_MAX_TOKENS,
       },
       {
         id: "zai-org/GLM-4.5",
         name: "GLM 4.5",
-        reasoning: false,
+        reasoning: true,
         input: ["text"],
         cost: NEBIUS_DEFAULT_COST,
-        contextWindow: NEBIUS_DEFAULT_CONTEXT_WINDOW,
+        contextWindow: 131072,
+        maxTokens: NEBIUS_DEFAULT_MAX_TOKENS,
+      },
+      {
+        id: "deepseek-ai/DeepSeek-V3.2",
+        name: "DeepSeek V3.2",
+        reasoning: true,
+        input: ["text"],
+        cost: NEBIUS_DEFAULT_COST,
+        contextWindow: 163000,
+        maxTokens: NEBIUS_DEFAULT_MAX_TOKENS,
+      },
+      {
+        id: "moonshotai/Kimi-K2-Thinking",
+        name: "Kimi K2 Thinking",
+        reasoning: true,
+        input: ["text"],
+        cost: NEBIUS_DEFAULT_COST,
+        contextWindow: 262144,
         maxTokens: NEBIUS_DEFAULT_MAX_TOKENS,
       },
     ],
diff --git a/src/commands/onboard-auth.credentials.ts b/src/commands/onboard-auth.credentials.ts
index a6692d316..314e7ea76 100644
--- a/src/commands/onboard-auth.credentials.ts
+++ b/src/commands/onboard-auth.credentials.ts
@@ -116,7 +116,7 @@ export const ZAI_DEFAULT_MODEL_REF = "zai/glm-4.7";
 export const XIAOMI_DEFAULT_MODEL_REF = "xiaomi/mimo-v2-flash";
 export const OPENROUTER_DEFAULT_MODEL_REF = "openrouter/auto";
 export const VERCEL_AI_GATEWAY_DEFAULT_MODEL_REF = "vercel-ai-gateway/anthropic/claude-opus-4.5";
-export const NEBIUS_DEFAULT_MODEL_REF = "nebius/Qwen/Qwen3-32B-fast";
+export const NEBIUS_DEFAULT_MODEL_REF = "nebius/zai-org/GLM-4.7-FP8";
 
 export async function setNebiusApiKey(key: string, agentDir?: string) {
   // Write to resolved agent dir so gateway finds credentials on startup.