From 8fc1bb784c6d46123a44f9c39e853b9dddc30387 Mon Sep 17 00:00:00 2001 From: kiranchilledout Date: Fri, 30 Jan 2026 08:08:25 +0530 Subject: [PATCH] added kimi and deepseek v3.2 models --- docs/concepts/model-providers.md | 8 ++-- docs/gateway/configuration.md | 10 ++--- docs/providers/nebius.md | 11 +++--- src/agents/models-config.providers.ts | 48 ++++++++++++++++-------- src/commands/onboard-auth.credentials.ts | 2 +- 5 files changed, 49 insertions(+), 30 deletions(-) diff --git a/docs/concepts/model-providers.md b/docs/concepts/model-providers.md index 23b002a08..69f889b2e 100644 --- a/docs/concepts/model-providers.md +++ b/docs/concepts/model-providers.md @@ -245,13 +245,13 @@ Nebius provides OpenAI-compatible inference for frontier and open models: - Provider: `nebius` - Auth: `NEBIUS_API_KEY` -- Example model: `nebius/Qwen/Qwen3-32B-fast` +- Example model: `nebius/zai-org/GLM-4.7-FP8` (default) - CLI: `moltbot onboard --auth-choice nebius-api-key` ```json5 { agents: { - defaults: { model: { primary: "nebius/Qwen/Qwen3-32B-fast" } } + defaults: { model: { primary: "nebius/zai-org/GLM-4.7-FP8" } } }, models: { mode: "merge", @@ -262,8 +262,8 @@ Nebius provides OpenAI-compatible inference for frontier and open models: api: "openai-completions", models: [ { - id: "Qwen/Qwen3-32B-fast", - name: "Qwen3 32B Fast", + id: "zai-org/GLM-4.7-FP8", + name: "GLM 4.7 FP8", reasoning: false, input: ["text"], cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, diff --git a/docs/gateway/configuration.md b/docs/gateway/configuration.md index e449493c0..ac42d3c3c 100644 --- a/docs/gateway/configuration.md +++ b/docs/gateway/configuration.md @@ -2481,8 +2481,8 @@ Use Nebius's OpenAI-compatible endpoint: env: { NEBIUS_API_KEY: "sk-..." 
}, agents: { defaults: { - model: { primary: "nebius/Qwen/Qwen3-32B-fast" }, - models: { "nebius/Qwen/Qwen3-32B-fast": { alias: "Qwen3 32B" } } + model: { primary: "nebius/zai-org/GLM-4.7-FP8" }, + models: { "nebius/zai-org/GLM-4.7-FP8": { alias: "GLM 4.7" } } } }, models: { @@ -2494,8 +2494,8 @@ Use Nebius's OpenAI-compatible endpoint: api: "openai-completions", models: [ { - id: "Qwen/Qwen3-32B-fast", - name: "Qwen3 32B Fast", + id: "zai-org/GLM-4.7-FP8", + name: "GLM 4.7 FP8", reasoning: false, input: ["text"], cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, @@ -2511,7 +2511,7 @@ Use Nebius's OpenAI-compatible endpoint: Notes: - Set `NEBIUS_API_KEY` in the environment or use `moltbot onboard --auth-choice nebius-api-key`. -- Model ref: `nebius/Qwen/Qwen3-32B-fast`. +- Model ref: `nebius/zai-org/GLM-4.7-FP8` (default). - Available models include Qwen, DeepSeek, Llama, and GLM families. ### Synthetic (Anthropic-compatible) diff --git a/docs/providers/nebius.md b/docs/providers/nebius.md index 02d62c109..161dc4a39 100644 --- a/docs/providers/nebius.md +++ b/docs/providers/nebius.md @@ -10,13 +10,14 @@ Nebius Token Factory provides OpenAI-compatible inference for frontier and open ## Model overview +- **GLM 4.7 FP8** (default): GLM model from Z.AI, 202752-token context window. - **Qwen3 32B Fast**: 128000-token context window, fast inference. - **DeepSeek V3 Fast**: Latest DeepSeek model with fast inference. - **DeepSeek R1 Fast**: Reasoning model with chain-of-thought. - **Llama 3.3 70B**: Strong general-purpose model with fast variant. - **Qwen2.5 VL 72B**: Vision-language model. - **Qwen2.5 Coder 7B Fast**: Code-specialized model. -- **GLM models**: GLM 4.7 and GLM 4.5 from Z.AI. +- **GLM 4.5**: Additional GLM model from Z.AI. 
- Base URL: `https://api.tokenfactory.nebius.com/v1` - Authorization: `Bearer $NEBIUS_API_KEY` @@ -33,7 +34,7 @@ moltbot onboard --auth-choice nebius-api-key --nebius-api-key "$NEBIUS_API_KEY" ```json5 { env: { NEBIUS_API_KEY: "your-key" }, - agents: { defaults: { model: { primary: "nebius/Qwen/Qwen3-32B-fast" } } }, + agents: { defaults: { model: { primary: "nebius/zai-org/GLM-4.7-FP8" } } }, models: { mode: "merge", providers: { @@ -43,8 +44,8 @@ moltbot onboard --auth-choice nebius-api-key --nebius-api-key "$NEBIUS_API_KEY" apiKey: "NEBIUS_API_KEY", models: [ { - id: "Qwen/Qwen3-32B-fast", - name: "Qwen3 32B Fast", + id: "zai-org/GLM-4.7-FP8", + name: "GLM 4.7 FP8", reasoning: false, input: ["text"], cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, @@ -60,6 +61,6 @@ moltbot onboard --auth-choice nebius-api-key --nebius-api-key "$NEBIUS_API_KEY" ## Notes -- Default model ref: `nebius/Qwen/Qwen3-32B-fast`. +- Default model ref: `nebius/zai-org/GLM-4.7-FP8`. - The provider is injected automatically when `NEBIUS_API_KEY` is set (or an auth profile exists). - See [/concepts/model-providers](/concepts/model-providers) for provider rules. 
\ No newline at end of file diff --git a/src/agents/models-config.providers.ts b/src/agents/models-config.providers.ts index 426ae73a3..badf10e11 100644 --- a/src/agents/models-config.providers.ts +++ b/src/agents/models-config.providers.ts @@ -42,8 +42,8 @@ const XIAOMI_DEFAULT_COST = { }; const NEBIUS_BASE_URL = "https://api.tokenfactory.nebius.com/v1"; -export const NEBIUS_DEFAULT_MODEL_ID = "Qwen/Qwen3-32B-fast"; -const NEBIUS_DEFAULT_CONTEXT_WINDOW = 128000; +export const NEBIUS_DEFAULT_MODEL_ID = "zai-org/GLM-4.7-FP8"; +const NEBIUS_DEFAULT_CONTEXT_WINDOW = 202752; const NEBIUS_DEFAULT_MAX_TOKENS = 8192; const NEBIUS_DEFAULT_COST = { input: 0, @@ -389,10 +389,10 @@ export function buildNebiusProvider(): ProviderConfig { { id: "Qwen/Qwen3-32B-fast", name: "Qwen3 32B Fast", - reasoning: false, + reasoning: true, input: ["text"], cost: NEBIUS_DEFAULT_COST, - contextWindow: NEBIUS_DEFAULT_CONTEXT_WINDOW, + contextWindow: 40960, maxTokens: NEBIUS_DEFAULT_MAX_TOKENS, }, { @@ -401,7 +401,7 @@ export function buildNebiusProvider(): ProviderConfig { reasoning: false, input: ["text"], cost: NEBIUS_DEFAULT_COST, - contextWindow: NEBIUS_DEFAULT_CONTEXT_WINDOW, + contextWindow: 131072, maxTokens: NEBIUS_DEFAULT_MAX_TOKENS, }, { @@ -410,7 +410,7 @@ export function buildNebiusProvider(): ProviderConfig { reasoning: false, input: ["text"], cost: NEBIUS_DEFAULT_COST, - contextWindow: NEBIUS_DEFAULT_CONTEXT_WINDOW, + contextWindow: 131072, maxTokens: NEBIUS_DEFAULT_MAX_TOKENS, }, { @@ -419,7 +419,7 @@ export function buildNebiusProvider(): ProviderConfig { reasoning: false, input: ["text"], cost: NEBIUS_DEFAULT_COST, - contextWindow: NEBIUS_DEFAULT_CONTEXT_WINDOW, + contextWindow: 131072, maxTokens: NEBIUS_DEFAULT_MAX_TOKENS, }, { @@ -428,7 +428,7 @@ export function buildNebiusProvider(): ProviderConfig { reasoning: false, input: ["text"], cost: NEBIUS_DEFAULT_COST, - contextWindow: NEBIUS_DEFAULT_CONTEXT_WINDOW, + contextWindow: 32768, maxTokens: NEBIUS_DEFAULT_MAX_TOKENS, }, 
{ @@ -437,7 +437,7 @@ export function buildNebiusProvider(): ProviderConfig { reasoning: true, input: ["text"], cost: NEBIUS_DEFAULT_COST, - contextWindow: NEBIUS_DEFAULT_CONTEXT_WINDOW, + contextWindow: 32768, maxTokens: NEBIUS_DEFAULT_MAX_TOKENS, }, { @@ -446,7 +446,7 @@ export function buildNebiusProvider(): ProviderConfig { reasoning: false, input: ["text", "image"], cost: NEBIUS_DEFAULT_COST, - contextWindow: NEBIUS_DEFAULT_CONTEXT_WINDOW, + contextWindow: 32000, maxTokens: NEBIUS_DEFAULT_MAX_TOKENS, }, { @@ -455,25 +455,43 @@ export function buildNebiusProvider(): ProviderConfig { reasoning: false, input: ["text"], cost: NEBIUS_DEFAULT_COST, - contextWindow: NEBIUS_DEFAULT_CONTEXT_WINDOW, + contextWindow: 32768, maxTokens: NEBIUS_DEFAULT_MAX_TOKENS, }, { id: "zai-org/GLM-4.7-FP8", name: "GLM 4.7 FP8", - reasoning: false, + reasoning: true, input: ["text"], cost: NEBIUS_DEFAULT_COST, - contextWindow: NEBIUS_DEFAULT_CONTEXT_WINDOW, + contextWindow: 202752, maxTokens: NEBIUS_DEFAULT_MAX_TOKENS, }, { id: "zai-org/GLM-4.5", name: "GLM 4.5", - reasoning: false, + reasoning: true, input: ["text"], cost: NEBIUS_DEFAULT_COST, - contextWindow: NEBIUS_DEFAULT_CONTEXT_WINDOW, + contextWindow: 131072, + maxTokens: NEBIUS_DEFAULT_MAX_TOKENS, + }, + { + id: "deepseek-ai/DeepSeek-V3.2", + name: "DeepSeek V3.2", + reasoning: true, + input: ["text"], + cost: NEBIUS_DEFAULT_COST, + contextWindow: 163000, + maxTokens: NEBIUS_DEFAULT_MAX_TOKENS, + }, + { + id: "moonshotai/Kimi-K2-Thinking", + name: "Kimi K2 Thinking", + reasoning: true, + input: ["text"], + cost: NEBIUS_DEFAULT_COST, + contextWindow: 262144, maxTokens: NEBIUS_DEFAULT_MAX_TOKENS, }, ], diff --git a/src/commands/onboard-auth.credentials.ts b/src/commands/onboard-auth.credentials.ts index a6692d316..314e7ea76 100644 --- a/src/commands/onboard-auth.credentials.ts +++ b/src/commands/onboard-auth.credentials.ts @@ -116,7 +116,7 @@ export const ZAI_DEFAULT_MODEL_REF = "zai/glm-4.7"; export const XIAOMI_DEFAULT_MODEL_REF 
= "xiaomi/mimo-v2-flash"; export const OPENROUTER_DEFAULT_MODEL_REF = "openrouter/auto"; export const VERCEL_AI_GATEWAY_DEFAULT_MODEL_REF = "vercel-ai-gateway/anthropic/claude-opus-4.5"; -export const NEBIUS_DEFAULT_MODEL_REF = "nebius/Qwen/Qwen3-32B-fast"; +export const NEBIUS_DEFAULT_MODEL_REF = "nebius/zai-org/GLM-4.7-FP8"; export async function setNebiusApiKey(key: string, agentDir?: string) { // Write to resolved agent dir so gateway finds credentials on startup.