Add Kimi K2 Thinking and DeepSeek V3.2 models; make GLM 4.7 FP8 the default Nebius model

This commit is contained in:
kiranchilledout 2026-01-30 08:08:25 +05:30
parent 8da5d9fb3d
commit 8fc1bb784c
5 changed files with 49 additions and 30 deletions

View File

@ -245,13 +245,13 @@ Nebius provides OpenAI-compatible inference for frontier and open models:
- Provider: `nebius`
- Auth: `NEBIUS_API_KEY`
- Example model: `nebius/Qwen/Qwen3-32B-fast`
- Example model: `nebius/zai-org/GLM-4.7-FP8` (default)
- CLI: `moltbot onboard --auth-choice nebius-api-key`
```json5
{
agents: {
defaults: { model: { primary: "nebius/Qwen/Qwen3-32B-fast" } }
defaults: { model: { primary: "nebius/zai-org/GLM-4.7-FP8" } }
},
models: {
mode: "merge",
@ -262,8 +262,8 @@ Nebius provides OpenAI-compatible inference for frontier and open models:
api: "openai-completions",
models: [
{
id: "Qwen/Qwen3-32B-fast",
name: "Qwen3 32B Fast",
id: "zai-org/GLM-4.7-FP8",
name: "GLM 4.7 FP8",
reasoning: false,
input: ["text"],
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },

View File

@ -2481,8 +2481,8 @@ Use Nebius's OpenAI-compatible endpoint:
env: { NEBIUS_API_KEY: "sk-..." },
agents: {
defaults: {
model: { primary: "nebius/Qwen/Qwen3-32B-fast" },
models: { "nebius/Qwen/Qwen3-32B-fast": { alias: "Qwen3 32B" } }
model: { primary: "nebius/zai-org/GLM-4.7-FP8" },
models: { "nebius/zai-org/GLM-4.7-FP8": { alias: "GLM 4.7" } }
}
},
models: {
@ -2494,8 +2494,8 @@ Use Nebius's OpenAI-compatible endpoint:
api: "openai-completions",
models: [
{
id: "Qwen/Qwen3-32B-fast",
name: "Qwen3 32B Fast",
id: "zai-org/GLM-4.7-FP8",
name: "GLM 4.7 FP8",
reasoning: false,
input: ["text"],
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
@ -2511,7 +2511,7 @@ Use Nebius's OpenAI-compatible endpoint:
Notes:
- Set `NEBIUS_API_KEY` in the environment or use `moltbot onboard --auth-choice nebius-api-key`.
- Model ref: `nebius/Qwen/Qwen3-32B-fast`.
- Model ref: `nebius/zai-org/GLM-4.7-FP8` (default).
- Available models include the Qwen, DeepSeek, Llama, GLM, and Kimi families.
### Synthetic (Anthropic-compatible)

View File

@ -10,13 +10,14 @@ Nebius Token Factory provides OpenAI-compatible inference for frontier and open
## Model overview
- **GLM 4.7 FP8** (default): GLM model from Z.AI, 202752-token context window.
- **Qwen3 32B Fast**: 40960-token context window, fast inference.
- **DeepSeek V3 Fast**: DeepSeek V3 with fast inference.
- **DeepSeek R1 Fast**: Reasoning model with chain-of-thought.
- **Llama 3.3 70B**: Strong general-purpose model with fast variant.
- **Qwen2.5 VL 72B**: Vision-language model.
- **Qwen2.5 Coder 7B Fast**: Code-specialized model.
- **GLM models**: GLM 4.7 and GLM 4.5 from Z.AI.
- **GLM 4.5**: Additional GLM model from Z.AI.
- Base URL: `https://api.tokenfactory.nebius.com/v1`
- Authorization: `Bearer $NEBIUS_API_KEY`
@ -33,7 +34,7 @@ moltbot onboard --auth-choice nebius-api-key --nebius-api-key "$NEBIUS_API_KEY"
```json5
{
env: { NEBIUS_API_KEY: "your-key" },
agents: { defaults: { model: { primary: "nebius/Qwen/Qwen3-32B-fast" } } },
agents: { defaults: { model: { primary: "nebius/zai-org/GLM-4.7-FP8" } } },
models: {
mode: "merge",
providers: {
@ -43,8 +44,8 @@ moltbot onboard --auth-choice nebius-api-key --nebius-api-key "$NEBIUS_API_KEY"
apiKey: "NEBIUS_API_KEY",
models: [
{
id: "Qwen/Qwen3-32B-fast",
name: "Qwen3 32B Fast",
id: "zai-org/GLM-4.7-FP8",
name: "GLM 4.7 FP8",
reasoning: false,
input: ["text"],
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
@ -60,6 +61,6 @@ moltbot onboard --auth-choice nebius-api-key --nebius-api-key "$NEBIUS_API_KEY"
## Notes
- Default model ref: `nebius/Qwen/Qwen3-32B-fast`.
- Default model ref: `nebius/zai-org/GLM-4.7-FP8`.
- The provider is injected automatically when `NEBIUS_API_KEY` is set (or an auth profile exists).
- See [/concepts/model-providers](/concepts/model-providers) for provider rules.

View File

@ -42,8 +42,8 @@ const XIAOMI_DEFAULT_COST = {
};
const NEBIUS_BASE_URL = "https://api.tokenfactory.nebius.com/v1";
export const NEBIUS_DEFAULT_MODEL_ID = "Qwen/Qwen3-32B-fast";
const NEBIUS_DEFAULT_CONTEXT_WINDOW = 128000;
export const NEBIUS_DEFAULT_MODEL_ID = "zai-org/GLM-4.7-FP8";
const NEBIUS_DEFAULT_CONTEXT_WINDOW = 202752;
const NEBIUS_DEFAULT_MAX_TOKENS = 8192;
const NEBIUS_DEFAULT_COST = {
input: 0,
@ -389,10 +389,10 @@ export function buildNebiusProvider(): ProviderConfig {
{
id: "Qwen/Qwen3-32B-fast",
name: "Qwen3 32B Fast",
reasoning: false,
reasoning: true,
input: ["text"],
cost: NEBIUS_DEFAULT_COST,
contextWindow: NEBIUS_DEFAULT_CONTEXT_WINDOW,
contextWindow: 40960,
maxTokens: NEBIUS_DEFAULT_MAX_TOKENS,
},
{
@ -401,7 +401,7 @@ export function buildNebiusProvider(): ProviderConfig {
reasoning: false,
input: ["text"],
cost: NEBIUS_DEFAULT_COST,
contextWindow: NEBIUS_DEFAULT_CONTEXT_WINDOW,
contextWindow: 131072,
maxTokens: NEBIUS_DEFAULT_MAX_TOKENS,
},
{
@ -410,7 +410,7 @@ export function buildNebiusProvider(): ProviderConfig {
reasoning: false,
input: ["text"],
cost: NEBIUS_DEFAULT_COST,
contextWindow: NEBIUS_DEFAULT_CONTEXT_WINDOW,
contextWindow: 131072,
maxTokens: NEBIUS_DEFAULT_MAX_TOKENS,
},
{
@ -419,7 +419,7 @@ export function buildNebiusProvider(): ProviderConfig {
reasoning: false,
input: ["text"],
cost: NEBIUS_DEFAULT_COST,
contextWindow: NEBIUS_DEFAULT_CONTEXT_WINDOW,
contextWindow: 131072,
maxTokens: NEBIUS_DEFAULT_MAX_TOKENS,
},
{
@ -428,7 +428,7 @@ export function buildNebiusProvider(): ProviderConfig {
reasoning: false,
input: ["text"],
cost: NEBIUS_DEFAULT_COST,
contextWindow: NEBIUS_DEFAULT_CONTEXT_WINDOW,
contextWindow: 32768,
maxTokens: NEBIUS_DEFAULT_MAX_TOKENS,
},
{
@ -437,7 +437,7 @@ export function buildNebiusProvider(): ProviderConfig {
reasoning: true,
input: ["text"],
cost: NEBIUS_DEFAULT_COST,
contextWindow: NEBIUS_DEFAULT_CONTEXT_WINDOW,
contextWindow: 32768,
maxTokens: NEBIUS_DEFAULT_MAX_TOKENS,
},
{
@ -446,7 +446,7 @@ export function buildNebiusProvider(): ProviderConfig {
reasoning: false,
input: ["text", "image"],
cost: NEBIUS_DEFAULT_COST,
contextWindow: NEBIUS_DEFAULT_CONTEXT_WINDOW,
contextWindow: 32000,
maxTokens: NEBIUS_DEFAULT_MAX_TOKENS,
},
{
@ -455,25 +455,43 @@ export function buildNebiusProvider(): ProviderConfig {
reasoning: false,
input: ["text"],
cost: NEBIUS_DEFAULT_COST,
contextWindow: NEBIUS_DEFAULT_CONTEXT_WINDOW,
contextWindow: 32768,
maxTokens: NEBIUS_DEFAULT_MAX_TOKENS,
},
{
id: "zai-org/GLM-4.7-FP8",
name: "GLM 4.7 FP8",
reasoning: false,
reasoning: true,
input: ["text"],
cost: NEBIUS_DEFAULT_COST,
contextWindow: NEBIUS_DEFAULT_CONTEXT_WINDOW,
contextWindow: 202752,
maxTokens: NEBIUS_DEFAULT_MAX_TOKENS,
},
{
id: "zai-org/GLM-4.5",
name: "GLM 4.5",
reasoning: false,
reasoning: true,
input: ["text"],
cost: NEBIUS_DEFAULT_COST,
contextWindow: NEBIUS_DEFAULT_CONTEXT_WINDOW,
contextWindow: 131072,
maxTokens: NEBIUS_DEFAULT_MAX_TOKENS,
},
{
id: "deepseek-ai/DeepSeek-V3.2",
name: "DeepSeek V3.2",
reasoning: true,
input: ["text"],
cost: NEBIUS_DEFAULT_COST,
contextWindow: 163000,
maxTokens: NEBIUS_DEFAULT_MAX_TOKENS,
},
{
id: "moonshotai/Kimi-K2-Thinking",
name: "Kimi K2 Thinking",
reasoning: true,
input: ["text"],
cost: NEBIUS_DEFAULT_COST,
contextWindow: 262144,
maxTokens: NEBIUS_DEFAULT_MAX_TOKENS,
},
],

View File

@ -116,7 +116,7 @@ export const ZAI_DEFAULT_MODEL_REF = "zai/glm-4.7";
export const XIAOMI_DEFAULT_MODEL_REF = "xiaomi/mimo-v2-flash";
export const OPENROUTER_DEFAULT_MODEL_REF = "openrouter/auto";
export const VERCEL_AI_GATEWAY_DEFAULT_MODEL_REF = "vercel-ai-gateway/anthropic/claude-opus-4.5";
export const NEBIUS_DEFAULT_MODEL_REF = "nebius/Qwen/Qwen3-32B-fast";
export const NEBIUS_DEFAULT_MODEL_REF = "nebius/zai-org/GLM-4.7-FP8";
export async function setNebiusApiKey(key: string, agentDir?: string) {
// Write to resolved agent dir so gateway finds credentials on startup.