added kimi and deepseek v3.2 models
This commit is contained in:
parent
8da5d9fb3d
commit
8fc1bb784c
@ -245,13 +245,13 @@ Nebius provides OpenAI-compatible inference for frontier and open models:
|
|||||||
|
|
||||||
- Provider: `nebius`
|
- Provider: `nebius`
|
||||||
- Auth: `NEBIUS_API_KEY`
|
- Auth: `NEBIUS_API_KEY`
|
||||||
- Example model: `nebius/Qwen/Qwen3-32B-fast`
|
- Example model: `nebius/zai-org/GLM-4.7-FP8` (default)
|
||||||
- CLI: `moltbot onboard --auth-choice nebius-api-key`
|
- CLI: `moltbot onboard --auth-choice nebius-api-key`
|
||||||
|
|
||||||
```json5
|
```json5
|
||||||
{
|
{
|
||||||
agents: {
|
agents: {
|
||||||
defaults: { model: { primary: "nebius/Qwen/Qwen3-32B-fast" } }
|
defaults: { model: { primary: "nebius/zai-org/GLM-4.7-FP8" } }
|
||||||
},
|
},
|
||||||
models: {
|
models: {
|
||||||
mode: "merge",
|
mode: "merge",
|
||||||
@ -262,8 +262,8 @@ Nebius provides OpenAI-compatible inference for frontier and open models:
|
|||||||
api: "openai-completions",
|
api: "openai-completions",
|
||||||
models: [
|
models: [
|
||||||
{
|
{
|
||||||
id: "Qwen/Qwen3-32B-fast",
|
id: "zai-org/GLM-4.7-FP8",
|
||||||
name: "Qwen3 32B Fast",
|
name: "GLM 4.7 FP8",
|
||||||
reasoning: false,
|
reasoning: false,
|
||||||
input: ["text"],
|
input: ["text"],
|
||||||
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
|
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
|
||||||
|
|||||||
@ -2481,8 +2481,8 @@ Use Nebius's OpenAI-compatible endpoint:
|
|||||||
env: { NEBIUS_API_KEY: "sk-..." },
|
env: { NEBIUS_API_KEY: "sk-..." },
|
||||||
agents: {
|
agents: {
|
||||||
defaults: {
|
defaults: {
|
||||||
model: { primary: "nebius/Qwen/Qwen3-32B-fast" },
|
model: { primary: "nebius/zai-org/GLM-4.7-FP8" },
|
||||||
models: { "nebius/Qwen/Qwen3-32B-fast": { alias: "Qwen3 32B" } }
|
models: { "nebius/zai-org/GLM-4.7-FP8": { alias: "GLM 4.7" } }
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
models: {
|
models: {
|
||||||
@ -2494,8 +2494,8 @@ Use Nebius's OpenAI-compatible endpoint:
|
|||||||
api: "openai-completions",
|
api: "openai-completions",
|
||||||
models: [
|
models: [
|
||||||
{
|
{
|
||||||
id: "Qwen/Qwen3-32B-fast",
|
id: "zai-org/GLM-4.7-FP8",
|
||||||
name: "Qwen3 32B Fast",
|
name: "GLM 4.7 FP8",
|
||||||
reasoning: false,
|
reasoning: false,
|
||||||
input: ["text"],
|
input: ["text"],
|
||||||
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
|
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
|
||||||
@ -2511,7 +2511,7 @@ Use Nebius's OpenAI-compatible endpoint:
|
|||||||
|
|
||||||
Notes:
|
Notes:
|
||||||
- Set `NEBIUS_API_KEY` in the environment or use `moltbot onboard --auth-choice nebius-api-key`.
|
- Set `NEBIUS_API_KEY` in the environment or use `moltbot onboard --auth-choice nebius-api-key`.
|
||||||
- Model ref: `nebius/Qwen/Qwen3-32B-fast`.
|
- Model ref: `nebius/zai-org/GLM-4.7-FP8` (default).
|
||||||
- Available models include Qwen, DeepSeek, Llama, and GLM families.
|
- Available models include Qwen, DeepSeek, Llama, and GLM families.
|
||||||
|
|
||||||
### Synthetic (Anthropic-compatible)
|
### Synthetic (Anthropic-compatible)
|
||||||
|
|||||||
@ -10,13 +10,14 @@ Nebius Token Factory provides OpenAI-compatible inference for frontier and open
|
|||||||
|
|
||||||
## Model overview
|
## Model overview
|
||||||
|
|
||||||
|
- **GLM 4.7 FP8** (default): GLM model from Z.AI, 202752-token context window.
|
||||||
- **Qwen3 32B Fast**: 128000-token context window, fast inference.
|
- **Qwen3 32B Fast**: 40960-token context window, fast inference.
|
||||||
- **DeepSeek V3 Fast**: Latest DeepSeek model with fast inference.
|
- **DeepSeek V3 Fast**: Latest DeepSeek model with fast inference.
|
||||||
- **DeepSeek R1 Fast**: Reasoning model with chain-of-thought.
|
- **DeepSeek R1 Fast**: Reasoning model with chain-of-thought.
|
||||||
- **Llama 3.3 70B**: Strong general-purpose model with fast variant.
|
- **Llama 3.3 70B**: Strong general-purpose model with fast variant.
|
||||||
- **Qwen2.5 VL 72B**: Vision-language model.
|
- **Qwen2.5 VL 72B**: Vision-language model.
|
||||||
- **Qwen2.5 Coder 7B Fast**: Code-specialized model.
|
- **Qwen2.5 Coder 7B Fast**: Code-specialized model.
|
||||||
- **GLM models**: GLM 4.7 and GLM 4.5 from Z.AI.
|
- **GLM 4.5**: Additional GLM model from Z.AI.
|
||||||
- Base URL: `https://api.tokenfactory.nebius.com/v1`
|
- Base URL: `https://api.tokenfactory.nebius.com/v1`
|
||||||
- Authorization: `Bearer $NEBIUS_API_KEY`
|
- Authorization: `Bearer $NEBIUS_API_KEY`
|
||||||
|
|
||||||
@ -33,7 +34,7 @@ moltbot onboard --auth-choice nebius-api-key --nebius-api-key "$NEBIUS_API_KEY"
|
|||||||
```json5
|
```json5
|
||||||
{
|
{
|
||||||
env: { NEBIUS_API_KEY: "your-key" },
|
env: { NEBIUS_API_KEY: "your-key" },
|
||||||
agents: { defaults: { model: { primary: "nebius/Qwen/Qwen3-32B-fast" } } },
|
agents: { defaults: { model: { primary: "nebius/zai-org/GLM-4.7-FP8" } } },
|
||||||
models: {
|
models: {
|
||||||
mode: "merge",
|
mode: "merge",
|
||||||
providers: {
|
providers: {
|
||||||
@ -43,8 +44,8 @@ moltbot onboard --auth-choice nebius-api-key --nebius-api-key "$NEBIUS_API_KEY"
|
|||||||
apiKey: "NEBIUS_API_KEY",
|
apiKey: "NEBIUS_API_KEY",
|
||||||
models: [
|
models: [
|
||||||
{
|
{
|
||||||
id: "Qwen/Qwen3-32B-fast",
|
id: "zai-org/GLM-4.7-FP8",
|
||||||
name: "Qwen3 32B Fast",
|
name: "GLM 4.7 FP8",
|
||||||
reasoning: false,
|
reasoning: false,
|
||||||
input: ["text"],
|
input: ["text"],
|
||||||
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
|
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
|
||||||
@ -60,6 +61,6 @@ moltbot onboard --auth-choice nebius-api-key --nebius-api-key "$NEBIUS_API_KEY"
|
|||||||
|
|
||||||
## Notes
|
## Notes
|
||||||
|
|
||||||
- Default model ref: `nebius/Qwen/Qwen3-32B-fast`.
|
- Default model ref: `nebius/zai-org/GLM-4.7-FP8`.
|
||||||
- The provider is injected automatically when `NEBIUS_API_KEY` is set (or an auth profile exists).
|
- The provider is injected automatically when `NEBIUS_API_KEY` is set (or an auth profile exists).
|
||||||
- See [/concepts/model-providers](/concepts/model-providers) for provider rules.
|
- See [/concepts/model-providers](/concepts/model-providers) for provider rules.
|
||||||
@ -42,8 +42,8 @@ const XIAOMI_DEFAULT_COST = {
|
|||||||
};
|
};
|
||||||
|
|
||||||
const NEBIUS_BASE_URL = "https://api.tokenfactory.nebius.com/v1";
|
const NEBIUS_BASE_URL = "https://api.tokenfactory.nebius.com/v1";
|
||||||
export const NEBIUS_DEFAULT_MODEL_ID = "Qwen/Qwen3-32B-fast";
|
export const NEBIUS_DEFAULT_MODEL_ID = "zai-org/GLM-4.7-FP8";
|
||||||
const NEBIUS_DEFAULT_CONTEXT_WINDOW = 128000;
|
const NEBIUS_DEFAULT_CONTEXT_WINDOW = 202752;
|
||||||
const NEBIUS_DEFAULT_MAX_TOKENS = 8192;
|
const NEBIUS_DEFAULT_MAX_TOKENS = 8192;
|
||||||
const NEBIUS_DEFAULT_COST = {
|
const NEBIUS_DEFAULT_COST = {
|
||||||
input: 0,
|
input: 0,
|
||||||
@ -389,10 +389,10 @@ export function buildNebiusProvider(): ProviderConfig {
|
|||||||
{
|
{
|
||||||
id: "Qwen/Qwen3-32B-fast",
|
id: "Qwen/Qwen3-32B-fast",
|
||||||
name: "Qwen3 32B Fast",
|
name: "Qwen3 32B Fast",
|
||||||
reasoning: false,
|
reasoning: true,
|
||||||
input: ["text"],
|
input: ["text"],
|
||||||
cost: NEBIUS_DEFAULT_COST,
|
cost: NEBIUS_DEFAULT_COST,
|
||||||
contextWindow: NEBIUS_DEFAULT_CONTEXT_WINDOW,
|
contextWindow: 40960,
|
||||||
maxTokens: NEBIUS_DEFAULT_MAX_TOKENS,
|
maxTokens: NEBIUS_DEFAULT_MAX_TOKENS,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -401,7 +401,7 @@ export function buildNebiusProvider(): ProviderConfig {
|
|||||||
reasoning: false,
|
reasoning: false,
|
||||||
input: ["text"],
|
input: ["text"],
|
||||||
cost: NEBIUS_DEFAULT_COST,
|
cost: NEBIUS_DEFAULT_COST,
|
||||||
contextWindow: NEBIUS_DEFAULT_CONTEXT_WINDOW,
|
contextWindow: 131072,
|
||||||
maxTokens: NEBIUS_DEFAULT_MAX_TOKENS,
|
maxTokens: NEBIUS_DEFAULT_MAX_TOKENS,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -410,7 +410,7 @@ export function buildNebiusProvider(): ProviderConfig {
|
|||||||
reasoning: false,
|
reasoning: false,
|
||||||
input: ["text"],
|
input: ["text"],
|
||||||
cost: NEBIUS_DEFAULT_COST,
|
cost: NEBIUS_DEFAULT_COST,
|
||||||
contextWindow: NEBIUS_DEFAULT_CONTEXT_WINDOW,
|
contextWindow: 131072,
|
||||||
maxTokens: NEBIUS_DEFAULT_MAX_TOKENS,
|
maxTokens: NEBIUS_DEFAULT_MAX_TOKENS,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -419,7 +419,7 @@ export function buildNebiusProvider(): ProviderConfig {
|
|||||||
reasoning: false,
|
reasoning: false,
|
||||||
input: ["text"],
|
input: ["text"],
|
||||||
cost: NEBIUS_DEFAULT_COST,
|
cost: NEBIUS_DEFAULT_COST,
|
||||||
contextWindow: NEBIUS_DEFAULT_CONTEXT_WINDOW,
|
contextWindow: 131072,
|
||||||
maxTokens: NEBIUS_DEFAULT_MAX_TOKENS,
|
maxTokens: NEBIUS_DEFAULT_MAX_TOKENS,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -428,7 +428,7 @@ export function buildNebiusProvider(): ProviderConfig {
|
|||||||
reasoning: false,
|
reasoning: false,
|
||||||
input: ["text"],
|
input: ["text"],
|
||||||
cost: NEBIUS_DEFAULT_COST,
|
cost: NEBIUS_DEFAULT_COST,
|
||||||
contextWindow: NEBIUS_DEFAULT_CONTEXT_WINDOW,
|
contextWindow: 32768,
|
||||||
maxTokens: NEBIUS_DEFAULT_MAX_TOKENS,
|
maxTokens: NEBIUS_DEFAULT_MAX_TOKENS,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -437,7 +437,7 @@ export function buildNebiusProvider(): ProviderConfig {
|
|||||||
reasoning: true,
|
reasoning: true,
|
||||||
input: ["text"],
|
input: ["text"],
|
||||||
cost: NEBIUS_DEFAULT_COST,
|
cost: NEBIUS_DEFAULT_COST,
|
||||||
contextWindow: NEBIUS_DEFAULT_CONTEXT_WINDOW,
|
contextWindow: 32768,
|
||||||
maxTokens: NEBIUS_DEFAULT_MAX_TOKENS,
|
maxTokens: NEBIUS_DEFAULT_MAX_TOKENS,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -446,7 +446,7 @@ export function buildNebiusProvider(): ProviderConfig {
|
|||||||
reasoning: false,
|
reasoning: false,
|
||||||
input: ["text", "image"],
|
input: ["text", "image"],
|
||||||
cost: NEBIUS_DEFAULT_COST,
|
cost: NEBIUS_DEFAULT_COST,
|
||||||
contextWindow: NEBIUS_DEFAULT_CONTEXT_WINDOW,
|
contextWindow: 32000,
|
||||||
maxTokens: NEBIUS_DEFAULT_MAX_TOKENS,
|
maxTokens: NEBIUS_DEFAULT_MAX_TOKENS,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -455,25 +455,43 @@ export function buildNebiusProvider(): ProviderConfig {
|
|||||||
reasoning: false,
|
reasoning: false,
|
||||||
input: ["text"],
|
input: ["text"],
|
||||||
cost: NEBIUS_DEFAULT_COST,
|
cost: NEBIUS_DEFAULT_COST,
|
||||||
contextWindow: NEBIUS_DEFAULT_CONTEXT_WINDOW,
|
contextWindow: 32768,
|
||||||
maxTokens: NEBIUS_DEFAULT_MAX_TOKENS,
|
maxTokens: NEBIUS_DEFAULT_MAX_TOKENS,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
id: "zai-org/GLM-4.7-FP8",
|
id: "zai-org/GLM-4.7-FP8",
|
||||||
name: "GLM 4.7 FP8",
|
name: "GLM 4.7 FP8",
|
||||||
reasoning: false,
|
reasoning: true,
|
||||||
input: ["text"],
|
input: ["text"],
|
||||||
cost: NEBIUS_DEFAULT_COST,
|
cost: NEBIUS_DEFAULT_COST,
|
||||||
contextWindow: NEBIUS_DEFAULT_CONTEXT_WINDOW,
|
contextWindow: 202752,
|
||||||
maxTokens: NEBIUS_DEFAULT_MAX_TOKENS,
|
maxTokens: NEBIUS_DEFAULT_MAX_TOKENS,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
id: "zai-org/GLM-4.5",
|
id: "zai-org/GLM-4.5",
|
||||||
name: "GLM 4.5",
|
name: "GLM 4.5",
|
||||||
reasoning: false,
|
reasoning: true,
|
||||||
input: ["text"],
|
input: ["text"],
|
||||||
cost: NEBIUS_DEFAULT_COST,
|
cost: NEBIUS_DEFAULT_COST,
|
||||||
contextWindow: NEBIUS_DEFAULT_CONTEXT_WINDOW,
|
contextWindow: 131072,
|
||||||
|
maxTokens: NEBIUS_DEFAULT_MAX_TOKENS,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
id: "deepseek-ai/DeepSeek-V3.2",
|
||||||
|
name: "DeepSeek V3.2",
|
||||||
|
reasoning: true,
|
||||||
|
input: ["text"],
|
||||||
|
cost: NEBIUS_DEFAULT_COST,
|
||||||
|
contextWindow: 163000,
|
||||||
|
maxTokens: NEBIUS_DEFAULT_MAX_TOKENS,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
id: "moonshotai/Kimi-K2-Thinking",
|
||||||
|
name: "Kimi K2 Thinking",
|
||||||
|
reasoning: true,
|
||||||
|
input: ["text"],
|
||||||
|
cost: NEBIUS_DEFAULT_COST,
|
||||||
|
contextWindow: 262144,
|
||||||
maxTokens: NEBIUS_DEFAULT_MAX_TOKENS,
|
maxTokens: NEBIUS_DEFAULT_MAX_TOKENS,
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
|
|||||||
@ -116,7 +116,7 @@ export const ZAI_DEFAULT_MODEL_REF = "zai/glm-4.7";
|
|||||||
export const XIAOMI_DEFAULT_MODEL_REF = "xiaomi/mimo-v2-flash";
|
export const XIAOMI_DEFAULT_MODEL_REF = "xiaomi/mimo-v2-flash";
|
||||||
export const OPENROUTER_DEFAULT_MODEL_REF = "openrouter/auto";
|
export const OPENROUTER_DEFAULT_MODEL_REF = "openrouter/auto";
|
||||||
export const VERCEL_AI_GATEWAY_DEFAULT_MODEL_REF = "vercel-ai-gateway/anthropic/claude-opus-4.5";
|
export const VERCEL_AI_GATEWAY_DEFAULT_MODEL_REF = "vercel-ai-gateway/anthropic/claude-opus-4.5";
|
||||||
export const NEBIUS_DEFAULT_MODEL_REF = "nebius/Qwen/Qwen3-32B-fast";
|
export const NEBIUS_DEFAULT_MODEL_REF = "nebius/zai-org/GLM-4.7-FP8";
|
||||||
|
|
||||||
export async function setNebiusApiKey(key: string, agentDir?: string) {
|
export async function setNebiusApiKey(key: string, agentDir?: string) {
|
||||||
// Write to resolved agent dir so gateway finds credentials on startup.
|
// Write to resolved agent dir so gateway finds credentials on startup.
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user