added kimi and deepseek v3.2 models
This commit is contained in:
parent
8da5d9fb3d
commit
8fc1bb784c
@ -245,13 +245,13 @@ Nebius provides OpenAI-compatible inference for frontier and open models:
|
||||
|
||||
- Provider: `nebius`
|
||||
- Auth: `NEBIUS_API_KEY`
|
||||
- Example model: `nebius/Qwen/Qwen3-32B-fast`
|
||||
- Example model: `nebius/zai-org/GLM-4.7-FP8` (default)
|
||||
- CLI: `moltbot onboard --auth-choice nebius-api-key`
|
||||
|
||||
```json5
|
||||
{
|
||||
agents: {
|
||||
defaults: { model: { primary: "nebius/Qwen/Qwen3-32B-fast" } }
|
||||
defaults: { model: { primary: "nebius/zai-org/GLM-4.7-FP8" } }
|
||||
},
|
||||
models: {
|
||||
mode: "merge",
|
||||
@ -262,8 +262,8 @@ Nebius provides OpenAI-compatible inference for frontier and open models:
|
||||
api: "openai-completions",
|
||||
models: [
|
||||
{
|
||||
id: "Qwen/Qwen3-32B-fast",
|
||||
name: "Qwen3 32B Fast",
|
||||
id: "zai-org/GLM-4.7-FP8",
|
||||
name: "GLM 4.7 FP8",
|
||||
reasoning: false,
|
||||
input: ["text"],
|
||||
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
|
||||
|
||||
@ -2481,8 +2481,8 @@ Use Nebius's OpenAI-compatible endpoint:
|
||||
env: { NEBIUS_API_KEY: "sk-..." },
|
||||
agents: {
|
||||
defaults: {
|
||||
model: { primary: "nebius/Qwen/Qwen3-32B-fast" },
|
||||
models: { "nebius/Qwen/Qwen3-32B-fast": { alias: "Qwen3 32B" } }
|
||||
model: { primary: "nebius/zai-org/GLM-4.7-FP8" },
|
||||
models: { "nebius/zai-org/GLM-4.7-FP8": { alias: "GLM 4.7" } }
|
||||
}
|
||||
},
|
||||
models: {
|
||||
@ -2494,8 +2494,8 @@ Use Nebius's OpenAI-compatible endpoint:
|
||||
api: "openai-completions",
|
||||
models: [
|
||||
{
|
||||
id: "Qwen/Qwen3-32B-fast",
|
||||
name: "Qwen3 32B Fast",
|
||||
id: "zai-org/GLM-4.7-FP8",
|
||||
name: "GLM 4.7 FP8",
|
||||
reasoning: false,
|
||||
input: ["text"],
|
||||
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
|
||||
@ -2511,7 +2511,7 @@ Use Nebius's OpenAI-compatible endpoint:
|
||||
|
||||
Notes:
|
||||
- Set `NEBIUS_API_KEY` in the environment or use `moltbot onboard --auth-choice nebius-api-key`.
|
||||
- Model ref: `nebius/Qwen/Qwen3-32B-fast`.
|
||||
- Model ref: `nebius/zai-org/GLM-4.7-FP8` (default).
|
||||
- Available models include Qwen, DeepSeek, Llama, GLM, and Kimi families.
|
||||
|
||||
### Synthetic (Anthropic-compatible)
|
||||
|
||||
@ -10,13 +10,14 @@ Nebius Token Factory provides OpenAI-compatible inference for frontier and open
|
||||
|
||||
## Model overview
|
||||
|
||||
- **GLM 4.7 FP8** (default): GLM model from Z.AI, 202752-token context window.
|
||||
- **Qwen3 32B Fast**: 40960-token context window, fast inference.
|
||||
- **DeepSeek V3 Fast**: Latest DeepSeek model with fast inference.
|
||||
- **DeepSeek R1 Fast**: Reasoning model with chain-of-thought.
|
||||
- **Llama 3.3 70B**: Strong general-purpose model with fast variant.
|
||||
- **Qwen2.5 VL 72B**: Vision-language model.
|
||||
- **Qwen2.5 Coder 7B Fast**: Code-specialized model.
|
||||
- **GLM models**: GLM 4.7 and GLM 4.5 from Z.AI.
|
||||
- **GLM 4.5**: Additional GLM model from Z.AI.
|
||||
- Base URL: `https://api.tokenfactory.nebius.com/v1`
|
||||
- Authorization: `Bearer $NEBIUS_API_KEY`
|
||||
|
||||
@ -33,7 +34,7 @@ moltbot onboard --auth-choice nebius-api-key --nebius-api-key "$NEBIUS_API_KEY"
|
||||
```json5
|
||||
{
|
||||
env: { NEBIUS_API_KEY: "your-key" },
|
||||
agents: { defaults: { model: { primary: "nebius/Qwen/Qwen3-32B-fast" } } },
|
||||
agents: { defaults: { model: { primary: "nebius/zai-org/GLM-4.7-FP8" } } },
|
||||
models: {
|
||||
mode: "merge",
|
||||
providers: {
|
||||
@ -43,8 +44,8 @@ moltbot onboard --auth-choice nebius-api-key --nebius-api-key "$NEBIUS_API_KEY"
|
||||
apiKey: "NEBIUS_API_KEY",
|
||||
models: [
|
||||
{
|
||||
id: "Qwen/Qwen3-32B-fast",
|
||||
name: "Qwen3 32B Fast",
|
||||
id: "zai-org/GLM-4.7-FP8",
|
||||
name: "GLM 4.7 FP8",
|
||||
reasoning: false,
|
||||
input: ["text"],
|
||||
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
|
||||
@ -60,6 +61,6 @@ moltbot onboard --auth-choice nebius-api-key --nebius-api-key "$NEBIUS_API_KEY"
|
||||
|
||||
## Notes
|
||||
|
||||
- Default model ref: `nebius/Qwen/Qwen3-32B-fast`.
|
||||
- Default model ref: `nebius/zai-org/GLM-4.7-FP8`.
|
||||
- The provider is injected automatically when `NEBIUS_API_KEY` is set (or an auth profile exists).
|
||||
- See [/concepts/model-providers](/concepts/model-providers) for provider rules.
|
||||
@ -42,8 +42,8 @@ const XIAOMI_DEFAULT_COST = {
|
||||
};
|
||||
|
||||
const NEBIUS_BASE_URL = "https://api.tokenfactory.nebius.com/v1";
|
||||
export const NEBIUS_DEFAULT_MODEL_ID = "Qwen/Qwen3-32B-fast";
|
||||
const NEBIUS_DEFAULT_CONTEXT_WINDOW = 128000;
|
||||
export const NEBIUS_DEFAULT_MODEL_ID = "zai-org/GLM-4.7-FP8";
|
||||
const NEBIUS_DEFAULT_CONTEXT_WINDOW = 202752;
|
||||
const NEBIUS_DEFAULT_MAX_TOKENS = 8192;
|
||||
const NEBIUS_DEFAULT_COST = {
|
||||
input: 0,
|
||||
@ -389,10 +389,10 @@ export function buildNebiusProvider(): ProviderConfig {
|
||||
{
|
||||
id: "Qwen/Qwen3-32B-fast",
|
||||
name: "Qwen3 32B Fast",
|
||||
reasoning: false,
|
||||
reasoning: true,
|
||||
input: ["text"],
|
||||
cost: NEBIUS_DEFAULT_COST,
|
||||
contextWindow: NEBIUS_DEFAULT_CONTEXT_WINDOW,
|
||||
contextWindow: 40960,
|
||||
maxTokens: NEBIUS_DEFAULT_MAX_TOKENS,
|
||||
},
|
||||
{
|
||||
@ -401,7 +401,7 @@ export function buildNebiusProvider(): ProviderConfig {
|
||||
reasoning: false,
|
||||
input: ["text"],
|
||||
cost: NEBIUS_DEFAULT_COST,
|
||||
contextWindow: NEBIUS_DEFAULT_CONTEXT_WINDOW,
|
||||
contextWindow: 131072,
|
||||
maxTokens: NEBIUS_DEFAULT_MAX_TOKENS,
|
||||
},
|
||||
{
|
||||
@ -410,7 +410,7 @@ export function buildNebiusProvider(): ProviderConfig {
|
||||
reasoning: false,
|
||||
input: ["text"],
|
||||
cost: NEBIUS_DEFAULT_COST,
|
||||
contextWindow: NEBIUS_DEFAULT_CONTEXT_WINDOW,
|
||||
contextWindow: 131072,
|
||||
maxTokens: NEBIUS_DEFAULT_MAX_TOKENS,
|
||||
},
|
||||
{
|
||||
@ -419,7 +419,7 @@ export function buildNebiusProvider(): ProviderConfig {
|
||||
reasoning: false,
|
||||
input: ["text"],
|
||||
cost: NEBIUS_DEFAULT_COST,
|
||||
contextWindow: NEBIUS_DEFAULT_CONTEXT_WINDOW,
|
||||
contextWindow: 131072,
|
||||
maxTokens: NEBIUS_DEFAULT_MAX_TOKENS,
|
||||
},
|
||||
{
|
||||
@ -428,7 +428,7 @@ export function buildNebiusProvider(): ProviderConfig {
|
||||
reasoning: false,
|
||||
input: ["text"],
|
||||
cost: NEBIUS_DEFAULT_COST,
|
||||
contextWindow: NEBIUS_DEFAULT_CONTEXT_WINDOW,
|
||||
contextWindow: 32768,
|
||||
maxTokens: NEBIUS_DEFAULT_MAX_TOKENS,
|
||||
},
|
||||
{
|
||||
@ -437,7 +437,7 @@ export function buildNebiusProvider(): ProviderConfig {
|
||||
reasoning: true,
|
||||
input: ["text"],
|
||||
cost: NEBIUS_DEFAULT_COST,
|
||||
contextWindow: NEBIUS_DEFAULT_CONTEXT_WINDOW,
|
||||
contextWindow: 32768,
|
||||
maxTokens: NEBIUS_DEFAULT_MAX_TOKENS,
|
||||
},
|
||||
{
|
||||
@ -446,7 +446,7 @@ export function buildNebiusProvider(): ProviderConfig {
|
||||
reasoning: false,
|
||||
input: ["text", "image"],
|
||||
cost: NEBIUS_DEFAULT_COST,
|
||||
contextWindow: NEBIUS_DEFAULT_CONTEXT_WINDOW,
|
||||
contextWindow: 32000,
|
||||
maxTokens: NEBIUS_DEFAULT_MAX_TOKENS,
|
||||
},
|
||||
{
|
||||
@ -455,25 +455,43 @@ export function buildNebiusProvider(): ProviderConfig {
|
||||
reasoning: false,
|
||||
input: ["text"],
|
||||
cost: NEBIUS_DEFAULT_COST,
|
||||
contextWindow: NEBIUS_DEFAULT_CONTEXT_WINDOW,
|
||||
contextWindow: 32768,
|
||||
maxTokens: NEBIUS_DEFAULT_MAX_TOKENS,
|
||||
},
|
||||
{
|
||||
id: "zai-org/GLM-4.7-FP8",
|
||||
name: "GLM 4.7 FP8",
|
||||
reasoning: false,
|
||||
reasoning: true,
|
||||
input: ["text"],
|
||||
cost: NEBIUS_DEFAULT_COST,
|
||||
contextWindow: NEBIUS_DEFAULT_CONTEXT_WINDOW,
|
||||
contextWindow: 202752,
|
||||
maxTokens: NEBIUS_DEFAULT_MAX_TOKENS,
|
||||
},
|
||||
{
|
||||
id: "zai-org/GLM-4.5",
|
||||
name: "GLM 4.5",
|
||||
reasoning: false,
|
||||
reasoning: true,
|
||||
input: ["text"],
|
||||
cost: NEBIUS_DEFAULT_COST,
|
||||
contextWindow: NEBIUS_DEFAULT_CONTEXT_WINDOW,
|
||||
contextWindow: 131072,
|
||||
maxTokens: NEBIUS_DEFAULT_MAX_TOKENS,
|
||||
},
|
||||
{
|
||||
id: "deepseek-ai/DeepSeek-V3.2",
|
||||
name: "DeepSeek V3.2",
|
||||
reasoning: true,
|
||||
input: ["text"],
|
||||
cost: NEBIUS_DEFAULT_COST,
|
||||
contextWindow: 163000,
|
||||
maxTokens: NEBIUS_DEFAULT_MAX_TOKENS,
|
||||
},
|
||||
{
|
||||
id: "moonshotai/Kimi-K2-Thinking",
|
||||
name: "Kimi K2 Thinking",
|
||||
reasoning: true,
|
||||
input: ["text"],
|
||||
cost: NEBIUS_DEFAULT_COST,
|
||||
contextWindow: 262144,
|
||||
maxTokens: NEBIUS_DEFAULT_MAX_TOKENS,
|
||||
},
|
||||
],
|
||||
|
||||
@ -116,7 +116,7 @@ export const ZAI_DEFAULT_MODEL_REF = "zai/glm-4.7";
|
||||
export const XIAOMI_DEFAULT_MODEL_REF = "xiaomi/mimo-v2-flash";
|
||||
export const OPENROUTER_DEFAULT_MODEL_REF = "openrouter/auto";
|
||||
export const VERCEL_AI_GATEWAY_DEFAULT_MODEL_REF = "vercel-ai-gateway/anthropic/claude-opus-4.5";
|
||||
export const NEBIUS_DEFAULT_MODEL_REF = "nebius/Qwen/Qwen3-32B-fast";
|
||||
export const NEBIUS_DEFAULT_MODEL_REF = "nebius/zai-org/GLM-4.7-FP8";
|
||||
|
||||
export async function setNebiusApiKey(key: string, agentDir?: string) {
|
||||
// Write to resolved agent dir so gateway finds credentials on startup.
|
||||
|
||||
Loading…
Reference in New Issue
Block a user