baseten provider

2026-01-29 15:59:05 -05:00 · 2026-01-29 15:59:05 -05:00 · 1afcb09d35
commit 1afcb09d35
parent 4583f88626
16 changed files with 567 additions and 2 deletions
--- a/docs/cli/index.md
+++ b/docs/cli/index.md
@ -297,7 +297,7 @@ Options:
 - `--non-interactive`
 - `--mode <local|remote>`
 - `--flow <quickstart|advanced|manual>` (manual is an alias for advanced)
- `--auth-choice <setup-token|token|chutes|openai-codex|openai-api-key|openrouter-api-key|ai-gateway-api-key|moonshot-api-key|kimi-code-api-key|synthetic-api-key|venice-api-key|gemini-api-key|zai-api-key|apiKey|minimax-api|minimax-api-lightning|opencode-zen|skip>`
+- `--auth-choice <setup-token|token|chutes|openai-codex|openai-api-key|openrouter-api-key|baseten-api-key|ai-gateway-api-key|moonshot-api-key|kimi-code-api-key|synthetic-api-key|venice-api-key|gemini-api-key|zai-api-key|xiaomi-api-key|apiKey|minimax-api|minimax-api-lightning|opencode-zen|skip>`
 - `--token-provider <id>` (non-interactive; used with `--auth-choice token`)
 - `--token <token>` (non-interactive; used with `--auth-choice token`)
 - `--token-profile-id <id>` (non-interactive; default: `<provider>:manual`)
@ -305,12 +305,14 @@ Options:
 - `--anthropic-api-key <key>`
 - `--openai-api-key <key>`
 - `--openrouter-api-key <key>`
+- `--baseten-api-key <key>`
 - `--ai-gateway-api-key <key>`
 - `--moonshot-api-key <key>`
 - `--kimi-code-api-key <key>`
 - `--gemini-api-key <key>`
 - `--zai-api-key <key>`
 - `--minimax-api-key <key>`
+- `--venice-api-key <key>`
 - `--opencode-zen-api-key <key>`
 - `--gateway-port <port>`
 - `--gateway-bind <loopback|lan|tailnet|auto|custom>`
--- a/docs/docs.json
+++ b/docs/docs.json
@ -1021,6 +1021,7 @@
          "providers/minimax",
          "providers/vercel-ai-gateway",
          "providers/openrouter",
+          "providers/baseten",
          "providers/synthetic",
          "providers/opencode",
          "providers/glm",
--- a/docs/providers/baseten.md
+++ b/docs/providers/baseten.md
@ -0,0 +1,219 @@
+---
+summary: "Use Baseten Model APIs for high-performance LLMs in Moltbot"
+read_when:
+  - You want high-performance LLM inference in Moltbot
+  - You want Baseten Model APIs setup guidance
+---
+# Baseten
+
+Baseten provides Model APIs for instant access to high-performance LLMs through OpenAI-compatible endpoints. Point your existing OpenAI SDK at Baseten's inference endpoint and start making calls—no model deployment required.
+
+## Why Baseten in Moltbot
+
+- **High-performance LLMs** with optimized serving infrastructure.
+- **Wide model selection** including DeepSeek V3.2, GPT OSS 120B, Kimi K2, Qwen3 Coder, GLM-4.7, and more.
+- **OpenAI-compatible API** - standard `/v1` endpoints for easy integration.
+- **Serverless** - no infrastructure management, pay per token.
+
+## Features
+
+- **Model APIs**: Instant access to high-performance LLMs without deployment
+- **OpenAI-compatible API**: Standard `/v1` endpoints for easy integration
+- **Streaming**: Supported on all models
+- **Function calling**: Supported on select models (check model capabilities)
+- **Structured outputs**: Generate JSON that conforms to a schema
+- **Reasoning**: Control extended thinking for reasoning-capable models
+
+## Setup
+
+### 1. Get API Key
+
+1. Sign up at [baseten.co](https://app.baseten.co/signup/)
+2. Go to **[Settings > API Keys](https://app.baseten.co/settings/api_keys) > Create API Key**
+3. Copy your API key
+
+### 2. Configure Moltbot
+
+**Option A: Environment Variable**
+
+```bash
+export BASETEN_API_KEY="your-api-key-here"
+```
+
+**Option B: Interactive Setup (Recommended)**
+
+```bash
+moltbot onboard --auth-choice baseten-api-key
+```
+
+This will:
+1. Prompt for your API key (or use existing `BASETEN_API_KEY`)
+2. Configure the Baseten provider with available models
+3. Let you pick your default model
+4. Set up the provider automatically
+
+**Option C: Non-interactive**
+
+```bash
+moltbot onboard --non-interactive \
+  --auth-choice baseten-api-key \
+  --baseten-api-key "your-api-key-here"
+```
+
+### 3. Verify Setup
+
+```bash
+moltbot chat --model baseten/deepseek-ai/DeepSeek-V3.2 "Hello, are you working?"
+```
+
+## Model Selection
+
+Moltbot includes a curated catalog of popular Baseten Model API models. Pick based on your needs:
+
+- **Default**: `zai-org/GLM-4.7` (GLM-4.7) - selected by onboarding; reasoning-capable, 200k context.
+- **General purpose**: `deepseek-ai/DeepSeek-V3.2` (DeepSeek V3.2) - 131k context.
+- **Best reasoning**: `openai/gpt-oss-120b` or `moonshotai/Kimi-K2-Thinking`
+- **Coding**: `Qwen/Qwen3-Coder-480B-A35B-Instruct`
+- **Long context**: `moonshotai/Kimi-K2-Thinking` (262k context)
+
+Change your default model anytime:
+
+```bash
+moltbot models set baseten/deepseek-ai/DeepSeek-V3.2
+moltbot models set baseten/openai/gpt-oss-120b
+```
+
+List all available models:
+
+```bash
+moltbot models list | grep baseten
+```
+
+## Which Model Should I Use?
+
+| Use Case | Recommended Model | Why |
+|----------|-------------------|-----|
+| **General chat** | `deepseek-ai/DeepSeek-V3.2` | Balanced performance, 131k context |
+| **Complex reasoning** | `openai/gpt-oss-120b` | Best for step-by-step reasoning |
+| **Agentic tasks** | `openai/gpt-oss-120b` | Designed for reasoning and agentic use |
+| **Coding** | `Qwen/Qwen3-Coder-480B-A35B-Instruct` | Code-optimized, 262k context |
+| **Long context** | `moonshotai/Kimi-K2-Thinking` | 262k context window |
+| **Reasoning** | `zai-org/GLM-4.7` | Advanced thinking controls |
+
+## Available Models (9 Total)
+
+### Text Models
+
+| Model ID | Name | Context | Features |
+|----------|------|---------|----------|
+| `openai/gpt-oss-120b` | OpenAI GPT OSS 120B | 131k | Reasoning |
+| `deepseek-ai/DeepSeek-V3.2` | DeepSeek V3.2 | 131k | General |
+| `deepseek-ai/DeepSeek-V3.1` | DeepSeek V3.1 | 164k | General |
+| `deepseek-ai/DeepSeek-V3-0324` | DeepSeek V3 0324 | 164k | General |
+| `moonshotai/Kimi-K2-Thinking` | Kimi K2 Thinking | 262k | Reasoning |
+| `moonshotai/Kimi-K2-Instruct-0905` | Kimi K2 Instruct 0905 | 131k | Long context |
+| `Qwen/Qwen3-Coder-480B-A35B-Instruct` | Qwen3 Coder 480B A35B Instruct | 262k | Coding |
+| `zai-org/GLM-4.7` | GLM-4.7 | 200k | Reasoning |
+| `zai-org/GLM-4.6` | GLM-4.6 | 200k | Reasoning |
+
+## Model IDs
+
+Baseten model IDs use the format:
+
+```
+<org>/<model-name>
+```
+
+When using models in Moltbot, prefix with the provider:
+
+```bash
+moltbot chat --model baseten/deepseek-ai/DeepSeek-V3.2
+```
+
+## Streaming and Tool Support
+
+| Feature | Support |
+|---------|---------|
+| **Streaming** | All models |
+| **Function calling** | Select models (check model capabilities) |
+| **Structured outputs** | Supported via `response_format` |
+| **Reasoning** | Supported on reasoning-capable models |
+
+## Pricing
+
+Baseten uses pay-per-token pricing. Check [baseten.co](https://baseten.co) for current rates. Generally:
+
+- Smaller models: Lower cost, faster
+- Larger models: Higher quality, higher cost
+- Reasoning models: May have additional costs for extended thinking
+
+## Usage Examples
+
+```bash
+# Use DeepSeek V3.2 for general-purpose chat
+moltbot chat --model baseten/deepseek-ai/DeepSeek-V3.2
+
+# Use GPT OSS 120B for reasoning
+moltbot chat --model baseten/openai/gpt-oss-120b
+
+# Use coding model
+moltbot chat --model baseten/Qwen/Qwen3-Coder-480B-A35B-Instruct
+
+# Use reasoning model
+moltbot chat --model baseten/moonshotai/Kimi-K2-Thinking
+```
+
+## Troubleshooting
+
+### API key not recognized
+
+```bash
+echo $BASETEN_API_KEY
+moltbot models list | grep baseten
+```
+
+Ensure the key is valid and has not expired.
+
+### Model not available
+
+Run `moltbot models list` to see currently available models in the catalog. If a model you need is missing, you can add it manually to your config file.
+
+### Connection issues
+
+The Baseten inference API is served from `https://inference.baseten.co` (base URL `https://inference.baseten.co/v1`). Ensure your network allows outbound HTTPS connections to that host.
+
+## Config file example
+
+```json5
+{
+  env: { BASETEN_API_KEY: "..." },
+  agents: { defaults: { model: { primary: "baseten/deepseek-ai/DeepSeek-V3.2" } } },
+  models: {
+    mode: "merge",
+    providers: {
+      baseten: {
+        baseUrl: "https://inference.baseten.co/v1",
+        apiKey: "${BASETEN_API_KEY}",
+        api: "openai-completions",
+        models: [
+          {
+            id: "deepseek-ai/DeepSeek-V3.2",
+            name: "DeepSeek V3.2",
+            reasoning: false,
+            input: ["text"],
+            cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
+            contextWindow: 131072,
+            maxTokens: 8192
+          }
+        ]
+      }
+    }
+  }
+}
+```
+
+## Links
+
+- [Baseten](https://baseten.co)
+- [Model APIs Documentation](https://docs.baseten.co/development/model-apis/overview)
+- [API Reference](https://docs.baseten.co/reference/inference-api/chat-completions)
+- [Model Library](https://app.baseten.co/model-apis/create)
--- a/docs/providers/index.md
+++ b/docs/providers/index.md
@ -37,6 +37,7 @@ See [Venice AI](/providers/venice).
 - [Anthropic (API + Claude Code CLI)](/providers/anthropic)
 - [Qwen (OAuth)](/providers/qwen)
 - [OpenRouter](/providers/openrouter)
+- [Baseten (Model APIs for high-performance LLMs)](/providers/baseten)
 - [Vercel AI Gateway](/providers/vercel-ai-gateway)
 - [Moonshot AI (Kimi + Kimi Code)](/providers/moonshot)
 - [OpenCode Zen](/providers/opencode)
--- a/src/agents/baseten-models.ts
+++ b/src/agents/baseten-models.ts
@ -0,0 +1,128 @@
+import type { ModelDefinitionConfig } from "../config/types.js";
+
+export const BASETEN_BASE_URL = "https://inference.baseten.co/v1";
+export const BASETEN_DEFAULT_MODEL_ID = "zai-org/GLM-4.7";
+export const BASETEN_DEFAULT_MODEL_REF = `baseten/${BASETEN_DEFAULT_MODEL_ID}`;
+
+// Baseten uses pay-per-token pricing and rates vary by model, so no real prices are hard-coded here.
+// Every cost field is set to 0 as a placeholder default; override in models.json for accurate costs.
+export const BASETEN_DEFAULT_COST = {
+  input: 0,
+  output: 0,
+  cacheRead: 0,
+  cacheWrite: 0,
+};
+
+/**
+ * Static catalog of Baseten Model API models.
+ *
+ * Only includes LLM models available through Baseten's Model APIs.
+ * Model IDs use the format: <org>/<model-name>
+ *
+ * Every entry currently declares text-only input and a `maxTokens` of 8192;
+ * `contextWindow` differs per model. Entries carry no cost data: the cost table
+ * is attached separately when an entry is converted by
+ * `buildBasetenModelDefinition`.
+ */
+export const BASETEN_MODEL_CATALOG = [
+  // OpenAI GPT OSS models
+  {
+    id: "openai/gpt-oss-120b",
+    name: "OpenAI GPT OSS 120B",
+    reasoning: true,
+    input: ["text"] as const,
+    contextWindow: 131072,
+    maxTokens: 8192,
+  },
+
+  // DeepSeek models
+  {
+    id: "deepseek-ai/DeepSeek-V3.2",
+    name: "DeepSeek V3.2",
+    reasoning: false,
+    input: ["text"] as const,
+    contextWindow: 131072,
+    maxTokens: 8192,
+  },
+  {
+    id: "deepseek-ai/DeepSeek-V3.1",
+    name: "DeepSeek V3.1",
+    reasoning: false,
+    input: ["text"] as const,
+    contextWindow: 163840,
+    maxTokens: 8192,
+  },
+  {
+    id: "deepseek-ai/DeepSeek-V3-0324",
+    name: "DeepSeek V3 0324",
+    reasoning: false,
+    input: ["text"] as const,
+    contextWindow: 163840,
+    maxTokens: 8192,
+  },
+
+  // Kimi models
+  {
+    id: "moonshotai/Kimi-K2-Thinking",
+    name: "Kimi K2 Thinking",
+    reasoning: true,
+    input: ["text"] as const,
+    contextWindow: 262144,
+    maxTokens: 8192,
+  },
+  {
+    id: "moonshotai/Kimi-K2-Instruct-0905",
+    name: "Kimi K2 Instruct 0905",
+    reasoning: false,
+    input: ["text"] as const,
+    contextWindow: 131072,
+    maxTokens: 8192,
+  },
+
+  // Qwen models
+  {
+    id: "Qwen/Qwen3-Coder-480B-A35B-Instruct",
+    name: "Qwen3 Coder 480B A35B Instruct",
+    reasoning: false,
+    input: ["text"] as const,
+    contextWindow: 262144,
+    maxTokens: 8192,
+  },
+
+  // GLM models
+  {
+    id: "zai-org/GLM-4.7",
+    name: "GLM-4.7",
+    reasoning: true,
+    input: ["text"] as const,
+    contextWindow: 200000,
+    maxTokens: 8192,
+  },
+  {
+    id: "zai-org/GLM-4.6",
+    name: "GLM-4.6",
+    reasoning: true,
+    input: ["text"] as const,
+    contextWindow: 200000,
+    maxTokens: 8192,
+  },
+] as const;
+
+export type BasetenCatalogEntry = (typeof BASETEN_MODEL_CATALOG)[number];
+
+/**
+ * Build a ModelDefinitionConfig from a Baseten catalog entry.
+ *
+ * @param entry - One entry of `BASETEN_MODEL_CATALOG`.
+ * @returns A definition carrying the entry's id, name, reasoning flag, input
+ *   kinds, context window and max tokens, plus the default (all-zero) cost table.
+ *   `input` and `cost` are fresh copies, so mutating the result cannot leak into
+ *   the catalog, into `BASETEN_DEFAULT_COST`, or into other definitions.
+ */
+export function buildBasetenModelDefinition(entry: BasetenCatalogEntry): ModelDefinitionConfig {
+  return {
+    id: entry.id,
+    name: entry.name,
+    reasoning: entry.reasoning,
+    input: [...entry.input],
+    // Copy instead of sharing the module-level object: a per-model cost tweak on
+    // one definition must not silently change every other Baseten model.
+    cost: { ...BASETEN_DEFAULT_COST },
+    contextWindow: entry.contextWindow,
+    maxTokens: entry.maxTokens,
+  };
+}
+
+/**
+ * List every model of the static Baseten catalog as a ModelDefinitionConfig.
+ * The result is derived purely from BASETEN_MODEL_CATALOG, in catalog order.
+ */
+export function discoverBasetenModels(): ModelDefinitionConfig[] {
+  const definitions: ModelDefinitionConfig[] = [];
+  for (const catalogEntry of BASETEN_MODEL_CATALOG) {
+    definitions.push(buildBasetenModelDefinition(catalogEntry));
+  }
+  return definitions;
+}
--- a/src/agents/model-auth.ts
+++ b/src/agents/model-auth.ts
@ -277,6 +277,7 @@ export function resolveEnvApiKey(provider: string): EnvApiKeyResult | null {
    cerebras: "CEREBRAS_API_KEY",
    xai: "XAI_API_KEY",
    openrouter: "OPENROUTER_API_KEY",
+    baseten: "BASETEN_API_KEY",
    "vercel-ai-gateway": "AI_GATEWAY_API_KEY",
    moonshot: "MOONSHOT_API_KEY",
    "kimi-code": "KIMICODE_API_KEY",
--- a/src/agents/models-config.providers.ts
+++ b/src/agents/models-config.providers.ts
@ -7,6 +7,11 @@ import {
 import { ensureAuthProfileStore, listProfilesForProvider } from "./auth-profiles.js";
 import { resolveAwsSdkEnvVarName, resolveEnvApiKey } from "./model-auth.js";
 import { discoverBedrockModels } from "./bedrock-discovery.js";
+import {
+  buildBasetenModelDefinition,
+  BASETEN_BASE_URL,
+  BASETEN_MODEL_CATALOG,
+} from "./baseten-models.js";
 import {
  buildSyntheticModelDefinition,
  SYNTHETIC_BASE_URL,
@ -370,6 +375,15 @@ export function buildXiaomiProvider(): ProviderConfig {
  };
 }

+function buildBasetenProvider(): ProviderConfig {
+  // Expose every entry of the static catalog as a model definition.
+  const catalogModels = BASETEN_MODEL_CATALOG.map((entry) => buildBasetenModelDefinition(entry));
+  // Baseten Inference API accepts standard Bearer tokens (OpenAI-compatible),
+  // so the generic "openai-completions" API mode is used.
+  const provider: ProviderConfig = {
+    baseUrl: BASETEN_BASE_URL,
+    api: "openai-completions",
+    models: catalogModels,
+  };
+  return provider;
+}
+
 async function buildVeniceProvider(): Promise<ProviderConfig> {
  const models = await discoverVeniceModels();
  return {
@ -431,6 +445,13 @@ export async function resolveImplicitProviders(params: {
    providers.venice = { ...(await buildVeniceProvider()), apiKey: veniceKey };
  }

+  const basetenKey =
+    resolveEnvApiKeyVarName("baseten") ??
+    resolveApiKeyFromProfiles({ provider: "baseten", store: authStore });
+  if (basetenKey) {
+    providers.baseten = { ...buildBasetenProvider(), apiKey: basetenKey };
+  }
+
  const qwenProfiles = listProfilesForProvider(authStore, "qwen-portal");
  if (qwenProfiles.length > 0) {
    providers["qwen-portal"] = {
--- a/src/cli/program/register.onboard.ts
+++ b/src/cli/program/register.onboard.ts
@ -52,7 +52,7 @@ export function registerOnboardCommand(program: Command) {
    .option("--mode <mode>", "Wizard mode: local|remote")
    .option(
      "--auth-choice <choice>",
-      "Auth: setup-token|token|chutes|openai-codex|openai-api-key|openrouter-api-key|ai-gateway-api-key|moonshot-api-key|kimi-code-api-key|synthetic-api-key|venice-api-key|gemini-api-key|zai-api-key|xiaomi-api-key|apiKey|minimax-api|minimax-api-lightning|opencode-zen|skip",
+      "Auth: setup-token|token|chutes|openai-codex|openai-api-key|openrouter-api-key|baseten-api-key|ai-gateway-api-key|moonshot-api-key|kimi-code-api-key|synthetic-api-key|venice-api-key|gemini-api-key|zai-api-key|xiaomi-api-key|apiKey|minimax-api|minimax-api-lightning|opencode-zen|skip",
    )
    .option(
      "--token-provider <id>",
@ -67,6 +67,7 @@ export function registerOnboardCommand(program: Command) {
    .option("--anthropic-api-key <key>", "Anthropic API key")
    .option("--openai-api-key <key>", "OpenAI API key")
    .option("--openrouter-api-key <key>", "OpenRouter API key")
+    .option("--baseten-api-key <key>", "Baseten API key")
    .option("--ai-gateway-api-key <key>", "Vercel AI Gateway API key")
    .option("--moonshot-api-key <key>", "Moonshot API key")
    .option("--kimi-code-api-key <key>", "Kimi Code API key")
@ -118,6 +119,7 @@ export function registerOnboardCommand(program: Command) {
            anthropicApiKey: opts.anthropicApiKey as string | undefined,
            openaiApiKey: opts.openaiApiKey as string | undefined,
            openrouterApiKey: opts.openrouterApiKey as string | undefined,
+            basetenApiKey: opts.basetenApiKey as string | undefined,
            aiGatewayApiKey: opts.aiGatewayApiKey as string | undefined,
            moonshotApiKey: opts.moonshotApiKey as string | undefined,
            kimiCodeApiKey: opts.kimiCodeApiKey as string | undefined,
--- a/src/commands/auth-choice-options.ts
+++ b/src/commands/auth-choice-options.ts
@ -13,6 +13,7 @@ export type AuthChoiceGroupId =
  | "google"
  | "copilot"
  | "openrouter"
+  | "baseten"
  | "ai-gateway"
  | "moonshot"
  | "zai"
@ -90,6 +91,12 @@ const AUTH_CHOICE_GROUP_DEFS: {
    hint: "API key",
    choices: ["openrouter-api-key"],
  },
+  {
+    value: "baseten",
+    label: "Baseten",
+    hint: "Model APIs for high-performance LLMs",
+    choices: ["baseten-api-key"],
+  },
  {
    value: "ai-gateway",
    label: "Vercel AI Gateway",
@ -142,6 +149,11 @@ export function buildAuthChoiceOptions(params: {
  options.push({ value: "chutes", label: "Chutes (OAuth)" });
  options.push({ value: "openai-api-key", label: "OpenAI API key" });
  options.push({ value: "openrouter-api-key", label: "OpenRouter API key" });
+  options.push({
+    value: "baseten-api-key",
+    label: "Baseten API key",
+    hint: "Model APIs for high-performance LLMs",
+  });
  options.push({
    value: "ai-gateway-api-key",
    label: "Vercel AI Gateway API key",
--- a/src/commands/auth-choice.apply.api-providers.ts
+++ b/src/commands/auth-choice.apply.api-providers.ts
@ -11,8 +11,11 @@ import {
  applyGoogleGeminiModelDefault,
  GOOGLE_GEMINI_DEFAULT_MODEL,
 } from "./google-gemini-model-default.js";
+import { BASETEN_DEFAULT_MODEL_REF } from "../agents/baseten-models.js";
 import {
  applyAuthProfileConfig,
+  applyBasetenConfig,
+  applyBasetenProviderConfig,
  applyKimiCodeConfig,
  applyKimiCodeProviderConfig,
  applyMoonshotConfig,
@ -37,6 +40,7 @@ import {
  VENICE_DEFAULT_MODEL_REF,
  VERCEL_AI_GATEWAY_DEFAULT_MODEL_REF,
  XIAOMI_DEFAULT_MODEL_REF,
+  setBasetenApiKey,
  setGeminiApiKey,
  setKimiCodeApiKey,
  setMoonshotApiKey,
@ -89,6 +93,8 @@ export async function applyAuthChoiceApiProviders(
      authChoice = "synthetic-api-key";
    } else if (params.opts.tokenProvider === "venice") {
      authChoice = "venice-api-key";
+    } else if (params.opts.tokenProvider === "baseten") {
+      authChoice = "baseten-api-key";
    } else if (params.opts.tokenProvider === "opencode") {
      authChoice = "opencode-zen";
    }
@ -576,6 +582,54 @@ export async function applyAuthChoiceApiProviders(
    return { config: nextConfig, agentModelOverride };
  }

+  if (authChoice === "baseten-api-key") {
+    let hasCredential = false;
+
+    if (!hasCredential && params.opts?.token && params.opts?.tokenProvider === "baseten") {
+      await setBasetenApiKey(normalizeApiKeyInput(params.opts.token), params.agentDir);
+      hasCredential = true;
+    }
+
+    const envKey = resolveEnvApiKey("baseten");
+    if (envKey) {
+      const useExisting = await params.prompter.confirm({
+        message: `Use existing BASETEN_API_KEY (${envKey.source}, ${formatApiKeyPreview(envKey.apiKey)})?`,
+        initialValue: true,
+      });
+      if (useExisting) {
+        await setBasetenApiKey(envKey.apiKey, params.agentDir);
+        hasCredential = true;
+      }
+    }
+    if (!hasCredential) {
+      const key = await params.prompter.text({
+        message: "Enter Baseten API key",
+        validate: validateApiKeyInput,
+      });
+      await setBasetenApiKey(normalizeApiKeyInput(String(key)), params.agentDir);
+    }
+    nextConfig = applyAuthProfileConfig(nextConfig, {
+      profileId: "baseten:default",
+      provider: "baseten",
+      mode: "api_key",
+    });
+    {
+      const applied = await applyDefaultModelChoice({
+        config: nextConfig,
+        setDefaultModel: params.setDefaultModel,
+        defaultModel: BASETEN_DEFAULT_MODEL_REF,
+        applyDefaultConfig: applyBasetenConfig,
+        applyProviderConfig: applyBasetenProviderConfig,
+        noteDefault: BASETEN_DEFAULT_MODEL_REF,
+        noteAgentModel,
+        prompter: params.prompter,
+      });
+      nextConfig = applied.config;
+      agentModelOverride = applied.agentModelOverride ?? agentModelOverride;
+    }
+    return { config: nextConfig, agentModelOverride };
+  }
+
  if (authChoice === "opencode-zen") {
    let hasCredential = false;
    if (!hasCredential && params.opts?.token && params.opts?.tokenProvider === "opencode") {
--- a/src/commands/auth-choice.preferred-provider.ts
+++ b/src/commands/auth-choice.preferred-provider.ts
@ -21,6 +21,7 @@ const PREFERRED_PROVIDER_BY_AUTH_CHOICE: Partial<Record<AuthChoice, string>> = {
  "xiaomi-api-key": "xiaomi",
  "synthetic-api-key": "synthetic",
  "venice-api-key": "venice",
+  "baseten-api-key": "baseten",
  "github-copilot": "github-copilot",
  "copilot-proxy": "copilot-proxy",
  "minimax-cloud": "minimax",
--- a/src/commands/onboard-auth.config-core.ts
+++ b/src/commands/onboard-auth.config-core.ts
@ -1,3 +1,9 @@
+import {
+  buildBasetenModelDefinition,
+  BASETEN_BASE_URL,
+  BASETEN_DEFAULT_MODEL_REF,
+  BASETEN_MODEL_CATALOG,
+} from "../agents/baseten-models.js";
 import { buildXiaomiProvider, XIAOMI_DEFAULT_MODEL_ID } from "../agents/models-config.providers.js";
 import {
  buildSyntheticModelDefinition,
@ -484,6 +490,83 @@ export function applyVeniceConfig(cfg: MoltbotConfig): MoltbotConfig {
  };
 }

+/**
+ * Apply Baseten provider configuration without changing the default model.
+ *
+ * Registers the Baseten default model under `agents.defaults.models` (keeping any
+ * alias already present) and adds or refreshes the `baseten` provider entry, but
+ * preserves the existing primary model selection.
+ *
+ * Models already listed on an existing `baseten` provider are kept and win over
+ * catalog entries with the same id; an existing string `apiKey` is trimmed and
+ * dropped when blank.
+ *
+ * @param cfg - Current config; it is not mutated.
+ * @returns A new config with the Baseten provider and catalog models merged in.
+ */
+export function applyBasetenProviderConfig(cfg: MoltbotConfig): MoltbotConfig {
+  // Derive the fallback alias from the catalog so it always names the real
+  // default model instead of a hard-coded label that can drift from
+  // BASETEN_DEFAULT_MODEL_REF.
+  const defaultEntry = BASETEN_MODEL_CATALOG.find(
+    (entry) => `baseten/${entry.id}` === BASETEN_DEFAULT_MODEL_REF,
+  );
+  const defaultAlias: string = defaultEntry?.name ?? BASETEN_DEFAULT_MODEL_REF;
+
+  const models = { ...cfg.agents?.defaults?.models };
+  models[BASETEN_DEFAULT_MODEL_REF] = {
+    ...models[BASETEN_DEFAULT_MODEL_REF],
+    alias: models[BASETEN_DEFAULT_MODEL_REF]?.alias ?? defaultAlias,
+  };
+
+  const providers = { ...cfg.models?.providers };
+  const existingProvider = providers.baseten;
+  const existingModels = Array.isArray(existingProvider?.models) ? existingProvider.models : [];
+  const basetenModels = BASETEN_MODEL_CATALOG.map(buildBasetenModelDefinition);
+  // Existing entries come first and shadow catalog entries with the same id.
+  const mergedModels = [
+    ...existingModels,
+    ...basetenModels.filter(
+      (model) => !existingModels.some((existing) => existing.id === model.id),
+    ),
+  ];
+  // Split apiKey off so it can be normalized and re-added only when non-blank.
+  const { apiKey: existingApiKey, ...existingProviderRest } = (existingProvider ?? {}) as Record<
+    string,
+    unknown
+  > as { apiKey?: string };
+  const resolvedApiKey = typeof existingApiKey === "string" ? existingApiKey : undefined;
+  const normalizedApiKey = resolvedApiKey?.trim();
+  providers.baseten = {
+    ...existingProviderRest,
+    baseUrl: BASETEN_BASE_URL,
+    api: "openai-completions",
+    ...(normalizedApiKey ? { apiKey: normalizedApiKey } : {}),
+    models: mergedModels.length > 0 ? mergedModels : basetenModels,
+  };
+
+  return {
+    ...cfg,
+    agents: {
+      ...cfg.agents,
+      defaults: {
+        ...cfg.agents?.defaults,
+        models,
+      },
+    },
+    models: {
+      // Keep any other keys already present under `models`; only `mode` and
+      // `providers` are (re)computed here.
+      ...cfg.models,
+      mode: cfg.models?.mode ?? "merge",
+      providers,
+    },
+  };
+}
+
+/**
+ * Register the Baseten provider AND make the Baseten default model the primary model.
+ * Intended for onboarding flows where Baseten is chosen as the main provider.
+ * A `fallbacks` list on the existing model selection is carried over unchanged.
+ */
+export function applyBasetenConfig(cfg: MoltbotConfig): MoltbotConfig {
+  const withProvider = applyBasetenProviderConfig(cfg);
+  const currentModel = withProvider.agents?.defaults?.model;
+
+  // Only carry `fallbacks` over when the existing selection actually has that key.
+  const carriedFallbacks =
+    currentModel && "fallbacks" in (currentModel as Record<string, unknown>)
+      ? { fallbacks: (currentModel as { fallbacks?: string[] }).fallbacks }
+      : {};
+
+  const defaults = {
+    ...withProvider.agents?.defaults,
+    model: { ...carriedFallbacks, primary: BASETEN_DEFAULT_MODEL_REF },
+  };
+
+  return {
+    ...withProvider,
+    agents: { ...withProvider.agents, defaults },
+  };
+}
+
 export function applyAuthProfileConfig(
  cfg: MoltbotConfig,
  params: {
--- a/src/commands/onboard-auth.credentials.ts
+++ b/src/commands/onboard-auth.credentials.ts
@ -112,6 +112,19 @@ export async function setVeniceApiKey(key: string, agentDir?: string) {
  });
 }

+export async function setBasetenApiKey(key: string, agentDir?: string) {
+  // Store under the resolved agent dir so the gateway finds the credential on startup.
+  const targetAgentDir = resolveAuthAgentDir(agentDir);
+  upsertAuthProfile({
+    profileId: "baseten:default",
+    credential: { type: "api_key", provider: "baseten", key },
+    agentDir: targetAgentDir,
+  });
+}
+
 export const ZAI_DEFAULT_MODEL_REF = "zai/glm-4.7";
 export const XIAOMI_DEFAULT_MODEL_REF = "xiaomi/mimo-v2-flash";
 export const OPENROUTER_DEFAULT_MODEL_REF = "openrouter/auto";
--- a/src/commands/onboard-auth.ts
+++ b/src/commands/onboard-auth.ts
@ -1,3 +1,4 @@
+export { BASETEN_DEFAULT_MODEL_ID, BASETEN_DEFAULT_MODEL_REF } from "../agents/baseten-models.js";
 export {
  SYNTHETIC_DEFAULT_MODEL_ID,
  SYNTHETIC_DEFAULT_MODEL_REF,
@ -5,6 +6,8 @@ export {
 export { VENICE_DEFAULT_MODEL_ID, VENICE_DEFAULT_MODEL_REF } from "../agents/venice-models.js";
 export {
  applyAuthProfileConfig,
+  applyBasetenConfig,
+  applyBasetenProviderConfig,
  applyKimiCodeConfig,
  applyKimiCodeProviderConfig,
  applyMoonshotConfig,
@ -37,6 +40,7 @@ export {
 export {
  OPENROUTER_DEFAULT_MODEL_REF,
  setAnthropicApiKey,
+  setBasetenApiKey,
  setGeminiApiKey,
  setKimiCodeApiKey,
  setMinimaxApiKey,
--- a/src/commands/onboard-non-interactive/local/auth-choice.ts
+++ b/src/commands/onboard-non-interactive/local/auth-choice.ts
@ -8,6 +8,7 @@ import { buildTokenProfileId, validateAnthropicSetupToken } from "../../auth-tok
 import { applyGoogleGeminiModelDefault } from "../../google-gemini-model-default.js";
 import {
  applyAuthProfileConfig,
+  applyBasetenConfig,
  applyKimiCodeConfig,
  applyMinimaxApiConfig,
  applyMinimaxConfig,
@ -20,6 +21,7 @@ import {
  applyXiaomiConfig,
  applyZaiConfig,
  setAnthropicApiKey,
+  setBasetenApiKey,
  setGeminiApiKey,
  setKimiCodeApiKey,
  setMinimaxApiKey,
@ -330,6 +332,25 @@ export async function applyNonInteractiveAuthChoice(params: {
    return applyVeniceConfig(nextConfig);
  }

+  if (authChoice === "baseten-api-key") {
+    const resolved = await resolveNonInteractiveApiKey({
+      provider: "baseten",
+      cfg: baseConfig,
+      flagValue: opts.basetenApiKey,
+      flagName: "--baseten-api-key",
+      envVar: "BASETEN_API_KEY",
+      runtime,
+    });
+    if (!resolved) return null;
+    if (resolved.source !== "profile") await setBasetenApiKey(resolved.key);
+    nextConfig = applyAuthProfileConfig(nextConfig, {
+      profileId: "baseten:default",
+      provider: "baseten",
+      mode: "api_key",
+    });
+    return applyBasetenConfig(nextConfig);
+  }
+
  if (
    authChoice === "minimax-cloud" ||
    authChoice === "minimax-api" ||
--- a/src/commands/onboard-types.ts
+++ b/src/commands/onboard-types.ts
@ -12,6 +12,7 @@ export type AuthChoice =
  | "openai-codex"
  | "openai-api-key"
  | "openrouter-api-key"
+  | "baseten-api-key"
  | "ai-gateway-api-key"
  | "moonshot-api-key"
  | "kimi-code-api-key"
@ -63,6 +64,7 @@ export type OnboardOptions = {
  anthropicApiKey?: string;
  openaiApiKey?: string;
  openrouterApiKey?: string;
+  basetenApiKey?: string;
  aiGatewayApiKey?: string;
  moonshotApiKey?: string;
  kimiCodeApiKey?: string;