feat: enhance Bedrock model discovery with inference profiles support

devmangel 2026-01-30 11:35:25 -05:00
parent 4583f88626
commit 6df9b8f671
3 changed files with 131 additions and 6 deletions

View File

@@ -21,7 +21,8 @@ not an API key.
If AWS credentials are detected, Moltbot can automatically discover Bedrock
models that support **streaming** and **text output**. Discovery uses
`bedrock:ListFoundationModels` and `bedrock:ListInferenceProfiles`, and is
cached (default: 1 hour).
Config options live under `models.bedrockDiscovery`:
@@ -34,7 +35,8 @@ Config options live under `models.bedrockDiscovery`:
      providerFilter: ["anthropic", "amazon"],
      refreshInterval: 3600,
      defaultContextWindow: 32000,
      defaultMaxTokens: 4096,
      includeInferenceProfiles: true
    }
  }
}
@@ -47,6 +49,46 @@ Notes:
- `refreshInterval` is in seconds; set it to `0` to disable caching.
- `defaultContextWindow` (default: `32000`) and `defaultMaxTokens` (default: `4096`)
  are used for discovered models (override them if you know your model's limits).
- `includeInferenceProfiles` (default: `true`) enables discovery of inference profiles.
### Inference Profiles
AWS Bedrock **inference profiles** provide cross-region routing and improved
availability for foundation models. When you use an inference profile ID
(like `us.anthropic.claude-opus-4-5-20251101-v1:0`), Bedrock can automatically
route your request to the best available region.
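For intuition, a cross-region profile ID is the base model ID with a routing
prefix. A minimal sketch (the helper and the `us`/`eu`/`apac` prefix list are
illustrative assumptions, not part of Moltbot; discovery itself resolves base
models from the profile's model ARN, not from the ID):
```ts
// Hypothetical helper: peel the routing prefix off an inference profile ID.
function splitProfileId(
  profileId: string,
): { prefix: string; baseModelId: string } | null {
  const match = /^(us|eu|apac)\.(.+)$/.exec(profileId);
  return match ? { prefix: match[1], baseModelId: match[2] } : null;
}

// splitProfileId("us.anthropic.claude-opus-4-5-20251101-v1:0")
// → { prefix: "us", baseModelId: "anthropic.claude-opus-4-5-20251101-v1:0" }
```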
**Benefits:**
- **Higher availability**: Requests route to healthy regions automatically
- **Lower latency**: Traffic goes to the nearest available region
- **Same API**: Works identically to base model IDs
**Discovery behavior:**
- Moltbot discovers both base models and inference profiles by default
- Inference profiles inherit metadata (context window, reasoning support, etc.) from their base model
- Both appear in `moltbot models list` output
- Use either ID interchangeably in your configuration
**Example:**
```bash
# List all discovered models (includes inference profiles)
moltbot models list
# Use an inference profile in config
moltbot config set agents.defaults.model.primary \
"amazon-bedrock/us.anthropic.claude-opus-4-5-20251101-v1:0"
```
To disable inference profile discovery:
```json5
{
  models: {
    bedrockDiscovery: {
      includeInferenceProfiles: false
    }
  }
}
```
## Setup (manual)
@@ -161,12 +203,15 @@ moltbot models list
## Notes
- Bedrock requires **model access** enabled in your AWS account/region.
- Automatic discovery needs the `bedrock:ListFoundationModels` and
  `bedrock:ListInferenceProfiles` permissions.
- If you use profiles, set `AWS_PROFILE` on the gateway host.
- Moltbot surfaces the credential source in this order: `AWS_BEARER_TOKEN_BEDROCK`,
  then `AWS_ACCESS_KEY_ID` + `AWS_SECRET_ACCESS_KEY`, then `AWS_PROFILE`, then the
  default AWS SDK chain (see the sketch after this list).
- Reasoning support depends on the model; check the Bedrock model card for
  current capabilities.
- Inference profiles use the same authentication and permissions as base models.
- If inference profile discovery fails, base model discovery continues normally.
- If you prefer a managed key flow, you can also place an OpenAI-compatible
  proxy in front of Bedrock and configure it as an OpenAI provider instead.
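A minimal sketch of that credential precedence, assuming only the environment
variables named above (the function and return strings are illustrative, not
Moltbot's actual implementation):
```ts
// Illustrative: report which credential source the documented order selects.
function describeCredentialSource(env: NodeJS.ProcessEnv): string {
  if (env.AWS_BEARER_TOKEN_BEDROCK) return "AWS_BEARER_TOKEN_BEDROCK";
  if (env.AWS_ACCESS_KEY_ID && env.AWS_SECRET_ACCESS_KEY) {
    return "AWS_ACCESS_KEY_ID + AWS_SECRET_ACCESS_KEY";
  }
  if (env.AWS_PROFILE) return `AWS_PROFILE (${env.AWS_PROFILE})`;
  return "default AWS SDK chain";
}
```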

View File

@@ -1,7 +1,9 @@
import {
  BedrockClient,
  ListFoundationModelsCommand,
  ListInferenceProfilesCommand,
  type ListFoundationModelsCommandOutput,
  type ListInferenceProfilesCommandOutput,
} from "@aws-sdk/client-bedrock";
import type { BedrockDiscoveryConfig, ModelDefinitionConfig } from "../config/types.js";
@@ -18,6 +20,10 @@ const DEFAULT_COST = {
type BedrockModelSummary = NonNullable<ListFoundationModelsCommandOutput["modelSummaries"]>[number];
type InferenceProfileSummary = NonNullable<
  ListInferenceProfilesCommandOutput["inferenceProfileSummaries"]
>[number];
type BedrockDiscoveryCacheEntry = {
  expiresAt: number;
  value?: ModelDefinitionConfig[];
@@ -26,6 +32,7 @@ type BedrockDiscoveryCacheEntry = {
const discoveryCache = new Map<string, BedrockDiscoveryCacheEntry>();
let hasLoggedBedrockError = false;
let hasLoggedInferenceProfileError = false;
function normalizeProviderFilter(filter?: string[]): string[] {
  if (!filter || filter.length === 0) return [];
@@ -41,6 +48,7 @@ function buildCacheKey(params: {
  refreshIntervalSeconds: number;
  defaultContextWindow: number;
  defaultMaxTokens: number;
  includeInferenceProfiles: boolean;
}): string {
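  // Serialize every discovery input so changing any of them (including
  // includeInferenceProfiles) yields a distinct cache entry.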
  return JSON.stringify(params);
}
@@ -119,6 +127,63 @@ function toModelDefinition(
export function resetBedrockDiscoveryCacheForTest(): void {
  discoveryCache.clear();
  hasLoggedBedrockError = false;
  hasLoggedInferenceProfileError = false;
}
async function discoverInferenceProfiles(params: {
  client: BedrockClient;
  baseModelsMap: Map<string, ModelDefinitionConfig>;
  providerFilter: string[];
}): Promise<ModelDefinitionConfig[]> {
  try {
    const response = await params.client.send(new ListInferenceProfilesCommand({}));
    const profiles: ModelDefinitionConfig[] = [];
    for (const profile of response.inferenceProfileSummaries ?? []) {
      const profileId = profile.inferenceProfileId?.trim();
      if (!profileId) continue;
      const status = profile.status?.toUpperCase();
      if (status !== "ACTIVE") continue;
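      // Derive the base model ID from the first referenced model ARN
      // (the path segment after the first "/").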
      const baseModelId = (() => {
        const modelRef = profile.models?.[0];
        if (!modelRef) return undefined;
        return (modelRef as { modelArn?: string }).modelArn?.split("/")?.[1]?.trim();
      })();
      if (!baseModelId) continue;
      const baseModel = params.baseModelsMap.get(baseModelId);
      if (!baseModel) continue;
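      // Apply the provider filter against the vendor prefix of the base
      // model ID (e.g. "anthropic" in "anthropic.claude-...").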
      if (params.providerFilter.length > 0) {
        const providerName = baseModelId.split(".")[0]?.trim().toLowerCase();
        if (!providerName || !params.providerFilter.includes(providerName)) {
          continue;
        }
      }
      const profileName = profile.inferenceProfileName?.trim() || profileId;
      profiles.push({
        id: profileId,
        name: profileName,
        reasoning: baseModel.reasoning,
        input: baseModel.input,
        cost: baseModel.cost,
        contextWindow: baseModel.contextWindow,
        maxTokens: baseModel.maxTokens,
      });
    }
    return profiles.sort((a, b) => a.name.localeCompare(b.name));
  } catch (error) {
    if (!hasLoggedInferenceProfileError) {
      hasLoggedInferenceProfileError = true;
      console.warn(`[bedrock-discovery] Failed to list inference profiles: ${String(error)}`);
    }
    return [];
  }
}
export async function discoverBedrockModels(params: {
@@ -134,12 +199,14 @@ export async function discoverBedrockModels(params: {
  const providerFilter = normalizeProviderFilter(params.config?.providerFilter);
  const defaultContextWindow = resolveDefaultContextWindow(params.config);
  const defaultMaxTokens = resolveDefaultMaxTokens(params.config);
  const includeInferenceProfiles = params.config?.includeInferenceProfiles !== false;
  const cacheKey = buildCacheKey({
    region: params.region,
    providerFilter,
    refreshIntervalSeconds,
    defaultContextWindow,
    defaultMaxTokens,
    includeInferenceProfiles,
  });
  const now = params.now?.() ?? Date.now();
@@ -158,17 +225,29 @@ export async function discoverBedrockModels(params: {
  const discoveryPromise = (async () => {
    const response = await client.send(new ListFoundationModelsCommand({}));
    const baseModels: ModelDefinitionConfig[] = [];
    for (const summary of response.modelSummaries ?? []) {
      if (!shouldIncludeSummary(summary, providerFilter)) continue;
      baseModels.push(
        toModelDefinition(summary, {
          contextWindow: defaultContextWindow,
          maxTokens: defaultMaxTokens,
        }),
      );
    }
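    // Index base models by ID so inference profiles can inherit their
    // metadata (context window, reasoning support, cost, etc.).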
    const baseModelsMap = new Map(baseModels.map((model) => [model.id, model]));
    const profiles = includeInferenceProfiles
      ? await discoverInferenceProfiles({
          client,
          baseModelsMap,
          providerFilter,
        })
      : [];
    const combined = [...baseModels, ...profiles];
    return combined.sort((a, b) => a.name.localeCompare(b.name));
  })();
  if (refreshIntervalSeconds > 0) {

View File

@@ -50,6 +50,7 @@ export type BedrockDiscoveryConfig = {
  refreshInterval?: number;
  defaultContextWindow?: number;
  defaultMaxTokens?: number;
  includeInferenceProfiles?: boolean;
};
export type ModelsConfig = {