From 6df9b8f6713dc565ce93eee7514204c859d93ff8 Mon Sep 17 00:00:00 2001
From: devmangel <lmiguell0viedo@gmail.com>
Date: Fri, 30 Jan 2026 11:35:25 -0500
Subject: [PATCH] feat: enhance Bedrock model discovery with inference profiles
 support

---
 docs/bedrock.md                 | 51 ++++++++++++++++++--
 src/agents/bedrock-discovery.ts | 85 +++++++++++++++++++++++++++++++--
 src/config/types.models.ts      |  1 +
 3 files changed, 131 insertions(+), 6 deletions(-)

diff --git a/docs/bedrock.md b/docs/bedrock.md
index 5bd8ef6e6..e75eb0651 100644
--- a/docs/bedrock.md
+++ b/docs/bedrock.md
@@ -21,7 +21,8 @@ not an API key.
 
 If AWS credentials are detected, Moltbot can automatically discover Bedrock
 models that support **streaming** and **text output**. Discovery uses
-`bedrock:ListFoundationModels` and is cached (default: 1 hour).
+`bedrock:ListFoundationModels` and `bedrock:ListInferenceProfiles`, and is
+cached (default: 1 hour).
 
 Config options live under `models.bedrockDiscovery`:
 
@@ -34,7 +35,8 @@ Config options live under `models.bedrockDiscovery`:
       providerFilter: ["anthropic", "amazon"],
       refreshInterval: 3600,
       defaultContextWindow: 32000,
-      defaultMaxTokens: 4096
+      defaultMaxTokens: 4096,
+      includeInferenceProfiles: true
     }
   }
 }
@@ -47,6 +49,46 @@ Notes:
 - `refreshInterval` is seconds; set to `0` to disable caching.
 - `defaultContextWindow` (default: `32000`) and `defaultMaxTokens` (default: `4096`)
   are used for discovered models (override if you know your model limits).
+- `includeInferenceProfiles` (default: `true`) enables discovery of inference profiles.
+
+### Inference Profiles
+
+AWS Bedrock **inference profiles** provide cross-region routing and improved
+availability for foundation models. When you use an inference profile ID
+(like `us.anthropic.claude-opus-4-5-20251101-v1:0`), Bedrock can automatically
+route your request to the best available region.
+
+**Benefits:**
+- **Higher availability**: Requests route to healthy regions automatically
+- **Lower latency**: Traffic goes to the nearest available region
+- **Same API**: Works identically to base model IDs
+
+**Discovery behavior:**
+- Moltbot discovers both base models and inference profiles by default
+- Inference profiles inherit metadata (context window, reasoning support, etc.) from their base model
+- Both appear in `moltbot models list` output
+- Use either ID interchangeably in your configuration
+
+**Example:**
+```bash
+# List all discovered models (includes inference profiles)
+moltbot models list
+
+# Use an inference profile in config
+moltbot config set agents.defaults.model.primary \
+  "amazon-bedrock/us.anthropic.claude-opus-4-5-20251101-v1:0"
+```
+
+To disable inference profile discovery:
+```json5
+{
+  models: {
+    bedrockDiscovery: {
+      includeInferenceProfiles: false
+    }
+  }
+}
+```
 
 ## Setup (manual)
 
@@ -161,12 +203,15 @@ moltbot models list
 ## Notes
 
 - Bedrock requires **model access** enabled in your AWS account/region.
-- Automatic discovery needs the `bedrock:ListFoundationModels` permission.
+- Automatic discovery needs the `bedrock:ListFoundationModels` and
+  `bedrock:ListInferenceProfiles` permissions.
 - If you use profiles, set `AWS_PROFILE` on the gateway host.
 - Moltbot surfaces the credential source in this order: `AWS_BEARER_TOKEN_BEDROCK`,
   then `AWS_ACCESS_KEY_ID` + `AWS_SECRET_ACCESS_KEY`, then `AWS_PROFILE`, then the
   default AWS SDK chain.
 - Reasoning support depends on the model; check the Bedrock model card for
   current capabilities.
+- Inference profiles use the same authentication as base models, but IAM must also allow invoking the profile ARN and its underlying models in each region it routes to.
+- If inference profile discovery fails, base model discovery continues normally.
 - If you prefer a managed key flow, you can also place an OpenAI‑compatible
   proxy in front of Bedrock and configure it as an OpenAI provider instead.
diff --git a/src/agents/bedrock-discovery.ts b/src/agents/bedrock-discovery.ts
index 3b42d0081..aa3efcf20 100644
--- a/src/agents/bedrock-discovery.ts
+++ b/src/agents/bedrock-discovery.ts
@@ -1,7 +1,9 @@
 import {
   BedrockClient,
   ListFoundationModelsCommand,
+  ListInferenceProfilesCommand,
   type ListFoundationModelsCommandOutput,
+  type ListInferenceProfilesCommandOutput,
 } from "@aws-sdk/client-bedrock";
 
 import type { BedrockDiscoveryConfig, ModelDefinitionConfig } from "../config/types.js";
@@ -18,6 +20,10 @@ const DEFAULT_COST = {
 
 type BedrockModelSummary = NonNullable<ListFoundationModelsCommandOutput["modelSummaries"]>[number];
 
+type InferenceProfileSummary = NonNullable<
+  ListInferenceProfilesCommandOutput["inferenceProfileSummaries"]
+>[number];
+
 type BedrockDiscoveryCacheEntry = {
   expiresAt: number;
   value?: ModelDefinitionConfig[];
@@ -26,6 +32,7 @@ type BedrockDiscoveryCacheEntry = {
 
 const discoveryCache = new Map<string, BedrockDiscoveryCacheEntry>();
 let hasLoggedBedrockError = false;
+let hasLoggedInferenceProfileError = false;
 
 function normalizeProviderFilter(filter?: string[]): string[] {
   if (!filter || filter.length === 0) return [];
@@ -41,6 +48,7 @@ function buildCacheKey(params: {
   refreshIntervalSeconds: number;
   defaultContextWindow: number;
   defaultMaxTokens: number;
+  includeInferenceProfiles: boolean;
 }): string {
   return JSON.stringify(params);
 }
@@ -119,6 +127,63 @@ function toModelDefinition(
 export function resetBedrockDiscoveryCacheForTest(): void {
   discoveryCache.clear();
   hasLoggedBedrockError = false;
+  hasLoggedInferenceProfileError = false;
+}
+
+/**
+ * Lists ACTIVE Bedrock inference profiles whose first underlying model is a
+ * discovered base model, copying that base model's capability metadata.
+ * ListInferenceProfiles is paginated, so `nextToken` is followed to the end.
+ * On any error a warning is logged once and [] is returned (best effort).
+ */
+async function discoverInferenceProfiles(params: {
+  client: BedrockClient;
+  baseModelsMap: Map<string, ModelDefinitionConfig>;
+  providerFilter: string[];
+}): Promise<ModelDefinitionConfig[]> {
+  try {
+    const profiles: ModelDefinitionConfig[] = [];
+    let nextToken: string | undefined;
+    do {
+      const response: ListInferenceProfilesCommandOutput = await params.client.send(
+        new ListInferenceProfilesCommand({ nextToken }),
+      );
+      for (const profile of response.inferenceProfileSummaries ?? []) {
+        const profileId = profile.inferenceProfileId?.trim();
+        if (!profileId || profile.status?.toUpperCase() !== "ACTIVE") continue;
+
+        // The base model ID is the segment after the first "/" of the model ARN.
+        const baseModelId = profile.models?.[0]?.modelArn?.split("/")[1]?.trim();
+        if (!baseModelId) continue;
+        const baseModel = params.baseModelsMap.get(baseModelId);
+        if (!baseModel) continue;
+
+        if (params.providerFilter.length > 0) {
+          const providerName = baseModelId.split(".")[0]?.trim().toLowerCase();
+          if (!providerName || !params.providerFilter.includes(providerName)) continue;
+        }
+
+        profiles.push({
+          id: profileId,
+          name: profile.inferenceProfileName?.trim() || profileId,
+          reasoning: baseModel.reasoning,
+          input: baseModel.input,
+          cost: baseModel.cost,
+          contextWindow: baseModel.contextWindow,
+          maxTokens: baseModel.maxTokens,
+        });
+      }
+      nextToken = response.nextToken;
+    } while (nextToken);
+
+    return profiles.sort((a, b) => a.name.localeCompare(b.name));
+  } catch (error) {
+    if (!hasLoggedInferenceProfileError) {
+      hasLoggedInferenceProfileError = true;
+      console.warn(`[bedrock-discovery] Failed to list inference profiles: ${String(error)}`);
+    }
+    return [];
+  }
 }
 
 export async function discoverBedrockModels(params: {
@@ -134,12 +199,14 @@ export async function discoverBedrockModels(params: {
   const providerFilter = normalizeProviderFilter(params.config?.providerFilter);
   const defaultContextWindow = resolveDefaultContextWindow(params.config);
   const defaultMaxTokens = resolveDefaultMaxTokens(params.config);
+  const includeInferenceProfiles = params.config?.includeInferenceProfiles !== false;
   const cacheKey = buildCacheKey({
     region: params.region,
     providerFilter,
     refreshIntervalSeconds,
     defaultContextWindow,
     defaultMaxTokens,
+    includeInferenceProfiles,
   });
   const now = params.now?.() ?? Date.now();
 
@@ -158,17 +225,29 @@ export async function discoverBedrockModels(params: {
 
   const discoveryPromise = (async () => {
     const response = await client.send(new ListFoundationModelsCommand({}));
-    const discovered: ModelDefinitionConfig[] = [];
+    const baseModels: ModelDefinitionConfig[] = [];
     for (const summary of response.modelSummaries ?? []) {
       if (!shouldIncludeSummary(summary, providerFilter)) continue;
-      discovered.push(
+      baseModels.push(
         toModelDefinition(summary, {
           contextWindow: defaultContextWindow,
           maxTokens: defaultMaxTokens,
         }),
       );
     }
-    return discovered.sort((a, b) => a.name.localeCompare(b.name));
+
+    const baseModelsMap = new Map(baseModels.map((model) => [model.id, model]));
+
+    const profiles = includeInferenceProfiles
+      ? await discoverInferenceProfiles({
+          client,
+          baseModelsMap,
+          providerFilter,
+        })
+      : [];
+
+    const combined = [...baseModels, ...profiles];
+    return combined.sort((a, b) => a.name.localeCompare(b.name));
   })();
 
   if (refreshIntervalSeconds > 0) {
diff --git a/src/config/types.models.ts b/src/config/types.models.ts
index 11b6c64cb..e98c7808a 100644
--- a/src/config/types.models.ts
+++ b/src/config/types.models.ts
@@ -50,6 +50,7 @@ export type BedrockDiscoveryConfig = {
   refreshInterval?: number;
   defaultContextWindow?: number;
   defaultMaxTokens?: number;
+  includeInferenceProfiles?: boolean;
 };
 
 export type ModelsConfig = {