From 6df9b8f6713dc565ce93eee7514204c859d93ff8 Mon Sep 17 00:00:00 2001 From: devmangel Date: Fri, 30 Jan 2026 11:35:25 -0500 Subject: [PATCH] feat: enhance Bedrock model discovery with inference profiles support --- docs/bedrock.md | 51 ++++++++++++++++++-- src/agents/bedrock-discovery.ts | 85 +++++++++++++++++++++++++++++++-- src/config/types.models.ts | 1 + 3 files changed, 131 insertions(+), 6 deletions(-) diff --git a/docs/bedrock.md b/docs/bedrock.md index 5bd8ef6e6..e75eb0651 100644 --- a/docs/bedrock.md +++ b/docs/bedrock.md @@ -21,7 +21,8 @@ not an API key. If AWS credentials are detected, Moltbot can automatically discover Bedrock models that support **streaming** and **text output**. Discovery uses -`bedrock:ListFoundationModels` and is cached (default: 1 hour). +`bedrock:ListFoundationModels` and `bedrock:ListInferenceProfiles`, and is +cached (default: 1 hour). Config options live under `models.bedrockDiscovery`: @@ -34,7 +35,8 @@ Config options live under `models.bedrockDiscovery`: providerFilter: ["anthropic", "amazon"], refreshInterval: 3600, defaultContextWindow: 32000, - defaultMaxTokens: 4096 + defaultMaxTokens: 4096, + includeInferenceProfiles: true } } } @@ -47,6 +49,46 @@ Notes: - `refreshInterval` is seconds; set to `0` to disable caching. - `defaultContextWindow` (default: `32000`) and `defaultMaxTokens` (default: `4096`) are used for discovered models (override if you know your model limits). +- `includeInferenceProfiles` (default: `true`) enables discovery of inference profiles. + +### Inference Profiles + +AWS Bedrock **inference profiles** provide cross-region routing and improved +availability for foundation models. When you use an inference profile ID +(like `us.anthropic.claude-opus-4-5-20251101-v1:0`), Bedrock can automatically +route your request to the best available region. 
+ +**Benefits:** +- **Higher availability**: Requests route to healthy regions automatically +- **Lower latency**: Traffic goes to the nearest available region +- **Same API**: Works identically to base model IDs + +**Discovery behavior:** +- Moltbot discovers both base models and inference profiles by default +- Inference profiles inherit metadata (context window, reasoning support, etc.) from their base model +- Both appear in `moltbot models list` output +- Use either ID interchangeably in your configuration + +**Example:** +```bash +# List all discovered models (includes inference profiles) +moltbot models list + +# Use an inference profile in config +moltbot config set agents.defaults.model.primary \ + "amazon-bedrock/us.anthropic.claude-opus-4-5-20251101-v1:0" +``` + +To disable inference profile discovery: +```json5 +{ + models: { + bedrockDiscovery: { + includeInferenceProfiles: false + } + } +} +``` ## Setup (manual) @@ -161,12 +203,15 @@ moltbot models list ## Notes - Bedrock requires **model access** enabled in your AWS account/region. -- Automatic discovery needs the `bedrock:ListFoundationModels` permission. +- Automatic discovery needs the `bedrock:ListFoundationModels` and + `bedrock:ListInferenceProfiles` permissions. - If you use profiles, set `AWS_PROFILE` on the gateway host. - Moltbot surfaces the credential source in this order: `AWS_BEARER_TOKEN_BEDROCK`, then `AWS_ACCESS_KEY_ID` + `AWS_SECRET_ACCESS_KEY`, then `AWS_PROFILE`, then the default AWS SDK chain. - Reasoning support depends on the model; check the Bedrock model card for current capabilities. +- Inference profiles use the same AWS credentials as base models; discovering them additionally requires the `bedrock:ListInferenceProfiles` permission. +- If inference profile discovery fails, base model discovery continues normally. - If you prefer a managed key flow, you can also place an OpenAI‑compatible proxy in front of Bedrock and configure it as an OpenAI provider instead. 
diff --git a/src/agents/bedrock-discovery.ts b/src/agents/bedrock-discovery.ts index 3b42d0081..aa3efcf20 100644 --- a/src/agents/bedrock-discovery.ts +++ b/src/agents/bedrock-discovery.ts @@ -1,7 +1,9 @@ import { BedrockClient, ListFoundationModelsCommand, + ListInferenceProfilesCommand, type ListFoundationModelsCommandOutput, + type ListInferenceProfilesCommandOutput, } from "@aws-sdk/client-bedrock"; import type { BedrockDiscoveryConfig, ModelDefinitionConfig } from "../config/types.js"; @@ -18,6 +20,10 @@ const DEFAULT_COST = { type BedrockModelSummary = NonNullable[number]; +type InferenceProfileSummary = NonNullable< + ListInferenceProfilesCommandOutput["inferenceProfileSummaries"] +>[number]; + type BedrockDiscoveryCacheEntry = { expiresAt: number; value?: ModelDefinitionConfig[]; @@ -26,6 +32,7 @@ type BedrockDiscoveryCacheEntry = { const discoveryCache = new Map(); let hasLoggedBedrockError = false; +let hasLoggedInferenceProfileError = false; function normalizeProviderFilter(filter?: string[]): string[] { if (!filter || filter.length === 0) return []; @@ -41,6 +48,7 @@ function buildCacheKey(params: { refreshIntervalSeconds: number; defaultContextWindow: number; defaultMaxTokens: number; + includeInferenceProfiles: boolean; }): string { return JSON.stringify(params); } @@ -119,6 +127,63 @@ function toModelDefinition( export function resetBedrockDiscoveryCacheForTest(): void { discoveryCache.clear(); hasLoggedBedrockError = false; + hasLoggedInferenceProfileError = false; +} + +async function discoverInferenceProfiles(params: { + client: BedrockClient; + baseModelsMap: Map; + providerFilter: string[]; +}): Promise { + try { + const response = await params.client.send(new ListInferenceProfilesCommand({})); + const profiles: ModelDefinitionConfig[] = []; + + for (const profile of response.inferenceProfileSummaries ?? 
[]) { + const profileId = profile.inferenceProfileId?.trim(); + if (!profileId) continue; + + const status = profile.status?.toUpperCase(); + if (status !== "ACTIVE") continue; + + const baseModelId = (() => { + const modelRef = profile.models?.[0]; + if (!modelRef) return undefined; + return (modelRef as { modelArn?: string }).modelArn?.split("/")?.[1]?.trim(); + })(); + if (!baseModelId) continue; + + const baseModel = params.baseModelsMap.get(baseModelId); + if (!baseModel) continue; + + if (params.providerFilter.length > 0) { + const providerName = baseModelId.split(".")[0]?.trim().toLowerCase(); + if (!providerName || !params.providerFilter.includes(providerName)) { + continue; + } + } + + const profileName = profile.inferenceProfileName?.trim() || profileId; + + profiles.push({ + id: profileId, + name: profileName, + reasoning: baseModel.reasoning, + input: baseModel.input, + cost: baseModel.cost, + contextWindow: baseModel.contextWindow, + maxTokens: baseModel.maxTokens, + }); + } + + return profiles.sort((a, b) => a.name.localeCompare(b.name)); + } catch (error) { + if (!hasLoggedInferenceProfileError) { + hasLoggedInferenceProfileError = true; + console.warn(`[bedrock-discovery] Failed to list inference profiles: ${String(error)}`); + } + return []; + } } export async function discoverBedrockModels(params: { @@ -134,12 +199,14 @@ export async function discoverBedrockModels(params: { const providerFilter = normalizeProviderFilter(params.config?.providerFilter); const defaultContextWindow = resolveDefaultContextWindow(params.config); const defaultMaxTokens = resolveDefaultMaxTokens(params.config); + const includeInferenceProfiles = params.config?.includeInferenceProfiles !== false; const cacheKey = buildCacheKey({ region: params.region, providerFilter, refreshIntervalSeconds, defaultContextWindow, defaultMaxTokens, + includeInferenceProfiles, }); const now = params.now?.() ?? 
Date.now(); @@ -158,17 +225,29 @@ export async function discoverBedrockModels(params: { const discoveryPromise = (async () => { const response = await client.send(new ListFoundationModelsCommand({})); - const discovered: ModelDefinitionConfig[] = []; + const baseModels: ModelDefinitionConfig[] = []; for (const summary of response.modelSummaries ?? []) { if (!shouldIncludeSummary(summary, providerFilter)) continue; - discovered.push( + baseModels.push( toModelDefinition(summary, { contextWindow: defaultContextWindow, maxTokens: defaultMaxTokens, }), ); } - return discovered.sort((a, b) => a.name.localeCompare(b.name)); + + const baseModelsMap = new Map(baseModels.map((model) => [model.id, model])); + + const profiles = includeInferenceProfiles + ? await discoverInferenceProfiles({ + client, + baseModelsMap, + providerFilter, + }) + : []; + + const combined = [...baseModels, ...profiles]; + return combined.sort((a, b) => a.name.localeCompare(b.name)); })(); if (refreshIntervalSeconds > 0) { diff --git a/src/config/types.models.ts b/src/config/types.models.ts index 11b6c64cb..e98c7808a 100644 --- a/src/config/types.models.ts +++ b/src/config/types.models.ts @@ -50,6 +50,7 @@ export type BedrockDiscoveryConfig = { refreshInterval?: number; defaultContextWindow?: number; defaultMaxTokens?: number; + includeInferenceProfiles?: boolean; }; export type ModelsConfig = {