feat: enhance Bedrock model discovery with inference profiles support

devmangel 2026-01-30 11:35:25 -05:00
parent 4583f88626
commit 6df9b8f671
3 changed files with 131 additions and 6 deletions

View File

@@ -21,7 +21,8 @@ not an API key.
If AWS credentials are detected, Moltbot can automatically discover Bedrock
models that support **streaming** and **text output**. Discovery uses
`bedrock:ListFoundationModels` and `bedrock:ListInferenceProfiles`, and is
cached (default: 1 hour).
Config options live under `models.bedrockDiscovery`:
@@ -34,7 +35,8 @@ Config options live under `models.bedrockDiscovery`:
      providerFilter: ["anthropic", "amazon"],
      refreshInterval: 3600,
      defaultContextWindow: 32000,
      defaultMaxTokens: 4096,
      includeInferenceProfiles: true
    }
  }
}
@@ -47,6 +49,46 @@ Notes:
- `refreshInterval` is in seconds; set it to `0` to disable caching.
- `defaultContextWindow` (default: `32000`) and `defaultMaxTokens` (default: `4096`)
  are used for discovered models (override them if you know your model's limits).
- `includeInferenceProfiles` (default: `true`) enables discovery of inference profiles.
### Inference Profiles
AWS Bedrock **inference profiles** provide cross-region routing and improved
availability for foundation models. When you use an inference profile ID
(like `us.anthropic.claude-opus-4-5-20251101-v1:0`), Bedrock can automatically
route your request to the best available region.
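For intuition, a cross-region profile ID is the base model ID with a routing
prefix. A minimal sketch (the helper and the `us`/`eu`/`apac` prefix list are
illustrative assumptions, not part of Moltbot; discovery itself resolves base
models from the profile's model ARN, not from the ID):
```ts
// Hypothetical helper: peel the routing prefix off an inference profile ID.
function splitProfileId(
  profileId: string,
): { prefix: string; baseModelId: string } | null {
  const match = /^(us|eu|apac)\.(.+)$/.exec(profileId);
  return match ? { prefix: match[1], baseModelId: match[2] } : null;
}

// splitProfileId("us.anthropic.claude-opus-4-5-20251101-v1:0")
// → { prefix: "us", baseModelId: "anthropic.claude-opus-4-5-20251101-v1:0" }
```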
**Benefits:**
- **Higher availability**: Requests route to healthy regions automatically
- **Lower latency**: Traffic goes to the nearest available region
- **Same API**: Works identically to base model IDs
**Discovery behavior:**
- Moltbot discovers both base models and inference profiles by default
- Inference profiles inherit metadata (context window, reasoning support, etc.) from their base model
- Both appear in `moltbot models list` output
- Use either ID interchangeably in your configuration
**Example:**
```bash
# List all discovered models (includes inference profiles)
moltbot models list
# Use an inference profile in config
moltbot config set agents.defaults.model.primary \
"amazon-bedrock/us.anthropic.claude-opus-4-5-20251101-v1:0"
```
To disable inference profile discovery:
```json5
{
  models: {
    bedrockDiscovery: {
      includeInferenceProfiles: false
    }
  }
}
```
## Setup (manual)
@@ -161,12 +203,15 @@ moltbot models list
## Notes
- Bedrock requires **model access** enabled in your AWS account/region.
- Automatic discovery needs the `bedrock:ListFoundationModels` and
  `bedrock:ListInferenceProfiles` permissions.
- If you use profiles, set `AWS_PROFILE` on the gateway host.
- Moltbot surfaces the credential source in this order: `AWS_BEARER_TOKEN_BEDROCK`,
  then `AWS_ACCESS_KEY_ID` + `AWS_SECRET_ACCESS_KEY`, then `AWS_PROFILE`, then the
  default AWS SDK chain (see the sketch after this list).
- Reasoning support depends on the model; check the Bedrock model card for
  current capabilities.
- Inference profiles use the same authentication and permissions as base models.
- If inference profile discovery fails, base model discovery continues normally.
- If you prefer a managed key flow, you can also place an OpenAI-compatible
  proxy in front of Bedrock and configure it as an OpenAI provider instead.
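A minimal sketch of that credential precedence, assuming only the environment
variables named above (the function and return strings are illustrative, not
Moltbot's actual implementation):
```ts
// Illustrative: report which credential source the documented order selects.
function describeCredentialSource(env: NodeJS.ProcessEnv): string {
  if (env.AWS_BEARER_TOKEN_BEDROCK) return "AWS_BEARER_TOKEN_BEDROCK";
  if (env.AWS_ACCESS_KEY_ID && env.AWS_SECRET_ACCESS_KEY) {
    return "AWS_ACCESS_KEY_ID + AWS_SECRET_ACCESS_KEY";
  }
  if (env.AWS_PROFILE) return `AWS_PROFILE (${env.AWS_PROFILE})`;
  return "default AWS SDK chain";
}
```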

View File

@@ -1,7 +1,9 @@
import {
  BedrockClient,
  ListFoundationModelsCommand,
  ListInferenceProfilesCommand,
  type ListFoundationModelsCommandOutput,
  type ListInferenceProfilesCommandOutput,
} from "@aws-sdk/client-bedrock";
import type { BedrockDiscoveryConfig, ModelDefinitionConfig } from "../config/types.js";
@@ -18,6 +20,10 @@ const DEFAULT_COST = {
type BedrockModelSummary = NonNullable<ListFoundationModelsCommandOutput["modelSummaries"]>[number];
type InferenceProfileSummary = NonNullable<
  ListInferenceProfilesCommandOutput["inferenceProfileSummaries"]
>[number];
type BedrockDiscoveryCacheEntry = {
  expiresAt: number;
  value?: ModelDefinitionConfig[];
@@ -26,6 +32,7 @@ type BedrockDiscoveryCacheEntry = {
const discoveryCache = new Map<string, BedrockDiscoveryCacheEntry>();
let hasLoggedBedrockError = false;
let hasLoggedInferenceProfileError = false;
function normalizeProviderFilter(filter?: string[]): string[] {
  if (!filter || filter.length === 0) return [];
@@ -41,6 +48,7 @@ function buildCacheKey(params: {
  refreshIntervalSeconds: number;
  defaultContextWindow: number;
  defaultMaxTokens: number;
  includeInferenceProfiles: boolean;
}): string {
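  // Serialize every discovery input so changing any of them (including
  // includeInferenceProfiles) yields a distinct cache entry.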
  return JSON.stringify(params);
}
@@ -119,6 +127,63 @@ function toModelDefinition(
export function resetBedrockDiscoveryCacheForTest(): void {
  discoveryCache.clear();
  hasLoggedBedrockError = false;
  hasLoggedInferenceProfileError = false;
}
async function discoverInferenceProfiles(params: {
  client: BedrockClient;
  baseModelsMap: Map<string, ModelDefinitionConfig>;
  providerFilter: string[];
}): Promise<ModelDefinitionConfig[]> {
  try {
    const response = await params.client.send(new ListInferenceProfilesCommand({}));
    const profiles: ModelDefinitionConfig[] = [];
    for (const profile of response.inferenceProfileSummaries ?? []) {
      const profileId = profile.inferenceProfileId?.trim();
      if (!profileId) continue;
      const status = profile.status?.toUpperCase();
      if (status !== "ACTIVE") continue;
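      // Derive the base model ID from the first referenced model ARN
      // (the path segment after the first "/").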
      const baseModelId = (() => {
        const modelRef = profile.models?.[0];
        if (!modelRef) return undefined;
        return (modelRef as { modelArn?: string }).modelArn?.split("/")?.[1]?.trim();
      })();
      if (!baseModelId) continue;
      const baseModel = params.baseModelsMap.get(baseModelId);
      if (!baseModel) continue;
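      // Apply the provider filter against the vendor prefix of the base
      // model ID (e.g. "anthropic" in "anthropic.claude-...").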
      if (params.providerFilter.length > 0) {
        const providerName = baseModelId.split(".")[0]?.trim().toLowerCase();
        if (!providerName || !params.providerFilter.includes(providerName)) {
          continue;
        }
      }
      const profileName = profile.inferenceProfileName?.trim() || profileId;
      profiles.push({
        id: profileId,
        name: profileName,
        reasoning: baseModel.reasoning,
        input: baseModel.input,
        cost: baseModel.cost,
        contextWindow: baseModel.contextWindow,
        maxTokens: baseModel.maxTokens,
      });
    }
    return profiles.sort((a, b) => a.name.localeCompare(b.name));
  } catch (error) {
    if (!hasLoggedInferenceProfileError) {
      hasLoggedInferenceProfileError = true;
      console.warn(`[bedrock-discovery] Failed to list inference profiles: ${String(error)}`);
    }
    return [];
  }
}
export async function discoverBedrockModels(params: {
@@ -134,12 +199,14 @@ export async function discoverBedrockModels(params: {
  const providerFilter = normalizeProviderFilter(params.config?.providerFilter);
  const defaultContextWindow = resolveDefaultContextWindow(params.config);
  const defaultMaxTokens = resolveDefaultMaxTokens(params.config);
  const includeInferenceProfiles = params.config?.includeInferenceProfiles !== false;
  const cacheKey = buildCacheKey({
    region: params.region,
    providerFilter,
    refreshIntervalSeconds,
    defaultContextWindow,
    defaultMaxTokens,
    includeInferenceProfiles,
  });
  const now = params.now?.() ?? Date.now();
@@ -158,17 +225,29 @@ export async function discoverBedrockModels(params: {
  const discoveryPromise = (async () => {
    const response = await client.send(new ListFoundationModelsCommand({}));
    const baseModels: ModelDefinitionConfig[] = [];
    for (const summary of response.modelSummaries ?? []) {
      if (!shouldIncludeSummary(summary, providerFilter)) continue;
      baseModels.push(
        toModelDefinition(summary, {
          contextWindow: defaultContextWindow,
          maxTokens: defaultMaxTokens,
        }),
      );
    }
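    // Index base models by ID so inference profiles can inherit their
    // metadata (context window, reasoning support, cost, etc.).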
    const baseModelsMap = new Map(baseModels.map((model) => [model.id, model]));
    const profiles = includeInferenceProfiles
      ? await discoverInferenceProfiles({
          client,
          baseModelsMap,
          providerFilter,
        })
      : [];
    const combined = [...baseModels, ...profiles];
    return combined.sort((a, b) => a.name.localeCompare(b.name));
  })();
  if (refreshIntervalSeconds > 0) {

View File

@@ -50,6 +50,7 @@ export type BedrockDiscoveryConfig = {
  refreshInterval?: number;
  defaultContextWindow?: number;
  defaultMaxTokens?: number;
  includeInferenceProfiles?: boolean;
};
export type ModelsConfig = {