feat: enhance Bedrock model discovery with inference profiles support
parent 4583f88626
commit 6df9b8f671
@@ -21,7 +21,8 @@ not an API key.
 
 If AWS credentials are detected, Moltbot can automatically discover Bedrock
 models that support **streaming** and **text output**. Discovery uses
-`bedrock:ListFoundationModels` and is cached (default: 1 hour).
+`bedrock:ListFoundationModels` and `bedrock:ListInferenceProfiles`, and is
+cached (default: 1 hour).
 
 Config options live under `models.bedrockDiscovery`:
 
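Aside on the caching described above: a minimal sketch of an expiring, parameter-keyed cache like the one the docs mention (the names, types, and wiring here are illustrative only, not the project's implementation).

```ts
// Illustrative only: one cache entry per discovery-parameter set, expiring after
// refreshInterval seconds (refreshInterval: 0 disables caching, as the docs note).
type CacheEntry<T> = { expiresAt: number; value: T };

const cache = new Map<string, CacheEntry<string[]>>();

function readCache(key: string): string[] | undefined {
  const entry = cache.get(key);
  if (!entry || entry.expiresAt <= Date.now()) return undefined; // missing or expired
  return entry.value;
}

function writeCache(key: string, value: string[], refreshIntervalSeconds = 3600): void {
  if (refreshIntervalSeconds <= 0) return; // caching disabled
  cache.set(key, { expiresAt: Date.now() + refreshIntervalSeconds * 1000, value });
}
```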
@@ -34,7 +35,8 @@ Config options live under `models.bedrockDiscovery`:
       providerFilter: ["anthropic", "amazon"],
       refreshInterval: 3600,
       defaultContextWindow: 32000,
-      defaultMaxTokens: 4096
+      defaultMaxTokens: 4096,
+      includeInferenceProfiles: true
     }
   }
 }
@@ -47,6 +49,46 @@ Notes:
 - `refreshInterval` is seconds; set to `0` to disable caching.
 - `defaultContextWindow` (default: `32000`) and `defaultMaxTokens` (default: `4096`)
   are used for discovered models (override if you know your model limits).
+- `includeInferenceProfiles` (default: `true`) enables discovery of inference profiles.
+
+### Inference Profiles
+
+AWS Bedrock **inference profiles** provide cross-region routing and improved
+availability for foundation models. When you use an inference profile ID
+(like `us.anthropic.claude-opus-4-5-20251101-v1:0`), Bedrock can automatically
+route your request to the best available region.
+
+**Benefits:**
+- **Higher availability**: Requests route to healthy regions automatically
+- **Lower latency**: Traffic goes to the nearest available region
+- **Same API**: Works identically to base model IDs
+
+**Discovery behavior:**
+- Moltbot discovers both base models and inference profiles by default
+- Inference profiles inherit metadata (context window, reasoning support, etc.) from their base model
+- Both appear in `moltbot models list` output
+- Use either ID interchangeably in your configuration
+
+**Example:**
+```bash
+# List all discovered models (includes inference profiles)
+moltbot models list
+
+# Use an inference profile in config
+moltbot config set agents.defaults.model.primary \
+  "amazon-bedrock/us.anthropic.claude-opus-4-5-20251101-v1:0"
+```
+
+To disable inference profile discovery:
+```json5
+{
+  models: {
+    bedrockDiscovery: {
+      includeInferenceProfiles: false
+    }
+  }
+}
+```
 
 ## Setup (manual)
 
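A hedged aside on the ID shown above: the `us.` prefix in the documented example suggests cross-region profile IDs are a geography prefix plus the base model ID. The discovery code in this commit resolves the base model from the profile's model ARN instead; the helper below only illustrates the naming relationship, and the prefix list is an assumption.

```ts
// Hypothetical helper: strip an assumed geography prefix to recover the base model ID.
function profileIdToBaseModelId(profileId: string): string {
  const assumedPrefixes = ["us.", "eu.", "apac."]; // assumed, not taken from this commit
  for (const prefix of assumedPrefixes) {
    if (profileId.startsWith(prefix)) return profileId.slice(prefix.length);
  }
  return profileId; // already a base model ID
}

console.log(profileIdToBaseModelId("us.anthropic.claude-opus-4-5-20251101-v1:0"));
// -> "anthropic.claude-opus-4-5-20251101-v1:0"
```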
@@ -161,12 +203,15 @@ moltbot models list
 ## Notes
 
 - Bedrock requires **model access** enabled in your AWS account/region.
-- Automatic discovery needs the `bedrock:ListFoundationModels` permission.
+- Automatic discovery needs the `bedrock:ListFoundationModels` and
+  `bedrock:ListInferenceProfiles` permissions.
 - If you use profiles, set `AWS_PROFILE` on the gateway host.
 - Moltbot surfaces the credential source in this order: `AWS_BEARER_TOKEN_BEDROCK`,
   then `AWS_ACCESS_KEY_ID` + `AWS_SECRET_ACCESS_KEY`, then `AWS_PROFILE`, then the
   default AWS SDK chain.
 - Reasoning support depends on the model; check the Bedrock model card for
   current capabilities.
+- Inference profiles use the same authentication and permissions as base models.
+- If inference profile discovery fails, base model discovery continues normally.
 - If you prefer a managed key flow, you can also place an OpenAI‑compatible
   proxy in front of Bedrock and configure it as an OpenAI provider instead.
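One way to sanity-check the two permissions listed in the notes is to call both list APIs directly with the same `@aws-sdk/client-bedrock` commands this commit uses; a sketch (the region and the logging are illustrative, not part of the project).

```ts
import {
  BedrockClient,
  ListFoundationModelsCommand,
  ListInferenceProfilesCommand,
} from "@aws-sdk/client-bedrock";

// Illustrative permission probe: both calls must succeed for full discovery.
const client = new BedrockClient({ region: "us-east-1" }); // example region

try {
  await client.send(new ListFoundationModelsCommand({}));
  console.log("bedrock:ListFoundationModels OK");
} catch (error) {
  console.warn("bedrock:ListFoundationModels failed:", error);
}

try {
  await client.send(new ListInferenceProfilesCommand({}));
  console.log("bedrock:ListInferenceProfiles OK");
} catch (error) {
  console.warn("bedrock:ListInferenceProfiles failed:", error);
}
```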
@@ -1,7 +1,9 @@
 import {
   BedrockClient,
   ListFoundationModelsCommand,
+  ListInferenceProfilesCommand,
   type ListFoundationModelsCommandOutput,
+  type ListInferenceProfilesCommandOutput,
 } from "@aws-sdk/client-bedrock";
 
 import type { BedrockDiscoveryConfig, ModelDefinitionConfig } from "../config/types.js";
@@ -18,6 +20,10 @@ const DEFAULT_COST = {
 
 type BedrockModelSummary = NonNullable<ListFoundationModelsCommandOutput["modelSummaries"]>[number];
 
+type InferenceProfileSummary = NonNullable<
+  ListInferenceProfilesCommandOutput["inferenceProfileSummaries"]
+>[number];
+
 type BedrockDiscoveryCacheEntry = {
   expiresAt: number;
   value?: ModelDefinitionConfig[];
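The new `InferenceProfileSummary` alias follows the same derive-from-SDK-output pattern as the existing `BedrockModelSummary`; a minimal, generic illustration of the `NonNullable<...>[number]` idiom (the `Output` and `Item` names are invented for the example).

```ts
// Extract the element type of an optional array property without redeclaring its shape.
type Output = { items?: Array<{ id: string; name: string }> };
type Item = NonNullable<Output["items"]>[number]; // { id: string; name: string }

const example: Item = { id: "demo", name: "Demo" };
console.log(example.name);
```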
@@ -26,6 +32,7 @@ type BedrockDiscoveryCacheEntry = {
 
 const discoveryCache = new Map<string, BedrockDiscoveryCacheEntry>();
 let hasLoggedBedrockError = false;
+let hasLoggedInferenceProfileError = false;
 
 function normalizeProviderFilter(filter?: string[]): string[] {
   if (!filter || filter.length === 0) return [];
@@ -41,6 +48,7 @@ function buildCacheKey(params: {
   refreshIntervalSeconds: number;
   defaultContextWindow: number;
   defaultMaxTokens: number;
+  includeInferenceProfiles: boolean;
 }): string {
   return JSON.stringify(params);
 }
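Because `buildCacheKey` is just `JSON.stringify` over its params, adding `includeInferenceProfiles` to that object is what keeps cached results for the two settings from colliding; a tiny illustration (the field values are made up).

```ts
// Toggling includeInferenceProfiles changes the serialized key, so a cached result
// for one setting is never served for the other.
const withProfiles = JSON.stringify({ region: "us-east-1", includeInferenceProfiles: true });
const withoutProfiles = JSON.stringify({ region: "us-east-1", includeInferenceProfiles: false });
console.log(withProfiles === withoutProfiles); // false
```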
@@ -119,6 +127,63 @@ function toModelDefinition(
 export function resetBedrockDiscoveryCacheForTest(): void {
   discoveryCache.clear();
   hasLoggedBedrockError = false;
+  hasLoggedInferenceProfileError = false;
+}
+
+async function discoverInferenceProfiles(params: {
+  client: BedrockClient;
+  baseModelsMap: Map<string, ModelDefinitionConfig>;
+  providerFilter: string[];
+}): Promise<ModelDefinitionConfig[]> {
+  try {
+    const response = await params.client.send(new ListInferenceProfilesCommand({}));
+    const profiles: ModelDefinitionConfig[] = [];
+
+    for (const profile of response.inferenceProfileSummaries ?? []) {
+      const profileId = profile.inferenceProfileId?.trim();
+      if (!profileId) continue;
+
+      const status = profile.status?.toUpperCase();
+      if (status !== "ACTIVE") continue;
+
+      const baseModelId = (() => {
+        const modelRef = profile.models?.[0];
+        if (!modelRef) return undefined;
+        return (modelRef as { modelArn?: string }).modelArn?.split("/")?.[1]?.trim();
+      })();
+      if (!baseModelId) continue;
+
+      const baseModel = params.baseModelsMap.get(baseModelId);
+      if (!baseModel) continue;
+
+      if (params.providerFilter.length > 0) {
+        const providerName = baseModelId.split(".")[0]?.trim().toLowerCase();
+        if (!providerName || !params.providerFilter.includes(providerName)) {
+          continue;
+        }
+      }
+
+      const profileName = profile.inferenceProfileName?.trim() || profileId;
+
+      profiles.push({
+        id: profileId,
+        name: profileName,
+        reasoning: baseModel.reasoning,
+        input: baseModel.input,
+        cost: baseModel.cost,
+        contextWindow: baseModel.contextWindow,
+        maxTokens: baseModel.maxTokens,
+      });
+    }
+
+    return profiles.sort((a, b) => a.name.localeCompare(b.name));
+  } catch (error) {
+    if (!hasLoggedInferenceProfileError) {
+      hasLoggedInferenceProfileError = true;
+      console.warn(`[bedrock-discovery] Failed to list inference profiles: ${String(error)}`);
+    }
+    return [];
+  }
 }
 
 export async function discoverBedrockModels(params: {
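The least obvious line in `discoverInferenceProfiles` is the base-model lookup via `modelArn?.split("/")?.[1]`; a standalone illustration, assuming the usual Bedrock foundation-model ARN shape (`arn:aws:bedrock:<region>::foundation-model/<model-id>`).

```ts
// Assumed ARN shape for illustration; the real value comes from ListInferenceProfiles.
const modelArn =
  "arn:aws:bedrock:us-east-1::foundation-model/anthropic.claude-opus-4-5-20251101-v1:0";

const baseModelId = modelArn.split("/")[1]?.trim();
console.log(baseModelId); // "anthropic.claude-opus-4-5-20251101-v1:0"
```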
@@ -134,12 +199,14 @@ export async function discoverBedrockModels(params: {
   const providerFilter = normalizeProviderFilter(params.config?.providerFilter);
   const defaultContextWindow = resolveDefaultContextWindow(params.config);
   const defaultMaxTokens = resolveDefaultMaxTokens(params.config);
+  const includeInferenceProfiles = params.config?.includeInferenceProfiles !== false;
   const cacheKey = buildCacheKey({
     region: params.region,
     providerFilter,
     refreshIntervalSeconds,
     defaultContextWindow,
     defaultMaxTokens,
+    includeInferenceProfiles,
   });
   const now = params.now?.() ?? Date.now();
 
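A small note on the `!== false` check introduced above: it makes `includeInferenceProfiles` default to enabled when the option is omitted, so only an explicit `false` turns profile discovery off. Illustrated:

```ts
// Only an explicit `false` disables inference-profile discovery.
const settings: Array<boolean | undefined> = [undefined, true, false];
console.log(settings.map((v) => v !== false)); // [ true, true, false ]
```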
@@ -158,17 +225,29 @@ export async function discoverBedrockModels(params: {
 
   const discoveryPromise = (async () => {
     const response = await client.send(new ListFoundationModelsCommand({}));
-    const discovered: ModelDefinitionConfig[] = [];
+    const baseModels: ModelDefinitionConfig[] = [];
     for (const summary of response.modelSummaries ?? []) {
       if (!shouldIncludeSummary(summary, providerFilter)) continue;
-      discovered.push(
+      baseModels.push(
         toModelDefinition(summary, {
           contextWindow: defaultContextWindow,
           maxTokens: defaultMaxTokens,
         }),
       );
     }
-    return discovered.sort((a, b) => a.name.localeCompare(b.name));
+
+    const baseModelsMap = new Map(baseModels.map((model) => [model.id, model]));
+
+    const profiles = includeInferenceProfiles
+      ? await discoverInferenceProfiles({
+          client,
+          baseModelsMap,
+          providerFilter,
+        })
+      : [];
+
+    const combined = [...baseModels, ...profiles];
+    return combined.sort((a, b) => a.name.localeCompare(b.name));
   })();
 
   if (refreshIntervalSeconds > 0) {
@@ -50,6 +50,7 @@ export type BedrockDiscoveryConfig = {
   refreshInterval?: number;
   defaultContextWindow?: number;
   defaultMaxTokens?: number;
+  includeInferenceProfiles?: boolean;
 };
 
 export type ModelsConfig = {