This commit is contained in:
Miguel Oviedo 2026-01-30 11:41:16 -05:00 committed by GitHub
commit 2d9507f71c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 131 additions and 6 deletions

View File

@ -21,7 +21,8 @@ not an API key.
If AWS credentials are detected, OpenClaw can automatically discover Bedrock
models that support **streaming** and **text output**. Discovery uses
`bedrock:ListFoundationModels` and is cached (default: 1 hour).
`bedrock:ListFoundationModels` and `bedrock:ListInferenceProfiles`, and is
cached (default: 1 hour).
Config options live under `models.bedrockDiscovery`:
@ -34,7 +35,8 @@ Config options live under `models.bedrockDiscovery`:
providerFilter: ["anthropic", "amazon"],
refreshInterval: 3600,
defaultContextWindow: 32000,
defaultMaxTokens: 4096
defaultMaxTokens: 4096,
includeInferenceProfiles: true
}
}
}
@ -47,6 +49,46 @@ Notes:
- `refreshInterval` is seconds; set to `0` to disable caching.
- `defaultContextWindow` (default: `32000`) and `defaultMaxTokens` (default: `4096`)
are used for discovered models (override if you know your model limits).
- `includeInferenceProfiles` (default: `true`) enables discovery of inference profiles.
### Inference Profiles
AWS Bedrock **inference profiles** provide cross-region routing and improved
availability for foundation models. When you use an inference profile ID
(like `us.anthropic.claude-opus-4-5-20251101-v1:0`), Bedrock can automatically
route your request to the best available region.
**Benefits:**
- **Higher availability**: Requests route to healthy regions automatically
- **Lower latency**: Traffic goes to the nearest available region
- **Same API**: Works identically to base model IDs
**Discovery behavior:**
- OpenClaw discovers both base models and inference profiles by default
- Inference profiles inherit metadata (context window, reasoning support, etc.) from their base model
- Both appear in `openclaw models list` output
- Use either ID interchangeably in your configuration
**Example:**
```bash
# List all discovered models (includes inference profiles)
openclaw models list
# Use an inference profile in config
openclaw config set agents.defaults.model.primary \
"amazon-bedrock/us.anthropic.claude-opus-4-5-20251101-v1:0"
```
To disable inference profile discovery:
```json5
{
models: {
bedrockDiscovery: {
includeInferenceProfiles: false
}
}
}
```
## Setup (manual)
@ -161,12 +203,15 @@ openclaw models list
## Notes
- Bedrock requires **model access** enabled in your AWS account/region.
- Automatic discovery needs the `bedrock:ListFoundationModels` permission.
- Automatic discovery needs the `bedrock:ListFoundationModels` and
`bedrock:ListInferenceProfiles` permissions.
- If you use profiles, set `AWS_PROFILE` on the gateway host.
- OpenClaw surfaces the credential source in this order: `AWS_BEARER_TOKEN_BEDROCK`,
then `AWS_ACCESS_KEY_ID` + `AWS_SECRET_ACCESS_KEY`, then `AWS_PROFILE`, then the
default AWS SDK chain.
- Reasoning support depends on the model; check the Bedrock model card for
current capabilities.
- Inference profiles use the same authentication and permissions as base models.
- If inference profile discovery fails, base model discovery continues normally.
- If you prefer a managed key flow, you can also place an OpenAI-compatible
proxy in front of Bedrock and configure it as an OpenAI provider instead.

View File

@ -1,7 +1,9 @@
import {
BedrockClient,
ListFoundationModelsCommand,
ListInferenceProfilesCommand,
type ListFoundationModelsCommandOutput,
type ListInferenceProfilesCommandOutput,
} from "@aws-sdk/client-bedrock";
import type { BedrockDiscoveryConfig, ModelDefinitionConfig } from "../config/types.js";
@ -18,6 +20,10 @@ const DEFAULT_COST = {
type BedrockModelSummary = NonNullable<ListFoundationModelsCommandOutput["modelSummaries"]>[number];
type InferenceProfileSummary = NonNullable<
ListInferenceProfilesCommandOutput["inferenceProfileSummaries"]
>[number];
type BedrockDiscoveryCacheEntry = {
expiresAt: number;
value?: ModelDefinitionConfig[];
@ -26,6 +32,7 @@ type BedrockDiscoveryCacheEntry = {
const discoveryCache = new Map<string, BedrockDiscoveryCacheEntry>();
let hasLoggedBedrockError = false;
let hasLoggedInferenceProfileError = false;
function normalizeProviderFilter(filter?: string[]): string[] {
if (!filter || filter.length === 0) return [];
@ -41,6 +48,7 @@ function buildCacheKey(params: {
refreshIntervalSeconds: number;
defaultContextWindow: number;
defaultMaxTokens: number;
includeInferenceProfiles: boolean;
}): string {
return JSON.stringify(params);
}
@ -119,6 +127,63 @@ function toModelDefinition(
export function resetBedrockDiscoveryCacheForTest(): void {
discoveryCache.clear();
hasLoggedBedrockError = false;
hasLoggedInferenceProfileError = false;
}
/**
 * List Bedrock inference profiles and turn each ACTIVE one into a model
 * definition, inheriting metadata (reasoning, input, cost, context window,
 * max tokens) from its already-discovered base model.
 *
 * Profiles whose base model is absent from `baseModelsMap`, or whose provider
 * is excluded by `providerFilter`, are skipped. Any API failure is logged at
 * most once per process and yields an empty list so base-model discovery is
 * unaffected.
 */
async function discoverInferenceProfiles(params: {
  client: BedrockClient;
  baseModelsMap: Map<string, ModelDefinitionConfig>;
  providerFilter: string[];
}): Promise<ModelDefinitionConfig[]> {
  const { client, baseModelsMap, providerFilter } = params;
  try {
    const response = await client.send(new ListInferenceProfilesCommand({}));
    const results: ModelDefinitionConfig[] = [];
    for (const summary of response.inferenceProfileSummaries ?? []) {
      const id = summary.inferenceProfileId?.trim();
      if (!id) continue;
      // Only surface profiles the service reports as usable.
      if (summary.status?.toUpperCase() !== "ACTIVE") continue;
      // Derive the base model id from the first referenced model ARN:
      // the segment after the slash in ".../foundation-model/<modelId>".
      const firstRef = summary.models?.[0];
      const baseId = firstRef
        ? (firstRef as { modelArn?: string }).modelArn?.split("/")?.[1]?.trim()
        : undefined;
      if (!baseId) continue;
      const base = baseModelsMap.get(baseId);
      if (!base) continue; // base model filtered out or not discovered
      if (providerFilter.length > 0) {
        // Provider name is the leading dot-separated segment of the model id.
        const provider = baseId.split(".")[0]?.trim().toLowerCase();
        if (!provider || !providerFilter.includes(provider)) continue;
      }
      results.push({
        id,
        name: summary.inferenceProfileName?.trim() || id,
        reasoning: base.reasoning,
        input: base.input,
        cost: base.cost,
        contextWindow: base.contextWindow,
        maxTokens: base.maxTokens,
      });
    }
    return results.sort((a, b) => a.name.localeCompare(b.name));
  } catch (error) {
    // Warn once per process so periodic cache refreshes stay quiet.
    if (!hasLoggedInferenceProfileError) {
      hasLoggedInferenceProfileError = true;
      console.warn(`[bedrock-discovery] Failed to list inference profiles: ${String(error)}`);
    }
    return [];
  }
}
export async function discoverBedrockModels(params: {
@ -134,12 +199,14 @@ export async function discoverBedrockModels(params: {
const providerFilter = normalizeProviderFilter(params.config?.providerFilter);
const defaultContextWindow = resolveDefaultContextWindow(params.config);
const defaultMaxTokens = resolveDefaultMaxTokens(params.config);
const includeInferenceProfiles = params.config?.includeInferenceProfiles !== false;
const cacheKey = buildCacheKey({
region: params.region,
providerFilter,
refreshIntervalSeconds,
defaultContextWindow,
defaultMaxTokens,
includeInferenceProfiles,
});
const now = params.now?.() ?? Date.now();
@ -158,17 +225,29 @@ export async function discoverBedrockModels(params: {
const discoveryPromise = (async () => {
const response = await client.send(new ListFoundationModelsCommand({}));
const discovered: ModelDefinitionConfig[] = [];
const baseModels: ModelDefinitionConfig[] = [];
for (const summary of response.modelSummaries ?? []) {
if (!shouldIncludeSummary(summary, providerFilter)) continue;
discovered.push(
baseModels.push(
toModelDefinition(summary, {
contextWindow: defaultContextWindow,
maxTokens: defaultMaxTokens,
}),
);
}
return discovered.sort((a, b) => a.name.localeCompare(b.name));
const baseModelsMap = new Map(baseModels.map((model) => [model.id, model]));
const profiles = includeInferenceProfiles
? await discoverInferenceProfiles({
client,
baseModelsMap,
providerFilter,
})
: [];
const combined = [...baseModels, ...profiles];
return combined.sort((a, b) => a.name.localeCompare(b.name));
})();
if (refreshIntervalSeconds > 0) {

View File

@ -50,6 +50,7 @@ export type BedrockDiscoveryConfig = {
refreshInterval?: number;
defaultContextWindow?: number;
defaultMaxTokens?: number;
includeInferenceProfiles?: boolean;
};
export type ModelsConfig = {