Merge e954d29291 into 09be5d45d5

2026-01-30 16:25:49 +00:00 · 2026-01-30 16:25:49 +00:00 · a0ce606db9
commit a0ce606db9
parent 09be5d45d5 e954d29291
7 changed files with 476 additions and 44 deletions
--- a/src/agents/auth-profiles.auth-profile-cooldowns.test.ts
+++ b/src/agents/auth-profiles.auth-profile-cooldowns.test.ts
@ -1,5 +1,49 @@
+/*
+ * Per-Model Cooldown Tests
+ * ────────────────────────
+ * These tests verify the per-model cooldown feature (discussion #3417).
+ *
+ * Key design asymmetry:
+ * - Failures CREATE per-model keys (e.g., "openai:default:gpt-4")
+ * - Successes UPDATE profile-level keys AND clear per-model keys (if they exist)
+ * - Per-model keys are "penalty boxes": a success resets their fields but keeps the entry
+ *
+ * This allows independent rate limits per model while keeping the store clean.
+ * See: src/agents/auth-profiles/usage.ts for implementation details.
+ */
+import fs from "node:fs/promises";
+import os from "node:os";
+import path from "node:path";
 import { describe, expect, it } from "vitest";
-import { calculateAuthProfileCooldownMs } from "./auth-profiles.js";
+import {
+  calculateAuthProfileCooldownMs,
+  clearAuthProfileCooldown,
+  cooldownKey,
+  isProfileInCooldown,
+  markAuthProfileFailure,
+  markAuthProfileUsed,
+  saveAuthProfileStore,
+} from "./auth-profiles.js";
+import type { AuthProfileStore } from "./auth-profiles.js";
+import { AUTH_STORE_VERSION } from "./auth-profiles/constants.js";
+
+// Test helpers
+const makeStore = (usageStats?: AuthProfileStore["usageStats"]): AuthProfileStore => ({
+  version: AUTH_STORE_VERSION,
+  profiles: {
+    "openai:default": { type: "api_key", provider: "openai", key: "test" },
+  },
+  ...(usageStats && { usageStats }),
+});
+
+async function withTempDir<T>(fn: (tempDir: string) => Promise<T>): Promise<T> {
+  const tempDir = await fs.mkdtemp(path.join(os.tmpdir(), "moltbot-auth-"));
+  try {
+    return await fn(tempDir);
+  } finally {
+    await fs.rm(tempDir, { recursive: true, force: true });
+  }
+}

 describe("auth profile cooldowns", () => {
  it("applies exponential backoff with a 1h cap", () => {
@ -10,3 +54,212 @@ describe("auth profile cooldowns", () => {
    expect(calculateAuthProfileCooldownMs(5)).toBe(60 * 60_000);
  });
 });
+
+describe("cooldownKey", () => {
+  it("returns profileId when model is not provided or empty", () => {
+    expect(cooldownKey("openai:default")).toBe("openai:default");
+    expect(cooldownKey("openai:default", undefined)).toBe("openai:default");
+    expect(cooldownKey("openai:default", "")).toBe("openai:default");
+    expect(cooldownKey("openai:default", "   ")).toBe("openai:default");
+  });
+
+  it("returns composite key when model is provided", () => {
+    expect(cooldownKey("openai:default", "gpt-4")).toBe("openai:default:gpt-4");
+    expect(cooldownKey("github-copilot:default", "gpt-5.2")).toBe("github-copilot:default:gpt-5.2");
+  });
+});
+
+describe("isProfileInCooldown with per-model support", () => {
+  it("returns false when no cooldown exists", () => {
+    const store = makeStore();
+    expect(isProfileInCooldown(store, "openai:default")).toBe(false);
+    expect(isProfileInCooldown(store, "openai:default", "gpt-4")).toBe(false);
+  });
+
+  it("checks profile-level cooldown when model not provided", () => {
+    const store = makeStore({ "openai:default": { cooldownUntil: Date.now() + 60_000 } });
+    expect(isProfileInCooldown(store, "openai:default")).toBe(true);
+  });
+
+  it("checks per-model cooldown when model is provided", () => {
+    const store = makeStore({ "openai:default:gpt-4": { cooldownUntil: Date.now() + 60_000 } });
+    expect(isProfileInCooldown(store, "openai:default", "gpt-4")).toBe(true);
+    expect(isProfileInCooldown(store, "openai:default", "gpt-3.5")).toBe(false);
+    expect(isProfileInCooldown(store, "openai:default")).toBe(false);
+  });
+
+  it("allows independent cooldowns per model", () => {
+    const store: AuthProfileStore = {
+      version: AUTH_STORE_VERSION,
+      profiles: {
+        "github-copilot:default": { type: "api_key", provider: "github-copilot", key: "test" },
+      },
+      usageStats: { "github-copilot:default:gpt-5.2": { cooldownUntil: Date.now() + 60_000 } },
+    };
+    expect(isProfileInCooldown(store, "github-copilot:default", "gpt-5.2")).toBe(true);
+    expect(isProfileInCooldown(store, "github-copilot:default", "gpt-5-mini")).toBe(false);
+  });
+
+  it("returns false when cooldown has expired", () => {
+    const store = makeStore({ "openai:default:gpt-4": { cooldownUntil: Date.now() - 1000 } });
+    expect(isProfileInCooldown(store, "openai:default", "gpt-4")).toBe(false);
+  });
+});
+
+describe("markAuthProfileUsed with per-model support", () => {
+  it("clears per-model cooldown when model is provided", async () => {
+    await withTempDir(async (tempDir) => {
+      const cooldownTime = Date.now() + 60_000;
+      const store = makeStore({
+        "openai:default": { cooldownUntil: cooldownTime },
+        "openai:default:gpt-4": { cooldownUntil: cooldownTime, errorCount: 3 },
+        "openai:default:gpt-3.5": { cooldownUntil: cooldownTime },
+      });
+      saveAuthProfileStore(store, tempDir);
+
+      await markAuthProfileUsed({
+        store,
+        profileId: "openai:default",
+        model: "gpt-4",
+        agentDir: tempDir,
+      });
+
+      expect(store.usageStats?.["openai:default"]?.cooldownUntil).toBeUndefined();
+      expect(store.usageStats?.["openai:default:gpt-4"]?.cooldownUntil).toBeUndefined();
+      expect(store.usageStats?.["openai:default:gpt-4"]?.errorCount).toBe(0);
+      expect(store.usageStats?.["openai:default:gpt-3.5"]?.cooldownUntil).toBe(cooldownTime);
+    });
+  });
+
+  it("only clears profile-level cooldown when model is not provided", async () => {
+    await withTempDir(async (tempDir) => {
+      const cooldownTime = Date.now() + 60_000;
+      const store = makeStore({
+        "openai:default": { cooldownUntil: cooldownTime },
+        "openai:default:gpt-4": { cooldownUntil: cooldownTime },
+      });
+      saveAuthProfileStore(store, tempDir);
+
+      await markAuthProfileUsed({ store, profileId: "openai:default", agentDir: tempDir });
+
+      expect(store.usageStats?.["openai:default"]?.cooldownUntil).toBeUndefined();
+      expect(store.usageStats?.["openai:default:gpt-4"]?.cooldownUntil).toBe(cooldownTime);
+    });
+  });
+});
+
+describe("isProfileInCooldown backward compatibility", () => {
+  it("returns true for any model when profile-level cooldown exists", () => {
+    const store = makeStore({ "openai:default": { cooldownUntil: Date.now() + 60_000 } });
+    expect(isProfileInCooldown(store, "openai:default", "gpt-4")).toBe(true);
+    expect(isProfileInCooldown(store, "openai:default", "gpt-3.5")).toBe(true);
+    expect(isProfileInCooldown(store, "openai:default", "o1-preview")).toBe(true);
+    expect(isProfileInCooldown(store, "openai:default")).toBe(true);
+  });
+
+  it("checks disabledUntil for per-model cooldowns (billing failures)", () => {
+    const store = makeStore({ "openai:default:gpt-4": { disabledUntil: Date.now() + 60_000 } });
+    expect(isProfileInCooldown(store, "openai:default", "gpt-4")).toBe(true);
+    expect(isProfileInCooldown(store, "openai:default", "gpt-3.5")).toBe(false);
+  });
+});
+
+describe("markAuthProfileFailure with per-model support", () => {
+  it("tracks failure per model when model is provided", async () => {
+    await withTempDir(async (tempDir) => {
+      const store = makeStore();
+      saveAuthProfileStore(store, tempDir);
+
+      await markAuthProfileFailure({
+        store,
+        profileId: "openai:default",
+        model: "gpt-4",
+        reason: "rate_limit",
+        agentDir: tempDir,
+      });
+
+      expect(store.usageStats?.["openai:default:gpt-4"]?.cooldownUntil).toBeGreaterThan(Date.now());
+      expect(store.usageStats?.["openai:default:gpt-4"]?.errorCount).toBe(1);
+      expect(store.usageStats?.["openai:default"]?.cooldownUntil).toBeUndefined();
+      expect(store.usageStats?.["openai:default:gpt-3.5"]).toBeUndefined();
+    });
+  });
+
+  it("tracks failure at profile level when model is not provided", async () => {
+    await withTempDir(async (tempDir) => {
+      const store = makeStore();
+      saveAuthProfileStore(store, tempDir);
+
+      await markAuthProfileFailure({
+        store,
+        profileId: "openai:default",
+        reason: "auth",
+        agentDir: tempDir,
+      });
+
+      expect(store.usageStats?.["openai:default"]?.cooldownUntil).toBeGreaterThan(Date.now());
+      expect(store.usageStats?.["openai:default"]?.errorCount).toBe(1);
+    });
+  });
+
+  it("tracks billing failures with disabledUntil per model", async () => {
+    await withTempDir(async (tempDir) => {
+      const store = makeStore();
+      saveAuthProfileStore(store, tempDir);
+
+      await markAuthProfileFailure({
+        store,
+        profileId: "openai:default",
+        model: "gpt-4",
+        reason: "billing",
+        agentDir: tempDir,
+      });
+
+      expect(store.usageStats?.["openai:default:gpt-4"]?.disabledUntil).toBeGreaterThan(Date.now());
+      expect(store.usageStats?.["openai:default:gpt-4"]?.disabledReason).toBe("billing");
+    });
+  });
+});
+
+describe("clearAuthProfileCooldown with per-model support", () => {
+  it("clears per-model cooldown when model is provided", async () => {
+    await withTempDir(async (tempDir) => {
+      const cooldownTime = Date.now() + 60_000;
+      const store = makeStore({
+        "openai:default": { cooldownUntil: cooldownTime },
+        "openai:default:gpt-4": { cooldownUntil: cooldownTime, errorCount: 3 },
+        "openai:default:gpt-3.5": { cooldownUntil: cooldownTime },
+      });
+      saveAuthProfileStore(store, tempDir);
+
+      await clearAuthProfileCooldown({
+        store,
+        profileId: "openai:default",
+        model: "gpt-4",
+        agentDir: tempDir,
+      });
+
+      expect(store.usageStats?.["openai:default:gpt-4"]?.cooldownUntil).toBeUndefined();
+      expect(store.usageStats?.["openai:default:gpt-4"]?.errorCount).toBe(0);
+      expect(store.usageStats?.["openai:default"]?.cooldownUntil).toBe(cooldownTime);
+      expect(store.usageStats?.["openai:default:gpt-3.5"]?.cooldownUntil).toBe(cooldownTime);
+    });
+  });
+
+  it("clears profile-level cooldown when model is not provided", async () => {
+    await withTempDir(async (tempDir) => {
+      const cooldownTime = Date.now() + 60_000;
+      const store = makeStore({
+        "openai:default": { cooldownUntil: cooldownTime, errorCount: 2 },
+        "openai:default:gpt-4": { cooldownUntil: cooldownTime },
+      });
+      saveAuthProfileStore(store, tempDir);
+
+      await clearAuthProfileCooldown({ store, profileId: "openai:default", agentDir: tempDir });
+
+      expect(store.usageStats?.["openai:default"]?.cooldownUntil).toBeUndefined();
+      expect(store.usageStats?.["openai:default"]?.errorCount).toBe(0);
+      expect(store.usageStats?.["openai:default:gpt-4"]?.cooldownUntil).toBe(cooldownTime);
+    });
+  });
+});
--- a/src/agents/auth-profiles.ts
+++ b/src/agents/auth-profiles.ts
@ -32,6 +32,7 @@ export type {
 export {
  calculateAuthProfileCooldownMs,
  clearAuthProfileCooldown,
+  cooldownKey,
  isProfileInCooldown,
  markAuthProfileCooldown,
  markAuthProfileFailure,
--- a/src/agents/auth-profiles/order.ts
+++ b/src/agents/auth-profiles/order.ts
@ -4,6 +4,26 @@ import { listProfilesForProvider } from "./profiles.js";
 import type { AuthProfileStore } from "./types.js";
 import { isProfileInCooldown } from "./usage.js";

+/*
+ * Profile Ordering Design
+ * ───────────────────────
+ * Profile ordering is intentionally MODEL-AGNOSTIC. This module answers:
+ * "Which auth profiles should we try for this provider?"
+ *
+ * Per-model cooldown filtering happens DOWNSTREAM in model-fallback.ts,
+ * which calls isProfileInCooldown(store, profileId, model) for each candidate.
+ *
+ * Why two layers?
+ * 1. Profile layer (here): Selects credentials based on provider, type preference,
+ *    round-robin (lastUsed), and profile-level cooldowns (auth failures).
+ * 2. Model layer (model-fallback.ts): Filters by per-model rate limits before
+ *    each API call attempt.
+ *
+ * This separation exists because:
+ * - Auth credentials (API keys/tokens) are profile-level
+ * - Rate limits are often model-level (e.g., gpt-5.2 has quota, gpt-5-mini unlimited)
+ */
+
 function resolveProfileUnusableUntil(stats: {
  cooldownUntil?: number;
  disabledUntil?: number;
--- a/src/agents/auth-profiles/usage.ts
+++ b/src/agents/auth-profiles/usage.ts
@ -3,6 +3,43 @@ import { normalizeProviderId } from "../model-selection.js";
 import { saveAuthProfileStore, updateAuthProfileStoreWithLock } from "./store.js";
 import type { AuthProfileFailureReason, AuthProfileStore, ProfileUsageStats } from "./types.js";

+/*
+ * Per-Model Cooldown Design
+ * ─────────────────────────
+ * Cooldowns can be tracked at two granularities:
+ *
+ * 1. Profile-level keys (e.g., "github-copilot:github")
+ *    - Track success metrics: lastUsed, lastGood, errorCount reset
+ *    - Used by `lastGood` to remember which profile worked for a provider
+ *    - Auth failures (wrong API key) should use profile-level cooldowns (caller omits model)
+ *
+ * 2. Per-model keys (e.g., "github-copilot:github:gpt-5.2")
+ *    - Created ONLY on failure (whatever the reason) when a model is provided
+ *    - Act as "penalty boxes" for specific models
+ *    - Ignored once the cooldown expires; reset in place (not deleted) on recovery
+ *
+ * Key asymmetry:
+ * - Failures → create per-model key (if model provided)
+ * - Successes → update profile-level key + clear per-model key (if it exists)
+ *
+ * This keeps the store clean and allows independent rate limits per model
+ * while maintaining backward compatibility with profile-level cooldowns.
+ */
+
+/**
+ * Generate a cooldown key that optionally includes the model.
+ * When model is provided, cooldowns are tracked per (profile + model) combination.
+ * This allows different models from the same provider to have independent cooldowns.
+ *
+ * @example cooldownKey("openai:default", "gpt-4") => "openai:default:gpt-4"
+ * @example cooldownKey("openai:default") => "openai:default"
+ */
+export function cooldownKey(profileId: string, model?: string): string {
+  // Treat empty/whitespace-only string as "no model" to avoid trailing colon in key
+  const normalizedModel = model?.trim() || undefined;
+  return normalizedModel ? `${profileId}:${normalizedModel}` : profileId;
+}
+
 function resolveProfileUnusableUntil(stats: ProfileUsageStats): number | null {
  const values = [stats.cooldownUntil, stats.disabledUntil]
    .filter((value): value is number => typeof value === "number")
@ -11,40 +48,95 @@ function resolveProfileUnusableUntil(stats: ProfileUsageStats): number | null {
  return Math.max(...values);
 }

-/**
- * Check if a profile is currently in cooldown (due to rate limiting or errors).
- */
-export function isProfileInCooldown(store: AuthProfileStore, profileId: string): boolean {
-  const stats = store.usageStats?.[profileId];
+/** Checks if a key is currently in cooldown. */
+function isKeyInCooldown(store: AuthProfileStore, key: string, now: number): boolean {
+  const stats = store.usageStats?.[key];
  if (!stats) return false;
  const unusableUntil = resolveProfileUnusableUntil(stats);
-  return unusableUntil ? Date.now() < unusableUntil : false;
+  return unusableUntil !== null && now < unusableUntil;
+}
+
+/**
+ * Check if a profile is currently in cooldown (due to rate limiting or errors).
+ *
+ * When model is provided, checks both:
+ * 1. The per-model cooldown key (e.g., "openai:default:gpt-4")
+ * 2. The profile-level cooldown key (e.g., "openai:default")
+ *
+ * Profile-level cooldowns apply to all models under that profile, supporting
+ * legacy entries and scenarios where failures affect all models (e.g., auth errors).
+ */
+export function isProfileInCooldown(
+  store: AuthProfileStore,
+  profileId: string,
+  model?: string,
+): boolean {
+  const now = Date.now();
+
+  // Check per-model cooldown first (if model provided)
+  if (model && isKeyInCooldown(store, cooldownKey(profileId, model), now)) {
+    return true;
+  }
+
+  // Also check profile-level cooldown (applies to all models)
+  return isKeyInCooldown(store, profileId, now);
+}
+
+/** Clears cooldown fields from usage stats, preserving other fields. */
+function clearCooldownFields(
+  stats: ProfileUsageStats | undefined,
+  options?: { setLastUsed?: boolean },
+): ProfileUsageStats {
+  return {
+    ...stats,
+    ...(options?.setLastUsed ? { lastUsed: Date.now() } : {}),
+    errorCount: 0,
+    cooldownUntil: undefined,
+    disabledUntil: undefined,
+    disabledReason: undefined,
+    failureCounts: undefined,
+  };
+}
+
+/** Applies success updates to usage stats in-place. */
+function applySuccessUpdates(
+  usageStats: Record<string, ProfileUsageStats>,
+  profileId: string,
+  model?: string,
+): void {
+  usageStats[profileId] = clearCooldownFields(usageStats[profileId], { setLastUsed: true });
+  if (model) {
+    const modelKey = cooldownKey(profileId, model);
+    if (usageStats[modelKey]) {
+      usageStats[modelKey] = clearCooldownFields(usageStats[modelKey]);
+    }
+  }
 }

 /**
 * Mark a profile as successfully used. Resets error count and updates lastUsed.
 * Uses store lock to avoid overwriting concurrent usage updates.
+ *
+ * Success metrics (lastUsed, lastGood) are ALWAYS updated at the profile level,
+ * regardless of which model was used. This is intentional: if ANY model works,
+ * the credentials are valid and the profile should be remembered as "good".
+ *
+ * When model is provided, also clears the per-model cooldown (if one exists).
+ * This allows a recovered model to be used immediately without waiting for expiry.
 */
 export async function markAuthProfileUsed(params: {
  store: AuthProfileStore;
  profileId: string;
+  model?: string;
  agentDir?: string;
 }): Promise<void> {
-  const { store, profileId, agentDir } = params;
+  const { store, profileId, model, agentDir } = params;
  const updated = await updateAuthProfileStoreWithLock({
    agentDir,
    updater: (freshStore) => {
      if (!freshStore.profiles[profileId]) return false;
      freshStore.usageStats = freshStore.usageStats ?? {};
-      freshStore.usageStats[profileId] = {
-        ...freshStore.usageStats[profileId],
-        lastUsed: Date.now(),
-        errorCount: 0,
-        cooldownUntil: undefined,
-        disabledUntil: undefined,
-        disabledReason: undefined,
-        failureCounts: undefined,
-      };
+      applySuccessUpdates(freshStore.usageStats, profileId, model);
      return true;
    },
  });
@ -55,15 +147,7 @@ export async function markAuthProfileUsed(params: {
  if (!store.profiles[profileId]) return;

  store.usageStats = store.usageStats ?? {};
-  store.usageStats[profileId] = {
-    ...store.usageStats[profileId],
-    lastUsed: Date.now(),
-    errorCount: 0,
-    cooldownUntil: undefined,
-    disabledUntil: undefined,
-    disabledReason: undefined,
-    failureCounts: undefined,
-  };
+  applySuccessUpdates(store.usageStats, profileId, model);
  saveAuthProfileStore(store, agentDir);
 }

@ -187,22 +271,25 @@ function computeNextProfileUsageStats(params: {
 /**
 * Mark a profile as failed for a specific reason. Billing failures are treated
 * as "disabled" (longer backoff) vs the regular cooldown window.
+ * When model is provided, cooldown is tracked per (profile + model) combination.
 */
 export async function markAuthProfileFailure(params: {
  store: AuthProfileStore;
  profileId: string;
+  model?: string;
  reason: AuthProfileFailureReason;
  cfg?: OpenClawConfig;
  agentDir?: string;
 }): Promise<void> {
-  const { store, profileId, reason, agentDir, cfg } = params;
+  const { store, profileId, model, reason, agentDir, cfg } = params;
+  const key = cooldownKey(profileId, model);
  const updated = await updateAuthProfileStoreWithLock({
    agentDir,
    updater: (freshStore) => {
      const profile = freshStore.profiles[profileId];
      if (!profile) return false;
      freshStore.usageStats = freshStore.usageStats ?? {};
-      const existing = freshStore.usageStats[profileId] ?? {};
+      const existing = freshStore.usageStats[key] ?? {};

      const now = Date.now();
      const providerKey = normalizeProviderId(profile.provider);
@ -211,7 +298,7 @@ export async function markAuthProfileFailure(params: {
        providerId: providerKey,
      });

-      freshStore.usageStats[profileId] = computeNextProfileUsageStats({
+      freshStore.usageStats[key] = computeNextProfileUsageStats({
        existing,
        now,
        reason,
@ -227,7 +314,7 @@ export async function markAuthProfileFailure(params: {
  if (!store.profiles[profileId]) return;

  store.usageStats = store.usageStats ?? {};
-  const existing = store.usageStats[profileId] ?? {};
+  const existing = store.usageStats[key] ?? {};
  const now = Date.now();
  const providerKey = normalizeProviderId(store.profiles[profileId]?.provider ?? "");
  const cfgResolved = resolveAuthCooldownConfig({
@ -235,7 +322,7 @@ export async function markAuthProfileFailure(params: {
    providerId: providerKey,
  });

-  store.usageStats[profileId] = computeNextProfileUsageStats({
+  store.usageStats[key] = computeNextProfileUsageStats({
    existing,
    now,
    reason,
@ -245,18 +332,21 @@ export async function markAuthProfileFailure(params: {
 }

 /**
- * Mark a profile as failed/rate-limited. Applies exponential backoff cooldown.
- * Cooldown times: 1min, 5min, 25min, max 1 hour.
- * Uses store lock to avoid overwriting concurrent usage updates.
+ * Mark a profile as failed/rate-limited with "unknown" reason.
+ * Convenience wrapper around markAuthProfileFailure() for generic failures.
+ * Applies exponential backoff cooldown: 1min, 5min, 25min, max 1 hour.
+ * When model is provided, cooldown is tracked per (profile + model) combination.
 */
 export async function markAuthProfileCooldown(params: {
  store: AuthProfileStore;
  profileId: string;
+  model?: string;
  agentDir?: string;
 }): Promise<void> {
  await markAuthProfileFailure({
    store: params.store,
    profileId: params.profileId,
+    model: params.model,
    reason: "unknown",
    agentDir: params.agentDir,
  });
@ -265,20 +355,23 @@ export async function markAuthProfileCooldown(params: {
 /**
 * Clear cooldown for a profile (e.g., manual reset).
 * Uses store lock to avoid overwriting concurrent usage updates.
+ * When model is provided, clears only the per-model key; the profile-level key is untouched.
 */
 export async function clearAuthProfileCooldown(params: {
  store: AuthProfileStore;
  profileId: string;
+  model?: string;
  agentDir?: string;
 }): Promise<void> {
-  const { store, profileId, agentDir } = params;
+  const { store, profileId, model, agentDir } = params;
+  const key = cooldownKey(profileId, model);
  const updated = await updateAuthProfileStoreWithLock({
    agentDir,
    updater: (freshStore) => {
-      if (!freshStore.usageStats?.[profileId]) return false;
+      if (!freshStore.usageStats?.[key]) return false;

-      freshStore.usageStats[profileId] = {
-        ...freshStore.usageStats[profileId],
+      freshStore.usageStats[key] = {
+        ...freshStore.usageStats[key],
        errorCount: 0,
        cooldownUntil: undefined,
      };
@ -289,10 +382,10 @@ export async function clearAuthProfileCooldown(params: {
    store.usageStats = updated.usageStats;
    return;
  }
-  if (!store.usageStats?.[profileId]) return;
+  if (!store.usageStats?.[key]) return;

-  store.usageStats[profileId] = {
-    ...store.usageStats[profileId],
+  store.usageStats[key] = {
+    ...store.usageStats[key],
    errorCount: 0,
    cooldownUntil: undefined,
  };
--- a/src/agents/model-fallback.test.ts
+++ b/src/agents/model-fallback.test.ts
@ -240,6 +240,66 @@ describe("runWithModelFallback", () => {
    }
  });

+  it("allows different models from same provider when only one model is in cooldown", async () => {
+    const tempDir = await fs.mkdtemp(path.join(os.tmpdir(), "moltbot-auth-"));
+    const provider = `per-model-cooldown-${crypto.randomUUID()}`;
+    const profileId = `${provider}:default`;
+
+    const store: AuthProfileStore = {
+      version: AUTH_STORE_VERSION,
+      profiles: {
+        [profileId]: {
+          type: "api_key",
+          provider,
+          key: "test-key",
+        },
+      },
+      usageStats: {
+        // Only model-a is in cooldown (per-model key)
+        [`${profileId}:model-a`]: {
+          cooldownUntil: Date.now() + 60_000,
+        },
+        // model-b has no cooldown
+      },
+    };
+
+    saveAuthProfileStore(store, tempDir);
+
+    const cfg = makeCfg({
+      agents: {
+        defaults: {
+          model: {
+            primary: `${provider}/model-a`,
+            fallbacks: [`${provider}/model-b`],
+          },
+        },
+      },
+    });
+
+    const run = vi.fn().mockImplementation(async (providerId, modelId) => {
+      if (modelId === "model-b") return "ok";
+      throw new Error(`unexpected model: ${providerId}/${modelId}`);
+    });
+
+    try {
+      const result = await runWithModelFallback({
+        cfg,
+        provider,
+        model: "model-a",
+        agentDir: tempDir,
+        run,
+      });
+
+      expect(result.result).toBe("ok");
+      // model-a should be skipped (in cooldown), model-b should be tried
+      expect(run.mock.calls).toEqual([[provider, "model-b"]]);
+      expect(result.attempts[0]?.reason).toBe("rate_limit");
+      expect(result.attempts[0]?.model).toBe("model-a");
+    } finally {
+      await fs.rm(tempDir, { recursive: true, force: true });
+    }
+  });
+
  it("does not append configured primary when fallbacksOverride is set", async () => {
    const cfg = makeCfg({
      agents: {
--- a/src/agents/model-fallback.ts
+++ b/src/agents/model-fallback.ts
@ -231,14 +231,16 @@ export async function runWithModelFallback<T>(params: {
        store: authStore,
        provider: candidate.provider,
      });
-      const isAnyProfileAvailable = profileIds.some((id) => !isProfileInCooldown(authStore, id));
+      const isAnyProfileAvailable = profileIds.some(
+        (id) => !isProfileInCooldown(authStore, id, candidate.model),
+      );

      if (profileIds.length > 0 && !isAnyProfileAvailable) {
-        // All profiles for this provider are in cooldown; skip without attempting
+        // All profiles for this provider+model are in cooldown; skip without attempting
        attempts.push({
          provider: candidate.provider,
          model: candidate.model,
-          error: `Provider ${candidate.provider} is in cooldown (all profiles unavailable)`,
+          error: `${candidate.provider}/${candidate.model} is in cooldown (all profiles unavailable)`,
          reason: "rate_limit",
        });
        continue;
--- a/src/agents/pi-embedded-runner/run.ts
+++ b/src/agents/pi-embedded-runner/run.ts
@ -474,6 +474,7 @@ export async function runEmbeddedPiAgent(
              await markAuthProfileFailure({
                store: authStore,
                profileId: lastProfileId,
+                model: modelId,
                reason: promptFailoverReason,
                cfg: params.config,
                agentDir: params.agentDir,
@ -561,6 +562,7 @@ export async function runEmbeddedPiAgent(
              await markAuthProfileFailure({
                store: authStore,
                profileId: lastProfileId,
+                model: modelId,
                reason,
                cfg: params.config,
                agentDir: params.agentDir,
@ -644,6 +646,7 @@ export async function runEmbeddedPiAgent(
            await markAuthProfileUsed({
              store: authStore,
              profileId: lastProfileId,
+              model: modelId,
              agentDir: params.agentDir,
            });
          }