From 715728c989a997dd101a15031e2d93a8018b0ab2 Mon Sep 17 00:00:00 2001
From: Bruno Guidolim <bguidolim@gmail.com>
Date: Wed, 28 Jan 2026 18:01:21 +0100
Subject: [PATCH] feat(auth): track cooldown per (auth profile + model)

Allow different models from the same provider to have independent
cooldowns. When a model hits rate limit, only that specific model
is blocked, not all models using the same auth profile.

- Add cooldownKey() helper for composite key generation
- Update isProfileInCooldown to check both per-model and profile-level
- Update markAuthProfileFailure/Cooldown with optional model param
- Pass model to cooldown checks in model-fallback and agent runner
- Add comprehensive tests for per-model cooldown behavior

Ref: #3417
---
 ...th-profiles.auth-profile-cooldowns.test.ts | 97 ++++++++++++++++++-
 src/agents/auth-profiles.ts                   |  1 +
 src/agents/auth-profiles/usage.ts             | 93 ++++++++++++++----
 src/agents/model-fallback.test.ts             | 60 ++++++++++++
 src/agents/model-fallback.ts                  |  8 +-
 src/agents/pi-embedded-runner/run.ts          |  2 +
 6 files changed, 240 insertions(+), 21 deletions(-)

diff --git a/src/agents/auth-profiles.auth-profile-cooldowns.test.ts b/src/agents/auth-profiles.auth-profile-cooldowns.test.ts
index e5fe3900a..8eab8d3d7 100644
--- a/src/agents/auth-profiles.auth-profile-cooldowns.test.ts
+++ b/src/agents/auth-profiles.auth-profile-cooldowns.test.ts
@@ -1,5 +1,11 @@
 import { describe, expect, it } from "vitest";
-import { calculateAuthProfileCooldownMs } from "./auth-profiles.js";
+import {
+  calculateAuthProfileCooldownMs,
+  cooldownKey,
+  isProfileInCooldown,
+} from "./auth-profiles.js";
+import type { AuthProfileStore } from "./auth-profiles.js";
+import { AUTH_STORE_VERSION } from "./auth-profiles/constants.js";
 
 describe("auth profile cooldowns", () => {
   it("applies exponential backoff with a 1h cap", () => {
@@ -10,3 +16,92 @@ describe("auth profile cooldowns", () => {
     expect(calculateAuthProfileCooldownMs(5)).toBe(60 * 60_000);
   });
 });
+
+describe("cooldownKey", () => {
+  it("returns profileId when model is not provided", () => {
+    expect(cooldownKey("openai:default")).toBe("openai:default");
+    expect(cooldownKey("openai:default", undefined)).toBe("openai:default");
+  });
+
+  it("returns composite key when model is provided", () => {
+    expect(cooldownKey("openai:default", "gpt-4")).toBe("openai:default:gpt-4");
+    expect(cooldownKey("github-copilot:default", "gpt-5.2")).toBe("github-copilot:default:gpt-5.2");
+  });
+});
+
+describe("isProfileInCooldown with per-model support", () => {
+  it("returns false when no cooldown exists", () => {
+    const store: AuthProfileStore = {
+      version: AUTH_STORE_VERSION,
+      profiles: {
+        "openai:default": { type: "api_key", provider: "openai", key: "test" },
+      },
+    };
+    expect(isProfileInCooldown(store, "openai:default")).toBe(false);
+    expect(isProfileInCooldown(store, "openai:default", "gpt-4")).toBe(false);
+  });
+
+  it("checks profile-level cooldown when model not provided", () => {
+    const store: AuthProfileStore = {
+      version: AUTH_STORE_VERSION,
+      profiles: {
+        "openai:default": { type: "api_key", provider: "openai", key: "test" },
+      },
+      usageStats: {
+        "openai:default": { cooldownUntil: Date.now() + 60_000 },
+      },
+    };
+    expect(isProfileInCooldown(store, "openai:default")).toBe(true);
+  });
+
+  it("checks per-model cooldown when model is provided", () => {
+    const store: AuthProfileStore = {
+      version: AUTH_STORE_VERSION,
+      profiles: {
+        "openai:default": { type: "api_key", provider: "openai", key: "test" },
+      },
+      usageStats: {
+        "openai:default:gpt-4": { cooldownUntil: Date.now() + 60_000 },
+      },
+    };
+    // model-specific cooldown exists
+    expect(isProfileInCooldown(store, "openai:default", "gpt-4")).toBe(true);
+    // different model is not in cooldown
+    expect(isProfileInCooldown(store, "openai:default", "gpt-3.5")).toBe(false);
+    // profile-level is not in cooldown
+    expect(isProfileInCooldown(store, "openai:default")).toBe(false);
+  });
+
+  it("allows independent cooldowns per model", () => {
+    const store: AuthProfileStore = {
+      version: AUTH_STORE_VERSION,
+      profiles: {
+        "github-copilot:default": {
+          type: "api_key",
+          provider: "github-copilot",
+          key: "test",
+        },
+      },
+      usageStats: {
+        // gpt-5.2 is in cooldown (rate limited)
+        "github-copilot:default:gpt-5.2": { cooldownUntil: Date.now() + 60_000 },
+        // gpt-5-mini has no cooldown (unlimited quota)
+      },
+    };
+    expect(isProfileInCooldown(store, "github-copilot:default", "gpt-5.2")).toBe(true);
+    expect(isProfileInCooldown(store, "github-copilot:default", "gpt-5-mini")).toBe(false);
+  });
+
+  it("returns false when cooldown has expired", () => {
+    const store: AuthProfileStore = {
+      version: AUTH_STORE_VERSION,
+      profiles: {
+        "openai:default": { type: "api_key", provider: "openai", key: "test" },
+      },
+      usageStats: {
+        "openai:default:gpt-4": { cooldownUntil: Date.now() - 1000 }, // expired
+      },
+    };
+    expect(isProfileInCooldown(store, "openai:default", "gpt-4")).toBe(false);
+  });
+});
diff --git a/src/agents/auth-profiles.ts b/src/agents/auth-profiles.ts
index 9a6c75b10..e68a27f1c 100644
--- a/src/agents/auth-profiles.ts
+++ b/src/agents/auth-profiles.ts
@@ -32,6 +32,7 @@ export type {
 export {
   calculateAuthProfileCooldownMs,
   clearAuthProfileCooldown,
+  cooldownKey,
   isProfileInCooldown,
   markAuthProfileCooldown,
   markAuthProfileFailure,
diff --git a/src/agents/auth-profiles/usage.ts b/src/agents/auth-profiles/usage.ts
index 2bf79b299..43bb660b6 100644
--- a/src/agents/auth-profiles/usage.ts
+++ b/src/agents/auth-profiles/usage.ts
@@ -3,6 +3,15 @@ import { normalizeProviderId } from "../model-selection.js";
 import { saveAuthProfileStore, updateAuthProfileStoreWithLock } from "./store.js";
 import type { AuthProfileFailureReason, AuthProfileStore, ProfileUsageStats } from "./types.js";
 
+/**
+ * Generate a cooldown key that optionally includes the model.
+ * When model is provided, cooldowns are tracked per (profile + model) combination.
+ * This allows different models from the same provider to have independent cooldowns.
+ */
+export function cooldownKey(profileId: string, model?: string): string {
+  return model ? `${profileId}:${model}` : profileId;
+}
+
 function resolveProfileUnusableUntil(stats: ProfileUsageStats): number | null {
   const values = [stats.cooldownUntil, stats.disabledUntil]
     .filter((value): value is number => typeof value === "number")
@@ -14,11 +23,39 @@ function resolveProfileUnusableUntil(stats: ProfileUsageStats): number | null {
 /**
  * Check if a profile is currently in cooldown (due to rate limiting or errors).
  */
-export function isProfileInCooldown(store: AuthProfileStore, profileId: string): boolean {
-  const stats = store.usageStats?.[profileId];
-  if (!stats) return false;
-  const unusableUntil = resolveProfileUnusableUntil(stats);
-  return unusableUntil ? Date.now() < unusableUntil : false;
+/**
+ * Check if a profile is currently in cooldown (due to rate limiting or errors).
+ * When model is provided, checks the per-model cooldown key.
+ */
+/**
+ * Check if a profile is currently in cooldown (due to rate limiting or errors).
+ * When model is provided, checks both the per-model cooldown key and the profile-level key.
+ * A profile-level cooldown applies to all models (backward compatibility).
+ */
+export function isProfileInCooldown(
+  store: AuthProfileStore,
+  profileId: string,
+  model?: string,
+): boolean {
+  const now = Date.now();
+
+  // Check per-model cooldown first (if model provided)
+  if (model) {
+    const modelKey = cooldownKey(profileId, model);
+    const modelStats = store.usageStats?.[modelKey];
+    if (modelStats) {
+      const modelUnusableUntil = resolveProfileUnusableUntil(modelStats);
+      if (modelUnusableUntil && now < modelUnusableUntil) {
+        return true;
+      }
+    }
+  }
+
+  // Also check profile-level cooldown (applies to all models)
+  const profileStats = store.usageStats?.[profileId];
+  if (!profileStats) return false;
+  const profileUnusableUntil = resolveProfileUnusableUntil(profileStats);
+  return profileUnusableUntil ? now < profileUnusableUntil : false;
 }
 
 /**
@@ -188,21 +225,28 @@ function computeNextProfileUsageStats(params: {
  * Mark a profile as failed for a specific reason. Billing failures are treated
  * as "disabled" (longer backoff) vs the regular cooldown window.
  */
+/**
+ * Mark a profile as failed for a specific reason. Billing failures are treated
+ * as "disabled" (longer backoff) vs the regular cooldown window.
+ * When model is provided, cooldown is tracked per (profile + model) combination.
+ */
 export async function markAuthProfileFailure(params: {
   store: AuthProfileStore;
   profileId: string;
+  model?: string;
   reason: AuthProfileFailureReason;
   cfg?: MoltbotConfig;
   agentDir?: string;
 }): Promise<void> {
-  const { store, profileId, reason, agentDir, cfg } = params;
+  const { store, profileId, model, reason, agentDir, cfg } = params;
+  const key = cooldownKey(profileId, model);
   const updated = await updateAuthProfileStoreWithLock({
     agentDir,
     updater: (freshStore) => {
       const profile = freshStore.profiles[profileId];
       if (!profile) return false;
       freshStore.usageStats = freshStore.usageStats ?? {};
-      const existing = freshStore.usageStats[profileId] ?? {};
+      const existing = freshStore.usageStats[key] ?? {};
 
       const now = Date.now();
       const providerKey = normalizeProviderId(profile.provider);
@@ -211,7 +255,7 @@ export async function markAuthProfileFailure(params: {
         providerId: providerKey,
       });
 
-      freshStore.usageStats[profileId] = computeNextProfileUsageStats({
+      freshStore.usageStats[key] = computeNextProfileUsageStats({
         existing,
         now,
         reason,
@@ -227,7 +271,7 @@ export async function markAuthProfileFailure(params: {
   if (!store.profiles[profileId]) return;
 
   store.usageStats = store.usageStats ?? {};
-  const existing = store.usageStats[profileId] ?? {};
+  const existing = store.usageStats[key] ?? {};
   const now = Date.now();
   const providerKey = normalizeProviderId(store.profiles[profileId]?.provider ?? "");
   const cfgResolved = resolveAuthCooldownConfig({
@@ -235,7 +279,7 @@ export async function markAuthProfileFailure(params: {
     providerId: providerKey,
   });
 
-  store.usageStats[profileId] = computeNextProfileUsageStats({
+  store.usageStats[key] = computeNextProfileUsageStats({
     existing,
     now,
     reason,
@@ -249,14 +293,22 @@ export async function markAuthProfileFailure(params: {
  * Cooldown times: 1min, 5min, 25min, max 1 hour.
  * Uses store lock to avoid overwriting concurrent usage updates.
  */
+/**
+ * Mark a profile as failed/rate-limited. Applies exponential backoff cooldown.
+ * Cooldown times: 1min, 5min, 25min, max 1 hour.
+ * Uses store lock to avoid overwriting concurrent usage updates.
+ * When model is provided, cooldown is tracked per (profile + model) combination.
+ */
 export async function markAuthProfileCooldown(params: {
   store: AuthProfileStore;
   profileId: string;
+  model?: string;
   agentDir?: string;
 }): Promise<void> {
   await markAuthProfileFailure({
     store: params.store,
     profileId: params.profileId,
+    model: params.model,
     reason: "unknown",
     agentDir: params.agentDir,
   });
@@ -266,19 +318,26 @@ export async function markAuthProfileCooldown(params: {
  * Clear cooldown for a profile (e.g., manual reset).
  * Uses store lock to avoid overwriting concurrent usage updates.
  */
+/**
+ * Clear cooldown for a profile (e.g., manual reset).
+ * Uses store lock to avoid overwriting concurrent usage updates.
+ * When model is provided, clears the per-model cooldown key.
+ */
 export async function clearAuthProfileCooldown(params: {
   store: AuthProfileStore;
   profileId: string;
+  model?: string;
   agentDir?: string;
 }): Promise<void> {
-  const { store, profileId, agentDir } = params;
+  const { store, profileId, model, agentDir } = params;
+  const key = cooldownKey(profileId, model);
   const updated = await updateAuthProfileStoreWithLock({
     agentDir,
     updater: (freshStore) => {
-      if (!freshStore.usageStats?.[profileId]) return false;
+      if (!freshStore.usageStats?.[key]) return false;
 
-      freshStore.usageStats[profileId] = {
-        ...freshStore.usageStats[profileId],
+      freshStore.usageStats[key] = {
+        ...freshStore.usageStats[key],
         errorCount: 0,
         cooldownUntil: undefined,
       };
@@ -289,10 +348,10 @@ export async function clearAuthProfileCooldown(params: {
     store.usageStats = updated.usageStats;
     return;
   }
-  if (!store.usageStats?.[profileId]) return;
+  if (!store.usageStats?.[key]) return;
 
-  store.usageStats[profileId] = {
-    ...store.usageStats[profileId],
+  store.usageStats[key] = {
+    ...store.usageStats[key],
     errorCount: 0,
     cooldownUntil: undefined,
   };
diff --git a/src/agents/model-fallback.test.ts b/src/agents/model-fallback.test.ts
index f8a94560c..25c311ae7 100644
--- a/src/agents/model-fallback.test.ts
+++ b/src/agents/model-fallback.test.ts
@@ -240,6 +240,66 @@ describe("runWithModelFallback", () => {
     }
   });
 
+  it("allows different models from same provider when only one model is in cooldown", async () => {
+    const tempDir = await fs.mkdtemp(path.join(os.tmpdir(), "moltbot-auth-"));
+    const provider = `per-model-cooldown-${crypto.randomUUID()}`;
+    const profileId = `${provider}:default`;
+
+    const store: AuthProfileStore = {
+      version: AUTH_STORE_VERSION,
+      profiles: {
+        [profileId]: {
+          type: "api_key",
+          provider,
+          key: "test-key",
+        },
+      },
+      usageStats: {
+        // Only model-a is in cooldown (per-model key)
+        [`${profileId}:model-a`]: {
+          cooldownUntil: Date.now() + 60_000,
+        },
+        // model-b has no cooldown
+      },
+    };
+
+    saveAuthProfileStore(store, tempDir);
+
+    const cfg = makeCfg({
+      agents: {
+        defaults: {
+          model: {
+            primary: `${provider}/model-a`,
+            fallbacks: [`${provider}/model-b`],
+          },
+        },
+      },
+    });
+
+    const run = vi.fn().mockImplementation(async (providerId, modelId) => {
+      if (modelId === "model-b") return "ok";
+      throw new Error(`unexpected model: ${providerId}/${modelId}`);
+    });
+
+    try {
+      const result = await runWithModelFallback({
+        cfg,
+        provider,
+        model: "model-a",
+        agentDir: tempDir,
+        run,
+      });
+
+      expect(result.result).toBe("ok");
+      // model-a should be skipped (in cooldown), model-b should be tried
+      expect(run.mock.calls).toEqual([[provider, "model-b"]]);
+      expect(result.attempts[0]?.reason).toBe("rate_limit");
+      expect(result.attempts[0]?.model).toBe("model-a");
+    } finally {
+      await fs.rm(tempDir, { recursive: true, force: true });
+    }
+  });
+
   it("does not append configured primary when fallbacksOverride is set", async () => {
     const cfg = makeCfg({
       agents: {
diff --git a/src/agents/model-fallback.ts b/src/agents/model-fallback.ts
index b87135f9f..aa27a5776 100644
--- a/src/agents/model-fallback.ts
+++ b/src/agents/model-fallback.ts
@@ -231,14 +231,16 @@ export async function runWithModelFallback<T>(params: {
         store: authStore,
         provider: candidate.provider,
       });
-      const isAnyProfileAvailable = profileIds.some((id) => !isProfileInCooldown(authStore, id));
+      const isAnyProfileAvailable = profileIds.some(
+        (id) => !isProfileInCooldown(authStore, id, candidate.model),
+      );
 
       if (profileIds.length > 0 && !isAnyProfileAvailable) {
-        // All profiles for this provider are in cooldown; skip without attempting
+        // All profiles for this provider+model are in cooldown; skip without attempting
         attempts.push({
           provider: candidate.provider,
           model: candidate.model,
-          error: `Provider ${candidate.provider} is in cooldown (all profiles unavailable)`,
+          error: `${candidate.provider}/${candidate.model} is in cooldown (all profiles unavailable)`,
           reason: "rate_limit",
         });
         continue;
diff --git a/src/agents/pi-embedded-runner/run.ts b/src/agents/pi-embedded-runner/run.ts
index 870453f38..746398217 100644
--- a/src/agents/pi-embedded-runner/run.ts
+++ b/src/agents/pi-embedded-runner/run.ts
@@ -474,6 +474,7 @@ export async function runEmbeddedPiAgent(
               await markAuthProfileFailure({
                 store: authStore,
                 profileId: lastProfileId,
+                model: modelId,
                 reason: promptFailoverReason,
                 cfg: params.config,
                 agentDir: params.agentDir,
@@ -561,6 +562,7 @@ export async function runEmbeddedPiAgent(
               await markAuthProfileFailure({
                 store: authStore,
                 profileId: lastProfileId,
+                model: modelId,
                 reason,
                 cfg: params.config,
                 agentDir: params.agentDir,