From 715728c989a997dd101a15031e2d93a8018b0ab2 Mon Sep 17 00:00:00 2001 From: Bruno Guidolim Date: Wed, 28 Jan 2026 18:01:21 +0100 Subject: [PATCH] feat(auth): track cooldown per (auth profile + model) Allow different models from the same provider to have independent cooldowns. When a model hits rate limit, only that specific model is blocked, not all models using the same auth profile. - Add cooldownKey() helper for composite key generation - Update isProfileInCooldown to check both per-model and profile-level - Update markAuthProfileFailure/Cooldown with optional model param - Pass model to cooldown checks in model-fallback and agent runner - Add comprehensive tests for per-model cooldown behavior Ref: #3417 --- ...th-profiles.auth-profile-cooldowns.test.ts | 97 ++++++++++++++++++- src/agents/auth-profiles.ts | 1 + src/agents/auth-profiles/usage.ts | 93 ++++++++++++++---- src/agents/model-fallback.test.ts | 60 ++++++++++++ src/agents/model-fallback.ts | 8 +- src/agents/pi-embedded-runner/run.ts | 2 + 6 files changed, 240 insertions(+), 21 deletions(-) diff --git a/src/agents/auth-profiles.auth-profile-cooldowns.test.ts b/src/agents/auth-profiles.auth-profile-cooldowns.test.ts index e5fe3900a..8eab8d3d7 100644 --- a/src/agents/auth-profiles.auth-profile-cooldowns.test.ts +++ b/src/agents/auth-profiles.auth-profile-cooldowns.test.ts @@ -1,5 +1,11 @@ import { describe, expect, it } from "vitest"; -import { calculateAuthProfileCooldownMs } from "./auth-profiles.js"; +import { + calculateAuthProfileCooldownMs, + cooldownKey, + isProfileInCooldown, +} from "./auth-profiles.js"; +import type { AuthProfileStore } from "./auth-profiles.js"; +import { AUTH_STORE_VERSION } from "./auth-profiles/constants.js"; describe("auth profile cooldowns", () => { it("applies exponential backoff with a 1h cap", () => { @@ -10,3 +16,92 @@ describe("auth profile cooldowns", () => { expect(calculateAuthProfileCooldownMs(5)).toBe(60 * 60_000); }); }); + +describe("cooldownKey", () => { + it("returns profileId when model is not provided", () => { + expect(cooldownKey("openai:default")).toBe("openai:default"); + expect(cooldownKey("openai:default", undefined)).toBe("openai:default"); + }); + + it("returns composite key when model is provided", () => { + expect(cooldownKey("openai:default", "gpt-4")).toBe("openai:default:gpt-4"); + expect(cooldownKey("github-copilot:default", "gpt-5.2")).toBe("github-copilot:default:gpt-5.2"); + }); +}); + +describe("isProfileInCooldown with per-model support", () => { + it("returns false when no cooldown exists", () => { + const store: AuthProfileStore = { + version: AUTH_STORE_VERSION, + profiles: { + "openai:default": { type: "api_key", provider: "openai", key: "test" }, + }, + }; + expect(isProfileInCooldown(store, "openai:default")).toBe(false); + expect(isProfileInCooldown(store, "openai:default", "gpt-4")).toBe(false); + }); + + it("checks profile-level cooldown when model not provided", () => { + const store: AuthProfileStore = { + version: AUTH_STORE_VERSION, + profiles: { + "openai:default": { type: "api_key", provider: "openai", key: "test" }, + }, + usageStats: { + "openai:default": { cooldownUntil: Date.now() + 60_000 }, + }, + }; + expect(isProfileInCooldown(store, "openai:default")).toBe(true); + }); + + it("checks per-model cooldown when model is provided", () => { + const store: AuthProfileStore = { + version: AUTH_STORE_VERSION, + profiles: { + "openai:default": { type: "api_key", provider: "openai", key: "test" }, + }, + usageStats: { + "openai:default:gpt-4": { cooldownUntil: Date.now() + 60_000 }, + }, + }; + // model-specific cooldown exists + expect(isProfileInCooldown(store, "openai:default", "gpt-4")).toBe(true); + // different model is not in cooldown + expect(isProfileInCooldown(store, "openai:default", "gpt-3.5")).toBe(false); + // profile-level is not in cooldown + expect(isProfileInCooldown(store, "openai:default")).toBe(false); + }); + + it("allows independent cooldowns per model", () => { + const store: AuthProfileStore = { + version: AUTH_STORE_VERSION, + profiles: { + "github-copilot:default": { + type: "api_key", + provider: "github-copilot", + key: "test", + }, + }, + usageStats: { + // gpt-5.2 is in cooldown (rate limited) + "github-copilot:default:gpt-5.2": { cooldownUntil: Date.now() + 60_000 }, + // gpt-5-mini has no cooldown (unlimited quota) + }, + }; + expect(isProfileInCooldown(store, "github-copilot:default", "gpt-5.2")).toBe(true); + expect(isProfileInCooldown(store, "github-copilot:default", "gpt-5-mini")).toBe(false); + }); + + it("returns false when cooldown has expired", () => { + const store: AuthProfileStore = { + version: AUTH_STORE_VERSION, + profiles: { + "openai:default": { type: "api_key", provider: "openai", key: "test" }, + }, + usageStats: { + "openai:default:gpt-4": { cooldownUntil: Date.now() - 1000 }, // expired + }, + }; + expect(isProfileInCooldown(store, "openai:default", "gpt-4")).toBe(false); + }); +}); diff --git a/src/agents/auth-profiles.ts b/src/agents/auth-profiles.ts index 9a6c75b10..e68a27f1c 100644 --- a/src/agents/auth-profiles.ts +++ b/src/agents/auth-profiles.ts @@ -32,6 +32,7 @@ export type { export { calculateAuthProfileCooldownMs, clearAuthProfileCooldown, + cooldownKey, isProfileInCooldown, markAuthProfileCooldown, markAuthProfileFailure, diff --git a/src/agents/auth-profiles/usage.ts b/src/agents/auth-profiles/usage.ts index 2bf79b299..43bb660b6 100644 --- a/src/agents/auth-profiles/usage.ts +++ b/src/agents/auth-profiles/usage.ts @@ -3,6 +3,15 @@ import { normalizeProviderId } from "../model-selection.js"; import { saveAuthProfileStore, updateAuthProfileStoreWithLock } from "./store.js"; import type { AuthProfileFailureReason, AuthProfileStore, ProfileUsageStats } from "./types.js"; +/** + * Generate a cooldown key that optionally includes the model. + * When model is provided, cooldowns are tracked per (profile + model) combination. + * This allows different models from the same provider to have independent cooldowns. + */ +export function cooldownKey(profileId: string, model?: string): string { + return model ? `${profileId}:${model}` : profileId; +} + function resolveProfileUnusableUntil(stats: ProfileUsageStats): number | null { const values = [stats.cooldownUntil, stats.disabledUntil] .filter((value): value is number => typeof value === "number") @@ -14,11 +23,39 @@ function resolveProfileUnusableUntil(stats: ProfileUsageStats): number | null { /** * Check if a profile is currently in cooldown (due to rate limiting or errors). */ -export function isProfileInCooldown(store: AuthProfileStore, profileId: string): boolean { - const stats = store.usageStats?.[profileId]; - if (!stats) return false; - const unusableUntil = resolveProfileUnusableUntil(stats); - return unusableUntil ? Date.now() < unusableUntil : false; +/** + * Check if a profile is currently in cooldown (due to rate limiting or errors). + * When model is provided, checks the per-model cooldown key. + */ +/** + * Check if a profile is currently in cooldown (due to rate limiting or errors). + * When model is provided, checks both the per-model cooldown key and the profile-level key. + * A profile-level cooldown applies to all models (backward compatibility). + */ +export function isProfileInCooldown( + store: AuthProfileStore, + profileId: string, + model?: string, +): boolean { + const now = Date.now(); + + // Check per-model cooldown first (if model provided) + if (model) { + const modelKey = cooldownKey(profileId, model); + const modelStats = store.usageStats?.[modelKey]; + if (modelStats) { + const modelUnusableUntil = resolveProfileUnusableUntil(modelStats); + if (modelUnusableUntil && now < modelUnusableUntil) { + return true; + } + } + } + + // Also check profile-level cooldown (applies to all models) + const profileStats = store.usageStats?.[profileId]; + if (!profileStats) return false; + const profileUnusableUntil = resolveProfileUnusableUntil(profileStats); + return profileUnusableUntil ? now < profileUnusableUntil : false; } /** @@ -188,21 +225,28 @@ function computeNextProfileUsageStats(params: { * Mark a profile as failed for a specific reason. Billing failures are treated * as "disabled" (longer backoff) vs the regular cooldown window. */ +/** + * Mark a profile as failed for a specific reason. Billing failures are treated + * as "disabled" (longer backoff) vs the regular cooldown window. + * When model is provided, cooldown is tracked per (profile + model) combination. + */ export async function markAuthProfileFailure(params: { store: AuthProfileStore; profileId: string; + model?: string; reason: AuthProfileFailureReason; cfg?: MoltbotConfig; agentDir?: string; }): Promise { - const { store, profileId, reason, agentDir, cfg } = params; + const { store, profileId, model, reason, agentDir, cfg } = params; + const key = cooldownKey(profileId, model); const updated = await updateAuthProfileStoreWithLock({ agentDir, updater: (freshStore) => { const profile = freshStore.profiles[profileId]; if (!profile) return false; freshStore.usageStats = freshStore.usageStats ?? {}; - const existing = freshStore.usageStats[profileId] ?? {}; + const existing = freshStore.usageStats[key] ?? {}; const now = Date.now(); const providerKey = normalizeProviderId(profile.provider); @@ -211,7 +255,7 @@ export async function markAuthProfileFailure(params: { providerId: providerKey, }); - freshStore.usageStats[profileId] = computeNextProfileUsageStats({ + freshStore.usageStats[key] = computeNextProfileUsageStats({ existing, now, reason, @@ -227,7 +271,7 @@ export async function markAuthProfileFailure(params: { if (!store.profiles[profileId]) return; store.usageStats = store.usageStats ?? {}; - const existing = store.usageStats[profileId] ?? {}; + const existing = store.usageStats[key] ?? {}; const now = Date.now(); const providerKey = normalizeProviderId(store.profiles[profileId]?.provider ?? ""); const cfgResolved = resolveAuthCooldownConfig({ @@ -235,7 +279,7 @@ export async function markAuthProfileFailure(params: { providerId: providerKey, }); - store.usageStats[profileId] = computeNextProfileUsageStats({ + store.usageStats[key] = computeNextProfileUsageStats({ existing, now, reason, @@ -249,14 +293,22 @@ export async function markAuthProfileFailure(params: { * Cooldown times: 1min, 5min, 25min, max 1 hour. * Uses store lock to avoid overwriting concurrent usage updates. */ +/** + * Mark a profile as failed/rate-limited. Applies exponential backoff cooldown. + * Cooldown times: 1min, 5min, 25min, max 1 hour. + * Uses store lock to avoid overwriting concurrent usage updates. + * When model is provided, cooldown is tracked per (profile + model) combination. + */ export async function markAuthProfileCooldown(params: { store: AuthProfileStore; profileId: string; + model?: string; agentDir?: string; }): Promise { await markAuthProfileFailure({ store: params.store, profileId: params.profileId, + model: params.model, reason: "unknown", agentDir: params.agentDir, }); @@ -266,19 +318,26 @@ export async function markAuthProfileCooldown(params: { * Clear cooldown for a profile (e.g., manual reset). * Uses store lock to avoid overwriting concurrent usage updates. */ +/** + * Clear cooldown for a profile (e.g., manual reset). + * Uses store lock to avoid overwriting concurrent usage updates. + * When model is provided, clears the per-model cooldown key. + */ export async function clearAuthProfileCooldown(params: { store: AuthProfileStore; profileId: string; + model?: string; agentDir?: string; }): Promise { - const { store, profileId, agentDir } = params; + const { store, profileId, model, agentDir } = params; + const key = cooldownKey(profileId, model); const updated = await updateAuthProfileStoreWithLock({ agentDir, updater: (freshStore) => { - if (!freshStore.usageStats?.[profileId]) return false; + if (!freshStore.usageStats?.[key]) return false; - freshStore.usageStats[profileId] = { - ...freshStore.usageStats[profileId], + freshStore.usageStats[key] = { + ...freshStore.usageStats[key], errorCount: 0, cooldownUntil: undefined, }; @@ -289,10 +348,10 @@ export async function clearAuthProfileCooldown(params: { store.usageStats = updated.usageStats; return; } - if (!store.usageStats?.[profileId]) return; + if (!store.usageStats?.[key]) return; - store.usageStats[profileId] = { - ...store.usageStats[profileId], + store.usageStats[key] = { + ...store.usageStats[key], errorCount: 0, cooldownUntil: undefined, }; diff --git a/src/agents/model-fallback.test.ts b/src/agents/model-fallback.test.ts index f8a94560c..25c311ae7 100644 --- a/src/agents/model-fallback.test.ts +++ b/src/agents/model-fallback.test.ts @@ -240,6 +240,66 @@ describe("runWithModelFallback", () => { } }); + it("allows different models from same provider when only one model is in cooldown", async () => { + const tempDir = await fs.mkdtemp(path.join(os.tmpdir(), "moltbot-auth-")); + const provider = `per-model-cooldown-${crypto.randomUUID()}`; + const profileId = `${provider}:default`; + + const store: AuthProfileStore = { + version: AUTH_STORE_VERSION, + profiles: { + [profileId]: { + type: "api_key", + provider, + key: "test-key", + }, + }, + usageStats: { + // Only model-a is in cooldown (per-model key) + [`${profileId}:model-a`]: { + cooldownUntil: Date.now() + 60_000, + }, + // model-b has no cooldown + }, + }; + + saveAuthProfileStore(store, tempDir); + + const cfg = makeCfg({ + agents: { + defaults: { + model: { + primary: `${provider}/model-a`, + fallbacks: [`${provider}/model-b`], + }, + }, + }, + }); + + const run = vi.fn().mockImplementation(async (providerId, modelId) => { + if (modelId === "model-b") return "ok"; + throw new Error(`unexpected model: ${providerId}/${modelId}`); + }); + + try { + const result = await runWithModelFallback({ + cfg, + provider, + model: "model-a", + agentDir: tempDir, + run, + }); + + expect(result.result).toBe("ok"); + // model-a should be skipped (in cooldown), model-b should be tried + expect(run.mock.calls).toEqual([[provider, "model-b"]]); + expect(result.attempts[0]?.reason).toBe("rate_limit"); + expect(result.attempts[0]?.model).toBe("model-a"); + } finally { + await fs.rm(tempDir, { recursive: true, force: true }); + } + }); + it("does not append configured primary when fallbacksOverride is set", async () => { const cfg = makeCfg({ agents: { diff --git a/src/agents/model-fallback.ts b/src/agents/model-fallback.ts index b87135f9f..aa27a5776 100644 --- a/src/agents/model-fallback.ts +++ b/src/agents/model-fallback.ts @@ -231,14 +231,16 @@ export async function runWithModelFallback(params: { store: authStore, provider: candidate.provider, }); - const isAnyProfileAvailable = profileIds.some((id) => !isProfileInCooldown(authStore, id)); + const isAnyProfileAvailable = profileIds.some( + (id) => !isProfileInCooldown(authStore, id, candidate.model), + ); if (profileIds.length > 0 && !isAnyProfileAvailable) { - // All profiles for this provider are in cooldown; skip without attempting + // All profiles for this provider+model are in cooldown; skip without attempting attempts.push({ provider: candidate.provider, model: candidate.model, - error: `Provider ${candidate.provider} is in cooldown (all profiles unavailable)`, + error: `${candidate.provider}/${candidate.model} is in cooldown (all profiles unavailable)`, reason: "rate_limit", }); continue; diff --git a/src/agents/pi-embedded-runner/run.ts b/src/agents/pi-embedded-runner/run.ts index 870453f38..746398217 100644 --- a/src/agents/pi-embedded-runner/run.ts +++ b/src/agents/pi-embedded-runner/run.ts @@ -474,6 +474,7 @@ export async function runEmbeddedPiAgent( await markAuthProfileFailure({ store: authStore, profileId: lastProfileId, + model: modelId, reason: promptFailoverReason, cfg: params.config, agentDir: params.agentDir, @@ -561,6 +562,7 @@ export async function runEmbeddedPiAgent( await markAuthProfileFailure({ store: authStore, profileId: lastProfileId, + model: modelId, reason, cfg: params.config, agentDir: params.agentDir,