diff --git a/src/agents/auth-profiles.auth-profile-cooldowns.test.ts b/src/agents/auth-profiles.auth-profile-cooldowns.test.ts index e5fe3900a..01f0722f5 100644 --- a/src/agents/auth-profiles.auth-profile-cooldowns.test.ts +++ b/src/agents/auth-profiles.auth-profile-cooldowns.test.ts @@ -1,5 +1,49 @@ +/* + * Per-Model Cooldown Tests + * ──────────────────────── + * These tests verify the per-model cooldown feature (discussion #3417). + * + * Key design asymmetry: + * - Failures CREATE per-model keys (e.g., "openai:default:gpt-4") + * - Successes UPDATE profile-level keys AND clear per-model keys (if they exist) + * - Per-model keys are ephemeral "penalty boxes" that only exist during cooldowns + * + * This allows independent rate limits per model while keeping the store clean. + * See: src/agents/auth-profiles/usage.ts for implementation details. + */ +import fs from "node:fs/promises"; +import os from "node:os"; +import path from "node:path"; import { describe, expect, it } from "vitest"; -import { calculateAuthProfileCooldownMs } from "./auth-profiles.js"; +import { + calculateAuthProfileCooldownMs, + clearAuthProfileCooldown, + cooldownKey, + isProfileInCooldown, + markAuthProfileFailure, + markAuthProfileUsed, + saveAuthProfileStore, +} from "./auth-profiles.js"; +import type { AuthProfileStore } from "./auth-profiles.js"; +import { AUTH_STORE_VERSION } from "./auth-profiles/constants.js"; + +// Test helpers +const makeStore = (usageStats?: AuthProfileStore["usageStats"]): AuthProfileStore => ({ + version: AUTH_STORE_VERSION, + profiles: { + "openai:default": { type: "api_key", provider: "openai", key: "test" }, + }, + ...(usageStats && { usageStats }), +}); + +async function withTempDir(fn: (tempDir: string) => Promise): Promise { + const tempDir = await fs.mkdtemp(path.join(os.tmpdir(), "moltbot-auth-")); + try { + return await fn(tempDir); + } finally { + await fs.rm(tempDir, { recursive: true, force: true }); + } +} describe("auth profile cooldowns", () => { it("applies exponential backoff with a 1h cap", () => { @@ -10,3 +54,212 @@ describe("auth profile cooldowns", () => { expect(calculateAuthProfileCooldownMs(5)).toBe(60 * 60_000); }); }); + +describe("cooldownKey", () => { + it("returns profileId when model is not provided or empty", () => { + expect(cooldownKey("openai:default")).toBe("openai:default"); + expect(cooldownKey("openai:default", undefined)).toBe("openai:default"); + expect(cooldownKey("openai:default", "")).toBe("openai:default"); + expect(cooldownKey("openai:default", " ")).toBe("openai:default"); + }); + + it("returns composite key when model is provided", () => { + expect(cooldownKey("openai:default", "gpt-4")).toBe("openai:default:gpt-4"); + expect(cooldownKey("github-copilot:default", "gpt-5.2")).toBe("github-copilot:default:gpt-5.2"); + }); +}); + +describe("isProfileInCooldown with per-model support", () => { + it("returns false when no cooldown exists", () => { + const store = makeStore(); + expect(isProfileInCooldown(store, "openai:default")).toBe(false); + expect(isProfileInCooldown(store, "openai:default", "gpt-4")).toBe(false); + }); + + it("checks profile-level cooldown when model not provided", () => { + const store = makeStore({ "openai:default": { cooldownUntil: Date.now() + 60_000 } }); + expect(isProfileInCooldown(store, "openai:default")).toBe(true); + }); + + it("checks per-model cooldown when model is provided", () => { + const store = makeStore({ "openai:default:gpt-4": { cooldownUntil: Date.now() + 60_000 } }); + expect(isProfileInCooldown(store, "openai:default", "gpt-4")).toBe(true); + expect(isProfileInCooldown(store, "openai:default", "gpt-3.5")).toBe(false); + expect(isProfileInCooldown(store, "openai:default")).toBe(false); + }); + + it("allows independent cooldowns per model", () => { + const store: AuthProfileStore = { + version: AUTH_STORE_VERSION, + profiles: { + "github-copilot:default": { type: "api_key", provider: "github-copilot", key: "test" }, + }, + usageStats: { "github-copilot:default:gpt-5.2": { cooldownUntil: Date.now() + 60_000 } }, + }; + expect(isProfileInCooldown(store, "github-copilot:default", "gpt-5.2")).toBe(true); + expect(isProfileInCooldown(store, "github-copilot:default", "gpt-5-mini")).toBe(false); + }); + + it("returns false when cooldown has expired", () => { + const store = makeStore({ "openai:default:gpt-4": { cooldownUntil: Date.now() - 1000 } }); + expect(isProfileInCooldown(store, "openai:default", "gpt-4")).toBe(false); + }); +}); + +describe("markAuthProfileUsed with per-model support", () => { + it("clears per-model cooldown when model is provided", async () => { + await withTempDir(async (tempDir) => { + const cooldownTime = Date.now() + 60_000; + const store = makeStore({ + "openai:default": { cooldownUntil: cooldownTime }, + "openai:default:gpt-4": { cooldownUntil: cooldownTime, errorCount: 3 }, + "openai:default:gpt-3.5": { cooldownUntil: cooldownTime }, + }); + saveAuthProfileStore(store, tempDir); + + await markAuthProfileUsed({ + store, + profileId: "openai:default", + model: "gpt-4", + agentDir: tempDir, + }); + + expect(store.usageStats?.["openai:default"]?.cooldownUntil).toBeUndefined(); + expect(store.usageStats?.["openai:default:gpt-4"]?.cooldownUntil).toBeUndefined(); + expect(store.usageStats?.["openai:default:gpt-4"]?.errorCount).toBe(0); + expect(store.usageStats?.["openai:default:gpt-3.5"]?.cooldownUntil).toBe(cooldownTime); + }); + }); + + it("only clears profile-level cooldown when model is not provided", async () => { + await withTempDir(async (tempDir) => { + const cooldownTime = Date.now() + 60_000; + const store = makeStore({ + "openai:default": { cooldownUntil: cooldownTime }, + "openai:default:gpt-4": { cooldownUntil: cooldownTime }, + }); + saveAuthProfileStore(store, tempDir); + + await markAuthProfileUsed({ store, profileId: "openai:default", agentDir: tempDir }); + + expect(store.usageStats?.["openai:default"]?.cooldownUntil).toBeUndefined(); + expect(store.usageStats?.["openai:default:gpt-4"]?.cooldownUntil).toBe(cooldownTime); + }); + }); +}); + +describe("isProfileInCooldown backward compatibility", () => { + it("returns true for any model when profile-level cooldown exists", () => { + const store = makeStore({ "openai:default": { cooldownUntil: Date.now() + 60_000 } }); + expect(isProfileInCooldown(store, "openai:default", "gpt-4")).toBe(true); + expect(isProfileInCooldown(store, "openai:default", "gpt-3.5")).toBe(true); + expect(isProfileInCooldown(store, "openai:default", "o1-preview")).toBe(true); + expect(isProfileInCooldown(store, "openai:default")).toBe(true); + }); + + it("checks disabledUntil for per-model cooldowns (billing failures)", () => { + const store = makeStore({ "openai:default:gpt-4": { disabledUntil: Date.now() + 60_000 } }); + expect(isProfileInCooldown(store, "openai:default", "gpt-4")).toBe(true); + expect(isProfileInCooldown(store, "openai:default", "gpt-3.5")).toBe(false); + }); +}); + +describe("markAuthProfileFailure with per-model support", () => { + it("tracks failure per model when model is provided", async () => { + await withTempDir(async (tempDir) => { + const store = makeStore(); + saveAuthProfileStore(store, tempDir); + + await markAuthProfileFailure({ + store, + profileId: "openai:default", + model: "gpt-4", + reason: "rate_limit", + agentDir: tempDir, + }); + + expect(store.usageStats?.["openai:default:gpt-4"]?.cooldownUntil).toBeGreaterThan(Date.now()); + expect(store.usageStats?.["openai:default:gpt-4"]?.errorCount).toBe(1); + expect(store.usageStats?.["openai:default"]?.cooldownUntil).toBeUndefined(); + expect(store.usageStats?.["openai:default:gpt-3.5"]).toBeUndefined(); + }); + }); + + it("tracks failure at profile level when model is not provided", async () => { + await withTempDir(async (tempDir) => { + const store = makeStore(); + saveAuthProfileStore(store, tempDir); + + await markAuthProfileFailure({ + store, + profileId: "openai:default", + reason: "auth", + agentDir: tempDir, + }); + + expect(store.usageStats?.["openai:default"]?.cooldownUntil).toBeGreaterThan(Date.now()); + expect(store.usageStats?.["openai:default"]?.errorCount).toBe(1); + }); + }); + + it("tracks billing failures with disabledUntil per model", async () => { + await withTempDir(async (tempDir) => { + const store = makeStore(); + saveAuthProfileStore(store, tempDir); + + await markAuthProfileFailure({ + store, + profileId: "openai:default", + model: "gpt-4", + reason: "billing", + agentDir: tempDir, + }); + + expect(store.usageStats?.["openai:default:gpt-4"]?.disabledUntil).toBeGreaterThan(Date.now()); + expect(store.usageStats?.["openai:default:gpt-4"]?.disabledReason).toBe("billing"); + }); + }); +}); + +describe("clearAuthProfileCooldown with per-model support", () => { + it("clears per-model cooldown when model is provided", async () => { + await withTempDir(async (tempDir) => { + const cooldownTime = Date.now() + 60_000; + const store = makeStore({ + "openai:default": { cooldownUntil: cooldownTime }, + "openai:default:gpt-4": { cooldownUntil: cooldownTime, errorCount: 3 }, + "openai:default:gpt-3.5": { cooldownUntil: cooldownTime }, + }); + saveAuthProfileStore(store, tempDir); + + await clearAuthProfileCooldown({ + store, + profileId: "openai:default", + model: "gpt-4", + agentDir: tempDir, + }); + + expect(store.usageStats?.["openai:default:gpt-4"]?.cooldownUntil).toBeUndefined(); + expect(store.usageStats?.["openai:default:gpt-4"]?.errorCount).toBe(0); + expect(store.usageStats?.["openai:default"]?.cooldownUntil).toBe(cooldownTime); + expect(store.usageStats?.["openai:default:gpt-3.5"]?.cooldownUntil).toBe(cooldownTime); + }); + }); + + it("clears profile-level cooldown when model is not provided", async () => { + await withTempDir(async (tempDir) => { + const cooldownTime = Date.now() + 60_000; + const store = makeStore({ + "openai:default": { cooldownUntil: cooldownTime, errorCount: 2 }, + "openai:default:gpt-4": { cooldownUntil: cooldownTime }, + }); + saveAuthProfileStore(store, tempDir); + + await clearAuthProfileCooldown({ store, profileId: "openai:default", agentDir: tempDir }); + + expect(store.usageStats?.["openai:default"]?.cooldownUntil).toBeUndefined(); + expect(store.usageStats?.["openai:default"]?.errorCount).toBe(0); + expect(store.usageStats?.["openai:default:gpt-4"]?.cooldownUntil).toBe(cooldownTime); + }); + }); +}); diff --git a/src/agents/auth-profiles.ts b/src/agents/auth-profiles.ts index 9a6c75b10..e68a27f1c 100644 --- a/src/agents/auth-profiles.ts +++ b/src/agents/auth-profiles.ts @@ -32,6 +32,7 @@ export type { export { calculateAuthProfileCooldownMs, clearAuthProfileCooldown, + cooldownKey, isProfileInCooldown, markAuthProfileCooldown, markAuthProfileFailure, diff --git a/src/agents/auth-profiles/order.ts b/src/agents/auth-profiles/order.ts index 68bfba68e..d8015b6ad 100644 --- a/src/agents/auth-profiles/order.ts +++ b/src/agents/auth-profiles/order.ts @@ -4,6 +4,26 @@ import { listProfilesForProvider } from "./profiles.js"; import type { AuthProfileStore } from "./types.js"; import { isProfileInCooldown } from "./usage.js"; +/* + * Profile Ordering Design + * ─────────────────────── + * Profile ordering is intentionally MODEL-AGNOSTIC. This module answers: + * "Which auth profiles should we try for this provider?" + * + * Per-model cooldown filtering happens DOWNSTREAM in model-fallback.ts, + * which calls isProfileInCooldown(store, profileId, model) for each candidate. + * + * Why two layers? + * 1. Profile layer (here): Selects credentials based on provider, type preference, + * round-robin (lastUsed), and profile-level cooldowns (auth failures). + * 2. Model layer (model-fallback.ts): Filters by per-model rate limits before + * each API call attempt. + * + * This separation exists because: + * - Auth credentials (API keys/tokens) are profile-level + * - Rate limits are often model-level (e.g., gpt-5.2 has quota, gpt-5-mini unlimited) + */ + function resolveProfileUnusableUntil(stats: { cooldownUntil?: number; disabledUntil?: number; diff --git a/src/agents/auth-profiles/usage.ts b/src/agents/auth-profiles/usage.ts index 82a77c7fe..3e724261b 100644 --- a/src/agents/auth-profiles/usage.ts +++ b/src/agents/auth-profiles/usage.ts @@ -3,6 +3,43 @@ import { normalizeProviderId } from "../model-selection.js"; import { saveAuthProfileStore, updateAuthProfileStoreWithLock } from "./store.js"; import type { AuthProfileFailureReason, AuthProfileStore, ProfileUsageStats } from "./types.js"; +/* + * Per-Model Cooldown Design + * ───────────────────────── + * Cooldowns can be tracked at two granularities: + * + * 1. Profile-level keys (e.g., "github-copilot:github") + * - Track success metrics: lastUsed, lastGood, errorCount reset + * - Used by `lastGood` to remember which profile worked for a provider + * - Auth failures (wrong API key) should use profile-level cooldowns + * + * 2. Per-model keys (e.g., "github-copilot:github:gpt-5.2") + * - Created ONLY on failure to track rate limits + * - Act as ephemeral "penalty boxes" for specific models + * - Naturally disappear when cooldown expires or model recovers + * + * Key asymmetry: + * - Failures → create per-model key (if model provided) + * - Successes → update profile-level key + clear per-model key (if it exists) + * + * This keeps the store clean and allows independent rate limits per model + * while maintaining backward compatibility with profile-level cooldowns. + */ + +/** + * Generate a cooldown key that optionally includes the model. + * When model is provided, cooldowns are tracked per (profile + model) combination. + * This allows different models from the same provider to have independent cooldowns. + * + * @example cooldownKey("openai:default", "gpt-4") => "openai:default:gpt-4" + * @example cooldownKey("openai:default") => "openai:default" + */ +export function cooldownKey(profileId: string, model?: string): string { + // Treat empty/whitespace-only string as "no model" to avoid trailing colon in key + const normalizedModel = model?.trim() || undefined; + return normalizedModel ? `${profileId}:${normalizedModel}` : profileId; +} + function resolveProfileUnusableUntil(stats: ProfileUsageStats): number | null { const values = [stats.cooldownUntil, stats.disabledUntil] .filter((value): value is number => typeof value === "number") @@ -11,40 +48,95 @@ function resolveProfileUnusableUntil(stats: ProfileUsageStats): number | null { return Math.max(...values); } -/** - * Check if a profile is currently in cooldown (due to rate limiting or errors). - */ -export function isProfileInCooldown(store: AuthProfileStore, profileId: string): boolean { - const stats = store.usageStats?.[profileId]; +/** Checks if a key is currently in cooldown. */ +function isKeyInCooldown(store: AuthProfileStore, key: string, now: number): boolean { + const stats = store.usageStats?.[key]; if (!stats) return false; const unusableUntil = resolveProfileUnusableUntil(stats); - return unusableUntil ? Date.now() < unusableUntil : false; + return unusableUntil !== null && now < unusableUntil; +} + +/** + * Check if a profile is currently in cooldown (due to rate limiting or errors). + * + * When model is provided, checks both: + * 1. The per-model cooldown key (e.g., "openai:default:gpt-4") + * 2. The profile-level cooldown key (e.g., "openai:default") + * + * Profile-level cooldowns apply to all models under that profile, supporting + * legacy entries and scenarios where failures affect all models (e.g., auth errors). + */ +export function isProfileInCooldown( + store: AuthProfileStore, + profileId: string, + model?: string, +): boolean { + const now = Date.now(); + + // Check per-model cooldown first (if model provided) + if (model && isKeyInCooldown(store, cooldownKey(profileId, model), now)) { + return true; + } + + // Also check profile-level cooldown (applies to all models) + return isKeyInCooldown(store, profileId, now); +} + +/** Clears cooldown fields from usage stats, preserving other fields. */ +function clearCooldownFields( + stats: ProfileUsageStats | undefined, + options?: { setLastUsed?: boolean }, +): ProfileUsageStats { + return { + ...stats, + ...(options?.setLastUsed ? { lastUsed: Date.now() } : {}), + errorCount: 0, + cooldownUntil: undefined, + disabledUntil: undefined, + disabledReason: undefined, + failureCounts: undefined, + }; +} + +/** Applies success updates to usage stats in-place. */ +function applySuccessUpdates( + usageStats: Record, + profileId: string, + model?: string, +): void { + usageStats[profileId] = clearCooldownFields(usageStats[profileId], { setLastUsed: true }); + if (model) { + const modelKey = cooldownKey(profileId, model); + if (usageStats[modelKey]) { + usageStats[modelKey] = clearCooldownFields(usageStats[modelKey]); + } + } } /** * Mark a profile as successfully used. Resets error count and updates lastUsed. * Uses store lock to avoid overwriting concurrent usage updates. + * + * Success metrics (lastUsed, lastGood) are ALWAYS updated at the profile level, + * regardless of which model was used. This is intentional: if ANY model works, + * the credentials are valid and the profile should be remembered as "good". + * + * When model is provided, also clears the per-model cooldown (if one exists). + * This allows a recovered model to be used immediately without waiting for expiry. */ export async function markAuthProfileUsed(params: { store: AuthProfileStore; profileId: string; + model?: string; agentDir?: string; }): Promise { - const { store, profileId, agentDir } = params; + const { store, profileId, model, agentDir } = params; const updated = await updateAuthProfileStoreWithLock({ agentDir, updater: (freshStore) => { if (!freshStore.profiles[profileId]) return false; freshStore.usageStats = freshStore.usageStats ?? {}; - freshStore.usageStats[profileId] = { - ...freshStore.usageStats[profileId], - lastUsed: Date.now(), - errorCount: 0, - cooldownUntil: undefined, - disabledUntil: undefined, - disabledReason: undefined, - failureCounts: undefined, - }; + applySuccessUpdates(freshStore.usageStats, profileId, model); return true; }, }); @@ -55,15 +147,7 @@ export async function markAuthProfileUsed(params: { if (!store.profiles[profileId]) return; store.usageStats = store.usageStats ?? {}; - store.usageStats[profileId] = { - ...store.usageStats[profileId], - lastUsed: Date.now(), - errorCount: 0, - cooldownUntil: undefined, - disabledUntil: undefined, - disabledReason: undefined, - failureCounts: undefined, - }; + applySuccessUpdates(store.usageStats, profileId, model); saveAuthProfileStore(store, agentDir); } @@ -187,22 +271,25 @@ function computeNextProfileUsageStats(params: { /** * Mark a profile as failed for a specific reason. Billing failures are treated * as "disabled" (longer backoff) vs the regular cooldown window. + * When model is provided, cooldown is tracked per (profile + model) combination. */ export async function markAuthProfileFailure(params: { store: AuthProfileStore; profileId: string; + model?: string; reason: AuthProfileFailureReason; cfg?: OpenClawConfig; agentDir?: string; }): Promise { - const { store, profileId, reason, agentDir, cfg } = params; + const { store, profileId, model, reason, agentDir, cfg } = params; + const key = cooldownKey(profileId, model); const updated = await updateAuthProfileStoreWithLock({ agentDir, updater: (freshStore) => { const profile = freshStore.profiles[profileId]; if (!profile) return false; freshStore.usageStats = freshStore.usageStats ?? {}; - const existing = freshStore.usageStats[profileId] ?? {}; + const existing = freshStore.usageStats[key] ?? {}; const now = Date.now(); const providerKey = normalizeProviderId(profile.provider); @@ -211,7 +298,7 @@ export async function markAuthProfileFailure(params: { providerId: providerKey, }); - freshStore.usageStats[profileId] = computeNextProfileUsageStats({ + freshStore.usageStats[key] = computeNextProfileUsageStats({ existing, now, reason, @@ -227,7 +314,7 @@ export async function markAuthProfileFailure(params: { if (!store.profiles[profileId]) return; store.usageStats = store.usageStats ?? {}; - const existing = store.usageStats[profileId] ?? {}; + const existing = store.usageStats[key] ?? {}; const now = Date.now(); const providerKey = normalizeProviderId(store.profiles[profileId]?.provider ?? ""); const cfgResolved = resolveAuthCooldownConfig({ @@ -235,7 +322,7 @@ export async function markAuthProfileFailure(params: { providerId: providerKey, }); - store.usageStats[profileId] = computeNextProfileUsageStats({ + store.usageStats[key] = computeNextProfileUsageStats({ existing, now, reason, @@ -245,18 +332,21 @@ export async function markAuthProfileFailure(params: { } /** - * Mark a profile as failed/rate-limited. Applies exponential backoff cooldown. - * Cooldown times: 1min, 5min, 25min, max 1 hour. - * Uses store lock to avoid overwriting concurrent usage updates. + * Mark a profile as failed/rate-limited with "unknown" reason. + * Convenience wrapper around markAuthProfileFailure() for generic failures. + * Applies exponential backoff cooldown: 1min, 5min, 25min, max 1 hour. + * When model is provided, cooldown is tracked per (profile + model) combination. */ export async function markAuthProfileCooldown(params: { store: AuthProfileStore; profileId: string; + model?: string; agentDir?: string; }): Promise { await markAuthProfileFailure({ store: params.store, profileId: params.profileId, + model: params.model, reason: "unknown", agentDir: params.agentDir, }); @@ -265,20 +355,23 @@ export async function markAuthProfileCooldown(params: { /** * Clear cooldown for a profile (e.g., manual reset). * Uses store lock to avoid overwriting concurrent usage updates. + * When model is provided, clears the per-model cooldown key. */ export async function clearAuthProfileCooldown(params: { store: AuthProfileStore; profileId: string; + model?: string; agentDir?: string; }): Promise { - const { store, profileId, agentDir } = params; + const { store, profileId, model, agentDir } = params; + const key = cooldownKey(profileId, model); const updated = await updateAuthProfileStoreWithLock({ agentDir, updater: (freshStore) => { - if (!freshStore.usageStats?.[profileId]) return false; + if (!freshStore.usageStats?.[key]) return false; - freshStore.usageStats[profileId] = { - ...freshStore.usageStats[profileId], + freshStore.usageStats[key] = { + ...freshStore.usageStats[key], errorCount: 0, cooldownUntil: undefined, }; @@ -289,10 +382,10 @@ export async function clearAuthProfileCooldown(params: { store.usageStats = updated.usageStats; return; } - if (!store.usageStats?.[profileId]) return; + if (!store.usageStats?.[key]) return; - store.usageStats[profileId] = { - ...store.usageStats[profileId], + store.usageStats[key] = { + ...store.usageStats[key], errorCount: 0, cooldownUntil: undefined, }; diff --git a/src/agents/model-fallback.test.ts b/src/agents/model-fallback.test.ts index d6aea4782..8d3729018 100644 --- a/src/agents/model-fallback.test.ts +++ b/src/agents/model-fallback.test.ts @@ -240,6 +240,66 @@ describe("runWithModelFallback", () => { } }); + it("allows different models from same provider when only one model is in cooldown", async () => { + const tempDir = await fs.mkdtemp(path.join(os.tmpdir(), "moltbot-auth-")); + const provider = `per-model-cooldown-${crypto.randomUUID()}`; + const profileId = `${provider}:default`; + + const store: AuthProfileStore = { + version: AUTH_STORE_VERSION, + profiles: { + [profileId]: { + type: "api_key", + provider, + key: "test-key", + }, + }, + usageStats: { + // Only model-a is in cooldown (per-model key) + [`${profileId}:model-a`]: { + cooldownUntil: Date.now() + 60_000, + }, + // model-b has no cooldown + }, + }; + + saveAuthProfileStore(store, tempDir); + + const cfg = makeCfg({ + agents: { + defaults: { + model: { + primary: `${provider}/model-a`, + fallbacks: [`${provider}/model-b`], + }, + }, + }, + }); + + const run = vi.fn().mockImplementation(async (providerId, modelId) => { + if (modelId === "model-b") return "ok"; + throw new Error(`unexpected model: ${providerId}/${modelId}`); + }); + + try { + const result = await runWithModelFallback({ + cfg, + provider, + model: "model-a", + agentDir: tempDir, + run, + }); + + expect(result.result).toBe("ok"); + // model-a should be skipped (in cooldown), model-b should be tried + expect(run.mock.calls).toEqual([[provider, "model-b"]]); + expect(result.attempts[0]?.reason).toBe("rate_limit"); + expect(result.attempts[0]?.model).toBe("model-a"); + } finally { + await fs.rm(tempDir, { recursive: true, force: true }); + } + }); + it("does not append configured primary when fallbacksOverride is set", async () => { const cfg = makeCfg({ agents: { diff --git a/src/agents/model-fallback.ts b/src/agents/model-fallback.ts index b99a104d5..517c9d140 100644 --- a/src/agents/model-fallback.ts +++ b/src/agents/model-fallback.ts @@ -231,14 +231,16 @@ export async function runWithModelFallback(params: { store: authStore, provider: candidate.provider, }); - const isAnyProfileAvailable = profileIds.some((id) => !isProfileInCooldown(authStore, id)); + const isAnyProfileAvailable = profileIds.some( + (id) => !isProfileInCooldown(authStore, id, candidate.model), + ); if (profileIds.length > 0 && !isAnyProfileAvailable) { - // All profiles for this provider are in cooldown; skip without attempting + // All profiles for this provider+model are in cooldown; skip without attempting attempts.push({ provider: candidate.provider, model: candidate.model, - error: `Provider ${candidate.provider} is in cooldown (all profiles unavailable)`, + error: `${candidate.provider}/${candidate.model} is in cooldown (all profiles unavailable)`, reason: "rate_limit", }); continue; diff --git a/src/agents/pi-embedded-runner/run.ts b/src/agents/pi-embedded-runner/run.ts index 1d5010679..8848d574f 100644 --- a/src/agents/pi-embedded-runner/run.ts +++ b/src/agents/pi-embedded-runner/run.ts @@ -474,6 +474,7 @@ export async function runEmbeddedPiAgent( await markAuthProfileFailure({ store: authStore, profileId: lastProfileId, + model: modelId, reason: promptFailoverReason, cfg: params.config, agentDir: params.agentDir, @@ -561,6 +562,7 @@ export async function runEmbeddedPiAgent( await markAuthProfileFailure({ store: authStore, profileId: lastProfileId, + model: modelId, reason, cfg: params.config, agentDir: params.agentDir, @@ -644,6 +646,7 @@ export async function runEmbeddedPiAgent( await markAuthProfileUsed({ store: authStore, profileId: lastProfileId, + model: modelId, agentDir: params.agentDir, }); }