diff --git a/src/agents/auth-profiles/order.ts b/src/agents/auth-profiles/order.ts index 677c29069..d5c954147 100644 --- a/src/agents/auth-profiles/order.ts +++ b/src/agents/auth-profiles/order.ts @@ -4,6 +4,26 @@ import { listProfilesForProvider } from "./profiles.js"; import type { AuthProfileStore } from "./types.js"; import { isProfileInCooldown } from "./usage.js"; +/* + * Profile Ordering Design + * ─────────────────────── + * Profile ordering is intentionally MODEL-AGNOSTIC. This module answers: + * "Which auth profiles should we try for this provider?" + * + * Per-model cooldown filtering happens DOWNSTREAM in model-fallback.ts, + * which calls isProfileInCooldown(store, profileId, model) for each candidate. + * + * Why two layers? + * 1. Profile layer (here): Selects credentials based on provider, type preference, + * round-robin (lastUsed), and profile-level cooldowns (auth failures). + * 2. Model layer (model-fallback.ts): Filters by per-model rate limits before + * each API call attempt. + * + * This separation exists because: + * - Auth credentials (API keys/tokens) are profile-level + * - Rate limits are often model-level (e.g., gpt-5.2 has a quota while gpt-5-mini is unlimited) + */ + function resolveProfileUnusableUntil(stats: { cooldownUntil?: number; disabledUntil?: number; diff --git a/src/agents/auth-profiles/usage.ts b/src/agents/auth-profiles/usage.ts index 0514ccc8d..2cab9a729 100644 --- a/src/agents/auth-profiles/usage.ts +++ b/src/agents/auth-profiles/usage.ts @@ -87,7 +87,13 @@ export function isProfileInCooldown( /** * Mark a profile as successfully used. Resets error count and updates lastUsed. * Uses store lock to avoid overwriting concurrent usage updates. - * When model is provided, also clears the per-model cooldown. + * + * Success metrics (lastUsed, lastGood) are ALWAYS updated at the profile level, + * regardless of which model was used.
This is intentional: if ANY model works, + * the credentials are valid and the profile should be remembered as "good". + * + * When model is provided, also clears the per-model cooldown (if one exists). + * This allows a recovered model to be used immediately without waiting for the cooldown to expire. */ export async function markAuthProfileUsed(params: { store: AuthProfileStore;