From 9edf0306af85978e260968be3df9800274cf408b Mon Sep 17 00:00:00 2001 From: Bruno Guidolim Date: Wed, 28 Jan 2026 23:51:40 +0100 Subject: [PATCH] docs(auth): document per-model cooldown design decision - Add module-level comment in usage.ts explaining the key asymmetry: failures create per-model keys, successes update profile-level keys - Add explanatory comment at top of cooldown test file - Create Serena memory (decision_auth_permodel_cooldown_design) for future reference This documents the design from discussion #3417 where per-model cooldowns allow independent rate limits while keeping the store clean. --- ...th-profiles.auth-profile-cooldowns.test.ts | 13 +++++++++++ src/agents/auth-profiles/usage.ts | 23 +++++++++++++++++++ 2 files changed, 36 insertions(+) diff --git a/src/agents/auth-profiles.auth-profile-cooldowns.test.ts b/src/agents/auth-profiles.auth-profile-cooldowns.test.ts index dcb68315f..e63910250 100644 --- a/src/agents/auth-profiles.auth-profile-cooldowns.test.ts +++ b/src/agents/auth-profiles.auth-profile-cooldowns.test.ts @@ -1,3 +1,16 @@ +/* + * Per-Model Cooldown Tests + * ──────────────────────── + * These tests verify the per-model cooldown feature (discussion #3417). + * + * Key design asymmetry: + * - Failures CREATE per-model keys (e.g., "openai:default:gpt-4") + * - Successes UPDATE profile-level keys AND clear per-model keys (if they exist) + * - Per-model keys are ephemeral "penalty boxes" that only exist during cooldowns + * + * This allows independent rate limits per model while keeping the store clean. + * See: src/agents/auth-profiles/usage.ts for implementation details. 
+ */ import fs from "node:fs/promises"; import os from "node:os"; import path from "node:path"; diff --git a/src/agents/auth-profiles/usage.ts b/src/agents/auth-profiles/usage.ts index a425bd78b..0514ccc8d 100644 --- a/src/agents/auth-profiles/usage.ts +++ b/src/agents/auth-profiles/usage.ts @@ -3,6 +3,29 @@ import { normalizeProviderId } from "../model-selection.js"; import { saveAuthProfileStore, updateAuthProfileStoreWithLock } from "./store.js"; import type { AuthProfileFailureReason, AuthProfileStore, ProfileUsageStats } from "./types.js"; +/* + * Per-Model Cooldown Design + * ───────────────────────── + * Cooldowns can be tracked at two granularities: + * + * 1. Profile-level keys (e.g., "github-copilot:github") + * - Track success metrics: lastUsed, lastGood, errorCount reset + * - Used by `lastGood` to remember which profile worked for a provider + * - Auth failures (wrong API key) should use profile-level cooldowns + * + * 2. Per-model keys (e.g., "github-copilot:github:gpt-5.2") + * - Created ONLY on failure to track rate limits + * - Act as ephemeral "penalty boxes" for specific models + * - Naturally disappear when cooldown expires or model recovers + * + * Key asymmetry: + * - Failures → create per-model key (if model provided; otherwise profile-level key) + * - Successes → update profile-level key + clear per-model key (if it exists) + * + * This keeps the store clean and allows independent rate limits per model + * while maintaining backward compatibility with profile-level cooldowns. + */ + /** * Generate a cooldown key that optionally includes the model. * When model is provided, cooldowns are tracked per (profile + model) combination.