From 9edf0306af85978e260968be3df9800274cf408b Mon Sep 17 00:00:00 2001
From: Bruno Guidolim <bguidolim@gmail.com>
Date: Wed, 28 Jan 2026 23:51:40 +0100
Subject: [PATCH] docs(auth): document per-model cooldown design decision

- Add module-level comment in usage.ts explaining the key asymmetry:
  failures create per-model keys; successes update profile-level keys
  and clear any existing per-model key
- Add explanatory comment at top of cooldown test file
- Create Serena memory (decision_auth_permodel_cooldown_design) for
  future reference

This documents the design from discussion #3417, where per-model cooldowns
allow independent rate limits while keeping the store clean.
---
 ...th-profiles.auth-profile-cooldowns.test.ts | 13 +++++++++++
 src/agents/auth-profiles/usage.ts             | 23 +++++++++++++++++++
 2 files changed, 36 insertions(+)

diff --git a/src/agents/auth-profiles.auth-profile-cooldowns.test.ts b/src/agents/auth-profiles.auth-profile-cooldowns.test.ts
index dcb68315f..e63910250 100644
--- a/src/agents/auth-profiles.auth-profile-cooldowns.test.ts
+++ b/src/agents/auth-profiles.auth-profile-cooldowns.test.ts
@@ -1,3 +1,16 @@
+/*
+ * Per-Model Cooldown Tests
+ * ────────────────────────
+ * These tests verify the per-model cooldown feature (discussion #3417).
+ *
+ * Key design asymmetry:
+ * - Failures CREATE per-model keys when a model is provided (e.g., "openai:default:gpt-4")
+ * - Successes UPDATE profile-level keys AND clear per-model keys (if they exist)
+ * - Per-model keys are ephemeral "penalty boxes" that only exist during cooldowns
+ *
+ * This allows independent rate limits per model while keeping the store clean.
+ * See: src/agents/auth-profiles/usage.ts for implementation details.
+ */
 import fs from "node:fs/promises";
 import os from "node:os";
 import path from "node:path";
diff --git a/src/agents/auth-profiles/usage.ts b/src/agents/auth-profiles/usage.ts
index a425bd78b..0514ccc8d 100644
--- a/src/agents/auth-profiles/usage.ts
+++ b/src/agents/auth-profiles/usage.ts
@@ -3,6 +3,29 @@ import { normalizeProviderId } from "../model-selection.js";
 import { saveAuthProfileStore, updateAuthProfileStoreWithLock } from "./store.js";
 import type { AuthProfileFailureReason, AuthProfileStore, ProfileUsageStats } from "./types.js";
 
+/*
+ * Per-Model Cooldown Design
+ * ─────────────────────────
+ * Cooldowns can be tracked at two granularities:
+ *
+ * 1. Profile-level keys (e.g., "github-copilot:github")
+ *    - Track success state: update lastUsed and lastGood, reset errorCount
+ *    - Used by `lastGood` to remember which profile worked for a provider
+ *    - Auth failures (wrong API key) should use profile-level cooldowns
+ *
+ * 2. Per-model keys (e.g., "github-copilot:github:gpt-5.2")
+ *    - Created ONLY on failure to track rate limits
+ *    - Act as ephemeral "penalty boxes" for specific models
+ *    - Naturally disappear when cooldown expires or model recovers
+ *
+ * Key asymmetry:
+ * - Failures → create per-model key (if model provided)
+ * - Successes → update profile-level key + clear per-model key (if it exists)
+ *
+ * This keeps the store clean and allows independent rate limits per model
+ * while maintaining backward compatibility with profile-level cooldowns.
+ */
+
 /**
  * Generate a cooldown key that optionally includes the model.
  * When model is provided, cooldowns are tracked per (profile + model) combination.