feat(auth): track cooldown per (auth profile + model)
Allow different models from the same provider to have independent cooldowns. When a model hits rate limit, only that specific model is blocked, not all models using the same auth profile. - Add cooldownKey() helper for composite key generation - Update isProfileInCooldown to check both per-model and profile-level - Update markAuthProfileFailure/Cooldown with optional model param - Pass model to cooldown checks in model-fallback and agent runner - Add comprehensive tests for per-model cooldown behavior Ref: #3417
This commit is contained in:
parent
4583f88626
commit
715728c989
@ -1,5 +1,11 @@
|
||||
import { describe, expect, it } from "vitest";
|
||||
import { calculateAuthProfileCooldownMs } from "./auth-profiles.js";
|
||||
import {
|
||||
calculateAuthProfileCooldownMs,
|
||||
cooldownKey,
|
||||
isProfileInCooldown,
|
||||
} from "./auth-profiles.js";
|
||||
import type { AuthProfileStore } from "./auth-profiles.js";
|
||||
import { AUTH_STORE_VERSION } from "./auth-profiles/constants.js";
|
||||
|
||||
describe("auth profile cooldowns", () => {
|
||||
it("applies exponential backoff with a 1h cap", () => {
|
||||
@ -10,3 +16,92 @@ describe("auth profile cooldowns", () => {
|
||||
expect(calculateAuthProfileCooldownMs(5)).toBe(60 * 60_000);
|
||||
});
|
||||
});
|
||||
|
||||
describe("cooldownKey", () => {
|
||||
it("returns profileId when model is not provided", () => {
|
||||
expect(cooldownKey("openai:default")).toBe("openai:default");
|
||||
expect(cooldownKey("openai:default", undefined)).toBe("openai:default");
|
||||
});
|
||||
|
||||
it("returns composite key when model is provided", () => {
|
||||
expect(cooldownKey("openai:default", "gpt-4")).toBe("openai:default:gpt-4");
|
||||
expect(cooldownKey("github-copilot:default", "gpt-5.2")).toBe("github-copilot:default:gpt-5.2");
|
||||
});
|
||||
});
|
||||
|
||||
describe("isProfileInCooldown with per-model support", () => {
|
||||
it("returns false when no cooldown exists", () => {
|
||||
const store: AuthProfileStore = {
|
||||
version: AUTH_STORE_VERSION,
|
||||
profiles: {
|
||||
"openai:default": { type: "api_key", provider: "openai", key: "test" },
|
||||
},
|
||||
};
|
||||
expect(isProfileInCooldown(store, "openai:default")).toBe(false);
|
||||
expect(isProfileInCooldown(store, "openai:default", "gpt-4")).toBe(false);
|
||||
});
|
||||
|
||||
it("checks profile-level cooldown when model not provided", () => {
|
||||
const store: AuthProfileStore = {
|
||||
version: AUTH_STORE_VERSION,
|
||||
profiles: {
|
||||
"openai:default": { type: "api_key", provider: "openai", key: "test" },
|
||||
},
|
||||
usageStats: {
|
||||
"openai:default": { cooldownUntil: Date.now() + 60_000 },
|
||||
},
|
||||
};
|
||||
expect(isProfileInCooldown(store, "openai:default")).toBe(true);
|
||||
});
|
||||
|
||||
it("checks per-model cooldown when model is provided", () => {
|
||||
const store: AuthProfileStore = {
|
||||
version: AUTH_STORE_VERSION,
|
||||
profiles: {
|
||||
"openai:default": { type: "api_key", provider: "openai", key: "test" },
|
||||
},
|
||||
usageStats: {
|
||||
"openai:default:gpt-4": { cooldownUntil: Date.now() + 60_000 },
|
||||
},
|
||||
};
|
||||
// model-specific cooldown exists
|
||||
expect(isProfileInCooldown(store, "openai:default", "gpt-4")).toBe(true);
|
||||
// different model is not in cooldown
|
||||
expect(isProfileInCooldown(store, "openai:default", "gpt-3.5")).toBe(false);
|
||||
// profile-level is not in cooldown
|
||||
expect(isProfileInCooldown(store, "openai:default")).toBe(false);
|
||||
});
|
||||
|
||||
it("allows independent cooldowns per model", () => {
|
||||
const store: AuthProfileStore = {
|
||||
version: AUTH_STORE_VERSION,
|
||||
profiles: {
|
||||
"github-copilot:default": {
|
||||
type: "api_key",
|
||||
provider: "github-copilot",
|
||||
key: "test",
|
||||
},
|
||||
},
|
||||
usageStats: {
|
||||
// gpt-5.2 is in cooldown (rate limited)
|
||||
"github-copilot:default:gpt-5.2": { cooldownUntil: Date.now() + 60_000 },
|
||||
// gpt-5-mini has no cooldown (unlimited quota)
|
||||
},
|
||||
};
|
||||
expect(isProfileInCooldown(store, "github-copilot:default", "gpt-5.2")).toBe(true);
|
||||
expect(isProfileInCooldown(store, "github-copilot:default", "gpt-5-mini")).toBe(false);
|
||||
});
|
||||
|
||||
it("returns false when cooldown has expired", () => {
|
||||
const store: AuthProfileStore = {
|
||||
version: AUTH_STORE_VERSION,
|
||||
profiles: {
|
||||
"openai:default": { type: "api_key", provider: "openai", key: "test" },
|
||||
},
|
||||
usageStats: {
|
||||
"openai:default:gpt-4": { cooldownUntil: Date.now() - 1000 }, // expired
|
||||
},
|
||||
};
|
||||
expect(isProfileInCooldown(store, "openai:default", "gpt-4")).toBe(false);
|
||||
});
|
||||
});
|
||||
|
||||
@ -32,6 +32,7 @@ export type {
|
||||
export {
|
||||
calculateAuthProfileCooldownMs,
|
||||
clearAuthProfileCooldown,
|
||||
cooldownKey,
|
||||
isProfileInCooldown,
|
||||
markAuthProfileCooldown,
|
||||
markAuthProfileFailure,
|
||||
|
||||
@ -3,6 +3,15 @@ import { normalizeProviderId } from "../model-selection.js";
|
||||
import { saveAuthProfileStore, updateAuthProfileStoreWithLock } from "./store.js";
|
||||
import type { AuthProfileFailureReason, AuthProfileStore, ProfileUsageStats } from "./types.js";
|
||||
|
||||
/**
|
||||
* Generate a cooldown key that optionally includes the model.
|
||||
* When model is provided, cooldowns are tracked per (profile + model) combination.
|
||||
* This allows different models from the same provider to have independent cooldowns.
|
||||
*/
|
||||
export function cooldownKey(profileId: string, model?: string): string {
|
||||
return model ? `${profileId}:${model}` : profileId;
|
||||
}
|
||||
|
||||
function resolveProfileUnusableUntil(stats: ProfileUsageStats): number | null {
|
||||
const values = [stats.cooldownUntil, stats.disabledUntil]
|
||||
.filter((value): value is number => typeof value === "number")
|
||||
@ -14,11 +23,39 @@ function resolveProfileUnusableUntil(stats: ProfileUsageStats): number | null {
|
||||
/**
|
||||
* Check if a profile is currently in cooldown (due to rate limiting or errors).
|
||||
*/
|
||||
export function isProfileInCooldown(store: AuthProfileStore, profileId: string): boolean {
|
||||
const stats = store.usageStats?.[profileId];
|
||||
if (!stats) return false;
|
||||
const unusableUntil = resolveProfileUnusableUntil(stats);
|
||||
return unusableUntil ? Date.now() < unusableUntil : false;
|
||||
/**
|
||||
* Check if a profile is currently in cooldown (due to rate limiting or errors).
|
||||
* When model is provided, checks the per-model cooldown key.
|
||||
*/
|
||||
/**
|
||||
* Check if a profile is currently in cooldown (due to rate limiting or errors).
|
||||
* When model is provided, checks both the per-model cooldown key and the profile-level key.
|
||||
* A profile-level cooldown applies to all models (backward compatibility).
|
||||
*/
|
||||
export function isProfileInCooldown(
|
||||
store: AuthProfileStore,
|
||||
profileId: string,
|
||||
model?: string,
|
||||
): boolean {
|
||||
const now = Date.now();
|
||||
|
||||
// Check per-model cooldown first (if model provided)
|
||||
if (model) {
|
||||
const modelKey = cooldownKey(profileId, model);
|
||||
const modelStats = store.usageStats?.[modelKey];
|
||||
if (modelStats) {
|
||||
const modelUnusableUntil = resolveProfileUnusableUntil(modelStats);
|
||||
if (modelUnusableUntil && now < modelUnusableUntil) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Also check profile-level cooldown (applies to all models)
|
||||
const profileStats = store.usageStats?.[profileId];
|
||||
if (!profileStats) return false;
|
||||
const profileUnusableUntil = resolveProfileUnusableUntil(profileStats);
|
||||
return profileUnusableUntil ? now < profileUnusableUntil : false;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -188,21 +225,28 @@ function computeNextProfileUsageStats(params: {
|
||||
* Mark a profile as failed for a specific reason. Billing failures are treated
|
||||
* as "disabled" (longer backoff) vs the regular cooldown window.
|
||||
*/
|
||||
/**
|
||||
* Mark a profile as failed for a specific reason. Billing failures are treated
|
||||
* as "disabled" (longer backoff) vs the regular cooldown window.
|
||||
* When model is provided, cooldown is tracked per (profile + model) combination.
|
||||
*/
|
||||
export async function markAuthProfileFailure(params: {
|
||||
store: AuthProfileStore;
|
||||
profileId: string;
|
||||
model?: string;
|
||||
reason: AuthProfileFailureReason;
|
||||
cfg?: MoltbotConfig;
|
||||
agentDir?: string;
|
||||
}): Promise<void> {
|
||||
const { store, profileId, reason, agentDir, cfg } = params;
|
||||
const { store, profileId, model, reason, agentDir, cfg } = params;
|
||||
const key = cooldownKey(profileId, model);
|
||||
const updated = await updateAuthProfileStoreWithLock({
|
||||
agentDir,
|
||||
updater: (freshStore) => {
|
||||
const profile = freshStore.profiles[profileId];
|
||||
if (!profile) return false;
|
||||
freshStore.usageStats = freshStore.usageStats ?? {};
|
||||
const existing = freshStore.usageStats[profileId] ?? {};
|
||||
const existing = freshStore.usageStats[key] ?? {};
|
||||
|
||||
const now = Date.now();
|
||||
const providerKey = normalizeProviderId(profile.provider);
|
||||
@ -211,7 +255,7 @@ export async function markAuthProfileFailure(params: {
|
||||
providerId: providerKey,
|
||||
});
|
||||
|
||||
freshStore.usageStats[profileId] = computeNextProfileUsageStats({
|
||||
freshStore.usageStats[key] = computeNextProfileUsageStats({
|
||||
existing,
|
||||
now,
|
||||
reason,
|
||||
@ -227,7 +271,7 @@ export async function markAuthProfileFailure(params: {
|
||||
if (!store.profiles[profileId]) return;
|
||||
|
||||
store.usageStats = store.usageStats ?? {};
|
||||
const existing = store.usageStats[profileId] ?? {};
|
||||
const existing = store.usageStats[key] ?? {};
|
||||
const now = Date.now();
|
||||
const providerKey = normalizeProviderId(store.profiles[profileId]?.provider ?? "");
|
||||
const cfgResolved = resolveAuthCooldownConfig({
|
||||
@ -235,7 +279,7 @@ export async function markAuthProfileFailure(params: {
|
||||
providerId: providerKey,
|
||||
});
|
||||
|
||||
store.usageStats[profileId] = computeNextProfileUsageStats({
|
||||
store.usageStats[key] = computeNextProfileUsageStats({
|
||||
existing,
|
||||
now,
|
||||
reason,
|
||||
@ -249,14 +293,22 @@ export async function markAuthProfileFailure(params: {
|
||||
* Cooldown times: 1min, 5min, 25min, max 1 hour.
|
||||
* Uses store lock to avoid overwriting concurrent usage updates.
|
||||
*/
|
||||
/**
|
||||
* Mark a profile as failed/rate-limited. Applies exponential backoff cooldown.
|
||||
* Cooldown times: 1min, 5min, 25min, max 1 hour.
|
||||
* Uses store lock to avoid overwriting concurrent usage updates.
|
||||
* When model is provided, cooldown is tracked per (profile + model) combination.
|
||||
*/
|
||||
export async function markAuthProfileCooldown(params: {
|
||||
store: AuthProfileStore;
|
||||
profileId: string;
|
||||
model?: string;
|
||||
agentDir?: string;
|
||||
}): Promise<void> {
|
||||
await markAuthProfileFailure({
|
||||
store: params.store,
|
||||
profileId: params.profileId,
|
||||
model: params.model,
|
||||
reason: "unknown",
|
||||
agentDir: params.agentDir,
|
||||
});
|
||||
@ -266,19 +318,26 @@ export async function markAuthProfileCooldown(params: {
|
||||
* Clear cooldown for a profile (e.g., manual reset).
|
||||
* Uses store lock to avoid overwriting concurrent usage updates.
|
||||
*/
|
||||
/**
|
||||
* Clear cooldown for a profile (e.g., manual reset).
|
||||
* Uses store lock to avoid overwriting concurrent usage updates.
|
||||
* When model is provided, clears the per-model cooldown key.
|
||||
*/
|
||||
export async function clearAuthProfileCooldown(params: {
|
||||
store: AuthProfileStore;
|
||||
profileId: string;
|
||||
model?: string;
|
||||
agentDir?: string;
|
||||
}): Promise<void> {
|
||||
const { store, profileId, agentDir } = params;
|
||||
const { store, profileId, model, agentDir } = params;
|
||||
const key = cooldownKey(profileId, model);
|
||||
const updated = await updateAuthProfileStoreWithLock({
|
||||
agentDir,
|
||||
updater: (freshStore) => {
|
||||
if (!freshStore.usageStats?.[profileId]) return false;
|
||||
if (!freshStore.usageStats?.[key]) return false;
|
||||
|
||||
freshStore.usageStats[profileId] = {
|
||||
...freshStore.usageStats[profileId],
|
||||
freshStore.usageStats[key] = {
|
||||
...freshStore.usageStats[key],
|
||||
errorCount: 0,
|
||||
cooldownUntil: undefined,
|
||||
};
|
||||
@ -289,10 +348,10 @@ export async function clearAuthProfileCooldown(params: {
|
||||
store.usageStats = updated.usageStats;
|
||||
return;
|
||||
}
|
||||
if (!store.usageStats?.[profileId]) return;
|
||||
if (!store.usageStats?.[key]) return;
|
||||
|
||||
store.usageStats[profileId] = {
|
||||
...store.usageStats[profileId],
|
||||
store.usageStats[key] = {
|
||||
...store.usageStats[key],
|
||||
errorCount: 0,
|
||||
cooldownUntil: undefined,
|
||||
};
|
||||
|
||||
@ -240,6 +240,66 @@ describe("runWithModelFallback", () => {
|
||||
}
|
||||
});
|
||||
|
||||
it("allows different models from same provider when only one model is in cooldown", async () => {
|
||||
const tempDir = await fs.mkdtemp(path.join(os.tmpdir(), "moltbot-auth-"));
|
||||
const provider = `per-model-cooldown-${crypto.randomUUID()}`;
|
||||
const profileId = `${provider}:default`;
|
||||
|
||||
const store: AuthProfileStore = {
|
||||
version: AUTH_STORE_VERSION,
|
||||
profiles: {
|
||||
[profileId]: {
|
||||
type: "api_key",
|
||||
provider,
|
||||
key: "test-key",
|
||||
},
|
||||
},
|
||||
usageStats: {
|
||||
// Only model-a is in cooldown (per-model key)
|
||||
[`${profileId}:model-a`]: {
|
||||
cooldownUntil: Date.now() + 60_000,
|
||||
},
|
||||
// model-b has no cooldown
|
||||
},
|
||||
};
|
||||
|
||||
saveAuthProfileStore(store, tempDir);
|
||||
|
||||
const cfg = makeCfg({
|
||||
agents: {
|
||||
defaults: {
|
||||
model: {
|
||||
primary: `${provider}/model-a`,
|
||||
fallbacks: [`${provider}/model-b`],
|
||||
},
|
||||
},
|
||||
},
|
||||
});
|
||||
|
||||
const run = vi.fn().mockImplementation(async (providerId, modelId) => {
|
||||
if (modelId === "model-b") return "ok";
|
||||
throw new Error(`unexpected model: ${providerId}/${modelId}`);
|
||||
});
|
||||
|
||||
try {
|
||||
const result = await runWithModelFallback({
|
||||
cfg,
|
||||
provider,
|
||||
model: "model-a",
|
||||
agentDir: tempDir,
|
||||
run,
|
||||
});
|
||||
|
||||
expect(result.result).toBe("ok");
|
||||
// model-a should be skipped (in cooldown), model-b should be tried
|
||||
expect(run.mock.calls).toEqual([[provider, "model-b"]]);
|
||||
expect(result.attempts[0]?.reason).toBe("rate_limit");
|
||||
expect(result.attempts[0]?.model).toBe("model-a");
|
||||
} finally {
|
||||
await fs.rm(tempDir, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
|
||||
it("does not append configured primary when fallbacksOverride is set", async () => {
|
||||
const cfg = makeCfg({
|
||||
agents: {
|
||||
|
||||
@ -231,14 +231,16 @@ export async function runWithModelFallback<T>(params: {
|
||||
store: authStore,
|
||||
provider: candidate.provider,
|
||||
});
|
||||
const isAnyProfileAvailable = profileIds.some((id) => !isProfileInCooldown(authStore, id));
|
||||
const isAnyProfileAvailable = profileIds.some(
|
||||
(id) => !isProfileInCooldown(authStore, id, candidate.model),
|
||||
);
|
||||
|
||||
if (profileIds.length > 0 && !isAnyProfileAvailable) {
|
||||
// All profiles for this provider are in cooldown; skip without attempting
|
||||
// All profiles for this provider+model are in cooldown; skip without attempting
|
||||
attempts.push({
|
||||
provider: candidate.provider,
|
||||
model: candidate.model,
|
||||
error: `Provider ${candidate.provider} is in cooldown (all profiles unavailable)`,
|
||||
error: `${candidate.provider}/${candidate.model} is in cooldown (all profiles unavailable)`,
|
||||
reason: "rate_limit",
|
||||
});
|
||||
continue;
|
||||
|
||||
@ -474,6 +474,7 @@ export async function runEmbeddedPiAgent(
|
||||
await markAuthProfileFailure({
|
||||
store: authStore,
|
||||
profileId: lastProfileId,
|
||||
model: modelId,
|
||||
reason: promptFailoverReason,
|
||||
cfg: params.config,
|
||||
agentDir: params.agentDir,
|
||||
@ -561,6 +562,7 @@ export async function runEmbeddedPiAgent(
|
||||
await markAuthProfileFailure({
|
||||
store: authStore,
|
||||
profileId: lastProfileId,
|
||||
model: modelId,
|
||||
reason,
|
||||
cfg: params.config,
|
||||
agentDir: params.agentDir,
|
||||
|
||||
Loading…
Reference in New Issue
Block a user