Compare commits

...

4 Commits

Author SHA1 Message Date
Gustavo Madeira Santana
a087540ef3 fix: skip cooldowned providers during model failover (#2143) (thanks @YiWang24) 2026-01-26 21:59:20 -05:00
Gustavo Madeira Santana
ff0cbee297 Agents: skip cooldowned providers in fallback 2026-01-26 21:56:20 -05:00
YiWang24
9227026af3 fix(agents): correct imports and API usage for cooldown check 2026-01-26 21:56:20 -05:00
YiWang24
23f65cc90b feat(agents): skip cooldowned providers during failover
When all auth profiles for a provider are in cooldown, the failover
mechanism now skips that provider immediately rather than attempting
and waiting for the cooldown error. This prevents long delays when
multiple OAuth providers fail in sequence.
2026-01-26 21:56:20 -05:00
8 changed files with 159 additions and 0 deletions

View File

@ -66,6 +66,7 @@ Status: unreleased.
- CLI: avoid loading config for global help/version while registering plugin commands. (#2212) Thanks @dial481.
- Agents: include memory.md when bootstrapping memory context. (#2318) Thanks @czekaj.
- Agents: release session locks on process termination and cover more signals. (#2483) Thanks @janeexai.
- Agents: skip cooldowned providers during model failover. (#2143) Thanks @YiWang24.
- Telegram: harden polling + retry behavior for transient network errors and Node 22 transport issues. (#2420) Thanks @techboss.
- Telegram: wrap reasoning italics per line to avoid raw underscores. (#2181) Thanks @YuriNachos.
- Telegram: centralize API error logging for delivery and bot calls. (#2492) Thanks @altryne.

View File

@ -1,6 +1,13 @@
import crypto from "node:crypto";
import fs from "node:fs/promises";
import os from "node:os";
import path from "node:path";
import { describe, expect, it, vi } from "vitest";
import type { ClawdbotConfig } from "../config/config.js";
import type { AuthProfileStore } from "./auth-profiles.js";
import { saveAuthProfileStore } from "./auth-profiles.js";
import { AUTH_STORE_VERSION } from "./auth-profiles/constants.js";
import { runWithModelFallback } from "./model-fallback.js";
function makeCfg(overrides: Partial<ClawdbotConfig> = {}): ClawdbotConfig {
@ -117,6 +124,122 @@ describe("runWithModelFallback", () => {
expect(run.mock.calls[1]?.[1]).toBe("claude-haiku-3-5");
});
it("skips providers when all profiles are in cooldown", async () => {
const tempDir = await fs.mkdtemp(path.join(os.tmpdir(), "clawdbot-auth-"));
const provider = `cooldown-test-${crypto.randomUUID()}`;
const profileId = `${provider}:default`;
const store: AuthProfileStore = {
version: AUTH_STORE_VERSION,
profiles: {
[profileId]: {
type: "api_key",
provider,
key: "test-key",
},
},
usageStats: {
[profileId]: {
cooldownUntil: Date.now() + 60_000,
},
},
};
saveAuthProfileStore(store, tempDir);
const cfg = makeCfg({
agents: {
defaults: {
model: {
primary: `${provider}/m1`,
fallbacks: ["fallback/ok-model"],
},
},
},
});
const run = vi.fn().mockImplementation(async (providerId, modelId) => {
if (providerId === "fallback") return "ok";
throw new Error(`unexpected provider: ${providerId}/${modelId}`);
});
try {
const result = await runWithModelFallback({
cfg,
provider,
model: "m1",
agentDir: tempDir,
run,
});
expect(result.result).toBe("ok");
expect(run.mock.calls).toEqual([["fallback", "ok-model"]]);
expect(result.attempts[0]?.reason).toBe("rate_limit");
} finally {
await fs.rm(tempDir, { recursive: true, force: true });
}
});
it("does not skip when any profile is available", async () => {
const tempDir = await fs.mkdtemp(path.join(os.tmpdir(), "clawdbot-auth-"));
const provider = `cooldown-mixed-${crypto.randomUUID()}`;
const profileA = `${provider}:a`;
const profileB = `${provider}:b`;
const store: AuthProfileStore = {
version: AUTH_STORE_VERSION,
profiles: {
[profileA]: {
type: "api_key",
provider,
key: "key-a",
},
[profileB]: {
type: "api_key",
provider,
key: "key-b",
},
},
usageStats: {
[profileA]: {
cooldownUntil: Date.now() + 60_000,
},
},
};
saveAuthProfileStore(store, tempDir);
const cfg = makeCfg({
agents: {
defaults: {
model: {
primary: `${provider}/m1`,
fallbacks: ["fallback/ok-model"],
},
},
},
});
const run = vi.fn().mockImplementation(async (providerId) => {
if (providerId === provider) return "ok";
return "unexpected";
});
try {
const result = await runWithModelFallback({
cfg,
provider,
model: "m1",
agentDir: tempDir,
run,
});
expect(result.result).toBe("ok");
expect(run.mock.calls).toEqual([[provider, "m1"]]);
expect(result.attempts).toEqual([]);
} finally {
await fs.rm(tempDir, { recursive: true, force: true });
}
});
it("does not append configured primary when fallbacksOverride is set", async () => {
const cfg = makeCfg({
agents: {

View File

@ -14,6 +14,11 @@ import {
resolveModelRefFromString,
} from "./model-selection.js";
import type { FailoverReason } from "./pi-embedded-helpers.js";
import {
ensureAuthProfileStore,
isProfileInCooldown,
resolveAuthProfileOrder,
} from "./auth-profiles.js";
type ModelCandidate = {
provider: string;
@ -189,6 +194,7 @@ export async function runWithModelFallback<T>(params: {
cfg: ClawdbotConfig | undefined;
provider: string;
model: string;
agentDir?: string;
/** Optional explicit fallbacks list; when provided (even empty), replaces agents.defaults.model.fallbacks. */
fallbacksOverride?: string[];
run: (provider: string, model: string) => Promise<T>;
@ -211,11 +217,33 @@ export async function runWithModelFallback<T>(params: {
model: params.model,
fallbacksOverride: params.fallbacksOverride,
});
const authStore = params.cfg
? ensureAuthProfileStore(params.agentDir, { allowKeychainPrompt: false })
: null;
const attempts: FallbackAttempt[] = [];
let lastError: unknown;
for (let i = 0; i < candidates.length; i += 1) {
const candidate = candidates[i] as ModelCandidate;
if (authStore) {
const profileIds = resolveAuthProfileOrder({
cfg: params.cfg,
store: authStore,
provider: candidate.provider,
});
const isAnyProfileAvailable = profileIds.some((id) => !isProfileInCooldown(authStore, id));
if (profileIds.length > 0 && !isAnyProfileAvailable) {
// All profiles for this provider are in cooldown; skip without attempting
attempts.push({
provider: candidate.provider,
model: candidate.model,
error: `Provider ${candidate.provider} is in cooldown (all profiles unavailable)`,
reason: "rate_limit",
});
continue;
}
}
try {
const result = await params.run(candidate.provider, candidate.model);
return {

View File

@ -138,6 +138,7 @@ export async function runAgentTurnWithFallback(params: {
cfg: params.followupRun.run.config,
provider: params.followupRun.run.provider,
model: params.followupRun.run.model,
agentDir: params.followupRun.run.agentDir,
fallbacksOverride: resolveAgentModelFallbacksOverride(
params.followupRun.run.config,
resolveAgentIdFromSessionKey(params.followupRun.run.sessionKey),

View File

@ -92,6 +92,7 @@ export async function runMemoryFlushIfNeeded(params: {
cfg: params.followupRun.run.config,
provider: params.followupRun.run.provider,
model: params.followupRun.run.model,
agentDir: params.followupRun.run.agentDir,
fallbacksOverride: resolveAgentModelFallbacksOverride(
params.followupRun.run.config,
resolveAgentIdFromSessionKey(params.followupRun.run.sessionKey),

View File

@ -129,6 +129,7 @@ export function createFollowupRunner(params: {
cfg: queued.run.config,
provider: queued.run.provider,
model: queued.run.model,
agentDir: queued.run.agentDir,
fallbacksOverride: resolveAgentModelFallbacksOverride(
queued.run.config,
resolveAgentIdFromSessionKey(queued.run.sessionKey),

View File

@ -382,6 +382,7 @@ export async function agentCommand(
cfg,
provider,
model,
agentDir,
fallbacksOverride: resolveAgentModelFallbacksOverride(cfg, sessionAgentId),
run: (providerOverride, modelOverride) => {
if (isCliProvider(providerOverride, cfg)) {

View File

@ -1,5 +1,6 @@
import {
resolveAgentConfig,
resolveAgentDir,
resolveAgentModelFallbacksOverride,
resolveAgentWorkspaceDir,
resolveDefaultAgentId,
@ -128,6 +129,7 @@ export async function runCronIsolatedAgentTurn(params: {
});
const workspaceDirRaw = resolveAgentWorkspaceDir(params.cfg, agentId);
const agentDir = resolveAgentDir(params.cfg, agentId);
const workspace = await ensureAgentWorkspace({
dir: workspaceDirRaw,
ensureBootstrapFiles: !agentCfg?.skipBootstrap,
@ -330,6 +332,7 @@ export async function runCronIsolatedAgentTurn(params: {
cfg: cfgWithAgentDefaults,
provider,
model,
agentDir,
fallbacksOverride: resolveAgentModelFallbacksOverride(params.cfg, agentId),
run: (providerOverride, modelOverride) => {
if (isCliProvider(providerOverride, cfgWithAgentDefaults)) {