This commit is contained in:
Garnet Liu 2026-01-29 23:33:33 -08:00 committed by GitHub
commit 2cf4685a76
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 193 additions and 8 deletions

View File

@ -72,6 +72,7 @@ Status: stable.
- **BREAKING:** Gateway auth mode "none" is removed; gateway now requires token/password (Tailscale Serve identity still allowed).
### Fixes
- Gateway: prevent crash when all auth profiles are in cooldown; log warning and continue. (#2811)
- Telegram: avoid silent empty replies by tracking normalization skips before fallback. (#3796)
- Telegram: scope native skill commands to bound agent per bot. (#4360) Thanks @robhparker.
- Mentions: honor mentionPatterns even when explicit mentions are present. (#3303) Thanks @HirokiKobayashi-R.

View File

@ -0,0 +1,91 @@
import { describe, it, expect } from "vitest";
import { AllModelsFailedError, isAllModelsFailedError } from "./model-fallback-error.js";
/**
 * Behavioural checks for `AllModelsFailedError` and the
 * `isAllModelsFailedError` type guard: constructor field wiring, the
 * cooldown-only predicate, cause propagation and guard results.
 */
describe("AllModelsFailedError", () => {
  it("creates error with cooldown-only flag", () => {
    const retryDelayMs = 300000;
    const failure = new AllModelsFailedError("All models failed", {
      attempts: [
        {
          provider: "anthropic",
          model: "claude-3-5",
          error: "cooldown",
          reason: "rate_limit",
        },
      ],
      allInCooldown: true,
      retryAfterMs: retryDelayMs,
    });

    // Identity of the error.
    expect(failure.name).toBe("AllModelsFailedError");
    // Cooldown flag and the derived predicate agree when attempts exist.
    expect(failure.allInCooldown).toBe(true);
    expect(failure.isCooldownOnly()).toBe(true);
    expect(isAllModelsFailedError(failure)).toBe(true);
    // The retry hint is stored verbatim.
    expect(failure.retryAfterMs).toBe(retryDelayMs);
  });

  it("distinguishes mixed failures", () => {
    const rateLimited = {
      provider: "anthropic",
      model: "c",
      error: "cooldown",
      reason: "rate_limit" as const,
    };
    const unauthorized = {
      provider: "openai",
      model: "gpt-4",
      error: "auth",
      reason: "auth" as const,
    };
    const failure = new AllModelsFailedError("msg", {
      attempts: [rateLimited, unauthorized],
      allInCooldown: false,
    });

    expect(failure.isCooldownOnly()).toBe(false);
    expect(failure.allInCooldown).toBe(false);
  });

  it("returns false for isCooldownOnly when no attempts", () => {
    // The flag alone is not enough: with zero attempts the predicate is false.
    const failure = new AllModelsFailedError("msg", {
      attempts: [],
      allInCooldown: true,
    });

    expect(failure.isCooldownOnly()).toBe(false);
  });

  it("preserves cause in error chain", () => {
    const rootCause = new Error("Original error");
    const failure = new AllModelsFailedError("All models failed", {
      attempts: [{ provider: "anthropic", model: "c", error: "cooldown", reason: "rate_limit" }],
      allInCooldown: true,
      cause: rootCause,
    });

    expect(failure.cause).toBe(rootCause);
  });

  it("includes all attempt details", () => {
    const recorded = [
      {
        provider: "anthropic",
        model: "c",
        error: "cooldown",
        reason: "rate_limit" as const,
        status: 429,
        code: "rate_limit",
      },
      {
        provider: "openai",
        model: "gpt-4",
        error: "auth",
        reason: "auth" as const,
        status: 401,
      },
    ];
    const failure = new AllModelsFailedError("msg", {
      attempts: recorded,
      allInCooldown: false,
    });

    expect(failure.attempts).toEqual(recorded);
  });

  it("type guard works correctly", () => {
    const failure = new AllModelsFailedError("msg", {
      attempts: [{ provider: "anthropic", model: "c", error: "c", reason: "rate_limit" }],
      allInCooldown: true,
    });
    expect(isAllModelsFailedError(failure)).toBe(true);

    // Anything that is not an instance of the class must be rejected.
    const plainError = new Error("regular");
    expect(isAllModelsFailedError(plainError)).toBe(false);
    expect(isAllModelsFailedError(null)).toBe(false);
    expect(isAllModelsFailedError(undefined)).toBe(false);
    expect(isAllModelsFailedError("string")).toBe(false);
  });
});

View File

@ -0,0 +1,38 @@
import type { FailoverReason } from "./pi-embedded-helpers/types.js";
/**
 * Error describing a run in which every model attempt failed.
 *
 * Carries the per-attempt details plus a flag and optional retry hint that
 * let callers distinguish a pure cooldown situation from mixed failures.
 */
export class AllModelsFailedError extends Error {
  /** Details of each failed attempt, stored exactly as supplied by the caller. */
  readonly attempts: Array<{
    provider: string;
    model: string;
    error: string;
    reason?: FailoverReason;
    status?: number;
    code?: string;
  }>;
  /** Caller-supplied flag: true when all failures were attributed to cooldown. */
  readonly allInCooldown: boolean;
  /** Optional caller-supplied delay, in milliseconds, before a retry may succeed. */
  readonly retryAfterMs?: number;

  /**
   * @param message Human-readable error message.
   * @param params.attempts Failed attempts to attach to the error.
   * @param params.allInCooldown Whether every failure was a cooldown.
   * @param params.retryAfterMs Optional retry delay in milliseconds.
   * @param params.cause Optional underlying error; attached as `cause` only when provided.
   */
  constructor(
    message: string,
    params: {
      attempts: AllModelsFailedError["attempts"];
      allInCooldown: boolean;
      retryAfterMs?: number;
      cause?: unknown;
    },
  ) {
    // Forward the options bag only when a cause was actually supplied.
    // `{ cause: undefined }` would still install an own `cause` property, so
    // `"cause" in err` would be true and inspected output would show
    // `[cause]: undefined` for errors that have no cause at all.
    super(message, params.cause === undefined ? undefined : { cause: params.cause });
    this.name = "AllModelsFailedError";
    this.attempts = params.attempts;
    this.allInCooldown = params.allInCooldown;
    this.retryAfterMs = params.retryAfterMs;
  }

  /**
   * @returns `true` only when `allInCooldown` is set AND at least one attempt
   * was recorded; an empty attempt list never counts as cooldown-only.
   */
  isCooldownOnly(): boolean {
    return this.allInCooldown && this.attempts.length > 0;
  }
}
/**
 * Type guard for {@link AllModelsFailedError}.
 *
 * @param err Any value, typically something caught or a rejection reason.
 * @returns `true` when `err` is an instance of `AllModelsFailedError`,
 * narrowing its type for the caller; `false` for everything else.
 */
export function isAllModelsFailedError(err: unknown): err is AllModelsFailedError {
  if (err instanceof AllModelsFailedError) {
    return true;
  }
  return false;
}

View File

@ -1,4 +1,9 @@
import type { OpenClawConfig } from "../config/config.js";
import {
ensureAuthProfileStore,
isProfileInCooldown,
resolveAuthProfileOrder,
} from "./auth-profiles.js";
import { DEFAULT_MODEL, DEFAULT_PROVIDER } from "./defaults.js";
import {
coerceToFailoverError,
@ -6,6 +11,7 @@ import {
isFailoverError,
isTimeoutError,
} from "./failover-error.js";
import { AllModelsFailedError } from "./model-fallback-error.js";
import {
buildModelAliasIndex,
modelKey,
@ -14,11 +20,6 @@ import {
resolveModelRefFromString,
} from "./model-selection.js";
import type { FailoverReason } from "./pi-embedded-helpers.js";
import {
ensureAuthProfileStore,
isProfileInCooldown,
resolveAuthProfileOrder,
} from "./auth-profiles.js";
type ModelCandidate = {
provider: string;
@ -293,9 +294,46 @@ export async function runWithModelFallback<T>(params: {
)
.join(" | ")
: "unknown";
throw new Error(`All models failed (${attempts.length || candidates.length}): ${summary}`, {
cause: lastError instanceof Error ? lastError : undefined,
});
// Determine if all failures are due to cooldown (rate_limit)
const allCooldown = attempts.length > 0 && attempts.every((a) => a.reason === "rate_limit");
// Calculate earliest retry time if all in cooldown
let retryAfterMs: number | undefined;
if (allCooldown && authStore) {
const profileIds = new Set<string>();
for (const candidate of candidates) {
const profiles = resolveAuthProfileOrder({
cfg: params.cfg,
store: authStore,
provider: candidate.provider,
});
profiles.forEach((id) => profileIds.add(id));
}
let earliest: number | null = null;
for (const id of profileIds) {
const stats = authStore.usageStats?.[id];
if (!stats) continue;
const unusableUntil = Math.max(stats.cooldownUntil ?? 0, stats.disabledUntil ?? 0);
if (unusableUntil > 0 && (earliest === null || unusableUntil < earliest)) {
earliest = unusableUntil;
}
}
if (earliest) {
retryAfterMs = Math.max(0, earliest - Date.now());
}
}
throw new AllModelsFailedError(
`All models failed (${attempts.length || candidates.length}): ${summary}`,
{
attempts,
allInCooldown: allCooldown,
retryAfterMs,
cause: lastError instanceof Error ? lastError : undefined,
},
);
}
export async function runWithImageModelFallback<T>(params: {

View File

@ -1,5 +1,6 @@
import process from "node:process";
import { isAllModelsFailedError } from "../agents/model-fallback-error.js";
import { extractErrorCode, formatUncaughtError } from "./errors.js";
type UnhandledRejectionHandler = (reason: unknown) => boolean;
@ -134,6 +135,22 @@ export function installUnhandledRejectionHandler(): void {
return;
}
// Handle AllModelsFailedError - don't crash on cooldown
if (isAllModelsFailedError(reason)) {
if (reason.allInCooldown) {
const mins = reason.retryAfterMs ? Math.round(reason.retryAfterMs / 60000) : "unknown";
console.warn(
`[openclaw] All models in cooldown - gateway continuing. ` +
`Retry after ${mins}min. ` +
`Providers: ${reason.attempts.map((a) => a.provider).join(", ")}`,
);
return; // Don't exit
}
// Mixed failures (not all cooldown) - log but don't crash
console.warn("[openclaw] All models failed (mixed reasons):", formatUncaughtError(reason));
return;
}
if (isFatalError(reason)) {
console.error("[openclaw] FATAL unhandled rejection:", formatUncaughtError(reason));
process.exit(1);