Compare commits

...

5 Commits

Author SHA1 Message Date
Peter Steinberger
8aa2c0229d docs: update changelog for rate-limit fallback 2026-01-05 22:33:55 +01:00
Peter Steinberger
485481895e style: format PR 223 changes 2026-01-05 22:33:51 +01:00
CI
f01784369d fix(model): treat quota errors as rate limits 2026-01-05 19:16:38 +01:00
CI
0a853df6c9 fix(model): retry with supported thinking level 2026-01-05 18:54:23 +01:00
CI
25ada49e36 fix: retry model fallback on rate limits 2026-01-05 18:04:36 +01:00
5 changed files with 478 additions and 298 deletions

View File

@ -5,6 +5,7 @@
## Unreleased ## Unreleased
### Fixes ### Fixes
- Model: retry fallback on rate-limit/quota errors and unsupported thinking levels. (#223) — thanks @augard
- Onboarding: resolve CLI entrypoint when running via `npx` so gateway daemon install works without a build step. - Onboarding: resolve CLI entrypoint when running via `npx` so gateway daemon install works without a build step.
- TUI: migrate key handling to the updated pi-tui Key matcher API. - TUI: migrate key handling to the updated pi-tui Key matcher API.
- macOS: prefer gateway config reads/writes in local mode (fall back to disk if the gateway is unavailable). - macOS: prefer gateway config reads/writes in local mode (fall back to disk if the gateway is unavailable).

View File

@ -0,0 +1,77 @@
import type { AssistantMessage } from "@mariozechner/pi-ai";
import { describe, expect, it } from "vitest";
import type { ThinkLevel } from "../auto-reply/thinking.js";
import {
isRateLimitAssistantError,
pickFallbackThinkingLevel,
} from "./pi-embedded-helpers.js";
// Build a minimal AssistantMessage for tests: an errored assistant turn by
// default, with any fields overridable per test case.
const asAssistant = (overrides: Partial<AssistantMessage>): AssistantMessage => {
  const base = { role: "assistant", stopReason: "error" };
  return Object.assign(base, overrides) as AssistantMessage;
};
describe("isRateLimitAssistantError", () => {
it("detects 429 rate limit payloads", () => {
const msg = asAssistant({
errorMessage:
'429 {"type":"error","error":{"type":"rate_limit_error","message":"This request would exceed your account\'s rate limit. Please try again later."}}',
});
expect(isRateLimitAssistantError(msg)).toBe(true);
});
it("detects human-readable rate limit messages", () => {
const msg = asAssistant({
errorMessage: "Too many requests. Rate limit exceeded.",
});
expect(isRateLimitAssistantError(msg)).toBe(true);
});
it("detects quota exceeded messages", () => {
const msg = asAssistant({
errorMessage:
"You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.",
});
expect(isRateLimitAssistantError(msg)).toBe(true);
});
it("returns false for non-error messages", () => {
const msg = asAssistant({
stopReason: "end_turn",
errorMessage: "rate limit",
});
expect(isRateLimitAssistantError(msg)).toBe(false);
});
});
describe("pickFallbackThinkingLevel", () => {
  // Small driver so each case reads as (message, already-attempted levels).
  const run = (message: string, attempted: ThinkLevel[]) =>
    pickFallbackThinkingLevel({
      message,
      attempted: new Set<ThinkLevel>(attempted),
    });
  it("selects the first supported thinking level", () => {
    expect(
      run(
        "Unsupported value: 'low' is not supported with the 'gpt-5.2-pro' model. Supported values are: 'medium', 'high', and 'xhigh'.",
        ["low"],
      ),
    ).toBe("medium");
  });
  it("skips already attempted levels", () => {
    expect(
      run("Supported values are: 'medium', 'high', and 'xhigh'.", [
        "low",
        "medium",
      ]),
    ).toBe("high");
  });
  it("returns undefined when no supported values are found", () => {
    expect(run("Request failed.", ["low"])).toBeUndefined();
  });
});

View File

@ -6,6 +6,10 @@ import type {
AgentToolResult, AgentToolResult,
} from "@mariozechner/pi-agent-core"; } from "@mariozechner/pi-agent-core";
import type { AssistantMessage } from "@mariozechner/pi-ai"; import type { AssistantMessage } from "@mariozechner/pi-ai";
import {
normalizeThinkLevel,
type ThinkLevel,
} from "../auto-reply/thinking.js";
import { sanitizeContentBlocksImages } from "./tool-images.js"; import { sanitizeContentBlocksImages } from "./tool-images.js";
import type { WorkspaceBootstrapFile } from "./workspace.js"; import type { WorkspaceBootstrapFile } from "./workspace.js";
@ -109,3 +113,50 @@ export function formatAssistantErrorText(
// Keep it short for WhatsApp. // Keep it short for WhatsApp.
return raw.length > 600 ? `${raw.slice(0, 600)}` : raw; return raw.length > 600 ? `${raw.slice(0, 600)}` : raw;
} }
/**
 * Returns true when an assistant message represents a rate-limit or quota
 * failure, i.e. an error the caller may retry against a fallback model.
 *
 * Matches: "rate limit" / "rate-limit" / "rate_limit" / "ratelimit",
 * "too many requests", a word-bounded HTTP 429 status, and OpenAI-style
 * "exceeded your current quota" messages.
 */
export function isRateLimitAssistantError(
  msg: AssistantMessage | undefined,
): boolean {
  // Only turns that actually stopped on an error can be rate limits.
  if (!msg || msg.stopReason !== "error") return false;
  const raw = (msg.errorMessage ?? "").toLowerCase();
  if (!raw) return false;
  // `rate[-_ ]?limit` also catches the hyphenated and concatenated spellings
  // providers emit; `\b429\b` is word-bounded so the digits inside request
  // ids or hashes (e.g. "sha4290") do not false-positive.
  return (
    /rate[-_ ]?limit|too many requests|\b429\b/.test(raw) ||
    raw.includes("exceeded your current quota")
  );
}
// Pull the list of values out of a "supported values are: ..." (or
// "supported values: ...") error message. Quoted entries win; otherwise the
// fragment is split on commas and the word "and", with surrounding
// punctuation stripped.
function extractSupportedValues(raw: string): string[] {
  const listMatch =
    /supported values are:\s*([^\n.]+)/i.exec(raw) ??
    /supported values:\s*([^\n.]+)/i.exec(raw);
  const fragment = listMatch?.[1];
  if (!fragment) return [];
  // Prefer explicitly quoted values ('medium', "high", ...). If any quoted
  // span was present we commit to this path even when every span trims away,
  // mirroring the plain-split fallback only for unquoted fragments.
  let sawQuoted = false;
  const quoted: string[] = [];
  for (const entry of fragment.matchAll(/['"]([^'"]+)['"]/g)) {
    sawQuoted = true;
    const value = entry[1]?.trim();
    if (value) quoted.push(value);
  }
  if (sawQuoted) return quoted;
  const plain: string[] = [];
  for (const piece of fragment.split(/,|\band\b/gi)) {
    const word = piece.replace(/^[^a-zA-Z]+|[^a-zA-Z]+$/g, "").trim();
    if (word) plain.push(word);
  }
  return plain;
}
/**
 * Given a provider error message listing supported thinking levels, pick the
 * first level that normalizes to a known ThinkLevel and has not already been
 * attempted. Returns undefined when the message yields no usable level.
 */
export function pickFallbackThinkingLevel(params: {
  message?: string;
  attempted: Set<ThinkLevel>;
}): ThinkLevel | undefined {
  const text = params.message?.trim();
  if (!text) return undefined;
  const candidates = extractSupportedValues(text);
  if (candidates.length === 0) return undefined;
  for (const candidate of candidates) {
    const level = normalizeThinkLevel(candidate);
    // Skip unrecognized values and levels we have already tried.
    if (level && !params.attempted.has(level)) return level;
  }
  return undefined;
}

View File

@ -32,6 +32,8 @@ import {
buildBootstrapContextFiles, buildBootstrapContextFiles,
ensureSessionHeader, ensureSessionHeader,
formatAssistantErrorText, formatAssistantErrorText,
isRateLimitAssistantError,
pickFallbackThinkingLevel,
sanitizeSessionMessagesImages, sanitizeSessionMessagesImages,
} from "./pi-embedded-helpers.js"; } from "./pi-embedded-helpers.js";
import { import {
@ -317,10 +319,15 @@ export async function runEmbeddedPiAgent(params: {
const apiKey = await getApiKeyForModel(model, authStorage); const apiKey = await getApiKeyForModel(model, authStorage);
authStorage.setRuntimeApiKey(model.provider, apiKey); authStorage.setRuntimeApiKey(model.provider, apiKey);
const thinkingLevel = mapThinkingLevel(params.thinkLevel); let thinkLevel = params.thinkLevel ?? "off";
const attemptedThinking = new Set<ThinkLevel>();
while (true) {
const thinkingLevel = mapThinkingLevel(thinkLevel);
attemptedThinking.add(thinkLevel);
log.debug( log.debug(
`embedded run start: runId=${params.runId} sessionId=${params.sessionId} provider=${provider} model=${modelId} surface=${params.surface ?? "unknown"}`, `embedded run start: runId=${params.runId} sessionId=${params.sessionId} provider=${provider} model=${modelId} thinking=${thinkLevel} surface=${params.surface ?? "unknown"}`,
); );
await fs.mkdir(resolvedWorkspace, { recursive: true }); await fs.mkdir(resolvedWorkspace, { recursive: true });
@ -344,7 +351,8 @@ export async function runEmbeddedPiAgent(params: {
config: params.config, config: params.config,
entries: skillEntries, entries: skillEntries,
}); });
const sandboxSessionKey = params.sessionKey?.trim() || params.sessionId; const sandboxSessionKey =
params.sessionKey?.trim() || params.sessionId;
const sandbox = await resolveSandboxContext({ const sandbox = await resolveSandboxContext({
config: params.config, config: params.config,
sessionKey: sandboxSessionKey, sessionKey: sandboxSessionKey,
@ -363,7 +371,10 @@ export async function runEmbeddedPiAgent(params: {
const bootstrapFiles = const bootstrapFiles =
await loadWorkspaceBootstrapFiles(resolvedWorkspace); await loadWorkspaceBootstrapFiles(resolvedWorkspace);
const contextFiles = buildBootstrapContextFiles(bootstrapFiles); const contextFiles = buildBootstrapContextFiles(bootstrapFiles);
const promptSkills = resolvePromptSkills(skillsSnapshot, skillEntries); const promptSkills = resolvePromptSkills(
skillsSnapshot,
skillEntries,
);
// Tool schemas must be provider-compatible (OpenAI requires top-level `type: "object"`). // Tool schemas must be provider-compatible (OpenAI requires top-level `type: "object"`).
// `createClawdbotCodingTools()` normalizes schemas so the session can pass them through unchanged. // `createClawdbotCodingTools()` normalizes schemas so the session can pass them through unchanged.
const tools = createClawdbotCodingTools({ const tools = createClawdbotCodingTools({
@ -389,7 +400,7 @@ export async function runEmbeddedPiAgent(params: {
const systemPrompt = buildSystemPrompt({ const systemPrompt = buildSystemPrompt({
appendPrompt: buildAgentSystemPromptAppend({ appendPrompt: buildAgentSystemPromptAppend({
workspaceDir: resolvedWorkspace, workspaceDir: resolvedWorkspace,
defaultThinkLevel: params.thinkLevel, defaultThinkLevel: thinkLevel,
extraSystemPrompt: params.extraSystemPrompt, extraSystemPrompt: params.extraSystemPrompt,
ownerNumbers: params.ownerNumbers, ownerNumbers: params.ownerNumbers,
reasoningTagHint, reasoningTagHint,
@ -531,6 +542,20 @@ export async function runEmbeddedPiAgent(params: {
params.abortSignal?.removeEventListener?.("abort", onAbort); params.abortSignal?.removeEventListener?.("abort", onAbort);
} }
if (promptError && !aborted) { if (promptError && !aborted) {
const fallbackThinking = pickFallbackThinkingLevel({
message:
promptError instanceof Error
? promptError.message
: String(promptError),
attempted: attemptedThinking,
});
if (fallbackThinking) {
log.warn(
`unsupported thinking level for ${provider}/${modelId}; retrying with ${fallbackThinking}`,
);
thinkLevel = fallbackThinking;
continue;
}
throw promptError; throw promptError;
} }
@ -541,6 +566,28 @@ export async function runEmbeddedPiAgent(params: {
| AssistantMessage | AssistantMessage
| undefined; | undefined;
const fallbackThinking = pickFallbackThinkingLevel({
message: lastAssistant?.errorMessage,
attempted: attemptedThinking,
});
if (fallbackThinking && !aborted) {
log.warn(
`unsupported thinking level for ${provider}/${modelId}; retrying with ${fallbackThinking}`,
);
thinkLevel = fallbackThinking;
continue;
}
const fallbackConfigured =
(params.config?.agent?.modelFallbacks?.length ?? 0) > 0;
if (fallbackConfigured && isRateLimitAssistantError(lastAssistant)) {
const message =
lastAssistant?.errorMessage?.trim() ||
(lastAssistant ? formatAssistantErrorText(lastAssistant) : "") ||
"LLM request rate limited.";
throw new Error(message);
}
const usage = lastAssistant?.usage; const usage = lastAssistant?.usage;
const agentMeta: EmbeddedPiAgentMeta = { const agentMeta: EmbeddedPiAgentMeta = {
sessionId: sessionIdUsed, sessionId: sessionIdUsed,
@ -572,7 +619,8 @@ export async function runEmbeddedPiAgent(params: {
if (inlineToolResults) { if (inlineToolResults) {
for (const { toolName, meta } of toolMetas) { for (const { toolName, meta } of toolMetas) {
const agg = formatToolAggregate(toolName, meta ? [meta] : []); const agg = formatToolAggregate(toolName, meta ? [meta] : []);
const { text: cleanedText, mediaUrls } = splitMediaFromOutput(agg); const { text: cleanedText, mediaUrls } =
splitMediaFromOutput(agg);
if (cleanedText) if (cleanedText)
replyItems.push({ text: cleanedText, media: mediaUrls }); replyItems.push({ text: cleanedText, media: mediaUrls });
} }
@ -584,7 +632,8 @@ export async function runEmbeddedPiAgent(params: {
? [extractAssistantText(lastAssistant)] ? [extractAssistantText(lastAssistant)]
: []) { : []) {
const { text: cleanedText, mediaUrls } = splitMediaFromOutput(text); const { text: cleanedText, mediaUrls } = splitMediaFromOutput(text);
if (!cleanedText && (!mediaUrls || mediaUrls.length === 0)) continue; if (!cleanedText && (!mediaUrls || mediaUrls.length === 0))
continue;
replyItems.push({ text: cleanedText, media: mediaUrls }); replyItems.push({ text: cleanedText, media: mediaUrls });
} }
@ -614,6 +663,7 @@ export async function runEmbeddedPiAgent(params: {
restoreSkillEnv?.(); restoreSkillEnv?.();
process.chdir(prevCwd); process.chdir(prevCwd);
} }
}
}), }),
); );
} }

View File

@ -1,6 +1,9 @@
import fs from "node:fs"; import fs from "node:fs";
import { getEnvApiKey } from "@mariozechner/pi-ai";
import { discoverAuthStorage } from "@mariozechner/pi-coding-agent";
import { resolveClawdbotAgentDir } from "../../agents/agent-paths.js";
import type { ClawdbotConfig } from "../../config/config.js"; import type { ClawdbotConfig } from "../../config/config.js";
import { resolveOAuthPath } from "../../config/paths.js";
import { import {
type SessionEntry, type SessionEntry,
type SessionScope, type SessionScope,
@ -12,10 +15,6 @@ import { resolveSendPolicy } from "../../sessions/send-policy.js";
import { normalizeE164 } from "../../utils.js"; import { normalizeE164 } from "../../utils.js";
import { resolveHeartbeatSeconds } from "../../web/reconnect.js"; import { resolveHeartbeatSeconds } from "../../web/reconnect.js";
import { getWebAuthAgeMs, webAuthExists } from "../../web/session.js"; import { getWebAuthAgeMs, webAuthExists } from "../../web/session.js";
import { resolveClawdbotAgentDir } from "../../agents/agent-paths.js";
import { resolveOAuthPath } from "../../config/paths.js";
import { getEnvApiKey } from "@mariozechner/pi-ai";
import { discoverAuthStorage } from "@mariozechner/pi-coding-agent";
import { import {
normalizeGroupActivation, normalizeGroupActivation,
parseActivationCommand, parseActivationCommand,
@ -61,7 +60,8 @@ function hasOAuthCredentials(provider: string): boolean {
if (!entry) return false; if (!entry) return false;
const refresh = const refresh =
entry.refresh ?? entry.refresh_token ?? entry.refreshToken ?? ""; entry.refresh ?? entry.refresh_token ?? entry.refreshToken ?? "";
const access = entry.access ?? entry.access_token ?? entry.accessToken ?? ""; const access =
entry.access ?? entry.access_token ?? entry.accessToken ?? "";
return Boolean(refresh.trim() && access.trim()); return Boolean(refresh.trim() && access.trim());
} catch { } catch {
return false; return false;
@ -204,6 +204,7 @@ export async function handleCommands(params: {
resolvedVerboseLevel, resolvedVerboseLevel,
resolvedElevatedLevel, resolvedElevatedLevel,
resolveDefaultThinkingLevel, resolveDefaultThinkingLevel,
provider,
model, model,
contextTokens, contextTokens,
isGroup, isGroup,