Gateway: tolerate transient network failures(#4425)
This commit is contained in:
parent
4583f88626
commit
7698c29363
@ -100,6 +100,24 @@ describe("isTransientNetworkError", () => {
|
||||
expect(isTransientNetworkError(error)).toBe(true);
|
||||
});
|
||||
|
||||
it("returns true for MediaFetchError with fetch_failed code", () => {
|
||||
const error = Object.assign(new Error("media fetch failed"), {
|
||||
name: "MediaFetchError",
|
||||
code: "fetch_failed",
|
||||
});
|
||||
expect(isTransientNetworkError(error)).toBe(true);
|
||||
});
|
||||
|
||||
it("returns true for MediaFetchError that wraps a network cause", () => {
|
||||
const cause = Object.assign(new Error("timeout"), { code: "ETIMEDOUT" });
|
||||
const error = Object.assign(new Error("media fetch failed"), {
|
||||
name: "MediaFetchError",
|
||||
code: "fetch_failed",
|
||||
cause,
|
||||
});
|
||||
expect(isTransientNetworkError(error)).toBe(true);
|
||||
});
|
||||
|
||||
it("returns false for regular errors without network codes", () => {
|
||||
expect(isTransientNetworkError(new Error("Something went wrong"))).toBe(false);
|
||||
expect(isTransientNetworkError(new TypeError("Cannot read property"))).toBe(false);
|
||||
|
||||
@ -78,6 +78,16 @@ function isConfigError(err: unknown): boolean {
|
||||
export function isTransientNetworkError(err: unknown): boolean {
|
||||
if (!err) return false;
|
||||
|
||||
if (err && typeof err === "object") {
|
||||
const name = "name" in err ? String(err.name) : "";
|
||||
const code = extractErrorCodeWithCause(err);
|
||||
if (name === "MediaFetchError" && code === "fetch_failed") {
|
||||
const cause = getErrorCause(err);
|
||||
if (cause && cause !== err) return isTransientNetworkError(cause);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
const code = extractErrorCodeWithCause(err);
|
||||
if (code && TRANSIENT_NETWORK_CODES.has(code)) return true;
|
||||
|
||||
|
||||
@ -13,10 +13,17 @@ export type MediaFetchErrorCode = "max_bytes" | "http_error" | "fetch_failed";
|
||||
export class MediaFetchError extends Error {
|
||||
readonly code: MediaFetchErrorCode;
|
||||
|
||||
constructor(code: MediaFetchErrorCode, message: string) {
|
||||
constructor(code: MediaFetchErrorCode, message: string, options?: { cause?: unknown }) {
|
||||
super(message);
|
||||
this.code = code;
|
||||
this.name = "MediaFetchError";
|
||||
if (options?.cause !== undefined) {
|
||||
try {
|
||||
(this as { cause?: unknown }).cause = options.cause;
|
||||
} catch {
|
||||
// ignore if cause cannot be assigned
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -74,7 +81,9 @@ export async function fetchRemoteMedia(options: FetchMediaOptions): Promise<Fetc
|
||||
try {
|
||||
res = await fetcher(url);
|
||||
} catch (err) {
|
||||
throw new MediaFetchError("fetch_failed", `Failed to fetch media from ${url}: ${String(err)}`);
|
||||
throw new MediaFetchError("fetch_failed", `Failed to fetch media from ${url}: ${String(err)}`, {
|
||||
cause: err,
|
||||
});
|
||||
}
|
||||
|
||||
if (!res.ok) {
|
||||
|
||||
@ -5,6 +5,7 @@ import { resolveAgentMaxConcurrent } from "../config/agent-limits.js";
|
||||
import { computeBackoff, sleepWithAbort } from "../infra/backoff.js";
|
||||
import { formatErrorMessage } from "../infra/errors.js";
|
||||
import { formatDurationMs } from "../infra/format-duration.js";
|
||||
import { registerUnhandledRejectionHandler } from "../infra/unhandled-rejections.js";
|
||||
import type { RuntimeEnv } from "../runtime.js";
|
||||
import { resolveTelegramAccount } from "./accounts.js";
|
||||
import { resolveTelegramAllowedUpdates } from "./allowed-updates.js";
|
||||
@ -108,92 +109,106 @@ export async function monitorTelegramProvider(opts: MonitorTelegramOpts = {}) {
|
||||
opts.proxyFetch ??
|
||||
(account.config.proxy ? makeProxyFetch(account.config.proxy as string) : undefined);
|
||||
|
||||
let lastUpdateId = await readTelegramUpdateOffset({
|
||||
accountId: account.accountId,
|
||||
});
|
||||
const persistUpdateId = async (updateId: number) => {
|
||||
if (lastUpdateId !== null && updateId <= lastUpdateId) return;
|
||||
lastUpdateId = updateId;
|
||||
try {
|
||||
await writeTelegramUpdateOffset({
|
||||
accountId: account.accountId,
|
||||
updateId,
|
||||
});
|
||||
} catch (err) {
|
||||
(opts.runtime?.error ?? console.error)(
|
||||
`telegram: failed to persist update offset: ${String(err)}`,
|
||||
let unregisterUnhandled: (() => void) | null = null;
|
||||
try {
|
||||
unregisterUnhandled = registerUnhandledRejectionHandler((reason) => {
|
||||
if (!isRecoverableTelegramNetworkError(reason)) return false;
|
||||
const errMsg = formatErrorMessage(reason);
|
||||
(opts.runtime?.error ?? console.warn)(
|
||||
`telegram: suppressed unhandled network rejection: ${errMsg}`,
|
||||
);
|
||||
}
|
||||
};
|
||||
|
||||
const bot = createTelegramBot({
|
||||
token,
|
||||
runtime: opts.runtime,
|
||||
proxyFetch,
|
||||
config: cfg,
|
||||
accountId: account.accountId,
|
||||
updateOffset: {
|
||||
lastUpdateId,
|
||||
onUpdateId: persistUpdateId,
|
||||
},
|
||||
});
|
||||
|
||||
if (opts.useWebhook) {
|
||||
await startTelegramWebhook({
|
||||
token,
|
||||
accountId: account.accountId,
|
||||
config: cfg,
|
||||
path: opts.webhookPath,
|
||||
port: opts.webhookPort,
|
||||
secret: opts.webhookSecret,
|
||||
runtime: opts.runtime as RuntimeEnv,
|
||||
fetch: proxyFetch,
|
||||
abortSignal: opts.abortSignal,
|
||||
publicUrl: opts.webhookUrl,
|
||||
return true;
|
||||
});
|
||||
return;
|
||||
}
|
||||
|
||||
// Use grammyjs/runner for concurrent update processing
|
||||
let restartAttempts = 0;
|
||||
|
||||
while (!opts.abortSignal?.aborted) {
|
||||
const runner = run(bot, createTelegramRunnerOptions(cfg));
|
||||
const stopOnAbort = () => {
|
||||
if (opts.abortSignal?.aborted) {
|
||||
void runner.stop();
|
||||
let lastUpdateId = await readTelegramUpdateOffset({
|
||||
accountId: account.accountId,
|
||||
});
|
||||
const persistUpdateId = async (updateId: number) => {
|
||||
if (lastUpdateId !== null && updateId <= lastUpdateId) return;
|
||||
lastUpdateId = updateId;
|
||||
try {
|
||||
await writeTelegramUpdateOffset({
|
||||
accountId: account.accountId,
|
||||
updateId,
|
||||
});
|
||||
} catch (err) {
|
||||
(opts.runtime?.error ?? console.error)(
|
||||
`telegram: failed to persist update offset: ${String(err)}`,
|
||||
);
|
||||
}
|
||||
};
|
||||
opts.abortSignal?.addEventListener("abort", stopOnAbort, { once: true });
|
||||
try {
|
||||
// runner.task() returns a promise that resolves when the runner stops
|
||||
await runner.task();
|
||||
|
||||
const bot = createTelegramBot({
|
||||
token,
|
||||
runtime: opts.runtime,
|
||||
proxyFetch,
|
||||
config: cfg,
|
||||
accountId: account.accountId,
|
||||
updateOffset: {
|
||||
lastUpdateId,
|
||||
onUpdateId: persistUpdateId,
|
||||
},
|
||||
});
|
||||
|
||||
if (opts.useWebhook) {
|
||||
await startTelegramWebhook({
|
||||
token,
|
||||
accountId: account.accountId,
|
||||
config: cfg,
|
||||
path: opts.webhookPath,
|
||||
port: opts.webhookPort,
|
||||
secret: opts.webhookSecret,
|
||||
runtime: opts.runtime as RuntimeEnv,
|
||||
fetch: proxyFetch,
|
||||
abortSignal: opts.abortSignal,
|
||||
publicUrl: opts.webhookUrl,
|
||||
});
|
||||
return;
|
||||
} catch (err) {
|
||||
if (opts.abortSignal?.aborted) {
|
||||
throw err;
|
||||
}
|
||||
const isConflict = isGetUpdatesConflict(err);
|
||||
const isRecoverable = isRecoverableTelegramNetworkError(err, { context: "polling" });
|
||||
const isNetworkError = isNetworkRelatedError(err);
|
||||
if (!isConflict && !isRecoverable && !isNetworkError) {
|
||||
throw err;
|
||||
}
|
||||
restartAttempts += 1;
|
||||
const delayMs = computeBackoff(TELEGRAM_POLL_RESTART_POLICY, restartAttempts);
|
||||
const reason = isConflict ? "getUpdates conflict" : "network error";
|
||||
const errMsg = formatErrorMessage(err);
|
||||
(opts.runtime?.error ?? console.error)(
|
||||
`Telegram ${reason}: ${errMsg}; retrying in ${formatDurationMs(delayMs)}.`,
|
||||
);
|
||||
try {
|
||||
await sleepWithAbort(delayMs, opts.abortSignal);
|
||||
} catch (sleepErr) {
|
||||
if (opts.abortSignal?.aborted) return;
|
||||
throw sleepErr;
|
||||
}
|
||||
} finally {
|
||||
opts.abortSignal?.removeEventListener("abort", stopOnAbort);
|
||||
}
|
||||
|
||||
// Use grammyjs/runner for concurrent update processing
|
||||
let restartAttempts = 0;
|
||||
|
||||
while (!opts.abortSignal?.aborted) {
|
||||
const runner = run(bot, createTelegramRunnerOptions(cfg));
|
||||
const stopOnAbort = () => {
|
||||
if (opts.abortSignal?.aborted) {
|
||||
void runner.stop();
|
||||
}
|
||||
};
|
||||
opts.abortSignal?.addEventListener("abort", stopOnAbort, { once: true });
|
||||
try {
|
||||
// runner.task() returns a promise that resolves when the runner stops
|
||||
await runner.task();
|
||||
return;
|
||||
} catch (err) {
|
||||
if (opts.abortSignal?.aborted) {
|
||||
throw err;
|
||||
}
|
||||
const isConflict = isGetUpdatesConflict(err);
|
||||
const isRecoverable = isRecoverableTelegramNetworkError(err, { context: "polling" });
|
||||
const isNetworkError = isNetworkRelatedError(err);
|
||||
if (!isConflict && !isRecoverable && !isNetworkError) {
|
||||
throw err;
|
||||
}
|
||||
restartAttempts += 1;
|
||||
const delayMs = computeBackoff(TELEGRAM_POLL_RESTART_POLICY, restartAttempts);
|
||||
const reason = isConflict ? "getUpdates conflict" : "network error";
|
||||
const errMsg = formatErrorMessage(err);
|
||||
(opts.runtime?.error ?? console.error)(
|
||||
`Telegram ${reason}: ${errMsg}; retrying in ${formatDurationMs(delayMs)}.`,
|
||||
);
|
||||
try {
|
||||
await sleepWithAbort(delayMs, opts.abortSignal);
|
||||
} catch (sleepErr) {
|
||||
if (opts.abortSignal?.aborted) return;
|
||||
throw sleepErr;
|
||||
}
|
||||
} finally {
|
||||
opts.abortSignal?.removeEventListener("abort", stopOnAbort);
|
||||
}
|
||||
}
|
||||
} finally {
|
||||
unregisterUnhandled?.();
|
||||
}
|
||||
}
|
||||
|
||||
Loading…
Reference in New Issue
Block a user