From bdbef04ac312e52fb46f3c40928120b53402e647 Mon Sep 17 00:00:00 2001 From: Trevin Chow Date: Thu, 29 Jan 2026 12:23:02 -0800 Subject: [PATCH] fix(gateway): expand transient network error detection patterns Add more DNS error codes (EAI_NODATA, EAI_NONAME), TLS certificate errors, HTTP status codes (429, 502, 503, 504), and message-based fallbacks to prevent gateway crash loops on transient network issues. --- src/infra/unhandled-rejections.test.ts | 60 ++++++++++++++++++++++++++ src/infra/unhandled-rejections.ts | 32 ++++++++++++++ 2 files changed, 92 insertions(+) diff --git a/src/infra/unhandled-rejections.test.ts b/src/infra/unhandled-rejections.test.ts index 1ec144ba1..a4f637c5a 100644 --- a/src/infra/unhandled-rejections.test.ts +++ b/src/infra/unhandled-rejections.test.ts @@ -127,3 +127,63 @@ describe("isTransientNetworkError", () => { expect(isTransientNetworkError(error)).toBe(false); }); }); + +describe("isTransientNetworkError - expanded patterns", () => { + it("recognizes EAI_NODATA as transient", () => { + const err = { code: "EAI_NODATA", message: "DNS lookup failed" }; + expect(isTransientNetworkError(err)).toBe(true); + }); + + it("recognizes EAI_NONAME as transient", () => { + const err = { code: "EAI_NONAME", message: "DNS name not found" }; + expect(isTransientNetworkError(err)).toBe(true); + }); + + it("recognizes HTTP 502 as transient", () => { + const err = { status: 502, message: "Bad Gateway" }; + expect(isTransientNetworkError(err)).toBe(true); + }); + + it("recognizes HTTP 503 as transient", () => { + const err = { statusCode: 503, message: "Service Unavailable" }; + expect(isTransientNetworkError(err)).toBe(true); + }); + + it("recognizes HTTP 429 rate limit as transient", () => { + const err = { status: 429, message: "Too Many Requests" }; + expect(isTransientNetworkError(err)).toBe(true); + }); + + it("recognizes socket closed message as transient", () => { + const err = new Error("socket closed unexpectedly"); + expect(isTransientNetworkError(err)).toBe(true); + }); + + it("recognizes client network socket disconnected as transient", () => { + const err = new Error("Client network socket disconnected before secure TLS connection"); + expect(isTransientNetworkError(err)).toBe(true); + }); + + it("recognizes TLS certificate errors as transient", () => { + const err1 = { code: "CERT_HAS_EXPIRED", message: "certificate has expired" }; + expect(isTransientNetworkError(err1)).toBe(true); + + const err2 = { code: "ERR_TLS_CERT_ALTNAME_INVALID", message: "Hostname mismatch" }; + expect(isTransientNetworkError(err2)).toBe(true); + }); + + it("does not recognize HTTP 400 as transient", () => { + const err = { status: 400, message: "Bad Request" }; + expect(isTransientNetworkError(err)).toBe(false); + }); + + it("does not recognize HTTP 401 as transient", () => { + const err = { status: 401, message: "Unauthorized" }; + expect(isTransientNetworkError(err)).toBe(false); + }); + + it("handles string status codes", () => { + const err = { status: "503", message: "Service Unavailable" }; + expect(isTransientNetworkError(err)).toBe(true); + }); +}); diff --git a/src/infra/unhandled-rejections.ts b/src/infra/unhandled-rejections.ts index d186c6a78..3e3041157 100644 --- a/src/infra/unhandled-rejections.ts +++ b/src/infra/unhandled-rejections.ts @@ -28,6 +28,10 @@ const TRANSIENT_NETWORK_CODES = new Set([ "EHOSTUNREACH", "ENETUNREACH", "EAI_AGAIN", + "EAI_NODATA", + "EAI_NONAME", + "CERT_HAS_EXPIRED", + "ERR_TLS_CERT_ALTNAME_INVALID", "UND_ERR_CONNECT_TIMEOUT", "UND_ERR_DNS_RESOLVE_FAILED", "UND_ERR_CONNECT", @@ -99,6 +103,34 @@ export function isTransientNetworkError(err: unknown): boolean { return err.errors.some((e) => isTransientNetworkError(e)); } + // Message-based fallback detection + const message = err instanceof Error ? err.message?.toLowerCase() : ""; + if ( + message.includes("fetch failed") || + message.includes("network error") || + message.includes("socket hang up") || + message.includes("socket closed") || + message.includes("client network socket disconnected") + ) { + return true; + } + + // Check for transient HTTP error responses (handle both number and string status codes) + const statusRaw = + (err as { status?: unknown }).status ?? (err as { statusCode?: unknown }).statusCode; + const status = + typeof statusRaw === "number" + ? statusRaw + : typeof statusRaw === "string" + ? parseInt(statusRaw, 10) + : NaN; + if ( + Number.isFinite(status) && + (status === 429 || status === 502 || status === 503 || status === 504) + ) { + return true; + } + return false; }