diff --git a/src/cli/gateway-cli/crash-tracker.test.ts b/src/cli/gateway-cli/crash-tracker.test.ts
new file mode 100644
index 000000000..89a4b426c
--- /dev/null
+++ b/src/cli/gateway-cli/crash-tracker.test.ts
@@ -0,0 +1,109 @@
+// src/cli/gateway-cli/crash-tracker.test.ts
+import { describe, it, expect, beforeEach, afterEach, vi } from "vitest";
+import {
+  recordCrash,
+  getRecentCrashes,
+  getCrashesInLastHour,
+  clearCrashes,
+  classifyError,
+} from "./crash-tracker.js";
+
+describe("crash-tracker", () => {
+  beforeEach(() => {
+    clearCrashes();
+    vi.useFakeTimers();
+  });
+
+  afterEach(() => {
+    vi.useRealTimers();
+  });
+
+  it("records a crash with timestamp", () => {
+    vi.setSystemTime(new Date("2026-01-29T12:00:00Z"));
+    recordCrash({
+      errorType: "fetch_failed",
+      errorMessage: "ECONNREFUSED",
+      uptimeMs: 5000,
+      backoffMs: 2000,
+      consecutiveFailures: 1,
+    });
+
+    const crashes = getRecentCrashes();
+    expect(crashes).toHaveLength(1);
+    expect(crashes[0].timestamp).toBe(Date.now());
+    expect(crashes[0].errorType).toBe("fetch_failed");
+  });
+
+  it("limits to MAX_CRASH_HISTORY entries", () => {
+    for (let i = 0; i < 25; i++) {
+      recordCrash({
+        errorType: "network_error",
+        errorMessage: `Error ${i}`,
+        uptimeMs: 0,
+        backoffMs: 2000,
+        consecutiveFailures: i + 1,
+      });
+    }
+
+    const crashes = getRecentCrashes();
+    expect(crashes).toHaveLength(20);
+    expect(crashes[0].errorMessage).toBe("Error 5"); // First 5 were dropped
+  });
+
+  it("counts crashes in last hour correctly", () => {
+    vi.setSystemTime(new Date("2026-01-29T12:00:00Z"));
+    recordCrash({
+      errorType: "fetch_failed",
+      errorMessage: "a",
+      uptimeMs: 0,
+      backoffMs: 2000,
+      consecutiveFailures: 1,
+    });
+
+    vi.setSystemTime(new Date("2026-01-29T12:30:00Z"));
+    recordCrash({
+      errorType: "fetch_failed",
+      errorMessage: "b",
+      uptimeMs: 0,
+      backoffMs: 4000,
+      consecutiveFailures: 2,
+    });
+
+    // At 13:29:59, "a" is 89 min old (outside), "b" is 59 min old (inside)
+    vi.setSystemTime(new Date("2026-01-29T13:29:59Z"));
+    expect(getCrashesInLastHour()).toBe(1); // Only "b" is within last hour
+  });
+});
+
+describe("classifyError", () => {
+  it("classifies fetch failed errors", () => {
+    expect(classifyError(new Error("TypeError: fetch failed"))).toBe("fetch_failed");
+    expect(classifyError(new Error("connect ECONNREFUSED 127.0.0.1:443"))).toBe("fetch_failed");
+  });
+
+  it("classifies network errors", () => {
+    expect(classifyError(new Error("read ECONNRESET"))).toBe("network_error");
+    expect(classifyError(new Error("connect ETIMEDOUT"))).toBe("network_error");
+    expect(classifyError(new Error("network unreachable"))).toBe("network_error");
+  });
+
+  it("classifies startup errors", () => {
+    expect(classifyError(new Error("startup failed: missing config"))).toBe("startup_error");
+    expect(classifyError(new Error("init error: bad credentials"))).toBe("startup_error");
+  });
+
+  it("defaults to runtime_error for unrecognized errors", () => {
+    expect(classifyError(new Error("something went wrong"))).toBe("runtime_error");
+    expect(classifyError(new Error("unexpected condition"))).toBe("runtime_error");
+  });
+
+  it("handles null/undefined safely", () => {
+    expect(classifyError(null)).toBe("unknown");
+    expect(classifyError(undefined)).toBe("unknown");
+  });
+
+  it("handles non-Error objects", () => {
+    expect(classifyError("string error")).toBe("runtime_error");
+    expect(classifyError({ message: "object error" })).toBe("runtime_error");
+  });
+});
diff --git a/src/cli/gateway-cli/crash-tracker.ts b/src/cli/gateway-cli/crash-tracker.ts
new file mode 100644
index 000000000..7fdaeb27a
--- /dev/null
+++ b/src/cli/gateway-cli/crash-tracker.ts
@@ -0,0 +1,131 @@
+// src/cli/gateway-cli/crash-tracker.ts
+import { createSubsystemLogger } from "../../logging/subsystem.js";
+import { isTransientNetworkError } from "../../infra/unhandled-rejections.js";
+
+const log = createSubsystemLogger("gateway");
+
+/** Coarse category assigned to a crash by `classifyError`. */
+export type CrashErrorType =
+  | "fetch_failed"
+  | "network_error"
+  | "startup_error"
+  | "runtime_error"
+  | "unknown";
+
+/** One entry in the in-memory crash history. */
+export type CrashRecord = {
+  /** `Date.now()` value captured when the crash was recorded. */
+  timestamp: number;
+  errorType: CrashErrorType;
+  errorMessage: string;
+  uptimeMs: number;
+  backoffMs: number;
+  consecutiveFailures: number;
+};
+
+/** Upper bound on retained records; the oldest entry is evicted first. */
+const MAX_CRASH_HISTORY = 20;
+const recentCrashes: CrashRecord[] = [];
+
+/**
+ * Appends a crash to the in-memory history, stamping it with the current
+ * time, and logs it together with the number of crashes in the last hour.
+ *
+ * @param record - Crash details; `timestamp` is filled in here.
+ */
+export function recordCrash(record: Omit<CrashRecord, "timestamp">): void {
+  const full: CrashRecord = { ...record, timestamp: Date.now() };
+  recentCrashes.push(full);
+  if (recentCrashes.length > MAX_CRASH_HISTORY) {
+    recentCrashes.shift();
+  }
+
+  log.error("gateway_crash", {
+    errorType: record.errorType,
+    errorMessage: record.errorMessage,
+    uptimeMs: record.uptimeMs,
+    backoffMs: record.backoffMs,
+    consecutiveFailures: record.consecutiveFailures,
+    crashesInLastHour: getCrashesInLastHour(),
+  });
+}
+
+/**
+ * Returns the retained crash records, oldest first.
+ *
+ * The returned array is the module's live buffer, not a copy, so later
+ * `recordCrash`/`clearCrashes` calls are visible through it.
+ */
+export function getRecentCrashes(): readonly CrashRecord[] {
+  return recentCrashes;
+}
+
+/** Counts retained crashes whose timestamp is less than one hour old. */
+export function getCrashesInLastHour(): number {
+  const oneHourAgo = Date.now() - 3600_000;
+  return recentCrashes.filter((c) => c.timestamp > oneHourAgo).length;
+}
+
+/** Empties the crash history in place. */
+export function clearCrashes(): void {
+  recentCrashes.length = 0;
+}
+
+// Network error patterns to match in error messages
+const FETCH_FAILED_PATTERNS = ["fetch failed", "econnrefused"];
+const NETWORK_ERROR_PATTERNS = [
+  "econnreset",
+  "etimedout",
+  "enotfound",
+  "ehostunreach",
+  "enetunreach",
+  "network unreachable",
+  "socket hang up",
+];
+
+/** Extracts a message from an Error, a string, or a `{ message: string }` object; otherwise "". */
+function getErrorMessage(err: unknown): string {
+  if (err instanceof Error) return err.message;
+  if (typeof err === "string") return err;
+  if (err && typeof err === "object" && "message" in err && typeof err.message === "string") {
+    return err.message;
+  }
+  return "";
+}
+
+/**
+ * Maps an arbitrary thrown value to a `CrashErrorType`.
+ *
+ * Falsy values yield "unknown". Values accepted by `isTransientNetworkError`
+ * are "fetch_failed" when the message matches a fetch pattern, otherwise
+ * "network_error". Remaining values are classified by message substring:
+ * fetch patterns, network patterns, then "startup"/"init"; anything left
+ * over is "runtime_error".
+ */
+export function classifyError(err: unknown): CrashErrorType {
+  if (!err) return "unknown";
+
+  const message = getErrorMessage(err).toLowerCase();
+
+  // Use existing transient network detection for consistency
+  if (isTransientNetworkError(err)) {
+    if (FETCH_FAILED_PATTERNS.some((p) => message.includes(p))) {
+      return "fetch_failed";
+    }
+    return "network_error";
+  }
+
+  // Also check message patterns for errors without proper error codes
+  if (FETCH_FAILED_PATTERNS.some((p) => message.includes(p))) {
+    return "fetch_failed";
+  }
+  if (NETWORK_ERROR_PATTERNS.some((p) => message.includes(p))) {
+    return "network_error";
+  }
+
+  // Substring match, so unrelated words containing "init" also land here.
+  if (message.includes("startup") || message.includes("init")) {
+    return "startup_error";
+  }
+  return "runtime_error";
+}