feat(gateway): add crash tracking module
This commit is contained in:
parent
5a3f915641
commit
5ef33d9359
109
src/cli/gateway-cli/crash-tracker.test.ts
Normal file
109
src/cli/gateway-cli/crash-tracker.test.ts
Normal file
@ -0,0 +1,109 @@
|
||||
// src/cli/gateway-cli/crash-tracker.test.ts
|
||||
import { describe, it, expect, beforeEach, afterEach, vi } from "vitest";
|
||||
import {
|
||||
recordCrash,
|
||||
getRecentCrashes,
|
||||
getCrashesInLastHour,
|
||||
clearCrashes,
|
||||
classifyError,
|
||||
} from "./crash-tracker.js";
|
||||
|
||||
// Covers the in-memory crash history: recording, the size cap, and the rolling one-hour count.
describe("crash-tracker", () => {
  type CrashInput = Parameters<typeof recordCrash>[0];

  // Builds a recordCrash payload from shared defaults plus per-test overrides.
  const crashInput = (overrides: Partial<CrashInput>): CrashInput => ({
    errorType: "fetch_failed",
    errorMessage: "ECONNREFUSED",
    uptimeMs: 0,
    backoffMs: 2000,
    consecutiveFailures: 1,
    ...overrides,
  });

  // Pins the fake clock to the given ISO instant.
  const setClock = (iso: string): void => {
    vi.setSystemTime(new Date(iso));
  };

  beforeEach(() => {
    // Start every test from an empty history and a controllable clock.
    clearCrashes();
    vi.useFakeTimers();
  });

  afterEach(() => {
    vi.useRealTimers();
  });

  it("records a crash with timestamp", () => {
    setClock("2026-01-29T12:00:00Z");

    recordCrash(crashInput({ uptimeMs: 5000 }));

    const crashes = getRecentCrashes();
    expect(crashes).toHaveLength(1);
    // The clock is frozen, so Date.now() still equals the moment of recording.
    expect(crashes[0].timestamp).toBe(Date.now());
    expect(crashes[0].errorType).toBe("fetch_failed");
  });

  it("limits to MAX_CRASH_HISTORY entries", () => {
    const attempts = Array.from({ length: 25 }, (_, index) => index);
    for (const i of attempts) {
      recordCrash(
        crashInput({
          errorType: "network_error",
          errorMessage: `Error ${i}`,
          consecutiveFailures: i + 1,
        }),
      );
    }

    const crashes = getRecentCrashes();
    expect(crashes).toHaveLength(20);
    // 25 recorded, 20 kept: entries 0-4 were evicted, so the oldest survivor is #5.
    expect(crashes[0].errorMessage).toBe("Error 5");
  });

  it("counts crashes in last hour correctly", () => {
    setClock("2026-01-29T12:00:00Z");
    recordCrash(crashInput({ errorMessage: "a" }));

    setClock("2026-01-29T12:30:00Z");
    recordCrash(crashInput({ errorMessage: "b", backoffMs: 4000, consecutiveFailures: 2 }));

    // At 13:29:59, "a" is 89m59s old (outside the window) and "b" is 59m59s old (inside).
    setClock("2026-01-29T13:29:59Z");
    expect(getCrashesInLastHour()).toBe(1);
  });
});
|
||||
|
||||
// Verifies the message-based bucketing performed by classifyError.
describe("classifyError", () => {
  // Asserts that an Error built from each message lands in the expected category.
  const expectMessagesToClassifyAs = (messages: string[], expected: string): void => {
    for (const text of messages) {
      expect(classifyError(new Error(text))).toBe(expected);
    }
  };

  it("classifies fetch failed errors", () => {
    expectMessagesToClassifyAs(
      ["TypeError: fetch failed", "connect ECONNREFUSED 127.0.0.1:443"],
      "fetch_failed",
    );
  });

  it("classifies network errors", () => {
    expectMessagesToClassifyAs(
      ["read ECONNRESET", "connect ETIMEDOUT", "network unreachable"],
      "network_error",
    );
  });

  it("classifies startup errors", () => {
    expectMessagesToClassifyAs(
      ["startup failed: missing config", "init error: bad credentials"],
      "startup_error",
    );
  });

  it("defaults to runtime_error for unrecognized errors", () => {
    expectMessagesToClassifyAs(["something went wrong", "unexpected condition"], "runtime_error");
  });

  it("handles null/undefined safely", () => {
    for (const missing of [null, undefined]) {
      expect(classifyError(missing)).toBe("unknown");
    }
  });

  it("handles non-Error objects", () => {
    const nonErrors: unknown[] = ["string error", { message: "object error" }];
    for (const value of nonErrors) {
      expect(classifyError(value)).toBe("runtime_error");
    }
  });
});
|
||||
102
src/cli/gateway-cli/crash-tracker.ts
Normal file
102
src/cli/gateway-cli/crash-tracker.ts
Normal file
@ -0,0 +1,102 @@
|
||||
// src/cli/gateway-cli/crash-tracker.ts
|
||||
import { createSubsystemLogger } from "../../logging/subsystem.js";
|
||||
import { isTransientNetworkError } from "../../infra/unhandled-rejections.js";
|
||||
|
||||
// Logger for this module, created for the "gateway" subsystem; recordCrash emits crash events through it.
const log = createSubsystemLogger("gateway");
|
||||
|
||||
/**
 * Category label stored on a crash record. `classifyError` in this module
 * returns exactly one of these values.
 */
export type CrashErrorType =
  /** Message mentions "fetch failed" or ECONNREFUSED. */
  | "fetch_failed"
  /** Other network-level failure (reset, timeout, DNS, unreachable, ...). */
  | "network_error"
  /** Message mentions startup or initialization. */
  | "startup_error"
  /** Fallback for any error that matched no other category. */
  | "runtime_error"
  /** No usable error value was supplied (falsy input). */
  | "unknown";
|
||||
|
||||
/**
 * One entry in the in-memory crash history.
 *
 * Callers of `recordCrash` supply every field except `timestamp`, which is
 * stamped at recording time.
 */
export type CrashRecord = {
  /** Value of `Date.now()` (epoch milliseconds) when the crash was recorded. */
  timestamp: number;
  /** Category assigned to the failure. */
  errorType: CrashErrorType;
  /** Human-readable error text; logged verbatim with the crash event. */
  errorMessage: string;
  /** NOTE(review): presumably how long the gateway ran before crashing, in ms — confirm with caller. */
  uptimeMs: number;
  /** NOTE(review): presumably the restart delay chosen after this crash, in ms — confirm with caller. */
  backoffMs: number;
  /** NOTE(review): presumably the count of back-to-back failures including this one — confirm with caller. */
  consecutiveFailures: number;
};
|
||||
|
||||
/** Maximum number of crash records kept in memory at once. */
const MAX_CRASH_HISTORY = 20;

/**
 * Retained crash records for this process, oldest first: recordCrash appends
 * at the end and drops from the front once the cap is exceeded.
 */
const recentCrashes: Array<CrashRecord> = [];
|
||||
|
||||
/**
 * Appends a crash to the in-memory history and logs it as a `gateway_crash` event.
 *
 * The record is stamped with the current time. When the history grows past
 * `MAX_CRASH_HISTORY`, the oldest entry is dropped. The log payload carries the
 * caller-supplied fields plus the number of crashes in the trailing hour,
 * counted after this crash has been added.
 *
 * @param record Crash details without a timestamp.
 */
export function recordCrash(record: Omit<CrashRecord, "timestamp">): void {
  const stamped: CrashRecord = { ...record, timestamp: Date.now() };
  recentCrashes.push(stamped);

  // Only one entry is added per call, so a single eviction restores the cap.
  const overCapacity = recentCrashes.length > MAX_CRASH_HISTORY;
  if (overCapacity) {
    recentCrashes.shift();
  }

  const { errorType, errorMessage, uptimeMs, backoffMs, consecutiveFailures } = record;
  log.error("gateway_crash", {
    errorType,
    errorMessage,
    uptimeMs,
    backoffMs,
    consecutiveFailures,
    crashesInLastHour: getCrashesInLastHour(),
  });
}
|
||||
|
||||
/**
 * Returns a snapshot of the retained crash records, oldest first.
 *
 * A shallow copy is handed out instead of the internal array: `readonly` is
 * only enforced at compile time, so exposing the live array would let a caller
 * (or a cast) break the history cap, and a previously returned list would
 * change or empty out underneath its holder on the next `recordCrash` /
 * `clearCrashes`. The record objects themselves are shared, not cloned.
 *
 * @returns A new array containing the current crash records.
 */
export function getRecentCrashes(): readonly CrashRecord[] {
  return recentCrashes.slice();
}
|
||||
|
||||
/**
 * Counts retained crashes whose timestamp falls within the trailing hour.
 *
 * The comparison is strict: a crash recorded exactly 3,600,000 ms ago is not
 * counted. Only records still present in the capped history are considered.
 *
 * @returns Number of retained crashes newer than one hour.
 */
export function getCrashesInLastHour(): number {
  const cutoff = Date.now() - 3600_000;
  let withinWindow = 0;
  for (const crash of recentCrashes) {
    if (crash.timestamp > cutoff) {
      withinWindow += 1;
    }
  }
  return withinWindow;
}
|
||||
|
||||
/**
 * Removes every record from the crash history.
 *
 * The array is emptied in place, so the module keeps using the same array instance.
 */
export function clearCrashes(): void {
  recentCrashes.splice(0, recentCrashes.length);
}
|
||||
|
||||
// Substrings looked for in error messages. All entries are lower-case because
// classifyError lower-cases the message before matching.

/** Fragments that mark a failure as "fetch_failed". */
const FETCH_FAILED_PATTERNS = [
  "fetch failed",
  "econnrefused",
];

/** Fragments that mark a failure as a generic "network_error". */
const NETWORK_ERROR_PATTERNS = [
  // Node.js socket / DNS error codes
  "econnreset", "etimedout", "enotfound", "ehostunreach", "enetunreach",
  // Free-text variants
  "network unreachable", "socket hang up",
];
|
||||
|
||||
/**
 * Extracts a message string from an arbitrary thrown value.
 *
 * @param err Any value: a string, an `Error`, an object with a `message`, or anything else.
 * @returns The string itself, the `Error`'s message, the object's `message`
 *   when that property is a string, and `""` in every other case.
 */
function getErrorMessage(err: unknown): string {
  if (typeof err === "string") {
    return err;
  }
  if (err instanceof Error) {
    return err.message;
  }
  // Anything that is not a non-null object cannot carry a message property.
  if (typeof err !== "object" || err === null) {
    return "";
  }
  return "message" in err && typeof err.message === "string" ? err.message : "";
}
|
||||
|
||||
// Matches "init" only when it is not preceded by a letter, so "init error",
// "initialization failed" and "plugin_init" match while words that merely
// contain the letters ("infinite", "definition", "uninitialized") do not.
// The message is lower-cased before this is applied.
const INIT_TOKEN_PATTERN = /(?<![a-z])init/;

/**
 * Buckets an arbitrary thrown value into a `CrashErrorType`.
 *
 * Resolution order:
 * 1. Falsy input (null, undefined, "", 0, ...) yields `"unknown"`.
 * 2. A message containing a fetch-failure fragment yields `"fetch_failed"`.
 * 3. An error recognised by `isTransientNetworkError`, or a message containing
 *    a network fragment, yields `"network_error"`.
 * 4. A message mentioning "startup" or an "init…" token yields `"startup_error"`.
 * 5. Everything else yields `"runtime_error"`.
 *
 * Matching is case-insensitive (the message is lower-cased first).
 *
 * @param err The thrown value; may be an `Error`, a string, a plain object, or nothing.
 * @returns The category for the failure.
 */
export function classifyError(err: unknown): CrashErrorType {
  if (!err) return "unknown";

  const message = getErrorMessage(err).toLowerCase();
  const mentions = (fragment: string): boolean => message.includes(fragment);

  // Fetch-failure fragments win whether or not the error is also recognised as
  // a transient network error, so they are checked once, up front.
  if (FETCH_FAILED_PATTERNS.some(mentions)) {
    return "fetch_failed";
  }

  // Reuse the shared transient-network detection for consistency, then fall
  // back to message fragments for errors without proper error codes.
  // NOTE(review): assumes isTransientNetworkError is a pure predicate — confirm.
  if (isTransientNetworkError(err) || NETWORK_ERROR_PATTERNS.some(mentions)) {
    return "network_error";
  }

  // A bare includes("init") also fires on "infinite", "Infinity", "definition",
  // etc., which would mislabel ordinary runtime failures as startup errors.
  if (mentions("startup") || INIT_TOKEN_PATTERN.test(message)) {
    return "startup_error";
  }

  return "runtime_error";
}
|
||||
Loading…
Reference in New Issue
Block a user