From 13d0644c01accef8c0047d8141c5935506af9e95 Mon Sep 17 00:00:00 2001 From: Trevin Chow Date: Thu, 29 Jan 2026 12:29:55 -0800 Subject: [PATCH] feat(gateway): add child process registry for cleanup --- src/infra/child-registry.test.ts | 136 +++++++++++++++++++++++++++ src/infra/child-registry.ts | 153 +++++++++++++++++++++++++++++++ 2 files changed, 289 insertions(+) create mode 100644 src/infra/child-registry.test.ts create mode 100644 src/infra/child-registry.ts diff --git a/src/infra/child-registry.test.ts b/src/infra/child-registry.test.ts new file mode 100644 index 000000000..b66fcc90d --- /dev/null +++ b/src/infra/child-registry.test.ts @@ -0,0 +1,136 @@ +// src/infra/child-registry.test.ts +import { describe, it, expect, beforeEach, vi, afterEach } from "vitest"; +import { spawn, type ChildProcess } from "node:child_process"; +import { + registerChild, + unregisterChild, + killAllChildren, + killAllChildrenSync, + getRegisteredChildren, + clearRegistry, +} from "./child-registry.js"; + +describe("child-registry", () => { + beforeEach(() => { + clearRegistry(); + }); + + afterEach(() => { + clearRegistry(); + }); + + it("registers a child process with PID", () => { + const mockProc = { + pid: 12345, + killed: false, + exitCode: null, + signalCode: null, + kill: vi.fn(), + on: vi.fn(), + once: vi.fn(), + } as unknown as ChildProcess; + + registerChild("test-child", mockProc); + + const children = getRegisteredChildren(); + expect(children).toHaveLength(1); + expect(children[0].pid).toBe(12345); + expect(children[0].name).toBe("test-child"); + }); + + it("does not register if no PID", () => { + const mockProc = { + pid: undefined, + on: vi.fn(), + } as unknown as ChildProcess; + + registerChild("no-pid", mockProc); + + expect(getRegisteredChildren()).toHaveLength(0); + }); + + it("respects managedExternally flag", () => { + const mockProc = { + pid: 11111, + killed: false, + exitCode: null, + signalCode: null, + kill: vi.fn(), + on: vi.fn(), + once: vi.fn(), + } as unknown as ChildProcess; + + registerChild("managed", mockProc, { managedExternally: true }); + + const children = getRegisteredChildren(); + expect(children[0].managedExternally).toBe(true); + }); + + it("unregisters a child by PID", () => { + const mockProc = { + pid: 22222, + killed: false, + exitCode: null, + signalCode: null, + kill: vi.fn(), + on: vi.fn(), + once: vi.fn(), + } as unknown as ChildProcess; + + registerChild("to-remove", mockProc); + expect(getRegisteredChildren()).toHaveLength(1); + + unregisterChild(22222); + expect(getRegisteredChildren()).toHaveLength(0); + }); + + it("killAllChildrenSync sends SIGKILL to all children", () => { + const killFn = vi.fn(); + const mockProc = { + pid: 33333, + killed: false, + exitCode: null, + signalCode: null, + kill: killFn, + on: vi.fn(), + once: vi.fn(), + } as unknown as ChildProcess; + + registerChild("to-kill", mockProc); + killAllChildrenSync(); + + expect(killFn).toHaveBeenCalledWith("SIGKILL"); + }); + + it("skips already-dead processes", () => { + const killFn = vi.fn(); + const mockProc = { + pid: 44444, + killed: false, + exitCode: 0, // Already exited + signalCode: null, + kill: killFn, + on: vi.fn(), + once: vi.fn(), + } as unknown as ChildProcess; + + registerChild("already-dead", mockProc); + killAllChildrenSync(); + + expect(killFn).not.toHaveBeenCalled(); + }); +}); + +describe("child-registry integration", () => { + it("kills a real spawned process", async () => { + const proc = spawn("sleep", ["60"], { detached: false }); + registerChild("sleep-test", proc); + + expect(getRegisteredChildren()).toHaveLength(1); + + await killAllChildren("SIGTERM", { timeoutMs: 1000 }); + + // Process should be killed + expect(proc.killed || proc.exitCode !== null || proc.signalCode !== null).toBe(true); + }); +}); diff --git a/src/infra/child-registry.ts b/src/infra/child-registry.ts new file mode 100644 index 000000000..d09012401 --- /dev/null +++ b/src/infra/child-registry.ts @@ -0,0 +1,153 @@ +// src/infra/child-registry.ts +import type { ChildProcess } from "node:child_process"; +import { createSubsystemLogger } from "../logging/subsystem.js"; + +const log = createSubsystemLogger("child-registry"); + +type ChildEntry = { + name: string; + process: ChildProcess; + managedExternally: boolean; +}; + +const children = new Map(); + +export function registerChild( + name: string, + proc: ChildProcess, + opts?: { managedExternally?: boolean }, +): void { + if (!proc.pid) { + log.warn(`Cannot register child "${name}": no PID (spawn may have failed)`); + return; + } + + children.set(proc.pid, { + name, + process: proc, + managedExternally: opts?.managedExternally ?? false, + }); + + const cleanup = () => { + if (proc.pid) children.delete(proc.pid); + }; + proc.on("exit", cleanup); + proc.on("error", cleanup); +} + +export function unregisterChild(pid: number): void { + children.delete(pid); +} + +export async function killAllChildren( + signal: NodeJS.Signals = "SIGTERM", + opts?: { excludeManaged?: boolean; timeoutMs?: number }, +): Promise { + const timeoutMs = opts?.timeoutMs ?? (signal === "SIGKILL" ? 500 : 3000); + const excludeManaged = opts?.excludeManaged ?? false; + const promises: Promise[] = []; + + // Copy entries to avoid mutation during iteration + const entries = [...children.entries()]; + for (const [pid, entry] of entries) { + const { name, process: proc, managedExternally } = entry; + + if (excludeManaged && managedExternally) { + continue; + } + + if (proc.killed || proc.exitCode !== null || proc.signalCode !== null) { + children.delete(pid); + continue; + } + + log.info(`Killing child process: ${name} (pid=${pid}) with ${signal}`); + promises.push(killWithTimeout(pid, proc, signal, timeoutMs, name)); + } + + await Promise.all(promises); +} + +async function killWithTimeout( + pid: number, + proc: ChildProcess, + signal: NodeJS.Signals, + timeoutMs: number, + name: string, +): Promise { + if (proc.exitCode !== null || proc.signalCode !== null) { + return; + } + + try { + proc.kill(signal); + } catch (err) { + const errnoErr = err as NodeJS.ErrnoException; + if (errnoErr.code !== "ESRCH") { + log.warn(`Failed to send ${signal} to ${name}: ${errnoErr.message}`); + } + return; + } + + if (timeoutMs <= 0) return; + + await Promise.race([ + new Promise((resolve) => proc.once("exit", resolve)), + new Promise((resolve) => setTimeout(resolve, timeoutMs)), + ]); + + if (proc.exitCode === null && proc.signalCode === null) { + if (signal !== "SIGKILL") { + log.warn(`${name} did not exit after ${timeoutMs}ms; sending SIGKILL`); + try { + proc.kill("SIGKILL"); + } catch { + /* ignore */ + } + + await new Promise((resolve) => setTimeout(resolve, 500)); + + if (proc.exitCode === null && proc.signalCode === null) { + log.warn(`Process ${name} (pid=${pid}) did not respond to SIGKILL; proceeding anyway`); + } + } else { + log.warn( + `Process ${name} (pid=${pid}) did not respond to SIGKILL after ${timeoutMs}ms; proceeding anyway`, + ); + } + } + + children.delete(pid); +} + +export function killAllChildrenSync(): void { + // Copy entries to avoid mutation during iteration + const entries = [...children.entries()]; + for (const [pid, { name, process: proc }] of entries) { + if (proc.exitCode !== null || proc.signalCode !== null) continue; + try { + // Use console.error since logging may be unreliable during process exit + console.error(`[child-registry] Force-killing child process: ${name} (pid=${pid})`); + proc.kill("SIGKILL"); + } catch { + /* ignore */ + } + } + children.clear(); +} + +export function getRegisteredChildren(): Array<{ + pid: number; + name: string; + managedExternally: boolean; +}> { + return [...children.entries()].map(([pid, entry]) => ({ + pid, + name: entry.name, + managedExternally: entry.managedExternally, + })); +} + +export function clearRegistry(): void { + children.clear(); +}