diff --git a/CHANGELOG.md b/CHANGELOG.md
index 5909c9899..673ea7461 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -6,6 +6,7 @@ Docs: https://docs.molt.bot
 Status: beta.
 
 ### Changes
+- Gateway: add HTTP health endpoints (`/health`, `/ready`, `/health/deep`) for Kubernetes liveness/readiness probes. (#3507)
 - Rebrand: rename the npm package/CLI to `moltbot`, add a `moltbot` compatibility shim, and move extensions to the `@moltbot/*` scope.
 - Commands: group /help and /commands output with Telegram paging. (#2504) Thanks @hougangdev.
 - macOS: limit project-local `node_modules/.bin` PATH preference to debug builds (reduce PATH hijacking risk).
diff --git a/src/config/types.gateway.ts b/src/config/types.gateway.ts
index a0d562f7b..ae4942630 100644
--- a/src/config/types.gateway.ts
+++ b/src/config/types.gateway.ts
@@ -191,6 +191,19 @@ export type GatewayHttpConfig = {
   endpoints?: GatewayHttpEndpointsConfig;
 };
 
+/**
+ * Health endpoint configuration for container orchestration.
+ * Provides K8s-style liveness/readiness probes.
+ */
+export type GatewayHealthConfig = {
+  /** Enable health endpoints (default: true). */
+  enabled?: boolean;
+  /** Base path for health endpoints (default: ""). */
+  basePath?: string;
+  /** Require auth for /health/deep (default: true). */
+  deepAuthRequired?: boolean;
+};
+
 export type GatewayNodesConfig = {
   /** Browser routing policy for node-hosted browser proxies. */
   browser?: {
@@ -233,6 +246,8 @@ export type GatewayConfig = {
   tls?: GatewayTlsConfig;
   http?: GatewayHttpConfig;
   nodes?: GatewayNodesConfig;
+  /** Health endpoint configuration for K8s/container probes. */
+  health?: GatewayHealthConfig;
   /**
    * IPs of trusted reverse proxies (e.g. Traefik, nginx). When a connection
    * arrives from one of these IPs, the Gateway trusts `x-forwarded-for` (or
diff --git a/src/gateway/http-health.test.ts b/src/gateway/http-health.test.ts
new file mode 100644
index 000000000..e632c4e22
--- /dev/null
+++ b/src/gateway/http-health.test.ts
@@ -0,0 +1,194 @@
+import { describe, expect, it, afterEach } from "vitest";
+import { createServer, type Server, type IncomingMessage, type ServerResponse } from "node:http";
+import type { AddressInfo } from "node:net";
+import { createHealthEndpointsHandler } from "./http-health.js";
+import type { ResolvedGatewayAuth } from "./auth.js";
+
+const mockAuth: ResolvedGatewayAuth = {
+  mode: "token",
+  token: "test-token-123",
+  allowTailscale: false,
+};
+
+function createTestServer(
+  handler: (req: IncomingMessage, res: ServerResponse) => Promise<boolean>,
+) {
+  const server = createServer(async (req, res) => {
+    const handled = await handler(req, res);
+    if (!handled) {
+      res.statusCode = 404;
+      res.end("Not Found");
+    }
+  });
+  return server;
+}
+
+async function startServer(server: Server): Promise<string> {
+  return new Promise((resolve) => {
+    server.listen(0, "127.0.0.1", () => {
+      const addr = server.address() as AddressInfo;
+      resolve(`http://127.0.0.1:${addr.port}`);
+    });
+  });
+}
+
+async function stopServer(server: Server): Promise<void> {
+  return new Promise((resolve) => {
+    server.close(() => resolve());
+  });
+}
+
+describe("http-health", () => {
+  let server: Server;
+  let baseUrl: string;
+
+  afterEach(async () => {
+    if (server) {
+      await stopServer(server);
+    }
+  });
+
+  describe("GET /health", () => {
+    it("returns 200 with healthy status when enabled", async () => {
+      const handler = createHealthEndpointsHandler({
+        config: { enabled: true },
+        resolvedAuth: mockAuth,
+      });
+      server = createTestServer(handler);
+      baseUrl = await startServer(server);
+
+      const res = await fetch(`${baseUrl}/health`);
+      expect(res.status).toBe(200);
+
+      const body = await res.json();
+      expect(body.status).toBe("healthy");
+      expect(body.version).toBeDefined();
+      expect(body.uptimeMs).toBeGreaterThanOrEqual(0);
+      expect(body.timestamp).toBeDefined();
+    });
+
+    it("returns 404 when disabled", async () => {
+      const handler = createHealthEndpointsHandler({
+        config: { enabled: false },
+        resolvedAuth: mockAuth,
+      });
+      server = createTestServer(handler);
+      baseUrl = await startServer(server);
+
+      const res = await fetch(`${baseUrl}/health`);
+      expect(res.status).toBe(404);
+    });
+
+    it("respects custom basePath", async () => {
+      const handler = createHealthEndpointsHandler({
+        config: { enabled: true, basePath: "/api/v1" },
+        resolvedAuth: mockAuth,
+      });
+      server = createTestServer(handler);
+      baseUrl = await startServer(server);
+
+      // Original path should 404
+      const res1 = await fetch(`${baseUrl}/health`);
+      expect(res1.status).toBe(404);
+
+      // Custom path should work
+      const res2 = await fetch(`${baseUrl}/api/v1/health`);
+      expect(res2.status).toBe(200);
+    });
+  });
+
+  describe("GET /ready", () => {
+    it("returns 200 when healthy", async () => {
+      const handler = createHealthEndpointsHandler({
+        config: { enabled: true },
+        resolvedAuth: mockAuth,
+      });
+      server = createTestServer(handler);
+      baseUrl = await startServer(server);
+
+      const res = await fetch(`${baseUrl}/ready`);
+      expect(res.status).toBe(200);
+
+      const body = await res.json();
+      expect(body.status).toBe("healthy");
+    });
+  });
+
+  describe("GET /health/deep", () => {
+    it("returns 401 without auth when deepAuthRequired is true", async () => {
+      const handler = createHealthEndpointsHandler({
+        config: { enabled: true, deepAuthRequired: true },
+        resolvedAuth: mockAuth,
+      });
+      server = createTestServer(handler);
+      baseUrl = await startServer(server);
+
+      const res = await fetch(`${baseUrl}/health/deep`);
+      expect(res.status).toBe(401);
+
+      const body = await res.json();
+      expect(body.error).toBe("Unauthorized");
+    });
+
+    it("returns 200 with valid auth", async () => {
+      const handler = createHealthEndpointsHandler({
+        config: { enabled: true, deepAuthRequired: true },
+        resolvedAuth: mockAuth,
+      });
+      server = createTestServer(handler);
+      baseUrl = await startServer(server);
+
+      const res = await fetch(`${baseUrl}/health/deep`, {
+        headers: {
+          Authorization: `Bearer ${mockAuth.token}`,
+        },
+      });
+      expect(res.status).toBe(200);
+
+      const body = await res.json();
+      expect(body.status).toBeDefined();
+      expect(body.checks).toBeDefined();
+    });
+
+    it("returns 200 without auth when deepAuthRequired is false", async () => {
+      const handler = createHealthEndpointsHandler({
+        config: { enabled: true, deepAuthRequired: false },
+        resolvedAuth: mockAuth,
+      });
+      server = createTestServer(handler);
+      baseUrl = await startServer(server);
+
+      const res = await fetch(`${baseUrl}/health/deep`);
+      expect(res.status).toBe(200);
+
+      const body = await res.json();
+      expect(body.checks).toBeDefined();
+    });
+  });
+
+  describe("response format", () => {
+    it("includes Cache-Control: no-store header", async () => {
+      const handler = createHealthEndpointsHandler({
+        config: { enabled: true },
+        resolvedAuth: mockAuth,
+      });
+      server = createTestServer(handler);
+      baseUrl = await startServer(server);
+
+      const res = await fetch(`${baseUrl}/health`);
+      expect(res.headers.get("Cache-Control")).toBe("no-store");
+    });
+
+    it("includes Content-Type: application/json header", async () => {
+      const handler = createHealthEndpointsHandler({
+        config: { enabled: true },
+        resolvedAuth: mockAuth,
+      });
+      server = createTestServer(handler);
+      baseUrl = await startServer(server);
+
+      const res = await fetch(`${baseUrl}/health`);
+      expect(res.headers.get("Content-Type")).toContain("application/json");
+    });
+  });
+});
diff --git a/src/gateway/http-health.ts b/src/gateway/http-health.ts
new file mode 100644
index 000000000..ea94295e6
--- /dev/null
+++ b/src/gateway/http-health.ts
@@ -0,0 +1,227 @@
+/**
+ * HTTP health endpoints for container orchestration (K8s liveness/readiness).
+ *
+ * - GET /health - Liveness probe: 200 if process alive
+ * - GET /ready - Readiness probe: 200 if channels ready, 503 if degraded
+ * - GET /health/deep - Detailed status (auth-protected)
+ */
+
+import type { IncomingMessage, ServerResponse } from "node:http";
+import { VERSION } from "../version.js";
+import type { HealthSummary } from "../commands/health.js";
+import type { GatewayHealthConfig } from "../config/types.gateway.js";
+import { authorizeGatewayConnect, type ResolvedGatewayAuth } from "./auth.js";
+import { getBearerToken } from "./http-utils.js";
+import { getHealthCache, refreshGatewayHealthSnapshot } from "./server/health-state.js";
+
+export type HealthStatus = "healthy" | "degraded" | "unhealthy";
+
+export type HealthResponse = {
+  status: HealthStatus;
+  version: string;
+  uptimeMs: number;
+  timestamp: string;
+  checks?: {
+    channels?: {
+      total: number;
+      configured: number;
+      healthy: number;
+      degraded: string[];
+    };
+    agents?: {
+      total: number;
+      default: string;
+    };
+  };
+};
+
+/** Module load time; `uptimeMs` in responses is measured from here. */
+const startTime = Date.now();
+
+/** Write `body` as a JSON response with the given status and `Cache-Control: no-store`. */
+function sendJson(res: ServerResponse, status: number, body: unknown) {
+  res.statusCode = status;
+  res.setHeader("Content-Type", "application/json; charset=utf-8");
+  res.setHeader("Cache-Control", "no-store");
+  res.end(JSON.stringify(body));
+}
+
+/**
+ * Determine overall health status from channel health summaries.
+ * - healthy: no configured channel has a failed probe (or no snapshot exists yet)
+ * - degraded: some configured channels have a failed probe
+ * - unhealthy: every configured channel has a failed probe
+ *
+ * `degradedChannels` holds one entry per failed probe: `channelId` for a
+ * channel-level probe and `channelId:accountId` for an account-level probe,
+ * so a single channel can appear several times. `degradedChannelCount` counts
+ * each affected channel once; that is the number compared with the configured
+ * channel count, which keeps the status and the "healthy" tally consistent.
+ */
+function determineHealthStatus(health: HealthSummary | null): {
+  status: HealthStatus;
+  degradedChannels: string[];
+  degradedChannelCount: number;
+} {
+  if (!health) {
+    return { status: "healthy", degradedChannels: [], degradedChannelCount: 0 };
+  }
+
+  const channels = health.channels ?? {};
+  const channelIds = Object.keys(channels);
+  const degradedChannels: string[] = [];
+  const affectedChannelIds = new Set<string>();
+
+  for (const channelId of channelIds) {
+    const channelSummary = channels[channelId];
+    if (!channelSummary) continue;
+
+    // Skip unconfigured channels
+    if (channelSummary.configured === false) continue;
+
+    // Check probe status
+    const probe = channelSummary.probe as { ok?: boolean } | undefined;
+    if (probe && probe.ok === false) {
+      degradedChannels.push(channelId);
+      affectedChannelIds.add(channelId);
+    }
+
+    // Check account-level probes
+    const accounts = channelSummary.accounts ?? {};
+    for (const [accountId, accountSummary] of Object.entries(accounts)) {
+      const accountProbe = accountSummary.probe as { ok?: boolean } | undefined;
+      if (accountProbe && accountProbe.ok === false) {
+        const key = `${channelId}:${accountId}`;
+        if (!degradedChannels.includes(key)) {
+          degradedChannels.push(key);
+        }
+        affectedChannelIds.add(channelId);
+      }
+    }
+  }
+
+  const configuredCount = channelIds.filter((id) => channels[id]?.configured !== false).length;
+  const degradedChannelCount = affectedChannelIds.size;
+
+  if (degradedChannelCount === 0) {
+    return { status: "healthy", degradedChannels, degradedChannelCount };
+  }
+  if (degradedChannelCount >= configuredCount && configuredCount > 0) {
+    return { status: "unhealthy", degradedChannels, degradedChannelCount };
+  }
+  return { status: "degraded", degradedChannels, degradedChannelCount };
+}
+
+/**
+ * Build the JSON payload shared by all health endpoints.
+ * `checks` is attached only when `includeChecks` is set and a snapshot exists.
+ */
+function buildHealthResponse(health: HealthSummary | null, includeChecks: boolean): HealthResponse {
+  const uptimeMs = Date.now() - startTime;
+  const { status, degradedChannels, degradedChannelCount } = determineHealthStatus(health);
+
+  const response: HealthResponse = {
+    status,
+    version: VERSION,
+    uptimeMs,
+    timestamp: new Date().toISOString(),
+  };
+
+  if (includeChecks && health) {
+    const channels = health.channels ?? {};
+    const channelIds = Object.keys(channels);
+    const configuredCount = channelIds.filter((id) => channels[id]?.configured !== false).length;
+    const healthyCount = configuredCount - degradedChannelCount;
+
+    response.checks = {
+      channels: {
+        total: channelIds.length,
+        configured: configuredCount,
+        healthy: healthyCount,
+        degraded: degradedChannels,
+      },
+      agents: {
+        total: health.agents?.length ?? 0,
+        default: health.defaultAgentId ?? "pi",
+      },
+    };
+  }
+
+  return response;
+}
+
+/** Resolves to `true` when the request was answered, `false` to let the next handler try. */
+export type HealthEndpointsHandler = (
+  req: IncomingMessage,
+  res: ServerResponse,
+) => Promise<boolean>;
+
+/**
+ * Create the request handler serving `/health`, `/ready` and `/health/deep`
+ * under the configured base path. Only GET requests on those exact paths are
+ * handled; everything else (or a disabled config) resolves to `false`.
+ */
+export function createHealthEndpointsHandler(opts: {
+  config?: GatewayHealthConfig;
+  resolvedAuth: ResolvedGatewayAuth;
+  trustedProxies?: string[];
+}): HealthEndpointsHandler {
+  const { config, resolvedAuth, trustedProxies } = opts;
+  // Drop trailing slashes so "/api/" and "/api" both yield "/api/health".
+  const basePath = (config?.basePath ?? "").replace(/\/+$/, "");
+  const deepAuthRequired = config?.deepAuthRequired !== false;
+
+  return async (req, res) => {
+    if (config?.enabled === false) return false;
+
+    const url = new URL(req.url ?? "/", "http://localhost");
+    const path = url.pathname;
+
+    // GET /health - Liveness probe (always 200 if process is running)
+    if (path === `${basePath}/health` && req.method === "GET") {
+      const health = getHealthCache();
+      const response = buildHealthResponse(health, false);
+      sendJson(res, 200, response);
+      return true;
+    }
+
+    // GET /ready - Readiness probe (503 if degraded/unhealthy)
+    if (path === `${basePath}/ready` && req.method === "GET") {
+      const health = getHealthCache();
+      const { status } = determineHealthStatus(health);
+      const response = buildHealthResponse(health, false);
+
+      const httpStatus = status === "healthy" ? 200 : 503;
+      sendJson(res, httpStatus, response);
+      return true;
+    }
+
+    // GET /health/deep - Detailed health with auth
+    if (path === `${basePath}/health/deep` && req.method === "GET") {
+      if (deepAuthRequired) {
+        const token = getBearerToken(req);
+        const authResult = await authorizeGatewayConnect({
+          auth: resolvedAuth,
+          connectAuth: token ? { token, password: token } : null,
+          req,
+          trustedProxies,
+        });
+        if (!authResult.ok) {
+          sendJson(res, 401, {
+            error: "Unauthorized",
+            reason: authResult.reason ?? "Authentication required",
+          });
+          return true;
+        }
+      }
+
+      // Refresh health snapshot for deep check
+      const health = await refreshGatewayHealthSnapshot({ probe: true });
+      const response = buildHealthResponse(health, true);
+      sendJson(res, 200, response);
+      return true;
+    }
+
+    return false;
+  };
+}
diff --git a/src/gateway/server-http.ts b/src/gateway/server-http.ts
index f08dc811c..95a7d4972 100644
--- a/src/gateway/server-http.ts
+++ b/src/gateway/server-http.ts
@@ -10,6 +10,8 @@ import type { WebSocketServer } from "ws";
 import { handleA2uiHttpRequest } from "../canvas-host/a2ui.js";
 import type { CanvasHostHandler } from "../canvas-host/server.js";
 import { loadConfig } from "../config/config.js";
+import type { GatewayHealthConfig } from "../config/types.gateway.js";
+import { createHealthEndpointsHandler, type HealthEndpointsHandler } from "./http-health.js";
 import type { createSubsystemLogger } from "../logging/subsystem.js";
 import { handleSlackHttpRequest } from "../slack/http/index.js";
 import { resolveAgentAvatar } from "../agents/identity-avatar.js";
@@ -211,6 +213,7 @@ export function createGatewayHttpServer(opts: {
   handlePluginRequest?: HooksRequestHandler;
   resolvedAuth: import("./auth.js").ResolvedGatewayAuth;
   tlsOptions?: TlsOptions;
+  healthConfig?: GatewayHealthConfig;
 }): HttpServer {
   const {
     canvasHost,
@@ -222,7 +225,14 @@ export function createGatewayHttpServer(opts: {
     handleHooksRequest,
     handlePluginRequest,
     resolvedAuth,
+    healthConfig,
   } = opts;
+
+  // Create health endpoints handler
+  const handleHealthRequest: HealthEndpointsHandler = createHealthEndpointsHandler({
+    config: healthConfig,
+    resolvedAuth,
+  });
   const httpServer: HttpServer = opts.tlsOptions
     ? createHttpsServer(opts.tlsOptions, (req, res) => {
         void handleRequest(req, res);
@@ -238,6 +248,10 @@ export function createGatewayHttpServer(opts: {
     try {
       const configSnapshot = loadConfig();
       const trustedProxies = configSnapshot.gateway?.trustedProxies ?? [];
+
+      // Health endpoints for container orchestration (early, minimal auth)
+      if (await handleHealthRequest(req, res)) return;
+
       if (await handleHooksRequest(req, res)) return;
       if (
         await handleToolsInvokeHttpRequest(req, res, {