From d16b6413db20ad6c986e97d1303786899bba12e4 Mon Sep 17 00:00:00 2001 From: Jhony Sidney Date: Wed, 28 Jan 2026 06:50:21 +0000 Subject: [PATCH] feat(exec): optional systemd background runner - Add tools.exec.backgroundRunner=systemd (Linux) to run background jobs via systemd-run - Extend process tool to poll/log/stop systemd-backed sessions - Document new config option --- docs/gateway/background-process.md | 1 + docs/gateway/configuration.md | 1 + docs/tools/exec.md | 1 + src/agents/bash-process-registry.ts | 3 + src/agents/bash-tools.exec.ts | 153 ++++++++++++++++++ src/agents/bash-tools.process.ts | 214 +++++++++++++++++++++++++ src/agents/pi-tools.ts | 1 + src/config/schema.ts | 3 + src/config/types.tools.ts | 7 + src/config/zod-schema.agent-runtime.ts | 2 + 10 files changed, 386 insertions(+) diff --git a/docs/gateway/background-process.md b/docs/gateway/background-process.md index f4d51eafe..0863a39af 100644 --- a/docs/gateway/background-process.md +++ b/docs/gateway/background-process.md @@ -38,6 +38,7 @@ Environment overrides: Config (preferred): - `tools.exec.backgroundMs` (default 10000) +- `tools.exec.backgroundRunner` (default `"process"`; Linux-only `"systemd"` supported) - `tools.exec.timeoutSec` (default 1800) - `tools.exec.cleanupMs` (default 1800000) - `tools.exec.notifyOnExit` (default true): enqueue a system event + request heartbeat when a backgrounded exec exits. diff --git a/docs/gateway/configuration.md b/docs/gateway/configuration.md index 1d270974d..15a6c2fbe 100644 --- a/docs/gateway/configuration.md +++ b/docs/gateway/configuration.md @@ -1934,6 +1934,7 @@ of `every`, keep `HEARTBEAT.md` tiny, and/or choose a cheaper `model`. `tools.exec` configures background exec defaults: - `backgroundMs`: time before auto-background (ms, default 10000) +- `backgroundRunner`: `"process"` (default) or `"systemd"` (Linux only). 
When set to `"systemd"`, background jobs started with `background=true` run in a systemd user scope via `systemd-run --user --scope` so long-running tasks (tmux/codex) don’t stay in the gateway cgroup. - `timeoutSec`: auto-kill after this runtime (seconds, default 1800) - `cleanupMs`: how long to keep finished sessions in memory (ms, default 1800000) - `notifyOnExit`: enqueue a system event + request heartbeat when backgrounded exec exits (default true) diff --git a/docs/tools/exec.md b/docs/tools/exec.md index ca50140a9..da35e68ed 100644 --- a/docs/tools/exec.md +++ b/docs/tools/exec.md @@ -48,6 +48,7 @@ Notes: - `tools.exec.node` (default: unset) - `tools.exec.pathPrepend`: list of directories to prepend to `PATH` for exec runs. - `tools.exec.safeBins`: stdin-only safe binaries that can run without explicit allowlist entries. +- `tools.exec.backgroundRunner` (default: `"process"`): when set to `"systemd"` (Linux only), background jobs started with `background=true` run in a systemd user scope via `systemd-run --user --scope`, so long-running tasks (tmux/codex) don’t stay in the gateway cgroup. 
Example: ```json5 diff --git a/src/agents/bash-process-registry.ts b/src/agents/bash-process-registry.ts index 278b11199..3ffbb64be 100644 --- a/src/agents/bash-process-registry.ts +++ b/src/agents/bash-process-registry.ts @@ -31,6 +31,7 @@ export interface ProcessSession { child?: ChildProcessWithoutNullStreams; stdin?: SessionStdin; pid?: number; + systemdUnit?: string; startedAt: number; cwd?: string; maxOutputChars: number; @@ -59,6 +60,7 @@ export interface FinishedSession { status: ProcessStatus; exitCode?: number | null; exitSignal?: NodeJS.Signals | number | null; + systemdUnit?: string; aggregated: string; tail: string; truncated: boolean; @@ -166,6 +168,7 @@ function moveToFinished(session: ProcessSession, status: ProcessStatus) { status, exitCode: session.exitCode, exitSignal: session.exitSignal, + systemdUnit: session.systemdUnit, aggregated: session.aggregated, tail: session.tail, truncated: session.truncated, diff --git a/src/agents/bash-tools.exec.ts b/src/agents/bash-tools.exec.ts index b9de81872..e99b27271 100644 --- a/src/agents/bash-tools.exec.ts +++ b/src/agents/bash-tools.exec.ts @@ -26,7 +26,9 @@ import { resolveShellEnvFallbackTimeoutMs, } from "../infra/shell-env.js"; import { enqueueSystemEvent } from "../infra/system-events.js"; +import { formatErrorMessage } from "../infra/errors.js"; import { logInfo, logWarn } from "../logger.js"; +import { runExec } from "../process/exec.js"; import { formatSpawnError, spawnWithFallback } from "../process/spawn-utils.js"; import { type ProcessSession, @@ -34,6 +36,7 @@ import { addSession, appendOutput, createSessionSlug, + deleteSession, markBackgrounded, markExited, tail, @@ -124,6 +127,7 @@ export type ExecToolDefaults = { safeBins?: string[]; agentId?: string; backgroundMs?: number; + backgroundRunner?: "process" | "systemd"; timeoutSec?: number; approvalRunningNoticeMs?: number; sandbox?: BashSandboxConfig; @@ -197,6 +201,7 @@ export type ExecToolDetails = status: "running"; sessionId: string; 
pid?: number; + systemdUnit?: string; startedAt: number; cwd?: string; tail?: string; @@ -703,6 +708,105 @@ async function runExecProcess(opts: { }; } +function resolveBackgroundRunner(defaults?: ExecToolDefaults) { + const envOverride = process.env.CLAWDBOT_EXEC_BACKGROUND_RUNNER?.trim().toLowerCase(); + if (envOverride === "systemd") return "systemd"; + if (envOverride === "process") return "process"; + return defaults?.backgroundRunner ?? "process"; +} + +function canUseSystemdRun() { + return process.platform === "linux"; +} + +function normalizeSystemdSetenv(env: Record) { + const entries: string[] = []; + for (const [key, value] of Object.entries(env)) { + const k = key.trim(); + if (!k) continue; + const v = `${value ?? ""}`; + if (v.includes("\0") || v.includes("\n")) continue; + entries.push(`${k}=${v}`); + } + return entries; +} + +async function startSystemdScopeBackground(opts: { + command: string; + workdir: string; + env: Record; + maxOutput: number; + pendingMaxOutput: number; + scopeKey?: string; + sessionKey?: string; +}): Promise { + const startedAt = Date.now(); + const sessionId = createSessionSlug(); + const unit = `clawdbot-exec-${sessionId}.scope`; + const { shell, args: shellArgs } = getShellConfig(); + + const session = { + id: sessionId, + command: opts.command, + scopeKey: opts.scopeKey, + sessionKey: opts.sessionKey, + notifyOnExit: false, + exitNotified: false, + pid: undefined, + systemdUnit: unit, + startedAt, + cwd: opts.workdir, + maxOutputChars: opts.maxOutput, + pendingMaxOutputChars: opts.pendingMaxOutput, + totalOutputChars: 0, + pendingStdout: [], + pendingStderr: [], + pendingStdoutChars: 0, + pendingStderrChars: 0, + aggregated: "", + tail: "", + exited: false, + exitCode: undefined as number | null | undefined, + exitSignal: undefined as NodeJS.Signals | number | null | undefined, + truncated: false, + backgrounded: true, + } satisfies ProcessSession; + + addSession(session); + markBackgrounded(session); + + if 
(!canUseSystemdRun()) { + deleteSession(sessionId); + throw new Error("systemd background runner is only supported on Linux"); + } + + const setenv = normalizeSystemdSetenv(opts.env); + const argv = [ + "--user", + "--scope", + "--collect", + "--no-block", + "--unit", + unit, + "--working-directory", + opts.workdir, + ...setenv.flatMap((entry) => ["-E", entry]), + "--", + shell, + ...shellArgs, + opts.command, + ]; + + try { + await runExec("systemd-run", argv, { timeoutMs: 10_000, maxBuffer: 1024 * 1024 }); + } catch (err) { + deleteSession(sessionId); + throw err; + } + + return session; +} + export function createExecTool( defaults?: ExecToolDefaults, // biome-ignore lint/suspicious/noExplicitAny: TypeBox schema type from pi-agent-core uses a different module instance. @@ -768,6 +872,7 @@ export function createExecTool( ? 0 : clampNumber(params.yieldMs ?? defaultBackgroundMs, defaultBackgroundMs, 10, 120_000) : null; + const backgroundRunner = resolveBackgroundRunner(defaults); const elevatedDefaults = defaults?.elevated; const elevatedAllowed = Boolean(elevatedDefaults?.enabled && elevatedDefaults.allowed); const elevatedDefaultMode = @@ -1382,6 +1487,54 @@ export function createExecTool( typeof params.timeout === "number" ? params.timeout : defaultTimeoutSec; const getWarningText = () => (warnings.length ? 
`${warnings.join("\n")}\n\n` : ""); const usePty = params.pty === true && !sandbox; + + const immediateBackground = backgroundRequested || yieldWindow === 0; + const wantsSystemdRunner = + backgroundRunner === "systemd" && + immediateBackground && + host === "gateway" && + !sandbox && + !usePty; + + if (wantsSystemdRunner) { + try { + const session = await startSystemdScopeBackground({ + command: params.command, + workdir, + env, + maxOutput, + pendingMaxOutput, + scopeKey: defaults?.scopeKey, + sessionKey: notifySessionKey, + }); + return { + content: [ + { + type: "text", + text: + `${getWarningText()}` + + `Started in systemd (${session.systemdUnit}) as a background job (session ${session.id}). ` + + "Use process (list/poll/log/kill/remove) to manage it.", + }, + ], + details: { + status: "running", + sessionId: session.id, + pid: undefined, + systemdUnit: session.systemdUnit, + startedAt: session.startedAt, + cwd: session.cwd, + tail: session.tail, + }, + }; + } catch (err) { + const msg = formatErrorMessage(err); + warnings.push( + `Warning: systemd background runner failed (${msg}); falling back to process runner.`, + ); + } + } + const run = await runExecProcess({ command: params.command, workdir, diff --git a/src/agents/bash-tools.process.ts b/src/agents/bash-tools.process.ts index 1463df086..81026cb02 100644 --- a/src/agents/bash-tools.process.ts +++ b/src/agents/bash-tools.process.ts @@ -1,6 +1,9 @@ import type { AgentTool } from "@mariozechner/pi-agent-core"; import { Type } from "@sinclair/typebox"; +import { formatErrorMessage } from "../infra/errors.js"; +import { runExec } from "../process/exec.js"; + import { deleteSession, drainSession, @@ -26,6 +29,71 @@ export type ProcessToolDefaults = { scopeKey?: string; }; +type SystemdUnitState = { + activeState?: string; + subState?: string; +}; + +async function systemdShowUserUnit(unit: string): Promise { + try { + const { stdout } = await runExec("systemctl", [ + "--user", + "show", + unit, + "-p", + 
"ActiveState", + "-p", + "SubState", + ]); + const out = stdout.trim(); + if (!out) return null; + const lines = out.split("\n"); + const state: SystemdUnitState = {}; + for (const line of lines) { + const [key, ...rest] = line.split("="); + const value = rest.join("=").trim(); + if (key === "ActiveState") state.activeState = value; + if (key === "SubState") state.subState = value; + } + return state; + } catch (err) { + const msg = formatErrorMessage(err).toLowerCase(); + if (msg.includes("could not be found") || msg.includes("not found")) { + return null; + } + if ( + msg.includes("failed to connect to bus") || + msg.includes("systemctl not available") || + msg.includes("not been booted with systemd") + ) { + return null; + } + throw err; + } +} + +async function systemdJournalTail(unit: string, limit: number): Promise { + try { + const { stdout, stderr } = await runExec("journalctl", [ + "--user", + "-u", + unit, + "--no-pager", + "-o", + "cat", + "-n", + String(Math.max(1, Math.floor(limit))), + ]); + return (stdout || stderr).trimEnd(); + } catch (err) { + return `journalctl failed: ${formatErrorMessage(err)}`.trim(); + } +} + +async function systemdStopUserUnit(unit: string): Promise { + await runExec("systemctl", ["--user", "stop", unit]).catch(() => undefined); +} + const processSchema = Type.Object({ action: Type.String({ description: "Process action" }), sessionId: Type.Optional(Type.String({ description: "Session id for actions other than list" })), @@ -91,6 +159,7 @@ export function createProcessTool( sessionId: s.id, status: "running", pid: s.pid ?? undefined, + systemdUnit: s.systemdUnit ?? undefined, startedAt: s.startedAt, runtimeMs: Date.now() - s.startedAt, cwd: s.cwd, @@ -114,6 +183,7 @@ export function createProcessTool( truncated: s.truncated, exitCode: s.exitCode ?? undefined, exitSignal: s.exitSignal ?? undefined, + systemdUnit: s.systemdUnit ?? 
undefined, })); const lines = [...running, ...finished] .sort((a, b) => b.startedAt - a.startedAt) @@ -194,6 +264,58 @@ export function createProcessTool( details: { status: "failed" }, }; } + + if (scopedSession.systemdUnit) { + const unit = scopedSession.systemdUnit; + const state = await systemdShowUserUnit(unit); + const output = await systemdJournalTail(unit, 200); + const active = state?.activeState ?? "unknown"; + const sub = state?.subState ?? "unknown"; + const exited = state == null || (active !== "active" && active !== "activating"); + if (exited) { + const failed = active === "failed" || sub === "failed"; + const status = failed ? "failed" : "completed"; + scopedSession.aggregated = output; + scopedSession.tail = scopedSession.aggregated + ? truncateMiddle(scopedSession.aggregated, 2000) + : scopedSession.tail; + markExited(scopedSession, failed ? 1 : 0, null, status); + return { + content: [ + { + type: "text", + text: + (output || "(no output)") + + `\n\nsystemd unit exited (${unit}; state=${active}/${sub}).`, + }, + ], + details: { + status: status === "completed" ? "completed" : "failed", + sessionId: params.sessionId, + exitCode: failed ? 1 : 0, + aggregated: output, + name: deriveSessionName(scopedSession.command), + }, + }; + } + return { + content: [ + { + type: "text", + text: + (output || "(no recent output)") + + `\n\nsystemd unit running (${unit}; state=${active}/${sub}).`, + }, + ], + details: { + status: "running", + sessionId: params.sessionId, + aggregated: output, + name: deriveSessionName(scopedSession.command), + }, + }; + } + const { stdout, stderr } = drainSession(scopedSession); const exited = scopedSession.exited; const exitCode = scopedSession.exitCode ?? 0; @@ -249,6 +371,20 @@ export function createProcessTool( details: { status: "failed" }, }; } + if (scopedSession.systemdUnit) { + const unit = scopedSession.systemdUnit; + const limit = typeof params.limit === "number" ? 
Math.floor(params.limit) : 200; + const output = await systemdJournalTail(unit, limit); + return { + content: [{ type: "text", text: output || "(no output yet)" }], + details: { + status: "running", + sessionId: params.sessionId, + aggregated: output, + name: deriveSessionName(scopedSession.command), + }, + }; + } const { slice, totalLines, totalChars } = sliceLogLines( scopedSession.aggregated, params.offset, @@ -323,6 +459,17 @@ export function createProcessTool( details: { status: "failed" }, }; } + if (scopedSession.systemdUnit) { + return { + content: [ + { + type: "text", + text: `Session ${params.sessionId} is managed by systemd (${scopedSession.systemdUnit}); stdin write is not supported.`, + }, + ], + details: { status: "failed" }, + }; + } const stdin = scopedSession.stdin ?? scopedSession.child?.stdin; if (!stdin || stdin.destroyed) { return { @@ -384,6 +531,17 @@ export function createProcessTool( details: { status: "failed" }, }; } + if (scopedSession.systemdUnit) { + return { + content: [ + { + type: "text", + text: `Session ${params.sessionId} is managed by systemd (${scopedSession.systemdUnit}); interactive input is not supported.`, + }, + ], + details: { status: "failed" }, + }; + } const stdin = scopedSession.stdin ?? scopedSession.child?.stdin; if (!stdin || stdin.destroyed) { return { @@ -458,6 +616,17 @@ export function createProcessTool( details: { status: "failed" }, }; } + if (scopedSession.systemdUnit) { + return { + content: [ + { + type: "text", + text: `Session ${params.sessionId} is managed by systemd (${scopedSession.systemdUnit}); interactive input is not supported.`, + }, + ], + details: { status: "failed" }, + }; + } const stdin = scopedSession.stdin ?? 
scopedSession.child?.stdin; if (!stdin || stdin.destroyed) { return { @@ -514,6 +683,17 @@ export function createProcessTool( details: { status: "failed" }, }; } + if (scopedSession.systemdUnit) { + return { + content: [ + { + type: "text", + text: `Session ${params.sessionId} is managed by systemd (${scopedSession.systemdUnit}); interactive input is not supported.`, + }, + ], + details: { status: "failed" }, + }; + } const stdin = scopedSession.stdin ?? scopedSession.child?.stdin; if (!stdin || stdin.destroyed) { return { @@ -582,6 +762,23 @@ export function createProcessTool( details: { status: "failed" }, }; } + if (scopedSession.systemdUnit) { + const unit = scopedSession.systemdUnit; + await systemdStopUserUnit(unit); + markExited(scopedSession, null, "SIGTERM", "killed"); + return { + content: [ + { + type: "text", + text: `Stopped systemd unit ${unit} (session ${params.sessionId}).`, + }, + ], + details: { + status: "completed", + name: scopedSession ? deriveSessionName(scopedSession.command) : undefined, + }, + }; + } killSession(scopedSession); markExited(scopedSession, null, "SIGKILL", "failed"); return { @@ -614,6 +811,23 @@ export function createProcessTool( case "remove": { if (scopedSession) { + if (scopedSession.systemdUnit) { + const unit = scopedSession.systemdUnit; + await systemdStopUserUnit(unit); + markExited(scopedSession, null, "SIGTERM", "killed"); + return { + content: [ + { + type: "text", + text: `Removed systemd unit ${unit} (session ${params.sessionId}).`, + }, + ], + details: { + status: "completed", + name: scopedSession ? 
deriveSessionName(scopedSession.command) : undefined, + }, + }; + } killSession(scopedSession); markExited(scopedSession, null, "SIGKILL", "failed"); return { diff --git a/src/agents/pi-tools.ts b/src/agents/pi-tools.ts index d763393a4..aa1360725 100644 --- a/src/agents/pi-tools.ts +++ b/src/agents/pi-tools.ts @@ -88,6 +88,7 @@ function resolveExecConfig(cfg: MoltbotConfig | undefined) { pathPrepend: globalExec?.pathPrepend, safeBins: globalExec?.safeBins, backgroundMs: globalExec?.backgroundMs, + backgroundRunner: globalExec?.backgroundRunner, timeoutSec: globalExec?.timeoutSec, approvalRunningNoticeMs: globalExec?.approvalRunningNoticeMs, cleanupMs: globalExec?.cleanupMs, diff --git a/src/config/schema.ts b/src/config/schema.ts index b4ec8723b..f6f3376c4 100644 --- a/src/config/schema.ts +++ b/src/config/schema.ts @@ -180,6 +180,7 @@ const FIELD_LABELS: Record = { "tools.exec.node": "Exec Node Binding", "tools.exec.pathPrepend": "Exec PATH Prepend", "tools.exec.safeBins": "Exec Safe Bins", + "tools.exec.backgroundRunner": "Exec Background Runner", "tools.message.allowCrossContextSend": "Allow Cross-Context Messaging", "tools.message.crossContext.allowWithinProvider": "Allow Cross-Context (Same Provider)", "tools.message.crossContext.allowAcrossProviders": "Allow Cross-Context (Across Providers)", @@ -420,6 +421,8 @@ const FIELD_HELP: Record = { "tools.exec.pathPrepend": "Directories to prepend to PATH for exec runs (gateway/sandbox).", "tools.exec.safeBins": "Allow stdin-only safe binaries to run without explicit allowlist entries.", + "tools.exec.backgroundRunner": + 'Background execution strategy ("process" default; "systemd" runs background jobs via systemd-run user scopes to avoid leaving long tasks in the gateway cgroup).', "tools.message.allowCrossContextSend": "Legacy override: allow cross-context sends across all providers.", "tools.message.crossContext.allowWithinProvider": diff --git a/src/config/types.tools.ts b/src/config/types.tools.ts index 
bb1d45bf0..1208caeec 100644 --- a/src/config/types.tools.ts +++ b/src/config/types.tools.ts @@ -175,6 +175,13 @@ export type ExecToolConfig = { safeBins?: string[]; /** Default time (ms) before an exec command auto-backgrounds. */ backgroundMs?: number; + /** + * Background runner strategy: + * - "process": track a child process managed by the gateway process (default). + * - "systemd": run background jobs in a systemd user scope via `systemd-run --user --scope`, + * so long-lived tasks (tmux/codex) don't remain in the gateway cgroup. + */ + backgroundRunner?: "process" | "systemd"; /** Default timeout (seconds) before auto-killing exec commands. */ timeoutSec?: number; /** Emit a running notice (ms) when approval-backed exec runs long (default: 10000, 0 = off). */ diff --git a/src/config/zod-schema.agent-runtime.ts b/src/config/zod-schema.agent-runtime.ts index 7a63e307d..f31940d8c 100644 --- a/src/config/zod-schema.agent-runtime.ts +++ b/src/config/zod-schema.agent-runtime.ts @@ -267,6 +267,7 @@ export const AgentToolsSchema = z pathPrepend: z.array(z.string()).optional(), safeBins: z.array(z.string()).optional(), backgroundMs: z.number().int().positive().optional(), + backgroundRunner: z.enum(["process", "systemd"]).optional(), timeoutSec: z.number().int().positive().optional(), approvalRunningNoticeMs: z.number().int().nonnegative().optional(), cleanupMs: z.number().int().positive().optional(), @@ -508,6 +509,7 @@ export const ToolsSchema = z pathPrepend: z.array(z.string()).optional(), safeBins: z.array(z.string()).optional(), backgroundMs: z.number().int().positive().optional(), + backgroundRunner: z.enum(["process", "systemd"]).optional(), timeoutSec: z.number().int().positive().optional(), cleanupMs: z.number().int().positive().optional(), notifyOnExit: z.boolean().optional(),