diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 062e5736c..fb67a3424 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -369,6 +369,8 @@ jobs: - name: Setup Android SDK uses: android-actions/setup-android@v3 + with: + accept-android-sdk-licenses: false - name: Setup Gradle uses: gradle/actions/setup-gradle@v4 diff --git a/AGENTS.md b/AGENTS.md index 176232a06..ca40cb52a 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -50,7 +50,7 @@ ### PR Workflow (Review vs Land) - **Review mode (PR link only):** read `gh pr view/diff`; **do not** switch branches; **do not** change code. -- **Landing mode:** create an integration branch from `main`, bring in PR commits (**prefer rebase** for linear history; **merge allowed** when complexity/conflicts make it safer), apply fixes, add changelog (+ thanks + PR #), run full gate **locally before committing** (`pnpm lint && pnpm build && pnpm test`), commit, merge back to `main`, then `git switch main` (never stay on a topic branch after landing). +- **Landing mode:** create an integration branch from `main`, bring in PR commits (**prefer rebase** for linear history; **merge allowed** when complexity/conflicts make it safer), apply fixes, add changelog (+ thanks + PR #), run full gate **locally before committing** (`pnpm lint && pnpm build && pnpm test`), commit, merge back to `main`, then `git switch main` (never stay on a topic branch after landing). Important: contributor needs to be in git graph after this! ## Security & Configuration Tips - Web provider stores creds at `~/.clawdbot/credentials/`; rerun `clawdbot login` if logged out. diff --git a/CHANGELOG.md b/CHANGELOG.md index fb6fe86ad..7a9093af3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,27 +18,47 @@ - Auto-reply: removed `autoReply` from Discord/Slack/Telegram channel configs; use `requireMention` instead (Telegram topics now support `requireMention` overrides). 
### Fixes +- Discord/Telegram: add per-request retry policy with configurable delays and docs. +- Telegram: run long polling via grammY runner with per-chat sequentialization and concurrency tied to `agent.maxConcurrent`. Thanks @mukhtharcm for PR #366. +- macOS: prevent gateway launchd startup race where the app could kill a just-started gateway; avoid unnecessary `bootout` and ensure the job is enabled at login. Fixes #306. Thanks @gupsammy for PR #387. +- macOS: ignore ciao announcement cancellation rejections during Bonjour shutdown to avoid unhandled exits. Thanks @emanuelst for PR #419. - Pairing: generate DM pairing codes with CSPRNG, expire pending codes after 1 hour, and avoid re-sending codes for already pending requests. - Pairing: lock + atomically write pairing stores with 0600 perms and stop logging pairing codes in provider logs. +- WhatsApp: add self-phone mode (no pairing replies for outbound DMs) and onboarding prompt for personal vs separate numbers (auto allowlist + response prefix for personal). - Discord: include all inbound attachments in `MediaPaths`/`MediaUrls` (back-compat `MediaPath`/`MediaUrl` still first). - Sandbox: add `agent.sandbox.workspaceAccess` (`none`/`ro`/`rw`) to control agent workspace visibility inside the container; `ro` hard-disables `write`/`edit`. - Routing: allow per-agent sandbox overrides (including `workspaceAccess` and `sandbox.tools`) plus per-agent tool policies in multi-agent configs. Thanks @pasogott for PR #380. +- Tools: make per-agent tool policies override global defaults and run bash synchronously when `process` is disallowed. +- Tools: scope `process` sessions per agent to prevent cross-agent visibility. +- Cron: clamp timer delay to avoid TimeoutOverflowWarning. Thanks @emanuelst for PR #412. +- Web UI: allow reconnect + password URL auth for the control UI and always scrub auth params from the URL. Thanks @oswalpalash for PR #414. 
+- ClawdbotKit: fix SwiftPM resource bundling path for `tool-display.json`. Thanks @fcatuhe for PR #398. - Tools: add Telegram/WhatsApp reaction tools (with per-provider gating). Thanks @zats for PR #353. +- Tools: flatten literal-union schemas for Claude on Vertex AI. Thanks @carlulsoe for PR #409. +- Tools: keep tool failure logs concise (no stack traces); full stack only in debug logs. - Tools: unify reaction removal semantics across Discord/Slack/Telegram/WhatsApp and allow WhatsApp reaction routing across accounts. +- Android: fix APK output filename renaming after AGP updates. Thanks @Syhids for PR #410. +- Android: rotate camera photos by EXIF orientation. Thanks @fcatuhe for PR #403. - Gateway/CLI: add daemon runtime selection (Node recommended; Bun optional) and document WhatsApp/Baileys Bun WebSocket instability on reconnect. - CLI: add `clawdbot docs` live docs search with pretty output. - CLI: add `clawdbot agents` (list/add/delete) with wizarded workspace/setup, provider login, and full prune on delete. +- Discord/Slack: fork thread sessions (agent-scoped) and inject thread starters for context. Thanks @thewilloftheshadow for PR #400. - Agent: treat compaction retry AbortError as a fallback trigger without swallowing non-abort errors. Thanks @erikpr1994 for PR #341. +- Agent: add opt-in session pruning for tool results to reduce context bloat. Thanks @maxsumrall for PR #381. +- Agent: protect bootstrap prefix from context pruning. Thanks @maxsumrall for PR #381. - Agent: deliver final replies for non-streaming models when block chunking is enabled. Thank you @mneves75 for PR #369! - Agent: trim bootstrap context injections and keep group guidance concise (emoji reactions allowed). Thanks @tobiasbischoff for PR #370. +- Agent: return a friendly context overflow response (413/request_too_large). Thanks @alejandroOPI for PR #395. - Sub-agents: allow `sessions_spawn` model overrides and error on invalid models. Thanks @azade-c for PR #298. 
- Sub-agents: skip invalid model overrides with a warning and keep the run alive; tool exceptions now return tool errors instead of crashing the agent. +- Sessions: forward explicit sessionKey through gateway/chat/node bridge to avoid sub-agent sessionId mixups. - Heartbeat: default interval 30m; clarified default prompt usage and HEARTBEAT.md template behavior. - Onboarding: write auth profiles to the multi-agent path (`~/.clawdbot/agents/main/agent/`) so the gateway finds credentials on first startup. Thanks @minghinmatthewlam for PR #327. - Docs: add missing `ui:install` setup step in the README. Thanks @hugobarauna for PR #300. - Docs: sanitize AGENTS guidance and add Clawdis migration troubleshooting note. Thanks @buddyh for PR #348. - Docs: add ClawdHub guide and hubs link for browsing, install, and sync workflows. - Docs: add FAQ for PNPM/Bun lockfile migration warning; link AgentSkills spec + ClawdHub guide (`/clawdhub`) from skills docs. +- Docs: add showcase projects (xuezh, gohome, roborock, padel-cli). Thanks @joshp123. - Build: import tool-display JSON as a module instead of runtime file reads. Thanks @mukhtharcm for PR #312. - Status: add provider usage snapshots to `/status`, `clawdbot status --usage`, and the macOS menu bar. - Build: fix macOS packaging QR smoke test for the bun-compiled relay. Thanks @dbhurley for PR #358. @@ -49,6 +69,8 @@ - Telegram: include sender identity in group envelope headers. (#336) - Telegram: support forum topics with topic-isolated sessions and message_thread_id routing. Thanks @HazAT, @nachoiacovino, @RandyVentures for PR #321/#333/#334. - Telegram: add draft streaming via `sendMessageDraft` with `telegram.streamMode`, plus `/reasoning stream` for draft-only reasoning. +- Telegram: honor `/activation` session mode for group mention gating and clarify group activation docs. Thanks @julianengel for PR #377. +- Telegram: isolate forum topic transcripts per thread and validate Gemini turn ordering in multi-topic sessions. 
Thanks @hsrvc for PR #407. - iMessage: ignore disconnect errors during shutdown (avoid unhandled promise rejections). Thanks @antons for PR #359. - Messages: stop defaulting ack reactions to 👀 when identity emoji is missing. - Auto-reply: require slash for control commands to avoid false triggers in normal text. @@ -58,6 +80,7 @@ - Auto-reply: add per-channel/topic skill filters + system prompts for Discord/Slack/Telegram. Thanks @kitze for PR #286. - Auto-reply: refresh `/status` output with build info, compact context, and queue depth. - Commands: add `/stop` to the registry and route native aborts to the active chat session. Thanks @nachoiacovino for PR #295. +- Commands: allow `/` shorthand for `/model` using `agent.models.*.alias`, without shadowing built-ins. Thanks @azade-c for PR #393. - Commands: unify native + text chat commands behind `commands.*` config (Discord/Slack/Telegram). Thanks @thewilloftheshadow for PR #275. - Auto-reply: treat steer during compaction as a follow-up, queued until compaction completes. - Auth: lock auth profile refreshes to avoid multi-instance OAuth logouts; keep credentials on refresh failure. @@ -70,6 +93,7 @@ - Doctor: suggest adding the workspace memory system when missing (opt-out via `--no-workspace-suggestions`). - Doctor: normalize default workspace path to `~/clawd` (avoid `~/clawdbot`). - Doctor: add `--yes` and `--non-interactive` for headless/automation runs (`--non-interactive` only applies safe migrations). +- Doctor/CLI: scan for extra gateway-like services (optional `--deep`) and show cleanup hints. - Gateway/CLI: auto-migrate legacy sessions + agent state layouts on startup (safe; WhatsApp auth still requires `clawdbot doctor`). - Workspace: only create `BOOTSTRAP.md` for brand-new workspaces (don’t recreate after deletion). - Build: fix duplicate protocol export, align Codex OAuth options, and add proper-lockfile typings. 
@@ -78,6 +102,7 @@ - Typing indicators: fix a race that could keep the typing indicator stuck after quick replies. Thanks @thewilloftheshadow for PR #270. - Google: merge consecutive messages to satisfy strict role alternation for Google provider models. Thanks @Asleep123 for PR #266. - Postinstall: handle targetDir symlinks in the install script. Thanks @obviyus for PR #272. +- Status: show configured model in `/status` (override-aware). Thanks @azade-c for PR #396. - WhatsApp/Telegram: add groupPolicy handling for group messages and normalize allowFrom matching (tg/telegram prefixes). Thanks @mneves75. - Auto-reply: add configurable ack reactions for inbound messages (default 👀 or `identity.emoji`) with scope controls. Thanks @obviyus for PR #178. - Polls: unify WhatsApp + Discord poll sends via the gateway + CLI (`clawdbot poll`). (#123) — thanks @dbhurley @@ -117,6 +142,7 @@ - Control UI: show a reading indicator bubble while the assistant is responding. - Control UI: animate reading indicator dots (honors reduced-motion). - Control UI: stabilize chat streaming during tool runs (no flicker/vanishing text; correct run scoping). +- Google: recover from corrupted transcripts that start with an assistant tool call to avoid Cloud Code Assist 400 ordering errors. Thanks @jonasjancarik for PR #421. (#406) - Control UI: let config-form enums select empty-string values. Thanks @sreekaransrinath for PR #268. - Control UI: scroll chat to bottom on initial load. Thanks @kiranjd for PR #274. - Control UI: add Chat focus mode toggle to collapse header + sidebar. @@ -171,6 +197,11 @@ - Refactor: centralize group allowlist/mention policy across providers. - Deps: update to latest across the repo. +## 2026.1.7 + +### Fixes +- Android: bump version to 2026.1.7, add version code, and name APK outputs. Thanks @fcatuhe for PR #402. 
+ ## 2026.1.5-3 ### Fixes diff --git a/README.md b/README.md index 83e8620f3..03c446cc1 100644 --- a/README.md +++ b/README.md @@ -454,5 +454,5 @@ Thanks to all clawtributors: adamgall jalehman jarvis-medmatic mneves75 regenrek tobiasbischoff MSch obviyus dbhurley Asleep123 Iamadig imfing kitze nachoiacovino VACInc cash-echo-bot claude kiranjd pcty-nextgen-service-account minghinmatthewlam ngutman onutc oswalpalash snopoke ManuelHettich loukotal hugobarauna AbhisekBasu1 emanuelst dantelex erikpr1994 antons RandyVentures - reeltimeapps + reeltimeapps fcatuhe maxsumrall carlulsoe alejandroOPI pasogott

diff --git a/apps/android/app/build.gradle.kts b/apps/android/app/build.gradle.kts index e4f3c193a..009f08904 100644 --- a/apps/android/app/build.gradle.kts +++ b/apps/android/app/build.gradle.kts @@ -1,3 +1,5 @@ +import com.android.build.api.variant.impl.VariantOutputImpl + plugins { id("com.android.application") id("org.jetbrains.kotlin.android") @@ -19,8 +21,8 @@ android { applicationId = "com.clawdbot.android" minSdk = 31 targetSdk = 36 - versionCode = 1 - versionName = "2.0.0-beta3" + versionCode = 20260107 + versionName = "2026.1.7" } buildTypes { @@ -54,6 +56,19 @@ android { } } +androidComponents { + onVariants { variant -> + variant.outputs + .filterIsInstance<VariantOutputImpl>() + .forEach { output -> + val versionName = output.versionName.orNull ?: "0" + val buildType = variant.buildType + + val outputFileName = "clawdbot-${versionName}-${buildType}.apk" + output.outputFileName = outputFileName + } + } +} kotlin { compilerOptions { jvmTarget.set(org.jetbrains.kotlin.gradle.dsl.JvmTarget.JVM_17) diff --git a/apps/android/app/src/main/java/com/clawdbot/android/node/CameraCaptureManager.kt b/apps/android/app/src/main/java/com/clawdbot/android/node/CameraCaptureManager.kt index 514524491..69a8a13c9 100644 --- a/apps/android/app/src/main/java/com/clawdbot/android/node/CameraCaptureManager.kt +++ b/apps/android/app/src/main/java/com/clawdbot/android/node/CameraCaptureManager.kt @@ -5,8 +5,10 @@ import android.content.Context import android.annotation.SuppressLint import android.graphics.Bitmap import android.graphics.BitmapFactory +import android.graphics.Matrix import android.util.Base64 import android.content.pm.PackageManager +import android.media.ExifInterface import androidx.lifecycle.LifecycleOwner import androidx.camera.core.CameraSelector import androidx.camera.core.ImageCapture @@ -86,18 +88,19 @@ class CameraCaptureManager(private val context: Context) { provider.unbindAll() provider.bindToLifecycle(owner, selector, capture) - val bytes = 
capture.takeJpegBytes(context.mainExecutor()) + val (bytes, orientation) = capture.takeJpegWithExif(context.mainExecutor()) val decoded = BitmapFactory.decodeByteArray(bytes, 0, bytes.size) ?: throw IllegalStateException("UNAVAILABLE: failed to decode captured image") + val rotated = rotateBitmapByExif(decoded, orientation) val scaled = - if (maxWidth != null && maxWidth > 0 && decoded.width > maxWidth) { + if (maxWidth != null && maxWidth > 0 && rotated.width > maxWidth) { val h = - (decoded.height.toDouble() * (maxWidth.toDouble() / decoded.width.toDouble())) + (rotated.height.toDouble() * (maxWidth.toDouble() / rotated.width.toDouble())) .toInt() .coerceAtLeast(1) - decoded.scale(maxWidth, h) + rotated.scale(maxWidth, h) } else { - decoded + rotated } val maxPayloadBytes = 5 * 1024 * 1024 @@ -194,6 +197,31 @@ class CameraCaptureManager(private val context: Context) { ) } + private fun rotateBitmapByExif(bitmap: Bitmap, orientation: Int): Bitmap { + val matrix = Matrix() + when (orientation) { + ExifInterface.ORIENTATION_ROTATE_90 -> matrix.postRotate(90f) + ExifInterface.ORIENTATION_ROTATE_180 -> matrix.postRotate(180f) + ExifInterface.ORIENTATION_ROTATE_270 -> matrix.postRotate(270f) + ExifInterface.ORIENTATION_FLIP_HORIZONTAL -> matrix.postScale(-1f, 1f) + ExifInterface.ORIENTATION_FLIP_VERTICAL -> matrix.postScale(1f, -1f) + ExifInterface.ORIENTATION_TRANSPOSE -> { + matrix.postRotate(90f) + matrix.postScale(-1f, 1f) + } + ExifInterface.ORIENTATION_TRANSVERSE -> { + matrix.postRotate(-90f) + matrix.postScale(-1f, 1f) + } + else -> return bitmap + } + val rotated = Bitmap.createBitmap(bitmap, 0, 0, bitmap.width, bitmap.height, matrix, true) + if (rotated !== bitmap) { + bitmap.recycle() + } + return rotated + } + private fun parseFacing(paramsJson: String?): String? 
= when { paramsJson?.contains("\"front\"") == true -> "front" @@ -254,7 +282,8 @@ private suspend fun Context.cameraProvider(): ProcessCameraProvider = ) } -private suspend fun ImageCapture.takeJpegBytes(executor: Executor): ByteArray = +/** Returns (jpegBytes, exifOrientation) so caller can rotate the decoded bitmap. */ +private suspend fun ImageCapture.takeJpegWithExif(executor: Executor): Pair<ByteArray, Int> = suspendCancellableCoroutine { cont -> val file = File.createTempFile("clawdbot-snap-", ".jpg") val options = ImageCapture.OutputFileOptions.Builder(file).build() @@ -263,13 +292,19 @@ private suspend fun ImageCapture.takeJpegBytes(executor: Executor): ByteArray = executor, object : ImageCapture.OnImageSavedCallback { override fun onError(exception: ImageCaptureException) { + file.delete() cont.resumeWithException(exception) } override fun onImageSaved(outputFileResults: ImageCapture.OutputFileResults) { try { + val exif = ExifInterface(file.absolutePath) + val orientation = exif.getAttributeInt( + ExifInterface.TAG_ORIENTATION, + ExifInterface.ORIENTATION_NORMAL, + ) val bytes = file.readBytes() - cont.resume(bytes) + cont.resume(Pair(bytes, orientation)) } catch (e: Exception) { cont.resumeWithException(e) } finally { diff --git a/apps/macos/Sources/Clawdbot/AgentEventsWindow.swift b/apps/macos/Sources/Clawdbot/AgentEventsWindow.swift index f37961ae3..e3ccc87bc 100644 --- a/apps/macos/Sources/Clawdbot/AgentEventsWindow.swift +++ b/apps/macos/Sources/Clawdbot/AgentEventsWindow.swift @@ -1,3 +1,4 @@ +import ClawdbotProtocol import SwiftUI @MainActor diff --git a/apps/macos/Sources/Clawdbot/AnyCodable.swift b/apps/macos/Sources/Clawdbot/AnyCodable.swift deleted file mode 100644 index 7c9a4668d..000000000 --- a/apps/macos/Sources/Clawdbot/AnyCodable.swift +++ /dev/null @@ -1,54 +0,0 @@ -import Foundation - -/// Lightweight `Codable` wrapper that round-trips heterogeneous JSON payloads. -/// Marked `@unchecked Sendable` because it can hold reference types. 
-struct AnyCodable: Codable, @unchecked Sendable { - let value: Any - - init(_ value: Any) { self.value = value } - - init(from decoder: Decoder) throws { - let container = try decoder.singleValueContainer() - if let intVal = try? container.decode(Int.self) { self.value = intVal; return } - if let doubleVal = try? container.decode(Double.self) { self.value = doubleVal; return } - if let boolVal = try? container.decode(Bool.self) { self.value = boolVal; return } - if let stringVal = try? container.decode(String.self) { self.value = stringVal; return } - if container.decodeNil() { self.value = NSNull(); return } - if let dict = try? container.decode([String: AnyCodable].self) { self.value = dict; return } - if let array = try? container.decode([AnyCodable].self) { self.value = array; return } - throw DecodingError.dataCorruptedError( - in: container, - debugDescription: "Unsupported type") - } - - func encode(to encoder: Encoder) throws { - var container = encoder.singleValueContainer() - switch self.value { - case let intVal as Int: try container.encode(intVal) - case let doubleVal as Double: try container.encode(doubleVal) - case let boolVal as Bool: try container.encode(boolVal) - case let stringVal as String: try container.encode(stringVal) - case is NSNull: try container.encodeNil() - case let dict as [String: AnyCodable]: try container.encode(dict) - case let array as [AnyCodable]: try container.encode(array) - case let dict as [String: Any]: - try container.encode(dict.mapValues { AnyCodable($0) }) - case let array as [Any]: - try container.encode(array.map { AnyCodable($0) }) - case let dict as NSDictionary: - var converted: [String: AnyCodable] = [:] - for (k, v) in dict { - guard let key = k as? 
String else { continue } - converted[key] = AnyCodable(v) - } - try container.encode(converted) - case let array as NSArray: - try container.encode(array.map { AnyCodable($0) }) - default: - let context = EncodingError.Context( - codingPath: encoder.codingPath, - debugDescription: "Unsupported type") - throw EncodingError.invalidValue(self.value, context) - } - } -} diff --git a/apps/macos/Sources/Clawdbot/Bridge/BridgeServer.swift b/apps/macos/Sources/Clawdbot/Bridge/BridgeServer.swift index 60d01459a..4b71e6dec 100644 --- a/apps/macos/Sources/Clawdbot/Bridge/BridgeServer.swift +++ b/apps/macos/Sources/Clawdbot/Bridge/BridgeServer.swift @@ -229,7 +229,7 @@ actor BridgeServer { error: BridgeRPCError(code: "FORBIDDEN", message: "Method not allowed")) } - let params: [String: AnyCodable]? + let params: [String: ClawdbotProtocol.AnyCodable]? if let json = req.paramsJSON?.trimmingCharacters(in: .whitespacesAndNewlines), !json.isEmpty { guard let data = json.data(using: .utf8) else { return BridgeRPCResponse( @@ -238,7 +238,7 @@ actor BridgeServer { error: BridgeRPCError(code: "INVALID_REQUEST", message: "paramsJSON not UTF-8")) } do { - params = try JSONDecoder().decode([String: AnyCodable].self, from: data) + params = try JSONDecoder().decode([String: ClawdbotProtocol.AnyCodable].self, from: data) } catch { return BridgeRPCResponse( id: req.id, @@ -360,16 +360,16 @@ actor BridgeServer { "reason \(reason)", ].compactMap(\.self).joined(separator: " · ") - var params: [String: AnyCodable] = [ - "text": AnyCodable(summary), - "instanceId": AnyCodable(nodeId), - "host": AnyCodable(host), - "mode": AnyCodable("node"), - "reason": AnyCodable(reason), - "tags": AnyCodable(tags), + var params: [String: ClawdbotProtocol.AnyCodable] = [ + "text": ClawdbotProtocol.AnyCodable(summary), + "instanceId": ClawdbotProtocol.AnyCodable(nodeId), + "host": ClawdbotProtocol.AnyCodable(host), + "mode": ClawdbotProtocol.AnyCodable("node"), + "reason": ClawdbotProtocol.AnyCodable(reason), + 
"tags": ClawdbotProtocol.AnyCodable(tags), ] - if let ip { params["ip"] = AnyCodable(ip) } - if let version { params["version"] = AnyCodable(version) } + if let ip { params["ip"] = ClawdbotProtocol.AnyCodable(ip) } + if let version { params["version"] = ClawdbotProtocol.AnyCodable(version) } await GatewayConnection.shared.sendSystemEvent(params) } diff --git a/apps/macos/Sources/Clawdbot/ClawdbotConfigFile.swift b/apps/macos/Sources/Clawdbot/ClawdbotConfigFile.swift index 433b3e1c8..83d38b79a 100644 --- a/apps/macos/Sources/Clawdbot/ClawdbotConfigFile.swift +++ b/apps/macos/Sources/Clawdbot/ClawdbotConfigFile.swift @@ -1,3 +1,4 @@ +import ClawdbotProtocol import Foundation enum ClawdbotConfigFile { @@ -32,7 +33,8 @@ enum ClawdbotConfigFile { } static func saveDict(_ dict: [String: Any]) { - if ProcessInfo.processInfo.isNixMode { return } + // Nix mode disables config writes in production, but tests rely on saving temp configs. + if ProcessInfo.processInfo.isNixMode, !ProcessInfo.processInfo.isRunningTests { return } do { let data = try JSONSerialization.data(withJSONObject: dict, options: [.prettyPrinted, .sortedKeys]) let url = self.url() diff --git a/apps/macos/Sources/Clawdbot/ClawdbotPaths.swift b/apps/macos/Sources/Clawdbot/ClawdbotPaths.swift index 9cd7985ea..3e32782c0 100644 --- a/apps/macos/Sources/Clawdbot/ClawdbotPaths.swift +++ b/apps/macos/Sources/Clawdbot/ClawdbotPaths.swift @@ -3,9 +3,9 @@ import Foundation enum ClawdbotEnv { static func path(_ key: String) -> String? { // Normalize env overrides once so UI + file IO stay consistent. - guard let value = ProcessInfo.processInfo.environment[key]? 
- .trimmingCharacters(in: .whitespacesAndNewlines), - !value.isEmpty + guard let raw = getenv(key) else { return nil } + let value = String(cString: raw).trimmingCharacters(in: .whitespacesAndNewlines) + guard !value.isEmpty else { return nil } diff --git a/apps/macos/Sources/Clawdbot/ConfigStore.swift b/apps/macos/Sources/Clawdbot/ConfigStore.swift index 9090dd1d8..93b10cff4 100644 --- a/apps/macos/Sources/Clawdbot/ConfigStore.swift +++ b/apps/macos/Sources/Clawdbot/ConfigStore.swift @@ -1,3 +1,4 @@ +import ClawdbotProtocol import Foundation enum ConfigStore { diff --git a/apps/macos/Sources/Clawdbot/CronJobEditor+Helpers.swift b/apps/macos/Sources/Clawdbot/CronJobEditor+Helpers.swift index ec07cc5e4..877c0c6c7 100644 --- a/apps/macos/Sources/Clawdbot/CronJobEditor+Helpers.swift +++ b/apps/macos/Sources/Clawdbot/CronJobEditor+Helpers.swift @@ -1,3 +1,4 @@ +import ClawdbotProtocol import Foundation import SwiftUI diff --git a/apps/macos/Sources/Clawdbot/CronJobEditor.swift b/apps/macos/Sources/Clawdbot/CronJobEditor.swift index 93d2615bf..144368bf1 100644 --- a/apps/macos/Sources/Clawdbot/CronJobEditor.swift +++ b/apps/macos/Sources/Clawdbot/CronJobEditor.swift @@ -1,3 +1,4 @@ +import ClawdbotProtocol import SwiftUI struct CronJobEditor: View { diff --git a/apps/macos/Sources/Clawdbot/CronSettings+Actions.swift b/apps/macos/Sources/Clawdbot/CronSettings+Actions.swift index 8ae63704b..0de686bad 100644 --- a/apps/macos/Sources/Clawdbot/CronSettings+Actions.swift +++ b/apps/macos/Sources/Clawdbot/CronSettings+Actions.swift @@ -1,3 +1,4 @@ +import ClawdbotProtocol import Foundation extension CronSettings { diff --git a/apps/macos/Sources/Clawdbot/GatewayAgentChannel.swift b/apps/macos/Sources/Clawdbot/GatewayAgentChannel.swift index da96723a1..69e70b2b6 100644 --- a/apps/macos/Sources/Clawdbot/GatewayAgentChannel.swift +++ b/apps/macos/Sources/Clawdbot/GatewayAgentChannel.swift @@ -16,12 +16,11 @@ enum GatewayAgentChannel: String, CaseIterable, Sendable { func 
shouldDeliver(_ isLast: Bool) -> Bool { switch self { case .webchat: - return false + false case .last: - return isLast + isLast case .whatsapp, .telegram: - return true + true } } } - diff --git a/apps/macos/Sources/Clawdbot/GatewayDiscoveryModel.swift b/apps/macos/Sources/Clawdbot/GatewayDiscoveryModel.swift index 5770670d1..384f9ca4f 100644 --- a/apps/macos/Sources/Clawdbot/GatewayDiscoveryModel.swift +++ b/apps/macos/Sources/Clawdbot/GatewayDiscoveryModel.swift @@ -208,9 +208,15 @@ final class GatewayDiscoveryModel { return merged } - static func parseGatewayTXT(_ txt: [String: String]) - -> (lanHost: String?, tailnetDns: String?, sshPort: Int, gatewayPort: Int?, cliPath: String?) - { + struct GatewayTXT: Equatable { + var lanHost: String? + var tailnetDns: String? + var sshPort: Int + var gatewayPort: Int? + var cliPath: String? + } + + static func parseGatewayTXT(_ txt: [String: String]) -> GatewayTXT { var lanHost: String? var tailnetDns: String? var sshPort = 22 @@ -242,7 +248,12 @@ final class GatewayDiscoveryModel { cliPath = trimmed.isEmpty ? 
nil : trimmed } - return (lanHost, tailnetDns, sshPort, gatewayPort, cliPath) + return GatewayTXT( + lanHost: lanHost, + tailnetDns: tailnetDns, + sshPort: sshPort, + gatewayPort: gatewayPort, + cliPath: cliPath) } static func buildSSHTarget(user: String, host: String, port: Int) -> String { diff --git a/apps/macos/Sources/Clawdbot/GatewayLaunchAgentManager.swift b/apps/macos/Sources/Clawdbot/GatewayLaunchAgentManager.swift index ee6b2e8e1..a4b718f35 100644 --- a/apps/macos/Sources/Clawdbot/GatewayLaunchAgentManager.swift +++ b/apps/macos/Sources/Clawdbot/GatewayLaunchAgentManager.swift @@ -43,25 +43,52 @@ enum GatewayLaunchAgentManager { return [gatewayBin, "gateway-daemon", "--port", "\(port)", "--bind", bind] } - static func status() async -> Bool { + static func isLoaded() async -> Bool { guard FileManager.default.fileExists(atPath: self.plistURL.path) else { return false } - let result = await self.runLaunchctl(["print", "gui/\(getuid())/\(gatewayLaunchdLabel)"]) + let result = await Launchctl.run(["print", "gui/\(getuid())/\(gatewayLaunchdLabel)"]) return result.status == 0 } static func set(enabled: Bool, bundlePath: String, port: Int) async -> String? { if enabled { - _ = await self.runLaunchctl(["bootout", "gui/\(getuid())/\(self.legacyGatewayLaunchdLabel)"]) + _ = await Launchctl.run(["bootout", "gui/\(getuid())/\(self.legacyGatewayLaunchdLabel)"]) try? FileManager.default.removeItem(at: self.legacyPlistURL) let gatewayBin = self.gatewayExecutablePath(bundlePath: bundlePath) guard FileManager.default.isExecutableFile(atPath: gatewayBin) else { self.logger.error("launchd enable failed: gateway missing at \(gatewayBin)") return "Embedded gateway missing in bundle; rebuild via scripts/package-mac-app.sh" } - self.logger.info("launchd enable requested port=\(port)") + + let desiredBind = self.preferredGatewayBind() ?? 
"loopback" + let desiredToken = self.preferredGatewayToken() + let desiredPassword = self.preferredGatewayPassword() + let desiredConfig = DesiredConfig( + port: port, + bind: desiredBind, + token: desiredToken, + password: desiredPassword) + + // If launchd already loaded the job (common on login), avoid `bootout` unless we must + // change the config. `bootout` can kill a just-started gateway and cause attach loops. + let loaded = await self.isLoaded() + if loaded, + let existing = self.readPlistConfig(), + existing.matches(desiredConfig) + { + self.logger.info("launchd job already loaded with desired config; skipping bootout") + await self.ensureEnabled() + _ = await Launchctl.run(["kickstart", "gui/\(getuid())/\(gatewayLaunchdLabel)"]) + return nil + } + + self.logger.info("launchd enable requested port=\(port) bind=\(desiredBind)") self.writePlist(bundlePath: bundlePath, port: port) - _ = await self.runLaunchctl(["bootout", "gui/\(getuid())/\(gatewayLaunchdLabel)"]) - let bootstrap = await self.runLaunchctl(["bootstrap", "gui/\(getuid())", self.plistURL.path]) + + await self.ensureEnabled() + if loaded { + _ = await Launchctl.run(["bootout", "gui/\(getuid())/\(gatewayLaunchdLabel)"]) + } + let bootstrap = await Launchctl.run(["bootstrap", "gui/\(getuid())", self.plistURL.path]) if bootstrap.status != 0 { let msg = bootstrap.output.trimmingCharacters(in: .whitespacesAndNewlines) self.logger.error("launchd bootstrap failed: \(msg)") @@ -69,20 +96,19 @@ enum GatewayLaunchAgentManager { ? "Failed to bootstrap gateway launchd job" : bootstrap.output.trimmingCharacters(in: .whitespacesAndNewlines) } - // Note: removed redundant `kickstart -k` that caused race condition. - // bootstrap already starts the job; kickstart -k would kill it immediately - // and with KeepAlive=true, cause a restart loop with port conflicts. 
+ await self.ensureEnabled() return nil } self.logger.info("launchd disable requested") - _ = await self.runLaunchctl(["bootout", "gui/\(getuid())/\(gatewayLaunchdLabel)"]) + _ = await Launchctl.run(["bootout", "gui/\(getuid())/\(gatewayLaunchdLabel)"]) + await self.ensureDisabled() try? FileManager.default.removeItem(at: self.plistURL) return nil } static func kickstart() async { - _ = await self.runLaunchctl(["kickstart", "-k", "gui/\(getuid())/\(gatewayLaunchdLabel)"]) + _ = await Launchctl.run(["kickstart", "-k", "gui/\(getuid())/\(gatewayLaunchdLabel)"]) } private static func writePlist(bundlePath: String, port: Int) { @@ -208,30 +234,57 @@ enum GatewayLaunchAgentManager { .replacingOccurrences(of: "'", with: "'") } - private struct LaunchctlResult { - let status: Int32 - let output: String + private struct DesiredConfig: Equatable { + let port: Int + let bind: String + let token: String? + let password: String? } - @discardableResult - private static func runLaunchctl(_ args: [String]) async -> LaunchctlResult { - await Task.detached(priority: .utility) { () -> LaunchctlResult in - let process = Process() - process.launchPath = "/bin/launchctl" - process.arguments = args - let pipe = Pipe() - process.standardOutput = pipe - process.standardError = pipe - do { - try process.run() - process.waitUntilExit() - let data = pipe.fileHandleForReading.readToEndSafely() - let output = String(data: data, encoding: .utf8) ?? "" - return LaunchctlResult(status: process.terminationStatus, output: output) - } catch { - return LaunchctlResult(status: -1, output: error.localizedDescription) - } - }.value + private struct InstalledConfig: Equatable { + let port: Int? + let bind: String? + let token: String? + let password: String? + + func matches(_ desired: DesiredConfig) -> Bool { + guard self.port == desired.port else { return false } + guard (self.bind ?? 
"loopback") == desired.bind else { return false } + guard self.token == desired.token else { return false } + guard self.password == desired.password else { return false } + return true + } + } + + private static func readPlistConfig() -> InstalledConfig? { + guard let snapshot = LaunchAgentPlist.snapshot(url: self.plistURL) else { return nil } + return InstalledConfig( + port: snapshot.port, + bind: snapshot.bind, + token: snapshot.token, + password: snapshot.password) + } + + private static func ensureEnabled() async { + let result = await Launchctl.run(["enable", "gui/\(getuid())/\(gatewayLaunchdLabel)"]) + guard result.status != 0 else { return } + let msg = result.output.trimmingCharacters(in: .whitespacesAndNewlines) + if msg.isEmpty { + self.logger.warning("launchd enable failed") + } else { + self.logger.warning("launchd enable failed: \(msg)") + } + } + + private static func ensureDisabled() async { + let result = await Launchctl.run(["disable", "gui/\(getuid())/\(gatewayLaunchdLabel)"]) + guard result.status != 0 else { return } + let msg = result.output.trimmingCharacters(in: .whitespacesAndNewlines) + if msg.isEmpty { + self.logger.warning("launchd disable failed") + } else { + self.logger.warning("launchd disable failed: \(msg)") + } } } diff --git a/apps/macos/Sources/Clawdbot/GatewayProcessManager.swift b/apps/macos/Sources/Clawdbot/GatewayProcessManager.swift index 62745cc66..3d046d855 100644 --- a/apps/macos/Sources/Clawdbot/GatewayProcessManager.swift +++ b/apps/macos/Sources/Clawdbot/GatewayProcessManager.swift @@ -70,7 +70,7 @@ final class GatewayProcessManager { func ensureLaunchAgentEnabledIfNeeded() async { guard !CommandResolver.connectionModeIsRemote() else { return } guard !AppStateStore.attachExistingGatewayOnly else { return } - let enabled = await GatewayLaunchAgentManager.status() + let enabled = await GatewayLaunchAgentManager.isLoaded() guard !enabled else { return } let bundlePath = Bundle.main.bundleURL.path let port = 
GatewayEnvironment.gatewayPort() diff --git a/apps/macos/Sources/Clawdbot/Launchctl.swift b/apps/macos/Sources/Clawdbot/Launchctl.swift new file mode 100644 index 000000000..ba52bb96b --- /dev/null +++ b/apps/macos/Sources/Clawdbot/Launchctl.swift @@ -0,0 +1,81 @@ +import Foundation + +enum Launchctl { + struct Result: Sendable { + let status: Int32 + let output: String + } + + @discardableResult + static func run(_ args: [String]) async -> Result { + await Task.detached(priority: .utility) { () -> Result in + let process = Process() + process.launchPath = "/bin/launchctl" + process.arguments = args + let pipe = Pipe() + process.standardOutput = pipe + process.standardError = pipe + do { + try process.run() + process.waitUntilExit() + let data = pipe.fileHandleForReading.readToEndSafely() + let output = String(data: data, encoding: .utf8) ?? "" + return Result(status: process.terminationStatus, output: output) + } catch { + return Result(status: -1, output: error.localizedDescription) + } + }.value + } +} + +struct LaunchAgentPlistSnapshot: Equatable, Sendable { + let programArguments: [String] + let environment: [String: String] + + let port: Int? + let bind: String? + let token: String? + let password: String? +} + +enum LaunchAgentPlist { + static func snapshot(url: URL) -> LaunchAgentPlistSnapshot? { + guard let data = try? Data(contentsOf: url) else { return nil } + let rootAny: Any + do { + rootAny = try PropertyListSerialization.propertyList( + from: data, + options: [], + format: nil) + } catch { + return nil + } + guard let root = rootAny as? [String: Any] else { return nil } + let programArguments = root["ProgramArguments"] as? [String] ?? [] + let env = root["EnvironmentVariables"] as? [String: String] ?? 
[:] + let port = Self.extractFlagInt(programArguments, flag: "--port") + let bind = Self.extractFlagString(programArguments, flag: "--bind")?.lowercased() + let token = env["CLAWDBOT_GATEWAY_TOKEN"]?.trimmingCharacters(in: .whitespacesAndNewlines).nonEmpty + let password = env["CLAWDBOT_GATEWAY_PASSWORD"]?.trimmingCharacters(in: .whitespacesAndNewlines).nonEmpty + return LaunchAgentPlistSnapshot( + programArguments: programArguments, + environment: env, + port: port, + bind: bind, + token: token, + password: password) + } + + private static func extractFlagInt(_ args: [String], flag: String) -> Int? { + guard let raw = self.extractFlagString(args, flag: flag) else { return nil } + return Int(raw) + } + + private static func extractFlagString(_ args: [String], flag: String) -> String? { + guard let idx = args.firstIndex(of: flag) else { return nil } + let valueIdx = args.index(after: idx) + guard valueIdx < args.endIndex else { return nil } + let token = args[valueIdx].trimmingCharacters(in: .whitespacesAndNewlines) + return token.isEmpty ? nil : token + } +} diff --git a/apps/macos/Sources/Clawdbot/MenuSessionsInjector.swift b/apps/macos/Sources/Clawdbot/MenuSessionsInjector.swift index c7fdcb545..8c2e01656 100644 --- a/apps/macos/Sources/Clawdbot/MenuSessionsInjector.swift +++ b/apps/macos/Sources/Clawdbot/MenuSessionsInjector.swift @@ -110,8 +110,7 @@ final class MenuSessionsInjector: NSObject, NSMenuDelegate { guard let insertIndex = self.findInsertIndex(in: menu) else { return } let width = self.initialWidth(for: menu) - - guard self.isControlChannelConnected else { return } + let isConnected = self.isControlChannelConnected var cursor = insertIndex var headerView: NSView? 
@@ -132,7 +131,9 @@ final class MenuSessionsInjector: NSObject, NSMenuDelegate { headerItem.tag = self.tag headerItem.isEnabled = false let hosted = self.makeHostedView( - rootView: AnyView(MenuSessionsHeaderView(count: rows.count, statusText: nil)), + rootView: AnyView(MenuSessionsHeaderView( + count: rows.count, + statusText: isConnected ? nil : "Gateway disconnected")), width: width, highlighted: false) headerItem.view = hosted @@ -163,16 +164,29 @@ final class MenuSessionsInjector: NSObject, NSMenuDelegate { let headerItem = NSMenuItem() headerItem.tag = self.tag headerItem.isEnabled = false + let statusText = isConnected + ? (self.cachedErrorText ?? "Loading sessions…") + : "Gateway disconnected" let hosted = self.makeHostedView( rootView: AnyView(MenuSessionsHeaderView( count: 0, - statusText: self.cachedErrorText ?? "Loading sessions…")), + statusText: statusText)), width: width, highlighted: false) headerItem.view = hosted headerView = hosted menu.insertItem(headerItem, at: cursor) cursor += 1 + + if !isConnected { + menu.insertItem( + self.makeMessageItem( + text: "Connect the gateway to see sessions", + symbolName: "bolt.slash", + width: width), + at: cursor) + cursor += 1 + } } cursor = self.insertUsageSection(into: menu, at: cursor, width: width) @@ -253,7 +267,7 @@ final class MenuSessionsInjector: NSObject, NSMenuDelegate { let rows = self.usageRows let errorText = self.cachedUsageErrorText - if rows.isEmpty && errorText == nil { + if rows.isEmpty, errorText == nil { return cursor } diff --git a/apps/macos/Sources/Clawdbot/MenuUsageHeaderView.swift b/apps/macos/Sources/Clawdbot/MenuUsageHeaderView.swift index 199b01cf1..73152143d 100644 --- a/apps/macos/Sources/Clawdbot/MenuUsageHeaderView.swift +++ b/apps/macos/Sources/Clawdbot/MenuUsageHeaderView.swift @@ -42,4 +42,3 @@ struct MenuUsageHeaderView: View { return "\(self.count) providers" } } - diff --git a/apps/macos/Sources/Clawdbot/NodeMode/MacNodeRuntime.swift 
b/apps/macos/Sources/Clawdbot/NodeMode/MacNodeRuntime.swift index cf0e28372..b439c66ea 100644 --- a/apps/macos/Sources/Clawdbot/NodeMode/MacNodeRuntime.swift +++ b/apps/macos/Sources/Clawdbot/NodeMode/MacNodeRuntime.swift @@ -5,8 +5,16 @@ import Foundation actor MacNodeRuntime { private let cameraCapture = CameraCaptureService() - @MainActor private let screenRecorder = ScreenRecordService() - @MainActor private let locationService = MacNodeLocationService() + private let makeMainActorServices: () async -> any MacNodeRuntimeMainActorServices + private var cachedMainActorServices: (any MacNodeRuntimeMainActorServices)? + + init( + makeMainActorServices: @escaping () async -> any MacNodeRuntimeMainActorServices = { + await MainActor.run { LiveMacNodeRuntimeMainActorServices() } + }) + { + self.makeMainActorServices = makeMainActorServices + } func handleInvoke(_ req: BridgeInvokeRequest) async -> BridgeInvokeResponse { let command = req.command @@ -212,7 +220,8 @@ actor MacNodeRuntime { ClawdbotLocationGetParams() let desired = params.desiredAccuracy ?? (Self.locationPreciseEnabled() ? 
.precise : .balanced) - let status = await self.locationService.authorizationStatus() + let services = await self.mainActorServices() + let status = await services.locationAuthorizationStatus() if status != .authorizedAlways { return BridgeInvokeResponse( id: req.id, @@ -222,11 +231,11 @@ actor MacNodeRuntime { message: "LOCATION_PERMISSION_REQUIRED: grant Location permission")) } do { - let location = try await self.locationService.currentLocation( + let location = try await services.currentLocation( desiredAccuracy: desired, maxAgeMs: params.maxAgeMs, timeoutMs: params.timeoutMs) - let isPrecise = await self.locationService.accuracyAuthorization() == .fullAccuracy + let isPrecise = await services.locationAccuracyAuthorization() == .fullAccuracy let payload = ClawdbotLocationPayload( lat: location.coordinate.latitude, lon: location.coordinate.longitude, @@ -265,7 +274,8 @@ actor MacNodeRuntime { code: .invalidRequest, message: "INVALID_REQUEST: screen format must be mp4") } - let res = try await self.screenRecorder.record( + let services = await self.mainActorServices() + let res = try await services.recordScreen( screenIndex: params.screenIndex, durationMs: params.durationMs, fps: params.fps, @@ -291,6 +301,13 @@ actor MacNodeRuntime { return BridgeInvokeResponse(id: req.id, ok: true, payloadJSON: payload) } + private func mainActorServices() async -> any MacNodeRuntimeMainActorServices { + if let cachedMainActorServices { return cachedMainActorServices } + let services = await self.makeMainActorServices() + self.cachedMainActorServices = services + return services + } + private func handleA2UIReset(_ req: BridgeInvokeRequest) async throws -> BridgeInvokeResponse { try await self.ensureA2UIHost() diff --git a/apps/macos/Sources/Clawdbot/NodeMode/MacNodeRuntimeMainActorServices.swift b/apps/macos/Sources/Clawdbot/NodeMode/MacNodeRuntimeMainActorServices.swift new file mode 100644 index 000000000..a6e03e3e3 --- /dev/null +++ 
b/apps/macos/Sources/Clawdbot/NodeMode/MacNodeRuntimeMainActorServices.swift @@ -0,0 +1,60 @@ +import ClawdbotKit +import CoreLocation +import Foundation + +@MainActor +protocol MacNodeRuntimeMainActorServices: Sendable { + func recordScreen( + screenIndex: Int?, + durationMs: Int?, + fps: Double?, + includeAudio: Bool?, + outPath: String?) async throws -> (path: String, hasAudio: Bool) + + func locationAuthorizationStatus() -> CLAuthorizationStatus + func locationAccuracyAuthorization() -> CLAccuracyAuthorization + func currentLocation( + desiredAccuracy: ClawdbotLocationAccuracy, + maxAgeMs: Int?, + timeoutMs: Int?) async throws -> CLLocation +} + +@MainActor +final class LiveMacNodeRuntimeMainActorServices: MacNodeRuntimeMainActorServices, @unchecked Sendable { + private let screenRecorder = ScreenRecordService() + private let locationService = MacNodeLocationService() + + func recordScreen( + screenIndex: Int?, + durationMs: Int?, + fps: Double?, + includeAudio: Bool?, + outPath: String?) async throws -> (path: String, hasAudio: Bool) + { + try await self.screenRecorder.record( + screenIndex: screenIndex, + durationMs: durationMs, + fps: fps, + includeAudio: includeAudio, + outPath: outPath) + } + + func locationAuthorizationStatus() -> CLAuthorizationStatus { + self.locationService.authorizationStatus() + } + + func locationAccuracyAuthorization() -> CLAccuracyAuthorization { + self.locationService.accuracyAuthorization() + } + + func currentLocation( + desiredAccuracy: ClawdbotLocationAccuracy, + maxAgeMs: Int?, + timeoutMs: Int?) 
async throws -> CLLocation + { + try await self.locationService.currentLocation( + desiredAccuracy: desiredAccuracy, + maxAgeMs: maxAgeMs, + timeoutMs: timeoutMs) + } +} diff --git a/apps/macos/Sources/Clawdbot/ProcessInfo+Clawdbot.swift b/apps/macos/Sources/Clawdbot/ProcessInfo+Clawdbot.swift index b4d037018..29f9e7251 100644 --- a/apps/macos/Sources/Clawdbot/ProcessInfo+Clawdbot.swift +++ b/apps/macos/Sources/Clawdbot/ProcessInfo+Clawdbot.swift @@ -2,11 +2,12 @@ import Foundation extension ProcessInfo { var isPreview: Bool { - self.environment["XCODE_RUNNING_FOR_PREVIEWS"] == "1" + guard let raw = getenv("XCODE_RUNNING_FOR_PREVIEWS") else { return false } + return String(cString: raw) == "1" } var isNixMode: Bool { - if self.environment["CLAWDBOT_NIX_MODE"] == "1" { return true } + if let raw = getenv("CLAWDBOT_NIX_MODE"), String(cString: raw) == "1" { return true } return UserDefaults.standard.bool(forKey: "clawdbot.nixMode") } diff --git a/apps/macos/Sources/Clawdbot/RemotePortTunnel.swift b/apps/macos/Sources/Clawdbot/RemotePortTunnel.swift index cf0818b28..34e952540 100644 --- a/apps/macos/Sources/Clawdbot/RemotePortTunnel.swift +++ b/apps/macos/Sources/Clawdbot/RemotePortTunnel.swift @@ -41,8 +41,8 @@ final class RemotePortTunnel { static func create( remotePort: Int, preferredLocalPort: UInt16? = nil, - allowRemoteUrlOverride: Bool = true - ) async throws -> RemotePortTunnel { + allowRemoteUrlOverride: Bool = true) async throws -> RemotePortTunnel + { let settings = CommandResolver.connectionSettings() guard settings.mode == .remote, let parsed = CommandResolver.parseSSHTarget(settings.target) else { throw NSError( diff --git a/apps/macos/Sources/Clawdbot/UsageData.swift b/apps/macos/Sources/Clawdbot/UsageData.swift index 0db492938..2318d98e8 100644 --- a/apps/macos/Sources/Clawdbot/UsageData.swift +++ b/apps/macos/Sources/Clawdbot/UsageData.swift @@ -29,8 +29,8 @@ struct UsageRow: Identifiable { let error: String? 
var titleText: String { - if let plan, !plan.isEmpty { return "\(displayName) (\(plan))" } - return displayName + if let plan, !plan.isEmpty { return "\(self.displayName) (\(plan))" } + return self.displayName } var remainingPercent: Int? { @@ -107,4 +107,3 @@ enum UsageLoader { return try JSONDecoder().decode(GatewayUsageSummary.self, from: data) } } - diff --git a/apps/macos/Sources/Clawdbot/UsageMenuLabelView.swift b/apps/macos/Sources/Clawdbot/UsageMenuLabelView.swift index c5514a53d..4b1193e2f 100644 --- a/apps/macos/Sources/Clawdbot/UsageMenuLabelView.swift +++ b/apps/macos/Sources/Clawdbot/UsageMenuLabelView.swift @@ -21,7 +21,7 @@ struct UsageMenuLabelView: View { } HStack(alignment: .firstTextBaseline, spacing: 6) { - Text(row.titleText) + Text(self.row.titleText) .font(.caption.weight(.semibold)) .foregroundStyle(self.primaryTextColor) .lineLimit(1) @@ -30,7 +30,7 @@ struct UsageMenuLabelView: View { Spacer(minLength: 4) - Text(row.detailText()) + Text(self.row.detailText()) .font(.caption.monospacedDigit()) .foregroundStyle(self.secondaryTextColor) .lineLimit(1) @@ -43,4 +43,3 @@ struct UsageMenuLabelView: View { .padding(.trailing, self.paddingTrailing) } } - diff --git a/apps/macos/Sources/Clawdbot/WorkActivityStore.swift b/apps/macos/Sources/Clawdbot/WorkActivityStore.swift index 47d241ace..9ab5b93d4 100644 --- a/apps/macos/Sources/Clawdbot/WorkActivityStore.swift +++ b/apps/macos/Sources/Clawdbot/WorkActivityStore.swift @@ -1,4 +1,5 @@ import ClawdbotKit +import ClawdbotProtocol import Foundation import Observation import SwiftUI @@ -53,7 +54,7 @@ final class WorkActivityStore { phase: String, name: String?, meta: String?, - args: [String: AnyCodable]?) + args: [String: ClawdbotProtocol.AnyCodable]?) { let toolKind = Self.mapToolKind(name) let label = Self.buildLabel(name: name, meta: meta, args: args) @@ -211,7 +212,7 @@ final class WorkActivityStore { private static func buildLabel( name: String?, meta: String?, - args: [String: AnyCodable]?) 
-> String + args: [String: ClawdbotProtocol.AnyCodable]?) -> String { let wrappedArgs = self.wrapToolArgs(args) let display = ToolDisplayRegistry.resolve(name: name ?? "tool", args: wrappedArgs, meta: meta) @@ -221,17 +222,17 @@ final class WorkActivityStore { return display.label } - private static func wrapToolArgs(_ args: [String: AnyCodable]?) -> ClawdbotKit.AnyCodable? { + private static func wrapToolArgs(_ args: [String: ClawdbotProtocol.AnyCodable]?) -> ClawdbotKit.AnyCodable? { guard let args else { return nil } let converted: [String: Any] = args.mapValues { self.unwrapJSONValue($0.value) } return ClawdbotKit.AnyCodable(converted) } private static func unwrapJSONValue(_ value: Any) -> Any { - if let dict = value as? [String: AnyCodable] { + if let dict = value as? [String: ClawdbotProtocol.AnyCodable] { return dict.mapValues { self.unwrapJSONValue($0.value) } } - if let array = value as? [AnyCodable] { + if let array = value as? [ClawdbotProtocol.AnyCodable] { return array.map { self.unwrapJSONValue($0.value) } } if let dict = value as? 
[String: Any] { diff --git a/apps/macos/Tests/ClawdbotIPCTests/AgentEventStoreTests.swift b/apps/macos/Tests/ClawdbotIPCTests/AgentEventStoreTests.swift index 1353c8b4f..1b0e75207 100644 --- a/apps/macos/Tests/ClawdbotIPCTests/AgentEventStoreTests.swift +++ b/apps/macos/Tests/ClawdbotIPCTests/AgentEventStoreTests.swift @@ -1,5 +1,6 @@ import Foundation import Testing +import ClawdbotProtocol @testable import Clawdbot @Suite @@ -15,7 +16,7 @@ struct AgentEventStoreTests { seq: 1, stream: "test", ts: 0, - data: [:] as [String: AnyCodable], + data: [:] as [String: ClawdbotProtocol.AnyCodable], summary: nil)) #expect(store.events.count == 1) @@ -32,7 +33,7 @@ struct AgentEventStoreTests { seq: i, stream: "test", ts: Double(i), - data: [:] as [String: AnyCodable], + data: [:] as [String: ClawdbotProtocol.AnyCodable], summary: nil)) } diff --git a/apps/macos/Tests/ClawdbotIPCTests/AnyCodableEncodingTests.swift b/apps/macos/Tests/ClawdbotIPCTests/AnyCodableEncodingTests.swift index 897ab6433..cb1cec109 100644 --- a/apps/macos/Tests/ClawdbotIPCTests/AnyCodableEncodingTests.swift +++ b/apps/macos/Tests/ClawdbotIPCTests/AnyCodableEncodingTests.swift @@ -12,7 +12,7 @@ import Testing "null": NSNull(), ] - let data = try JSONEncoder().encode(Clawdbot.AnyCodable(payload)) + let data = try JSONEncoder().encode(ClawdbotProtocol.AnyCodable(payload)) let obj = try #require(JSONSerialization.jsonObject(with: data) as? [String: Any]) #expect(obj["tags"] as? 
[String] == ["node", "ios"]) diff --git a/apps/macos/Tests/ClawdbotIPCTests/ClawdbotConfigFileTests.swift b/apps/macos/Tests/ClawdbotIPCTests/ClawdbotConfigFileTests.swift index b976541f6..9ee97e22c 100644 --- a/apps/macos/Tests/ClawdbotIPCTests/ClawdbotConfigFileTests.swift +++ b/apps/macos/Tests/ClawdbotIPCTests/ClawdbotConfigFileTests.swift @@ -2,29 +2,29 @@ import Foundation import Testing @testable import Clawdbot -@Suite +@Suite(.serialized) struct ClawdbotConfigFileTests { @Test - func configPathRespectsEnvOverride() { + func configPathRespectsEnvOverride() async { let override = FileManager.default.temporaryDirectory .appendingPathComponent("clawdbot-config-\(UUID().uuidString)") .appendingPathComponent("clawdbot.json") .path - self.withEnv("CLAWDBOT_CONFIG_PATH", value: override) { + await TestIsolation.withEnvValues(["CLAWDBOT_CONFIG_PATH": override]) { #expect(ClawdbotConfigFile.url().path == override) } } @MainActor @Test - func remoteGatewayPortParsesAndMatchesHost() { + func remoteGatewayPortParsesAndMatchesHost() async { let override = FileManager.default.temporaryDirectory .appendingPathComponent("clawdbot-config-\(UUID().uuidString)") .appendingPathComponent("clawdbot.json") .path - self.withEnv("CLAWDBOT_CONFIG_PATH", value: override) { + await TestIsolation.withEnvValues(["CLAWDBOT_CONFIG_PATH": override]) { ClawdbotConfigFile.saveDict([ "gateway": [ "remote": [ @@ -41,13 +41,13 @@ struct ClawdbotConfigFileTests { @MainActor @Test - func setRemoteGatewayUrlPreservesScheme() { + func setRemoteGatewayUrlPreservesScheme() async { let override = FileManager.default.temporaryDirectory .appendingPathComponent("clawdbot-config-\(UUID().uuidString)") .appendingPathComponent("clawdbot.json") .path - self.withEnv("CLAWDBOT_CONFIG_PATH", value: override) { + await TestIsolation.withEnvValues(["CLAWDBOT_CONFIG_PATH": override]) { ClawdbotConfigFile.saveDict([ "gateway": [ "remote": [ @@ -63,33 +63,17 @@ struct ClawdbotConfigFileTests { } @Test - func 
stateDirOverrideSetsConfigPath() { + func stateDirOverrideSetsConfigPath() async { let dir = FileManager.default.temporaryDirectory .appendingPathComponent("clawdbot-state-\(UUID().uuidString)", isDirectory: true) .path - self.withEnv("CLAWDBOT_CONFIG_PATH", value: nil) { - self.withEnv("CLAWDBOT_STATE_DIR", value: dir) { - #expect(ClawdbotConfigFile.stateDirURL().path == dir) - #expect(ClawdbotConfigFile.url().path == "\(dir)/clawdbot.json") - } + await TestIsolation.withEnvValues([ + "CLAWDBOT_CONFIG_PATH": nil, + "CLAWDBOT_STATE_DIR": dir, + ]) { + #expect(ClawdbotConfigFile.stateDirURL().path == dir) + #expect(ClawdbotConfigFile.url().path == "\(dir)/clawdbot.json") } } - - private func withEnv(_ key: String, value: String?, _ body: () -> Void) { - let previous = ProcessInfo.processInfo.environment[key] - if let value { - setenv(key, value, 1) - } else { - unsetenv(key) - } - defer { - if let previous { - setenv(key, previous, 1) - } else { - unsetenv(key) - } - } - body() - } } diff --git a/apps/macos/Tests/ClawdbotIPCTests/CronJobEditorSmokeTests.swift b/apps/macos/Tests/ClawdbotIPCTests/CronJobEditorSmokeTests.swift index efa369e5c..ea5f86579 100644 --- a/apps/macos/Tests/ClawdbotIPCTests/CronJobEditorSmokeTests.swift +++ b/apps/macos/Tests/ClawdbotIPCTests/CronJobEditorSmokeTests.swift @@ -35,7 +35,7 @@ struct CronJobEditorSmokeTests { thinking: "low", timeoutSeconds: 120, deliver: true, - channel: "whatsapp", + provider: "whatsapp", to: "+15551234567", bestEffortDeliver: true), isolation: CronIsolation(postToMainPrefix: "Cron"), diff --git a/apps/macos/Tests/ClawdbotIPCTests/CronModelsTests.swift b/apps/macos/Tests/ClawdbotIPCTests/CronModelsTests.swift index fe478ac14..81ef2e96e 100644 --- a/apps/macos/Tests/ClawdbotIPCTests/CronModelsTests.swift +++ b/apps/macos/Tests/ClawdbotIPCTests/CronModelsTests.swift @@ -31,7 +31,7 @@ struct CronModelsTests { thinking: "low", timeoutSeconds: 15, deliver: true, - channel: "whatsapp", + provider: "whatsapp", to: 
"+15551234567", bestEffortDeliver: false) let data = try JSONEncoder().encode(payload) diff --git a/apps/macos/Tests/ClawdbotIPCTests/GatewayChannelConfigureTests.swift b/apps/macos/Tests/ClawdbotIPCTests/GatewayChannelConfigureTests.swift index 7893dafe7..22c83d4fd 100644 --- a/apps/macos/Tests/ClawdbotIPCTests/GatewayChannelConfigureTests.swift +++ b/apps/macos/Tests/ClawdbotIPCTests/GatewayChannelConfigureTests.swift @@ -170,7 +170,7 @@ import Testing let url = URL(string: "ws://example.invalid")! let cfg = ConfigSource(token: nil) let conn = GatewayConnection( - configProvider: { (url, cfg.snapshotToken()) }, + configProvider: { (url: url, token: cfg.snapshotToken(), password: nil) }, sessionBox: WebSocketSessionBox(session: session)) _ = try await conn.request(method: "status", params: nil) @@ -186,7 +186,7 @@ import Testing let url = URL(string: "ws://example.invalid")! let cfg = ConfigSource(token: "a") let conn = GatewayConnection( - configProvider: { (url, cfg.snapshotToken()) }, + configProvider: { (url: url, token: cfg.snapshotToken(), password: nil) }, sessionBox: WebSocketSessionBox(session: session)) _ = try await conn.request(method: "status", params: nil) @@ -203,7 +203,7 @@ import Testing let url = URL(string: "ws://example.invalid")! let cfg = ConfigSource(token: nil) let conn = GatewayConnection( - configProvider: { (url, cfg.snapshotToken()) }, + configProvider: { (url: url, token: cfg.snapshotToken(), password: nil) }, sessionBox: WebSocketSessionBox(session: session)) async let r1: Data = conn.request(method: "status", params: nil) @@ -218,7 +218,7 @@ import Testing let url = URL(string: "ws://example.invalid")! 
let cfg = ConfigSource(token: nil) let conn = GatewayConnection( - configProvider: { (url, cfg.snapshotToken()) }, + configProvider: { (url: url, token: cfg.snapshotToken(), password: nil) }, sessionBox: WebSocketSessionBox(session: session)) _ = try await conn.request(method: "status", params: nil) @@ -239,7 +239,7 @@ import Testing let url = URL(string: "ws://example.invalid")! let cfg = ConfigSource(token: nil) let conn = GatewayConnection( - configProvider: { (url, cfg.snapshotToken()) }, + configProvider: { (url: url, token: cfg.snapshotToken(), password: nil) }, sessionBox: WebSocketSessionBox(session: session)) let stream = await conn.subscribe(bufferingNewest: 10) diff --git a/apps/macos/Tests/ClawdbotIPCTests/GatewayEnvironmentTests.swift b/apps/macos/Tests/ClawdbotIPCTests/GatewayEnvironmentTests.swift index 0e4e35e6f..20d5b5973 100644 --- a/apps/macos/Tests/ClawdbotIPCTests/GatewayEnvironmentTests.swift +++ b/apps/macos/Tests/ClawdbotIPCTests/GatewayEnvironmentTests.swift @@ -19,13 +19,19 @@ import Testing #expect(Semver(major: 1, minor: 9, patch: 9).compatible(with: required) == false) } - @Test func gatewayPortDefaultsAndRespectsOverride() { - let defaultPort = GatewayEnvironment.gatewayPort() - #expect(defaultPort == 18789) + @Test func gatewayPortDefaultsAndRespectsOverride() async { + let configPath = TestIsolation.tempConfigPath() + await TestIsolation.withIsolatedState( + env: ["CLAWDBOT_CONFIG_PATH": configPath], + defaults: ["gatewayPort": nil]) + { + let defaultPort = GatewayEnvironment.gatewayPort() + #expect(defaultPort == 18789) - UserDefaults.standard.set(19999, forKey: "gatewayPort") - defer { UserDefaults.standard.removeObject(forKey: "gatewayPort") } - #expect(GatewayEnvironment.gatewayPort() == 19999) + UserDefaults.standard.set(19999, forKey: "gatewayPort") + defer { UserDefaults.standard.removeObject(forKey: "gatewayPort") } + #expect(GatewayEnvironment.gatewayPort() == 19999) + } } @Test func 
expectedGatewayVersionFromStringUsesParser() { diff --git a/apps/macos/Tests/ClawdbotIPCTests/GatewayLaunchAgentManagerTests.swift b/apps/macos/Tests/ClawdbotIPCTests/GatewayLaunchAgentManagerTests.swift new file mode 100644 index 000000000..ae8357b0c --- /dev/null +++ b/apps/macos/Tests/ClawdbotIPCTests/GatewayLaunchAgentManagerTests.swift @@ -0,0 +1,41 @@ +import Foundation +import Testing +@testable import Clawdbot + +@Suite struct GatewayLaunchAgentManagerTests { + @Test func launchAgentPlistSnapshotParsesArgsAndEnv() throws { + let url = FileManager.default.temporaryDirectory + .appendingPathComponent("clawdbot-launchd-\(UUID().uuidString).plist") + let plist: [String: Any] = [ + "ProgramArguments": ["clawdbot", "gateway-daemon", "--port", "18789", "--bind", "loopback"], + "EnvironmentVariables": [ + "CLAWDBOT_GATEWAY_TOKEN": " secret ", + "CLAWDBOT_GATEWAY_PASSWORD": "pw", + ], + ] + let data = try PropertyListSerialization.data(fromPropertyList: plist, format: .xml, options: 0) + try data.write(to: url, options: [.atomic]) + defer { try? FileManager.default.removeItem(at: url) } + + let snapshot = try #require(LaunchAgentPlist.snapshot(url: url)) + #expect(snapshot.port == 18789) + #expect(snapshot.bind == "loopback") + #expect(snapshot.token == "secret") + #expect(snapshot.password == "pw") + } + + @Test func launchAgentPlistSnapshotAllowsMissingBind() throws { + let url = FileManager.default.temporaryDirectory + .appendingPathComponent("clawdbot-launchd-\(UUID().uuidString).plist") + let plist: [String: Any] = [ + "ProgramArguments": ["clawdbot", "gateway-daemon", "--port", "18789"], + ] + let data = try PropertyListSerialization.data(fromPropertyList: plist, format: .xml, options: 0) + try data.write(to: url, options: [.atomic]) + defer { try? 
FileManager.default.removeItem(at: url) } + + let snapshot = try #require(LaunchAgentPlist.snapshot(url: url)) + #expect(snapshot.port == 18789) + #expect(snapshot.bind == nil) + } +} diff --git a/apps/macos/Tests/ClawdbotIPCTests/LowCoverageHelperTests.swift b/apps/macos/Tests/ClawdbotIPCTests/LowCoverageHelperTests.swift index e7b745b08..6ee7cc012 100644 --- a/apps/macos/Tests/ClawdbotIPCTests/LowCoverageHelperTests.swift +++ b/apps/macos/Tests/ClawdbotIPCTests/LowCoverageHelperTests.swift @@ -1,6 +1,7 @@ import AppKit import Foundation import Testing +import ClawdbotProtocol @testable import Clawdbot @@ -23,7 +24,7 @@ struct LowCoverageHelperTests { #expect(dict["list"]?.arrayValue?.count == 2) let foundation = any.foundationValue as? [String: Any] - #expect(foundation?["title"] as? String == "Hello") + #expect((foundation?["title"] as? String) == "Hello") } @Test func attributedStringStripsForegroundColor() { @@ -92,34 +93,22 @@ struct LowCoverageHelperTests { _ = PresenceReporter._testPrimaryIPv4Address() } - @Test func gatewayLaunchAgentHelpers() { - let keyBind = "CLAWDBOT_GATEWAY_BIND" - let keyToken = "CLAWDBOT_GATEWAY_TOKEN" - let previousBind = ProcessInfo.processInfo.environment[keyBind] - let previousToken = ProcessInfo.processInfo.environment[keyToken] - defer { - if let previousBind { - setenv(keyBind, previousBind, 1) - } else { - unsetenv(keyBind) - } - if let previousToken { - setenv(keyToken, previousToken, 1) - } else { - unsetenv(keyToken) - } + @Test func gatewayLaunchAgentHelpers() async throws { + await TestIsolation.withEnvValues( + [ + "CLAWDBOT_GATEWAY_BIND": "Lan", + "CLAWDBOT_GATEWAY_TOKEN": " secret ", + ]) + { + #expect(GatewayLaunchAgentManager._testPreferredGatewayBind() == "lan") + #expect(GatewayLaunchAgentManager._testPreferredGatewayToken() == "secret") + #expect( + GatewayLaunchAgentManager._testEscapePlistValue("a&b\"'") == + "a&b<c>"'") + + #expect(GatewayLaunchAgentManager._testGatewayExecutablePath(bundlePath: "/App") == 
"/App/Contents/Resources/Relay/clawdbot") + #expect(GatewayLaunchAgentManager._testRelayDir(bundlePath: "/App") == "/App/Contents/Resources/Relay") } - - setenv(keyBind, "Lan", 1) - setenv(keyToken, " secret ", 1) - #expect(GatewayLaunchAgentManager._testPreferredGatewayBind() == "lan") - #expect(GatewayLaunchAgentManager._testPreferredGatewayToken() == "secret") - #expect( - GatewayLaunchAgentManager._testEscapePlistValue("a&b\"'") == - "a&b<c>"'") - - #expect(GatewayLaunchAgentManager._testGatewayExecutablePath(bundlePath: "/App") == "/App/Contents/Resources/Relay/clawdbot") - #expect(GatewayLaunchAgentManager._testRelayDir(bundlePath: "/App") == "/App/Contents/Resources/Relay") } @Test func portGuardianParsesListenersAndBuildsReports() { diff --git a/apps/macos/Tests/ClawdbotIPCTests/LowCoverageViewSmokeTests.swift b/apps/macos/Tests/ClawdbotIPCTests/LowCoverageViewSmokeTests.swift index 98018035b..27aff597e 100644 --- a/apps/macos/Tests/ClawdbotIPCTests/LowCoverageViewSmokeTests.swift +++ b/apps/macos/Tests/ClawdbotIPCTests/LowCoverageViewSmokeTests.swift @@ -1,6 +1,7 @@ import AppKit import SwiftUI import Testing +import ClawdbotProtocol @testable import Clawdbot diff --git a/apps/macos/Tests/ClawdbotIPCTests/MacNodeRuntimeTests.swift b/apps/macos/Tests/ClawdbotIPCTests/MacNodeRuntimeTests.swift index 7b64265f5..2dd408f1f 100644 --- a/apps/macos/Tests/ClawdbotIPCTests/MacNodeRuntimeTests.swift +++ b/apps/macos/Tests/ClawdbotIPCTests/MacNodeRuntimeTests.swift @@ -1,9 +1,9 @@ import ClawdbotKit +import CoreLocation import Foundation import Testing @testable import Clawdbot -@Suite(.serialized) struct MacNodeRuntimeTests { @Test func handleInvokeRejectsUnknownCommand() async { let runtime = MacNodeRuntime() @@ -31,21 +31,58 @@ struct MacNodeRuntimeTests { } @Test func handleInvokeCameraListRequiresEnabledCamera() async { - let defaults = UserDefaults.standard - let previous = defaults.object(forKey: cameraEnabledKey) - defaults.set(false, forKey: 
cameraEnabledKey) - defer { - if let previous { - defaults.set(previous, forKey: cameraEnabledKey) - } else { - defaults.removeObject(forKey: cameraEnabledKey) + await TestIsolation.withUserDefaultsValues([cameraEnabledKey: false]) { + let runtime = MacNodeRuntime() + let response = await runtime.handleInvoke( + BridgeInvokeRequest(id: "req-4", command: ClawdbotCameraCommand.list.rawValue)) + #expect(response.ok == false) + #expect(response.error?.message.contains("CAMERA_DISABLED") == true) + } + } + + @Test func handleInvokeScreenRecordUsesInjectedServices() async throws { + @MainActor + final class FakeMainActorServices: MacNodeRuntimeMainActorServices, @unchecked Sendable { + func recordScreen( + screenIndex: Int?, + durationMs: Int?, + fps: Double?, + includeAudio: Bool?, + outPath: String?) async throws -> (path: String, hasAudio: Bool) + { + let url = FileManager.default.temporaryDirectory + .appendingPathComponent("clawdbot-test-screen-record-\(UUID().uuidString).mp4") + try Data("ok".utf8).write(to: url) + return (path: url.path, hasAudio: false) + } + + func locationAuthorizationStatus() -> CLAuthorizationStatus { .authorizedAlways } + func locationAccuracyAuthorization() -> CLAccuracyAuthorization { .fullAccuracy } + func currentLocation( + desiredAccuracy: ClawdbotLocationAccuracy, + maxAgeMs: Int?, + timeoutMs: Int?) 
async throws -> CLLocation + { + CLLocation(latitude: 0, longitude: 0) } } - let runtime = MacNodeRuntime() + let services = await MainActor.run { FakeMainActorServices() } + let runtime = MacNodeRuntime(makeMainActorServices: { services }) + + let params = MacNodeScreenRecordParams(durationMs: 250) + let json = String(data: try JSONEncoder().encode(params), encoding: .utf8) let response = await runtime.handleInvoke( - BridgeInvokeRequest(id: "req-4", command: ClawdbotCameraCommand.list.rawValue)) - #expect(response.ok == false) - #expect(response.error?.message.contains("CAMERA_DISABLED") == true) + BridgeInvokeRequest(id: "req-5", command: MacNodeScreenCommand.record.rawValue, paramsJSON: json)) + #expect(response.ok == true) + let payloadJSON = try #require(response.payloadJSON) + + struct Payload: Decodable { + var format: String + var base64: String + } + let payload = try JSONDecoder().decode(Payload.self, from: Data(payloadJSON.utf8)) + #expect(payload.format == "mp4") + #expect(!payload.base64.isEmpty) } } diff --git a/apps/macos/Tests/ClawdbotIPCTests/MenuSessionsInjectorTests.swift b/apps/macos/Tests/ClawdbotIPCTests/MenuSessionsInjectorTests.swift index 2e5bafcfd..cae8b7be4 100644 --- a/apps/macos/Tests/ClawdbotIPCTests/MenuSessionsInjectorTests.swift +++ b/apps/macos/Tests/ClawdbotIPCTests/MenuSessionsInjectorTests.swift @@ -30,7 +30,7 @@ struct MenuSessionsInjectorTests { key: "main", kind: .direct, displayName: nil, - surface: nil, + provider: nil, subject: nil, room: nil, space: nil, @@ -47,7 +47,7 @@ struct MenuSessionsInjectorTests { key: "discord:group:alpha", kind: .group, displayName: nil, - surface: nil, + provider: nil, subject: nil, room: nil, space: nil, diff --git a/apps/macos/Tests/ClawdbotIPCTests/SessionDataTests.swift b/apps/macos/Tests/ClawdbotIPCTests/SessionDataTests.swift index 96f21ab0c..d52c9aecb 100644 --- a/apps/macos/Tests/ClawdbotIPCTests/SessionDataTests.swift +++ b/apps/macos/Tests/ClawdbotIPCTests/SessionDataTests.swift @@ 
-28,7 +28,7 @@ struct SessionDataTests { key: "user@example.com", kind: .direct, displayName: nil, - surface: nil, + provider: nil, subject: nil, room: nil, space: nil, diff --git a/apps/macos/Tests/ClawdbotIPCTests/SettingsViewSmokeTests.swift b/apps/macos/Tests/ClawdbotIPCTests/SettingsViewSmokeTests.swift index d3fe9e07d..c59aba43a 100644 --- a/apps/macos/Tests/ClawdbotIPCTests/SettingsViewSmokeTests.swift +++ b/apps/macos/Tests/ClawdbotIPCTests/SettingsViewSmokeTests.swift @@ -45,7 +45,7 @@ struct SettingsViewSmokeTests { thinking: "low", timeoutSeconds: 30, deliver: true, - channel: "sms", + provider: "sms", to: "+15551234567", bestEffortDeliver: true), isolation: CronIsolation(postToMainPrefix: "[cron] "), diff --git a/apps/macos/Tests/ClawdbotIPCTests/SkillsSettingsSmokeTests.swift b/apps/macos/Tests/ClawdbotIPCTests/SkillsSettingsSmokeTests.swift index afa028dcf..f2d8a61bf 100644 --- a/apps/macos/Tests/ClawdbotIPCTests/SkillsSettingsSmokeTests.swift +++ b/apps/macos/Tests/ClawdbotIPCTests/SkillsSettingsSmokeTests.swift @@ -1,4 +1,5 @@ import Testing +import ClawdbotProtocol @testable import Clawdbot @Suite(.serialized) diff --git a/apps/macos/Tests/ClawdbotIPCTests/TestIsolation.swift b/apps/macos/Tests/ClawdbotIPCTests/TestIsolation.swift new file mode 100644 index 000000000..03c32607f --- /dev/null +++ b/apps/macos/Tests/ClawdbotIPCTests/TestIsolation.swift @@ -0,0 +1,116 @@ +import Foundation + +actor TestIsolationLock { + static let shared = TestIsolationLock() + + private var locked = false + private var waiters: [CheckedContinuation<Void, Never>] = [] + + func acquire() async { + if !self.locked { + self.locked = true + return + } + await withCheckedContinuation { cont in + self.waiters.append(cont) + } + // `release()` resumed us; lock is now held for this caller. 
+ } + + func release() { + if self.waiters.isEmpty { + self.locked = false + return + } + let next = self.waiters.removeFirst() + next.resume() + } +} + +@MainActor +enum TestIsolation { + static func withIsolatedState<T>( + env: [String: String?] = [:], + defaults: [String: Any?] = [:], + _ body: () async throws -> T) async rethrows -> T + { + await TestIsolationLock.shared.acquire() + var previousEnv: [String: String?] = [:] + for (key, value) in env { + previousEnv[key] = getenv(key).map { String(cString: $0) } + if let value { + setenv(key, value, 1) + } else { + unsetenv(key) + } + } + + let userDefaults = UserDefaults.standard + var previousDefaults: [String: Any?] = [:] + for (key, value) in defaults { + previousDefaults[key] = userDefaults.object(forKey: key) + if let value { + userDefaults.set(value, forKey: key) + } else { + userDefaults.removeObject(forKey: key) + } + } + + do { + let result = try await body() + for (key, value) in previousDefaults { + if let value { + userDefaults.set(value, forKey: key) + } else { + userDefaults.removeObject(forKey: key) + } + } + for (key, value) in previousEnv { + if let value { + setenv(key, value, 1) + } else { + unsetenv(key) + } + } + await TestIsolationLock.shared.release() + return result + } catch { + for (key, value) in previousDefaults { + if let value { + userDefaults.set(value, forKey: key) + } else { + userDefaults.removeObject(forKey: key) + } + } + for (key, value) in previousEnv { + if let value { + setenv(key, value, 1) + } else { + unsetenv(key) + } + } + await TestIsolationLock.shared.release() + throw error + } + } + + static func withEnvValues<T>( + _ values: [String: String?], + _ body: () async throws -> T) async rethrows -> T + { + try await Self.withIsolatedState(env: values, defaults: [:], body) + } + + static func withUserDefaultsValues<T>( + _ values: [String: Any?], + _ body: () async throws -> T) async rethrows -> T + { + try await Self.withIsolatedState(env: [:], defaults: values, body) + } + + 
nonisolated static func tempConfigPath() -> String { + FileManager.default.temporaryDirectory + .appendingPathComponent("clawdbot-test-config-\(UUID().uuidString).json") + .path + } +} diff --git a/apps/macos/Tests/ClawdbotIPCTests/VoiceWakeForwarderTests.swift b/apps/macos/Tests/ClawdbotIPCTests/VoiceWakeForwarderTests.swift index b8318c3fe..35a96626b 100644 --- a/apps/macos/Tests/ClawdbotIPCTests/VoiceWakeForwarderTests.swift +++ b/apps/macos/Tests/ClawdbotIPCTests/VoiceWakeForwarderTests.swift @@ -17,6 +17,6 @@ import Testing #expect(opts.thinking == "low") #expect(opts.deliver == true) #expect(opts.to == nil) - #expect(opts.channel == .last) + #expect(opts.provider == .last) } } diff --git a/apps/macos/Tests/ClawdbotIPCTests/WorkActivityStoreTests.swift b/apps/macos/Tests/ClawdbotIPCTests/WorkActivityStoreTests.swift index 50a4e69d6..983c394b3 100644 --- a/apps/macos/Tests/ClawdbotIPCTests/WorkActivityStoreTests.swift +++ b/apps/macos/Tests/ClawdbotIPCTests/WorkActivityStoreTests.swift @@ -1,5 +1,6 @@ import Foundation import Testing +import ClawdbotProtocol @testable import Clawdbot @Suite diff --git a/apps/shared/ClawdbotKit/Resources/tool-display.json b/apps/shared/ClawdbotKit/Sources/ClawdbotKit/Resources/tool-display.json similarity index 100% rename from apps/shared/ClawdbotKit/Resources/tool-display.json rename to apps/shared/ClawdbotKit/Sources/ClawdbotKit/Resources/tool-display.json diff --git a/docs/assets/markdown.css b/docs/assets/markdown.css index c6acd9785..6ad456334 100644 --- a/docs/assets/markdown.css +++ b/docs/assets/markdown.css @@ -84,6 +84,52 @@ box-shadow: 0 12px 0 -8px rgba(0, 0, 0, 0.18); } +.showcase-link { + position: relative; + display: inline-flex; + align-items: center; + gap: 6px; +} + +.showcase-preview { + position: absolute; + left: 50%; + top: 100%; + width: min(420px, 80vw); + padding: 8px; + border-radius: 14px; + background: color-mix(in oklab, var(--panel) 92%, transparent); + border: 1px solid color-mix(in oklab, 
var(--frame-border) 30%, transparent); + box-shadow: 0 18px 40px -18px rgba(0, 0, 0, 0.55); + transform: translate(-50%, 10px) scale(0.98); + opacity: 0; + visibility: hidden; + pointer-events: none; + z-index: 20; + transition: opacity 0.18s ease, transform 0.18s ease, visibility 0.18s ease; +} + +.showcase-preview img { + width: 100%; + height: auto; + border-radius: 10px; + border: 1px solid color-mix(in oklab, var(--frame-border) 25%, transparent); + box-shadow: none; +} + +.showcase-link:hover .showcase-preview, +.showcase-link:focus-within .showcase-preview { + opacity: 1; + visibility: visible; + transform: translate(-50%, 6px) scale(1); +} + +@media (hover: none) { + .showcase-preview { + display: none; + } +} + .markdown code { font-family: var(--font-body); font-size: 0.95em; diff --git a/docs/assets/showcase/gohome-grafana.png b/docs/assets/showcase/gohome-grafana.png new file mode 100644 index 000000000..bd7cf0774 Binary files /dev/null and b/docs/assets/showcase/gohome-grafana.png differ diff --git a/docs/assets/showcase/padel-cli.svg b/docs/assets/showcase/padel-cli.svg new file mode 100644 index 000000000..61eb6334d --- /dev/null +++ b/docs/assets/showcase/padel-cli.svg @@ -0,0 +1,11 @@ + + + + + $ padel search --location "Barcelona" --date 2026-01-08 --time 18:00-22:00 + Available courts (3): + - Vall d'Hebron 19:00 Court 2 (90m) EUR 34 + - Badalona 20:30 Court 1 (60m) EUR 28 + - Gracia 21:00 Court 4 (90m) EUR 36 + + diff --git a/docs/assets/showcase/padel-screenshot.jpg b/docs/assets/showcase/padel-screenshot.jpg new file mode 100644 index 000000000..eb1ae39ea Binary files /dev/null and b/docs/assets/showcase/padel-screenshot.jpg differ diff --git a/docs/assets/showcase/roborock-screenshot.jpg b/docs/assets/showcase/roborock-screenshot.jpg new file mode 100644 index 000000000..e31ba11eb Binary files /dev/null and b/docs/assets/showcase/roborock-screenshot.jpg differ diff --git a/docs/assets/showcase/roborock-status.svg 
b/docs/assets/showcase/roborock-status.svg new file mode 100644 index 000000000..470840423 --- /dev/null +++ b/docs/assets/showcase/roborock-status.svg @@ -0,0 +1,13 @@ + + + + + $ gohome roborock status --device "Living Room" + Device: Roborock Q Revo + State: cleaning (zone) + Battery: 78% + Dustbin: 42% + Water tank: 61% + Last clean: 2026-01-06 19:42 + + diff --git a/docs/assets/showcase/xuezh-pronunciation.jpeg b/docs/assets/showcase/xuezh-pronunciation.jpeg new file mode 100644 index 000000000..7f7d86a8f Binary files /dev/null and b/docs/assets/showcase/xuezh-pronunciation.jpeg differ diff --git a/docs/cli/index.md b/docs/cli/index.md index a12dbcf2c..d7c4faca0 100644 --- a/docs/cli/index.md +++ b/docs/cli/index.md @@ -176,10 +176,13 @@ Interactive configuration wizard (models, providers, skills, gateway). Audit and modernize the local configuration. ### `doctor` -Health checks + quick fixes. +Health checks + quick fixes (config + gateway + legacy services). Options: - `--no-workspace-suggestions`: disable workspace memory hints. +- `--yes`: accept defaults without prompting (headless). +- `--non-interactive`: skip prompts; apply safe migrations only. +- `--deep`: scan system services for extra gateway installs. ## Auth + provider helpers @@ -362,6 +365,25 @@ Options: ### `gateway-daemon` Run the Gateway as a long-lived daemon (same options as `gateway`, minus `--allow-unconfigured` and `--force`). +### `daemon` +Manage the Gateway service (launchd/systemd/schtasks). + +Subcommands: +- `daemon status` (probes the Gateway RPC by default) +- `daemon install` (service install) +- `daemon uninstall` +- `daemon start` +- `daemon stop` +- `daemon restart` + +Notes: +- `daemon status` uses the same URL/token defaults as `gateway status` unless you pass `--url/--token/--password`. +- `daemon status` supports `--no-probe`, `--deep`, and `--json` for scripting. 
+- `daemon status` also surfaces legacy or extra gateway services when it can detect them (`--deep` adds system-level scans). +- `daemon install` defaults to Node runtime; use `--runtime bun` only when WhatsApp is disabled. +- `daemon install` options: `--port`, `--runtime`, `--token`. +- `gateway install|uninstall|start|stop|restart` remain as service aliases; `daemon` is the dedicated manager. + ### `gateway ` Gateway RPC helpers (use `--url`, `--token`, `--password`, `--timeout`, `--expect-final` for each). @@ -372,8 +394,12 @@ Subcommands: - `gateway wake --text [--mode now|next-heartbeat]` - `gateway send --to --message [--media-url ] [--gif-playback] [--idempotency-key ]` - `gateway agent --message [--to ] [--session-id ] [--thinking ] [--deliver] [--timeout-seconds ] [--idempotency-key ]` +- `gateway install` +- `gateway uninstall` +- `gateway start` - `gateway stop` - `gateway restart` +- `gateway daemon status` (alias for `clawdbot daemon status`) ## Models diff --git a/docs/concepts/compaction.md b/docs/concepts/compaction.md new file mode 100644 index 000000000..4ec1bfdfd --- /dev/null +++ b/docs/concepts/compaction.md @@ -0,0 +1,43 @@ +--- +summary: "Context window + compaction: how Clawdbot keeps sessions under model limits" +read_when: + - You want to understand auto-compaction and /compact + - You are debugging long sessions hitting context limits +--- +# Context Window & Compaction + +Every model has a **context window** (max tokens it can see). Long-running chats accumulate messages and tool results; once the window is tight, Clawdbot **compacts** older history to stay within limits. + +## What compaction is +Compaction **summarizes older conversation** into a compact summary entry and keeps recent messages intact. The summary is stored in the session history, so future requests use: +- The compaction summary +- Recent messages after the compaction point + +Compaction **persists** in the session’s JSONL history. 
+ +## Auto-compaction (default on) +When a session nears or exceeds the model’s context window, Clawdbot triggers auto-compaction and may retry the original request using the compacted context. + +You’ll see: +- `🧹 Auto-compaction complete` in verbose mode +- `/status` showing `🧹 Compactions: ` + +## Manual compaction +Use `/compact` (optionally with instructions) to force a compaction pass: +``` +/compact Focus on decisions and open questions +``` + +## Context window source +Context window is model-specific. Clawdbot uses the model definition from the configured provider catalog to determine limits. + +## Compaction vs pruning +- **Compaction**: summarises and **persists** in JSONL. +- **Session pruning**: trims old **tool results** only, **in-memory**, per request. + +See [/concepts/session-pruning](/concepts/session-pruning) for pruning details. + +## Tips +- Use `/compact` when sessions feel stale or context is bloated. +- Large tool outputs are already truncated; pruning can further reduce tool-result buildup. +- If you need a fresh slate, `/new` or `/reset` starts a new session id. diff --git a/docs/concepts/group-messages.md b/docs/concepts/group-messages.md index e403634d2..358a64c95 100644 --- a/docs/concepts/group-messages.md +++ b/docs/concepts/group-messages.md @@ -71,3 +71,4 @@ Only the owner number (from `whatsapp.allowFrom`, or the bot’s own E.164 when - Heartbeats are intentionally skipped for groups to avoid noisy broadcasts. - Echo suppression uses the combined batch string; if you send identical text twice without mentions, only the first will get a response. - Session store entries will appear as `agent::whatsapp:group:` in the session store (`~/.clawdbot/agents//sessions/sessions.json` by default); a missing entry just means the group hasn’t triggered a run yet. +- Typing indicators in groups follow `agent.typingMode` (default: `message` when unmentioned). 
diff --git a/docs/concepts/models.md b/docs/concepts/models.md index b7fba7b12..7ad93c347 100644 --- a/docs/concepts/models.md +++ b/docs/concepts/models.md @@ -12,6 +12,23 @@ See [`docs/model-failover.md`](/concepts/model-failover) for how auth profiles r Goal: give clear model visibility + control (configured vs available), plus scan tooling that prefers tool-call + image-capable models and maintains ordered fallbacks. +## How Clawdbot models work (quick explainer) + +Clawdbot selects models in this order: +1) The configured **primary** model (`agent.model.primary`). +2) If it fails, fallbacks in `agent.model.fallbacks` (in order). +3) Auth failover happens **inside** the provider first (see [/concepts/model-failover](/concepts/model-failover)). + +Key pieces: +- `provider/model` is the canonical model id (e.g. `anthropic/claude-opus-4-5`). +- `agent.models` is the **allowlist/catalog** of models Clawdbot can use, with optional aliases. +- `agent.imageModel` is only used when the primary model **can’t** accept images. +- `models.providers` lets you add custom providers + models (written to `models.json`). +- `/model ` switches the active model for the current session; `/model list` shows what’s allowed. + +Related: +- Context limits are model-specific; long sessions may trigger compaction. See [/concepts/compaction](/concepts/compaction). + ## Model recommendations Through testing, we’ve found [Claude Opus 4.5](https://www.anthropic.com/claude/opus) is the most useful general-purpose model for anything coding-related. We suggest [GPT-5.2-Codex](https://developers.openai.com/codex/models) for coding and sub-agents. For personal assistant work, nothing comes close to Opus. If you’re going all-in on Claude, we recommend the [Claude Max $200 subscription](https://www.anthropic.com/pricing/). @@ -45,6 +62,33 @@ Anecdotal notes from the Discord thread on January 4–5, 2026. Treat as “what See [/cli](/cli) for the full command tree and CLI flags. 
+### CLI output (list + status) + +`clawdbot models list` (default) prints a table with these columns: +- `Model`: `provider/model` key (truncated in TTY). +- `Input`: `text` or `text+image`. +- `Ctx`: context window in K tokens (from the model registry). +- `Local`: `yes/no` when the provider base URL is local. +- `Auth`: `yes/no` when the provider has usable auth. +- `Tags`: origin + role hints. + +Common tags: +- `default` — resolved default model. +- `fallback#N` — `agent.model.fallbacks` order. +- `image` — `agent.imageModel.primary`. +- `img-fallback#N` — `agent.imageModel.fallbacks` order. +- `configured` — present in `agent.models`. +- `alias:` — alias from `agent.models.*.alias`. +- `missing` — referenced in config but not found in the registry. + +Output formats: +- `--plain`: prints only `provider/model` keys (one per line). +- `--json`: `{ count, models: [{ key, name, input, contextWindow, local, available, tags, missing }] }`. + +`clawdbot models status` prints the resolved defaults, fallbacks, image model, aliases, +and an **Auth overview** section showing which providers have profiles/env/models.json keys. +`--plain` prints the resolved default model only; `--json` returns a structured object for tooling. + ## Config changes - `agent.models` (configured model catalog + aliases). diff --git a/docs/concepts/retry.md b/docs/concepts/retry.md new file mode 100644 index 000000000..ca9b32c03 --- /dev/null +++ b/docs/concepts/retry.md @@ -0,0 +1,58 @@ +--- +summary: "Retry policy for outbound provider calls" +read_when: + - Updating provider retry behavior or defaults + - Debugging provider send errors or rate limits +--- +# Retry policy + +## Goals +- Retry per HTTP request, not per multi-step flow. +- Preserve ordering by retrying only the current step. +- Avoid duplicating non-idempotent operations. 
+ +## Defaults +- Attempts: 3 +- Max delay cap: 30000 ms +- Jitter: 0.1 (10 percent) +- Provider defaults: + - Telegram min delay: 400 ms + - Discord min delay: 500 ms + +## Behavior +### Discord +- Retries only on rate-limit errors (HTTP 429). +- Uses Discord `retry_after` when available, otherwise exponential backoff. + +### Telegram +- Retries on transient errors (429, timeout, connect/reset/closed, temporarily unavailable). +- Uses `retry_after` when available, otherwise exponential backoff. +- Markdown parse errors are not retried; they fall back to plain text. + +## Configuration +Set retry policy per provider in `~/.clawdbot/clawdbot.json`: + +```json5 +{ + telegram: { + retry: { + attempts: 3, + minDelayMs: 400, + maxDelayMs: 30000, + jitter: 0.1 + } + }, + discord: { + retry: { + attempts: 3, + minDelayMs: 500, + maxDelayMs: 30000, + jitter: 0.1 + } + } +} +``` + +## Notes +- Retries apply per request (message send, media upload, reaction, poll, sticker). +- Composite flows do not retry completed steps. diff --git a/docs/concepts/session-pruning.md b/docs/concepts/session-pruning.md new file mode 100644 index 000000000..d59b77b6e --- /dev/null +++ b/docs/concepts/session-pruning.md @@ -0,0 +1,92 @@ +--- +summary: "Session pruning: opt-in tool-result trimming to reduce context bloat" +read_when: + - You want to reduce LLM context growth from tool outputs + - You are tuning agent.contextPruning +--- +# Session Pruning + +Session pruning trims **old tool results** from the in-memory context right before each LLM call. It is **opt-in** and does **not** rewrite the on-disk session history (`*.jsonl`). + +## When it runs +- Before each LLM request (context hook). +- Only affects the messages sent to the model for that request. + +## What can be pruned +- Only `toolResult` messages. +- User + assistant messages are **never** modified. +- The last `keepLastAssistants` assistant messages are protected; tool results after that cutoff are not pruned. 
+- If there aren’t enough assistant messages to establish the cutoff, pruning is skipped. +- Tool results containing **image blocks** are skipped (never trimmed/cleared). + +## Context window estimation +Pruning uses an estimated context window (chars ≈ tokens × 4). The window size is resolved in this order: +1) Model definition `contextWindow` (from the model registry). +2) `models.providers.*.models[].contextWindow` override. +3) `agent.contextTokens`. +4) Default `200000` tokens. + +## Modes +### adaptive +- If estimated context ratio ≥ `softTrimRatio`: soft-trim oversized tool results. +- If still ≥ `hardClearRatio` **and** prunable tool text ≥ `minPrunableToolChars`: hard-clear oldest eligible tool results. + +### aggressive +- Always hard-clears eligible tool results before the cutoff. +- Ignores `hardClear.enabled` (always clears when eligible). + +## Soft vs hard pruning +- **Soft-trim**: only for oversized tool results. + - Keeps head + tail, inserts `...`, and appends a note with the original size. + - Skips results with image blocks. +- **Hard-clear**: replaces the entire tool result with `hardClear.placeholder`. + +## Tool selection +- `tools.allow` / `tools.deny` support `*` wildcards. +- Deny wins. +- Empty allow list => all tools allowed. + +## Interaction with other limits +- Built-in tools already truncate their own output; session pruning is an extra layer that prevents long-running chats from accumulating too much tool output in the model context. +- Compaction is separate: compaction summarizes and persists, pruning is transient per request. See [/concepts/compaction](/concepts/compaction). 
+ +## Defaults (when enabled) +- `keepLastAssistants`: `3` +- `softTrimRatio`: `0.3` +- `hardClearRatio`: `0.5` +- `minPrunableToolChars`: `50000` +- `softTrim`: `{ maxChars: 4000, headChars: 1500, tailChars: 1500 }` +- `hardClear`: `{ enabled: true, placeholder: "[Old tool result content cleared]" }` + +## Examples +Minimal (adaptive): +```json5 +{ + agent: { + contextPruning: { mode: "adaptive" } + } +} +``` + +Aggressive: +```json5 +{ + agent: { + contextPruning: { mode: "aggressive" } + } +} +``` + +Restrict pruning to specific tools: +```json5 +{ + agent: { + contextPruning: { + mode: "adaptive", + tools: { allow: ["bash", "read"], deny: ["*image*"] } + } + } +} +``` + +See config reference: [Gateway Configuration](/gateway/configuration) diff --git a/docs/concepts/session-tool.md b/docs/concepts/session-tool.md index de4f31fb2..d1e0cb343 100644 --- a/docs/concepts/session-tool.md +++ b/docs/concepts/session-tool.md @@ -127,14 +127,15 @@ Parameters: - `task` (required) - `label?` (optional; used for logs/UI) - `model?` (optional; overrides the sub-agent model; invalid values error) -- `timeoutSeconds?` (optional; omit for long-running jobs; if set, Clawdbot aborts the sub-agent when the timeout elapses) +- `runTimeoutSeconds?` (default 0; when set, aborts the sub-agent run after N seconds) - `cleanup?` (`delete|keep`, default `keep`) Behavior: -- Starts a new `agent::subagent:` session with `deliver: false`. +- Starts a new `agent::subagent:` session with `deliver: false`. - Sub-agents default to the full tool set **minus session tools** (configurable via `agent.subagents.tools`). - Sub-agents are not allowed to call `sessions_spawn` (no sub-agent → sub-agent spawning). -- After completion (or best-effort wait), Clawdbot runs a sub-agent **announce step** and posts the result to the requester chat provider. +- Always non-blocking: returns `{ status: "accepted", runId, childSessionKey }` immediately. 
+- After completion, Clawdbot runs a sub-agent **announce step** and posts the result to the requester chat provider. - Reply exactly `ANNOUNCE_SKIP` during the announce step to stay silent. - Sub-agent sessions are auto-archived after `agent.subagents.archiveAfterMinutes` (default: 60). - Announce replies include a stats line (runtime, tokens, sessionKey/sessionId, transcript path, and optional cost). diff --git a/docs/concepts/session.md b/docs/concepts/session.md index 8cd144201..311015bea 100644 --- a/docs/concepts/session.md +++ b/docs/concepts/session.md @@ -16,11 +16,15 @@ All session state is **owned by the gateway** (the “master” Clawdbot). UI cl ## Where state lives - On the **gateway host**: - Store file: `~/.clawdbot/agents//sessions/sessions.json` (per agent). - - Transcripts: `~/.clawdbot/agents//sessions/.jsonl` (one file per session id). +- Transcripts: `~/.clawdbot/agents//sessions/.jsonl` (Telegram topic sessions use `.../-topic-.jsonl`). - The store is a map `sessionKey -> { sessionId, updatedAt, ... }`. Deleting entries is safe; they are recreated on demand. - Group entries may include `displayName`, `provider`, `subject`, `room`, and `space` to label sessions in UIs. - Clawdbot does **not** read legacy Pi/Tau session folders. +## Session pruning (optional) +Clawdbot can trim **old tool results** from the in-memory context right before LLM calls (opt-in). +This does **not** rewrite JSONL history. See [/concepts/session-pruning](/concepts/session-pruning). + ## Mapping transports → session keys - Direct chats collapse to the per-agent primary key: `agent::`. - Multiple phone numbers and providers can map to the same agent main key; they act as transports into one conversation. @@ -81,7 +85,7 @@ Send these as standalone messages so they register. - `clawdbot gateway call sessions.list --params '{}'` — fetch sessions from the running gateway (use `--url`/`--token` for remote gateway access). 
- Send `/status` as a standalone message in chat to see whether the agent is reachable, how much of the session context is used, current thinking/verbose toggles, and when your WhatsApp web creds were last refreshed (helps spot relink needs). - Send `/stop` as a standalone message to abort the current run. -- Send `/compact` (optional instructions) as a standalone message to summarize older context and free up window space. +- Send `/compact` (optional instructions) as a standalone message to summarize older context and free up window space. See [/concepts/compaction](/concepts/compaction). - JSONL transcripts can be opened directly to review full turns. ## Tips diff --git a/docs/concepts/typing-indicators.md b/docs/concepts/typing-indicators.md new file mode 100644 index 000000000..e3d92a46f --- /dev/null +++ b/docs/concepts/typing-indicators.md @@ -0,0 +1,59 @@ +--- +summary: "When Clawdbot shows typing indicators and how to tune them" +read_when: + - Changing typing indicator behavior or defaults +--- +# Typing indicators + +Typing indicators are sent to the chat provider while a run is active. Use +`agent.typingMode` to control **when** typing starts and `typingIntervalSeconds` +to control **how often** it refreshes. + +## Defaults +When `agent.typingMode` is **unset**, Clawdbot keeps the legacy behavior: +- **Direct chats**: typing starts immediately once the model loop begins. +- **Group chats with a mention**: typing starts immediately. +- **Group chats without a mention**: typing starts only when message text begins streaming. +- **Heartbeat runs**: typing is disabled. + +## Modes +Set `agent.typingMode` to one of: +- `never` — no typing indicator, ever. +- `instant` — start typing **as soon as the model loop begins**, even if the run + later returns only the silent reply token. +- `thinking` — start typing on the **first reasoning delta** (requires + `reasoningLevel: "stream"` for the run). 
+- `message` — start typing on the **first non-silent text delta** (ignores + the `NO_REPLY` silent token). + +Order of “how early it fires”: +`never` → `message` → `thinking` → `instant` + +## Configuration +```json5 +{ + agent: { + typingMode: "thinking", + typingIntervalSeconds: 6 + } +} +``` + +You can override mode or cadence per session: +```json5 +{ + session: { + typingMode: "message", + typingIntervalSeconds: 4 + } +} +``` + +## Notes +- `message` mode won’t show typing for silent-only replies (e.g. the `NO_REPLY` + token used to suppress output). +- `thinking` only fires if the run streams reasoning (`reasoningLevel: "stream"`). + If the model doesn’t emit reasoning deltas, typing won’t start. +- Heartbeats never show typing, regardless of mode. +- `typingIntervalSeconds` controls the **refresh cadence**, not the start time. + The default is 6 seconds. diff --git a/docs/docs.json b/docs/docs.json index e29cfd9c3..ca8a54b7d 100644 --- a/docs/docs.json +++ b/docs/docs.json @@ -545,13 +545,16 @@ "concepts/agent-loop", "concepts/agent-workspace", "concepts/multi-agent", + "concepts/compaction", "concepts/session", + "concepts/session-pruning", "concepts/sessions", "concepts/session-tool", "concepts/presence", "concepts/provider-routing", "concepts/groups", "concepts/group-messages", + "concepts/typing-indicators", "concepts/queue", "concepts/models", "concepts/model-failover", @@ -644,7 +647,17 @@ { "group": "Platforms", "pages": [ + "platforms", "platforms/macos", + "platforms/ios", + "platforms/android", + "platforms/windows", + "platforms/linux" + ] + }, + { + "group": "macOS Companion App", + "pages": [ "platforms/mac/dev-setup", "platforms/mac/menu-bar", "platforms/mac/voicewake", @@ -662,11 +675,7 @@ "platforms/mac/bun", "platforms/mac/xpc", "platforms/mac/skills", - "platforms/mac/peekaboo", - "platforms/ios", - "platforms/android", - "platforms/windows", - "platforms/linux" + "platforms/mac/peekaboo" ] }, { diff --git 
a/docs/gateway/background-process.md b/docs/gateway/background-process.md index 49fdc7559..3f97c844b 100644 --- a/docs/gateway/background-process.md +++ b/docs/gateway/background-process.md @@ -24,6 +24,7 @@ Behavior: - Foreground runs return output directly. - When backgrounded (explicit or timeout), the tool returns `status: "running"` + `sessionId` and a short tail. - Output is kept in memory until the session is polled or cleared. +- If the `process` tool is disallowed, `bash` runs synchronously and ignores `yieldMs`/`background`. Environment overrides: - `PI_BASH_YIELD_MS`: default yield (ms) @@ -50,6 +51,7 @@ Notes: - Only backgrounded sessions are listed/persisted in memory. - Sessions are lost on process restart (no disk persistence). - Session logs are only saved to chat history if you run `process poll/log` and the tool result is recorded. +- `process` is scoped per agent; it only sees sessions started by that agent. - `process list` includes a derived `name` (command verb + target) for quick scans. - `process log` uses line-based `offset`/`limit` (omit `offset` to grab the last N lines). diff --git a/docs/gateway/configuration.md b/docs/gateway/configuration.md index 0b39a9580..7209d2967 100644 --- a/docs/gateway/configuration.md +++ b/docs/gateway/configuration.md @@ -340,7 +340,7 @@ Run multiple isolated agents (separate workspace, `agentDir`, sessions) inside o - `scope`: `"session"` | `"agent"` | `"shared"` - `workspaceRoot`: custom sandbox workspace root - `tools`: per-agent sandbox tool policy (deny wins; overrides `agent.sandbox.tools`) - - `tools`: per-agent tool restrictions (applied before sandbox tool policy). + - `tools`: per-agent tool restrictions (overrides `agent.tools`; applied before sandbox tool policy). - `allow`: array of allowed tool names - `deny`: array of denied tool names (deny wins) - `routing.bindings[]`: routes inbound messages to an `agentId`. 
@@ -359,6 +359,75 @@ Deterministic match order: Within each match tier, the first matching entry in `routing.bindings` wins. +#### Per-agent access profiles (multi-agent) + +Each agent can carry its own sandbox + tool policy. Use this to mix access +levels in one gateway: +- **Full access** (personal agent) +- **Read-only** tools + workspace +- **No filesystem access** (messaging/session tools only) + +See [Multi-Agent Sandbox & Tools](/multi-agent-sandbox-tools) for precedence and +additional examples. + +Full access (no sandbox): +```json5 +{ + routing: { + agents: { + personal: { + workspace: "~/clawd-personal", + sandbox: { mode: "off" } + } + } + } +} +``` + +Read-only tools + read-only workspace: +```json5 +{ + routing: { + agents: { + family: { + workspace: "~/clawd-family", + sandbox: { + mode: "all", + scope: "agent", + workspaceAccess: "ro" + }, + tools: { + allow: ["read", "sessions_list", "sessions_history", "sessions_send", "sessions_spawn"], + deny: ["write", "edit", "bash", "process", "browser"] + } + } + } + } +} +``` + +No filesystem access (messaging/session tools enabled): +```json5 +{ + routing: { + agents: { + public: { + workspace: "~/clawd-public", + sandbox: { + mode: "all", + scope: "agent", + workspaceAccess: "none" + }, + tools: { + allow: ["sessions_list", "sessions_history", "sessions_send", "sessions_spawn", "whatsapp", "telegram", "slack", "discord"], + deny: ["read", "write", "edit", "bash", "process", "browser", "canvas", "nodes", "cron", "gateway", "image"] + } + } + } + } +} +``` + Example: two WhatsApp accounts → two agents: ```json5 @@ -493,6 +562,12 @@ Set `telegram.enabled: false` to disable automatic startup. 
streamMode: "partial", // off | partial | block (draft streaming) actions: { reactions: true }, // tool action gates (false disables) mediaMaxMb: 5, + retry: { // outbound retry policy + attempts: 3, + minDelayMs: 400, + maxDelayMs: 30000, + jitter: 0.1 + }, proxy: "socks5://localhost:9050", webhookUrl: "https://example.com/telegram-webhook", webhookSecret: "secret", @@ -505,6 +580,7 @@ Draft streaming notes: - Uses Telegram `sendMessageDraft` (draft bubble, not a real message). - Requires **private chat topics** (message_thread_id in DMs; bot has topics enabled). - `/reasoning stream` streams reasoning into the draft, then sends the final answer. +Retry policy defaults and behavior are documented in [Retry policy](/concepts/retry). ### `discord` (bot transport) @@ -559,7 +635,13 @@ Configure the Discord bot by setting the bot token and optional gating: } } }, - historyLimit: 20 // include last N guild messages as context + historyLimit: 20, // include last N guild messages as context + retry: { // outbound retry policy + attempts: 3, + minDelayMs: 500, + maxDelayMs: 30000, + jitter: 0.1 + } } } ``` @@ -571,6 +653,7 @@ Reaction notification modes: - `own`: reactions on the bot's own messages (default). - `all`: all reactions on all messages. - `allowlist`: reactions from `guilds.<id>.users` on all messages (empty list disables). +Retry policy defaults and behavior are documented in [Retry policy](/concepts/retry). ### `slack` (socket mode) @@ -813,6 +896,88 @@ If you configure the same alias name (case-insensitive) yourself, your value win } ``` +#### `agent.contextPruning` (opt-in tool-result pruning) + +`agent.contextPruning` prunes **old tool results** from the in-memory context right before a request is sent to the LLM. +It does **not** modify the session history on disk (`*.jsonl` remains complete). + +This is intended to reduce token usage for chatty agents that accumulate large tool outputs over time. + +High level: +- Never touches user/assistant messages. 
+- Protects the last `keepLastAssistants` assistant messages (no tool results after that point are pruned). +- Protects the bootstrap prefix (nothing before the first user message is pruned). +- Modes: + - `adaptive`: soft-trims oversized tool results (keep head/tail) when the estimated context ratio crosses `softTrimRatio`. + Then hard-clears the oldest eligible tool results when the estimated context ratio crosses `hardClearRatio` **and** + there’s enough prunable tool-result bulk (`minPrunableToolChars`). + - `aggressive`: always replaces eligible tool results before the cutoff with the `hardClear.placeholder` (no ratio checks). + +Soft vs hard pruning (what changes in the context sent to the LLM): +- **Soft-trim**: only for *oversized* tool results. Keeps the beginning + end and inserts `...` in the middle. + - Before: `toolResult("…very long output…")` + - After: `toolResult("HEAD…\n...\n…TAIL\n\n[Tool result trimmed: …]")` +- **Hard-clear**: replaces the entire tool result with the placeholder. + - Before: `toolResult("…very long output…")` + - After: `toolResult("[Old tool result content cleared]")` + +Notes / current limitations: +- Tool results containing **image blocks are skipped** (never trimmed/cleared) right now. +- The estimated “context ratio” is based on **characters** (approximate), not exact tokens. +- If the session doesn’t contain at least `keepLastAssistants` assistant messages yet, pruning is skipped. +- In `aggressive` mode, `hardClear.enabled` is ignored (eligible tool results are always replaced with `hardClear.placeholder`). 
+ +Example (minimal): +```json5 +{ + agent: { + contextPruning: { + mode: "adaptive" + } + } +} +``` + +Defaults (when `mode` is `"adaptive"` or `"aggressive"`): +- `keepLastAssistants`: `3` +- `softTrimRatio`: `0.3` (adaptive only) +- `hardClearRatio`: `0.5` (adaptive only) +- `minPrunableToolChars`: `50000` (adaptive only) +- `softTrim`: `{ maxChars: 4000, headChars: 1500, tailChars: 1500 }` (adaptive only) +- `hardClear`: `{ enabled: true, placeholder: "[Old tool result content cleared]" }` + +Example (aggressive, minimal): +```json5 +{ + agent: { + contextPruning: { + mode: "aggressive" + } + } +} +``` + +Example (adaptive tuned): +```json5 +{ + agent: { + contextPruning: { + mode: "adaptive", + keepLastAssistants: 3, + softTrimRatio: 0.3, + hardClearRatio: 0.5, + minPrunableToolChars: 50000, + softTrim: { maxChars: 4000, headChars: 1500, tailChars: 1500 }, + hardClear: { enabled: true, placeholder: "[Old tool result content cleared]" }, + // Optional: restrict pruning to specific tools (deny wins; supports "*" wildcards) + tools: { deny: ["browser", "canvas"] }, + } + } +} +``` + +See [/concepts/session-pruning](/concepts/session-pruning) for behavior details. + Block streaming: - `agent.blockStreamingDefault`: `"on"`/`"off"` (default on). - `agent.blockStreamingBreak`: `"text_end"` or `"message_end"` (default: text_end). @@ -828,6 +993,14 @@ Block streaming: ``` See [/concepts/streaming](/concepts/streaming) for behavior + chunking details. +Typing indicators: +- `agent.typingMode`: `"never" | "instant" | "thinking" | "message"`. Defaults to + `instant` for direct chats / mentions and `message` for unmentioned group chats. +- `session.typingMode`: per-session override for the mode. +- `agent.typingIntervalSeconds`: how often the typing signal is refreshed (default: 6s). +- `session.typingIntervalSeconds`: per-session override for the refresh interval. +See [/concepts/typing-indicators](/concepts/typing-indicators) for behavior details. 
+ `agent.model.primary` should be set as `provider/model` (e.g. `anthropic/claude-opus-4-5`). Aliases come from `agent.models.*.alias` (e.g. `Opus`). If you omit the provider, CLAWDBOT currently assumes `anthropic` as a temporary diff --git a/docs/gateway/doctor.md b/docs/gateway/doctor.md index 38e9ee334..4075b88a3 100644 --- a/docs/gateway/doctor.md +++ b/docs/gateway/doctor.md @@ -15,6 +15,7 @@ read_when: - Migrates legacy `~/.clawdis/clawdis.json` when no Clawdbot config exists. - Checks sandbox Docker images when sandboxing is enabled (offers to build or switch to legacy names). - Detects legacy Clawdis services (launchd/systemd; legacy schtasks for native Windows) and offers to migrate them. +- Detects other gateway-like services and prints cleanup hints (optional deep scan for system services). - On Linux, checks if systemd user lingering is enabled and can enable it (required to keep the Gateway alive after logout). - Migrates legacy on-disk state layouts (sessions, agentDir, provider auth dirs) into the current per-agent/per-account structure. @@ -70,6 +71,12 @@ clawdbot doctor --non-interactive Run without prompts and only apply safe migrations (config normalization + on-disk state moves). Skips restart/service/sandbox actions that require human confirmation. +```bash +clawdbot doctor --deep +``` + +Scan system services for extra gateway installs (launchd/systemd/schtasks). + If you want to review changes before writing, open the config file first: ```bash diff --git a/docs/gateway/index.md b/docs/gateway/index.md index f025dff31..416ee4682 100644 --- a/docs/gateway/index.md +++ b/docs/gateway/index.md @@ -157,6 +157,25 @@ See also: [`docs/presence.md`](/concepts/presence) for how presence is produced/ - On failure, launchd restarts; fatal misconfig should keep exiting so the operator notices. - LaunchAgents are per-user and require a logged-in session; for headless setups use a custom LaunchDaemon (not shipped). 
+## Daemon management (CLI) + +Use the CLI daemon manager for install/start/stop/restart/status: + +```bash +clawdbot daemon status +clawdbot daemon install +clawdbot daemon stop +clawdbot daemon restart +``` + +Notes: +- `daemon status` probes the Gateway RPC by default (same URL/token defaults as `gateway status`). +- `daemon status --deep` adds system-level scans (LaunchDaemons/system units). +- `gateway install|uninstall|start|stop|restart` remain supported as aliases; `daemon` is the dedicated manager. +- `gateway daemon status` is an alias for `clawdbot daemon status`. +- If other gateway-like services are detected, the CLI warns. We recommend **one gateway per machine**; one gateway can host multiple agents. + - Cleanup: `clawdbot daemon uninstall` (current service) and `clawdbot doctor` (legacy migrations). + Bundled mac app: - Clawdbot.app can bundle a bun-compiled gateway binary and install a per-user LaunchAgent labeled `com.clawdbot.gateway`. - To stop it cleanly, use `clawdbot gateway stop` (or `launchctl bootout gui/$UID/com.clawdbot.gateway`). diff --git a/docs/gateway/security.md b/docs/gateway/security.md index d12dde53b..e09347746 100644 --- a/docs/gateway/security.md +++ b/docs/gateway/security.md @@ -128,12 +128,13 @@ Consider running your AI on a separate phone number from your personal one: - Personal number: Your conversations stay private - Bot number: AI handles these, with appropriate boundaries -### 4. Read-Only Mode (Future) +### 4. Read-Only Mode (Today, via sandbox + tools) -We're considering a `readOnlyMode` flag that prevents the AI from: -- Writing files outside a sandbox -- Executing shell commands -- Sending messages +You can already build a read-only profile by combining: +- `sandbox.workspaceAccess: "ro"` (or `"none"` for no workspace access) +- tool allow/deny lists that block `write`, `edit`, `bash`, `process`, etc. + +We may add a single `readOnlyMode` flag later to simplify this configuration. 
## Sandboxing (recommended) @@ -153,6 +154,79 @@ Also consider agent workspace access inside the sandbox: Important: `agent.elevated` is an explicit escape hatch that runs bash on the host. Keep `agent.elevated.allowFrom` tight and don’t enable it for strangers. +## Per-agent access profiles (multi-agent) + +With multi-agent routing, each agent can have its own sandbox + tool policy: +use this to give **full access**, **read-only**, or **no access** per agent. +See [Multi-Agent Sandbox & Tools](/multi-agent-sandbox-tools) for full details +and precedence rules. + +Common use cases: +- Personal agent: full access, no sandbox +- Family/work agent: sandboxed + read-only tools +- Public agent: sandboxed + no filesystem/shell tools + +### Example: full access (no sandbox) + +```json5 +{ + routing: { + agents: { + personal: { + workspace: "~/clawd-personal", + sandbox: { mode: "off" } + } + } + } +} +``` + +### Example: read-only tools + read-only workspace + +```json5 +{ + routing: { + agents: { + family: { + workspace: "~/clawd-family", + sandbox: { + mode: "all", + scope: "agent", + workspaceAccess: "ro" + }, + tools: { + allow: ["read"], + deny: ["write", "edit", "bash", "process", "browser"] + } + } + } + } +} +``` + +### Example: no filesystem/shell access (provider messaging allowed) + +```json5 +{ + routing: { + agents: { + public: { + workspace: "~/clawd-public", + sandbox: { + mode: "all", + scope: "agent", + workspaceAccess: "none" + }, + tools: { + allow: ["sessions_list", "sessions_history", "sessions_send", "sessions_spawn", "whatsapp", "telegram", "slack", "discord"], + deny: ["read", "write", "edit", "bash", "process", "browser", "canvas", "nodes", "cron", "gateway", "image"] + } + } + } + } +} +``` + ## What to Tell Your AI Include security guidelines in your agent's system prompt: diff --git a/docs/install/docker.md b/docs/install/docker.md index ed06679e9..0f3879de4 100644 --- a/docs/install/docker.md +++ b/docs/install/docker.md @@ -86,6 
+86,18 @@ container. The gateway stays on your host, but the tool execution is isolated: Warning: `scope: "shared"` disables cross-session isolation. All sessions share one container and one workspace. +### Per-agent sandbox profiles (multi-agent) + +If you use multi-agent routing, each agent can override sandbox + tool settings: +`routing.agents[id].sandbox` and `routing.agents[id].tools`. This lets you run +mixed access levels in one gateway: +- Full access (personal agent) +- Read-only tools + read-only workspace (family/work agent) +- No filesystem/shell tools (public agent) + +See [Multi-Agent Sandbox & Tools](/multi-agent-sandbox-tools) for examples, +precedence, and troubleshooting. + ### Default behavior - Image: `clawdbot-sandbox:bookworm-slim` diff --git a/docs/install/updating.md b/docs/install/updating.md index 11846ccbc..f6e045c4e 100644 --- a/docs/install/updating.md +++ b/docs/install/updating.md @@ -97,7 +97,7 @@ Runbook + exact service labels: [Gateway runbook](/gateway) Install a known-good version: ```bash -npm i -g clawdbot@2026.1.5-3 +npm i -g clawdbot@2026.1.7 ``` Then restart + re-run doctor: diff --git a/docs/platforms/android.md b/docs/platforms/android.md index 56beab345..9e274da0a 100644 --- a/docs/platforms/android.md +++ b/docs/platforms/android.md @@ -8,6 +8,15 @@ read_when: # Android App (Node) +## Support snapshot +- Role: companion node app (Android does not host the Gateway). +- Gateway required: yes (run it on macOS, Linux, or Windows via WSL2). +- Install: [Getting Started](/start/getting-started) + [Pairing](/gateway/pairing). +- Gateway: [Runbook](/gateway) + [Configuration](/gateway/configuration). + +## System control +System control (launchd/systemd) lives on the Gateway host. See [Gateway](/gateway). 
+ ## Connection Runbook Android node app ⇄ (mDNS/NSD + TCP bridge) ⇄ **Gateway bridge** ⇄ (loopback WS) ⇄ **Gateway** diff --git a/docs/platforms/index.md b/docs/platforms/index.md new file mode 100644 index 000000000..9d388140f --- /dev/null +++ b/docs/platforms/index.md @@ -0,0 +1,40 @@ +--- +summary: "Platform support overview (Gateway + companion apps)" +read_when: + - Looking for OS support or install paths + - Deciding where to run the Gateway +--- +# Platforms + +Clawdbot core is written in TypeScript, so the CLI + Gateway run anywhere Node or Bun runs. + +Companion apps exist for macOS (menu bar app) and mobile nodes (iOS/Android). Windows and +Linux companion apps are planned, but the core Gateway is fully supported today. + +## Choose your OS + +- macOS: [macOS](/platforms/macos) +- iOS: [iOS](/platforms/ios) +- Android: [Android](/platforms/android) +- Windows: [Windows](/platforms/windows) +- Linux: [Linux](/platforms/linux) + +## Common links + +- Install guide: [Getting Started](/start/getting-started) +- Gateway runbook: [Gateway](/gateway) +- Gateway configuration: [Configuration](/gateway/configuration) +- Service status: `clawdbot daemon status` + +## Gateway service install (CLI) + +Use one of these (all supported): + +- Wizard (recommended): `clawdbot onboard --install-daemon` +- Direct: `clawdbot daemon install` (alias: `clawdbot gateway install`) +- Configure flow: `clawdbot configure` → select **Gateway daemon** +- Repair/migrate: `clawdbot doctor` (offers to install or fix the service) + +The service target depends on OS: +- macOS: LaunchAgent (`com.clawdbot.gateway`) +- Linux/WSL2: systemd user service diff --git a/docs/platforms/ios.md b/docs/platforms/ios.md index 09cb80ce4..939d5c044 100644 --- a/docs/platforms/ios.md +++ b/docs/platforms/ios.md @@ -12,6 +12,15 @@ read_when: Status: prototype implemented (internal) · Date: 2025-12-13 +## Support snapshot +- Role: companion node app (iOS does not host the Gateway). 
+- Gateway required: yes (run it on macOS, Linux, or Windows via WSL2). +- Install: [Getting Started](/start/getting-started) + [Pairing](/gateway/pairing). +- Gateway: [Runbook](/gateway) + [Configuration](/gateway/configuration). + +## System control +System control (launchd/systemd) lives on the Gateway host. See [Gateway](/gateway). + ## Connection Runbook This is the practical “how do I connect the iOS node” guide: diff --git a/docs/platforms/linux.md b/docs/platforms/linux.md index b5e27e4cb..78348d698 100644 --- a/docs/platforms/linux.md +++ b/docs/platforms/linux.md @@ -1,11 +1,80 @@ --- -summary: "Linux app status + contribution call" +summary: "Linux support + companion app status" read_when: - Looking for Linux companion app status - Planning platform coverage or contributions --- # Linux App -Clawdbot core is fully supported on Linux. The core is written in TypeScript, so it runs anywhere Node runs. +Clawdbot core is fully supported on Linux. The core is written in TypeScript, so it runs anywhere Node or Bun runs. We do not have a Linux companion app yet. It is planned, and we would love contributions to make it happen. + +## Install +- [Getting Started](/start/getting-started) +- [Install & updates](/install/updating) +- Optional flows: [Bun](/install/bun), [Nix](/install/nix), [Docker](/install/docker) + +## Gateway +- [Gateway runbook](/gateway) +- [Configuration](/gateway/configuration) + +## Gateway service install (CLI) + +Use one of these: + +``` +clawdbot onboard --install-daemon +``` + +Or: + +``` +clawdbot daemon install +``` + +Or: + +``` +clawdbot gateway install +``` + +Or: + +``` +clawdbot configure +``` + +Select **Gateway daemon** when prompted. + +Repair/migrate: + +``` +clawdbot doctor +``` + +## System control (systemd user unit) +Full unit example lives in the [Gateway runbook](/gateway). 
Minimal setup: + +Create `~/.config/systemd/user/clawdbot-gateway.service`: + +``` +[Unit] +Description=Clawdbot Gateway +After=network-online.target +Wants=network-online.target + +[Service] +ExecStart=/usr/local/bin/clawdbot gateway --port 18789 +Restart=always +RestartSec=5 + +[Install] +WantedBy=default.target +``` + +Enable it: + +``` +systemctl --user enable --now clawdbot-gateway.service +``` diff --git a/docs/platforms/macos.md b/docs/platforms/macos.md index bac5c1539..a1daa37cc 100644 --- a/docs/platforms/macos.md +++ b/docs/platforms/macos.md @@ -8,6 +8,23 @@ read_when: Author: steipete · Status: draft spec · Date: 2025-12-20 +## Support snapshot +- Core Gateway: supported (TypeScript on Node/Bun). +- Companion app: macOS menu bar app with permissions + node bridge. +- Install: [Getting Started](/start/getting-started) or [Install & updates](/install/updating). +- Gateway: [Runbook](/gateway) + [Configuration](/gateway/configuration). + +## System control (launchd) +If you run the bundled macOS app, it installs a per-user LaunchAgent labeled `com.clawdbot.gateway`. +CLI-only installs can use `clawdbot onboard --install-daemon`, `clawdbot daemon install`, or `clawdbot configure` → **Gateway daemon**. + +```bash +launchctl kickstart -k gui/$UID/com.clawdbot.gateway +launchctl bootout gui/$UID/com.clawdbot.gateway +``` + +Details: [Gateway runbook](/gateway) and [Bundled bun Gateway](/platforms/mac/bun). + ## Purpose - Single macOS menu-bar app named **Clawdbot** that: - Shows native notifications for Clawdbot/clawdbot events. 
diff --git a/docs/platforms/windows.md b/docs/platforms/windows.md index 67ad766c0..b97906295 100644 --- a/docs/platforms/windows.md +++ b/docs/platforms/windows.md @@ -1,5 +1,5 @@ --- -summary: "Windows (WSL2) setup + companion app status" +summary: "Windows (WSL2) support + companion app status" read_when: - Installing Clawdbot on Windows - Looking for Windows companion app status @@ -7,14 +7,55 @@ read_when: --- # Windows (WSL2) -Clawdbot runs on Windows **via WSL2** (Ubuntu recommended). WSL2 is **strongly -recommended**; native Windows installs are untested and more problematic. Use -WSL2 and follow the Linux flow inside it. +Clawdbot core is supported on Windows **via WSL2** (Ubuntu recommended). The +CLI + Gateway run inside Linux, which keeps the runtime consistent. Native +Windows installs are untested and more problematic. + +## Install +- [Getting Started](/start/getting-started) (use inside WSL) +- [Install & updates](/install/updating) +- Official WSL2 guide (Microsoft): https://learn.microsoft.com/windows/wsl/install + +## Gateway +- [Gateway runbook](/gateway) +- [Configuration](/gateway/configuration) + +## Gateway service install (CLI) + +Inside WSL2: + +``` +clawdbot onboard --install-daemon +``` + +Or: + +``` +clawdbot daemon install +``` + +Or: + +``` +clawdbot gateway install +``` + +Or: + +``` +clawdbot configure +``` + +Select **Gateway daemon** when prompted. + +Repair/migrate: + +``` +clawdbot doctor +``` ## How to install this correctly -Start here (official WSL2 guide): https://learn.microsoft.com/windows/wsl/install - ### 1) Install WSL2 + Ubuntu Open PowerShell (Admin): diff --git a/docs/providers/discord.md b/docs/providers/discord.md index b4bfaf878..4d5d652c4 100644 --- a/docs/providers/discord.md +++ b/docs/providers/discord.md @@ -5,7 +5,7 @@ read_when: --- # Discord (Bot API) -Updated: 2025-12-07 +Updated: 2026-01-07 Status: ready for DM and guild text channels via the official Discord bot gateway. 
@@ -122,6 +122,12 @@ Example “single server, only allow me, only allow #help”: help: { allow: true, requireMention: true } } } + }, + retry: { + attempts: 3, + minDelayMs: 500, + maxDelayMs: 30000, + jitter: 0.1 } } } @@ -154,6 +160,9 @@ Notes: - Reply context is injected when a message references another message (quoted content + ids). - Native reply threading is **off by default**; enable with `discord.replyToMode` and reply tags. +## Retry policy +Outbound Discord API calls retry on rate limits (429) using Discord `retry_after` when available, with exponential backoff and jitter. Configure via `discord.retry`. See [Retry policy](/concepts/retry). + ## Config ```json5 @@ -235,6 +244,7 @@ Ack reactions are controlled globally via `messages.ackReaction` + - `guilds.<id>.reactionNotifications`: reaction system event mode (`off`, `own`, `all`, `allowlist`). - `mediaMaxMb`: clamp inbound media saved to disk. - `historyLimit`: number of recent guild messages to include as context when replying to a mention (default 20, `0` disables). +- `retry`: retry policy for outbound Discord API calls (attempts, minDelayMs, maxDelayMs, jitter). - `actions`: per-action tool gates; omit to allow all (set `false` to disable). - `reactions` (covers react + read reactions) - `stickers`, `polls`, `permissions`, `messages`, `threads`, `pins`, `search` diff --git a/docs/providers/telegram.md b/docs/providers/telegram.md index 3772ebc81..a8722481f 100644 --- a/docs/providers/telegram.md +++ b/docs/providers/telegram.md @@ -37,6 +37,59 @@ Status: production-ready for bot DMs + groups via grammY. Long-polling by defaul - Inbound messages are normalized into the shared provider envelope with reply context and media placeholders. - Group replies require a mention by default (native @mention or `routing.groupChat.mentionPatterns`). - Replies always route back to the same Telegram chat. 
+- Long-polling uses grammY runner with per-chat sequencing; overall concurrency is capped by `agent.maxConcurrent`. + +## Group activation modes + +By default, the bot only responds to mentions in groups (`@botname` or patterns in `routing.groupChat.mentionPatterns`). To change this behavior: + +### Via config (recommended) + +```json5 +{ + telegram: { + groups: { + "-1001234567890": { requireMention: false } // always respond in this group + } + } +} +``` + +**Important:** Setting `telegram.groups` creates an **allowlist** - only listed groups (or `"*"`) will be accepted. + +To allow all groups with always-respond: +```json5 +{ + telegram: { + groups: { + "*": { requireMention: false } // all groups, always respond + } + } +} +``` + +To keep mention-only for all groups (default behavior): +```json5 +{ + telegram: { + groups: { + "*": { requireMention: true } // or omit groups entirely + } + } +} +``` + +### Via command (session-level) + +Send in the group: +- `/activation always` - respond to all messages +- `/activation mention` - require mentions (default) + +**Note:** Commands update session state only. For persistent behavior across restarts, use config. + +### Getting the group chat ID + +Forward any message from the group to `@userinfobot` or `@getidsbot` on Telegram to see the chat ID (negative number like `-1001234567890`). ## Topics (forum supergroups) Telegram forum topics include a `message_thread_id` per message. Clawdbot: @@ -50,15 +103,29 @@ Private topics (DM forum mode) also include `message_thread_id`. Clawdbot: - Uses the thread id for draft streaming + replies. ## Access control (DMs + groups) + +### DM access - Default: `telegram.dmPolicy = "pairing"`. Unknown senders receive a pairing code; messages are ignored until approved (codes expire after 1 hour). - Approve via: - `clawdbot pairing list --provider telegram` - `clawdbot pairing approve --provider telegram <code>` - Pairing is the default token exchange used for Telegram DMs. 
Details: [Pairing](/start/pairing) -Group gating: -- `telegram.groupPolicy = open | allowlist | disabled`. -- `telegram.groups` doubles as a group allowlist when set (include `"*"` to allow all). +### Group access + +Two independent controls: + +**1. Which groups are allowed** (group allowlist via `telegram.groups`): +- No `groups` config = all groups allowed +- With `groups` config = only listed groups or `"*"` are allowed +- Example: `"groups": { "-1001234567890": {}, "*": {} }` allows all groups + +**2. Which senders are allowed** (sender filtering via `telegram.groupPolicy`): +- `"open"` (default) = all senders in allowed groups can message +- `"allowlist"` = only senders in `telegram.groupAllowFrom` can message +- `"disabled"` = no group messages accepted at all + +Most users want: `groupPolicy: "open"` + specific groups listed in `telegram.groups` ## Long-polling vs webhook - Default: long-polling (no public URL required). @@ -96,6 +163,9 @@ Reasoning stream (Telegram only): - If `telegram.streamMode` is `off`, reasoning stream is disabled. More context: [Streaming + chunking](/concepts/streaming). +## Retry policy +Outbound Telegram API calls retry on transient network/429 errors with exponential backoff and jitter. Configure via `telegram.retry`. See [Retry policy](/concepts/retry). + ## Agent tool (reactions) - Tool: `telegram` with `react` action (`chatId`, `messageId`, `emoji`). - Reaction removal semantics: see [/tools/reactions](/tools/reactions). @@ -105,6 +175,27 @@ More context: [Streaming + chunking](/concepts/streaming). - Use a chat id (`123456789`) or a username (`@name`) as the target. - Example: `clawdbot send --provider telegram --to 123456789 "hi"`. 
+## Troubleshooting + +**Bot doesn't respond to non-mention messages in group:** +- Check if group is in `telegram.groups` with `requireMention: false` +- Or use `"*": { "requireMention": false }` to enable for all groups +- Test with `/activation always` command (requires config change to persist) + +**Bot not seeing group messages at all:** +- If `telegram.groups` is set, the group must be listed or use `"*"` +- Check Privacy Settings in @BotFather → "Group Privacy" should be **OFF** +- Verify bot is actually a member (not just an admin with no read access) +- Check gateway logs: `journalctl --user -u clawdbot-gateway -f` (look for "skipping group message") + +**Bot responds to mentions but not `/activation always`:** +- The `/activation` command updates session state but doesn't persist to config +- For persistent behavior, add group to `telegram.groups` with `requireMention: false` + +**Commands like `/status` don't work:** +- Make sure your Telegram user ID is authorized (via pairing or `telegram.allowFrom`) +- Commands require authorization even in groups with `groupPolicy: "open"` + ## Configuration reference (Telegram) Full configuration: [Configuration](/gateway/configuration) @@ -128,6 +219,7 @@ Provider options: - `telegram.textChunkLimit`: outbound chunk size (chars). - `telegram.streamMode`: `off | partial | block` (draft streaming). - `telegram.mediaMaxMb`: inbound/outbound media cap (MB). +- `telegram.retry`: retry policy for outbound Telegram API calls (attempts, minDelayMs, maxDelayMs, jitter). - `telegram.proxy`: proxy URL for Bot API calls (SOCKS/HTTP). - `telegram.webhookUrl`: enable webhook mode. - `telegram.webhookSecret`: webhook secret (optional). diff --git a/docs/providers/whatsapp.md b/docs/providers/whatsapp.md index 42dfb0572..a777af70f 100644 --- a/docs/providers/whatsapp.md +++ b/docs/providers/whatsapp.md @@ -61,6 +61,25 @@ WhatsApp requires a real mobile number for verification. 
VoIP and virtual number - Pairing: unknown senders get a pairing code (approve via `clawdbot pairing approve --provider whatsapp <code>`; codes expire after 1 hour). - Open: requires `whatsapp.allowFrom` to include `"*"`. - Self messages are always allowed; “self-chat mode” still requires `whatsapp.allowFrom` to include your own number. + +### Same-phone mode (personal number) +If you run Clawdbot on your **personal WhatsApp number**, set: + +```json +{ + "whatsapp": { + "selfChatMode": true + } +} +``` + +Behavior: +- Suppresses pairing replies for **outbound DMs** (prevents spamming contacts). +- Inbound unknown senders still follow `whatsapp.dmPolicy`. + +Recommended for personal numbers: +- Set `whatsapp.dmPolicy="allowlist"` and add your number to `whatsapp.allowFrom`. +- Set `messages.responsePrefix` (for example, `[clawdbot]`) so replies are clearly labeled. - **Group policy**: `whatsapp.groupPolicy` controls group handling (`open|disabled|allowlist`). - `allowlist` uses `whatsapp.groupAllowFrom` (fallback: explicit `whatsapp.allowFrom`). - **Self-chat mode**: avoids auto read receipts and ignores mention JIDs. @@ -139,6 +158,7 @@ WhatsApp requires a real mobile number for verification. VoIP and virtual number ## Config quick map - `whatsapp.dmPolicy` (DM policy: pairing/allowlist/open/disabled). +- `whatsapp.selfChatMode` (same-phone setup; suppress pairing replies for outbound DMs). - `whatsapp.allowFrom` (DM allowlist). - `whatsapp.accounts.<accountId>.*` (per-account settings + optional `authDir`). - `whatsapp.groupAllowFrom` (group sender allowlist). diff --git a/docs/start/faq.md b/docs/start/faq.md index ad16dd0e7..e2849ad02 100644 --- a/docs/start/faq.md +++ b/docs/start/faq.md @@ -337,7 +337,7 @@ See [Groups](/concepts/groups) for details. ### How much context can Clawdbot handle? -Context window depends on the model. 
Clawdbot uses **autocompaction** — older conversation gets summarized to stay under the limit. See [/concepts/compaction](/concepts/compaction). Practical tips: - Keep `AGENTS.md` focused, not bloated. diff --git a/docs/start/hubs.md b/docs/start/hubs.md index 77b943b47..58b9209b7 100644 --- a/docs/start/hubs.md +++ b/docs/start/hubs.md @@ -36,8 +36,10 @@ Use these hubs to discover every page, including deep dives and reference docs t - [Agent loop](https://docs.clawd.bot/concepts/agent-loop) - [Streaming + chunking](/concepts/streaming) - [Multi-agent routing](https://docs.clawd.bot/concepts/multi-agent) +- [Compaction](https://docs.clawd.bot/concepts/compaction) - [Sessions](https://docs.clawd.bot/concepts/session) - [Sessions (alias)](https://docs.clawd.bot/concepts/sessions) +- [Session pruning](https://docs.clawd.bot/concepts/session-pruning) - [Session tools](https://docs.clawd.bot/concepts/session-tool) - [Queue](https://docs.clawd.bot/concepts/queue) - [Slash commands](https://docs.clawd.bot/tools/slash-commands) @@ -112,7 +114,16 @@ Use these hubs to discover every page, including deep dives and reference docs t ## Platforms -- [macOS app overview](https://docs.clawd.bot/platforms/macos) +- [Platforms overview](https://docs.clawd.bot/platforms) +- [macOS](https://docs.clawd.bot/platforms/macos) +- [iOS](https://docs.clawd.bot/platforms/ios) +- [Android](https://docs.clawd.bot/platforms/android) +- [Windows (WSL2)](https://docs.clawd.bot/platforms/windows) +- [Linux](https://docs.clawd.bot/platforms/linux) +- [Web surfaces](https://docs.clawd.bot/web) + +## macOS companion app (internals) + - [macOS dev setup](https://docs.clawd.bot/platforms/mac/dev-setup) - [macOS menu bar](https://docs.clawd.bot/platforms/mac/menu-bar) - [macOS voice wake](https://docs.clawd.bot/platforms/mac/voicewake) @@ -131,11 +142,6 @@ Use these hubs to discover every page, including deep dives and reference docs t - [macOS XPC](https://docs.clawd.bot/platforms/mac/xpc) - [macOS 
skills](https://docs.clawd.bot/platforms/mac/skills) - [macOS Peekaboo plan](https://docs.clawd.bot/platforms/mac/peekaboo) -- [iOS node](https://docs.clawd.bot/platforms/ios) -- [Android node](https://docs.clawd.bot/platforms/android) -- [Windows (WSL2)](https://docs.clawd.bot/platforms/windows) -- [Linux app](https://docs.clawd.bot/platforms/linux) -- [Web surfaces](https://docs.clawd.bot/web) ## Workspace + templates diff --git a/docs/start/showcase.md b/docs/start/showcase.md index 1e64dc7aa..3be0cb96b 100644 --- a/docs/start/showcase.md +++ b/docs/start/showcase.md @@ -11,9 +11,11 @@ Real projects from the community. Highlights from #showcase (Jan 2–5, 2026). - **Grocery autopilot (Picnic)** — Skill built around an unofficial Picnic API client. Pulls order history, infers preferred brands, maps recipes to cart, completes order in minutes. https://github.com/timkrase/clawdis-picnic-skill - **Grocery autopilot (Picnic, alt)** — Another Picnic-based skill built via the `picnic-api` package. https://github.com/MRVDH/picnic-api - **German rail planning** — Go CLI for Deutsche Bahn; skill picks best connections given time windows and preferences. https://github.com/timkrase/dbrest-cli + https://github.com/timkrase/clawdis-skills/tree/main/db-bahn +- **padel-cli** — Playtomic availability + booking CLI with a Clawdbot plugin output. github.com/joshp123/padel-clipadel-cli availability screenshot - **Accounting intake** — Collect PDFs from email, prep for tax consultant (monthly accounting batch). (No link shared.) ## Knowledge & memory systems +- **xuezh** — Chinese learning engine + Clawdbot skill for pronunciation feedback and study flows. github.com/joshp123/xuezhxuezh pronunciation feedback in Clawdbot - **WhatsApp memory vault** — Ingests full exports, transcribes 1k+ voice notes, cross‑checks with git logs, outputs linked MD reports + ongoing indexing. (No link shared.) 
- **Karakeep semantic search** — Sidecar adds vector search to Karakeep bookmarks (Qdrant + OpenAI/Ollama), includes Clawdis skill. https://github.com/jamesbrooksco/karakeep-semantic-search - **Inside‑Out‑2 style memory** — Separate memory manager app turns session files into memories → beliefs → self model. (No link shared.) @@ -26,11 +28,12 @@ Real projects from the community. Highlights from #showcase (Jan 2–5, 2026). ## Infrastructure & deployment - **Home Assistant OS gateway add‑on** — Clawdbot gateway running on HA OS (Raspberry Pi), with SSH tunnel support + persistent state in /config. https://github.com/ngutman/clawdbot-ha-addon - **Home Assistant skill** — Control/automate HA via ClawdHub. https://clawdhub.com/skills/homeassistant -- **Nix packaging** — Batteries‑included nixified clawdis config. https://github.com/joshp123/nix-clawdis +- **Nix packaging** — Batteries‑included nixified clawdbot config. https://github.com/clawdbot/nix-clawdbot - **CalDAV skill** — khal/vdirsyncer based calendar skill. ClawdHub: caldav-calendar → https://clawdhub.com/skills/caldav-calendar ## Home + hardware -- **Roborock integration** — Plugin for robot vacuum control. https://github.com/joshp123/gohome/tree/main/plugins/roborock +- **gohome** — Nix-native home automation with Clawdbot as the interface, plus Grafana dashboards. github.com/joshp123/gohomeGoHome Grafana dashboard +- **Roborock integration** — Plugin for robot vacuum control. github.com/joshp123/gohome/tree/main/plugins/roborockGoHome Roborock status screenshot ## Community builds (non‑Clawdis but made with/around it) - **StarSwap marketplace** — Full astronomy gear marketplace. https://star-swap.com/ diff --git a/docs/tools/bash.md b/docs/tools/bash.md index 75211c2d9..73106a1e5 100644 --- a/docs/tools/bash.md +++ b/docs/tools/bash.md @@ -8,6 +8,8 @@ read_when: # Bash tool Run shell commands in the workspace. Supports foreground + background execution via `process`. 
+If `process` is disallowed, `bash` runs synchronously and ignores `yieldMs`/`background`. +Background sessions are scoped per agent; `process` only sees sessions from the same agent. ## Parameters diff --git a/docs/tools/index.md b/docs/tools/index.md index c6db325cc..6e9d14daa 100644 --- a/docs/tools/index.md +++ b/docs/tools/index.md @@ -42,6 +42,7 @@ Core parameters: Notes: - Returns `status: "running"` with a `sessionId` when backgrounded. - Use `process` to poll/log/write/kill/clear background sessions. +- If `process` is disallowed, `bash` runs synchronously and ignores `yieldMs`/`background`. ### `process` Manage background bash sessions. @@ -52,6 +53,7 @@ Core actions: Notes: - `poll` returns new output and exit status when complete. - `log` supports line-based `offset`/`limit` (omit `offset` to grab the last N lines). +- `process` is scoped per agent; sessions from other agents are not visible. ### `browser` Control the dedicated clawd browser. @@ -157,13 +159,14 @@ Core parameters: - `sessions_list`: `kinds?`, `limit?`, `activeMinutes?`, `messageLimit?` (0 = none) - `sessions_history`: `sessionKey`, `limit?`, `includeTools?` - `sessions_send`: `sessionKey`, `message`, `timeoutSeconds?` (0 = fire-and-forget) -- `sessions_spawn`: `task`, `label?`, `model?`, `timeoutSeconds?`, `cleanup?` +- `sessions_spawn`: `task`, `label?`, `model?`, `runTimeoutSeconds?`, `cleanup?` Notes: - `main` is the canonical direct-chat key; global/unknown are hidden. - `messageLimit > 0` fetches last N messages per session (tool messages filtered). - `sessions_send` waits for final completion when `timeoutSeconds > 0`. - `sessions_spawn` starts a sub-agent run and posts an announce reply back to the requester chat. +- `sessions_spawn` is non-blocking and returns `status: "accepted"` immediately. - `sessions_send` runs a reply‑back ping‑pong (reply `REPLY_SKIP` to stop; max turns via `session.agentToAgent.maxPingPongTurns`, 0–5). 
- After the ping‑pong, the target agent runs an **announce step**; reply `ANNOUNCE_SKIP` to suppress the announcement. diff --git a/docs/tools/slash-commands.md b/docs/tools/slash-commands.md index 67633b9c5..58af62b71 100644 --- a/docs/tools/slash-commands.md +++ b/docs/tools/slash-commands.md @@ -42,11 +42,11 @@ Text + native (when enabled): - `/verbose on|off` (alias: `/v`) - `/reasoning on|off|stream` (alias: `/reason`; `stream` = Telegram draft only) - `/elevated on|off` (alias: `/elev`) -- `/model <name>` +- `/model <name>` (or `/<alias>` from `agent.models.*.alias`) - `/queue <mode>` (plus options like `debounce:2s cap:25 drop:summarize`) Text-only: -- `/compact [instructions]` +- `/compact [instructions]` (see [/concepts/compaction](/concepts/compaction)) ## Surface notes diff --git a/docs/tools/subagents.md b/docs/tools/subagents.md index 68a88360d..c2d25e389 100644 --- a/docs/tools/subagents.md +++ b/docs/tools/subagents.md @@ -7,7 +7,7 @@ read_when: # Sub-agents -Sub-agents are background agent runs spawned from an existing agent run. They run in their own session (`agent::subagent:`) and, when finished, **announce** their result back to the requester chat provider. +Sub-agents are background agent runs spawned from an existing agent run. They run in their own session (`agent:<agentId>:subagent:<uuid>`) and, when finished, **announce** their result back to the requester chat provider. Primary goals: - Parallelize “research / long task / slow tool” work without blocking the main run. 
@@ -25,7 +25,7 @@ Tool params: - `task` (required) - `label?` (optional) - `model?` (optional; overrides the sub-agent model; invalid values are skipped and the sub-agent runs on the default model with a warning in the tool result) -- `timeoutSeconds?` (optional; omit for long-running jobs; when set, Clawdbot waits up to N seconds and aborts the sub-agent if it is still running) +- `runTimeoutSeconds?` (default `0`; when set, the sub-agent run is aborted after N seconds) - `cleanup?` (`delete|keep`, default `keep`) Auto-archive: @@ -33,7 +33,7 @@ Auto-archive: - Archive uses `sessions.delete` and renames the transcript to `*.deleted.<timestamp>` (same folder). - `cleanup: "delete"` archives immediately after announce (still keeps the transcript via rename). - Auto-archive is best-effort; pending timers are lost if the gateway restarts. -- Timeouts do **not** auto-archive; they only stop the run. The session remains until auto-archive. +- `runTimeoutSeconds` does **not** auto-archive; it only stops the run. The session remains until auto-archive. ## Announce @@ -84,3 +84,4 @@ Sub-agents use a dedicated in-process queue lane: - Sub-agent announce is **best-effort**. If the gateway restarts, pending “announce back” work is lost. - Sub-agents still share the same gateway process resources; treat `maxConcurrent` as a safety valve. +- `sessions_spawn` is always non-blocking: it returns `{ status: "accepted", runId, childSessionKey }` immediately. 
diff --git a/package.json b/package.json index 26b097c81..b21ad573b 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "clawdbot", - "version": "2026.1.5-3", + "version": "2026.1.7", "description": "WhatsApp gateway CLI (Baileys web) with Pi RPC agent", "type": "module", "main": "dist/index.js", @@ -85,6 +85,7 @@ "dependencies": { "@buape/carbon": "0.0.0-beta-20260107085330", "@clack/prompts": "^0.11.0", + "@grammyjs/runner": "^2.0.3", "@grammyjs/transformer-throttler": "^1.2.1", "@homebridge/ciao": "^1.3.4", "@mariozechner/pi-agent-core": "^0.37.2", diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 53505f5f1..14f509a45 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -28,6 +28,9 @@ importers: '@clack/prompts': specifier: ^0.11.0 version: 0.11.0 + '@grammyjs/runner': + specifier: ^2.0.3 + version: 2.0.3(grammy@1.39.2) '@grammyjs/transformer-throttler': specifier: ^1.2.1 version: 1.2.1(grammy@1.39.2) @@ -591,6 +594,12 @@ packages: '@modelcontextprotocol/sdk': optional: true + '@grammyjs/runner@2.0.3': + resolution: {integrity: sha512-nckmTs1dPWfVQteK9cxqxzE+0m1VRvluLWB8UgFzsjg62w3qthPJt0TYtJBEdG7OedvfQq4vnFAyE6iaMkR42A==} + engines: {node: '>=12.20.0 || >=14.13.1'} + peerDependencies: + grammy: ^1.13.1 + '@grammyjs/transformer-throttler@1.2.1': resolution: {integrity: sha512-CpWB0F3rJdUiKsq7826QhQsxbZi4wqfz1ccKX+fr+AOC+o8K7ZvS+wqX0suSu1QCsyUq2MDpNiKhyL2ZOJUS4w==} engines: {node: ^12.20.0 || >=14.13.1} @@ -3411,6 +3420,11 @@ snapshots: - supports-color - utf-8-validate + '@grammyjs/runner@2.0.3(grammy@1.39.2)': + dependencies: + abort-controller: 3.0.0 + grammy: 1.39.2 + '@grammyjs/transformer-throttler@1.2.1(grammy@1.39.2)': dependencies: bottleneck: 2.19.5 diff --git a/showcase.md b/showcase.md index 91ec938ce..7a57e6f49 100644 --- a/showcase.md +++ b/showcase.md @@ -2,6 +2,12 @@ Highlights from #showcase (Jan 2–5, 2026). Curated for “wow” factor + concrete links. 
+## Clawdhub projects (formerly Clawdis) +- **xuezh** — Chinese learning engine + Clawdbot skill for pronunciation feedback and study flows. github.com/joshp123/xuezhxuezh pronunciation feedback in Clawdbot +- **gohome** — Nix-native home automation with Clawdbot as the interface, plus Grafana dashboards. github.com/joshp123/gohomeGoHome Grafana dashboard +- **Roborock skill for GoHome** — Vacuum control plugin with gRPC actions + metrics. github.com/joshp123/gohome/tree/main/plugins/roborockGoHome Roborock status screenshot +- **padel-cli** — Playtomic availability + booking CLI with a Clawdbot plugin output. github.com/joshp123/padel-clipadel-cli availability screenshot + ## Automation & real-world outcomes - **Grocery autopilot (Picnic)** — Skill built around an unofficial Picnic API client. Pulls order history, infers preferred brands, maps recipes to cart, completes order in minutes. https://github.com/timkrase/clawdis-picnic-skill - **Grocery autopilot (Picnic, alt)** — Another Picnic-based skill built via the `picnic-api` package. https://github.com/MRVDH/picnic-api diff --git a/skills/1password/SKILL.md b/skills/1password/SKILL.md index 7aea6b8c1..7bac1be06 100644 --- a/skills/1password/SKILL.md +++ b/skills/1password/SKILL.md @@ -19,26 +19,29 @@ Follow the official CLI get-started steps. Don't guess install commands. 1. Check OS + shell. 2. Verify CLI present: `op --version`. 3. Confirm desktop app integration is enabled (per get-started) and the app is unlocked. -4. Sign in / authorize this terminal: `op signin` (expect an app prompt). -5. If multiple accounts: use `--account` or `OP_ACCOUNT`. -6. Verify access: `op whoami` or `op account list`. +4. REQUIRED: create a fresh tmux session for all `op` commands (no direct `op` calls outside tmux). +5. Sign in / authorize inside tmux: `op signin` (expect app prompt). +6. Verify access inside tmux: `op whoami` (must succeed before any secret read). +7. If multiple accounts: use `--account` or `OP_ACCOUNT`. 
-## Avoid repeated auth prompts (tmux) +## REQUIRED tmux session (T-Max) -The bash tool uses a fresh TTY per command, so app integration may prompt every time. To reuse authorization, run multiple `op` commands inside a single tmux session. +The shell tool uses a fresh TTY per command. To avoid re-prompts and failures, always run `op` inside a dedicated tmux session with a fresh socket/session name. -Example (see `tmux` skill for socket conventions): +Example (see `tmux` skill for socket conventions, do not reuse old session names): ```bash SOCKET_DIR="${CLAWDBOT_TMUX_SOCKET_DIR:-${TMPDIR:-/tmp}/clawdbot-tmux-sockets}" mkdir -p "$SOCKET_DIR" -SOCKET="$SOCKET_DIR/clawdbot.sock" -SESSION=op-auth +SOCKET="$SOCKET_DIR/clawdbot-op.sock" +SESSION="op-auth-$(date +%Y%m%d-%H%M%S)" tmux -S "$SOCKET" new -d -s "$SESSION" -n shell tmux -S "$SOCKET" send-keys -t "$SESSION":0.0 -- "op signin --account my.1password.com" Enter +tmux -S "$SOCKET" send-keys -t "$SESSION":0.0 -- "op whoami" Enter tmux -S "$SOCKET" send-keys -t "$SESSION":0.0 -- "op vault list" Enter tmux -S "$SOCKET" capture-pane -p -J -t "$SESSION":0.0 -S -200 +tmux -S "$SOCKET" kill-session -t "$SESSION" ``` ## Guardrails @@ -46,4 +49,5 @@ tmux -S "$SOCKET" capture-pane -p -J -t "$SESSION":0.0 -S -200 - Never paste secrets into logs, chat, or code. - Prefer `op run` / `op inject` over writing secrets to disk. - If sign-in without app integration is needed, use `op account add`. -- If a command returns "account is not signed in", re-run `op signin` and authorize in the app. +- If a command returns "account is not signed in", re-run `op signin` inside tmux and authorize in the app. +- Do not run `op` outside tmux; stop and ask if tmux is unavailable. 
diff --git a/src/agents/bash-process-registry.ts b/src/agents/bash-process-registry.ts index d91081b6b..71c911376 100644 --- a/src/agents/bash-process-registry.ts +++ b/src/agents/bash-process-registry.ts @@ -18,6 +18,7 @@ export type ProcessStatus = "running" | "completed" | "failed" | "killed"; export interface ProcessSession { id: string; command: string; + scopeKey?: string; child?: ChildProcessWithoutNullStreams; pid?: number; startedAt: number; @@ -38,6 +39,7 @@ export interface ProcessSession { export interface FinishedSession { id: string; command: string; + scopeKey?: string; startedAt: number; endedAt: number; cwd?: string; @@ -126,6 +128,7 @@ function moveToFinished(session: ProcessSession, status: ProcessStatus) { finishedSessions.set(session.id, { id: session.id, command: session.command, + scopeKey: session.scopeKey, startedAt: session.startedAt, endedAt: Date.now(), cwd: session.cwd, diff --git a/src/agents/bash-tools.test.ts b/src/agents/bash-tools.test.ts index 7276803bb..9214ab2c7 100644 --- a/src/agents/bash-tools.test.ts +++ b/src/agents/bash-tools.test.ts @@ -185,4 +185,36 @@ describe("bash tool backgrounding", () => { const textBlock = log.content.find((c) => c.type === "text"); expect(textBlock?.text).toBe("beta"); }); + + it("scopes process sessions by scopeKey", async () => { + const bashA = createBashTool({ backgroundMs: 10, scopeKey: "agent:alpha" }); + const processA = createProcessTool({ scopeKey: "agent:alpha" }); + const bashB = createBashTool({ backgroundMs: 10, scopeKey: "agent:beta" }); + const processB = createProcessTool({ scopeKey: "agent:beta" }); + + const resultA = await bashA.execute("call1", { + command: 'node -e "setTimeout(() => {}, 50)"', + background: true, + }); + const resultB = await bashB.execute("call2", { + command: 'node -e "setTimeout(() => {}, 50)"', + background: true, + }); + + const sessionA = (resultA.details as { sessionId: string }).sessionId; + const sessionB = (resultB.details as { sessionId: string 
}).sessionId; + + const listA = await processA.execute("call3", { action: "list" }); + const sessionsA = ( + listA.details as { sessions: Array<{ sessionId: string }> } + ).sessions; + expect(sessionsA.some((s) => s.sessionId === sessionA)).toBe(true); + expect(sessionsA.some((s) => s.sessionId === sessionB)).toBe(false); + + const pollB = await processB.execute("call4", { + action: "poll", + sessionId: sessionA, + }); + expect(pollB.details.status).toBe("failed"); + }); }); diff --git a/src/agents/bash-tools.ts b/src/agents/bash-tools.ts index c380710a3..bb4aff4c5 100644 --- a/src/agents/bash-tools.ts +++ b/src/agents/bash-tools.ts @@ -39,27 +39,32 @@ const DEFAULT_PATH = process.env.PATH ?? "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"; -const stringEnum = ( - values: readonly string[], - options?: Parameters[1], +// NOTE: Using Type.Unsafe with enum instead of Type.Union([Type.Literal(...)]) +// because Claude API on Vertex AI rejects nested anyOf schemas as invalid JSON Schema. +// Type.Union of literals compiles to { anyOf: [{enum:["a"]}, {enum:["b"]}, ...] } +// which is valid but not accepted. A flat enum { type: "string", enum: [...] } works. +const stringEnum = ( + values: T, + options?: { description?: string }, ) => - Type.Union( - values.map((value) => Type.Literal(value)) as [ - ReturnType, - ...ReturnType[], - ], - options, - ); + Type.Unsafe({ + type: "string", + enum: values as unknown as string[], + ...options, + }); export type BashToolDefaults = { backgroundMs?: number; timeoutSec?: number; sandbox?: BashSandboxConfig; elevated?: BashElevatedDefaults; + allowBackground?: boolean; + scopeKey?: string; }; export type ProcessToolDefaults = { cleanupMs?: number; + scopeKey?: string; }; export type BashSandboxConfig = { @@ -126,6 +131,7 @@ export function createBashTool( 10, 120_000, ); + const allowBackground = defaults?.allowBackground ?? 
true; const defaultTimeoutSec = typeof defaults?.timeoutSec === "number" && defaults.timeoutSec > 0 ? defaults.timeoutSec @@ -152,18 +158,27 @@ export function createBashTool( throw new Error("Provide a command to start."); } - const yieldWindow = params.background - ? 0 - : clampNumber( - params.yieldMs ?? defaultBackgroundMs, - defaultBackgroundMs, - 10, - 120_000, - ); const maxOutput = DEFAULT_MAX_OUTPUT; const startedAt = Date.now(); const sessionId = randomUUID(); const warnings: string[] = []; + const backgroundRequested = params.background === true; + const yieldRequested = typeof params.yieldMs === "number"; + if (!allowBackground && (backgroundRequested || yieldRequested)) { + warnings.push( + "Warning: background execution is disabled; running synchronously.", + ); + } + const yieldWindow = allowBackground + ? backgroundRequested + ? 0 + : clampNumber( + params.yieldMs ?? defaultBackgroundMs, + defaultBackgroundMs, + 10, + 120_000, + ) + : null; const elevatedDefaults = defaults?.elevated; const elevatedDefaultOn = elevatedDefaults?.defaultLevel === "on" && @@ -238,6 +253,7 @@ export function createBashTool( const session = { id: sessionId, command: params.command, + scopeKey: defaults?.scopeKey, child, pid: child?.pid, startedAt, @@ -351,15 +367,17 @@ export function createBashTool( resolveRunning(); }; - if (yieldWindow === 0) { - onYieldNow(); - } else { - yieldTimer = setTimeout(() => { - if (settled) return; - yielded = true; - markBackgrounded(session); - resolveRunning(); - }, yieldWindow); + if (allowBackground && yieldWindow !== null) { + if (yieldWindow === 0) { + onYieldNow(); + } else { + yieldTimer = setTimeout(() => { + if (settled) return; + yielded = true; + markBackgrounded(session); + resolveRunning(); + }, yieldWindow); + } } const handleExit = ( @@ -456,6 +474,9 @@ export function createProcessTool( if (defaults?.cleanupMs !== undefined) { setJobTtlMs(defaults.cleanupMs); } + const scopeKey = defaults?.scopeKey; + const isInScope = 
(session?: { scopeKey?: string } | null) => + !scopeKey || session?.scopeKey === scopeKey; return { name: "process", @@ -473,32 +494,36 @@ export function createProcessTool( }; if (params.action === "list") { - const running = listRunningSessions().map((s) => ({ - sessionId: s.id, - status: "running", - pid: s.pid ?? undefined, - startedAt: s.startedAt, - runtimeMs: Date.now() - s.startedAt, - cwd: s.cwd, - command: s.command, - name: deriveSessionName(s.command), - tail: s.tail, - truncated: s.truncated, - })); - const finished = listFinishedSessions().map((s) => ({ - sessionId: s.id, - status: s.status, - startedAt: s.startedAt, - endedAt: s.endedAt, - runtimeMs: s.endedAt - s.startedAt, - cwd: s.cwd, - command: s.command, - name: deriveSessionName(s.command), - tail: s.tail, - truncated: s.truncated, - exitCode: s.exitCode ?? undefined, - exitSignal: s.exitSignal ?? undefined, - })); + const running = listRunningSessions() + .filter((s) => isInScope(s)) + .map((s) => ({ + sessionId: s.id, + status: "running", + pid: s.pid ?? undefined, + startedAt: s.startedAt, + runtimeMs: Date.now() - s.startedAt, + cwd: s.cwd, + command: s.command, + name: deriveSessionName(s.command), + tail: s.tail, + truncated: s.truncated, + })); + const finished = listFinishedSessions() + .filter((s) => isInScope(s)) + .map((s) => ({ + sessionId: s.id, + status: s.status, + startedAt: s.startedAt, + endedAt: s.endedAt, + runtimeMs: s.endedAt - s.startedAt, + cwd: s.cwd, + command: s.command, + name: deriveSessionName(s.command), + tail: s.tail, + truncated: s.truncated, + exitCode: s.exitCode ?? undefined, + exitSignal: s.exitSignal ?? undefined, + })); const lines = [...running, ...finished] .sort((a, b) => b.startedAt - a.startedAt) .map((s) => { @@ -532,34 +557,38 @@ export function createProcessTool( const session = getSession(params.sessionId); const finished = getFinishedSession(params.sessionId); + const scopedSession = isInScope(session) ? 
session : undefined; + const scopedFinished = isInScope(finished) ? finished : undefined; switch (params.action) { case "poll": { - if (!session) { - if (finished) { + if (!scopedSession) { + if (scopedFinished) { return { content: [ { type: "text", text: - (finished.tail || + (scopedFinished.tail || `(no output recorded${ - finished.truncated ? " — truncated to cap" : "" + scopedFinished.truncated ? " — truncated to cap" : "" })`) + `\n\nProcess exited with ${ - finished.exitSignal - ? `signal ${finished.exitSignal}` - : `code ${finished.exitCode ?? 0}` + scopedFinished.exitSignal + ? `signal ${scopedFinished.exitSignal}` + : `code ${scopedFinished.exitCode ?? 0}` }.`, }, ], details: { status: - finished.status === "completed" ? "completed" : "failed", + scopedFinished.status === "completed" + ? "completed" + : "failed", sessionId: params.sessionId, - exitCode: finished.exitCode ?? undefined, - aggregated: finished.aggregated, - name: deriveSessionName(finished.command), + exitCode: scopedFinished.exitCode ?? undefined, + aggregated: scopedFinished.aggregated, + name: deriveSessionName(scopedFinished.command), }, }; } @@ -573,7 +602,7 @@ export function createProcessTool( details: { status: "failed" }, }; } - if (!session.backgrounded) { + if (!scopedSession.backgrounded) { return { content: [ { @@ -584,17 +613,17 @@ export function createProcessTool( details: { status: "failed" }, }; } - const { stdout, stderr } = drainSession(session); - const exited = session.exited; - const exitCode = session.exitCode ?? 0; - const exitSignal = session.exitSignal ?? undefined; + const { stdout, stderr } = drainSession(scopedSession); + const exited = scopedSession.exited; + const exitCode = scopedSession.exitCode ?? 0; + const exitSignal = scopedSession.exitSignal ?? undefined; if (exited) { const status = exitCode === 0 && exitSignal == null ? "completed" : "failed"; markExited( - session, - session.exitCode ?? null, - session.exitSignal ?? 
null, + scopedSession, + scopedSession.exitCode ?? null, + scopedSession.exitSignal ?? null, status, ); } @@ -624,15 +653,15 @@ export function createProcessTool( status, sessionId: params.sessionId, exitCode: exited ? exitCode : undefined, - aggregated: session.aggregated, - name: deriveSessionName(session.command), + aggregated: scopedSession.aggregated, + name: deriveSessionName(scopedSession.command), }, }; } case "log": { - if (session) { - if (!session.backgrounded) { + if (scopedSession) { + if (!scopedSession.backgrounded) { return { content: [ { @@ -644,31 +673,31 @@ export function createProcessTool( }; } const { slice, totalLines, totalChars } = sliceLogLines( - session.aggregated, + scopedSession.aggregated, params.offset, params.limit, ); return { content: [{ type: "text", text: slice || "(no output yet)" }], details: { - status: session.exited ? "completed" : "running", + status: scopedSession.exited ? "completed" : "running", sessionId: params.sessionId, total: totalLines, totalLines, totalChars, - truncated: session.truncated, - name: deriveSessionName(session.command), + truncated: scopedSession.truncated, + name: deriveSessionName(scopedSession.command), }, }; } - if (finished) { + if (scopedFinished) { const { slice, totalLines, totalChars } = sliceLogLines( - finished.aggregated, + scopedFinished.aggregated, params.offset, params.limit, ); const status = - finished.status === "completed" ? "completed" : "failed"; + scopedFinished.status === "completed" ? "completed" : "failed"; return { content: [ { type: "text", text: slice || "(no output recorded)" }, @@ -679,10 +708,10 @@ export function createProcessTool( total: totalLines, totalLines, totalChars, - truncated: finished.truncated, - exitCode: finished.exitCode ?? undefined, - exitSignal: finished.exitSignal ?? undefined, - name: deriveSessionName(finished.command), + truncated: scopedFinished.truncated, + exitCode: scopedFinished.exitCode ?? 
undefined, + exitSignal: scopedFinished.exitSignal ?? undefined, + name: deriveSessionName(scopedFinished.command), }, }; } @@ -698,7 +727,7 @@ export function createProcessTool( } case "write": { - if (!session) { + if (!scopedSession) { return { content: [ { @@ -709,7 +738,7 @@ export function createProcessTool( details: { status: "failed" }, }; } - if (!session.backgrounded) { + if (!scopedSession.backgrounded) { return { content: [ { @@ -720,7 +749,10 @@ export function createProcessTool( details: { status: "failed" }, }; } - if (!session.child?.stdin || session.child.stdin.destroyed) { + if ( + !scopedSession.child?.stdin || + scopedSession.child.stdin.destroyed + ) { return { content: [ { @@ -732,13 +764,13 @@ export function createProcessTool( }; } await new Promise((resolve, reject) => { - session.child?.stdin.write(params.data ?? "", (err) => { + scopedSession.child?.stdin.write(params.data ?? "", (err) => { if (err) reject(err); else resolve(); }); }); if (params.eof) { - session.child.stdin.end(); + scopedSession.child.stdin.end(); } return { content: [ @@ -752,13 +784,15 @@ export function createProcessTool( details: { status: "running", sessionId: params.sessionId, - name: session ? deriveSessionName(session.command) : undefined, + name: scopedSession + ? deriveSessionName(scopedSession.command) + : undefined, }, }; } case "kill": { - if (!session) { + if (!scopedSession) { return { content: [ { @@ -769,7 +803,7 @@ export function createProcessTool( details: { status: "failed" }, }; } - if (!session.backgrounded) { + if (!scopedSession.backgrounded) { return { content: [ { @@ -780,21 +814,23 @@ export function createProcessTool( details: { status: "failed" }, }; } - killSession(session); - markExited(session, null, "SIGKILL", "failed"); + killSession(scopedSession); + markExited(scopedSession, null, "SIGKILL", "failed"); return { content: [ { type: "text", text: `Killed session ${params.sessionId}.` }, ], details: { status: "failed", - name: session ? 
deriveSessionName(session.command) : undefined, + name: scopedSession + ? deriveSessionName(scopedSession.command) + : undefined, }, }; } case "clear": { - if (finished) { + if (scopedFinished) { deleteSession(params.sessionId); return { content: [ @@ -815,20 +851,22 @@ export function createProcessTool( } case "remove": { - if (session) { - killSession(session); - markExited(session, null, "SIGKILL", "failed"); + if (scopedSession) { + killSession(scopedSession); + markExited(scopedSession, null, "SIGKILL", "failed"); return { content: [ { type: "text", text: `Removed session ${params.sessionId}.` }, ], details: { status: "failed", - name: session ? deriveSessionName(session.command) : undefined, + name: scopedSession + ? deriveSessionName(scopedSession.command) + : undefined, }, }; } - if (finished) { + if (scopedFinished) { deleteSession(params.sessionId); return { content: [ diff --git a/src/agents/clawdbot-tools.subagents.test.ts b/src/agents/clawdbot-tools.subagents.test.ts index d8be2d249..0df0a0abd 100644 --- a/src/agents/clawdbot-tools.subagents.test.ts +++ b/src/agents/clawdbot-tools.subagents.test.ts @@ -19,17 +19,21 @@ vi.mock("../config/config.js", async (importOriginal) => { }; }); +import { emitAgentEvent } from "../infra/agent-events.js"; import { createClawdbotTools } from "./clawdbot-tools.js"; +import { resetSubagentRegistryForTests } from "./subagent-registry.js"; describe("subagents", () => { it("sessions_spawn announces back to the requester group provider", async () => { + resetSubagentRegistryForTests(); callGatewayMock.mockReset(); const calls: Array<{ method?: string; params?: unknown }> = []; let agentCallCount = 0; - let lastWaitedRunId: string | undefined; - const replyByRunId = new Map(); let sendParams: { to?: string; provider?: string; message?: string } = {}; let deletedKey: string | undefined; + let childRunId: string | undefined; + let childSessionKey: string | undefined; + const sessionLastAssistantText = new Map(); 
callGatewayMock.mockImplementation(async (opts: unknown) => { const request = opts as { method?: string; params?: unknown }; @@ -37,13 +41,21 @@ describe("subagents", () => { if (request.method === "agent") { agentCallCount += 1; const runId = `run-${agentCallCount}`; - const params = request.params as - | { message?: string; sessionKey?: string } - | undefined; + const params = request.params as { + message?: string; + sessionKey?: string; + timeout?: number; + }; const message = params?.message ?? ""; - const reply = - message === "Sub-agent announce step." ? "announce now" : "result"; - replyByRunId.set(runId, reply); + const sessionKey = params?.sessionKey ?? ""; + if (message === "Sub-agent announce step.") { + sessionLastAssistantText.set(sessionKey, "announce now"); + } else { + childRunId = runId; + childSessionKey = sessionKey; + sessionLastAssistantText.set(sessionKey, "result"); + expect(params?.timeout).toBe(1); + } return { runId, status: "accepted", @@ -51,13 +63,28 @@ describe("subagents", () => { }; } if (request.method === "agent.wait") { - const params = request.params as { runId?: string } | undefined; - lastWaitedRunId = params?.runId; + const params = request.params as + | { runId?: string; timeoutMs?: number } + | undefined; + if ( + params?.runId && + params.runId === childRunId && + typeof params.timeoutMs === "number" && + params.timeoutMs > 0 + ) { + throw new Error( + "sessions_spawn must not wait for sub-agent completion", + ); + } + if (params?.timeoutMs === 0) { + return { runId: params?.runId ?? "run-1", status: "timeout" }; + } return { runId: params?.runId ?? "run-1", status: "ok" }; } if (request.method === "chat.history") { + const params = request.params as { sessionKey?: string } | undefined; const text = - (lastWaitedRunId && replyByRunId.get(lastWaitedRunId)) ?? ""; + sessionLastAssistantText.get(params?.sessionKey ?? "") ?? 
""; return { messages: [{ role: "assistant", content: [{ type: "text", text }] }], }; @@ -89,11 +116,26 @@ describe("subagents", () => { const result = await tool.execute("call1", { task: "do thing", - timeoutSeconds: 1, + runTimeoutSeconds: 1, cleanup: "delete", }); - expect(result.details).toMatchObject({ status: "ok", reply: "result" }); + expect(result.details).toMatchObject({ + status: "accepted", + runId: "run-1", + }); + if (!childRunId) throw new Error("missing child runId"); + emitAgentEvent({ + runId: childRunId, + stream: "lifecycle", + data: { + phase: "end", + startedAt: 1234, + endedAt: 2345, + }, + }); + + await new Promise((resolve) => setTimeout(resolve, 0)); await new Promise((resolve) => setTimeout(resolve, 0)); await new Promise((resolve) => setTimeout(resolve, 0)); @@ -105,6 +147,7 @@ describe("subagents", () => { expect(first?.lane).toBe("subagent"); expect(first?.deliver).toBe(false); expect(first?.sessionKey?.startsWith("agent:main:subagent:")).toBe(true); + expect(childSessionKey?.startsWith("agent:main:subagent:")).toBe(true); expect(sendParams.provider).toBe("discord"); expect(sendParams.to).toBe("channel:req"); @@ -114,12 +157,14 @@ describe("subagents", () => { }); it("sessions_spawn resolves main announce target from sessions.list", async () => { + resetSubagentRegistryForTests(); callGatewayMock.mockReset(); const calls: Array<{ method?: string; params?: unknown }> = []; let agentCallCount = 0; - let lastWaitedRunId: string | undefined; - const replyByRunId = new Map(); let sendParams: { to?: string; provider?: string; message?: string } = {}; + let childRunId: string | undefined; + let childSessionKey: string | undefined; + const sessionLastAssistantText = new Map(); callGatewayMock.mockImplementation(async (opts: unknown) => { const request = opts as { method?: string; params?: unknown }; @@ -138,13 +183,19 @@ describe("subagents", () => { if (request.method === "agent") { agentCallCount += 1; const runId = `run-${agentCallCount}`; 
- const params = request.params as - | { message?: string; sessionKey?: string } - | undefined; + const params = request.params as { + message?: string; + sessionKey?: string; + }; const message = params?.message ?? ""; - const reply = - message === "Sub-agent announce step." ? "hello from sub" : "done"; - replyByRunId.set(runId, reply); + const sessionKey = params?.sessionKey ?? ""; + if (message === "Sub-agent announce step.") { + sessionLastAssistantText.set(sessionKey, "hello from sub"); + } else { + childRunId = runId; + childSessionKey = sessionKey; + sessionLastAssistantText.set(sessionKey, "done"); + } return { runId, status: "accepted", @@ -152,13 +203,18 @@ describe("subagents", () => { }; } if (request.method === "agent.wait") { - const params = request.params as { runId?: string } | undefined; - lastWaitedRunId = params?.runId; + const params = request.params as + | { runId?: string; timeoutMs?: number } + | undefined; + if (params?.timeoutMs === 0) { + return { runId: params?.runId ?? "run-1", status: "timeout" }; + } return { runId: params?.runId ?? "run-1", status: "ok" }; } if (request.method === "chat.history") { + const params = request.params as { sessionKey?: string } | undefined; const text = - (lastWaitedRunId && replyByRunId.get(lastWaitedRunId)) ?? ""; + sessionLastAssistantText.get(params?.sessionKey ?? "") ?? 
""; return { messages: [{ role: "assistant", content: [{ type: "text", text }] }], }; @@ -188,10 +244,25 @@ describe("subagents", () => { const result = await tool.execute("call2", { task: "do thing", - timeoutSeconds: 1, + runTimeoutSeconds: 1, + }); + expect(result.details).toMatchObject({ + status: "accepted", + runId: "run-1", }); - expect(result.details).toMatchObject({ status: "ok", reply: "done" }); + if (!childRunId) throw new Error("missing child runId"); + emitAgentEvent({ + runId: childRunId, + stream: "lifecycle", + data: { + phase: "end", + startedAt: 1000, + endedAt: 2000, + }, + }); + + await new Promise((resolve) => setTimeout(resolve, 0)); await new Promise((resolve) => setTimeout(resolve, 0)); await new Promise((resolve) => setTimeout(resolve, 0)); @@ -199,14 +270,14 @@ describe("subagents", () => { expect(sendParams.to).toBe("+123"); expect(sendParams.message ?? "").toContain("hello from sub"); expect(sendParams.message ?? "").toContain("Stats:"); + expect(childSessionKey?.startsWith("agent:main:subagent:")).toBe(true); }); it("sessions_spawn applies a model to the child session", async () => { + resetSubagentRegistryForTests(); callGatewayMock.mockReset(); const calls: Array<{ method?: string; params?: unknown }> = []; let agentCallCount = 0; - let lastWaitedRunId: string | undefined; - const replyByRunId = new Map(); callGatewayMock.mockImplementation(async (opts: unknown) => { const request = opts as { method?: string; params?: unknown }; @@ -217,13 +288,6 @@ describe("subagents", () => { if (request.method === "agent") { agentCallCount += 1; const runId = `run-${agentCallCount}`; - const params = request.params as - | { message?: string; sessionKey?: string } - | undefined; - const message = params?.message ?? ""; - const reply = - message === "Sub-agent announce step." ? 
"ANNOUNCE_SKIP" : "done"; - replyByRunId.set(runId, reply); return { runId, status: "accepted", @@ -231,16 +295,9 @@ describe("subagents", () => { }; } if (request.method === "agent.wait") { - const params = request.params as { runId?: string } | undefined; - lastWaitedRunId = params?.runId; - return { runId: params?.runId ?? "run-1", status: "ok" }; - } - if (request.method === "chat.history") { - const text = - (lastWaitedRunId && replyByRunId.get(lastWaitedRunId)) ?? ""; - return { - messages: [{ role: "assistant", content: [{ type: "text", text }] }], - }; + const params = request.params as { timeoutMs?: number } | undefined; + if (params?.timeoutMs === 0) return { status: "timeout" }; + return { status: "ok" }; } if (request.method === "sessions.delete") { return { ok: true }; @@ -256,11 +313,14 @@ describe("subagents", () => { const result = await tool.execute("call3", { task: "do thing", - timeoutSeconds: 1, + runTimeoutSeconds: 1, model: "claude-haiku-4-5", cleanup: "keep", }); - expect(result.details).toMatchObject({ status: "ok", reply: "done" }); + expect(result.details).toMatchObject({ + status: "accepted", + modelApplied: true, + }); const patchIndex = calls.findIndex( (call) => call.method === "sessions.patch", @@ -277,11 +337,10 @@ describe("subagents", () => { }); it("sessions_spawn skips invalid model overrides and continues", async () => { + resetSubagentRegistryForTests(); callGatewayMock.mockReset(); const calls: Array<{ method?: string; params?: unknown }> = []; let agentCallCount = 0; - let lastWaitedRunId: string | undefined; - const replyByRunId = new Map(); callGatewayMock.mockImplementation(async (opts: unknown) => { const request = opts as { method?: string; params?: unknown }; @@ -292,13 +351,6 @@ describe("subagents", () => { if (request.method === "agent") { agentCallCount += 1; const runId = `run-${agentCallCount}`; - const params = request.params as - | { message?: string; sessionKey?: string } - | undefined; - const message = 
params?.message ?? ""; - const reply = - message === "Sub-agent announce step." ? "ANNOUNCE_SKIP" : "done"; - replyByRunId.set(runId, reply); return { runId, status: "accepted", @@ -306,16 +358,9 @@ describe("subagents", () => { }; } if (request.method === "agent.wait") { - const params = request.params as { runId?: string } | undefined; - lastWaitedRunId = params?.runId; - return { runId: params?.runId ?? "run-1", status: "ok" }; - } - if (request.method === "chat.history") { - const text = - (lastWaitedRunId && replyByRunId.get(lastWaitedRunId)) ?? ""; - return { - messages: [{ role: "assistant", content: [{ type: "text", text }] }], - }; + const params = request.params as { timeoutMs?: number } | undefined; + if (params?.timeoutMs === 0) return { status: "timeout" }; + return { status: "ok" }; } if (request.method === "sessions.delete") { return { ok: true }; @@ -331,11 +376,11 @@ describe("subagents", () => { const result = await tool.execute("call4", { task: "do thing", - timeoutSeconds: 1, + runTimeoutSeconds: 1, model: "bad-model", }); expect(result.details).toMatchObject({ - status: "ok", + status: "accepted", modelApplied: false, }); expect( @@ -343,4 +388,36 @@ describe("subagents", () => { ).toContain("invalid model"); expect(calls.some((call) => call.method === "agent")).toBe(true); }); + + it("sessions_spawn supports legacy timeoutSeconds alias", async () => { + resetSubagentRegistryForTests(); + callGatewayMock.mockReset(); + let spawnedTimeout: number | undefined; + + callGatewayMock.mockImplementation(async (opts: unknown) => { + const request = opts as { method?: string; params?: unknown }; + if (request.method === "agent") { + const params = request.params as { timeout?: number } | undefined; + spawnedTimeout = params?.timeout; + return { runId: "run-1", status: "accepted", acceptedAt: 1000 }; + } + return {}; + }); + + const tool = createClawdbotTools({ + agentSessionKey: "main", + agentProvider: "whatsapp", + }).find((candidate) => 
candidate.name === "sessions_spawn"); + if (!tool) throw new Error("missing sessions_spawn tool"); + + const result = await tool.execute("call5", { + task: "do thing", + timeoutSeconds: 2, + }); + expect(result.details).toMatchObject({ + status: "accepted", + runId: "run-1", + }); + expect(spawnedTimeout).toBe(2); + }); }); diff --git a/src/agents/pi-embedded-helpers.test.ts b/src/agents/pi-embedded-helpers.test.ts index c36664dba..6d95b50f1 100644 --- a/src/agents/pi-embedded-helpers.test.ts +++ b/src/agents/pi-embedded-helpers.test.ts @@ -1,6 +1,13 @@ +import type { AgentMessage } from "@mariozechner/pi-agent-core"; +import type { AssistantMessage } from "@mariozechner/pi-ai"; import { describe, expect, it } from "vitest"; - -import { buildBootstrapContextFiles } from "./pi-embedded-helpers.js"; +import { + buildBootstrapContextFiles, + formatAssistantErrorText, + isContextOverflowError, + sanitizeGoogleTurnOrdering, + validateGeminiTurns, +} from "./pi-embedded-helpers.js"; import { DEFAULT_AGENTS_FILENAME, type WorkspaceBootstrapFile, @@ -16,6 +23,145 @@ const makeFile = ( ...overrides, }); +describe("validateGeminiTurns", () => { + it("should return empty array unchanged", () => { + const result = validateGeminiTurns([]); + expect(result).toEqual([]); + }); + + it("should return single message unchanged", () => { + const msgs: AgentMessage[] = [ + { + role: "user", + content: "Hello", + }, + ]; + const result = validateGeminiTurns(msgs); + expect(result).toEqual(msgs); + }); + + it("should leave alternating user/assistant unchanged", () => { + const msgs: AgentMessage[] = [ + { role: "user", content: "Hello" }, + { role: "assistant", content: [{ type: "text", text: "Hi" }] }, + { role: "user", content: "How are you?" }, + { role: "assistant", content: [{ type: "text", text: "Good!" 
}] }, + ]; + const result = validateGeminiTurns(msgs); + expect(result).toHaveLength(4); + expect(result).toEqual(msgs); + }); + + it("should merge consecutive assistant messages", () => { + const msgs: AgentMessage[] = [ + { role: "user", content: "Hello" }, + { + role: "assistant", + content: [{ type: "text", text: "Part 1" }], + stopReason: "end_turn", + }, + { + role: "assistant", + content: [{ type: "text", text: "Part 2" }], + stopReason: "end_turn", + }, + { role: "user", content: "How are you?" }, + ]; + + const result = validateGeminiTurns(msgs); + + expect(result).toHaveLength(3); + expect(result[0]).toEqual({ role: "user", content: "Hello" }); + expect(result[1].role).toBe("assistant"); + expect(result[1].content).toHaveLength(2); + expect(result[2]).toEqual({ role: "user", content: "How are you?" }); + }); + + it("should preserve metadata from later message when merging", () => { + const msgs: AgentMessage[] = [ + { + role: "assistant", + content: [{ type: "text", text: "Part 1" }], + usage: { input: 10, output: 5 }, + }, + { + role: "assistant", + content: [{ type: "text", text: "Part 2" }], + usage: { input: 10, output: 10 }, + stopReason: "end_turn", + }, + ]; + + const result = validateGeminiTurns(msgs); + + expect(result).toHaveLength(1); + const merged = result[0] as Extract; + expect(merged.usage).toEqual({ input: 10, output: 10 }); + expect(merged.stopReason).toBe("end_turn"); + expect(merged.content).toHaveLength(2); + }); + + it("should handle toolResult messages without merging", () => { + const msgs: AgentMessage[] = [ + { role: "user", content: "Use tool" }, + { + role: "assistant", + content: [{ type: "toolUse", id: "tool-1", name: "test", input: {} }], + }, + { + role: "toolResult", + toolUseId: "tool-1", + content: [{ type: "text", text: "Result" }], + }, + { role: "user", content: "Next request" }, + ]; + + const result = validateGeminiTurns(msgs); + + expect(result).toHaveLength(4); + expect(result).toEqual(msgs); + }); + + it("should 
handle real-world corrupted sequence", () => { + // This is the pattern that causes Gemini errors: + // user → assistant → assistant (consecutive, wrong!) + const msgs: AgentMessage[] = [ + { role: "user", content: "Request 1" }, + { + role: "assistant", + content: [{ type: "text", text: "Response A" }], + }, + { + role: "assistant", + content: [{ type: "toolUse", id: "t1", name: "search", input: {} }], + }, + { + role: "toolResult", + toolUseId: "t1", + content: [{ type: "text", text: "Found data" }], + }, + { + role: "assistant", + content: [{ type: "text", text: "Here's the answer" }], + }, + { + role: "assistant", + content: [{ type: "text", text: "Extra thoughts" }], + }, + { role: "user", content: "Request 2" }, + ]; + + const result = validateGeminiTurns(msgs); + + // Should merge the consecutive assistants + expect(result[0].role).toBe("user"); + expect(result[1].role).toBe("assistant"); + expect(result[2].role).toBe("toolResult"); + expect(result[3].role).toBe("assistant"); + expect(result[4].role).toBe("user"); + }); +}); + describe("buildBootstrapContextFiles", () => { it("keeps missing markers", () => { const files = [makeFile({ missing: true, content: undefined })]; @@ -46,3 +192,58 @@ describe("buildBootstrapContextFiles", () => { expect(result?.content.endsWith(long.slice(-120))).toBe(true); }); }); + +describe("isContextOverflowError", () => { + it("matches known overflow hints", () => { + const samples = [ + "request_too_large", + "Request exceeds the maximum size", + "context length exceeded", + "Maximum context length", + "413 Request Entity Too Large", + ]; + for (const sample of samples) { + expect(isContextOverflowError(sample)).toBe(true); + } + }); + + it("ignores unrelated errors", () => { + expect(isContextOverflowError("rate limit exceeded")).toBe(false); + }); +}); + +describe("formatAssistantErrorText", () => { + const makeAssistantError = (errorMessage: string): AssistantMessage => + ({ + stopReason: "error", + errorMessage, + }) as 
/** Text of the synthetic user turn that is prepended to assistant-first histories. */
const GOOGLE_TURN_ORDER_BOOTSTRAP_TEXT = "(session bootstrap)";

/**
 * Reports whether `api` is one of the Google model API identifiers handled by
 * the turn-ordering fixups (`google-gemini-cli` or `google-generative-ai`).
 *
 * @param api - Model API identifier; `null`/`undefined` yield `false`.
 */
export function isGoogleModelApi(api?: string | null): boolean {
  return api === "google-gemini-cli" || api === "google-generative-ai";
}

/**
 * Ensures a transcript does not begin with an assistant turn.
 *
 * When the first message has role `"assistant"`, a new array is returned with
 * a synthetic user message (`GOOGLE_TURN_ORDER_BOOTSTRAP_TEXT`) in front of the
 * original messages. In every other case (empty history, user-first history,
 * including one that already starts with the bootstrap turn) the input array
 * itself is returned, so callers can detect a fixup via reference inequality.
 *
 * @param messages - Session history in chronological order; never mutated.
 * @returns `messages` unchanged, or a new array starting with the bootstrap turn.
 */
export function sanitizeGoogleTurnOrdering(
  messages: AgentMessage[],
): AgentMessage[] {
  const first = messages[0] as { role?: unknown } | undefined;
  // A single guard covers every no-op case; a history that already starts with
  // the bootstrap user turn is user-first and therefore returned as-is.
  if (first?.role !== "assistant") return messages;

  // Cloud Code Assist rejects histories that begin with a model turn (tool call or text).
  // Prepend a tiny synthetic user turn so the rest of the transcript can be used.
  const bootstrap: AgentMessage = {
    role: "user",
    content: GOOGLE_TURN_ORDER_BOOTSTRAP_TEXT,
    timestamp: Date.now(),
  } as AgentMessage;

  return [bootstrap, ...messages];
}

/**
 * Heuristically detects provider errors that mean the request/context was too
 * large, by case-insensitive substring matching on the error text.
 *
 * @param errorMessage - Raw error text; empty or missing input yields `false`.
 * @returns `true` when the text contains one of the known overflow hints.
 */
export function isContextOverflowError(errorMessage?: string): boolean {
  if (!errorMessage) return false;
  const lower = errorMessage.toLowerCase();
  return (
    lower.includes("request_too_large") ||
    lower.includes("request exceeds the maximum size") ||
    lower.includes("context length exceeded") ||
    lower.includes("maximum context length") ||
    // HTTP 413 responses, e.g. "413 Request Entity Too Large".
    (lower.includes("413") && lower.includes("too large"))
  );
}
/**
 * Repairs assistant turn sequences before a transcript is sent to the Gemini API.
 *
 * Runs of consecutive `assistant` messages are collapsed into one assistant
 * message: content blocks are concatenated in order, and `usage`,
 * `stopReason` and `errorMessage` are taken from the later message when it
 * has them (all other fields come from the earlier one). Messages of any other
 * role are passed through untouched, even when they repeat.
 *
 * This prevents the "function call turn comes immediately after a user turn or
 * after a function response turn" error.
 *
 * Entries that are not objects, or that carry no `role`, are kept in place and
 * break a run: an assistant message following such an entry is never merged
 * into it or across it.
 *
 * @param messages - Transcript in chronological order; neither the array nor
 *   its elements are mutated.
 * @returns The input itself when it is empty or not an array, otherwise a new
 *   array with consecutive assistant messages merged.
 */
export function validateGeminiTurns(messages: AgentMessage[]): AgentMessage[] {
  if (!Array.isArray(messages) || messages.length === 0) {
    return messages;
  }

  type AssistantTurn = Extract<AgentMessage, { role: "assistant" }>;

  const result: AgentMessage[] = [];
  // Role of the entry currently at the tail of `result`; undefined when that
  // entry has no usable role, so nothing can be merged into it.
  let lastRole: string | undefined;

  for (const msg of messages) {
    const msgRole =
      msg && typeof msg === "object"
        ? ((msg as { role?: unknown }).role as string | undefined)
        : undefined;

    if (!msgRole) {
      result.push(msg);
      // Reset so a following assistant turn is not merged into this entry.
      lastRole = undefined;
      continue;
    }

    if (msgRole === "assistant" && lastRole === "assistant") {
      // `lastRole` guarantees the tail of `result` is an assistant message.
      const previous = result[result.length - 1] as AssistantTurn;
      const current = msg as AssistantTurn;

      const merged: AssistantTurn = {
        ...previous,
        content: [
          ...(Array.isArray(previous.content) ? previous.content : []),
          ...(Array.isArray(current.content) ? current.content : []),
        ],
        // Prefer metadata from the newer message when it is present.
        ...(current.usage && { usage: current.usage }),
        ...(current.stopReason && { stopReason: current.stopReason }),
        ...(current.errorMessage && { errorMessage: current.errorMessage }),
      };

      result[result.length - 1] = merged;
      continue;
    }

    // Not a consecutive assistant turn: keep as-is.
    result.push(msg);
    lastRole = msgRole;
  }

  return result;
}
modelApi: "google-generative-ai", + sessionManager, + sessionId: "session:1", + warn, + }); + expect(first.messages[0]?.role).toBe("user"); + expect(first.messages[1]?.role).toBe("assistant"); + expect(warn).toHaveBeenCalledTimes(1); + expect( + sessionManager + .getEntries() + .some( + (entry) => + entry.type === "custom" && + entry.customType === "google-turn-ordering-bootstrap", + ), + ).toBe(true); + + applyGoogleTurnOrderingFix({ + messages: input, + modelApi: "google-generative-ai", + sessionManager, + sessionId: "session:1", + warn, + }); + expect(warn).toHaveBeenCalledTimes(1); + }); + + it("skips non-Google models", () => { + const sessionManager = SessionManager.inMemory(); + const warn = vi.fn(); + const input = makeAssistantFirst(); + const result = applyGoogleTurnOrderingFix({ + messages: input, + modelApi: "openai", + sessionManager, + sessionId: "session:2", + warn, + }); + expect(result.messages).toBe(input); + expect(warn).not.toHaveBeenCalled(); + }); +}); diff --git a/src/agents/pi-embedded-runner.ts b/src/agents/pi-embedded-runner.ts index d153a5802..cf428d64f 100644 --- a/src/agents/pi-embedded-runner.ts +++ b/src/agents/pi-embedded-runner.ts @@ -1,5 +1,7 @@ import fs from "node:fs/promises"; import os from "node:os"; +import path from "node:path"; +import { fileURLToPath } from "node:url"; import type { AgentMessage, @@ -16,7 +18,6 @@ import { SettingsManager, type Skill, } from "@mariozechner/pi-coding-agent"; -import type { TSchema } from "@sinclair/typebox"; import { resolveHeartbeatPrompt } from "../auto-reply/heartbeat.js"; import type { ReasoningLevel, @@ -24,6 +25,7 @@ import type { VerboseLevel, } from "../auto-reply/thinking.js"; import { formatToolAggregate } from "../auto-reply/tool-meta.js"; +import { isCacheEnabled, resolveCacheTtlMs } from "../config/cache-utils.js"; import type { ClawdbotConfig } from "../config/config.js"; import { getMachineDisplayName } from "../infra/machine-name.js"; import { createSubsystemLogger } from 
"../logging.js"; @@ -40,7 +42,11 @@ import { markAuthProfileUsed, } from "./auth-profiles.js"; import type { BashElevatedDefaults } from "./bash-tools.js"; -import { DEFAULT_MODEL, DEFAULT_PROVIDER } from "./defaults.js"; +import { + DEFAULT_CONTEXT_TOKENS, + DEFAULT_MODEL, + DEFAULT_PROVIDER, +} from "./defaults.js"; import { ensureAuthProfileStore, getApiKeyForModel, @@ -53,10 +59,14 @@ import { formatAssistantErrorText, isAuthAssistantError, isAuthErrorMessage, + isContextOverflowError, + isGoogleModelApi, isRateLimitAssistantError, isRateLimitErrorMessage, pickFallbackThinkingLevel, + sanitizeGoogleTurnOrdering, sanitizeSessionMessagesImages, + validateGeminiTurns, } from "./pi-embedded-helpers.js"; import { type BlockReplyChunking, @@ -67,6 +77,9 @@ import { extractAssistantThinking, formatReasoningMarkdown, } from "./pi-embedded-utils.js"; +import { setContextPruningRuntime } from "./pi-extensions/context-pruning/runtime.js"; +import { computeEffectiveSettings } from "./pi-extensions/context-pruning/settings.js"; +import { makeToolPrunablePredicate } from "./pi-extensions/context-pruning/tools.js"; import { toToolDefinitions } from "./pi-tool-definition-adapter.js"; import { createClawdbotCodingTools } from "./pi-tools.js"; import { resolveSandboxContext } from "./sandbox.js"; @@ -82,6 +95,84 @@ import { buildAgentSystemPromptAppend } from "./system-prompt.js"; import { normalizeUsage, type UsageLike } from "./usage.js"; import { loadWorkspaceBootstrapFiles } from "./workspace.js"; +// Optional features can be implemented as Pi extensions that run in the same Node process. +// We configure context pruning per-session via a WeakMap registry keyed by the SessionManager instance. + +function resolvePiExtensionPath(id: string): string { + const self = fileURLToPath(import.meta.url); + const dir = path.dirname(self); + // In dev this file is `.ts` (tsx), in production it's `.js`. + const ext = path.extname(self) === ".ts" ? 
"ts" : "js"; + return path.join(dir, "pi-extensions", `${id}.${ext}`); +} + +function resolveContextWindowTokens(params: { + cfg: ClawdbotConfig | undefined; + provider: string; + modelId: string; + model: Model | undefined; +}): number { + const fromModel = + typeof params.model?.contextWindow === "number" && + Number.isFinite(params.model.contextWindow) && + params.model.contextWindow > 0 + ? params.model.contextWindow + : undefined; + if (fromModel) return fromModel; + + const fromModelsConfig = (() => { + const providers = params.cfg?.models?.providers as + | Record< + string, + { models?: Array<{ id?: string; contextWindow?: number }> } + > + | undefined; + const providerEntry = providers?.[params.provider]; + const models = Array.isArray(providerEntry?.models) + ? providerEntry.models + : []; + const match = models.find((m) => m?.id === params.modelId); + return typeof match?.contextWindow === "number" && match.contextWindow > 0 + ? match.contextWindow + : undefined; + })(); + if (fromModelsConfig) return fromModelsConfig; + + const fromAgentConfig = + typeof params.cfg?.agent?.contextTokens === "number" && + Number.isFinite(params.cfg.agent.contextTokens) && + params.cfg.agent.contextTokens > 0 + ? 
Math.floor(params.cfg.agent.contextTokens) + : undefined; + if (fromAgentConfig) return fromAgentConfig; + + return DEFAULT_CONTEXT_TOKENS; +} + +function buildContextPruningExtension(params: { + cfg: ClawdbotConfig | undefined; + sessionManager: SessionManager; + provider: string; + modelId: string; + model: Model | undefined; +}): { additionalExtensionPaths?: string[] } { + const raw = params.cfg?.agent?.contextPruning; + if (raw?.mode !== "adaptive" && raw?.mode !== "aggressive") return {}; + + const settings = computeEffectiveSettings(raw); + if (!settings) return {}; + + setContextPruningRuntime(params.sessionManager, { + settings, + contextWindowTokens: resolveContextWindowTokens(params), + isToolPrunable: makeToolPrunablePredicate(settings.tools), + }); + + return { + additionalExtensionPaths: [resolvePiExtensionPath("context-pruning")], + }; +} + export type EmbeddedPiAgentMeta = { sessionId: string; provider: string; @@ -155,6 +246,80 @@ type EmbeddedPiQueueHandle = { }; const log = createSubsystemLogger("agent/embedded"); +const GOOGLE_TURN_ORDERING_CUSTOM_TYPE = "google-turn-ordering-bootstrap"; + +type CustomEntryLike = { type?: unknown; customType?: unknown }; + +function hasGoogleTurnOrderingMarker(sessionManager: SessionManager): boolean { + try { + return sessionManager + .getEntries() + .some( + (entry) => + (entry as CustomEntryLike)?.type === "custom" && + (entry as CustomEntryLike)?.customType === + GOOGLE_TURN_ORDERING_CUSTOM_TYPE, + ); + } catch { + return false; + } +} + +function markGoogleTurnOrderingMarker(sessionManager: SessionManager): void { + try { + sessionManager.appendCustomEntry(GOOGLE_TURN_ORDERING_CUSTOM_TYPE, { + timestamp: Date.now(), + }); + } catch { + // ignore marker persistence failures + } +} + +export function applyGoogleTurnOrderingFix(params: { + messages: AgentMessage[]; + modelApi?: string | null; + sessionManager: SessionManager; + sessionId: string; + warn?: (message: string) => void; +}): { messages: 
AgentMessage[]; didPrepend: boolean } { + if (!isGoogleModelApi(params.modelApi)) { + return { messages: params.messages, didPrepend: false }; + } + const first = params.messages[0] as + | { role?: unknown; content?: unknown } + | undefined; + if (first?.role !== "assistant") { + return { messages: params.messages, didPrepend: false }; + } + const sanitized = sanitizeGoogleTurnOrdering(params.messages); + const didPrepend = sanitized !== params.messages; + if (didPrepend && !hasGoogleTurnOrderingMarker(params.sessionManager)) { + const warn = params.warn ?? ((message: string) => log.warn(message)); + warn( + `google turn ordering fixup: prepended user bootstrap (sessionId=${params.sessionId})`, + ); + markGoogleTurnOrderingMarker(params.sessionManager); + } + return { messages: sanitized, didPrepend }; +} + +async function sanitizeSessionHistory(params: { + messages: AgentMessage[]; + modelApi?: string | null; + sessionManager: SessionManager; + sessionId: string; +}): Promise { + const sanitizedImages = await sanitizeSessionMessagesImages( + params.messages, + "session:history", + ); + return applyGoogleTurnOrderingFix({ + messages: sanitizedImages, + modelApi: params.modelApi, + sessionManager: params.sessionManager, + sessionId: params.sessionId, + }).messages; +} const ACTIVE_EMBEDDED_RUNS = new Map(); type EmbeddedRunWaiter = { @@ -163,6 +328,66 @@ type EmbeddedRunWaiter = { }; const EMBEDDED_RUN_WAITERS = new Map>(); +// ============================================================================ +// SessionManager Pre-warming Cache +// ============================================================================ + +type SessionManagerCacheEntry = { + sessionFile: string; + loadedAt: number; +}; + +const SESSION_MANAGER_CACHE = new Map(); +const DEFAULT_SESSION_MANAGER_TTL_MS = 45_000; // 45 seconds + +function getSessionManagerTtl(): number { + return resolveCacheTtlMs({ + envValue: process.env.CLAWDBOT_SESSION_MANAGER_CACHE_TTL_MS, + defaultTtlMs: 
DEFAULT_SESSION_MANAGER_TTL_MS, + }); +} + +function isSessionManagerCacheEnabled(): boolean { + return isCacheEnabled(getSessionManagerTtl()); +} + +function trackSessionManagerAccess(sessionFile: string): void { + if (!isSessionManagerCacheEnabled()) return; + const now = Date.now(); + SESSION_MANAGER_CACHE.set(sessionFile, { + sessionFile, + loadedAt: now, + }); +} + +function isSessionManagerCached(sessionFile: string): boolean { + if (!isSessionManagerCacheEnabled()) return false; + const entry = SESSION_MANAGER_CACHE.get(sessionFile); + if (!entry) return false; + const now = Date.now(); + const ttl = getSessionManagerTtl(); + return now - entry.loadedAt <= ttl; +} + +async function prewarmSessionFile(sessionFile: string): Promise { + if (!isSessionManagerCacheEnabled()) return; + if (isSessionManagerCached(sessionFile)) return; + + try { + // Read a small chunk to encourage OS page cache warmup. + const handle = await fs.open(sessionFile, "r"); + try { + const buffer = Buffer.alloc(4096); + await handle.read(buffer, 0, buffer.length, 0); + } finally { + await handle.close(); + } + trackSessionManagerAccess(sessionFile); + } catch { + // File doesn't exist yet, SessionManager will create it + } +} + const isAbortError = (err: unknown): boolean => { if (!err || typeof err !== "object") return false; const name = "name" in err ? 
String(err.name) : ""; @@ -269,7 +494,7 @@ export function buildEmbeddedSandboxInfo( const BUILT_IN_TOOL_NAMES = new Set(["read", "bash", "edit", "write"]); -type AnyAgentTool = AgentTool; +type AnyAgentTool = AgentTool; export function splitSdkTools(options: { tools: AnyAgentTool[]; @@ -573,18 +798,30 @@ export async function compactEmbeddedPiSession(params: { tools, }); + // Pre-warm session file to bring it into OS page cache + await prewarmSessionFile(params.sessionFile); const sessionManager = SessionManager.open(params.sessionFile); + trackSessionManagerAccess(params.sessionFile); const settingsManager = SettingsManager.create( effectiveWorkspace, agentDir, ); + const pruning = buildContextPruningExtension({ + cfg: params.config, + sessionManager, + provider, + modelId, + model, + }); + const additionalExtensionPaths = pruning.additionalExtensionPaths; const { builtInTools, customTools } = splitSdkTools({ tools, sandboxEnabled: !!sandbox?.enabled, }); - const { session } = await createAgentSession({ + let session: Awaited>["session"]; + ({ session } = await createAgentSession({ cwd: resolvedWorkspace, agentDir, authStorage, @@ -598,15 +835,19 @@ export async function compactEmbeddedPiSession(params: { settingsManager, skills: promptSkills, contextFiles, - }); + additionalExtensionPaths, + })); try { - const prior = await sanitizeSessionMessagesImages( - session.messages, - "session:history", - ); - if (prior.length > 0) { - session.agent.replaceMessages(prior); + const prior = await sanitizeSessionHistory({ + messages: session.messages, + modelApi: model.api, + sessionManager, + sessionId: params.sessionId, + }); + const validated = validateGeminiTurns(prior); + if (validated.length > 0) { + session.agent.replaceMessages(validated); } const result = await session.compact(params.customInstructions); return { @@ -882,18 +1123,32 @@ export async function runEmbeddedPiAgent(params: { tools, }); + // Pre-warm session file to bring it into OS page cache + await 
prewarmSessionFile(params.sessionFile); const sessionManager = SessionManager.open(params.sessionFile); + trackSessionManagerAccess(params.sessionFile); const settingsManager = SettingsManager.create( effectiveWorkspace, agentDir, ); + const pruning = buildContextPruningExtension({ + cfg: params.config, + sessionManager, + provider, + modelId, + model, + }); + const additionalExtensionPaths = pruning.additionalExtensionPaths; const { builtInTools, customTools } = splitSdkTools({ tools, sandboxEnabled: !!sandbox?.enabled, }); - const { session } = await createAgentSession({ + let session: Awaited< + ReturnType + >["session"]; + ({ session } = await createAgentSession({ cwd: resolvedWorkspace, agentDir, authStorage, @@ -909,14 +1164,23 @@ export async function runEmbeddedPiAgent(params: { settingsManager, skills: promptSkills, contextFiles, - }); + additionalExtensionPaths, + })); - const prior = await sanitizeSessionMessagesImages( - session.messages, - "session:history", - ); - if (prior.length > 0) { - session.agent.replaceMessages(prior); + try { + const prior = await sanitizeSessionHistory({ + messages: session.messages, + modelApi: model.api, + sessionManager, + sessionId: params.sessionId, + }); + const validated = validateGeminiTurns(prior); + if (validated.length > 0) { + session.agent.replaceMessages(validated); + } + } catch (err) { + session.dispose(); + throw err; } let aborted = Boolean(params.abortSignal?.aborted); let timedOut = false; @@ -925,21 +1189,27 @@ export async function runEmbeddedPiAgent(params: { if (isTimeout) timedOut = true; void session.abort(); }; - const subscription = subscribeEmbeddedPiSession({ - session, - runId: params.runId, - verboseLevel: params.verboseLevel, - reasoningMode: params.reasoningLevel ?? 
"off", - shouldEmitToolResult: params.shouldEmitToolResult, - onToolResult: params.onToolResult, - onReasoningStream: params.onReasoningStream, - onBlockReply: params.onBlockReply, - blockReplyBreak: params.blockReplyBreak, - blockReplyChunking: params.blockReplyChunking, - onPartialReply: params.onPartialReply, - onAgentEvent: params.onAgentEvent, - enforceFinalTag: params.enforceFinalTag, - }); + let subscription: ReturnType; + try { + subscription = subscribeEmbeddedPiSession({ + session, + runId: params.runId, + verboseLevel: params.verboseLevel, + reasoningMode: params.reasoningLevel ?? "off", + shouldEmitToolResult: params.shouldEmitToolResult, + onToolResult: params.onToolResult, + onReasoningStream: params.onReasoningStream, + onBlockReply: params.onBlockReply, + blockReplyBreak: params.blockReplyBreak, + blockReplyChunking: params.blockReplyChunking, + onPartialReply: params.onPartialReply, + onAgentEvent: params.onAgentEvent, + enforceFinalTag: params.enforceFinalTag, + }); + } catch (err) { + session.dispose(); + throw err; + } const { assistantTexts, toolMetas, @@ -1033,6 +1303,26 @@ export async function runEmbeddedPiAgent(params: { } if (promptError && !aborted) { const errorText = describeUnknownError(promptError); + if (isContextOverflowError(errorText)) { + return { + payloads: [ + { + text: + "Context overflow: the conversation history is too large for the model. 
" + + "Use /new or /reset to start a fresh session, or try a model with a larger context window.", + isError: true, + }, + ], + meta: { + durationMs: Date.now() - started, + agentMeta: { + sessionId: sessionIdUsed, + provider, + model: model.id, + }, + }, + }; + } if ( (isAuthErrorMessage(errorText) || isRateLimitErrorMessage(errorText)) && diff --git a/src/agents/pi-extensions/context-pruning.test.ts b/src/agents/pi-extensions/context-pruning.test.ts new file mode 100644 index 000000000..3d28c519e --- /dev/null +++ b/src/agents/pi-extensions/context-pruning.test.ts @@ -0,0 +1,447 @@ +import type { AgentMessage } from "@mariozechner/pi-agent-core"; +import type { + ExtensionAPI, + ExtensionContext, +} from "@mariozechner/pi-coding-agent"; +import { describe, expect, it } from "vitest"; + +import { setContextPruningRuntime } from "./context-pruning/runtime.js"; + +import { + computeEffectiveSettings, + default as contextPruningExtension, + DEFAULT_CONTEXT_PRUNING_SETTINGS, + pruneContextMessages, +} from "./context-pruning.js"; + +function toolText(msg: AgentMessage): string { + if (msg.role !== "toolResult") throw new Error("expected toolResult"); + const first = msg.content.find((b) => b.type === "text"); + if (!first || first.type !== "text") return ""; + return first.text; +} + +function findToolResult( + messages: AgentMessage[], + toolCallId: string, +): AgentMessage { + const msg = messages.find( + (m) => m.role === "toolResult" && m.toolCallId === toolCallId, + ); + if (!msg) throw new Error(`missing toolResult: ${toolCallId}`); + return msg; +} + +function makeToolResult(params: { + toolCallId: string; + toolName: string; + text: string; +}): AgentMessage { + return { + role: "toolResult", + toolCallId: params.toolCallId, + toolName: params.toolName, + content: [{ type: "text", text: params.text }], + isError: false, + timestamp: Date.now(), + }; +} + +function makeImageToolResult(params: { + toolCallId: string; + toolName: string; + text: string; +}): 
AgentMessage { + return { + role: "toolResult", + toolCallId: params.toolCallId, + toolName: params.toolName, + content: [ + { type: "image", data: "AA==", mimeType: "image/png" }, + { type: "text", text: params.text }, + ], + isError: false, + timestamp: Date.now(), + }; +} + +function makeAssistant(text: string): AgentMessage { + return { + role: "assistant", + content: [{ type: "text", text }], + api: "openai-responses", + provider: "openai", + model: "fake", + usage: { input: 1, output: 1, cacheRead: 0, cacheWrite: 0, total: 2 }, + stopReason: "stop", + timestamp: Date.now(), + }; +} + +function makeUser(text: string): AgentMessage { + return { role: "user", content: text, timestamp: Date.now() }; +} + +describe("context-pruning", () => { + it("mode off disables pruning", () => { + expect(computeEffectiveSettings({ mode: "off" })).toBeNull(); + expect(computeEffectiveSettings({})).toBeNull(); + }); + + it("does not touch tool results after the last N assistants", () => { + const messages: AgentMessage[] = [ + makeUser("u1"), + makeAssistant("a1"), + makeToolResult({ + toolCallId: "t1", + toolName: "bash", + text: "x".repeat(20_000), + }), + makeUser("u2"), + makeAssistant("a2"), + makeToolResult({ + toolCallId: "t2", + toolName: "bash", + text: "y".repeat(20_000), + }), + makeUser("u3"), + makeAssistant("a3"), + makeToolResult({ + toolCallId: "t3", + toolName: "bash", + text: "z".repeat(20_000), + }), + makeUser("u4"), + makeAssistant("a4"), + makeToolResult({ + toolCallId: "t4", + toolName: "bash", + text: "w".repeat(20_000), + }), + ]; + + const settings = { + ...DEFAULT_CONTEXT_PRUNING_SETTINGS, + keepLastAssistants: 3, + softTrimRatio: 0.0, + hardClearRatio: 0.0, + minPrunableToolChars: 0, + hardClear: { enabled: true, placeholder: "[cleared]" }, + softTrim: { maxChars: 10, headChars: 3, tailChars: 3 }, + }; + + const ctx = { + model: { contextWindow: 1000 }, + } as unknown as ExtensionContext; + + const next = pruneContextMessages({ messages, settings, ctx 
}); + + expect(toolText(findToolResult(next, "t2"))).toContain("y".repeat(20_000)); + expect(toolText(findToolResult(next, "t3"))).toContain("z".repeat(20_000)); + expect(toolText(findToolResult(next, "t4"))).toContain("w".repeat(20_000)); + expect(toolText(findToolResult(next, "t1"))).toBe("[cleared]"); + }); + + it("never prunes tool results before the first user message", () => { + const settings = computeEffectiveSettings({ + mode: "aggressive", + keepLastAssistants: 0, + hardClear: { placeholder: "[cleared]" }, + }); + if (!settings) throw new Error("expected settings"); + + const messages: AgentMessage[] = [ + makeAssistant("bootstrap tool calls"), + makeToolResult({ + toolCallId: "t0", + toolName: "read", + text: "x".repeat(20_000), + }), + makeAssistant("greeting"), + makeUser("u1"), + makeToolResult({ + toolCallId: "t1", + toolName: "bash", + text: "y".repeat(20_000), + }), + ]; + + const next = pruneContextMessages({ + messages, + settings, + ctx: { model: { contextWindow: 1000 } } as unknown as ExtensionContext, + isToolPrunable: () => true, + contextWindowTokensOverride: 1000, + }); + + expect(toolText(findToolResult(next, "t0"))).toBe("x".repeat(20_000)); + expect(toolText(findToolResult(next, "t1"))).toBe("[cleared]"); + }); + + it("mode aggressive clears eligible tool results before cutoff", () => { + const messages: AgentMessage[] = [ + makeUser("u1"), + makeAssistant("a1"), + makeToolResult({ + toolCallId: "t1", + toolName: "bash", + text: "x".repeat(20_000), + }), + makeToolResult({ + toolCallId: "t2", + toolName: "bash", + text: "y".repeat(20_000), + }), + makeUser("u2"), + makeAssistant("a2"), + makeToolResult({ + toolCallId: "t3", + toolName: "bash", + text: "z".repeat(20_000), + }), + ]; + + const settings = { + ...DEFAULT_CONTEXT_PRUNING_SETTINGS, + mode: "aggressive", + keepLastAssistants: 1, + hardClear: { enabled: false, placeholder: "[cleared]" }, + }; + + const ctx = { + model: { contextWindow: 1000 }, + } as unknown as ExtensionContext; 
+ const next = pruneContextMessages({ messages, settings, ctx }); + + expect(toolText(findToolResult(next, "t1"))).toBe("[cleared]"); + expect(toolText(findToolResult(next, "t2"))).toBe("[cleared]"); + // Tool results after the last assistant are protected. + expect(toolText(findToolResult(next, "t3"))).toContain("z".repeat(20_000)); + }); + + it("uses contextWindow override when ctx.model is missing", () => { + const messages: AgentMessage[] = [ + makeUser("u1"), + makeAssistant("a1"), + makeToolResult({ + toolCallId: "t1", + toolName: "bash", + text: "x".repeat(20_000), + }), + makeAssistant("a2"), + ]; + + const settings = { + ...DEFAULT_CONTEXT_PRUNING_SETTINGS, + keepLastAssistants: 0, + softTrimRatio: 0, + hardClearRatio: 0, + minPrunableToolChars: 0, + hardClear: { enabled: true, placeholder: "[cleared]" }, + softTrim: { maxChars: 10, headChars: 3, tailChars: 3 }, + }; + + const next = pruneContextMessages({ + messages, + settings, + ctx: { model: undefined } as unknown as ExtensionContext, + contextWindowTokensOverride: 1000, + }); + + expect(toolText(findToolResult(next, "t1"))).toBe("[cleared]"); + }); + + it("reads per-session settings from registry", async () => { + const sessionManager = {}; + + setContextPruningRuntime(sessionManager, { + settings: { + ...DEFAULT_CONTEXT_PRUNING_SETTINGS, + keepLastAssistants: 0, + softTrimRatio: 0, + hardClearRatio: 0, + minPrunableToolChars: 0, + hardClear: { enabled: true, placeholder: "[cleared]" }, + softTrim: { maxChars: 10, headChars: 3, tailChars: 3 }, + }, + contextWindowTokens: 1000, + isToolPrunable: () => true, + }); + + const messages: AgentMessage[] = [ + makeUser("u1"), + makeAssistant("a1"), + makeToolResult({ + toolCallId: "t1", + toolName: "bash", + text: "x".repeat(20_000), + }), + makeAssistant("a2"), + ]; + + let handler: + | (( + event: { messages: AgentMessage[] }, + ctx: ExtensionContext, + ) => { messages: AgentMessage[] } | undefined) + | undefined; + + const api = { + on: (name: string, fn: 
unknown) => { + if (name === "context") { + handler = fn as typeof handler; + } + }, + appendEntry: (_type: string, _data?: unknown) => {}, + } as unknown as ExtensionAPI; + + contextPruningExtension(api); + + if (!handler) throw new Error("missing context handler"); + + const result = handler({ messages }, { + model: undefined, + sessionManager, + } as unknown as ExtensionContext); + + if (!result) throw new Error("expected handler to return messages"); + expect(toolText(findToolResult(result.messages, "t1"))).toBe("[cleared]"); + }); + + it("respects tools allow/deny (deny wins; wildcards supported)", () => { + const messages: AgentMessage[] = [ + makeUser("u1"), + makeToolResult({ + toolCallId: "t1", + toolName: "bash", + text: "x".repeat(20_000), + }), + makeToolResult({ + toolCallId: "t2", + toolName: "browser", + text: "y".repeat(20_000), + }), + ]; + + const settings = { + ...DEFAULT_CONTEXT_PRUNING_SETTINGS, + keepLastAssistants: 0, + softTrimRatio: 0.0, + hardClearRatio: 0.0, + minPrunableToolChars: 0, + tools: { allow: ["ba*"], deny: ["bash"] }, + hardClear: { enabled: true, placeholder: "[cleared]" }, + softTrim: { maxChars: 10, headChars: 3, tailChars: 3 }, + }; + + const ctx = { + model: { contextWindow: 1000 }, + } as unknown as ExtensionContext; + const next = pruneContextMessages({ messages, settings, ctx }); + + // Deny wins => bash is not pruned, even though allow matches. + expect(toolText(findToolResult(next, "t1"))).toContain("x".repeat(20_000)); + // allow is non-empty and browser is not allowed => never pruned. 
+ expect(toolText(findToolResult(next, "t2"))).toContain("y".repeat(20_000)); + }); + + it("skips tool results that contain images (no soft trim, no hard clear)", () => { + const messages: AgentMessage[] = [ + makeUser("u1"), + makeImageToolResult({ + toolCallId: "t1", + toolName: "bash", + text: "x".repeat(20_000), + }), + ]; + + const settings = { + ...DEFAULT_CONTEXT_PRUNING_SETTINGS, + keepLastAssistants: 0, + softTrimRatio: 0.0, + hardClearRatio: 0.0, + minPrunableToolChars: 0, + hardClear: { enabled: true, placeholder: "[cleared]" }, + softTrim: { maxChars: 10, headChars: 3, tailChars: 3 }, + }; + + const ctx = { + model: { contextWindow: 1000 }, + } as unknown as ExtensionContext; + const next = pruneContextMessages({ messages, settings, ctx }); + + const tool = findToolResult(next, "t1"); + if (!tool || tool.role !== "toolResult") { + throw new Error("unexpected pruned message list shape"); + } + expect(tool.content.some((b) => b.type === "image")).toBe(true); + expect(toolText(tool)).toContain("x".repeat(20_000)); + }); + + it("soft-trims across block boundaries", () => { + const messages: AgentMessage[] = [ + makeUser("u1"), + { + role: "toolResult", + toolCallId: "t1", + toolName: "bash", + content: [ + { type: "text", text: "AAAAA" }, + { type: "text", text: "BBBBB" }, + ], + isError: false, + timestamp: Date.now(), + } as unknown as AgentMessage, + ]; + + const settings = { + ...DEFAULT_CONTEXT_PRUNING_SETTINGS, + keepLastAssistants: 0, + softTrimRatio: 0.0, + hardClearRatio: 10.0, + softTrim: { maxChars: 5, headChars: 7, tailChars: 3 }, + }; + + const ctx = { + model: { contextWindow: 1000 }, + } as unknown as ExtensionContext; + const next = pruneContextMessages({ messages, settings, ctx }); + + const text = toolText(findToolResult(next, "t1")); + expect(text).toContain("AAAAA\nB"); + expect(text).toContain("BBB"); + expect(text).toContain("[Tool result trimmed:"); + }); + + it("soft-trims oversized tool results and preserves head/tail with a note", 
() => { + const messages: AgentMessage[] = [ + makeUser("u1"), + makeToolResult({ + toolCallId: "t1", + toolName: "bash", + text: "abcdefghij".repeat(1000), + }), + ]; + + const settings = { + ...DEFAULT_CONTEXT_PRUNING_SETTINGS, + keepLastAssistants: 0, + softTrimRatio: 0.0, + hardClearRatio: 10.0, + minPrunableToolChars: 0, + hardClear: { enabled: true, placeholder: "[cleared]" }, + softTrim: { maxChars: 10, headChars: 6, tailChars: 6 }, + }; + + const ctx = { + model: { contextWindow: 1000 }, + } as unknown as ExtensionContext; + const next = pruneContextMessages({ messages, settings, ctx }); + + const tool = findToolResult(next, "t1"); + const text = toolText(tool); + expect(text).toContain("abcdef"); + expect(text).toContain("efghij"); + expect(text).toContain("[Tool result trimmed:"); + }); +}); diff --git a/src/agents/pi-extensions/context-pruning.ts b/src/agents/pi-extensions/context-pruning.ts new file mode 100644 index 000000000..b80addb9d --- /dev/null +++ b/src/agents/pi-extensions/context-pruning.ts @@ -0,0 +1,19 @@ +/** + * Opt-in context pruning (“microcompact”-style) for Pi sessions. + * + * This only affects the in-memory context for the current request; it does not rewrite session + * history persisted on disk. 
+ */ + +export { default } from "./context-pruning/extension.js"; + +export { pruneContextMessages } from "./context-pruning/pruner.js"; +export type { + ContextPruningConfig, + ContextPruningToolMatch, + EffectiveContextPruningSettings, +} from "./context-pruning/settings.js"; +export { + computeEffectiveSettings, + DEFAULT_CONTEXT_PRUNING_SETTINGS, +} from "./context-pruning/settings.js"; diff --git a/src/agents/pi-extensions/context-pruning/extension.ts b/src/agents/pi-extensions/context-pruning/extension.ts new file mode 100644 index 000000000..13b9a8d4b --- /dev/null +++ b/src/agents/pi-extensions/context-pruning/extension.ts @@ -0,0 +1,27 @@ +import type { AgentMessage } from "@mariozechner/pi-agent-core"; +import type { + ContextEvent, + ExtensionAPI, + ExtensionContext, +} from "@mariozechner/pi-coding-agent"; + +import { pruneContextMessages } from "./pruner.js"; +import { getContextPruningRuntime } from "./runtime.js"; + +export default function contextPruningExtension(api: ExtensionAPI): void { + api.on("context", (event: ContextEvent, ctx: ExtensionContext) => { + const runtime = getContextPruningRuntime(ctx.sessionManager); + if (!runtime) return undefined; + + const next = pruneContextMessages({ + messages: event.messages as AgentMessage[], + settings: runtime.settings, + ctx, + isToolPrunable: runtime.isToolPrunable, + contextWindowTokensOverride: runtime.contextWindowTokens ?? 
undefined, + }); + + if (next === event.messages) return undefined; + return { messages: next }; + }); +} diff --git a/src/agents/pi-extensions/context-pruning/pruner.ts b/src/agents/pi-extensions/context-pruning/pruner.ts new file mode 100644 index 000000000..589cf1bb4 --- /dev/null +++ b/src/agents/pi-extensions/context-pruning/pruner.ts @@ -0,0 +1,324 @@ +import type { AgentMessage } from "@mariozechner/pi-agent-core"; +import type { + ImageContent, + TextContent, + ToolResultMessage, +} from "@mariozechner/pi-ai"; +import type { ExtensionContext } from "@mariozechner/pi-coding-agent"; + +import type { EffectiveContextPruningSettings } from "./settings.js"; +import { makeToolPrunablePredicate } from "./tools.js"; + +const CHARS_PER_TOKEN_ESTIMATE = 4; +// We currently skip pruning tool results that contain images. Still, we count them (approx.) so +// we start trimming prunable tool results earlier when image-heavy context is consuming the window. +const IMAGE_CHAR_ESTIMATE = 8_000; + +function asText(text: string): TextContent { + return { type: "text", text }; +} + +function collectTextSegments( + content: ReadonlyArray, +): string[] { + const parts: string[] = []; + for (const block of content) { + if (block.type === "text") parts.push(block.text); + } + return parts; +} + +function estimateJoinedTextLength(parts: string[]): number { + if (parts.length === 0) return 0; + let len = 0; + for (const p of parts) len += p.length; + // Joined with "\n" separators between blocks. 
+ len += Math.max(0, parts.length - 1); + return len; +} + +function takeHeadFromJoinedText(parts: string[], maxChars: number): string { + if (maxChars <= 0 || parts.length === 0) return ""; + let remaining = maxChars; + let out = ""; + for (let i = 0; i < parts.length && remaining > 0; i++) { + if (i > 0) { + out += "\n"; + remaining -= 1; + if (remaining <= 0) break; + } + const p = parts[i]; + if (p.length <= remaining) { + out += p; + remaining -= p.length; + } else { + out += p.slice(0, remaining); + remaining = 0; + } + } + return out; +} + +function takeTailFromJoinedText(parts: string[], maxChars: number): string { + if (maxChars <= 0 || parts.length === 0) return ""; + let remaining = maxChars; + const out: string[] = []; + for (let i = parts.length - 1; i >= 0 && remaining > 0; i--) { + const p = parts[i]; + if (p.length <= remaining) { + out.push(p); + remaining -= p.length; + } else { + out.push(p.slice(p.length - remaining)); + remaining = 0; + break; + } + if (remaining > 0 && i > 0) { + out.push("\n"); + remaining -= 1; + } + } + out.reverse(); + return out.join(""); +} + +function hasImageBlocks( + content: ReadonlyArray, +): boolean { + for (const block of content) { + if (block.type === "image") return true; + } + return false; +} + +function estimateMessageChars(message: AgentMessage): number { + if (message.role === "user") { + const content = message.content; + if (typeof content === "string") return content.length; + let chars = 0; + for (const b of content) { + if (b.type === "text") chars += b.text.length; + if (b.type === "image") chars += IMAGE_CHAR_ESTIMATE; + } + return chars; + } + + if (message.role === "assistant") { + let chars = 0; + for (const b of message.content) { + if (b.type === "text") chars += b.text.length; + if (b.type === "thinking") chars += b.thinking.length; + if (b.type === "toolCall") { + try { + chars += JSON.stringify(b.arguments ?? 
{}).length; + } catch { + chars += 128; + } + } + } + return chars; + } + + if (message.role === "toolResult") { + let chars = 0; + for (const b of message.content) { + if (b.type === "text") chars += b.text.length; + if (b.type === "image") chars += IMAGE_CHAR_ESTIMATE; + } + return chars; + } + + return 256; +} + +function estimateContextChars(messages: AgentMessage[]): number { + return messages.reduce((sum, m) => sum + estimateMessageChars(m), 0); +} + +function findAssistantCutoffIndex( + messages: AgentMessage[], + keepLastAssistants: number, +): number | null { + // keepLastAssistants <= 0 => everything is potentially prunable. + if (keepLastAssistants <= 0) return messages.length; + + let remaining = keepLastAssistants; + for (let i = messages.length - 1; i >= 0; i--) { + if (messages[i]?.role !== "assistant") continue; + remaining--; + if (remaining === 0) return i; + } + + // Not enough assistant messages to establish a protected tail. + return null; +} + +function findFirstUserIndex(messages: AgentMessage[]): number | null { + for (let i = 0; i < messages.length; i++) { + if (messages[i]?.role === "user") return i; + } + return null; +} + +function softTrimToolResultMessage(params: { + msg: ToolResultMessage; + settings: EffectiveContextPruningSettings; +}): ToolResultMessage | null { + const { msg, settings } = params; + // Ignore image tool results for now: these are often directly relevant and hard to partially prune safely. + if (hasImageBlocks(msg.content)) return null; + + const parts = collectTextSegments(msg.content); + const rawLen = estimateJoinedTextLength(parts); + if (rawLen <= settings.softTrim.maxChars) return null; + + const headChars = Math.max(0, settings.softTrim.headChars); + const tailChars = Math.max(0, settings.softTrim.tailChars); + if (headChars + tailChars >= rawLen) return null; + + const head = takeHeadFromJoinedText(parts, headChars); + const tail = takeTailFromJoinedText(parts, tailChars); + const trimmed = `${head} +... 
+${tail}`; + + const note = ` + +[Tool result trimmed: kept first ${headChars} chars and last ${tailChars} chars of ${rawLen} chars.]`; + + return { ...msg, content: [asText(trimmed + note)] }; +} + +export function pruneContextMessages(params: { + messages: AgentMessage[]; + settings: EffectiveContextPruningSettings; + ctx: Pick; + isToolPrunable?: (toolName: string) => boolean; + contextWindowTokensOverride?: number; +}): AgentMessage[] { + const { messages, settings, ctx } = params; + const contextWindowTokens = + typeof params.contextWindowTokensOverride === "number" && + Number.isFinite(params.contextWindowTokensOverride) && + params.contextWindowTokensOverride > 0 + ? params.contextWindowTokensOverride + : ctx.model?.contextWindow; + if (!contextWindowTokens || contextWindowTokens <= 0) return messages; + + const charWindow = contextWindowTokens * CHARS_PER_TOKEN_ESTIMATE; + if (charWindow <= 0) return messages; + + const cutoffIndex = findAssistantCutoffIndex( + messages, + settings.keepLastAssistants, + ); + if (cutoffIndex === null) return messages; + + // Bootstrap safety: never prune anything before the first user message. This protects initial + // "identity" reads (SOUL.md, USER.md, etc.) which typically happen before the first inbound user + // message exists in the session transcript. + const firstUserIndex = findFirstUserIndex(messages); + const pruneStartIndex = + firstUserIndex === null ? messages.length : firstUserIndex; + + const isToolPrunable = + params.isToolPrunable ?? 
makeToolPrunablePredicate(settings.tools); + + if (settings.mode === "aggressive") { + let next: AgentMessage[] | null = null; + + for (let i = pruneStartIndex; i < cutoffIndex; i++) { + const msg = messages[i]; + if (!msg || msg.role !== "toolResult") continue; + if (!isToolPrunable(msg.toolName)) continue; + if (hasImageBlocks(msg.content)) { + continue; + } + + const alreadyCleared = + msg.content.length === 1 && + msg.content[0]?.type === "text" && + msg.content[0].text === settings.hardClear.placeholder; + if (alreadyCleared) continue; + + const cleared: ToolResultMessage = { + ...msg, + content: [asText(settings.hardClear.placeholder)], + }; + if (!next) next = messages.slice(); + next[i] = cleared as unknown as AgentMessage; + } + + return next ?? messages; + } + + const totalCharsBefore = estimateContextChars(messages); + let totalChars = totalCharsBefore; + let ratio = totalChars / charWindow; + if (ratio < settings.softTrimRatio) { + return messages; + } + + const prunableToolIndexes: number[] = []; + let next: AgentMessage[] | null = null; + + for (let i = pruneStartIndex; i < cutoffIndex; i++) { + const msg = messages[i]; + if (!msg || msg.role !== "toolResult") continue; + if (!isToolPrunable(msg.toolName)) continue; + if (hasImageBlocks(msg.content)) { + continue; + } + prunableToolIndexes.push(i); + + const updated = softTrimToolResultMessage({ + msg: msg as unknown as ToolResultMessage, + settings, + }); + if (!updated) continue; + + const beforeChars = estimateMessageChars(msg); + const afterChars = estimateMessageChars(updated as unknown as AgentMessage); + totalChars += afterChars - beforeChars; + if (!next) next = messages.slice(); + next[i] = updated as unknown as AgentMessage; + } + + const outputAfterSoftTrim = next ?? 
messages; + ratio = totalChars / charWindow; + if (ratio < settings.hardClearRatio) { + return outputAfterSoftTrim; + } + if (!settings.hardClear.enabled) { + return outputAfterSoftTrim; + } + + let prunableToolChars = 0; + for (const i of prunableToolIndexes) { + const msg = outputAfterSoftTrim[i]; + if (!msg || msg.role !== "toolResult") continue; + prunableToolChars += estimateMessageChars(msg); + } + if (prunableToolChars < settings.minPrunableToolChars) { + return outputAfterSoftTrim; + } + + for (const i of prunableToolIndexes) { + if (ratio < settings.hardClearRatio) break; + const msg = (next ?? messages)[i]; + if (!msg || msg.role !== "toolResult") continue; + + const beforeChars = estimateMessageChars(msg); + const cleared: ToolResultMessage = { + ...msg, + content: [asText(settings.hardClear.placeholder)], + }; + if (!next) next = messages.slice(); + next[i] = cleared as unknown as AgentMessage; + const afterChars = estimateMessageChars(cleared as unknown as AgentMessage); + totalChars += afterChars - beforeChars; + ratio = totalChars / charWindow; + } + + return next ?? messages; +} diff --git a/src/agents/pi-extensions/context-pruning/runtime.ts b/src/agents/pi-extensions/context-pruning/runtime.ts new file mode 100644 index 000000000..b497e6383 --- /dev/null +++ b/src/agents/pi-extensions/context-pruning/runtime.ts @@ -0,0 +1,39 @@ +import type { EffectiveContextPruningSettings } from "./settings.js"; + +export type ContextPruningRuntimeValue = { + settings: EffectiveContextPruningSettings; + contextWindowTokens?: number | null; + isToolPrunable: (toolName: string) => boolean; +}; + +// Session-scoped runtime registry keyed by object identity. +// Important: this relies on Pi passing the same SessionManager object instance into +// ExtensionContext (ctx.sessionManager) that we used when calling setContextPruningRuntime. 
+const REGISTRY = new WeakMap(); + +export function setContextPruningRuntime( + sessionManager: unknown, + value: ContextPruningRuntimeValue | null, +): void { + if (!sessionManager || typeof sessionManager !== "object") { + return; + } + + const key = sessionManager as object; + if (value === null) { + REGISTRY.delete(key); + return; + } + + REGISTRY.set(key, value); +} + +export function getContextPruningRuntime( + sessionManager: unknown, +): ContextPruningRuntimeValue | null { + if (!sessionManager || typeof sessionManager !== "object") { + return null; + } + + return REGISTRY.get(sessionManager as object) ?? null; +} diff --git a/src/agents/pi-extensions/context-pruning/settings.ts b/src/agents/pi-extensions/context-pruning/settings.ts new file mode 100644 index 000000000..f3bb6de83 --- /dev/null +++ b/src/agents/pi-extensions/context-pruning/settings.ts @@ -0,0 +1,135 @@ +export type ContextPruningToolMatch = { + allow?: string[]; + deny?: string[]; +}; + +export type ContextPruningMode = "off" | "adaptive" | "aggressive"; + +export type ContextPruningConfig = { + mode?: ContextPruningMode; + keepLastAssistants?: number; + softTrimRatio?: number; + hardClearRatio?: number; + minPrunableToolChars?: number; + tools?: ContextPruningToolMatch; + softTrim?: { + maxChars?: number; + headChars?: number; + tailChars?: number; + }; + hardClear?: { + enabled?: boolean; + placeholder?: string; + }; +}; + +export type EffectiveContextPruningSettings = { + mode: Exclude; + keepLastAssistants: number; + softTrimRatio: number; + hardClearRatio: number; + minPrunableToolChars: number; + tools: ContextPruningToolMatch; + softTrim: { + maxChars: number; + headChars: number; + tailChars: number; + }; + hardClear: { + enabled: boolean; + placeholder: string; + }; +}; + +export const DEFAULT_CONTEXT_PRUNING_SETTINGS: EffectiveContextPruningSettings = + { + mode: "adaptive", + keepLastAssistants: 3, + softTrimRatio: 0.3, + hardClearRatio: 0.5, + minPrunableToolChars: 50_000, + 
tools: {}, + softTrim: { + maxChars: 4_000, + headChars: 1_500, + tailChars: 1_500, + }, + hardClear: { + enabled: true, + placeholder: "[Old tool result content cleared]", + }, + }; + +export function computeEffectiveSettings( + raw: unknown, +): EffectiveContextPruningSettings | null { + if (!raw || typeof raw !== "object") return null; + const cfg = raw as ContextPruningConfig; + if (cfg.mode !== "adaptive" && cfg.mode !== "aggressive") return null; + + const s: EffectiveContextPruningSettings = structuredClone( + DEFAULT_CONTEXT_PRUNING_SETTINGS, + ); + s.mode = cfg.mode; + + if ( + typeof cfg.keepLastAssistants === "number" && + Number.isFinite(cfg.keepLastAssistants) + ) { + s.keepLastAssistants = Math.max(0, Math.floor(cfg.keepLastAssistants)); + } + if ( + typeof cfg.softTrimRatio === "number" && + Number.isFinite(cfg.softTrimRatio) + ) { + s.softTrimRatio = Math.min(1, Math.max(0, cfg.softTrimRatio)); + } + if ( + typeof cfg.hardClearRatio === "number" && + Number.isFinite(cfg.hardClearRatio) + ) { + s.hardClearRatio = Math.min(1, Math.max(0, cfg.hardClearRatio)); + } + if ( + typeof cfg.minPrunableToolChars === "number" && + Number.isFinite(cfg.minPrunableToolChars) + ) { + s.minPrunableToolChars = Math.max(0, Math.floor(cfg.minPrunableToolChars)); + } + if (cfg.tools) { + s.tools = cfg.tools; + } + if (cfg.softTrim) { + if ( + typeof cfg.softTrim.maxChars === "number" && + Number.isFinite(cfg.softTrim.maxChars) + ) { + s.softTrim.maxChars = Math.max(0, Math.floor(cfg.softTrim.maxChars)); + } + if ( + typeof cfg.softTrim.headChars === "number" && + Number.isFinite(cfg.softTrim.headChars) + ) { + s.softTrim.headChars = Math.max(0, Math.floor(cfg.softTrim.headChars)); + } + if ( + typeof cfg.softTrim.tailChars === "number" && + Number.isFinite(cfg.softTrim.tailChars) + ) { + s.softTrim.tailChars = Math.max(0, Math.floor(cfg.softTrim.tailChars)); + } + } + if (cfg.hardClear) { + if (s.mode === "adaptive" && typeof cfg.hardClear.enabled === "boolean") { + 
s.hardClear.enabled = cfg.hardClear.enabled; + } + if ( + typeof cfg.hardClear.placeholder === "string" && + cfg.hardClear.placeholder.trim() + ) { + s.hardClear.placeholder = cfg.hardClear.placeholder.trim(); + } + } + + return s; +} diff --git a/src/agents/pi-extensions/context-pruning/tools.ts b/src/agents/pi-extensions/context-pruning/tools.ts new file mode 100644 index 000000000..81b064767 --- /dev/null +++ b/src/agents/pi-extensions/context-pruning/tools.ts @@ -0,0 +1,46 @@ +import type { ContextPruningToolMatch } from "./settings.js"; + +function normalizePatterns(patterns?: string[]): string[] { + if (!Array.isArray(patterns)) return []; + return patterns.map((p) => String(p ?? "").trim()).filter(Boolean); +} + +type CompiledPattern = + | { kind: "all" } + | { kind: "exact"; value: string } + | { kind: "regex"; value: RegExp }; + +function compilePattern(pattern: string): CompiledPattern { + if (pattern === "*") return { kind: "all" }; + if (!pattern.includes("*")) return { kind: "exact", value: pattern }; + + const escaped = pattern.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); + const re = new RegExp(`^${escaped.replaceAll("\\*", ".*")}$`); + return { kind: "regex", value: re }; +} + +function compilePatterns(patterns?: string[]): CompiledPattern[] { + return normalizePatterns(patterns).map(compilePattern); +} + +function matchesAny(toolName: string, patterns: CompiledPattern[]): boolean { + for (const p of patterns) { + if (p.kind === "all") return true; + if (p.kind === "exact" && toolName === p.value) return true; + if (p.kind === "regex" && p.value.test(toolName)) return true; + } + return false; +} + +export function makeToolPrunablePredicate( + match: ContextPruningToolMatch, +): (toolName: string) => boolean { + const deny = compilePatterns(match.deny); + const allow = compilePatterns(match.allow); + + return (toolName: string) => { + if (matchesAny(toolName, deny)) return false; + if (allow.length === 0) return true; + return matchesAny(toolName, 
allow); + }; +} diff --git a/src/agents/pi-tool-definition-adapter.test.ts b/src/agents/pi-tool-definition-adapter.test.ts index 27a101002..48700ad28 100644 --- a/src/agents/pi-tool-definition-adapter.test.ts +++ b/src/agents/pi-tool-definition-adapter.test.ts @@ -22,6 +22,7 @@ describe("pi tool definition adapter", () => { status: "error", tool: "boom", }); - expect(JSON.stringify(result.details)).toContain("nope"); + expect(result.details).toMatchObject({ error: "nope" }); + expect(JSON.stringify(result.details)).not.toContain("\n at "); }); }); diff --git a/src/agents/pi-tool-definition-adapter.ts b/src/agents/pi-tool-definition-adapter.ts index df8b64d8d..9f4451625 100644 --- a/src/agents/pi-tool-definition-adapter.ts +++ b/src/agents/pi-tool-definition-adapter.ts @@ -4,12 +4,23 @@ import type { AgentToolUpdateCallback, } from "@mariozechner/pi-agent-core"; import type { ToolDefinition } from "@mariozechner/pi-coding-agent"; -import { logError } from "../logger.js"; +import { logDebug, logError } from "../logger.js"; import { jsonResult } from "./tools/common.js"; // biome-ignore lint/suspicious/noExplicitAny: TypeBox schema type from pi-agent-core uses a different module instance. type AnyAgentTool = AgentTool; +function describeToolExecutionError(err: unknown): { + message: string; + stack?: string; +} { + if (err instanceof Error) { + const message = err.message?.trim() ? err.message : String(err); + return { message, stack: err.stack }; + } + return { message: String(err) }; +} + export function toToolDefinitions(tools: AnyAgentTool[]): ToolDefinition[] { return tools.map((tool) => { const name = tool.name || "tool"; @@ -37,13 +48,15 @@ export function toToolDefinitions(tools: AnyAgentTool[]): ToolDefinition[] { ? String((err as { name?: unknown }).name) : ""; if (name === "AbortError") throw err; - const message = - err instanceof Error ? (err.stack ?? 
err.message) : String(err); - logError(`[tools] ${tool.name} failed: ${message}`); + const described = describeToolExecutionError(err); + if (described.stack && described.stack !== described.message) { + logDebug(`tools: ${tool.name} failed stack:\n${described.stack}`); + } + logError(`[tools] ${tool.name} failed: ${described.message}`); return jsonResult({ status: "error", tool: tool.name, - error: message, + error: described.message, }); } }, diff --git a/src/agents/pi-tools-agent-config.test.ts b/src/agents/pi-tools-agent-config.test.ts index 65c429781..a89b27c69 100644 --- a/src/agents/pi-tools-agent-config.test.ts +++ b/src/agents/pi-tools-agent-config.test.ts @@ -114,7 +114,7 @@ describe("Agent-specific tool filtering", () => { expect(familyToolNames).not.toContain("edit"); }); - it("should combine global and agent-specific deny lists", () => { + it("should prefer agent-specific tool policy over global", () => { const cfg: ClawdbotConfig = { agent: { tools: { @@ -126,7 +126,7 @@ describe("Agent-specific tool filtering", () => { work: { workspace: "~/clawd-work", tools: { - deny: ["bash", "process"], // Agent deny + deny: ["bash", "process"], // Agent deny (override) }, }, }, @@ -141,8 +141,8 @@ describe("Agent-specific tool filtering", () => { }); const toolNames = tools.map((t) => t.name); - // Both global and agent denies should be applied - expect(toolNames).not.toContain("browser"); + // Agent policy overrides global: browser is allowed again + expect(toolNames).toContain("browser"); expect(toolNames).not.toContain("bash"); expect(toolNames).not.toContain("process"); }); @@ -213,4 +213,30 @@ describe("Agent-specific tool filtering", () => { expect(toolNames).not.toContain("bash"); expect(toolNames).not.toContain("write"); }); + + it("should run bash synchronously when process is denied", async () => { + const cfg: ClawdbotConfig = { + agent: { + tools: { + deny: ["process"], + }, + }, + }; + + const tools = createClawdbotCodingTools({ + config: cfg, + 
sessionKey: "agent:main:main", + workspaceDir: "/tmp/test-main", + agentDir: "/tmp/agent-main", + }); + const bash = tools.find((tool) => tool.name === "bash"); + expect(bash).toBeDefined(); + + const result = await bash?.execute("call1", { + command: "node -e \"setTimeout(() => { console.log('done') }, 50)\"", + yieldMs: 10, + }); + + expect(result?.details.status).toBe("completed"); + }); }); diff --git a/src/agents/pi-tools.test.ts b/src/agents/pi-tools.test.ts index 566e85659..d805eff0c 100644 --- a/src/agents/pi-tools.test.ts +++ b/src/agents/pi-tools.test.ts @@ -31,6 +31,39 @@ describe("createClawdbotCodingTools", () => { expect(parameters.required ?? []).toContain("action"); }); + it("flattens anyOf-of-literals to enum for provider compatibility", () => { + const tools = createClawdbotCodingTools(); + const browser = tools.find((tool) => tool.name === "browser"); + expect(browser).toBeDefined(); + + const parameters = browser?.parameters as { + properties?: Record; + }; + const action = parameters.properties?.action as + | { + type?: unknown; + enum?: unknown[]; + anyOf?: unknown[]; + } + | undefined; + + expect(action?.type).toBe("string"); + expect(action?.anyOf).toBeUndefined(); + expect(Array.isArray(action?.enum)).toBe(true); + expect(action?.enum).toContain("act"); + + const format = parameters.properties?.format as + | { + type?: unknown; + enum?: unknown[]; + anyOf?: unknown[]; + } + | undefined; + expect(format?.type).toBe("string"); + expect(format?.anyOf).toBeUndefined(); + expect(format?.enum).toEqual(["aria", "ai"]); + }); + it("preserves action enums in normalized schemas", () => { const tools = createClawdbotCodingTools(); const toolNames = ["browser", "canvas", "nodes", "cron", "gateway"]; diff --git a/src/agents/pi-tools.ts b/src/agents/pi-tools.ts index 80de703fd..5e67bab0c 100644 --- a/src/agents/pi-tools.ts +++ b/src/agents/pi-tools.ts @@ -154,12 +154,73 @@ function mergePropertySchemas(existing: unknown, incoming: unknown): unknown { 
return existing; } +// Check if an anyOf array contains only literal values that can be flattened +// TypeBox Type.Literal generates { const: "value", type: "string" } +// Some schemas may use { enum: ["value"], type: "string" } +// Both patterns are flattened to { type: "string", enum: ["a", "b", ...] } +function tryFlattenLiteralAnyOf( + anyOf: unknown[], +): { type: string; enum: unknown[] } | null { + if (anyOf.length === 0) return null; + + const allValues: unknown[] = []; + let commonType: string | null = null; + + for (const variant of anyOf) { + if (!variant || typeof variant !== "object") return null; + const v = variant as Record; + + // Extract the literal value - either from const or single-element enum + let literalValue: unknown; + if ("const" in v) { + literalValue = v.const; + } else if (Array.isArray(v.enum) && v.enum.length === 1) { + literalValue = v.enum[0]; + } else { + return null; // Not a literal pattern + } + + // Must have consistent type (usually "string") + const variantType = typeof v.type === "string" ? 
v.type : null; + if (!variantType) return null; + if (commonType === null) commonType = variantType; + else if (commonType !== variantType) return null; + + allValues.push(literalValue); + } + + if (commonType && allValues.length > 0) { + return { type: commonType, enum: allValues }; + } + return null; +} + function cleanSchemaForGemini(schema: unknown): unknown { if (!schema || typeof schema !== "object") return schema; if (Array.isArray(schema)) return schema.map(cleanSchemaForGemini); const obj = schema as Record; const hasAnyOf = "anyOf" in obj && Array.isArray(obj.anyOf); + + // Try to flatten anyOf of literals to a single enum BEFORE processing + // This handles Type.Union([Type.Literal("a"), Type.Literal("b")]) patterns + if (hasAnyOf) { + const flattened = tryFlattenLiteralAnyOf(obj.anyOf as unknown[]); + if (flattened) { + // Return flattened enum, preserving metadata (description, title, default, examples) + const result: Record = { + type: flattened.type, + enum: flattened.enum, + }; + for (const key of ["description", "title", "default", "examples"]) { + if (key in obj && obj[key] !== undefined) { + result[key] = obj[key]; + } + } + return result; + } + } + const cleaned: Record = {}; for (const [key, value] of Object.entries(obj)) { @@ -371,6 +432,43 @@ function filterToolsByPolicy( }); } +function resolveEffectiveToolPolicy(params: { + config?: ClawdbotConfig; + sessionKey?: string; +}) { + const agentId = params.sessionKey + ? resolveAgentIdFromSessionKey(params.sessionKey) + : undefined; + const agentConfig = + params.config && agentId + ? resolveAgentConfig(params.config, agentId) + : undefined; + const hasAgentTools = agentConfig?.tools !== undefined; + const globalTools = params.config?.agent?.tools; + return { + agentId, + policy: hasAgentTools ? 
agentConfig?.tools : globalTools, + }; +} + +function isToolAllowedByPolicy(name: string, policy?: SandboxToolPolicy) { + if (!policy) return true; + const deny = new Set(normalizeToolNames(policy.deny)); + const allowRaw = normalizeToolNames(policy.allow); + const allow = allowRaw.length > 0 ? new Set(allowRaw) : null; + const normalized = name.trim().toLowerCase(); + if (deny.has(normalized)) return false; + if (allow) return allow.has(normalized); + return true; +} + +function isToolAllowedByPolicies( + name: string, + policies: Array, +) { + return policies.every((policy) => isToolAllowedByPolicy(name, policy)); +} + function wrapSandboxPathGuard(tool: AnyAgentTool, root: string): AnyAgentTool { return { ...tool, @@ -409,8 +507,13 @@ function createWhatsAppLoginTool(): AnyAgentTool { name: "whatsapp_login", description: "Generate a WhatsApp QR code for linking, or wait for the scan to complete.", + // NOTE: Using Type.Unsafe for action enum instead of Type.Union([Type.Literal(...)]) + // because Claude API on Vertex AI rejects nested anyOf schemas as invalid JSON Schema. parameters: Type.Object({ - action: Type.Union([Type.Literal("start"), Type.Literal("wait")]), + action: Type.Unsafe<"start" | "wait">({ + type: "string", + enum: ["start", "wait"], + }), timeoutMs: Type.Optional(Type.Number()), force: Type.Optional(Type.Boolean()), }), @@ -529,6 +632,21 @@ export function createClawdbotCodingTools(options?: { }): AnyAgentTool[] { const bashToolName = "bash"; const sandbox = options?.sandbox?.enabled ? options.sandbox : undefined; + const { agentId, policy: effectiveToolsPolicy } = resolveEffectiveToolPolicy({ + config: options?.config, + sessionKey: options?.sessionKey, + }); + const scopeKey = + options?.bash?.scopeKey ?? (agentId ? `agent:${agentId}` : undefined); + const subagentPolicy = + isSubagentSessionKey(options?.sessionKey) && options?.sessionKey + ? 
resolveSubagentToolPolicy(options.config) + : undefined; + const allowBackground = isToolAllowedByPolicies("process", [ + effectiveToolsPolicy, + sandbox?.tools, + subagentPolicy, + ]); const sandboxRoot = sandbox?.workspaceDir; const allowWorkspaceWrites = sandbox?.workspaceAccess !== "ro"; const base = (codingTools as unknown as AnyAgentTool[]).flatMap((tool) => { @@ -545,6 +663,8 @@ export function createClawdbotCodingTools(options?: { }); const bashTool = createBashTool({ ...options?.bash, + allowBackground, + scopeKey, sandbox: sandbox ? { containerName: sandbox.containerName, @@ -556,6 +676,7 @@ export function createClawdbotCodingTools(options?: { }); const processTool = createProcessTool({ cleanupMs: options?.bash?.cleanupMs, + scopeKey, }); const tools: AnyAgentTool[] = [ ...base, @@ -590,33 +711,15 @@ export function createClawdbotCodingTools(options?: { if (tool.name === "whatsapp") return allowWhatsApp; return true; }); - const globallyFiltered = - options?.config?.agent?.tools && - (options.config.agent.tools.allow?.length || - options.config.agent.tools.deny?.length) - ? filterToolsByPolicy(filtered, options.config.agent.tools) - : filtered; - - // Agent-specific tool policy - let agentFiltered = globallyFiltered; - if (options?.sessionKey && options?.config) { - const agentId = resolveAgentIdFromSessionKey(options.sessionKey); - const agentConfig = resolveAgentConfig(options.config, agentId); - if (agentConfig?.tools) { - agentFiltered = filterToolsByPolicy(globallyFiltered, agentConfig.tools); - } - } - + const toolsFiltered = effectiveToolsPolicy + ? filterToolsByPolicy(filtered, effectiveToolsPolicy) + : filtered; const sandboxed = sandbox - ? filterToolsByPolicy(agentFiltered, sandbox.tools) - : agentFiltered; - const subagentFiltered = - isSubagentSessionKey(options?.sessionKey) && options?.sessionKey - ? filterToolsByPolicy( - sandboxed, - resolveSubagentToolPolicy(options.config), - ) - : sandboxed; + ? 
filterToolsByPolicy(toolsFiltered, sandbox.tools) + : toolsFiltered; + const subagentFiltered = subagentPolicy + ? filterToolsByPolicy(sandboxed, subagentPolicy) + : sandboxed; // Always normalize tool JSON Schemas before handing them to pi-agent/pi-ai. // Without this, some providers (notably OpenAI) will reject root-level union schemas. return subagentFiltered.map(normalizeToolParameters); diff --git a/src/agents/tool-display.json b/src/agents/tool-display.json index fb02b91f4..67ecb83ae 100644 --- a/src/agents/tool-display.json +++ b/src/agents/tool-display.json @@ -168,7 +168,7 @@ "sessions_spawn": { "emoji": "🧑‍🔧", "title": "Sub-agent", - "detailKeys": ["label", "timeoutSeconds", "cleanup"] + "detailKeys": ["label", "runTimeoutSeconds", "cleanup"] }, "whatsapp_login": { "emoji": "🟢", diff --git a/src/agents/tools/browser-tool.ts b/src/agents/tools/browser-tool.ts index 8681e3abb..6e997a1ae 100644 --- a/src/agents/tools/browser-tool.ts +++ b/src/agents/tools/browser-tool.ts @@ -28,74 +28,61 @@ import { readStringParam, } from "./common.js"; -const BrowserActSchema = Type.Union([ - Type.Object({ - kind: Type.Literal("click"), - ref: Type.String(), - targetId: Type.Optional(Type.String()), - doubleClick: Type.Optional(Type.Boolean()), - button: Type.Optional(Type.String()), - modifiers: Type.Optional(Type.Array(Type.String())), +const BROWSER_ACT_KINDS = [ + "click", + "type", + "press", + "hover", + "drag", + "select", + "fill", + "resize", + "wait", + "evaluate", + "close", +] as const; + +type BrowserActKind = (typeof BROWSER_ACT_KINDS)[number]; + +// NOTE: Using a flattened object schema instead of Type.Union([Type.Object(...), ...]) +// because Claude API on Vertex AI rejects nested anyOf schemas as invalid JSON Schema. +// The discriminator (kind) determines which properties are relevant; runtime validates. 
+const BrowserActSchema = Type.Object({ + kind: Type.Unsafe({ + type: "string", + enum: [...BROWSER_ACT_KINDS], }), - Type.Object({ - kind: Type.Literal("type"), - ref: Type.String(), - text: Type.String(), - targetId: Type.Optional(Type.String()), - submit: Type.Optional(Type.Boolean()), - slowly: Type.Optional(Type.Boolean()), - }), - Type.Object({ - kind: Type.Literal("press"), - key: Type.String(), - targetId: Type.Optional(Type.String()), - }), - Type.Object({ - kind: Type.Literal("hover"), - ref: Type.String(), - targetId: Type.Optional(Type.String()), - }), - Type.Object({ - kind: Type.Literal("drag"), - startRef: Type.String(), - endRef: Type.String(), - targetId: Type.Optional(Type.String()), - }), - Type.Object({ - kind: Type.Literal("select"), - ref: Type.String(), - values: Type.Array(Type.String()), - targetId: Type.Optional(Type.String()), - }), - Type.Object({ - kind: Type.Literal("fill"), - fields: Type.Array(Type.Record(Type.String(), Type.Unknown())), - targetId: Type.Optional(Type.String()), - }), - Type.Object({ - kind: Type.Literal("resize"), - width: Type.Number(), - height: Type.Number(), - targetId: Type.Optional(Type.String()), - }), - Type.Object({ - kind: Type.Literal("wait"), - timeMs: Type.Optional(Type.Number()), - text: Type.Optional(Type.String()), - textGone: Type.Optional(Type.String()), - targetId: Type.Optional(Type.String()), - }), - Type.Object({ - kind: Type.Literal("evaluate"), - fn: Type.String(), - ref: Type.Optional(Type.String()), - targetId: Type.Optional(Type.String()), - }), - Type.Object({ - kind: Type.Literal("close"), - targetId: Type.Optional(Type.String()), - }), -]); + // Common fields + targetId: Type.Optional(Type.String()), + ref: Type.Optional(Type.String()), + // click + doubleClick: Type.Optional(Type.Boolean()), + button: Type.Optional(Type.String()), + modifiers: Type.Optional(Type.Array(Type.String())), + // type + text: Type.Optional(Type.String()), + submit: Type.Optional(Type.Boolean()), + slowly: 
Type.Optional(Type.Boolean()), + // press + key: Type.Optional(Type.String()), + // drag + startRef: Type.Optional(Type.String()), + endRef: Type.Optional(Type.String()), + // select + values: Type.Optional(Type.Array(Type.String())), + // fill - use permissive array of objects + fields: Type.Optional( + Type.Array(Type.Object({}, { additionalProperties: true })), + ), + // resize + width: Type.Optional(Type.Number()), + height: Type.Optional(Type.Number()), + // wait + timeMs: Type.Optional(Type.Number()), + textGone: Type.Optional(Type.String()), + // evaluate + fn: Type.Optional(Type.String()), +}); // IMPORTANT: OpenAI function tool schemas must have a top-level `type: "object"`. // A root-level `Type.Union([...])` compiles to `{ anyOf: [...] }` (no `type`), diff --git a/src/agents/tools/sessions-spawn-tool.ts b/src/agents/tools/sessions-spawn-tool.ts index ea19f370a..fe983feb5 100644 --- a/src/agents/tools/sessions-spawn-tool.ts +++ b/src/agents/tools/sessions-spawn-tool.ts @@ -9,15 +9,8 @@ import { normalizeAgentId, parseAgentSessionKey, } from "../../routing/session-key.js"; -import { - buildSubagentSystemPrompt, - runSubagentAnnounceFlow, -} from "../subagent-announce.js"; -import { - beginSubagentAnnounce, - registerSubagentRun, -} from "../subagent-registry.js"; -import { readLatestAssistantReply } from "./agent-step.js"; +import { buildSubagentSystemPrompt } from "../subagent-announce.js"; +import { registerSubagentRun } from "../subagent-registry.js"; import type { AnyAgentTool } from "./common.js"; import { jsonResult, readStringParam } from "./common.js"; import { @@ -30,6 +23,8 @@ const SessionsSpawnToolSchema = Type.Object({ task: Type.String(), label: Type.Optional(Type.String()), model: Type.Optional(Type.String()), + runTimeoutSeconds: Type.Optional(Type.Integer({ minimum: 0 })), + // Back-compat alias. Prefer runTimeoutSeconds. 
timeoutSeconds: Type.Optional(Type.Integer({ minimum: 0 })), cleanup: Type.Optional( Type.Union([Type.Literal("delete"), Type.Literal("keep")]), @@ -56,12 +51,20 @@ export function createSessionsSpawnTool(opts?: { params.cleanup === "keep" || params.cleanup === "delete" ? (params.cleanup as "keep" | "delete") : "keep"; - const timeoutSeconds = - typeof params.timeoutSeconds === "number" && - Number.isFinite(params.timeoutSeconds) - ? Math.max(0, Math.floor(params.timeoutSeconds)) - : 0; - const timeoutMs = timeoutSeconds * 1000; + const runTimeoutSeconds = (() => { + const explicit = + typeof params.runTimeoutSeconds === "number" && + Number.isFinite(params.runTimeoutSeconds) + ? Math.max(0, Math.floor(params.runTimeoutSeconds)) + : undefined; + if (explicit !== undefined) return explicit; + const legacy = + typeof params.timeoutSeconds === "number" && + Number.isFinite(params.timeoutSeconds) + ? Math.max(0, Math.floor(params.timeoutSeconds)) + : undefined; + return legacy ?? 0; + })(); let modelWarning: string | undefined; let modelApplied = false; @@ -152,6 +155,7 @@ export function createSessionsSpawnTool(opts?: { deliver: false, lane: "subagent", extraSystemPrompt: childSystemPrompt, + timeout: runTimeoutSeconds > 0 ? runTimeoutSeconds : undefined, }, timeoutMs: 10_000, })) as { runId?: string }; @@ -183,109 +187,10 @@ export function createSessionsSpawnTool(opts?: { cleanup, }); - if (timeoutSeconds === 0) { - return jsonResult({ - status: "accepted", - childSessionKey, - runId: childRunId, - modelApplied: model ? 
modelApplied : undefined, - warning: modelWarning, - }); - } - - let waitStatus: string | undefined; - let waitError: string | undefined; - let waitStartedAt: number | undefined; - let waitEndedAt: number | undefined; - try { - const wait = (await callGateway({ - method: "agent.wait", - params: { - runId: childRunId, - timeoutMs, - }, - timeoutMs: timeoutMs + 2000, - })) as { - status?: string; - error?: string; - startedAt?: number; - endedAt?: number; - }; - waitStatus = typeof wait?.status === "string" ? wait.status : undefined; - waitError = typeof wait?.error === "string" ? wait.error : undefined; - waitStartedAt = - typeof wait?.startedAt === "number" ? wait.startedAt : undefined; - waitEndedAt = - typeof wait?.endedAt === "number" ? wait.endedAt : undefined; - } catch (err) { - const messageText = - err instanceof Error - ? err.message - : typeof err === "string" - ? err - : "error"; - return jsonResult({ - status: messageText.includes("gateway timeout") ? "timeout" : "error", - error: messageText, - childSessionKey, - runId: childRunId, - }); - } - - if (waitStatus === "timeout") { - try { - await callGateway({ - method: "chat.abort", - params: { sessionKey: childSessionKey, runId: childRunId }, - timeoutMs: 5_000, - }); - } catch { - // best-effort - } - return jsonResult({ - status: "timeout", - error: waitError, - childSessionKey, - runId: childRunId, - modelApplied: model ? modelApplied : undefined, - warning: modelWarning, - }); - } - if (waitStatus === "error") { - return jsonResult({ - status: "error", - error: waitError ?? "agent error", - childSessionKey, - runId: childRunId, - modelApplied: model ? 
modelApplied : undefined, - warning: modelWarning, - }); - } - - const replyText = await readLatestAssistantReply({ - sessionKey: childSessionKey, - }); - if (beginSubagentAnnounce(childRunId)) { - void runSubagentAnnounceFlow({ - childSessionKey, - childRunId, - requesterSessionKey: requesterInternalKey, - requesterProvider: opts?.agentProvider, - requesterDisplayKey, - task, - timeoutMs: 30_000, - cleanup, - roundOneReply: replyText, - startedAt: waitStartedAt, - endedAt: waitEndedAt, - }); - } - return jsonResult({ - status: "ok", + status: "accepted", childSessionKey, runId: childRunId, - reply: replyText, modelApplied: model ? modelApplied : undefined, warning: modelWarning, }); diff --git a/src/agents/tools/telegram-schema.ts b/src/agents/tools/telegram-schema.ts index b8d999817..a19bb4683 100644 --- a/src/agents/tools/telegram-schema.ts +++ b/src/agents/tools/telegram-schema.ts @@ -2,11 +2,14 @@ import { Type } from "@sinclair/typebox"; import { createReactionSchema } from "./reaction-schema.js"; +// NOTE: chatId and messageId use Type.String() instead of Type.Union([Type.String(), Type.Number()]) +// because nested anyOf schemas cause JSON Schema validation failures with Claude API on Vertex AI. +// Telegram IDs are coerced to strings at runtime in telegram-actions.ts. 
export const TelegramToolSchema = Type.Union([ createReactionSchema({ ids: { - chatId: Type.Union([Type.String(), Type.Number()]), - messageId: Type.Union([Type.String(), Type.Number()]), + chatId: Type.String(), + messageId: Type.String(), }, includeRemove: true, }), diff --git a/src/auto-reply/envelope.ts b/src/auto-reply/envelope.ts index 628e13e54..0130ed59d 100644 --- a/src/auto-reply/envelope.ts +++ b/src/auto-reply/envelope.ts @@ -34,3 +34,17 @@ export function formatAgentEnvelope(params: AgentEnvelopeParams): string { const header = `[${parts.join(" ")}]`; return `${header} ${params.body}`; } + +export function formatThreadStarterEnvelope(params: { + provider: string; + author?: string; + timestamp?: number | Date; + body: string; +}): string { + return formatAgentEnvelope({ + provider: params.provider, + from: params.author, + timestamp: params.timestamp, + body: params.body, + }); +} diff --git a/src/auto-reply/model.test.ts b/src/auto-reply/model.test.ts new file mode 100644 index 000000000..85a3b3560 --- /dev/null +++ b/src/auto-reply/model.test.ts @@ -0,0 +1,125 @@ +import { describe, expect, it } from "vitest"; +import { extractModelDirective } from "./model.js"; + +describe("extractModelDirective", () => { + describe("basic /model command", () => { + it("extracts /model with argument", () => { + const result = extractModelDirective("/model gpt-5"); + expect(result.hasDirective).toBe(true); + expect(result.rawModel).toBe("gpt-5"); + expect(result.cleaned).toBe(""); + }); + + it("extracts /model with provider/model format", () => { + const result = extractModelDirective("/model anthropic/claude-opus-4-5"); + expect(result.hasDirective).toBe(true); + expect(result.rawModel).toBe("anthropic/claude-opus-4-5"); + }); + + it("extracts /model with profile override", () => { + const result = extractModelDirective("/model gpt-5@myprofile"); + expect(result.hasDirective).toBe(true); + expect(result.rawModel).toBe("gpt-5"); + 
expect(result.rawProfile).toBe("myprofile"); + }); + + it("returns no directive for plain text", () => { + const result = extractModelDirective("hello world"); + expect(result.hasDirective).toBe(false); + expect(result.cleaned).toBe("hello world"); + }); + }); + + describe("alias shortcuts", () => { + it("recognizes /gpt as model directive when alias is configured", () => { + const result = extractModelDirective("/gpt", { + aliases: ["gpt", "sonnet", "opus"], + }); + expect(result.hasDirective).toBe(true); + expect(result.rawModel).toBe("gpt"); + expect(result.cleaned).toBe(""); + }); + + it("recognizes /sonnet as model directive", () => { + const result = extractModelDirective("/sonnet", { + aliases: ["gpt", "sonnet", "opus"], + }); + expect(result.hasDirective).toBe(true); + expect(result.rawModel).toBe("sonnet"); + }); + + it("recognizes alias mid-message", () => { + const result = extractModelDirective("switch to /opus please", { + aliases: ["opus"], + }); + expect(result.hasDirective).toBe(true); + expect(result.rawModel).toBe("opus"); + expect(result.cleaned).toBe("switch to please"); + }); + + it("is case-insensitive for aliases", () => { + const result = extractModelDirective("/GPT", { aliases: ["gpt"] }); + expect(result.hasDirective).toBe(true); + expect(result.rawModel).toBe("GPT"); + }); + + it("does not match alias without leading slash", () => { + const result = extractModelDirective("gpt is great", { + aliases: ["gpt"], + }); + expect(result.hasDirective).toBe(false); + }); + + it("does not match unknown aliases", () => { + const result = extractModelDirective("/unknown", { + aliases: ["gpt", "sonnet"], + }); + expect(result.hasDirective).toBe(false); + expect(result.cleaned).toBe("/unknown"); + }); + + it("prefers /model over alias when both present", () => { + const result = extractModelDirective("/model haiku", { + aliases: ["gpt"], + }); + expect(result.hasDirective).toBe(true); + expect(result.rawModel).toBe("haiku"); + }); + + it("handles empty 
aliases array", () => { + const result = extractModelDirective("/gpt", { aliases: [] }); + expect(result.hasDirective).toBe(false); + }); + + it("handles undefined aliases", () => { + const result = extractModelDirective("/gpt"); + expect(result.hasDirective).toBe(false); + }); + }); + + describe("edge cases", () => { + it("handles alias with special regex characters", () => { + const result = extractModelDirective("/test.alias", { + aliases: ["test.alias"], + }); + expect(result.hasDirective).toBe(true); + expect(result.rawModel).toBe("test.alias"); + }); + + it("does not match partial alias", () => { + const result = extractModelDirective("/gpt-turbo", { aliases: ["gpt"] }); + expect(result.hasDirective).toBe(false); + }); + + it("handles empty body", () => { + const result = extractModelDirective("", { aliases: ["gpt"] }); + expect(result.hasDirective).toBe(false); + expect(result.cleaned).toBe(""); + }); + + it("handles undefined body", () => { + const result = extractModelDirective(undefined, { aliases: ["gpt"] }); + expect(result.hasDirective).toBe(false); + }); + }); +}); diff --git a/src/auto-reply/model.ts b/src/auto-reply/model.ts index 56bb6e19e..f85cb4ba5 100644 --- a/src/auto-reply/model.ts +++ b/src/auto-reply/model.ts @@ -1,14 +1,38 @@ -export function extractModelDirective(body?: string): { +function escapeRegExp(value: string) { + return value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); +} + +export function extractModelDirective( + body?: string, + options?: { aliases?: string[] }, +): { cleaned: string; rawModel?: string; rawProfile?: string; hasDirective: boolean; } { if (!body) return { cleaned: "", hasDirective: false }; - const match = body.match( + + const modelMatch = body.match( /(?:^|\s)\/model(?=$|\s|:)\s*:?\s*([A-Za-z0-9_.:@-]+(?:\/[A-Za-z0-9_.:@-]+)?)?/i, ); - const raw = match?.[1]?.trim(); + + const aliases = (options?.aliases ?? 
[]) + .map((alias) => alias.trim()) + .filter(Boolean); + const aliasMatch = + modelMatch || aliases.length === 0 + ? null + : body.match( + new RegExp( + `(?:^|\\s)\\/(${aliases.map(escapeRegExp).join("|")})(?=$|\\s|:)`, + "i", + ), + ); + + const match = modelMatch ?? aliasMatch; + const raw = modelMatch ? modelMatch?.[1]?.trim() : aliasMatch?.[1]?.trim(); + let rawModel = raw; let rawProfile: string | undefined; if (raw?.includes("@")) { @@ -16,9 +40,11 @@ export function extractModelDirective(body?: string): { rawModel = parts[0]?.trim(); rawProfile = parts.slice(1).join("@").trim() || undefined; } + const cleaned = match ? body.replace(match[0], "").replace(/\s+/g, " ").trim() : body.trim(); + return { cleaned, rawModel, diff --git a/src/auto-reply/reply.directive.test.ts b/src/auto-reply/reply.directive.test.ts index 9f9105d44..a6014e8f9 100644 --- a/src/auto-reply/reply.directive.test.ts +++ b/src/auto-reply/reply.directive.test.ts @@ -144,6 +144,36 @@ describe("directive parsing", () => { expect(res.cleaned).toBe("please now"); }); + it("keeps reserved command aliases from matching after trimming", async () => { + await withTempHome(async (home) => { + vi.mocked(runEmbeddedPiAgent).mockReset(); + + const res = await getReplyFromConfig( + { + Body: "/help", + From: "+1222", + To: "+1222", + }, + {}, + { + agent: { + model: "anthropic/claude-opus-4-5", + workspace: path.join(home, "clawd"), + models: { + "anthropic/claude-opus-4-5": { alias: " help " }, + }, + }, + whatsapp: { allowFrom: ["*"] }, + session: { store: path.join(home, "sessions.json") }, + }, + ); + + const text = Array.isArray(res) ? 
res[0]?.text : res?.text; + expect(text).toContain("Help"); + expect(runEmbeddedPiAgent).not.toHaveBeenCalled(); + }); + }); + it("errors on invalid queue options", async () => { await withTempHome(async (home) => { vi.mocked(runEmbeddedPiAgent).mockReset(); diff --git a/src/auto-reply/reply.ts b/src/auto-reply/reply.ts index f4cc9b445..e7f95bb0d 100644 --- a/src/auto-reply/reply.ts +++ b/src/auto-reply/reply.ts @@ -25,13 +25,16 @@ import { type ClawdbotConfig, loadConfig, } from "../config/config.js"; -import { resolveSessionTranscriptPath } from "../config/sessions.js"; +import { resolveSessionFilePath } from "../config/sessions.js"; import { logVerbose } from "../globals.js"; import { clearCommandLane, getQueueSize } from "../process/command-queue.js"; import { defaultRuntime } from "../runtime.js"; import { resolveCommandAuthorization } from "./command-auth.js"; import { hasControlCommand } from "./command-detection.js"; -import { shouldHandleTextCommands } from "./commands-registry.js"; +import { + listChatCommands, + shouldHandleTextCommands, +} from "./commands-registry.js"; import { getAbortMemory } from "./reply/abort.js"; import { runReplyAgent } from "./reply/agent-runner.js"; import { resolveBlockStreamingChunking } from "./reply/block-streaming.js"; @@ -62,6 +65,10 @@ import { prependSystemEvents, } from "./reply/session-updates.js"; import { createTypingController } from "./reply/typing.js"; +import { + createTypingSignaler, + resolveTypingMode, +} from "./reply/typing-mode.js"; import type { MsgContext, TemplateContext } from "./templating.js"; import { type ElevatedLevel, @@ -312,7 +319,18 @@ export async function getReplyFromConfig( rawDrop: undefined, hasQueueOptions: false, }); - let parsedDirectives = parseInlineDirectives(rawBody); + const reservedCommands = new Set( + listChatCommands().flatMap((cmd) => + cmd.textAliases.map((a) => a.replace(/^\//, "").toLowerCase()), + ), + ); + const configuredAliases = Object.values(cfg.agent?.models ?? 
{}) + .map((entry) => entry.alias?.trim()) + .filter((alias): alias is string => Boolean(alias)) + .filter((alias) => !reservedCommands.has(alias.toLowerCase())); + let parsedDirectives = parseInlineDirectives(rawBody, { + modelAliases: configuredAliases, + }); const hasDirective = parsedDirectives.hasThinkDirective || parsedDirectives.hasVerboseDirective || @@ -580,7 +598,17 @@ export async function getReplyFromConfig( const isGroupChat = sessionCtx.ChatType === "group"; const wasMentioned = ctx.WasMentioned === true; const isHeartbeat = opts?.isHeartbeat === true; - const shouldEagerType = (!isGroupChat || wasMentioned) && !isHeartbeat; + const typingMode = resolveTypingMode({ + configured: sessionCfg?.typingMode ?? agentCfg?.typingMode, + isGroupChat, + wasMentioned, + isHeartbeat, + }); + const typingSignals = createTypingSignaler({ + typing, + mode: typingMode, + isHeartbeat, + }); const shouldInjectGroupIntro = Boolean( isGroupChat && (isFirstTurnInSession || sessionEntry?.groupActivationNeedsSystemIntro), @@ -646,6 +674,11 @@ export async function getReplyFromConfig( isNewSession, prefixedBodyBase, }); + const threadStarterBody = ctx.ThreadStarterBody?.trim(); + const threadStarterNote = + isNewSession && threadStarterBody + ? `[Thread starter - for context]\n${threadStarterBody}` + : undefined; const skillResult = await ensureSkillSnapshot({ sessionEntry, sessionStore, @@ -661,10 +694,10 @@ export async function getReplyFromConfig( systemSent = skillResult.systemSent; const skillsSnapshot = skillResult.skillsSnapshot; const prefixedBody = transcribedText - ? [prefixedBodyBase, `Transcript:\n${transcribedText}`] + ? [threadStarterNote, prefixedBodyBase, `Transcript:\n${transcribedText}`] .filter(Boolean) .join("\n\n") - : prefixedBodyBase; + : [threadStarterNote, prefixedBodyBase].filter(Boolean).join("\n\n"); const mediaNote = ctx.MediaPath?.length ? `[media attached: ${ctx.MediaPath}${ctx.MediaType ? ` (${ctx.MediaType})` : ""}${ctx.MediaUrl ? 
` | ${ctx.MediaUrl}` : ""}]` : undefined; @@ -689,12 +722,12 @@ export async function getReplyFromConfig( resolvedThinkLevel = await modelState.resolveDefaultThinkingLevel(); } const sessionIdFinal = sessionId ?? crypto.randomUUID(); - const sessionFile = resolveSessionTranscriptPath(sessionIdFinal); + const sessionFile = resolveSessionFilePath(sessionIdFinal, sessionEntry); const queueBodyBase = transcribedText - ? [baseBodyFinal, `Transcript:\n${transcribedText}`] + ? [threadStarterNote, baseBodyFinal, `Transcript:\n${transcribedText}`] .filter(Boolean) .join("\n\n") - : baseBodyFinal; + : [threadStarterNote, baseBodyFinal].filter(Boolean).join("\n\n"); const queuedBody = mediaNote ? [mediaNote, mediaReplyHint, queueBodyBase] .filter(Boolean) @@ -769,8 +802,8 @@ export async function getReplyFromConfig( }, }; - if (shouldEagerType) { - await typing.startTypingLoop(); + if (typingSignals.shouldStartImmediately) { + await typingSignals.signalRunStart(); } return runReplyAgent({ @@ -797,6 +830,7 @@ export async function getReplyFromConfig( resolvedBlockStreamingBreak, sessionCtx, shouldInjectGroupIntro, + typingMode, }); } diff --git a/src/auto-reply/reply/agent-runner.heartbeat-typing.test.ts b/src/auto-reply/reply/agent-runner.heartbeat-typing.test.ts index 5ae3b6ec1..19e0f362f 100644 --- a/src/auto-reply/reply/agent-runner.heartbeat-typing.test.ts +++ b/src/auto-reply/reply/agent-runner.heartbeat-typing.test.ts @@ -5,10 +5,11 @@ import { describe, expect, it, vi } from "vitest"; import type { SessionEntry } from "../../config/sessions.js"; import * as sessions from "../../config/sessions.js"; +import type { TypingMode } from "../../config/types.js"; import type { TemplateContext } from "../templating.js"; import type { GetReplyOptions } from "../types.js"; import type { FollowupRun, QueueSettings } from "./queue.js"; -import type { TypingController } from "./typing.js"; +import { createMockTypingController } from "./test-helpers.js"; const runEmbeddedPiAgentMock 
= vi.fn(); @@ -45,18 +46,6 @@ vi.mock("./queue.js", async () => { import { runReplyAgent } from "./agent-runner.js"; -function createTyping(): TypingController { - return { - onReplyStart: vi.fn(async () => {}), - startTypingLoop: vi.fn(async () => {}), - startTypingOnText: vi.fn(async () => {}), - refreshTypingTtl: vi.fn(), - markRunComplete: vi.fn(), - markDispatchIdle: vi.fn(), - cleanup: vi.fn(), - }; -} - type EmbeddedPiAgentParams = { onPartialReply?: (payload: { text?: string }) => Promise | void; }; @@ -68,8 +57,9 @@ function createMinimalRun(params?: { sessionEntry?: SessionEntry; sessionKey?: string; storePath?: string; + typingMode?: TypingMode; }) { - const typing = createTyping(); + const typing = createMockTypingController(); const opts = params?.opts; const sessionCtx = { Provider: "whatsapp", @@ -130,6 +120,7 @@ function createMinimalRun(params?: { blockStreamingEnabled: false, resolvedBlockStreamingBreak: "message_end", shouldInjectGroupIntro: false, + typingMode: params?.typingMode ?? 
"instant", }), }; } @@ -173,6 +164,63 @@ describe("runReplyAgent typing (heartbeat)", () => { expect(typing.startTypingLoop).not.toHaveBeenCalled(); }); + it("starts typing only on deltas in message mode", async () => { + runEmbeddedPiAgentMock.mockImplementationOnce(async () => ({ + payloads: [{ text: "final" }], + meta: {}, + })); + + const { run, typing } = createMinimalRun({ + typingMode: "message", + }); + await run(); + + expect(typing.startTypingOnText).not.toHaveBeenCalled(); + expect(typing.startTypingLoop).not.toHaveBeenCalled(); + }); + + it("starts typing from reasoning stream in thinking mode", async () => { + runEmbeddedPiAgentMock.mockImplementationOnce( + async (params: { + onPartialReply?: (payload: { text?: string }) => Promise | void; + onReasoningStream?: (payload: { + text?: string; + }) => Promise | void; + }) => { + await params.onReasoningStream?.({ text: "Reasoning:\nstep" }); + await params.onPartialReply?.({ text: "hi" }); + return { payloads: [{ text: "final" }], meta: {} }; + }, + ); + + const { run, typing } = createMinimalRun({ + typingMode: "thinking", + }); + await run(); + + expect(typing.startTypingLoop).toHaveBeenCalled(); + expect(typing.startTypingOnText).not.toHaveBeenCalled(); + }); + + it("suppresses typing in never mode", async () => { + runEmbeddedPiAgentMock.mockImplementationOnce( + async (params: { + onPartialReply?: (payload: { text?: string }) => void; + }) => { + params.onPartialReply?.({ text: "hi" }); + return { payloads: [{ text: "final" }], meta: {} }; + }, + ); + + const { run, typing } = createMinimalRun({ + typingMode: "never", + }); + await run(); + + expect(typing.startTypingOnText).not.toHaveBeenCalled(); + expect(typing.startTypingLoop).not.toHaveBeenCalled(); + }); + it("announces auto-compaction in verbose mode and tracks count", async () => { const storePath = path.join( await fs.mkdtemp(path.join(tmpdir(), "clawdbot-compaction-")), diff --git a/src/auto-reply/reply/agent-runner.ts 
b/src/auto-reply/reply/agent-runner.ts index cf64530da..358c16b28 100644 --- a/src/auto-reply/reply/agent-runner.ts +++ b/src/auto-reply/reply/agent-runner.ts @@ -14,6 +14,7 @@ import { type SessionEntry, saveSessionStore, } from "../../config/sessions.js"; +import type { TypingMode } from "../../config/types.js"; import { logVerbose } from "../../globals.js"; import { registerAgentRunContext } from "../../infra/agent-events.js"; import { defaultRuntime } from "../../runtime.js"; @@ -32,6 +33,7 @@ import { import { extractReplyToTag } from "./reply-tags.js"; import { incrementCompactionCount } from "./session-updates.js"; import type { TypingController } from "./typing.js"; +import { createTypingSignaler } from "./typing-mode.js"; const BUN_FETCH_SOCKET_ERROR_RE = /socket connection was closed unexpectedly/i; @@ -76,6 +78,7 @@ export async function runReplyAgent(params: { resolvedBlockStreamingBreak: "text_end" | "message_end"; sessionCtx: TemplateContext; shouldInjectGroupIntro: boolean; + typingMode: TypingMode; }): Promise { const { commandBody, @@ -101,9 +104,15 @@ export async function runReplyAgent(params: { resolvedBlockStreamingBreak, sessionCtx, shouldInjectGroupIntro, + typingMode, } = params; const isHeartbeat = opts?.isHeartbeat === true; + const typingSignals = createTypingSignaler({ + typing, + mode: typingMode, + isHeartbeat, + }); const shouldEmitToolResult = () => { if (!sessionKey || !storePath) { @@ -173,6 +182,7 @@ export async function runReplyAgent(params: { const runFollowupTurn = createFollowupRunner({ opts, typing, + typingMode, sessionEntry, sessionStore, sessionKey, @@ -252,23 +262,23 @@ export async function runReplyAgent(params: { } text = stripped.text; } - if (!isHeartbeat) { - await typing.startTypingOnText(text); - } + await typingSignals.signalTextDelta(text); await opts.onPartialReply?.({ text, mediaUrls: payload.mediaUrls, }); } : undefined, - onReasoningStream: opts?.onReasoningStream - ? 
async (payload) => { - await opts.onReasoningStream?.({ - text: payload.text, - mediaUrls: payload.mediaUrls, - }); - } - : undefined, + onReasoningStream: + typingSignals.shouldStartOnReasoning || opts?.onReasoningStream + ? async (payload) => { + await typingSignals.signalReasoningDelta(); + await opts?.onReasoningStream?.({ + text: payload.text, + mediaUrls: payload.mediaUrls, + }); + } + : undefined, onAgentEvent: (evt) => { if (evt.stream !== "compaction") return; const phase = @@ -320,9 +330,7 @@ export async function runReplyAgent(params: { } pendingStreamedPayloadKeys.add(payloadKey); const task = (async () => { - if (!isHeartbeat) { - await typing.startTypingOnText(cleaned); - } + await typingSignals.signalTextDelta(cleaned); await opts.onBlockReply?.(blockPayload); })() .then(() => { @@ -367,9 +375,7 @@ export async function runReplyAgent(params: { } text = stripped.text; } - if (!isHeartbeat) { - await typing.startTypingOnText(text); - } + await typingSignals.signalTextDelta(text); await opts.onToolResult?.({ text, mediaUrls: payload.mediaUrls, @@ -524,8 +530,8 @@ export async function runReplyAgent(params: { if (payload.mediaUrls && payload.mediaUrls.length > 0) return true; return false; }); - if (shouldSignalTyping && !isHeartbeat) { - await typing.startTypingLoop(); + if (shouldSignalTyping) { + await typingSignals.signalRunStart(); } if (sessionStore && sessionKey) { diff --git a/src/auto-reply/reply/commands.ts b/src/auto-reply/reply/commands.ts index fce665e9f..30152dda9 100644 --- a/src/auto-reply/reply/commands.ts +++ b/src/auto-reply/reply/commands.ts @@ -14,7 +14,7 @@ import { } from "../../agents/pi-embedded.js"; import type { ClawdbotConfig } from "../../config/config.js"; import { - resolveSessionTranscriptPath, + resolveSessionFilePath, type SessionEntry, type SessionScope, saveSessionStore, @@ -509,7 +509,7 @@ export async function handleCommands(params: { sessionId, sessionKey, messageProvider: command.provider, - sessionFile: 
resolveSessionTranscriptPath(sessionId), + sessionFile: resolveSessionFilePath(sessionId, sessionEntry), workspaceDir, config: cfg, skillsSnapshot: sessionEntry.skillsSnapshot, diff --git a/src/auto-reply/reply/directive-handling.ts b/src/auto-reply/reply/directive-handling.ts index b811248c3..5c368721c 100644 --- a/src/auto-reply/reply/directive-handling.ts +++ b/src/auto-reply/reply/directive-handling.ts @@ -181,7 +181,10 @@ export type InlineDirectives = { hasQueueOptions: boolean; }; -export function parseInlineDirectives(body: string): InlineDirectives { +export function parseInlineDirectives( + body: string, + options?: { modelAliases?: string[] }, +): InlineDirectives { const { cleaned: thinkCleaned, thinkLevel, @@ -213,7 +216,9 @@ export function parseInlineDirectives(body: string): InlineDirectives { rawModel, rawProfile, hasDirective: hasModelDirective, - } = extractModelDirective(statusCleaned); + } = extractModelDirective(statusCleaned, { + aliases: options?.modelAliases, + }); const { cleaned: queueCleaned, queueMode, diff --git a/src/auto-reply/reply/followup-runner.compaction.test.ts b/src/auto-reply/reply/followup-runner.compaction.test.ts index 6c319a310..1e4e6336b 100644 --- a/src/auto-reply/reply/followup-runner.compaction.test.ts +++ b/src/auto-reply/reply/followup-runner.compaction.test.ts @@ -5,7 +5,7 @@ import { describe, expect, it, vi } from "vitest"; import type { SessionEntry } from "../../config/sessions.js"; import type { FollowupRun } from "./queue.js"; -import type { TypingController } from "./typing.js"; +import { createMockTypingController } from "./test-helpers.js"; const runEmbeddedPiAgentMock = vi.fn(); @@ -31,18 +31,6 @@ vi.mock("../../agents/pi-embedded.js", () => ({ import { createFollowupRunner } from "./followup-runner.js"; -function createTyping(): TypingController { - return { - onReplyStart: vi.fn(async () => {}), - startTypingLoop: vi.fn(async () => {}), - startTypingOnText: vi.fn(async () => {}), - refreshTypingTtl: 
vi.fn(), - markRunComplete: vi.fn(), - markDispatchIdle: vi.fn(), - cleanup: vi.fn(), - }; -} - describe("createFollowupRunner compaction", () => { it("adds verbose auto-compaction notice and tracks count", async () => { const storePath = path.join( @@ -75,7 +63,8 @@ describe("createFollowupRunner compaction", () => { const runner = createFollowupRunner({ opts: { onBlockReply }, - typing: createTyping(), + typing: createMockTypingController(), + typingMode: "instant", sessionEntry, sessionStore, sessionKey: "main", diff --git a/src/auto-reply/reply/followup-runner.ts b/src/auto-reply/reply/followup-runner.ts index 026fc0e80..46fd90884 100644 --- a/src/auto-reply/reply/followup-runner.ts +++ b/src/auto-reply/reply/followup-runner.ts @@ -5,6 +5,7 @@ import { runWithModelFallback } from "../../agents/model-fallback.js"; import { runEmbeddedPiAgent } from "../../agents/pi-embedded.js"; import { hasNonzeroUsage } from "../../agents/usage.js"; import { type SessionEntry, saveSessionStore } from "../../config/sessions.js"; +import type { TypingMode } from "../../config/types.js"; import { logVerbose } from "../../globals.js"; import { registerAgentRunContext } from "../../infra/agent-events.js"; import { defaultRuntime } from "../../runtime.js"; @@ -16,10 +17,12 @@ import { extractReplyToTag } from "./reply-tags.js"; import { isRoutableChannel, routeReply } from "./route-reply.js"; import { incrementCompactionCount } from "./session-updates.js"; import type { TypingController } from "./typing.js"; +import { createTypingSignaler } from "./typing-mode.js"; export function createFollowupRunner(params: { opts?: GetReplyOptions; typing: TypingController; + typingMode: TypingMode; sessionEntry?: SessionEntry; sessionStore?: Record; sessionKey?: string; @@ -30,6 +33,7 @@ export function createFollowupRunner(params: { const { opts, typing, + typingMode, sessionEntry, sessionStore, sessionKey, @@ -37,6 +41,11 @@ export function createFollowupRunner(params: { defaultModel, 
agentCfgContextTokens, } = params; + const typingSignals = createTypingSignaler({ + typing, + mode: typingMode, + isHeartbeat: opts?.isHeartbeat === true, + }); /** * Sends followup payloads, routing to the originating channel if set. @@ -71,7 +80,7 @@ export function createFollowupRunner(params: { ) { continue; } - await typing.startTypingOnText(payload.text); + await typingSignals.signalTextDelta(payload.text); // Route to originating channel if set, otherwise fall back to dispatcher. if (shouldRouteToOriginating) { @@ -99,6 +108,7 @@ export function createFollowupRunner(params: { }; return async (queued: FollowupRun) => { + await typingSignals.signalRunStart(); try { const runId = crypto.randomUUID(); if (queued.run.sessionKey) { diff --git a/src/auto-reply/reply/session.test.ts b/src/auto-reply/reply/session.test.ts new file mode 100644 index 000000000..abcfe6996 --- /dev/null +++ b/src/auto-reply/reply/session.test.ts @@ -0,0 +1,112 @@ +import fs from "node:fs/promises"; +import os from "node:os"; +import path from "node:path"; + +import { describe, expect, it } from "vitest"; + +import type { ClawdbotConfig } from "../../config/config.js"; +import { saveSessionStore } from "../../config/sessions.js"; +import { initSessionState } from "./session.js"; + +describe("initSessionState thread forking", () => { + it("forks a new session from the parent session file", async () => { + const root = await fs.mkdtemp( + path.join(os.tmpdir(), "clawdbot-thread-session-"), + ); + const sessionsDir = path.join(root, "sessions"); + await fs.mkdir(sessionsDir, { recursive: true }); + + const parentSessionId = "parent-session"; + const parentSessionFile = path.join(sessionsDir, "parent.jsonl"); + const header = { + type: "session", + version: 3, + id: parentSessionId, + timestamp: new Date().toISOString(), + cwd: process.cwd(), + }; + const message = { + type: "message", + id: "m1", + parentId: null, + timestamp: new Date().toISOString(), + message: { role: "user", content: 
"Parent prompt" }, + }; + await fs.writeFile( + parentSessionFile, + `${JSON.stringify(header)}\n${JSON.stringify(message)}\n`, + "utf-8", + ); + + const storePath = path.join(root, "sessions.json"); + const parentSessionKey = "agent:main:slack:channel:C1"; + await saveSessionStore(storePath, { + [parentSessionKey]: { + sessionId: parentSessionId, + sessionFile: parentSessionFile, + updatedAt: Date.now(), + }, + }); + + const cfg = { + session: { store: storePath }, + } as ClawdbotConfig; + + const threadSessionKey = "agent:main:slack:channel:C1:thread:123"; + const threadLabel = "Slack thread #general: starter"; + const result = await initSessionState({ + ctx: { + Body: "Thread reply", + SessionKey: threadSessionKey, + ParentSessionKey: parentSessionKey, + ThreadLabel: threadLabel, + }, + cfg, + commandAuthorized: true, + }); + + expect(result.sessionKey).toBe(threadSessionKey); + expect(result.sessionEntry.sessionId).not.toBe(parentSessionId); + expect(result.sessionEntry.sessionFile).toBeTruthy(); + expect(result.sessionEntry.displayName).toBe(threadLabel); + + const newSessionFile = result.sessionEntry.sessionFile; + if (!newSessionFile) { + throw new Error("Missing session file for forked thread"); + } + const [headerLine] = (await fs.readFile(newSessionFile, "utf-8")) + .split(/\r?\n/) + .filter((line) => line.trim().length > 0); + const parsedHeader = JSON.parse(headerLine) as { + parentSession?: string; + }; + expect(parsedHeader.parentSession).toBe(parentSessionFile); + }); + + it("records topic-specific session files when MessageThreadId is present", async () => { + const root = await fs.mkdtemp( + path.join(os.tmpdir(), "clawdbot-topic-session-"), + ); + const storePath = path.join(root, "sessions.json"); + + const cfg = { + session: { store: storePath }, + } as ClawdbotConfig; + + const result = await initSessionState({ + ctx: { + Body: "Hello topic", + SessionKey: "agent:main:telegram:group:123:topic:456", + MessageThreadId: 456, + }, + cfg, + 
commandAuthorized: true, + }); + + const sessionFile = result.sessionEntry.sessionFile; + expect(sessionFile).toBeTruthy(); + expect(path.basename(sessionFile ?? "")).toBe( + `${result.sessionEntry.sessionId}-topic-456.jsonl`, + ); + }); +}); diff --git a/src/auto-reply/reply/session.ts b/src/auto-reply/reply/session.ts index 992fb2f61..0b141d82a 100644 --- a/src/auto-reply/reply/session.ts +++ b/src/auto-reply/reply/session.ts @@ -1,5 +1,11 @@ import crypto from "node:crypto"; +import fs from "node:fs"; +import path from "node:path"; +import { + CURRENT_SESSION_VERSION, + SessionManager, +} from "@mariozechner/pi-coding-agent"; import type { ClawdbotConfig } from "../../config/config.js"; import { buildGroupDisplayName, @@ -9,7 +15,9 @@ import { loadSessionStore, resolveAgentIdFromSessionKey, resolveGroupSessionKey, + resolveSessionFilePath, resolveSessionKey, + resolveSessionTranscriptPath, resolveStorePath, type SessionEntry, type SessionScope, @@ -36,6 +44,45 @@ export type SessionInitResult = { triggerBodyNormalized: string; }; +function forkSessionFromParent(params: { + parentEntry: SessionEntry; +}): { sessionId: string; sessionFile: string } | null { + const parentSessionFile = resolveSessionFilePath( + params.parentEntry.sessionId, + params.parentEntry, + ); + if (!parentSessionFile || !fs.existsSync(parentSessionFile)) return null; + try { + const manager = SessionManager.open(parentSessionFile); + const leafId = manager.getLeafId(); + if (leafId) { + const sessionFile = + manager.createBranchedSession(leafId) ?? 
manager.getSessionFile(); + const sessionId = manager.getSessionId(); + if (sessionFile && sessionId) return { sessionId, sessionFile }; + } + const sessionId = crypto.randomUUID(); + const timestamp = new Date().toISOString(); + const fileTimestamp = timestamp.replace(/[:.]/g, "-"); + const sessionFile = path.join( + manager.getSessionDir(), + `${fileTimestamp}_${sessionId}.jsonl`, + ); + const header = { + type: "session", + version: CURRENT_SESSION_VERSION, + id: sessionId, + timestamp, + cwd: manager.getCwd(), + parentSession: parentSessionFile, + }; + fs.writeFileSync(sessionFile, `${JSON.stringify(header)}\n`, "utf-8"); + return { sessionId, sessionFile }; + } catch { + return null; + } +} + export async function initSessionState(params: { ctx: MsgContext; cfg: ClawdbotConfig; @@ -189,6 +236,33 @@ export async function initSessionState(params: { } else if (!sessionEntry.chatType) { sessionEntry.chatType = "direct"; } + const threadLabel = ctx.ThreadLabel?.trim(); + if (threadLabel) { + sessionEntry.displayName = threadLabel; + } + const parentSessionKey = ctx.ParentSessionKey?.trim(); + if ( + isNewSession && + parentSessionKey && + parentSessionKey !== sessionKey && + sessionStore[parentSessionKey] + ) { + const forked = forkSessionFromParent({ + parentEntry: sessionStore[parentSessionKey], + }); + if (forked) { + sessionId = forked.sessionId; + sessionEntry.sessionId = forked.sessionId; + sessionEntry.sessionFile = forked.sessionFile; + } + } + if (!sessionEntry.sessionFile) { + sessionEntry.sessionFile = resolveSessionTranscriptPath( + sessionEntry.sessionId, + agentId, + ctx.MessageThreadId, + ); + } sessionStore[sessionKey] = sessionEntry; await saveSessionStore(storePath, sessionStore); diff --git a/src/auto-reply/reply/test-helpers.ts b/src/auto-reply/reply/test-helpers.ts new file mode 100644 index 000000000..c9f49ac58 --- /dev/null +++ b/src/auto-reply/reply/test-helpers.ts @@ -0,0 +1,15 @@ +import { vi } from "vitest"; + +import type { 
TypingController } from "./typing.js"; + +export function createMockTypingController(): TypingController { + return { + onReplyStart: vi.fn(async () => {}), + startTypingLoop: vi.fn(async () => {}), + startTypingOnText: vi.fn(async () => {}), + refreshTypingTtl: vi.fn(), + markRunComplete: vi.fn(), + markDispatchIdle: vi.fn(), + cleanup: vi.fn(), + }; +} diff --git a/src/auto-reply/reply/typing-mode.test.ts b/src/auto-reply/reply/typing-mode.test.ts new file mode 100644 index 000000000..9d3f06e1e --- /dev/null +++ b/src/auto-reply/reply/typing-mode.test.ts @@ -0,0 +1,141 @@ +import { describe, expect, it } from "vitest"; + +import { createMockTypingController } from "./test-helpers.js"; +import { createTypingSignaler, resolveTypingMode } from "./typing-mode.js"; + +describe("resolveTypingMode", () => { + it("defaults to instant for direct chats", () => { + expect( + resolveTypingMode({ + configured: undefined, + isGroupChat: false, + wasMentioned: false, + isHeartbeat: false, + }), + ).toBe("instant"); + }); + + it("defaults to message for group chats without mentions", () => { + expect( + resolveTypingMode({ + configured: undefined, + isGroupChat: true, + wasMentioned: false, + isHeartbeat: false, + }), + ).toBe("message"); + }); + + it("defaults to instant for mentioned group chats", () => { + expect( + resolveTypingMode({ + configured: undefined, + isGroupChat: true, + wasMentioned: true, + isHeartbeat: false, + }), + ).toBe("instant"); + }); + + it("honors configured mode across contexts", () => { + expect( + resolveTypingMode({ + configured: "thinking", + isGroupChat: false, + wasMentioned: false, + isHeartbeat: false, + }), + ).toBe("thinking"); + expect( + resolveTypingMode({ + configured: "message", + isGroupChat: true, + wasMentioned: true, + isHeartbeat: false, + }), + ).toBe("message"); + }); + + it("forces never for heartbeat runs", () => { + expect( + resolveTypingMode({ + configured: "instant", + isGroupChat: false, + wasMentioned: false, + 
isHeartbeat: true, + }), + ).toBe("never"); + }); +}); + +describe("createTypingSignaler", () => { + it("signals immediately for instant mode", async () => { + const typing = createMockTypingController(); + const signaler = createTypingSignaler({ + typing, + mode: "instant", + isHeartbeat: false, + }); + + await signaler.signalRunStart(); + + expect(typing.startTypingLoop).toHaveBeenCalled(); + }); + + it("signals on text for message mode", async () => { + const typing = createMockTypingController(); + const signaler = createTypingSignaler({ + typing, + mode: "message", + isHeartbeat: false, + }); + + await signaler.signalTextDelta("hello"); + + expect(typing.startTypingOnText).toHaveBeenCalledWith("hello"); + expect(typing.startTypingLoop).not.toHaveBeenCalled(); + }); + + it("signals on reasoning for thinking mode", async () => { + const typing = createMockTypingController(); + const signaler = createTypingSignaler({ + typing, + mode: "thinking", + isHeartbeat: false, + }); + + await signaler.signalReasoningDelta(); + + expect(typing.startTypingLoop).toHaveBeenCalled(); + }); + + it("refreshes ttl on text for thinking mode", async () => { + const typing = createMockTypingController(); + const signaler = createTypingSignaler({ + typing, + mode: "thinking", + isHeartbeat: false, + }); + + await signaler.signalTextDelta("hi"); + + expect(typing.refreshTypingTtl).toHaveBeenCalled(); + expect(typing.startTypingOnText).not.toHaveBeenCalled(); + }); + + it("suppresses typing when disabled", async () => { + const typing = createMockTypingController(); + const signaler = createTypingSignaler({ + typing, + mode: "instant", + isHeartbeat: true, + }); + + await signaler.signalRunStart(); + await signaler.signalTextDelta("hi"); + await signaler.signalReasoningDelta(); + + expect(typing.startTypingLoop).not.toHaveBeenCalled(); + expect(typing.startTypingOnText).not.toHaveBeenCalled(); + }); +}); diff --git a/src/auto-reply/reply/typing-mode.ts 
b/src/auto-reply/reply/typing-mode.ts new file mode 100644 index 000000000..e5ac1671e --- /dev/null +++ b/src/auto-reply/reply/typing-mode.ts @@ -0,0 +1,77 @@ +import type { TypingMode } from "../../config/types.js"; +import type { TypingController } from "./typing.js"; + +export type TypingModeContext = { + configured?: TypingMode; + isGroupChat: boolean; + wasMentioned: boolean; + isHeartbeat: boolean; +}; + +export const DEFAULT_GROUP_TYPING_MODE: TypingMode = "message"; + +export function resolveTypingMode({ + configured, + isGroupChat, + wasMentioned, + isHeartbeat, +}: TypingModeContext): TypingMode { + if (isHeartbeat) return "never"; + if (configured) return configured; + if (!isGroupChat || wasMentioned) return "instant"; + return DEFAULT_GROUP_TYPING_MODE; +} + +export type TypingSignaler = { + mode: TypingMode; + shouldStartImmediately: boolean; + shouldStartOnText: boolean; + shouldStartOnReasoning: boolean; + signalRunStart: () => Promise; + signalTextDelta: (text?: string) => Promise; + signalReasoningDelta: () => Promise; +}; + +export function createTypingSignaler(params: { + typing: TypingController; + mode: TypingMode; + isHeartbeat: boolean; +}): TypingSignaler { + const { typing, mode, isHeartbeat } = params; + const shouldStartImmediately = mode === "instant"; + const shouldStartOnText = mode === "message" || mode === "instant"; + const shouldStartOnReasoning = mode === "thinking"; + const disabled = isHeartbeat || mode === "never"; + + const signalRunStart = async () => { + if (disabled || !shouldStartImmediately) return; + await typing.startTypingLoop(); + }; + + const signalTextDelta = async (text?: string) => { + if (disabled) return; + if (shouldStartOnText) { + await typing.startTypingOnText(text); + return; + } + if (shouldStartOnReasoning) { + typing.refreshTypingTtl(); + } + }; + + const signalReasoningDelta = async () => { + if (disabled || !shouldStartOnReasoning) return; + await typing.startTypingLoop(); + typing.refreshTypingTtl(); 
+ }; + + return { + mode, + shouldStartImmediately, + shouldStartOnText, + shouldStartOnReasoning, + signalRunStart, + signalTextDelta, + signalReasoningDelta, + }; +} diff --git a/src/auto-reply/reply/typing.test.ts b/src/auto-reply/reply/typing.test.ts index 18c3fd322..da7033162 100644 --- a/src/auto-reply/reply/typing.test.ts +++ b/src/auto-reply/reply/typing.test.ts @@ -52,6 +52,21 @@ describe("typing controller", () => { expect(onReplyStart).toHaveBeenCalledTimes(3); }); + it("does not start typing after run completion", async () => { + vi.useFakeTimers(); + const onReplyStart = vi.fn(async () => {}); + const typing = createTypingController({ + onReplyStart, + typingIntervalSeconds: 1, + typingTtlMs: 30_000, + }); + + typing.markRunComplete(); + await typing.startTypingOnText("late text"); + vi.advanceTimersByTime(2_000); + expect(onReplyStart).not.toHaveBeenCalled(); + }); + it("does not restart typing after it has stopped", async () => { vi.useFakeTimers(); const onReplyStart = vi.fn(async () => {}); diff --git a/src/auto-reply/reply/typing.ts b/src/auto-reply/reply/typing.ts index 7850ec132..09cc4e51b 100644 --- a/src/auto-reply/reply/typing.ts +++ b/src/auto-reply/reply/typing.ts @@ -101,6 +101,7 @@ export function createTypingController(params: { const startTypingLoop = async () => { if (sealed) return; + if (runComplete) return; if (!onReplyStart) return; if (typingIntervalMs <= 0) return; if (typingTimer) return; diff --git a/src/auto-reply/status.test.ts b/src/auto-reply/status.test.ts index 97bafe16e..549334c7f 100644 --- a/src/auto-reply/status.test.ts +++ b/src/auto-reply/status.test.ts @@ -45,6 +45,29 @@ describe("buildStatusMessage", () => { expect(text).toContain("Queue: collect"); }); + it("prefers model overrides over last-run model", () => { + const text = buildStatusMessage({ + agent: { + model: "anthropic/claude-opus-4-5", + contextTokens: 32_000, + }, + sessionEntry: { + sessionId: "override-1", + updatedAt: 0, + providerOverride: "openai", 
+ modelOverride: "gpt-4.1-mini", + modelProvider: "anthropic", + model: "claude-haiku-4-5", + contextTokens: 32_000, + }, + sessionKey: "agent:main:main", + sessionScope: "per-sender", + queue: { mode: "collect", depth: 0 }, + }); + + expect(text).toContain("🧠 Model: openai/gpt-4.1-mini"); + }); + it("handles missing agent config gracefully", () => { const text = buildStatusMessage({ agent: {}, diff --git a/src/auto-reply/status.ts b/src/auto-reply/status.ts index da125ece8..db0849455 100644 --- a/src/auto-reply/status.ts +++ b/src/auto-reply/status.ts @@ -16,7 +16,7 @@ import { import type { ClawdbotConfig } from "../config/config.js"; import { resolveMainSessionKey, - resolveSessionTranscriptPath, + resolveSessionFilePath, type SessionEntry, type SessionScope, } from "../config/sessions.js"; @@ -185,6 +185,7 @@ const formatQueueDetails = (queue?: QueueStatus) => { const readUsageFromSessionLog = ( sessionId?: string, + sessionEntry?: SessionEntry, ): | { input: number; @@ -194,9 +195,9 @@ const readUsageFromSessionLog = ( model?: string; } | undefined => { - // Transcripts always live at: ~/.clawdbot/sessions/.jsonl + // Transcripts are stored at the session file path (fallback: ~/.clawdbot/sessions/.jsonl) if (!sessionId) return undefined; - const logPath = resolveSessionTranscriptPath(sessionId); + const logPath = resolveSessionFilePath(sessionId, sessionEntry); if (!fs.existsSync(logPath)) return undefined; try { @@ -249,8 +250,8 @@ export function buildStatusMessage(args: StatusArgs): string { defaultModel: DEFAULT_MODEL, }); const provider = - entry?.modelProvider ?? resolved.provider ?? DEFAULT_PROVIDER; - let model = entry?.model ?? resolved.model ?? DEFAULT_MODEL; + entry?.providerOverride ?? resolved.provider ?? DEFAULT_PROVIDER; + let model = entry?.modelOverride ?? resolved.model ?? DEFAULT_MODEL; let contextTokens = entry?.contextTokens ?? args.agent?.contextTokens ?? 
@@ -264,7 +265,7 @@ export function buildStatusMessage(args: StatusArgs): string { // Prefer prompt-size tokens from the session transcript when it looks larger // (cached prompt tokens are often missing from agent meta/store). if (args.includeTranscriptUsage) { - const logUsage = readUsageFromSessionLog(entry?.sessionId); + const logUsage = readUsageFromSessionLog(entry?.sessionId, entry); if (logUsage) { const candidate = logUsage.promptTokens || logUsage.total; if (!totalTokens || totalTokens === 0 || candidate > totalTokens) { diff --git a/src/auto-reply/templating.ts b/src/auto-reply/templating.ts index a63243237..398290c2f 100644 --- a/src/auto-reply/templating.ts +++ b/src/auto-reply/templating.ts @@ -15,10 +15,13 @@ export type MsgContext = { SessionKey?: string; /** Provider account id (multi-account). */ AccountId?: string; + ParentSessionKey?: string; MessageSid?: string; ReplyToId?: string; ReplyToBody?: string; ReplyToSender?: string; + ThreadStarterBody?: string; + ThreadLabel?: string; MediaPath?: string; MediaUrl?: string; MediaType?: string; diff --git a/src/cli/daemon-cli.coverage.test.ts b/src/cli/daemon-cli.coverage.test.ts new file mode 100644 index 000000000..1c8fedb5d --- /dev/null +++ b/src/cli/daemon-cli.coverage.test.ts @@ -0,0 +1,134 @@ +import { Command } from "commander"; +import { describe, expect, it, vi } from "vitest"; + +const callGateway = vi.fn(async () => ({ ok: true })); +const resolveGatewayProgramArguments = vi.fn(async () => ({ + programArguments: ["/bin/node", "cli", "gateway-daemon", "--port", "18789"], +})); +const serviceInstall = vi.fn().mockResolvedValue(undefined); +const serviceUninstall = vi.fn().mockResolvedValue(undefined); +const serviceStop = vi.fn().mockResolvedValue(undefined); +const serviceRestart = vi.fn().mockResolvedValue(undefined); +const serviceIsLoaded = vi.fn().mockResolvedValue(false); +const serviceReadCommand = vi.fn().mockResolvedValue(null); +const findExtraGatewayServices = vi.fn(async () => 
[]); + +const runtimeLogs: string[] = []; +const runtimeErrors: string[] = []; +const defaultRuntime = { + log: (msg: string) => runtimeLogs.push(msg), + error: (msg: string) => runtimeErrors.push(msg), + exit: (code: number) => { + throw new Error(`__exit__:${code}`); + }, +}; + +vi.mock("../gateway/call.js", () => ({ + callGateway: (opts: unknown) => callGateway(opts), +})); + +vi.mock("../daemon/program-args.js", () => ({ + resolveGatewayProgramArguments: (opts: unknown) => + resolveGatewayProgramArguments(opts), +})); + +vi.mock("../daemon/service.js", () => ({ + resolveGatewayService: () => ({ + label: "LaunchAgent", + loadedText: "loaded", + notLoadedText: "not loaded", + install: serviceInstall, + uninstall: serviceUninstall, + stop: serviceStop, + restart: serviceRestart, + isLoaded: serviceIsLoaded, + readCommand: serviceReadCommand, + }), +})); + +vi.mock("../daemon/legacy.js", () => ({ + findLegacyGatewayServices: () => [], +})); + +vi.mock("../daemon/inspect.js", () => ({ + findExtraGatewayServices: (env: unknown, opts?: unknown) => + findExtraGatewayServices(env, opts), +})); + +vi.mock("../runtime.js", () => ({ + defaultRuntime, +})); + +vi.mock("./deps.js", () => ({ + createDefaultDeps: () => {}, +})); + +describe("daemon-cli coverage", () => { + it("probes gateway status by default", async () => { + runtimeLogs.length = 0; + runtimeErrors.length = 0; + callGateway.mockClear(); + + const { registerDaemonCli } = await import("./daemon-cli.js"); + const program = new Command(); + program.exitOverride(); + registerDaemonCli(program); + + await program.parseAsync(["daemon", "status"], { from: "user" }); + + expect(callGateway).toHaveBeenCalledTimes(1); + expect(callGateway).toHaveBeenCalledWith( + expect.objectContaining({ method: "status" }), + ); + expect(findExtraGatewayServices).toHaveBeenCalled(); + }); + + it("passes deep scan flag for daemon status", async () => { + findExtraGatewayServices.mockClear(); + + const { registerDaemonCli } = await 
import("./daemon-cli.js"); + const program = new Command(); + program.exitOverride(); + registerDaemonCli(program); + + await program.parseAsync(["daemon", "status", "--deep"], { from: "user" }); + + expect(findExtraGatewayServices).toHaveBeenCalledWith( + expect.anything(), + expect.objectContaining({ deep: true }), + ); + }); + + it("installs the daemon when requested", async () => { + serviceIsLoaded.mockResolvedValueOnce(false); + serviceInstall.mockClear(); + + const { registerDaemonCli } = await import("./daemon-cli.js"); + const program = new Command(); + program.exitOverride(); + registerDaemonCli(program); + + await program.parseAsync(["daemon", "install", "--port", "18789"], { + from: "user", + }); + + expect(serviceInstall).toHaveBeenCalledTimes(1); + }); + + it("starts and stops the daemon via service helpers", async () => { + serviceRestart.mockClear(); + serviceStop.mockClear(); + serviceIsLoaded.mockResolvedValue(true); + + const { registerDaemonCli } = await import("./daemon-cli.js"); + const program = new Command(); + program.exitOverride(); + registerDaemonCli(program); + + await program.parseAsync(["daemon", "start"], { from: "user" }); + await program.parseAsync(["daemon", "stop"], { from: "user" }); + + expect(serviceRestart).toHaveBeenCalledTimes(1); + expect(serviceStop).toHaveBeenCalledTimes(1); + }); +}); diff --git a/src/cli/daemon-cli.ts b/src/cli/daemon-cli.ts new file mode 100644 index 000000000..0d6403482 --- /dev/null +++ b/src/cli/daemon-cli.ts @@ -0,0 +1,448 @@ +import path from "node:path"; +import type { Command } from "commander"; + +import { + DEFAULT_GATEWAY_DAEMON_RUNTIME, + isGatewayDaemonRuntime, +} from "../commands/daemon-runtime.js"; +import { loadConfig, resolveGatewayPort } from "../config/config.js"; +import { resolveIsNixMode } from "../config/paths.js"; +import { + GATEWAY_LAUNCH_AGENT_LABEL, + GATEWAY_SYSTEMD_SERVICE_NAME, + GATEWAY_WINDOWS_TASK_NAME, +} from "../daemon/constants.js"; +import { + type 
FindExtraGatewayServicesOptions, + findExtraGatewayServices, + renderGatewayServiceCleanupHints, +} from "../daemon/inspect.js"; +import { findLegacyGatewayServices } from "../daemon/legacy.js"; +import { resolveGatewayProgramArguments } from "../daemon/program-args.js"; +import { resolveGatewayService } from "../daemon/service.js"; +import { callGateway } from "../gateway/call.js"; +import { defaultRuntime } from "../runtime.js"; +import { createDefaultDeps } from "./deps.js"; + +type DaemonStatus = { + service: { + label: string; + loaded: boolean; + loadedText: string; + notLoadedText: string; + command?: { + programArguments: string[]; + workingDirectory?: string; + } | null; + }; + rpc?: { + ok: boolean; + error?: string; + }; + legacyServices: Array<{ label: string; detail: string }>; + extraServices: Array<{ label: string; detail: string; scope: string }>; +}; + +export type GatewayRpcOpts = { + url?: string; + token?: string; + password?: string; + timeout?: string; +}; + +export type DaemonStatusOptions = { + rpc: GatewayRpcOpts; + probe: boolean; + json: boolean; +} & FindExtraGatewayServicesOptions; + +export type DaemonInstallOptions = { + port?: string | number; + runtime?: string; + token?: string; +}; + +function parsePort(raw: unknown): number | null { + if (raw === undefined || raw === null) return null; + const value = + typeof raw === "string" + ? raw + : typeof raw === "number" || typeof raw === "bigint" + ? raw.toString() + : null; + if (value === null) return null; + const parsed = Number.parseInt(value, 10); + if (!Number.isFinite(parsed) || parsed <= 0) return null; + return parsed; +} + +async function probeGatewayStatus(opts: GatewayRpcOpts) { + try { + await callGateway({ + url: opts.url, + token: opts.token, + password: opts.password, + method: "status", + timeoutMs: Number(opts.timeout ?? 
10_000), + clientName: "cli", + mode: "cli", + }); + return { ok: true } as const; + } catch (err) { + return { + ok: false, + error: err instanceof Error ? err.message : String(err), + } as const; + } +} + +function renderGatewayServiceStartHints(): string[] { + switch (process.platform) { + case "darwin": + return [ + `launchctl bootstrap gui/$UID ~/Library/LaunchAgents/${GATEWAY_LAUNCH_AGENT_LABEL}.plist`, + ]; + case "linux": + return [`systemctl --user start ${GATEWAY_SYSTEMD_SERVICE_NAME}.service`]; + case "win32": + return [`schtasks /Run /TN "${GATEWAY_WINDOWS_TASK_NAME}"`]; + default: + return []; + } +} + +async function gatherDaemonStatus(opts: { + rpc: GatewayRpcOpts; + probe: boolean; + deep?: boolean; +}): Promise { + const service = resolveGatewayService(); + const [loaded, command] = await Promise.all([ + service.isLoaded({ env: process.env }).catch(() => false), + service.readCommand(process.env).catch(() => null), + ]); + const legacyServices = await findLegacyGatewayServices(process.env); + const extraServices = await findExtraGatewayServices(process.env, { + deep: opts.deep, + }); + const rpc = opts.probe ? await probeGatewayStatus(opts.rpc) : undefined; + + return { + service: { + label: service.label, + loaded, + loadedText: service.loadedText, + notLoadedText: service.notLoadedText, + command, + }, + rpc, + legacyServices, + extraServices, + }; +} + +function printDaemonStatus(status: DaemonStatus, opts: { json: boolean }) { + if (opts.json) { + defaultRuntime.log(JSON.stringify(status, null, 2)); + return; + } + + const { service, rpc, legacyServices, extraServices } = status; + defaultRuntime.log( + `Service: ${service.label} (${service.loaded ? 
service.loadedText : service.notLoadedText})`, + ); + if (service.command?.programArguments?.length) { + defaultRuntime.log( + `Command: ${service.command.programArguments.join(" ")}`, + ); + } + if (service.command?.workingDirectory) { + defaultRuntime.log(`Working dir: ${service.command.workingDirectory}`); + } + if (rpc) { + if (rpc.ok) { + defaultRuntime.log("RPC probe: ok"); + } else { + defaultRuntime.error(`RPC probe: failed (${rpc.error})`); + } + } + + if (legacyServices.length > 0) { + defaultRuntime.error("Legacy Clawdis services detected:"); + for (const svc of legacyServices) { + defaultRuntime.error(`- ${svc.label} (${svc.detail})`); + } + defaultRuntime.error("Cleanup: clawdbot doctor"); + } + + if (extraServices.length > 0) { + defaultRuntime.error("Other gateway-like services detected (best effort):"); + for (const svc of extraServices) { + defaultRuntime.error(`- ${svc.label} (${svc.scope}, ${svc.detail})`); + } + for (const hint of renderGatewayServiceCleanupHints()) { + defaultRuntime.error(`Cleanup hint: ${hint}`); + } + } + + if (legacyServices.length > 0 || extraServices.length > 0) { + defaultRuntime.error( + "Recommendation: run a single gateway per machine. 
One gateway supports multiple agents.", + ); + defaultRuntime.error( + "If you need multiple gateways, isolate ports + config/state (see docs: /gateway#multiple-gateways-same-host).", + ); + } +} + +export async function runDaemonStatus(opts: DaemonStatusOptions) { + try { + const status = await gatherDaemonStatus({ + rpc: opts.rpc, + probe: Boolean(opts.probe), + deep: Boolean(opts.deep), + }); + printDaemonStatus(status, { json: Boolean(opts.json) }); + } catch (err) { + defaultRuntime.error(`Daemon status failed: ${String(err)}`); + defaultRuntime.exit(1); + } +} + +export async function runDaemonInstall(opts: DaemonInstallOptions) { + if (resolveIsNixMode(process.env)) { + defaultRuntime.error("Nix mode detected; daemon install is disabled."); + defaultRuntime.exit(1); + return; + } + + const cfg = loadConfig(); + const portOverride = parsePort(opts.port); + if (opts.port !== undefined && portOverride === null) { + defaultRuntime.error("Invalid port"); + defaultRuntime.exit(1); + return; + } + const port = portOverride ?? resolveGatewayPort(cfg); + if (!Number.isFinite(port) || port <= 0) { + defaultRuntime.error("Invalid port"); + defaultRuntime.exit(1); + return; + } + const runtimeRaw = opts.runtime + ? 
String(opts.runtime) + : DEFAULT_GATEWAY_DAEMON_RUNTIME; + if (!isGatewayDaemonRuntime(runtimeRaw)) { + defaultRuntime.error('Invalid --runtime (use "node" or "bun")'); + defaultRuntime.exit(1); + return; + } + + const service = resolveGatewayService(); + let loaded = false; + try { + loaded = await service.isLoaded({ env: process.env }); + } catch (err) { + defaultRuntime.error(`Gateway service check failed: ${String(err)}`); + defaultRuntime.exit(1); + return; + } + if (loaded) { + defaultRuntime.log(`Gateway service already ${service.loadedText}.`); + return; + } + + const devMode = + process.argv[1]?.includes(`${path.sep}src${path.sep}`) && + process.argv[1]?.endsWith(".ts"); + const { programArguments, workingDirectory } = + await resolveGatewayProgramArguments({ + port, + dev: devMode, + runtime: runtimeRaw, + }); + const environment: Record = { + PATH: process.env.PATH, + CLAWDBOT_GATEWAY_TOKEN: + opts.token || + cfg.gateway?.auth?.token || + process.env.CLAWDBOT_GATEWAY_TOKEN, + CLAWDBOT_LAUNCHD_LABEL: + process.platform === "darwin" ? 
GATEWAY_LAUNCH_AGENT_LABEL : undefined, + }; + + try { + await service.install({ + env: process.env, + stdout: process.stdout, + programArguments, + workingDirectory, + environment, + }); + } catch (err) { + defaultRuntime.error(`Gateway install failed: ${String(err)}`); + defaultRuntime.exit(1); + } +} + +export async function runDaemonUninstall() { + if (resolveIsNixMode(process.env)) { + defaultRuntime.error("Nix mode detected; daemon uninstall is disabled."); + defaultRuntime.exit(1); + return; + } + + const service = resolveGatewayService(); + try { + await service.uninstall({ env: process.env, stdout: process.stdout }); + } catch (err) { + defaultRuntime.error(`Gateway uninstall failed: ${String(err)}`); + defaultRuntime.exit(1); + } +} + +export async function runDaemonStart() { + const service = resolveGatewayService(); + let loaded = false; + try { + loaded = await service.isLoaded({ env: process.env }); + } catch (err) { + defaultRuntime.error(`Gateway service check failed: ${String(err)}`); + defaultRuntime.exit(1); + return; + } + if (!loaded) { + defaultRuntime.log(`Gateway service ${service.notLoadedText}.`); + for (const hint of renderGatewayServiceStartHints()) { + defaultRuntime.log(`Start with: ${hint}`); + } + return; + } + try { + await service.restart({ stdout: process.stdout }); + } catch (err) { + defaultRuntime.error(`Gateway start failed: ${String(err)}`); + for (const hint of renderGatewayServiceStartHints()) { + defaultRuntime.error(`Start with: ${hint}`); + } + defaultRuntime.exit(1); + } +} + +export async function runDaemonStop() { + const service = resolveGatewayService(); + let loaded = false; + try { + loaded = await service.isLoaded({ env: process.env }); + } catch (err) { + defaultRuntime.error(`Gateway service check failed: ${String(err)}`); + defaultRuntime.exit(1); + return; + } + if (!loaded) { + defaultRuntime.log(`Gateway service ${service.notLoadedText}.`); + return; + } + try { + await service.stop({ stdout: process.stdout 
}); + } catch (err) { + defaultRuntime.error(`Gateway stop failed: ${String(err)}`); + defaultRuntime.exit(1); + } +} + +export async function runDaemonRestart() { + const service = resolveGatewayService(); + let loaded = false; + try { + loaded = await service.isLoaded({ env: process.env }); + } catch (err) { + defaultRuntime.error(`Gateway service check failed: ${String(err)}`); + defaultRuntime.exit(1); + return; + } + if (!loaded) { + defaultRuntime.log(`Gateway service ${service.notLoadedText}.`); + for (const hint of renderGatewayServiceStartHints()) { + defaultRuntime.log(`Start with: ${hint}`); + } + return; + } + try { + await service.restart({ stdout: process.stdout }); + } catch (err) { + defaultRuntime.error(`Gateway restart failed: ${String(err)}`); + defaultRuntime.exit(1); + } +} + +export function registerDaemonCli(program: Command) { + const daemon = program + .command("daemon") + .description( + "Manage the Gateway daemon service (launchd/systemd/schtasks)", + ); + + daemon + .command("status") + .description("Show daemon install status + probe the Gateway") + .option( + "--url ", + "Gateway WebSocket URL (defaults to config/remote/local)", + ) + .option("--token ", "Gateway token (if required)") + .option("--password ", "Gateway password (password auth)") + .option("--timeout ", "Timeout in ms", "10000") + .option("--no-probe", "Skip RPC probe") + .option("--deep", "Scan system-level services", false) + .option("--json", "Output JSON", false) + .action(async (opts) => { + await runDaemonStatus({ + rpc: opts, + probe: Boolean(opts.probe), + deep: Boolean(opts.deep), + json: Boolean(opts.json), + }); + }); + + daemon + .command("install") + .description("Install the Gateway service (launchd/systemd/schtasks)") + .option("--port ", "Gateway port") + .option("--runtime ", "Daemon runtime (node|bun). 
Default: node") + .option("--token ", "Gateway token (token auth)") + .action(async (opts) => { + await runDaemonInstall(opts); + }); + + daemon + .command("uninstall") + .description("Uninstall the Gateway service (launchd/systemd/schtasks)") + .action(async () => { + await runDaemonUninstall(); + }); + + daemon + .command("start") + .description("Start the Gateway service (launchd/systemd/schtasks)") + .action(async () => { + await runDaemonStart(); + }); + + daemon + .command("stop") + .description("Stop the Gateway service (launchd/systemd/schtasks)") + .action(async () => { + await runDaemonStop(); + }); + + daemon + .command("restart") + .description("Restart the Gateway service (launchd/systemd/schtasks)") + .action(async () => { + await runDaemonRestart(); + }); + + // Build default deps (parity with other commands). + void createDefaultDeps(); +} diff --git a/src/cli/gateway-cli.coverage.test.ts b/src/cli/gateway-cli.coverage.test.ts index c4a134a6d..9bdef0027 100644 --- a/src/cli/gateway-cli.coverage.test.ts +++ b/src/cli/gateway-cli.coverage.test.ts @@ -13,7 +13,9 @@ const forceFreePortAndWait = vi.fn(async () => ({ waitedMs: 0, escalatedToSigkill: false, })); +const serviceInstall = vi.fn().mockResolvedValue(undefined); const serviceStop = vi.fn().mockResolvedValue(undefined); +const serviceUninstall = vi.fn().mockResolvedValue(undefined); const serviceRestart = vi.fn().mockResolvedValue(undefined); const serviceIsLoaded = vi.fn().mockResolvedValue(true); @@ -82,8 +84,8 @@ vi.mock("../daemon/service.js", () => ({ label: "LaunchAgent", loadedText: "loaded", notLoadedText: "not loaded", - install: vi.fn(), - uninstall: vi.fn(), + install: serviceInstall, + uninstall: serviceUninstall, stop: serviceStop, restart: serviceRestart, isLoaded: serviceIsLoaded, @@ -91,6 +93,12 @@ vi.mock("../daemon/service.js", () => ({ }), })); +vi.mock("../daemon/program-args.js", () => ({ + resolveGatewayProgramArguments: async () => ({ + programArguments: ["/bin/node", 
"cli", "gateway-daemon", "--port", "18789"], + }), +})); + describe("gateway-cli coverage", () => { it("registers call/health/status/send/agent commands and routes to callGateway", async () => { runtimeLogs.length = 0; @@ -264,6 +272,30 @@ describe("gateway-cli coverage", () => { expect(serviceRestart).toHaveBeenCalledTimes(1); }); + it("supports gateway install/uninstall/start via daemon helpers", async () => { + runtimeLogs.length = 0; + runtimeErrors.length = 0; + serviceInstall.mockClear(); + serviceUninstall.mockClear(); + serviceRestart.mockClear(); + serviceIsLoaded.mockResolvedValueOnce(false); + + const { registerGatewayCli } = await import("./gateway-cli.js"); + const program = new Command(); + program.exitOverride(); + registerGatewayCli(program); + + await program.parseAsync(["gateway", "install", "--port", "18789"], { + from: "user", + }); + await program.parseAsync(["gateway", "uninstall"], { from: "user" }); + await program.parseAsync(["gateway", "start"], { from: "user" }); + + expect(serviceInstall).toHaveBeenCalledTimes(1); + expect(serviceUninstall).toHaveBeenCalledTimes(1); + expect(serviceRestart).toHaveBeenCalledTimes(1); + }); + it("prints stop hints on GatewayLockError when service is loaded", async () => { runtimeLogs.length = 0; runtimeErrors.length = 0; diff --git a/src/cli/gateway-cli.ts b/src/cli/gateway-cli.ts index 6ac33db34..72e5badd2 100644 --- a/src/cli/gateway-cli.ts +++ b/src/cli/gateway-cli.ts @@ -22,6 +22,14 @@ import { setVerbose } from "../globals.js"; import { GatewayLockError } from "../infra/gateway-lock.js"; import { createSubsystemLogger } from "../logging.js"; import { defaultRuntime } from "../runtime.js"; +import { + runDaemonInstall, + runDaemonRestart, + runDaemonStart, + runDaemonStatus, + runDaemonStop, + runDaemonUninstall, +} from "./daemon-cli.js"; import { createDefaultDeps } from "./deps.js"; import { forceFreePortAndWait } from "./ports.js"; @@ -91,21 +99,6 @@ function renderGatewayServiceStopHints(): 
string[] { } } -function renderGatewayServiceStartHints(): string[] { - switch (process.platform) { - case "darwin": - return [ - `launchctl bootstrap gui/$UID ~/Library/LaunchAgents/${GATEWAY_LAUNCH_AGENT_LABEL}.plist`, - ]; - case "linux": - return [`systemctl --user start ${GATEWAY_SYSTEMD_SERVICE_NAME}.service`]; - case "win32": - return [`schtasks /Run /TN "${GATEWAY_WINDOWS_TASK_NAME}"`]; - default: - return []; - } -} - async function maybeExplainGatewayServiceStop() { const service = resolveGatewayService(); let loaded: boolean | null = null; @@ -594,6 +587,62 @@ export function registerGatewayCli(program: Command) { } }); + gateway + .command("install") + .description( + "Install the Gateway service (alias for `clawdbot daemon install`)", + ) + .option("--port ", "Gateway port") + .option("--runtime ", "Daemon runtime (node|bun). Default: node") + .option("--token ", "Gateway token (token auth)") + .action(async (opts) => { + await runDaemonInstall(opts); + }); + + gateway + .command("uninstall") + .description( + "Uninstall the Gateway service (alias for `clawdbot daemon uninstall`)", + ) + .action(async () => { + await runDaemonUninstall(); + }); + + gateway + .command("start") + .description( + "Start the Gateway service (alias for `clawdbot daemon start`)", + ) + .action(async () => { + await runDaemonStart(); + }); + + const gatewayDaemon = gateway + .command("daemon") + .description("Daemon helpers (alias for `clawdbot daemon`)"); + + gatewayDaemon + .command("status") + .description("Show daemon install status + probe the Gateway") + .option( + "--url ", + "Gateway WebSocket URL (defaults to config/remote/local)", + ) + .option("--token ", "Gateway token (if required)") + .option("--password ", "Gateway password (password auth)") + .option("--timeout ", "Timeout in ms", "10000") + .option("--no-probe", "Skip RPC probe") + .option("--deep", "Scan system-level services", false) + .option("--json", "Output JSON", false) + .action(async (opts) => { + 
await runDaemonStatus({ + rpc: opts, + probe: Boolean(opts.probe), + deep: Boolean(opts.deep), + json: Boolean(opts.json), + }); + }); + gatewayCallOpts( gateway .command("call") @@ -737,53 +786,14 @@ export function registerGatewayCli(program: Command) { .command("stop") .description("Stop the Gateway service (launchd/systemd/schtasks)") .action(async () => { - const service = resolveGatewayService(); - let loaded = false; - try { - loaded = await service.isLoaded({ env: process.env }); - } catch (err) { - defaultRuntime.error(`Gateway service check failed: ${String(err)}`); - defaultRuntime.exit(1); - return; - } - if (!loaded) { - defaultRuntime.log(`Gateway service ${service.notLoadedText}.`); - return; - } - try { - await service.stop({ stdout: process.stdout }); - } catch (err) { - defaultRuntime.error(`Gateway stop failed: ${String(err)}`); - defaultRuntime.exit(1); - } + await runDaemonStop(); }); gateway .command("restart") .description("Restart the Gateway service (launchd/systemd/schtasks)") .action(async () => { - const service = resolveGatewayService(); - let loaded = false; - try { - loaded = await service.isLoaded({ env: process.env }); - } catch (err) { - defaultRuntime.error(`Gateway service check failed: ${String(err)}`); - defaultRuntime.exit(1); - return; - } - if (!loaded) { - defaultRuntime.log(`Gateway service ${service.notLoadedText}.`); - for (const hint of renderGatewayServiceStartHints()) { - defaultRuntime.log(`Start with: ${hint}`); - } - return; - } - try { - await service.restart({ stdout: process.stdout }); - } catch (err) { - defaultRuntime.error(`Gateway restart failed: ${String(err)}`); - defaultRuntime.exit(1); - } + await runDaemonRestart(); }); // Build default deps (keeps parity with other commands; future-proofing). 
diff --git a/src/cli/program.ts b/src/cli/program.ts index 3ff9dbc73..bf315f06f 100644 --- a/src/cli/program.ts +++ b/src/cli/program.ts @@ -32,6 +32,7 @@ import { resolveWhatsAppAccount } from "../web/accounts.js"; import { registerBrowserCli } from "./browser-cli.js"; import { registerCanvasCli } from "./canvas-cli.js"; import { registerCronCli } from "./cron-cli.js"; +import { registerDaemonCli } from "./daemon-cli.js"; import { createDefaultDeps } from "./deps.js"; import { registerDnsCli } from "./dns-cli.js"; import { registerDocsCli } from "./docs-cli.js"; @@ -323,12 +324,14 @@ export function buildProgram() { "Run without prompts (safe migrations only)", false, ) + .option("--deep", "Scan system services for extra gateway installs", false) .action(async (opts) => { try { await doctorCommand(defaultRuntime, { workspaceSuggestions: opts.workspaceSuggestions, yes: Boolean(opts.yes), nonInteractive: Boolean(opts.nonInteractive), + deep: Boolean(opts.deep), }); } catch (err) { defaultRuntime.error(String(err)); @@ -624,6 +627,7 @@ Examples: }); registerCanvasCli(program); + registerDaemonCli(program); registerGatewayCli(program); registerModelsCli(program); registerNodesCli(program); diff --git a/src/cli/run-main.ts b/src/cli/run-main.ts index cd2fc8247..b9a4fa533 100644 --- a/src/cli/run-main.ts +++ b/src/cli/run-main.ts @@ -6,6 +6,7 @@ import { normalizeEnv } from "../infra/env.js"; import { isMainModule } from "../infra/is-main.js"; import { ensureClawdbotCliOnPath } from "../infra/path-env.js"; import { assertSupportedRuntime } from "../infra/runtime-guard.js"; +import { installUnhandledRejectionHandler } from "../infra/unhandled-rejections.js"; import { enableConsoleCapture } from "../logging.js"; export async function runCli(argv: string[] = process.argv) { @@ -24,13 +25,7 @@ export async function runCli(argv: string[] = process.argv) { // Global error handlers to prevent silent crashes from unhandled rejections/exceptions. 
// These log the error and exit gracefully instead of crashing without trace. - process.on("unhandledRejection", (reason, _promise) => { - console.error( - "[clawdbot] Unhandled promise rejection:", - reason instanceof Error ? (reason.stack ?? reason.message) : reason, - ); - process.exit(1); - }); + installUnhandledRejectionHandler(); process.on("uncaughtException", (error) => { console.error( diff --git a/src/commands/agent.test.ts b/src/commands/agent.test.ts index b25a95304..8383190f5 100644 --- a/src/commands/agent.test.ts +++ b/src/commands/agent.test.ts @@ -168,6 +168,45 @@ describe("agentCommand", () => { }); }); + it("keeps explicit sessionKey even when sessionId exists elsewhere", async () => { + await withTempHome(async (home) => { + const store = path.join(home, "sessions.json"); + fs.mkdirSync(path.dirname(store), { recursive: true }); + fs.writeFileSync( + store, + JSON.stringify( + { + "agent:main:main": { + sessionId: "sess-main", + updatedAt: Date.now(), + }, + }, + null, + 2, + ), + ); + mockConfig(home, store); + + await agentCommand( + { + message: "hi", + sessionId: "sess-main", + sessionKey: "agent:main:subagent:abc", + }, + runtime, + ); + + const callArgs = vi.mocked(runEmbeddedPiAgent).mock.calls.at(-1)?.[0]; + expect(callArgs?.sessionKey).toBe("agent:main:subagent:abc"); + + const saved = JSON.parse(fs.readFileSync(store, "utf-8")) as Record< + string, + { sessionId?: string } + >; + expect(saved["agent:main:subagent:abc"]?.sessionId).toBe("sess-main"); + }); + }); + it("defaults thinking to low for reasoning-capable models", async () => { await withTempHome(async (home) => { const store = path.join(home, "sessions.json"); diff --git a/src/commands/agent.ts b/src/commands/agent.ts index 86af31262..6386befe0 100644 --- a/src/commands/agent.ts +++ b/src/commands/agent.ts @@ -34,8 +34,9 @@ import { type ClawdbotConfig, loadConfig } from "../config/config.js"; import { DEFAULT_IDLE_MINUTES, loadSessionStore, + resolveAgentIdFromSessionKey, + 
resolveSessionFilePath, resolveSessionKey, - resolveSessionTranscriptPath, resolveStorePath, type SessionEntry, saveSessionStore, @@ -61,6 +62,7 @@ type AgentCommandOpts = { message: string; to?: string; sessionId?: string; + sessionKey?: string; thinking?: string; thinkingOnce?: string; verbose?: string; @@ -92,6 +94,7 @@ function resolveSession(opts: { cfg: ClawdbotConfig; to?: string; sessionId?: string; + sessionKey?: string; }): SessionResolution { const sessionCfg = opts.cfg.session; const scope = sessionCfg?.scope ?? "per-sender"; @@ -101,20 +104,25 @@ function resolveSession(opts: { 1, ); const idleMs = idleMinutes * 60_000; - const storePath = resolveStorePath(sessionCfg?.store); + const explicitSessionKey = opts.sessionKey?.trim(); + const storeAgentId = resolveAgentIdFromSessionKey(explicitSessionKey); + const storePath = resolveStorePath(sessionCfg?.store, { + agentId: storeAgentId, + }); const sessionStore = loadSessionStore(storePath); const now = Date.now(); const ctx: MsgContext | undefined = opts.to?.trim() ? { From: opts.to } : undefined; - let sessionKey: string | undefined = ctx - ? resolveSessionKey(scope, ctx, mainKey) - : undefined; + let sessionKey: string | undefined = + explicitSessionKey ?? + (ctx ? resolveSessionKey(scope, ctx, mainKey) : undefined); let sessionEntry = sessionKey ? sessionStore[sessionKey] : undefined; // If a session id was provided, prefer to re-use its entry (by id) even when no key was derived. if ( + !explicitSessionKey && opts.sessionId && (!sessionEntry || sessionEntry.sessionId !== opts.sessionId) ) { @@ -162,7 +170,7 @@ export async function agentCommand( ) { const body = (opts.message ?? 
"").trim(); if (!body) throw new Error("Message (--message) is required"); - if (!opts.to && !opts.sessionId) { + if (!opts.to && !opts.sessionId && !opts.sessionKey) { throw new Error("Pass --to or --session-id to choose a session"); } @@ -216,6 +224,7 @@ export async function agentCommand( cfg, to: opts.to, sessionId: opts.sessionId, + sessionKey: opts.sessionKey, }); const { @@ -377,7 +386,7 @@ export async function agentCommand( catalog: catalogForThinking, }); } - const sessionFile = resolveSessionTranscriptPath(sessionId); + const sessionFile = resolveSessionFilePath(sessionId, sessionEntry); const startedAt = Date.now(); let lifecycleEnded = false; diff --git a/src/commands/doctor.test.ts b/src/commands/doctor.test.ts index db25b6962..e48a01ce2 100644 --- a/src/commands/doctor.test.ts +++ b/src/commands/doctor.test.ts @@ -60,6 +60,8 @@ const createConfigIO = vi.fn(() => ({ const findLegacyGatewayServices = vi.fn().mockResolvedValue([]); const uninstallLegacyGatewayServices = vi.fn().mockResolvedValue([]); +const findExtraGatewayServices = vi.fn().mockResolvedValue([]); +const renderGatewayServiceCleanupHints = vi.fn().mockReturnValue(["cleanup"]); const resolveGatewayProgramArguments = vi.fn().mockResolvedValue({ programArguments: ["node", "cli", "gateway-daemon", "--port", "18789"], }); @@ -98,6 +100,11 @@ vi.mock("../daemon/legacy.js", () => ({ uninstallLegacyGatewayServices, })); +vi.mock("../daemon/inspect.js", () => ({ + findExtraGatewayServices, + renderGatewayServiceCleanupHints, +})); + vi.mock("../daemon/program-args.js", () => ({ resolveGatewayProgramArguments, })); diff --git a/src/commands/doctor.ts b/src/commands/doctor.ts index 4e958c4a7..b1027bfc3 100644 --- a/src/commands/doctor.ts +++ b/src/commands/doctor.ts @@ -24,6 +24,10 @@ import { } from "../config/config.js"; import { resolveGatewayPort, resolveIsNixMode } from "../config/paths.js"; import { GATEWAY_LAUNCH_AGENT_LABEL } from "../daemon/constants.js"; +import { + 
findExtraGatewayServices, + renderGatewayServiceCleanupHints, +} from "../daemon/inspect.js"; import { findLegacyGatewayServices, uninstallLegacyGatewayServices, @@ -351,6 +355,7 @@ type DoctorOptions = { workspaceSuggestions?: boolean; yes?: boolean; nonInteractive?: boolean; + deep?: boolean; }; type DoctorPrompter = { @@ -863,6 +868,34 @@ async function maybeMigrateLegacyGatewayService( }); } +async function maybeScanExtraGatewayServices(options: DoctorOptions) { + const extraServices = await findExtraGatewayServices(process.env, { + deep: options.deep, + }); + if (extraServices.length === 0) return; + + note( + extraServices + .map((svc) => `- ${svc.label} (${svc.scope}, ${svc.detail})`) + .join("\n"), + "Other gateway-like services detected", + ); + + const cleanupHints = renderGatewayServiceCleanupHints(); + if (cleanupHints.length > 0) { + note(cleanupHints.map((hint) => `- ${hint}`).join("\n"), "Cleanup hints"); + } + + note( + [ + "Recommendation: run a single gateway per machine.", + "One gateway supports multiple agents.", + "If you need multiple gateways, isolate ports + config/state (see docs: /gateway#multiple-gateways-same-host).", + ].join("\n"), + "Gateway recommendation", + ); +} + export async function doctorCommand( runtime: RuntimeEnv = defaultRuntime, options: DoctorOptions = {}, @@ -939,6 +972,7 @@ export async function doctorCommand( cfg = await maybeRepairSandboxImages(cfg, runtime, prompter); await maybeMigrateLegacyGatewayService(cfg, runtime, prompter); + await maybeScanExtraGatewayServices(options); await noteSecurityWarnings(cfg); diff --git a/src/commands/onboard-providers.ts b/src/commands/onboard-providers.ts index 77d1bf174..65679b18d 100644 --- a/src/commands/onboard-providers.ts +++ b/src/commands/onboard-providers.ts @@ -53,13 +53,12 @@ async function noteProviderPrimer(prompter: WizardPrompter): Promise { 'Public DMs require dmPolicy="open" + allowFrom=["*"].', "Docs: https://docs.clawd.bot/start/pairing", "", - "WhatsApp: 
links via WhatsApp Web (scan QR), stores creds for future sends.", - "WhatsApp: dedicated second number recommended; primary number OK (self-chat).", - "Telegram: Bot API (token from @BotFather), replies via your bot.", - "Discord: Bot token from Discord Developer Portal; invite bot to your server.", - "Slack: Socket Mode app token + bot token, DMs via App Home Messages tab.", - "Signal: signal-cli as a linked device; separate number recommended.", - "iMessage: local imsg CLI; separate Apple ID recommended only on a separate Mac.", + "Telegram: easiest start — register a bot with @BotFather, paste token, go.", + "WhatsApp: works with your own number; recommend a separate phone + eSIM.", + "Discord: very well supported right now.", + "Slack: supported (Socket Mode).", + "Signal: signal-cli linked device; more setup (if you want easy, hop on Discord).", + "iMessage: this is still a work in progress.", ].join("\n"), "How providers work", ); @@ -182,7 +181,10 @@ async function noteSlackTokenHelp( ); } -function setWhatsAppDmPolicy(cfg: ClawdbotConfig, dmPolicy?: DmPolicy) { +function setWhatsAppDmPolicy( + cfg: ClawdbotConfig, + dmPolicy?: DmPolicy, +): ClawdbotConfig { return { ...cfg, whatsapp: { @@ -192,7 +194,10 @@ function setWhatsAppDmPolicy(cfg: ClawdbotConfig, dmPolicy?: DmPolicy) { }; } -function setWhatsAppAllowFrom(cfg: ClawdbotConfig, allowFrom?: string[]) { +function setWhatsAppAllowFrom( + cfg: ClawdbotConfig, + allowFrom?: string[], +): ClawdbotConfig { return { ...cfg, whatsapp: { @@ -202,6 +207,32 @@ function setWhatsAppAllowFrom(cfg: ClawdbotConfig, allowFrom?: string[]) { }; } +function setMessagesResponsePrefix( + cfg: ClawdbotConfig, + responsePrefix?: string, +): ClawdbotConfig { + return { + ...cfg, + messages: { + ...cfg.messages, + responsePrefix, + }, + }; +} + +function setWhatsAppSelfChatMode( + cfg: ClawdbotConfig, + selfChatMode?: boolean, +): ClawdbotConfig { + return { + ...cfg, + whatsapp: { + ...cfg.whatsapp, + selfChatMode, + }, + }; 
+} + function setTelegramDmPolicy(cfg: ClawdbotConfig, dmPolicy: DmPolicy) { const allowFrom = dmPolicy === "open" @@ -390,6 +421,7 @@ async function promptWhatsAppAllowFrom( const existingAllowFrom = cfg.whatsapp?.allowFrom ?? []; const existingLabel = existingAllowFrom.length > 0 ? existingAllowFrom.join(", ") : "unset"; + const existingResponsePrefix = cfg.messages?.responsePrefix; await prompter.note( [ @@ -405,6 +437,56 @@ async function promptWhatsAppAllowFrom( "WhatsApp DM access", ); + const phoneMode = (await prompter.select({ + message: "WhatsApp phone setup", + options: [ + { value: "personal", label: "This is my personal phone number" }, + { value: "separate", label: "Separate phone just for Clawdbot" }, + ], + })) as "personal" | "separate"; + + if (phoneMode === "personal") { + const entry = await prompter.text({ + message: "Your WhatsApp number (E.164)", + placeholder: "+15555550123", + initialValue: existingAllowFrom[0], + validate: (value) => { + const raw = String(value ?? "").trim(); + if (!raw) return "Required"; + const normalized = normalizeE164(raw); + if (!normalized) return `Invalid number: ${raw}`; + return undefined; + }, + }); + const normalized = normalizeE164(String(entry).trim()); + const merged = [ + ...existingAllowFrom + .filter((item) => item !== "*") + .map((item) => normalizeE164(item)) + .filter(Boolean), + normalized, + ]; + const unique = [...new Set(merged.filter(Boolean))]; + let next = setWhatsAppSelfChatMode(cfg, true); + next = setWhatsAppDmPolicy(next, "allowlist"); + next = setWhatsAppAllowFrom(next, unique); + if (existingResponsePrefix === undefined) { + next = setMessagesResponsePrefix(next, "[clawdbot]"); + } + await prompter.note( + [ + "Personal phone mode enabled.", + "- dmPolicy set to allowlist (pairing skipped)", + `- allowFrom includes ${normalized}`, + existingResponsePrefix === undefined + ? 
"- responsePrefix set to [clawdbot]" + : "- responsePrefix left unchanged", + ].join("\n"), + "WhatsApp personal phone", + ); + return next; + } + const policy = (await prompter.select({ message: "WhatsApp DM policy", options: [ @@ -415,8 +497,11 @@ async function promptWhatsAppAllowFrom( ], })) as DmPolicy; - const next = setWhatsAppDmPolicy(cfg, policy); - if (policy === "open") return setWhatsAppAllowFrom(next, ["*"]); + let next = setWhatsAppSelfChatMode(cfg, false); + next = setWhatsAppDmPolicy(next, policy); + if (policy === "open") { + next = setWhatsAppAllowFrom(next, ["*"]); + } if (policy === "disabled") return next; const options = @@ -439,38 +524,43 @@ async function promptWhatsAppAllowFrom( options: options.map((opt) => ({ value: opt.value, label: opt.label })), })) as (typeof options)[number]["value"]; - if (mode === "keep") return next; - if (mode === "unset") return setWhatsAppAllowFrom(next, undefined); + if (mode === "keep") { + // Keep allowFrom as-is. + } else if (mode === "unset") { + next = setWhatsAppAllowFrom(next, undefined); + } else { + const allowRaw = await prompter.text({ + message: "Allowed sender numbers (comma-separated, E.164)", + placeholder: "+15555550123, +447700900123", + validate: (value) => { + const raw = String(value ?? "").trim(); + if (!raw) return "Required"; + const parts = raw + .split(/[\n,;]+/g) + .map((p) => p.trim()) + .filter(Boolean); + if (parts.length === 0) return "Required"; + for (const part of parts) { + if (part === "*") continue; + const normalized = normalizeE164(part); + if (!normalized) return `Invalid number: ${part}`; + } + return undefined; + }, + }); - const allowRaw = await prompter.text({ - message: "Allowed sender numbers (comma-separated, E.164)", - placeholder: "+15555550123, +447700900123", - validate: (value) => { - const raw = String(value ?? 
"").trim(); - if (!raw) return "Required"; - const parts = raw - .split(/[\n,;]+/g) - .map((p) => p.trim()) - .filter(Boolean); - if (parts.length === 0) return "Required"; - for (const part of parts) { - if (part === "*") continue; - const normalized = normalizeE164(part); - if (!normalized) return `Invalid number: ${part}`; - } - return undefined; - }, - }); + const parts = String(allowRaw) + .split(/[\n,;]+/g) + .map((p) => p.trim()) + .filter(Boolean); + const normalized = parts.map((part) => + part === "*" ? "*" : normalizeE164(part), + ); + const unique = [...new Set(normalized.filter(Boolean))]; + next = setWhatsAppAllowFrom(next, unique); + } - const parts = String(allowRaw) - .split(/[\n,;]+/g) - .map((p) => p.trim()) - .filter(Boolean); - const normalized = parts.map((part) => - part === "*" ? "*" : normalizeE164(part), - ); - const unique = [...new Set(normalized.filter(Boolean))]; - return setWhatsAppAllowFrom(next, unique); + return next; } type SetupProvidersOptions = { @@ -518,8 +608,8 @@ export async function setupProviders( whatsappAccountId === DEFAULT_ACCOUNT_ID ? "default" : whatsappAccountId; await prompter.note( [ - `WhatsApp (${waAccountLabel}): ${whatsappLinked ? "linked" : "not linked"}`, `Telegram: ${telegramConfigured ? "configured" : "needs token"}`, + `WhatsApp (${waAccountLabel}): ${whatsappLinked ? "linked" : "not linked"}`, `Discord: ${discordConfigured ? "configured" : "needs token"}`, `Slack: ${slackConfigured ? "configured" : "needs tokens"}`, `Signal: ${signalConfigured ? "configured" : "needs setup"}`, @@ -541,16 +631,18 @@ export async function setupProviders( const selection = (await prompter.multiselect({ message: "Select providers", options: [ + { + value: "telegram", + label: "Telegram (Bot API)", + hint: telegramConfigured + ? "easy start · configured" + : "easy start · needs token", + }, { value: "whatsapp", label: "WhatsApp (QR link)", hint: whatsappLinked ? 
"linked" : "not linked", }, - { - value: "telegram", - label: "Telegram (Bot API)", - hint: telegramConfigured ? "configured" : "needs token", - }, { value: "discord", label: "Discord (Bot API)", @@ -576,6 +668,27 @@ export async function setupProviders( options?.onSelection?.(selection); + const selectionNotes: Record = { + telegram: + "Telegram — easiest start: register a bot with @BotFather and paste the token. Docs: https://docs.clawd.bot/telegram", + whatsapp: + "WhatsApp — works with your own number; recommend a separate phone + eSIM. Docs: https://docs.clawd.bot/whatsapp", + discord: + "Discord — very well supported right now. Docs: https://docs.clawd.bot/discord", + slack: + "Slack — supported (Socket Mode). Docs: https://docs.clawd.bot/slack", + signal: + "Signal — signal-cli linked device; more setup (if you want easy, hop on Discord). Docs: https://docs.clawd.bot/signal", + imessage: + "iMessage — this is still a work in progress. Docs: https://docs.clawd.bot/imessage", + }; + const selectedLines = selection + .map((provider) => selectionNotes[provider]) + .filter(Boolean); + if (selectedLines.length > 0) { + await prompter.note(selectedLines.join("\n"), "Selected providers"); + } + let next = cfg; if (selection.includes("whatsapp")) { @@ -993,6 +1106,7 @@ export async function setupProviders( await prompter.note( [ + "This is still a work in progress.", "Ensure Clawdbot has Full Disk Access to Messages DB.", "Grant Automation permission for Messages when prompted.", "List chats with: imsg chats --limit 20", diff --git a/src/config/cache-utils.ts b/src/config/cache-utils.ts new file mode 100644 index 000000000..df0178764 --- /dev/null +++ b/src/config/cache-utils.ts @@ -0,0 +1,27 @@ +import fs from "node:fs"; + +export function resolveCacheTtlMs(params: { + envValue: string | undefined; + defaultTtlMs: number; +}): number { + const { envValue, defaultTtlMs } = params; + if (envValue) { + const parsed = Number.parseInt(envValue, 10); + if 
(Number.isFinite(parsed) && parsed >= 0) { + return parsed; + } + } + return defaultTtlMs; +} + +export function isCacheEnabled(ttlMs: number): boolean { + return ttlMs > 0; +} + +export function getFileMtimeMs(filePath: string): number | undefined { + try { + return fs.statSync(filePath).mtimeMs; + } catch { + return undefined; + } +} diff --git a/src/config/schema.ts b/src/config/schema.ts index 5cf88f528..639c80f9f 100644 --- a/src/config/schema.ts +++ b/src/config/schema.ts @@ -108,10 +108,19 @@ const FIELD_LABELS: Record = { "telegram.botToken": "Telegram Bot Token", "telegram.dmPolicy": "Telegram DM Policy", "telegram.streamMode": "Telegram Stream Mode", + "telegram.retry.attempts": "Telegram Retry Attempts", + "telegram.retry.minDelayMs": "Telegram Retry Min Delay (ms)", + "telegram.retry.maxDelayMs": "Telegram Retry Max Delay (ms)", + "telegram.retry.jitter": "Telegram Retry Jitter", "whatsapp.dmPolicy": "WhatsApp DM Policy", + "whatsapp.selfChatMode": "WhatsApp Self-Phone Mode", "signal.dmPolicy": "Signal DM Policy", "imessage.dmPolicy": "iMessage DM Policy", "discord.dm.policy": "Discord DM Policy", + "discord.retry.attempts": "Discord Retry Attempts", + "discord.retry.minDelayMs": "Discord Retry Min Delay (ms)", + "discord.retry.maxDelayMs": "Discord Retry Max Delay (ms)", + "discord.retry.jitter": "Discord Retry Jitter", "slack.dm.policy": "Slack DM Policy", "discord.token": "Discord Bot Token", "slack.botToken": "Slack Bot Token", @@ -158,14 +167,32 @@ const FIELD_HELP: Record = { 'Direct message access control ("pairing" recommended). "open" requires telegram.allowFrom=["*"].', "telegram.streamMode": "Draft streaming mode for Telegram replies (off | partial | block). 
Requires private topics + sendMessageDraft.", + "telegram.retry.attempts": + "Max retry attempts for outbound Telegram API calls (default: 3).", + "telegram.retry.minDelayMs": + "Minimum retry delay in ms for Telegram outbound calls.", + "telegram.retry.maxDelayMs": + "Maximum retry delay cap in ms for Telegram outbound calls.", + "telegram.retry.jitter": + "Jitter factor (0-1) applied to Telegram retry delays.", "whatsapp.dmPolicy": 'Direct message access control ("pairing" recommended). "open" requires whatsapp.allowFrom=["*"].', + "whatsapp.selfChatMode": + "Same-phone setup (bot uses your personal WhatsApp number). Suppresses pairing replies for outbound DMs.", "signal.dmPolicy": 'Direct message access control ("pairing" recommended). "open" requires signal.allowFrom=["*"].', "imessage.dmPolicy": 'Direct message access control ("pairing" recommended). "open" requires imessage.allowFrom=["*"].', "discord.dm.policy": 'Direct message access control ("pairing" recommended). "open" requires discord.dm.allowFrom=["*"].', + "discord.retry.attempts": + "Max retry attempts for outbound Discord API calls (default: 3).", + "discord.retry.minDelayMs": + "Minimum retry delay in ms for Discord outbound calls.", + "discord.retry.maxDelayMs": + "Maximum retry delay cap in ms for Discord outbound calls.", + "discord.retry.jitter": + "Jitter factor (0-1) applied to Discord retry delays.", "slack.dm.policy": 'Direct message access control ("pairing" recommended). 
"open" requires slack.dm.allowFrom=["*"].', }; diff --git a/src/config/sessions.cache.test.ts b/src/config/sessions.cache.test.ts new file mode 100644 index 000000000..697a605b8 --- /dev/null +++ b/src/config/sessions.cache.test.ts @@ -0,0 +1,187 @@ +import fs from "node:fs"; +import os from "node:os"; +import path from "node:path"; +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; +import { + clearSessionStoreCacheForTest, + loadSessionStore, + type SessionEntry, + saveSessionStore, +} from "./sessions.js"; + +describe("Session Store Cache", () => { + let testDir: string; + let storePath: string; + + beforeEach(() => { + // Create a temporary directory for test + testDir = path.join(os.tmpdir(), `session-cache-test-${Date.now()}`); + fs.mkdirSync(testDir, { recursive: true }); + storePath = path.join(testDir, "sessions.json"); + + // Clear cache before each test + clearSessionStoreCacheForTest(); + + // Reset environment variable + delete process.env.CLAWDBOT_SESSION_CACHE_TTL_MS; + }); + + afterEach(() => { + // Clean up test directory + if (fs.existsSync(testDir)) { + fs.rmSync(testDir, { recursive: true, force: true }); + } + clearSessionStoreCacheForTest(); + delete process.env.CLAWDBOT_SESSION_CACHE_TTL_MS; + }); + + it("should load session store from disk on first call", async () => { + const testStore: Record = { + "session:1": { + sessionId: "id-1", + updatedAt: Date.now(), + displayName: "Test Session 1", + }, + }; + + // Write test data + await saveSessionStore(storePath, testStore); + + // Load it + const loaded = loadSessionStore(storePath); + expect(loaded).toEqual(testStore); + }); + + it("should cache session store on first load when file is unchanged", async () => { + const testStore: Record = { + "session:1": { + sessionId: "id-1", + updatedAt: Date.now(), + displayName: "Test Session 1", + }, + }; + + await saveSessionStore(storePath, testStore); + + const readSpy = vi.spyOn(fs, "readFileSync"); + + // First load - from 
disk + const loaded1 = loadSessionStore(storePath); + expect(loaded1).toEqual(testStore); + + // Second load - should return cached data (no extra disk read) + const loaded2 = loadSessionStore(storePath); + expect(loaded2).toEqual(testStore); + expect(readSpy).toHaveBeenCalledTimes(1); + readSpy.mockRestore(); + }); + + it("should refresh cache when store file changes on disk", async () => { + const testStore: Record = { + "session:1": { + sessionId: "id-1", + updatedAt: Date.now(), + displayName: "Test Session 1", + }, + }; + + await saveSessionStore(storePath, testStore); + + // First load - from disk + const loaded1 = loadSessionStore(storePath); + expect(loaded1).toEqual(testStore); + + // Modify file on disk while cache is valid + const modifiedStore: Record = { + "session:99": { sessionId: "id-99", updatedAt: Date.now() }, + }; + fs.writeFileSync(storePath, JSON.stringify(modifiedStore, null, 2)); + const bump = new Date(Date.now() + 2000); + fs.utimesSync(storePath, bump, bump); + + // Second load - should return the updated store + const loaded2 = loadSessionStore(storePath); + expect(loaded2).toEqual(modifiedStore); + }); + + it("should invalidate cache on write", async () => { + const testStore: Record = { + "session:1": { + sessionId: "id-1", + updatedAt: Date.now(), + displayName: "Test Session 1", + }, + }; + + await saveSessionStore(storePath, testStore); + + // Load - should cache + const loaded1 = loadSessionStore(storePath); + expect(loaded1).toEqual(testStore); + + // Update store + const updatedStore: Record = { + "session:1": { + ...testStore["session:1"], + displayName: "Updated Session 1", + }, + }; + + // Save - should invalidate cache + await saveSessionStore(storePath, updatedStore); + + // Load again - should get new data from disk + const loaded2 = loadSessionStore(storePath); + expect(loaded2["session:1"].displayName).toBe("Updated Session 1"); + }); + + it("should respect CLAWDBOT_SESSION_CACHE_TTL_MS=0 to disable cache", async () => { 
+ process.env.CLAWDBOT_SESSION_CACHE_TTL_MS = "0"; + clearSessionStoreCacheForTest(); + + const testStore: Record = { + "session:1": { + sessionId: "id-1", + updatedAt: Date.now(), + displayName: "Test Session 1", + }, + }; + + await saveSessionStore(storePath, testStore); + + // First load + const loaded1 = loadSessionStore(storePath); + expect(loaded1).toEqual(testStore); + + // Modify file on disk + const modifiedStore: Record = { + "session:2": { + sessionId: "id-2", + updatedAt: Date.now(), + displayName: "Test Session 2", + }, + }; + fs.writeFileSync(storePath, JSON.stringify(modifiedStore, null, 2)); + + // Second load - should read from disk (cache disabled) + const loaded2 = loadSessionStore(storePath); + expect(loaded2).toEqual(modifiedStore); // Should be modified, not cached + }); + + it("should handle non-existent store gracefully", () => { + const nonExistentPath = path.join(testDir, "non-existent.json"); + + // Should return empty store + const loaded = loadSessionStore(nonExistentPath); + expect(loaded).toEqual({}); + }); + + it("should handle invalid JSON gracefully", async () => { + // Write invalid JSON + fs.writeFileSync(storePath, "not valid json {"); + + // Should return empty store + const loaded = loadSessionStore(storePath); + expect(loaded).toEqual({}); + }); +}); diff --git a/src/config/sessions.test.ts b/src/config/sessions.test.ts index c7529eaf1..0a62e50cc 100644 --- a/src/config/sessions.test.ts +++ b/src/config/sessions.test.ts @@ -8,6 +8,7 @@ import { deriveSessionKey, loadSessionStore, resolveSessionKey, + resolveSessionTranscriptPath, resolveSessionTranscriptsDir, updateLastRoute, } from "./sessions.js"; @@ -147,4 +148,21 @@ describe("sessions", () => { ); expect(dir).toBe("/legacy/state/agents/main/sessions"); }); + + it("includes topic ids in session transcript filenames", () => { + const prev = process.env.CLAWDBOT_STATE_DIR; + process.env.CLAWDBOT_STATE_DIR = "/custom/state"; + try { + const sessionFile = 
resolveSessionTranscriptPath("sess-1", "main", 123); + expect(sessionFile).toBe( + "/custom/state/agents/main/sessions/sess-1-topic-123.jsonl", + ); + } finally { + if (prev === undefined) { + delete process.env.CLAWDBOT_STATE_DIR; + } else { + process.env.CLAWDBOT_STATE_DIR = prev; + } + } + }); }); diff --git a/src/config/sessions.ts b/src/config/sessions.ts index e1f986a1d..cbb348a27 100644 --- a/src/config/sessions.ts +++ b/src/config/sessions.ts @@ -14,8 +14,52 @@ import { parseAgentSessionKey, } from "../routing/session-key.js"; import { normalizeE164 } from "../utils.js"; +import { + getFileMtimeMs, + isCacheEnabled, + resolveCacheTtlMs, +} from "./cache-utils.js"; import { resolveStateDir } from "./paths.js"; +// ============================================================================ +// Session Store Cache with TTL Support +// ============================================================================ + +type SessionStoreCacheEntry = { + store: Record; + loadedAt: number; + storePath: string; + mtimeMs?: number; +}; + +const SESSION_STORE_CACHE = new Map(); +const DEFAULT_SESSION_STORE_TTL_MS = 45_000; // 45 seconds (between 30-60s) + +function getSessionStoreTtl(): number { + return resolveCacheTtlMs({ + envValue: process.env.CLAWDBOT_SESSION_CACHE_TTL_MS, + defaultTtlMs: DEFAULT_SESSION_STORE_TTL_MS, + }); +} + +function isSessionStoreCacheEnabled(): boolean { + return isCacheEnabled(getSessionStoreTtl()); +} + +function isSessionStoreCacheValid(entry: SessionStoreCacheEntry): boolean { + const now = Date.now(); + const ttl = getSessionStoreTtl(); + return now - entry.loadedAt <= ttl; +} + +function invalidateSessionStoreCache(storePath: string): void { + SESSION_STORE_CACHE.delete(storePath); +} + +export function clearSessionStoreCacheForTest(): void { + SESSION_STORE_CACHE.clear(); +} + export type SessionScope = "per-sender" | "global"; const GROUP_SURFACES = new Set([ @@ -33,6 +77,7 @@ export type SessionChatType = "direct" | "group" | "room"; 
export type SessionEntry = { sessionId: string; updatedAt: number; + sessionFile?: string; /** Parent session key that spawned this session (used for sandbox session-tool scoping). */ spawnedBy?: string; systemSent?: boolean; @@ -133,8 +178,24 @@ export const DEFAULT_IDLE_MINUTES = 60; export function resolveSessionTranscriptPath( sessionId: string, agentId?: string, + topicId?: number, ): string { - return path.join(resolveAgentSessionsDir(agentId), `${sessionId}.jsonl`); + const fileName = + topicId !== undefined + ? `${sessionId}-topic-${topicId}.jsonl` + : `${sessionId}.jsonl`; + return path.join(resolveAgentSessionsDir(agentId), fileName); +} + +export function resolveSessionFilePath( + sessionId: string, + entry?: SessionEntry, + opts?: { agentId?: string }, +): string { + const candidate = entry?.sessionFile?.trim(); + return candidate + ? candidate + : resolveSessionTranscriptPath(sessionId, opts?.agentId); } export function resolveStorePath(store?: string, opts?: { agentId?: string }) { @@ -328,22 +389,53 @@ export function resolveGroupSessionKey( export function loadSessionStore( storePath: string, ): Record { + // Check cache first if enabled + if (isSessionStoreCacheEnabled()) { + const cached = SESSION_STORE_CACHE.get(storePath); + if (cached && isSessionStoreCacheValid(cached)) { + const currentMtimeMs = getFileMtimeMs(storePath); + if (currentMtimeMs === cached.mtimeMs) { + // Return a shallow copy to prevent external mutations affecting cache + return { ...cached.store }; + } + invalidateSessionStoreCache(storePath); + } + } + + // Cache miss or disabled - load from disk + let store: Record = {}; + let mtimeMs = getFileMtimeMs(storePath); try { const raw = fs.readFileSync(storePath, "utf-8"); const parsed = JSON5.parse(raw); if (parsed && typeof parsed === "object") { - return parsed as Record; + store = parsed as Record; } + mtimeMs = getFileMtimeMs(storePath) ?? 
mtimeMs; } catch { // ignore missing/invalid store; we'll recreate it } - return {}; + + // Cache the result if caching is enabled + if (isSessionStoreCacheEnabled()) { + SESSION_STORE_CACHE.set(storePath, { + store: { ...store }, // Store a copy to prevent external mutations + loadedAt: Date.now(), + storePath, + mtimeMs, + }); + } + + return store; } export async function saveSessionStore( storePath: string, store: Record, ) { + // Invalidate cache on write to ensure consistency + invalidateSessionStoreCache(storePath); + await fs.promises.mkdir(path.dirname(storePath), { recursive: true }); const json = JSON.stringify(store, null, 2); const tmp = `${storePath}.${process.pid}.${crypto.randomUUID()}.tmp`; @@ -393,6 +485,7 @@ export async function updateLastRoute(params: { const next: SessionEntry = { sessionId: existing?.sessionId ?? crypto.randomUUID(), updatedAt: Math.max(existing?.updatedAt ?? 0, now), + sessionFile: existing?.sessionFile, systemSent: existing?.systemSent, abortedLastRun: existing?.abortedLastRun, thinkingLevel: existing?.thinkingLevel, diff --git a/src/config/types.ts b/src/config/types.ts index e8a16f23d..e625a5914 100644 --- a/src/config/types.ts +++ b/src/config/types.ts @@ -1,9 +1,21 @@ export type ReplyMode = "text" | "command"; +export type TypingMode = "never" | "instant" | "thinking" | "message"; export type SessionScope = "per-sender" | "global"; export type ReplyToMode = "off" | "first" | "all"; export type GroupPolicy = "open" | "disabled" | "allowlist"; export type DmPolicy = "pairing" | "allowlist" | "open" | "disabled"; +export type OutboundRetryConfig = { + /** Max retry attempts for outbound requests (default: 3). */ + attempts?: number; + /** Minimum retry delay in ms (default: 300-500ms depending on provider). */ + minDelayMs?: number; + /** Maximum retry delay cap in ms (default: 30000). */ + maxDelayMs?: number; + /** Jitter factor (0-1) applied to delays (default: 0.1). 
*/ + jitter?: number; +}; + export type SessionSendPolicyAction = "allow" | "deny"; export type SessionSendPolicyMatch = { provider?: string; @@ -26,6 +38,7 @@ export type SessionConfig = { heartbeatIdleMinutes?: number; store?: string; typingIntervalSeconds?: number; + typingMode?: TypingMode; mainKey?: string; sendPolicy?: SessionSendPolicyConfig; agentToAgent?: { @@ -86,6 +99,11 @@ export type WhatsAppConfig = { accounts?: Record; /** Direct message access policy (default: pairing). */ dmPolicy?: DmPolicy; + /** + * Same-phone setup (bot uses your personal WhatsApp number). + * When true, suppress pairing replies for outbound DMs. + */ + selfChatMode?: boolean; /** Optional allowlist for WhatsApp direct chats (E.164). */ allowFrom?: string[]; /** Optional allowlist for WhatsApp group senders (E.164). */ @@ -116,6 +134,8 @@ export type WhatsAppAccountConfig = { authDir?: string; /** Direct message access policy (default: pairing). */ dmPolicy?: DmPolicy; + /** Same-phone setup for this account (suppresses pairing replies for outbound DMs). */ + selfChatMode?: boolean; allowFrom?: string[]; groupAllowFrom?: string[]; groupPolicy?: GroupPolicy; @@ -294,6 +314,8 @@ export type TelegramConfig = { /** Draft streaming mode for Telegram (off|partial|block). Default: partial. */ streamMode?: "off" | "partial" | "block"; mediaMaxMb?: number; + /** Retry policy for outbound Telegram API calls. */ + retry?: OutboundRetryConfig; proxy?: string; webhookUrl?: string; webhookSecret?: string; @@ -378,6 +400,8 @@ export type DiscordConfig = { textChunkLimit?: number; mediaMaxMb?: number; historyLimit?: number; + /** Retry policy for outbound Discord API calls. */ + retry?: OutboundRetryConfig; /** Per-action tool gating (default: true for all). */ actions?: DiscordActionConfig; /** Control reply threading when reply tags are present (off|first|all). 
*/ @@ -850,6 +874,27 @@ export type AgentModelListConfig = { fallbacks?: string[]; }; +export type AgentContextPruningConfig = { + mode?: "off" | "adaptive" | "aggressive"; + keepLastAssistants?: number; + softTrimRatio?: number; + hardClearRatio?: number; + minPrunableToolChars?: number; + tools?: { + allow?: string[]; + deny?: string[]; + }; + softTrim?: { + maxChars?: number; + headChars?: number; + tailChars?: number; + }; + hardClear?: { + enabled?: boolean; + placeholder?: string; + }; +}; + export type ClawdbotConfig = { auth?: AuthConfig; env?: { @@ -895,6 +940,8 @@ export type ClawdbotConfig = { userTimezone?: string; /** Optional display-only context window override (used for % in status UIs). */ contextTokens?: number; + /** Opt-in: prune old tool results from the LLM context to reduce token usage. */ + contextPruning?: AgentContextPruningConfig; /** Default thinking level when no /think directive is present. */ thinkingDefault?: "off" | "minimal" | "low" | "medium" | "high"; /** Default verbose level when no /verbose directive is present. */ @@ -919,6 +966,8 @@ export type ClawdbotConfig = { /** Max inbound media size in MB for agent-visible attachments (text note or future image attach). */ mediaMaxMb?: number; typingIntervalSeconds?: number; + /** Typing indicator start mode (never|instant|thinking|message). */ + typingMode?: TypingMode; /** Periodic background heartbeat runs. */ heartbeat?: { /** Heartbeat interval (duration string, default unit: minutes; default: 30m). 
*/ diff --git a/src/config/zod-schema.ts b/src/config/zod-schema.ts index b3dfef5ab..a9458dc9d 100644 --- a/src/config/zod-schema.ts +++ b/src/config/zod-schema.ts @@ -89,6 +89,15 @@ const GroupPolicySchema = z.enum(["open", "disabled", "allowlist"]); const DmPolicySchema = z.enum(["pairing", "allowlist", "open", "disabled"]); +const RetryConfigSchema = z + .object({ + attempts: z.number().int().min(1).optional(), + minDelayMs: z.number().int().min(0).optional(), + maxDelayMs: z.number().int().min(0).optional(), + jitter: z.number().min(0).max(1).optional(), + }) + .optional(); + const QueueModeBySurfaceSchema = z .object({ whatsapp: QueueModeSchema.optional(), @@ -120,6 +129,14 @@ const SessionSchema = z heartbeatIdleMinutes: z.number().int().positive().optional(), store: z.string().optional(), typingIntervalSeconds: z.number().int().positive().optional(), + typingMode: z + .union([ + z.literal("never"), + z.literal("instant"), + z.literal("thinking"), + z.literal("message"), + ]) + .optional(), mainKey: z.string().optional(), sendPolicy: z .object({ @@ -513,6 +530,40 @@ export const ClawdbotSchema = z.object({ skipBootstrap: z.boolean().optional(), userTimezone: z.string().optional(), contextTokens: z.number().int().positive().optional(), + contextPruning: z + .object({ + mode: z + .union([ + z.literal("off"), + z.literal("adaptive"), + z.literal("aggressive"), + ]) + .optional(), + keepLastAssistants: z.number().int().nonnegative().optional(), + softTrimRatio: z.number().min(0).max(1).optional(), + hardClearRatio: z.number().min(0).max(1).optional(), + minPrunableToolChars: z.number().int().nonnegative().optional(), + tools: z + .object({ + allow: z.array(z.string()).optional(), + deny: z.array(z.string()).optional(), + }) + .optional(), + softTrim: z + .object({ + maxChars: z.number().int().nonnegative().optional(), + headChars: z.number().int().nonnegative().optional(), + tailChars: z.number().int().nonnegative().optional(), + }) + .optional(), + hardClear: z 
+ .object({ + enabled: z.boolean().optional(), + placeholder: z.string().optional(), + }) + .optional(), + }) + .optional(), tools: z .object({ allow: z.array(z.string()).optional(), @@ -552,6 +603,14 @@ export const ClawdbotSchema = z.object({ timeoutSeconds: z.number().int().positive().optional(), mediaMaxMb: z.number().positive().optional(), typingIntervalSeconds: z.number().int().positive().optional(), + typingMode: z + .union([ + z.literal("never"), + z.literal("instant"), + z.literal("thinking"), + z.literal("message"), + ]) + .optional(), heartbeat: HeartbeatSchema, maxConcurrent: z.number().int().positive().optional(), subagents: z @@ -721,6 +780,7 @@ export const ClawdbotSchema = z.object({ /** Override auth directory for this WhatsApp account (Baileys multi-file auth state). */ authDir: z.string().optional(), dmPolicy: DmPolicySchema.optional().default("pairing"), + selfChatMode: z.boolean().optional(), allowFrom: z.array(z.string()).optional(), groupAllowFrom: z.array(z.string()).optional(), groupPolicy: GroupPolicySchema.optional().default("open"), @@ -753,6 +813,7 @@ export const ClawdbotSchema = z.object({ ) .optional(), dmPolicy: DmPolicySchema.optional().default("pairing"), + selfChatMode: z.boolean().optional(), allowFrom: z.array(z.string()).optional(), groupAllowFrom: z.array(z.string()).optional(), groupPolicy: GroupPolicySchema.optional().default("open"), @@ -833,6 +894,7 @@ export const ClawdbotSchema = z.object({ .optional() .default("partial"), mediaMaxMb: z.number().positive().optional(), + retry: RetryConfigSchema, proxy: z.string().optional(), webhookUrl: z.string().optional(), webhookSecret: z.string().optional(), @@ -865,6 +927,7 @@ export const ClawdbotSchema = z.object({ textChunkLimit: z.number().int().positive().optional(), mediaMaxMb: z.number().positive().optional(), historyLimit: z.number().int().min(0).optional(), + retry: RetryConfigSchema, actions: z .object({ reactions: z.boolean().optional(), diff --git a/src/cron/service.ts 
b/src/cron/service.ts index cdda4bc51..a75cc9ae6 100644 --- a/src/cron/service.ts +++ b/src/cron/service.ts @@ -44,6 +44,7 @@ export type CronServiceDeps = { }; const STUCK_RUN_MS = 2 * 60 * 60 * 1000; +const MAX_TIMEOUT_MS = 2 ** 31 - 1; function normalizeRequiredName(raw: unknown) { if (typeof raw !== "string") throw new Error("cron job name is required"); @@ -393,11 +394,13 @@ export class CronService { const nextAt = this.nextWakeAtMs(); if (!nextAt) return; const delay = Math.max(nextAt - this.deps.nowMs(), 0); + // Avoid TimeoutOverflowWarning when a job is far in the future. + const clampedDelay = Math.min(delay, MAX_TIMEOUT_MS); this.timer = setTimeout(() => { void this.onTimer().catch((err) => { this.deps.log.error({ err: String(err) }, "cron: timer tick failed"); }); - }, delay); + }, clampedDelay); this.timer.unref?.(); } diff --git a/src/daemon/inspect.ts b/src/daemon/inspect.ts new file mode 100644 index 000000000..327ca3702 --- /dev/null +++ b/src/daemon/inspect.ts @@ -0,0 +1,324 @@ +import { execFile } from "node:child_process"; +import fs from "node:fs/promises"; +import path from "node:path"; +import { promisify } from "node:util"; + +import { + GATEWAY_LAUNCH_AGENT_LABEL, + GATEWAY_SYSTEMD_SERVICE_NAME, + GATEWAY_WINDOWS_TASK_NAME, + LEGACY_GATEWAY_LAUNCH_AGENT_LABELS, + LEGACY_GATEWAY_SYSTEMD_SERVICE_NAMES, + LEGACY_GATEWAY_WINDOWS_TASK_NAMES, +} from "./constants.js"; + +export type ExtraGatewayService = { + platform: "darwin" | "linux" | "win32"; + label: string; + detail: string; + scope: "user" | "system"; +}; + +export type FindExtraGatewayServicesOptions = { + deep?: boolean; +}; + +const EXTRA_MARKERS = ["clawdbot", "clawdis", "gateway-daemon"]; +const execFileAsync = promisify(execFile); + +export function renderGatewayServiceCleanupHints(): string[] { + switch (process.platform) { + case "darwin": + return [ + `launchctl bootout gui/$UID/${GATEWAY_LAUNCH_AGENT_LABEL}`, + `rm ~/Library/LaunchAgents/${GATEWAY_LAUNCH_AGENT_LABEL}.plist`, + 
]; + case "linux": + return [ + `systemctl --user disable --now ${GATEWAY_SYSTEMD_SERVICE_NAME}.service`, + `rm ~/.config/systemd/user/${GATEWAY_SYSTEMD_SERVICE_NAME}.service`, + ]; + case "win32": + return [`schtasks /Delete /TN "${GATEWAY_WINDOWS_TASK_NAME}" /F`]; + default: + return []; + } +} + +function resolveHomeDir(env: Record): string { + const home = env.HOME?.trim() || env.USERPROFILE?.trim(); + if (!home) throw new Error("Missing HOME"); + return home; +} + +function containsMarker(content: string): boolean { + const lower = content.toLowerCase(); + return EXTRA_MARKERS.some((marker) => lower.includes(marker)); +} + +function tryExtractPlistLabel(contents: string): string | null { + const match = contents.match( + /Label<\/key>\s*([\s\S]*?)<\/string>/i, + ); + if (!match) return null; + return match[1]?.trim() || null; +} + +function isIgnoredLaunchdLabel(label: string): boolean { + return ( + label === GATEWAY_LAUNCH_AGENT_LABEL || + LEGACY_GATEWAY_LAUNCH_AGENT_LABELS.includes(label) + ); +} + +function isIgnoredSystemdName(name: string): boolean { + return ( + name === GATEWAY_SYSTEMD_SERVICE_NAME || + LEGACY_GATEWAY_SYSTEMD_SERVICE_NAMES.includes(name) + ); +} + +async function scanLaunchdDir(params: { + dir: string; + scope: "user" | "system"; +}): Promise { + const results: ExtraGatewayService[] = []; + let entries: string[] = []; + try { + entries = await fs.readdir(params.dir); + } catch { + return results; + } + + for (const entry of entries) { + if (!entry.endsWith(".plist")) continue; + const labelFromName = entry.replace(/\.plist$/, ""); + if (isIgnoredLaunchdLabel(labelFromName)) continue; + const fullPath = path.join(params.dir, entry); + let contents = ""; + try { + contents = await fs.readFile(fullPath, "utf8"); + } catch { + continue; + } + if (!containsMarker(contents)) continue; + const label = tryExtractPlistLabel(contents) ?? 
labelFromName; + if (isIgnoredLaunchdLabel(label)) continue; + results.push({ + platform: "darwin", + label, + detail: `plist: ${fullPath}`, + scope: params.scope, + }); + } + + return results; +} + +async function scanSystemdDir(params: { + dir: string; + scope: "user" | "system"; +}): Promise { + const results: ExtraGatewayService[] = []; + let entries: string[] = []; + try { + entries = await fs.readdir(params.dir); + } catch { + return results; + } + + for (const entry of entries) { + if (!entry.endsWith(".service")) continue; + const name = entry.replace(/\.service$/, ""); + if (isIgnoredSystemdName(name)) continue; + const fullPath = path.join(params.dir, entry); + let contents = ""; + try { + contents = await fs.readFile(fullPath, "utf8"); + } catch { + continue; + } + if (!containsMarker(contents)) continue; + results.push({ + platform: "linux", + label: entry, + detail: `unit: ${fullPath}`, + scope: params.scope, + }); + } + + return results; +} + +type ScheduledTaskInfo = { + name: string; + taskToRun?: string; +}; + +function parseSchtasksList(output: string): ScheduledTaskInfo[] { + const tasks: ScheduledTaskInfo[] = []; + let current: ScheduledTaskInfo | null = null; + + for (const rawLine of output.split(/\r?\n/)) { + const line = rawLine.trim(); + if (!line) { + if (current) { + tasks.push(current); + current = null; + } + continue; + } + const idx = line.indexOf(":"); + if (idx <= 0) continue; + const key = line.slice(0, idx).trim().toLowerCase(); + const value = line.slice(idx + 1).trim(); + if (!value) continue; + if (key === "taskname") { + if (current) tasks.push(current); + current = { name: value }; + continue; + } + if (!current) continue; + if (key === "task to run") { + current.taskToRun = value; + } + } + + if (current) tasks.push(current); + return tasks; +} + +async function execSchtasks( + args: string[], +): Promise<{ stdout: string; stderr: string; code: number }> { + try { + const { stdout, stderr } = await execFileAsync("schtasks", 
args, { + encoding: "utf8", + windowsHide: true, + }); + return { + stdout: String(stdout ?? ""), + stderr: String(stderr ?? ""), + code: 0, + }; + } catch (error) { + const e = error as { + stdout?: unknown; + stderr?: unknown; + code?: unknown; + message?: unknown; + }; + return { + stdout: typeof e.stdout === "string" ? e.stdout : "", + stderr: + typeof e.stderr === "string" + ? e.stderr + : typeof e.message === "string" + ? e.message + : "", + code: typeof e.code === "number" ? e.code : 1, + }; + } +} + +export async function findExtraGatewayServices( + env: Record, + opts: FindExtraGatewayServicesOptions = {}, +): Promise { + const results: ExtraGatewayService[] = []; + const seen = new Set(); + const push = (svc: ExtraGatewayService) => { + const key = `${svc.platform}:${svc.label}:${svc.detail}:${svc.scope}`; + if (seen.has(key)) return; + seen.add(key); + results.push(svc); + }; + + if (process.platform === "darwin") { + try { + const home = resolveHomeDir(env); + const userDir = path.join(home, "Library", "LaunchAgents"); + for (const svc of await scanLaunchdDir({ + dir: userDir, + scope: "user", + })) { + push(svc); + } + if (opts.deep) { + for (const svc of await scanLaunchdDir({ + dir: path.join(path.sep, "Library", "LaunchAgents"), + scope: "system", + })) { + push(svc); + } + for (const svc of await scanLaunchdDir({ + dir: path.join(path.sep, "Library", "LaunchDaemons"), + scope: "system", + })) { + push(svc); + } + } + } catch { + return results; + } + return results; + } + + if (process.platform === "linux") { + try { + const home = resolveHomeDir(env); + const userDir = path.join(home, ".config", "systemd", "user"); + for (const svc of await scanSystemdDir({ + dir: userDir, + scope: "user", + })) { + push(svc); + } + if (opts.deep) { + for (const dir of [ + "/etc/systemd/system", + "/usr/lib/systemd/system", + "/lib/systemd/system", + ]) { + for (const svc of await scanSystemdDir({ + dir, + scope: "system", + })) { + push(svc); + } + } + } + } 
catch { + return results; + } + return results; + } + + if (process.platform === "win32") { + if (!opts.deep) return results; + const res = await execSchtasks(["/Query", "/FO", "LIST", "/V"]); + if (res.code !== 0) return results; + const tasks = parseSchtasksList(res.stdout); + for (const task of tasks) { + const name = task.name.trim(); + if (!name) continue; + if (name === GATEWAY_WINDOWS_TASK_NAME) continue; + if (LEGACY_GATEWAY_WINDOWS_TASK_NAMES.includes(name)) continue; + const lowerName = name.toLowerCase(); + const lowerCommand = task.taskToRun?.toLowerCase() ?? ""; + const matches = EXTRA_MARKERS.some( + (marker) => lowerName.includes(marker) || lowerCommand.includes(marker), + ); + if (!matches) continue; + push({ + platform: "win32", + label: name, + detail: task.taskToRun ? `task: ${name}, run: ${task.taskToRun}` : name, + scope: "system", + }); + } + return results; + } + + return results; +} diff --git a/src/discord/monitor.tool-result.test.ts b/src/discord/monitor.tool-result.test.ts index 310c07e82..9606b0388 100644 --- a/src/discord/monitor.tool-result.test.ts +++ b/src/discord/monitor.tool-result.test.ts @@ -167,4 +167,213 @@ describe("discord tool result dispatch", () => { expect(dispatchMock).toHaveBeenCalledTimes(1); expect(sendMock).toHaveBeenCalledTimes(1); }, 10000); + + it("forks thread sessions and injects starter context", async () => { + const { createDiscordMessageHandler } = await import("./monitor.js"); + let capturedCtx: + | { + SessionKey?: string; + ParentSessionKey?: string; + ThreadStarterBody?: string; + ThreadLabel?: string; + } + | undefined; + dispatchMock.mockImplementationOnce(async ({ ctx, dispatcher }) => { + capturedCtx = ctx; + dispatcher.sendFinalReply({ text: "hi" }); + return { queuedFinal: true, counts: { final: 1 } }; + }); + + const cfg = { + agent: { model: "anthropic/claude-opus-4-5", workspace: "/tmp/clawd" }, + session: { store: "/tmp/clawdbot-sessions.json" }, + messages: { responsePrefix: "PFX" }, + 
discord: { + dm: { enabled: true, policy: "open" }, + guilds: { "*": { requireMention: false } }, + }, + routing: { allowFrom: [] }, + } as ReturnType; + + const handler = createDiscordMessageHandler({ + cfg, + token: "token", + runtime: { + log: vi.fn(), + error: vi.fn(), + exit: (code: number): never => { + throw new Error(`exit ${code}`); + }, + }, + botUserId: "bot-id", + guildHistories: new Map(), + historyLimit: 0, + mediaMaxBytes: 10_000, + textLimit: 2000, + replyToMode: "off", + dmEnabled: true, + groupDmEnabled: false, + guildEntries: { "*": { requireMention: false } }, + }); + + const threadChannel = { + type: ChannelType.GuildText, + name: "thread-name", + parentId: "p1", + parent: { id: "p1", name: "general" }, + isThread: () => true, + fetchStarterMessage: async () => ({ + content: "starter message", + author: { tag: "Alice#1", username: "Alice" }, + createdTimestamp: Date.now(), + }), + }; + + const client = { + fetchChannel: vi.fn().mockResolvedValue({ + type: ChannelType.GuildText, + name: "thread-name", + }), + rest: { + get: vi.fn().mockResolvedValue({ + content: "starter message", + author: { id: "u1", username: "Alice", discriminator: "0001" }, + timestamp: new Date().toISOString(), + }), + }, + } as unknown as Client; + + await handler( + { + message: { + id: "m4", + content: "thread reply", + channelId: "t1", + channel: threadChannel, + timestamp: new Date().toISOString(), + type: MessageType.Default, + attachments: [], + embeds: [], + mentionedEveryone: false, + mentionedUsers: [], + mentionedRoles: [], + author: { id: "u2", bot: false, username: "Bob", tag: "Bob#2" }, + }, + author: { id: "u2", bot: false, username: "Bob", tag: "Bob#2" }, + member: { displayName: "Bob" }, + guild: { id: "g1", name: "Guild" }, + guild_id: "g1", + }, + client, + ); + + expect(capturedCtx?.SessionKey).toBe("agent:main:discord:channel:t1"); + expect(capturedCtx?.ParentSessionKey).toBe("agent:main:discord:channel:p1"); + 
expect(capturedCtx?.ThreadStarterBody).toContain("starter message"); + expect(capturedCtx?.ThreadLabel).toContain("Discord thread #general"); + }); + + it("scopes thread sessions to the routed agent", async () => { + const { createDiscordMessageHandler } = await import("./monitor.js"); + + let capturedCtx: + | { + SessionKey?: string; + ParentSessionKey?: string; + } + | undefined; + dispatchMock.mockImplementationOnce(async ({ ctx, dispatcher }) => { + capturedCtx = ctx; + dispatcher.sendFinalReply({ text: "hi" }); + return { queuedFinal: true, counts: { final: 1 } }; + }); + + const cfg = { + agent: { model: "anthropic/claude-opus-4-5", workspace: "/tmp/clawd" }, + session: { store: "/tmp/clawdbot-sessions.json" }, + messages: { responsePrefix: "PFX" }, + discord: { + dm: { enabled: true, policy: "open" }, + guilds: { "*": { requireMention: false } }, + }, + routing: { + allowFrom: [], + bindings: [ + { agentId: "support", match: { provider: "discord", guildId: "g1" } }, + ], + }, + } as ReturnType; + + const handler = createDiscordMessageHandler({ + cfg, + token: "token", + runtime: { + log: vi.fn(), + error: vi.fn(), + exit: (code: number): never => { + throw new Error(`exit ${code}`); + }, + }, + botUserId: "bot-id", + guildHistories: new Map(), + historyLimit: 0, + mediaMaxBytes: 10_000, + textLimit: 2000, + replyToMode: "off", + dmEnabled: true, + groupDmEnabled: false, + guildEntries: { "*": { requireMention: false } }, + }); + + const threadChannel = { + type: ChannelType.GuildText, + name: "thread-name", + parentId: "p1", + parent: { id: "p1", name: "general" }, + isThread: () => true, + }; + + const client = { + fetchChannel: vi.fn().mockResolvedValue({ + type: ChannelType.GuildText, + name: "thread-name", + }), + rest: { + get: vi.fn().mockResolvedValue({ + content: "starter message", + author: { id: "u1", username: "Alice", discriminator: "0001" }, + timestamp: new Date().toISOString(), + }), + }, + } as unknown as Client; + + await handler( + { + 
message: { + id: "m5", + content: "thread reply", + channelId: "t1", + channel: threadChannel, + timestamp: new Date().toISOString(), + type: MessageType.Default, + attachments: [], + embeds: [], + mentionedEveryone: false, + mentionedUsers: [], + mentionedRoles: [], + author: { id: "u2", bot: false, username: "Bob", tag: "Bob#2" }, + }, + author: { id: "u2", bot: false, username: "Bob", tag: "Bob#2" }, + member: { displayName: "Bob" }, + guild: { id: "g1", name: "Guild" }, + guild_id: "g1", + }, + client, + ); + + expect(capturedCtx?.SessionKey).toBe("agent:support:discord:channel:t1"); + expect(capturedCtx?.ParentSessionKey).toBe( + "agent:support:discord:channel:p1", + ); + }); }); diff --git a/src/discord/monitor.ts b/src/discord/monitor.ts index 473d18eab..11482199d 100644 --- a/src/discord/monitor.ts +++ b/src/discord/monitor.ts @@ -27,7 +27,10 @@ import { listNativeCommandSpecs, shouldHandleTextCommands, } from "../auto-reply/commands-registry.js"; -import { formatAgentEnvelope } from "../auto-reply/envelope.js"; +import { + formatAgentEnvelope, + formatThreadStarterEnvelope, +} from "../auto-reply/envelope.js"; import { dispatchReplyFromConfig } from "../auto-reply/reply/dispatch-from-config.js"; import { buildMentionRegexes, @@ -51,7 +54,11 @@ import { readProviderAllowFromStore, upsertProviderPairingRequest, } from "../pairing/pairing-store.js"; -import { resolveAgentRoute } from "../routing/resolve-route.js"; +import { + buildAgentSessionKey, + resolveAgentRoute, +} from "../routing/resolve-route.js"; +import { resolveThreadSessionKeys } from "../routing/session-key.js"; import type { RuntimeEnv } from "../runtime.js"; import { loadWebMedia } from "../web/media.js"; import { fetchDiscordApplicationId } from "./probe.js"; @@ -81,6 +88,67 @@ type DiscordHistoryEntry = { }; type DiscordReactionEvent = Parameters[0]; +type DiscordThreadChannel = { + id: string; + name?: string | null; + parentId?: string | null; + parent?: { id?: string; name?: string }; +}; 
+type DiscordThreadStarter = { + text: string; + author: string; + timestamp?: number; +}; + +const DISCORD_THREAD_STARTER_CACHE = new Map(); + +async function resolveDiscordThreadStarter(params: { + channel: DiscordThreadChannel; + client: Client; + parentId?: string; +}): Promise { + const cacheKey = params.channel.id; + const cached = DISCORD_THREAD_STARTER_CACHE.get(cacheKey); + if (cached) return cached; + try { + if (!params.parentId) return null; + const starter = (await params.client.rest.get( + Routes.channelMessage(params.parentId, params.channel.id), + )) as { + content?: string | null; + embeds?: Array<{ description?: string | null }>; + member?: { nick?: string | null; displayName?: string | null }; + author?: { + id?: string | null; + username?: string | null; + discriminator?: string | null; + }; + timestamp?: string | null; + }; + if (!starter) return null; + const text = + starter.content?.trim() ?? starter.embeds?.[0]?.description?.trim() ?? ""; + if (!text) return null; + const author = + starter.member?.nick ?? + starter.member?.displayName ?? + (starter.author + ? starter.author.discriminator && starter.author.discriminator !== "0" + ? `${starter.author.username ?? "Unknown"}#${starter.author.discriminator}` + : (starter.author.username ?? starter.author.id ?? "Unknown") + : "Unknown"); + const timestamp = resolveTimestampMs(starter.timestamp); + const payload: DiscordThreadStarter = { + text, + author, + timestamp: timestamp ?? undefined, + }; + DISCORD_THREAD_STARTER_CACHE.set(cacheKey, payload); + return payload; + } catch { + return null; + } +} export type DiscordAllowList = { allowAll: boolean; @@ -509,8 +577,33 @@ export function createDiscordMessageHandler(params: { return; } - const channelName = channelInfo?.name; - const channelSlug = channelName ? normalizeDiscordSlug(channelName) : ""; + const channelName = + channelInfo?.name ?? + ((isGuildMessage || isGroupDm) && + message.channel && + "name" in message.channel + ? 
message.channel.name + : undefined); + const isThreadChannel = + isGuildMessage && + message.channel && + "isThread" in message.channel && + message.channel.isThread(); + const threadChannel = isThreadChannel + ? (message.channel as DiscordThreadChannel) + : null; + const threadParentId = + threadChannel?.parentId ?? threadChannel?.parent?.id ?? undefined; + const threadParentName = threadChannel?.parent?.name; + const threadName = threadChannel?.name; + const configChannelName = threadParentName ?? channelName; + const configChannelSlug = configChannelName + ? normalizeDiscordSlug(configChannelName) + : ""; + const displayChannelName = threadName ?? channelName; + const displayChannelSlug = displayChannelName + ? normalizeDiscordSlug(displayChannelName) + : ""; const guildSlug = guildInfo?.slug || (data.guild?.name ? normalizeDiscordSlug(data.guild.name) : ""); @@ -524,12 +617,13 @@ export function createDiscordMessageHandler(params: { id: isDirectMessage ? author.id : message.channelId, }, }); + const baseSessionKey = route.sessionKey; const channelConfig = isGuildMessage ? resolveDiscordChannelConfig({ guildInfo, - channelId: message.channelId, - channelName, - channelSlug, + channelId: threadParentId ?? message.channelId, + channelName: configChannelName, + channelSlug: configChannelSlug, }) : null; if (isGuildMessage && channelConfig?.enabled === false) { @@ -544,8 +638,8 @@ export function createDiscordMessageHandler(params: { resolveGroupDmAllow({ channels: groupDmChannels, channelId: message.channelId, - channelName, - channelSlug, + channelName: displayChannelName, + channelSlug: displayChannelSlug, }); if (isGroupDm && !groupDmAllowed) return; @@ -715,7 +809,9 @@ export function createDiscordMessageHandler(params: { channelId: message.channelId, }); const groupRoom = - isGuildMessage && channelSlug ? `#${channelSlug}` : undefined; + isGuildMessage && displayChannelSlug + ? `#${displayChannelSlug}` + : undefined; const groupSubject = isDirectMessage ? 
undefined : groupRoom; const channelDescription = channelInfo?.topic?.trim(); const systemPromptParts = [ @@ -761,15 +857,51 @@ export function createDiscordMessageHandler(params: { combinedBody = `[Replied message - for context]\n${replyContext}\n\n${combinedBody}`; } + let threadStarterBody: string | undefined; + let threadLabel: string | undefined; + let parentSessionKey: string | undefined; + if (threadChannel) { + const starter = await resolveDiscordThreadStarter({ + channel: threadChannel, + client, + parentId: threadParentId, + }); + if (starter?.text) { + const starterEnvelope = formatThreadStarterEnvelope({ + provider: "Discord", + author: starter.author, + timestamp: starter.timestamp, + body: starter.text, + }); + threadStarterBody = starterEnvelope; + } + const parentName = threadParentName ?? "parent"; + threadLabel = threadName + ? `Discord thread #${normalizeDiscordSlug(parentName)} › ${threadName}` + : `Discord thread #${normalizeDiscordSlug(parentName)}`; + if (threadParentId) { + parentSessionKey = buildAgentSessionKey({ + agentId: route.agentId, + provider: route.provider, + peer: { kind: "channel", id: threadParentId }, + }); + } + } const mediaPayload = buildDiscordMediaPayload(mediaList); const discordTo = `channel:${message.channelId}`; + const threadKeys = resolveThreadSessionKeys({ + baseSessionKey, + threadId: threadChannel ? message.channelId : undefined, + parentSessionKey, + useSuffix: false, + }); const ctxPayload = { Body: combinedBody, From: isDirectMessage ? `discord:${author.id}` : `group:${message.channelId}`, To: discordTo, - SessionKey: route.sessionKey, + SessionKey: threadKeys.sessionKey, AccountId: route.accountId, ChatType: isDirectMessage ? 
"direct" : "group", SenderName: @@ -787,6 +919,9 @@ export function createDiscordMessageHandler(params: { Surface: "discord" as const, WasMentioned: wasMentioned, MessageSid: message.id, + ParentSessionKey: threadKeys.parentSessionKey, + ThreadStarterBody: threadStarterBody, + ThreadLabel: threadLabel, Timestamp: resolveTimestampMs(message.timestamp), ...mediaPayload, CommandAuthorized: commandAuthorized, diff --git a/src/discord/send.test.ts b/src/discord/send.test.ts index c5b67f3e2..6f714ca2b 100644 --- a/src/discord/send.test.ts +++ b/src/discord/send.test.ts @@ -1,3 +1,4 @@ +import { RateLimitError } from "@buape/carbon"; import { PermissionFlagsBits, Routes } from "discord-api-types/v10"; import { beforeEach, describe, expect, it, vi } from "vitest"; @@ -662,3 +663,133 @@ describe("sendPollDiscord", () => { ); }); }); + +function createMockRateLimitError(retryAfter = 0.001): RateLimitError { + const response = new Response(null, { + status: 429, + headers: { + "X-RateLimit-Scope": "user", + "X-RateLimit-Bucket": "test-bucket", + }, + }); + return new RateLimitError(response, { + message: "You are being rate limited.", + retry_after: retryAfter, + global: false, + }); +} + +describe("retry rate limits", () => { + beforeEach(() => { + vi.clearAllMocks(); + }); + + it("retries on Discord rate limits", async () => { + const { rest, postMock } = makeRest(); + const rateLimitError = createMockRateLimitError(0); + + postMock + .mockRejectedValueOnce(rateLimitError) + .mockResolvedValueOnce({ id: "msg1", channel_id: "789" }); + + const res = await sendMessageDiscord("channel:789", "hello", { + rest, + token: "t", + retry: { attempts: 2, minDelayMs: 0, maxDelayMs: 0, jitter: 0 }, + }); + + expect(res.messageId).toBe("msg1"); + expect(postMock).toHaveBeenCalledTimes(2); + }); + + it("uses retry_after delays when rate limited", async () => { + vi.useFakeTimers(); + const setTimeoutSpy = vi.spyOn(global, "setTimeout"); + const { rest, postMock } = makeRest(); + const 
rateLimitError = createMockRateLimitError(0.5); + + postMock + .mockRejectedValueOnce(rateLimitError) + .mockResolvedValueOnce({ id: "msg1", channel_id: "789" }); + + const promise = sendMessageDiscord("channel:789", "hello", { + rest, + token: "t", + retry: { attempts: 2, minDelayMs: 0, maxDelayMs: 1000, jitter: 0 }, + }); + + await vi.runAllTimersAsync(); + await expect(promise).resolves.toEqual({ + messageId: "msg1", + channelId: "789", + }); + expect(setTimeoutSpy.mock.calls[0]?.[1]).toBe(500); + setTimeoutSpy.mockRestore(); + vi.useRealTimers(); + }); + + it("stops after max retry attempts", async () => { + const { rest, postMock } = makeRest(); + const rateLimitError = createMockRateLimitError(0); + + postMock.mockRejectedValue(rateLimitError); + + await expect( + sendMessageDiscord("channel:789", "hello", { + rest, + token: "t", + retry: { attempts: 2, minDelayMs: 0, maxDelayMs: 0, jitter: 0 }, + }), + ).rejects.toBeInstanceOf(RateLimitError); + expect(postMock).toHaveBeenCalledTimes(2); + }); + + it("does not retry non-rate-limit errors", async () => { + const { rest, postMock } = makeRest(); + postMock.mockRejectedValueOnce(new Error("network error")); + + await expect( + sendMessageDiscord("channel:789", "hello", { rest, token: "t" }), + ).rejects.toThrow("network error"); + expect(postMock).toHaveBeenCalledTimes(1); + }); + + it("retries reactions on rate limits", async () => { + const { rest, putMock } = makeRest(); + const rateLimitError = createMockRateLimitError(0); + + putMock + .mockRejectedValueOnce(rateLimitError) + .mockResolvedValueOnce(undefined); + + const res = await reactMessageDiscord("chan1", "msg1", "ok", { + rest, + token: "t", + retry: { attempts: 2, minDelayMs: 0, maxDelayMs: 0, jitter: 0 }, + }); + + expect(res.ok).toBe(true); + expect(putMock).toHaveBeenCalledTimes(2); + }); + + it("retries media upload without duplicating overflow text", async () => { + const { rest, postMock } = makeRest(); + const rateLimitError = 
createMockRateLimitError(0); + const text = "a".repeat(2005); + + postMock + .mockRejectedValueOnce(rateLimitError) + .mockResolvedValueOnce({ id: "msg1", channel_id: "789" }) + .mockResolvedValueOnce({ id: "msg2", channel_id: "789" }); + + const res = await sendMessageDiscord("channel:789", text, { + rest, + token: "t", + mediaUrl: "https://example.com/photo.jpg", + retry: { attempts: 2, minDelayMs: 0, maxDelayMs: 0, jitter: 0 }, + }); + + expect(res.messageId).toBe("msg1"); + expect(postMock).toHaveBeenCalledTimes(3); + }); +}); diff --git a/src/discord/send.ts b/src/discord/send.ts index 35de62e2f..f07040dfb 100644 --- a/src/discord/send.ts +++ b/src/discord/send.ts @@ -19,6 +19,11 @@ import { import { chunkMarkdownText } from "../auto-reply/chunk.js"; import { loadConfig } from "../config/config.js"; +import type { RetryConfig } from "../infra/retry.js"; +import { + createDiscordRetryRunner, + type RetryRunner, +} from "../infra/retry-policy.js"; import { normalizePollDurationHours, normalizePollInput, @@ -35,6 +40,7 @@ const DISCORD_POLL_MAX_ANSWERS = 10; const DISCORD_POLL_MAX_DURATION_HOURS = 32 * 24; const DISCORD_MISSING_PERMISSIONS = 50013; const DISCORD_CANNOT_DM = 50007; +type DiscordRequest = RetryRunner; export class DiscordSendError extends Error { kind?: "missing-permissions" | "dm-blocked"; @@ -72,6 +78,7 @@ type DiscordSendOpts = { verbose?: boolean; rest?: RequestClient; replyTo?: string; + retry?: RetryConfig; }; export type DiscordSendResult = { @@ -82,6 +89,8 @@ export type DiscordSendResult = { export type DiscordReactOpts = { token?: string; rest?: RequestClient; + verbose?: boolean; + retry?: RetryConfig; }; export type DiscordReactionUser = { @@ -187,6 +196,24 @@ function resolveRest(token: string, rest?: RequestClient) { return rest ?? 
new RequestClient(token); } +type DiscordClientOpts = { + token?: string; + rest?: RequestClient; + retry?: RetryConfig; + verbose?: boolean; +}; + +function createDiscordClient(opts: DiscordClientOpts, cfg = loadConfig()) { + const token = resolveToken(opts.token); + const rest = resolveRest(token, opts.rest); + const request = createDiscordRetryRunner({ + retry: opts.retry, + configRetry: cfg.discord?.retry, + verbose: opts.verbose, + }); + return { token, rest, request }; +} + function normalizeReactionEmoji(raw: string) { const trimmed = raw.trim(); if (!trimmed) { @@ -358,13 +385,18 @@ async function buildDiscordSendError( async function resolveChannelId( rest: RequestClient, recipient: DiscordRecipient, + request: DiscordRequest, ): Promise<{ channelId: string; dm?: boolean }> { if (recipient.kind === "channel") { return { channelId: recipient.id }; } - const dmChannel = (await rest.post(Routes.userChannels(), { - body: { recipient_id: recipient.id }, - })) as { id: string }; + const dmChannel = (await request( + () => + rest.post(Routes.userChannels(), { + body: { recipient_id: recipient.id }, + }) as Promise<{ id: string }>, + "dm-channel", + )) as { id: string }; if (!dmChannel?.id) { throw new Error("Failed to create Discord DM channel"); } @@ -375,7 +407,8 @@ async function sendDiscordText( rest: RequestClient, channelId: string, text: string, - replyTo?: string, + replyTo: string | undefined, + request: DiscordRequest, ) { if (!text.trim()) { throw new Error("Message must be non-empty for Discord sends"); @@ -384,21 +417,29 @@ async function sendDiscordText( ? 
{ message_id: replyTo, fail_if_not_exists: false } : undefined; if (text.length <= DISCORD_TEXT_LIMIT) { - const res = (await rest.post(Routes.channelMessages(channelId), { - body: { content: text, message_reference: messageReference }, - })) as { id: string; channel_id: string }; + const res = (await request( + () => + rest.post(Routes.channelMessages(channelId), { + body: { content: text, message_reference: messageReference }, + }) as Promise<{ id: string; channel_id: string }>, + "text", + )) as { id: string; channel_id: string }; return res; } const chunks = chunkMarkdownText(text, DISCORD_TEXT_LIMIT); let last: { id: string; channel_id: string } | null = null; let isFirst = true; for (const chunk of chunks) { - last = (await rest.post(Routes.channelMessages(channelId), { - body: { - content: chunk, - message_reference: isFirst ? messageReference : undefined, - }, - })) as { id: string; channel_id: string }; + last = (await request( + () => + rest.post(Routes.channelMessages(channelId), { + body: { + content: chunk, + message_reference: isFirst ? messageReference : undefined, + }, + }) as Promise<{ id: string; channel_id: string }>, + "text", + )) as { id: string; channel_id: string }; isFirst = false; } if (!last) { @@ -412,7 +453,8 @@ async function sendDiscordMedia( channelId: string, text: string, mediaUrl: string, - replyTo?: string, + replyTo: string | undefined, + request: DiscordRequest, ) { const media = await loadWebMedia(mediaUrl); const caption = @@ -420,22 +462,26 @@ async function sendDiscordMedia( const messageReference = replyTo ? { message_id: replyTo, fail_if_not_exists: false } : undefined; - const res = (await rest.post(Routes.channelMessages(channelId), { - body: { - content: caption || undefined, - message_reference: messageReference, - files: [ - { - data: media.buffer, - name: media.fileName ?? 
"upload", + const res = (await request( + () => + rest.post(Routes.channelMessages(channelId), { + body: { + content: caption || undefined, + message_reference: messageReference, + files: [ + { + data: media.buffer, + name: media.fileName ?? "upload", + }, + ], }, - ], - }, - })) as { id: string; channel_id: string }; + }) as Promise<{ id: string; channel_id: string }>, + "media", + )) as { id: string; channel_id: string }; if (text.length > DISCORD_TEXT_LIMIT) { const remaining = text.slice(DISCORD_TEXT_LIMIT).trim(); if (remaining) { - await sendDiscordText(rest, channelId, remaining); + await sendDiscordText(rest, channelId, remaining, undefined, request); } } return res; @@ -471,10 +517,10 @@ export async function sendMessageDiscord( text: string, opts: DiscordSendOpts = {}, ): Promise { - const token = resolveToken(opts.token); - const rest = resolveRest(token, opts.rest); + const cfg = loadConfig(); + const { token, rest, request } = createDiscordClient(opts, cfg); const recipient = parseRecipient(to); - const { channelId } = await resolveChannelId(rest, recipient); + const { channelId } = await resolveChannelId(rest, recipient, request); let result: | { id: string; channel_id: string } | { id: string | null; channel_id: string }; @@ -486,9 +532,16 @@ export async function sendMessageDiscord( text, opts.mediaUrl, opts.replyTo, + request, ); } else { - result = await sendDiscordText(rest, channelId, text, opts.replyTo); + result = await sendDiscordText( + rest, + channelId, + text, + opts.replyTo, + request, + ); } } catch (err) { throw await buildDiscordSendError(err, { @@ -510,18 +563,22 @@ export async function sendStickerDiscord( stickerIds: string[], opts: DiscordSendOpts & { content?: string } = {}, ): Promise { - const token = resolveToken(opts.token); - const rest = resolveRest(token, opts.rest); + const cfg = loadConfig(); + const { rest, request } = createDiscordClient(opts, cfg); const recipient = parseRecipient(to); - const { channelId } = await 
resolveChannelId(rest, recipient); + const { channelId } = await resolveChannelId(rest, recipient, request); const content = opts.content?.trim(); const stickers = normalizeStickerIds(stickerIds); - const res = (await rest.post(Routes.channelMessages(channelId), { - body: { - content: content || undefined, - sticker_ids: stickers, - }, - })) as { id: string; channel_id: string }; + const res = (await request( + () => + rest.post(Routes.channelMessages(channelId), { + body: { + content: content || undefined, + sticker_ids: stickers, + }, + }) as Promise<{ id: string; channel_id: string }>, + "sticker", + )) as { id: string; channel_id: string }; return { messageId: res.id ? String(res.id) : "unknown", channelId: String(res.channel_id ?? channelId), @@ -533,18 +590,22 @@ export async function sendPollDiscord( poll: PollInput, opts: DiscordSendOpts & { content?: string } = {}, ): Promise { - const token = resolveToken(opts.token); - const rest = resolveRest(token, opts.rest); + const cfg = loadConfig(); + const { rest, request } = createDiscordClient(opts, cfg); const recipient = parseRecipient(to); - const { channelId } = await resolveChannelId(rest, recipient); + const { channelId } = await resolveChannelId(rest, recipient, request); const content = opts.content?.trim(); const payload = normalizeDiscordPollInput(poll); - const res = (await rest.post(Routes.channelMessages(channelId), { - body: { - content: content || undefined, - poll: payload, - }, - })) as { id: string; channel_id: string }; + const res = (await request( + () => + rest.post(Routes.channelMessages(channelId), { + body: { + content: content || undefined, + poll: payload, + }, + }) as Promise<{ id: string; channel_id: string }>, + "poll", + )) as { id: string; channel_id: string }; return { messageId: res.id ? String(res.id) : "unknown", channelId: String(res.channel_id ?? 
channelId), @@ -557,11 +618,13 @@ export async function reactMessageDiscord( emoji: string, opts: DiscordReactOpts = {}, ) { - const token = resolveToken(opts.token); - const rest = resolveRest(token, opts.rest); + const cfg = loadConfig(); + const { rest, request } = createDiscordClient(opts, cfg); const encoded = normalizeReactionEmoji(emoji); - await rest.put( - Routes.channelMessageOwnReaction(channelId, messageId, encoded), + await request( + () => + rest.put(Routes.channelMessageOwnReaction(channelId, messageId, encoded)), + "react", ); return { ok: true }; } diff --git a/src/gateway/server-bridge.ts b/src/gateway/server-bridge.ts index cc18f8e3e..5d4b2cf5e 100644 --- a/src/gateway/server-bridge.ts +++ b/src/gateway/server-bridge.ts @@ -707,6 +707,7 @@ export function createBridgeHandlers(ctx: BridgeHandlersContext) { for (const candidate of resolveSessionTranscriptCandidates( sessionId, storePath, + entry?.sessionFile, )) { if (!fs.existsSync(candidate)) continue; try { @@ -773,6 +774,7 @@ export function createBridgeHandlers(ctx: BridgeHandlersContext) { const filePath = resolveSessionTranscriptCandidates( sessionId, storePath, + entry?.sessionFile, ).find((candidate) => fs.existsSync(candidate)); if (!filePath) { return { @@ -843,7 +845,7 @@ export function createBridgeHandlers(ctx: BridgeHandlersContext) { const sessionId = entry?.sessionId; const rawMessages = sessionId && storePath - ? readSessionMessages(sessionId, storePath) + ? readSessionMessages(sessionId, storePath, entry?.sessionFile) : []; const max = typeof limit === "number" ? 
limit : 200; const sliced = @@ -1053,6 +1055,7 @@ export function createBridgeHandlers(ctx: BridgeHandlersContext) { { message: messageWithAttachments, sessionId, + sessionKey: p.sessionKey, runId: clientRunId, thinking: p.thinking, deliver: p.deliver, @@ -1169,6 +1172,7 @@ export function createBridgeHandlers(ctx: BridgeHandlersContext) { { message: text, sessionId, + sessionKey, thinking: "low", deliver: false, messageProvider: "node", @@ -1245,6 +1249,7 @@ export function createBridgeHandlers(ctx: BridgeHandlersContext) { { message, sessionId, + sessionKey, thinking: link?.thinking ?? undefined, deliver, to, diff --git a/src/gateway/server-methods/agent.ts b/src/gateway/server-methods/agent.ts index cab2009e0..0419e9696 100644 --- a/src/gateway/server-methods/agent.ts +++ b/src/gateway/server-methods/agent.ts @@ -244,6 +244,7 @@ export const agentHandlers: GatewayRequestHandlers = { message, to: sanitizedTo, sessionId: resolvedSessionId, + sessionKey: requestedSessionKey, thinking: request.thinking, deliver, provider: resolvedProvider, diff --git a/src/gateway/server-methods/chat.ts b/src/gateway/server-methods/chat.ts index 9bef65084..6b2799200 100644 --- a/src/gateway/server-methods/chat.ts +++ b/src/gateway/server-methods/chat.ts @@ -46,7 +46,9 @@ export const chatHandlers: GatewayRequestHandlers = { const { cfg, storePath, entry } = loadSessionEntry(sessionKey); const sessionId = entry?.sessionId; const rawMessages = - sessionId && storePath ? readSessionMessages(sessionId, storePath) : []; + sessionId && storePath + ? readSessionMessages(sessionId, storePath, entry?.sessionFile) + : []; const hardMax = 1000; const defaultLimit = 200; const requested = typeof limit === "number" ? 
limit : defaultLimit; @@ -259,6 +261,7 @@ export const chatHandlers: GatewayRequestHandlers = { { message: messageWithAttachments, sessionId, + sessionKey: p.sessionKey, runId: clientRunId, thinking: p.thinking, deliver: p.deliver, diff --git a/src/gateway/server-methods/sessions.ts b/src/gateway/server-methods/sessions.ts index 7da991553..3e86dfdb1 100644 --- a/src/gateway/server-methods/sessions.ts +++ b/src/gateway/server-methods/sessions.ts @@ -485,6 +485,7 @@ export const sessionsHandlers: GatewayRequestHandlers = { for (const candidate of resolveSessionTranscriptCandidates( sessionId, storePath, + entry?.sessionFile, target.agentId, )) { if (!fs.existsSync(candidate)) continue; @@ -559,6 +560,7 @@ export const sessionsHandlers: GatewayRequestHandlers = { const filePath = resolveSessionTranscriptCandidates( sessionId, storePath, + entry?.sessionFile, target.agentId, ).find((candidate) => fs.existsSync(candidate)); if (!filePath) { diff --git a/src/gateway/server.agent.test.ts b/src/gateway/server.agent.test.ts index a13df9206..3aff8b125 100644 --- a/src/gateway/server.agent.test.ts +++ b/src/gateway/server.agent.test.ts @@ -66,6 +66,43 @@ describe("gateway server agent", () => { testState.allowFrom = undefined; }); + test("agent forwards sessionKey to agentCommand", async () => { + const dir = await fs.mkdtemp(path.join(os.tmpdir(), "clawdbot-gw-")); + testState.sessionStorePath = path.join(dir, "sessions.json"); + await fs.writeFile( + testState.sessionStorePath, + JSON.stringify( + { + "agent:main:subagent:abc": { + sessionId: "sess-sub", + updatedAt: Date.now(), + }, + }, + null, + 2, + ), + "utf-8", + ); + + const { server, ws } = await startServerWithClient(); + await connectOk(ws); + + const res = await rpcReq(ws, "agent", { + message: "hi", + sessionKey: "agent:main:subagent:abc", + idempotencyKey: "idem-agent-subkey", + }); + expect(res.ok).toBe(true); + + const spy = vi.mocked(agentCommand); + const call = spy.mock.calls.at(-1)?.[0] as Record; + 
expect(call.sessionKey).toBe("agent:main:subagent:abc"); + expect(call.sessionId).toBe("sess-sub"); + + ws.close(); + await server.close(); + }); + test("agent routes main last-channel whatsapp", async () => { const dir = await fs.mkdtemp(path.join(os.tmpdir(), "clawdbot-gw-")); testState.sessionStorePath = path.join(dir, "sessions.json"); diff --git a/src/gateway/server.chat.test.ts b/src/gateway/server.chat.test.ts index 748203ee0..b4650cc51 100644 --- a/src/gateway/server.chat.test.ts +++ b/src/gateway/server.chat.test.ts @@ -61,6 +61,26 @@ describe("gateway server chat", () => { await server.close(); }); + test("chat.send forwards sessionKey to agentCommand", async () => { + const { server, ws } = await startServerWithClient(); + await connectOk(ws); + + const res = await rpcReq(ws, "chat.send", { + sessionKey: "agent:main:subagent:abc", + message: "hello", + idempotencyKey: "idem-session-key-1", + }); + expect(res.ok).toBe(true); + + const call = vi.mocked(agentCommand).mock.calls.at(-1)?.[0] as + | { sessionKey?: string } + | undefined; + expect(call?.sessionKey).toBe("agent:main:subagent:abc"); + + ws.close(); + await server.close(); + }); + test("chat.send blocked by send policy", async () => { const dir = await fs.mkdtemp(path.join(os.tmpdir(), "clawdbot-gw-")); testState.sessionStorePath = path.join(dir, "sessions.json"); @@ -307,6 +327,67 @@ describe("gateway server chat", () => { await server.close(); }); + test("chat.history prefers sessionFile when set", async () => { + const dir = await fs.mkdtemp(path.join(os.tmpdir(), "clawdbot-gw-")); + testState.sessionStorePath = path.join(dir, "sessions.json"); + + const forkedPath = path.join(dir, "sess-forked.jsonl"); + await fs.writeFile( + forkedPath, + JSON.stringify({ + message: { + role: "user", + content: [{ type: "text", text: "from-fork" }], + timestamp: Date.now(), + }, + }), + "utf-8", + ); + + await fs.writeFile( + path.join(dir, "sess-main.jsonl"), + JSON.stringify({ + message: { + role: "user", + 
content: [{ type: "text", text: "from-default" }], + timestamp: Date.now(), + }, + }), + "utf-8", + ); + + await fs.writeFile( + testState.sessionStorePath, + JSON.stringify( + { + main: { + sessionId: "sess-main", + sessionFile: forkedPath, + updatedAt: Date.now(), + }, + }, + null, + 2, + ), + "utf-8", + ); + + const { server, ws } = await startServerWithClient(); + await connectOk(ws); + + const res = await rpcReq<{ messages?: unknown[] }>(ws, "chat.history", { + sessionKey: "main", + }); + expect(res.ok).toBe(true); + const messages = res.payload?.messages ?? []; + expect(messages.length).toBe(1); + const first = messages[0] as { content?: { text?: string }[] }; + expect(first.content?.[0]?.text).toBe("from-fork"); + + ws.close(); + await server.close(); + }); + test("chat.history defaults thinking to low for reasoning-capable models", async () => { piSdkMock.enabled = true; piSdkMock.models = [ diff --git a/src/gateway/server.node-bridge.test.ts b/src/gateway/server.node-bridge.test.ts index 3b25d4c8d..a885014df 100644 --- a/src/gateway/server.node-bridge.test.ts +++ b/src/gateway/server.node-bridge.test.ts @@ -758,6 +758,7 @@ describe("gateway server node/bridge", () => { expect(spy.mock.calls.length).toBe(beforeCalls + 1); const call = spy.mock.calls.at(-1)?.[0] as Record; expect(call.sessionId).toBe("sess-main"); + expect(call.sessionKey).toBe("main"); expect(call.deliver).toBe(false); expect(call.messageProvider).toBe("node"); diff --git a/src/gateway/session-utils.ts b/src/gateway/session-utils.ts index 3aa1aca4d..91dc540e8 100644 --- a/src/gateway/session-utils.ts +++ b/src/gateway/session-utils.ts @@ -74,8 +74,13 @@ export type SessionsPatchResult = { export function readSessionMessages( sessionId: string, storePath: string | undefined, + sessionFile?: string, ): unknown[] { - const candidates = resolveSessionTranscriptCandidates(sessionId, storePath); + const candidates = resolveSessionTranscriptCandidates( + sessionId, + storePath, + sessionFile, + ); 
const filePath = candidates.find((p) => fs.existsSync(p)); if (!filePath) return []; @@ -99,9 +104,11 @@ export function readSessionMessages( export function resolveSessionTranscriptCandidates( sessionId: string, storePath: string | undefined, + sessionFile?: string, agentId?: string, ): string[] { const candidates: string[] = []; + if (sessionFile) candidates.push(sessionFile); if (storePath) { const dir = path.dirname(storePath); candidates.push(path.join(dir, `${sessionId}.jsonl`)); diff --git a/src/index.ts b/src/index.ts index b4e755bba..2fd81d1fa 100644 --- a/src/index.ts +++ b/src/index.ts @@ -27,7 +27,7 @@ import { PortInUseError, } from "./infra/ports.js"; import { assertSupportedRuntime } from "./infra/runtime-guard.js"; -import { isUnhandledRejectionHandled } from "./infra/unhandled-rejections.js"; +import { installUnhandledRejectionHandler } from "./infra/unhandled-rejections.js"; import { enableConsoleCapture } from "./logging.js"; import { runCommandWithTimeout, runExec } from "./process/exec.js"; import { monitorWebProvider } from "./provider-web.js"; @@ -79,14 +79,7 @@ const isMain = isMainModule({ if (isMain) { // Global error handlers to prevent silent crashes from unhandled rejections/exceptions. // These log the error and exit gracefully instead of crashing without trace. - process.on("unhandledRejection", (reason, _promise) => { - if (isUnhandledRejectionHandled(reason)) return; - console.error( - "[clawdbot] Unhandled promise rejection:", - reason instanceof Error ? (reason.stack ?? 
reason.message) : reason, - ); - process.exit(1); - }); + installUnhandledRejectionHandler(); process.on("uncaughtException", (error) => { console.error( diff --git a/src/infra/bonjour-ciao.ts b/src/infra/bonjour-ciao.ts new file mode 100644 index 000000000..9ca24aa21 --- /dev/null +++ b/src/infra/bonjour-ciao.ts @@ -0,0 +1,14 @@ +import { logDebug } from "../logger.js"; + +import { formatBonjourError } from "./bonjour-errors.js"; + +export function ignoreCiaoCancellationRejection(reason: unknown): boolean { + const message = formatBonjourError(reason).toUpperCase(); + if (!message.includes("CIAO ANNOUNCEMENT CANCELLED")) { + return false; + } + logDebug( + `bonjour: ignoring unhandled ciao rejection: ${formatBonjourError(reason)}`, + ); + return true; +} diff --git a/src/infra/bonjour-errors.ts b/src/infra/bonjour-errors.ts new file mode 100644 index 000000000..7af8e3f3f --- /dev/null +++ b/src/infra/bonjour-errors.ts @@ -0,0 +1,7 @@ +export function formatBonjourError(err: unknown): string { + if (err instanceof Error) { + const msg = err.message || String(err); + return err.name && err.name !== "Error" ? 
`${err.name}: ${msg}` : msg; + } + return String(err); +} diff --git a/src/infra/bonjour.test.ts b/src/infra/bonjour.test.ts index 1acb21dfc..527118f87 100644 --- a/src/infra/bonjour.test.ts +++ b/src/infra/bonjour.test.ts @@ -4,6 +4,7 @@ import { afterEach, describe, expect, it, vi } from "vitest"; const createService = vi.fn(); const shutdown = vi.fn(); +const registerUnhandledRejectionHandler = vi.fn(); const logWarn = vi.fn(); const logDebug = vi.fn(); @@ -38,6 +39,14 @@ vi.mock("@homebridge/ciao", () => { }; }); +vi.mock("./unhandled-rejections.js", () => { + return { + registerUnhandledRejectionHandler: ( + handler: (reason: unknown) => boolean, + ) => registerUnhandledRejectionHandler(handler), + }; +}); + const { startGatewayBonjourAdvertiser } = await import("./bonjour.js"); describe("gateway bonjour advertiser", () => { @@ -60,6 +69,7 @@ describe("gateway bonjour advertiser", () => { createService.mockReset(); shutdown.mockReset(); + registerUnhandledRejectionHandler.mockReset(); logWarn.mockReset(); logDebug.mockReset(); getLoggerInfo.mockReset(); @@ -177,6 +187,51 @@ describe("gateway bonjour advertiser", () => { await started.stop(); }); + it("cleans up unhandled rejection handler after shutdown", async () => { + // Allow advertiser to run in unit tests. + delete process.env.VITEST; + process.env.NODE_ENV = "development"; + + vi.spyOn(os, "hostname").mockReturnValue("test-host"); + + const destroy = vi.fn().mockResolvedValue(undefined); + const advertise = vi.fn().mockResolvedValue(undefined); + const order: string[] = []; + shutdown.mockImplementation(async () => { + order.push("shutdown"); + }); + + createService.mockImplementation((options: Record) => { + return { + advertise, + destroy, + serviceState: "announced", + on: vi.fn(), + getFQDN: () => + `${asString(options.type, "service")}.${asString(options.domain, "local")}.`, + getHostname: () => asString(options.hostname, "unknown"), + getPort: () => Number(options.port ?? 
-1), + }; + }); + + const cleanup = vi.fn(() => { + order.push("cleanup"); + }); + registerUnhandledRejectionHandler.mockImplementation(() => cleanup); + + const started = await startGatewayBonjourAdvertiser({ + gatewayPort: 18789, + sshPort: 2222, + bridgePort: 18790, + }); + + await started.stop(); + + expect(registerUnhandledRejectionHandler).toHaveBeenCalledTimes(1); + expect(cleanup).toHaveBeenCalledTimes(1); + expect(order).toEqual(["shutdown", "cleanup"]); + }); + it("logs advertise failures and retries via watchdog", async () => { // Allow advertiser to run in unit tests. delete process.env.VITEST; diff --git a/src/infra/bonjour.ts b/src/infra/bonjour.ts index d8becf195..43ea728fc 100644 --- a/src/infra/bonjour.ts +++ b/src/infra/bonjour.ts @@ -2,6 +2,9 @@ import os from "node:os"; import { logDebug, logWarn } from "../logger.js"; import { getLogger } from "../logging.js"; +import { ignoreCiaoCancellationRejection } from "./bonjour-ciao.js"; +import { formatBonjourError } from "./bonjour-errors.js"; +import { registerUnhandledRejectionHandler } from "./unhandled-rejections.js"; export type GatewayBonjourAdvertiser = { stop: () => Promise; @@ -44,14 +47,6 @@ type BonjourService = { serviceState: string; }; -function formatBonjourError(err: unknown): string { - if (err instanceof Error) { - const msg = err.message || String(err); - return err.name && err.name !== "Error" ? 
`${err.name}: ${msg}` : msg; - } - return String(err); -} - function serviceSummary(label: string, svc: BonjourService): string { let fqdn = "unknown"; let hostname = "unknown"; @@ -143,6 +138,13 @@ export async function startGatewayBonjourAdvertiser( }); } + let ciaoCancellationRejectionHandler: (() => void) | undefined; + if (services.length > 0) { + ciaoCancellationRejectionHandler = registerUnhandledRejectionHandler( + ignoreCiaoCancellationRejection, + ); + } + logDebug( `bonjour: starting (hostname=${hostname}, instance=${JSON.stringify( safeServiceName(instanceName), @@ -254,6 +256,8 @@ export async function startGatewayBonjourAdvertiser( await responder.shutdown(); } catch { /* ignore */ + } finally { + ciaoCancellationRejectionHandler?.(); } }, }; diff --git a/src/infra/retry-policy.ts b/src/infra/retry-policy.ts new file mode 100644 index 000000000..3f30974e6 --- /dev/null +++ b/src/infra/retry-policy.ts @@ -0,0 +1,106 @@ +import { RateLimitError } from "@buape/carbon"; + +import { formatErrorMessage } from "./errors.js"; +import { type RetryConfig, resolveRetryConfig, retryAsync } from "./retry.js"; + +export type RetryRunner = ( + fn: () => Promise, + label?: string, +) => Promise; + +export const DISCORD_RETRY_DEFAULTS = { + attempts: 3, + minDelayMs: 500, + maxDelayMs: 30_000, + jitter: 0.1, +}; + +export const TELEGRAM_RETRY_DEFAULTS = { + attempts: 3, + minDelayMs: 400, + maxDelayMs: 30_000, + jitter: 0.1, +}; + +const TELEGRAM_RETRY_RE = + /429|timeout|connect|reset|closed|unavailable|temporarily/i; + +function getTelegramRetryAfterMs(err: unknown): number | undefined { + if (!err || typeof err !== "object") return undefined; + const candidate = + "parameters" in err && err.parameters && typeof err.parameters === "object" + ? (err.parameters as { retry_after?: unknown }).retry_after + : "response" in err && + err.response && + typeof err.response === "object" && + "parameters" in err.response + ? 
( + err.response as { + parameters?: { retry_after?: unknown }; + } + ).parameters?.retry_after + : "error" in err && + err.error && + typeof err.error === "object" && + "parameters" in err.error + ? (err.error as { parameters?: { retry_after?: unknown } }).parameters + ?.retry_after + : undefined; + return typeof candidate === "number" && Number.isFinite(candidate) + ? candidate * 1000 + : undefined; +} + +export function createDiscordRetryRunner(params: { + retry?: RetryConfig; + configRetry?: RetryConfig; + verbose?: boolean; +}): RetryRunner { + const retryConfig = resolveRetryConfig(DISCORD_RETRY_DEFAULTS, { + ...params.configRetry, + ...params.retry, + }); + return (fn: () => Promise, label?: string) => + retryAsync(fn, { + ...retryConfig, + label, + shouldRetry: (err) => err instanceof RateLimitError, + retryAfterMs: (err) => + err instanceof RateLimitError ? err.retryAfter * 1000 : undefined, + onRetry: params.verbose + ? (info) => { + const labelText = info.label ?? "request"; + const maxRetries = Math.max(1, info.maxAttempts - 1); + console.warn( + `discord ${labelText} rate limited, retry ${info.attempt}/${maxRetries} in ${info.delayMs}ms`, + ); + } + : undefined, + }); +} + +export function createTelegramRetryRunner(params: { + retry?: RetryConfig; + configRetry?: RetryConfig; + verbose?: boolean; +}): RetryRunner { + const retryConfig = resolveRetryConfig(TELEGRAM_RETRY_DEFAULTS, { + ...params.configRetry, + ...params.retry, + }); + return (fn: () => Promise, label?: string) => + retryAsync(fn, { + ...retryConfig, + label, + shouldRetry: (err) => TELEGRAM_RETRY_RE.test(formatErrorMessage(err)), + retryAfterMs: getTelegramRetryAfterMs, + onRetry: params.verbose + ? (info) => { + const maxRetries = Math.max(1, info.maxAttempts - 1); + console.warn( + `telegram send retry ${info.attempt}/${maxRetries} for ${info.label ?? label ?? 
"request"} in ${info.delayMs}ms: ${formatErrorMessage(info.err)}`, + ); + } + : undefined, + }); +} diff --git a/src/infra/retry.test.ts b/src/infra/retry.test.ts index 7099f5239..1c14364ed 100644 --- a/src/infra/retry.test.ts +++ b/src/infra/retry.test.ts @@ -25,4 +25,80 @@ describe("retryAsync", () => { await expect(retryAsync(fn, 2, 1)).rejects.toThrow("boom"); expect(fn).toHaveBeenCalledTimes(2); }); + + it("stops when shouldRetry returns false", async () => { + const fn = vi.fn().mockRejectedValue(new Error("boom")); + await expect( + retryAsync(fn, { attempts: 3, shouldRetry: () => false }), + ).rejects.toThrow("boom"); + expect(fn).toHaveBeenCalledTimes(1); + }); + + it("calls onRetry before retrying", async () => { + const fn = vi + .fn() + .mockRejectedValueOnce(new Error("boom")) + .mockResolvedValueOnce("ok"); + const onRetry = vi.fn(); + const res = await retryAsync(fn, { + attempts: 2, + minDelayMs: 0, + maxDelayMs: 0, + onRetry, + }); + expect(res).toBe("ok"); + expect(onRetry).toHaveBeenCalledWith( + expect.objectContaining({ attempt: 1, maxAttempts: 2 }), + ); + }); + + it("clamps attempts to at least 1", async () => { + const fn = vi.fn().mockRejectedValue(new Error("boom")); + await expect( + retryAsync(fn, { attempts: 0, minDelayMs: 0, maxDelayMs: 0 }), + ).rejects.toThrow("boom"); + expect(fn).toHaveBeenCalledTimes(1); + }); + + it("uses retryAfterMs when provided", async () => { + vi.useFakeTimers(); + const fn = vi + .fn() + .mockRejectedValueOnce(new Error("boom")) + .mockResolvedValueOnce("ok"); + const delays: number[] = []; + const promise = retryAsync(fn, { + attempts: 2, + minDelayMs: 0, + maxDelayMs: 1000, + jitter: 0, + retryAfterMs: () => 500, + onRetry: (info) => delays.push(info.delayMs), + }); + await vi.runAllTimersAsync(); + await expect(promise).resolves.toBe("ok"); + expect(delays[0]).toBe(500); + vi.useRealTimers(); + }); + + it("clamps retryAfterMs to maxDelayMs", async () => { + vi.useFakeTimers(); + const fn = vi + .fn() + 
.mockRejectedValueOnce(new Error("boom")) + .mockResolvedValueOnce("ok"); + const delays: number[] = []; + const promise = retryAsync(fn, { + attempts: 2, + minDelayMs: 0, + maxDelayMs: 100, + jitter: 0, + retryAfterMs: () => 500, + onRetry: (info) => delays.push(info.delayMs), + }); + await vi.runAllTimersAsync(); + await expect(promise).resolves.toBe("ok"); + expect(delays[0]).toBe(100); + vi.useRealTimers(); + }); }); diff --git a/src/infra/retry.ts b/src/infra/retry.ts index 234ab539c..0528953e7 100644 --- a/src/infra/retry.ts +++ b/src/infra/retry.ts @@ -1,18 +1,137 @@ +export type RetryConfig = { + attempts?: number; + minDelayMs?: number; + maxDelayMs?: number; + jitter?: number; +}; + +export type RetryInfo = { + attempt: number; + maxAttempts: number; + delayMs: number; + err: unknown; + label?: string; +}; + +export type RetryOptions = RetryConfig & { + label?: string; + shouldRetry?: (err: unknown, attempt: number) => boolean; + retryAfterMs?: (err: unknown) => number | undefined; + onRetry?: (info: RetryInfo) => void; +}; + +const DEFAULT_RETRY_CONFIG = { + attempts: 3, + minDelayMs: 300, + maxDelayMs: 30_000, + jitter: 0, +}; + +const sleep = (ms: number) => new Promise((r) => setTimeout(r, ms)); + +const asFiniteNumber = (value: unknown): number | undefined => + typeof value === "number" && Number.isFinite(value) ? value : undefined; + +const clampNumber = ( + value: unknown, + fallback: number, + min?: number, + max?: number, +) => { + const next = asFiniteNumber(value); + if (next === undefined) return fallback; + const floor = typeof min === "number" ? min : Number.NEGATIVE_INFINITY; + const ceiling = typeof max === "number" ? 
max : Number.POSITIVE_INFINITY; + return Math.min(Math.max(next, floor), ceiling); +}; + +export function resolveRetryConfig( + defaults: Required = DEFAULT_RETRY_CONFIG, + overrides?: RetryConfig, +): Required { + const attempts = Math.max( + 1, + Math.round(clampNumber(overrides?.attempts, defaults.attempts, 1)), + ); + const minDelayMs = Math.max( + 0, + Math.round(clampNumber(overrides?.minDelayMs, defaults.minDelayMs, 0)), + ); + const maxDelayMs = Math.max( + minDelayMs, + Math.round(clampNumber(overrides?.maxDelayMs, defaults.maxDelayMs, 0)), + ); + const jitter = clampNumber(overrides?.jitter, defaults.jitter, 0, 1); + return { attempts, minDelayMs, maxDelayMs, jitter }; +} + +function applyJitter(delayMs: number, jitter: number): number { + if (jitter <= 0) return delayMs; + const offset = (Math.random() * 2 - 1) * jitter; + return Math.max(0, Math.round(delayMs * (1 + offset))); +} + export async function retryAsync( fn: () => Promise, - attempts = 3, + attemptsOrOptions: number | RetryOptions = 3, initialDelayMs = 300, ): Promise { + if (typeof attemptsOrOptions === "number") { + const attempts = Math.max(1, Math.round(attemptsOrOptions)); + let lastErr: unknown; + for (let i = 0; i < attempts; i += 1) { + try { + return await fn(); + } catch (err) { + lastErr = err; + if (i === attempts - 1) break; + const delay = initialDelayMs * 2 ** i; + await sleep(delay); + } + } + throw lastErr ?? new Error("Retry failed"); + } + + const options = attemptsOrOptions; + + const resolved = resolveRetryConfig(DEFAULT_RETRY_CONFIG, options); + const maxAttempts = resolved.attempts; + const minDelayMs = resolved.minDelayMs; + const maxDelayMs = + Number.isFinite(resolved.maxDelayMs) && resolved.maxDelayMs > 0 + ? resolved.maxDelayMs + : Number.POSITIVE_INFINITY; + const jitter = resolved.jitter; + const shouldRetry = options.shouldRetry ?? 
(() => true); let lastErr: unknown; - for (let i = 0; i < attempts; i += 1) { + + for (let attempt = 1; attempt <= maxAttempts; attempt += 1) { try { return await fn(); } catch (err) { lastErr = err; - if (i === attempts - 1) break; - const delay = initialDelayMs * 2 ** i; - await new Promise((r) => setTimeout(r, delay)); + if (attempt >= maxAttempts || !shouldRetry(err, attempt)) break; + + const retryAfterMs = options.retryAfterMs?.(err); + const hasRetryAfter = + typeof retryAfterMs === "number" && Number.isFinite(retryAfterMs); + const baseDelay = hasRetryAfter + ? Math.max(retryAfterMs, minDelayMs) + : minDelayMs * 2 ** (attempt - 1); + let delay = Math.min(baseDelay, maxDelayMs); + delay = applyJitter(delay, jitter); + delay = Math.min(Math.max(delay, minDelayMs), maxDelayMs); + + options.onRetry?.({ + attempt, + maxAttempts, + delayMs: delay, + err, + label: options.label, + }); + await sleep(delay); } } - throw lastErr; + + throw lastErr ?? new Error("Retry failed"); } diff --git a/src/infra/unhandled-rejections.ts b/src/infra/unhandled-rejections.ts index 5a789fab1..3ce17aa18 100644 --- a/src/infra/unhandled-rejections.ts +++ b/src/infra/unhandled-rejections.ts @@ -1,3 +1,5 @@ +import process from "node:process"; + type UnhandledRejectionHandler = (reason: unknown) => boolean; const handlers = new Set(); @@ -24,3 +26,14 @@ export function isUnhandledRejectionHandled(reason: unknown): boolean { } return false; } + +export function installUnhandledRejectionHandler(): void { + process.on("unhandledRejection", (reason, _promise) => { + if (isUnhandledRejectionHandled(reason)) return; + console.error( + "[clawdbot] Unhandled promise rejection:", + reason instanceof Error ? (reason.stack ?? 
reason.message) : reason, + ); + process.exit(1); + }); +} diff --git a/src/macos/relay.ts b/src/macos/relay.ts index d9dcc8f0f..65627999b 100644 --- a/src/macos/relay.ts +++ b/src/macos/relay.ts @@ -59,21 +59,14 @@ async function main() { const { assertSupportedRuntime } = await import("../infra/runtime-guard.js"); assertSupportedRuntime(); - const { isUnhandledRejectionHandled } = await import( + const { installUnhandledRejectionHandler } = await import( "../infra/unhandled-rejections.js" ); const { buildProgram } = await import("../cli/program.js"); const program = buildProgram(); - process.on("unhandledRejection", (reason, _promise) => { - if (isUnhandledRejectionHandled(reason)) return; - console.error( - "[clawdbot] Unhandled promise rejection:", - reason instanceof Error ? (reason.stack ?? reason.message) : reason, - ); - process.exit(1); - }); + installUnhandledRejectionHandler(); process.on("uncaughtException", (error) => { console.error( diff --git a/src/routing/session-key.ts b/src/routing/session-key.ts index 52563936f..f0efb1004 100644 --- a/src/routing/session-key.ts +++ b/src/routing/session-key.ts @@ -89,3 +89,20 @@ export function buildAgentPeerSessionKey(params: { const peerId = (params.peerId ?? "").trim() || "unknown"; return `agent:${normalizeAgentId(params.agentId)}:${provider}:${peerKind}:${peerId}`; } + +export function resolveThreadSessionKeys(params: { + baseSessionKey: string; + threadId?: string | null; + parentSessionKey?: string; + useSuffix?: boolean; +}): { sessionKey: string; parentSessionKey?: string } { + const threadId = (params.threadId ?? "").trim(); + if (!threadId) { + return { sessionKey: params.baseSessionKey, parentSessionKey: undefined }; + } + const useSuffix = params.useSuffix ?? true; + const sessionKey = useSuffix + ? 
`${params.baseSessionKey}:thread:${threadId}` + : params.baseSessionKey; + return { sessionKey, parentSessionKey: params.parentSessionKey }; +} diff --git a/src/slack/monitor.tool-result.test.ts b/src/slack/monitor.tool-result.test.ts index 55ab49cf1..540a065b8 100644 --- a/src/slack/monitor.tool-result.test.ts +++ b/src/slack/monitor.tool-result.test.ts @@ -57,6 +57,7 @@ vi.mock("@slack/bolt", () => { info: vi.fn().mockResolvedValue({ channel: { name: "dm", is_im: true }, }), + replies: vi.fn().mockResolvedValue({ messages: [] }), }, users: { info: vi.fn().mockResolvedValue({ @@ -283,6 +284,177 @@ describe("monitorSlackProvider tool results", () => { expect(sendMock.mock.calls[0][2]).toMatchObject({ threadTs: "456" }); }); + it("treats parent_user_id as a thread reply even when thread_ts matches ts", async () => { + replyMock.mockResolvedValue({ text: "thread reply" }); + + const controller = new AbortController(); + const run = monitorSlackProvider({ + botToken: "bot-token", + appToken: "app-token", + abortSignal: controller.signal, + }); + + await waitForEvent("message"); + const handler = getSlackHandlers()?.get("message"); + if (!handler) throw new Error("Slack message handler not registered"); + + await handler({ + event: { + type: "message", + user: "U1", + text: "hello", + ts: "123", + thread_ts: "123", + parent_user_id: "U2", + channel: "C1", + channel_type: "im", + }, + }); + + await flush(); + controller.abort(); + await run; + + expect(replyMock).toHaveBeenCalledTimes(1); + const ctx = replyMock.mock.calls[0]?.[0] as { + SessionKey?: string; + ParentSessionKey?: string; + }; + expect(ctx.SessionKey).toBe("agent:main:main:thread:123"); + expect(ctx.ParentSessionKey).toBe("agent:main:main"); + }); + + it("forks thread sessions and injects starter context", async () => { + replyMock.mockResolvedValue({ text: "ok" }); + + const client = getSlackClient(); + if (client?.conversations?.info) { + client.conversations.info.mockResolvedValue({ + channel: { name: 
"general", is_channel: true }, + }); + } + if (client?.conversations?.replies) { + client.conversations.replies.mockResolvedValue({ + messages: [{ text: "starter message", user: "U2", ts: "111.222" }], + }); + } + + config = { + messages: { responsePrefix: "PFX" }, + slack: { + dm: { enabled: true, policy: "open", allowFrom: ["*"] }, + channels: { C1: { allow: true, requireMention: false } }, + }, + routing: { allowFrom: [] }, + }; + + const controller = new AbortController(); + const run = monitorSlackProvider({ + botToken: "bot-token", + appToken: "app-token", + abortSignal: controller.signal, + }); + + await waitForEvent("message"); + const handler = getSlackHandlers()?.get("message"); + if (!handler) throw new Error("Slack message handler not registered"); + + await handler({ + event: { + type: "message", + user: "U1", + text: "thread reply", + ts: "123.456", + thread_ts: "111.222", + channel: "C1", + channel_type: "channel", + }, + }); + + await flush(); + controller.abort(); + await run; + + expect(replyMock).toHaveBeenCalledTimes(1); + const ctx = replyMock.mock.calls[0]?.[0] as { + SessionKey?: string; + ParentSessionKey?: string; + ThreadStarterBody?: string; + ThreadLabel?: string; + }; + expect(ctx.SessionKey).toBe("agent:main:slack:channel:C1:thread:111.222"); + expect(ctx.ParentSessionKey).toBe("agent:main:slack:channel:C1"); + expect(ctx.ThreadStarterBody).toContain("starter message"); + expect(ctx.ThreadLabel).toContain("Slack thread #general"); + }); + + it("scopes thread session keys to the routed agent", async () => { + replyMock.mockResolvedValue({ text: "ok" }); + config = { + messages: { responsePrefix: "PFX" }, + slack: { + dm: { enabled: true, policy: "open", allowFrom: ["*"] }, + channels: { C1: { allow: true, requireMention: false } }, + }, + routing: { + allowFrom: [], + bindings: [ + { agentId: "support", match: { provider: "slack", teamId: "T1" } }, + ], + }, + }; + + const client = getSlackClient(); + if (client?.auth?.test) { + 
client.auth.test.mockResolvedValue({ + user_id: "bot-user", + team_id: "T1", + }); + } + if (client?.conversations?.info) { + client.conversations.info.mockResolvedValue({ + channel: { name: "general", is_channel: true }, + }); + } + + const controller = new AbortController(); + const run = monitorSlackProvider({ + botToken: "bot-token", + appToken: "app-token", + abortSignal: controller.signal, + }); + + await waitForEvent("message"); + const handler = getSlackHandlers()?.get("message"); + if (!handler) throw new Error("Slack message handler not registered"); + + await handler({ + event: { + type: "message", + user: "U1", + text: "thread reply", + ts: "123.456", + thread_ts: "111.222", + channel: "C1", + channel_type: "channel", + }, + }); + + await flush(); + controller.abort(); + await run; + + expect(replyMock).toHaveBeenCalledTimes(1); + const ctx = replyMock.mock.calls[0]?.[0] as { + SessionKey?: string; + ParentSessionKey?: string; + }; + expect(ctx.SessionKey).toBe( + "agent:support:slack:channel:C1:thread:111.222", + ); + expect(ctx.ParentSessionKey).toBe("agent:support:slack:channel:C1"); + }); + it("keeps replies in channel root when message is not threaded", async () => { replyMock.mockResolvedValue({ text: "root reply" }); diff --git a/src/slack/monitor.ts b/src/slack/monitor.ts index 91586f2ba..042849126 100644 --- a/src/slack/monitor.ts +++ b/src/slack/monitor.ts @@ -3,6 +3,7 @@ import { type SlackCommandMiddlewareArgs, type SlackEventMiddlewareArgs, } from "@slack/bolt"; +import type { WebClient as SlackWebClient } from "@slack/web-api"; import { chunkMarkdownText, resolveTextChunkLimit, @@ -13,7 +14,10 @@ import { listNativeCommandSpecs, shouldHandleTextCommands, } from "../auto-reply/commands-registry.js"; -import { formatAgentEnvelope } from "../auto-reply/envelope.js"; +import { + formatAgentEnvelope, + formatThreadStarterEnvelope, +} from "../auto-reply/envelope.js"; import { dispatchReplyFromConfig } from 
"../auto-reply/reply/dispatch-from-config.js"; import { buildMentionRegexes, @@ -43,6 +47,7 @@ import { upsertProviderPairingRequest, } from "../pairing/pairing-store.js"; import { resolveAgentRoute } from "../routing/resolve-route.js"; +import { resolveThreadSessionKeys } from "../routing/session-key.js"; import type { RuntimeEnv } from "../runtime.js"; import { reactSlackMessage } from "./actions.js"; import { sendMessageSlack } from "./send.js"; @@ -74,6 +79,7 @@ type SlackMessageEvent = { text?: string; ts?: string; thread_ts?: string; + parent_user_id?: string; channel: string; channel_type?: "im" | "mpim" | "channel" | "group"; files?: SlackFile[]; @@ -86,6 +92,7 @@ type SlackAppMentionEvent = { text?: string; ts?: string; thread_ts?: string; + parent_user_id?: string; channel: string; channel_type?: "im" | "mpim" | "channel" | "group"; }; @@ -390,6 +397,44 @@ async function resolveSlackMedia(params: { return null; } +type SlackThreadStarter = { + text: string; + userId?: string; + ts?: string; +}; + +const THREAD_STARTER_CACHE = new Map(); + +async function resolveSlackThreadStarter(params: { + channelId: string; + threadTs: string; + client: SlackWebClient; +}): Promise { + const cacheKey = `${params.channelId}:${params.threadTs}`; + const cached = THREAD_STARTER_CACHE.get(cacheKey); + if (cached) return cached; + try { + const response = (await params.client.conversations.replies({ + channel: params.channelId, + ts: params.threadTs, + limit: 1, + inclusive: true, + })) as { messages?: Array<{ text?: string; user?: string; ts?: string }> }; + const message = response?.messages?.[0]; + const text = (message?.text ?? 
"").trim(); + if (!message || !text) return null; + const starter: SlackThreadStarter = { + text, + userId: message.user, + ts: message.ts, + }; + THREAD_STARTER_CACHE.set(cacheKey, starter); + return starter; + } catch { + return null; + } +} + export async function monitorSlackProvider(opts: MonitorSlackOpts = {}) { const cfg = loadConfig(); const sessionCfg = cfg.session; @@ -883,7 +928,18 @@ export async function monitorSlackProvider(opts: MonitorSlackOpts = {}) { id: isDirectMessage ? (message.user ?? "unknown") : message.channel, }, }); - const sessionKey = route.sessionKey; + const baseSessionKey = route.sessionKey; + const threadTs = message.thread_ts; + const hasThreadTs = typeof threadTs === "string" && threadTs.length > 0; + const isThreadReply = + hasThreadTs && + (threadTs !== message.ts || Boolean(message.parent_user_id)); + const threadKeys = resolveThreadSessionKeys({ + baseSessionKey, + threadId: isThreadReply ? threadTs : undefined, + parentSessionKey: isThreadReply ? baseSessionKey : undefined, + }); + const sessionKey = threadKeys.sessionKey; enqueueSystemEvent(`${inboundLabel}: ${preview}`, { sessionKey, contextKey: `slack:message:${message.channel}:${message.ts ?? "unknown"}`, @@ -912,11 +968,39 @@ export async function monitorSlackProvider(opts: MonitorSlackOpts = {}) { ].filter((entry): entry is string => Boolean(entry)); const groupSystemPrompt = systemPromptParts.length > 0 ? systemPromptParts.join("\n\n") : undefined; + let threadStarterBody: string | undefined; + let threadLabel: string | undefined; + if (isThreadReply && threadTs) { + const starter = await resolveSlackThreadStarter({ + channelId: message.channel, + threadTs, + client: app.client, + }); + if (starter?.text) { + const starterUser = starter.userId + ? await resolveUserName(starter.userId) + : null; + const starterName = starterUser?.name ?? starter.userId ?? "Unknown"; + const starterWithId = `${starter.text}\n[slack message id: ${starter.ts ?? 
threadTs} channel: ${message.channel}]`; + threadStarterBody = formatThreadStarterEnvelope({ + provider: "Slack", + author: starterName, + timestamp: starter.ts + ? Math.round(Number(starter.ts) * 1000) + : undefined, + body: starterWithId, + }); + const snippet = starter.text.replace(/\s+/g, " ").slice(0, 80); + threadLabel = `Slack thread ${roomLabel}${snippet ? `: ${snippet}` : ""}`; + } else { + threadLabel = `Slack thread ${roomLabel}`; + } + } const ctxPayload = { Body: body, From: slackFrom, To: slackTo, - SessionKey: route.sessionKey, + SessionKey: sessionKey, AccountId: route.accountId, ChatType: isDirectMessage ? "direct" : isRoom ? "room" : "group", GroupSubject: isRoomish ? roomLabel : undefined, @@ -927,6 +1011,9 @@ export async function monitorSlackProvider(opts: MonitorSlackOpts = {}) { Surface: "slack" as const, MessageSid: message.ts, ReplyToId: message.thread_ts ?? message.ts, + ParentSessionKey: threadKeys.parentSessionKey, + ThreadStarterBody: threadStarterBody, + ThreadLabel: threadLabel, Timestamp: message.ts ? Math.round(Number(message.ts) * 1000) : undefined, WasMentioned: isRoomish ? 
wasMentioned : undefined, MediaPath: media?.path, diff --git a/src/telegram/bot.media.test.ts b/src/telegram/bot.media.test.ts index 068f0fa7c..8f4038c1c 100644 --- a/src/telegram/bot.media.test.ts +++ b/src/telegram/bot.media.test.ts @@ -1,6 +1,7 @@ import { describe, expect, it, vi } from "vitest"; const useSpy = vi.fn(); +const middlewareUseSpy = vi.fn(); const onSpy = vi.fn(); const stopSpy = vi.fn(); const sendChatActionSpy = vi.fn(); @@ -18,6 +19,7 @@ const apiStub: ApiStub = { vi.mock("grammy", () => ({ Bot: class { api = apiStub; + use = middlewareUseSpy; on = onSpy; stop = stopSpy; constructor(public token: string) {} @@ -26,6 +28,10 @@ vi.mock("grammy", () => ({ webhookCallback: vi.fn(), })); +vi.mock("@grammyjs/runner", () => ({ + sequentialize: () => vi.fn(), +})); + const throttlerSpy = vi.fn(() => "throttler"); vi.mock("@grammyjs/transformer-throttler", () => ({ apiThrottler: () => throttlerSpy(), diff --git a/src/telegram/bot.test.ts b/src/telegram/bot.test.ts index f43faf89a..85a590a09 100644 --- a/src/telegram/bot.test.ts +++ b/src/telegram/bot.test.ts @@ -1,6 +1,9 @@ +import fs from "node:fs"; +import os from "node:os"; +import path from "node:path"; import { beforeEach, describe, expect, it, vi } from "vitest"; import * as replyModule from "../auto-reply/reply.js"; -import { createTelegramBot } from "./bot.js"; +import { createTelegramBot, getTelegramSequentialKey } from "./bot.js"; const { loadWebMedia } = vi.hoisted(() => ({ loadWebMedia: vi.fn(), @@ -37,6 +40,7 @@ vi.mock("./pairing-store.js", () => ({ })); const useSpy = vi.fn(); +const middlewareUseSpy = vi.fn(); const onSpy = vi.fn(); const stopSpy = vi.fn(); const commandSpy = vi.fn(); @@ -68,6 +72,7 @@ const apiStub: ApiStub = { vi.mock("grammy", () => ({ Bot: class { api = apiStub; + use = middlewareUseSpy; on = onSpy; stop = stopSpy; command = commandSpy; @@ -77,6 +82,16 @@ vi.mock("grammy", () => ({ webhookCallback: vi.fn(), })); +const sequentializeMiddleware = vi.fn(); +const 
sequentializeSpy = vi.fn(() => sequentializeMiddleware); +let sequentializeKey: ((ctx: unknown) => string) | undefined; +vi.mock("@grammyjs/runner", () => ({ + sequentialize: (keyFn: (ctx: unknown) => string) => { + sequentializeKey = keyFn; + return sequentializeSpy(); + }, +})); + const throttlerSpy = vi.fn(() => "throttler"); vi.mock("@grammyjs/transformer-throttler", () => ({ @@ -101,6 +116,9 @@ describe("createTelegramBot", () => { sendPhotoSpy.mockReset(); setMessageReactionSpy.mockReset(); setMyCommandsSpy.mockReset(); + middlewareUseSpy.mockReset(); + sequentializeSpy.mockReset(); + sequentializeKey = undefined; }); it("installs grammY throttler", () => { @@ -109,6 +127,28 @@ describe("createTelegramBot", () => { expect(useSpy).toHaveBeenCalledWith("throttler"); }); + it("sequentializes updates by chat and thread", () => { + createTelegramBot({ token: "tok" }); + expect(sequentializeSpy).toHaveBeenCalledTimes(1); + expect(middlewareUseSpy).toHaveBeenCalledWith( + sequentializeSpy.mock.results[0]?.value, + ); + expect(sequentializeKey).toBe(getTelegramSequentialKey); + expect(getTelegramSequentialKey({ message: { chat: { id: 123 } } })).toBe( + "telegram:123", + ); + expect( + getTelegramSequentialKey({ + message: { chat: { id: 123 }, message_thread_id: 9 }, + }), + ).toBe("telegram:123:topic:9"); + expect( + getTelegramSequentialKey({ + update: { message: { chat: { id: 555 } } }, + }), + ).toBe("telegram:555"); + }); + it("wraps inbound message with Telegram envelope", async () => { const originalTz = process.env.TZ; process.env.TZ = "Europe/Vienna"; @@ -671,6 +711,57 @@ describe("createTelegramBot", () => { expect(replySpy).not.toHaveBeenCalled(); }); + it("honors routed group activation from session store", async () => { + onSpy.mockReset(); + const replySpy = replyModule.__replySpy as unknown as ReturnType< + typeof vi.fn + >; + replySpy.mockReset(); + const storeDir = fs.mkdtempSync( + path.join(os.tmpdir(), "clawdbot-telegram-"), + ); + const storePath 
= path.join(storeDir, "sessions.json"); + fs.writeFileSync( + storePath, + JSON.stringify({ + "agent:ops:telegram:group:123": { groupActivation: "always" }, + }), + "utf-8", + ); + loadConfig.mockReturnValue({ + telegram: { groups: { "*": { requireMention: true } } }, + routing: { + bindings: [ + { + agentId: "ops", + match: { + provider: "telegram", + peer: { kind: "group", id: "123" }, + }, + }, + ], + }, + session: { store: storePath }, + }); + + createTelegramBot({ token: "tok" }); + const handler = onSpy.mock.calls[0][1] as ( + ctx: Record, + ) => Promise; + + await handler({ + message: { + chat: { id: 123, type: "group", title: "Routing" }, + text: "hello", + date: 1736380800, + }, + me: { username: "clawdbot_bot" }, + getFile: async () => ({ download: async () => new Uint8Array() }), + }); + + expect(replySpy).toHaveBeenCalledTimes(1); + }); + it("allows per-group requireMention override", async () => { onSpy.mockReset(); const replySpy = replyModule.__replySpy as unknown as ReturnType< diff --git a/src/telegram/bot.ts b/src/telegram/bot.ts index 034ce8059..bde8de6d6 100644 --- a/src/telegram/bot.ts +++ b/src/telegram/bot.ts @@ -1,6 +1,7 @@ // @ts-nocheck import { Buffer } from "node:buffer"; +import { sequentialize } from "@grammyjs/runner"; import { apiThrottler } from "@grammyjs/transformer-throttler"; import type { ApiClientOptions, Message } from "grammy"; import { Bot, InputFile, webhookCallback } from "grammy"; @@ -24,13 +25,17 @@ import { import { createReplyDispatcherWithTyping } from "../auto-reply/reply/reply-dispatcher.js"; import { getReplyFromConfig } from "../auto-reply/reply.js"; import type { ReplyPayload } from "../auto-reply/types.js"; -import type { ReplyToMode } from "../config/config.js"; +import type { ClawdbotConfig, ReplyToMode } from "../config/config.js"; import { loadConfig } from "../config/config.js"; import { resolveProviderGroupPolicy, resolveProviderGroupRequireMention, } from "../config/group-policy.js"; -import { 
resolveStorePath, updateLastRoute } from "../config/sessions.js"; +import { + loadSessionStore, + resolveStorePath, + updateLastRoute, +} from "../config/sessions.js"; import { danger, logVerbose, shouldLogVerbose } from "../globals.js"; import { formatErrorMessage } from "../infra/errors.js"; import { getChildLogger } from "../logging.js"; @@ -107,8 +112,33 @@ export type TelegramBotOptions = { mediaMaxMb?: number; replyToMode?: ReplyToMode; proxyFetch?: typeof fetch; + config?: ClawdbotConfig; }; +export function getTelegramSequentialKey(ctx: { + chat?: { id?: number }; + message?: TelegramMessage; + update?: { + message?: TelegramMessage; + edited_message?: TelegramMessage; + callback_query?: { message?: TelegramMessage }; + }; +}): string { + const msg = + ctx.message ?? + ctx.update?.message ?? + ctx.update?.edited_message ?? + ctx.update?.callback_query?.message; + const chatId = msg?.chat?.id ?? ctx.chat?.id; + const threadId = msg?.message_thread_id; + if (typeof chatId === "number") { + return threadId != null + ? `telegram:${chatId}:topic:${threadId}` + : `telegram:${chatId}`; + } + return "telegram:unknown"; +} + export function createTelegramBot(opts: TelegramBotOptions) { const runtime: RuntimeEnv = opts.runtime ?? { log: console.log, @@ -123,10 +153,11 @@ export function createTelegramBot(opts: TelegramBotOptions) { const bot = new Bot(opts.token, { client }); bot.api.config.use(apiThrottler()); + bot.use(sequentialize(getTelegramSequentialKey)); const mediaGroupBuffer = new Map(); - const cfg = loadConfig(); + const cfg = opts.config ?? loadConfig(); const textLimit = resolveTextChunkLimit(cfg, "telegram"); const dmPolicy = cfg.telegram?.dmPolicy ?? "pairing"; const allowFrom = opts.allowFrom ?? 
cfg.telegram?.allowFrom; @@ -208,6 +239,27 @@ export function createTelegramBot(opts: TelegramBotOptions) { provider: "telegram", groupId: String(chatId), }); + const resolveGroupActivation = (params: { + chatId: string | number; + agentId?: string; + messageThreadId?: number; + sessionKey?: string; + }) => { + const agentId = params.agentId ?? cfg.agent?.id ?? "main"; + const sessionKey = + params.sessionKey ?? + `agent:${agentId}:telegram:group:${buildTelegramGroupPeerId(params.chatId, params.messageThreadId)}`; + const storePath = resolveStorePath(cfg.session?.store, { agentId }); + try { + const store = loadSessionStore(storePath); + const entry = store[sessionKey]; + if (entry?.groupActivation === "always") return false; + if (entry?.groupActivation === "mention") return true; + } catch (err) { + logVerbose(`Failed to load session for activation check: ${String(err)}`); + } + return undefined; + }; const resolveGroupRequireMention = (chatId: string | number) => resolveProviderGroupRequireMention({ cfg, @@ -246,6 +298,17 @@ export function createTelegramBot(opts: TelegramBotOptions) { chatId, messageThreadId, ); + const peerId = isGroup + ? buildTelegramGroupPeerId(chatId, messageThreadId) + : String(chatId); + const route = resolveAgentRoute({ + cfg, + provider: "telegram", + peer: { + kind: isGroup ? "group" : "dm", + id: peerId, + }, + }); const effectiveDmAllow = normalizeAllowFrom([ ...(allowFrom ?? []), ...storeAllowFrom, @@ -380,8 +443,15 @@ export function createTelegramBot(opts: TelegramBotOptions) { const hasAnyMention = (msg.entities ?? msg.caption_entities ?? 
[]).some( (ent) => ent.type === "mention", ); + const activationOverride = resolveGroupActivation({ + chatId, + messageThreadId, + sessionKey: route.sessionKey, + agentId: route.agentId, + }); const baseRequireMention = resolveGroupRequireMention(chatId); const requireMention = firstDefined( + activationOverride, topicConfig?.requireMention, groupConfig?.requireMention, baseRequireMention, @@ -471,16 +541,6 @@ export function createTelegramBot(opts: TelegramBotOptions) { body: `${bodyText}${replySuffix}`, }); - const route = resolveAgentRoute({ - cfg, - provider: "telegram", - peer: { - kind: isGroup ? "group" : "dm", - id: isGroup - ? buildTelegramGroupPeerId(chatId, messageThreadId) - : buildTelegramDmPeerId(chatId, messageThreadId), - }, - }); const skillFilter = firstDefined(topicConfig?.skills, groupConfig?.skills); const systemPromptParts = [ groupConfig?.systemPrompt?.trim() || null, @@ -825,7 +885,7 @@ export function createTelegramBot(opts: TelegramBotOptions) { kind: isGroup ? "group" : "dm", id: isGroup ? buildTelegramGroupPeerId(chatId, messageThreadId) - : buildTelegramDmPeerId(chatId, messageThreadId), + : String(chatId), }, }); const skillFilter = firstDefined( @@ -1216,15 +1276,6 @@ function buildTelegramGroupPeerId( : String(chatId); } -function buildTelegramDmPeerId( - chatId: number | string, - messageThreadId?: number, -) { - return messageThreadId != null - ? 
`${chatId}:topic:${messageThreadId}` - : String(chatId); -} - function buildTelegramGroupFrom( chatId: number | string, messageThreadId?: number, diff --git a/src/telegram/monitor.test.ts b/src/telegram/monitor.test.ts index 1453ffc82..740d28d95 100644 --- a/src/telegram/monitor.test.ts +++ b/src/telegram/monitor.test.ts @@ -1,4 +1,4 @@ -import { describe, expect, it, vi } from "vitest"; +import { beforeEach, describe, expect, it, vi } from "vitest"; import { monitorTelegramProvider } from "./monitor.js"; @@ -23,6 +23,25 @@ const api = { setWebhook: vi.fn(), deleteWebhook: vi.fn(), }; +const { initSpy, runSpy, loadConfig } = vi.hoisted(() => ({ + initSpy: vi.fn(async () => undefined), + runSpy: vi.fn(() => ({ + task: () => Promise.resolve(), + stop: vi.fn(), + })), + loadConfig: vi.fn(() => ({ + agent: { maxConcurrent: 2 }, + telegram: {}, + })), +})); + +vi.mock("../config/config.js", async (importOriginal) => { + const actual = await importOriginal(); + return { + ...actual, + loadConfig, + }; +}); vi.mock("./bot.js", () => ({ createTelegramBot: () => { @@ -38,6 +57,7 @@ vi.mock("./bot.js", () => ({ on: vi.fn(), api, me: { username: "mybot" }, + init: initSpy, stop: vi.fn(), start: vi.fn(), }; @@ -45,6 +65,11 @@ vi.mock("./bot.js", () => ({ createTelegramWebhookCallback: vi.fn(), })); +// Mock the grammyjs/runner to resolve immediately +vi.mock("@grammyjs/runner", () => ({ + run: runSpy, +})); + vi.mock("../auto-reply/reply.js", () => ({ getReplyFromConfig: async (ctx: { Body?: string }) => ({ text: `echo:${ctx.Body}`, @@ -52,6 +77,15 @@ vi.mock("../auto-reply/reply.js", () => ({ })); describe("monitorTelegramProvider (grammY)", () => { + beforeEach(() => { + loadConfig.mockReturnValue({ + agent: { maxConcurrent: 2 }, + telegram: {}, + }); + initSpy.mockClear(); + runSpy.mockClear(); + }); + it("processes a DM and sends reply", async () => { Object.values(api).forEach((fn) => { fn?.mockReset?.(); @@ -72,6 +106,23 @@ describe("monitorTelegramProvider (grammY)", () 
=> { }); }); + it("uses agent maxConcurrent for runner concurrency", async () => { + runSpy.mockClear(); + loadConfig.mockReturnValue({ + agent: { maxConcurrent: 3 }, + telegram: {}, + }); + + await monitorTelegramProvider({ token: "tok" }); + + expect(runSpy).toHaveBeenCalledWith( + expect.anything(), + expect.objectContaining({ + sink: { concurrency: 3 }, + }), + ); + }); + it("requires mention in groups by default", async () => { Object.values(api).forEach((fn) => { fn?.mockReset?.(); diff --git a/src/telegram/monitor.ts b/src/telegram/monitor.ts index 9f8328f8b..6be0da70a 100644 --- a/src/telegram/monitor.ts +++ b/src/telegram/monitor.ts @@ -1,3 +1,5 @@ +import { type RunOptions, run } from "@grammyjs/runner"; +import type { ClawdbotConfig } from "../config/config.js"; import { loadConfig } from "../config/config.js"; import type { RuntimeEnv } from "../runtime.js"; import { createTelegramBot } from "./bot.js"; @@ -17,8 +19,25 @@ export type MonitorTelegramOpts = { webhookUrl?: string; }; +export function createTelegramRunnerOptions( + cfg: ClawdbotConfig, +): RunOptions { + return { + sink: { + concurrency: cfg.agent?.maxConcurrent ?? 1, + }, + runner: { + fetch: { + // Match grammY defaults + timeout: 30, + }, + }, + }; +} + export async function monitorTelegramProvider(opts: MonitorTelegramOpts = {}) { - const { token } = resolveTelegramToken(loadConfig(), { + const cfg = loadConfig(); + const { token } = resolveTelegramToken(cfg, { envToken: opts.token, }); if (!token) { @@ -29,14 +48,15 @@ export async function monitorTelegramProvider(opts: MonitorTelegramOpts = {}) { const proxyFetch = opts.proxyFetch ?? - (loadConfig().telegram?.proxy - ? makeProxyFetch(loadConfig().telegram?.proxy as string) + (cfg.telegram?.proxy + ? 
makeProxyFetch(cfg.telegram?.proxy as string) : undefined); const bot = createTelegramBot({ token, runtime: opts.runtime, proxyFetch, + config: cfg, }); if (opts.useWebhook) { @@ -53,13 +73,19 @@ export async function monitorTelegramProvider(opts: MonitorTelegramOpts = {}) { return; } - // Long polling + // Use grammyjs/runner for concurrent update processing + const runner = run(bot, createTelegramRunnerOptions(cfg)); + const stopOnAbort = () => { - if (opts.abortSignal?.aborted) void bot.stop(); + if (opts.abortSignal?.aborted) { + void runner.stop(); + } }; opts.abortSignal?.addEventListener("abort", stopOnAbort, { once: true }); + try { - await bot.start(); + // runner.task() returns a promise that resolves when the runner stops + await runner.task(); } finally { opts.abortSignal?.removeEventListener("abort", stopOnAbort); } diff --git a/src/telegram/send.test.ts b/src/telegram/send.test.ts index d3a9aad27..7c72f8cd1 100644 --- a/src/telegram/send.test.ts +++ b/src/telegram/send.test.ts @@ -80,6 +80,56 @@ describe("sendMessageTelegram", () => { ).rejects.toThrow(/chat_id=123/); }); + it("retries on transient errors with retry_after", async () => { + vi.useFakeTimers(); + const chatId = "123"; + const err = Object.assign(new Error("429"), { + parameters: { retry_after: 0.5 }, + }); + const sendMessage = vi + .fn() + .mockRejectedValueOnce(err) + .mockResolvedValueOnce({ + message_id: 1, + chat: { id: chatId }, + }); + const api = { sendMessage } as unknown as { + sendMessage: typeof sendMessage; + }; + const setTimeoutSpy = vi.spyOn(global, "setTimeout"); + + const promise = sendMessageTelegram(chatId, "hi", { + token: "tok", + api, + retry: { attempts: 2, minDelayMs: 0, maxDelayMs: 1000, jitter: 0 }, + }); + + await vi.runAllTimersAsync(); + await expect(promise).resolves.toEqual({ messageId: "1", chatId }); + expect(setTimeoutSpy.mock.calls[0]?.[1]).toBe(500); + setTimeoutSpy.mockRestore(); + vi.useRealTimers(); + }); + + it("does not retry on non-transient 
errors", async () => { + const chatId = "123"; + const sendMessage = vi + .fn() + .mockRejectedValue(new Error("400: Bad Request")); + const api = { sendMessage } as unknown as { + sendMessage: typeof sendMessage; + }; + + await expect( + sendMessageTelegram(chatId, "hi", { + token: "tok", + api, + retry: { attempts: 3, minDelayMs: 0, maxDelayMs: 0, jitter: 0 }, + }), + ).rejects.toThrow(/Bad Request/); + expect(sendMessage).toHaveBeenCalledTimes(1); + }); + it("sends GIF media as animation", async () => { const chatId = "123"; const sendAnimation = vi.fn().mockResolvedValue({ diff --git a/src/telegram/send.ts b/src/telegram/send.ts index 3b90e2840..9fafeb1ab 100644 --- a/src/telegram/send.ts +++ b/src/telegram/send.ts @@ -1,9 +1,14 @@ // @ts-nocheck import { Bot, InputFile } from "grammy"; +import { loadConfig } from "../config/config.js"; +import type { ClawdbotConfig } from "../config/types.js"; import { formatErrorMessage } from "../infra/errors.js"; +import type { RetryConfig } from "../infra/retry.js"; +import { createTelegramRetryRunner } from "../infra/retry-policy.js"; import { mediaKindFromMime } from "../media/constants.js"; import { isGifMedia } from "../media/mime.js"; import { loadWebMedia } from "../web/media.js"; +import { resolveTelegramToken } from "./token.js"; type TelegramSendOpts = { token?: string; @@ -12,6 +17,7 @@ type TelegramSendOpts = { maxBytes?: number; messageThreadId?: number; api?: Bot["api"]; + retry?: RetryConfig; }; type TelegramSendResult = { @@ -23,16 +29,19 @@ type TelegramReactionOpts = { token?: string; api?: Bot["api"]; remove?: boolean; + verbose?: boolean; + retry?: RetryConfig; }; const PARSE_ERR_RE = /can't parse entities|parse entities|find end of the entity/i; -function resolveToken(explicit?: string): string { - const token = explicit ?? 
process.env.TELEGRAM_BOT_TOKEN; +function resolveToken(explicit?: string, cfg?: ClawdbotConfig): string { + if (explicit?.trim()) return explicit.trim(); + const { token } = resolveTelegramToken(cfg); if (!token) { throw new Error( - "TELEGRAM_BOT_TOKEN is required for Telegram sends (Bot API)", + "TELEGRAM_BOT_TOKEN (or telegram.botToken/tokenFile) is required for Telegram sends (Bot API)", ); } return token.trim(); @@ -84,7 +93,8 @@ export async function sendMessageTelegram( text: string, opts: TelegramSendOpts = {}, ): Promise { - const token = resolveToken(opts.token); + const cfg = loadConfig(); + const token = resolveToken(opts.token, cfg); const chatId = normalizeChatId(to); const bot = opts.api ? null : new Bot(token); const api = opts.api ?? bot?.api; @@ -93,34 +103,11 @@ export async function sendMessageTelegram( typeof opts.messageThreadId === "number" ? { message_thread_id: Math.trunc(opts.messageThreadId) } : undefined; - - const sleep = (ms: number) => - new Promise((resolve) => setTimeout(resolve, ms)); - const sendWithRetry = async (fn: () => Promise, label: string) => { - let lastErr: unknown; - for (let attempt = 1; attempt <= 3; attempt++) { - try { - return await fn(); - } catch (err) { - lastErr = err; - const errText = formatErrorMessage(err); - const terminal = - attempt === 3 || - !/429|timeout|connect|reset|closed|unavailable|temporarily/i.test( - errText, - ); - if (terminal) break; - const backoff = 400 * attempt; - if (opts.verbose) { - console.warn( - `telegram send retry ${attempt}/2 for ${label} in ${backoff}ms: ${errText}`, - ); - } - await sleep(backoff); - } - } - throw lastErr ?? 
new Error(`Telegram send failed (${label})`); - }; + const request = createTelegramRetryRunner({ + retry: opts.retry, + configRetry: cfg.telegram?.retry, + verbose: opts.verbose, + }); const wrapChatNotFound = (err: unknown) => { if (!/400: Bad Request: chat not found/i.test(formatErrorMessage(err))) @@ -154,35 +141,35 @@ export async function sendMessageTelegram( | Awaited> | Awaited>; if (isGif) { - result = await sendWithRetry( + result = await request( () => api.sendAnimation(chatId, file, { caption, ...threadParams }), "animation", ).catch((err) => { throw wrapChatNotFound(err); }); } else if (kind === "image") { - result = await sendWithRetry( + result = await request( () => api.sendPhoto(chatId, file, { caption, ...threadParams }), "photo", ).catch((err) => { throw wrapChatNotFound(err); }); } else if (kind === "video") { - result = await sendWithRetry( + result = await request( () => api.sendVideo(chatId, file, { caption, ...threadParams }), "video", ).catch((err) => { throw wrapChatNotFound(err); }); } else if (kind === "audio") { - result = await sendWithRetry( + result = await request( () => api.sendAudio(chatId, file, { caption, ...threadParams }), "audio", ).catch((err) => { throw wrapChatNotFound(err); }); } else { - result = await sendWithRetry( + result = await request( () => api.sendDocument(chatId, file, { caption, ...threadParams }), "document", ).catch((err) => { @@ -196,7 +183,7 @@ export async function sendMessageTelegram( if (!text || !text.trim()) { throw new Error("Message must be non-empty for Telegram sends"); } - const res = await sendWithRetry( + const res = await request( () => api.sendMessage(chatId, text, { parse_mode: "Markdown", @@ -213,7 +200,7 @@ export async function sendMessageTelegram( `telegram markdown parse failed, retrying as plain text: ${errText}`, ); } - return await sendWithRetry( + return await request( () => threadParams ? 
api.sendMessage(chatId, text, threadParams) @@ -235,11 +222,17 @@ export async function reactMessageTelegram( emoji: string, opts: TelegramReactionOpts = {}, ): Promise<{ ok: true }> { - const token = resolveToken(opts.token); + const cfg = loadConfig(); + const token = resolveToken(opts.token, cfg); const chatId = normalizeChatId(String(chatIdInput)); const messageId = normalizeMessageId(messageIdInput); const bot = opts.api ? null : new Bot(token); const api = opts.api ?? bot?.api; + const request = createTelegramRetryRunner({ + retry: opts.retry, + configRetry: cfg.telegram?.retry, + verbose: opts.verbose, + }); const remove = opts.remove === true; const trimmedEmoji = emoji.trim(); const reactions = @@ -247,7 +240,10 @@ export async function reactMessageTelegram( if (typeof api.setMessageReaction !== "function") { throw new Error("Telegram reactions are unavailable in this bot API."); } - await api.setMessageReaction(chatId, messageId, reactions); + await request( + () => api.setMessageReaction(chatId, messageId, reactions), + "reaction", + ); return { ok: true }; } diff --git a/src/web/accounts.ts b/src/web/accounts.ts index a9fcffaad..1ed06a4a3 100644 --- a/src/web/accounts.ts +++ b/src/web/accounts.ts @@ -12,6 +12,7 @@ export type ResolvedWhatsAppAccount = { enabled: boolean; authDir: string; isLegacyAuthDir: boolean; + selfChatMode?: boolean; allowFrom?: string[]; groupAllowFrom?: string[]; groupPolicy?: GroupPolicy; @@ -103,6 +104,7 @@ export function resolveWhatsAppAccount(params: { enabled, authDir, isLegacyAuthDir: isLegacy, + selfChatMode: accountCfg?.selfChatMode ?? params.cfg.whatsapp?.selfChatMode, allowFrom: accountCfg?.allowFrom ?? params.cfg.whatsapp?.allowFrom, groupAllowFrom: accountCfg?.groupAllowFrom ?? 
params.cfg.whatsapp?.groupAllowFrom, diff --git a/src/web/inbound.ts b/src/web/inbound.ts index 7c73fb7b1..c70b7ab47 100644 --- a/src/web/inbound.ts +++ b/src/web/inbound.ts @@ -202,6 +202,9 @@ export async function monitorWebInbox(options: { : undefined); const isSamePhone = from === selfE164; const isSelfChat = isSelfChatMode(selfE164, configuredAllowFrom); + const isFromMe = Boolean(msg.key?.fromMe); + const selfChatMode = account.selfChatMode ?? false; + const selfPhoneMode = selfChatMode || isSelfChat; // Pre-compute normalized allowlists for filtering const dmHasWildcard = allowFrom?.includes("*") ?? false; @@ -246,6 +249,12 @@ export async function monitorWebInbox(options: { // DM access control (secure defaults): "pairing" (default) / "allowlist" / "open" / "disabled" if (!group) { + if (isFromMe && !isSamePhone && selfPhoneMode) { + logVerbose( + "Skipping outbound self-phone DM (fromMe); no pairing reply needed.", + ); + continue; + } if (dmPolicy === "disabled") { logVerbose("Blocked dm (dmPolicy: disabled)"); continue; diff --git a/src/web/monitor-inbox.test.ts b/src/web/monitor-inbox.test.ts index 6abc8e6fa..3ae395d66 100644 --- a/src/web/monitor-inbox.test.ts +++ b/src/web/monitor-inbox.test.ts @@ -1099,6 +1099,110 @@ describe("web monitor inbox", () => { await listener.close(); }); + it("skips pairing replies for outbound DMs in same-phone mode", async () => { + mockLoadConfig.mockReturnValue({ + whatsapp: { + dmPolicy: "pairing", + selfChatMode: true, + }, + messages: { + messagePrefix: undefined, + responsePrefix: undefined, + }, + }); + + const onMessage = vi.fn(); + const listener = await monitorWebInbox({ verbose: false, onMessage }); + const sock = await createWaSocket(); + + const upsert = { + type: "notify", + messages: [ + { + key: { + id: "fromme-1", + fromMe: true, + remoteJid: "999@s.whatsapp.net", + }, + message: { conversation: "hello" }, + messageTimestamp: 1_700_000_000, + }, + ], + }; + + sock.ev.emit("messages.upsert", upsert); + 
await new Promise((resolve) => setImmediate(resolve)); + + expect(onMessage).not.toHaveBeenCalled(); + expect(upsertPairingRequestMock).not.toHaveBeenCalled(); + expect(sock.sendMessage).not.toHaveBeenCalled(); + + mockLoadConfig.mockReturnValue({ + whatsapp: { + allowFrom: ["*"], + }, + messages: { + messagePrefix: undefined, + responsePrefix: undefined, + }, + }); + + await listener.close(); + }); + + it("still pairs outbound DMs when same-phone mode is disabled", async () => { + mockLoadConfig.mockReturnValue({ + whatsapp: { + dmPolicy: "pairing", + selfChatMode: false, + }, + messages: { + messagePrefix: undefined, + responsePrefix: undefined, + }, + }); + + const onMessage = vi.fn(); + const listener = await monitorWebInbox({ verbose: false, onMessage }); + const sock = await createWaSocket(); + + const upsert = { + type: "notify", + messages: [ + { + key: { + id: "fromme-2", + fromMe: true, + remoteJid: "999@s.whatsapp.net", + }, + message: { conversation: "hello again" }, + messageTimestamp: 1_700_000_000, + }, + ], + }; + + sock.ev.emit("messages.upsert", upsert); + await new Promise((resolve) => setImmediate(resolve)); + + expect(onMessage).not.toHaveBeenCalled(); + expect(upsertPairingRequestMock).toHaveBeenCalledTimes(1); + expect(sock.sendMessage).toHaveBeenCalledWith("999@s.whatsapp.net", { + text: expect.stringContaining("Pairing code: PAIRCODE"), + }); + + mockLoadConfig.mockReturnValue({ + whatsapp: { + allowFrom: ["*"], + }, + messages: { + messagePrefix: undefined, + responsePrefix: undefined, + }, + }); + + await listener.close(); + }); + it("handles append messages by marking them read but skipping auto-reply", async () => { const onMessage = vi.fn(); const listener = await monitorWebInbox({ verbose: false, onMessage }); diff --git a/ui/src/ui/app-render.ts b/ui/src/ui/app-render.ts index eb2d48cc8..85f15b740 100644 --- a/ui/src/ui/app-render.ts +++ b/ui/src/ui/app-render.ts @@ -247,6 +247,7 @@ export function renderApp(state: AppViewState) { 
state.applySettings({ ...state.settings, sessionKey: next }); }, onRefresh: () => state.loadOverview(), + onReconnect: () => state.connect(), }) : nothing} diff --git a/ui/src/ui/app.ts b/ui/src/ui/app.ts index 660fe5ec4..5a6aa3435 100644 --- a/ui/src/ui/app.ts +++ b/ui/src/ui/app.ts @@ -663,12 +663,29 @@ export class ClawdbotApp extends LitElement { private applySettingsFromUrl() { if (!window.location.search) return; const params = new URLSearchParams(window.location.search); - const token = params.get("token")?.trim(); - if (!token) return; - if (!this.settings.token) { - this.applySettings({ ...this.settings, token }); + const tokenRaw = params.get("token"); + const passwordRaw = params.get("password"); + let changed = false; + + if (tokenRaw != null) { + const token = tokenRaw.trim(); + if (token && !this.settings.token) { + this.applySettings({ ...this.settings, token }); + changed = true; + } + params.delete("token"); } - params.delete("token"); + + if (passwordRaw != null) { + const password = passwordRaw.trim(); + if (password) { + this.password = password; + changed = true; + } + params.delete("password"); + } + + if (!changed && tokenRaw == null && passwordRaw == null) return; const url = new URL(window.location.href); url.search = params.toString(); window.history.replaceState({}, "", url.toString()); diff --git a/ui/src/ui/navigation.browser.test.ts b/ui/src/ui/navigation.browser.test.ts index 6c3b68b0c..69d9af71e 100644 --- a/ui/src/ui/navigation.browser.test.ts +++ b/ui/src/ui/navigation.browser.test.ts @@ -128,4 +128,26 @@ describe("control UI routing", () => { expect(window.location.pathname).toBe("/ui/overview"); expect(window.location.search).toBe(""); }); + + it("hydrates password from URL params and strips it", async () => { + const app = mountApp("/ui/overview?password=sekret"); + await app.updateComplete; + + expect(app.password).toBe("sekret"); + expect(window.location.pathname).toBe("/ui/overview"); + 
expect(window.location.search).toBe(""); + }); + + it("strips auth params even when settings already set", async () => { + localStorage.setItem( + "clawdbot.control.settings.v1", + JSON.stringify({ token: "existing-token" }), + ); + const app = mountApp("/ui/overview?token=abc123"); + await app.updateComplete; + + expect(app.settings.token).toBe("existing-token"); + expect(window.location.pathname).toBe("/ui/overview"); + expect(window.location.search).toBe(""); + }); }); diff --git a/ui/src/ui/views/overview.ts b/ui/src/ui/views/overview.ts index 6a728808a..39f348ec2 100644 --- a/ui/src/ui/views/overview.ts +++ b/ui/src/ui/views/overview.ts @@ -20,6 +20,7 @@ export type OverviewProps = { onPasswordChange: (next: string) => void; onSessionKeyChange: (next: string) => void; onRefresh: () => void; + onReconnect: () => void; }; export function renderOverview(props: OverviewProps) { @@ -84,7 +85,8 @@ export function renderOverview(props: OverviewProps) {
- Reconnect to apply changes. + + Reconnect to apply URL/password changes.