diff --git a/CHANGELOG.md b/CHANGELOG.md index 8f1330931..fbe151592 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,7 @@ Docs: https://docs.clawd.bot Status: unreleased. ### Changes +- Skills: add multi-image input support to Nano Banana Pro skill. (#1958) Thanks @tyler6204. - Agents: honor tools.exec.safeBins in exec allowlist checks. (#2281) - Docs: tighten Fly private deployment steps. (#2289) Thanks @dguido. - Docs: add migration guide for moving to a new machine. (#2381) @@ -50,6 +51,8 @@ Status: unreleased. - **BREAKING:** Gateway auth mode "none" is removed; gateway now requires token/password (Tailscale Serve identity still allowed). ### Fixes +- Security: pin npm overrides to keep tar@7.5.4 for install toolchains. +- BlueBubbles: coalesce inbound URL link preview messages. (#1981) Thanks @tyler6204. - Agents: include memory.md when bootstrapping memory context. (#2318) Thanks @czekaj. - Telegram: wrap reasoning italics per line to avoid raw underscores. (#2181) Thanks @YuriNachos. - Voice Call: enforce Twilio webhook signature verification for ngrok URLs; disable ngrok free tier bypass by default. diff --git a/docs/gateway/configuration.md b/docs/gateway/configuration.md index eaba866b1..31dd1602b 100644 --- a/docs/gateway/configuration.md +++ b/docs/gateway/configuration.md @@ -954,6 +954,8 @@ Notes: - `commands.debug: true` enables `/debug` (runtime-only overrides). - `commands.restart: true` enables `/restart` and the gateway tool restart action. - `commands.useAccessGroups: false` allows commands to bypass access-group allowlists/policies. +- Slash commands and directives are only honored for **authorized senders**. Authorization is derived from + channel allowlists/pairing plus `commands.useAccessGroups`. 
### `web` (WhatsApp web channel runtime) diff --git a/docs/gateway/sandbox-vs-tool-policy-vs-elevated.md b/docs/gateway/sandbox-vs-tool-policy-vs-elevated.md index d28481ebb..d7fd921e7 100644 --- a/docs/gateway/sandbox-vs-tool-policy-vs-elevated.md +++ b/docs/gateway/sandbox-vs-tool-policy-vs-elevated.md @@ -59,6 +59,8 @@ Two layers matter: Rules of thumb: - `deny` always wins. - If `allow` is non-empty, everything else is treated as blocked. +- Tool policy is the hard stop: `/exec` cannot override a denied `exec` tool. +- `/exec` only changes session defaults for authorized senders; it does not grant tool access. Provider tool keys accept either `provider` (e.g. `google-antigravity`) or `provider/model` (e.g. `openai/gpt-5.2`). ### Tool groups (shorthands) @@ -95,6 +97,7 @@ Elevated does **not** grant extra tools; it only affects `exec`. - Use `/elevated full` to skip exec approvals for the session. - If you’re already running direct, elevated is effectively a no-op (still gated). - Elevated is **not** skill-scoped and does **not** override tool allow/deny. +- `/exec` is separate from elevated. It only adjusts per-session exec defaults for authorized senders. Gates: - Enablement: `tools.elevated.enabled` (and optionally `agents.list[].tools.elevated.enabled`) diff --git a/docs/gateway/sandboxing.md b/docs/gateway/sandboxing.md index b9b1bd8fe..fcbc46b9b 100644 --- a/docs/gateway/sandboxing.md +++ b/docs/gateway/sandboxing.md @@ -142,6 +142,8 @@ Tool allow/deny policies still apply before sandbox rules. If a tool is denied globally or per-agent, sandboxing doesn’t bring it back. `tools.elevated` is an explicit escape hatch that runs `exec` on the host. +`/exec` directives only apply for authorized senders and persist per session; to hard-disable +`exec`, use tool policy deny (see [Sandbox vs Tool Policy vs Elevated](/gateway/sandbox-vs-tool-policy-vs-elevated)). 
Debugging: - Use `clawdbot sandbox explain` to inspect effective sandbox mode, tool policy, and fix-it config keys. diff --git a/docs/gateway/security.md b/docs/gateway/security.md index cee21c7c2..52671d864 100644 --- a/docs/gateway/security.md +++ b/docs/gateway/security.md @@ -142,6 +142,16 @@ Clawdbot’s stance: - **Scope next:** decide where the bot is allowed to act (group allowlists + mention gating, tools, sandboxing, device permissions). - **Model last:** assume the model can be manipulated; design so manipulation has limited blast radius. +## Command authorization model + +Slash commands and directives are only honored for **authorized senders**. Authorization is derived from +channel allowlists/pairing plus `commands.useAccessGroups` (see [Configuration](/gateway/configuration) +and [Slash commands](/tools/slash-commands)). If a channel allowlist is empty or includes `"*"`, +commands are effectively open for that channel. + +`/exec` is a session-only convenience for authorized operators. It does **not** write config or +change other sessions. + ## Plugins/extensions Plugins run **in-process** with the Gateway. Treat them as trusted code: @@ -211,7 +221,7 @@ Even with strong system prompts, **prompt injection is not solved**. What helps - Prefer mention gating in groups; avoid “always-on” bots in public rooms. - Treat links, attachments, and pasted instructions as hostile by default. - Run sensitive tool execution in a sandbox; keep secrets out of the agent’s reachable filesystem. -- Note: sandboxing is opt-in; if sandbox mode is off, exec runs on the gateway host even though tools.exec.host defaults to sandbox. +- Note: sandboxing is opt-in. If sandbox mode is off, `exec` runs on the gateway host even though `tools.exec.host` defaults to `sandbox`, and that execution does not require approvals unless you set `host=gateway` and configure exec approvals. - Limit high-risk tools (`exec`, `browser`, `web_fetch`, `web_search`) to trusted agents or explicit allowlists. 
- **Model choice matters:** older/legacy models can be less robust against prompt injection and tool misuse. Prefer modern, instruction-hardened models for any bot with tools. We recommend Anthropic Opus 4.5 because it’s quite good at recognizing prompt injections (see [“A step forward on safety”](https://www.anthropic.com/news/claude-opus-4-5)). diff --git a/docs/plugins/voice-call.md b/docs/plugins/voice-call.md index cd574b26e..46713c939 100644 --- a/docs/plugins/voice-call.md +++ b/docs/plugins/voice-call.md @@ -104,6 +104,7 @@ Notes: - `mock` is a local dev provider (no network calls). - `skipSignatureVerification` is for local testing only. - If you use ngrok free tier, set `publicUrl` to the exact ngrok URL; signature verification is always enforced. +- Exception: `tunnel.allowNgrokFreeTierLoopbackBypass: true` accepts Twilio webhooks with invalid signatures, but **only** when `tunnel.provider="ngrok"`, `serve.bind` is loopback, and the request reaches the server from a loopback address (ngrok local agent). Use for local dev only. - Ngrok free tier URLs can change or add interstitial behavior; if `publicUrl` drifts, Twilio signatures will fail. For production, prefer a stable domain or Tailscale funnel. ## TTS for calls diff --git a/docs/tools/elevated.md b/docs/tools/elevated.md index 863c53a1f..7635bbbee 100644 --- a/docs/tools/elevated.md +++ b/docs/tools/elevated.md @@ -23,6 +23,7 @@ read_when: - **Approvals**: `full` skips exec approvals; `on`/`ask` honor them when allowlist/ask rules require. - **Unsandboxed agents**: no-op for location; only affects gating, logging, and status. - **Tool policy still applies**: if `exec` is denied by tool policy, elevated cannot be used. +- **Separate from `/exec`**: `/exec` adjusts per-session defaults for authorized senders and does not require elevated. ## Resolution order 1. Inline directive on the message (applies only to that message). 
diff --git a/docs/tools/exec-approvals.md b/docs/tools/exec-approvals.md index ec350f9d9..2ec8ec191 100644 --- a/docs/tools/exec-approvals.md +++ b/docs/tools/exec-approvals.md @@ -216,6 +216,9 @@ Approval-gated execs reuse the approval id as the `runId` in these messages for - **full** is powerful; prefer allowlists when possible. - **ask** keeps you in the loop while still allowing fast approvals. - Per-agent allowlists prevent one agent’s approvals from leaking into others. +- Approvals only apply to host exec requests from **authorized senders**. Unauthorized senders cannot issue `/exec`. +- `/exec security=full` is a session-level convenience for authorized operators and skips approvals by design. + To hard-block host exec, set approvals security to `deny` or deny the `exec` tool via tool policy. Related: - [Exec tool](/tools/exec) diff --git a/docs/tools/exec.md b/docs/tools/exec.md index e2088137b..2524c3665 100644 --- a/docs/tools/exec.md +++ b/docs/tools/exec.md @@ -34,6 +34,9 @@ Notes: - If multiple nodes are available, set `exec.node` or `tools.exec.node` to select one. - On non-Windows hosts, exec uses `SHELL` when set; if `SHELL` is `fish`, it prefers `bash` (or `sh`) from `PATH` to avoid fish-incompatible scripts, then falls back to `SHELL` if neither exists. +- Important: sandboxing is **off by default**. If sandboxing is off, `host=sandbox` runs directly on + the gateway host (no container) and **does not require approvals**. To require approvals, run with + `host=gateway` and configure exec approvals (or enable sandboxing). ## Config @@ -88,6 +91,13 @@ Example: /exec host=gateway security=allowlist ask=on-miss node=mac-1 ``` +## Authorization model + +`/exec` is only honored for **authorized senders** (channel allowlists/pairing plus `commands.useAccessGroups`). +It updates **session state only** and does not write config. To hard-disable exec, deny it via tool +policy (`tools.deny: ["exec"]` or per-agent). 
Host approvals still apply unless you explicitly set +`security=full` and `ask=off`. + ## Exec approvals (companion app / node host) Sandboxed agents can require per-request approval before `exec` runs on the gateway or node host. diff --git a/docs/tools/slash-commands.md b/docs/tools/slash-commands.md index 93b51d5ae..138ede9d0 100644 --- a/docs/tools/slash-commands.md +++ b/docs/tools/slash-commands.md @@ -16,6 +16,8 @@ There are two related systems: - Directives are stripped from the message before the model sees it. - In normal chat messages (not directive-only), they are treated as “inline hints” and do **not** persist session settings. - In directive-only messages (the message contains only directives), they persist to the session and reply with an acknowledgement. + - Directives are only applied for **authorized senders** (channel allowlists/pairing plus `commands.useAccessGroups`). + For unauthorized senders, directives are treated as plain text. There are also a few **inline shortcuts** (allowlisted/authorized senders only): `/help`, `/commands`, `/status`, `/whoami` (`/id`). They run immediately, are stripped before the model sees the message, and the remaining text continues through the normal flow. 
diff --git a/extensions/bluebubbles/src/monitor.test.ts b/extensions/bluebubbles/src/monitor.test.ts index 12aef679c..76c9eebf6 100644 --- a/extensions/bluebubbles/src/monitor.test.ts +++ b/extensions/bluebubbles/src/monitor.test.ts @@ -146,8 +146,14 @@ function createMockRuntime(): PluginRuntime { resolveRequireMention: mockResolveRequireMention as unknown as PluginRuntime["channel"]["groups"]["resolveRequireMention"], }, debounce: { - createInboundDebouncer: vi.fn() as unknown as PluginRuntime["channel"]["debounce"]["createInboundDebouncer"], - resolveInboundDebounceMs: vi.fn() as unknown as PluginRuntime["channel"]["debounce"]["resolveInboundDebounceMs"], + // Create a pass-through debouncer that immediately calls onFlush + createInboundDebouncer: vi.fn((params: { onFlush: (items: unknown[]) => Promise }) => ({ + enqueue: async (item: unknown) => { + await params.onFlush([item]); + }, + flushKey: vi.fn(), + })) as unknown as PluginRuntime["channel"]["debounce"]["createInboundDebouncer"], + resolveInboundDebounceMs: vi.fn(() => 0) as unknown as PluginRuntime["channel"]["debounce"]["resolveInboundDebounceMs"], }, commands: { resolveCommandAuthorizedFromAuthorizers: mockResolveCommandAuthorizedFromAuthorizers as unknown as PluginRuntime["channel"]["commands"]["resolveCommandAuthorizedFromAuthorizers"], diff --git a/extensions/bluebubbles/src/monitor.ts b/extensions/bluebubbles/src/monitor.ts index 8635b183e..98431775a 100644 --- a/extensions/bluebubbles/src/monitor.ts +++ b/extensions/bluebubbles/src/monitor.ts @@ -250,8 +250,178 @@ type WebhookTarget = { statusSink?: (patch: { lastInboundAt?: number; lastOutboundAt?: number }) => void; }; +/** + * Entry type for debouncing inbound messages. + * Captures the normalized message and its target for later combined processing. + */ +type BlueBubblesDebounceEntry = { + message: NormalizedWebhookMessage; + target: WebhookTarget; +}; + +/** + * Default debounce window for inbound message coalescing (ms). 
+ * This helps combine URL text + link preview balloon messages that BlueBubbles + * sends as separate webhook events when no explicit inbound debounce config exists. + */ +const DEFAULT_INBOUND_DEBOUNCE_MS = 350; + +/** + * Combines multiple debounced messages into a single message for processing. + * Used when multiple webhook events arrive within the debounce window. + */ +function combineDebounceEntries(entries: BlueBubblesDebounceEntry[]): NormalizedWebhookMessage { + if (entries.length === 0) { + throw new Error("Cannot combine empty entries"); + } + if (entries.length === 1) { + return entries[0].message; + } + + // Use the first message as the base (typically the text message) + const first = entries[0].message; + + // Combine text from all entries, filtering out duplicates and empty strings + const seenTexts = new Set(); + const textParts: string[] = []; + + for (const entry of entries) { + const text = entry.message.text.trim(); + if (!text) continue; + // Skip duplicate text (URL might be in both text message and balloon) + const normalizedText = text.toLowerCase(); + if (seenTexts.has(normalizedText)) continue; + seenTexts.add(normalizedText); + textParts.push(text); + } + + // Merge attachments from all entries + const allAttachments = entries.flatMap((e) => e.message.attachments ?? []); + + // Use the latest timestamp + const timestamps = entries + .map((e) => e.message.timestamp) + .filter((t): t is number => typeof t === "number"); + const latestTimestamp = timestamps.length > 0 ? Math.max(...timestamps) : first.timestamp; + + // Collect all message IDs for reference + const messageIds = entries + .map((e) => e.message.messageId) + .filter((id): id is string => Boolean(id)); + + // Prefer reply context from any entry that has it + const entryWithReply = entries.find((e) => e.message.replyToId); + + return { + ...first, + text: textParts.join(" "), + attachments: allAttachments.length > 0 ? 
allAttachments : first.attachments, + timestamp: latestTimestamp, + // Use first message's ID as primary (for reply reference), but we've coalesced others + messageId: messageIds[0] ?? first.messageId, + // Preserve reply context if present + replyToId: entryWithReply?.message.replyToId ?? first.replyToId, + replyToBody: entryWithReply?.message.replyToBody ?? first.replyToBody, + replyToSender: entryWithReply?.message.replyToSender ?? first.replyToSender, + // Clear balloonBundleId since we've combined (the combined message is no longer just a balloon) + balloonBundleId: undefined, + }; +} + const webhookTargets = new Map(); +/** + * Maps webhook targets to their inbound debouncers. + * Each target gets its own debouncer keyed by a unique identifier. + */ +const targetDebouncers = new Map< + WebhookTarget, + ReturnType +>(); + +function resolveBlueBubblesDebounceMs( + config: ClawdbotConfig, + core: BlueBubblesCoreRuntime, +): number { + const inbound = config.messages?.inbound; + const hasExplicitDebounce = + typeof inbound?.debounceMs === "number" || typeof inbound?.byChannel?.bluebubbles === "number"; + if (!hasExplicitDebounce) return DEFAULT_INBOUND_DEBOUNCE_MS; + return core.channel.debounce.resolveInboundDebounceMs({ cfg: config, channel: "bluebubbles" }); +} + +/** + * Creates or retrieves a debouncer for a webhook target. + */ +function getOrCreateDebouncer(target: WebhookTarget) { + const existing = targetDebouncers.get(target); + if (existing) return existing; + + const { account, config, runtime, core } = target; + + const debouncer = core.channel.debounce.createInboundDebouncer({ + debounceMs: resolveBlueBubblesDebounceMs(config, core), + buildKey: (entry) => { + const msg = entry.message; + // Build key from account + chat + sender to coalesce messages from same source + const chatKey = + msg.chatGuid?.trim() ?? + msg.chatIdentifier?.trim() ?? + (msg.chatId ? 
String(msg.chatId) : "dm"); + return `bluebubbles:${account.accountId}:${chatKey}:${msg.senderId}`; + }, + shouldDebounce: (entry) => { + const msg = entry.message; + // Skip debouncing for messages with attachments - process immediately + if (msg.attachments && msg.attachments.length > 0) return false; + // Skip debouncing for from-me messages (they're just cached, not processed) + if (msg.fromMe) return false; + // Skip debouncing for control commands - process immediately + if (core.channel.text.hasControlCommand(msg.text, config)) return false; + // Debounce normal text messages and URL balloon messages + return true; + }, + onFlush: async (entries) => { + if (entries.length === 0) return; + + // Use target from first entry (all entries have same target due to key structure) + const flushTarget = entries[0].target; + + if (entries.length === 1) { + // Single message - process normally + await processMessage(entries[0].message, flushTarget); + return; + } + + // Multiple messages - combine and process + const combined = combineDebounceEntries(entries); + + if (core.logging.shouldLogVerbose()) { + const count = entries.length; + const preview = combined.text.slice(0, 50); + runtime.log?.( + `[bluebubbles] coalesced ${count} messages: "${preview}${combined.text.length > 50 ? "..." : ""}"`, + ); + } + + await processMessage(combined, flushTarget); + }, + onError: (err) => { + runtime.error?.(`[${account.accountId}] [bluebubbles] debounce flush failed: ${String(err)}`); + }, + }); + + targetDebouncers.set(target, debouncer); + return debouncer; +} + +/** + * Removes a debouncer for a target (called during unregistration). 
+ */ +function removeDebouncer(target: WebhookTarget): void { + targetDebouncers.delete(target); +} + function normalizeWebhookPath(raw: string): string { const trimmed = raw.trim(); if (!trimmed) return "/"; @@ -275,6 +445,8 @@ export function registerBlueBubblesWebhookTarget(target: WebhookTarget): () => v } else { webhookTargets.delete(key); } + // Clean up debouncer when target is unregistered + removeDebouncer(normalizedTarget); }; } @@ -1205,7 +1377,10 @@ export async function handleBlueBubblesWebhookRequest( ); }); } else if (message) { - processMessage(message, target).catch((err) => { + // Route messages through debouncer to coalesce rapid-fire events + // (e.g., text message + URL balloon arriving as separate webhooks) + const debouncer = getOrCreateDebouncer(target); + debouncer.enqueue({ message, target }).catch((err) => { target.runtime.error?.( `[${target.account.accountId}] BlueBubbles webhook failed: ${String(err)}`, ); diff --git a/extensions/voice-call/CHANGELOG.md b/extensions/voice-call/CHANGELOG.md index a8721d47d..588817858 100644 --- a/extensions/voice-call/CHANGELOG.md +++ b/extensions/voice-call/CHANGELOG.md @@ -6,6 +6,7 @@ - Breaking: voice-call TTS now uses core `messages.tts` (plugin TTS config deep‑merges with core). - Telephony TTS supports OpenAI + ElevenLabs; Edge TTS is ignored for calls. - Removed legacy `tts.model`/`tts.voice`/`tts.instructions` plugin fields. +- Ngrok free-tier bypass renamed to `tunnel.allowNgrokFreeTierLoopbackBypass` and gated to loopback + `tunnel.provider="ngrok"`. ## 2026.1.23 diff --git a/extensions/voice-call/README.md b/extensions/voice-call/README.md index d96f90392..5f009aa28 100644 --- a/extensions/voice-call/README.md +++ b/extensions/voice-call/README.md @@ -74,6 +74,7 @@ Put under `plugins.entries.voice-call.config`: Notes: - Twilio/Telnyx/Plivo require a **publicly reachable** webhook URL. - `mock` is a local dev provider (no network calls). 
+- `tunnel.allowNgrokFreeTierLoopbackBypass: true` allows Twilio webhooks with invalid signatures **only** when `tunnel.provider="ngrok"` and `serve.bind` is loopback (ngrok local agent). Use for local dev only. ## TTS for calls diff --git a/extensions/voice-call/clawdbot.plugin.json b/extensions/voice-call/clawdbot.plugin.json index 2a4f04466..cfac7ad9d 100644 --- a/extensions/voice-call/clawdbot.plugin.json +++ b/extensions/voice-call/clawdbot.plugin.json @@ -78,8 +78,8 @@ "label": "ngrok Domain", "advanced": true }, - "tunnel.allowNgrokFreeTier": { - "label": "Allow ngrok Free Tier", + "tunnel.allowNgrokFreeTierLoopbackBypass": { + "label": "Allow ngrok Free Tier (Loopback Bypass)", "advanced": true }, "streaming.enabled": { @@ -330,7 +330,7 @@ "ngrokDomain": { "type": "string" }, - "allowNgrokFreeTier": { + "allowNgrokFreeTierLoopbackBypass": { "type": "boolean" } } diff --git a/extensions/voice-call/index.ts b/extensions/voice-call/index.ts index 60076bbe2..60cb64eb2 100644 --- a/extensions/voice-call/index.ts +++ b/extensions/voice-call/index.ts @@ -62,8 +62,8 @@ const voiceCallConfigSchema = { advanced: true, }, "tunnel.ngrokDomain": { label: "ngrok Domain", advanced: true }, - "tunnel.allowNgrokFreeTier": { - label: "Allow ngrok Free Tier", + "tunnel.allowNgrokFreeTierLoopbackBypass": { + label: "Allow ngrok Free Tier (Loopback Bypass)", advanced: true, }, "streaming.enabled": { label: "Enable Streaming", advanced: true }, diff --git a/extensions/voice-call/src/config.test.ts b/extensions/voice-call/src/config.test.ts index aac9fe44c..dde17e122 100644 --- a/extensions/voice-call/src/config.test.ts +++ b/extensions/voice-call/src/config.test.ts @@ -19,7 +19,7 @@ function createBaseConfig( maxConcurrentCalls: 1, serve: { port: 3334, bind: "127.0.0.1", path: "/voice/webhook" }, tailscale: { mode: "off", path: "/voice/webhook" }, - tunnel: { provider: "none", allowNgrokFreeTier: false }, + tunnel: { provider: "none", allowNgrokFreeTierLoopbackBypass: false }, 
streaming: { enabled: false, sttProvider: "openai-realtime", diff --git a/extensions/voice-call/src/config.ts b/extensions/voice-call/src/config.ts index 99916e49d..7784406e7 100644 --- a/extensions/voice-call/src/config.ts +++ b/extensions/voice-call/src/config.ts @@ -217,12 +217,17 @@ export const VoiceCallTunnelConfigSchema = z /** * Allow ngrok free tier compatibility mode. * When true, signature verification failures on ngrok-free.app URLs - * will include extra diagnostics. Signature verification is still required. + * will be allowed only for loopback requests (ngrok local agent). */ - allowNgrokFreeTier: z.boolean().default(false), + allowNgrokFreeTierLoopbackBypass: z.boolean().default(false), + /** + * Legacy ngrok free tier compatibility mode (deprecated). + * Use allowNgrokFreeTierLoopbackBypass instead. + */ + allowNgrokFreeTier: z.boolean().optional(), }) .strict() - .default({ provider: "none", allowNgrokFreeTier: false }); + .default({ provider: "none", allowNgrokFreeTierLoopbackBypass: false }); export type VoiceCallTunnelConfig = z.infer<typeof VoiceCallTunnelConfigSchema>; // ----------------------------------------------------------------------------- @@ -419,8 +424,12 @@ export function resolveVoiceCallConfig(config: VoiceCallConfig): VoiceCallConfig // Tunnel Config resolved.tunnel = resolved.tunnel ?? { provider: "none", - allowNgrokFreeTier: false, + allowNgrokFreeTierLoopbackBypass: false, }; + resolved.tunnel.allowNgrokFreeTierLoopbackBypass = + resolved.tunnel.allowNgrokFreeTierLoopbackBypass || + resolved.tunnel.allowNgrokFreeTier || + false; resolved.tunnel.ngrokAuthToken = resolved.tunnel.ngrokAuthToken ?? 
process.env.NGROK_AUTHTOKEN; resolved.tunnel.ngrokDomain = diff --git a/extensions/voice-call/src/providers/twilio.ts b/extensions/voice-call/src/providers/twilio.ts index be9dd6eda..87c0f244d 100644 --- a/extensions/voice-call/src/providers/twilio.ts +++ b/extensions/voice-call/src/providers/twilio.ts @@ -31,8 +31,8 @@ import { verifyTwilioProviderWebhook } from "./twilio/webhook.js"; * @see https://www.twilio.com/docs/voice/media-streams */ export interface TwilioProviderOptions { - /** Allow ngrok free tier compatibility mode (less secure) */ - allowNgrokFreeTier?: boolean; + /** Allow ngrok free tier compatibility mode (loopback only, less secure) */ + allowNgrokFreeTierLoopbackBypass?: boolean; /** Override public URL for signature verification */ publicUrl?: string; /** Path for media stream WebSocket (e.g., /voice/stream) */ diff --git a/extensions/voice-call/src/providers/twilio/webhook.ts b/extensions/voice-call/src/providers/twilio/webhook.ts index 1cddcb164..d5c3abb95 100644 --- a/extensions/voice-call/src/providers/twilio/webhook.ts +++ b/extensions/voice-call/src/providers/twilio/webhook.ts @@ -11,7 +11,8 @@ export function verifyTwilioProviderWebhook(params: { }): WebhookVerificationResult { const result = verifyTwilioWebhook(params.ctx, params.authToken, { publicUrl: params.currentPublicUrl || undefined, - allowNgrokFreeTier: params.options.allowNgrokFreeTier ?? false, + allowNgrokFreeTierLoopbackBypass: + params.options.allowNgrokFreeTierLoopbackBypass ?? 
false, skipVerification: params.options.skipVerification, }); diff --git a/extensions/voice-call/src/runtime.ts b/extensions/voice-call/src/runtime.ts index ffa95ddff..6f638ab5b 100644 --- a/extensions/voice-call/src/runtime.ts +++ b/extensions/voice-call/src/runtime.ts @@ -33,7 +33,19 @@ type Logger = { debug: (message: string) => void; }; +function isLoopbackBind(bind: string | undefined): boolean { + if (!bind) return false; + return bind === "127.0.0.1" || bind === "::1" || bind === "localhost"; +} + function resolveProvider(config: VoiceCallConfig): VoiceCallProvider { + const allowNgrokFreeTierLoopbackBypass = + config.tunnel?.provider === "ngrok" && + isLoopbackBind(config.serve?.bind) && + (config.tunnel?.allowNgrokFreeTierLoopbackBypass || + config.tunnel?.allowNgrokFreeTier || + false); + switch (config.provider) { case "telnyx": return new TelnyxProvider({ @@ -48,7 +60,7 @@ function resolveProvider(config: VoiceCallConfig): VoiceCallProvider { authToken: config.twilio?.authToken, }, { - allowNgrokFreeTier: config.tunnel?.allowNgrokFreeTier ?? 
false, + allowNgrokFreeTierLoopbackBypass, publicUrl: config.publicUrl, skipVerification: config.skipSignatureVerification, streamPath: config.streaming?.enabled diff --git a/extensions/voice-call/src/types.ts b/extensions/voice-call/src/types.ts index 7f3928778..68cca11e6 100644 --- a/extensions/voice-call/src/types.ts +++ b/extensions/voice-call/src/types.ts @@ -180,6 +180,7 @@ export type WebhookContext = { url: string; method: "GET" | "POST" | "PUT" | "DELETE" | "PATCH"; query?: Record; + remoteAddress?: string; }; export type ProviderWebhookParseResult = { diff --git a/extensions/voice-call/src/webhook-security.test.ts b/extensions/voice-call/src/webhook-security.test.ts index 98d8a451c..3db2983ec 100644 --- a/extensions/voice-call/src/webhook-security.test.ts +++ b/extensions/voice-call/src/webhook-security.test.ts @@ -221,13 +221,40 @@ describe("verifyTwilioWebhook", () => { rawBody: postBody, url: "http://127.0.0.1:3334/voice/webhook", method: "POST", + remoteAddress: "203.0.113.10", }, authToken, - { allowNgrokFreeTier: true }, + { allowNgrokFreeTierLoopbackBypass: true }, ); expect(result.ok).toBe(false); expect(result.isNgrokFreeTier).toBe(true); expect(result.reason).toMatch(/Invalid signature/); }); + + it("allows invalid signatures for ngrok free tier only on loopback", () => { + const authToken = "test-auth-token"; + const postBody = "CallSid=CS123&CallStatus=completed&From=%2B15550000000"; + + const result = verifyTwilioWebhook( + { + headers: { + host: "127.0.0.1:3334", + "x-forwarded-proto": "https", + "x-forwarded-host": "local.ngrok-free.app", + "x-twilio-signature": "invalid", + }, + rawBody: postBody, + url: "http://127.0.0.1:3334/voice/webhook", + method: "POST", + remoteAddress: "127.0.0.1", + }, + authToken, + { allowNgrokFreeTierLoopbackBypass: true }, + ); + + expect(result.ok).toBe(true); + expect(result.isNgrokFreeTier).toBe(true); + expect(result.reason).toMatch(/compatibility mode/); + }); }); diff --git 
a/extensions/voice-call/src/webhook-security.ts b/extensions/voice-call/src/webhook-security.ts index 98b1d9837..6c7d4d9ab 100644 --- a/extensions/voice-call/src/webhook-security.ts +++ b/extensions/voice-call/src/webhook-security.ts @@ -131,6 +131,13 @@ function getHeader( return value; } +function isLoopbackAddress(address?: string): boolean { + if (!address) return false; + if (address === "127.0.0.1" || address === "::1") return true; + if (address.startsWith("::ffff:127.")) return true; + return false; +} + /** * Result of Twilio webhook verification with detailed info. */ @@ -155,8 +162,8 @@ export function verifyTwilioWebhook( options?: { /** Override the public URL (e.g., from config) */ publicUrl?: string; - /** Allow ngrok free tier compatibility mode (less secure) */ - allowNgrokFreeTier?: boolean; + /** Allow ngrok free tier compatibility mode (loopback only, less secure) */ + allowNgrokFreeTierLoopbackBypass?: boolean; /** Skip verification entirely (only for development) */ skipVerification?: boolean; }, @@ -195,6 +202,22 @@ export function verifyTwilioWebhook( verificationUrl.includes(".ngrok-free.app") || verificationUrl.includes(".ngrok.io"); + if ( + isNgrokFreeTier && + options?.allowNgrokFreeTierLoopbackBypass && + isLoopbackAddress(ctx.remoteAddress) + ) { + console.warn( + "[voice-call] Twilio signature validation failed (ngrok free tier compatibility, loopback only)", + ); + return { + ok: true, + reason: "ngrok free tier compatibility mode (loopback only)", + verificationUrl, + isNgrokFreeTier: true, + }; + } + return { ok: false, reason: `Invalid signature for URL: ${verificationUrl}`, diff --git a/extensions/voice-call/src/webhook.ts b/extensions/voice-call/src/webhook.ts index 6ab4d0eed..09e96ffed 100644 --- a/extensions/voice-call/src/webhook.ts +++ b/extensions/voice-call/src/webhook.ts @@ -252,6 +252,7 @@ export class VoiceCallWebhookServer { url: `http://${req.headers.host}${req.url}`, method: "POST", query: 
Object.fromEntries(url.searchParams), + remoteAddress: req.socket.remoteAddress ?? undefined, }; // Verify signature diff --git a/package.json b/package.json index 0c63d5d69..1299d72d5 100644 --- a/package.json +++ b/package.json @@ -237,6 +237,9 @@ "vitest": "^4.0.18", "wireit": "^0.14.12" }, + "overrides": { + "tar": "7.5.4" + }, "pnpm": { "minimumReleaseAge": 2880, "overrides": { diff --git a/skills/nano-banana-pro/SKILL.md b/skills/nano-banana-pro/SKILL.md index a36c21f64..469576ec7 100644 --- a/skills/nano-banana-pro/SKILL.md +++ b/skills/nano-banana-pro/SKILL.md @@ -14,9 +14,14 @@ Generate uv run {baseDir}/scripts/generate_image.py --prompt "your image description" --filename "output.png" --resolution 1K ``` -Edit +Edit (single image) ```bash -uv run {baseDir}/scripts/generate_image.py --prompt "edit instructions" --filename "output.png" --input-image "/path/in.png" --resolution 2K +uv run {baseDir}/scripts/generate_image.py --prompt "edit instructions" --filename "output.png" -i "/path/in.png" --resolution 2K +``` + +Multi-image composition (up to 14 images) +```bash +uv run {baseDir}/scripts/generate_image.py --prompt "combine these into one scene" --filename "output.png" -i img1.png -i img2.png -i img3.png ``` API key diff --git a/skills/nano-banana-pro/scripts/generate_image.py b/skills/nano-banana-pro/scripts/generate_image.py index 48dd9e9e5..32fc1fc32 100755 --- a/skills/nano-banana-pro/scripts/generate_image.py +++ b/skills/nano-banana-pro/scripts/generate_image.py @@ -11,6 +11,9 @@ Generate images using Google's Nano Banana Pro (Gemini 3 Pro Image) API. 
Usage: uv run generate_image.py --prompt "your image description" --filename "output.png" [--resolution 1K|2K|4K] [--api-key KEY] + +Multi-image editing (up to 14 images): + uv run generate_image.py --prompt "combine these images" --filename "output.png" -i img1.png -i img2.png -i img3.png """ import argparse @@ -42,7 +45,10 @@ def main(): ) parser.add_argument( "--input-image", "-i", - help="Optional input image path for editing/modification" + action="append", + dest="input_images", + metavar="IMAGE", + help="Input image path(s) for editing/composition. Can be specified multiple times (up to 14 images)." ) parser.add_argument( "--resolution", "-r", @@ -78,34 +84,43 @@ def main(): output_path = Path(args.filename) output_path.parent.mkdir(parents=True, exist_ok=True) - # Load input image if provided - input_image = None + # Load input images if provided (up to 14 supported by Nano Banana Pro) + input_images = [] output_resolution = args.resolution - if args.input_image: - try: - input_image = PILImage.open(args.input_image) - print(f"Loaded input image: {args.input_image}") - - # Auto-detect resolution if not explicitly set by user - if args.resolution == "1K": # Default value - # Map input image size to resolution - width, height = input_image.size - max_dim = max(width, height) - if max_dim >= 3000: - output_resolution = "4K" - elif max_dim >= 1500: - output_resolution = "2K" - else: - output_resolution = "1K" - print(f"Auto-detected resolution: {output_resolution} (from input {width}x{height})") - except Exception as e: - print(f"Error loading input image: {e}", file=sys.stderr) + if args.input_images: + if len(args.input_images) > 14: + print(f"Error: Too many input images ({len(args.input_images)}). 
Maximum is 14.", file=sys.stderr) sys.exit(1) - # Build contents (image first if editing, prompt only if generating) - if input_image: - contents = [input_image, args.prompt] - print(f"Editing image with resolution {output_resolution}...") + max_input_dim = 0 + for img_path in args.input_images: + try: + img = PILImage.open(img_path) + input_images.append(img) + print(f"Loaded input image: {img_path}") + + # Track largest dimension for auto-resolution + width, height = img.size + max_input_dim = max(max_input_dim, width, height) + except Exception as e: + print(f"Error loading input image '{img_path}': {e}", file=sys.stderr) + sys.exit(1) + + # Auto-detect resolution from largest input if not explicitly set + if args.resolution == "1K" and max_input_dim > 0: # Default value + if max_input_dim >= 3000: + output_resolution = "4K" + elif max_input_dim >= 1500: + output_resolution = "2K" + else: + output_resolution = "1K" + print(f"Auto-detected resolution: {output_resolution} (from max input dimension {max_input_dim})") + + # Build contents (images first if editing, prompt only if generating) + if input_images: + contents = [*input_images, args.prompt] + img_count = len(input_images) + print(f"Processing {img_count} image{'s' if img_count > 1 else ''} with resolution {output_resolution}...") else: contents = args.prompt print(f"Generating image with resolution {output_resolution}...") diff --git a/src/auto-reply/reply/agent-runner-execution.ts b/src/auto-reply/reply/agent-runner-execution.ts index 939fa92f0..3537972e4 100644 --- a/src/auto-reply/reply/agent-runner-execution.ts +++ b/src/auto-reply/reply/agent-runner-execution.ts @@ -89,6 +89,7 @@ export async function runAgentTurnWithFallback(params: { registerAgentRunContext(runId, { sessionKey: params.sessionKey, verboseLevel: params.resolvedVerboseLevel, + isHeartbeat: params.isHeartbeat, }); } let runResult: Awaited>; diff --git a/src/gateway/server-chat.ts b/src/gateway/server-chat.ts index 9ef62e688..8c67767a6 
100644 --- a/src/gateway/server-chat.ts +++ b/src/gateway/server-chat.ts @@ -1,8 +1,28 @@ import { normalizeVerboseLevel } from "../auto-reply/thinking.js"; +import { loadConfig } from "../config/config.js"; import { type AgentEventPayload, getAgentRunContext } from "../infra/agent-events.js"; +import { resolveHeartbeatVisibility } from "../infra/heartbeat-visibility.js"; import { loadSessionEntry } from "./session-utils.js"; import { formatForLog } from "./ws-log.js"; +/** + * Check if webchat broadcasts should be suppressed for heartbeat runs. + * Returns true if the run is a heartbeat and showOk is false. + */ +function shouldSuppressHeartbeatBroadcast(runId: string): boolean { + const runContext = getAgentRunContext(runId); + if (!runContext?.isHeartbeat) return false; + + try { + const cfg = loadConfig(); + const visibility = resolveHeartbeatVisibility({ cfg, channel: "webchat" }); + return !visibility.showOk; + } catch { + // Default to suppressing if we can't load config + return true; + } +} + export type ChatRunEntry = { sessionKey: string; clientRunId: string; @@ -130,7 +150,10 @@ export function createAgentEventHandler({ timestamp: now, }, }; - broadcast("chat", payload, { dropIfSlow: true }); + // Suppress webchat broadcast for heartbeat runs when showOk is false + if (!shouldSuppressHeartbeatBroadcast(clientRunId)) { + broadcast("chat", payload, { dropIfSlow: true }); + } nodeSendToSession(sessionKey, "chat", payload); }; @@ -158,7 +181,10 @@ export function createAgentEventHandler({ } : undefined, }; - broadcast("chat", payload); + // Suppress webchat broadcast for heartbeat runs when showOk is false + if (!shouldSuppressHeartbeatBroadcast(clientRunId)) { + broadcast("chat", payload); + } nodeSendToSession(sessionKey, "chat", payload); return; } diff --git a/src/infra/agent-events.ts b/src/infra/agent-events.ts index c11dff8ab..5c41c3c95 100644 --- a/src/infra/agent-events.ts +++ b/src/infra/agent-events.ts @@ -14,6 +14,7 @@ export type 
AgentEventPayload = { export type AgentRunContext = { sessionKey?: string; verboseLevel?: VerboseLevel; + isHeartbeat?: boolean; }; // Keep per-run counters so streams stay strictly monotonic per runId. @@ -34,6 +35,9 @@ export function registerAgentRunContext(runId: string, context: AgentRunContext) if (context.verboseLevel && existing.verboseLevel !== context.verboseLevel) { existing.verboseLevel = context.verboseLevel; } + if (context.isHeartbeat !== undefined && existing.isHeartbeat !== context.isHeartbeat) { + existing.isHeartbeat = context.isHeartbeat; + } } export function getAgentRunContext(runId: string) { diff --git a/src/infra/heartbeat-visibility.test.ts b/src/infra/heartbeat-visibility.test.ts index 17a7dc128..e98054bbb 100644 --- a/src/infra/heartbeat-visibility.test.ts +++ b/src/infra/heartbeat-visibility.test.ts @@ -247,4 +247,58 @@ describe("resolveHeartbeatVisibility", () => { useIndicator: true, }); }); + + it("webchat uses channel defaults only (no per-channel config)", () => { + const cfg = { + channels: { + defaults: { + heartbeat: { + showOk: true, + showAlerts: false, + useIndicator: false, + }, + }, + }, + } as ClawdbotConfig; + + const result = resolveHeartbeatVisibility({ cfg, channel: "webchat" }); + + expect(result).toEqual({ + showOk: true, + showAlerts: false, + useIndicator: false, + }); + }); + + it("webchat returns defaults when no channel defaults configured", () => { + const cfg = {} as ClawdbotConfig; + + const result = resolveHeartbeatVisibility({ cfg, channel: "webchat" }); + + expect(result).toEqual({ + showOk: false, + showAlerts: true, + useIndicator: true, + }); + }); + + it("webchat ignores accountId (only uses defaults)", () => { + const cfg = { + channels: { + defaults: { + heartbeat: { + showOk: true, + }, + }, + }, + } as ClawdbotConfig; + + const result = resolveHeartbeatVisibility({ + cfg, + channel: "webchat", + accountId: "some-account", + }); + + expect(result.showOk).toBe(true); + }); }); diff --git 
a/src/infra/heartbeat-visibility.ts b/src/infra/heartbeat-visibility.ts index 75555b878..e4943464c 100644 --- a/src/infra/heartbeat-visibility.ts +++ b/src/infra/heartbeat-visibility.ts @@ -1,6 +1,6 @@ import type { ClawdbotConfig } from "../config/config.js"; import type { ChannelHeartbeatVisibilityConfig } from "../config/types.channels.js"; -import type { DeliverableMessageChannel } from "../utils/message-channel.js"; +import type { DeliverableMessageChannel, GatewayMessageChannel } from "../utils/message-channel.js"; export type ResolvedHeartbeatVisibility = { showOk: boolean; @@ -14,13 +14,28 @@ const DEFAULT_VISIBILITY: ResolvedHeartbeatVisibility = { useIndicator: true, // Emit indicator events }; +/** + * Resolve heartbeat visibility settings for a channel. + * Supports both deliverable channels (telegram, signal, etc.) and webchat. + * For webchat, uses channels.defaults.heartbeat since webchat doesn't have per-channel config. + */ export function resolveHeartbeatVisibility(params: { cfg: ClawdbotConfig; - channel: DeliverableMessageChannel; + channel: GatewayMessageChannel; accountId?: string; }): ResolvedHeartbeatVisibility { const { cfg, channel, accountId } = params; + // Webchat uses channel defaults only (no per-channel or per-account config) + if (channel === "webchat") { + const channelDefaults = cfg.channels?.defaults?.heartbeat; + return { + showOk: channelDefaults?.showOk ?? DEFAULT_VISIBILITY.showOk, + showAlerts: channelDefaults?.showAlerts ?? DEFAULT_VISIBILITY.showAlerts, + useIndicator: channelDefaults?.useIndicator ?? DEFAULT_VISIBILITY.useIndicator, + }; + } + // Layer 1: Global channel defaults const channelDefaults = cfg.channels?.defaults?.heartbeat;