diff --git a/CHANGELOG.md b/CHANGELOG.md index 402faac26..463756435 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ Docs: https://docs.clawd.bot - Docs: expand FAQ (migration, scheduling, concurrency, model recommendations, OpenAI subscription auth, Pi sizing, hackable install, docs SSL workaround). - Docs: add verbose installer troubleshooting guidance. - Docs: update Fly.io guide notes. +- Tools: add link understanding with CLI preprocessing, SSRF guard, and structured link summaries. (#1637) Thanks @dasilva333. - Docs: add Bedrock EC2 instance role setup + IAM steps. (#1625) Thanks @sergical. https://docs.clawd.bot/bedrock - Exec approvals: forward approval prompts to chat with `/approve` for all channels (including plugins). (#1621) Thanks @czekaj. https://docs.clawd.bot/tools/exec-approvals https://docs.clawd.bot/tools/slash-commands - Gateway: expose config.patch in the gateway tool with safe partial updates + restart sentinel. (#1653) Thanks @Glucksberg. diff --git a/docs/docs.json b/docs/docs.json index f5e858909..d545f1179 100644 --- a/docs/docs.json +++ b/docs/docs.json @@ -1005,6 +1005,7 @@ "plugins/zalouser", "tools/exec", "tools/web", + "tools/link-understanding", "tools/apply-patch", "tools/elevated", "tools/browser", diff --git a/docs/gateway/configuration.md b/docs/gateway/configuration.md index d4fe5e12f..b35abe5b7 100644 --- a/docs/gateway/configuration.md +++ b/docs/gateway/configuration.md @@ -2010,6 +2010,35 @@ Example: } ``` +`tools.links` configures link understanding (URL preprocessing via CLI): +- `tools.links.enabled`: enable/disable link understanding (default: enabled when models are configured). +- `tools.links.maxLinks`: max URLs per message (default 3). +- `tools.links.timeoutSeconds`: default CLI timeout (default 30). +- `tools.links.scope`: optional gating rules (same shape as media understanding scope). +- `tools.links.models`: ordered CLI entries (fallbacks in order). + - `command`: executable to run (required). 
+ - `args`: templated args (supports `{{LinkUrl}}`). + - `timeoutSeconds`: optional per-entry override. + +Example: +```json5 +{ + tools: { + links: { + enabled: true, + maxLinks: 2, + scope: { + default: "deny", + rules: [{ action: "allow", match: { chatType: "direct" } }] + }, + models: [ + { command: "link-understand", args: ["--url", "{{LinkUrl}}"] } + ] + } + } +} +``` + `agents.defaults.subagents` configures sub-agent defaults: - `model`: default model for spawned sub-agents (string or `{ primary, fallbacks }`). If omitted, sub-agents inherit the caller’s model unless overridden per agent or per call. - `maxConcurrent`: max concurrent sub-agent runs (default 1) @@ -3180,7 +3209,7 @@ clawdbot dns setup --apply ## Template variables -Template placeholders are expanded in `tools.media.*.models[].args` and `tools.media.models[].args` (and any future templated argument fields). +Template placeholders are expanded in `tools.media.*.models[].args`, `tools.media.models[].args`, and `tools.links.models[].args` (and any future templated argument fields). | Variable | Description | |----------|-------------| @@ -3195,6 +3224,7 @@ Template placeholders are expanded in `tools.media.*.models[].args` and `tools.m | `{{MediaUrl}}` | Inbound media pseudo-URL (if present) | | `{{MediaPath}}` | Local media path (if downloaded) | | `{{MediaType}}` | Media type (image/audio/document/…) | +| `{{LinkUrl}}` | URL currently being processed (link understanding only) | | `{{Transcript}}` | Audio transcript (when enabled) | | `{{Prompt}}` | Resolved media prompt for CLI entries | | `{{MaxChars}}` | Resolved max output chars for CLI entries | diff --git a/docs/tools/index.md b/docs/tools/index.md index 7d9b3f581..b4fc9df65 100644 --- a/docs/tools/index.md +++ b/docs/tools/index.md @@ -232,6 +232,13 @@ Notes: - See [Web tools](/tools/web) for setup. - See [Firecrawl](/tools/firecrawl) for the optional anti-bot fallback. 
+### Link understanding (inbound) +Preprocess inbound messages by running a CLI on detected URLs and appending the +results to the message body before the agent sees it. This is **not** an agent +tool; it runs in the inbound auto-reply pipeline. + +See [Link understanding](/tools/link-understanding) for setup and configuration. + ### `browser` Control the dedicated clawd browser. diff --git a/docs/tools/link-understanding.md b/docs/tools/link-understanding.md new file mode 100644 index 000000000..9f096ca0d --- /dev/null +++ b/docs/tools/link-understanding.md @@ -0,0 +1,326 @@ +--- +summary: "Run a CLI on URLs in inbound messages and inject results before the agent replies" +read_when: + - You want to summarize or enrich URLs in inbound messages + - You need to enable or scope link understanding + - You want to wire a custom CLI to preprocess links +--- + +# Link understanding + +Link understanding is the **inbound URL enrichment pipeline**: Clawdbot detects +links in a message, runs your **local CLI** to interpret them, and injects the +results into the message body **before the agent sees it**. The goal is a +predictable, structured prompt envelope without adding a browsing tool. + +This is **not** an agent tool. It is a preprocessing step in the auto-reply +pipeline, shared across all channels (including webchat). + +## What it enables + +- Summarize or extract data from links (docs, tickets, dashboards, runbooks). +- Inject structured context (titles, owners, status, timestamps, key facts). +- Normalize or transform link payloads (HTML → text, JSON → bullets). +- Gate link processing by channel, chat type, or session key. +- Apply SSRF protections before your CLI runs. + +## When it runs (pipeline order) + +Link understanding runs after inbound normalization and media understanding, +but before the agent executes the reply run: + +1) Inbound message is normalized and routed. +2) Media understanding may rewrite the body (images/audio/video). 
+3) Link understanding detects URLs in the current command/body and appends results. +4) The agent receives the final body; command parsing keeps using the original text, so appended link output is never parsed for commands. + +## How it works + +1) Clawdbot scans the inbound message for **bare** `http/https` URLs. +2) It dedupes links in order and caps them at `tools.links.maxLinks` (default 3). +3) For each link, it runs your configured CLI entries **in order** until one returns non-empty stdout. +4) It appends the resulting outputs to the message body (structured envelope). +5) The agent sees the original text plus the appended link outputs. + +Clawdbot makes no HTTP requests itself (it only resolves hostnames for the SSRF guard); **your CLI does the fetching/parsing**. + +## URL detection rules + +- Only **bare URLs** are extracted (e.g. `https://example.com`). +- Markdown links are ignored (`[label](https://example.com)` is stripped). +- Links are deduped (first occurrence wins). +- Only `http` and `https` are allowed. +- Local and private hosts are blocked (see **Security** below). + +## Configuration + +Link understanding is configured under `tools.links`: + +```json5 +{ + tools: { + links: { + enabled: true, + maxLinks: 3, + timeoutSeconds: 30, + scope: { + default: "allow", + rules: [{ action: "deny", match: { chatType: "group" } }] + }, + models: [ + { + command: "link-understand", + args: ["--url", "{{LinkUrl}}", "--format", "markdown"], + timeoutSeconds: 20 + } + ] + } + } +} +``` + +### `tools.links` fields + +- `enabled`: enable/disable link understanding (default: enabled when models are configured). +- `maxLinks`: max URLs processed per message (default 3). +- `timeoutSeconds`: default timeout for CLI runs (default 30). +- `scope`: optional gating rules (same structure as media understanding scope). +- `models`: ordered list of CLI entries (fallbacks in order). + +### Model entries (`tools.links.models[]`) + +Each entry is a CLI command: + +- `type`: optional, only `"cli"` is supported (default). +- `command`: executable to run (required). 
+- `args`: CLI args (templated; see **Template variables**). +- `timeoutSeconds`: optional override for this entry. + +The first entry that returns **non-empty stdout** wins. If a command fails or +returns empty output, Clawdbot tries the next entry. + +## Performance + ordering + +- Links are processed **sequentially** (per message, in order). +- Each CLI entry has a timeout (per entry or default). +- Failure to run a CLI does **not** abort the agent run; it just tries the next entry. +- `maxLinks` is the primary cap for latency. Keep it low for chatty channels. + +## Template variables + +`tools.links.models[].args` supports the standard template variables plus `{{LinkUrl}}`. +See [Template variables](/gateway/configuration#template-variables) for the full list. + +Link-specific variable: + +| Variable | Description | +|----------|-------------| +| `{{LinkUrl}}` | URL currently being processed | + +## Output format + +Each link is wrapped in a predictable envelope and appended to the message body: + +``` +<original message body> + +[Link] +URL: https://example.com +Source: link-understand +Summary: +<CLI stdout> +``` + +When multiple links are present, the header is numbered (`[Link 1/2]`, `[Link 2/2]`). +`Source` is the CLI `command` that produced the output. + +### Output best practices + +- Keep summaries short and stable (avoid prompt bloat). +- Prefer a compact, consistent structure (title → bullets → status). +- Avoid surrounding fences unless you want the agent to treat it as literal text. + + +## Scope gating + +`tools.links.scope` uses the same rules as media understanding: + +- `default`: `"allow"` or `"deny"`. +- `rules[]`: first match wins. 
+ - `match.channel` (surface/channel) + - `match.chatType` (`direct`, `group`, `channel`) + - `match.keyPrefix` (session key prefix) + +Example (only allow direct messages): + +```json5 +{ + tools: { + links: { + scope: { + default: "deny", + rules: [{ action: "allow", match: { chatType: "direct" } }] + }, + models: [{ command: "link-understand", args: ["--url", "{{LinkUrl}}"] }] + } + } +} +``` + +## Security (SSRF guard) + +Clawdbot blocks local/private hosts before invoking your CLI. The guard rejects: + +- `localhost`, `*.localhost`, `*.local`, `*.internal` +- Loopback addresses (IPv4 + IPv6) +- Private and link-local IP ranges (IPv4 + IPv6) +- `metadata.google.internal` +- Addresses that resolve to private/internal IPs + +If a URL is blocked, it is **skipped** and your CLI is not invoked for it. +URLs that fail DNS resolution are also skipped. Each skipped URL is recorded in the decision payload as a `skipped` attempt with `command: "ssrf"` and the reason. The guard only checks the URL's hostname before the CLI starts; your CLI does its own DNS lookup and may follow redirects, so it should apply the same restrictions when it runs inside a sensitive network. + +## Decision tracking + +Each run records a decision payload (useful in logs/debugging) in `ctx.LinkUnderstandingDecisions`: + +```json5 +{ + outcome: "success" | "skipped" | "disabled" | "scope-deny" | "no-links", + urls: [ + { + url: "https://example.com", + attempts: [ + { type: "cli", command: "link-understand", outcome: "success" } + ], + chosen: { type: "cli", command: "link-understand", outcome: "success" } + } + ] +} +``` + +## Custom usage patterns + +### Ticket summarizer (single link) + +```json5 +{ + tools: { + links: { + maxLinks: 1, + models: [ + { + command: "ticket-summary", + args: ["--url", "{{LinkUrl}}", "--format", "brief"] + } + ] + } + } +} +``` + +### Per-channel allowlist (only Discord + Slack) + +```json5 +{ + tools: { + links: { + scope: { + default: "deny", + rules: [ + { action: "allow", match: { channel: "discord" } }, + { action: "allow", match: { channel: "slack" } } + ] + }, + models: [{ command: "link-understand", args: ["--url", "{{LinkUrl}}"] }] + } + } +} +``` + +### Per-agent override (support agent only) + +```json5 +{ + agents: { + list: [ + { + id: "support", + tools: { + links: 
+ { + enabled: true, + maxLinks: 2, + models: [{ command: "link-understand", args: ["--url", "{{LinkUrl}}"] }] + } + } + } + ] + } +} +``` + +### Markdown link behavior (ignored) + +Message: +``` +Please check [our docs](https://docs.example.com) and https://status.example.com +``` + +Only `https://status.example.com` is processed (markdown links are stripped). + +## Troubleshooting + +- **Nothing happens:** ensure `tools.links.models` is set and `enabled` is not `false`. +- **No links detected:** only bare URLs are extracted; markdown links are ignored. +- **Output missing:** every CLI entry failed or returned empty stdout; add logging to your CLI or have it print a default line. +- **Wrong channel scope:** check `tools.links.scope` rules and `match.chatType`. +- **SSRF blocked:** local/private URLs are skipped by design. + +Enable verbose logs to see CLI execution and scope decisions: + +```bash +clawdbot gateway run --verbose +``` + +## Examples + +### Minimal CLI runner + +```json5 +{ + tools: { + links: { + models: [{ command: "link-understand", args: ["--url", "{{LinkUrl}}"] }] + } + } +} +``` + +### Limit to 1 link, custom timeout + +```json5 +{ + tools: { + links: { + maxLinks: 1, + timeoutSeconds: 10, + models: [{ command: "link-understand", args: ["--url", "{{LinkUrl}}"] }] + } + } +} +``` + +### Fallback chain (first non-empty output wins) + +```json5 +{ + tools: { + links: { + models: [ + { command: "link-understand", args: ["--url", "{{LinkUrl}}"] }, + { command: "link-backup", args: ["{{LinkUrl}}"] } + ] + } + } +} +``` diff --git a/src/auto-reply/templating.ts b/src/auto-reply/templating.ts index dd424ee71..f495b81fa 100644 --- a/src/auto-reply/templating.ts +++ b/src/auto-reply/templating.ts @@ -5,6 +5,7 @@ import type { MediaUnderstandingDecision, MediaUnderstandingOutput, } from "../media-understanding/types.js"; +import type { LinkUnderstandingDecision } from "../link-understanding/types.js"; /** Valid message channels for routing. 
*/ export type OriginatingChannelType = ChannelId | InternalMessageChannel; @@ -72,6 +73,7 @@ export type MsgContext = { MediaUnderstanding?: MediaUnderstandingOutput[]; MediaUnderstandingDecisions?: MediaUnderstandingDecision[]; LinkUnderstanding?: string[]; + LinkUnderstandingDecisions?: LinkUnderstandingDecision[]; Prompt?: string; MaxChars?: number; ChatType?: string; diff --git a/src/channels/plugins/group-mentions.ts b/src/channels/plugins/group-mentions.ts index b15ce1b07..a63e44aae 100644 --- a/src/channels/plugins/group-mentions.ts +++ b/src/channels/plugins/group-mentions.ts @@ -240,6 +240,17 @@ export function resolveIMessageGroupToolPolicy( }); } +export function resolveGoogleChatGroupToolPolicy( + params: GroupMentionParams, +): GroupToolPolicyConfig | undefined { + return resolveChannelGroupToolsPolicy({ + cfg: params.cfg, + channel: "googlechat", + groupId: params.groupId, + accountId: params.accountId, + }); +} + export function resolveDiscordGroupToolPolicy( params: GroupMentionParams, ): GroupToolPolicyConfig | undefined { diff --git a/src/gateway/server-methods/chat.ts b/src/gateway/server-methods/chat.ts index fd2ccc2a5..50f441779 100644 --- a/src/gateway/server-methods/chat.ts +++ b/src/gateway/server-methods/chat.ts @@ -14,8 +14,6 @@ import { type ResponsePrefixContext, } from "../../auto-reply/reply/response-prefix-template.js"; import type { MsgContext } from "../../auto-reply/templating.js"; -import { formatLinkUnderstandingBody } from "../../link-understanding/format.js"; -import { runLinkUnderstanding } from "../../link-understanding/runner.js"; import { resolveSendPolicy } from "../../sessions/send-policy.js"; import { INTERNAL_MESSAGE_CHANNEL } from "../../utils/message-channel.js"; import { @@ -414,31 +412,6 @@ export const chatHandlers: GatewayRequestHandlers = { return; } - try { - const linkCtx: MsgContext = { - Body: parsedMessage, - RawBody: parsedMessage, - CommandBody: parsedMessage, - SessionKey: p.sessionKey, - ChatType: 
entry?.chatType, - Surface: entry?.channel ?? "webchat", - }; - const linkResult = await runLinkUnderstanding({ - cfg, - ctx: linkCtx, - message: parsedMessage, - }); - if (linkResult.outputs.length > 0) { - parsedMessage = formatLinkUnderstandingBody({ - body: parsedMessage, - outputs: linkResult.outputs, - }); - } - } catch (err) { - respond(false, undefined, errorShape(ErrorCodes.UNAVAILABLE, String(err))); - return; - } - try { const abortController = new AbortController(); context.chatAbortControllers.set(clientRunId, { diff --git a/src/link-understanding/apply.test.ts b/src/link-understanding/apply.test.ts new file mode 100644 index 000000000..683e2ed2b --- /dev/null +++ b/src/link-understanding/apply.test.ts @@ -0,0 +1,61 @@ +import { describe, expect, it, vi } from "vitest"; + +import type { ClawdbotConfig } from "../config/config.js"; +import type { MsgContext } from "../auto-reply/templating.js"; +import { runLinkUnderstanding } from "./runner.js"; + +vi.mock("./runner.js", () => ({ + runLinkUnderstanding: vi.fn(), +})); + +describe("applyLinkUnderstanding", () => { + it("keeps command parsing bodies unchanged when link output is applied", async () => { + const mockedRunLinkUnderstanding = vi.mocked(runLinkUnderstanding); + mockedRunLinkUnderstanding.mockResolvedValue({ + urls: ["https://example.com"], + outputs: [{ url: "https://example.com", text: "Summary", source: "link-cli" }], + decisions: [], + }); + + const { applyLinkUnderstanding } = await import("./apply.js"); + const ctx: MsgContext = { + Body: "check https://example.com", + RawBody: "raw override", + CommandBody: "/think low check https://example.com", + }; + const cfg: ClawdbotConfig = { + tools: { links: { models: [{ command: "link-cli" }] } }, + }; + + await applyLinkUnderstanding({ ctx, cfg }); + + expect(ctx.Body).toContain("[Link]"); + expect(ctx.CommandBody).toBe("/think low check https://example.com"); + expect(ctx.RawBody).toBe("raw override"); + 
expect(ctx.BodyForCommands).toBe("/think low check https://example.com"); + expect(ctx.BodyForCommands).not.toContain("[Link]"); + }); + + it("preserves original body for command parsing when no overrides exist", async () => { + const mockedRunLinkUnderstanding = vi.mocked(runLinkUnderstanding); + mockedRunLinkUnderstanding.mockResolvedValue({ + urls: ["https://example.com"], + outputs: [{ url: "https://example.com", text: "Summary", source: "link-cli" }], + decisions: [], + }); + + const { applyLinkUnderstanding } = await import("./apply.js"); + const ctx: MsgContext = { + Body: "check https://example.com", + }; + const cfg: ClawdbotConfig = { + tools: { links: { models: [{ command: "link-cli" }] } }, + }; + + await applyLinkUnderstanding({ ctx, cfg }); + + expect(ctx.Body).toContain("[Link]"); + expect(ctx.RawBody).toBe("check https://example.com"); + expect(ctx.BodyForCommands).toBe("check https://example.com"); + }); +}); diff --git a/src/link-understanding/apply.ts b/src/link-understanding/apply.ts index 82cd1e9f4..7b6a9fd75 100644 --- a/src/link-understanding/apply.ts +++ b/src/link-understanding/apply.ts @@ -1,12 +1,14 @@ import type { ClawdbotConfig } from "../config/config.js"; import type { MsgContext } from "../auto-reply/templating.js"; import { finalizeInboundContext } from "../auto-reply/reply/inbound-context.js"; -import { formatLinkUnderstandingBody } from "./format.js"; +import { formatLinkUnderstandingBody, formatLinkUnderstandingSections } from "./format.js"; import { runLinkUnderstanding } from "./runner.js"; +import type { LinkUnderstandingDecision, LinkUnderstandingOutput } from "./types.js"; export type ApplyLinkUnderstandingResult = { - outputs: string[]; + outputs: LinkUnderstandingOutput[]; urls: string[]; + decisions: LinkUnderstandingDecision[]; }; export async function applyLinkUnderstanding(params: { @@ -18,16 +20,29 @@ export async function applyLinkUnderstanding(params: { ctx: params.ctx, }); + if (result.decisions.length > 0) { + 
params.ctx.LinkUnderstandingDecisions = [ + ...(params.ctx.LinkUnderstandingDecisions ?? []), + ...result.decisions, + ]; + } + if (result.outputs.length === 0) { return result; } - params.ctx.LinkUnderstanding = [...(params.ctx.LinkUnderstanding ?? []), ...result.outputs]; + const originalBody = params.ctx.Body; + const sections = formatLinkUnderstandingSections(result.outputs); + if (sections.length > 0) { + params.ctx.LinkUnderstanding = [...(params.ctx.LinkUnderstanding ?? []), ...sections]; + } params.ctx.Body = formatLinkUnderstandingBody({ body: params.ctx.Body, outputs: result.outputs, }); - + if (!params.ctx.CommandBody && !params.ctx.RawBody && typeof originalBody === "string") { + params.ctx.RawBody = originalBody; + } finalizeInboundContext(params.ctx, { forceBodyForAgent: true, forceBodyForCommands: true, diff --git a/src/link-understanding/detect.test.ts b/src/link-understanding/detect.test.ts index 38beeaa88..2dd653429 100644 --- a/src/link-understanding/detect.test.ts +++ b/src/link-understanding/detect.test.ts @@ -20,8 +20,10 @@ describe("extractLinksFromMessage", () => { expect(links).toEqual(["https://bare.example"]); }); - it("blocks localhost", () => { - const links = extractLinksFromMessage("http://127.0.0.1/test https://ok.test"); + it("blocks loopback and private hosts", () => { + const links = extractLinksFromMessage( + "http://localhost/test http://127.0.0.1/test http://[::1]/test http://10.0.0.1/test https://ok.test", + ); expect(links).toEqual(["https://ok.test"]); }); }); diff --git a/src/link-understanding/detect.ts b/src/link-understanding/detect.ts index 9edecde63..f3047f97e 100644 --- a/src/link-understanding/detect.ts +++ b/src/link-understanding/detect.ts @@ -1,3 +1,4 @@ +import { isBlockedHostname, isPrivateIpAddress } from "../infra/net/ssrf.js"; import { DEFAULT_MAX_LINKS } from "./defaults.js"; // Remove markdown link syntax so only bare URLs are considered. 
@@ -19,7 +20,10 @@ function isAllowedUrl(raw: string): boolean { try { const parsed = new URL(raw); if (parsed.protocol !== "http:" && parsed.protocol !== "https:") return false; - if (parsed.hostname === "127.0.0.1") return false; + const hostname = parsed.hostname; + if (!hostname) return false; + if (isBlockedHostname(hostname)) return false; + if (isPrivateIpAddress(hostname)) return false; return true; } catch { return false; diff --git a/src/link-understanding/format.test.ts b/src/link-understanding/format.test.ts new file mode 100644 index 000000000..c5944dd16 --- /dev/null +++ b/src/link-understanding/format.test.ts @@ -0,0 +1,27 @@ +import { describe, expect, it } from "vitest"; + +import { formatLinkUnderstandingBody } from "./format.js"; + +describe("formatLinkUnderstandingBody", () => { + it("appends a structured link summary with source", () => { + const body = formatLinkUnderstandingBody({ + body: "Check this", + outputs: [{ url: "https://example.com", text: "Summary here", source: "link-cli" }], + }); + expect(body).toBe( + "Check this\n\n[Link]\nURL: https://example.com\nSource: link-cli\nSummary:\nSummary here", + ); + }); + + it("numbers multiple links and omits empty body", () => { + const body = formatLinkUnderstandingBody({ + outputs: [ + { url: "https://a.example", text: "First", source: "cli-a" }, + { url: "https://b.example", text: "Second", source: "cli-b" }, + ], + }); + expect(body).toBe( + "[Link 1/2]\nURL: https://a.example\nSource: cli-a\nSummary:\nFirst\n\n[Link 2/2]\nURL: https://b.example\nSource: cli-b\nSummary:\nSecond", + ); + }); +}); diff --git a/src/link-understanding/format.ts b/src/link-understanding/format.ts index b28d16a1a..18c677383 100644 --- a/src/link-understanding/format.ts +++ b/src/link-understanding/format.ts @@ -1,10 +1,31 @@ -export function formatLinkUnderstandingBody(params: { body?: string; outputs: string[] }): string { - const outputs = params.outputs.map((output) => output.trim()).filter(Boolean); - if 
(outputs.length === 0) { +import type { LinkUnderstandingOutput } from "./types.js"; + +function formatSection(output: LinkUnderstandingOutput, index: number, total: number): string { + const label = total > 1 ? `Link ${index + 1}/${total}` : "Link"; + const source = output.source?.trim() || "unknown"; + const lines = [`[${label}]`, `URL: ${output.url}`, `Source: ${source}`]; + lines.push(`Summary:\n${output.text.trim()}`); + return lines.join("\n"); +} + +export function formatLinkUnderstandingSections(outputs: LinkUnderstandingOutput[]): string[] { + const trimmed = outputs + .map((output) => ({ ...output, text: output.text.trim(), url: output.url.trim() })) + .filter((output) => output.text && output.url); + if (trimmed.length === 0) return []; + return trimmed.map((output, index) => formatSection(output, index, trimmed.length)); +} + +export function formatLinkUnderstandingBody(params: { + body?: string; + outputs: LinkUnderstandingOutput[]; +}): string { + const sections = formatLinkUnderstandingSections(params.outputs); + if (sections.length === 0) { return params.body ?? ""; } const base = (params.body ?? 
"").trim(); - if (!base) return outputs.join("\n"); - return `${base}\n\n${outputs.join("\n")}`; + if (!base) return sections.join("\n\n"); + return `${base}\n\n${sections.join("\n\n")}`; } diff --git a/src/link-understanding/index.ts b/src/link-understanding/index.ts index d772f9655..79efe97a8 100644 --- a/src/link-understanding/index.ts +++ b/src/link-understanding/index.ts @@ -2,3 +2,10 @@ export { applyLinkUnderstanding } from "./apply.js"; export { extractLinksFromMessage } from "./detect.js"; export { formatLinkUnderstandingBody } from "./format.js"; export { runLinkUnderstanding } from "./runner.js"; +export type { + LinkUnderstandingDecision, + LinkUnderstandingDecisionOutcome, + LinkUnderstandingModelDecision, + LinkUnderstandingOutput, + LinkUnderstandingUrlDecision, +} from "./types.js"; diff --git a/src/link-understanding/runner.test.ts b/src/link-understanding/runner.test.ts new file mode 100644 index 000000000..e9cd37b20 --- /dev/null +++ b/src/link-understanding/runner.test.ts @@ -0,0 +1,84 @@ +import { beforeEach, describe, expect, it, vi } from "vitest"; + +import type { MsgContext } from "../auto-reply/templating.js"; +import type { ClawdbotConfig } from "../config/config.js"; +import { assertPublicHostname } from "../infra/net/ssrf.js"; +import { runExec } from "../process/exec.js"; + +vi.mock("../process/exec.js", () => ({ + runExec: vi.fn(), +})); +vi.mock("../infra/net/ssrf.js", async () => { + const actual = + await vi.importActual("../infra/net/ssrf.js"); + return { + ...actual, + assertPublicHostname: vi.fn(), + }; +}); + +async function loadRunner() { + return await import("./runner.js"); +} + +describe("runLinkUnderstanding", () => { + const mockedRunExec = vi.mocked(runExec); + const mockedAssertPublicHostname = vi.mocked(assertPublicHostname); + + beforeEach(() => { + mockedRunExec.mockReset(); + mockedAssertPublicHostname.mockReset(); + mockedAssertPublicHostname.mockResolvedValue(undefined); + }); + + it("falls back to the next model 
when a CLI entry fails", async () => { + const { runLinkUnderstanding } = await loadRunner(); + mockedRunExec.mockImplementation(async (command) => { + if (command === "fail") throw new Error("boom"); + return { stdout: "summary", stderr: "" }; + }); + + const cfg: ClawdbotConfig = { + tools: { + links: { + enabled: true, + models: [{ command: "fail" }, { command: "ok" }], + }, + }, + }; + const ctx: MsgContext = { + Body: "see https://example.com", + SessionKey: "session-1", + Surface: "discord", + ChatType: "direct", + }; + + const result = await runLinkUnderstanding({ cfg, ctx }); + expect(result.urls).toEqual(["https://example.com"]); + expect(result.outputs).toEqual([{ url: "https://example.com", text: "summary", source: "ok" }]); + }); + + it("skips links that fail the public hostname check", async () => { + const { runLinkUnderstanding } = await loadRunner(); + mockedAssertPublicHostname.mockRejectedValueOnce(new Error("Blocked: private/internal IP")); + + const cfg: ClawdbotConfig = { + tools: { + links: { + enabled: true, + models: [{ command: "ok" }], + }, + }, + }; + const ctx: MsgContext = { + Body: "see https://intranet.example.com", + SessionKey: "session-1", + Surface: "discord", + ChatType: "direct", + }; + + const result = await runLinkUnderstanding({ cfg, ctx }); + expect(result.outputs).toEqual([]); + expect(result.decisions[0]?.urls[0]?.attempts[0]?.reason).toContain("Blocked"); + }); +}); diff --git a/src/link-understanding/runner.ts b/src/link-understanding/runner.ts index 8a6f7fa5d..885f2162c 100644 --- a/src/link-understanding/runner.ts +++ b/src/link-understanding/runner.ts @@ -10,12 +10,20 @@ import { normalizeMediaUnderstandingChatType, resolveMediaUnderstandingScope, } from "../media-understanding/scope.js"; +import { assertPublicHostname } from "../infra/net/ssrf.js"; import { DEFAULT_LINK_TIMEOUT_SECONDS } from "./defaults.js"; import { extractLinksFromMessage } from "./detect.js"; +import type { + LinkUnderstandingDecision, + 
LinkUnderstandingModelDecision, + LinkUnderstandingOutput, + LinkUnderstandingUrlDecision, +} from "./types.js"; export type LinkUnderstandingResult = { urls: string[]; - outputs: string[]; + outputs: LinkUnderstandingOutput[]; + decisions: LinkUnderstandingDecision[]; }; function resolveScopeDecision(params: { @@ -69,12 +77,42 @@ async function runCliEntry(params: { return trimmed || null; } +function buildModelDecision(params: { + entry: LinkModelConfig; + outcome: LinkUnderstandingModelDecision["outcome"]; + reason?: string; +}): LinkUnderstandingModelDecision { + const command = params.entry.command?.trim(); + return { + type: "cli", + command: command || undefined, + outcome: params.outcome, + reason: params.reason, + }; +} + +async function assertUrlIsPublic( + url: string, +): Promise<{ ok: true } | { ok: false; reason: string }> { + try { + const parsed = new URL(url); + await assertPublicHostname(parsed.hostname); + return { ok: true }; + } catch (err) { + return { ok: false, reason: String(err) }; + } +} + async function runLinkEntries(params: { entries: LinkModelConfig[]; ctx: MsgContext; url: string; config?: LinkToolsConfig; -}): Promise { +}): Promise<{ + output: LinkUnderstandingOutput | null; + attempts: LinkUnderstandingModelDecision[]; +}> { + const attempts: LinkUnderstandingModelDecision[] = []; for (const entry of params.entries) { try { const output = await runCliEntry({ @@ -83,12 +121,33 @@ async function runLinkEntries(params: { url: params.url, config: params.config, }); - if (output) return output; + if (output) { + const decision = buildModelDecision({ entry, outcome: "success" }); + attempts.push(decision); + return { + output: { + url: params.url, + text: output, + source: entry.command?.trim() || undefined, + }, + attempts, + }; + } + attempts.push(buildModelDecision({ entry, outcome: "skipped", reason: "empty output" })); } catch (err) { - throw new Error(`link processor failed for ${params.url}: ${String(err)}`); + if 
(shouldLogVerbose()) { + logVerbose(`Link understanding failed for ${params.url}: ${String(err)}`); + } + attempts.push( + buildModelDecision({ + entry, + outcome: "failed", + reason: String(err), + }), + ); } } - return null; + return { output: null, attempts }; } export async function runLinkUnderstanding(params: { @@ -97,33 +156,66 @@ export async function runLinkUnderstanding(params: { message?: string; }): Promise { const config = params.cfg.tools?.links; - if (!config || config.enabled === false) return { urls: [], outputs: [] }; + if (!config || config.enabled === false) { + return { urls: [], outputs: [], decisions: [{ outcome: "disabled", urls: [] }] }; + } const scopeDecision = resolveScopeDecision({ config, ctx: params.ctx }); if (scopeDecision === "deny") { if (shouldLogVerbose()) { logVerbose("Link understanding disabled by scope policy."); } - return { urls: [], outputs: [] }; + return { urls: [], outputs: [], decisions: [{ outcome: "scope-deny", urls: [] }] }; } const message = params.message ?? params.ctx.CommandBody ?? params.ctx.RawBody ?? params.ctx.Body; const links = extractLinksFromMessage(message ?? "", { maxLinks: config?.maxLinks }); - if (links.length === 0) return { urls: [], outputs: [] }; + if (links.length === 0) { + return { urls: [], outputs: [], decisions: [{ outcome: "no-links", urls: [] }] }; + } const entries = config?.models ?? 
[]; - if (entries.length === 0) return { urls: links, outputs: [] }; + if (entries.length === 0) { + const urlDecisions: LinkUnderstandingUrlDecision[] = links.map((url) => ({ + url, + attempts: [], + })); + return { + urls: links, + outputs: [], + decisions: [{ outcome: "skipped", urls: urlDecisions }], + }; + } - const outputs: string[] = []; + const outputs: LinkUnderstandingOutput[] = []; + const urlDecisions: LinkUnderstandingUrlDecision[] = []; for (const url of links) { - const output = await runLinkEntries({ + const ssrfCheck = await assertUrlIsPublic(url); + if (!ssrfCheck.ok) { + urlDecisions.push({ + url, + attempts: [ + { + type: "cli", + command: "ssrf", + outcome: "skipped", + reason: ssrfCheck.reason, + }, + ], + }); + continue; + } + const { output, attempts } = await runLinkEntries({ entries, ctx: params.ctx, url, config, }); + const chosen = attempts.find((attempt) => attempt.outcome === "success"); + urlDecisions.push({ url, attempts, chosen }); if (output) outputs.push(output); } - return { urls: links, outputs }; + const outcome = outputs.length > 0 ? 
"success" : "skipped"; + return { urls: links, outputs, decisions: [{ outcome, urls: urlDecisions }] }; } diff --git a/src/link-understanding/types.ts b/src/link-understanding/types.ts new file mode 100644 index 000000000..d1f6debcd --- /dev/null +++ b/src/link-understanding/types.ts @@ -0,0 +1,30 @@ +export type LinkUnderstandingOutput = { + url: string; + text: string; + source?: string; +}; + +export type LinkUnderstandingModelDecision = { + type: "cli"; + command?: string; + outcome: "success" | "skipped" | "failed"; + reason?: string; +}; + +export type LinkUnderstandingUrlDecision = { + url: string; + attempts: LinkUnderstandingModelDecision[]; + chosen?: LinkUnderstandingModelDecision; +}; + +export type LinkUnderstandingDecisionOutcome = + | "success" + | "skipped" + | "disabled" + | "scope-deny" + | "no-links"; + +export type LinkUnderstandingDecision = { + outcome: LinkUnderstandingDecisionOutcome; + urls: LinkUnderstandingUrlDecision[]; +};