diff --git a/src/agents/tools/web-fetch.ts b/src/agents/tools/web-fetch.ts
index b127cd49f..832c9e486 100644
--- a/src/agents/tools/web-fetch.ts
+++ b/src/agents/tools/web-fetch.ts
@@ -8,6 +8,7 @@ import {
   SsrFBlockedError,
 } from "../../infra/net/ssrf.js";
 import type { Dispatcher } from "undici";
+import { ProxyAgent } from "undici";
 import { stringEnum } from "../schema/typebox.js";
 import type { AnyAgentTool } from "./common.js";
 import { jsonResult, readNumberParam, readStringParam } from "./common.js";
@@ -94,6 +95,17 @@ function resolveFetchReadabilityEnabled(fetch?: WebFetchConfig): boolean {
   return true;
 }
 
+/**
+ * Reads the optional `proxy` setting from the web-fetch config.
+ *
+ * @returns The trimmed proxy URL, or `undefined` when it is missing, not a string, or blank.
+ */
+function resolveFetchProxy(fetch?: WebFetchConfig): string | undefined {
+  if (!fetch || typeof fetch !== "object") return undefined;
+  const proxy = "proxy" in fetch && typeof fetch.proxy === "string" ? fetch.proxy.trim() : "";
+  return proxy || undefined;
+}
+
 function resolveFirecrawlConfig(fetch?: WebFetchConfig): FirecrawlFetchConfig {
   if (!fetch || typeof fetch !== "object") return undefined;
   const firecrawl = "firecrawl" in fetch ? fetch.firecrawl : undefined;
@@ -173,25 +185,42 @@ async function fetchWithRedirects(params: {
   maxRedirects: number;
   timeoutSeconds: number;
   userAgent: string;
+  proxy?: string;
 }): Promise<{ response: Response; finalUrl: string; dispatcher: Dispatcher }> {
   const signal = withTimeout(undefined, params.timeoutSeconds * 1000);
   const visited = new Set();
   let currentUrl = params.url;
   let redirectCount = 0;
 
+  // A single ProxyAgent serves every hop of the redirect chain. It is closed on the error
+  // paths below; it is never closed between hops.
+  const proxyDispatcher = params.proxy ? new ProxyAgent(params.proxy) : null;
+
   while (true) {
     let parsedUrl: URL;
     try {
       parsedUrl = new URL(currentUrl);
     } catch {
+      if (proxyDispatcher) await closeDispatcher(proxyDispatcher);
       throw new Error("Invalid URL: must be http or https");
     }
     if (!["http:", "https:"].includes(parsedUrl.protocol)) {
+      if (proxyDispatcher) await closeDispatcher(proxyDispatcher);
       throw new Error("Invalid URL: must be http or https");
     }
 
-    const pinned = await resolvePinnedHostname(parsedUrl.hostname);
-    const dispatcher = createPinnedDispatcher(pinned);
+    // Run the hostname check on every hop even in proxy mode, so a configured proxy does not
+    // switch off SSRF protection. NOTE(review): assumes resolvePinnedHostname rejects blocked
+    // hosts; the proxy does its own DNS lookup, so pinning itself is not enforced - confirm.
+    let pinned: Awaited<ReturnType<typeof resolvePinnedHostname>>;
+    try {
+      pinned = await resolvePinnedHostname(parsedUrl.hostname);
+    } catch (err) {
+      if (proxyDispatcher) await closeDispatcher(proxyDispatcher);
+      throw err;
+    }
+    // In proxy mode `dispatcher` is the shared agent, so the error paths below close it too.
+    const dispatcher: Dispatcher = proxyDispatcher ?? createPinnedDispatcher(pinned);
     let res: Response;
     try {
       res = await fetch(parsedUrl.toString(), {
@@ -228,7 +257,8 @@ async function fetchWithRedirects(params: {
       }
       visited.add(nextUrl);
       void res.body?.cancel();
-      await closeDispatcher(dispatcher);
+      // Pinned dispatchers are per-hop; the shared proxy agent stays open for the next hop.
+      if (!proxyDispatcher) await closeDispatcher(dispatcher);
       currentUrl = nextUrl;
       continue;
     }
@@ -338,6 +368,7 @@ async function runWebFetch(params: {
   cacheTtlMs: number;
   userAgent: string;
   readabilityEnabled: boolean;
+  proxy?: string;
   firecrawlEnabled: boolean;
   firecrawlApiKey?: string;
   firecrawlBaseUrl: string;
@@ -373,6 +404,7 @@ async function runWebFetch(params: {
       maxRedirects: params.maxRedirects,
       timeoutSeconds: params.timeoutSeconds,
       userAgent: params.userAgent,
+      proxy: params.proxy,
     });
     res = result.response;
     finalUrl = result.finalUrl;
@@ -576,6 +608,7 @@ export function createWebFetchTool(options?: {
   const fetch = resolveFetchConfig(options?.config);
   if (!resolveFetchEnabled({ fetch, sandboxed: options?.sandboxed })) return null;
   const readabilityEnabled = resolveFetchReadabilityEnabled(fetch);
+  const proxy = resolveFetchProxy(fetch);
   const firecrawl = resolveFirecrawlConfig(fetch);
   const firecrawlApiKey = resolveFirecrawlApiKey(firecrawl);
   const firecrawlEnabled = resolveFirecrawlEnabled({ firecrawl, apiKey: firecrawlApiKey });
@@ -609,6 +642,7 @@ export function createWebFetchTool(options?: {
         cacheTtlMs: resolveCacheTtlMs(fetch?.cacheTtlMinutes, DEFAULT_CACHE_TTL_MINUTES),
         userAgent,
         readabilityEnabled,
+        proxy,
         firecrawlEnabled,
         firecrawlApiKey,
         firecrawlBaseUrl,
diff --git a/src/config/schema.ts b/src/config/schema.ts
index 1401b0574..0319a13f2 100644
--- a/src/config/schema.ts
+++ b/src/config/schema.ts
@@ -199,6 +199,7 @@ const FIELD_LABELS: Record = {
   "tools.web.fetch.cacheTtlMinutes": "Web Fetch Cache TTL (min)",
   "tools.web.fetch.maxRedirects": "Web Fetch Max Redirects",
   "tools.web.fetch.userAgent": "Web Fetch User-Agent",
+  "tools.web.fetch.proxy": "Web Fetch Proxy",
   "gateway.controlUi.basePath": "Control UI Base Path",
   "gateway.controlUi.allowInsecureAuth": "Allow Insecure Control UI Auth",
   "gateway.controlUi.dangerouslyDisableDeviceAuth": "Dangerously Disable Control UI Device Auth",
@@ -454,6 +455,7 @@ const FIELD_HELP: Record = {
   "tools.web.fetch.userAgent": "Override User-Agent header for web_fetch requests.",
   "tools.web.fetch.readability":
     "Use Readability to extract main content from HTML (fallbacks to basic HTML cleanup).",
+  "tools.web.fetch.proxy": "HTTP proxy URL for web_fetch requests (e.g. http://127.0.0.1:7890).",
   "tools.web.fetch.firecrawl.enabled": "Enable Firecrawl fallback for web_fetch (if configured).",
   "tools.web.fetch.firecrawl.apiKey": "Firecrawl API key (fallback: FIRECRAWL_API_KEY env var).",
   "tools.web.fetch.firecrawl.baseUrl":
diff --git a/src/config/types.tools.ts b/src/config/types.tools.ts
index db32cb59d..4f9462368 100644
--- a/src/config/types.tools.ts
+++ b/src/config/types.tools.ts
@@ -371,6 +371,8 @@ export type ToolsConfig = {
       userAgent?: string;
       /** Use Readability to extract main content (default: true). */
       readability?: boolean;
+      /** HTTP proxy URL for fetch requests (e.g. http://127.0.0.1:7890). */
+      proxy?: string;
       firecrawl?: {
         /** Enable Firecrawl fallback (default: true when apiKey is set). */
         enabled?: boolean;
diff --git a/src/config/zod-schema.agent-runtime.ts b/src/config/zod-schema.agent-runtime.ts
index 7e95c3538..2d21c2bce 100644
--- a/src/config/zod-schema.agent-runtime.ts
+++ b/src/config/zod-schema.agent-runtime.ts
@@ -190,6 +190,7 @@ export const ToolsWebFetchSchema = z
     cacheTtlMinutes: z.number().nonnegative().optional(),
     maxRedirects: z.number().int().nonnegative().optional(),
     userAgent: z.string().optional(),
+    proxy: z.string().optional(),
   })
   .strict()
   .optional();