This commit is contained in:
曹亚文 2026-01-30 11:55:32 +00:00 committed by GitHub
commit acd5023aa4
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 41 additions and 7 deletions

View File

@ -8,6 +8,7 @@ import {
SsrFBlockedError, SsrFBlockedError,
} from "../../infra/net/ssrf.js"; } from "../../infra/net/ssrf.js";
import type { Dispatcher } from "undici"; import type { Dispatcher } from "undici";
import { ProxyAgent } from "undici";
import { stringEnum } from "../schema/typebox.js"; import { stringEnum } from "../schema/typebox.js";
import type { AnyAgentTool } from "./common.js"; import type { AnyAgentTool } from "./common.js";
import { jsonResult, readNumberParam, readStringParam } from "./common.js"; import { jsonResult, readNumberParam, readStringParam } from "./common.js";
@ -94,6 +95,12 @@ function resolveFetchReadabilityEnabled(fetch?: WebFetchConfig): boolean {
return true; return true;
} }
/**
 * Reads the optional proxy URL from the web_fetch configuration.
 *
 * @param fetch - The web_fetch configuration, if any.
 * @returns The trimmed `proxy` string, or `undefined` when the config is
 *   missing, is not an object, has no string `proxy` property, or the value
 *   is empty after trimming.
 */
function resolveFetchProxy(fetch?: WebFetchConfig): string | undefined {
  if (!fetch || typeof fetch !== "object") {
    return undefined;
  }
  if (!("proxy" in fetch)) {
    return undefined;
  }
  const candidate = fetch.proxy;
  if (typeof candidate !== "string") {
    return undefined;
  }
  // Whitespace-only values count as "not configured".
  const trimmed = candidate.trim();
  return trimmed.length > 0 ? trimmed : undefined;
}
function resolveFirecrawlConfig(fetch?: WebFetchConfig): FirecrawlFetchConfig { function resolveFirecrawlConfig(fetch?: WebFetchConfig): FirecrawlFetchConfig {
if (!fetch || typeof fetch !== "object") return undefined; if (!fetch || typeof fetch !== "object") return undefined;
const firecrawl = "firecrawl" in fetch ? fetch.firecrawl : undefined; const firecrawl = "firecrawl" in fetch ? fetch.firecrawl : undefined;
@ -173,25 +180,37 @@ async function fetchWithRedirects(params: {
maxRedirects: number; maxRedirects: number;
timeoutSeconds: number; timeoutSeconds: number;
userAgent: string; userAgent: string;
proxy?: string;
}): Promise<{ response: Response; finalUrl: string; dispatcher: Dispatcher }> { }): Promise<{ response: Response; finalUrl: string; dispatcher: Dispatcher }> {
const signal = withTimeout(undefined, params.timeoutSeconds * 1000); const signal = withTimeout(undefined, params.timeoutSeconds * 1000);
const visited = new Set<string>(); const visited = new Set<string>();
let currentUrl = params.url; let currentUrl = params.url;
let redirectCount = 0; let redirectCount = 0;
// Create proxy dispatcher if configured
const proxyDispatcher = params.proxy ? new ProxyAgent(params.proxy) : null;
while (true) { while (true) {
let parsedUrl: URL; let parsedUrl: URL;
try { try {
parsedUrl = new URL(currentUrl); parsedUrl = new URL(currentUrl);
} catch { } catch {
if (proxyDispatcher) await closeDispatcher(proxyDispatcher);
throw new Error("Invalid URL: must be http or https"); throw new Error("Invalid URL: must be http or https");
} }
if (!["http:", "https:"].includes(parsedUrl.protocol)) { if (!["http:", "https:"].includes(parsedUrl.protocol)) {
if (proxyDispatcher) await closeDispatcher(proxyDispatcher);
throw new Error("Invalid URL: must be http or https"); throw new Error("Invalid URL: must be http or https");
} }
// Use proxy dispatcher if available, otherwise use pinned dispatcher for SSRF protection
let dispatcher: Dispatcher;
if (proxyDispatcher) {
dispatcher = proxyDispatcher;
} else {
const pinned = await resolvePinnedHostname(parsedUrl.hostname); const pinned = await resolvePinnedHostname(parsedUrl.hostname);
const dispatcher = createPinnedDispatcher(pinned); dispatcher = createPinnedDispatcher(pinned);
}
let res: Response; let res: Response;
try { try {
res = await fetch(parsedUrl.toString(), { res = await fetch(parsedUrl.toString(), {
@ -206,29 +225,35 @@ async function fetchWithRedirects(params: {
dispatcher, dispatcher,
} as RequestInit); } as RequestInit);
} catch (err) { } catch (err) {
await closeDispatcher(dispatcher); // Close whichever dispatcher this attempt used (pinned or proxy) before rethrowing
if (!proxyDispatcher) await closeDispatcher(dispatcher);
else await closeDispatcher(proxyDispatcher);
throw err; throw err;
} }
if (isRedirectStatus(res.status)) { if (isRedirectStatus(res.status)) {
const location = res.headers.get("location"); const location = res.headers.get("location");
if (!location) { if (!location) {
await closeDispatcher(dispatcher); if (!proxyDispatcher) await closeDispatcher(dispatcher);
else await closeDispatcher(proxyDispatcher);
throw new Error(`Redirect missing location header (${res.status})`); throw new Error(`Redirect missing location header (${res.status})`);
} }
redirectCount += 1; redirectCount += 1;
if (redirectCount > params.maxRedirects) { if (redirectCount > params.maxRedirects) {
await closeDispatcher(dispatcher); if (!proxyDispatcher) await closeDispatcher(dispatcher);
else await closeDispatcher(proxyDispatcher);
throw new Error(`Too many redirects (limit: ${params.maxRedirects})`); throw new Error(`Too many redirects (limit: ${params.maxRedirects})`);
} }
const nextUrl = new URL(location, parsedUrl).toString(); const nextUrl = new URL(location, parsedUrl).toString();
if (visited.has(nextUrl)) { if (visited.has(nextUrl)) {
await closeDispatcher(dispatcher); if (!proxyDispatcher) await closeDispatcher(dispatcher);
else await closeDispatcher(proxyDispatcher);
throw new Error("Redirect loop detected"); throw new Error("Redirect loop detected");
} }
visited.add(nextUrl); visited.add(nextUrl);
void res.body?.cancel(); void res.body?.cancel();
await closeDispatcher(dispatcher); // Only close pinned dispatcher on redirect; proxy dispatcher is reused
if (!proxyDispatcher) await closeDispatcher(dispatcher);
currentUrl = nextUrl; currentUrl = nextUrl;
continue; continue;
} }
@ -338,6 +363,7 @@ async function runWebFetch(params: {
cacheTtlMs: number; cacheTtlMs: number;
userAgent: string; userAgent: string;
readabilityEnabled: boolean; readabilityEnabled: boolean;
proxy?: string;
firecrawlEnabled: boolean; firecrawlEnabled: boolean;
firecrawlApiKey?: string; firecrawlApiKey?: string;
firecrawlBaseUrl: string; firecrawlBaseUrl: string;
@ -373,6 +399,7 @@ async function runWebFetch(params: {
maxRedirects: params.maxRedirects, maxRedirects: params.maxRedirects,
timeoutSeconds: params.timeoutSeconds, timeoutSeconds: params.timeoutSeconds,
userAgent: params.userAgent, userAgent: params.userAgent,
proxy: params.proxy,
}); });
res = result.response; res = result.response;
finalUrl = result.finalUrl; finalUrl = result.finalUrl;
@ -576,6 +603,7 @@ export function createWebFetchTool(options?: {
const fetch = resolveFetchConfig(options?.config); const fetch = resolveFetchConfig(options?.config);
if (!resolveFetchEnabled({ fetch, sandboxed: options?.sandboxed })) return null; if (!resolveFetchEnabled({ fetch, sandboxed: options?.sandboxed })) return null;
const readabilityEnabled = resolveFetchReadabilityEnabled(fetch); const readabilityEnabled = resolveFetchReadabilityEnabled(fetch);
const proxy = resolveFetchProxy(fetch);
const firecrawl = resolveFirecrawlConfig(fetch); const firecrawl = resolveFirecrawlConfig(fetch);
const firecrawlApiKey = resolveFirecrawlApiKey(firecrawl); const firecrawlApiKey = resolveFirecrawlApiKey(firecrawl);
const firecrawlEnabled = resolveFirecrawlEnabled({ firecrawl, apiKey: firecrawlApiKey }); const firecrawlEnabled = resolveFirecrawlEnabled({ firecrawl, apiKey: firecrawlApiKey });
@ -609,6 +637,7 @@ export function createWebFetchTool(options?: {
cacheTtlMs: resolveCacheTtlMs(fetch?.cacheTtlMinutes, DEFAULT_CACHE_TTL_MINUTES), cacheTtlMs: resolveCacheTtlMs(fetch?.cacheTtlMinutes, DEFAULT_CACHE_TTL_MINUTES),
userAgent, userAgent,
readabilityEnabled, readabilityEnabled,
proxy,
firecrawlEnabled, firecrawlEnabled,
firecrawlApiKey, firecrawlApiKey,
firecrawlBaseUrl, firecrawlBaseUrl,

View File

@ -199,6 +199,7 @@ const FIELD_LABELS: Record<string, string> = {
"tools.web.fetch.cacheTtlMinutes": "Web Fetch Cache TTL (min)", "tools.web.fetch.cacheTtlMinutes": "Web Fetch Cache TTL (min)",
"tools.web.fetch.maxRedirects": "Web Fetch Max Redirects", "tools.web.fetch.maxRedirects": "Web Fetch Max Redirects",
"tools.web.fetch.userAgent": "Web Fetch User-Agent", "tools.web.fetch.userAgent": "Web Fetch User-Agent",
"tools.web.fetch.proxy": "Web Fetch Proxy",
"gateway.controlUi.basePath": "Control UI Base Path", "gateway.controlUi.basePath": "Control UI Base Path",
"gateway.controlUi.allowInsecureAuth": "Allow Insecure Control UI Auth", "gateway.controlUi.allowInsecureAuth": "Allow Insecure Control UI Auth",
"gateway.controlUi.dangerouslyDisableDeviceAuth": "Dangerously Disable Control UI Device Auth", "gateway.controlUi.dangerouslyDisableDeviceAuth": "Dangerously Disable Control UI Device Auth",
@ -454,6 +455,7 @@ const FIELD_HELP: Record<string, string> = {
"tools.web.fetch.userAgent": "Override User-Agent header for web_fetch requests.", "tools.web.fetch.userAgent": "Override User-Agent header for web_fetch requests.",
"tools.web.fetch.readability": "tools.web.fetch.readability":
"Use Readability to extract main content from HTML (fallbacks to basic HTML cleanup).", "Use Readability to extract main content from HTML (fallbacks to basic HTML cleanup).",
"tools.web.fetch.proxy": "HTTP proxy URL for web_fetch requests (e.g. http://127.0.0.1:7890).",
"tools.web.fetch.firecrawl.enabled": "Enable Firecrawl fallback for web_fetch (if configured).", "tools.web.fetch.firecrawl.enabled": "Enable Firecrawl fallback for web_fetch (if configured).",
"tools.web.fetch.firecrawl.apiKey": "Firecrawl API key (fallback: FIRECRAWL_API_KEY env var).", "tools.web.fetch.firecrawl.apiKey": "Firecrawl API key (fallback: FIRECRAWL_API_KEY env var).",
"tools.web.fetch.firecrawl.baseUrl": "tools.web.fetch.firecrawl.baseUrl":

View File

@ -371,6 +371,8 @@ export type ToolsConfig = {
userAgent?: string; userAgent?: string;
/** Use Readability to extract main content (default: true). */ /** Use Readability to extract main content (default: true). */
readability?: boolean; readability?: boolean;
/** HTTP proxy URL for fetch requests (e.g. http://127.0.0.1:7890). */
proxy?: string;
firecrawl?: { firecrawl?: {
/** Enable Firecrawl fallback (default: true when apiKey is set). */ /** Enable Firecrawl fallback (default: true when apiKey is set). */
enabled?: boolean; enabled?: boolean;

View File

@ -190,6 +190,7 @@ export const ToolsWebFetchSchema = z
cacheTtlMinutes: z.number().nonnegative().optional(), cacheTtlMinutes: z.number().nonnegative().optional(),
maxRedirects: z.number().int().nonnegative().optional(), maxRedirects: z.number().int().nonnegative().optional(),
userAgent: z.string().optional(), userAgent: z.string().optional(),
proxy: z.string().optional(),
}) })
.strict() .strict()
.optional(); .optional();