This commit is contained in:
曹亚文 2026-01-30 11:55:32 +00:00 committed by GitHub
commit acd5023aa4
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 41 additions and 7 deletions

View File

@ -8,6 +8,7 @@ import {
SsrFBlockedError,
} from "../../infra/net/ssrf.js";
import type { Dispatcher } from "undici";
import { ProxyAgent } from "undici";
import { stringEnum } from "../schema/typebox.js";
import type { AnyAgentTool } from "./common.js";
import { jsonResult, readNumberParam, readStringParam } from "./common.js";
@ -94,6 +95,12 @@ function resolveFetchReadabilityEnabled(fetch?: WebFetchConfig): boolean {
return true;
}
/**
 * Reads the optional proxy URL from the web_fetch configuration.
 *
 * @param fetch - The web_fetch config section, if one was provided.
 * @returns The whitespace-trimmed `proxy` string, or `undefined` when the
 *   config is absent or not an object, has no string-valued `proxy` entry,
 *   or the entry is empty after trimming.
 */
function resolveFetchProxy(fetch?: WebFetchConfig): string | undefined {
  if (!fetch || typeof fetch !== "object") {
    return undefined;
  }
  // Anything other than a string-valued `proxy` key counts as "not configured".
  if (!("proxy" in fetch) || typeof fetch.proxy !== "string") {
    return undefined;
  }
  const trimmed = fetch.proxy.trim();
  return trimmed.length > 0 ? trimmed : undefined;
}
function resolveFirecrawlConfig(fetch?: WebFetchConfig): FirecrawlFetchConfig {
if (!fetch || typeof fetch !== "object") return undefined;
const firecrawl = "firecrawl" in fetch ? fetch.firecrawl : undefined;
@ -173,25 +180,37 @@ async function fetchWithRedirects(params: {
maxRedirects: number;
timeoutSeconds: number;
userAgent: string;
proxy?: string;
}): Promise<{ response: Response; finalUrl: string; dispatcher: Dispatcher }> {
const signal = withTimeout(undefined, params.timeoutSeconds * 1000);
const visited = new Set<string>();
let currentUrl = params.url;
let redirectCount = 0;
// Create proxy dispatcher if configured
const proxyDispatcher = params.proxy ? new ProxyAgent(params.proxy) : null;
while (true) {
let parsedUrl: URL;
try {
parsedUrl = new URL(currentUrl);
} catch {
if (proxyDispatcher) await closeDispatcher(proxyDispatcher);
throw new Error("Invalid URL: must be http or https");
}
if (!["http:", "https:"].includes(parsedUrl.protocol)) {
if (proxyDispatcher) await closeDispatcher(proxyDispatcher);
throw new Error("Invalid URL: must be http or https");
}
const pinned = await resolvePinnedHostname(parsedUrl.hostname);
const dispatcher = createPinnedDispatcher(pinned);
// Use proxy dispatcher if available, otherwise use pinned dispatcher for SSRF protection
let dispatcher: Dispatcher;
if (proxyDispatcher) {
dispatcher = proxyDispatcher;
} else {
const pinned = await resolvePinnedHostname(parsedUrl.hostname);
dispatcher = createPinnedDispatcher(pinned);
}
let res: Response;
try {
res = await fetch(parsedUrl.toString(), {
@ -206,29 +225,35 @@ async function fetchWithRedirects(params: {
dispatcher,
} as RequestInit);
} catch (err) {
await closeDispatcher(dispatcher);
// Close whichever dispatcher served this request (shared proxy or per-hop pinned) before rethrowing
if (!proxyDispatcher) await closeDispatcher(dispatcher);
else await closeDispatcher(proxyDispatcher);
throw err;
}
if (isRedirectStatus(res.status)) {
const location = res.headers.get("location");
if (!location) {
await closeDispatcher(dispatcher);
if (!proxyDispatcher) await closeDispatcher(dispatcher);
else await closeDispatcher(proxyDispatcher);
throw new Error(`Redirect missing location header (${res.status})`);
}
redirectCount += 1;
if (redirectCount > params.maxRedirects) {
await closeDispatcher(dispatcher);
if (!proxyDispatcher) await closeDispatcher(dispatcher);
else await closeDispatcher(proxyDispatcher);
throw new Error(`Too many redirects (limit: ${params.maxRedirects})`);
}
const nextUrl = new URL(location, parsedUrl).toString();
if (visited.has(nextUrl)) {
await closeDispatcher(dispatcher);
if (!proxyDispatcher) await closeDispatcher(dispatcher);
else await closeDispatcher(proxyDispatcher);
throw new Error("Redirect loop detected");
}
visited.add(nextUrl);
void res.body?.cancel();
await closeDispatcher(dispatcher);
// Only close pinned dispatcher on redirect; proxy dispatcher is reused
if (!proxyDispatcher) await closeDispatcher(dispatcher);
currentUrl = nextUrl;
continue;
}
@ -338,6 +363,7 @@ async function runWebFetch(params: {
cacheTtlMs: number;
userAgent: string;
readabilityEnabled: boolean;
proxy?: string;
firecrawlEnabled: boolean;
firecrawlApiKey?: string;
firecrawlBaseUrl: string;
@ -373,6 +399,7 @@ async function runWebFetch(params: {
maxRedirects: params.maxRedirects,
timeoutSeconds: params.timeoutSeconds,
userAgent: params.userAgent,
proxy: params.proxy,
});
res = result.response;
finalUrl = result.finalUrl;
@ -576,6 +603,7 @@ export function createWebFetchTool(options?: {
const fetch = resolveFetchConfig(options?.config);
if (!resolveFetchEnabled({ fetch, sandboxed: options?.sandboxed })) return null;
const readabilityEnabled = resolveFetchReadabilityEnabled(fetch);
const proxy = resolveFetchProxy(fetch);
const firecrawl = resolveFirecrawlConfig(fetch);
const firecrawlApiKey = resolveFirecrawlApiKey(firecrawl);
const firecrawlEnabled = resolveFirecrawlEnabled({ firecrawl, apiKey: firecrawlApiKey });
@ -609,6 +637,7 @@ export function createWebFetchTool(options?: {
cacheTtlMs: resolveCacheTtlMs(fetch?.cacheTtlMinutes, DEFAULT_CACHE_TTL_MINUTES),
userAgent,
readabilityEnabled,
proxy,
firecrawlEnabled,
firecrawlApiKey,
firecrawlBaseUrl,

View File

@ -199,6 +199,7 @@ const FIELD_LABELS: Record<string, string> = {
"tools.web.fetch.cacheTtlMinutes": "Web Fetch Cache TTL (min)",
"tools.web.fetch.maxRedirects": "Web Fetch Max Redirects",
"tools.web.fetch.userAgent": "Web Fetch User-Agent",
"tools.web.fetch.proxy": "Web Fetch Proxy",
"gateway.controlUi.basePath": "Control UI Base Path",
"gateway.controlUi.allowInsecureAuth": "Allow Insecure Control UI Auth",
"gateway.controlUi.dangerouslyDisableDeviceAuth": "Dangerously Disable Control UI Device Auth",
@ -454,6 +455,7 @@ const FIELD_HELP: Record<string, string> = {
"tools.web.fetch.userAgent": "Override User-Agent header for web_fetch requests.",
"tools.web.fetch.readability":
"Use Readability to extract main content from HTML (fallbacks to basic HTML cleanup).",
"tools.web.fetch.proxy": "HTTP proxy URL for web_fetch requests (e.g. http://127.0.0.1:7890).",
"tools.web.fetch.firecrawl.enabled": "Enable Firecrawl fallback for web_fetch (if configured).",
"tools.web.fetch.firecrawl.apiKey": "Firecrawl API key (fallback: FIRECRAWL_API_KEY env var).",
"tools.web.fetch.firecrawl.baseUrl":

View File

@ -371,6 +371,8 @@ export type ToolsConfig = {
userAgent?: string;
/** Use Readability to extract main content (default: true). */
readability?: boolean;
/** HTTP proxy URL for fetch requests (e.g. http://127.0.0.1:7890). */
proxy?: string;
firecrawl?: {
/** Enable Firecrawl fallback (default: true when apiKey is set). */
enabled?: boolean;

View File

@ -190,6 +190,7 @@ export const ToolsWebFetchSchema = z
cacheTtlMinutes: z.number().nonnegative().optional(),
maxRedirects: z.number().int().nonnegative().optional(),
userAgent: z.string().optional(),
proxy: z.string().optional(),
})
.strict()
.optional();