add date filtering and language support, normalize formats between Brave and Perplexity
This commit is contained in:
parent
cbc19719b3
commit
a5df12618c
@ -32,9 +32,47 @@ Clawdbot supports Brave Search as a web search provider for `web_search`.
|
||||
}
|
||||
```
|
||||
|
||||
## Tool parameters
|
||||
|
||||
| Parameter | Description |
|
||||
|-----------|-------------|
|
||||
| `query` | Search query (required) |
|
||||
| `count` | Number of results to return (1-10, default: 5) |
|
||||
| `country` | 2-letter ISO country code (e.g., "US", "DE") |
|
||||
| `language` | ISO 639-1 language code for search results (e.g., "en", "de", "fr") |
|
||||
| `ui_lang` | ISO language code for UI elements |
|
||||
| `freshness` | Time filter: `day` (24h), `week`, `month`, or `year` |
|
||||
| `date_after` | Only results published after this date (YYYY-MM-DD) |
|
||||
| `date_before` | Only results published before this date (YYYY-MM-DD) |
|
||||
|
||||
**Examples:**
|
||||
|
||||
```javascript
|
||||
// Country and language-specific search
|
||||
await web_search({
|
||||
query: "renewable energy",
|
||||
country: "DE",
|
||||
language: "de"
|
||||
});
|
||||
|
||||
// Recent results (past week)
|
||||
await web_search({
|
||||
query: "AI news",
|
||||
freshness: "week"
|
||||
});
|
||||
|
||||
// Date range search
|
||||
await web_search({
|
||||
query: "AI developments",
|
||||
date_after: "2024-01-01",
|
||||
date_before: "2024-06-30"
|
||||
});
|
||||
```
|
||||
|
||||
## Notes
|
||||
|
||||
- The Data for AI plan is **not** compatible with `web_search`.
|
||||
- Brave provides a free tier plus paid plans; check the Brave API portal for current limits.
|
||||
- Results are cached for 15 minutes by default (configurable via `cacheTtlMinutes`).
|
||||
|
||||
See [Web tools](/tools/web) for the full web_search configuration.
|
||||
|
||||
@ -59,11 +59,74 @@ Perplexity Search returns structured results (title, URL, snippet) for fast rese
|
||||
environment. For a gateway install, put it in `~/.clawdbot/.env` (or your
|
||||
service environment). See [Env vars](/help/faq#how-does-clawdbot-load-environment-variables).
|
||||
|
||||
## Tool parameters
|
||||
|
||||
| Parameter | Description |
|
||||
|-----------|-------------|
|
||||
| `query` | Search query (required) |
|
||||
| `count` | Number of results to return (1-10, default: 5) |
|
||||
| `country` | 2-letter ISO country code (e.g., "US", "DE") |
|
||||
| `language` | ISO 639-1 language code (e.g., "en", "de", "fr") |
|
||||
| `freshness` | Time filter: `day` (24h), `week`, `month`, or `year` |
|
||||
| `date_after` | Only results published after this date (YYYY-MM-DD) |
|
||||
| `date_before` | Only results published before this date (YYYY-MM-DD) |
|
||||
| `domain_filter` | Domain allowlist/denylist array (max 20) |
|
||||
| `max_tokens` | Total content budget (default: 25000, max: 1000000) |
|
||||
| `max_tokens_per_page` | Per-page token limit (default: 2048) |
|
||||
|
||||
**Examples:**
|
||||
|
||||
```javascript
|
||||
// Country and language-specific search
|
||||
await web_search({
|
||||
query: "renewable energy",
|
||||
country: "DE",
|
||||
language: "de"
|
||||
});
|
||||
|
||||
// Recent results (past week)
|
||||
await web_search({
|
||||
query: "AI news",
|
||||
freshness: "week"
|
||||
});
|
||||
|
||||
// Date range search
|
||||
await web_search({
|
||||
query: "AI developments",
|
||||
date_after: "2024-01-01",
|
||||
date_before: "2024-06-30"
|
||||
});
|
||||
|
||||
// Domain filtering (allowlist)
|
||||
await web_search({
|
||||
query: "climate research",
|
||||
domain_filter: ["nature.com", "science.org", ".edu"]
|
||||
});
|
||||
|
||||
// Domain filtering (denylist - prefix with -)
|
||||
await web_search({
|
||||
query: "product reviews",
|
||||
domain_filter: ["-reddit.com", "-pinterest.com"]
|
||||
});
|
||||
|
||||
// More content extraction
|
||||
await web_search({
|
||||
query: "detailed AI research",
|
||||
max_tokens: 50000,
|
||||
max_tokens_per_page: 4096
|
||||
});
|
||||
```
|
||||
|
||||
### Domain filter rules
|
||||
|
||||
- Maximum 20 domains per filter
|
||||
- Cannot mix allowlist and denylist in the same request
|
||||
- Use `-` prefix for denylist entries (e.g., `["-reddit.com"]`)
|
||||
|
||||
## Notes
|
||||
|
||||
- Perplexity Search API returns structured results (title, URL, snippet) similar to Brave Search
|
||||
- Perplexity Search API returns structured web search results (title, URL, snippet)
|
||||
- Results are cached for 15 minutes by default (configurable via `cacheTtlMinutes`)
|
||||
- Supports country-specific search via the `country` parameter
|
||||
- Supports domain filtering via the `domain_filter` parameter (allowlist, or denylist with a `-` prefix)
|
||||
|
||||
See [Web tools](/tools/web) for the full web_search configuration.
|
||||
See [Perplexity Search API docs](https://docs.perplexity.ai/guides/search-quickstart) for more details.
|
||||
|
||||
@ -51,6 +51,8 @@ Use `clawdbot configure --section web` to set up your API key and choose a provi
|
||||
|
||||
Perplexity provides $5 in API credits on a monthly rolling basis to Perplexity Pro subscribers. Check the Perplexity API docs for current limits and pricing.
|
||||
|
||||
See [Perplexity Search API Docs](https://docs.perplexity.ai/guides/search-quickstart) for more details.
|
||||
|
||||
### Brave Search
|
||||
|
||||
1) Create a Brave Search API account at https://brave.com/search/api/
|
||||
@ -132,12 +134,21 @@ Search the web using your configured provider.
|
||||
|
||||
### Tool parameters
|
||||
|
||||
- `query` (required)
|
||||
- `count` (1–10; default from config)
|
||||
- `country` (optional): 2-letter country code for region-specific results (e.g., "DE", "US", "ALL"). If omitted, Brave chooses its default region.
|
||||
- `search_lang` (optional): ISO language code for search results (e.g., "de", "en", "fr")
|
||||
- `ui_lang` (optional): ISO language code for UI elements
|
||||
- `freshness` (optional, Brave only): filter by discovery time (`pd`, `pw`, `pm`, `py`, or `YYYY-MM-DDtoYYYY-MM-DD`)
|
||||
All parameters work for both Brave and Perplexity unless noted.
|
||||
|
||||
| Parameter | Description |
|
||||
|-----------|-------------|
|
||||
| `query` | Search query (required) |
|
||||
| `count` | Results to return (1-10, default: 5) |
|
||||
| `country` | 2-letter ISO country code (e.g., "US", "DE") |
|
||||
| `language` | ISO 639-1 language code (e.g., "en", "de") |
|
||||
| `freshness` | Time filter: `day`, `week`, `month`, or `year` |
|
||||
| `date_after` | Results after this date (YYYY-MM-DD) |
|
||||
| `date_before` | Results before this date (YYYY-MM-DD) |
|
||||
| `ui_lang` | UI language code (Brave only) |
|
||||
| `domain_filter` | Domain allowlist/denylist array (Perplexity only) |
|
||||
| `max_tokens` | Total content budget, default 25000 (Perplexity only) |
|
||||
| `max_tokens_per_page` | Per-page token limit, default 2048 (Perplexity only) |
|
||||
|
||||
**Examples:**
|
||||
|
||||
@ -145,23 +156,40 @@ Search the web using your configured provider.
|
||||
// German-specific search
|
||||
await web_search({
|
||||
query: "TV online schauen",
|
||||
count: 10,
|
||||
country: "DE",
|
||||
search_lang: "de"
|
||||
});
|
||||
|
||||
// French search with French UI
|
||||
await web_search({
|
||||
query: "actualités",
|
||||
country: "FR",
|
||||
search_lang: "fr",
|
||||
ui_lang: "fr"
|
||||
language: "de"
|
||||
});
|
||||
|
||||
// Recent results (past week)
|
||||
await web_search({
|
||||
query: "TMBG interview",
|
||||
freshness: "pw"
|
||||
freshness: "week"
|
||||
});
|
||||
|
||||
// Date range search
|
||||
await web_search({
|
||||
query: "AI developments",
|
||||
date_after: "2024-01-01",
|
||||
date_before: "2024-06-30"
|
||||
});
|
||||
|
||||
// Domain filtering (Perplexity only)
|
||||
await web_search({
|
||||
query: "climate research",
|
||||
domain_filter: ["nature.com", "science.org", ".edu"]
|
||||
});
|
||||
|
||||
// Exclude domains (Perplexity only)
|
||||
await web_search({
|
||||
query: "product reviews",
|
||||
domain_filter: ["-reddit.com", "-pinterest.com"]
|
||||
});
|
||||
|
||||
// More content extraction (Perplexity only)
|
||||
await web_search({
|
||||
query: "detailed AI research",
|
||||
max_tokens: 50000,
|
||||
max_tokens_per_page: 4096
|
||||
});
|
||||
```
|
||||
|
||||
|
||||
@ -2,21 +2,55 @@ import { describe, expect, it } from "vitest";
|
||||
|
||||
import { __testing } from "./web-search.js";
|
||||
|
||||
const { normalizeFreshness } = __testing;
|
||||
const { normalizeFreshness, normalizeToIsoDate, isoToPerplexityDate } = __testing;
|
||||
|
||||
describe("web_search freshness normalization", () => {
|
||||
it("accepts Brave shortcut values", () => {
|
||||
expect(normalizeFreshness("pd")).toBe("pd");
|
||||
expect(normalizeFreshness("PW")).toBe("pw");
|
||||
it("accepts Brave shortcut values and maps for Perplexity", () => {
|
||||
expect(normalizeFreshness("pd", "brave")).toBe("pd");
|
||||
expect(normalizeFreshness("PW", "brave")).toBe("pw");
|
||||
expect(normalizeFreshness("pd", "perplexity")).toBe("day");
|
||||
expect(normalizeFreshness("pw", "perplexity")).toBe("week");
|
||||
});
|
||||
|
||||
it("accepts valid date ranges", () => {
|
||||
expect(normalizeFreshness("2024-01-01to2024-01-31")).toBe("2024-01-01to2024-01-31");
|
||||
it("accepts Perplexity values and maps for Brave", () => {
|
||||
expect(normalizeFreshness("day", "perplexity")).toBe("day");
|
||||
expect(normalizeFreshness("week", "perplexity")).toBe("week");
|
||||
expect(normalizeFreshness("day", "brave")).toBe("pd");
|
||||
expect(normalizeFreshness("week", "brave")).toBe("pw");
|
||||
});
|
||||
|
||||
it("rejects invalid date ranges", () => {
|
||||
expect(normalizeFreshness("2024-13-01to2024-01-31")).toBeUndefined();
|
||||
expect(normalizeFreshness("2024-02-30to2024-03-01")).toBeUndefined();
|
||||
expect(normalizeFreshness("2024-03-10to2024-03-01")).toBeUndefined();
|
||||
it("rejects invalid values", () => {
|
||||
expect(normalizeFreshness("yesterday", "brave")).toBeUndefined();
|
||||
expect(normalizeFreshness("yesterday", "perplexity")).toBeUndefined();
|
||||
expect(normalizeFreshness("2024-01-01to2024-01-31", "perplexity")).toBeUndefined();
|
||||
});
|
||||
});
|
||||
|
||||
// Exercises the two date helpers exposed through __testing:
// normalizeToIsoDate (input -> YYYY-MM-DD) and isoToPerplexityDate (YYYY-MM-DD -> M/D/YYYY).
describe("web_search date normalization", () => {
  it("accepts ISO format", () => {
    // ISO input is expected to come back unchanged.
    for (const isoDate of ["2024-01-15", "2025-12-31"]) {
      expect(normalizeToIsoDate(isoDate)).toBe(isoDate);
    }
  });

  it("accepts Perplexity format and converts to ISO", () => {
    const conversions: Array<[string, string]> = [
      ["1/15/2024", "2024-01-15"],
      ["12/31/2025", "2025-12-31"],
    ];
    for (const [input, expected] of conversions) {
      expect(normalizeToIsoDate(input)).toBe(expected);
    }
  });

  it("rejects invalid formats", () => {
    // Wrong separators / field order, and free text.
    for (const unsupported of ["01-15-2024", "2024/01/15", "invalid"]) {
      expect(normalizeToIsoDate(unsupported)).toBeUndefined();
    }
  });

  it("converts ISO to Perplexity format", () => {
    // Leading zeros are dropped from month and day.
    const conversions: Array<[string, string]> = [
      ["2024-01-15", "1/15/2024"],
      ["2025-12-31", "12/31/2025"],
      ["2024-03-05", "3/5/2024"],
    ];
    for (const [input, expected] of conversions) {
      expect(isoToPerplexityDate(input)).toBe(expected);
    }
  });

  it("rejects invalid ISO dates", () => {
    for (const nonIso of ["1/15/2024", "invalid"]) {
      expect(isoToPerplexityDate(nonIso)).toBeUndefined();
    }
  });
});
|
||||
|
||||
@ -26,9 +26,42 @@ const PERPLEXITY_SEARCH_ENDPOINT = "https://api.perplexity.ai/search";
|
||||
|
||||
const SEARCH_CACHE = new Map<string, CacheEntry<Record<string, unknown>>>();
|
||||
const BRAVE_FRESHNESS_SHORTCUTS = new Set(["pd", "pw", "pm", "py"]);
|
||||
const BRAVE_FRESHNESS_RANGE = /^(\d{4}-\d{2}-\d{2})to(\d{4}-\d{2}-\d{2})$/;
|
||||
const PERPLEXITY_RECENCY_VALUES = new Set(["day", "week", "month", "year"]);
|
||||
|
||||
const FRESHNESS_TO_RECENCY: Record<string, string> = {
|
||||
pd: "day",
|
||||
pw: "week",
|
||||
pm: "month",
|
||||
py: "year",
|
||||
};
|
||||
const RECENCY_TO_FRESHNESS: Record<string, string> = {
|
||||
day: "pd",
|
||||
week: "pw",
|
||||
month: "pm",
|
||||
year: "py",
|
||||
};
|
||||
|
||||
const ISO_DATE_PATTERN = /^(\d{4})-(\d{2})-(\d{2})$/;
|
||||
const PERPLEXITY_DATE_PATTERN = /^(\d{1,2})\/(\d{1,2})\/(\d{4})$/;
|
||||
|
||||
/**
 * Converts an ISO date string (YYYY-MM-DD) into M/D/YYYY form, dropping
 * leading zeros from the month and day.
 *
 * @param iso - Date string expected to match ISO_DATE_PATTERN exactly.
 * @returns The M/D/YYYY string, or undefined when the input does not match
 *   the pattern. Only the shape is checked; the calendar date is not validated.
 */
function isoToPerplexityDate(iso: string): string | undefined {
  const parts = ISO_DATE_PATTERN.exec(iso);
  if (parts === null) {
    return undefined;
  }

  const yearText = parts[1];
  // Numeric parsing strips the zero padding ("03" -> 3).
  const monthNumber = Number.parseInt(parts[2], 10);
  const dayNumber = Number.parseInt(parts[3], 10);

  return [monthNumber, dayNumber, yearText].join("/");
}
|
||||
|
||||
/**
 * Normalizes a user-supplied date to ISO form (YYYY-MM-DD).
 *
 * Accepts either YYYY-MM-DD (ISO_DATE_PATTERN) or M/D/YYYY
 * (PERPLEXITY_DATE_PATTERN); surrounding whitespace is ignored.
 *
 * @param value - Raw date string.
 * @returns The zero-padded ISO date, or undefined when the input matches
 *   neither pattern or names a day that does not exist (e.g. 2024-02-30,
 *   13/01/2024).
 */
function normalizeToIsoDate(value: string): string | undefined {
  const trimmed = value.trim();

  const isoMatch = trimmed.match(ISO_DATE_PATTERN);
  if (isoMatch) {
    const [, year, month, day] = isoMatch;
    return toValidIsoDate(year, month, day);
  }

  const slashMatch = trimmed.match(PERPLEXITY_DATE_PATTERN);
  if (slashMatch) {
    const [, month, day, year] = slashMatch;
    return toValidIsoDate(year, month, day);
  }

  return undefined;
}

/**
 * Builds YYYY-MM-DD from digit strings, returning undefined when the
 * year/month/day combination is not a real calendar date.
 */
function toValidIsoDate(year: string, month: string, day: string): string | undefined {
  const y = Number.parseInt(year, 10);
  const m = Number.parseInt(month, 10);
  const d = Number.parseInt(day, 10);

  // setUTCFullYear is used instead of Date.UTC so that years 0-99 are not
  // remapped to 1900-1999. Out-of-range months/days roll over into a
  // different date, which the round-trip comparison below detects.
  const probe = new Date(0);
  probe.setUTCFullYear(y, m - 1, d);
  const isRealDate =
    probe.getUTCFullYear() === y && probe.getUTCMonth() === m - 1 && probe.getUTCDate() === d;
  if (!isRealDate) return undefined;

  return `${year}-${month.padStart(2, "0")}-${day.padStart(2, "0")}`;
}
|
||||
|
||||
function createWebSearchSchema(provider: (typeof SEARCH_PROVIDERS)[number]) {
|
||||
const baseSchema = {
|
||||
query: Type.String({ description: "Search query string." }),
|
||||
@ -45,47 +78,58 @@ function createWebSearchSchema(provider: (typeof SEARCH_PROVIDERS)[number]) {
|
||||
"2-letter country code for region-specific results (e.g., 'DE', 'US', 'ALL'). Default: 'US'.",
|
||||
}),
|
||||
),
|
||||
language: Type.Optional(
|
||||
Type.String({
|
||||
description: "ISO 639-1 language code for results (e.g., 'en', 'de', 'fr').",
|
||||
}),
|
||||
),
|
||||
freshness: Type.Optional(
|
||||
Type.String({
|
||||
description: "Filter by time: 'day' (24h), 'week', 'month', or 'year'.",
|
||||
}),
|
||||
),
|
||||
date_after: Type.Optional(
|
||||
Type.String({
|
||||
description: "Only results published after this date (YYYY-MM-DD).",
|
||||
}),
|
||||
),
|
||||
date_before: Type.Optional(
|
||||
Type.String({
|
||||
description: "Only results published before this date (YYYY-MM-DD).",
|
||||
}),
|
||||
),
|
||||
} as const;
|
||||
|
||||
if (provider === "brave") {
|
||||
return Type.Object({
|
||||
...baseSchema,
|
||||
search_lang: Type.Optional(
|
||||
Type.String({
|
||||
description: "ISO language code for search results (e.g., 'de', 'en', 'fr').",
|
||||
}),
|
||||
),
|
||||
ui_lang: Type.Optional(
|
||||
Type.String({
|
||||
description: "ISO language code for UI elements.",
|
||||
}),
|
||||
),
|
||||
freshness: Type.Optional(
|
||||
Type.String({
|
||||
description:
|
||||
"Filter results by discovery time. Values: 'pd' (past 24h), 'pw' (past week), 'pm' (past month), 'py' (past year), or date range 'YYYY-MM-DDtoYYYY-MM-DD'.",
|
||||
}),
|
||||
),
|
||||
});
|
||||
}
|
||||
|
||||
// Perplexity provider schema
|
||||
return Type.Object({
|
||||
...baseSchema,
|
||||
recency: Type.Optional(
|
||||
Type.String({
|
||||
description: "Filter by time period: 'day', 'week', 'month', or 'year'.",
|
||||
}),
|
||||
),
|
||||
domain_filter: Type.Optional(
|
||||
Type.Array(Type.String(), {
|
||||
description:
|
||||
"Domain filter (max 20). Allowlist: ['nature.com'] or denylist: ['-reddit.com']. Cannot mix.",
|
||||
}),
|
||||
),
|
||||
language_filter: Type.Optional(
|
||||
Type.Array(Type.String(), {
|
||||
description: "ISO 639-1 language codes (max 10). Example: ['en', 'de'].",
|
||||
max_tokens: Type.Optional(
|
||||
Type.Number({
|
||||
description: "Total content budget across all results (default: 25000, max: 1000000).",
|
||||
minimum: 1,
|
||||
maximum: 1000000,
|
||||
}),
|
||||
),
|
||||
max_tokens_per_page: Type.Optional(
|
||||
Type.Number({
|
||||
description: "Max tokens extracted per page (default: 2048).",
|
||||
minimum: 1,
|
||||
}),
|
||||
),
|
||||
});
|
||||
@ -208,33 +252,30 @@ function resolveSearchCount(value: unknown, fallback: number): number {
|
||||
return clamped;
|
||||
}
|
||||
|
||||
function normalizeFreshness(value: string | undefined): string | undefined {
|
||||
/**
|
||||
* Normalizes freshness shortcut to the provider's expected format.
|
||||
* Accepts both Brave format (pd/pw/pm/py) and Perplexity format (day/week/month/year).
|
||||
* Use date_after/date_before for specific date ranges.
|
||||
*/
|
||||
function normalizeFreshness(
|
||||
value: string | undefined,
|
||||
provider: (typeof SEARCH_PROVIDERS)[number],
|
||||
): string | undefined {
|
||||
if (!value) return undefined;
|
||||
const trimmed = value.trim();
|
||||
if (!trimmed) return undefined;
|
||||
|
||||
const lower = trimmed.toLowerCase();
|
||||
if (BRAVE_FRESHNESS_SHORTCUTS.has(lower)) return lower;
|
||||
|
||||
const match = trimmed.match(BRAVE_FRESHNESS_RANGE);
|
||||
if (!match) return undefined;
|
||||
if (BRAVE_FRESHNESS_SHORTCUTS.has(lower)) {
|
||||
return provider === "brave" ? lower : FRESHNESS_TO_RECENCY[lower];
|
||||
}
|
||||
|
||||
const [, start, end] = match;
|
||||
if (!isValidIsoDate(start) || !isValidIsoDate(end)) return undefined;
|
||||
if (start > end) return undefined;
|
||||
if (PERPLEXITY_RECENCY_VALUES.has(lower)) {
|
||||
return provider === "perplexity" ? lower : RECENCY_TO_FRESHNESS[lower];
|
||||
}
|
||||
|
||||
return `${start}to${end}`;
|
||||
}
|
||||
|
||||
function isValidIsoDate(value: string): boolean {
|
||||
if (!/^\d{4}-\d{2}-\d{2}$/.test(value)) return false;
|
||||
const [year, month, day] = value.split("-").map((part) => Number.parseInt(part, 10));
|
||||
if (!Number.isFinite(year) || !Number.isFinite(month) || !Number.isFinite(day)) return false;
|
||||
|
||||
const date = new Date(Date.UTC(year, month - 1, day));
|
||||
return (
|
||||
date.getUTCFullYear() === year && date.getUTCMonth() === month - 1 && date.getUTCDate() === day
|
||||
);
|
||||
return undefined;
|
||||
}
|
||||
|
||||
function resolveSiteName(url: string | undefined): string | undefined {
|
||||
@ -255,6 +296,10 @@ async function runPerplexitySearchApi(params: {
|
||||
searchDomainFilter?: string[];
|
||||
searchRecencyFilter?: string;
|
||||
searchLanguageFilter?: string[];
|
||||
searchAfterDate?: string;
|
||||
searchBeforeDate?: string;
|
||||
maxTokens?: number;
|
||||
maxTokensPerPage?: number;
|
||||
}): Promise<
|
||||
Array<{ title: string; url: string; description: string; published?: string; siteName?: string }>
|
||||
> {
|
||||
@ -275,6 +320,18 @@ async function runPerplexitySearchApi(params: {
|
||||
if (params.searchLanguageFilter && params.searchLanguageFilter.length > 0) {
|
||||
body.search_language_filter = params.searchLanguageFilter;
|
||||
}
|
||||
if (params.searchAfterDate) {
|
||||
body.search_after_date = params.searchAfterDate;
|
||||
}
|
||||
if (params.searchBeforeDate) {
|
||||
body.search_before_date = params.searchBeforeDate;
|
||||
}
|
||||
if (params.maxTokens !== undefined) {
|
||||
body.max_tokens = params.maxTokens;
|
||||
}
|
||||
if (params.maxTokensPerPage !== undefined) {
|
||||
body.max_tokens_per_page = params.maxTokensPerPage;
|
||||
}
|
||||
|
||||
const res = await fetch(PERPLEXITY_SEARCH_ENDPOINT, {
|
||||
method: "POST",
|
||||
@ -313,17 +370,17 @@ async function runWebSearch(params: {
|
||||
cacheTtlMs: number;
|
||||
provider: (typeof SEARCH_PROVIDERS)[number];
|
||||
country?: string;
|
||||
search_lang?: string;
|
||||
language?: string;
|
||||
ui_lang?: string;
|
||||
freshness?: string;
|
||||
dateAfter?: string;
|
||||
dateBefore?: string;
|
||||
searchDomainFilter?: string[];
|
||||
searchRecencyFilter?: string;
|
||||
searchLanguageFilter?: string[];
|
||||
maxTokens?: number;
|
||||
maxTokensPerPage?: number;
|
||||
}): Promise<Record<string, unknown>> {
|
||||
const cacheKey = normalizeCacheKey(
|
||||
params.provider === "brave"
|
||||
? `${params.provider}:${params.query}:${params.count}:${params.country || "default"}:${params.search_lang || "default"}:${params.ui_lang || "default"}:${params.freshness || "default"}`
|
||||
: `${params.provider}:${params.query}:${params.count}:${params.country || "default"}:${params.searchDomainFilter?.join(",") || "default"}:${params.searchRecencyFilter || "default"}:${params.searchLanguageFilter?.join(",") || "default"}`,
|
||||
`${params.provider}:${params.query}:${params.count}:${params.country || "default"}:${params.language || "default"}:${params.ui_lang || "default"}:${params.freshness || "default"}:${params.dateAfter || "default"}:${params.dateBefore || "default"}:${params.searchDomainFilter?.join(",") || "default"}:${params.maxTokens || "default"}:${params.maxTokensPerPage || "default"}`,
|
||||
);
|
||||
const cached = readCache(SEARCH_CACHE, cacheKey);
|
||||
if (cached) return { ...cached.value, cached: true };
|
||||
@ -338,8 +395,12 @@ async function runWebSearch(params: {
|
||||
timeoutSeconds: params.timeoutSeconds,
|
||||
country: params.country,
|
||||
searchDomainFilter: params.searchDomainFilter,
|
||||
searchRecencyFilter: params.searchRecencyFilter,
|
||||
searchLanguageFilter: params.searchLanguageFilter,
|
||||
searchRecencyFilter: params.freshness,
|
||||
searchLanguageFilter: params.language ? [params.language] : undefined,
|
||||
searchAfterDate: params.dateAfter ? isoToPerplexityDate(params.dateAfter) : undefined,
|
||||
searchBeforeDate: params.dateBefore ? isoToPerplexityDate(params.dateBefore) : undefined,
|
||||
maxTokens: params.maxTokens,
|
||||
maxTokensPerPage: params.maxTokensPerPage,
|
||||
});
|
||||
|
||||
const payload = {
|
||||
@ -363,14 +424,23 @@ async function runWebSearch(params: {
|
||||
if (params.country) {
|
||||
url.searchParams.set("country", params.country);
|
||||
}
|
||||
if (params.search_lang) {
|
||||
url.searchParams.set("search_lang", params.search_lang);
|
||||
if (params.language) {
|
||||
url.searchParams.set("search_lang", params.language);
|
||||
}
|
||||
if (params.ui_lang) {
|
||||
url.searchParams.set("ui_lang", params.ui_lang);
|
||||
}
|
||||
if (params.freshness) {
|
||||
url.searchParams.set("freshness", params.freshness);
|
||||
} else if (params.dateAfter && params.dateBefore) {
|
||||
url.searchParams.set("freshness", `${params.dateAfter}to${params.dateBefore}`);
|
||||
} else if (params.dateAfter) {
|
||||
url.searchParams.set(
|
||||
"freshness",
|
||||
`${params.dateAfter}to${new Date().toISOString().slice(0, 10)}`,
|
||||
);
|
||||
} else if (params.dateBefore) {
|
||||
url.searchParams.set("freshness", `1970-01-01to${params.dateBefore}`);
|
||||
}
|
||||
|
||||
const res = await fetch(url.toString(), {
|
||||
@ -420,7 +490,7 @@ export function createWebSearchTool(options?: {
|
||||
|
||||
const description =
|
||||
provider === "perplexity"
|
||||
? "Search the web using Perplexity Search API. Returns structured results (title, URL, snippet). Supports recency filter, domain filtering, and language filtering."
|
||||
? "Search the web using Perplexity Search API. Returns structured results (title, URL, snippet). Supports freshness filter, domain filtering, and language filtering."
|
||||
: "Search the web using Brave Search API. Supports region-specific and localized search via country and language parameters. Returns titles, URLs, and snippets for fast research.";
|
||||
|
||||
return {
|
||||
@ -442,32 +512,38 @@ export function createWebSearchTool(options?: {
|
||||
const count =
|
||||
readNumberParam(params, "count", { integer: true }) ?? search?.maxResults ?? undefined;
|
||||
const country = readStringParam(params, "country");
|
||||
const search_lang = readStringParam(params, "search_lang");
|
||||
const language = readStringParam(params, "language");
|
||||
const ui_lang = readStringParam(params, "ui_lang");
|
||||
const rawFreshness = readStringParam(params, "freshness");
|
||||
const freshness = rawFreshness ? normalizeFreshness(rawFreshness) : undefined;
|
||||
const freshness = rawFreshness ? normalizeFreshness(rawFreshness, provider) : undefined;
|
||||
if (rawFreshness && !freshness) {
|
||||
return jsonResult({
|
||||
error: "invalid_freshness",
|
||||
message:
|
||||
"freshness must be one of pd, pw, pm, py, or a range like YYYY-MM-DDtoYYYY-MM-DD.",
|
||||
message: "freshness must be day, week, month, or year.",
|
||||
docs: "https://docs.clawd.bot/tools/web",
|
||||
});
|
||||
}
|
||||
const rawRecency = readStringParam(params, "recency");
|
||||
const recency =
|
||||
rawRecency && PERPLEXITY_RECENCY_VALUES.has(rawRecency.toLowerCase())
|
||||
? rawRecency.toLowerCase()
|
||||
: undefined;
|
||||
if (rawRecency && !recency) {
|
||||
const rawDateAfter = readStringParam(params, "date_after");
|
||||
const dateAfter = rawDateAfter ? normalizeToIsoDate(rawDateAfter) : undefined;
|
||||
if (rawDateAfter && !dateAfter) {
|
||||
return jsonResult({
|
||||
error: "invalid_recency",
|
||||
message: "recency must be one of: day, week, month, year.",
|
||||
error: "invalid_date",
|
||||
message: "date_after must be YYYY-MM-DD format.",
|
||||
docs: "https://docs.clawd.bot/tools/web",
|
||||
});
|
||||
}
|
||||
const rawDateBefore = readStringParam(params, "date_before");
|
||||
const dateBefore = rawDateBefore ? normalizeToIsoDate(rawDateBefore) : undefined;
|
||||
if (rawDateBefore && !dateBefore) {
|
||||
return jsonResult({
|
||||
error: "invalid_date",
|
||||
message: "date_before must be YYYY-MM-DD format.",
|
||||
docs: "https://docs.clawd.bot/tools/web",
|
||||
});
|
||||
}
|
||||
const domainFilter = readStringArrayParam(params, "domain_filter");
|
||||
const languageFilter = readStringArrayParam(params, "language_filter");
|
||||
const maxTokens = readNumberParam(params, "max_tokens", { integer: true });
|
||||
const maxTokensPerPage = readNumberParam(params, "max_tokens_per_page", { integer: true });
|
||||
|
||||
const result = await runWebSearch({
|
||||
query,
|
||||
@ -477,12 +553,14 @@ export function createWebSearchTool(options?: {
|
||||
cacheTtlMs: resolveCacheTtlMs(search?.cacheTtlMinutes, DEFAULT_CACHE_TTL_MINUTES),
|
||||
provider,
|
||||
country,
|
||||
search_lang,
|
||||
language,
|
||||
ui_lang,
|
||||
freshness,
|
||||
dateAfter,
|
||||
dateBefore,
|
||||
searchDomainFilter: domainFilter,
|
||||
searchRecencyFilter: recency,
|
||||
searchLanguageFilter: languageFilter,
|
||||
maxTokens: maxTokens ?? undefined,
|
||||
maxTokensPerPage: maxTokensPerPage ?? undefined,
|
||||
});
|
||||
return jsonResult(result);
|
||||
},
|
||||
@ -491,6 +569,9 @@ export function createWebSearchTool(options?: {
|
||||
|
||||
export const __testing = {
|
||||
normalizeFreshness,
|
||||
normalizeToIsoDate,
|
||||
isoToPerplexityDate,
|
||||
SEARCH_CACHE,
|
||||
PERPLEXITY_RECENCY_VALUES,
|
||||
FRESHNESS_TO_RECENCY,
|
||||
RECENCY_TO_FRESHNESS,
|
||||
} as const;
|
||||
|
||||
@ -56,7 +56,7 @@ describe("web_search country and language parameters", () => {
|
||||
expect(url.searchParams.get("country")).toBe("DE");
|
||||
});
|
||||
|
||||
it("should pass search_lang parameter to Brave API", async () => {
|
||||
it("should pass language parameter to Brave API as search_lang", async () => {
|
||||
const mockFetch = vi.fn(() =>
|
||||
Promise.resolve({
|
||||
ok: true,
|
||||
@ -67,7 +67,7 @@ describe("web_search country and language parameters", () => {
|
||||
global.fetch = mockFetch;
|
||||
|
||||
const tool = createWebSearchTool({ config: undefined, sandboxed: true });
|
||||
await tool?.execute?.(1, { query: "test", search_lang: "de" });
|
||||
await tool?.execute?.(1, { query: "test", language: "de" });
|
||||
|
||||
const url = new URL(mockFetch.mock.calls[0][0] as string);
|
||||
expect(url.searchParams.get("search_lang")).toBe("de");
|
||||
@ -261,7 +261,7 @@ describe("web_search perplexity Search API", () => {
|
||||
expect(authHeader).toBe("Bearer pplx-config");
|
||||
});
|
||||
|
||||
it("passes recency filter to Perplexity Search API", async () => {
|
||||
it("passes freshness filter to Perplexity Search API", async () => {
|
||||
vi.stubEnv("PERPLEXITY_API_KEY", "pplx-test");
|
||||
const mockFetch = vi.fn(() =>
|
||||
Promise.resolve({
|
||||
@ -276,14 +276,14 @@ describe("web_search perplexity Search API", () => {
|
||||
config: { tools: { web: { search: { provider: "perplexity" } } } },
|
||||
sandboxed: true,
|
||||
});
|
||||
await tool?.execute?.(1, { query: "test", recency: "week" });
|
||||
await tool?.execute?.(1, { query: "test", freshness: "week" });
|
||||
|
||||
expect(mockFetch).toHaveBeenCalled();
|
||||
const body = JSON.parse(mockFetch.mock.calls[0]?.[1]?.body as string);
|
||||
expect(body.search_recency_filter).toBe("week");
|
||||
});
|
||||
|
||||
it("accepts all valid recency values", async () => {
|
||||
it("accepts all valid freshness values for Perplexity", async () => {
|
||||
vi.stubEnv("PERPLEXITY_API_KEY", "pplx-test");
|
||||
const mockFetch = vi.fn(() =>
|
||||
Promise.resolve({
|
||||
@ -299,15 +299,15 @@ describe("web_search perplexity Search API", () => {
|
||||
sandboxed: true,
|
||||
});
|
||||
|
||||
for (const recency of ["day", "week", "month", "year"]) {
|
||||
for (const freshness of ["day", "week", "month", "year"]) {
|
||||
webSearchTesting.SEARCH_CACHE.clear();
|
||||
await tool?.execute?.(1, { query: `test-${recency}`, recency });
|
||||
await tool?.execute?.(1, { query: `test-${freshness}`, freshness });
|
||||
const body = JSON.parse(mockFetch.mock.calls.at(-1)?.[1]?.body as string);
|
||||
expect(body.search_recency_filter).toBe(recency);
|
||||
expect(body.search_recency_filter).toBe(freshness);
|
||||
}
|
||||
});
|
||||
|
||||
it("rejects invalid recency values", async () => {
|
||||
it("rejects invalid freshness values", async () => {
|
||||
vi.stubEnv("PERPLEXITY_API_KEY", "pplx-test");
|
||||
const mockFetch = vi.fn(() =>
|
||||
Promise.resolve({
|
||||
@ -322,10 +322,10 @@ describe("web_search perplexity Search API", () => {
|
||||
config: { tools: { web: { search: { provider: "perplexity" } } } },
|
||||
sandboxed: true,
|
||||
});
|
||||
const result = await tool?.execute?.(1, { query: "test", recency: "yesterday" });
|
||||
const result = await tool?.execute?.(1, { query: "test", freshness: "yesterday" });
|
||||
|
||||
expect(mockFetch).not.toHaveBeenCalled();
|
||||
expect(result?.details).toMatchObject({ error: "invalid_recency" });
|
||||
expect(result?.details).toMatchObject({ error: "invalid_freshness" });
|
||||
});
|
||||
|
||||
it("passes domain filter to Perplexity Search API", async () => {
|
||||
@ -378,7 +378,7 @@ describe("web_search perplexity Search API", () => {
|
||||
expect(body.search_domain_filter).toEqual(["-reddit.com", "-pinterest.com"]);
|
||||
});
|
||||
|
||||
it("passes language filter to Perplexity Search API", async () => {
|
||||
it("passes language to Perplexity Search API as search_language_filter array", async () => {
|
||||
vi.stubEnv("PERPLEXITY_API_KEY", "pplx-test");
|
||||
const mockFetch = vi.fn(() =>
|
||||
Promise.resolve({
|
||||
@ -395,12 +395,12 @@ describe("web_search perplexity Search API", () => {
|
||||
});
|
||||
await tool?.execute?.(1, {
|
||||
query: "test",
|
||||
language_filter: ["en", "de", "fr"],
|
||||
language: "en",
|
||||
});
|
||||
|
||||
expect(mockFetch).toHaveBeenCalled();
|
||||
const body = JSON.parse(mockFetch.mock.calls[0]?.[1]?.body as string);
|
||||
expect(body.search_language_filter).toEqual(["en", "de", "fr"]);
|
||||
expect(body.search_language_filter).toEqual(["en"]);
|
||||
});
|
||||
|
||||
it("passes multiple filters together to Perplexity Search API", async () => {
|
||||
@ -421,9 +421,9 @@ describe("web_search perplexity Search API", () => {
|
||||
await tool?.execute?.(1, {
|
||||
query: "climate research",
|
||||
country: "US",
|
||||
recency: "month",
|
||||
freshness: "month",
|
||||
domain_filter: ["nature.com", ".gov"],
|
||||
language_filter: ["en"],
|
||||
language: "en",
|
||||
});
|
||||
|
||||
expect(mockFetch).toHaveBeenCalled();
|
||||
|
||||
Loading…
Reference in New Issue
Block a user