diff --git a/.env.example b/.env.example index 292f42a44..d0eb54346 100644 --- a/.env.example +++ b/.env.example @@ -106,6 +106,8 @@ VIDEO_SORA_BASE_URL= # --- Web Search --------------------------------------------------------------- TAVILY_API_KEY= +BRAVE_API_KEY= +BAIDU_API_KEY= # --- Proxy (optional) -------------------------------------------------------- diff --git a/app/api/web-search/route.ts b/app/api/web-search/route.ts index f2ff627f4..064baf971 100644 --- a/app/api/web-search/route.ts +++ b/app/api/web-search/route.ts @@ -2,38 +2,74 @@ * Web Search API * * POST /api/web-search - * Simple JSON request/response using Tavily search. + * Simple JSON request/response supporting multiple search providers. */ -import { searchWithTavily, formatSearchResultsAsContext } from '@/lib/web-search/tavily'; +import { searchWithTavily } from '@/lib/web-search/tavily'; +import { searchWithBrave } from '@/lib/web-search/brave'; +import { searchWithBaidu } from '@/lib/web-search/baidu'; +import { formatSearchResultsAsContext } from '@/lib/web-search/tavily'; import { resolveWebSearchApiKey } from '@/lib/server/provider-config'; import { createLogger } from '@/lib/logger'; import { apiError, apiSuccess } from '@/lib/server/api-response'; +import type { WebSearchProviderId } from '@/lib/web-search/types'; const log = createLogger('WebSearch'); export async function POST(req: Request) { try { const body = await req.json(); - const { query, apiKey: clientApiKey } = body as { + const { + query, + apiKey: clientApiKey, + provider = 'tavily', + baiduSubSources, + } = body as { query?: string; apiKey?: string; + provider?: WebSearchProviderId; + baiduSubSources?: { webSearch?: boolean; baike?: boolean; scholar?: boolean }; }; if (!query || !query.trim()) { return apiError('MISSING_REQUIRED_FIELD', 400, 'query is required'); } - const apiKey = resolveWebSearchApiKey(clientApiKey); - if (!apiKey) { + // Brave Search doesn't require an API key + const needsApiKey = provider !== 'brave'; + const apiKey = needsApiKey ? resolveWebSearchApiKey(provider, clientApiKey) : ''; + + if (needsApiKey && !apiKey) { + const providerNames: Record = { + tavily: 'Tavily', + baidu: 'Baidu', + }; + const name = providerNames[provider] || provider; return apiError( 'MISSING_API_KEY', 400, - 'Tavily API key is not configured. Set it in Settings → Web Search or set TAVILY_API_KEY env var.', + `${name} API key is not configured. Set it in Settings → Web Search or set the corresponding env var.`, ); } - const result = await searchWithTavily({ query: query.trim(), apiKey }); + let result; + switch (provider) { + case 'brave': + result = await searchWithBrave({ query: query.trim() }); + break; + case 'baidu': + result = await searchWithBaidu({ + query: query.trim(), + apiKey, + subSources: baiduSubSources, + }); + break; + case 'tavily': + default: + result = await searchWithTavily({ query: query.trim(), apiKey }); + break; + } + const context = formatSearchResultsAsContext(result); return apiSuccess({ diff --git a/app/generation-preview/page.tsx b/app/generation-preview/page.tsx index 213a51409..46b310549 100644 --- a/app/generation-preview/page.tsx +++ b/app/generation-preview/page.tsx @@ -312,6 +312,9 @@ function GenerationPreviewContent() { body: JSON.stringify({ query: currentSession.requirements.requirement, apiKey: wsApiKey || undefined, + provider: wsSettings.webSearchProviderId, + baiduSubSources: + wsSettings.webSearchProviderId === 'baidu' ? wsSettings.baiduSubSources : undefined, }), signal, }); diff --git a/app/page.tsx b/app/page.tsx index 80dfbd850..e11a48cbb 100644 --- a/app/page.tsx +++ b/app/page.tsx @@ -32,6 +32,7 @@ import { nanoid } from 'nanoid'; import { storePdfBlob } from '@/lib/utils/image-storage'; import type { UserRequirements } from '@/lib/types/generation'; import { useSettingsStore } from '@/lib/store/settings'; +import { WEB_SEARCH_PROVIDERS } from '@/lib/web-search/constants'; import { useUserProfileStore, AVATAR_OPTIONS } from '@/lib/store/user-profile'; import { StageListItem, @@ -247,6 +248,20 @@ function HomePage() { return; } + // Block if web search is enabled but the selected provider is unusable + if (form.webSearch) { + const settings = useSettingsStore.getState(); + const wsProvider = WEB_SEARCH_PROVIDERS[settings.webSearchProviderId]; + const wsCfg = settings.webSearchProvidersConfig[settings.webSearchProviderId]; + const isUsable = + wsProvider && + (!wsProvider.requiresApiKey || !!wsCfg?.apiKey || !!wsCfg?.isServerConfigured); + if (!isUsable) { + toast.warning(t('toolbar.webSearchProviderUnavailable')); + return; + } + } + setError(null); try { diff --git a/components/generation/generation-toolbar.tsx b/components/generation/generation-toolbar.tsx index 27301bbd8..4f632124c 100644 --- a/components/generation/generation-toolbar.tsx +++ b/components/generation/generation-toolbar.tsx @@ -1,6 +1,7 @@ 'use client'; import { useState, useRef, useMemo } from 'react'; +import { toast } from 'sonner'; import { Bot, Check, ChevronLeft, Globe, Paperclip, FileText, X, Globe2 } from 'lucide-react'; import { Popover, PopoverContent, PopoverTrigger } from '@/components/ui/popover'; import { @@ -64,14 +65,19 @@ export function GenerationToolbar({ const fileInputRef = useRef(null); const [isDragging, setIsDragging] = useState(false); - // Check if the selected web search provider has a valid config (API key or server-configured) - const webSearchProvider = WEB_SEARCH_PROVIDERS[webSearchProviderId]; - const webSearchConfig = webSearchProvidersConfig[webSearchProviderId]; - const webSearchAvailable = webSearchProvider - ? !webSearchProvider.requiresApiKey || - !!webSearchConfig?.apiKey || - !!webSearchConfig?.isServerConfigured - : false; + // Check if ANY web search provider is usable (has API key, server-configured, or doesn't need key) + const webSearchAvailable = Object.values(WEB_SEARCH_PROVIDERS).some((provider) => { + const cfg = webSearchProvidersConfig[provider.id]; + return !provider.requiresApiKey || !!cfg?.apiKey || !!cfg?.isServerConfigured; + }); + + // Check if the *selected* provider can actually execute a search + const isSelectedProviderUsable = (() => { + const provider = WEB_SEARCH_PROVIDERS[webSearchProviderId]; + if (!provider) return false; + const cfg = webSearchProvidersConfig[provider.id]; + return !provider.requiresApiKey || !!cfg?.apiKey || !!cfg?.isServerConfigured; + })(); // Configured LLM providers (only those with valid credentials + models + endpoint) const configuredProviders = providersConfig @@ -276,7 +282,15 @@ export function GenerationToolbar({ - -

{t('settings.webSearchApiKeyHint')}

- + {!provider.requiresApiKey && !isServerConfigured && ( +
+ {t('settings.webSearchNoApiKeyNeeded')} +
+ )} -
- +
+ {showApiKeyInput && ( +
+ +
setWebSearchProviderConfig(selectedProviderId, { - baseUrl: e.target.value, + apiKey: e.target.value, }) } - className="text-sm" + className="font-mono text-sm pr-10" /> +
+

{t('settings.webSearchApiKeyHint')}

+ )} + +
+ + + setWebSearchProviderConfig(selectedProviderId, { + baseUrl: e.target.value, + }) + } + className="text-sm" + /> +
+
- {/* Request URL Preview */} - {(() => { - const effectiveBaseUrl = - webSearchProvidersConfig[selectedProviderId]?.baseUrl || - provider.defaultBaseUrl || - ''; - if (!effectiveBaseUrl) return null; - const fullUrl = effectiveBaseUrl + '/search'; - return ( -

- {t('settings.requestUrl')}: {fullUrl} -

- ); - })()} - + {effectiveBaseUrl && ( +

+ {t('settings.requestUrl')}: {effectiveBaseUrl}/search +

+ )} + + {selectedProviderId === 'baidu' && ( +
+ +
+ {( + Object.entries(BAIDU_SUB_SOURCES) as [ + keyof BaiduSubSources, + (typeof BAIDU_SUB_SOURCES)[keyof typeof BAIDU_SUB_SOURCES], + ][] + ).map(([key, meta]) => { + const enabled = baiduSubSources?.[key] ?? true; + return ( +
+ + {meta.label[locale]} + + setBaiduSubSources({ [key]: checked })} + className="scale-[0.85] origin-right" + /> +
+ ); + })} +
+

+ {locale === 'zh-CN' + ? '选择百度搜索时使用的数据源,至少启用一个' + : 'Choose which Baidu data sources to query. Enable at least one.'} +

+
)}
); diff --git a/lib/i18n/common.ts b/lib/i18n/common.ts index 1bceb5d61..a78a9a318 100644 --- a/lib/i18n/common.ts +++ b/lib/i18n/common.ts @@ -19,6 +19,7 @@ export const commonZhCN = { webSearchDesc: '生成前搜索网络获取最新资料,让内容更丰富准确', webSearchProvider: '搜索引擎', webSearchNoProvider: '请在设置中配置搜索引擎 API Key', + webSearchProviderUnavailable: '当前搜索引擎不可用,请先切换到可用的搜索引擎', selectProvider: '选择模型服务商', configureProvider: '配置模型', configureProviderHint: '请先配置至少一个模型服务商才能生成课程', @@ -60,6 +61,8 @@ export const commonEnUS = { webSearchDesc: 'Search the web for up-to-date information before generation', webSearchProvider: 'Search engine', webSearchNoProvider: 'Configure search API key in Settings', + webSearchProviderUnavailable: + 'Selected search provider is unavailable. Please switch to an available one.', selectProvider: 'Select provider', configureProvider: 'Set up model', configureProviderHint: 'Configure at least one model provider to generate courses', diff --git a/lib/i18n/settings.ts b/lib/i18n/settings.ts index 1e6918263..c726d31cf 100644 --- a/lib/i18n/settings.ts +++ b/lib/i18n/settings.ts @@ -543,12 +543,13 @@ export const settingsZhCN = { clearCacheFailed: '清空缓存失败,请重试', // Web Search settings webSearchSettings: '网络搜索', - webSearchApiKey: 'Tavily API Key', - webSearchApiKeyPlaceholder: '输入你的 Tavily API Key', + webSearchApiKey: 'API Key', + webSearchApiKeyPlaceholder: '输入搜索引擎的 API Key', webSearchApiKeyPlaceholderServer: '已配置服务端密钥,可选填覆盖', - webSearchApiKeyHint: '从 tavily.com 获取 API Key,用于网络搜索', + webSearchApiKeyHint: '输入搜索引擎的 API Key,用于网络搜索', webSearchBaseUrl: 'Base URL', - webSearchServerConfigured: '服务端已配置 Tavily API Key', + webSearchServerConfigured: '服务端已配置搜索引擎 API Key', + webSearchNoApiKeyNeeded: '此搜索引擎无需 API Key,免费使用', optional: '可选', }, profile: { @@ -1134,12 +1135,13 @@ export const settingsEnUS = { clearCacheFailed: 'Failed to clear cache, please try again', // Web Search settings webSearchSettings: 'Web Search', - webSearchApiKey: 'Tavily API Key', - webSearchApiKeyPlaceholder: 'Enter your Tavily API Key', + webSearchApiKey: 'API Key', + webSearchApiKeyPlaceholder: 'Enter your search provider API Key', webSearchApiKeyPlaceholderServer: 'Server key configured, optionally override', - webSearchApiKeyHint: 'Get an API key from tavily.com for web search', + webSearchApiKeyHint: 'Enter an API key for the selected search provider', webSearchBaseUrl: 'Base URL', - webSearchServerConfigured: 'Server-side Tavily API key is configured', + webSearchServerConfigured: 'Server-side search API key is configured', + webSearchNoApiKeyNeeded: 'This search provider is free and requires no API key', optional: 'Optional', }, profile: { diff --git a/lib/server/provider-config.ts b/lib/server/provider-config.ts index b1e0dd47b..b0994b65a 100644 --- a/lib/server/provider-config.ts +++ b/lib/server/provider-config.ts @@ -82,6 +82,8 @@ const VIDEO_ENV_MAP: Record = { const WEB_SEARCH_ENV_MAP: Record = { TAVILY: 'tavily', + BRAVE: 'brave', + BAIDU: 'baidu', }; // --------------------------------------------------------------------------- @@ -374,7 +376,7 @@ export function resolveVideoBaseUrl( } // --------------------------------------------------------------------------- -// Public API — Web Search (Tavily) +// Public API — Web Search // --------------------------------------------------------------------------- /** Returns server-configured web search providers (no apiKeys exposed) */ @@ -388,10 +390,17 @@ export function getServerWebSearchProviders(): Record server key > TAVILY_API_KEY env > empty */ -export function resolveWebSearchApiKey(clientKey?: string): string { +/** Provider-specific env var fallbacks for web search API keys */ +const WEB_SEARCH_ENV_FALLBACKS: Record = { + tavily: 'TAVILY_API_KEY', + baidu: 'BAIDU_API_KEY', +}; + +/** Resolve web search API key: client key > server key > env var > empty */ +export function resolveWebSearchApiKey(providerId: string = 'tavily', clientKey?: string): string { if (clientKey) return clientKey; - const serverKey = getConfig().webSearch.tavily?.apiKey; + const serverKey = getConfig().webSearch[providerId]?.apiKey; if (serverKey) return serverKey; - return process.env.TAVILY_API_KEY || ''; + const envVar = WEB_SEARCH_ENV_FALLBACKS[providerId]; + return (envVar && process.env[envVar]) || ''; } diff --git a/lib/store/settings.ts b/lib/store/settings.ts index 8d68448b4..486b0e7cb 100644 --- a/lib/store/settings.ts +++ b/lib/store/settings.ts @@ -14,7 +14,7 @@ import type { PDFProviderId } from '@/lib/pdf/types'; import type { ImageProviderId, VideoProviderId } from '@/lib/media/types'; import { IMAGE_PROVIDERS } from '@/lib/media/image-providers'; import { VIDEO_PROVIDERS } from '@/lib/media/video-providers'; -import type { WebSearchProviderId } from '@/lib/web-search/types'; +import type { WebSearchProviderId, BaiduSubSources } from '@/lib/web-search/types'; import { createLogger } from '@/lib/logger'; const log = createLogger('Settings'); @@ -123,6 +123,7 @@ export interface SettingsState { serverBaseUrl?: string; } >; + baiduSubSources: BaiduSubSources; // Global TTS/ASR toggles ttsEnabled: boolean; @@ -227,6 +228,7 @@ export interface SettingsState { providerId: WebSearchProviderId, config: Partial<{ apiKey: string; baseUrl: string; enabled: boolean }>, ) => void; + setBaiduSubSources: (sources: Partial) => void; // Server provider actions fetchServerProviders: () => Promise; @@ -310,7 +312,14 @@ const getDefaultWebSearchConfig = () => ({ webSearchProviderId: 'tavily' as WebSearchProviderId, webSearchProvidersConfig: { tavily: { apiKey: '', baseUrl: '', enabled: true }, + brave: { apiKey: '', baseUrl: '', enabled: true }, + baidu: { apiKey: '', baseUrl: '', enabled: false }, } as Record, + baiduSubSources: { + webSearch: true, + baike: true, + scholar: true, + } as BaiduSubSources, }); /** @@ -627,6 +636,14 @@ export const useSettingsStore = create()( }, })), + setBaiduSubSources: (sources) => + set((state) => ({ + baiduSubSources: { + ...state.baiduSubSources, + ...sources, + }, + })), + // Fetch server-configured providers and merge into local state fetchServerProviders: async () => { try { diff --git a/lib/web-search/baidu.ts b/lib/web-search/baidu.ts new file mode 100644 index 000000000..3bb7cd3ed --- /dev/null +++ b/lib/web-search/baidu.ts @@ -0,0 +1,232 @@ +/** + * Baidu Unified Search Integration + * + * Aggregates search results from three Baidu services: + * 1. Web Search — Qianfan AI Search API (general web results) + * 2. Baike — Baidu Baike API (encyclopedia entries) + * 3. Scholar — Baidu Scholar API (academic papers) + * + * All share the same BAIDU_API_KEY (Bearer token). + */ + +import { proxyFetch } from '@/lib/server/proxy-fetch'; +import type { WebSearchResult, WebSearchSource } from '@/lib/types/web-search'; +import { createLogger } from '@/lib/logger'; +import { normalizeWebSearchQuery } from './utils'; + +const log = createLogger('BaiduSearch'); + +// ── API Endpoints ────────────────────────────────────────────── +const BAIDU_WEB_SEARCH_URL = 'https://qianfan.baidubce.com/v2/ai_search/web_search'; +const BAIDU_BAIKE_URL = 'https://appbuilder.baidu.com/v2/baike/lemma/get_content'; +const BAIDU_SCHOLAR_URL = 'https://qianfan.baidubce.com/v2/tools/baidu_scholar/search'; + +// ── Common Auth Headers ──────────────────────────────────────── +function baiduHeaders(apiKey: string): Record { + return { + Authorization: `Bearer ${apiKey}`, + 'X-Appbuilder-From': 'openclaw', + 'Content-Type': 'application/json', + }; +} + +// ── Response Types ───────────────────────────────────────────── +interface BaiduWebReference { + title?: string; + url?: string; + site_name?: string; + content?: string; +} + +interface BaiduWebResponse { + code?: number; + message?: string; + references?: BaiduWebReference[]; +} + +interface BaiduBaikeResult { + lemma_title?: string; + lemma_desc?: string; + lemma_url?: string; + abstract_text?: string; +} + +interface BaiduBaikeResponse { + errno?: number; + errmsg?: string; + result?: BaiduBaikeResult; +} + +interface BaiduScholarPaper { + title?: string; + abstract?: string; + url?: string; + publishYear?: string; + keyword?: string; +} + +interface BaiduScholarResponse { + code?: number; + message?: string; + results?: BaiduScholarPaper[]; +} + +// ── Individual Source Fetchers ────────────────────────────────── + +/** 1. Baidu Web Search */ +async function fetchWebSearch( + query: string, + apiKey: string, + maxResults: number, +): Promise { + try { + const res = await proxyFetch(BAIDU_WEB_SEARCH_URL, { + method: 'POST', + headers: baiduHeaders(apiKey), + body: JSON.stringify({ + messages: [{ content: query, role: 'user' }], + search_source: 'baidu_search_v2', + resource_type_filter: [{ type: 'web', top_k: maxResults }], + }), + }); + + if (!res.ok) { + const errText = await res.text().catch(() => ''); + log.warn(`[Baidu Web] HTTP ${res.status}: ${errText}`); + return []; + } + + const data = (await res.json()) as BaiduWebResponse; + if (data.code) { + log.warn(`[Baidu Web] API error ${data.code}: ${data.message}`); + return []; + } + + return (data.references || []).map((ref, i) => ({ + title: ref.title || ref.site_name || '', + url: ref.url || '', + content: ref.content || '', + score: 0.9 - i * 0.05, + })); + } catch (err) { + log.warn('[Baidu Web] Failed:', err); + return []; + } +} + +/** 2. Baidu Baike (encyclopedia) */ +async function fetchBaike(query: string, apiKey: string): Promise { + try { + const url = `${BAIDU_BAIKE_URL}?search_type=lemmaTitle&search_key=${encodeURIComponent(query)}`; + const res = await proxyFetch(url, { + method: 'GET', + headers: baiduHeaders(apiKey), + }); + + if (!res.ok) return []; + + const data = (await res.json()) as BaiduBaikeResponse; + if (data.errno && data.errno !== 0) return []; + if (!data.result) return []; + + const r = data.result; + return [ + { + title: `📖 ${r.lemma_title || query} — 百度百科`, + url: r.lemma_url || `https://baike.baidu.com/item/${encodeURIComponent(query)}`, + content: r.abstract_text || r.lemma_desc || '', + score: 0.95, // High relevance — authoritative source + }, + ]; + } catch (err) { + log.warn('[Baidu Baike] Failed:', err); + return []; + } +} + +/** 3. Baidu Scholar (academic papers) */ +async function fetchScholar( + query: string, + apiKey: string, + maxResults: number, +): Promise { + try { + const url = `${BAIDU_SCHOLAR_URL}?wd=${encodeURIComponent(query)}&pageNum=0&enable_abstract=true`; + const res = await proxyFetch(url, { + method: 'GET', + headers: baiduHeaders(apiKey), + }); + + if (!res.ok) return []; + + const data = (await res.json()) as BaiduScholarResponse; + if (data.code) return []; + + return (data.results || []).slice(0, maxResults).map((paper, i) => ({ + title: `🔬 ${paper.title || ''}`, + url: paper.url || '', + content: [ + paper.abstract || '', + paper.publishYear ? `(${paper.publishYear})` : '', + paper.keyword ? `Keywords: ${paper.keyword}` : '', + ] + .filter(Boolean) + .join(' '), + score: 0.85 - i * 0.05, + })); + } catch (err) { + log.warn('[Baidu Scholar] Failed:', err); + return []; + } +} + +// ── Main Entry Point ─────────────────────────────────────────── + +/** + * Unified Baidu Search — aggregates results from Web, Baike, and Scholar. + * Sources are fetched in parallel for speed. Individual failures are + * swallowed so the overall search still returns partial results. + */ +export async function searchWithBaidu(params: { + query: string; + apiKey: string; + maxResults?: number; + subSources?: { webSearch?: boolean; baike?: boolean; scholar?: boolean }; +}): Promise { + const { query: rawQuery, apiKey, maxResults = 10 } = params; + const query = normalizeWebSearchQuery(rawQuery); + const sub = { + webSearch: params.subSources?.webSearch ?? true, + baike: params.subSources?.baike ?? true, + scholar: params.subSources?.scholar ?? true, + }; + + const startTime = Date.now(); + + // Fire enabled sources in parallel + const [webResults, baikeResults, scholarResults] = await Promise.all([ + sub.webSearch ? fetchWebSearch(query, apiKey, maxResults) : Promise.resolve([]), + sub.baike ? fetchBaike(query, apiKey) : Promise.resolve([]), + sub.scholar ? fetchScholar(query, apiKey, 3) : Promise.resolve([]), + ]); + + const responseTime = (Date.now() - startTime) / 1000; + + // Merge: Baike first (authoritative), then web, then scholar + const allSources = [...baikeResults, ...webResults, ...scholarResults]; + + // De-duplicate by URL + const seen = new Set(); + const sources = allSources.filter((s) => { + if (!s.url || seen.has(s.url)) return false; + seen.add(s.url); + return true; + }); + + return { + answer: '', + sources, + query, + responseTime, + }; +} diff --git a/lib/web-search/brave.ts b/lib/web-search/brave.ts new file mode 100644 index 000000000..eb8c8f6a9 --- /dev/null +++ b/lib/web-search/brave.ts @@ -0,0 +1,123 @@ +/** + * Brave Web Search Integration + * + * Scrapes Brave Search public page via proxyFetch. + * No API key required — uses the public search endpoint. + */ + +import { proxyFetch } from '@/lib/server/proxy-fetch'; +import type { WebSearchResult, WebSearchSource } from '@/lib/types/web-search'; +import { normalizeWebSearchQuery } from './utils'; + +const BRAVE_SEARCH_URL = 'https://search.brave.com/search'; + +/** + * Browser-like headers to avoid being blocked by Brave. + */ +const BRAVE_HEADERS: Record = { + 'User-Agent': + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Safari/537.36', + Accept: + 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8', + 'Accept-Language': 'en-US,en;q=0.9', + 'sec-ch-ua': '"Chromium";v="142", "Google Chrome";v="142", "Not_A Brand";v="99"', + 'sec-ch-ua-mobile': '?0', + 'sec-ch-ua-platform': '"Windows"', + 'sec-fetch-dest': 'document', + 'sec-fetch-mode': 'navigate', + 'sec-fetch-site': 'none', + 'sec-fetch-user': '?1', +}; + +/** + * Parse Brave Search HTML response and extract search results. + * Uses regex-based parsing to avoid JSDOM dependency. + */ +function parseBraveResults(html: string, maxResults: number): WebSearchSource[] { + const results: WebSearchSource[] = []; + + // Match each search result snippet block with data-type="web" + // Actual HTML:
+ const snippetRegex = + /]*class="snippet[^"]*"[^>]*data-type="web"[^>]*>([\s\S]*?)(?=]*class="snippet[^"]*"[^>]*data-type="web"|]*href="([^"]+)"[^>]*>/); + if (!linkMatch || linkMatch[1].includes('brave.com')) continue; + const url = linkMatch[1]; + + // Extract title — actual class: "title search-snippet-title line-clamp-1 svelte-xxx" + const titleMatch = block.match( + /]*class="[^"]*search-snippet-title[^"]*"[^>]*>([^<]+)<\/span>/, + ); + const title = titleMatch ? titleMatch[1].trim() : ''; + if (!title) continue; + + // Extract snippet/description content + // Try generic-snippet div first (current Brave structure), + // then fall back to snippet-description p tag (legacy) + let content = ''; + const genericMatch = block.match( + /]*class="[^"]*generic-snippet[^"]*"[^>]*>([\s\S]*?)<\/div>/, + ); + const descMatch = block.match( + /]*class="[^"]*snippet-description[^"]*"[^>]*>([\s\S]*?)<\/p>/, + ); + const rawDesc = genericMatch?.[1] || descMatch?.[1] || ''; + if (rawDesc) { + content = rawDesc.replace(/<[^>]+>/g, '').trim(); + // Remove date prefix if present (e.g., "2 days ago -" or "Jan 1, 2026 - ") + content = content.replace(/^\d+ \w+ ago\s*[-—]\s*/, ''); + content = content.replace(/^[A-Z][a-z]+ \d+, \d{4}\s*[-—]\s*/, ''); + } + + results.push({ + title, + url, + content, + score: 1 - results.length * 0.1, // Position-based score: 1.0, 0.9, 0.8, ... + }); + } + + return results; +} + +/** + * Search the web using Brave Search (public page scraping) and return structured results. + */ +export async function searchWithBrave(params: { + query: string; + maxResults?: number; +}): Promise { + const { query: rawQuery, maxResults = 5 } = params; + const query = normalizeWebSearchQuery(rawQuery); + + const url = `${BRAVE_SEARCH_URL}?q=${encodeURIComponent(query)}`; + + const startTime = Date.now(); + const res = await proxyFetch(url, { + method: 'GET', + headers: BRAVE_HEADERS, + }); + + if (!res.ok) { + const errorText = await res.text().catch(() => ''); + throw new Error(`Brave Search error (${res.status}): ${errorText || res.statusText}`); + } + + const html = await res.text(); + const responseTime = (Date.now() - startTime) / 1000; + + const sources = parseBraveResults(html, maxResults); + + return { + answer: '', // Brave Search doesn't provide a direct answer + sources, + query, + responseTime, + }; +} diff --git a/lib/web-search/constants.ts b/lib/web-search/constants.ts index 6542bbb2a..47f756810 100644 --- a/lib/web-search/constants.ts +++ b/lib/web-search/constants.ts @@ -13,9 +13,33 @@ export const WEB_SEARCH_PROVIDERS: Record { const { query, apiKey, maxResults = 5 } = params; - // Tavily rejects queries over 400 characters with a 400 error - const truncatedQuery = query.slice(0, TAVILY_MAX_QUERY_LENGTH); + const truncatedQuery = normalizeWebSearchQuery(query); const res = await proxyFetch(TAVILY_API_URL, { method: 'POST', diff --git a/lib/web-search/types.ts b/lib/web-search/types.ts index f83822c7c..0d4113de1 100644 --- a/lib/web-search/types.ts +++ b/lib/web-search/types.ts @@ -5,7 +5,16 @@ /** * Web Search Provider IDs */ -export type WebSearchProviderId = 'tavily'; +export type WebSearchProviderId = 'tavily' | 'brave' | 'baidu'; + +/** + * Baidu sub-source toggles + */ +export interface BaiduSubSources { + webSearch: boolean; + baike: boolean; + scholar: boolean; +} /** * Web Search Provider Configuration diff --git a/lib/web-search/utils.ts b/lib/web-search/utils.ts new file mode 100644 index 000000000..9e61c5253 --- /dev/null +++ b/lib/web-search/utils.ts @@ -0,0 +1,17 @@ +/** + * Shared utilities for web search providers. + */ + +/** Maximum query length accepted by web search providers. */ +export const MAX_WEB_SEARCH_QUERY_LENGTH = 400; + +/** + * Normalize a web search query: trim whitespace and truncate to + * {@link MAX_WEB_SEARCH_QUERY_LENGTH} characters. + * + * Every provider should call this before using the query to ensure + * consistent behaviour regardless of the call-site. + */ +export function normalizeWebSearchQuery(query: string): string { + return query.trim().slice(0, MAX_WEB_SEARCH_QUERY_LENGTH); +} diff --git a/public/icons/baidu.png b/public/icons/baidu.png new file mode 100644 index 000000000..a8b50a364 Binary files /dev/null and b/public/icons/baidu.png differ diff --git a/public/icons/brave.png b/public/icons/brave.png new file mode 100644 index 000000000..8e384d90f Binary files /dev/null and b/public/icons/brave.png differ diff --git a/public/icons/tavily.jpg b/public/icons/tavily.jpg new file mode 100644 index 000000000..eda503126 Binary files /dev/null and b/public/icons/tavily.jpg differ