/**
 * Search backends for multi-backend web search.
 *
 * Each backend implements the SearchBackend interface.
 * Backends are tried in priority order with automatic fallback.
 */
7
/**
 * One hit produced by a search backend.
 */
export interface SearchResult {
  /** Display title of the hit. */
  title: string;
  /** Address the hit points to. */
  url: string;
  /** Short text excerpt describing the hit. */
  snippet: string;
}
13
/**
 * Contract shared by every search backend in this module.
 */
export interface SearchBackend {
  /** Label identifying the backend (used when reporting results and errors). */
  name: string;

  /** Resolves to whether the backend can currently be used. */
  isAvailable(): Promise<boolean>;

  /**
   * Runs a query against the backend.
   *
   * @param query Search terms.
   * @param maxResults Upper bound on the number of results wanted.
   * @param signal Optional abort signal for cancelling the request.
   * @param engines Optional engine selection; backends may ignore it.
   * @returns The results found by the backend.
   */
  search(
    query: string,
    maxResults: number,
    signal?: AbortSignal,
    engines?: string,
  ): Promise<SearchResult[]>;
}
19
/**
 * Brave Search API backend - uses the Brave Search API with an API key.
 * Primary backend when BRAVE_API_KEY is set.
 * Free tier: 2,000 queries/month. See https://brave.com/search/api/
 */
export class BraveAPIBackend implements SearchBackend {
  name = "Brave API";
  private apiKey: string;

  /**
   * @param apiKey Subscription token sent in the `X-Subscription-Token` header.
   */
  constructor(apiKey: string) {
    this.apiKey = apiKey;
  }

  /**
   * Resolves to true when a non-empty API key was supplied.
   * No network request is made.
   */
  async isAvailable(): Promise<boolean> {
    return !!this.apiKey;
  }

  /**
   * Queries the Brave web search endpoint.
   *
   * @param query Search terms (URL-encoded before sending).
   * @param maxResults Upper bound on returned results; non-positive values yield `[]`
   *   without issuing a request.
   * @param signal Optional abort signal forwarded to `fetch`.
   * @returns Up to `maxResults` results mapped from `web.results` of the response.
   * @throws Error when the HTTP response status is not OK.
   */
  async search(query: string, maxResults: number, signal?: AbortSignal): Promise<SearchResult[]> {
    // Also rejects NaN; a negative limit would make slice() drop items from the end.
    if (!(maxResults > 0)) return [];

    const limit = Math.max(1, Math.floor(maxResults));
    // The endpoint rejects `count` values above 20, so never ask for more than that.
    const maxCount = 20;
    const count = Math.min(limit, maxCount);

    const url = `https://api.search.brave.com/res/v1/web/search?q=${encodeURIComponent(query)}&count=${count}`;
    const response = await fetch(url, {
      headers: {
        "Accept": "application/json",
        "Accept-Encoding": "gzip",
        "X-Subscription-Token": this.apiKey,
      },
      signal,
    });

    if (!response.ok) {
      throw new Error(`Brave API request failed with status ${response.status}`);
    }

    const data = (await response.json()) as { web?: { results?: unknown } } | null;
    const rawResults = data?.web?.results;
    // A missing or malformed result list is treated as "no results".
    const hits: Array<{ title?: string; url?: string; description?: string } | null> =
      Array.isArray(rawResults) ? rawResults : [];

    return hits.slice(0, limit).map((hit) => ({
      title: hit?.title || "(no title)",
      url: hit?.url || "",
      snippet: hit?.description || "",
    }));
  }
}
62
/**
 * SearXNG backend - queries a self-hosted SearXNG instance via JSON API.
 * Fallback backend for unlimited private searches.
 */
export class SearXNGBackend implements SearchBackend {
  name = "SearXNG";
  private baseUrl: string;

  /**
   * @param baseUrl Root URL of the SearXNG instance; any trailing slashes are removed
   *   so that request paths can be appended without producing `//search`.
   */
  constructor(baseUrl: string) {
    this.baseUrl = baseUrl.replace(/\/+$/, "");
  }

  /**
   * Probes the instance by issuing a small JSON search with a 5 second timeout.
   *
   * @returns True when the probe response status is OK; false on a non-OK status
   *   or when the request throws (including the timeout).
   */
  async isAvailable(): Promise<boolean> {
    try {
      const response = await fetch(`${this.baseUrl}/search?q=test&format=json`, {
        signal: AbortSignal.timeout(5000),
      });
      return response.ok;
    } catch {
      return false;
    }
  }

  /**
   * Runs a search against the instance's JSON endpoint.
   *
   * @param query Search terms (URL-encoded before sending).
   * @param maxResults Upper bound on returned results; non-positive values yield `[]`
   *   without issuing a request.
   * @param signal Optional abort signal forwarded to `fetch`.
   * @param engines Optional value for the `engines` query parameter.
   * @returns Up to `maxResults` results mapped from the response's `results` list.
   * @throws Error when the HTTP response status is not OK.
   */
  async search(query: string, maxResults: number, signal?: AbortSignal, engines?: string): Promise<SearchResult[]> {
    // Also rejects NaN; a negative limit would make slice() drop items from the end.
    if (!(maxResults > 0)) return [];
    const limit = Math.max(1, Math.floor(maxResults));

    let url = `${this.baseUrl}/search?q=${encodeURIComponent(query)}&format=json`;
    if (engines) {
      url += `&engines=${encodeURIComponent(engines)}`;
    }
    const response = await fetch(url, { signal });

    if (!response.ok) {
      throw new Error(`SearXNG request failed with status ${response.status}`);
    }

    const data = (await response.json()) as { results?: unknown } | null;
    const rawResults = data?.results;
    // A missing or malformed result list is treated as "no results".
    const hits: Array<{ title?: string; url?: string; content?: string } | null> =
      Array.isArray(rawResults) ? rawResults : [];

    return hits.slice(0, limit).map((hit) => ({
      title: hit?.title || "(no title)",
      url: hit?.url || "",
      snippet: hit?.content || "",
    }));
  }
}
107
/**
 * DuckDuckGo backend - uses the DuckDuckGo Instant Answer API.
 * Fallback backend, no API key required.
 *
 * Note: The DDG Instant Answer API returns limited results (abstracts + related topics).
 * For better results, consider using ddgr CLI if available.
 */
export class DuckDuckGoBackend implements SearchBackend {
  name = "DuckDuckGo";

  /** Always resolves to true: no API key or local tooling is needed. */
  async isAvailable(): Promise<boolean> {
    return true;
  }

  /**
   * Flattens a `RelatedTopics` list into plain topic entries, preserving order.
   * Entries carrying a nested `Topics` array (category groups) are expanded in place.
   */
  private flattenTopics(topics: unknown): Array<{ Text?: unknown; FirstURL?: unknown }> {
    if (!Array.isArray(topics)) return [];

    const flat: Array<{ Text?: unknown; FirstURL?: unknown }> = [];
    for (const entry of topics) {
      if (!entry) continue;
      if (Array.isArray(entry.Topics)) {
        flat.push(...this.flattenTopics(entry.Topics));
      } else {
        flat.push(entry);
      }
    }
    return flat;
  }

  /**
   * Queries the Instant Answer API and converts the abstract and related topics
   * into search results.
   *
   * @param query Search terms (URL-encoded before sending).
   * @param maxResults Upper bound on returned results; non-positive values yield `[]`
   *   without issuing a request.
   * @param signal Optional abort signal forwarded to `fetch`.
   * @returns The abstract (when both text and URL are present) followed by related
   *   topics, capped at `maxResults` entries in total.
   * @throws Error when the HTTP response status is not OK.
   */
  async search(query: string, maxResults: number, signal?: AbortSignal): Promise<SearchResult[]> {
    // Without this guard the abstract would be returned even when zero results were requested.
    if (!(maxResults > 0)) return [];

    const url = `https://api.duckduckgo.com/?q=${encodeURIComponent(query)}&format=json&no_html=1`;
    const response = await fetch(url, { signal });

    if (!response.ok) {
      throw new Error(`DuckDuckGo API request failed with status ${response.status}`);
    }

    const data = (await response.json()) as {
      Heading?: string;
      AbstractText?: string;
      AbstractURL?: string;
      RelatedTopics?: unknown;
    } | null;
    const results: SearchResult[] = [];

    // Add abstract if available
    if (data?.AbstractText && data.AbstractURL) {
      results.push({
        title: data.Heading || "Abstract",
        url: data.AbstractURL,
        snippet: data.AbstractText,
      });
    }

    // Add related topics, including those nested inside category groups
    for (const topic of this.flattenTopics(data?.RelatedTopics)) {
      if (results.length >= maxResults) break;
      const text = topic.Text;
      const link = topic.FirstURL;
      if (typeof text === "string" && text && typeof link === "string" && link) {
        results.push({
          // Topic text is typically "Title - description"; fall back to the full text.
          title: text.split(" - ")[0] || text,
          url: link,
          snippet: text,
        });
      }
    }

    return results;
  }
}
157
/**
 * ddgr CLI backend - uses the ddgr command-line tool for DuckDuckGo searches.
 * Provides much better results than the DDG Instant Answer API.
 * Requires ddgr to be installed.
 */
export class DdgrBackend implements SearchBackend {
  name = "ddgr";
  private execFn: (cmd: string, args: string[], opts?: any) => Promise<{ stdout: string; stderr: string; code: number }>;

  /**
   * @param execFn Process runner used to invoke `which` and `ddgr`; it resolves with
   *   the captured stdout/stderr and the exit code.
   */
  constructor(execFn: (cmd: string, args: string[], opts?: any) => Promise<{ stdout: string; stderr: string; code: number }>) {
    this.execFn = execFn;
  }

  /**
   * Resolves to true when `which ddgr` exits with code 0; false otherwise,
   * including when the runner itself throws.
   */
  async isAvailable(): Promise<boolean> {
    try {
      const result = await this.execFn("which", ["ddgr"]);
      return result.code === 0;
    } catch {
      return false;
    }
  }

  /**
   * Runs `ddgr --json` and maps its output to search results.
   *
   * @param query Search terms, passed as a single positional argument.
   * @param maxResults Upper bound on returned results; non-positive values yield `[]`
   *   without running ddgr.
   * @param signal Optional abort signal handed to the process runner.
   * @returns Up to `maxResults` results.
   * @throws Error when ddgr exits non-zero or prints output that is not a JSON array.
   */
  async search(query: string, maxResults: number, signal?: AbortSignal): Promise<SearchResult[]> {
    if (!(maxResults > 0)) return [];
    const limit = Math.max(1, Math.floor(maxResults));

    // ddgr only accepts -n values up to 25; larger values make it exit with a usage error.
    const perPage = Math.min(limit, 25);

    // "--" ends option parsing so a query such as "-site:example.com foo" is not
    // mistaken for command-line flags.
    const result = await this.execFn("ddgr", ["--json", "-n", String(perPage), "--", query], { signal });

    if (result.code !== 0) {
      throw new Error(`ddgr failed: ${result.stderr}`);
    }

    let data: unknown;
    try {
      data = JSON.parse(result.stdout);
    } catch {
      throw new Error("ddgr failed: output was not valid JSON");
    }
    if (data == null) return [];
    if (!Array.isArray(data)) {
      throw new Error("ddgr failed: expected a JSON array of results");
    }

    const hits: Array<{ title?: string; url?: string; abstract?: string } | null> = data;
    return hits.slice(0, limit).map((hit) => ({
      title: hit?.title || "(no title)",
      url: hit?.url || "",
      snippet: hit?.abstract || "",
    }));
  }
}
195
/**
 * Playwright browser search backend - uses headless Chrome via a Python script.
 * Fallback when API-based backends fail due to rate limiting or bot detection.
 * Supports Bing (reliable, no CAPTCHA) and Brave Search (rate-limited after a few queries);
 * the constructor also accepts "mojeek" and "ecosia".
 *
 * Requirements:
 * - Python 3 with playwright package (auto-bootstrapped in ~/.local/share/pi/playwright-env)
 * - System Chrome (google-chrome-stable) or Playwright's bundled Chromium
 */
export class PlaywrightBackend implements SearchBackend {
  name: string;
  private engine: string;
  private execFn: (cmd: string, args: string[], opts?: any) => Promise<{ stdout: string; stderr: string; code: number }>;
  private scriptPath: string;
  private venvPath: string;

  /**
   * @param engine Search engine name handed to the Python script.
   * @param execFn Process runner; resolves with captured stdout/stderr and the exit code.
   * @param scriptDir Directory that contains `browser-search.py`.
   */
  constructor(
    engine: "bing" | "brave" | "mojeek" | "ecosia",
    execFn: (cmd: string, args: string[], opts?: any) => Promise<{ stdout: string; stderr: string; code: number }>,
    scriptDir: string,
  ) {
    this.engine = engine;
    this.name = `Playwright/${engine}`;
    this.execFn = execFn;
    this.scriptPath = `${scriptDir}/browser-search.py`;
    // NOTE(review): when HOME is unset this path starts with the literal "undefined".
    this.venvPath = `${process.env.HOME}/.local/share/pi/playwright-env`;
  }

  /**
   * Reports whether the given interpreter can `import playwright`.
   * Any failure to run the interpreter counts as "no".
   */
  private async hasPlaywright(pythonPath: string): Promise<boolean> {
    try {
      const check = await this.execFn(pythonPath, ["-c", "import playwright"], { timeout: 5000 });
      return check.code === 0;
    } catch {
      return false;
    }
  }

  /**
   * Ensure the Python venv with playwright exists.
   * Creates it on first use, reuses on subsequent calls.
   *
   * @returns Path of the venv's `python3` interpreter.
   * @throws Error when `uv` is missing or any bootstrap step exits non-zero.
   */
  private async ensureVenv(): Promise<string> {
    const pythonPath = `${this.venvPath}/bin/python3`;

    // Reuse the venv when it already exists and has playwright
    if (await this.hasPlaywright(pythonPath)) return pythonPath;

    // Otherwise create the venv and install playwright
    const uv = await this.execFn("which", ["uv"]);
    if (uv.code !== 0) {
      throw new Error("uv not found — required to bootstrap playwright venv");
    }

    const create = await this.execFn("uv", ["venv", this.venvPath], { timeout: 30000 });
    if (create.code !== 0) {
      throw new Error(`Failed to create venv: ${create.stderr}`);
    }

    const install = await this.execFn(
      "uv", ["pip", "install", "--python", pythonPath, "playwright"],
      { timeout: 60000 },
    );
    if (install.code !== 0) {
      throw new Error(`Failed to install playwright: ${install.stderr}`);
    }

    // Install browser (only if no system Chrome available)
    const chromeCheck = await this.execFn("test", ["-x", "/run/current-system/sw/bin/google-chrome-stable"]);
    if (chromeCheck.code !== 0) {
      const browserInstall = await this.execFn(
        `${this.venvPath}/bin/playwright`, ["install", "chromium"],
        { timeout: 120000 },
      );
      if (browserInstall.code !== 0) {
        throw new Error(`Failed to install Playwright browser: ${browserInstall.stderr}`);
      }
    }

    return pythonPath;
  }

  /**
   * Reports whether `search()` has what it needs to run: either a venv that can
   * already import playwright, or `uv` to bootstrap one.
   *
   * A system Chrome alone is not enough, because every search goes through the
   * Python venv; it only spares the Chromium download during bootstrap.
   */
  async isAvailable(): Promise<boolean> {
    try {
      if (await this.hasPlaywright(`${this.venvPath}/bin/python3`)) return true;

      const uv = await this.execFn("which", ["uv"]);
      return uv.code === 0;
    } catch {
      return false;
    }
  }

  /**
   * Runs the browser search script and maps its JSON output to search results.
   *
   * @param query Search terms, passed as the last positional argument of the script.
   * @param maxResults Upper bound on returned results; non-positive values yield `[]`
   *   without starting a browser.
   * @param signal Optional abort signal handed to the process runner.
   * @returns Up to `maxResults` results.
   * @throws Error when the venv cannot be prepared, the script exits non-zero, or
   *   its output is not a JSON array.
   */
  async search(query: string, maxResults: number, signal?: AbortSignal): Promise<SearchResult[]> {
    if (!(maxResults > 0)) return [];
    const limit = Math.max(1, Math.floor(maxResults));

    const pythonPath = await this.ensureVenv();

    const result = await this.execFn(
      pythonPath,
      [this.scriptPath, this.engine, String(limit), query],
      { signal, timeout: 30000 },
    );

    if (result.code !== 0) {
      throw new Error(`Playwright/${this.engine} search failed: ${result.stderr}`);
    }

    let data: unknown;
    try {
      data = JSON.parse(result.stdout);
    } catch {
      throw new Error(`Playwright/${this.engine} search failed: output was not valid JSON`);
    }
    if (data == null) return [];
    if (!Array.isArray(data)) {
      throw new Error(`Playwright/${this.engine} search failed: expected a JSON array of results`);
    }

    const hits: Array<{ title?: string; url?: string; snippet?: string } | null> = data;
    // Cap here as well in case the script returns more entries than requested.
    return hits.slice(0, limit).map((hit) => ({
      title: hit?.title || "(no title)",
      url: hit?.url || "",
      snippet: hit?.snippet || "",
    }));
  }
}
307
/**
 * Multi-backend search with automatic fallback.
 *
 * Tries backends in order, falling back to the next one on failure.
 * Returns results from the first backend that succeeds.
 *
 * @param backends Backends to try, in priority order.
 * @param query Search terms forwarded to each backend.
 * @param maxResults Upper bound on results, forwarded to each backend.
 * @param signal Optional abort signal forwarded to each backend. Once it is aborted,
 *   no further backends are tried.
 * @param engines Optional engine selection forwarded to each backend.
 * @returns The results and name of the first backend whose `search` resolved, plus the
 *   error messages collected from the backends that failed before it. When every
 *   backend fails (or the search is aborted) `results` is empty and `backend` is "none".
 */
export async function searchWithFallback(
  backends: SearchBackend[],
  query: string,
  maxResults: number,
  signal?: AbortSignal,
  engines?: string,
): Promise<{ results: SearchResult[]; backend: string; errors: string[] }> {
  const errors: string[] = [];

  for (const backend of backends) {
    // A cancelled search should stop here rather than start work on every remaining backend.
    if (signal?.aborted) {
      errors.push("Search aborted; remaining backends skipped");
      break;
    }

    try {
      const results = await backend.search(query, maxResults, signal, engines);
      return { results, backend: backend.name, errors };
    } catch (e: unknown) {
      // Thrown values are not guaranteed to be Error instances.
      const reason = e instanceof Error ? e.message : String(e);
      errors.push(`${backend.name}: ${reason}`);
    }
  }

  return { results: [], backend: "none", errors };
}
334
/**
 * Render search results as plain text suitable for handing to an LLM.
 *
 * @param results Results to render.
 * @param backend Name of the backend that produced them; shown in the header line.
 * @returns "No results found." for an empty list; otherwise a header followed by
 *   numbered entries separated by blank lines. URL and snippet lines are left out
 *   for entries where those fields are empty.
 */
export function formatResults(results: SearchResult[], backend: string): string {
  if (results.length === 0) return "No results found.";

  const entries: string[] = [];
  results.forEach((result, index) => {
    let entry = `${index + 1}. ${result.title}`;
    if (result.url) {
      entry += `\n URL: ${result.url}`;
    }
    if (result.snippet) {
      entry += `\n ${result.snippet}`;
    }
    entries.push(entry);
  });

  return `Results from ${backend}:\n\n${entries.join("\n\n")}`;
}