flake-update-20260505
  1/**
  2 * Search backends for multi-backend web search.
  3 *
  4 * Each backend implements the SearchBackend interface.
  5 * Backends are tried in priority order with automatic fallback.
  6 */
  7
  8export interface SearchResult {
  9  title: string;
 10  url: string;
 11  snippet: string;
 12}
 13
 14export interface SearchBackend {
 15  name: string;
 16  search(query: string, maxResults: number, signal?: AbortSignal, engines?: string): Promise<SearchResult[]>;
 17  isAvailable(): Promise<boolean>;
 18}
 19
 20/**
 21 * Brave Search API backend - uses the Brave Search API with an API key.
 22 * Primary backend when BRAVE_API_KEY is set.
 23 * Free tier: 2,000 queries/month. See https://brave.com/search/api/
 24 */
 25export class BraveAPIBackend implements SearchBackend {
 26  name = "Brave API";
 27  private apiKey: string;
 28
 29  constructor(apiKey: string) {
 30    this.apiKey = apiKey;
 31  }
 32
 33  async isAvailable(): Promise<boolean> {
 34    return !!this.apiKey;
 35  }
 36
 37  async search(query: string, maxResults: number, signal?: AbortSignal): Promise<SearchResult[]> {
 38    const url = `https://api.search.brave.com/res/v1/web/search?q=${encodeURIComponent(query)}&count=${maxResults}`;
 39    const response = await fetch(url, {
 40      headers: {
 41        "Accept": "application/json",
 42        "Accept-Encoding": "gzip",
 43        "X-Subscription-Token": this.apiKey,
 44      },
 45      signal,
 46    });
 47
 48    if (!response.ok) {
 49      throw new Error(`Brave API request failed with status ${response.status}`);
 50    }
 51
 52    const data = await response.json();
 53    const results: SearchResult[] = (data.web?.results || []).slice(0, maxResults).map((r: any) => ({
 54      title: r.title || "(no title)",
 55      url: r.url || "",
 56      snippet: r.description || "",
 57    }));
 58
 59    return results;
 60  }
 61}
 62
 63/**
 64 * SearXNG backend - queries a self-hosted SearXNG instance via JSON API.
 65 * Fallback backend for unlimited private searches.
 66 */
 67export class SearXNGBackend implements SearchBackend {
 68  name = "SearXNG";
 69  private baseUrl: string;
 70
 71  constructor(baseUrl: string) {
 72    this.baseUrl = baseUrl.replace(/\/$/, "");
 73  }
 74
 75  async isAvailable(): Promise<boolean> {
 76    try {
 77      const response = await fetch(`${this.baseUrl}/search?q=test&format=json`, {
 78        signal: AbortSignal.timeout(5000),
 79      });
 80      return response.ok;
 81    } catch {
 82      return false;
 83    }
 84  }
 85
 86  async search(query: string, maxResults: number, signal?: AbortSignal, engines?: string): Promise<SearchResult[]> {
 87    let url = `${this.baseUrl}/search?q=${encodeURIComponent(query)}&format=json`;
 88    if (engines) {
 89      url += `&engines=${encodeURIComponent(engines)}`;
 90    }
 91    const response = await fetch(url, { signal });
 92
 93    if (!response.ok) {
 94      throw new Error(`SearXNG request failed with status ${response.status}`);
 95    }
 96
 97    const data = await response.json();
 98    const results: SearchResult[] = (data.results || []).slice(0, maxResults).map((r: any) => ({
 99      title: r.title || "(no title)",
100      url: r.url || "",
101      snippet: r.content || "",
102    }));
103
104    return results;
105  }
106}
107
/**
 * DuckDuckGo backend - uses the DuckDuckGo Instant Answer API.
 * Fallback backend, no API key required.
 *
 * Note: The DDG Instant Answer API returns limited results (abstracts + related topics).
 * For better results, consider using ddgr CLI if available.
 */
export class DuckDuckGoBackend implements SearchBackend {
  name = "DuckDuckGo";

  /** Always resolves to `true`: the endpoint needs no API key. */
  async isAvailable(): Promise<boolean> {
    return true; // Always available, no API key needed
  }

  /**
   * Yield the leaf entries of a `RelatedTopics` list in document order.
   * The API mixes plain topics (`{ Text, FirstURL }`) with named groups
   * (`{ Name, Topics: [...] }`); groups are expanded in place so their
   * topics are not lost.
   */
  private static *leafTopics(topics: unknown): Generator<any> {
    if (!Array.isArray(topics)) return;
    for (const entry of topics) {
      if (entry && Array.isArray(entry.Topics)) {
        yield* DuckDuckGoBackend.leafTopics(entry.Topics);
      } else {
        yield entry;
      }
    }
  }

  /**
   * Query the Instant Answer API and build hits from the abstract (if any)
   * followed by related topics.
   *
   * @param query      Free-text search query.
   * @param maxResults Upper bound on returned hits, abstract included; a
   *                   non-positive value returns `[]` without a network call.
   * @param signal     Optional abort signal passed to `fetch`.
   * @throws Error when the HTTP status is not OK.
   */
  async search(query: string, maxResults: number, signal?: AbortSignal): Promise<SearchResult[]> {
    if (!(maxResults > 0)) {
      return [];
    }

    const url = `https://api.duckduckgo.com/?q=${encodeURIComponent(query)}&format=json&no_html=1`;
    const response = await fetch(url, { signal });

    if (!response.ok) {
      throw new Error(`DuckDuckGo API request failed with status ${response.status}`);
    }

    const data = await response.json();
    const results: SearchResult[] = [];

    // The abstract, when present, is the first hit (maxResults >= 1 here).
    if (data.AbstractText && data.AbstractURL) {
      results.push({
        title: data.Heading || "Abstract",
        url: data.AbstractURL,
        snippet: data.AbstractText,
      });
    }

    // Related topics, including those nested inside named groups.
    for (const topic of DuckDuckGoBackend.leafTopics(data.RelatedTopics)) {
      if (results.length >= maxResults) break;
      if (typeof topic?.Text === "string" && topic.Text && topic.FirstURL) {
        results.push({
          // Topic text is typically "Title - description"; use the part before the dash.
          title: topic.Text.split(" - ")[0] || topic.Text,
          url: topic.FirstURL,
          snippet: topic.Text,
        });
      }
    }

    return results;
  }
}
157
/**
 * ddgr CLI backend - uses the ddgr command-line tool for DuckDuckGo searches.
 * Provides much better results than the DDG Instant Answer API.
 * Requires ddgr to be installed.
 */
export class DdgrBackend implements SearchBackend {
  /** Largest value ddgr accepts for `-n` (results per page). */
  private static readonly MAX_PER_PAGE = 25;

  name = "ddgr";
  private execFn: (cmd: string, args: string[], opts?: any) => Promise<{ stdout: string; stderr: string; code: number }>;

  /**
   * @param execFn Process runner used to invoke `which` and `ddgr`; it resolves
   *               with the captured stdout/stderr and the exit code.
   */
  constructor(execFn: (cmd: string, args: string[], opts?: any) => Promise<{ stdout: string; stderr: string; code: number }>) {
    this.execFn = execFn;
  }

  /** Available when `which ddgr` exits with 0; a failing runner counts as unavailable. */
  async isAvailable(): Promise<boolean> {
    try {
      const result = await this.execFn("which", ["ddgr"]);
      return result.code === 0;
    } catch {
      return false;
    }
  }

  /**
   * Run `ddgr --json` and normalize its output.
   *
   * @param query      Free-text search query; passed after `--` so a query that
   *                   starts with `-` is not parsed as a ddgr option.
   * @param maxResults Upper bound on returned hits. The value given to `-n` is
   *                   clamped to 1..25 (ddgr rejects larger values and treats 0
   *                   as "no fixed page size"); a non-positive value returns
   *                   `[]` without spawning ddgr.
   * @param signal     Optional abort signal forwarded to the process runner.
   * @throws Error when ddgr exits non-zero or prints JSON that is not an array;
   *         SyntaxError when stdout is not valid JSON.
   */
  async search(query: string, maxResults: number, signal?: AbortSignal): Promise<SearchResult[]> {
    if (!(maxResults > 0)) {
      return [];
    }

    const perPage = Math.min(Math.max(1, Math.trunc(maxResults)), DdgrBackend.MAX_PER_PAGE);
    const result = await this.execFn("ddgr", ["--json", "-n", String(perPage), "--", query], { signal });

    if (result.code !== 0) {
      throw new Error(`ddgr failed: ${result.stderr}`);
    }

    const data: unknown = JSON.parse(result.stdout);
    if (data == null) {
      return [];
    }
    if (!Array.isArray(data)) {
      throw new Error("ddgr returned unexpected JSON (expected an array of results)");
    }

    return data.slice(0, perPage).map((r: any) => ({
      title: r?.title || "(no title)",
      url: r?.url || "",
      snippet: r?.abstract || "",
    }));
  }
}
195
/**
 * Playwright browser search backend - uses headless Chrome via a Python script.
 * Fallback when API-based backends fail due to rate limiting or bot detection.
 * Supports Bing (reliable, no CAPTCHA) and Brave Search (rate-limited after a few queries);
 * the constructor also accepts "mojeek" and "ecosia" and passes them to the script unchanged.
 *
 * Requirements:
 * - Python 3 with playwright package (auto-bootstrapped in ~/.local/share/pi/playwright-env)
 * - System Chrome (google-chrome-stable) or Playwright's bundled Chromium
 */
export class PlaywrightBackend implements SearchBackend {
  /** Path probed for a system-wide Chrome; when it is executable the Chromium download is skipped. */
  private static readonly SYSTEM_CHROME = "/run/current-system/sw/bin/google-chrome-stable";

  name: string;
  private engine: string;
  private execFn: (cmd: string, args: string[], opts?: any) => Promise<{ stdout: string; stderr: string; code: number }>;
  private scriptPath: string;
  private venvPath: string;

  /**
   * @param engine    Search engine name handed to the Python script.
   * @param execFn    Process runner; resolves with stdout/stderr and the exit code.
   * @param scriptDir Directory that contains `browser-search.py`.
   */
  constructor(
    engine: "bing" | "brave" | "mojeek" | "ecosia",
    execFn: (cmd: string, args: string[], opts?: any) => Promise<{ stdout: string; stderr: string; code: number }>,
    scriptDir: string,
  ) {
    this.engine = engine;
    this.name = `Playwright/${engine}`;
    this.execFn = execFn;
    this.scriptPath = `${scriptDir}/browser-search.py`;
    // NOTE(review): if HOME is unset this path starts with the literal "undefined".
    this.venvPath = `${process.env.HOME}/.local/share/pi/playwright-env`;
  }

  /** Interpreter inside the managed venv. */
  private get pythonPath(): string {
    return `${this.venvPath}/bin/python3`;
  }

  /**
   * True when the venv interpreter runs and can `import playwright`.
   * A runner failure (e.g. the interpreter does not exist) counts as "no".
   */
  private async hasPlaywright(): Promise<boolean> {
    try {
      const check = await this.execFn(this.pythonPath, ["-c", "import playwright"], { timeout: 5000 });
      return check.code === 0;
    } catch {
      return false;
    }
  }

  /**
   * Ensure the Python venv with playwright exists.
   * Creates it on first use, reuses on subsequent calls.
   *
   * @returns Path of the venv's Python interpreter.
   * @throws Error when `uv` is missing or any bootstrap step exits non-zero.
   */
  private async ensureVenv(): Promise<string> {
    const pythonPath = this.pythonPath;

    // Fast path: venv already exists and has playwright.
    if (await this.hasPlaywright()) {
      return pythonPath;
    }

    // Create venv and install playwright
    const uv = await this.execFn("which", ["uv"]);
    if (uv.code !== 0) {
      throw new Error("uv not found — required to bootstrap playwright venv");
    }

    const create = await this.execFn("uv", ["venv", this.venvPath], { timeout: 30000 });
    if (create.code !== 0) {
      throw new Error(`Failed to create venv: ${create.stderr}`);
    }

    const install = await this.execFn(
      "uv", ["pip", "install", "--python", pythonPath, "playwright"],
      { timeout: 60000 },
    );
    if (install.code !== 0) {
      throw new Error(`Failed to install playwright: ${install.stderr}`);
    }

    // Install browser (only if no system Chrome available)
    const chromeCheck = await this.execFn("test", ["-x", PlaywrightBackend.SYSTEM_CHROME]);
    if (chromeCheck.code !== 0) {
      const browserInstall = await this.execFn(
        `${this.venvPath}/bin/playwright`, ["install", "chromium"],
        { timeout: 120000 },
      );
      if (browserInstall.code !== 0) {
        throw new Error(`Failed to install Playwright browser: ${browserInstall.stderr}`);
      }
    }

    return pythonPath;
  }

  /**
   * Report whether `search()` has a chance of working: either the venv is
   * already usable, or `uv` is installed so the venv can be bootstrapped.
   * This mirrors what `ensureVenv()` requires; a system Chrome alone is not
   * enough because the search script always runs under the venv's Python.
   */
  async isAvailable(): Promise<boolean> {
    try {
      if (await this.hasPlaywright()) return true;

      const uv = await this.execFn("which", ["uv"]);
      return uv.code === 0;
    } catch {
      return false;
    }
  }

  /**
   * Run the browser search script and normalize its JSON output.
   *
   * @param query      Free-text search query (last script argument).
   * @param maxResults Result limit, passed to the script as its second argument.
   * @param signal     Optional abort signal forwarded to the script process
   *                   (the venv bootstrap does not receive it).
   * @throws Error when bootstrapping fails, the script exits non-zero, or it
   *         prints JSON that is not an array; SyntaxError on invalid JSON.
   */
  async search(query: string, maxResults: number, signal?: AbortSignal): Promise<SearchResult[]> {
    const pythonPath = await this.ensureVenv();

    const result = await this.execFn(
      pythonPath,
      [this.scriptPath, this.engine, String(maxResults), query],
      { signal, timeout: 30000 },
    );

    if (result.code !== 0) {
      throw new Error(`Playwright/${this.engine} search failed: ${result.stderr}`);
    }

    const data: unknown = JSON.parse(result.stdout);
    if (data == null) {
      return [];
    }
    if (!Array.isArray(data)) {
      throw new Error(`Playwright/${this.engine} returned unexpected JSON (expected an array of results)`);
    }

    return data.map((r: any) => ({
      title: r?.title || "(no title)",
      url: r?.url || "",
      snippet: r?.snippet || "",
    }));
  }
}
307
/**
 * Multi-backend search with automatic fallback.
 *
 * Tries backends in order, falling back to the next one on failure.
 * Returns results from the first backend that succeeds (even if it found
 * nothing). Backends are not probed with `isAvailable()` here.
 *
 * @param backends   Backends to try, highest priority first.
 * @param query      Free-text search query.
 * @param maxResults Upper bound on returned hits, forwarded to each backend.
 * @param signal     Optional abort signal forwarded to each backend. Once it
 *                   is aborted no further backend is started.
 * @param engines    Optional engine selector forwarded to each backend.
 * @returns The hits, the name of the backend that produced them (`"none"` when
 *          every backend failed or the search was aborted), and one message
 *          per failed attempt.
 */
export async function searchWithFallback(
  backends: SearchBackend[],
  query: string,
  maxResults: number,
  signal?: AbortSignal,
  engines?: string,
): Promise<{ results: SearchResult[]; backend: string; errors: string[] }> {
  const errors: string[] = [];

  for (const backend of backends) {
    // A cancelled search should not spin up further backends (some of them
    // launch processes that never see the signal).
    if (signal?.aborted) {
      errors.push(`Search aborted before trying ${backend.name}`);
      break;
    }

    try {
      const results = await backend.search(query, maxResults, signal, engines);
      return { results, backend: backend.name, errors };
    } catch (e: unknown) {
      // Throwables are not guaranteed to be Error instances; avoid recording "undefined".
      let reason: string;
      if (e instanceof Error) {
        reason = e.message;
      } else if (typeof e === "object" && e !== null && "message" in e) {
        reason = String((e as { message: unknown }).message);
      } else {
        reason = String(e);
      }
      errors.push(`${backend.name}: ${reason}`);
    }
  }

  return { results: [], backend: "none", errors };
}
334
/**
 * Render search hits as plain text for LLM consumption.
 *
 * Output is a `Results from <backend>:` header followed by numbered entries
 * separated by blank lines. Each entry lists the title and, when non-empty,
 * an indented `URL:` line and an indented snippet line.
 *
 * @param results Hits to render.
 * @param backend Name of the backend that produced them.
 * @returns The rendered text, or `"No results found."` for an empty list.
 */
export function formatResults(results: SearchResult[], backend: string): string {
  if (results.length === 0) {
    return "No results found.";
  }

  const entries = results.map((hit, index) => {
    let entry = `${index + 1}. ${hit.title}`;
    if (hit.url) {
      entry += `\n   URL: ${hit.url}`;
    }
    if (hit.snippet) {
      entry += `\n   ${hit.snippet}`;
    }
    return entry;
  });

  return `Results from ${backend}:\n\n${entries.join("\n\n")}`;
}