feature/pi-refactor
  1/**
  2 * Web Search Tool - Search the web using SearXNG, ddgr, or DuckDuckGo API
  3 * 
  4 * Tries in order: SearXNG → ddgr → DuckDuckGo API
  5 */
  6
import { exec, execFile } from "child_process";
import { promisify } from "util";

import type { AgentTool } from "@mariozechner/pi-agent-core";
import { Type } from "@sinclair/typebox";
 11
 12const execAsync = promisify(exec);
 13
 14interface SearchResult {
 15  title: string;
 16  url: string;
 17  snippet: string;
 18}
 19
 20/**
 21 * Search using ddgr (DuckDuckGo command-line tool)
 22 */
 23async function searchDdgr(query: string, maxResults: number = 5): Promise<SearchResult[]> {
 24  if (!query.trim()) {
 25    return [];
 26  }
 27
 28  try {
 29    // Use ddgr with JSON output
 30    const { stdout, stderr } = await execAsync(
 31      `ddgr --json -n ${maxResults} ${JSON.stringify(query)}`,
 32      { timeout: 10000 }
 33    );
 34
 35    // Check for HTTP errors in stderr
 36    if (stderr && stderr.includes('HTTP Error')) {
 37      if (process.env.DANEEL_DEBUG) {
 38        console.warn(`[ddgr] DuckDuckGo rate limiting detected, skipping`);
 39      }
 40      return [];
 41    }
 42
 43    const results: SearchResult[] = [];
 44    
 45    // ddgr returns a JSON array
 46    const trimmed = stdout.trim();
 47    if (!trimmed || trimmed === '[]') {
 48      return [];
 49    }
 50    
 51    const data = JSON.parse(trimmed);
 52    
 53    if (Array.isArray(data)) {
 54      for (const item of data) {
 55        if (item.url && item.title) {
 56          results.push({
 57            title: item.title,
 58            url: item.url,
 59            snippet: item.abstract || item.title,
 60          });
 61        }
 62      }
 63    }
 64
 65    return results;
 66  } catch (error) {
 67    // ddgr not available or failed
 68    if (process.env.DANEEL_DEBUG) {
 69      const msg = error instanceof Error ? error.message : String(error);
 70      console.warn(`[ddgr] Unavailable (${msg})`);
 71    }
 72    return [];
 73  }
 74}
 75
 76/**
 77 * Search using SearXNG instance
 78 */
 79async function searchSearXNG(query: string, maxResults: number = 5, baseUrl?: string): Promise<SearchResult[]> {
 80  if (!query.trim()) {
 81    return [];
 82  }
 83
 84  const searxngUrl = baseUrl || process.env.SEARXNG_URL || "https://search.sbr.pm";
 85
 86  try {
 87    const url = new URL("/search", searxngUrl);
 88    url.searchParams.set("q", query);
 89    url.searchParams.set("format", "json");
 90    url.searchParams.set("pageno", "1");
 91
 92    const response = await fetch(url.toString(), {
 93      headers: {
 94        'User-Agent': 'Daneel-Bot/1.0',
 95      },
 96    });
 97
 98    if (!response.ok) {
 99      throw new Error(`SearXNG search failed: ${response.statusText}`);
100    }
101
102    const data = await response.json();
103    const results: SearchResult[] = [];
104
105    if (data.results && Array.isArray(data.results)) {
106      for (const result of data.results.slice(0, maxResults)) {
107        if (result.url && result.title) {
108          results.push({
109            title: result.title,
110            url: result.url,
111            snippet: result.content || result.title,
112          });
113        }
114      }
115    }
116
117    return results;
118  } catch (error) {
119    // SearXNG unavailable - this is expected if instance is down
120    if (process.env.DANEEL_DEBUG) {
121      const msg = error instanceof Error ? error.message : String(error);
122      console.warn(`[SearXNG] Unavailable (${msg}), falling back to DuckDuckGo`);
123    }
124    return [];
125  }
126}
127
/**
 * Search using the DuckDuckGo instant answer API (no API key required).
 *
 * Results come from the response's `RelatedTopics` (one level of nested
 * `Topics` groups is flattened). When no related topic yields a result, the
 * abstract (`AbstractURL` + `AbstractText`) is used as a single result instead.
 *
 * @param query - Search terms; a blank query short-circuits to `[]`.
 * @param maxResults - Maximum number of related-topic results to collect.
 * @returns The collected results, or `[]` when the request fails or times out
 *   after 10 s (the error is logged). Never rejects.
 */
async function searchDuckDuckGo(query: string, maxResults: number = 5): Promise<SearchResult[]> {
  if (!query.trim()) {
    return [];
  }

  // Bound the request so a stalled connection cannot hang the caller.
  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), 10000);

  try {
    // Use DuckDuckGo instant answer API
    const encodedQuery = encodeURIComponent(query);
    const url = `https://api.duckduckgo.com/?q=${encodedQuery}&format=json&no_html=1&skip_disambig=1`;

    const response = await fetch(url, { signal: controller.signal });

    if (!response.ok) {
      // statusText is often empty (e.g. over HTTP/2), so include the numeric status.
      throw new Error(`Search failed: ${response.status} ${response.statusText}`.trim());
    }

    const payload: unknown = await response.json();
    const data: Record<string, unknown> =
      typeof payload === "object" && payload !== null
        ? (payload as Record<string, unknown>)
        : {};

    const results: SearchResult[] = [];

    // Converts one topic into a result; ignores malformed topics and respects the limit.
    const addTopic = (topic: unknown): void => {
      if (results.length >= maxResults) return;
      if (typeof topic !== "object" || topic === null) return;

      const { FirstURL, Text } = topic as Record<string, unknown>;
      if (typeof FirstURL !== "string" || typeof Text !== "string" || !FirstURL || !Text) return;

      results.push({
        // Topic text looks like "Title - description"; keep the part before the dash.
        title: Text.split(" - ")[0] || Text.substring(0, 100),
        url: FirstURL,
        snippet: Text,
      });
    };

    if (Array.isArray(data.RelatedTopics)) {
      for (const topic of data.RelatedTopics) {
        if (results.length >= maxResults) break;

        const nested =
          typeof topic === "object" && topic !== null
            ? (topic as Record<string, unknown>).Topics
            : undefined;

        if (Array.isArray(nested)) {
          // Handle nested topics
          nested.forEach(addTopic);
        } else {
          addTopic(topic);
        }
      }
    }

    // No related topics: fall back to the abstract, if it carries a URL.
    const { AbstractURL, AbstractText, Heading } = data;
    if (
      results.length === 0 &&
      typeof AbstractURL === "string" && AbstractURL &&
      typeof AbstractText === "string" && AbstractText
    ) {
      results.push({
        title: typeof Heading === "string" && Heading ? Heading : query,
        url: AbstractURL,
        snippet: AbstractText,
      });
    }

    return results;
  } catch (error) {
    console.error("Web search error:", error);
    return [];
  } finally {
    clearTimeout(timer);
  }
}
193
/**
 * Agent tool that searches the web and returns titles, URLs and snippets.
 *
 * Backends are tried in order until one returns at least one result:
 * SearXNG (only when `SEARXNG_URL` is set), then ddgr, then the DuckDuckGo
 * instant answer API. The tool result's `details.searchEngine` names the
 * backend that answered, or `"none"` when every backend came back empty.
 */
export const webSearchTool: AgentTool = {
  name: "web_search",
  label: "Web Search",
  description: "Search the web for current information. Uses SearXNG, ddgr, or DuckDuckGo API (in order of preference). Returns relevant web pages with titles, URLs, and snippets.",
  parameters: Type.Object({
    query: Type.String({ description: "The search query" }),
    maxResults: Type.Optional(Type.Number({
      description: "Maximum number of results to return (default: 5, max: 10)",
      minimum: 1,
      maximum: 10,
    })),
  }),
  execute: async (_toolCallId, params, _signal, _onUpdate) => {
    const { query, maxResults: requested } = params as { query: string; maxResults?: number };

    // Reject missing, non-string and blank queries alike.
    if (typeof query !== "string" || query.trim().length === 0) {
      return {
        content: [
          {
            type: "text" as const,
            text: "Error: Search query cannot be empty.",
          },
        ],
        details: { error: "empty_query" },
      };
    }

    // Enforce the advertised 1..10 range here as well; the schema alone does
    // not guarantee the value that reaches this function.
    const maxResults =
      typeof requested === "number" && Number.isFinite(requested)
        ? Math.min(10, Math.max(1, Math.floor(requested)))
        : 5;

    // Search backends in order of preference; SearXNG only when configured.
    const backends: { name: string; search: () => Promise<SearchResult[]> }[] = [];
    if (process.env.SEARXNG_URL) {
      backends.push({ name: "SearXNG", search: () => searchSearXNG(query, maxResults) });
    }
    backends.push(
      { name: "ddgr", search: () => searchDdgr(query, maxResults) },
      { name: "DuckDuckGo API", search: () => searchDuckDuckGo(query, maxResults) },
    );

    let results: SearchResult[] = [];
    let searchEngine = "none";
    const attempted: string[] = [];

    for (const backend of backends) {
      attempted.push(backend.name);
      const found = await backend.search();
      if (found.length > 0) {
        results = found.slice(0, maxResults);
        searchEngine = backend.name;
        break;
      }
    }

    if (results.length === 0) {
      // The backends report failure and "no matches" identically (an empty
      // list), so only state what is actually known: which ones were tried.
      let message = `No results found for: "${query}"`;
      message += `\n\n⚠️ No search backend returned results (tried: ${attempted.join(", ")}). `;
      message += `Each backend was either unavailable or had no matches for this query.`;
      if (!process.env.SEARXNG_URL) {
        message += ` Configure SEARXNG_URL to enable the SearXNG backend.`;
      }

      return {
        content: [
          {
            type: "text" as const,
            text: message,
          },
        ],
        details: { query, resultCount: 0, searchEngine },
      };
    }

    // Format results as a numbered list: title, then indented URL and snippet.
    const entries = results.map(
      (result, index) =>
        `${index + 1}. **${result.title}**\n   ${result.url}\n   ${result.snippet}`,
    );
    const resultText = `Search results for "${query}" (via ${searchEngine}):\n\n${entries.join("\n\n")}`;

    return {
      content: [
        {
          type: "text" as const,
          text: resultText.trim(),
        },
      ],
      details: {
        query,
        resultCount: results.length,
        searchEngine,
        results: results.map(({ title, url, snippet }) => ({ title, url, snippet })),
      },
    };
  },
};
306};