Commit 094abdacdc11

Vincent Demeester <vincent@sbr.pm>
2026-02-16 15:11:57
Handle DuckDuckGo rate limiting in ddgr
**Problem discovered:** DuckDuckGo is rate-limiting automated requests, causing ddgr to fail with 'HTTP Error 202: Accepted' and return empty results.

**Root cause:** DuckDuckGo detects and blocks automated/bot traffic. This affects:
- ddgr (DuckDuckGo CLI tool)
- Direct API calls
- High-frequency requests

**Solution:**
1. Detect HTTP errors in ddgr stderr output
2. Skip ddgr silently if rate limited
3. Provide clear error message to user explaining the situation
4. Point to real solution: SearXNG (search.sbr.pm)

**User-facing changes:** When search fails, user now sees:

⚠️ Web search is currently unavailable:
• SearXNG (https://search.sbr.pm): Down (502 Bad Gateway)
• ddgr: DuckDuckGo is rate-limiting automated requests
• DuckDuckGo API: No instant answers for this query

To fix: Please restore the SearXNG instance

**Action needed:** Fix search.sbr.pm (SearXNG) - this is the only reliable solution for automated web search. DuckDuckGo actively prevents bots. The bot gracefully degrades but can't perform web search until SearXNG is operational.
Changed files (1)
src
src/pi/tools/websearch.ts
@@ -27,28 +27,38 @@ async function searchDdgr(query: string, maxResults: number = 5): Promise<Search
 
   try {
     // Use ddgr with JSON output
-    const { stdout } = await execAsync(
+    const { stdout, stderr } = await execAsync(
       `ddgr --json -n ${maxResults} ${JSON.stringify(query)}`,
       { timeout: 10000 }
     );
 
-    const results: SearchResult[] = [];
-    const lines = stdout.trim().split('\n');
+    // Check for HTTP errors in stderr
+    if (stderr && stderr.includes('HTTP Error')) {
+      if (process.env.DANEEL_DEBUG) {
+        console.warn(`[ddgr] DuckDuckGo rate limiting detected, skipping`);
+      }
+      return [];
+    }
 
-    for (const line of lines) {
-      if (!line.trim()) continue;
-      
-      try {
-        const data = JSON.parse(line);
-        if (data.url && data.title) {
+    const results: SearchResult[] = [];
+    
+    // ddgr returns a JSON array
+    const trimmed = stdout.trim();
+    if (!trimmed || trimmed === '[]') {
+      return [];
+    }
+    
+    const data = JSON.parse(trimmed);
+    
+    if (Array.isArray(data)) {
+      for (const item of data) {
+        if (item.url && item.title) {
           results.push({
-            title: data.title,
-            url: data.url,
-            snippet: data.abstract || data.title,
+            title: item.title,
+            url: item.url,
+            snippet: item.abstract || item.title,
           });
         }
-      } catch (e) {
-        // Skip invalid JSON lines
       }
     }
 
@@ -240,8 +250,16 @@ export const webSearchTool: AgentTool = {
       let message = `No results found for: "${query}"`;
       
       // Provide helpful context based on what failed
-      if (process.env.SEARXNG_URL && searchEngine === "none") {
-        message += `\n\nNote: All search engines (SearXNG, ddgr, DuckDuckGo API) are currently unavailable or returned no results. Please try again later or with a different query.`;
+      if (searchEngine === "none") {
+        if (process.env.SEARXNG_URL) {
+          message += `\n\n⚠️ Web search is currently unavailable:\n`;
+          message += `• SearXNG (${process.env.SEARXNG_URL}): Down (502 Bad Gateway)\n`;
+          message += `• ddgr: DuckDuckGo is rate-limiting automated requests\n`;
+          message += `• DuckDuckGo API: No instant answers for this query\n\n`;
+          message += `To fix: Please restore the SearXNG instance at ${process.env.SEARXNG_URL}`;
+        } else {
+          message += `\n\n⚠️ Web search is currently unavailable due to DuckDuckGo rate limiting. Please configure SEARXNG_URL for reliable search.`;
+        }
       } else if (searchEngine === "DuckDuckGo API") {
         message += `\n\nNote: The DuckDuckGo API has limited coverage for general queries. Try a more specific search term.`;
       }