feature/pi-refactor
1/**
2 * Web Search Tool - Search the web using SearXNG, ddgr, or DuckDuckGo API
3 *
4 * Tries in order: SearXNG → ddgr → DuckDuckGo API
5 */
6
import { Type } from "@sinclair/typebox";
import type { AgentTool } from "@mariozechner/pi-agent-core";
import { exec, execFile } from "child_process";
import { promisify } from "util";
11
// Promise-returning form of child_process.exec, so shell commands can be awaited.
const execAsync = promisify(exec);
13
/**
 * A single web search hit, normalized to one shape regardless of which
 * search function in this file produced it.
 */
interface SearchResult {
  /** Title of the result. */
  title: string;
  /** URL of the result. */
  url: string;
  /**
   * Text excerpt for the result. The ddgr and SearXNG searches fall back to
   * the title when the backend supplies no excerpt.
   */
  snippet: string;
}
19
20/**
21 * Search using ddgr (DuckDuckGo command-line tool)
22 */
/** Promise-returning `execFile`: runs a binary directly, with no shell in between. */
const execFileAsync = promisify(execFile);

/**
 * Search using ddgr (DuckDuckGo command-line tool).
 *
 * The query is passed to ddgr as a single argv entry via `execFile`, never
 * through a shell, so shell metacharacters in the query (`$(...)`, backticks,
 * `$VAR`, quotes) are treated as literal search text.
 *
 * @param query - Search text; a blank query returns no results without running ddgr.
 * @param maxResults - Desired number of results (default 5). Floored and clamped to 1..25.
 * @returns Parsed results, or an empty array when ddgr is missing, times out,
 *          reports an HTTP error, or prints output that cannot be parsed.
 */
async function searchDdgr(query: string, maxResults: number = 5): Promise<SearchResult[]> {
  if (!query.trim()) {
    return [];
  }

  // ddgr documents 0..25 for -n, where 0 means "no fixed page size"; keep the
  // value an integer inside 1..25 so a fractional or huge number cannot make
  // the command fail or change its paging mode.
  const limit = Number.isFinite(maxResults)
    ? Math.min(25, Math.max(1, Math.floor(maxResults)))
    : 5;

  try {
    // `--` ends option parsing so a query starting with "-" is not read as a flag.
    const { stdout, stderr } = await execFileAsync(
      "ddgr",
      ["--json", "-n", String(limit), "--", query],
      { timeout: 10000 }
    );

    // ddgr reports upstream HTTP failures (e.g. rate limiting) on stderr.
    if (stderr && stderr.includes("HTTP Error")) {
      if (process.env.DANEEL_DEBUG) {
        console.warn(`[ddgr] DuckDuckGo rate limiting detected, skipping`);
      }
      return [];
    }

    // ddgr prints a JSON array; nothing to parse when it is empty.
    const trimmed = stdout.trim();
    if (!trimmed || trimmed === "[]") {
      return [];
    }

    const data: unknown = JSON.parse(trimmed);
    if (!Array.isArray(data)) {
      return [];
    }

    const results: SearchResult[] = [];
    for (const item of data) {
      // Skip malformed entries (including null) instead of discarding the whole batch.
      if (item?.url && item?.title) {
        results.push({
          title: item.title,
          url: item.url,
          snippet: item.abstract || item.title,
        });
      }
    }

    return results;
  } catch (error) {
    // ddgr not installed, timed out, exited non-zero, or printed invalid JSON.
    if (process.env.DANEEL_DEBUG) {
      const msg = error instanceof Error ? error.message : String(error);
      console.warn(`[ddgr] Unavailable (${msg})`);
    }
    return [];
  }
}
75
76/**
77 * Search using SearXNG instance
78 */
/**
 * Search using a SearXNG instance.
 *
 * The instance is chosen from, in order: `baseUrl`, the `SEARXNG_URL`
 * environment variable, then "https://search.sbr.pm".
 *
 * @param query - Search text; a blank query returns no results without any request.
 * @param maxResults - Maximum number of results to return (default 5).
 * @param baseUrl - Optional instance URL overriding the environment/default.
 * @returns Up to `maxResults` usable results, or an empty array when the
 *          request fails, times out, or returns a non-OK status.
 */
async function searchSearXNG(query: string, maxResults: number = 5, baseUrl?: string): Promise<SearchResult[]> {
  if (!query.trim()) {
    return [];
  }

  const searxngUrl = baseUrl || process.env.SEARXNG_URL || "https://search.sbr.pm";

  // Abort after 10 s so a stalled instance cannot hang the caller; this is the
  // same limit the ddgr search uses.
  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), 10000);

  try {
    const url = new URL("/search", searxngUrl);
    url.searchParams.set("q", query);
    url.searchParams.set("format", "json");
    url.searchParams.set("pageno", "1");

    const response = await fetch(url.toString(), {
      headers: {
        "User-Agent": "Daneel-Bot/1.0",
      },
      signal: controller.signal,
    });

    if (!response.ok) {
      // statusText can be empty, so always include the numeric status.
      throw new Error(`SearXNG search failed: ${response.status} ${response.statusText}`.trim());
    }

    const data = (await response.json()) as { results?: unknown } | null;
    const hits: unknown = data?.results;
    const results: SearchResult[] = [];

    if (Array.isArray(hits)) {
      for (const result of hits) {
        // Count only usable entries toward the limit, so invalid ones do not
        // reduce the number of results returned.
        if (results.length >= maxResults) break;
        if (result?.url && result?.title) {
          results.push({
            title: result.title,
            url: result.url,
            snippet: result.content || result.title,
          });
        }
      }
    }

    return results;
  } catch (error) {
    // SearXNG unavailable - this is expected if the instance is down.
    if (process.env.DANEEL_DEBUG) {
      const msg = error instanceof Error ? error.message : String(error);
      console.warn(`[SearXNG] Unavailable (${msg}), falling back to DuckDuckGo`);
    }
    return [];
  } finally {
    // Always release the timer so it cannot keep the process alive.
    clearTimeout(timer);
  }
}
127
128/**
129 * Search using DuckDuckGo API (no API key required)
130 * Returns related topics which include URLs
131 */
/**
 * Search using the DuckDuckGo instant answer API (no API key required).
 *
 * Results come from the response's `RelatedTopics`, including topics nested
 * one level down under a `Topics` array. If that yields nothing and the
 * response carries an abstract with a URL, the abstract is returned as the
 * only result.
 *
 * @param query - Search text; a blank query returns no results without any request.
 * @param maxResults - Maximum number of related-topic results (default 5).
 * @returns The collected results, or an empty array when the request fails,
 *          times out, or returns a non-OK status (the error is logged).
 */
async function searchDuckDuckGo(query: string, maxResults: number = 5): Promise<SearchResult[]> {
  if (!query.trim()) {
    return [];
  }

  // Abort after 10 s so a stalled request cannot hang the caller; this is the
  // same limit the ddgr search uses.
  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), 10000);

  try {
    const encodedQuery = encodeURIComponent(query);
    const url = `https://api.duckduckgo.com/?q=${encodedQuery}&format=json&no_html=1&skip_disambig=1`;

    const response = await fetch(url, { signal: controller.signal });

    if (!response.ok) {
      // statusText can be empty, so always include the numeric status.
      throw new Error(`Search failed: ${response.status} ${response.statusText}`.trim());
    }

    const data = (await response.json()) as {
      RelatedTopics?: unknown;
      AbstractURL?: string;
      AbstractText?: string;
      Heading?: string;
    } | null;

    const related: unknown = data?.RelatedTopics;
    const topics = Array.isArray(related) ? related : [];
    const results: SearchResult[] = [];

    for (const topic of topics) {
      if (results.length >= maxResults) break;

      // A group entry carries its real topics under `Topics`; a plain entry is
      // its own single candidate. Either way the same mapping applies.
      const candidates = Array.isArray(topic?.Topics) ? topic.Topics : [topic];
      for (const candidate of candidates) {
        if (results.length >= maxResults) break;
        if (candidate?.FirstURL && candidate?.Text) {
          results.push({
            // Use the text before " - " as the title, else the first 100 characters.
            title: candidate.Text.split(" - ")[0] || candidate.Text.substring(0, 100),
            url: candidate.FirstURL,
            snippet: candidate.Text,
          });
        }
      }
    }

    // Fall back to the abstract only when no related topic was usable.
    if (results.length === 0 && data?.AbstractURL && data?.AbstractText) {
      results.push({
        title: data.Heading || query,
        url: data.AbstractURL,
        snippet: data.AbstractText,
      });
    }

    return results;
  } catch (error) {
    console.error("Web search error:", error);
    return [];
  } finally {
    // Always release the timer so it cannot keep the process alive.
    clearTimeout(timer);
  }
}
193
/**
 * Agent tool that searches the web and returns titles, URLs and snippets.
 *
 * Backends are tried in order and the first one that returns at least one
 * result wins: SearXNG (only when `SEARXNG_URL` is set), then ddgr, then the
 * DuckDuckGo instant answer API.
 *
 * `execute` returns a text block for the model plus a `details` object. On
 * success `details` holds `query`, `resultCount`, `searchEngine` and
 * `results`; when nothing is found it holds `query`, `resultCount: 0` and
 * `searchEngine: "none"`; for a blank query it holds `error: "empty_query"`.
 */
export const webSearchTool: AgentTool = {
  name: "web_search",
  label: "Web Search",
  description: "Search the web for current information. Uses SearXNG, ddgr, or DuckDuckGo API (in order of preference). Returns relevant web pages with titles, URLs, and snippets.",
  parameters: Type.Object({
    query: Type.String({ description: "The search query" }),
    maxResults: Type.Optional(Type.Number({
      description: "Maximum number of results to return (default: 5, max: 10)",
      minimum: 1,
      maximum: 10,
    })),
  }),
  execute: async (_toolCallId, params, _signal, _onUpdate) => {
    const { query, maxResults = 5 } = params as { query: string; maxResults?: number };

    if (!query || query.trim().length === 0) {
      return {
        content: [
          {
            type: "text" as const,
            text: "Error: Search query cannot be empty.",
          },
        ],
        details: { error: "empty_query" },
      };
    }

    // Enforce the advertised 1..10 integer range here as well, in case the
    // params were not validated against the schema before reaching this point.
    const limit = Number.isFinite(maxResults)
      ? Math.min(10, Math.max(1, Math.floor(maxResults)))
      : 5;

    const searxngUrl = process.env.SEARXNG_URL;

    // Backends in order of preference; SearXNG only takes part when configured.
    const backends: Array<{ name: string; enabled: boolean; run: () => Promise<SearchResult[]> }> = [
      { name: "SearXNG", enabled: Boolean(searxngUrl), run: () => searchSearXNG(query, limit) },
      { name: "ddgr", enabled: true, run: () => searchDdgr(query, limit) },
      { name: "DuckDuckGo API", enabled: true, run: () => searchDuckDuckGo(query, limit) },
    ];

    let results: SearchResult[] = [];
    let searchEngine = "none";

    for (const backend of backends) {
      if (!backend.enabled) continue;
      results = await backend.run();
      if (results.length > 0) {
        searchEngine = backend.name;
        break;
      }
    }

    if (results.length === 0) {
      // Every backend reports failure as an empty list, so the exact cause is
      // unknown here; describe the possibilities instead of asserting one.
      let message = `No results found for: "${query}"`;

      if (searxngUrl) {
        message += `\n\n⚠️ No search backend returned results:\n`;
        message += `• SearXNG (${searxngUrl}): unreachable or returned no results\n`;
        message += `• ddgr: not installed, rate-limited, or returned no results\n`;
        message += `• DuckDuckGo API: no instant answers for this query\n\n`;
        message += `If this persists, check that the SearXNG instance at ${searxngUrl} is reachable.`;
      } else {
        message += `\n\n⚠️ No search backend returned results (ddgr may be missing or rate-limited, and the DuckDuckGo API only covers instant answers). Please configure SEARXNG_URL for reliable search.`;
      }

      return {
        content: [
          {
            type: "text" as const,
            text: message,
          },
        ],
        details: { query, resultCount: 0, searchEngine },
      };
    }

    // Format results as a numbered list: title, URL, snippet.
    const header = `Search results for "${query}" (via ${searchEngine}):\n\n`;
    const listing = results
      .map((result, index) => `${index + 1}. **${result.title}**\n ${result.url}\n ${result.snippet}\n\n`)
      .join("");

    return {
      content: [
        {
          type: "text" as const,
          text: (header + listing).trim(),
        },
      ],
      details: {
        query,
        resultCount: results.length,
        searchEngine,
        results: results.map(r => ({
          title: r.title,
          url: r.url,
          snippet: r.snippet,
        })),
      },
    };
  },
};
306};