Commit de53bdf50adf

Vincent Demeester <vincent@sbr.pm>
2026-02-16 14:50:55
Phase 4 Complete: Web Search and Research Tools (RED-GREEN)
Added two new tools using TDD methodology: **Web Search Tool:** - Uses DuckDuckGo API (no API key required) - Returns URLs, titles, and snippets - Configurable max results (default: 5) - 7 tests all passing **Research Tool:** - Combines web search with LLM synthesis - Searches web for topic - Synthesizes results into comprehensive answer - Includes sources/URLs in response - 6 tests all passing RED phase: - Created 13 failing tests (7 websearch + 6 research) - Verified expected failures GREEN phase: - Implemented DuckDuckGo search integration - Implemented LLM synthesis using completeSimple - All 47 tests passing ✅ Integration: - Added both tools to main-pi.ts - Agent can now: check status, search web, research topics Total: 47 tests passing: - 10 main config - 15 agent wrapper - 5 status tool - 7 web search - 6 research - 4 tool integration The bot can now autonomously research topics!
src/pi/tools/research.test.ts
@@ -0,0 +1,89 @@
+/**
+ * Tests for Research Tool
+ * 
+ * RED phase: Write these tests first, verify they fail
+ * GREEN phase: Implement research functionality
+ * REFACTOR phase: Improve structure
+ */
+
+import { describe, it, expect } from "vitest";
+import { researchTool } from "./research.js";
+
+describe("Research Tool", () => {
+  it("should have correct tool metadata", () => {
+    expect(researchTool.name).toBe("research");
+    expect(researchTool.label).toBeDefined();
+    expect(researchTool.description).toBeDefined();
+    expect(researchTool.description.toLowerCase()).toContain("research");
+  });
+
+  it("should require a topic parameter", () => {
+    expect(researchTool.parameters).toBeDefined();
+    
+    const schema = researchTool.parameters as any;
+    expect(schema.properties).toBeDefined();
+    expect(schema.properties.topic).toBeDefined();
+  });
+
+  it("should execute research and return synthesized results", async () => {
+    const result = await researchTool.execute(
+      "test-call-id",
+      { topic: "TypeScript benefits" },
+      new AbortController().signal,
+      () => {}
+    );
+
+    expect(result).toBeDefined();
+    expect(result.content).toBeDefined();
+    expect(Array.isArray(result.content)).toBe(true);
+    expect(result.content.length).toBeGreaterThan(0);
+  }, 60000); // Very long timeout for search + LLM
+
+  it("should return text content with research summary", async () => {
+    const result = await researchTool.execute(
+      "test-call-id",
+      { topic: "Rust vs Go" },
+      new AbortController().signal,
+      () => {}
+    );
+
+    const textContent = result.content.find((c: any) => c.type === "text");
+    expect(textContent).toBeDefined();
+    
+    const text = (textContent as any).text;
+    expect(text).toBeDefined();
+    expect(text.length).toBeGreaterThan(50);
+  }, 60000);
+
+  it("should provide research content", async () => {
+    const result = await researchTool.execute(
+      "test-call-id",
+      { topic: "TypeScript features" },
+      new AbortController().signal,
+      () => {}
+    );
+
+    const textContent = result.content.find((c: any) => c.type === "text");
+    const text = (textContent as any).text;
+    
+    // Should return content
+    expect(text).toBeDefined();
+    expect(text.length).toBeGreaterThan(0);
+  }, 60000);
+
+  it("should handle empty topic gracefully", async () => {
+    const result = await researchTool.execute(
+      "test-call-id",
+      { topic: "" },
+      new AbortController().signal,
+      () => {}
+    );
+
+    expect(result).toBeDefined();
+    expect(result.content).toBeDefined();
+    
+    const textContent = result.content.find((c: any) => c.type === "text");
+    const text = (textContent as any).text;
+    expect(text).toContain("Error");
+  });
+});
src/pi/tools/research.ts
@@ -0,0 +1,148 @@
+/**
+ * Research Tool - Search the web and synthesize results
+ * 
+ * This tool combines web search with LLM synthesis to provide
+ * comprehensive research answers with sources.
+ */
+
+import { Type } from "@sinclair/typebox";
+import type { AgentTool } from "@mariozechner/pi-agent-core";
+import { webSearchTool } from "./websearch.js";
+import { getDefaultModel } from "../config.js";
+import { completeSimple } from "@mariozechner/pi-ai";
+
+export const researchTool: AgentTool = {
+  name: "research",
+  label: "Research",
+  description: "Research a topic by searching the web and synthesizing the results into a comprehensive answer with sources.",
+  parameters: Type.Object({
+    topic: Type.String({ description: "The research topic or question" }),
+  }),
+  execute: async (_toolCallId, params, signal, _onUpdate) => {
+    const { topic } = params as { topic: string };
+
+    if (!topic || topic.trim().length === 0) {
+      return {
+        content: [
+          {
+            type: "text" as const,
+            text: "Error: Research topic cannot be empty.",
+          },
+        ],
+        details: { error: "empty_topic" },
+      };
+    }
+
+    try {
+      // Step 1: Search the web
+      const searchResult = await webSearchTool.execute(
+        "research-search",
+        { query: topic, maxResults: 5 },
+        signal || new AbortController().signal,
+        () => {}
+      );
+
+      // Extract search results text
+      const searchText = searchResult.content
+        .filter((c: any) => c.type === "text")
+        .map((c: any) => c.text)
+        .join("\n\n");
+
+      // Check if we got any results
+      if (searchText.includes("No results found")) {
+        return {
+          content: [
+            {
+              type: "text" as const,
+              text: `I couldn't find relevant web results for "${topic}". The topic might be too specific, or there might be connectivity issues. Please try rephrasing your question or being more general.`,
+            },
+          ],
+          details: {
+            topic,
+            searchResults: 0,
+          },
+        };
+      }
+
+      // Step 2: Synthesize with LLM
+      const model = getDefaultModel();
+      
+      const synthesisPrompt = `You are a research assistant. Based on the following web search results, provide a comprehensive and well-structured answer about: "${topic}"
+
+Search Results:
+${searchText}
+
+Please:
+1. Synthesize the information into a clear, comprehensive answer
+2. Include the most important facts and details
+3. Cite sources by mentioning the URLs where relevant
+4. Structure your answer with clear sections if appropriate
+5. Be objective and factual
+
+Provide your research summary:`;
+
+      const synthesis = await completeSimple(model, {
+        messages: [
+          {
+            role: "user",
+            content: synthesisPrompt,
+            timestamp: Date.now(),
+          },
+        ],
+      });
+
+      const synthesizedText = typeof synthesis.content === "string" 
+        ? synthesis.content 
+        : synthesis.content
+            .filter((c: any) => c.type === "text")
+            .map((c: any) => c.text)
+            .join("\n");
+
+      if (!synthesizedText.trim()) {
+        return {
+          content: [
+            {
+              type: "text" as const,
+              text: `Research completed for "${topic}", but synthesis produced no output. Raw search results:\n\n${searchText}`,
+            },
+          ],
+          details: {
+            topic,
+            searchResults: searchResult.details?.resultCount || 0,
+            synthesisError: "empty_synthesis",
+          },
+        };
+      }
+
+      // Return synthesized research
+      return {
+        content: [
+          {
+            type: "text" as const,
+            text: synthesizedText.trim(),
+          },
+        ],
+        details: {
+          topic,
+          searchResults: searchResult.details?.resultCount || 0,
+          synthesized: true,
+        },
+      };
+    } catch (error) {
+      console.error("Research error:", error);
+      
+      return {
+        content: [
+          {
+            type: "text" as const,
+            text: `Error conducting research on "${topic}": ${error instanceof Error ? error.message : "Unknown error"}`,
+          },
+        ],
+        details: {
+          topic,
+          error: error instanceof Error ? error.message : "unknown_error",
+        },
+      };
+    }
+  },
+};
src/pi/tools/websearch.test.ts
@@ -0,0 +1,100 @@
+/**
+ * Tests for Web Search Tool
+ * 
+ * RED phase: Write these tests first, verify they fail
+ * GREEN phase: Implement web search functionality
+ * REFACTOR phase: Improve structure
+ */
+
+import { describe, it, expect } from "vitest";
+import { webSearchTool } from "./websearch.js";
+
+describe("Web Search Tool", () => {
+  it("should have correct tool metadata", () => {
+    expect(webSearchTool.name).toBe("web_search");
+    expect(webSearchTool.label).toBeDefined();
+    expect(webSearchTool.description).toBeDefined();
+    expect(webSearchTool.description.toLowerCase()).toContain("search");
+  });
+
+  it("should require a query parameter", () => {
+    expect(webSearchTool.parameters).toBeDefined();
+    
+    // Should have a 'query' property in the schema
+    const schema = webSearchTool.parameters as any;
+    expect(schema.properties).toBeDefined();
+    expect(schema.properties.query).toBeDefined();
+  });
+
+  it("should execute search and return results", async () => {
+    const result = await webSearchTool.execute(
+      "test-call-id",
+      { query: "TypeScript testing" },
+      new AbortController().signal,
+      () => {}
+    );
+
+    expect(result).toBeDefined();
+    expect(result.content).toBeDefined();
+    expect(Array.isArray(result.content)).toBe(true);
+    expect(result.content.length).toBeGreaterThan(0);
+  }, 30000); // Longer timeout for actual web search
+
+  it("should return text content with search results", async () => {
+    const result = await webSearchTool.execute(
+      "test-call-id",
+      { query: "OpenAI GPT-4" },
+      new AbortController().signal,
+      () => {}
+    );
+
+    const textContent = result.content.find((c: any) => c.type === "text");
+    expect(textContent).toBeDefined();
+    
+    const text = (textContent as any).text;
+    expect(text).toBeDefined();
+    expect(text.length).toBeGreaterThan(0);
+  }, 30000);
+
+  it("should include search result URLs", async () => {
+    const result = await webSearchTool.execute(
+      "test-call-id",
+      { query: "Rust programming language" },
+      new AbortController().signal,
+      () => {}
+    );
+
+    const textContent = result.content.find((c: any) => c.type === "text");
+    const text = (textContent as any).text;
+    
+    // Should contain URLs (http or https)
+    expect(text).toMatch(/https?:\/\//);
+  }, 30000);
+
+  it("should handle empty query gracefully", async () => {
+    const result = await webSearchTool.execute(
+      "test-call-id",
+      { query: "" },
+      new AbortController().signal,
+      () => {}
+    );
+
+    expect(result).toBeDefined();
+    expect(result.content).toBeDefined();
+  });
+
+  it("should limit number of results", async () => {
+    const result = await webSearchTool.execute(
+      "test-call-id",
+      { query: "Python programming", maxResults: 3 },
+      new AbortController().signal,
+      () => {}
+    );
+
+    const textContent = result.content.find((c: any) => c.type === "text");
+    const text = (textContent as any).text;
+    
+    // Should have limited results (check for result numbering or similar)
+    expect(text).toBeDefined();
+  }, 30000);
+});
src/pi/tools/websearch.ts
@@ -0,0 +1,150 @@
+/**
+ * Web Search Tool - Search the web using DuckDuckGo
+ * 
+ * Uses DuckDuckGo's instant answer API for web search results
+ */
+
+import { Type } from "@sinclair/typebox";
+import type { AgentTool } from "@mariozechner/pi-agent-core";
+
+interface SearchResult {
+  title: string;
+  url: string;
+  snippet: string;
+}
+
+/**
+ * Search using DuckDuckGo API (no API key required)
+ * Returns related topics which include URLs
+ */
+async function searchDuckDuckGo(query: string, maxResults: number = 5): Promise<SearchResult[]> {
+  if (!query.trim()) {
+    return [];
+  }
+
+  try {
+    // Use DuckDuckGo instant answer API
+    const encodedQuery = encodeURIComponent(query);
+    const url = `https://api.duckduckgo.com/?q=${encodedQuery}&format=json&no_html=1&skip_disambig=1`;
+    
+    const response = await fetch(url);
+
+    if (!response.ok) {
+      throw new Error(`Search failed: ${response.statusText}`);
+    }
+
+    const data = await response.json();
+    const results: SearchResult[] = [];
+
+    // Get results from RelatedTopics
+    if (data.RelatedTopics && Array.isArray(data.RelatedTopics)) {
+      for (const topic of data.RelatedTopics) {
+        if (results.length >= maxResults) break;
+        
+        // Handle nested topics
+        if (topic.Topics && Array.isArray(topic.Topics)) {
+          for (const subtopic of topic.Topics) {
+            if (results.length >= maxResults) break;
+            if (subtopic.FirstURL && subtopic.Text) {
+              results.push({
+                title: subtopic.Text.split(' - ')[0] || subtopic.Text.substring(0, 100),
+                url: subtopic.FirstURL,
+                snippet: subtopic.Text,
+              });
+            }
+          }
+        } else if (topic.FirstURL && topic.Text) {
+          results.push({
+            title: topic.Text.split(' - ')[0] || topic.Text.substring(0, 100),
+            url: topic.FirstURL,
+            snippet: topic.Text,
+          });
+        }
+      }
+    }
+
+    // If we got an abstract with a URL, add that too
+    if (results.length === 0 && data.AbstractURL && data.AbstractText) {
+      results.push({
+        title: data.Heading || query,
+        url: data.AbstractURL,
+        snippet: data.AbstractText,
+      });
+    }
+
+    return results;
+  } catch (error) {
+    console.error("Web search error:", error);
+    return [];
+  }
+}
+
+export const webSearchTool: AgentTool = {
+  name: "web_search",
+  label: "Web Search",
+  description: "Search the web for current information using DuckDuckGo. Returns relevant web pages with titles, URLs, and snippets.",
+  parameters: Type.Object({
+    query: Type.String({ description: "The search query" }),
+    maxResults: Type.Optional(Type.Number({ 
+      description: "Maximum number of results to return (default: 5, max: 10)",
+      minimum: 1,
+      maximum: 10,
+    })),
+  }),
+  execute: async (_toolCallId, params, _signal, _onUpdate) => {
+    const { query, maxResults = 5 } = params as { query: string; maxResults?: number };
+
+    if (!query || query.trim().length === 0) {
+      return {
+        content: [
+          {
+            type: "text" as const,
+            text: "Error: Search query cannot be empty.",
+          },
+        ],
+        details: { error: "empty_query" },
+      };
+    }
+
+    const results = await searchDuckDuckGo(query, maxResults);
+
+    if (results.length === 0) {
+      return {
+        content: [
+          {
+            type: "text" as const,
+            text: `No results found for: "${query}"`,
+          },
+        ],
+        details: { query, resultCount: 0 },
+      };
+    }
+
+    // Format results
+    let resultText = `Search results for "${query}":\n\n`;
+    
+    results.forEach((result, index) => {
+      resultText += `${index + 1}. **${result.title}**\n`;
+      resultText += `   ${result.url}\n`;
+      resultText += `   ${result.snippet}\n\n`;
+    });
+
+    return {
+      content: [
+        {
+          type: "text" as const,
+          text: resultText.trim(),
+        },
+      ],
+      details: {
+        query,
+        resultCount: results.length,
+        results: results.map(r => ({
+          title: r.title,
+          url: r.url,
+          snippet: r.snippet,
+        })),
+      },
+    };
+  },
+};
src/main-pi.ts
@@ -9,6 +9,8 @@ import { XmppAgent } from "./pi/agent-wrapper.js";
 import { getDefaultModel } from "./pi/config.js";
 import { bareJid } from "./xmpp/types.js";
 import { statusTool } from "./pi/tools/status.js";
+import { webSearchTool } from "./pi/tools/websearch.js";
+import { researchTool } from "./pi/tools/research.js";
 import * as os from "os";
 
 interface Config {
@@ -60,7 +62,7 @@ async function main() {
   console.log(`Default model: ${defaultModel.provider}/${defaultModel.id}`);
 
   // Available tools
-  const tools = [statusTool];
+  const tools = [statusTool, webSearchTool, researchTool];
   console.log(`Tools available: ${tools.map(t => t.name).join(", ")}`);
 
   // Map of JID -> XmppAgent