Commit 7a471284093e

Vincent Demeester <vincent@sbr.pm>
2026-01-14 19:25:56
feat(xmpp-research-bot): add Gemini 3 Flash support and dynamic YAML commands
Major refactor adding two key features: ## 1. Gemini 3 Flash Support - Added google-generativeai dependency - Integrated Gemini 3 Flash (gemini-3-flash-preview) via Vertex AI - Model selection now supports: opus:, sonnet:, gemini: (or o:, s:, g:) - Uses new Google Gen AI SDK (google.genai) per 2026 recommendations - Gracefully degrades if Gemini SDK not available Performance: - Gemini 3 Flash: 3x faster than Gemini 2.5 Pro, fraction of cost - Pricing: $0.50/1M input, $3/1M output ## 2. Dynamic YAML Command Configuration Bot now loads commands from external YAML file (no rebuild needed!): ```yaml commands: research: description: "Research assistant" system_prompt: "You are a research assistant..." default_model: sonnet max_tokens: 2000 save_to_inbox: true ``` Features: - Add unlimited custom commands via YAML - No systemd restart needed - use /reload-commands - Each command can specify: prompt, model, max_tokens, save behavior - Falls back to default "research" command if no YAML provided - Example config: commands.example.yaml Built-in commands: - /help - Auto-generated from YAML - /ping - Health check - /reload-commands - Hot-reload YAML without restart ## Other Changes - Simplified to 2 messages (start + done) - Model name saved in org properties - Better error handling and logging - Module option: commandsPath for custom commands file Example usage: ``` /research gemini: explain XMPP protocol /summarize sonnet: <paste long article> /analyze opus: distributed systems trade-offs ``` Sources: - Gemini 3 Flash: https://docs.cloud.google.com/vertex-ai/generative-ai/docs/models/gemini/3-flash - Google Gen AI SDK: https://docs.cloud.google.com/vertex-ai/generative-ai/docs/sdks/overview Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
1 parent eb9a53c
Changed files (3)
modules/xmpp-research-bot/bot.py
@@ -2,7 +2,7 @@
 """
 XMPP Research Bot - Automated research assistant via XMPP
 
-Listens for /research commands and uses Claude API to generate research summaries.
+Listens for dynamic commands and uses Claude/Gemini APIs to generate responses.
 Results are saved to inbox.org for later review.
 """
 
@@ -10,27 +10,77 @@ import asyncio
 import logging
 import os
 import sys
+import yaml
 from datetime import datetime
 from pathlib import Path
+from typing import Dict, Any, Optional
 
 import slixmpp
 from anthropic import AnthropicVertex
 
+try:
+    from google import genai
+    GEMINI_AVAILABLE = True
+except ImportError:
+    GEMINI_AVAILABLE = False
+    logging.warning("google-genai not available, Gemini support disabled")
+
 # Configure logging
 logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
 log = logging.getLogger(__name__)
 
 
-class ResearchBot(slixmpp.ClientXMPP):
-    """XMPP bot that performs research using Claude API via Vertex AI"""
+# Default commands configuration
+DEFAULT_COMMANDS = {
+    "research": {
+        "description": "Research assistant for quick, accurate queries",
+        "system_prompt": """You are a research assistant helping with quick, accurate research queries.
 
-    def __init__(self, jid, password, owner_jid, project_id, region, inbox_path):
+Your task is to provide concise, well-structured research summaries that can be saved as notes.
+
+Guidelines:
+- Provide factual, accurate information
+- Structure responses with clear headings
+- Include relevant sources or references when possible
+- Keep responses focused and actionable
+- Use markdown formatting
+- Aim for 200-500 words unless more detail is requested""",
+        "default_model": "sonnet",
+        "max_tokens": 2000,
+        "save_to_inbox": True,
+    }
+}
+
+
+class ResearchBot(slixmpp.ClientXMPP):
+    """XMPP bot that performs research using Claude/Gemini APIs via Vertex AI"""
+
+    def __init__(self, jid, password, owner_jid, project_id, region, inbox_path, commands_path=None):
         super().__init__(jid, password)
         self.owner_jid = owner_jid
         self.inbox_path = Path(inbox_path)
+        self.project_id = project_id
+        self.region = region
+        self.commands_path = commands_path
 
-        # Initialize Vertex AI client (uses Application Default Credentials)
-        self.client = AnthropicVertex(project_id=project_id, region=region)
+        # Initialize Vertex AI clients
+        self.anthropic_client = AnthropicVertex(project_id=project_id, region=region)
+
+        # Initialize Gemini client if available
+        self.gemini_client = None
+        if GEMINI_AVAILABLE:
+            try:
+                self.gemini_client = genai.Client(
+                    vertexai=True,
+                    project=project_id,
+                    location=region
+                )
+                log.info("Gemini client initialized successfully")
+            except Exception as e:
+                log.warning(f"Failed to initialize Gemini client: {e}")
+
+        # Load commands configuration
+        self.commands = self.load_commands()
 
         # Register plugins
         self.register_plugin('xep_0030')  # Service Discovery
@@ -44,6 +94,22 @@ class ResearchBot(slixmpp.ClientXMPP):
         log.info(f"Owner: {owner_jid}")
         log.info(f"Vertex AI: {project_id} / {region}")
         log.info(f"Inbox: {inbox_path}")
+        log.info(f"Commands loaded: {', '.join(self.commands.keys())}")
+
+    def load_commands(self) -> Dict[str, Any]:
+        """Load the command table from the YAML file, falling back to defaults.
+
+        The file at ``self.commands_path`` is expected to hold a top-level
+        ``commands`` mapping of command name -> settings mapping. Anything
+        else (missing file, unreadable file, invalid YAML, wrong shape) is
+        logged and the built-in defaults are used instead, so the bot always
+        ends up with a usable command table.
+
+        Returns:
+            The validated ``commands`` mapping from the file, or
+            ``DEFAULT_COMMANDS`` when no usable file is available.
+        """
+        if self.commands_path:
+            try:
+                # Explicit encoding: prompts may contain non-ASCII text and
+                # the service locale is not guaranteed to be UTF-8.
+                with Path(self.commands_path).open(encoding="utf-8") as f:
+                    loaded = yaml.safe_load(f)
+            except FileNotFoundError:
+                log.warning(f"Commands file not found: {self.commands_path}")
+            except (OSError, UnicodeDecodeError, yaml.YAMLError) as e:
+                log.error(f"Failed to load commands from {self.commands_path}: {e}")
+            else:
+                commands = loaded.get('commands') if isinstance(loaded, dict) else None
+                # Every entry must be a mapping keyed by a string name:
+                # the rest of the bot calls .keys() / .get() on these values.
+                if isinstance(commands, dict) and all(
+                    isinstance(name, str) and isinstance(entry, dict)
+                    for name, entry in commands.items()
+                ):
+                    log.info(f"Loaded {len(commands)} commands from {self.commands_path}")
+                    return commands
+                log.error(
+                    f"Ignoring {self.commands_path}: expected a top-level "
+                    "'commands' mapping of command name -> settings"
+                )
+
+        log.info("Using default commands configuration")
+        return DEFAULT_COMMANDS
 
     async def on_session_start(self, event):
         """Called when XMPP session starts"""
@@ -65,106 +131,172 @@ class ResearchBot(slixmpp.ClientXMPP):
         body = msg["body"].strip()
         log.info(f"Received from {sender}: {body}")
 
-        # Handle /research command
-        if body.startswith("/research "):
-            query = body[len("/research "):].strip()
-            if not query:
-                msg.reply("Usage: /research <your research question>\n\nModel selection:\n  opus: or o: - Use Claude Opus 4.5\n  sonnet: or s: - Use Claude Sonnet 4.5 (default)\n\nExample: /research opus: explain quantum computing").send()
-                return
+        # Check if message starts with /
+        if not body.startswith("/"):
+            return
 
-            # Parse model selection (format: "model: query")
-            model = "sonnet"  # default
+        # Parse command and arguments
+        parts = body[1:].split(None, 1)
+        command = parts[0]
+        args = parts[1] if len(parts) > 1 else ""
+
+        # Handle built-in commands
+        if command == "help":
+            await self.cmd_help(msg)
+        elif command == "ping":
+            await self.cmd_ping(msg)
+        elif command == "reload-commands":
+            await self.cmd_reload_commands(msg)
+        elif command in self.commands:
+            await self.cmd_dynamic(msg, command, args)
+        else:
+            msg.reply(f"Unknown command: /{command}\nType /help for available commands").send()
+
+    async def cmd_help(self, msg):
+        """Reply with one line per configured command plus the built-ins.
+
+        The ``research`` entry is followed by model-selection hints and usage
+        examples; the Gemini hint is included only when a Gemini client exists.
+        Built-in commands are always listed last.
+        """
+        # Extra hint lines shown directly under the "research" command.
+        research_hints = [
+            "  Model selection:",
+            "    opus: or o: - Use Claude Opus 4.5 (most intelligent)",
+            "    sonnet: or s: - Use Claude Sonnet 4.5 (default, faster)",
+        ]
+        if self.gemini_client:
+            research_hints.append("    gemini: or g: - Use Gemini 3 Flash (fast, cost-effective)")
+        research_hints += [
+            "  Examples:",
+            "    /research how does XMPP work?",
+            "    /research opus: analyze distributed systems complexity",
+        ]
+
+        output = ["Available commands:"]
+
+        # Commands coming from the YAML / default configuration
+        for name, config in self.commands.items():
+            summary = config.get('description', 'No description')
+            output.append(f"/{name} <query> - {summary}")
+            if name == "research":
+                output.extend(research_hints)
+
+        # Commands implemented directly by the bot
+        output.extend([
+            "/help - Show this help message",
+            "/ping - Check if bot is alive",
+            "/reload-commands - Reload commands from YAML file",
+        ])
+
+        msg.reply("\n".join(output)).send()
+
+    async def cmd_ping(self, msg):
+        """Health check: reply to the sender with a fixed "Pong" message."""
+        msg.reply("🤖 Pong! Bot is alive.").send()
+
+    async def cmd_reload_commands(self, msg):
+        """Reload the command table and report the before/after command counts.
+
+        Replaces ``self.commands`` with the result of ``self.load_commands()``
+        and replies with the number of commands before and after the reload.
+        """
+        old_count = len(self.commands)
+        self.commands = self.load_commands()
+        new_count = len(self.commands)
+        msg.reply(f"✅ Reloaded commands: {old_count} → {new_count}").send()
+        log.info("Commands reloaded via /reload-commands")
+
+    async def cmd_dynamic(self, msg, command: str, args: str):
+        """Execute a dynamic command from YAML configuration"""
+        if not args:
+            usage = self.commands[command].get('usage', f'Usage: /{command} <query>')
+            msg.reply(usage).send()
+            return
+
+        cmd_config = self.commands[command]
+        query = args.strip()
+
+        # Parse model selection (format: "model: query")
+        model = cmd_config.get('default_model', 'sonnet')
+        model_name = self.get_model_display_name(model)
+
+        if query.startswith("opus:") or query.startswith("o:"):
+            model = "opus"
+            model_name = "Opus 4.5"
+            query = query.split(":", 1)[1].strip()
+        elif query.startswith("sonnet:") or query.startswith("s:"):
+            model = "sonnet"
             model_name = "Sonnet 4.5"
+            query = query.split(":", 1)[1].strip()
+        elif query.startswith("gemini:") or query.startswith("g:"):
+            if not self.gemini_client:
+                msg.reply("โŒ Gemini is not available (client not initialized)").send()
+                return
+            model = "gemini"
+            model_name = "Gemini 3 Flash"
+            query = query.split(":", 1)[1].strip()
 
-            if query.startswith("opus:") or query.startswith("o:"):
-                model = "opus"
-                model_name = "Opus 4.5"
-                query = query.split(":", 1)[1].strip()
-            elif query.startswith("sonnet:") or query.startswith("s:"):
-                model = "sonnet"
-                model_name = "Sonnet 4.5"
-                query = query.split(":", 1)[1].strip()
+        # Acknowledge we picked up the work
+        msg.reply(f"๐Ÿ” {cmd_config.get('description', 'Processing')} with {model_name}: {query}").send()
 
-            # Acknowledge receipt
-            msg.reply(f"๐Ÿ” Researching with {model_name}: {query}").send()
+        try:
+            # Perform research/processing
+            result = await self.process_query(
+                query=query,
+                model=model,
+                system_prompt=cmd_config.get('system_prompt', ''),
+                max_tokens=cmd_config.get('max_tokens', 2000)
+            )
 
-            try:
-                # Notify user that API call is in progress
-                msg.reply("โณ Calling Claude API, please wait...").send()
-
-                # Perform research
-                result = await self.research(query, model=model)
-
-                # Save to inbox
+            # Save to inbox if configured
+            if cmd_config.get('save_to_inbox', True):
                 await self.save_to_inbox(query, result, model=model_name)
+                msg.reply(f"โœ… Complete! Saved to inbox.org\n\nPreview:\n{result[:200]}...").send()
+            else:
+                msg.reply(f"โœ… Complete!\n\n{result}").send()
 
-                # Send confirmation
-                msg.reply(f"โœ… Research complete! Saved to inbox.org\n\nPreview:\n{result[:200]}...").send()
-                log.info(f"Research completed for: {query} (model: {model})")
+            log.info(f"/{command} completed for: {query} (model: {model})")
 
-            except Exception as e:
-                log.error(f"Research failed: {e}", exc_info=True)
-                msg.reply(f"โŒ Research failed: {str(e)}").send()
+        except Exception as e:
+            log.error(f"/{command} failed: {e}", exc_info=True)
+            msg.reply(f"โŒ Failed: {str(e)}").send()
 
-        elif body == "/help":
-            help_text = """Available commands:
-/research <question> - Perform research on a topic
-  Model selection:
-    opus: or o: - Use Claude Opus 4.5 (most intelligent)
-    sonnet: or s: - Use Claude Sonnet 4.5 (default, faster)
-  Examples:
-    /research how does XMPP work?
-    /research opus: analyze the complexity of distributed systems
-/help - Show this help message
-/ping - Check if bot is alive"""
-            msg.reply(help_text).send()
+    def get_model_display_name(self, model: str) -> str:
+        """Return the display label for a model key.
+
+        The keys "sonnet", "opus" and "gemini" map to fixed labels; any other
+        key is returned title-cased.
+        """
+        known_labels = {
+            "sonnet": "Sonnet 4.5",
+            "opus": "Opus 4.5",
+            "gemini": "Gemini 3 Flash",
+        }
+        try:
+            return known_labels[model]
+        except KeyError:
+            return model.title()
 
-        elif body == "/ping":
-            msg.reply("๐Ÿค– Pong! Bot is alive.").send()
-
-    async def research(self, query: str, model: str = "sonnet") -> str:
+    async def process_query(self, query: str, model: str, system_prompt: str, max_tokens: int) -> str:
         """
-        Perform research using Claude API with prompt caching.
+        Process query using specified model.
 
         Args:
-            query: Research question to answer
-            model: Model to use - "sonnet" (default) or "opus"
+            query: Question to answer
+            model: Model to use - "sonnet", "opus", or "gemini"
+            system_prompt: System prompt for the model
+            max_tokens: Maximum tokens to generate
 
-        Uses cached system prompt for efficiency.
+        Returns:
+            Generated response text
         """
-        log.info(f"Starting research for: {query} (model: {model})")
+        log.info(f"Processing query with {model}: {query}")
 
+        if model in ("sonnet", "opus"):
+            return await self.process_claude(query, model, system_prompt, max_tokens)
+        elif model == "gemini":
+            return await self.process_gemini(query, system_prompt, max_tokens)
+        else:
+            raise ValueError(f"Unknown model: {model}")
+
+    async def process_claude(self, query: str, model: str, system_prompt: str, max_tokens: int) -> str:
+        """Process query with Claude (Anthropic)"""
         # Map model names to Vertex AI model IDs
         model_ids = {
             "sonnet": "claude-sonnet-4-5@20250929",
             "opus": "claude-opus-4-5@20251101",
         }
 
-        # System prompt (will be cached)
-        system_prompt = [
+        # System prompt with caching
+        system = [
             {
                 "type": "text",
-                "text": """You are a research assistant helping with quick, accurate research queries.
-
-Your task is to provide concise, well-structured research summaries that can be saved as notes.
-
-Guidelines:
-- Provide factual, accurate information
-- Structure responses with clear headings
-- Include relevant sources or references when possible
-- Keep responses focused and actionable
-- Use markdown formatting
-- Aim for 200-500 words unless more detail is requested""",
+                "text": system_prompt,
                 "cache_control": {"type": "ephemeral"},
             }
         ]
 
         # Call Claude API via Vertex AI
-        # Note: Vertex AI uses different model ID format than direct API
-        response = self.client.messages.create(
-            model=model_ids.get(model, model_ids["sonnet"]),
-            max_tokens=2000,
-            system=system_prompt,
+        response = self.anthropic_client.messages.create(
+            model=model_ids[model],
+            max_tokens=max_tokens,
+            system=system,
             messages=[{"role": "user", "content": query}],
         )
 
@@ -173,7 +305,7 @@ Guidelines:
         # Log cache usage
         usage = response.usage
         log.info(
-            f"API usage - Input: {usage.input_tokens}, "
+            f"Claude usage - Input: {usage.input_tokens}, "
             f"Cache creation: {getattr(usage, 'cache_creation_input_tokens', 0)}, "
             f"Cache read: {getattr(usage, 'cache_read_input_tokens', 0)}, "
             f"Output: {usage.output_tokens}"
@@ -181,6 +313,30 @@ Guidelines:
 
         return result
 
+    async def process_gemini(self, query: str, system_prompt: str, max_tokens: int) -> str:
+        """Process query with Gemini.
+
+        Args:
+            query: Question to answer
+            system_prompt: Instructions prepended to the query
+            max_tokens: Maximum tokens to generate
+
+        Returns:
+            Generated response text
+
+        Raises:
+            RuntimeError: If the Gemini client is not initialized or the
+                response contains no text.
+        """
+        if not self.gemini_client:
+            raise RuntimeError("Gemini client not initialized")
+
+        # Combine system prompt with query
+        full_prompt = f"{system_prompt}\n\nUser query: {query}"
+
+        # Call Gemini API via Vertex AI. generate_content on the synchronous
+        # client blocks until the response arrives, so run it in a worker
+        # thread to keep the XMPP event loop responsive.
+        response = await asyncio.to_thread(
+            self.gemini_client.models.generate_content,
+            model="gemini-3-flash-preview",
+            contents=full_prompt,
+            config={
+                "max_output_tokens": max_tokens,
+                "temperature": 1.0,
+            },
+        )
+
+        # .text is empty/None when the response carries no text part; fail
+        # here with a clear message instead of letting callers slice None.
+        result = response.text
+        if not result:
+            raise RuntimeError("Gemini returned no text")
+
+        log.info("Gemini response generated successfully")
+
+        return result
+
     async def save_to_inbox(self, query: str, result: str, model: str = "Sonnet 4.5"):
         """Save research result to inbox.org"""
         timestamp = datetime.now().strftime("%Y-%m-%d %a %H:%M")
@@ -218,6 +374,7 @@ async def main():
     project_id = os.getenv("VERTEX_PROJECT_ID")
     region = os.getenv("VERTEX_REGION", "us-east5")
     inbox_path = os.getenv("INBOX_PATH", "/home/vincent/desktop/org/inbox.org")
+    commands_path = os.getenv("COMMANDS_PATH")
 
     if not all([jid, password, owner_jid, project_id]):
         log.error("Missing required environment variables:")
@@ -225,7 +382,7 @@ async def main():
         sys.exit(1)
 
     # Create and start bot
-    bot = ResearchBot(jid, password, owner_jid, project_id, region, inbox_path)
+    bot = ResearchBot(jid, password, owner_jid, project_id, region, inbox_path, commands_path)
 
     log.info("Connecting to XMPP server...")
     bot.connect()
modules/xmpp-research-bot/commands.example.yaml
@@ -0,0 +1,113 @@
+# Example commands configuration for XMPP Research Bot
+#
+# Save your custom commands.yaml anywhere the bot can read it, then point to it with:
+#   services.xmpp-research-bot.commandsPath = "/path/to/commands.yaml";
+#
+# After editing, reload with: /reload-commands
+
+commands:
+  research:
+    description: "Research assistant for quick, accurate queries"
+    system_prompt: |
+      You are a research assistant helping with quick, accurate research queries.
+
+      Your task is to provide concise, well-structured research summaries that can be saved as notes.
+
+      Guidelines:
+      - Provide factual, accurate information
+      - Structure responses with clear headings
+      - Include relevant sources or references when possible
+      - Keep responses focused and actionable
+      - Use markdown formatting
+      - Aim for 200-500 words unless more detail is requested
+    default_model: sonnet
+    max_tokens: 2000
+    save_to_inbox: true
+
+  summarize:
+    description: "Summarize long text or articles"
+    system_prompt: |
+      You are an expert at concise summarization.
+
+      Your task is to create clear, accurate summaries that capture the key points.
+
+      Guidelines:
+      - Extract main ideas and key takeaways
+      - Use bullet points for clarity
+      - Maintain accuracy - don't add interpretations
+      - Keep summaries to 150-300 words
+      - Use markdown formatting
+    default_model: sonnet
+    max_tokens: 1000
+    save_to_inbox: true
+
+  analyze:
+    description: "Deep analysis of complex topics"
+    system_prompt: |
+      You are an analytical thinker who breaks down complex topics systematically.
+
+      Your task is to provide in-depth analysis with multiple perspectives.
+
+      Guidelines:
+      - Break down complexity into understandable parts
+      - Consider multiple viewpoints
+      - Identify underlying assumptions
+      - Highlight trade-offs and implications
+      - Use structured markdown with headings
+      - Aim for comprehensive analysis (500-1000 words)
+    default_model: opus  # Use more powerful model for analysis
+    max_tokens: 3000
+    save_to_inbox: true
+
+  eli5:
+    description: "Explain like I'm 5 - simple explanations"
+    system_prompt: |
+      You explain complex topics in simple, easy-to-understand language.
+
+      Your task is to make complicated ideas accessible to anyone.
+
+      Guidelines:
+      - Use simple words and short sentences
+      - Use analogies and everyday examples
+      - Avoid jargon - explain technical terms if needed
+      - Make it engaging and friendly
+      - Keep it brief (200-400 words)
+      - Use markdown for readability
+    default_model: sonnet
+    max_tokens: 1500
+    save_to_inbox: false  # Don't save to inbox, just reply
+
+  translate:
+    description: "Translate text between languages"
+    system_prompt: |
+      You are a professional translator.
+
+      Your task is to provide accurate, natural-sounding translations.
+
+      Guidelines:
+      - Preserve meaning and tone
+      - Use natural phrasing in target language
+      - Note any cultural adaptations needed
+      - If source language isn't specified, detect it
+      - Format: "From [language] to [language]:\n\n[translation]"
+    default_model: gemini  # Fast and cost-effective for translation
+    max_tokens: 2000
+    save_to_inbox: false
+
+  brainstorm:
+    description: "Generate creative ideas and solutions"
+    system_prompt: |
+      You are a creative brainstorming partner.
+
+      Your task is to generate diverse, innovative ideas.
+
+      Guidelines:
+      - Think divergently - no idea is too wild initially
+      - Provide 5-10 distinct ideas
+      - Mix practical and creative approaches
+      - Explain the potential of each idea briefly
+      - Use bullet points for clarity
+      - Encourage exploration
+    default_model: opus  # More creative with opus
+    max_tokens: 2000
+    save_to_inbox: true
modules/xmpp-research-bot/default.nix
@@ -12,6 +12,8 @@ let
       slixmpp
       anthropic
       google-auth
+      pyyaml
+      google-genai
     ]
   );
 
@@ -22,6 +24,9 @@ let
     export VERTEX_PROJECT_ID="${cfg.vertexProjectId}"
     export VERTEX_REGION="${cfg.vertexRegion}"
     export INBOX_PATH="${cfg.inboxPath}"
+    ${lib.optionalString (cfg.commandsPath != null) ''
+    export COMMANDS_PATH="${cfg.commandsPath}"
+    ''}
 
     exec ${pythonEnv}/bin/python3 ${./bot.py}
   '';
@@ -75,6 +80,12 @@ in
       default = "users";
       description = "Group to run the bot as";
     };
+
+    commandsPath = lib.mkOption {
+      type = lib.types.nullOr lib.types.path;
+      default = null;
+      description = "Path to commands.yaml configuration file (optional)";
+    };
   };
 
   config = lib.mkIf cfg.enable {