Commit 64456ea63b40

Vincent Demeester <vincent@sbr.pm>
2026-02-13 23:51:42
feat(pi): added multi-provider support to subagent extension
Added intelligent model resolution across providers with fuzzy matching: - Model cache loaded from pi --list-models on startup - Configurable provider preference via settings or flag - Automatic version suffix matching (claude-haiku-4-5 -> @20251001) - New /subagent-config command to show configuration - Clean agent definitions without version suffixes Changes: - Enhanced findModelAcrossProviders with model cache - Added subagentProviderPreference to settings - Updated ensure-settings.sh to merge provider preference - Added comprehensive README documentation Resolves subagent API key errors by routing to configured providers.
1 parent ec2fb13
Changed files (4)
dots/pi/agent/extensions/subagent/index.ts
@@ -28,6 +28,57 @@ const MAX_PARALLEL_TASKS = 8;
 const MAX_CONCURRENCY = 4;
 const COLLAPSED_ITEM_COUNT = 10;
 
+// Default provider preference order used when resolving an agent's model.
+// Overridden by the --subagent-providers flag, or else by "subagentProviderPreference" in settings.json.
+// An empty array means no preference: the resolver falls back to its built-in provider list, preferring providers with API keys.
+const DEFAULT_PROVIDER_PREFERENCE: string[] = [];
+
+// One provider/model pair parsed from `pi --list-models`; used for version-suffix matching.
+interface ModelCacheEntry {
+	provider: string; // first whitespace-separated column of a listing row
+	modelId: string; // second whitespace-separated column of a listing row
+}
+let modelCache: ModelCacheEntry[] | null = null; // null = not loaded yet; [] = load failed or the listing had no rows
+
+/**
+ * Populate the module-level model cache from the output of `pi --list-models`.
+ *
+ * Does nothing when the cache has already been set. Any failure (non-zero exit
+ * code, empty stdout, or a thrown error) leaves the cache as an empty array, in
+ * which case only exact registry matches can resolve a model.
+ *
+ * @param pi - Extension API; only `pi.exec` is used here.
+ */
+async function loadModelCache(pi: any): Promise<void> {
+	if (modelCache !== null) return;
+
+	try {
+		const listing = await pi.exec("pi", ["--list-models"], { timeout: 10000 });
+		if (listing.code !== 0 || !listing.stdout) {
+			// Listing unavailable: remember the failure as an empty cache
+			modelCache = [];
+			return;
+		}
+
+		// Row format: "provider  model-id  context  max-out  thinking  images".
+		// The first line is the column header, so it is dropped.
+		const parsed: ModelCacheEntry[] = [];
+		for (const rawRow of listing.stdout.split("\n").slice(1)) {
+			const row = rawRow.trim();
+			if (!row) continue;
+
+			const [provider, modelId] = row.split(/\s+/);
+			if (modelId !== undefined) {
+				parsed.push({ provider, modelId });
+			}
+		}
+		modelCache = parsed;
+	} catch {
+		// exec (or parsing) threw: fall back to an empty cache
+		modelCache = [];
+	}
+}
+
 function formatTokens(count: number): string {
 	if (count < 1000) return count.toString();
 	if (count < 10000) return `${(count / 1000).toFixed(1)}k`;
@@ -207,6 +258,121 @@ async function mapWithConcurrencyLimit<TIn, TOut>(
 	return results;
 }
 
+/**
+ * Resolve a model ID to a concrete provider/model pair.
+ *
+ * Providers are searched in this order: `preferredProviders` (as given), then every
+ * remaining entry of the built-in known-provider list. The first provider that has
+ * the model AND an API key wins; if no provider with an API key has the model, the
+ * first provider that has it at all is returned so the caller still gets a concrete
+ * provider (the run is then expected to fail, but with a clearer error).
+ *
+ * Matching is exact first. If that fails, the model cache is consulted for an ID on
+ * the same provider of the form `<modelId>@<suffix>`, e.g. "claude-haiku-4-5" matches
+ * "claude-haiku-4-5@20251001". When several suffixed IDs exist, the first one in
+ * cache order is used.
+ *
+ * @param modelId - The model ID to search for (e.g., "claude-sonnet-4-5", "claude-haiku-4-5@20251001")
+ * @param modelRegistry - The model registry from context; `find(provider, id)` and `getApiKey(model)` are used
+ * @param preferredProviders - Ordered list of preferred providers (empty = built-in list only)
+ * @returns `{ provider, modelId }` where `modelId` is the ID that actually matched, or null if not found
+ */
+async function findModelAcrossProviders(
+	modelId: string,
+	modelRegistry: any,
+	preferredProviders: string[],
+): Promise<{ provider: string; modelId: string } | null> {
+	// Known providers - expand this list as needed
+	const knownProviders = [
+		"anthropic",
+		"openai",
+		"google",
+		"google-vertex",
+		"google-vertex-claude",
+		"vertex",
+		"llama-cpp",
+		"openrouter",
+		"groq",
+		"xai",
+		"deepseek",
+		"copilot",
+		"codex",
+	];
+
+	// Preferred providers first, then the rest of the known list. The Set drops
+	// duplicates while keeping first-seen order, so each provider is queried once.
+	const searchOrder = [...new Set([...preferredProviders, ...knownProviders])];
+
+	/**
+	 * Look the requested model up on one provider.
+	 * Returns the matched ID (possibly with a version suffix) plus the registry
+	 * entry, or null when the provider does not offer the model.
+	 */
+	const lookupOnProvider = (providerName: string): { matchedId: string; model: any } | null => {
+		// Exact match first
+		const exact = modelRegistry.find(providerName, modelId);
+		if (exact) return { matchedId: modelId, model: exact };
+
+		// Version-suffix match via the model cache ("<modelId>@...")
+		const suffixPrefix = modelId + "@";
+		for (const entry of modelCache ?? []) {
+			if (entry.provider !== providerName || !entry.modelId.startsWith(suffixPrefix)) continue;
+
+			// The cache comes from CLI output; confirm the registry knows the ID too
+			const suffixed = modelRegistry.find(providerName, entry.modelId);
+			if (suffixed) return { matchedId: entry.modelId, model: suffixed };
+		}
+
+		return null;
+	};
+
+	// Single pass: return the first match that has an API key, and remember the
+	// first match without one as the fallback.
+	let keylessMatch: { provider: string; modelId: string } | null = null;
+	for (const providerName of searchOrder) {
+		const hit = lookupOnProvider(providerName);
+		if (!hit) continue;
+
+		const resolved = { provider: providerName, modelId: hit.matchedId };
+		const apiKey = await modelRegistry.getApiKey(hit.model);
+		if (apiKey) return resolved;
+
+		if (keylessMatch === null) keylessMatch = resolved;
+	}
+
+	// No provider with an API key has the model: fall back to the first provider
+	// that lists it (will fail, but with a clearer error), or null if none does.
+	return keylessMatch;
+}
+
 function writePromptToTempFile(agentName: string, prompt: string): { dir: string; filePath: string } {
 	const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "pi-subagent-"));
 	const safeName = agentName.replace(/[^\w.-]+/g, "_");
@@ -227,6 +393,8 @@ async function runSingleAgent(
 	signal: AbortSignal | undefined,
 	onUpdate: OnUpdateCallback | undefined,
 	makeDetails: (results: SingleResult[]) => SubagentDetails,
+	modelRegistry: any,
+	providerPreference: string[],
 ): Promise<SingleResult> {
 	const agent = agents.find((a) => a.name === agentName);
 
@@ -244,7 +412,19 @@ async function runSingleAgent(
 	}
 
 	const args: string[] = ["--mode", "json", "-p", "--no-session"];
-	if (agent.model) args.push("--model", agent.model);
+	
+	// Resolve model across providers
+	if (agent.model) {
+		const resolved = await findModelAcrossProviders(agent.model, modelRegistry, providerPreference);
+		if (resolved) {
+			args.push("--provider", resolved.provider);
+			args.push("--model", resolved.modelId);
+		} else {
+			// Fallback to just passing model ID (will likely fail but with pi's error message)
+			args.push("--model", agent.model);
+		}
+	}
+	
 	if (agent.tools && agent.tools.length > 0) args.push("--tools", agent.tools.join(","));
 
 	let tmpPromptDir: string | null = null;
@@ -409,6 +589,11 @@ const SubagentParams = Type.Object({
 });
 
 export default function (pi: ExtensionAPI) {
+	// Populate the model cache when a session starts; loadModelCache returns immediately once the cache has been set
+	pi.on("session_start", async (_event, _ctx) => {
+		await loadModelCache(pi);
+	});
+
 	pi.registerTool({
 		name: "subagent",
 		label: "Subagent",
@@ -431,6 +616,29 @@ export default function (pi: ExtensionAPI) {
 			const discovery = discoverAgents(ctx.cwd, agentScope);
 			const agents = discovery.agents;
 			const confirmProjectAgents = params.confirmProjectAgents ?? true;
+			
+			// Get provider preference from (in order):
+			// 1. --subagent-providers flag
+			// 2. settings.json "subagentProviderPreference"
+			// 3. DEFAULT_PROVIDER_PREFERENCE (empty = all providers)
+			let providerPreference = DEFAULT_PROVIDER_PREFERENCE;
+			const flagValue = pi.getFlag("--subagent-providers") as string;
+			if (flagValue) {
+				providerPreference = flagValue.split(",").map(p => p.trim()).filter(Boolean);
+			} else {
+				// Try to load from settings.json
+				try {
+					const settingsPath = path.join(os.homedir(), ".pi", "agent", "settings.json");
+					if (fs.existsSync(settingsPath)) {
+						const settings = JSON.parse(fs.readFileSync(settingsPath, "utf-8"));
+						if (Array.isArray(settings.subagentProviderPreference)) {
+							providerPreference = settings.subagentProviderPreference;
+						}
+					}
+				} catch {
+					// Fall back to default
+				}
+			}
 
 			const hasChain = (params.chain?.length ?? 0) > 0;
 			const hasTasks = (params.tasks?.length ?? 0) > 0;
@@ -517,6 +725,8 @@ export default function (pi: ExtensionAPI) {
 						signal,
 						chainUpdate,
 						makeDetails("chain"),
+						ctx.modelRegistry,
+						providerPreference,
 					);
 					results.push(result);
 
@@ -597,6 +807,8 @@ export default function (pi: ExtensionAPI) {
 							}
 						},
 						makeDetails("parallel"),
+						ctx.modelRegistry,
+						providerPreference,
 					);
 					allResults[index] = result;
 					emitParallelUpdate();
@@ -631,6 +843,8 @@ export default function (pi: ExtensionAPI) {
 					signal,
 					onUpdate,
 					makeDetails("single"),
+					ctx.modelRegistry,
+					providerPreference,
 				);
 				const isError = result.exitCode !== 0 || result.stopReason === "error" || result.stopReason === "aborted";
 				if (isError) {
@@ -970,4 +1184,99 @@ export default function (pi: ExtensionAPI) {
 			return new Text(text?.type === "text" ? text.text : "(no output)", 0, 0);
 		},
 	});
+
+	// Register the --subagent-providers flag (comma-separated provider names, highest-priority preference source)
+	pi.registerFlag("subagent-providers", {
+		description: "Comma-separated list of preferred providers for subagent model resolution (e.g., 'google-vertex-claude,google,llama-cpp')",
+		type: "string",
+	});
+
+	// /subagent-config: report the effective provider preference, where it came from, and the model cache state
+	pi.registerCommand("subagent-config", {
+		description: "Show subagent provider preference configuration",
+		handler: async (_args, _ctx) => {
+			let providerPreference = DEFAULT_PROVIDER_PREFERENCE;
+			let source = "default";
+			const flagValue = pi.getFlag("--subagent-providers");
+
+			if (typeof flagValue === "string" && flagValue) {
+				providerPreference = flagValue.split(",").map(p => p.trim()).filter(Boolean);
+				source = "flag";
+			} else {
+				try {
+					const settingsPath = path.join(os.homedir(), ".pi", "agent", "settings.json");
+					if (fs.existsSync(settingsPath)) {
+						const settings = JSON.parse(fs.readFileSync(settingsPath, "utf-8"));
+						if (Array.isArray(settings.subagentProviderPreference)) {
+							providerPreference = settings.subagentProviderPreference;
+							source = "settings.json";
+						}
+					}
+				} catch {
+					// Unreadable or invalid settings.json: keep the default preference
+				}
+			}
+
+			// Build a formatted message
+			const lines: string[] = [];
+
+			lines.push("Subagent Provider Configuration");
+			lines.push("═".repeat(50));
+			lines.push("");
+
+			// Model cache status (null = never loaded, [] = load failed or listing was empty)
+			lines.push("Model Cache:");
+			if (modelCache === null) {
+				lines.push("  Status: Not loaded (loads on session start)");
+			} else if (modelCache.length === 0) {
+				lines.push("  Status: Empty (pi --list-models failed or returned no models)");
+			} else {
+				const providers = new Set(modelCache.map(m => m.provider));
+				lines.push(`  Status: Loaded (${modelCache.length} models across ${providers.size} providers)`);
+			}
+			lines.push("");
+
+			// Current setting
+			lines.push("Provider Preference:");
+			if (providerPreference.length > 0) {
+				lines.push(`  Source: ${source}`);
+				lines.push(`  Order: ${providerPreference.join(" → ")}`);
+			} else {
+				lines.push("  No preference (tries all known providers, preferring those with API keys)");
+			}
+			lines.push("");
+
+			// Configuration methods
+			lines.push("Configuration:");
+			lines.push("  1. Flag:     pi --subagent-providers google-vertex-claude,google");
+			lines.push("  2. Settings: Add to ~/.pi/agent/settings.json:");
+			lines.push('               "subagentProviderPreference": ["google-vertex-claude", ...]');
+			lines.push("");
+
+			// How it works
+			lines.push("Resolution Order:");
+			lines.push("  1. Try preferred providers WITH API keys");
+			lines.push("  2. Try other providers WITH API keys");
+			lines.push("  3. Fallback to any provider (may fail)");
+			lines.push("");
+
+			// Example
+			lines.push("Example:");
+			lines.push("  Agent defines:    model: claude-haiku-4-5");
+			if (providerPreference.length > 0) {
+				lines.push(`  Resolves to:      ${providerPreference[0]}/claude-haiku-4-5`);
+			} else {
+				lines.push("  Resolves to:      (first provider with API key)");
+			}
+
+			const output = lines.join("\n");
+
+			// Use sendMessage to inject as a system message (visible in conversation)
+			pi.sendMessage({
+				customType: "subagent-config",
+				content: output,
+				display: true,
+			});
+		},
+	});
 }
dots/pi/agent/extensions/subagent/README.md
@@ -0,0 +1,269 @@
+# Subagent Extension - Multi-Model Provider Support
+
+Delegate tasks to specialized subagents with isolated context windows and intelligent multi-provider model resolution.
+
+## Features
+
+- **Isolated context**: Each subagent runs in a separate `pi` process
+- **Multi-model support**: Each agent can use a different model
+- **Intelligent provider resolution**: Automatically finds models across providers
+- **Provider preference**: Configure which providers to prefer for model lookup
+- **Streaming output**: See tool calls and progress as they happen
+- **Parallel execution**: Run multiple agents concurrently
+- **Chain workflows**: Sequential execution with output passing
+
+## Model Resolution
+
+When an agent specifies a model ID (e.g., `claude-sonnet-4-5`), the extension:
+
+1. First tries **preferred providers with API keys** (in order)
+2. Then tries **other providers with API keys**
+3. Falls back to any provider (will fail if no API key configured)
+
+This allows you to:
+- Use `google-vertex-claude` for Claude models (if configured)
+- Fall back to `anthropic` if vertex isn't available
+- Use local models via `llama-cpp`
+- Configure provider preferences globally or per-session
+
+## Configuration
+
+### Provider Preference
+
+Configure provider preference in **three ways** (priority order):
+
+#### 1. Command-line flag (highest priority)
+
+```bash
+pi --subagent-providers google-vertex-claude,google,llama-cpp
+```
+
+#### 2. Settings file (`~/.pi/agent/settings.json`)
+
+```json
+{
+  "subagentProviderPreference": [
+    "google-vertex-claude",
+    "google", 
+    "llama-cpp",
+    "anthropic",
+    "openai"
+  ]
+}
+```
+
+#### 3. Default behavior (lowest priority)
+
+If not configured, the extension tries every provider in its built-in list (`anthropic`, `openai`, `google`, `google-vertex`, `google-vertex-claude`, …), preferring those with API keys.
+
+### View Current Configuration
+
+```bash
+pi
+> /subagent-config
+```
+
+## Agent Definitions
+
+Create agents in `~/.pi/agent/agents/*.md`:
+
+```markdown
+---
+name: scout
+description: Fast reconnaissance
+tools: read, grep, find, ls
+model: claude-haiku-4-5
+---
+
+You are a scout. Quickly investigate a codebase...
+```
+
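+The `model:` field is just the model ID - the provider is resolved automatically. A version suffix may be omitted: `claude-haiku-4-5` also matches `claude-haiku-4-5@20251001` when that is the ID the provider lists.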
+
+### Available Models by Provider
+
+**Anthropic Claude via Google Vertex:**
+- `claude-opus-4-6`
+- `claude-sonnet-4-5@20250929`
+- `claude-haiku-4-5@20251001`
+
+**Google Gemini:**
+- `gemini-2.5-flash`
+- `gemini-2.5-pro`
+
+**Local (llama-cpp):**
+- `Qwen/Qwen3-8B-GGUF:Q4_K_M`
+- `bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q4_K_M`
+
+## Usage Examples
+
+### Simple Usage
+
+```
+Use scout to find all Tekton tasks
+```
+
+### Chain Workflow
+
+```
+Chain: scout finds auth code, planner creates refactor plan, worker implements it
+```
+
+This might use:
+- scout → `claude-haiku-4-5` (resolved to `claude-haiku-4-5@20251001`) via `google-vertex-claude`
+- planner → `claude-sonnet-4-5@20250929` via `google-vertex-claude`
+- worker → `claude-sonnet-4-5@20250929` via `google-vertex-claude`
+
+### Slash Commands
+
+The `subagent-commands.ts` extension provides shortcuts:
+
+```
+/scout find all authentication code
+/implement add caching to session store
+/scout-and-plan refactor database layer
+/review-code check security in API handlers
+```
+
+## Example Agents
+
+### Scout (Fast, Cheap)
+
+```markdown
+---
+name: scout
+description: Fast reconnaissance
+tools: read, grep, find, ls, bash
+model: claude-haiku-4-5
+---
+
+Quickly investigate and return compressed findings.
+```
+
+### Planner (Detailed Analysis)
+
+```markdown
+---
+name: planner
+description: Creates implementation plans
+tools: read, grep, find, ls
+model: claude-sonnet-4-5@20250929
+---
+
+Receive context and create detailed implementation plans.
+```
+
+### Local Researcher
+
+```markdown
+---
+name: local-researcher
+description: Research using local model
+tools: web_search, github_search, stack_overflow_search
+model: Qwen/Qwen3-8B-GGUF:Q4_K_M
+---
+
+Research using a local model to save API costs.
+```
+
+## Cost Optimization
+
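+Configure provider preference to optimize costs. For example, prefer free local models (`llama-cpp`), then discounted Claude via Vertex (`google-vertex-claude`), then Gemini (`google`), and keep full-price Claude (`anthropic`) as the fallback. The settings file must be plain JSON, so do not put comments in it:
+
+```json
+{
+  "subagentProviderPreference": [
+    "llama-cpp",
+    "google-vertex-claude",
+    "google",
+    "anthropic"
+  ]
+}
+```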
+
+Now agents requesting `claude-haiku-4-5` will:
+1. Try `llama-cpp` first (no claude there, skip)
+2. Try `google-vertex-claude` (found! use it)
+3. Skip remaining providers
+
+## Troubleshooting
+
+### "No API key configured for provider"
+
+**Problem:** Agent fails because the subagent process doesn't have an API key.
+
+**Solution:** Add provider preference to route to a configured provider:
+
+```json
+{
+  "subagentProviderPreference": ["google-vertex-claude"]
+}
+```
+
+### Model not found
+
+**Problem:** Agent specifies a model that doesn't exist on any provider.
+
+**Solution:** Check available models:
+
+```bash
+pi --list-models | grep claude-haiku
+```
+
+Update agent definition with correct model ID.
+
+### Wrong provider being used
+
+**Problem:** Agent uses an unexpected provider.
+
+**Solution:** 
+1. Check current config: `/subagent-config`
+2. Add explicit preference in settings
+3. Or use flag: `pi --subagent-providers google-vertex-claude`
+
+## API Reference
+
+### Tool Parameters
+
+```typescript
+{
+  agent: "scout",                    // Single mode
+  task: "find auth code",
+  
+  // OR parallel mode:
+  tasks: [
+    { agent: "scout", task: "..." },
+    { agent: "planner", task: "..." }
+  ],
+  
+  // OR chain mode:
+  chain: [
+    { agent: "scout", task: "find auth" },
+    { agent: "planner", task: "plan using {previous}" }
+  ],
+  
+  agentScope: "user" | "project" | "both",  // Default: "user"
+  confirmProjectAgents: true,               // Default: true
+  cwd: "/path/to/working/dir"              // Optional
+}
+```
+
+### Settings Schema
+
+```json
+{
+  "subagentProviderPreference": ["provider1", "provider2", "..."]
+}
+```
+
+### Command-line Flag
+
+```bash
+--subagent-providers <comma-separated-list>
+```
+
+## Security
+
+**Project-local agents** (`.pi/agents/*.md`) can execute arbitrary code. Only use `agentScope: "both"` or `agentScope: "project"` for repositories you trust.
+
+By default, only user-level agents (`~/.pi/agent/agents/`) are loaded.
dots/pi/agent/ensure-settings.sh
@@ -11,7 +11,12 @@ TEMPLATE_SETTINGS="$(dirname "$0")/settings.json"
 REQUIRED_SETTINGS='{
   "hideThinkingBlock": true,
   "quietStartup": true,
-  "skills": ["~/.config/claude/skills"]
+  "skills": ["~/.config/claude/skills"],
+  "subagentProviderPreference": [
+    "google-vertex-claude",
+    "google",
+    "llama-cpp"
+  ]
 }'
 
 # Create runtime settings directory if it doesn't exist
@@ -19,29 +24,31 @@ mkdir -p "$(dirname "$RUNTIME_SETTINGS")"
 
 # If runtime settings doesn't exist, copy from template
 if [ ! -f "$RUNTIME_SETTINGS" ]; then
-  echo "📝 Creating $RUNTIME_SETTINGS from template..."
-  cp "$TEMPLATE_SETTINGS" "$RUNTIME_SETTINGS"
-  exit 0
+	echo "📝 Creating $RUNTIME_SETTINGS from template..."
+	cp "$TEMPLATE_SETTINGS" "$RUNTIME_SETTINGS"
+	exit 0
 fi
 
 # Use jq to merge required settings into existing settings
 # This preserves user settings while ensuring required ones are present
 if command -v jq >/dev/null 2>&1; then
-  echo "🔧 Ensuring required pi agent settings..."
-  
-  TEMP_FILE=$(mktemp)
-  jq -s '.[0] * .[1]' "$RUNTIME_SETTINGS" <(echo "$REQUIRED_SETTINGS") > "$TEMP_FILE"
-  mv "$TEMP_FILE" "$RUNTIME_SETTINGS"
-  
-  echo "✅ Pi agent settings updated:"
-  echo "   - hideThinkingBlock: true"
-  echo "   - quietStartup: true"
-  echo "   - skills: ~/.config/claude/skills"
+	echo "🔧 Ensuring required pi agent settings..."
+
+	TEMP_FILE=$(mktemp)
+	jq -s '.[0] * .[1]' "$RUNTIME_SETTINGS" <(echo "$REQUIRED_SETTINGS") >"$TEMP_FILE" # REQUIRED_SETTINGS wins on key conflicts; arrays are replaced, not merged
+	mv "$TEMP_FILE" "$RUNTIME_SETTINGS"
+
+	echo "✅ Pi agent settings updated:"
+	echo "   - hideThinkingBlock: true"
+	echo "   - quietStartup: true"
+	echo "   - skills: ~/.config/claude/skills"
+	echo "   - subagentProviderPreference: google-vertex-claude, google, llama-cpp"
 else
-  echo "⚠️  jq not found - cannot merge settings automatically"
-  echo "   Please ensure these settings are in $RUNTIME_SETTINGS:"
-  echo "   - hideThinkingBlock: true"
-  echo "   - quietStartup: true"
-  echo "   - skills: [\"~/.config/claude/skills\"]"
-  exit 1
+	echo "⚠️  jq not found - cannot merge settings automatically"
+	echo "   Please ensure these settings are in $RUNTIME_SETTINGS:"
+	echo "   - hideThinkingBlock: true"
+	echo "   - quietStartup: true"
+	echo "   - skills: [\"~/.config/claude/skills\"]"
+	echo "   - subagentProviderPreference: [\"google-vertex-claude\", \"google\", \"llama-cpp\"]"
+	exit 1
 fi
dots/pi/agent/settings.json
@@ -14,5 +14,13 @@
   "hideThinkingBlock": true,
   "skills": [
     "~/.config/claude/skills"
+  ],
+  "subagentProviderPreference": [
+    "google-vertex-claude",
+    "vertex",
+    "google",
+    "llama-cpp",
+    "anthropic",
+    "openai"
   ]
 }