Commit 81c502017c4f

Vincent Demeester <vincent@sbr.pm>
2026-02-05 23:12:34
feat(ai-storage): improve session auto-recovery
- Use github-copilot/gpt-5-mini for background summarization instead of Google (already authenticated via the gh CLI).
- Add a lock file mechanism to prevent race conditions when multiple recovery processes run simultaneously.
- Strip terminal control codes from AI output so session titles are extracted correctly for filenames.
- Add a numeric suffix (-1, -2, etc.) to duplicate filenames.
- Log summarizer output to .log files for debugging; the logs are cleaned up on success.
- Add a notification showing the log location on manual recovery.
1 parent ae6079f
Changed files (2)
dots
pi
agent
extensions
dots/pi/agent/extensions/ai-storage/index.ts
@@ -22,7 +22,7 @@
 
 import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
 import { writeFile, mkdir, appendFile, readFile, readdir, unlink } from "node:fs/promises";
-import { existsSync } from "node:fs";
+import { existsSync, openSync } from "node:fs";
 import { join, dirname } from "node:path";
 import { homedir, hostname } from "node:os";
 import { spawn } from "node:child_process";
@@ -533,10 +533,14 @@ export default function (pi: ExtensionAPI) {
 			for (const file of pendingFiles) {
 				const filepath = join(PENDING_DIR, file);
 
+				// Log file for debugging
+				const logFile = join(PENDING_DIR, `${file}.log`);
+				const logFd = openSync(logFile, "a");
+
 				// Spawn summarizer as detached background process
 				const child = spawn("bun", ["run", SUMMARIZER_SCRIPT, filepath], {
 					detached: true,
-					stdio: "ignore",
+					stdio: ["ignore", logFd, logFd],
 					env: {
 						...process.env,
 						AI_SESSIONS_DIR: SESSIONS_DIR,
@@ -931,9 +935,13 @@ After generating the summary, use the save_session_to_history tool to save it${c
 				for (const file of pendingFiles) {
 					const filepath = join(PENDING_DIR, file);
 
+					// Log file for debugging
+					const logFile = join(PENDING_DIR, `${file}.log`);
+					const logFd = openSync(logFile, "a");
+
 					const child = spawn("bun", ["run", SUMMARIZER_SCRIPT, filepath], {
 						detached: true,
-						stdio: "ignore",
+						stdio: ["ignore", logFd, logFd],
 						env: {
 							...process.env,
 							AI_SESSIONS_DIR: SESSIONS_DIR,
@@ -943,6 +951,7 @@ After generating the summary, use the save_session_to_history tool to save it${c
 				}
 
 				ctx.ui.notify(`Started recovery for ${pendingFiles.length} session(s)`, "success");
+				ctx.ui.notify(`Logs: ${PENDING_DIR}/*.log`, "info");
 			} catch (error) {
 				ctx.ui.notify(`Error: ${error}`, "error");
 			}
dots/pi/agent/extensions/ai-storage/summarizer.ts
@@ -14,6 +14,7 @@
  */
 
 import { readFile, writeFile, mkdir, unlink } from "node:fs/promises";
+import { existsSync } from "node:fs";
 import { join } from "node:path";
 import { homedir } from "node:os";
 import { execSync, spawnSync } from "node:child_process";
@@ -53,7 +54,8 @@ function summarizeWithPi(transcript: Transcript): string | null {
 		.map((m) => `${m.role.toUpperCase()}: ${m.content.slice(0, 2000)}`) // Truncate long messages
 		.join("\n\n---\n\n");
 
-	const prompt = `Summarize this AI coding session conversation into a structured markdown document.
+	const prompt = `Based on this AI coding session, generate a brief, descriptive title of 3-5 words.
+Then, create a structured markdown summary.
 
 Session metadata:
 - Date: ${transcript.savedAt.split("T")[0]}
@@ -65,9 +67,9 @@ Session metadata:
 Conversation:
 ${conversationText}
 
-Generate a summary in this exact format (output ONLY the markdown, no explanation):
+Output ONLY the markdown, with this exact format:
 
-# Session: <Brief descriptive title>
+# Session: <3-5 Word Title>
 
 **Date:** ${transcript.savedAt.split("T")[0]}
 **Time:** ${transcript.savedAt.split("T")[1]?.slice(0, 5) || "unknown"}
@@ -104,13 +106,13 @@ Result of the session.
 
 	try {
 		// Use pi in non-interactive mode with a lightweight model preference
-		// Try gemini-2.0-flash first (fast and cheap), fall back to default
+		// Try github-copilot first (already authenticated via gh CLI), fall back to default
 		const result = spawnSync(
 			"pi",
 			[
 				"-p", // Non-interactive mode
-				"--provider", "google", // Prefer Google for cost
-				"--model", "gemini-2.0-flash", // Fast and cheap
+				"--provider", "github-copilot",
+				"--model", "gpt-5-mini", // Fast and cheap
 				prompt,
 			],
 			{
@@ -125,12 +127,16 @@ Result of the session.
 			}
 		);
 
+		console.log("Ran pi with github-copilot/gpt-5-mini, status:", result.status);
+
 		if (result.status === 0 && result.stdout) {
 			return result.stdout.trim();
 		}
 
+		console.log("github-copilot/gpt-5-mini failed, trying default model...");
+		console.log("Stderr from gpt-5-mini attempt:", result.stderr);
+
 		// If specific model failed, try with default config
-		console.log("Gemini flash failed, trying default model...");
 		const fallbackResult = spawnSync("pi", ["-p", prompt], {
 			encoding: "utf-8",
 			timeout: 120000,
@@ -141,11 +147,14 @@ Result of the session.
 			},
 		});
 
+		console.log("Ran pi with default model, status:", fallbackResult.status);
+
 		if (fallbackResult.status === 0 && fallbackResult.stdout) {
 			return fallbackResult.stdout.trim();
 		}
 
 		console.error("Pi summarization failed:", result.stderr || fallbackResult.stderr);
+		console.error("Stderr from default model attempt:", fallbackResult.stderr);
 		return null;
 	} catch (error) {
 		console.error("Pi execution error:", error);
@@ -190,9 +199,39 @@ ${lastUserMessage && lastUserMessage !== firstUserMessage ? `## Last User Messag
 `;
 }
 
+/**
+ * Strip terminal control sequences from text captured from a terminal-oriented program.
+ *
+ * Removes, in this order:
+ *  1. OSC sequences: ESC ] payload, terminated by ST (ESC \) or BEL.
+ *  2. Bare "]" sequences that have no leading ESC, terminated by "\" or ESC \.
+ *  3. CSI sequences: ESC [ followed by parameter, intermediate, and final bytes.
+ *
+ * @param text - Raw output that may contain escape sequences.
+ * @returns The text with those sequences removed and surrounding whitespace trimmed.
+ */
+function stripControlCodes(text: string): string {
+	return text
+		// OSC: ESC ] followed by anything up to ESC \ or BEL
+		.replace(/\x1b\][^\x07\x1b]*(?:\x1b\\|\x07)/g, "")
+		// Also handle bare ] sequences without leading ESC (seen in output)
+		.replace(/\][0-9;:=\w]+(?:\\|\x1b\\)/g, "")
+		// CSI: parameter bytes 0x30-0x3F (digits, ';', and private markers such as '?'),
+		// intermediate bytes 0x20-0x2F, then one final byte 0x40-0x7E. A pattern limited
+		// to [0-9;]*[a-zA-Z] leaves sequences like ESC[?25h or ESC[3~ in the text.
+		.replace(/\x1b\[[0-?]*[ -\/]*[@-~]/g, "")
+		.trim();
+}
+
 // Main
 async function main() {
+	const lockFile = `${transcriptPath}.lock`;
+	
 	try {
+		// Check if already being processed (simple lock)
+		if (existsSync(lockFile)) {
+			console.log(`Already being processed (lock exists): ${transcriptPath}`);
+			process.exit(0);
+		}
+		
+		// Check if pending file still exists
+		if (!existsSync(transcriptPath)) {
+			console.log(`Pending file already processed: ${transcriptPath}`);
+			process.exit(0);
+		}
+		
+		// Create lock file
+		await writeFile(lockFile, String(process.pid), "utf-8");
+		
 		// Read transcript
 		const content = await readFile(transcriptPath, "utf-8");
 		const transcript: Transcript = JSON.parse(content);
@@ -210,6 +249,9 @@ async function main() {
 			usedFallback = true;
 		}
 
+		// Strip terminal control codes from output
+		summary = stripControlCodes(summary);
+
 		// Extract title from summary for filename
 		const titleMatch = summary.match(/^# Session: (.+)$/m);
 		const title = titleMatch?.[1] || "auto-recovered-session";
@@ -223,15 +265,26 @@ async function main() {
 		const date = transcript.savedAt.split("T")[0];
 		const yearMonth = date.slice(0, 7);
 		const sessionDir = join(SESSIONS_DIR, yearMonth);
-		const filename = `${date}-${slug}.md`;
-		const filepath = join(sessionDir, filename);
+		
+		// Find unique filename (add suffix if needed)
+		await mkdir(sessionDir, { recursive: true });
+		let filename = `${date}-${slug}.md`;
+		let filepath = join(sessionDir, filename);
+		let suffix = 1;
+		
+		while (existsSync(filepath)) {
+			filename = `${date}-${slug}-${suffix}.md`;
+			filepath = join(sessionDir, filename);
+			suffix++;
+		}
 
 		// Save summary
-		await mkdir(sessionDir, { recursive: true });
 		await writeFile(filepath, summary, "utf-8");
 
-		// Delete pending file
+		// Delete pending file, lock, and log file
 		await unlink(transcriptPath);
+		await unlink(lockFile).catch(() => {});
+		await unlink(`${transcriptPath}.log`).catch(() => {});
 
 		// Notify user
 		const notifyMsg = usedFallback ? `Recovered (needs review): ${filename}` : `Recovered: ${filename}`;
@@ -239,6 +292,8 @@ async function main() {
 
 		console.log(`✓ Recovered session: ${filepath}`);
 	} catch (error) {
+		// Clean up lock file on error
+		await unlink(lockFile).catch(() => {});
 		console.error("Failed to recover session:", error);
 		notify("Session Recovery Failed", String(error));
 		process.exit(1);