summarizer.ts

  1#!/usr/bin/env bun
/**
 * Background Session Summarizer
 *
 * Reads a pending transcript, summarizes it with the `pi` CLI, saves the result
 * to the sessions directory, and cleans up the pending file.
 *
 * Usage: bun run summarizer.ts <path-to-pending-transcript.json>
 *
 * Environment:
 * - AI_SESSIONS_DIR: Where to save session summaries (optional, defaults to ~/.local/share/ai/sessions)
 *
 * Summarization runs `pi -p` (non-interactive mode): first with the
 * github-copilot/gpt-5-mini provider/model, then with pi's own configured default
 * if that attempt fails. If both fail, a template summary is written without AI.
 */
 15
 16import { readFile, writeFile, mkdir, unlink, stat } from "node:fs/promises";
 17import { existsSync } from "node:fs";
 18import { join } from "node:path";
 19import { homedir } from "node:os";
 20import { execSync, spawnSync } from "node:child_process";
 21
 22interface Transcript {
 23	savedAt: string;
 24	cwd: string;
 25	host: string;
 26	tool: string;
 27	messageCount: number;
 28	messages: Array<{ role: string; content: string }>;
 29}
 30
 31// Paths
 32const SESSIONS_DIR = process.env.AI_SESSIONS_DIR || join(homedir(), ".local", "share", "ai", "sessions");
 33
 34// Get the pending transcript path from command line
 35const transcriptPath = process.argv[2];
 36if (!transcriptPath) {
 37	console.error("Usage: bun run summarizer.ts <path-to-pending-transcript.json>");
 38	process.exit(1);
 39}
 40
 41// Desktop notification helper
 42function notify(title: string, body: string) {
 43	try {
 44		execSync(`notify-send -u low -t 5000 -a "ai-storage" "${title}" "${body}"`, { stdio: "ignore" });
 45	} catch {
 46		// Ignore notification failures
 47	}
 48}
 49
 50// Summarize using pi's configured model (non-interactive mode)
 51function summarizeWithPi(transcript: Transcript): string | null {
 52	const conversationText = transcript.messages
 53		.slice(0, 50) // Limit to first 50 messages to avoid token limits
 54		.filter((m) => m.content) // Skip messages with undefined content
 55		.map((m) => `${m.role.toUpperCase()}: ${m.content.slice(0, 2000)}`) // Truncate long messages
 56		.join("\n\n---\n\n");
 57
 58	// Try to detect git remote from the working directory
 59	let gitRemote = "";
 60	try {
 61		gitRemote = execSync(`git -C "${transcript.cwd}" remote get-url origin 2>/dev/null`, {
 62			encoding: "utf-8",
 63			timeout: 3000,
 64		}).trim();
 65	} catch {
 66		// Not a git repo or no remote
 67	}
 68
 69	const prompt = `Based on this AI coding session, generate a brief, descriptive title of 3-5 words.
 70Then, create a structured markdown summary.
 71
 72Session metadata:
 73- Date: ${transcript.savedAt.split("T")[0]}
 74- Time: ${transcript.savedAt.split("T")[1]?.slice(0, 5) || "unknown"}
 75- Host: ${transcript.host}
 76- Tool: ${transcript.tool}
 77- Working directory: ${transcript.cwd}
 78${gitRemote ? `- Git remote: ${gitRemote}` : ""}
 79
 80Conversation:
 81${conversationText}
 82
 83IMPORTANT: The metadata header MUST include **Project:** — this is REQUIRED.
 84Infer the project name as org/repo (e.g. "tektoncd/pipeline", "vdemeester/chisel") from the git remote, working directory, or conversation context.
 85If working in a git worktree, use the real project name, NOT the worktree path.
 86
 87Output ONLY the markdown, with this exact format:
 88
 89# Session: <3-5 Word Title>
 90
 91**Date:** ${transcript.savedAt.split("T")[0]}
 92**Time:** ${transcript.savedAt.split("T")[1]?.slice(0, 5) || "unknown"}
 93**Host:** ${transcript.host}
 94**Tool:** ${transcript.tool}
 95**Project:** <REQUIRED: org/repo or ~/path>
 96**Directory:** ${transcript.cwd}
 97
 98## Summary
 99Brief description of what was accomplished.
100
101## What Was Accomplished
102- Task 1
103- Task 2
104
105## Files Changed
106- \`path/to/file\` - Description of change
107
108## Commands Run
109\`\`\`bash
110# Key commands executed
111\`\`\`
112
113## Outcome
114Result of the session.
115
116## Next Steps
117- [ ] TODO 1
118- [ ] TODO 2
119
120### Tags
121#${transcript.tool} #auto-recovered #relevant-tags
122
123---
124*This session was auto-recovered from an unsaved transcript.*`;
125
126	try {
127		// Use pi in non-interactive mode with a lightweight model preference
128		// Try github-copilot first (already authenticated via gh CLI), fall back to default
129		const result = spawnSync(
130			"pi",
131			[
132				"-p", // Non-interactive mode
133				"--no-extensions", // CRITICAL: prevent recursive summarization
134				"--provider", "github-copilot",
135				"--model", "gpt-5-mini", // Fast and cheap
136				prompt,
137			],
138			{
139				encoding: "utf-8",
140				timeout: 120000, // 2 minute timeout
141				maxBuffer: 10 * 1024 * 1024, // 10MB buffer
142				env: {
143					...process.env,
144				},
145
146			}
147		);
148
149		console.log("Ran pi with github-copilot/gpt-5-mini, status:", result.status);
150
151		if (result.status === 0 && result.stdout) {
152			return result.stdout.trim();
153		}
154
155		console.log("github-copilot/gpt-5-mini failed, trying default model...");
156		console.log("Stderr from gpt-5-mini attempt:", result.stderr);
157
158		// If specific model failed, try with default config
159		const fallbackResult = spawnSync("pi", ["-p", "--no-extensions", prompt], {
160			encoding: "utf-8",
161			timeout: 120000,
162			maxBuffer: 10 * 1024 * 1024,
163		});
164
165		console.log("Ran pi with default model, status:", fallbackResult.status);
166
167		if (fallbackResult.status === 0 && fallbackResult.stdout) {
168			return fallbackResult.stdout.trim();
169		}
170
171		console.error("Pi summarization failed:", result.stderr || fallbackResult.stderr);
172		console.error("Stderr from default model attempt:", fallbackResult.stderr);
173		return null;
174	} catch (error) {
175		console.error("Pi execution error:", error);
176		return null;
177	}
178}
179
/**
 * Formats `content` as a markdown blockquote, keeping at most `limit` characters.
 * "..." is appended only when text was actually cut off, and every line gets its own
 * ">" prefix so multi-line messages cannot break out of the quote.
 */
function quoteExcerpt(content: string, limit: number): string {
	const clipped = content.length > limit ? `${content.slice(0, limit)}...` : content;
	return clipped
		.split(/\r?\n/)
		.map((line) => (line ? `> ${line}` : ">"))
		.join("\n");
}

/**
 * Generates a simple template summary without AI (last-resort fallback).
 *
 * Includes the session metadata, the message count, and quoted excerpts of the first
 * and (when different) last user message.
 *
 * @param transcript - Parsed pending transcript.
 * @returns Markdown document starting with a `# Session:` heading.
 */
function generateFallbackSummary(transcript: Transcript): string {
	const excerptLimit = 500;
	const date = transcript.savedAt.split("T")[0];
	const time = transcript.savedAt.split("T")[1]?.slice(0, 5) || "unknown";

	// Extract some context from messages
	const userMessages = transcript.messages.filter((m) => m.role === "user");
	const firstContent = userMessages[0]?.content || "No user message";
	const lastContent = userMessages[userMessages.length - 1]?.content || "";

	// The last message is shown only when it exists and its excerpt differs from the first one.
	const showLast =
		lastContent !== "" && lastContent.slice(0, excerptLimit) !== firstContent.slice(0, excerptLimit);
	const lastSection = showLast ? `## Last User Message\n${quoteExcerpt(lastContent, excerptLimit)}` : "";

	return `# Session: Auto-recovered session (needs review)

**Date:** ${date}
**Time:** ${time}
**Host:** ${transcript.host}
**Tool:** ${transcript.tool}
**Directory:** ${transcript.cwd}

## Summary
This session was auto-recovered from an unsaved transcript. AI summarization failed - manual review recommended.

## Context
- **Message count:** ${transcript.messageCount}

## First User Message
${quoteExcerpt(firstContent, excerptLimit)}

${lastSection}

### Tags
#${transcript.tool} #auto-recovered #needs-review

---
*This session was auto-recovered without AI summarization. Please review and update manually.*
`;
}
216
/**
 * Strips terminal control codes (OSC sequences, CSI/ANSI escapes) and trims the result.
 *
 * @param text - Raw text, e.g. CLI output that may contain terminal escapes.
 * @returns The text without the recognised sequences, with surrounding whitespace removed.
 */
function stripControlCodes(text: string): string {
	return text
		// Remove OSC sequences: ESC ] ... ST (where ST is ESC \ or BEL)
		.replace(/\x1b\][^\x07\x1b]*(?:\x1b\\|\x07)/g, "")
		// Also handle bare ] sequences without leading ESC (seen in output)
		.replace(/\][0-9;:=\w]+(?:\\|\x1b\\)/g, "")
		// Remove CSI sequences: ESC [ + parameter bytes (0x30-0x3F, which includes the
		// "?" of private modes such as ESC[?25l and ":" sub-parameters) + intermediate
		// bytes (0x20-0x2F) + one final byte (0x40-0x7E)
		.replace(/\x1b\[[0-?]*[ -\/]*[@-~]/g, "")
		.trim();
}
229
/**
 * Checks whether a process with the given PID currently exists.
 *
 * @param pid - Process id to probe.
 * @returns `true` when the process exists (even if owned by another user),
 *          `false` when it does not or when `pid` is not a positive integer.
 */
function isPidAlive(pid: number): boolean {
	// 0 and negative values address process groups rather than one process,
	// so they can never identify a lock owner.
	if (!Number.isInteger(pid) || pid <= 0) {
		return false;
	}
	try {
		process.kill(pid, 0); // Signal 0 = just check existence
		return true;
	} catch (error) {
		// EPERM: the process exists but we may not signal it -> still alive.
		// Anything else (notably ESRCH) means there is no such process.
		const code =
			typeof error === "object" && error !== null ? (error as { code?: unknown }).code : undefined;
		return code === "EPERM";
	}
}
239
// Locks older than this are treated as abandoned even if their PID still exists.
const LOCK_MAX_AGE_MS = 5 * 60 * 1000; // 5 minutes

/**
 * Decides whether a lock file can be discarded.
 *
 * A lock is stale when the PID recorded in it is no longer running, when the file's
 * mtime is older than LOCK_MAX_AGE_MS (the owner may be hung), or when the file
 * cannot be read or stat'ed at all.
 *
 * @param lockFile - Path of the lock file to inspect.
 * @returns `true` when the lock is stale, `false` when it still looks valid.
 */
async function isStaleLock(lockFile: string): Promise<boolean> {
	try {
		const recordedPid = parseInt((await readFile(lockFile, "utf-8")).trim(), 10);

		// A parseable PID whose process is gone means nobody holds the lock any more.
		const ownerIsDead = !isNaN(recordedPid) && !isPidAlive(recordedPid);
		if (ownerIsDead) {
			console.log(`Stale lock detected: PID ${recordedPid} is dead`);
			return true;
		}

		// Otherwise fall back to the age of the lock file itself.
		const { mtimeMs } = await stat(lockFile);
		const ageMs = Date.now() - mtimeMs;
		if (!(ageMs > LOCK_MAX_AGE_MS)) {
			return false;
		}

		console.log(`Stale lock detected: lock is ${Math.round(ageMs / 1000)}s old (max ${LOCK_MAX_AGE_MS / 1000}s)`);
		return true;
	} catch {
		// Unreadable lock file: treat it as stale.
		return true;
	}
}
268
/** True when `error` carries the given errno-style `code` (e.g. "EEXIST"). */
function hasErrnoCode(error: unknown, code: string): boolean {
	return typeof error === "object" && error !== null && (error as { code?: unknown }).code === code;
}

/** Turns a session title into a filename-safe slug; never returns an empty string. */
function slugifyTitle(title: string): string {
	const slug = title
		.toLowerCase()
		.replace(/[^a-z0-9]+/g, "-")
		.replace(/^-+|-+$/g, "")
		.substring(0, 60)
		.replace(/-+$/, ""); // truncation can expose a trailing dash again
	// A title without any ASCII letters or digits would otherwise yield "DATE-.md".
	return slug || "auto-recovered-session";
}

/**
 * Entry point: recovers one pending transcript.
 *
 * Steps: take the `<transcript>.lock` lock, read and parse the transcript, summarize it
 * (AI first, template fallback), write the summary to
 * `SESSIONS_DIR/YYYY-MM/DATE-slug[-n].md`, then remove the pending file, the lock and
 * the `.log` file and send a desktop notification.
 *
 * Exits with status 0 when there is nothing to do (already processed, or another live
 * process holds the lock) and with status 1 on failure.
 */
async function main() {
	const lockFile = `${transcriptPath}.lock`;
	// Set once this process has created the lock, so the error path never deletes a
	// lock that belongs to another summarizer.
	let lockHeld = false;

	try {
		// Check if already being processed
		if (existsSync(lockFile)) {
			if (await isStaleLock(lockFile)) {
				console.log(`Removing stale lock: ${lockFile}`);
				await unlink(lockFile).catch(() => {});
			} else {
				console.log(`Already being processed (lock exists, PID alive): ${transcriptPath}`);
				process.exit(0);
			}
		}

		// Check if pending file still exists
		if (!existsSync(transcriptPath)) {
			console.log(`Pending file already processed: ${transcriptPath}`);
			process.exit(0);
		}

		// Create lock file. The "wx" flag makes creation fail if the file already exists,
		// so two processes that both passed the check above cannot both take the lock.
		try {
			await writeFile(lockFile, String(process.pid), { encoding: "utf-8", flag: "wx" });
			lockHeld = true;
		} catch (error) {
			if (!hasErrnoCode(error, "EEXIST")) {
				throw error;
			}
			console.log(`Already being processed (lock was just taken): ${transcriptPath}`);
			process.exit(0);
		}

		// Read transcript
		const content = await readFile(transcriptPath, "utf-8");
		const transcript: Transcript = JSON.parse(content);

		console.log(`Processing transcript: ${transcriptPath}`);
		console.log(`  Messages: ${transcript.messageCount}, Host: ${transcript.host}`);

		// Try to summarize with pi
		let summary = summarizeWithPi(transcript);
		let usedFallback = false;

		if (!summary) {
			console.log("AI summarization failed, using fallback template");
			summary = generateFallbackSummary(transcript);
			usedFallback = true;
		}

		// Strip terminal control codes from output
		summary = stripControlCodes(summary);

		// Extract title from summary for filename
		const titleMatch = summary.match(/^# Session: (.+)$/m);
		const slug = slugifyTitle(titleMatch?.[1] || "auto-recovered-session");

		// Determine output path
		const date = transcript.savedAt.split("T")[0];
		const yearMonth = date.slice(0, 7);
		const sessionDir = join(SESSIONS_DIR, yearMonth);

		// Find unique filename (add suffix if needed)
		await mkdir(sessionDir, { recursive: true });
		let filename = `${date}-${slug}.md`;
		let filepath = join(sessionDir, filename);
		let suffix = 1;

		while (existsSync(filepath)) {
			filename = `${date}-${slug}-${suffix}.md`;
			filepath = join(sessionDir, filename);
			suffix++;
		}

		// Save summary
		await writeFile(filepath, summary, "utf-8");

		// Delete pending file, lock, and log file
		await unlink(transcriptPath);
		await unlink(lockFile).catch(() => {});
		await unlink(`${transcriptPath}.log`).catch(() => {});

		// Notify user
		const notifyMsg = usedFallback ? `Recovered (needs review): ${filename}` : `Recovered: ${filename}`;
		notify("Session Recovered", notifyMsg);

		console.log(`✓ Recovered session: ${filepath}`);
	} catch (error) {
		// Clean up our own lock file on error
		if (lockHeld) {
			await unlink(lockFile).catch(() => {});
		}
		console.error("Failed to recover session:", error);
		notify("Session Recovery Failed", String(error));
		process.exit(1);
	}
}

main();