main
1#!/usr/bin/env -S uv run --script
2# /// script
3# requires-python = ">=3.11"
4# dependencies = ["requests", "click"]
5# ///
6"""
7readwise-reader — Fetch, analyze, and triage Readwise Reader documents.
8
9Commands:
10 fetch Fetch documents from Readwise Reader API
11 analyze Score documents for relevance using LLMs
12 report Generate an interactive HTML triage report
13 archive Bulk-archive documents by relevance or age
14 delete Bulk-delete documents by relevance or age
15"""
16
17import click
18import html as html_mod
19import json
20import os
21import subprocess
22import time
23from collections import Counter, defaultdict
24from datetime import datetime, timezone
25from pathlib import Path
26
# All persistent state lives in a "readwise" directory under $XDG_DATA_HOME,
# or under ~/.local/share when that variable is not set.
DATA_DIR = Path(os.environ.get("XDG_DATA_HOME", Path.home() / ".local/share")) / "readwise"
PROFILE_FILE = DATA_DIR / "profile.toml"  # default --profile path for `analyze`
LATEST_LINK = DATA_DIR / "reader-latest.json"  # symlink that `fetch` points at its newest dump
ANALYZED_FILE = DATA_DIR / "reader-analyzed.json"  # dump enriched with `_analysis`, written by `analyze`
CHECKPOINT_FILE = DATA_DIR / "analysis-checkpoint.json"  # analyses keyed by document id
REPORT_FILE = DATA_DIR / "triage-report.html"  # output of `report`

READER_API_BASE = "https://readwise.io/api/v3"
READER_RATE_DELAY = 3.1 # 20 req/min for list endpoint

# Number of attempts each LLM backend makes before giving up.
MAX_RETRIES = 6

# Per-model settings selected with `analyze --model`: "backend" picks the
# call_llm implementation, "batch_size" is documents per prompt, and
# "rate_delay" is the pause in seconds between batches.
MODELS = {
    "opus": {"backend": "vertex-claude", "model_id": "claude-opus-4-6", "batch_size": 25, "rate_delay": 2, "max_output_tokens": 16384},
    "sonnet": {"backend": "vertex-claude", "model_id": "claude-sonnet-4@20250514", "batch_size": 30, "rate_delay": 1, "max_output_tokens": 16384},
    "gemini": {"backend": "gemini-api", "model_id": "gemini-3-pro-preview", "batch_size": 35, "rate_delay": 2, "max_output_tokens": 16384},
    "gemini25": {"backend": "gemini-api", "model_id": "gemini-2.5-pro", "batch_size": 35, "rate_delay": 2, "max_output_tokens": 16384},
}
45
46
47# ═══════════════════════════════════════════════════════════════════════
48# UTILITIES
49# ═══════════════════════════════════════════════════════════════════════
50
def ensure_data_dir():
    """Create DATA_DIR, including missing parents; do nothing if it already exists."""
    os.makedirs(DATA_DIR, exist_ok=True)
53
54
def get_readwise_token():
    """Return the Readwise API token stored in passage under ``readwise/key``.

    Raises:
        click.ClickException: if ``passage`` cannot be executed, exits with a
            non-zero status, or prints an empty secret.
    """
    try:
        result = subprocess.run(["passage", "show", "readwise/key"], capture_output=True, text=True)
    except FileNotFoundError as exc:
        # Without this the user gets a raw traceback instead of a CLI error.
        raise click.ClickException("'passage' was not found on PATH (needed for readwise/key)") from exc
    if result.returncode != 0:
        raise click.ClickException("Failed to get Readwise token from passage (readwise/key)")
    token = result.stdout.strip()
    if not token:
        # An empty token would only fail later as an opaque HTTP 401.
        raise click.ClickException("passage returned an empty Readwise token (readwise/key)")
    return token
60
61
def age_label(days):
    """Format an age in days as a compact label: ``3d``, ``2w``, ``5mo`` or ``1y4mo``.

    Weeks are 7 days, months 30 days and years 365 days. Because twelve
    30-day months are shorter than a year, the month component is capped at
    11 so the label never reads ``12mo`` or ``1y12mo``. Negative ages (a
    timestamp in the future) are shown as ``0d``.
    """
    days = max(days, 0)
    if days < 7:
        return f"{days}d"
    if days < 30:
        return f"{days // 7}w"
    if days < 365:
        return f"{min(days // 30, 11)}mo"
    years, remainder = divmod(days, 365)
    return f"{years}y{min(remainder // 30, 11)}mo"
67
68
69# ═══════════════════════════════════════════════════════════════════════
70# PROFILE
71# ═══════════════════════════════════════════════════════════════════════
72
def _strip_toml_comment(text: str) -> str:
    """Return *text* without a trailing ``#`` comment, ignoring ``#`` inside quotes."""
    quote = ""
    for pos, char in enumerate(text):
        if quote:
            if char == quote:
                quote = ""
        elif char in "\"'":
            quote = char
        elif char == "#":
            return text[:pos].rstrip()
    return text


def _split_toml_items(text: str) -> list[str]:
    """Split a comma-separated array body into unquoted, non-empty items.

    Commas inside quoted items do not split the item.
    """
    pieces, current, quote = [], [], ""
    for char in text:
        if quote:
            if char == quote:
                quote = ""
        elif char in "\"'":
            quote = char
        elif char == ",":
            pieces.append("".join(current))
            current = []
            continue
        current.append(char)
    pieces.append("".join(current))
    cleaned = (piece.strip().strip('"').strip("'") for piece in pieces)
    return [item for item in cleaned if item]


def load_profile(path: Path) -> dict:
    """Load the profile file into a flat ``{"section.key": value}`` dict.

    This is a deliberately lenient, line-based reader for the TOML subset
    the profile uses: ``[section]`` headers, ``key = value`` scalars (quoted
    or bare) and string arrays written on one line or spread over several.
    Scalars are returned as strings and arrays as lists of strings. Keys
    that appear before any section header are stored as ``".key"``.

    Args:
        path: Location of the profile file.

    Returns:
        The flattened profile, or ``{}`` when *path* does not exist.
    """
    if not path.exists():
        return {}

    profile = {}
    section = ""
    open_key = None  # flattened key of the multi-line array being collected
    open_items = None  # items gathered so far for that array

    def close_open_list():
        nonlocal open_key, open_items
        if open_items is not None:
            profile[open_key] = open_items
        open_key = open_items = None

    # TOML is UTF-8 by definition, so do not depend on the locale encoding.
    for raw_line in path.read_text(encoding="utf-8").splitlines():
        # Comments are removed quote-aware so "C#" or "F#" survive as items.
        line = _strip_toml_comment(raw_line.strip())
        if not line:
            continue

        if line.startswith("["):
            # A section header also terminates an unclosed array.
            close_open_list()
            section = line.strip("[]").strip()
            continue

        if "=" in line and not line.startswith(('"', "'")):
            close_open_list()
            key, _, val = line.partition("=")
            full_key = f"{section}.{key.strip()}"
            val = val.strip()
            if val.startswith("["):
                body = val[1:]
                if body.endswith("]"):
                    profile[full_key] = _split_toml_items(body[:-1])
                else:
                    # The array continues on following lines; items already
                    # on this line are kept.
                    open_key, open_items = full_key, _split_toml_items(body)
            elif val.startswith(('"', "'")):
                profile[full_key] = val.strip('"').strip("'")
            else:
                profile[full_key] = val
            continue

        if open_items is not None:
            closes = line.endswith("]")
            open_items.extend(_split_toml_items(line[:-1] if closes else line))
            if closes:
                close_open_list()

    close_open_list()
    return profile
133
134
def build_prompt_context(profile: dict) -> tuple[str, str]:
    """Render the flat profile dict as the user description used in LLM prompts.

    Args:
        profile: Mapping of ``"section.key"`` to a string or list of strings,
            as produced by ``load_profile``. Missing keys fall back to
            defaults ("User", "Software Engineer", "N/A").

    Returns:
        ``(description, summary_style)`` where ``summary_style`` comes from
        ``summary.style`` and defaults to ``"detailed"``.
    """
    def get(key):
        value = profile.get(key, [])
        # A scalar written where a list was expected (core = "go") must not
        # be joined character by character ("g, o").
        if isinstance(value, str):
            return [value] if value else []
        return [str(item) for item in value]

    def fmt(items):
        return ", ".join(items) if items else "N/A"

    name = profile.get("user.name", "User")
    role = profile.get("user.role", "Software Engineer")
    desc = f"""{name} — {role}

Core languages: {fmt(get('interests.core'))}
Infrastructure: {fmt(get('interests.infrastructure'))}
Editor: {fmt(get('interests.editor'))}
Kubernetes/Containers: {fmt(get('interests.kubernetes'))}
CLI tools: {fmt(get('interests.tools'))}
AI tooling: {fmt(get('interests.ai'))}
Side interests: {fmt(get('interests.side'))}
Values: {fmt(get('interests.values'))}
Currently exploring: {fmt(get('interests.exploring'))}
NOT interested in: {fmt(get('interests.not_interested'))}"""

    fav_authors = get("favorites.authors")
    fav_sites = get("favorites.sites")
    if fav_authors or fav_sites:
        desc += "\n\nFAVORITE AUTHORS/SITES (auto-boost to at least 4★, always worth_reading):\n"
        if fav_authors:
            desc += "Authors: " + ", ".join(fav_authors) + "\n"
        if fav_sites:
            desc += "Sites: " + ", ".join(fav_sites) + "\n"
        desc += "These are trusted voices — mark as interesting even on tangential topics."

    return desc, profile.get("summary.style", "detailed")
163
164
165# ═══════════════════════════════════════════════════════════════════════
166# LLM BACKENDS
167# ═══════════════════════════════════════════════════════════════════════
168
_token_cache = {"token": None, "ts": 0}


def _get_vertex_token():
    """Return a gcloud access token, reusing the cached one for up to 2400 seconds.

    Setting ``_token_cache["ts"]`` to 0 forces a refresh on the next call.

    Raises:
        click.ClickException: if ``gcloud auth print-access-token`` exits non-zero.
    """
    cache_age = time.time() - _token_cache["ts"]
    if cache_age <= 2400:
        return _token_cache["token"]

    gcloud = subprocess.run(
        ["gcloud", "auth", "print-access-token"],
        capture_output=True,
        text=True,
        timeout=10,
    )
    if gcloud.returncode != 0:
        raise click.ClickException(f"gcloud auth failed: {gcloud.stderr}")
    _token_cache["token"] = gcloud.stdout.strip()
    _token_cache["ts"] = time.time()
    return _token_cache["token"]
180
181
def _call_vertex_claude(model_id, prompt, max_tokens):
    """Send *prompt* to a Claude model through Vertex AI ``rawPredict`` and return the reply text.

    The project and location come from GOOGLE_CLOUD_PROJECT and
    GOOGLE_CLOUD_LOCATION. Up to MAX_RETRIES attempts are made: HTTP 429 and
    unexpected errors back off exponentially, and HTTP 401 invalidates the
    cached access token so the next attempt fetches a fresh one.

    Raises:
        click.ClickException: when every attempt ended in 429/401.
        Exception: the last request/parsing error when the final attempt fails.
    """
    import requests
    project = os.environ.get("GOOGLE_CLOUD_PROJECT", "itpc-gcp-pnd-pe-eng-claude")
    location = os.environ.get("GOOGLE_CLOUD_LOCATION", "global")
    if location == "global":
        url = f"https://aiplatform.googleapis.com/v1/projects/{project}/locations/{location}/publishers/anthropic/models/{model_id}:rawPredict"
    else:
        url = f"https://{location}-aiplatform.googleapis.com/v1/projects/{project}/locations/{location}/publishers/anthropic/models/{model_id}:rawPredict"
    payload = {
        "anthropic_version": "vertex-2023-10-16",
        "messages": [{"role": "user", "content": prompt}],
        "max_tokens": max_tokens,
        "temperature": 0.2,
    }
    for attempt in range(MAX_RETRIES):
        is_last = attempt == MAX_RETRIES - 1
        token = _get_vertex_token()
        try:
            resp = requests.post(
                url,
                headers={"Authorization": f"Bearer {token}", "Content-Type": "application/json"},
                json=payload,
                timeout=180,
            )
            if resp.status_code == 429:
                if is_last:
                    # No attempt follows, so waiting out the longest
                    # back-off would only delay the failure.
                    break
                wait = (2 ** attempt) * 5
                click.echo(f" ⏳ Rate limited, waiting {wait}s...", err=True)
                time.sleep(wait)
                continue
            if resp.status_code == 401:
                # Force _get_vertex_token to refresh on the next attempt.
                _token_cache["ts"] = 0
                continue
            resp.raise_for_status()
            return resp.json()["content"][0]["text"]
        except Exception as e:
            if not is_last:
                wait = (2 ** attempt) * 3
                click.echo(f" ⚠ {e}, retrying in {wait}s...", err=True)
                time.sleep(wait)
                continue
            raise
    raise click.ClickException(f"Failed after {MAX_RETRIES} retries")
213
214
215def _get_gemini_key():
216 key = os.environ.get("GEMINI_API_KEY")
217 if key: return key
218 result = subprocess.run(["passage", "show", "redhat/google/osp/vdeemest-api-key"], capture_output=True, text=True, timeout=10)
219 if result.returncode != 0:
220 raise click.ClickException("No GEMINI_API_KEY and passage lookup failed")
221 return result.stdout.strip()
222
223
def _call_gemini(model_id, prompt, max_tokens):
    """Send *prompt* to the Gemini ``generateContent`` endpoint and return the reply text.

    The request asks for a JSON response (``responseMimeType``). Up to
    MAX_RETRIES attempts are made, backing off exponentially on HTTP 429 and
    on unexpected errors.

    Raises:
        click.ClickException: when every attempt was rate limited.
        Exception: the last request/parsing error when the final attempt fails.
    """
    import requests
    api_key = _get_gemini_key()
    # The key travels in a header rather than the query string: request
    # exceptions include the URL, and their text is echoed to the terminal.
    url = f"https://generativelanguage.googleapis.com/v1beta/models/{model_id}:generateContent"
    headers = {"Content-Type": "application/json", "x-goog-api-key": api_key}
    payload = {
        "contents": [{"parts": [{"text": prompt}]}],
        "generationConfig": {"temperature": 0.2, "maxOutputTokens": max_tokens, "responseMimeType": "application/json"},
    }
    for attempt in range(MAX_RETRIES):
        is_last = attempt == MAX_RETRIES - 1
        try:
            resp = requests.post(url, headers=headers, json=payload, timeout=180)
            if resp.status_code == 429:
                if is_last:
                    # No attempt follows; skip the pointless final back-off.
                    break
                wait = (2 ** attempt) * 5
                click.echo(f" ⏳ Rate limited, waiting {wait}s...", err=True)
                time.sleep(wait)
                continue
            resp.raise_for_status()
            return resp.json()["candidates"][0]["content"]["parts"][0]["text"]
        except Exception as e:
            if not is_last:
                wait = (2 ** attempt) * 3
                click.echo(f" ⚠ {e}, retrying in {wait}s...", err=True)
                time.sleep(wait)
                continue
            raise
    raise click.ClickException(f"Failed after {MAX_RETRIES} retries")
247
248
def call_llm(backend, model_id, prompt, max_tokens):
    """Dispatch a prompt to the backend named by *backend* and return its text reply.

    Raises:
        ValueError: if *backend* is neither "vertex-claude" nor "gemini-api".
    """
    if backend not in ("vertex-claude", "gemini-api"):
        raise ValueError(f"Unknown backend: {backend}")
    handler = _call_vertex_claude if backend == "vertex-claude" else _call_gemini
    return handler(model_id, prompt, max_tokens)
253
254
def parse_json_response(text):
    """Parse the JSON payload out of an LLM reply.

    Handles replies wrapped in a Markdown code fence (with or without a
    language tag, with or without a newline after the fence) and replies
    that surround the JSON object with prose.

    Raises:
        json.JSONDecodeError: if no parseable JSON can be found.
    """
    import re
    text = text.strip()
    if text.startswith("```"):
        # Drop the opening fence line; when the fence has no newline after
        # it, drop just the backticks instead of failing on a missing line.
        _, newline, rest = text.partition("\n")
        text = rest if newline else text[3:]
    if text.endswith("```"):
        text = text[:-3]
    text = text.strip()
    try:
        return json.loads(text)
    except json.JSONDecodeError:
        # Fall back to the outermost {...} span when prose surrounds the JSON.
        match = re.search(r'\{.*\}', text, re.DOTALL)
        if match:
            return json.loads(match.group())
        raise
268
269
270# ═══════════════════════════════════════════════════════════════════════
271# CLI
272# ═══════════════════════════════════════════════════════════════════════
273
@click.group()
def cli():
    """Fetch, analyze, and triage Readwise Reader documents."""
    # The docstring doubles as the group's --help text, so keep it short.
    # Make sure DATA_DIR exists before any subcommand reads or writes in it.
    ensure_data_dir()
278
279
280# ─── FETCH ──────────────────────────────────────────────────────────────
281
@cli.command()
@click.option("--locations", default="new,later", help="Comma-separated locations to fetch (default: new,later)")
def fetch(locations):
    """Fetch documents from Readwise Reader API."""
    # Pages through /list/ for each requested location, writes one
    # timestamped JSON dump into DATA_DIR and repoints LATEST_LINK at it.
    import requests

    token = get_readwise_token()
    headers = {"Authorization": f"Token {token}"}
    # Drop empty entries so "new," or "new,,later" never send location="".
    locs = [name.strip() for name in locations.split(",") if name.strip()]
    if not locs:
        raise click.ClickException("--locations must name at least one location")

    all_docs = {}
    for pos, loc in enumerate(locs):
        click.echo(f"\nFetching '{loc}'...", err=True)
        docs = []
        cursor = None
        page = 1
        throttled = 0  # consecutive 429 responses for the current page
        while True:
            params = {"location": loc, "limit": 100}
            if cursor:
                params["pageCursor"] = cursor
            click.echo(f" page {page}...", err=True)
            resp = requests.get(f"{READER_API_BASE}/list/", params=params, headers=headers, timeout=60)
            if resp.status_code == 429 and throttled < MAX_RETRIES:
                # Wait as instructed and retry the same page rather than
                # aborting and discarding every page fetched so far.
                throttled += 1
                try:
                    wait = int(resp.headers.get("Retry-After", ""))
                except ValueError:
                    wait = 60
                click.echo(f" ⏳ Rate limited, waiting {wait}s...", err=True)
                time.sleep(wait)
                continue
            resp.raise_for_status()
            throttled = 0
            data = resp.json()
            results = data.get("results", [])
            docs.extend(results)
            cursor = data.get("nextPageCursor")
            click.echo(f" {len(results)} docs (total: {data.get('count', '?')})", err=True)
            if not cursor:
                break
            page += 1
            time.sleep(READER_RATE_DELAY)
        all_docs[loc] = docs
        click.echo(f" Total {loc}: {len(docs)}", err=True)
        if pos < len(locs) - 1:
            time.sleep(READER_RATE_DELAY)

    timestamp = datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%SZ")
    output = {"fetched_at": timestamp, "counts": {l: len(d) for l, d in all_docs.items()}, "documents": all_docs}
    outfile = DATA_DIR / f"reader-{'-'.join(locs)}-{timestamp}.json"
    # ensure_ascii=False emits raw non-ASCII text, so pin the encoding
    # instead of relying on the locale default.
    outfile.write_text(json.dumps(output, indent=2, ensure_ascii=False), encoding="utf-8")
    LATEST_LINK.unlink(missing_ok=True)
    LATEST_LINK.symlink_to(outfile.name)
    total = sum(len(d) for d in all_docs.values())
    click.echo(f"\n✅ Saved {total} documents to {outfile}", err=True)
322
323
324# ─── ANALYZE ────────────────────────────────────────────────────────────
325
def _normalize_analysis(raw):
    """Coerce one LLM analysis entry into the record shape stored in the checkpoint.

    Relevance is forced to an int in 1..5 (3 when unusable) and tags to a
    list of strings, so later sorting and comparisons cannot hit a
    str/int mismatch.
    """
    try:
        relevance = int(raw.get("relevance", 3))
    except (TypeError, ValueError):
        relevance = 3
    tags = raw.get("tags") or []
    if isinstance(tags, str):
        tags = [tags]
    return {
        "summary": str(raw.get("summary") or ""),
        "relevance": min(max(relevance, 1), 5),
        "reason": str(raw.get("reason") or ""),
        "action": str(raw.get("action") or "skim"),
        "tags": [str(tag) for tag in tags],
    }


@cli.command()
@click.option("--model", "-m", type=click.Choice(list(MODELS.keys())), default="opus", help="LLM model")
@click.option("--reset", is_flag=True, help="Reset checkpoint and re-analyze everything")
@click.option("--batch-size", type=int, help="Override batch size")
@click.option("--profile", "profile_path", type=click.Path(exists=False), default=str(PROFILE_FILE), help="Profile TOML path")
def analyze(model, reset, batch_size, profile_path):
    """Analyze documents with LLM for relevance scoring."""
    # Scores the documents behind LATEST_LINK in batches, checkpointing after
    # every batch, then writes the enriched dump to ANALYZED_FILE.
    cfg = MODELS[model].copy()
    if batch_size:
        cfg["batch_size"] = batch_size

    if reset and CHECKPOINT_FILE.exists():
        CHECKPOINT_FILE.unlink()
        click.echo("🔄 Checkpoint reset.", err=True)

    profile = load_profile(Path(profile_path))
    if profile:
        fav_count = len(profile.get("favorites.authors", [])) + len(profile.get("favorites.sites", []))
        click.echo(f"👤 Profile: {profile_path} ({fav_count} favorites)", err=True)
    profile_text, summary_style = build_prompt_context(profile)
    click.echo(f"📝 Summary: {summary_style}", err=True)

    if not LATEST_LINK.exists():
        raise click.ClickException("No data found. Run 'readwise-reader fetch' first.")

    with open(LATEST_LINK, encoding="utf-8") as f:
        data = json.load(f)

    all_docs = []
    for loc in data["documents"]:
        for d in data["documents"][loc]:
            d["_location"] = loc
            all_docs.append(d)

    # Load checkpoint — keyed by document ID, so survives re-fetches
    analyzed = {}
    if CHECKPOINT_FILE.exists():
        with open(CHECKPOINT_FILE, encoding="utf-8") as f:
            analyzed = json.load(f)

    to_analyze = [d for d in all_docs if d["id"] not in analyzed]
    # The checkpoint can hold documents from earlier fetches, so progress is
    # counted against the current dump rather than with len(analyzed).
    covered = len(all_docs) - len(to_analyze)

    click.echo(f"📚 Total: {len(all_docs)} | ✅ Already analyzed: {covered} | 🔍 Remaining: {len(to_analyze)}", err=True)
    click.echo(f"🤖 {cfg['model_id']} ({cfg['backend']}) | batch={cfg['batch_size']} | max_out={cfg['max_output_tokens']}", err=True)

    if not to_analyze:
        click.echo("Nothing new to analyze.", err=True)
    else:
        bs = cfg["batch_size"]
        total_batches = (len(to_analyze) + bs - 1) // bs

        for bn in range(total_batches):
            batch = to_analyze[bn * bs: (bn + 1) * bs]
            pct = covered / len(all_docs) * 100
            click.echo(f"\n[{bn+1}/{total_batches}] {len(batch)} docs ({pct:.0f}%)...", err=True)

            # Best effort per batch: a failed batch is reported and skipped so
            # a later run can pick its documents up again.
            try:
                prompt = _build_analysis_prompt(profile_text, summary_style, batch)
                raw = call_llm(cfg["backend"], cfg["model_id"], prompt, cfg["max_output_tokens"])
                parsed = parse_json_response(raw)
                matched = 0
                for a in parsed.get("analyses", []):
                    if not isinstance(a, dict):
                        continue
                    try:
                        idx = int(a.get("id"))
                    except (TypeError, ValueError):
                        # Missing or malformed id: skip this entry only.
                        continue
                    if 0 <= idx < len(batch):
                        doc_id = batch[idx]["id"]
                        if doc_id not in analyzed:
                            covered += 1
                        analyzed[doc_id] = _normalize_analysis(a)
                        matched += 1
                click.echo(f" ✓ {matched}/{len(batch)}", err=True)
            except Exception as e:
                click.echo(f" ✗ {e}", err=True)

            CHECKPOINT_FILE.write_text(json.dumps(analyzed), encoding="utf-8")
            if bn < total_batches - 1:
                time.sleep(cfg["rate_delay"])

    # Build enriched output
    click.echo(f"\n📊 Analyzed: {covered}/{len(all_docs)}", err=True)
    enriched = {}
    for loc in data["documents"]:
        enriched[loc] = []
        for d in data["documents"][loc]:
            d["_analysis"] = analyzed.get(d["id"], {"summary": d.get("summary", ""), "relevance": 3, "reason": "Not analyzed", "action": "skim", "tags": []})
            enriched[loc].append(d)

    output = {"fetched_at": data.get("fetched_at", ""), "analyzed_at": datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%SZ"),
              "model": cfg["model_id"], "counts": {l: len(d) for l, d in enriched.items()}, "documents": enriched}
    ANALYZED_FILE.write_text(json.dumps(output, indent=2, ensure_ascii=False), encoding="utf-8")
    click.echo(f"✅ {ANALYZED_FILE}", err=True)

    # Stats
    rels = Counter(analyzed[k]["relevance"] for k in analyzed if "relevance" in analyzed[k])
    if rels:
        click.echo("\nRelevance: " + " | ".join(f"{s}★={rels.get(s,0)}" for s in [5,4,3,2,1]), err=True)
    acts = Counter(analyzed[k]["action"] for k in analyzed if "action" in analyzed[k])
    if acts:
        click.echo("Actions: " + " | ".join(f"{a}={c}" for a,c in acts.most_common()), err=True)
429
430
431def _build_analysis_prompt(profile_text, summary_style, docs_batch):
432 entries = []
433 for i, d in enumerate(docs_batch):
434 summary = (d.get("summary") or "")[:600]
435 if summary.strip().lower() in ("comments", ""): summary = "N/A — infer from title/author/URL"
436 entries.append(f'[{i}] "{d.get("title", "Untitled")}"\n'
437 f' Author: {d.get("author", "")} | Source: {d.get("site_name") or d.get("source", "")} | '
438 f'{d.get("category", "")} | {d.get("word_count", 0)}w\n'
439 f' URL: {d.get("source_url", "")}\n Summary: {summary}')
440
441 if summary_style == "detailed":
442 s_inst = ("**summary**: 4-6 sentences. What is this actually about? Key argument, technique, insight? "
443 "Include specific details. If summary is N/A, infer from title/author/URL.")
444 r_inst = ("**reason**: 2-3 sentences. Reference specific user interests that match or don't.")
445 else:
446 s_inst = "**summary**: 1-2 sentences about the actual content."
447 r_inst = "**reason**: 5-15 words."
448
449 return f"""Analyze these {len(docs_batch)} articles. Score each for relevance to this user.
450
451<user_profile>
452{profile_text}
453</user_profile>
454
455<documents>
456{chr(10).join(entries)}
457</documents>
458
459For each document return:
4601. {s_inst}
4612. **relevance**: 1-5 (5=must-read for this user, 1=skip)
4623. {r_inst}
4634. **action**: must_read / worth_reading / skim / archive / delete
4645. **tags**: 1-3 from: nix, go, rust, python, emacs, kubernetes, containers, ci-cd, git, security, homelab, networking, ai-llm, coding-agents, linux, open-source, privacy, productivity, pkm, career, culture, french, hardware, web, devtools, monitoring, tekton
465
466IMPORTANT:
467- Favorite authors/sites → minimum 4★ and worth_reading.
468- Be discriminating: generic listicles = 1-2★, deep technical = 4-5★.
469
470Return ONLY valid JSON:
471{{"analyses":[{{"id":0,"summary":"...","relevance":4,"reason":"...","action":"worth_reading","tags":["nix"]}},...]}}\
472"""
473
474
475# ─── ARCHIVE / DELETE ──────────────────────────────────────────────────
476
# --unread-only used to be declared with is_flag=True and default=True, which
# left no way to switch it off; the paired --include-read flag provides one
# while keeping --unread-only and its default unchanged.
@cli.command()
@click.option("--max-relevance", type=int, default=2, help="Archive docs with relevance ≤ this (default: 2)")
@click.option("--min-age", type=int, default=365, help="Only docs older than N days (default: 365)")
@click.option("--unread-only/--include-read", default=True, help="Only unread docs (default: true); --include-read also matches docs already started")
@click.option("--dry-run", is_flag=True, help="Show what would be archived without doing it")
@click.option("--yes", "-y", is_flag=True, help="Skip confirmation prompt")
def archive(max_relevance, min_age, unread_only, dry_run, yes):
    """Bulk-archive documents on Readwise Reader."""
    _bulk_action("archive", max_relevance, min_age, unread_only, dry_run, yes)
486
487
# --unread-only used to be declared with is_flag=True and default=True, which
# left no way to switch it off; the paired --include-read flag provides one
# while keeping --unread-only and its default unchanged.
@cli.command()
@click.option("--max-relevance", type=int, default=1, help="Delete docs with relevance ≤ this (default: 1)")
@click.option("--min-age", type=int, default=365, help="Only docs older than N days (default: 365)")
@click.option("--unread-only/--include-read", default=True, help="Only unread docs (default: true); --include-read also matches docs already started")
@click.option("--dry-run", is_flag=True, help="Show what would be deleted without doing it")
@click.option("--yes", "-y", is_flag=True, help="Skip confirmation prompt")
def delete(max_relevance, min_age, unread_only, dry_run, yes):
    """Bulk-delete documents on Readwise Reader."""
    _bulk_action("delete", max_relevance, min_age, unread_only, dry_run, yes)
497
498
def _bulk_doc_relevance(doc):
    """Return the document's analysed relevance as an int, or 3 when missing or unusable."""
    try:
        return int((doc.get("_analysis") or {}).get("relevance", 3))
    except (TypeError, ValueError):
        return 3


def _bulk_doc_age_days(doc, now):
    """Return whole days since the document was saved (0 when no timestamp is present)."""
    saved = doc.get("saved_at") or doc.get("created_at")
    if not saved:
        return 0
    saved_at = datetime.fromisoformat(saved)
    if saved_at.tzinfo is None:
        # Subtracting a naive timestamp from the aware `now` would raise.
        # NOTE(review): naive values are assumed to be UTC — confirm.
        saved_at = saved_at.replace(tzinfo=timezone.utc)
    return (now - saved_at).days


def _bulk_action(action, max_relevance, min_age, unread_only, dry_run, yes):
    """Archive or delete every analysed document matching the given filters.

    Args:
        action: ``"archive"`` or ``"delete"``.
        max_relevance: Keep only documents whose relevance is at most this.
        min_age: Keep only documents at least this many days old.
        unread_only: When true, skip documents with any reading progress.
        dry_run: Print the selection and return without calling the API.
        yes: Skip the interactive confirmation.

    Raises:
        click.ClickException: if ANALYZED_FILE does not exist.
        click.Abort: if the user declines the confirmation.
    """
    import requests

    if not ANALYZED_FILE.exists():
        raise click.ClickException("No analyzed data. Run 'readwise-reader analyze' first.")

    with open(ANALYZED_FILE, encoding="utf-8") as f:
        data = json.load(f)

    now = datetime.now(timezone.utc)
    candidates = []
    for loc in data["documents"]:
        for d in data["documents"][loc]:
            # A null reading_progress counts as unread.
            progress = d.get("reading_progress") or 0
            if _bulk_doc_relevance(d) > max_relevance:
                continue
            if _bulk_doc_age_days(d, now) < min_age:
                continue
            if unread_only and progress > 0:
                continue
            candidates.append(d)

    if not candidates:
        click.echo(f"No documents match criteria (relevance ≤ {max_relevance}, age ≥ {min_age}d, unread={unread_only})")
        return

    # Show summary
    rels = Counter(_bulk_doc_relevance(d) for d in candidates)
    click.echo(f"\n{'DRY RUN — ' if dry_run else ''}{action.upper()}: {len(candidates)} documents")
    click.echo(f" Criteria: relevance ≤ {max_relevance}★, age ≥ {min_age}d, unread_only={unread_only}")
    click.echo(" Relevance breakdown: " + ", ".join(f"{s}★={rels.get(s,0)}" for s in sorted(rels.keys())))

    # Show sample
    click.echo("\n Sample (first 10):")
    for d in candidates[:10]:
        title = (d.get("title") or "Untitled")[:60]
        click.echo(f" {_bulk_doc_relevance(d)}★ | {title}")
    if len(candidates) > 10:
        click.echo(f" ... and {len(candidates) - 10} more")

    if dry_run:
        click.echo("\n🔍 Dry run — no changes made.")
        return

    if not yes:
        click.confirm(f"\n{action.capitalize()} {len(candidates)} documents?", abort=True)

    token = get_readwise_token()
    total = len(candidates)
    done = 0

    if action == "archive":
        # Use bulk_update endpoint (50 per request)
        batch_size = 50
        for i in range(0, total, batch_size):
            batch = candidates[i:i + batch_size]
            updates = [{"id": d["id"], "location": "archive"} for d in batch]
            resp = requests.patch(
                f"{READER_API_BASE}/bulk_update/",
                headers={"Authorization": f"Token {token}", "Content-Type": "application/json"},
                json={"updates": updates},
                timeout=60,
            )
            if resp.status_code in (200, 207):
                results = resp.json().get("results", [])
                ok = sum(1 for r in results if r.get("success"))
                done += ok
                if ok < len(batch):
                    fails = [r for r in results if not r.get("success")]
                    click.echo(f" ⚠ {len(fails)} failed in batch", err=True)
            else:
                click.echo(f" ✗ Batch failed: {resp.status_code} {resp.text[:200]}", err=True)
            click.echo(f" Archived {done}/{total}...", err=True)
            if i + batch_size < total:
                time.sleep(3.1)
        click.echo(f"\n✅ Archived {done} documents")

    elif action == "delete":
        # Delete is per-document (no bulk endpoint)
        for i, d in enumerate(candidates):
            resp = requests.delete(
                f"{READER_API_BASE}/delete/{d['id']}/",
                headers={"Authorization": f"Token {token}"},
                timeout=60,
            )
            if resp.status_code == 204:
                done += 1
            else:
                click.echo(f" ✗ Failed to delete {d['id']}: {resp.status_code}", err=True)
            if (i + 1) % 20 == 0:
                click.echo(f" Deleted {done}/{total}...", err=True)
            # Pace every request (not just every 20th) to stay within the
            # 20/min budget; nothing to wait for after the last one.
            if i + 1 < total:
                time.sleep(3.1)  # rate limit: 20/min
        click.echo(f"\n✅ Deleted {done} documents")
593
594
595# ─── REPORT ─────────────────────────────────────────────────────────────
596
# Display metadata per tag: tag -> (icon, human-readable label, colour).
# The report falls back to a generic icon and the raw tag name for tags
# missing from this table.
TAG_DISPLAY = {
    "nix": ("❄️", "NixOS & Nix", "#7eb8da"), "emacs": ("📝", "Emacs & Org-mode", "#7f5ab6"),
    "go": ("🐹", "Go", "#00add8"), "rust": ("🦀", "Rust", "#dea584"),
    "python": ("🐍", "Python", "#3776ab"), "tekton": ("🔧", "Tekton & CI/CD", "#fd495c"),
    "kubernetes": ("☸️", "Kubernetes", "#326ce5"), "containers": ("📦", "Containers", "#2496ed"),
    "ci-cd": ("🔄", "CI/CD", "#fd495c"), "homelab": ("🏠", "Homelab", "#e8a87c"),
    "git": ("🔀", "Git & VCS", "#f14e32"), "coding-agents": ("🤖", "Coding Agents", "#a855f7"),
    "ai-llm": ("🧠", "AI & LLM", "#8b5cf6"), "security": ("🔒", "Security", "#ef4444"),
    "linux": ("🐧", "Linux", "#fcc624"), "networking": ("🌐", "Networking", "#06b6d4"),
    "monitoring": ("📊", "Monitoring", "#10b981"), "devtools": ("🛠️", "Dev Tools", "#64748b"),
    "open-source": ("⚖️", "Open Source", "#22c55e"), "privacy": ("🛡️", "Privacy", "#f59e0b"),
    "productivity": ("📋", "Productivity", "#6366f1"), "pkm": ("🧩", "PKM", "#ec4899"),
    "career": ("👔", "Career", "#14b8a6"), "culture": ("📖", "Culture", "#a78bfa"),
    "french": ("🇫🇷", "French", "#3b82f6"), "hardware": ("⌨️", "Hardware", "#f97316"),
    "web": ("🌍", "Web", "#06b6d4"), "other": ("📄", "Other", "#94a3b8"),
}

# Report sections in display order: (group key, title, description, colour).
# The keys are the values returned by _categorize.
ACTION_GROUPS = [
    ("must_read", "⭐ Must Read", "AI rates these highly relevant.", "#f59e0b"),
    ("finish_reading", "🏃 Finish — >50% Done", "You started these. Finish them.", "#2196f3"),
    ("archive_finished", "✅ Archive — Finished", "100% read. Archive.", "#4caf50"),
    ("keep_triage", "📚 Active Queue", "Review and decide.", "#9c27b0"),
    ("archive_old_unread", "📦 Old & Unread (relevant)", "1yr+ unread but 3★+.", "#ff9800"),
    ("archive_low_relevance", "🗑️ Old & Low Relevance", "1yr+ unread, ≤2★. Purge.", "#f44336"),
    ("archive_old_barely", "🤔 Old & Barely Started", "1yr+, <10% read.", "#795548"),
]
623
624
625def _categorize(d):
626 p = d.get("reading_progress", 0)
627 age = d.get("_age", 0)
628 rel = d.get("_analysis", {}).get("relevance", 3)
629 act = d.get("_analysis", {}).get("action", "")
630 if p >= 1.0: return "archive_finished"
631 if age > 365 and p == 0 and rel <= 2: return "archive_low_relevance"
632 if age > 365 and p == 0: return "archive_old_unread"
633 if age > 365 and p < 0.1: return "archive_old_barely"
634 if p > 0.5: return "finish_reading"
635 if act in ("must_read", "worth_reading") and rel >= 4: return "must_read"
636 return "keep_triage"
637
638
639def _primary_tag(d):
640 tags = d.get("_analysis", {}).get("tags", [])
641 for t in ["nix","emacs","go","rust","python","tekton","kubernetes","containers","ci-cd","homelab","git",
642 "coding-agents","ai-llm","security","linux","networking","monitoring","devtools","open-source",
643 "privacy","productivity","pkm","career","culture","french","hardware","web"]:
644 if t in tags: return t
645 return tags[0] if tags else "other"
646
647
@cli.command()
@click.option("--no-open", is_flag=True, help="Don't auto-open in browser")
def report(no_open):
    """Generate interactive HTML triage report."""
    # Reads ANALYZED_FILE (or the raw fetch behind LATEST_LINK when nothing
    # has been analysed), groups documents by _categorize and then by primary
    # tag, and writes REPORT_FILE.
    src = ANALYZED_FILE if ANALYZED_FILE.exists() else LATEST_LINK
    if not src.exists():
        raise click.ClickException("No data. Run 'readwise-reader fetch' first.")
    click.echo(f"Using: {src}", err=True)

    with open(src, encoding="utf-8") as f:
        data = json.load(f)

    now = datetime.now(timezone.utc)
    all_docs = []
    for loc in data["documents"]:
        for d in data["documents"][loc]:
            d["_location"] = loc
            saved = d.get("saved_at") or d.get("created_at")
            age = 0
            if saved:
                saved_at = datetime.fromisoformat(saved)
                if saved_at.tzinfo is None:
                    # Subtracting a naive timestamp from the aware `now` would raise.
                    # NOTE(review): naive values are assumed to be UTC — confirm.
                    saved_at = saved_at.replace(tzinfo=timezone.utc)
                age = (now - saved_at).days
            d["_age"] = age
            d["_primary_tag"] = _primary_tag(d)
            all_docs.append(d)

    groups = defaultdict(list)
    for d in all_docs:
        groups[_categorize(d)].append(d)

    has_analysis = any(d.get("_analysis", {}).get("reason", "") not in ("Not analyzed", "Analysis failed", "") for d in all_docs)
    total = len(all_docs)

    html_parts = [_report_head(total, now, has_analysis, groups, all_docs)]

    for key, title, desc, color in ACTION_GROUPS:
        docs = groups.get(key, [])
        if not docs:
            continue
        docs.sort(key=lambda d: (-d.get("_analysis", {}).get("relevance", 3), d.get("_age", 0)))
        tag_groups = defaultdict(list)
        for d in docs:
            tag_groups[d["_primary_tag"]].append(d)

        html_parts.append(f'<div class="group" id="{key}">'
                          f'<div class="group-hdr" style="background:{color}15;border-left:4px solid {color}" onclick="toggle(this)">'
                          f'<h2><span class="arrow">▼</span> {title}</h2><span class="badge">{len(docs)}</span></div>'
                          f'<div class="group-desc">{desc}</div><div class="group-body">')

        for tag, tdocs in sorted(tag_groups.items(), key=lambda x: -len(x[1])):
            icon, label, _ = TAG_DISPLAY.get(tag, ("📄", tag, "#94a3b8"))
            # For tags outside TAG_DISPLAY the label is the raw tag text from
            # the LLM, so it must be escaped before it goes into HTML.
            label = html_mod.escape(str(label))
            # A null reading_progress sorts as unread instead of raising.
            tdocs.sort(key=lambda d: (-d.get("_analysis", {}).get("relevance", 3), -(d.get("reading_progress") or 0)))

            sources = Counter((d.get("site_name") or d.get("source") or "?") for d in tdocs)
            top_src = [(s, n) for s, n in sources.most_common(5) if n >= 2]
            wcs = [d.get("word_count", 0) for d in tdocs if d.get("word_count")]
            rh = sum(wcs) / 15000 if wcs else 0
            avg_rel = sum(d.get("_analysis", {}).get("relevance", 3) for d in tdocs) / len(tdocs)

            insights = []
            if top_src:
                insights.append("Sources: " + ", ".join(f"{s} ({n})" for s, n in top_src))
            insights.append(f"Stats: {len(tdocs)} docs, ~{rh:.1f}h reading, avg {avg_rel:.1f}★")

            html_parts.append(f'<div class="cluster"><div class="cluster-hdr" onclick="toggle(this)">'
                              f'<h3><span class="arrow">▼</span> {icon} {label}</h3><span class="cnt">{len(tdocs)}</span></div>'
                              f'<div class="cluster-insights">{html_mod.escape(" | ".join(insights))}</div>'
                              f'<ul class="doc-list">')
            for d in tdocs:
                html_parts.append(_doc_html(d))
            html_parts.append('</ul></div>')
        html_parts.append('</div></div>')

    html_parts.append(_report_foot())
    # The page contains emoji, so pin the encoding instead of relying on
    # the locale default.
    REPORT_FILE.write_text("\n".join(html_parts), encoding="utf-8")
    click.echo(f"✅ {REPORT_FILE}", err=True)

    if not no_open:
        try:
            subprocess.Popen(["xdg-open", str(REPORT_FILE)], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
        except OSError:
            # The report is already written; a missing opener is not fatal.
            click.echo("Could not launch xdg-open; open the report manually.", err=True)
720
721
def _doc_html(d):
    """Render one document as an ``<li class="doc">`` row for the report.

    Every value that comes from the fetched data or from the LLM (title,
    summary, reason, tags, id, relevance, location, URLs) is HTML-escaped
    before being interpolated. Expects ``d["_location"]`` to be set and
    reads the optional ``_age`` and ``_analysis`` entries.
    """
    esc = html_mod.escape
    # Null values are treated like missing ones.
    p = d.get("reading_progress") or 0
    a = d.get("_analysis") or {}
    rel = a.get("relevance", 3)
    summary = esc(a.get("summary", ""))
    reason = esc(a.get("reason", ""))
    title = esc(d.get("title") or "Untitled")
    src = esc(d.get("site_name") or d.get("source") or "")
    url = esc(d.get("url") or "#")
    source_url = d.get("source_url") or ""
    tags = [str(t) for t in (a.get("tags") or [])]
    loc = d["_location"]
    age = d.get("_age", 0)
    wc = d.get("word_count") or 0
    doc_id = esc(str(d.get("id", "")))

    rc = {5: "#22c55e", 4: "#84cc16", 3: "#eab308", 2: "#f97316", 1: "#ef4444"}.get(rel, "#94a3b8")
    pc = "#4ade80" if p >= 1 else "#60a5fa" if p > 0.5 else "#f59e0b" if p > 0 else "#475569"
    lc = "loc-new" if loc == "new" else "loc-later"
    # Tags are model output: escape them both as text and as an attribute value.
    th = "".join(f'<span class="tag">{esc(t)}</span>' for t in tags[:3])
    data_tags = esc(",".join(tags))
    rel_text = esc(str(rel))
    loc_text = esc(str(loc))
    rt = f"{wc // 250}min" if wc else ""
    sl = esc(source_url)

    # Show only the first sentence until the row is expanded, unless that
    # would hide fewer than five characters.
    first = summary.split('. ')[0] + '.' if '. ' in summary else summary
    if len(first) > len(summary) - 5:
        first = summary
    has_more = first != summary
    more_marker = "▸" if has_more else ""
    source_link = f'<a href="{sl}" target="_blank">↗</a>' if source_url else ""

    return (
        f'<li class="doc" data-title="{title.lower()}" data-rel="{rel_text}" data-tags="{data_tags}" data-id="{doc_id}">'
        f'<div class="doc-rel" style="color:{rc}" title="{reason}">{rel_text}★</div>'
        f'<div class="doc-prog"><div style="color:{pc}">{p:.0%}</div>'
        f'<div class="bar"><div class="fill" style="width:{p*100:.0f}%;background:{pc}"></div></div></div>'
        f'<div class="doc-info"><div class="doc-title"><a href="{url}" target="_blank">{title}</a></div>'
        f'<div class="doc-summary">'
        f'<div class="short" onclick="this.nextElementSibling.classList.toggle(\'open\')">{first}{more_marker}</div>'
        f'<div class="full">{summary}<div class="reason">{reason}</div></div>'
        f'</div>'
        f'<div class="doc-meta"><span class="loc {lc}">{loc_text}</span> {src} · {rt} · {age_label(age)} ago {th}</div></div>'
        f'<div class="doc-right">'
        f'{source_link}'
        f'</div></li>'
    )
763 )
764
765
def _report_head(total, now, has_analysis, groups, all_docs):
    """Build the opening part of the triage report HTML.

    Emits everything from ``<!DOCTYPE html>`` through the filter bar: inline
    CSS, the header with per-group counters, an optional relevance
    distribution bar, the section table of contents and the search /
    relevance / tag filter controls. The ``<div class="container">`` opened
    here is left open for the caller to fill and close.

    Args:
        total: Document count shown in the header and used as the
            denominator for the relevance bar widths.
        now: ``datetime`` whose date is printed in the header.
        has_analysis: Whether to render the relevance bar and label the
            report "AI-analyzed" (otherwise "keyword-only").
        groups: Mapping of action-group key to the list of documents in
            that group; missing keys count as empty.
        all_docs: Iterable of document dicts; each may carry an
            ``_analysis`` dict with ``relevance`` and ``tags``.

    Returns:
        The HTML head/preamble as a single string.
    """
    esc = html_mod.escape

    # Per-group counters in the header; the label is the group title up to
    # the first em dash.
    stat_cells = []
    for key, title, _, color in ACTION_GROUPS:
        n = len(groups.get(key, []))
        label = title.split("—")[0].strip()
        stat_cells.append(
            f'<div class="stat"><div class="n" style="color:{color}">{n}</div>'
            f'<div class="l">{label}</div></div>'
        )
    stats = "".join(stat_cells)

    relbar = ""
    if has_analysis:
        # ``or {}`` tolerates documents whose ``_analysis`` is present but null.
        by_rel = Counter(
            (d.get("_analysis") or {}).get("relevance", 3) for d in all_docs
        )
        colors = {5: "#22c55e", 4: "#84cc16", 3: "#eab308", 2: "#f97316", 1: "#ef4444"}
        labels = {5: "Must read", 4: "Relevant", 3: "Interesting", 2: "Low", 1: "Skip"}
        levels = (5, 4, 3, 2, 1)
        scale = total or 1  # guard against ZeroDivisionError when total is 0
        segs = "".join(
            f'<div class="seg" style="width:{by_rel[s] / scale * 100}%;'
            f'background:{colors[s]}">{by_rel[s]}</div>'
            for s in levels
            if by_rel[s]
        )
        legend = "".join(
            f'<span><span class="rel-dot" style="background:{colors[s]}"></span>'
            f'{s}★ {labels[s]} ({by_rel[s]})</span>'
            for s in levels
        )
        relbar = (
            '<div style="max-width:600px;margin:1rem auto 0">'
            f'<div class="rel-bar">{segs}</div>'
            f'<div class="rel-legend">{legend}</div></div>'
        )

    # Tag filter options, most frequent first. Tag names come from the
    # analysis output, so they are escaped before going into the markup.
    atags = Counter(
        str(t)
        for d in all_docs
        for t in ((d.get("_analysis") or {}).get("tags") or [])
    )
    tag_opts = "".join(
        f'<option value="{esc(t)}">{esc(t)} ({c})</option>'
        for t, c in atags.most_common()
    )

    # Table of contents: only groups that actually contain documents.
    toc_links = []
    for key, title, _, _ in ACTION_GROUPS:
        n = len(groups.get(key, []))
        if n:
            toc_links.append(f'<a href="#{key}">{title}<span class="cnt">({n})</span></a>')
    toc = "".join(toc_links)

    return f"""<!DOCTYPE html><html lang="en"><head><meta charset="utf-8"><meta name="viewport" content="width=device-width,initial-scale=1">
<title>Readwise Reader Triage</title>
<style>
:root{{--bg:#0f172a;--surface:#1e293b;--surface2:#334155;--text:#e2e8f0;--dim:#94a3b8;--accent:#f59e0b;--link:#38bdf8;--green:#4ade80;--red:#f87171}}
*{{box-sizing:border-box;margin:0;padding:0}}
body{{font-family:'Inter',-apple-system,system-ui,sans-serif;background:var(--bg);color:var(--text);line-height:1.6}}
.header{{background:linear-gradient(135deg,#1e293b,#312e81);padding:2rem;text-align:center;border-bottom:3px solid var(--accent)}}
.header h1{{font-size:1.8rem;margin-bottom:.3rem}}.header .sub{{color:var(--dim);font-size:.85rem}}
.stats{{display:flex;justify-content:center;gap:1rem;flex-wrap:wrap;margin-top:1.2rem}}
.stat{{background:rgba(0,0,0,.3);padding:.6rem 1.2rem;border-radius:8px;text-align:center;min-width:100px}}
.stat .n{{font-size:1.5rem;font-weight:700}}.stat .l{{font-size:.7rem;color:var(--dim);text-transform:uppercase}}
.container{{max-width:1200px;margin:0 auto;padding:1rem}}
.rel-bar{{display:flex;height:28px;border-radius:6px;overflow:hidden;margin:1.5rem 0 .5rem}}
.rel-bar .seg{{display:flex;align-items:center;justify-content:center;font-size:.75rem;font-weight:600;color:#000}}
.rel-legend{{display:flex;gap:1rem;justify-content:center;font-size:.75rem;color:var(--dim);margin-bottom:1.5rem;flex-wrap:wrap}}
.rel-legend span{{display:flex;align-items:center;gap:.3rem}}.rel-dot{{width:10px;height:10px;border-radius:50%;display:inline-block}}
.toc{{background:var(--surface);border-radius:8px;padding:1.2rem;margin:1rem 0}}
.toc h2{{font-size:1rem;margin-bottom:.8rem}}
.toc-grid{{display:grid;grid-template-columns:repeat(auto-fill,minmax(280px,1fr));gap:.4rem}}
.toc a{{color:var(--link);text-decoration:none;font-size:.85rem}}.toc a:hover{{text-decoration:underline}}.toc .cnt{{opacity:.5;margin-left:.3rem}}
.group{{margin:1.5rem 0;border-radius:10px;border:1px solid rgba(255,255,255,.08);overflow:hidden}}
.group-hdr{{padding:1rem 1.2rem;display:flex;align-items:center;justify-content:space-between;cursor:pointer;user-select:none}}
.group-hdr h2{{font-size:1.15rem}}.badge{{background:rgba(0,0,0,.3);padding:.2rem .7rem;border-radius:14px;font-size:.85rem;font-weight:600}}
.group-desc{{padding:0 1.2rem .8rem;color:var(--dim);font-size:.85rem;font-style:italic}}
.cluster{{margin:.4rem .8rem;background:rgba(0,0,0,.2);border-radius:8px;overflow:hidden}}
.cluster-hdr{{padding:.6rem 1rem;background:rgba(0,0,0,.15);display:flex;align-items:center;justify-content:space-between;cursor:pointer;user-select:none}}
.cluster-hdr h3{{font-size:.95rem}}.cnt{{background:rgba(255,255,255,.08);padding:.15rem .5rem;border-radius:10px;font-size:.78rem}}
.cluster-insights{{padding:.5rem 1rem;font-size:.8rem;color:var(--dim);border-top:1px solid rgba(255,255,255,.04)}}
.doc-list{{list-style:none}}.doc{{padding:.5rem 1rem;border-top:1px solid rgba(255,255,255,.04);display:grid;grid-template-columns:2.8rem 2.5rem 1fr auto;gap:.6rem;align-items:center;transition:background .1s}}
.doc:hover{{background:rgba(255,255,255,.03)}}
.doc-rel{{text-align:center;font-weight:700;font-size:.9rem}}
.doc-prog{{text-align:center;font-size:.75rem}}.doc-prog .bar{{width:2.2rem;height:3px;background:rgba(255,255,255,.1);border-radius:2px;margin:2px auto 0;overflow:hidden}}
.doc-prog .fill{{height:100%;border-radius:2px}}
.doc-info{{min-width:0}}.doc-title{{font-weight:500;white-space:nowrap;overflow:hidden;text-overflow:ellipsis;font-size:.9rem}}
.doc-title a{{color:var(--text);text-decoration:none}}.doc-title a:hover{{color:var(--link)}}
.doc-meta{{font-size:.73rem;color:var(--dim);white-space:nowrap;overflow:hidden;text-overflow:ellipsis}}
.doc-summary{{font-size:.78rem;color:var(--dim);margin-top:.15rem}}
.doc-summary .short{{cursor:pointer}}.doc-summary .short:hover{{color:var(--text)}}
.doc-summary .full{{display:none;margin-top:.3rem;line-height:1.5;color:var(--dim);border-left:2px solid rgba(255,255,255,.1);padding-left:.6rem}}
.doc-summary .full.open{{display:block}}
.doc-summary .reason{{font-size:.72rem;color:var(--accent);margin-top:.2rem;font-style:italic}}
.doc-right{{text-align:right;font-size:.73rem;white-space:nowrap;display:flex;flex-direction:column;align-items:flex-end;gap:.3rem}}.doc-right a{{color:var(--link)}}

.tag{{display:inline-block;padding:.05rem .35rem;border-radius:6px;font-size:.65rem;margin-right:.15rem;background:rgba(255,255,255,.06)}}
.loc{{display:inline-block;padding:.05rem .3rem;border-radius:6px;font-size:.65rem;font-weight:600}}
.loc-new{{background:#14532d;color:#86efac}}.loc-later{{background:#1e3a5f;color:#93c5fd}}
.arrow{{display:inline-block;transition:transform .15s}}.arrow.shut{{transform:rotate(-90deg)}}
.filter-bar{{background:var(--surface);padding:.8rem 1rem;border-radius:8px;margin:1rem 0;display:flex;gap:.5rem;flex-wrap:wrap;align-items:center}}
.filter-bar label{{font-size:.8rem;color:var(--dim)}}
.filter-bar select,.filter-bar input{{background:var(--surface2);color:var(--text);border:1px solid rgba(255,255,255,.1);border-radius:6px;padding:.3rem .6rem;font-size:.8rem}}

@media(max-width:768px){{.doc{{grid-template-columns:2rem 2rem 1fr}}.doc-right{{display:none}}.stats{{gap:.5rem}}.stat{{padding:.4rem .8rem;min-width:70px}}.stat .n{{font-size:1.2rem}}}}
</style></head><body>
<div class="header"><h1>📚 Readwise Reader Triage</h1>
<p class="sub">{now.strftime('%Y-%m-%d')} · {total} documents · {'AI-analyzed' if has_analysis else 'keyword-only'}</p>
<div class="stats">{stats}</div>{relbar}</div>
<div class="container">
<div class="toc"><h2>📑 Sections</h2><div class="toc-grid">{toc}</div></div>
<div class="filter-bar"><label>Filter:</label>
<input type="text" id="search" placeholder="Search titles/summaries..." oninput="filterDocs()">
<select id="relFilter" onchange="filterDocs()"><option value="">All relevance</option><option value="5">5★</option><option value="4">4★+</option><option value="3">3★+</option></select>
<select id="tagFilter" onchange="filterDocs()"><option value="">All tags</option>{tag_opts}</select></div>
"""
855
856
857def _report_foot():
858 return """<script>
859function toggle(el){const b=el.parentElement.querySelector('.group-body,.doc-list');if(!b)return;const a=el.querySelector('.arrow');if(b.style.display==='none'){b.style.display='';a?.classList.remove('shut')}else{b.style.display='none';a?.classList.add('shut')}}
860function filterDocs(){const q=document.getElementById('search').value.toLowerCase();const r=document.getElementById('relFilter').value;const t=document.getElementById('tagFilter').value;document.querySelectorAll('.doc').forEach(el=>{const title=(el.dataset.title||'');const summary=(el.querySelector('.doc-summary')?.textContent||'').toLowerCase();const rv=parseInt(el.dataset.rel||'3');const tags=el.dataset.tags||'';let s=true;if(q&&!title.includes(q)&&!summary.includes(q))s=false;if(r&&rv<parseInt(r))s=false;if(t&&!tags.includes(t))s=false;el.style.display=s?'':'none'})}
861
862</script></div></body></html>"""
863
864
865# ═══════════════════════════════════════════════════════════════════════
866
# Script entry point: invoke `cli` (defined earlier in this file; presumably
# the click command group) only when run directly, not when imported.
if __name__ == "__main__":
    cli()