readwise-reader.py

  1#!/usr/bin/env -S uv run --script
  2# /// script
  3# requires-python = ">=3.11"
  4# dependencies = ["requests", "click"]
  5# ///
  6"""
  7readwise-reader — Fetch, analyze, and triage Readwise Reader documents.
  8
  9Commands:
 10  fetch     Fetch documents from Readwise Reader API
 11  analyze   Score documents for relevance using LLMs
 12  report    Generate an interactive HTML triage report
 13  archive   Bulk-archive documents by relevance or age
 14  delete    Bulk-delete documents by relevance or age
 15"""
 16
 17import click
 18import html as html_mod
 19import json
 20import os
 21import subprocess
 22import time
 23from collections import Counter, defaultdict
 24from datetime import datetime, timezone
 25from pathlib import Path
 26
 27DATA_DIR = Path(os.environ.get("XDG_DATA_HOME", Path.home() / ".local/share")) / "readwise"
 28PROFILE_FILE = DATA_DIR / "profile.toml"
 29LATEST_LINK = DATA_DIR / "reader-latest.json"
 30ANALYZED_FILE = DATA_DIR / "reader-analyzed.json"
 31CHECKPOINT_FILE = DATA_DIR / "analysis-checkpoint.json"
 32REPORT_FILE = DATA_DIR / "triage-report.html"
 33
 34READER_API_BASE = "https://readwise.io/api/v3"
 35READER_RATE_DELAY = 3.1  # 20 req/min for list endpoint
 36
 37MAX_RETRIES = 6
 38
 39MODELS = {
 40    "opus": {"backend": "vertex-claude", "model_id": "claude-opus-4-6", "batch_size": 25, "rate_delay": 2, "max_output_tokens": 16384},
 41    "sonnet": {"backend": "vertex-claude", "model_id": "claude-sonnet-4@20250514", "batch_size": 30, "rate_delay": 1, "max_output_tokens": 16384},
 42    "gemini": {"backend": "gemini-api", "model_id": "gemini-3-pro-preview", "batch_size": 35, "rate_delay": 2, "max_output_tokens": 16384},
 43    "gemini25": {"backend": "gemini-api", "model_id": "gemini-2.5-pro", "batch_size": 35, "rate_delay": 2, "max_output_tokens": 16384},
 44}
 45
 46
 47# ═══════════════════════════════════════════════════════════════════════
 48#  UTILITIES
 49# ═══════════════════════════════════════════════════════════════════════
 50
 51def ensure_data_dir():
 52    DATA_DIR.mkdir(parents=True, exist_ok=True)
 53
 54
 55def get_readwise_token():
 56    result = subprocess.run(["passage", "show", "readwise/key"], capture_output=True, text=True)
 57    if result.returncode != 0:
 58        raise click.ClickException("Failed to get Readwise token from passage (readwise/key)")
 59    return result.stdout.strip()
 60
 61
 62def age_label(days):
 63    if days < 7: return f"{days}d"
 64    if days < 30: return f"{days // 7}w"
 65    if days < 365: return f"{days // 30}mo"
 66    return f"{days // 365}y{(days % 365) // 30}mo"
 67
 68
 69# ═══════════════════════════════════════════════════════════════════════
 70#  PROFILE
 71# ═══════════════════════════════════════════════════════════════════════
 72
def load_profile(path: Path) -> dict:
    """Load profile from TOML with a minimal parser (no toml dependency).

    Only a small subset of TOML is understood: ``[section]`` headers,
    ``key = "string"``, single-line ``key = ["a", "b"]`` lists and lists
    spread over several lines. Anything else after ``=`` is stored as the raw
    text of the value.

    Args:
        path: Location of the profile file.

    Returns:
        A flat dict keyed by ``"section.key"`` whose values are either a
        string or a list of strings. Empty when the file does not exist.
    """
    if not path.exists():
        return {}

    profile = {}
    current_section = ""
    # Section that was active when the pending key was read; used when the
    # pending multi-line list is finally stored.
    current_section_for_key = ""
    current_key = None
    # Not None only while a multi-line list is being collected.
    current_list = None

    for line in path.read_text().splitlines():
        stripped = line.strip()
        # Blank lines and whole-line comments are ignored everywhere.
        if not stripped or stripped.startswith("#"):
            continue
        if stripped.startswith("["):
            # A new section header: store any list still being collected first.
            if current_key and current_list is not None:
                profile[f"{current_section_for_key}.{current_key}"] = current_list
                current_list = None
                current_key = None
            current_section = stripped.strip("[]").strip()
            current_section_for_key = current_section
            continue
        # A line starting with a double quote is treated as a list item even
        # if it contains "=".
        if "=" in stripped and not stripped.startswith('"'):
            # A new assignment also ends any list still being collected.
            if current_key and current_list is not None:
                profile[f"{current_section_for_key}.{current_key}"] = current_list
                current_list = None
            key, val = stripped.split("=", 1)
            key = key.strip()
            val = val.strip()
            current_key = key
            current_section_for_key = current_section
            if val == "[":
                # Start of a multi-line list; items follow on later lines.
                current_list = []
            elif val.startswith("[") and val.endswith("]"):
                # Single-line list: split on commas, drop quotes and empty items.
                current_list = [s.strip().strip('"').strip("'") for s in val[1:-1].split(",") if s.strip().strip('"').strip("'")]
                profile[f"{current_section}.{current_key}"] = current_list
                current_list = None
                current_key = None
            elif val.startswith('"') or val.startswith("'"):
                # Quoted scalar: stored without the surrounding quotes.
                profile[f"{current_section}.{current_key}"] = val.strip('"').strip("'")
                current_key = None
            else:
                # Anything else is kept verbatim as a string.
                profile[f"{current_section}.{current_key}"] = val
                current_key = None
        elif current_list is not None:
            # One line of a multi-line list: strip the trailing comma and quotes.
            val = stripped.rstrip(",").strip().strip('"').strip("'")
            if val and val != "]":
                # Cut off a trailing "# comment", then strip comma/quotes again.
                if "#" in val and not val.startswith("#"):
                    val = val[:val.index("#")].strip().rstrip(",").strip().strip('"').strip("'")
                if val:
                    current_list.append(val)
            # A line ending in "]" closes the list.
            if stripped.rstrip().endswith("]") or stripped == "]":
                profile[f"{current_section_for_key}.{current_key}"] = current_list
                current_list = None
                current_key = None

    # The file ended while a multi-line list was still open: keep what was read.
    if current_key and current_list is not None:
        profile[f"{current_section_for_key}.{current_key}"] = current_list
    return profile
133
134
def build_prompt_context(profile: dict) -> tuple[str, str]:
    """Render the flat profile dict into the user description used in LLM prompts.

    Args:
        profile: Flat ``"section.key"`` mapping; values are strings or lists
            of strings. Missing keys fall back to defaults / ``N/A``.

    Returns:
        ``(description, summary_style)`` where ``summary_style`` is the
        ``summary.style`` entry, defaulting to ``"detailed"``.
    """
    def get(key):
        value = profile.get(key, [])
        # A scalar entry (``core = "go"``) arrives as a plain string; wrap it so
        # the join below does not split it into single characters.
        if isinstance(value, str):
            return [value] if value else []
        return value

    def fmt(items):
        return ", ".join(items) if items else "N/A"

    name = profile.get("user.name", "User")
    role = profile.get("user.role", "Software Engineer")

    labelled_keys = [
        ("Core languages", "interests.core"),
        ("Infrastructure", "interests.infrastructure"),
        ("Editor", "interests.editor"),
        ("Kubernetes/Containers", "interests.kubernetes"),
        ("CLI tools", "interests.tools"),
        ("AI tooling", "interests.ai"),
        ("Side interests", "interests.side"),
        ("Values", "interests.values"),
        ("Currently exploring", "interests.exploring"),
        ("NOT interested in", "interests.not_interested"),
    ]
    lines = [f"{name} — {role}", ""]
    lines.extend(f"{label}: {fmt(get(key))}" for label, key in labelled_keys)
    desc = "\n".join(lines)

    fav_authors = get("favorites.authors")
    fav_sites = get("favorites.sites")
    if fav_authors or fav_sites:
        desc += "\n\nFAVORITE AUTHORS/SITES (auto-boost to at least 4★, always worth_reading):\n"
        if fav_authors:
            desc += "Authors: " + ", ".join(fav_authors) + "\n"
        if fav_sites:
            desc += "Sites: " + ", ".join(fav_sites) + "\n"
        desc += "These are trusted voices — mark as interesting even on tangential topics."

    return desc, profile.get("summary.style", "detailed")
163
164
165# ═══════════════════════════════════════════════════════════════════════
166#  LLM BACKENDS
167# ═══════════════════════════════════════════════════════════════════════
168
169_token_cache = {"token": None, "ts": 0}
170
171
172def _get_vertex_token():
173    if time.time() - _token_cache["ts"] > 2400:
174        result = subprocess.run(["gcloud", "auth", "print-access-token"], capture_output=True, text=True, timeout=10)
175        if result.returncode != 0:
176            raise click.ClickException(f"gcloud auth failed: {result.stderr}")
177        _token_cache["token"] = result.stdout.strip()
178        _token_cache["ts"] = time.time()
179    return _token_cache["token"]
180
181
def _call_vertex_claude(model_id, prompt, max_tokens):
    """Send ``prompt`` to a Claude model on Vertex AI and return the reply text.

    Project and location come from GOOGLE_CLOUD_PROJECT / GOOGLE_CLOUD_LOCATION.
    Up to MAX_RETRIES attempts are made: HTTP 429 and other errors back off
    exponentially, HTTP 401 invalidates the cached token and retries at once.

    Raises:
        click.ClickException: when every attempt ended in 429/401.
        Exception: the last request/parsing error if the final attempt fails.
    """
    import requests

    project = os.environ.get("GOOGLE_CLOUD_PROJECT", "itpc-gcp-pnd-pe-eng-claude")
    location = os.environ.get("GOOGLE_CLOUD_LOCATION", "global")
    # The "global" location uses the un-prefixed host; regions use "<region>-aiplatform".
    host = "aiplatform.googleapis.com" if location == "global" else f"{location}-aiplatform.googleapis.com"
    url = f"https://{host}/v1/projects/{project}/locations/{location}/publishers/anthropic/models/{model_id}:rawPredict"
    payload = {
        "anthropic_version": "vertex-2023-10-16",
        "messages": [{"role": "user", "content": prompt}],
        "max_tokens": max_tokens,
        "temperature": 0.2,
    }
    final_attempt = MAX_RETRIES - 1

    for attempt in range(MAX_RETRIES):
        # Fetched on every attempt so a token invalidated by a 401 is refreshed.
        bearer = _get_vertex_token()
        request_headers = {"Authorization": f"Bearer {bearer}", "Content-Type": "application/json"}
        try:
            resp = requests.post(url, headers=request_headers, json=payload, timeout=180)
            status = resp.status_code
            if status == 429:
                pause = (2 ** attempt) * 5
                click.echo(f"    ⏳ Rate limited, waiting {pause}s...", err=True)
                time.sleep(pause)
                continue
            if status == 401:
                # Expire the cached token so the next attempt fetches a new one.
                _token_cache["ts"] = 0
                continue
            resp.raise_for_status()
            return resp.json()["content"][0]["text"]
        except Exception as err:
            if attempt >= final_attempt:
                raise
            pause = (2 ** attempt) * 3
            click.echo(f"    ⚠ {err}, retrying in {pause}s...", err=True)
            time.sleep(pause)
    raise click.ClickException(f"Failed after {MAX_RETRIES} retries")
213
214
215def _get_gemini_key():
216    key = os.environ.get("GEMINI_API_KEY")
217    if key: return key
218    result = subprocess.run(["passage", "show", "redhat/google/osp/vdeemest-api-key"], capture_output=True, text=True, timeout=10)
219    if result.returncode != 0:
220        raise click.ClickException("No GEMINI_API_KEY and passage lookup failed")
221    return result.stdout.strip()
222
223
def _call_gemini(model_id, prompt, max_tokens):
    """Send ``prompt`` to the Gemini API and return the text of the first candidate.

    The API key is sent in the ``x-goog-api-key`` header rather than as a
    ``?key=`` query parameter: request exceptions include the URL in their
    message and that message is echoed to stderr below, so a key in the URL
    would be leaked on every failed attempt.

    Up to MAX_RETRIES attempts are made, with exponential back-off on HTTP 429
    and on any other error.

    Raises:
        click.ClickException: when every attempt was rate limited.
        Exception: the last request/parsing error if the final attempt fails.
    """
    import requests

    url = f"https://generativelanguage.googleapis.com/v1beta/models/{model_id}:generateContent"
    headers = {"Content-Type": "application/json", "x-goog-api-key": _get_gemini_key()}
    payload = {
        "contents": [{"parts": [{"text": prompt}]}],
        "generationConfig": {
            "temperature": 0.2,
            "maxOutputTokens": max_tokens,
            "responseMimeType": "application/json",
        },
    }
    for attempt in range(MAX_RETRIES):
        try:
            resp = requests.post(url, headers=headers, json=payload, timeout=180)
            if resp.status_code == 429:
                wait = (2 ** attempt) * 5
                click.echo(f"    ⏳ Rate limited, waiting {wait}s...", err=True)
                time.sleep(wait)
                continue
            resp.raise_for_status()
            return resp.json()["candidates"][0]["content"]["parts"][0]["text"]
        except Exception as e:
            if attempt < MAX_RETRIES - 1:
                wait = (2 ** attempt) * 3
                click.echo(f"    ⚠ {e}, retrying in {wait}s...", err=True)
                time.sleep(wait)
                continue
            raise
    raise click.ClickException(f"Failed after {MAX_RETRIES} retries")
247
248
249def call_llm(backend, model_id, prompt, max_tokens):
250    if backend == "vertex-claude": return _call_vertex_claude(model_id, prompt, max_tokens)
251    if backend == "gemini-api": return _call_gemini(model_id, prompt, max_tokens)
252    raise ValueError(f"Unknown backend: {backend}")
253
254
def parse_json_response(text):
    """Parse the JSON payload of an LLM reply, tolerating Markdown code fences.

    A leading fence line (three backticks, optionally followed by a language
    tag) and a trailing fence are removed first. A fence with no newline after
    it is handled as well instead of raising IndexError. If the remaining text
    is not valid JSON, the outermost ``{...}`` span is tried as a fallback.

    Args:
        text: Raw reply text.

    Returns:
        The decoded JSON value.

    Raises:
        json.JSONDecodeError: if no JSON could be decoded.
    """
    import re

    text = text.strip()
    if text.startswith("```"):
        _fence_line, newline, remainder = text.partition("\n")
        # With a newline, drop the whole fence line (including any language
        # tag); without one, drop only the three backticks.
        text = remainder if newline else text[3:]
        if text.endswith("```"):
            text = text[:-3]
        text = text.strip()
    try:
        return json.loads(text)
    except json.JSONDecodeError:
        # Fall back to the outermost {...} span, e.g. when prose surrounds the JSON.
        match = re.search(r'\{.*\}', text, re.DOTALL)
        if match:
            return json.loads(match.group())
        raise
268
269
270# ═══════════════════════════════════════════════════════════════════════
271#  CLI
272# ═══════════════════════════════════════════════════════════════════════
273
@click.group()
def cli():
    """Fetch, analyze, and triage Readwise Reader documents."""
    # The group body makes sure the data directory exists before a command runs.
    ensure_data_dir()
278
279
280# ─── FETCH ──────────────────────────────────────────────────────────────
281
@cli.command()
@click.option("--locations", default="new,later", help="Comma-separated locations to fetch (default: new,later)")
def fetch(locations):
    """Fetch documents from Readwise Reader API."""
    # Pages through the list endpoint for every requested location, writes a
    # timestamped snapshot into DATA_DIR and repoints LATEST_LINK at it.
    # Every request has a timeout, and an HTTP 429 is retried (honouring
    # Retry-After) instead of aborting the run and discarding the pages
    # already collected.
    import requests

    token = get_readwise_token()
    headers = {"Authorization": f"Token {token}"}
    all_docs = {}
    locs = [name.strip() for name in locations.split(",")]

    for loc in locs:
        click.echo(f"\nFetching '{loc}'...", err=True)
        docs = []
        cursor = None
        page = 1
        throttled = 0  # consecutive 429 responses for the current page
        while True:
            params = {"location": loc, "limit": 100}
            if cursor:
                params["pageCursor"] = cursor
            click.echo(f"  page {page}...", err=True)
            resp = requests.get(f"{READER_API_BASE}/list/", params=params, headers=headers, timeout=60)
            if resp.status_code == 429 and throttled < MAX_RETRIES:
                throttled += 1
                retry_after = resp.headers.get("Retry-After", "")
                # Prefer the server's hint; otherwise back off exponentially.
                wait = float(retry_after) if retry_after.isdigit() else READER_RATE_DELAY * 2 ** throttled
                click.echo(f"    ⏳ Rate limited, waiting {wait:.0f}s...", err=True)
                time.sleep(wait)
                continue  # retry the same page
            resp.raise_for_status()
            throttled = 0
            data = resp.json()
            results = data.get("results", [])
            docs.extend(results)
            cursor = data.get("nextPageCursor")
            click.echo(f"    {len(results)} docs (total: {data.get('count', '?')})", err=True)
            if not cursor:
                break
            page += 1
            time.sleep(READER_RATE_DELAY)
        all_docs[loc] = docs
        click.echo(f"  Total {loc}: {len(docs)}", err=True)
        if loc != locs[-1]:
            time.sleep(READER_RATE_DELAY)

    timestamp = datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%SZ")
    output = {
        "fetched_at": timestamp,
        "counts": {name: len(items) for name, items in all_docs.items()},
        "documents": all_docs,
    }
    outfile = DATA_DIR / f"reader-{'-'.join(locs)}-{timestamp}.json"
    outfile.write_text(json.dumps(output, indent=2, ensure_ascii=False))
    # LATEST_LINK is a relative symlink to the newest snapshot in the same directory.
    LATEST_LINK.unlink(missing_ok=True)
    LATEST_LINK.symlink_to(outfile.name)
    total = sum(len(items) for items in all_docs.values())
    click.echo(f"\n✅ Saved {total} documents to {outfile}", err=True)
322
323
324# ─── ANALYZE ────────────────────────────────────────────────────────────
325
def _normalize_analysis(a):
    """Turn one LLM result item into the record kept in the checkpoint.

    Relevance is coerced to an int and clamped to 1..5 (defaulting to 3 when
    missing or unparsable) so later numeric comparisons and sorting cannot hit
    a string or out-of-range value; ``tags`` is forced to be a list.
    """
    try:
        relevance = int(a.get("relevance", 3))
    except (TypeError, ValueError):
        relevance = 3
    tags = a.get("tags", [])
    return {
        "summary": a.get("summary", ""),
        "relevance": min(5, max(1, relevance)),
        "reason": a.get("reason", ""),
        "action": a.get("action", "skim"),
        "tags": tags if isinstance(tags, list) else [],
    }


@cli.command()
@click.option("--model", "-m", type=click.Choice(list(MODELS.keys())), default="opus", help="LLM model")
@click.option("--reset", is_flag=True, help="Reset checkpoint and re-analyze everything")
@click.option("--batch-size", type=int, help="Override batch size")
@click.option("--profile", "profile_path", type=click.Path(exists=False), default=str(PROFILE_FILE), help="Profile TOML path")
def analyze(model, reset, batch_size, profile_path):
    """Analyze documents with LLM for relevance scoring."""
    # Flow: load profile -> load latest fetch -> skip documents already in the
    # checkpoint -> score the rest in batches (checkpointing after each batch)
    # -> write the enriched ANALYZED_FILE -> print statistics.
    cfg = MODELS[model].copy()
    if batch_size:
        cfg["batch_size"] = batch_size

    if reset and CHECKPOINT_FILE.exists():
        CHECKPOINT_FILE.unlink()
        click.echo("🔄 Checkpoint reset.", err=True)

    profile = load_profile(Path(profile_path))
    if profile:
        fav_count = len(profile.get("favorites.authors", [])) + len(profile.get("favorites.sites", []))
        click.echo(f"👤 Profile: {profile_path} ({fav_count} favorites)", err=True)
    profile_text, summary_style = build_prompt_context(profile)
    click.echo(f"📝 Summary: {summary_style}", err=True)

    if not LATEST_LINK.exists():
        raise click.ClickException("No data found. Run 'readwise-reader fetch' first.")

    with open(LATEST_LINK) as f:
        data = json.load(f)

    all_docs = []
    for loc in data["documents"]:
        for d in data["documents"][loc]:
            d["_location"] = loc
            all_docs.append(d)

    # Load checkpoint — keyed by document ID, so survives re-fetches
    analyzed = {}
    if CHECKPOINT_FILE.exists():
        with open(CHECKPOINT_FILE) as f:
            analyzed = json.load(f)

    to_analyze = [d for d in all_docs if d["id"] not in analyzed]

    click.echo(f"📚 Total: {len(all_docs)} | ✅ Already analyzed: {len(analyzed)} | 🔍 Remaining: {len(to_analyze)}", err=True)
    click.echo(f"🤖 {cfg['model_id']} ({cfg['backend']}) | batch={cfg['batch_size']} | max_out={cfg['max_output_tokens']}", err=True)

    if not to_analyze:
        click.echo("Nothing new to analyze.", err=True)
    else:
        bs = cfg["batch_size"]
        total_batches = (len(to_analyze) + bs - 1) // bs

        for bn in range(total_batches):
            batch = to_analyze[bn * bs: (bn + 1) * bs]
            pct = len(analyzed) / len(all_docs) * 100
            click.echo(f"\n[{bn+1}/{total_batches}] {len(batch)} docs ({pct:.0f}%)...", err=True)

            # Best effort per batch: a failed batch is reported and skipped; its
            # documents stay out of the checkpoint and are retried on the next run.
            try:
                prompt = _build_analysis_prompt(profile_text, summary_style, batch)
                raw = call_llm(cfg["backend"], cfg["model_id"], prompt, cfg["max_output_tokens"])
                parsed = parse_json_response(raw)
                # Accept either {"analyses": [...]} or a bare top-level list.
                items = parsed.get("analyses", []) if isinstance(parsed, dict) else parsed
                matched = 0
                for a in items:
                    if not isinstance(a, dict):
                        continue
                    idx = a.get("id")
                    if idx is None:
                        continue
                    # "id" is the position of the document within this batch.
                    try:
                        idx = int(idx)
                    except (TypeError, ValueError):
                        continue
                    if 0 <= idx < len(batch):
                        analyzed[batch[idx]["id"]] = _normalize_analysis(a)
                        matched += 1
                click.echo(f"  ✓ {matched}/{len(batch)}", err=True)
            except Exception as e:
                click.echo(f"  ✗ {e}", err=True)

            CHECKPOINT_FILE.write_text(json.dumps(analyzed))
            if bn < total_batches - 1:
                time.sleep(cfg["rate_delay"])

    # Build enriched output
    click.echo(f"\n📊 Analyzed: {len(analyzed)}/{len(all_docs)}", err=True)
    enriched = {}
    for loc in data["documents"]:
        enriched[loc] = []
        for d in data["documents"][loc]:
            d["_analysis"] = analyzed.get(d["id"], {"summary": d.get("summary", ""), "relevance": 3, "reason": "Not analyzed", "action": "skim", "tags": []})
            enriched[loc].append(d)

    output = {
        "fetched_at": data.get("fetched_at", ""),
        "analyzed_at": datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%SZ"),
        "model": cfg["model_id"],
        "counts": {name: len(items) for name, items in enriched.items()},
        "documents": enriched,
    }
    ANALYZED_FILE.write_text(json.dumps(output, indent=2, ensure_ascii=False))
    click.echo(f"✅ {ANALYZED_FILE}", err=True)

    # Stats
    rels = Counter(analyzed[k]["relevance"] for k in analyzed if "relevance" in analyzed[k])
    if rels:
        click.echo("\nRelevance: " + " | ".join(f"{s}★={rels.get(s,0)}" for s in [5,4,3,2,1]), err=True)
    acts = Counter(analyzed[k]["action"] for k in analyzed if "action" in analyzed[k])
    if acts:
        click.echo("Actions: " + " | ".join(f"{a}={c}" for a,c in acts.most_common()), err=True)
429
430
431def _build_analysis_prompt(profile_text, summary_style, docs_batch):
432    entries = []
433    for i, d in enumerate(docs_batch):
434        summary = (d.get("summary") or "")[:600]
435        if summary.strip().lower() in ("comments", ""): summary = "N/A — infer from title/author/URL"
436        entries.append(f'[{i}] "{d.get("title", "Untitled")}"\n'
437                       f'    Author: {d.get("author", "")} | Source: {d.get("site_name") or d.get("source", "")} | '
438                       f'{d.get("category", "")} | {d.get("word_count", 0)}w\n'
439                       f'    URL: {d.get("source_url", "")}\n    Summary: {summary}')
440
441    if summary_style == "detailed":
442        s_inst = ("**summary**: 4-6 sentences. What is this actually about? Key argument, technique, insight? "
443                  "Include specific details. If summary is N/A, infer from title/author/URL.")
444        r_inst = ("**reason**: 2-3 sentences. Reference specific user interests that match or don't.")
445    else:
446        s_inst = "**summary**: 1-2 sentences about the actual content."
447        r_inst = "**reason**: 5-15 words."
448
449    return f"""Analyze these {len(docs_batch)} articles. Score each for relevance to this user.
450
451<user_profile>
452{profile_text}
453</user_profile>
454
455<documents>
456{chr(10).join(entries)}
457</documents>
458
459For each document return:
4601. {s_inst}
4612. **relevance**: 1-5 (5=must-read for this user, 1=skip)
4623. {r_inst}
4634. **action**: must_read / worth_reading / skim / archive / delete
4645. **tags**: 1-3 from: nix, go, rust, python, emacs, kubernetes, containers, ci-cd, git, security, homelab, networking, ai-llm, coding-agents, linux, open-source, privacy, productivity, pkm, career, culture, french, hardware, web, devtools, monitoring, tekton
465
466IMPORTANT:
467- Favorite authors/sites → minimum 4★ and worth_reading.
468- Be discriminating: generic listicles = 1-2★, deep technical = 4-5★.
469
470Return ONLY valid JSON:
471{{"analyses":[{{"id":0,"summary":"...","relevance":4,"reason":"...","action":"worth_reading","tags":["nix"]}},...]}}\
472"""
473
474
475# ─── ARCHIVE / DELETE ──────────────────────────────────────────────────
476
@cli.command()
@click.option("--max-relevance", type=int, default=2, help="Archive docs with relevance ≤ this (default: 2)")
@click.option("--min-age", type=int, default=365, help="Only docs older than N days (default: 365)")
# Declared as an on/off pair: a plain is_flag option with default=True can
# never be switched off from the command line.
@click.option("--unread-only/--include-read", default=True, help="Only unread docs, or also docs already started (default: --unread-only)")
@click.option("--dry-run", is_flag=True, help="Show what would be archived without doing it")
@click.option("--yes", "-y", is_flag=True, help="Skip confirmation prompt")
def archive(max_relevance, min_age, unread_only, dry_run, yes):
    """Bulk-archive documents on Readwise Reader."""
    # Thin wrapper: selection, confirmation and the API calls live in _bulk_action.
    _bulk_action("archive", max_relevance, min_age, unread_only, dry_run, yes)
486
487
@cli.command()
@click.option("--max-relevance", type=int, default=1, help="Delete docs with relevance ≤ this (default: 1)")
@click.option("--min-age", type=int, default=365, help="Only docs older than N days (default: 365)")
# Declared as an on/off pair: a plain is_flag option with default=True can
# never be switched off from the command line.
@click.option("--unread-only/--include-read", default=True, help="Only unread docs, or also docs already started (default: --unread-only)")
@click.option("--dry-run", is_flag=True, help="Show what would be deleted without doing it")
@click.option("--yes", "-y", is_flag=True, help="Skip confirmation prompt")
def delete(max_relevance, min_age, unread_only, dry_run, yes):
    """Bulk-delete documents on Readwise Reader."""
    # Thin wrapper: selection, confirmation and the API calls live in _bulk_action.
    _bulk_action("delete", max_relevance, min_age, unread_only, dry_run, yes)
497
498
def _bulk_action(action, max_relevance, min_age, unread_only, dry_run, yes):
    """Archive or delete every analyzed document matching the given criteria.

    Args:
        action: ``"archive"`` or ``"delete"``.
        max_relevance: Only documents whose relevance is at most this value.
        min_age: Only documents at least this many days old.
        unread_only: When true, skip documents with any reading progress.
        dry_run: Print the selection and return without calling the API.
        yes: Skip the interactive confirmation.

    Raises:
        click.ClickException: if ANALYZED_FILE does not exist.
        click.Abort: if the user declines the confirmation.
    """
    import requests

    if not ANALYZED_FILE.exists():
        raise click.ClickException("No analyzed data. Run 'readwise-reader analyze' first.")

    with open(ANALYZED_FILE) as f:
        data = json.load(f)

    now = datetime.now(timezone.utc)
    candidates = []
    for loc in data["documents"]:
        for d in data["documents"][loc]:
            saved = d.get("saved_at") or d.get("created_at")
            age = (now - datetime.fromisoformat(saved)).days if saved else 0
            # ``or 0``: a null reading_progress counts as unread instead of
            # raising on the comparison below.
            progress = d.get("reading_progress") or 0
            rel = d.get("_analysis", {}).get("relevance", 3)

            if rel > max_relevance:
                continue
            if age < min_age:
                continue
            if unread_only and progress > 0:
                continue
            candidates.append(d)

    if not candidates:
        click.echo(f"No documents match criteria (relevance ≤ {max_relevance}, age ≥ {min_age}d, unread={unread_only})")
        return

    # Show summary
    rels = Counter(d.get("_analysis", {}).get("relevance", 3) for d in candidates)
    click.echo(f"\n{'DRY RUN — ' if dry_run else ''}{action.upper()}: {len(candidates)} documents")
    click.echo(f"  Criteria: relevance ≤ {max_relevance}★, age ≥ {min_age}d, unread_only={unread_only}")
    click.echo("  Relevance breakdown: " + ", ".join(f"{s}★={rels.get(s,0)}" for s in sorted(rels.keys())))

    # Show sample
    click.echo("\n  Sample (first 10):")
    for d in candidates[:10]:
        a = d.get("_analysis", {})
        title = (d.get("title") or "Untitled")[:60]
        click.echo(f"    {a.get('relevance',3)}★ | {title}")
    if len(candidates) > 10:
        click.echo(f"    ... and {len(candidates) - 10} more")

    if dry_run:
        click.echo("\n🔍 Dry run — no changes made.")
        return

    if not yes:
        click.confirm(f"\n{action.capitalize()} {len(candidates)} documents?", abort=True)

    token = get_readwise_token()

    if action == "archive":
        # Use bulk_update endpoint (50 per request)
        batch_size = 50
        total = len(candidates)
        done = 0
        for i in range(0, total, batch_size):
            batch = candidates[i:i + batch_size]
            updates = [{"id": d["id"], "location": "archive"} for d in batch]
            resp = requests.patch(
                f"{READER_API_BASE}/bulk_update/",
                headers={"Authorization": f"Token {token}", "Content-Type": "application/json"},
                json={"updates": updates},
                timeout=60,
            )
            if resp.status_code in (200, 207):
                results = resp.json().get("results", [])
                ok = sum(1 for r in results if r.get("success"))
                done += ok
                if ok < len(batch):
                    fails = [r for r in results if not r.get("success")]
                    click.echo(f"  ⚠ {len(fails)} failed in batch", err=True)
            else:
                click.echo(f"  ✗ Batch failed: {resp.status_code} {resp.text[:200]}", err=True)
            click.echo(f"  Archived {done}/{total}...", err=True)
            if i + batch_size < total:
                time.sleep(READER_RATE_DELAY)
        click.echo(f"\n✅ Archived {done} documents")

    elif action == "delete":
        # Delete is per-document (no bulk endpoint)
        done = 0
        total = len(candidates)
        for i, d in enumerate(candidates):
            # Retry a throttled request rather than counting it as a failure.
            for _attempt in range(MAX_RETRIES):
                resp = requests.delete(
                    f"{READER_API_BASE}/delete/{d['id']}/",
                    headers={"Authorization": f"Token {token}"},
                    timeout=60,
                )
                if resp.status_code != 429:
                    break
                retry_after = resp.headers.get("Retry-After", "")
                wait = float(retry_after) if retry_after.isdigit() else 60.0
                click.echo(f"  ⏳ Rate limited, waiting {wait:.0f}s...", err=True)
                time.sleep(wait)
            if resp.status_code == 204:
                done += 1
            else:
                click.echo(f"  ✗ Failed to delete {d['id']}: {resp.status_code}", err=True)
            if (i + 1) % 20 == 0:
                click.echo(f"  Deleted {done}/{total}...", err=True)
            # rate limit: 20/min, so pace every request instead of sending
            # bursts of 20 separated by a single short pause.
            if i + 1 < total:
                time.sleep(READER_RATE_DELAY)
        click.echo(f"\n✅ Deleted {done} documents")
593
594
595# ─── REPORT ─────────────────────────────────────────────────────────────
596
# Display metadata per tag: tag -> (icon, label, hex colour).
# "other" is the entry for documents without any tag.
TAG_DISPLAY = {
    "nix": ("❄️", "NixOS & Nix", "#7eb8da"), "emacs": ("📝", "Emacs & Org-mode", "#7f5ab6"),
    "go": ("🐹", "Go", "#00add8"), "rust": ("🦀", "Rust", "#dea584"),
    "python": ("🐍", "Python", "#3776ab"), "tekton": ("🔧", "Tekton & CI/CD", "#fd495c"),
    "kubernetes": ("☸️", "Kubernetes", "#326ce5"), "containers": ("📦", "Containers", "#2496ed"),
    "ci-cd": ("🔄", "CI/CD", "#fd495c"), "homelab": ("🏠", "Homelab", "#e8a87c"),
    "git": ("🔀", "Git & VCS", "#f14e32"), "coding-agents": ("🤖", "Coding Agents", "#a855f7"),
    "ai-llm": ("🧠", "AI & LLM", "#8b5cf6"), "security": ("🔒", "Security", "#ef4444"),
    "linux": ("🐧", "Linux", "#fcc624"), "networking": ("🌐", "Networking", "#06b6d4"),
    "monitoring": ("📊", "Monitoring", "#10b981"), "devtools": ("🛠️", "Dev Tools", "#64748b"),
    "open-source": ("⚖️", "Open Source", "#22c55e"), "privacy": ("🛡️", "Privacy", "#f59e0b"),
    "productivity": ("📋", "Productivity", "#6366f1"), "pkm": ("🧩", "PKM", "#ec4899"),
    "career": ("👔", "Career", "#14b8a6"), "culture": ("📖", "Culture", "#a78bfa"),
    "french": ("🇫🇷", "French", "#3b82f6"), "hardware": ("⌨️", "Hardware", "#f97316"),
    "web": ("🌍", "Web", "#06b6d4"), "other": ("📄", "Other", "#94a3b8"),
}

# Report sections as (key, title, description, hex colour). The keys are the
# values returned by _categorize; sections are rendered in this order.
ACTION_GROUPS = [
    ("must_read", "⭐ Must Read", "AI rates these highly relevant.", "#f59e0b"),
    ("finish_reading", "🏃 Finish — >50% Done", "You started these. Finish them.", "#2196f3"),
    ("archive_finished", "✅ Archive — Finished", "100% read. Archive.", "#4caf50"),
    ("keep_triage", "📚 Active Queue", "Review and decide.", "#9c27b0"),
    ("archive_old_unread", "📦 Old & Unread (relevant)", "1yr+ unread but 3★+.", "#ff9800"),
    ("archive_low_relevance", "🗑️ Old & Low Relevance", "1yr+ unread, ≤2★. Purge.", "#f44336"),
    ("archive_old_barely", "🤔 Old & Barely Started", "1yr+, <10% read.", "#795548"),
]
623
624
625def _categorize(d):
626    p = d.get("reading_progress", 0)
627    age = d.get("_age", 0)
628    rel = d.get("_analysis", {}).get("relevance", 3)
629    act = d.get("_analysis", {}).get("action", "")
630    if p >= 1.0: return "archive_finished"
631    if age > 365 and p == 0 and rel <= 2: return "archive_low_relevance"
632    if age > 365 and p == 0: return "archive_old_unread"
633    if age > 365 and p < 0.1: return "archive_old_barely"
634    if p > 0.5: return "finish_reading"
635    if act in ("must_read", "worth_reading") and rel >= 4: return "must_read"
636    return "keep_triage"
637
638
639def _primary_tag(d):
640    tags = d.get("_analysis", {}).get("tags", [])
641    for t in ["nix","emacs","go","rust","python","tekton","kubernetes","containers","ci-cd","homelab","git",
642              "coding-agents","ai-llm","security","linux","networking","monitoring","devtools","open-source",
643              "privacy","productivity","pkm","career","culture","french","hardware","web"]:
644        if t in tags: return t
645    return tags[0] if tags else "other"
646
647
@cli.command()
@click.option("--no-open", is_flag=True, help="Don't auto-open in browser")
def report(no_open):
    """Generate interactive HTML triage report."""
    # NOTE: the docstring above doubles as the click help text, so the longer
    # description lives in comments.
    #
    # Loads ANALYZED_FILE (falling back to LATEST_LINK), annotates every
    # document with its location, age in days and primary tag, buckets the
    # documents with _categorize(), clusters each bucket by primary tag and
    # writes the resulting page to REPORT_FILE.  Unless --no-open is given the
    # page is then handed to xdg-open.
    #
    # Raises click.ClickException when neither data file exists.
    src = ANALYZED_FILE if ANALYZED_FILE.exists() else LATEST_LINK
    if not src.exists():
        raise click.ClickException("No data. Run 'readwise-reader fetch' first.")
    click.echo(f"Using: {src}", err=True)

    with open(src) as f:
        data = json.load(f)

    now = datetime.now(timezone.utc)
    all_docs = []
    for loc, loc_docs in data["documents"].items():
        for d in loc_docs:
            d["_location"] = loc
            saved = d.get("saved_at") or d.get("created_at")
            if saved:
                saved_dt = datetime.fromisoformat(saved)
                if saved_dt.tzinfo is None:
                    # A naive timestamp cannot be subtracted from the aware
                    # `now`; it is assumed to be UTC (TODO confirm against the
                    # API payload).
                    saved_dt = saved_dt.replace(tzinfo=timezone.utc)
                d["_age"] = (now - saved_dt).days
            else:
                d["_age"] = 0
            d["_primary_tag"] = _primary_tag(d)
            all_docs.append(d)

    groups = defaultdict(list)
    for d in all_docs:
        groups[_categorize(d)].append(d)

    # The report counts as AI-analyzed as soon as one document carries a
    # reason other than the placeholder values.
    has_analysis = any(
        d.get("_analysis", {}).get("reason", "") not in ("Not analyzed", "Analysis failed", "")
        for d in all_docs
    )
    total = len(all_docs)

    html_parts = [_report_head(total, now, has_analysis, groups, all_docs)]

    for key, title, desc, color in ACTION_GROUPS:
        docs = groups.get(key, [])
        if not docs:
            continue
        # Highest relevance first, then the most recently saved.
        docs.sort(key=lambda d: (-d.get("_analysis", {}).get("relevance", 3), d.get("_age", 0)))
        tag_groups = defaultdict(list)
        for d in docs:
            tag_groups[d["_primary_tag"]].append(d)

        html_parts.append(f'<div class="group" id="{key}">'
            f'<div class="group-hdr" style="background:{color}15;border-left:4px solid {color}" onclick="toggle(this)">'
            f'<h2><span class="arrow">▼</span> {title}</h2><span class="badge">{len(docs)}</span></div>'
            f'<div class="group-desc">{desc}</div><div class="group-body">')

        # Largest clusters first.
        for tag, tdocs in sorted(tag_groups.items(), key=lambda x: -len(x[1])):
            # A tag without a TAG_DISPLAY entry is shown verbatim; it comes
            # from the document data, so it is escaped before entering markup.
            icon, label, _ = TAG_DISPLAY.get(tag, ("📄", html_mod.escape(str(tag)), "#94a3b8"))
            # `or 0` keeps a null reading_progress from breaking the negation.
            tdocs.sort(key=lambda d: (-d.get("_analysis", {}).get("relevance", 3), -(d.get("reading_progress") or 0)))

            sources = Counter((d.get("site_name") or d.get("source") or "?") for d in tdocs)
            top_src = [(s, n) for s, n in sources.most_common(5) if n >= 2]
            wcs = [d["word_count"] for d in tdocs if d.get("word_count")]
            # 15000 words per hour, i.e. 250 words per minute.
            rh = sum(wcs) / 15000 if wcs else 0
            avg_rel = sum(d.get("_analysis", {}).get("relevance", 3) for d in tdocs) / len(tdocs)

            insights = []
            if top_src:
                insights.append("Sources: " + ", ".join(f"{s} ({n})" for s, n in top_src))
            insights.append(f"Stats: {len(tdocs)} docs, ~{rh:.1f}h reading, avg {avg_rel:.1f}★")

            html_parts.append(f'<div class="cluster"><div class="cluster-hdr" onclick="toggle(this)">'
                f'<h3><span class="arrow">▼</span> {icon} {label}</h3><span class="cnt">{len(tdocs)}</span></div>'
                f'<div class="cluster-insights">{html_mod.escape(" | ".join(insights))}</div>'
                f'<ul class="doc-list">')
            html_parts.extend(_doc_html(d) for d in tdocs)
            html_parts.append('</ul></div>')
        html_parts.append('</div></div>')

    html_parts.append(_report_foot())
    # The page declares charset=utf-8 and contains emoji, so the encoding must
    # not depend on the locale.
    REPORT_FILE.write_text("\n".join(html_parts), encoding="utf-8")
    click.echo(f"✅ {REPORT_FILE}", err=True)

    if not no_open:
        try:
            subprocess.Popen(["xdg-open", str(REPORT_FILE)], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
        except OSError as exc:
            # The report is already on disk; a missing or broken xdg-open
            # should not turn a successful run into a traceback.
            click.echo(f"Could not launch xdg-open ({exc}); open {REPORT_FILE} manually.", err=True)
720
721
def _doc_html(d):
    """Render one document as an ``<li class="doc">`` row of the report.

    Args:
        d: Document dict.  ``_location`` is required; ``_age``, ``_analysis``,
            ``reading_progress``, ``word_count``, ``title``, ``url``,
            ``source_url``, ``site_name``/``source`` and ``id`` are optional
            and fall back to neutral defaults.

    Returns:
        The HTML for the row as a single string.  Every value taken from the
        document (including tags, id, location and relevance) is HTML-escaped
        before it is placed into markup or attributes.

    Raises:
        KeyError: if ``d`` has no ``_location`` entry.
    """
    esc = html_mod.escape

    # `or` fallbacks also cover explicit nulls in the data.
    p = d.get("reading_progress") or 0
    a = d.get("_analysis") or {}
    rel = a.get("relevance", 3)
    rel_txt = esc(str(rel))
    summary = esc(a.get("summary", ""))
    reason = esc(a.get("reason", ""))
    title = esc(d.get("title") or "Untitled")
    src = esc(d.get("site_name") or d.get("source") or "")
    url = esc(d.get("url") or "#")
    source_url = d.get("source_url") or ""
    tags = [esc(str(t)) for t in (a.get("tags") or [])]
    loc = d["_location"]
    loc_txt = esc(str(loc))
    age = d.get("_age", 0)
    wc = d.get("word_count") or 0
    doc_id = esc(str(d.get("id", "")))

    # Colour for the relevance score (grey for anything outside 1-5) and for
    # the progress figure / bar.
    rc = {5: "#22c55e", 4: "#84cc16", 3: "#eab308", 2: "#f97316", 1: "#ef4444"}.get(rel, "#94a3b8")
    pc = "#4ade80" if p >= 1 else "#60a5fa" if p > 0.5 else "#f59e0b" if p > 0 else "#475569"
    lc = "loc-new" if loc == "new" else "loc-later"
    tags_attr = ",".join(tags)
    th = "".join(f'<span class="tag">{t}</span>' for t in tags[:3])
    # Reading time at 250 words per minute.
    rt = f"{wc // 250}min" if wc else ""
    sl = esc(source_url)
    source_link = f'<a href="{sl}" target="_blank">↗</a>' if source_url else ""

    # Collapsed view shows only the first sentence; when that would hide
    # fewer than five characters the whole summary is shown instead.
    first = summary.split('. ')[0] + '.' if '. ' in summary else summary
    if len(first) > len(summary) - 5:
        first = summary
    has_more = first != summary
    more_marker = "▸" if has_more else ""

    return (
        f'<li class="doc" data-title="{title.lower()}" data-rel="{rel_txt}" data-tags="{tags_attr}" data-id="{doc_id}">'
        f'<div class="doc-rel" style="color:{rc}" title="{reason}">{rel_txt}★</div>'
        f'<div class="doc-prog"><div style="color:{pc}">{p:.0%}</div>'
        f'<div class="bar"><div class="fill" style="width:{p*100:.0f}%;background:{pc}"></div></div></div>'
        f'<div class="doc-info"><div class="doc-title"><a href="{url}" target="_blank">{title}</a></div>'
        f'<div class="doc-summary">'
        f'<div class="short" onclick="this.nextElementSibling.classList.toggle(\'open\')">{first}{more_marker}</div>'
        f'<div class="full">{summary}<div class="reason">{reason}</div></div>'
        f'</div>'
        f'<div class="doc-meta"><span class="loc {lc}">{loc_txt}</span> {src} · {rt} · {age_label(age)} ago {th}</div></div>'
        f'<div class="doc-right">'
        f'{source_link}'
        f'</div></li>'
    )
764
765
def _report_head(total, now, has_analysis, groups, all_docs):
    """Build the opening part of the report page.

    Args:
        total: Number of documents in the report.
        now: ``datetime`` whose date is printed in the header.
        has_analysis: Whether to label the report "AI-analyzed" and show the
            relevance distribution bar.
        groups: Mapping of action-group key to the list of documents in it.
        all_docs: Every document in the report; used for the relevance
            distribution and the tag filter options.

    Returns:
        HTML from the doctype through the filter bar.  The
        ``<div class="container">`` it opens is left unclosed for the caller
        to append content and a closing footer.
    """
    # One counter tile per action group; the label is the part of the group
    # title before the em dash.
    stat_tiles = []
    for key, title, _, color in ACTION_GROUPS:
        n = len(groups.get(key, []))
        label = title.split("—")[0].strip()
        stat_tiles.append(f'<div class="stat"><div class="n" style="color:{color}">{n}</div><div class="l">{label}</div></div>')
    stats = "".join(stat_tiles)

    relbar = ""
    if has_analysis:
        rc = Counter(d.get("_analysis", {}).get("relevance", 3) for d in all_docs)
        colors = {5: "#22c55e", 4: "#84cc16", 3: "#eab308", 2: "#f97316", 1: "#ef4444"}
        labels = {5: "Must read", 4: "Relevant", 3: "Interesting", 2: "Low", 1: "Skip"}
        # Segments are emitted only for scores that occur, which also means
        # `total` is non-zero whenever the division runs.
        segs = "".join(f'<div class="seg" style="width:{rc.get(s,0)/total*100}%;background:{colors[s]}">{rc.get(s,0)}</div>' for s in [5,4,3,2,1] if rc.get(s,0))
        legend = "".join(f'<span><span class="rel-dot" style="background:{colors[s]}"></span>{s}★ {labels[s]} ({rc.get(s,0)})</span>' for s in [5,4,3,2,1])
        relbar = f'<div style="max-width:600px;margin:1rem auto 0"><div class="rel-bar">{segs}</div><div class="rel-legend">{legend}</div></div>'

    # Tag filter options, most frequent tag first.  `or []` tolerates a null
    # tag list; tag text comes from the document data and is escaped before
    # it enters the markup.
    atags = Counter(
        t
        for d in all_docs
        for t in (d.get("_analysis", {}).get("tags") or [])
    )
    tag_opts = "".join(
        f'<option value="{html_mod.escape(str(t))}">{html_mod.escape(str(t))} ({c})</option>'
        for t, c in atags.most_common()
    )

    # Table of contents: links to the non-empty groups only.
    toc_links = []
    for key, title, _, _ in ACTION_GROUPS:
        n = len(groups.get(key, []))
        if n:
            toc_links.append(f'<a href="#{key}">{title}<span class="cnt">({n})</span></a>')
    toc = "".join(toc_links)

    return f"""<!DOCTYPE html><html lang="en"><head><meta charset="utf-8"><meta name="viewport" content="width=device-width,initial-scale=1">
<title>Readwise Reader Triage</title>
<style>
:root{{--bg:#0f172a;--surface:#1e293b;--surface2:#334155;--text:#e2e8f0;--dim:#94a3b8;--accent:#f59e0b;--link:#38bdf8;--green:#4ade80;--red:#f87171}}
*{{box-sizing:border-box;margin:0;padding:0}}
body{{font-family:'Inter',-apple-system,system-ui,sans-serif;background:var(--bg);color:var(--text);line-height:1.6}}
.header{{background:linear-gradient(135deg,#1e293b,#312e81);padding:2rem;text-align:center;border-bottom:3px solid var(--accent)}}
.header h1{{font-size:1.8rem;margin-bottom:.3rem}}.header .sub{{color:var(--dim);font-size:.85rem}}
.stats{{display:flex;justify-content:center;gap:1rem;flex-wrap:wrap;margin-top:1.2rem}}
.stat{{background:rgba(0,0,0,.3);padding:.6rem 1.2rem;border-radius:8px;text-align:center;min-width:100px}}
.stat .n{{font-size:1.5rem;font-weight:700}}.stat .l{{font-size:.7rem;color:var(--dim);text-transform:uppercase}}
.container{{max-width:1200px;margin:0 auto;padding:1rem}}
.rel-bar{{display:flex;height:28px;border-radius:6px;overflow:hidden;margin:1.5rem 0 .5rem}}
.rel-bar .seg{{display:flex;align-items:center;justify-content:center;font-size:.75rem;font-weight:600;color:#000}}
.rel-legend{{display:flex;gap:1rem;justify-content:center;font-size:.75rem;color:var(--dim);margin-bottom:1.5rem;flex-wrap:wrap}}
.rel-legend span{{display:flex;align-items:center;gap:.3rem}}.rel-dot{{width:10px;height:10px;border-radius:50%;display:inline-block}}
.toc{{background:var(--surface);border-radius:8px;padding:1.2rem;margin:1rem 0}}
.toc h2{{font-size:1rem;margin-bottom:.8rem}}
.toc-grid{{display:grid;grid-template-columns:repeat(auto-fill,minmax(280px,1fr));gap:.4rem}}
.toc a{{color:var(--link);text-decoration:none;font-size:.85rem}}.toc a:hover{{text-decoration:underline}}.toc .cnt{{opacity:.5;margin-left:.3rem}}
.group{{margin:1.5rem 0;border-radius:10px;border:1px solid rgba(255,255,255,.08);overflow:hidden}}
.group-hdr{{padding:1rem 1.2rem;display:flex;align-items:center;justify-content:space-between;cursor:pointer;user-select:none}}
.group-hdr h2{{font-size:1.15rem}}.badge{{background:rgba(0,0,0,.3);padding:.2rem .7rem;border-radius:14px;font-size:.85rem;font-weight:600}}
.group-desc{{padding:0 1.2rem .8rem;color:var(--dim);font-size:.85rem;font-style:italic}}
.cluster{{margin:.4rem .8rem;background:rgba(0,0,0,.2);border-radius:8px;overflow:hidden}}
.cluster-hdr{{padding:.6rem 1rem;background:rgba(0,0,0,.15);display:flex;align-items:center;justify-content:space-between;cursor:pointer;user-select:none}}
.cluster-hdr h3{{font-size:.95rem}}.cnt{{background:rgba(255,255,255,.08);padding:.15rem .5rem;border-radius:10px;font-size:.78rem}}
.cluster-insights{{padding:.5rem 1rem;font-size:.8rem;color:var(--dim);border-top:1px solid rgba(255,255,255,.04)}}
.doc-list{{list-style:none}}.doc{{padding:.5rem 1rem;border-top:1px solid rgba(255,255,255,.04);display:grid;grid-template-columns:2.8rem 2.5rem 1fr auto;gap:.6rem;align-items:center;transition:background .1s}}
.doc:hover{{background:rgba(255,255,255,.03)}}
.doc-rel{{text-align:center;font-weight:700;font-size:.9rem}}
.doc-prog{{text-align:center;font-size:.75rem}}.doc-prog .bar{{width:2.2rem;height:3px;background:rgba(255,255,255,.1);border-radius:2px;margin:2px auto 0;overflow:hidden}}
.doc-prog .fill{{height:100%;border-radius:2px}}
.doc-info{{min-width:0}}.doc-title{{font-weight:500;white-space:nowrap;overflow:hidden;text-overflow:ellipsis;font-size:.9rem}}
.doc-title a{{color:var(--text);text-decoration:none}}.doc-title a:hover{{color:var(--link)}}
.doc-meta{{font-size:.73rem;color:var(--dim);white-space:nowrap;overflow:hidden;text-overflow:ellipsis}}
.doc-summary{{font-size:.78rem;color:var(--dim);margin-top:.15rem}}
.doc-summary .short{{cursor:pointer}}.doc-summary .short:hover{{color:var(--text)}}
.doc-summary .full{{display:none;margin-top:.3rem;line-height:1.5;color:var(--dim);border-left:2px solid rgba(255,255,255,.1);padding-left:.6rem}}
.doc-summary .full.open{{display:block}}
.doc-summary .reason{{font-size:.72rem;color:var(--accent);margin-top:.2rem;font-style:italic}}
.doc-right{{text-align:right;font-size:.73rem;white-space:nowrap;display:flex;flex-direction:column;align-items:flex-end;gap:.3rem}}.doc-right a{{color:var(--link)}}

.tag{{display:inline-block;padding:.05rem .35rem;border-radius:6px;font-size:.65rem;margin-right:.15rem;background:rgba(255,255,255,.06)}}
.loc{{display:inline-block;padding:.05rem .3rem;border-radius:6px;font-size:.65rem;font-weight:600}}
.loc-new{{background:#14532d;color:#86efac}}.loc-later{{background:#1e3a5f;color:#93c5fd}}
.arrow{{display:inline-block;transition:transform .15s}}.arrow.shut{{transform:rotate(-90deg)}}
.filter-bar{{background:var(--surface);padding:.8rem 1rem;border-radius:8px;margin:1rem 0;display:flex;gap:.5rem;flex-wrap:wrap;align-items:center}}
.filter-bar label{{font-size:.8rem;color:var(--dim)}}
.filter-bar select,.filter-bar input{{background:var(--surface2);color:var(--text);border:1px solid rgba(255,255,255,.1);border-radius:6px;padding:.3rem .6rem;font-size:.8rem}}

@media(max-width:768px){{.doc{{grid-template-columns:2rem 2rem 1fr}}.doc-right{{display:none}}.stats{{gap:.5rem}}.stat{{padding:.4rem .8rem;min-width:70px}}.stat .n{{font-size:1.2rem}}}}
</style></head><body>
<div class="header"><h1>📚 Readwise Reader Triage</h1>
<p class="sub">{now.strftime('%Y-%m-%d')} · {total} documents · {'AI-analyzed' if has_analysis else 'keyword-only'}</p>
<div class="stats">{stats}</div>{relbar}</div>
<div class="container">
<div class="toc"><h2>📑 Sections</h2><div class="toc-grid">{toc}</div></div>
<div class="filter-bar"><label>Filter:</label>
<input type="text" id="search" placeholder="Search titles/summaries..." oninput="filterDocs()">
<select id="relFilter" onchange="filterDocs()"><option value="">All relevance</option><option value="5">5★</option><option value="4">4★+</option><option value="3">3★+</option></select>
<select id="tagFilter" onchange="filterDocs()"><option value="">All tags</option>{tag_opts}</select></div>
"""
855
856
857def _report_foot():
858    return """<script>
859function toggle(el){const b=el.parentElement.querySelector('.group-body,.doc-list');if(!b)return;const a=el.querySelector('.arrow');if(b.style.display==='none'){b.style.display='';a?.classList.remove('shut')}else{b.style.display='none';a?.classList.add('shut')}}
860function filterDocs(){const q=document.getElementById('search').value.toLowerCase();const r=document.getElementById('relFilter').value;const t=document.getElementById('tagFilter').value;document.querySelectorAll('.doc').forEach(el=>{const title=(el.dataset.title||'');const summary=(el.querySelector('.doc-summary')?.textContent||'').toLowerCase();const rv=parseInt(el.dataset.rel||'3');const tags=el.dataset.tags||'';let s=true;if(q&&!title.includes(q)&&!summary.includes(q))s=false;if(r&&rv<parseInt(r))s=false;if(t&&!tags.includes(t))s=false;el.style.display=s?'':'none'})}
861
862</script></div></body></html>"""
863
864
865# ═══════════════════════════════════════════════════════════════════════
866
# Script entry point: when the file is executed directly (not imported),
# hand control to the `cli` object that the commands are registered on.
if __name__ == "__main__":
    cli()