Commit `a818be8e3865`

Vincent Demeester <vincent@sbr.pm>

2026-04-10 23:59:56

feat: add readwise-reader tool for triage and analysis

Added a new tool to fetch, analyze, and triage Readwise Reader documents. Supports multiple LLM backends (Claude Opus/Sonnet via Vertex AI, Gemini via API key) for relevance scoring and generates interactive HTML triage reports with expandable summaries. User profile loaded from XDG data dir for customizable interests and favorite authors/sites.

main

1 parent bb47f16

Changed files (4)

pkgs

default.nix

tools

readwise-reader

default.nix

README.md

readwise-reader.py

@@ -31,6 +31,7 @@ in
   jellyfin-favorites-sync = pkgs.callPackage ../tools/jellyfin-favorites-sync { };
   jellyfin-manage-playlist = pkgs.callPackage ../tools/jellyfin-manage-playlist { };
   music-playlist-dl = pkgs.callPackage ../tools/music-playlist-dl { };
+  readwise-reader = pkgs.callPackage ../tools/readwise-reader { };
   nix-flake-update = pkgs.callPackage ../tools/nix-flake-update { };
   slack-archive = pkgs.callPackage ../tools/slack-archive { };
   gcal-to-org = pkgs.callPackage ../tools/gcal-to-org { };

@@ -0,0 +1,43 @@
+{
+  lib,
+  python3,
+  google-cloud-sdk,
+  passage,
+}:
+
+# Packages the single-file readwise-reader.py script as a wrapped executable.
+python3.pkgs.buildPythonApplication {
+  pname = "readwise-reader";
+  version = "1.0.0";
+  # There is no setup.py/pyproject; the script is copied by hand in installPhase.
+  format = "other";
+
+  src = ./.;
+
+  # The only third-party Python module the script imports.
+  propagatedBuildInputs = with python3.pkgs; [
+    requests
+  ];
+
+  # The script shells out to `gcloud` and `passage`; put both on the wrapper's PATH.
+  makeWrapperArgs = [
+    "--prefix PATH : ${lib.makeBinPath [ google-cloud-sdk passage ]}"
+  ];
+
+  # Nothing to unpack or build: installPhase references the script file directly.
+  dontUnpack = true;
+  dontBuild = true;
+
+  installPhase = ''
+    runHook preInstall
+
+    mkdir -p $out/bin
+    cp ${./readwise-reader.py} $out/bin/readwise-reader
+    chmod +x $out/bin/readwise-reader
+
+    runHook postInstall
+  '';
+
+  meta = with lib; {
+    description = "Fetch, analyze, and triage Readwise Reader documents";
+    homepage = "https://github.com/vdemeester/home";
+    license = licenses.mit;
+    maintainers = [ ];
+    mainProgram = "readwise-reader";
+  };
+}

@@ -0,0 +1,65 @@
+# readwise-reader
+
+Fetch, analyze, and triage [Readwise Reader](https://readwise.io/read) documents.
+
+## Usage
+
+```bash
+# 1. Fetch all Inbox + Later documents from Readwise Reader API
+readwise-reader fetch
+
+# 2. Analyze with LLM (default: Claude Opus 4-6 via Vertex AI)
+readwise-reader analyze
+
+# Or use a different model:
+readwise-reader analyze -m sonnet    # Claude Sonnet 4 (faster, cheaper)
+readwise-reader analyze -m gemini    # Gemini 3 Pro Preview (fastest)
+readwise-reader analyze -m gemini25  # Gemini 2.5 Pro
+
+# Resume interrupted analysis (checkpoints automatically):
+readwise-reader analyze
+
+# Start fresh:
+readwise-reader analyze --reset
+
+# 3. Generate interactive HTML triage report
+readwise-reader report
+readwise-reader report --no-open    # Don't auto-open browser
+```
+
+## Profile
+
+User profile is loaded from `$XDG_DATA_HOME/readwise/profile.toml` (default: `~/.local/share/readwise/profile.toml`).
+
+The profile controls:
+- **User interests** — what topics are relevant to you
+- **Favorite authors/sites** — auto-boosted to 4★+ regardless of topic
+- **Summary style** — `brief` or `detailed`
+
+See the example in this directory or create your own.
+
+## Data
+
+All data stored in `$XDG_DATA_HOME/readwise/`:
+- `reader-latest.json` — symlink to latest fetch
+- `reader-analyzed.json` — enriched with LLM analysis
+- `analysis-checkpoint.json` — resume point for interrupted analysis
+- `triage-report.html` — generated report
+- `profile.toml` — user profile
+
+## Requirements
+
+- `passage` — for Readwise API token (`readwise/key`)
+- `gcloud` — for Vertex AI auth (Opus/Sonnet models)
+- `GEMINI_API_KEY` env var or `passage` — for Gemini models
+- `requests` Python package
+
+## Report Features
+
+- Documents grouped by action (Must Read, Finish, Archive, Delete, etc.)
+- Within each group, clustered by topic (Nix, Go, Emacs, AI, etc.)
+- Relevance scores (1-5★) with LLM-generated summaries and reasons
+- Click summaries to expand full analysis
+- Filter by title, relevance, or tag
+- Collapsible sections
+- Dark theme

@@ -0,0 +1,856 @@
+#!/usr/bin/env -S uv run --script
+# /// script
+# requires-python = ">=3.11"
+# dependencies = ["requests"]
+# ///
+"""
+readwise-reader — Fetch, analyze, and triage Readwise Reader documents.
+
+Subcommands:
+  fetch     Fetch all documents from Readwise Reader API (Inbox + Later)
+  analyze   Score documents for relevance using LLMs (Opus, Sonnet, Gemini)
+  report    Generate an interactive HTML triage report
+
+Data stored in $XDG_DATA_HOME/readwise/ (default: ~/.local/share/readwise/).
+User profile loaded from $XDG_DATA_HOME/readwise/profile.toml.
+"""
+
+import argparse
+import json
+import html as html_mod
+import os
+import subprocess
+import sys
+import time
+from collections import Counter, defaultdict
+from datetime import datetime, timezone
+from pathlib import Path
+
+# Root of all tool data: $XDG_DATA_HOME/readwise, or ~/.local/share/readwise when the variable is unset.
+DATA_DIR = Path(os.environ.get("XDG_DATA_HOME", Path.home() / ".local/share")) / "readwise"
+# Location of the user profile TOML file.
+PROFILE_FILE = DATA_DIR / "profile.toml"
+# Symlink that cmd_fetch re-points at the most recent fetch output.
+LATEST_LINK = DATA_DIR / "reader-latest.json"
+# Fetch data enriched with per-document "_analysis" entries, written by cmd_analyze.
+ANALYZED_FILE = DATA_DIR / "reader-analyzed.json"
+# Per-document analysis results saved after every batch so cmd_analyze can resume.
+CHECKPOINT_FILE = DATA_DIR / "analysis-checkpoint.json"
+# HTML report written by cmd_report.
+REPORT_FILE = DATA_DIR / "triage-report.html"
+
+# ═══════════════════════════════════════════════════════════════════════
+#  FETCH
+# ═══════════════════════════════════════════════════════════════════════
+
+# Readwise Reader v3 document-list endpoint queried by fetch_documents.
+READER_API = "https://readwise.io/api/v3/list/"
+# Seconds slept between consecutive list requests (3.1s stays just under 20 requests per minute).
+READER_RATE_DELAY = 3.1  # 20 req/min
+
+
+def get_readwise_token():
+    """Return the Readwise API token stored in passage under ``readwise/key``.
+
+    Prints an error to stderr and exits the process with status 1 when the
+    ``passage show`` command returns a non-zero exit code.
+    """
+    lookup = subprocess.run(
+        ["passage", "show", "readwise/key"],
+        capture_output=True,
+        text=True,
+    )
+    if lookup.returncode == 0:
+        return lookup.stdout.strip()
+    print("Failed to get Readwise token from passage", file=sys.stderr)
+    sys.exit(1)
+
+
+def fetch_documents(token: str, location: str) -> list[dict]:
+    """Fetch every document in one Reader location, following pagination.
+
+    Args:
+        token: Readwise API token, sent as ``Authorization: Token <token>``.
+        location: Value of the ``location`` query parameter to list.
+
+    Returns:
+        The concatenated ``results`` entries of every page.
+
+    Raises:
+        requests.HTTPError: On a non-2xx response (a 429 is retried a few
+            times first).
+        requests.RequestException: On network failures or timeouts.
+    """
+    import requests
+
+    headers = {"Authorization": f"Token {token}"}
+    docs = []
+    cursor = None
+    page = 1
+    rate_limit_hits = 0
+    while True:
+        params = {"location": location, "limit": 100}
+        if cursor:
+            params["pageCursor"] = cursor
+        print(f"  Fetching {location} page {page}...", file=sys.stderr)
+        # A timeout keeps a stalled connection from hanging the fetch forever.
+        resp = requests.get(READER_API, params=params, headers=headers, timeout=60)
+        if resp.status_code == 429 and rate_limit_hits < 5:
+            # Wait as long as the server asks instead of aborting the whole
+            # fetch; give up (via raise_for_status) after repeated 429s.
+            rate_limit_hits += 1
+            retry_after = resp.headers.get("Retry-After", "")
+            wait = int(retry_after) if retry_after.isdigit() else 60
+            print(f"    ⏳ Rate limited, waiting {wait}s...", file=sys.stderr)
+            time.sleep(wait)
+            continue
+        resp.raise_for_status()
+        rate_limit_hits = 0
+        data = resp.json()
+        results = data.get("results", [])
+        docs.extend(results)
+        cursor = data.get("nextPageCursor")
+        count = data.get("count", "?")
+        print(f"    Got {len(results)} docs (total: {count})", file=sys.stderr)
+        if not cursor:
+            break
+        page += 1
+        # Stay under the API rate limit between pages.
+        time.sleep(READER_RATE_DELAY)
+    return docs
+
+
+def cmd_fetch(args):
+    """Fetch the requested Reader locations and save them as a timestamped JSON file.
+
+    Reads ``args.locations`` (comma-separated location names), fetches each
+    location with fetch_documents, writes the result to
+    ``DATA_DIR/reader-<locations>-<timestamp>.json`` and re-points LATEST_LINK
+    at that file.
+    """
+    DATA_DIR.mkdir(parents=True, exist_ok=True)
+    token = get_readwise_token()
+    all_docs = {}
+
+    # Tolerate "new, later" and stray commas in the locations argument.
+    locations = [loc.strip() for loc in args.locations.split(",") if loc.strip()]
+    for index, loc in enumerate(locations):
+        print(f"\nFetching '{loc}' documents...", file=sys.stderr)
+        docs = fetch_documents(token, loc)
+        all_docs[loc] = docs
+        print(f"  Total {loc}: {len(docs)}", file=sys.stderr)
+        # Pause between locations; compare by position so a repeated
+        # location name cannot skip the delay.
+        if index < len(locations) - 1:
+            time.sleep(READER_RATE_DELAY)
+
+    timestamp = datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%SZ")
+    output = {
+        "fetched_at": timestamp,
+        "counts": {loc: len(docs) for loc, docs in all_docs.items()},
+        "documents": all_docs,
+    }
+
+    outfile = DATA_DIR / f"reader-{'-'.join(locations)}-{timestamp}.json"
+    with open(outfile, "w") as f:
+        json.dump(output, f, indent=2, ensure_ascii=False)
+
+    # The link target is relative (file name only), so DATA_DIR stays relocatable.
+    LATEST_LINK.unlink(missing_ok=True)
+    LATEST_LINK.symlink_to(outfile.name)
+    print(f"\n✅ Saved {sum(len(d) for d in all_docs.values())} documents to {outfile}", file=sys.stderr)
+
+
+# ═══════════════════════════════════════════════════════════════════════
+#  PROFILE
+# ═══════════════════════════════════════════════════════════════════════
+
+def load_profile(path: Path) -> dict:
+    """Load profile from TOML file with a minimal parser (no dependency).
+
+    Only a small TOML subset is understood: ``[section]`` headers, ``key = value``
+    lines, inline lists (``key = [a, b]``) and multi-line lists opened by
+    ``key = [``. The result is a flat dict keyed ``"<section>.<key>"``; list
+    values become lists of strings, everything else stays a string.
+
+    Returns an empty dict when ``path`` does not exist.
+    """
+    if not path.exists():
+        return {}
+
+    profile = {}
+    current_section = ""
+    # Section that was active when the pending multi-line list was opened.
+    current_section_for_key = ""
+    current_key = None
+    # Non-None only while inside a multi-line list.
+    current_list = None
+
+    for line in path.read_text().splitlines():
+        stripped = line.strip()
+        # Blank lines and full-line comments are ignored.
+        if not stripped or stripped.startswith("#"):
+            continue
+
+        # Section header: flush any list still open, then switch sections.
+        if stripped.startswith("["):
+            if current_key and current_list is not None:
+                profile[f"{current_section_for_key}.{current_key}"] = current_list
+                current_list = None
+                current_key = None
+            current_section = stripped.strip("[]").strip()
+            current_section_for_key = current_section
+            continue
+
+        # "key = value" line. Lines starting with a double quote are excluded so
+        # quoted list items containing "=" are not mistaken for assignments.
+        # NOTE(review): an unquoted or single-quoted list item containing "=" still lands here.
+        if "=" in stripped and not stripped.startswith('"'):
+            if current_key and current_list is not None:
+                profile[f"{current_section_for_key}.{current_key}"] = current_list
+                current_list = None
+
+            key, val = stripped.split("=", 1)
+            key = key.strip()
+            val = val.strip()
+            current_key = key
+            current_section_for_key = current_section
+
+            if val == "[":
+                # Start of a multi-line list; items follow on later lines.
+                current_list = []
+            elif val.startswith("[") and val.endswith("]"):
+                # Inline list: split on commas, drop quotes and empty items.
+                items = val[1:-1]
+                current_list = [s.strip().strip('"').strip("'") for s in items.split(",") if s.strip().strip('"').strip("'")]
+                profile[f"{current_section}.{current_key}"] = current_list
+                current_list = None
+                current_key = None
+            elif val.startswith('"') or val.startswith("'"):
+                # Quoted scalar: stored without the surrounding quotes.
+                profile[f"{current_section}.{current_key}"] = val.strip('"').strip("'")
+                current_key = None
+            else:
+                # Bare scalar: stored as the raw string, no type conversion.
+                profile[f"{current_section}.{current_key}"] = val
+                current_key = None
+        elif current_list is not None:
+            # Item line inside a multi-line list.
+            val = stripped.rstrip(",").strip().strip('"').strip("'")
+            if val and val != "]":
+                # Cut a trailing "# comment" off the item, then re-strip it.
+                if "#" in val and not val.startswith("#"):
+                    val = val[:val.index("#")].strip().rstrip(",").strip().strip('"').strip("'")
+                if val:
+                    current_list.append(val)
+            # A line ending in "]" closes the list and stores it.
+            if stripped.rstrip().endswith("]") or stripped == "]":
+                profile[f"{current_section_for_key}.{current_key}"] = current_list
+                current_list = None
+                current_key = None
+
+    # Store a list that was still open at end of file.
+    if current_key and current_list is not None:
+        profile[f"{current_section_for_key}.{current_key}"] = current_list
+
+    return profile
+
+
+def build_prompt_context(profile: dict) -> tuple[str, str]:
+    """Turn a flat profile dict into the prompt's user description.
+
+    Returns a ``(user_description, summary_style)`` pair. Missing interest
+    lists render as ``N/A``; the style defaults to ``"detailed"``.
+    """
+    interest_rows = (
+        ("Core languages", "interests.core"),
+        ("Infrastructure", "interests.infrastructure"),
+        ("Editor", "interests.editor"),
+        ("Kubernetes/Containers", "interests.kubernetes"),
+        ("CLI tools", "interests.tools"),
+        ("AI tooling", "interests.ai"),
+        ("Side interests", "interests.side"),
+        ("Values", "interests.values"),
+        ("Currently exploring", "interests.exploring"),
+        ("NOT interested in", "interests.not_interested"),
+    )
+
+    def render(key):
+        values = profile.get(key, [])
+        if not values:
+            return "N/A"
+        return ", ".join(values)
+
+    who = profile.get("user.name", "User")
+    what = profile.get("user.role", "Software Engineer")
+    pieces = [f"{who} — {what}", ""]
+    pieces.extend(f"{label}: {render(key)}" for label, key in interest_rows)
+    user_desc = "\n".join(pieces)
+
+    authors = profile.get("favorites.authors", [])
+    sites = profile.get("favorites.sites", [])
+    if authors or sites:
+        favorites = ["\n\nFAVORITE AUTHORS/SITES (auto-boost to at least 4★, always worth_reading):\n"]
+        if authors:
+            favorites.append("Authors: " + ", ".join(authors) + "\n")
+        if sites:
+            favorites.append("Sites: " + ", ".join(sites) + "\n")
+        favorites.append("These are trusted voices — mark as interesting even on tangential topics.")
+        user_desc += "".join(favorites)
+
+    style = profile.get("summary.style", "detailed")
+    return user_desc, style
+
+
+# ═══════════════════════════════════════════════════════════════════════
+#  ANALYZE
+# ═══════════════════════════════════════════════════════════════════════
+
+# Per-model settings, looked up by cmd_analyze as MODELS[args.model].
+#   backend           - which call path call_llm dispatches to
+#   model_id          - model identifier placed in the request URL
+#   batch_size        - documents sent per prompt
+#   rate_delay        - seconds slept between batches
+#   max_output_tokens - output-token cap passed to the backend
+MODELS = {
+    "opus": {
+        "backend": "vertex-claude",
+        "model_id": "claude-opus-4-6",
+        "batch_size": 25,
+        "rate_delay": 2,
+        "max_output_tokens": 16384,
+    },
+    "sonnet": {
+        "backend": "vertex-claude",
+        "model_id": "claude-sonnet-4@20250514",
+        "batch_size": 30,
+        "rate_delay": 1,
+        "max_output_tokens": 16384,
+    },
+    "gemini": {
+        "backend": "gemini-api",
+        "model_id": "gemini-3-pro-preview",
+        "batch_size": 35,
+        "rate_delay": 2,
+        "max_output_tokens": 16384,
+    },
+    "gemini25": {
+        "backend": "gemini-api",
+        "model_id": "gemini-2.5-pro",
+        "batch_size": 35,
+        "rate_delay": 2,
+        "max_output_tokens": 16384,
+    },
+}
+
+# Maximum attempts per LLM request in call_vertex_claude and call_gemini.
+MAX_RETRIES = 6
+# Cached gcloud access token and the time.time() value at which it was fetched.
+_token_cache = {"token": None, "ts": 0}
+
+
+def get_vertex_token():
+    """Return a gcloud access token, refreshing the cached one after 2400 seconds.
+
+    Raises:
+        RuntimeError: If ``gcloud auth print-access-token`` exits non-zero.
+    """
+    cache_age = time.time() - _token_cache["ts"]
+    if cache_age <= 2400:
+        return _token_cache["token"]
+
+    proc = subprocess.run(
+        ["gcloud", "auth", "print-access-token"],
+        capture_output=True,
+        text=True,
+        timeout=10,
+    )
+    if proc.returncode != 0:
+        raise RuntimeError(f"gcloud auth failed: {proc.stderr}")
+    _token_cache["token"] = proc.stdout.strip()
+    _token_cache["ts"] = time.time()
+    return _token_cache["token"]
+
+
+def call_vertex_claude(model_id: str, prompt: str, max_tokens: int) -> str:
+    """Send one prompt to a Claude model on Vertex AI and return the reply text.
+
+    The project and region come from ``GOOGLE_CLOUD_PROJECT`` and
+    ``GOOGLE_CLOUD_LOCATION``, with built-in defaults. Up to MAX_RETRIES
+    attempts are made: a 429 backs off exponentially, a 401 invalidates the
+    cached token so the next attempt fetches a fresh one, and any other
+    exception is retried with a shorter back-off.
+
+    Args:
+        model_id: Anthropic model identifier used in the request URL.
+        prompt: Text sent as the single user message.
+        max_tokens: Output-token cap for the response.
+
+    Returns:
+        The ``text`` of the first content item in the response.
+
+    Raises:
+        RuntimeError: When every attempt ended in a 429 or 401.
+        Exception: The last error, when the final attempt raised.
+    """
+    import requests
+
+    project = os.environ.get("GOOGLE_CLOUD_PROJECT", "itpc-gcp-pnd-pe-eng-claude")
+    location = os.environ.get("GOOGLE_CLOUD_LOCATION", "us-east5")
+    url = f"https://{location}-aiplatform.googleapis.com/v1/projects/{project}/locations/{location}/publishers/anthropic/models/{model_id}:rawPredict"
+
+    for attempt in range(MAX_RETRIES):
+        token = get_vertex_token()
+        try:
+            resp = requests.post(
+                url,
+                headers={"Authorization": f"Bearer {token}", "Content-Type": "application/json"},
+                json={
+                    "anthropic_version": "vertex-2023-10-16",
+                    "messages": [{"role": "user", "content": prompt}],
+                    "max_tokens": max_tokens,
+                    "temperature": 0.2,
+                },
+                timeout=180,
+            )
+            if resp.status_code == 429:
+                if attempt == MAX_RETRIES - 1:
+                    # Out of attempts: sleeping again would only delay the failure.
+                    break
+                wait = (2 ** attempt) * 5
+                print(f"    ⏳ Rate limited, waiting {wait}s...", file=sys.stderr)
+                time.sleep(wait)
+                continue
+            if resp.status_code == 401:
+                # Force get_vertex_token to refresh on the next attempt.
+                _token_cache["ts"] = 0
+                continue
+            resp.raise_for_status()
+            return resp.json()["content"][0]["text"]
+        except Exception as e:
+            # Deliberately broad: network errors, HTTP errors and malformed
+            # responses are all retried until the last attempt.
+            if attempt < MAX_RETRIES - 1:
+                wait = (2 ** attempt) * 3
+                print(f"    ⚠ {e}, retrying in {wait}s...", file=sys.stderr)
+                time.sleep(wait)
+                continue
+            raise
+    raise RuntimeError(f"Failed after {MAX_RETRIES} retries")
+
+
+def get_gemini_key():
+    """Return the Gemini API key from ``GEMINI_API_KEY``, else from passage.
+
+    Raises:
+        RuntimeError: If the variable is unset or empty and the passage lookup fails.
+    """
+    env_key = os.environ.get("GEMINI_API_KEY")
+    if not env_key:
+        lookup = subprocess.run(
+            ["passage", "show", "redhat/google/osp/vdeemest-api-key"],
+            capture_output=True,
+            text=True,
+            timeout=10,
+        )
+        if lookup.returncode != 0:
+            raise RuntimeError("No GEMINI_API_KEY and passage lookup failed")
+        return lookup.stdout.strip()
+    return env_key
+
+
+def call_gemini(model_id: str, prompt: str, max_tokens: int) -> str:
+    """Send one prompt to the Gemini API and return the reply text.
+
+    The API key travels in the ``x-goog-api-key`` header rather than the URL,
+    so it cannot end up in the exception messages this function prints.
+    Up to MAX_RETRIES attempts are made: a 429 backs off exponentially and any
+    other exception is retried with a shorter back-off.
+
+    Args:
+        model_id: Gemini model identifier used in the request URL.
+        prompt: Text sent as the single content part.
+        max_tokens: Value for ``generationConfig.maxOutputTokens``.
+
+    Returns:
+        The text of the first part of the first candidate.
+
+    Raises:
+        RuntimeError: When no API key is available or every attempt was rate limited.
+        Exception: The last error, when the final attempt raised.
+    """
+    import requests
+
+    api_key = get_gemini_key()
+    url = f"https://generativelanguage.googleapis.com/v1beta/models/{model_id}:generateContent"
+    headers = {"Content-Type": "application/json", "x-goog-api-key": api_key}
+
+    for attempt in range(MAX_RETRIES):
+        try:
+            resp = requests.post(
+                url,
+                headers=headers,
+                json={
+                    "contents": [{"parts": [{"text": prompt}]}],
+                    "generationConfig": {"temperature": 0.2, "maxOutputTokens": max_tokens, "responseMimeType": "application/json"},
+                },
+                timeout=180,
+            )
+            if resp.status_code == 429:
+                if attempt == MAX_RETRIES - 1:
+                    # Out of attempts: sleeping again would only delay the failure.
+                    break
+                wait = (2 ** attempt) * 5
+                print(f"    ⏳ Rate limited, waiting {wait}s...", file=sys.stderr)
+                time.sleep(wait)
+                continue
+            resp.raise_for_status()
+            return resp.json()["candidates"][0]["content"]["parts"][0]["text"]
+        except Exception as e:
+            # Deliberately broad: network errors, HTTP errors and malformed
+            # responses are all retried until the last attempt.
+            if attempt < MAX_RETRIES - 1:
+                wait = (2 ** attempt) * 3
+                print(f"    ⚠ {e}, retrying in {wait}s...", file=sys.stderr)
+                time.sleep(wait)
+                continue
+            raise
+    raise RuntimeError(f"Failed after {MAX_RETRIES} retries")
+
+
+def call_llm(backend: str, model_id: str, prompt: str, max_tokens: int) -> str:
+    """Route a prompt to the function that implements ``backend``.
+
+    Raises:
+        ValueError: If ``backend`` is neither ``vertex-claude`` nor ``gemini-api``.
+    """
+    match backend:
+        case "vertex-claude":
+            return call_vertex_claude(model_id, prompt, max_tokens)
+        case "gemini-api":
+            return call_gemini(model_id, prompt, max_tokens)
+        case _:
+            raise ValueError(f"Unknown backend: {backend}")
+
+
+def parse_json_response(text: str) -> dict:
+    """Parse the JSON object contained in an LLM reply.
+
+    A surrounding Markdown code fence is removed first, whether or not the
+    opening fence is followed by a newline. If the remaining text is not valid
+    JSON, the span from the first ``{`` to the last ``}`` is parsed instead.
+
+    Args:
+        text: Raw reply text.
+
+    Returns:
+        The decoded JSON value.
+
+    Raises:
+        json.JSONDecodeError: If no parseable JSON is found.
+    """
+    import re
+
+    text = text.strip()
+    if text.startswith("```"):
+        # Drop the opening fence line (e.g. "```json"); when the reply is a
+        # single line there is no newline to split on, so drop only the backticks.
+        _, newline, remainder = text.partition("\n")
+        text = remainder if newline else text[3:]
+        if text.endswith("```"):
+            text = text[:-3]
+        text = text.strip()
+    try:
+        return json.loads(text)
+    except json.JSONDecodeError:
+        # Fall back to the outermost {...} span when prose surrounds the JSON.
+        match = re.search(r"\{.*\}", text, re.DOTALL)
+        if match:
+            return json.loads(match.group())
+        raise
+
+
+def build_analysis_prompt(profile_text: str, summary_style: str, docs_batch: list[dict]) -> str:
+    """Build the scoring prompt for one batch of documents.
+
+    Each document is rendered as a numbered entry whose number is its position
+    in ``docs_batch``; the model is asked to echo that number back as ``id``.
+    ``summary_style`` equal to ``"detailed"`` selects the long summary/reason
+    instructions, anything else the short ones.
+    """
+    def describe(position, doc):
+        # Summaries are capped at 600 characters; empty or placeholder
+        # ("comments") summaries are replaced by an inference hint.
+        blurb = (doc.get("summary") or "")[:600]
+        if blurb.strip().lower() in ("comments", ""):
+            blurb = "N/A — infer content from title, author, source URL"
+        origin = doc.get("site_name") or doc.get("source") or ""
+        heading = f'[{position}] "{doc.get("title", "Untitled")}"\n'
+        meta = (
+            f'    Author: {doc.get("author", "")} | Source: {origin} | '
+            f'Category: {doc.get("category", "")} | {doc.get("word_count", 0)} words\n'
+        )
+        link = f'    URL: {doc.get("source_url", "")}\n'
+        return heading + meta + link + f'    Summary: {blurb}'
+
+    documents_block = "\n".join(describe(pos, doc) for pos, doc in enumerate(docs_batch))
+
+    if summary_style != "detailed":
+        s_inst = "**summary**: 1-2 sentences about the actual content."
+        r_inst = "**reason**: 5-15 words explaining the score."
+    else:
+        s_inst = ("**summary**: 4-6 sentences. Describe what this article is actually about in depth: "
+                  "what problem does it address, what's the core argument or technique, what makes it interesting or unique? "
+                  "Include specific details (tools mentioned, approaches described, conclusions drawn). "
+                  "Don't just restate the title. If summary is N/A, use title/author/URL and your knowledge to infer the likely content.")
+        r_inst = ("**reason**: 2-3 sentences. Explain specifically why this is or isn't relevant. "
+                  "Reference concrete user interests that match (e.g. 'Uses NixOS daily and this covers flake patterns') "
+                  "or don't match (e.g. 'React frontend content, outside interest area').")
+
+    return f"""Analyze these {len(docs_batch)} articles from a read-it-later app. Score each for relevance to this specific user.
+
+<user_profile>
+{profile_text}
+</user_profile>
+
+<documents>
+{documents_block}
+</documents>
+
+For each document return:
+1. {s_inst}
+2. **relevance**: 1-5 score:
+   - 5 = Must read — directly about daily tools/work or from a favorite author/site
+   - 4 = Highly relevant — strongly aligned with interests, or from a favorite source on any topic
+   - 3 = Interesting — decent tech content, tangentially related
+   - 2 = Low relevance — not aligned with interests
+   - 1 = Skip — completely irrelevant or too superficial
+3. {r_inst}
+4. **action**: must_read / worth_reading / skim / archive / delete
+5. **tags**: 1-3 from: nix, go, rust, python, emacs, kubernetes, containers, ci-cd, git, security, homelab, networking, ai-llm, coding-agents, linux, open-source, privacy, productivity, pkm, career, culture, french, hardware, web, devtools, monitoring, tekton
+
+IMPORTANT:
+- Favorite authors/sites get minimum 4★ and "worth_reading", even on tangential topics.
+- French content from favorite French sources is scored on merit, not penalized for being French.
+- Be discriminating: generic listicles = 1-2★. Deep technical posts on relevant topics = 4-5★.
+
+Return ONLY valid JSON:
+{{"analyses":[{{"id":0,"summary":"...","relevance":4,"reason":"...","action":"worth_reading","tags":["nix","homelab"]}},...]}}\
+"""
+
+
+def cmd_analyze(args):
+    """Score every fetched document with the selected LLM and write ANALYZED_FILE.
+
+    Reads ``args.model``, ``args.batch_size``, ``args.reset`` and
+    ``args.profile``. Documents are loaded from LATEST_LINK, sent to the model
+    in batches, and the per-document results are checkpointed to
+    CHECKPOINT_FILE after every batch so an interrupted run can resume.
+    Documents without a result get a neutral placeholder in the output.
+    """
+    # Copy so a --batch-size override does not mutate the shared MODELS entry.
+    model_cfg = MODELS[args.model].copy()
+    if args.batch_size:
+        model_cfg["batch_size"] = args.batch_size
+
+    if args.reset and CHECKPOINT_FILE.exists():
+        CHECKPOINT_FILE.unlink()
+        print("🔄 Checkpoint reset.", file=sys.stderr)
+
+    profile = load_profile(args.profile)
+    if profile:
+        print(f"👤 Profile: {args.profile}", file=sys.stderr)
+        fav_count = len(profile.get("favorites.authors", [])) + len(profile.get("favorites.sites", []))
+        print(f"   {fav_count} favorite authors/sites", file=sys.stderr)
+    profile_text, summary_style = build_prompt_context(profile)
+    print(f"📝 Summary style: {summary_style}", file=sys.stderr)
+
+    with open(LATEST_LINK) as f:
+        data = json.load(f)
+
+    # Flatten all locations into one list, remembering where each doc came from.
+    all_docs = []
+    for loc in data["documents"]:
+        for d in data["documents"][loc]:
+            d["_location"] = loc
+            all_docs.append(d)
+
+    print(f"📚 Total: {len(all_docs)} docs", file=sys.stderr)
+    print(f"🤖 Model: {model_cfg['model_id']} ({model_cfg['backend']})", file=sys.stderr)
+    print(f"📦 Batch: {model_cfg['batch_size']}, max output: {model_cfg['max_output_tokens']} tokens", file=sys.stderr)
+
+    # Results so far, keyed by document id; preloaded from the checkpoint if present.
+    # NOTE(review): JSON object keys are strings, so resuming presumably relies on
+    # document ids being strings — confirm against the API payload.
+    analyzed = {}
+    if CHECKPOINT_FILE.exists():
+        with open(CHECKPOINT_FILE) as f:
+            analyzed = json.load(f)
+    print(f"💾 Checkpoint: {len(analyzed)} done", file=sys.stderr)
+
+    to_analyze = [d for d in all_docs if d["id"] not in analyzed]
+    print(f"🔍 Remaining: {len(to_analyze)}", file=sys.stderr)
+
+    if to_analyze:
+        bs = model_cfg["batch_size"]
+        # Ceiling division: the last batch may be smaller than bs.
+        total_batches = (len(to_analyze) + bs - 1) // bs
+
+        for bn in range(total_batches):
+            batch = to_analyze[bn * bs: (bn + 1) * bs]
+            pct = len(analyzed) / len(all_docs) * 100
+            print(f"\n[{bn+1}/{total_batches}] {len(batch)} docs ({pct:.0f}% done)...", file=sys.stderr)
+
+            # A failing batch is reported and skipped; its documents stay
+            # unanalyzed and are picked up again on the next run.
+            try:
+                prompt = build_analysis_prompt(profile_text, summary_style, batch)
+                raw = call_llm(model_cfg["backend"], model_cfg["model_id"], prompt, model_cfg["max_output_tokens"])
+                parsed = parse_json_response(raw)
+
+                matched = 0
+                for a in parsed.get("analyses", []):
+                    # "id" is the document's position within this batch's prompt.
+                    idx = a.get("id")
+                    if idx is None:
+                        continue
+                    try:
+                        idx = int(idx)
+                        if 0 <= idx < len(batch):
+                            analyzed[batch[idx]["id"]] = {
+                                "summary": a.get("summary", ""),
+                                "relevance": a.get("relevance", 3),
+                                "reason": a.get("reason", ""),
+                                "action": a.get("action", "skim"),
+                                "tags": a.get("tags", []),
+                            }
+                            matched += 1
+                    # NOTE(review): a non-scalar "id" raises TypeError, which is not
+                    # caught here and aborts the rest of this batch via the outer handler.
+                    except (ValueError, IndexError):
+                        pass
+                print(f"  ✓ {matched}/{len(batch)}", file=sys.stderr)
+            except Exception as e:
+                print(f"  ✗ {e}", file=sys.stderr)
+
+            # Checkpoint after every batch, successful or not.
+            with open(CHECKPOINT_FILE, "w") as f:
+                json.dump(analyzed, f)
+
+            if bn < total_batches - 1:
+                time.sleep(model_cfg["rate_delay"])
+
+    # Build output
+    print(f"\n📊 Analyzed: {len(analyzed)}/{len(all_docs)}", file=sys.stderr)
+    enriched = {}
+    for loc in data["documents"]:
+        enriched[loc] = []
+        for d in data["documents"][loc]:
+            # Documents without a result get a neutral placeholder analysis.
+            d["_analysis"] = analyzed.get(d["id"], {"summary": d.get("summary", ""), "relevance": 3, "reason": "Not analyzed", "action": "skim", "tags": []})
+            enriched[loc].append(d)
+
+    output = {
+        "fetched_at": data.get("fetched_at", ""),
+        "analyzed_at": datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%SZ"),
+        "model": model_cfg["model_id"],
+        "counts": {loc: len(docs) for loc, docs in enriched.items()},
+        "documents": enriched,
+    }
+    with open(ANALYZED_FILE, "w") as f:
+        json.dump(output, f, indent=2, ensure_ascii=False)
+    print(f"✅ Output: {ANALYZED_FILE}", file=sys.stderr)
+
+    # Stats
+    # Distribution of relevance scores, highest first, with integer percentages.
+    rels = Counter(analyzed[k]["relevance"] for k in analyzed if "relevance" in analyzed[k])
+    if rels:
+        print(f"\nRelevance:", file=sys.stderr)
+        for s in sorted(rels, reverse=True):
+            print(f"  {s}★: {rels[s]:4d} ({rels[s]*100//len(analyzed)}%)", file=sys.stderr)
+    # Distribution of suggested actions, most common first.
+    acts = Counter(analyzed[k]["action"] for k in analyzed if "action" in analyzed[k])
+    if acts:
+        print(f"Actions:", file=sys.stderr)
+        for a, c in acts.most_common():
+            print(f"  {a:15s}: {c:4d}", file=sys.stderr)
+
+
+# ═══════════════════════════════════════════════════════════════════════
+#  REPORT
+# ═══════════════════════════════════════════════════════════════════════
+
+# Display data per tag: tag -> (icon, human-readable label, hex color).
+# cmd_report falls back to ("📄", <tag>, "#94a3b8") for tags missing here.
+TAG_DISPLAY = {
+    "nix": ("❄️", "NixOS & Nix", "#7eb8da"), "emacs": ("📝", "Emacs & Org-mode", "#7f5ab6"),
+    "go": ("🐹", "Go", "#00add8"), "rust": ("🦀", "Rust", "#dea584"),
+    "python": ("🐍", "Python", "#3776ab"), "tekton": ("🔧", "Tekton & CI/CD", "#fd495c"),
+    "kubernetes": ("☸️", "Kubernetes", "#326ce5"), "containers": ("📦", "Containers", "#2496ed"),
+    "ci-cd": ("🔄", "CI/CD", "#fd495c"), "homelab": ("🏠", "Homelab", "#e8a87c"),
+    "git": ("🔀", "Git & VCS", "#f14e32"), "coding-agents": ("🤖", "Coding Agents", "#a855f7"),
+    "ai-llm": ("🧠", "AI & LLM", "#8b5cf6"), "security": ("🔒", "Security", "#ef4444"),
+    "linux": ("🐧", "Linux", "#fcc624"), "networking": ("🌐", "Networking", "#06b6d4"),
+    "monitoring": ("📊", "Monitoring", "#10b981"), "devtools": ("🛠️", "Dev Tools", "#64748b"),
+    "open-source": ("⚖️", "Open Source", "#22c55e"), "privacy": ("🛡️", "Privacy", "#f59e0b"),
+    "productivity": ("📋", "Productivity", "#6366f1"), "pkm": ("🧩", "PKM", "#ec4899"),
+    "career": ("👔", "Career", "#14b8a6"), "culture": ("📖", "Culture", "#a78bfa"),
+    "french": ("🇫🇷", "French", "#3b82f6"), "hardware": ("⌨️", "Hardware", "#f97316"),
+    "web": ("🌍", "Web", "#06b6d4"), "other": ("📄", "Other", "#94a3b8"),
+}
+
+# Report sections in display order: (group key, heading, description, accent color).
+# The group keys are the values returned by categorize().
+ACTION_GROUPS = [
+    ("must_read", "⭐ Must Read", "AI rates these highly relevant to you.", "#f59e0b"),
+    ("finish_reading", "🏃 Finish Reading — >50% Done", "You started these. Finish them.", "#2196f3"),
+    ("archive_finished", "✅ Archive — Finished", "100% read. Archive.", "#4caf50"),
+    ("keep_triage", "📚 Active Queue", "Review and decide.", "#9c27b0"),
+    ("archive_old_unread", "📦 Old & Unread (still relevant)", "1yr+ unread but AI says 3★+.", "#ff9800"),
+    ("archive_low_relevance", "🗑️ Old, Unread & Low Relevance", "1yr+ unread, ≤2★. Safe to purge.", "#f44336"),
+    ("archive_old_barely", "🤔 Old & Barely Started", "1yr+ old, <10% read.", "#795548"),
+]
+
+
+def categorize(d):
+    """Return the ACTION_GROUPS key a document belongs to.
+
+    The decision uses reading progress (0-1), age in days (``d["_age"]``) and
+    the LLM analysis (relevance and action). Rules are checked in order:
+    finished, old and untouched (split by relevance), old and barely started,
+    more than half read, highly rated, and finally the general queue.
+
+    A missing, null or non-numeric ``reading_progress`` counts as 0 and a
+    missing, null or non-numeric relevance counts as 3, so one odd record
+    cannot abort report generation.
+
+    Raises:
+        KeyError: If ``d`` has no ``_age`` entry.
+    """
+    def _number(value, default):
+        # Missing, null, or non-numeric values fall back to the default.
+        try:
+            return float(value)
+        except (TypeError, ValueError):
+            return default
+
+    analysis = d.get("_analysis") or {}
+    p = _number(d.get("reading_progress"), 0)
+    rel = _number(analysis.get("relevance"), 3)
+    old = d["_age"] > 365
+
+    if p >= 1.0:
+        return "archive_finished"
+    if old and p == 0:
+        return "archive_low_relevance" if rel <= 2 else "archive_old_unread"
+    if old and p < 0.1:
+        return "archive_old_barely"
+    if p > 0.5:
+        return "finish_reading"
+    if analysis.get("action") in ("must_read", "worth_reading") and rel >= 4:
+        return "must_read"
+    return "keep_triage"
+
+
+def primary_tag(d):
+    """Pick the single tag used to cluster a document in the report.
+
+    The first tag from a fixed priority order that the document carries wins;
+    otherwise the document's first tag; otherwise ``"other"``.
+    """
+    ranked = (
+        "nix", "emacs", "go", "rust", "python", "tekton", "kubernetes",
+        "containers", "ci-cd", "homelab", "git", "coding-agents", "ai-llm",
+        "security", "linux", "networking", "monitoring", "devtools",
+        "open-source", "privacy", "productivity", "pkm", "career", "culture",
+        "french", "hardware", "web",
+    )
+    doc_tags = d.get("_analysis", {}).get("tags", [])
+    preferred = next((name for name in ranked if name in doc_tags), None)
+    if preferred is not None:
+        return preferred
+    if doc_tags:
+        return doc_tags[0]
+    return "other"
+
+
+def age_label(days):
+    """Format an age in days as a short label: ``3d``, ``2w``, ``5mo`` or ``1y2mo``.
+
+    Weeks are 7 days, months 30 days and years 365 days, all rounded down.
+    """
+    if days >= 365:
+        years, leftover = divmod(days, 365)
+        return f"{years}y{leftover // 30}mo"
+    if days >= 30:
+        return f"{days // 30}mo"
+    if days >= 7:
+        return f"{days // 7}w"
+    return f"{days}d"
+
+
+def cmd_report(args):
+    """Render the HTML triage report to REPORT_FILE and optionally open it.
+
+    Uses ANALYZED_FILE when it exists, otherwise the raw fetch behind
+    LATEST_LINK. Documents are grouped by categorize(), then clustered by
+    primary tag inside each group. Unless ``args.no_open`` is set, the report
+    is opened with ``xdg-open``.
+
+    Tag labels are HTML-escaped before insertion because tags missing from
+    TAG_DISPLAY come straight from LLM output. The file is written as UTF-8
+    because the markup contains emoji that other locale encodings cannot
+    represent.
+    """
+    src = ANALYZED_FILE if ANALYZED_FILE.exists() else LATEST_LINK
+    print(f"Using: {src}", file=sys.stderr)
+    with open(src) as f:
+        data = json.load(f)
+
+    # Annotate each document with its location, age in days and cluster tag.
+    now = datetime.now(timezone.utc)
+    all_docs = []
+    for loc in data["documents"]:
+        for d in data["documents"][loc]:
+            d["_location"] = loc
+            saved = d.get("saved_at") or d.get("created_at")
+            d["_age"] = (now - datetime.fromisoformat(saved)).days if saved else 0
+            d["_primary_tag"] = primary_tag(d)
+            all_docs.append(d)
+
+    groups = defaultdict(list)
+    for d in all_docs:
+        groups[categorize(d)].append(d)
+
+    # True when at least one document carries a real (non-placeholder) reason.
+    has_analysis = any(d.get("_analysis", {}).get("reason", "") not in ("Not analyzed", "Analysis failed", "") for d in all_docs)
+    total = len(all_docs)
+
+    # ── Build HTML (inlined for single-file tool) ───────────────────────
+    html_parts = [_report_head(total, now, has_analysis, groups, all_docs)]
+
+    for key, title, desc, color in ACTION_GROUPS:
+        docs = groups.get(key, [])
+        if not docs:
+            continue
+        # Most relevant first, then newest first.
+        docs.sort(key=lambda d: (-d.get("_analysis",{}).get("relevance",3), d["_age"]))
+        tag_groups = defaultdict(list)
+        for d in docs:
+            tag_groups[d["_primary_tag"]].append(d)
+
+        html_parts.append(f'<div class="group" id="{key}">')
+        html_parts.append(f'<div class="group-hdr" style="background:{color}15;border-left:4px solid {color}" onclick="toggle(this)">')
+        html_parts.append(f'<h2><span class="arrow">▼</span> {title}</h2><span class="badge">{len(docs)}</span></div>')
+        html_parts.append(f'<div class="group-desc">{desc}</div><div class="group-body">')
+
+        # Largest clusters first.
+        for tag, tdocs in sorted(tag_groups.items(), key=lambda x: -len(x[1])):
+            icon, label, tc = TAG_DISPLAY.get(tag, ("📄", tag, "#94a3b8"))
+            # Unknown tags are model output; never insert them into markup raw.
+            label = html_mod.escape(str(label))
+            tdocs.sort(key=lambda d: (-d.get("_analysis",{}).get("relevance",3), -d.get("reading_progress",0)))
+            html_parts.append(f'<div class="cluster"><div class="cluster-hdr" onclick="toggle(this)">')
+            html_parts.append(f'<h3><span class="arrow">▼</span> {icon} {label}</h3><span class="cnt">{len(tdocs)}</span></div>')
+
+            # Cluster insights
+            sources = Counter((d.get("site_name") or d.get("source") or "?") for d in tdocs)
+            # Only sources that appear at least twice are worth listing.
+            top_src = [(s,n) for s,n in sources.most_common(5) if n >= 2]
+            wcs = [d.get("word_count",0) for d in tdocs if d.get("word_count")]
+            # Reading-time estimate: 15000 words per hour.
+            rh = sum(wcs)/15000 if wcs else 0
+            avg_rel = sum(d.get("_analysis",{}).get("relevance",3) for d in tdocs)/len(tdocs)
+
+            insights = []
+            if top_src:
+                insights.append("Sources: " + ", ".join(f"{s} ({n})" for s,n in top_src))
+            insights.append(f"Stats: {len(tdocs)} docs, ~{rh:.1f}h reading, avg {avg_rel:.1f}★")
+            html_parts.append('<div class="cluster-insights">' + "<br>".join(html_mod.escape(i) for i in insights) + '</div>')
+
+            html_parts.append('<ul class="doc-list">')
+            for d in tdocs:
+                html_parts.append(_doc_html(d))
+            html_parts.append('</ul></div>')
+        html_parts.append('</div></div>')
+
+    html_parts.append(_report_foot())
+
+    REPORT_FILE.write_text("\n".join(html_parts), encoding="utf-8")
+    print(f"✅ Report: {REPORT_FILE}", file=sys.stderr)
+    if not args.no_open:
+        subprocess.Popen(["xdg-open", str(REPORT_FILE)], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
+
+
+def _doc_html(d):
+    """Render one document as an ``<li class="doc">`` row of the triage report.
+
+    Args:
+        d: Document dict. ``_location`` and ``_age`` must be present; ``title``,
+            ``url``, ``source_url``, ``site_name``/``source``, ``word_count``,
+            ``reading_progress`` and ``_analysis`` (``relevance``, ``summary``,
+            ``reason``, ``tags``) are optional.
+
+    Returns:
+        The row's HTML as a single string without newlines.
+
+    Text taken from the document or its analysis (title, source, URLs,
+    summary, reason, tags, relevance, location) is HTML-escaped before it is
+    interpolated, so a stray quote or angle bracket cannot break the page.
+    """
+    analysis = d.get("_analysis") or {}
+    # ``or`` also covers keys that are present but null, which would otherwise
+    # break the numeric comparisons and the escaping below.
+    progress = d.get("reading_progress") or 0
+    rel = analysis.get("relevance", 3)
+    rel_txt = html_mod.escape(str(rel))
+    summary = html_mod.escape(analysis.get("summary") or "")
+    reason = html_mod.escape(analysis.get("reason") or "")
+    title = html_mod.escape(d.get("title") or "Untitled")
+    src = html_mod.escape(d.get("site_name") or d.get("source") or "")
+    url = html_mod.escape(d.get("url") or "#")
+    source_url = html_mod.escape(d.get("source_url") or "")
+    # Tags end up inside an attribute value and inside element content, so
+    # they need the same escaping as every other piece of analysis text.
+    tags = [html_mod.escape(str(t)) for t in analysis.get("tags") or []]
+    loc = d["_location"]
+    loc_txt = html_mod.escape(str(loc))
+    age = d["_age"]
+    word_count = d.get("word_count") or 0
+
+    rel_color = {5: "#22c55e", 4: "#84cc16", 3: "#eab308", 2: "#f97316", 1: "#ef4444"}.get(rel, "#94a3b8")
+    if progress >= 1:
+        prog_color = "#4ade80"
+    elif progress > 0.5:
+        prog_color = "#60a5fa"
+    elif progress > 0:
+        prog_color = "#f59e0b"
+    else:
+        prog_color = "#475569"
+    loc_class = "loc-new" if loc == "new" else "loc-later"
+    tag_attr = ",".join(tags)
+    tag_chips = "".join(f'<span class="tag">{t}</span>' for t in tags[:3])
+    read_time = f"{word_count // 250}min" if word_count else ""
+
+    # The collapsed view shows the first sentence; fall back to the whole
+    # summary when truncating would save fewer than five characters.
+    first_sentence = summary.split(". ")[0] + "." if ". " in summary else summary
+    if len(first_sentence) > len(summary) - 5:
+        first_sentence = summary
+
+    more = "▸" if first_sentence != summary else ""
+    reason_html = f"<div class=reason>{reason}</div>" if reason else ""
+    link_html = f'<a href="{source_url}" target="_blank">↗</a>' if source_url else ""
+
+    return (
+        f'<li class="doc" data-title="{title.lower()}" data-rel="{rel_txt}" data-tags="{tag_attr}">'
+        f'<div class="doc-rel" style="color:{rel_color}" title="{reason}">{rel_txt}★</div>'
+        f'<div class="doc-prog"><div style="color:{prog_color}">{progress:.0%}</div>'
+        f'<div class="bar"><div class="fill" style="width:{progress * 100:.0f}%;background:{prog_color}"></div></div></div>'
+        f'<div class="doc-info"><div class="doc-title"><a href="{url}" target="_blank">{title}</a></div>'
+        '<div class="doc-summary">'
+        f'<div class="short" onclick="this.nextElementSibling.classList.toggle(\'open\')">{first_sentence} {more}</div>'
+        f'<div class="full">{summary}{reason_html}</div>'
+        '</div>'
+        f'<div class="doc-meta"><span class="loc {loc_class}">{loc_txt}</span> {src} · {read_time} · {age_label(age)} ago {tag_chips}</div></div>'
+        f'<div class="doc-right">{link_html}</div></li>'
+    )
+
+
+def _report_head(total, now, has_analysis, groups, all_docs):
+    """Build the opening part of the HTML report, up to and including the filter bar.
+
+    Args:
+        total: Number of documents in the report; also the divisor for the
+            relevance bar widths.
+        now: Object with ``strftime``; formatted as ``%Y-%m-%d`` in the subtitle.
+        has_analysis: Whether to show the relevance bar and the "AI-analyzed"
+            label instead of "keyword-classified".
+        groups: Mapping from an ``ACTION_GROUPS`` key to that group's documents.
+        all_docs: Every document; their ``_analysis`` supplies the relevance
+            counts and the tag filter options.
+
+    Returns:
+        The HTML string: doctype, ``<head>`` with inline CSS, page header,
+        section links and filter bar. The container ``<div>``, ``<body>`` and
+        ``<html>`` elements are left open.
+    """
+    stat_parts = []
+    for key, title, _, color in ACTION_GROUPS:
+        n = len(groups.get(key, []))
+        label = title.split("—")[0].strip()
+        stat_parts.append(f'<div class="stat"><div class="n" style="color:{color}">{n}</div><div class="l">{label}</div></div>')
+    stats = "".join(stat_parts)
+
+    relbar = ""
+    # ``total`` is the divisor for the segment widths, so skip the bar for an
+    # empty report instead of dividing by zero.
+    if has_analysis and total:
+        rc = Counter(d.get("_analysis", {}).get("relevance", 3) for d in all_docs)
+        colors = {5: "#22c55e", 4: "#84cc16", 3: "#eab308", 2: "#f97316", 1: "#ef4444"}
+        labels = {5: "Must read", 4: "Relevant", 3: "Interesting", 2: "Low", 1: "Skip"}
+        stars = [5, 4, 3, 2, 1]
+        segs = "".join(
+            f'<div class="seg" style="width:{rc.get(s, 0) / total * 100}%;background:{colors[s]}">{rc.get(s, 0)}</div>'
+            for s in stars
+            if rc.get(s, 0)
+        )
+        legend = "".join(
+            f'<span><span class="rel-dot" style="background:{colors[s]}"></span>{s}★ {labels[s]} ({rc.get(s, 0)})</span>'
+            for s in stars
+        )
+        relbar = f'<div style="max-width:600px;margin:1rem auto 0"><div class="rel-bar">{segs}</div><div class="rel-legend">{legend}</div></div>'
+
+    atags = Counter(t for d in all_docs for t in d.get("_analysis", {}).get("tags", []))
+    # Tags come from the analysis output, so escape them before they are used
+    # as an attribute value and as option text.
+    opt_parts = []
+    for t, c in atags.most_common():
+        tag = html_mod.escape(str(t))
+        opt_parts.append(f'<option value="{tag}">{tag} ({c})</option>')
+    tag_opts = "".join(opt_parts)
+
+    toc = ""
+    for key, title, _, _ in ACTION_GROUPS:
+        n = len(groups.get(key, []))
+        if n:
+            toc += f'<a href="#{key}">{title}<span class="cnt">({n})</span></a>'
+
+    return f"""<!DOCTYPE html><html lang="en"><head><meta charset="utf-8"><meta name="viewport" content="width=device-width,initial-scale=1">
+<title>Readwise Reader Triage</title>
+<style>
+:root{{--bg:#0f172a;--surface:#1e293b;--surface2:#334155;--text:#e2e8f0;--dim:#94a3b8;--accent:#f59e0b;--link:#38bdf8}}
+*{{box-sizing:border-box;margin:0;padding:0}}
+body{{font-family:'Inter',-apple-system,system-ui,sans-serif;background:var(--bg);color:var(--text);line-height:1.6}}
+.header{{background:linear-gradient(135deg,#1e293b,#312e81);padding:2rem;text-align:center;border-bottom:3px solid var(--accent)}}
+.header h1{{font-size:1.8rem;margin-bottom:.3rem}}.header .sub{{color:var(--dim);font-size:.85rem}}
+.stats{{display:flex;justify-content:center;gap:1rem;flex-wrap:wrap;margin-top:1.2rem}}
+.stat{{background:rgba(0,0,0,.3);padding:.6rem 1.2rem;border-radius:8px;text-align:center;min-width:100px}}
+.stat .n{{font-size:1.5rem;font-weight:700}}.stat .l{{font-size:.7rem;color:var(--dim);text-transform:uppercase}}
+.container{{max-width:1200px;margin:0 auto;padding:1rem}}
+.rel-bar{{display:flex;height:28px;border-radius:6px;overflow:hidden;margin:1.5rem 0 .5rem}}
+.rel-bar .seg{{display:flex;align-items:center;justify-content:center;font-size:.75rem;font-weight:600;color:#000}}
+.rel-legend{{display:flex;gap:1rem;justify-content:center;font-size:.75rem;color:var(--dim);margin-bottom:1.5rem;flex-wrap:wrap}}
+.rel-legend span{{display:flex;align-items:center;gap:.3rem}}
+.rel-dot{{width:10px;height:10px;border-radius:50%;display:inline-block}}
+.toc{{background:var(--surface);border-radius:8px;padding:1.2rem;margin:1rem 0}}
+.toc h2{{font-size:1rem;margin-bottom:.8rem}}
+.toc-grid{{display:grid;grid-template-columns:repeat(auto-fill,minmax(280px,1fr));gap:.4rem}}
+.toc a{{color:var(--link);text-decoration:none;font-size:.85rem}}.toc a:hover{{text-decoration:underline}}
+.toc .cnt{{opacity:.5;margin-left:.3rem}}
+.group{{margin:1.5rem 0;border-radius:10px;border:1px solid rgba(255,255,255,.08);overflow:hidden}}
+.group-hdr{{padding:1rem 1.2rem;display:flex;align-items:center;justify-content:space-between;cursor:pointer;user-select:none}}
+.group-hdr h2{{font-size:1.15rem}}.badge{{background:rgba(0,0,0,.3);padding:.2rem .7rem;border-radius:14px;font-size:.85rem;font-weight:600}}
+.group-desc{{padding:0 1.2rem .8rem;color:var(--dim);font-size:.85rem;font-style:italic}}
+.cluster{{margin:.4rem .8rem;background:rgba(0,0,0,.2);border-radius:8px;overflow:hidden}}
+.cluster-hdr{{padding:.6rem 1rem;background:rgba(0,0,0,.15);display:flex;align-items:center;justify-content:space-between;cursor:pointer;user-select:none}}
+.cluster-hdr h3{{font-size:.95rem}}.cnt{{background:rgba(255,255,255,.08);padding:.15rem .5rem;border-radius:10px;font-size:.78rem}}
+.cluster-insights{{padding:.5rem 1rem;font-size:.8rem;color:var(--dim);border-top:1px solid rgba(255,255,255,.04);line-height:1.8}}
+.doc-list{{list-style:none}}.doc{{padding:.5rem 1rem;border-top:1px solid rgba(255,255,255,.04);display:grid;grid-template-columns:2.8rem 2.5rem 1fr auto;gap:.6rem;align-items:center;transition:background .1s}}
+.doc:hover{{background:rgba(255,255,255,.03)}}
+.doc-rel{{text-align:center;font-weight:700;font-size:.9rem}}
+.doc-prog{{text-align:center;font-size:.75rem}}.doc-prog .bar{{width:2.2rem;height:3px;background:rgba(255,255,255,.1);border-radius:2px;margin:2px auto 0;overflow:hidden}}
+.doc-prog .fill{{height:100%;border-radius:2px}}
+.doc-info{{min-width:0}}.doc-title{{font-weight:500;white-space:nowrap;overflow:hidden;text-overflow:ellipsis;font-size:.9rem}}
+.doc-title a{{color:var(--text);text-decoration:none}}.doc-title a:hover{{color:var(--link)}}
+.doc-meta{{font-size:.73rem;color:var(--dim);white-space:nowrap;overflow:hidden;text-overflow:ellipsis}}
+.doc-summary{{font-size:.78rem;color:var(--dim);margin-top:.15rem}}
+.doc-summary .short{{cursor:pointer}}
+.doc-summary .short:hover{{color:var(--text)}}
+.doc-summary .full{{display:none;margin-top:.3rem;line-height:1.5;color:var(--dim);border-left:2px solid rgba(255,255,255,.1);padding-left:.6rem}}
+.doc-summary .full.open{{display:block}}
+.doc-summary .reason{{font-size:.72rem;color:var(--accent);margin-top:.2rem;font-style:italic}}
+.doc-right{{text-align:right;font-size:.73rem;white-space:nowrap}}.doc-right a{{color:var(--link)}}
+.tag{{display:inline-block;padding:.05rem .35rem;border-radius:6px;font-size:.65rem;margin-right:.15rem;background:rgba(255,255,255,.06)}}
+.loc{{display:inline-block;padding:.05rem .3rem;border-radius:6px;font-size:.65rem;font-weight:600}}
+.loc-new{{background:#14532d;color:#86efac}}.loc-later{{background:#1e3a5f;color:#93c5fd}}
+.arrow{{display:inline-block;transition:transform .15s}}.arrow.shut{{transform:rotate(-90deg)}}
+.filter-bar{{background:var(--surface);padding:.8rem 1rem;border-radius:8px;margin:1rem 0;display:flex;gap:.5rem;flex-wrap:wrap;align-items:center}}
+.filter-bar label{{font-size:.8rem;color:var(--dim)}}
+.filter-bar select,.filter-bar input{{background:var(--surface2);color:var(--text);border:1px solid rgba(255,255,255,.1);border-radius:6px;padding:.3rem .6rem;font-size:.8rem}}
+@media(max-width:768px){{.doc{{grid-template-columns:2rem 2rem 1fr}}.doc-right{{display:none}}.stats{{gap:.5rem}}.stat{{padding:.4rem .8rem;min-width:70px}}.stat .n{{font-size:1.2rem}}}}
+</style></head><body>
+<div class="header"><h1>📚 Readwise Reader Triage</h1>
+<p class="sub">{now.strftime('%Y-%m-%d')} · {total} documents · {'AI-analyzed' if has_analysis else 'keyword-classified'}</p>
+<div class="stats">{stats}</div>{relbar}</div>
+<div class="container">
+<div class="toc"><h2>📑 Sections</h2><div class="toc-grid">{toc}</div></div>
+<div class="filter-bar"><label>Filter:</label>
+<input type="text" id="search" placeholder="Search titles/summaries..." oninput="filterDocs()">
+<select id="relFilter" onchange="filterDocs()"><option value="">All relevance</option><option value="5">5★</option><option value="4">4★+</option><option value="3">3★+</option></select>
+<select id="tagFilter" onchange="filterDocs()"><option value="">All tags</option>{tag_opts}</select></div>"""
+
+
+def _report_foot():
+    """Return the report's closing markup: the inline script and the closing tags.
+
+    The script defines two functions used by the page:
+
+    * ``toggle(el)`` shows or hides the ``.group-body`` / ``.doc-list`` found
+      under the clicked header's parent and flips the ``shut`` class on its arrow.
+    * ``filterDocs()`` hides every ``.doc`` row that fails the search text, the
+      minimum relevance or the selected tag.
+
+    The tag filter compares against the individual entries of the
+    comma-separated ``data-tags`` attribute, so selecting ``go`` does not also
+    match a row tagged ``golang``.
+    """
+    return """<script>
+function toggle(el){const b=el.parentElement.querySelector('.group-body,.doc-list');if(!b)return;const a=el.querySelector('.arrow');if(b.style.display==='none'){b.style.display='';a?.classList.remove('shut')}else{b.style.display='none';a?.classList.add('shut')}}
+function filterDocs(){const q=document.getElementById('search').value.toLowerCase();const r=document.getElementById('relFilter').value;const t=document.getElementById('tagFilter').value;document.querySelectorAll('.doc').forEach(el=>{const title=(el.dataset.title||'');const summary=(el.querySelector('.doc-summary')?.textContent||'').toLowerCase();const rv=parseInt(el.dataset.rel||'3');const tags=el.dataset.tags||'';let s=true;if(q&&!title.includes(q)&&!summary.includes(q))s=false;if(r&&rv<parseInt(r))s=false;if(t&&!tags.split(',').includes(t))s=false;el.style.display=s?'':'none'})}
+</script></div></body></html>"""
+
+
+# ═══════════════════════════════════════════════════════════════════════
+#  CLI
+# ═══════════════════════════════════════════════════════════════════════
+
+def main():
+    """Parse the command line and run the selected subcommand.
+
+    Registers the ``fetch``, ``analyze`` and ``report`` subcommands, creates
+    ``DATA_DIR`` if it does not exist yet, then hands the parsed arguments to
+    the matching ``cmd_*`` function.
+    """
+    parser = argparse.ArgumentParser(
+        description=__doc__,
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+    )
+    subparsers = parser.add_subparsers(dest="command", required=True)
+
+    fetch_parser = subparsers.add_parser("fetch", help="Fetch documents from Readwise Reader API")
+    fetch_parser.add_argument(
+        "--locations",
+        default="new,later",
+        help="Comma-separated locations (default: new,later)",
+    )
+
+    analyze_parser = subparsers.add_parser("analyze", help="Analyze documents with LLM")
+    analyze_parser.add_argument(
+        "--model",
+        "-m",
+        choices=list(MODELS.keys()),
+        default="opus",
+        help="Model (default: opus)",
+    )
+    analyze_parser.add_argument("--reset", action="store_true", help="Reset checkpoint")
+    analyze_parser.add_argument("--batch-size", type=int, help="Override batch size")
+    analyze_parser.add_argument(
+        "--profile",
+        type=Path,
+        default=PROFILE_FILE,
+        help=f"Profile TOML (default: {PROFILE_FILE})",
+    )
+
+    report_parser = subparsers.add_parser("report", help="Generate HTML triage report")
+    report_parser.add_argument("--no-open", action="store_true", help="Don't open in browser")
+
+    args = parser.parse_args()
+    DATA_DIR.mkdir(parents=True, exist_ok=True)
+
+    # Subcommand name -> handler; an unknown name falls through without action.
+    handlers = {
+        "fetch": cmd_fetch,
+        "analyze": cmd_analyze,
+        "report": cmd_report,
+    }
+    handler = handlers.get(args.command)
+    if handler is not None:
+        handler(args)
+
+
+# Run the CLI only when this file is executed as a script, not when imported.
+if __name__ == "__main__":
+    main()

Commit a818be8e3865

Commit `a818be8e3865`