Commit a818be8e3865
Changed files (4)
pkgs
tools
readwise-reader
pkgs/default.nix
@@ -31,6 +31,7 @@ in
jellyfin-favorites-sync = pkgs.callPackage ../tools/jellyfin-favorites-sync { };
jellyfin-manage-playlist = pkgs.callPackage ../tools/jellyfin-manage-playlist { };
music-playlist-dl = pkgs.callPackage ../tools/music-playlist-dl { };
+ readwise-reader = pkgs.callPackage ../tools/readwise-reader { };
nix-flake-update = pkgs.callPackage ../tools/nix-flake-update { };
slack-archive = pkgs.callPackage ../tools/slack-archive { };
gcal-to-org = pkgs.callPackage ../tools/gcal-to-org { };
tools/readwise-reader/default.nix
@@ -0,0 +1,43 @@
# Package the single-file readwise-reader script as a wrapped Python
# application with `requests` available and gcloud/passage on PATH.
{
  lib,
  python3,
  google-cloud-sdk,
  passage,
}:

python3.pkgs.buildPythonApplication {
  pname = "readwise-reader";
  version = "1.0.0";
  # No setup.py/pyproject.toml: the script is installed by hand below.
  format = "other";

  src = ./.;

  propagatedBuildInputs = with python3.pkgs; [
    requests
  ];

  makeWrapperArgs = [
    "--prefix PATH : ${lib.makeBinPath [ google-cloud-sdk passage ]}"
  ];

  dontUnpack = true;
  dontBuild = true;

  installPhase = ''
    runHook preInstall

    install -Dm755 ${./readwise-reader.py} $out/bin/readwise-reader
    # The script ships with a `uv run --script` shebang for standalone use.
    # Replace it with a Python shebang so the fixup phase recognizes the file
    # as a Python program and wraps it (applying makeWrapperArgs and the
    # propagated `requests`) instead of requiring `uv` at runtime.
    sed -i '1s|^#!.*$|#!${python3.interpreter}|' $out/bin/readwise-reader

    runHook postInstall
  '';

  meta = with lib; {
    description = "Fetch, analyze, and triage Readwise Reader documents";
    homepage = "https://github.com/vdemeester/home";
    license = licenses.mit;
    maintainers = [ ];
    mainProgram = "readwise-reader";
  };
}
tools/readwise-reader/README.md
@@ -0,0 +1,65 @@
+# readwise-reader
+
+Fetch, analyze, and triage [Readwise Reader](https://readwise.io/read) documents.
+
+## Usage
+
+```bash
+# 1. Fetch all Inbox + Later documents from Readwise Reader API
+readwise-reader fetch
+
+# 2. Analyze with an LLM (default: Claude Opus 4.6 via Vertex AI)
+readwise-reader analyze
+
+# Or use a different model:
+readwise-reader analyze -m sonnet # Claude Sonnet 4 (faster, cheaper)
+readwise-reader analyze -m gemini # Gemini 3 Pro Preview (fastest)
+readwise-reader analyze -m gemini25 # Gemini 2.5 Pro
+
+# Resume interrupted analysis (checkpoints automatically):
+readwise-reader analyze
+
+# Start fresh:
+readwise-reader analyze --reset
+
+# 3. Generate interactive HTML triage report
+readwise-reader report
+readwise-reader report --no-open # Don't auto-open browser
+```
+
+## Profile
+
+User profile is loaded from `$XDG_DATA_HOME/readwise/profile.toml` (default: `~/.local/share/readwise/profile.toml`).
+
+The profile controls:
+- **User interests** — what topics are relevant to you
+- **Favorite authors/sites** — auto-boosted to 4★+ regardless of topic
+- **Summary style** — `brief` or `detailed`
+
+See the example in this directory or create your own.
+
+## Data
+
+All data is stored in `$XDG_DATA_HOME/readwise/`:
+- `reader-latest.json` — symlink to latest fetch
+- `reader-analyzed.json` — enriched with LLM analysis
+- `analysis-checkpoint.json` — resume point for interrupted analysis
+- `triage-report.html` — generated report
+- `profile.toml` — user profile
+
+## Requirements
+
+- `passage` — for Readwise API token (`readwise/key`)
+- `gcloud` — for Vertex AI auth (Opus/Sonnet models)
+- `GEMINI_API_KEY` env var, or a key stored in `passage` as a fallback — for Gemini models
+- `requests` Python package
+
+## Report Features
+
+- Documents grouped by action (Must Read, Finish, Archive, Delete, etc.)
+- Within each group, clustered by topic (Nix, Go, Emacs, AI, etc.)
+- Relevance scores (1-5★) with LLM-generated summaries and reasons
+- Click summaries to expand full analysis
+- Filter by title, relevance, or tag
+- Collapsible sections
+- Dark theme
tools/readwise-reader/readwise-reader.py
@@ -0,0 +1,856 @@
+#!/usr/bin/env -S uv run --script
+# /// script
+# requires-python = ">=3.11"
+# dependencies = ["requests"]
+# ///
+"""
+readwise-reader — Fetch, analyze, and triage Readwise Reader documents.
+
+Subcommands:
+ fetch Fetch all documents from Readwise Reader API (Inbox + Later)
+ analyze Score documents for relevance using LLMs (Opus, Sonnet, Gemini)
+ report Generate an interactive HTML triage report
+
+Data stored in $XDG_DATA_HOME/readwise/ (default: ~/.local/share/readwise/).
+User profile loaded from $XDG_DATA_HOME/readwise/profile.toml.
+"""
+
+import argparse
+import json
+import html as html_mod
+import os
+import subprocess
+import sys
+import time
+from collections import Counter, defaultdict
+from datetime import datetime, timezone
+from pathlib import Path
+
# All state lives under $XDG_DATA_HOME/readwise. An unset *or empty*
# XDG_DATA_HOME falls back to ~/.local/share; without the `or`, an empty
# value would silently produce a relative "readwise" directory in the CWD.
DATA_DIR = Path(os.environ.get("XDG_DATA_HOME") or Path.home() / ".local/share") / "readwise"
PROFILE_FILE = DATA_DIR / "profile.toml"  # user profile read by `analyze`
LATEST_LINK = DATA_DIR / "reader-latest.json"  # symlink to the newest fetch
ANALYZED_FILE = DATA_DIR / "reader-analyzed.json"  # fetch enriched with LLM analysis
CHECKPOINT_FILE = DATA_DIR / "analysis-checkpoint.json"  # resume point for `analyze`
REPORT_FILE = DATA_DIR / "triage-report.html"  # output of `report`
+
+# ═══════════════════════════════════════════════════════════════════════
+# FETCH
+# ═══════════════════════════════════════════════════════════════════════
+
# Endpoint used to list Reader documents.
READER_API = "https://readwise.io/api/v3/list/"
# Seconds to sleep between requests; 3.1 s keeps the client just under
# 20 requests per minute.
READER_RATE_DELAY = 3.1  # 20 req/min
+
+
def get_readwise_token():
    """Return the Readwise API token stored in passage under ``readwise/key``.

    Prints a diagnostic to stderr and exits with status 1 when ``passage`` is
    not installed, the lookup fails, or the stored entry is empty.
    """
    try:
        result = subprocess.run(["passage", "show", "readwise/key"], capture_output=True, text=True)
    except FileNotFoundError:
        # Without this, a missing binary surfaces as a raw traceback.
        print("Failed to get Readwise token: 'passage' not found on PATH", file=sys.stderr)
        sys.exit(1)
    if result.returncode != 0:
        print("Failed to get Readwise token from passage", file=sys.stderr)
        sys.exit(1)
    token = result.stdout.strip()
    if not token:
        # An empty token would only fail later as an opaque HTTP 401.
        print("Readwise token from passage is empty", file=sys.stderr)
        sys.exit(1)
    return token
+
+
def fetch_documents(token: str, location: str) -> list[dict]:
    """Fetch every document in *location* from the Reader list endpoint.

    Follows ``nextPageCursor`` until the API stops returning one, sleeping
    ``READER_RATE_DELAY`` seconds between pages.

    Args:
        token: Readwise API token, sent as ``Authorization: Token …``.
        location: Reader location to list (passed as the ``location`` param).

    Returns:
        The concatenated ``results`` entries of all pages.

    Raises:
        requests.HTTPError: on a non-success response (after rate-limit retries).
        requests.RequestException: on connection problems or timeouts.
    """
    import requests

    headers = {"Authorization": f"Token {token}"}
    max_rate_limit_retries = 5
    rate_limited = 0

    docs = []
    cursor = None
    page = 1
    while True:
        params = {"location": location, "limit": 100}
        if cursor:
            params["pageCursor"] = cursor
        print(f" Fetching {location} page {page}...", file=sys.stderr)
        # A timeout keeps a stalled connection from hanging the run forever.
        resp = requests.get(READER_API, params=params, headers=headers, timeout=60)
        if resp.status_code == 429 and rate_limited < max_rate_limit_retries:
            # Honor the server's Retry-After hint instead of aborting the
            # whole fetch; retry the same page afterwards.
            rate_limited += 1
            retry_after = resp.headers.get("Retry-After", "")
            wait = int(retry_after) if retry_after.isdigit() else 60
            print(f" Rate limited, waiting {wait}s...", file=sys.stderr)
            time.sleep(wait)
            continue
        resp.raise_for_status()
        rate_limited = 0
        data = resp.json()
        results = data.get("results", [])
        docs.extend(results)
        cursor = data.get("nextPageCursor")
        count = data.get("count", "?")
        print(f" Got {len(results)} docs (total: {count})", file=sys.stderr)
        if not cursor:
            break
        page += 1
        time.sleep(READER_RATE_DELAY)
    return docs
+
+
def cmd_fetch(args):
    """Fetch the requested Reader locations and store them as a JSON snapshot.

    ``args.locations`` is a comma-separated list of location names. The
    snapshot is written to ``DATA_DIR`` with a UTC timestamp in its name and
    ``LATEST_LINK`` is repointed at it.
    """
    DATA_DIR.mkdir(parents=True, exist_ok=True)
    token = get_readwise_token()

    # Tolerate "new, later" and stray commas.
    locations = [loc.strip() for loc in args.locations.split(",") if loc.strip()]
    if not locations:
        print("No locations given", file=sys.stderr)
        sys.exit(1)

    all_docs = {}
    for position, loc in enumerate(locations):
        if position:
            # Keep the pause between locations that the per-page loop also uses.
            time.sleep(READER_RATE_DELAY)
        print(f"\nFetching '{loc}' documents...", file=sys.stderr)
        docs = fetch_documents(token, loc)
        all_docs[loc] = docs
        print(f" Total {loc}: {len(docs)}", file=sys.stderr)

    timestamp = datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%SZ")
    output = {
        "fetched_at": timestamp,
        "counts": {loc: len(docs) for loc, docs in all_docs.items()},
        "documents": all_docs,
    }

    outfile = DATA_DIR / f"reader-{'-'.join(locations)}-{timestamp}.json"
    # ensure_ascii=False emits raw non-ASCII text, so the encoding must not
    # depend on the locale.
    with open(outfile, "w", encoding="utf-8") as f:
        json.dump(output, f, indent=2, ensure_ascii=False)

    # Build the new link beside the old one and rename it into place so an
    # interruption never leaves the data directory without a "latest" link.
    tmp_link = LATEST_LINK.with_name(LATEST_LINK.name + ".tmp")
    tmp_link.unlink(missing_ok=True)
    tmp_link.symlink_to(outfile.name)
    tmp_link.replace(LATEST_LINK)
    print(f"\n✅ Saved {sum(len(d) for d in all_docs.values())} documents to {outfile}", file=sys.stderr)
+
+
+# ═══════════════════════════════════════════════════════════════════════
+# PROFILE
+# ═══════════════════════════════════════════════════════════════════════
+
+def load_profile(path: Path) -> dict:
+ """Load profile from TOML file with a minimal parser (no dependency)."""
+ if not path.exists():
+ return {}
+
+ profile = {}
+ current_section = ""
+ current_section_for_key = ""
+ current_key = None
+ current_list = None
+
+ for line in path.read_text().splitlines():
+ stripped = line.strip()
+ if not stripped or stripped.startswith("#"):
+ continue
+
+ if stripped.startswith("["):
+ if current_key and current_list is not None:
+ profile[f"{current_section_for_key}.{current_key}"] = current_list
+ current_list = None
+ current_key = None
+ current_section = stripped.strip("[]").strip()
+ current_section_for_key = current_section
+ continue
+
+ if "=" in stripped and not stripped.startswith('"'):
+ if current_key and current_list is not None:
+ profile[f"{current_section_for_key}.{current_key}"] = current_list
+ current_list = None
+
+ key, val = stripped.split("=", 1)
+ key = key.strip()
+ val = val.strip()
+ current_key = key
+ current_section_for_key = current_section
+
+ if val == "[":
+ current_list = []
+ elif val.startswith("[") and val.endswith("]"):
+ items = val[1:-1]
+ current_list = [s.strip().strip('"').strip("'") for s in items.split(",") if s.strip().strip('"').strip("'")]
+ profile[f"{current_section}.{current_key}"] = current_list
+ current_list = None
+ current_key = None
+ elif val.startswith('"') or val.startswith("'"):
+ profile[f"{current_section}.{current_key}"] = val.strip('"').strip("'")
+ current_key = None
+ else:
+ profile[f"{current_section}.{current_key}"] = val
+ current_key = None
+ elif current_list is not None:
+ val = stripped.rstrip(",").strip().strip('"').strip("'")
+ if val and val != "]":
+ if "#" in val and not val.startswith("#"):
+ val = val[:val.index("#")].strip().rstrip(",").strip().strip('"').strip("'")
+ if val:
+ current_list.append(val)
+ if stripped.rstrip().endswith("]") or stripped == "]":
+ profile[f"{current_section_for_key}.{current_key}"] = current_list
+ current_list = None
+ current_key = None
+
+ if current_key and current_list is not None:
+ profile[f"{current_section_for_key}.{current_key}"] = current_list
+
+ return profile
+
+
def build_prompt_context(profile: dict) -> tuple[str, str]:
    """Turn a flattened profile into prompt text plus the summary style.

    Args:
        profile: Mapping of dotted keys (``"user.name"``, ``"interests.core"``,
            ``"favorites.authors"``, …) as produced by ``load_profile``.

    Returns:
        ``(user_description, summary_style)``; the style defaults to
        ``"detailed"`` when ``summary.style`` is absent.
    """
    def get_list(key):
        value = profile.get(key, [])
        # A scalar such as `core = "Go"` must be one item; joining the raw
        # string would otherwise yield "G, o".
        if isinstance(value, str):
            return [value] if value else []
        return value

    def fmt(items):
        return ", ".join(items) if items else "N/A"

    name = profile.get("user.name", "User")
    role = profile.get("user.role", "Software Engineer")

    user_desc = f"""{name} — {role}

Core languages: {fmt(get_list('interests.core'))}
Infrastructure: {fmt(get_list('interests.infrastructure'))}
Editor: {fmt(get_list('interests.editor'))}
Kubernetes/Containers: {fmt(get_list('interests.kubernetes'))}
CLI tools: {fmt(get_list('interests.tools'))}
AI tooling: {fmt(get_list('interests.ai'))}
Side interests: {fmt(get_list('interests.side'))}
Values: {fmt(get_list('interests.values'))}
Currently exploring: {fmt(get_list('interests.exploring'))}
NOT interested in: {fmt(get_list('interests.not_interested'))}"""

    fav_authors = get_list("favorites.authors")
    fav_sites = get_list("favorites.sites")
    if fav_authors or fav_sites:
        user_desc += "\n\nFAVORITE AUTHORS/SITES (auto-boost to at least 4★, always worth_reading):\n"
        if fav_authors:
            user_desc += "Authors: " + ", ".join(fav_authors) + "\n"
        if fav_sites:
            user_desc += "Sites: " + ", ".join(fav_sites) + "\n"
        user_desc += "These are trusted voices — mark as interesting even on tangential topics."

    return user_desc, profile.get("summary.style", "detailed")
+
+
+# ═══════════════════════════════════════════════════════════════════════
+# ANALYZE
+# ═══════════════════════════════════════════════════════════════════════
+
# Model presets selectable by short name. Each entry carries:
#   backend           - which call_* function handles the request
#   model_id          - identifier sent to that backend
#   batch_size        - documents per prompt (overridable via args.batch_size)
#   rate_delay        - seconds to sleep between batches
#   max_output_tokens - output token limit passed to the backend
MODELS = {
    "opus": {
        "backend": "vertex-claude",
        "model_id": "claude-opus-4-6",
        "batch_size": 25,
        "rate_delay": 2,
        "max_output_tokens": 16384,
    },
    "sonnet": {
        "backend": "vertex-claude",
        "model_id": "claude-sonnet-4@20250514",
        "batch_size": 30,
        "rate_delay": 1,
        "max_output_tokens": 16384,
    },
    "gemini": {
        "backend": "gemini-api",
        "model_id": "gemini-3-pro-preview",
        "batch_size": 35,
        "rate_delay": 2,
        "max_output_tokens": 16384,
    },
    "gemini25": {
        "backend": "gemini-api",
        "model_id": "gemini-2.5-pro",
        "batch_size": 35,
        "rate_delay": 2,
        "max_output_tokens": 16384,
    },
}

# Maximum attempts per LLM request before giving up.
MAX_RETRIES = 6
# Cached gcloud access token and the time.time() at which it was obtained;
# ts == 0 forces a refresh on the next get_vertex_token() call.
_token_cache = {"token": None, "ts": 0}
+
+
def get_vertex_token():
    """Return a gcloud access token, reusing the cached one for up to 2400 s.

    Raises:
        RuntimeError: if ``gcloud auth print-access-token`` exits non-zero.
    """
    cache_age = time.time() - _token_cache["ts"]
    if cache_age <= 2400:
        return _token_cache["token"]

    proc = subprocess.run(
        ["gcloud", "auth", "print-access-token"],
        capture_output=True,
        text=True,
        timeout=10,
    )
    if proc.returncode != 0:
        raise RuntimeError(f"gcloud auth failed: {proc.stderr}")

    _token_cache["token"] = proc.stdout.strip()
    _token_cache["ts"] = time.time()
    return _token_cache["token"]
+
+
def call_vertex_claude(model_id: str, prompt: str, max_tokens: int) -> str:
    """Send *prompt* to a Claude model on Vertex AI and return the reply text.

    The project and region come from ``GOOGLE_CLOUD_PROJECT`` and
    ``GOOGLE_CLOUD_LOCATION`` (with built-in defaults). Up to ``MAX_RETRIES``
    attempts are made: HTTP 429 and transient failures back off
    exponentially, HTTP 401 forces a token refresh, and any other 4xx
    response is raised immediately because retrying cannot fix it.

    Args:
        model_id: Vertex publisher model identifier.
        prompt: User message content.
        max_tokens: Output token limit for the request.

    Returns:
        The text of the first content block of the response.

    Raises:
        RuntimeError: when all attempts were consumed by 429/401 responses.
        Exception: the last underlying error once retries are exhausted, or
            the first non-retryable client error.
    """
    import requests

    project = os.environ.get("GOOGLE_CLOUD_PROJECT", "itpc-gcp-pnd-pe-eng-claude")
    location = os.environ.get("GOOGLE_CLOUD_LOCATION", "us-east5")
    url = f"https://{location}-aiplatform.googleapis.com/v1/projects/{project}/locations/{location}/publishers/anthropic/models/{model_id}:rawPredict"
    payload = {
        "anthropic_version": "vertex-2023-10-16",
        "messages": [{"role": "user", "content": prompt}],
        "max_tokens": max_tokens,
        "temperature": 0.2,
    }

    for attempt in range(MAX_RETRIES):
        token = get_vertex_token()
        try:
            resp = requests.post(
                url,
                headers={"Authorization": f"Bearer {token}", "Content-Type": "application/json"},
                json=payload,
                timeout=180,
            )
            if resp.status_code == 429:
                wait = (2 ** attempt) * 5
                print(f" ⏳ Rate limited, waiting {wait}s...", file=sys.stderr)
                time.sleep(wait)
                continue
            if resp.status_code == 401:
                # Expire the cached token so the next attempt fetches a new one.
                _token_cache["ts"] = 0
                continue
            resp.raise_for_status()
            return resp.json()["content"][0]["text"]
        except Exception as e:
            # 4xx responses (bad request, forbidden, unknown model, ...) are
            # deterministic; retrying them only burns the backoff budget.
            status = getattr(getattr(e, "response", None), "status_code", None)
            client_error = status is not None and 400 <= status < 500
            if client_error or attempt >= MAX_RETRIES - 1:
                raise
            wait = (2 ** attempt) * 3
            print(f" ⚠ {e}, retrying in {wait}s...", file=sys.stderr)
            time.sleep(wait)
    raise RuntimeError(f"Failed after {MAX_RETRIES} retries")
+
+
def get_gemini_key():
    """Return the Gemini API key.

    ``GEMINI_API_KEY`` from the environment wins; otherwise the key is read
    from passage.

    Raises:
        RuntimeError: if the variable is unset and the passage lookup fails,
            cannot be run, times out, or returns an empty value.
    """
    key = os.environ.get("GEMINI_API_KEY")
    if key:
        return key
    try:
        result = subprocess.run(
            ["passage", "show", "redhat/google/osp/vdeemest-api-key"],
            capture_output=True,
            text=True,
            timeout=10,
        )
    except (FileNotFoundError, subprocess.TimeoutExpired) as err:
        # Report every lookup failure as the same RuntimeError instead of
        # leaking OSError/TimeoutExpired to callers.
        raise RuntimeError("No GEMINI_API_KEY and passage lookup failed") from err
    if result.returncode != 0:
        raise RuntimeError("No GEMINI_API_KEY and passage lookup failed")
    key = result.stdout.strip()
    if not key:
        raise RuntimeError("No GEMINI_API_KEY and passage returned an empty key")
    return key
+
+
def call_gemini(model_id: str, prompt: str, max_tokens: int) -> str:
    """Send *prompt* to the Gemini API and return the reply text.

    The request asks for a JSON response (``responseMimeType``). Up to
    ``MAX_RETRIES`` attempts are made: HTTP 429 and transient failures back
    off exponentially, while other 4xx responses are raised immediately.

    Args:
        model_id: Gemini model identifier.
        prompt: Text sent as the single content part.
        max_tokens: Value for ``maxOutputTokens``.

    Returns:
        The text of the first part of the first candidate.

    Raises:
        RuntimeError: when all attempts were consumed by 429 responses, or the
            API key cannot be obtained.
        Exception: the last underlying error once retries are exhausted, or
            the first non-retryable client error.
    """
    import requests

    api_key = get_gemini_key()
    url = f"https://generativelanguage.googleapis.com/v1beta/models/{model_id}:generateContent"
    # Send the key as a header, not a `?key=` query parameter: request errors
    # embed the URL in their message, and that message is printed below.
    headers = {"Content-Type": "application/json", "x-goog-api-key": api_key}
    payload = {
        "contents": [{"parts": [{"text": prompt}]}],
        "generationConfig": {"temperature": 0.2, "maxOutputTokens": max_tokens, "responseMimeType": "application/json"},
    }

    for attempt in range(MAX_RETRIES):
        try:
            resp = requests.post(url, headers=headers, json=payload, timeout=180)
            if resp.status_code == 429:
                wait = (2 ** attempt) * 5
                print(f" ⏳ Rate limited, waiting {wait}s...", file=sys.stderr)
                time.sleep(wait)
                continue
            resp.raise_for_status()
            return resp.json()["candidates"][0]["content"]["parts"][0]["text"]
        except Exception as e:
            # 4xx responses are deterministic; retrying them is pointless.
            status = getattr(getattr(e, "response", None), "status_code", None)
            client_error = status is not None and 400 <= status < 500
            if client_error or attempt >= MAX_RETRIES - 1:
                raise
            wait = (2 ** attempt) * 3
            print(f" ⚠ {e}, retrying in {wait}s...", file=sys.stderr)
            time.sleep(wait)
    raise RuntimeError(f"Failed after {MAX_RETRIES} retries")
+
+
+def call_llm(backend: str, model_id: str, prompt: str, max_tokens: int) -> str:
+ if backend == "vertex-claude":
+ return call_vertex_claude(model_id, prompt, max_tokens)
+ elif backend == "gemini-api":
+ return call_gemini(model_id, prompt, max_tokens)
+ raise ValueError(f"Unknown backend: {backend}")
+
+
def parse_json_response(text: str) -> dict:
    """Extract the JSON document from an LLM reply.

    Accepts bare JSON, JSON wrapped in a Markdown code fence, and JSON
    surrounded by prose. In the last case the first ``{`` that starts a
    complete JSON value wins, so trailing text (even text containing braces)
    does not break parsing.

    Args:
        text: Raw model output.

    Returns:
        The decoded JSON value.

    Raises:
        json.JSONDecodeError: if no JSON value can be decoded from *text*.
    """
    text = text.strip()
    if text.startswith("```"):
        # Drop the opening fence line (which may carry a language tag). A
        # single-line reply has no newline; then only the backticks go.
        first_line, newline, rest = text.partition("\n")
        text = rest if newline else first_line[3:]
    if text.endswith("```"):
        text = text[:-3]
    text = text.strip()
    try:
        return json.loads(text)
    except json.JSONDecodeError:
        decoder = json.JSONDecoder()
        start = text.find("{")
        while start != -1:
            try:
                # raw_decode stops at the end of the first complete value and
                # ignores whatever follows it.
                value, _end = decoder.raw_decode(text, start)
                return value
            except json.JSONDecodeError:
                start = text.find("{", start + 1)
        raise
+
+
def build_analysis_prompt(profile_text: str, summary_style: str, docs_batch: list[dict]) -> str:
    """Build the scoring prompt for one batch of documents.

    Each document is listed under its index within *docs_batch*; the model is
    asked to echo that index as ``id`` so results can be matched back.

    Args:
        profile_text: User description inserted inside ``<user_profile>``.
        summary_style: ``"detailed"`` selects the long summary/reason
            instructions; any other value selects the brief ones.
        docs_batch: Reader documents (dicts) to score.

    Returns:
        The complete prompt text.
    """
    doc_entries = []
    for i, d in enumerate(docs_batch):
        # `or` (not a .get default) so fields that are present but null do
        # not show up in the prompt as the literal text "None".
        title = d.get("title") or "Untitled"
        summary = (d.get("summary") or "")[:600]
        source = d.get("site_name") or d.get("source") or ""
        source_url = d.get("source_url") or ""
        category = d.get("category") or ""
        word_count = d.get("word_count") or 0
        author = d.get("author") or ""
        if summary.strip().lower() in ("comments", ""):
            summary = "N/A — infer content from title, author, source URL"
        doc_entries.append(
            f'[{i}] "{title}"\n'
            f' Author: {author} | Source: {source} | Category: {category} | {word_count} words\n'
            f' URL: {source_url}\n'
            f' Summary: {summary}'
        )
    documents = "\n".join(doc_entries)

    if summary_style == "detailed":
        s_inst = ("**summary**: 4-6 sentences. Describe what this article is actually about in depth: "
                  "what problem does it address, what's the core argument or technique, what makes it interesting or unique? "
                  "Include specific details (tools mentioned, approaches described, conclusions drawn). "
                  "Don't just restate the title. If summary is N/A, use title/author/URL and your knowledge to infer the likely content.")
        r_inst = ("**reason**: 2-3 sentences. Explain specifically why this is or isn't relevant. "
                  "Reference concrete user interests that match (e.g. 'Uses NixOS daily and this covers flake patterns') "
                  "or don't match (e.g. 'React frontend content, outside interest area').")
    else:
        s_inst = "**summary**: 1-2 sentences about the actual content."
        r_inst = "**reason**: 5-15 words explaining the score."

    return f"""Analyze these {len(docs_batch)} articles from a read-it-later app. Score each for relevance to this specific user.

<user_profile>
{profile_text}
</user_profile>

<documents>
{documents}
</documents>

For each document return:
1. {s_inst}
2. **relevance**: 1-5 score:
 - 5 = Must read — directly about daily tools/work or from a favorite author/site
 - 4 = Highly relevant — strongly aligned with interests, or from a favorite source on any topic
 - 3 = Interesting — decent tech content, tangentially related
 - 2 = Low relevance — not aligned with interests
 - 1 = Skip — completely irrelevant or too superficial
3. {r_inst}
4. **action**: must_read / worth_reading / skim / archive / delete
5. **tags**: 1-3 from: nix, go, rust, python, emacs, kubernetes, containers, ci-cd, git, security, homelab, networking, ai-llm, coding-agents, linux, open-source, privacy, productivity, pkm, career, culture, french, hardware, web, devtools, monitoring, tekton

IMPORTANT:
- Favorite authors/sites get minimum 4★ and "worth_reading", even on tangential topics.
- French content from favorite French sources is scored on merit, not penalized for being French.
- Be discriminating: generic listicles = 1-2★. Deep technical posts on relevant topics = 4-5★.

Return ONLY valid JSON:
{{"analyses":[{{"id":0,"summary":"...","relevance":4,"reason":"...","action":"worth_reading","tags":["nix","homelab"]}},...]}}\
"""
+
+
def _coerce_relevance(value, default=3):
    """Return *value* as an int clamped to 1-5, or *default* if it is not numeric."""
    try:
        score = int(value)
    except (TypeError, ValueError):
        return default
    return max(1, min(5, score))


def _coerce_tags(value):
    """Return *value* as a list of strings; anything that is not a list yields []."""
    if not isinstance(value, list):
        return []
    return [str(tag) for tag in value]


def cmd_analyze(args):
    """Score the latest fetch with an LLM and write the enriched data set.

    Reads ``LATEST_LINK``, analyzes not-yet-scored documents in batches,
    checkpoints after every batch so an interrupted run can resume, then
    writes ``ANALYZED_FILE`` and prints relevance/action statistics to stderr.

    Uses ``args.model``, ``args.batch_size``, ``args.reset`` and
    ``args.profile``. A failing batch is reported and skipped; its documents
    stay unanalyzed and are retried on the next run.
    """
    model_cfg = MODELS[args.model].copy()
    if args.batch_size:
        model_cfg["batch_size"] = args.batch_size

    if args.reset and CHECKPOINT_FILE.exists():
        CHECKPOINT_FILE.unlink()
        print("🔄 Checkpoint reset.", file=sys.stderr)

    profile = load_profile(args.profile)
    if profile:
        print(f"👤 Profile: {args.profile}", file=sys.stderr)
        fav_count = len(profile.get("favorites.authors", [])) + len(profile.get("favorites.sites", []))
        print(f" {fav_count} favorite authors/sites", file=sys.stderr)
    profile_text, summary_style = build_prompt_context(profile)
    print(f"📝 Summary style: {summary_style}", file=sys.stderr)

    if not LATEST_LINK.exists():
        # Friendlier than a FileNotFoundError traceback on first use.
        print(f"No fetched data at {LATEST_LINK}; run 'readwise-reader fetch' first.", file=sys.stderr)
        sys.exit(1)
    with open(LATEST_LINK, encoding="utf-8") as f:
        data = json.load(f)

    all_docs = []
    for loc, loc_docs in data["documents"].items():
        for d in loc_docs:
            d["_location"] = loc
            all_docs.append(d)

    print(f"📚 Total: {len(all_docs)} docs", file=sys.stderr)
    print(f"🤖 Model: {model_cfg['model_id']} ({model_cfg['backend']})", file=sys.stderr)
    print(f"📦 Batch: {model_cfg['batch_size']}, max output: {model_cfg['max_output_tokens']} tokens", file=sys.stderr)

    analyzed = {}
    if CHECKPOINT_FILE.exists():
        with open(CHECKPOINT_FILE, encoding="utf-8") as f:
            analyzed = json.load(f)
        print(f"💾 Checkpoint: {len(analyzed)} done", file=sys.stderr)

    to_analyze = [d for d in all_docs if d["id"] not in analyzed]
    print(f"🔍 Remaining: {len(to_analyze)}", file=sys.stderr)

    if to_analyze:
        bs = model_cfg["batch_size"]
        total_batches = (len(to_analyze) + bs - 1) // bs
        checkpoint_tmp = CHECKPOINT_FILE.with_suffix(".json.tmp")

        for bn in range(total_batches):
            batch = to_analyze[bn * bs: (bn + 1) * bs]
            pct = len(analyzed) / len(all_docs) * 100
            print(f"\n[{bn+1}/{total_batches}] {len(batch)} docs ({pct:.0f}% done)...", file=sys.stderr)

            try:
                prompt = build_analysis_prompt(profile_text, summary_style, batch)
                raw = call_llm(model_cfg["backend"], model_cfg["model_id"], prompt, model_cfg["max_output_tokens"])
                parsed = parse_json_response(raw)
                entries = parsed.get("analyses", []) if isinstance(parsed, dict) else []

                matched = 0
                for a in entries:
                    # Model output is untrusted: skip malformed entries rather
                    # than losing the whole batch to one bad item.
                    if not isinstance(a, dict):
                        continue
                    try:
                        idx = int(a.get("id"))
                    except (TypeError, ValueError):
                        continue
                    if not 0 <= idx < len(batch):
                        continue
                    analyzed[batch[idx]["id"]] = {
                        "summary": a.get("summary", ""),
                        # Normalized so later sorting and counting never mix
                        # strings and ints.
                        "relevance": _coerce_relevance(a.get("relevance", 3)),
                        "reason": a.get("reason", ""),
                        "action": a.get("action", "skim"),
                        "tags": _coerce_tags(a.get("tags", [])),
                    }
                    matched += 1
                print(f" ✓ {matched}/{len(batch)}", file=sys.stderr)
            except Exception as e:
                # Deliberate best-effort: report, keep going, and leave the
                # batch for the next (resumed) run.
                print(f" ✗ {e}", file=sys.stderr)

            # Write-then-rename so an interrupt during the dump cannot
            # corrupt the checkpoint that resuming depends on.
            with open(checkpoint_tmp, "w", encoding="utf-8") as f:
                json.dump(analyzed, f)
            checkpoint_tmp.replace(CHECKPOINT_FILE)

            if bn < total_batches - 1:
                time.sleep(model_cfg["rate_delay"])

    # Build output
    print(f"\n📊 Analyzed: {len(analyzed)}/{len(all_docs)}", file=sys.stderr)
    enriched = {}
    for loc, loc_docs in data["documents"].items():
        enriched[loc] = []
        for d in loc_docs:
            d["_analysis"] = analyzed.get(d["id"], {"summary": d.get("summary", ""), "relevance": 3, "reason": "Not analyzed", "action": "skim", "tags": []})
            enriched[loc].append(d)

    output = {
        "fetched_at": data.get("fetched_at", ""),
        "analyzed_at": datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%SZ"),
        "model": model_cfg["model_id"],
        "counts": {loc: len(docs) for loc, docs in enriched.items()},
        "documents": enriched,
    }
    # ensure_ascii=False emits raw non-ASCII text; pin the encoding.
    with open(ANALYZED_FILE, "w", encoding="utf-8") as f:
        json.dump(output, f, indent=2, ensure_ascii=False)
    print(f"✅ Output: {ANALYZED_FILE}", file=sys.stderr)

    # Stats (older checkpoints may hold unnormalized scores, so coerce here too)
    rels = Counter(_coerce_relevance(entry["relevance"]) for entry in analyzed.values() if "relevance" in entry)
    if rels:
        print("\nRelevance:", file=sys.stderr)
        for s in sorted(rels, reverse=True):
            print(f" {s}★: {rels[s]:4d} ({rels[s]*100//len(analyzed)}%)", file=sys.stderr)
    acts = Counter(str(entry["action"]) for entry in analyzed.values() if "action" in entry)
    if acts:
        print("Actions:", file=sys.stderr)
        for a, c in acts.most_common():
            print(f" {a:15s}: {c:4d}", file=sys.stderr)
+
+
+# ═══════════════════════════════════════════════════════════════════════
+# REPORT
+# ═══════════════════════════════════════════════════════════════════════
+
# Presentation data for topic clusters: tag -> (icon, display label, color).
# Tags missing from this table are rendered with a generic icon by the report.
TAG_DISPLAY = {
    "nix": ("❄️", "NixOS & Nix", "#7eb8da"), "emacs": ("📝", "Emacs & Org-mode", "#7f5ab6"),
    "go": ("🐹", "Go", "#00add8"), "rust": ("🦀", "Rust", "#dea584"),
    "python": ("🐍", "Python", "#3776ab"), "tekton": ("🔧", "Tekton & CI/CD", "#fd495c"),
    "kubernetes": ("☸️", "Kubernetes", "#326ce5"), "containers": ("📦", "Containers", "#2496ed"),
    "ci-cd": ("🔄", "CI/CD", "#fd495c"), "homelab": ("🏠", "Homelab", "#e8a87c"),
    "git": ("🔀", "Git & VCS", "#f14e32"), "coding-agents": ("🤖", "Coding Agents", "#a855f7"),
    "ai-llm": ("🧠", "AI & LLM", "#8b5cf6"), "security": ("🔒", "Security", "#ef4444"),
    "linux": ("🐧", "Linux", "#fcc624"), "networking": ("🌐", "Networking", "#06b6d4"),
    "monitoring": ("📊", "Monitoring", "#10b981"), "devtools": ("🛠️", "Dev Tools", "#64748b"),
    "open-source": ("⚖️", "Open Source", "#22c55e"), "privacy": ("🛡️", "Privacy", "#f59e0b"),
    "productivity": ("📋", "Productivity", "#6366f1"), "pkm": ("🧩", "PKM", "#ec4899"),
    "career": ("👔", "Career", "#14b8a6"), "culture": ("📖", "Culture", "#a78bfa"),
    "french": ("🇫🇷", "French", "#3b82f6"), "hardware": ("⌨️", "Hardware", "#f97316"),
    "web": ("🌍", "Web", "#06b6d4"), "other": ("📄", "Other", "#94a3b8"),
}

# Report sections in display order: (key returned by categorize(), heading,
# description shown under the heading, accent color).
ACTION_GROUPS = [
    ("must_read", "⭐ Must Read", "AI rates these highly relevant to you.", "#f59e0b"),
    ("finish_reading", "🏃 Finish Reading — >50% Done", "You started these. Finish them.", "#2196f3"),
    ("archive_finished", "✅ Archive — Finished", "100% read. Archive.", "#4caf50"),
    ("keep_triage", "📚 Active Queue", "Review and decide.", "#9c27b0"),
    ("archive_old_unread", "📦 Old & Unread (still relevant)", "1yr+ unread but AI says 3★+.", "#ff9800"),
    ("archive_low_relevance", "🗑️ Old, Unread & Low Relevance", "1yr+ unread, ≤2★. Safe to purge.", "#f44336"),
    ("archive_old_barely", "🤔 Old & Barely Started", "1yr+ old, <10% read.", "#795548"),
]
+
+
def categorize(d):
    """Return the ACTION_GROUPS key a document belongs to.

    The decision uses reading progress (0.0-1.0), age in days (``d["_age"]``)
    and the analysis relevance/action. Rules are checked in order and the
    first match wins:

    1. fully read                              -> ``archive_finished``
    2. older than a year, unread, relevance ≤2 -> ``archive_low_relevance``
    3. older than a year, unread               -> ``archive_old_unread``
    4. older than a year, under 10% read       -> ``archive_old_barely``
    5. more than half read                     -> ``finish_reading``
    6. action must_read/worth_reading, rel ≥4  -> ``must_read``
    7. anything else                           -> ``keep_triage``
    """
    # `or` so fields that are present but null count as unread / unanalyzed
    # instead of raising on comparison.
    p = d.get("reading_progress") or 0
    age = d["_age"]
    analysis = d.get("_analysis") or {}
    rel = analysis.get("relevance", 3)

    if p >= 1.0:
        return "archive_finished"
    if age > 365 and p == 0 and rel <= 2:
        return "archive_low_relevance"
    if age > 365 and p == 0:
        return "archive_old_unread"
    if age > 365 and p < 0.1:
        return "archive_old_barely"
    if p > 0.5:
        return "finish_reading"
    if analysis.get("action") in ("must_read", "worth_reading") and rel >= 4:
        return "must_read"
    return "keep_triage"
+
+
def primary_tag(d):
    """Pick the tag used to cluster a document in the report.

    The highest-ranked known tag wins; if none of the document's tags is
    known, its first tag is used, and ``"other"`` when it has no tags.
    """
    doc_tags = d.get("_analysis", {}).get("tags", [])
    ranked = (
        "nix", "emacs", "go", "rust", "python", "tekton", "kubernetes",
        "containers", "ci-cd", "homelab", "git", "coding-agents", "ai-llm",
        "security", "linux", "networking", "monitoring", "devtools",
        "open-source", "privacy", "productivity", "pkm", "career", "culture",
        "french", "hardware", "web",
    )
    preferred = next((name for name in ranked if name in doc_tags), None)
    if preferred is not None:
        return preferred
    if doc_tags:
        return doc_tags[0]
    return "other"
+
+
def age_label(days):
    """Format an age in days as a short label such as ``3d``, ``2w``, ``5mo``, ``1y3mo``.

    Months are approximated as 30 days and capped at 11, so labels never
    read "12mo" or "1y12mo" for ages just short of a full year.
    """
    if days < 7:
        return f"{days}d"
    if days < 30:
        return f"{days // 7}w"
    if days < 365:
        return f"{min(days // 30, 11)}mo"
    years, remainder = divmod(days, 365)
    return f"{years}y{min(remainder // 30, 11)}mo"
+
+
def _doc_age_days(d, now):
    """Return whole days between a document's saved/created time and *now*.

    Missing or unparseable timestamps count as age 0; timestamps without an
    offset are taken as UTC so they can be subtracted from the aware *now*.
    """
    saved = d.get("saved_at") or d.get("created_at")
    if not saved:
        return 0
    try:
        when = datetime.fromisoformat(saved)
    except ValueError:
        return 0
    if when.tzinfo is None:
        when = when.replace(tzinfo=timezone.utc)
    return (now - when).days


def cmd_report(args):
    """Generate the HTML triage report and optionally open it.

    Reads ``ANALYZED_FILE`` when present (otherwise the raw ``LATEST_LINK``
    fetch), groups documents by ``categorize()`` and, within each group, by
    primary tag, then writes ``REPORT_FILE``. Unless ``args.no_open`` is set
    the report is opened with ``xdg-open``.
    """
    src = ANALYZED_FILE if ANALYZED_FILE.exists() else LATEST_LINK
    print(f"Using: {src}", file=sys.stderr)
    with open(src, encoding="utf-8") as f:
        data = json.load(f)

    now = datetime.now(timezone.utc)
    all_docs = []
    for loc, loc_docs in data["documents"].items():
        for d in loc_docs:
            d["_location"] = loc
            d["_age"] = _doc_age_days(d, now)
            d["_primary_tag"] = primary_tag(d)
            all_docs.append(d)

    groups = defaultdict(list)
    for d in all_docs:
        groups[categorize(d)].append(d)

    has_analysis = any(d.get("_analysis", {}).get("reason", "") not in ("Not analyzed", "Analysis failed", "") for d in all_docs)
    total = len(all_docs)

    # ── Build HTML (inlined for single-file tool) ───────────────────────
    html_parts = [_report_head(total, now, has_analysis, groups, all_docs)]

    for key, title, desc, color in ACTION_GROUPS:
        docs = groups.get(key, [])
        if not docs:
            continue
        docs.sort(key=lambda d: (-d.get("_analysis", {}).get("relevance", 3), d["_age"]))
        tag_groups = defaultdict(list)
        for d in docs:
            tag_groups[d["_primary_tag"]].append(d)

        # Headings and descriptions contain "&", "<" and ">", so escape them.
        html_parts.append(f'<div class="group" id="{key}">')
        html_parts.append(f'<div class="group-hdr" style="background:{color}15;border-left:4px solid {color}" onclick="toggle(this)">')
        html_parts.append(f'<h2><span class="arrow">▼</span> {html_mod.escape(title)}</h2><span class="badge">{len(docs)}</span></div>')
        html_parts.append(f'<div class="group-desc">{html_mod.escape(desc)}</div><div class="group-body">')

        for tag, tdocs in sorted(tag_groups.items(), key=lambda x: -len(x[1])):
            icon, label, _color = TAG_DISPLAY.get(tag, ("📄", tag, "#94a3b8"))
            # Unknown tags come straight from model output; never emit them raw.
            label = html_mod.escape(str(label))
            tdocs.sort(key=lambda d: (-d.get("_analysis", {}).get("relevance", 3), -(d.get("reading_progress") or 0)))
            html_parts.append('<div class="cluster"><div class="cluster-hdr" onclick="toggle(this)">')
            html_parts.append(f'<h3><span class="arrow">▼</span> {icon} {label}</h3><span class="cnt">{len(tdocs)}</span></div>')

            # Cluster insights
            sources = Counter((d.get("site_name") or d.get("source") or "?") for d in tdocs)
            top_src = [(s, n) for s, n in sources.most_common(5) if n >= 2]
            wcs = [d.get("word_count", 0) for d in tdocs if d.get("word_count")]
            # 15000 words per hour, i.e. 250 words per minute.
            rh = sum(wcs) / 15000 if wcs else 0
            avg_rel = sum(d.get("_analysis", {}).get("relevance", 3) for d in tdocs) / len(tdocs)

            insights = []
            if top_src:
                insights.append("Sources: " + ", ".join(f"{s} ({n})" for s, n in top_src))
            insights.append(f"Stats: {len(tdocs)} docs, ~{rh:.1f}h reading, avg {avg_rel:.1f}★")
            html_parts.append('<div class="cluster-insights">' + "<br>".join(html_mod.escape(i) for i in insights) + '</div>')

            html_parts.append('<ul class="doc-list">')
            for d in tdocs:
                html_parts.append(_doc_html(d))
            html_parts.append('</ul></div>')
        html_parts.append('</div></div>')

    html_parts.append(_report_foot())

    # The page declares charset=utf-8 and is full of emoji; do not let the
    # locale pick the encoding.
    REPORT_FILE.write_text("\n".join(html_parts), encoding="utf-8")
    print(f"✅ Report: {REPORT_FILE}", file=sys.stderr)
    if not args.no_open:
        try:
            subprocess.Popen(["xdg-open", str(REPORT_FILE)], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
        except FileNotFoundError:
            # The report exists either way; a missing opener is not fatal.
            print("xdg-open not found; open the report manually.", file=sys.stderr)
+
+
def _doc_html(d):
    """Render one document as an ``<li class="doc">`` element for the report.

    Expects ``d["_location"]`` and ``d["_age"]`` to be set. Every value that
    originates from the API or from model output (title, source, URLs,
    summary, reason, relevance, tags, location) is HTML-escaped before being
    interpolated.
    """
    p = d.get("reading_progress") or 0
    a = d.get("_analysis") or {}
    rel = a.get("relevance", 3)
    rel_text = html_mod.escape(str(rel))
    summary = html_mod.escape(a.get("summary", ""))
    reason = html_mod.escape(a.get("reason", ""))
    title = html_mod.escape(d.get("title") or "Untitled")
    src = html_mod.escape(d.get("site_name") or d.get("source") or "")
    url = html_mod.escape(d.get("url") or "#")
    source_url = d.get("source_url") or ""
    # Tags are model output: normalize to strings and escape before use.
    tags = [html_mod.escape(str(t)) for t in (a.get("tags") or [])]
    loc = d["_location"]
    loc_text = html_mod.escape(str(loc))
    age = d["_age"]
    wc = d.get("word_count") or 0
    rc = {5: "#22c55e", 4: "#84cc16", 3: "#eab308", 2: "#f97316", 1: "#ef4444"}.get(rel, "#94a3b8")
    pc = "#4ade80" if p >= 1 else "#60a5fa" if p > 0.5 else "#f59e0b" if p > 0 else "#475569"
    lc = "loc-new" if loc == "new" else "loc-later"
    th = "".join(f'<span class="tag">{t}</span>' for t in tags[:3])
    tag_attr = ",".join(tags)
    rt = f"{wc//250}min" if wc else ""
    sl = html_mod.escape(source_url)
    # Build short summary (first sentence) and full
    first_sentence = summary.split('. ')[0] + '.' if '. ' in summary else summary
    if len(first_sentence) > len(summary) - 5:
        first_sentence = summary  # Don't truncate if it's basically the whole thing
    more_marker = "▸" if first_sentence != summary else ""
    reason_html = f"<div class=reason>{reason}</div>" if reason else ""
    source_link = f'<a href="{sl}" target="_blank">↗</a>' if source_url else ""

    return (f'<li class="doc" data-title="{title.lower()}" data-rel="{rel_text}" data-tags="{tag_attr}">'
            f'<div class="doc-rel" style="color:{rc}" title="{reason}">{rel_text}★</div>'
            f'<div class="doc-prog"><div style="color:{pc}">{p:.0%}</div>'
            f'<div class="bar"><div class="fill" style="width:{p*100:.0f}%;background:{pc}"></div></div></div>'
            f'<div class="doc-info"><div class="doc-title"><a href="{url}" target="_blank">{title}</a></div>'
            f'<div class="doc-summary">'
            f'<div class="short" onclick="this.nextElementSibling.classList.toggle(\'open\')">{first_sentence} {more_marker}</div>'
            f'<div class="full">{summary}{reason_html}</div>'
            f'</div>'
            f'<div class="doc-meta"><span class="loc {lc}">{loc_text}</span> {src} · {rt} · {age_label(age)} ago {th}</div></div>'
            f'<div class="doc-right">{source_link}</div></li>')
+
+
def _report_head(total, now, has_analysis, groups, all_docs):
    """Build the opening part of the HTML triage report.

    The returned markup runs from the doctype through the filter bar and
    leaves the ``<div class="container">`` element open; the group sections
    and the closing tags are appended by the caller.

    Args:
        total: Document count shown in the subtitle; also the denominator for
            the relevance-bar segment widths.
        now: Object with ``strftime`` (e.g. a ``datetime``) used for the
            report date.
        has_analysis: When truthy, a relevance distribution bar is rendered
            and the subtitle reads "AI-analyzed" instead of
            "keyword-classified".
        groups: Mapping from an ``ACTION_GROUPS`` key to the list of documents
            in that group; missing keys count as empty.
        all_docs: Documents as dicts. An optional ``"_analysis"`` dict may
            carry ``"relevance"`` (defaults to 3) and ``"tags"``.

    Returns:
        The HTML head, header, table of contents and filter bar as one string.
    """
    # Function-scope import so the block does not depend on a file-level one.
    import html

    def _analysis(doc):
        # "_analysis" may be missing or explicitly null; treat both as empty.
        return doc.get("_analysis") or {}

    # One stat tile per action group; the label is the title up to the em dash.
    stat_cells = []
    for key, title, _, color in ACTION_GROUPS:
        n = len(groups.get(key, []))
        label = title.split("—")[0].strip()
        stat_cells.append(
            f'<div class="stat"><div class="n" style="color:{color}">{n}</div><div class="l">{label}</div></div>'
        )
    stats = "".join(stat_cells)

    relbar = ""
    if has_analysis:
        rc = Counter(_analysis(d).get("relevance", 3) for d in all_docs)
        colors = {5: "#22c55e", 4: "#84cc16", 3: "#eab308", 2: "#f97316", 1: "#ef4444"}
        labels = {5: "Must read", 4: "Relevant", 3: "Interesting", 2: "Low", 1: "Skip"}
        scores = [5, 4, 3, 2, 1]

        seg_parts = []
        for s in scores:
            count = rc[s]
            if not count:
                continue
            # Guard against a zero total so an inconsistent caller gets an
            # empty-width segment instead of a ZeroDivisionError.
            share = count / total * 100 if total else 0
            seg_parts.append(
                f'<div class="seg" style="width:{share}%;background:{colors[s]}">{count}</div>'
            )
        segs = "".join(seg_parts)

        legend = "".join(
            f'<span><span class="rel-dot" style="background:{colors[s]}"></span>{s}★ {labels[s]} ({rc[s]})</span>'
            for s in scores
        )
        relbar = f'<div style="max-width:600px;margin:1rem auto 0"><div class="rel-bar">{segs}</div><div class="rel-legend">{legend}</div></div>'

    # Tag frequencies across all documents feed the tag filter drop-down.
    atags = Counter()
    for d in all_docs:
        for t in _analysis(d).get("tags") or []:
            atags[t] += 1

    # Tags come from analysis output, so escape them for both the attribute
    # and the text node. The browser decodes entities, so the <select> value
    # seen by the filter script is still the raw tag.
    tag_parts = []
    for t, c in atags.most_common():
        safe = html.escape(str(t), quote=True)
        tag_parts.append(f'<option value="{safe}">{safe} ({c})</option>')
    tag_opts = "".join(tag_parts)

    # Table of contents lists only the groups that actually contain documents.
    toc_links = []
    for key, title, _, _ in ACTION_GROUPS:
        n = len(groups.get(key, []))
        if n:
            toc_links.append(f'<a href="#{key}">{title}<span class="cnt">({n})</span></a>')
    toc = "".join(toc_links)

    return f"""<!DOCTYPE html><html lang="en"><head><meta charset="utf-8"><meta name="viewport" content="width=device-width,initial-scale=1">
<title>Readwise Reader Triage</title>
<style>
:root{{--bg:#0f172a;--surface:#1e293b;--surface2:#334155;--text:#e2e8f0;--dim:#94a3b8;--accent:#f59e0b;--link:#38bdf8}}
*{{box-sizing:border-box;margin:0;padding:0}}
body{{font-family:'Inter',-apple-system,system-ui,sans-serif;background:var(--bg);color:var(--text);line-height:1.6}}
.header{{background:linear-gradient(135deg,#1e293b,#312e81);padding:2rem;text-align:center;border-bottom:3px solid var(--accent)}}
.header h1{{font-size:1.8rem;margin-bottom:.3rem}}.header .sub{{color:var(--dim);font-size:.85rem}}
.stats{{display:flex;justify-content:center;gap:1rem;flex-wrap:wrap;margin-top:1.2rem}}
.stat{{background:rgba(0,0,0,.3);padding:.6rem 1.2rem;border-radius:8px;text-align:center;min-width:100px}}
.stat .n{{font-size:1.5rem;font-weight:700}}.stat .l{{font-size:.7rem;color:var(--dim);text-transform:uppercase}}
.container{{max-width:1200px;margin:0 auto;padding:1rem}}
.rel-bar{{display:flex;height:28px;border-radius:6px;overflow:hidden;margin:1.5rem 0 .5rem}}
.rel-bar .seg{{display:flex;align-items:center;justify-content:center;font-size:.75rem;font-weight:600;color:#000}}
.rel-legend{{display:flex;gap:1rem;justify-content:center;font-size:.75rem;color:var(--dim);margin-bottom:1.5rem;flex-wrap:wrap}}
.rel-legend span{{display:flex;align-items:center;gap:.3rem}}
.rel-dot{{width:10px;height:10px;border-radius:50%;display:inline-block}}
.toc{{background:var(--surface);border-radius:8px;padding:1.2rem;margin:1rem 0}}
.toc h2{{font-size:1rem;margin-bottom:.8rem}}
.toc-grid{{display:grid;grid-template-columns:repeat(auto-fill,minmax(280px,1fr));gap:.4rem}}
.toc a{{color:var(--link);text-decoration:none;font-size:.85rem}}.toc a:hover{{text-decoration:underline}}
.toc .cnt{{opacity:.5;margin-left:.3rem}}
.group{{margin:1.5rem 0;border-radius:10px;border:1px solid rgba(255,255,255,.08);overflow:hidden}}
.group-hdr{{padding:1rem 1.2rem;display:flex;align-items:center;justify-content:space-between;cursor:pointer;user-select:none}}
.group-hdr h2{{font-size:1.15rem}}.badge{{background:rgba(0,0,0,.3);padding:.2rem .7rem;border-radius:14px;font-size:.85rem;font-weight:600}}
.group-desc{{padding:0 1.2rem .8rem;color:var(--dim);font-size:.85rem;font-style:italic}}
.cluster{{margin:.4rem .8rem;background:rgba(0,0,0,.2);border-radius:8px;overflow:hidden}}
.cluster-hdr{{padding:.6rem 1rem;background:rgba(0,0,0,.15);display:flex;align-items:center;justify-content:space-between;cursor:pointer;user-select:none}}
.cluster-hdr h3{{font-size:.95rem}}.cnt{{background:rgba(255,255,255,.08);padding:.15rem .5rem;border-radius:10px;font-size:.78rem}}
.cluster-insights{{padding:.5rem 1rem;font-size:.8rem;color:var(--dim);border-top:1px solid rgba(255,255,255,.04);line-height:1.8}}
.doc-list{{list-style:none}}.doc{{padding:.5rem 1rem;border-top:1px solid rgba(255,255,255,.04);display:grid;grid-template-columns:2.8rem 2.5rem 1fr auto;gap:.6rem;align-items:center;transition:background .1s}}
.doc:hover{{background:rgba(255,255,255,.03)}}
.doc-rel{{text-align:center;font-weight:700;font-size:.9rem}}
.doc-prog{{text-align:center;font-size:.75rem}}.doc-prog .bar{{width:2.2rem;height:3px;background:rgba(255,255,255,.1);border-radius:2px;margin:2px auto 0;overflow:hidden}}
.doc-prog .fill{{height:100%;border-radius:2px}}
.doc-info{{min-width:0}}.doc-title{{font-weight:500;white-space:nowrap;overflow:hidden;text-overflow:ellipsis;font-size:.9rem}}
.doc-title a{{color:var(--text);text-decoration:none}}.doc-title a:hover{{color:var(--link)}}
.doc-meta{{font-size:.73rem;color:var(--dim);white-space:nowrap;overflow:hidden;text-overflow:ellipsis}}
.doc-summary{{font-size:.78rem;color:var(--dim);margin-top:.15rem}}
.doc-summary .short{{cursor:pointer}}
.doc-summary .short:hover{{color:var(--text)}}
.doc-summary .full{{display:none;margin-top:.3rem;line-height:1.5;color:var(--dim);border-left:2px solid rgba(255,255,255,.1);padding-left:.6rem}}
.doc-summary .full.open{{display:block}}
.doc-summary .reason{{font-size:.72rem;color:var(--accent);margin-top:.2rem;font-style:italic}}
.doc-right{{text-align:right;font-size:.73rem;white-space:nowrap}}.doc-right a{{color:var(--link)}}
.tag{{display:inline-block;padding:.05rem .35rem;border-radius:6px;font-size:.65rem;margin-right:.15rem;background:rgba(255,255,255,.06)}}
.loc{{display:inline-block;padding:.05rem .3rem;border-radius:6px;font-size:.65rem;font-weight:600}}
.loc-new{{background:#14532d;color:#86efac}}.loc-later{{background:#1e3a5f;color:#93c5fd}}
.arrow{{display:inline-block;transition:transform .15s}}.arrow.shut{{transform:rotate(-90deg)}}
.filter-bar{{background:var(--surface);padding:.8rem 1rem;border-radius:8px;margin:1rem 0;display:flex;gap:.5rem;flex-wrap:wrap;align-items:center}}
.filter-bar label{{font-size:.8rem;color:var(--dim)}}
.filter-bar select,.filter-bar input{{background:var(--surface2);color:var(--text);border:1px solid rgba(255,255,255,.1);border-radius:6px;padding:.3rem .6rem;font-size:.8rem}}
@media(max-width:768px){{.doc{{grid-template-columns:2rem 2rem 1fr}}.doc-right{{display:none}}.stats{{gap:.5rem}}.stat{{padding:.4rem .8rem;min-width:70px}}.stat .n{{font-size:1.2rem}}}}
</style></head><body>
<div class="header"><h1>📚 Readwise Reader Triage</h1>
<p class="sub">{now.strftime('%Y-%m-%d')} · {total} documents · {'AI-analyzed' if has_analysis else 'keyword-classified'}</p>
<div class="stats">{stats}</div>{relbar}</div>
<div class="container">
<div class="toc"><h2>📑 Sections</h2><div class="toc-grid">{toc}</div></div>
<div class="filter-bar"><label>Filter:</label>
<input type="text" id="search" placeholder="Search titles/summaries..." oninput="filterDocs()">
<select id="relFilter" onchange="filterDocs()"><option value="">All relevance</option><option value="5">5★</option><option value="4">4★+</option><option value="3">3★+</option></select>
<select id="tagFilter" onchange="filterDocs()"><option value="">All tags</option>{tag_opts}</select></div>"""
+
+
+def _report_foot():
+ return """<script>
+function toggle(el){const b=el.parentElement.querySelector('.group-body,.doc-list');if(!b)return;const a=el.querySelector('.arrow');if(b.style.display==='none'){b.style.display='';a?.classList.remove('shut')}else{b.style.display='none';a?.classList.add('shut')}}
+function filterDocs(){const q=document.getElementById('search').value.toLowerCase();const r=document.getElementById('relFilter').value;const t=document.getElementById('tagFilter').value;document.querySelectorAll('.doc').forEach(el=>{const title=(el.dataset.title||'');const summary=(el.querySelector('.doc-summary')?.textContent||'').toLowerCase();const rv=parseInt(el.dataset.rel||'3');const tags=el.dataset.tags||'';let s=true;if(q&&!title.includes(q)&&!summary.includes(q))s=false;if(r&&rv<parseInt(r))s=false;if(t&&!tags.includes(t))s=false;el.style.display=s?'':'none'})}
+</script></div></body></html>"""
+
+
+# ═══════════════════════════════════════════════════════════════════════
+# CLI
+# ═══════════════════════════════════════════════════════════════════════
+
def main():
    """Parse the command line and run the selected sub-command.

    Three sub-commands are registered, and exactly one is required:

    * ``fetch``   -- ``--locations`` (comma-separated, default ``new,later``)
    * ``analyze`` -- ``--model/-m`` (one of the ``MODELS`` keys, default
      ``opus``), ``--reset``, ``--batch-size`` and ``--profile``
    * ``report``  -- ``--no-open``

    ``DATA_DIR`` is created (including parents) before the handler runs, and
    the handler receives the parsed argparse namespace.
    """
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    subcommands = parser.add_subparsers(dest="command", required=True)

    fetch_parser = subcommands.add_parser(
        "fetch",
        help="Fetch documents from Readwise Reader API",
    )
    fetch_parser.add_argument(
        "--locations",
        default="new,later",
        help="Comma-separated locations (default: new,later)",
    )

    analyze_parser = subcommands.add_parser(
        "analyze",
        help="Analyze documents with LLM",
    )
    analyze_parser.add_argument(
        "--model",
        "-m",
        choices=list(MODELS.keys()),
        default="opus",
        help="Model (default: opus)",
    )
    analyze_parser.add_argument(
        "--reset",
        action="store_true",
        help="Reset checkpoint",
    )
    analyze_parser.add_argument(
        "--batch-size",
        type=int,
        help="Override batch size",
    )
    analyze_parser.add_argument(
        "--profile",
        type=Path,
        default=PROFILE_FILE,
        help=f"Profile TOML (default: {PROFILE_FILE})",
    )

    report_parser = subcommands.add_parser(
        "report",
        help="Generate HTML triage report",
    )
    report_parser.add_argument(
        "--no-open",
        action="store_true",
        help="Don't open in browser",
    )

    options = parser.parse_args()
    DATA_DIR.mkdir(parents=True, exist_ok=True)

    # Table-driven dispatch; an unknown command is a no-op, though argparse
    # already rejects anything outside the registered sub-commands.
    handlers = {
        "fetch": cmd_fetch,
        "analyze": cmd_analyze,
        "report": cmd_report,
    }
    handler = handlers.get(options.command)
    if handler is not None:
        handler(options)


# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()