Commit cf67628398ef
Changed files (2)
tools
readwise-reader
tools/readwise-reader/default.nix
@@ -14,6 +14,7 @@ python3.pkgs.buildPythonApplication {
propagatedBuildInputs = with python3.pkgs; [
requests
+ click
];
makeWrapperArgs = [
tools/readwise-reader/readwise-reader.py
@@ -1,23 +1,22 @@
#!/usr/bin/env -S uv run --script
# /// script
# requires-python = ">=3.11"
-# dependencies = ["requests"]
+# dependencies = ["requests", "click"]
# ///
"""
readwise-reader — Fetch, analyze, and triage Readwise Reader documents.
-Subcommands:
- fetch Fetch all documents from Readwise Reader API (Inbox + Later)
- analyze Score documents for relevance using LLMs (Opus, Sonnet, Gemini)
+Commands:
+ fetch Fetch documents from Readwise Reader API
+ analyze Score documents for relevance using LLMs
report Generate an interactive HTML triage report
-
-Data stored in $XDG_DATA_HOME/readwise/ (default: ~/.local/share/readwise/).
-User profile loaded from $XDG_DATA_HOME/readwise/profile.toml.
+ archive Bulk-archive documents by relevance or age
+ delete Bulk-delete documents by relevance or age
"""
-import argparse
-import json
+import click
import html as html_mod
+import json
import os
import subprocess
import sys
@@ -33,77 +32,39 @@ ANALYZED_FILE = DATA_DIR / "reader-analyzed.json"
CHECKPOINT_FILE = DATA_DIR / "analysis-checkpoint.json"
REPORT_FILE = DATA_DIR / "triage-report.html"
+# Base URL shared by every Readwise Reader v3 endpoint this tool calls
+# (list/, bulk_update/ and delete/<id>/ are appended to it further down).
+READER_API_BASE = "https://readwise.io/api/v3"
+READER_RATE_DELAY = 3.1 # 20 req/min for list endpoint
+
+# Upper bound on attempts for one LLM request in the backend retry loops.
+MAX_RETRIES = 6
+
+# Per-model settings, keyed by the value accepted by `analyze --model`:
+# backend - which caller call_llm() dispatches to
+# model_id - identifier sent to that backend
+# batch_size - number of documents placed in a single prompt
+# rate_delay - seconds slept between consecutive batches
+# max_output_tokens - output cap forwarded to the backend
+MODELS = {
+ "opus": {"backend": "vertex-claude", "model_id": "claude-opus-4-6", "batch_size": 25, "rate_delay": 2, "max_output_tokens": 16384},
+ "sonnet": {"backend": "vertex-claude", "model_id": "claude-sonnet-4@20250514", "batch_size": 30, "rate_delay": 1, "max_output_tokens": 16384},
+ "gemini": {"backend": "gemini-api", "model_id": "gemini-3-pro-preview", "batch_size": 35, "rate_delay": 2, "max_output_tokens": 16384},
+ "gemini25": {"backend": "gemini-api", "model_id": "gemini-2.5-pro", "batch_size": 35, "rate_delay": 2, "max_output_tokens": 16384},
+}
+
+
# ═══════════════════════════════════════════════════════════════════════
-# FETCH
+# UTILITIES
# ═══════════════════════════════════════════════════════════════════════
-READER_API = "https://readwise.io/api/v3/list/"
-READER_RATE_DELAY = 3.1 # 20 req/min
+def ensure_data_dir():
+ """Create DATA_DIR, including missing parents; do nothing if it already exists."""
+ DATA_DIR.mkdir(parents=True, exist_ok=True)
def get_readwise_token():
result = subprocess.run(["passage", "show", "readwise/key"], capture_output=True, text=True)
if result.returncode != 0:
- print("Failed to get Readwise token from passage", file=sys.stderr)
- sys.exit(1)
+ raise click.ClickException("Failed to get Readwise token from passage (readwise/key)")
return result.stdout.strip()
-def fetch_documents(token: str, location: str) -> list[dict]:
- import requests
-
- docs = []
- cursor = None
- page = 1
- while True:
- params = {"location": location, "limit": 100}
- if cursor:
- params["pageCursor"] = cursor
- print(f" Fetching {location} page {page}...", file=sys.stderr)
- resp = requests.get(READER_API, params=params, headers={"Authorization": f"Token {token}"})
- resp.raise_for_status()
- data = resp.json()
- docs.extend(data.get("results", []))
- cursor = data.get("nextPageCursor")
- count = data.get("count", "?")
- print(f" Got {len(data.get('results', []))} docs (total: {count})", file=sys.stderr)
- if not cursor:
- break
- page += 1
- time.sleep(READER_RATE_DELAY)
- return docs
-
-
-def cmd_fetch(args):
- import requests # noqa: F811 — imported here for lazy loading
-
- DATA_DIR.mkdir(parents=True, exist_ok=True)
- token = get_readwise_token()
- all_docs = {}
-
- locations = args.locations.split(",")
- for loc in locations:
- print(f"\nFetching '{loc}' documents...", file=sys.stderr)
- docs = fetch_documents(token, loc)
- all_docs[loc] = docs
- print(f" Total {loc}: {len(docs)}", file=sys.stderr)
- if loc != locations[-1]:
- time.sleep(READER_RATE_DELAY)
-
- timestamp = datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%SZ")
- output = {
- "fetched_at": timestamp,
- "counts": {loc: len(docs) for loc, docs in all_docs.items()},
- "documents": all_docs,
- }
-
- outfile = DATA_DIR / f"reader-{'-'.join(locations)}-{timestamp}.json"
- with open(outfile, "w") as f:
- json.dump(output, f, indent=2, ensure_ascii=False)
-
- LATEST_LINK.unlink(missing_ok=True)
- LATEST_LINK.symlink_to(outfile.name)
- print(f"\n✅ Saved {sum(len(d) for d in all_docs.values())} documents to {outfile}", file=sys.stderr)
+def age_label(days):
+    """Return a compact, human-readable label for an age given in whole days.
+
+    Under a week the label is "<n>d", under 30 days "<n>w", under a year
+    "<n>mo", and from one year on "<years>y<months>mo".
+    """
+    if days < 7:
+        return f"{days}d"
+    if days < 30:
+        return f"{days // 7}w"
+    if days < 365:
+        return f"{days // 30}mo"
+    years, remainder = divmod(days, 365)
+    # 365 is not a multiple of 30, so the last five days of each year would
+    # otherwise be reported as a twelfth month on top of the year ("1y12mo").
+    months = min(remainder // 30, 11)
+    return f"{years}y{months}mo"
# ═══════════════════════════════════════════════════════════════════════
@@ -111,7 +72,7 @@ def cmd_fetch(args):
# ═══════════════════════════════════════════════════════════════════════
def load_profile(path: Path) -> dict:
- """Load profile from TOML file with a minimal parser (no dependency)."""
+ """Load profile from TOML with a minimal parser (no toml dependency)."""
if not path.exists():
return {}
@@ -125,7 +86,6 @@ def load_profile(path: Path) -> dict:
stripped = line.strip()
if not stripped or stripped.startswith("#"):
continue
-
if stripped.startswith("["):
if current_key and current_list is not None:
profile[f"{current_section_for_key}.{current_key}"] = current_list
@@ -134,23 +94,19 @@ def load_profile(path: Path) -> dict:
current_section = stripped.strip("[]").strip()
current_section_for_key = current_section
continue
-
if "=" in stripped and not stripped.startswith('"'):
if current_key and current_list is not None:
profile[f"{current_section_for_key}.{current_key}"] = current_list
current_list = None
-
key, val = stripped.split("=", 1)
key = key.strip()
val = val.strip()
current_key = key
current_section_for_key = current_section
-
if val == "[":
current_list = []
elif val.startswith("[") and val.endswith("]"):
- items = val[1:-1]
- current_list = [s.strip().strip('"').strip("'") for s in items.split(",") if s.strip().strip('"').strip("'")]
+ current_list = [s.strip().strip('"').strip("'") for s in val[1:-1].split(",") if s.strip().strip('"').strip("'")]
profile[f"{current_section}.{current_key}"] = current_list
current_list = None
current_key = None
@@ -174,119 +130,69 @@ def load_profile(path: Path) -> dict:
if current_key and current_list is not None:
profile[f"{current_section_for_key}.{current_key}"] = current_list
-
return profile
def build_prompt_context(profile: dict) -> tuple[str, str]:
- def get_list(key):
- return profile.get(key, [])
-
- def fmt(items):
- return ", ".join(items) if items else "N/A"
+ def get(key): return profile.get(key, [])
+ def fmt(items): return ", ".join(items) if items else "N/A"
name = profile.get("user.name", "User")
role = profile.get("user.role", "Software Engineer")
+ desc = f"""{name} — {role}
- user_desc = f"""{name} — {role}
+Core languages: {fmt(get('interests.core'))}
+Infrastructure: {fmt(get('interests.infrastructure'))}
+Editor: {fmt(get('interests.editor'))}
+Kubernetes/Containers: {fmt(get('interests.kubernetes'))}
+CLI tools: {fmt(get('interests.tools'))}
+AI tooling: {fmt(get('interests.ai'))}
+Side interests: {fmt(get('interests.side'))}
+Values: {fmt(get('interests.values'))}
+Currently exploring: {fmt(get('interests.exploring'))}
+NOT interested in: {fmt(get('interests.not_interested'))}"""
-Core languages: {fmt(get_list('interests.core'))}
-Infrastructure: {fmt(get_list('interests.infrastructure'))}
-Editor: {fmt(get_list('interests.editor'))}
-Kubernetes/Containers: {fmt(get_list('interests.kubernetes'))}
-CLI tools: {fmt(get_list('interests.tools'))}
-AI tooling: {fmt(get_list('interests.ai'))}
-Side interests: {fmt(get_list('interests.side'))}
-Values: {fmt(get_list('interests.values'))}
-Currently exploring: {fmt(get_list('interests.exploring'))}
-NOT interested in: {fmt(get_list('interests.not_interested'))}"""
-
- fav_authors = get_list("favorites.authors")
- fav_sites = get_list("favorites.sites")
+ fav_authors = get("favorites.authors")
+ fav_sites = get("favorites.sites")
if fav_authors or fav_sites:
- user_desc += "\n\nFAVORITE AUTHORS/SITES (auto-boost to at least 4★, always worth_reading):\n"
- if fav_authors:
- user_desc += "Authors: " + ", ".join(fav_authors) + "\n"
- if fav_sites:
- user_desc += "Sites: " + ", ".join(fav_sites) + "\n"
- user_desc += "These are trusted voices — mark as interesting even on tangential topics."
+ desc += "\n\nFAVORITE AUTHORS/SITES (auto-boost to at least 4★, always worth_reading):\n"
+ if fav_authors: desc += "Authors: " + ", ".join(fav_authors) + "\n"
+ if fav_sites: desc += "Sites: " + ", ".join(fav_sites) + "\n"
+ desc += "These are trusted voices — mark as interesting even on tangential topics."
- return user_desc, profile.get("summary.style", "detailed")
+ return desc, profile.get("summary.style", "detailed")
# ═══════════════════════════════════════════════════════════════════════
-# ANALYZE
+# LLM BACKENDS
# ═══════════════════════════════════════════════════════════════════════
-MODELS = {
- "opus": {
- "backend": "vertex-claude",
- "model_id": "claude-opus-4-6",
- "batch_size": 25,
- "rate_delay": 2,
- "max_output_tokens": 16384,
- },
- "sonnet": {
- "backend": "vertex-claude",
- "model_id": "claude-sonnet-4@20250514",
- "batch_size": 30,
- "rate_delay": 1,
- "max_output_tokens": 16384,
- },
- "gemini": {
- "backend": "gemini-api",
- "model_id": "gemini-3-pro-preview",
- "batch_size": 35,
- "rate_delay": 2,
- "max_output_tokens": 16384,
- },
- "gemini25": {
- "backend": "gemini-api",
- "model_id": "gemini-2.5-pro",
- "batch_size": 35,
- "rate_delay": 2,
- "max_output_tokens": 16384,
- },
-}
-
-MAX_RETRIES = 6
_token_cache = {"token": None, "ts": 0}
-def get_vertex_token():
+def _get_vertex_token():
if time.time() - _token_cache["ts"] > 2400:
result = subprocess.run(["gcloud", "auth", "print-access-token"], capture_output=True, text=True, timeout=10)
if result.returncode != 0:
- raise RuntimeError(f"gcloud auth failed: {result.stderr}")
+ raise click.ClickException(f"gcloud auth failed: {result.stderr}")
_token_cache["token"] = result.stdout.strip()
_token_cache["ts"] = time.time()
return _token_cache["token"]
-def call_vertex_claude(model_id: str, prompt: str, max_tokens: int) -> str:
+def _call_vertex_claude(model_id, prompt, max_tokens):
import requests
-
project = os.environ.get("GOOGLE_CLOUD_PROJECT", "itpc-gcp-pnd-pe-eng-claude")
location = os.environ.get("GOOGLE_CLOUD_LOCATION", "us-east5")
url = f"https://{location}-aiplatform.googleapis.com/v1/projects/{project}/locations/{location}/publishers/anthropic/models/{model_id}:rawPredict"
-
for attempt in range(MAX_RETRIES):
- token = get_vertex_token()
+ token = _get_vertex_token()
try:
- resp = __import__("requests").post(
- url,
- headers={"Authorization": f"Bearer {token}", "Content-Type": "application/json"},
- json={
- "anthropic_version": "vertex-2023-10-16",
- "messages": [{"role": "user", "content": prompt}],
- "max_tokens": max_tokens,
- "temperature": 0.2,
- },
- timeout=180,
- )
+ resp = requests.post(url, headers={"Authorization": f"Bearer {token}", "Content-Type": "application/json"},
+ json={"anthropic_version": "vertex-2023-10-16", "messages": [{"role": "user", "content": prompt}], "max_tokens": max_tokens, "temperature": 0.2}, timeout=180)
if resp.status_code == 429:
wait = (2 ** attempt) * 5
- print(f" ⏳ Rate limited, waiting {wait}s...", file=sys.stderr)
+ click.echo(f" ⏳ Rate limited, waiting {wait}s...", err=True)
time.sleep(wait)
continue
if resp.status_code == 401:
@@ -297,43 +203,33 @@ def call_vertex_claude(model_id: str, prompt: str, max_tokens: int) -> str:
except Exception as e:
if attempt < MAX_RETRIES - 1:
wait = (2 ** attempt) * 3
- print(f" ⚠ {e}, retrying in {wait}s...", file=sys.stderr)
+ click.echo(f" ⚠ {e}, retrying in {wait}s...", err=True)
time.sleep(wait)
continue
raise
- raise RuntimeError(f"Failed after {MAX_RETRIES} retries")
+ raise click.ClickException(f"Failed after {MAX_RETRIES} retries")
-def get_gemini_key():
+def _get_gemini_key():
key = os.environ.get("GEMINI_API_KEY")
- if key:
- return key
+ if key: return key
result = subprocess.run(["passage", "show", "redhat/google/osp/vdeemest-api-key"], capture_output=True, text=True, timeout=10)
if result.returncode != 0:
- raise RuntimeError("No GEMINI_API_KEY and passage lookup failed")
+ raise click.ClickException("No GEMINI_API_KEY and passage lookup failed")
return result.stdout.strip()
-def call_gemini(model_id: str, prompt: str, max_tokens: int) -> str:
+def _call_gemini(model_id, prompt, max_tokens):
import requests
-
- api_key = get_gemini_key()
+ api_key = _get_gemini_key()
url = f"https://generativelanguage.googleapis.com/v1beta/models/{model_id}:generateContent?key={api_key}"
-
for attempt in range(MAX_RETRIES):
try:
- resp = requests.post(
- url,
- headers={"Content-Type": "application/json"},
- json={
- "contents": [{"parts": [{"text": prompt}]}],
- "generationConfig": {"temperature": 0.2, "maxOutputTokens": max_tokens, "responseMimeType": "application/json"},
- },
- timeout=180,
- )
+ resp = requests.post(url, headers={"Content-Type": "application/json"},
+ json={"contents": [{"parts": [{"text": prompt}]}], "generationConfig": {"temperature": 0.2, "maxOutputTokens": max_tokens, "responseMimeType": "application/json"}}, timeout=180)
if resp.status_code == 429:
wait = (2 ** attempt) * 5
- print(f" ⏳ Rate limited, waiting {wait}s...", file=sys.stderr)
+ click.echo(f" ⏳ Rate limited, waiting {wait}s...", err=True)
time.sleep(wait)
continue
resp.raise_for_status()
@@ -341,117 +237,113 @@ def call_gemini(model_id: str, prompt: str, max_tokens: int) -> str:
except Exception as e:
if attempt < MAX_RETRIES - 1:
wait = (2 ** attempt) * 3
- print(f" ⚠ {e}, retrying in {wait}s...", file=sys.stderr)
+ click.echo(f" ⚠ {e}, retrying in {wait}s...", err=True)
time.sleep(wait)
continue
raise
- raise RuntimeError(f"Failed after {MAX_RETRIES} retries")
+ raise click.ClickException(f"Failed after {MAX_RETRIES} retries")
-def call_llm(backend: str, model_id: str, prompt: str, max_tokens: int) -> str:
- if backend == "vertex-claude":
- return call_vertex_claude(model_id, prompt, max_tokens)
- elif backend == "gemini-api":
- return call_gemini(model_id, prompt, max_tokens)
+def call_llm(backend, model_id, prompt, max_tokens):
+ if backend == "vertex-claude": return _call_vertex_claude(model_id, prompt, max_tokens)
+ if backend == "gemini-api": return _call_gemini(model_id, prompt, max_tokens)
raise ValueError(f"Unknown backend: {backend}")
-def parse_json_response(text: str) -> dict:
+def parse_json_response(text):
+ import re
text = text.strip()
if text.startswith("```"):
text = text.split("\n", 1)[1]
- if text.endswith("```"):
- text = text[:-3]
+ if text.endswith("```"): text = text[:-3]
text = text.strip()
try:
return json.loads(text)
except json.JSONDecodeError:
- import re
match = re.search(r'\{.*\}', text, re.DOTALL)
- if match:
- return json.loads(match.group())
+ if match: return json.loads(match.group())
raise
-def build_analysis_prompt(profile_text: str, summary_style: str, docs_batch: list[dict]) -> str:
- doc_entries = []
- for i, d in enumerate(docs_batch):
- title = d.get("title", "Untitled")
- summary = (d.get("summary") or "")[:600]
- source = d.get("site_name") or d.get("source") or ""
- source_url = d.get("source_url", "")
- category = d.get("category", "")
- word_count = d.get("word_count", 0)
- author = d.get("author", "")
- if summary.strip().lower() in ("comments", ""):
- summary = "N/A — infer content from title, author, source URL"
- doc_entries.append(
- f'[{i}] "{title}"\n'
- f' Author: {author} | Source: {source} | Category: {category} | {word_count} words\n'
- f' URL: {source_url}\n'
- f' Summary: {summary}'
- )
+# ═══════════════════════════════════════════════════════════════════════
+# CLI
+# ═══════════════════════════════════════════════════════════════════════
- if summary_style == "detailed":
- s_inst = ("**summary**: 4-6 sentences. Describe what this article is actually about in depth: "
- "what problem does it address, what's the core argument or technique, what makes it interesting or unique? "
- "Include specific details (tools mentioned, approaches described, conclusions drawn). "
- "Don't just restate the title. If summary is N/A, use title/author/URL and your knowledge to infer the likely content.")
- r_inst = ("**reason**: 2-3 sentences. Explain specifically why this is or isn't relevant. "
- "Reference concrete user interests that match (e.g. 'Uses NixOS daily and this covers flake patterns') "
- "or don't match (e.g. 'React frontend content, outside interest area').")
- else:
- s_inst = "**summary**: 1-2 sentences about the actual content."
- r_inst = "**reason**: 5-15 words explaining the score."
-
- return f"""Analyze these {len(docs_batch)} articles from a read-it-later app. Score each for relevance to this specific user.
-
-<user_profile>
-{profile_text}
-</user_profile>
-
-<documents>
-{chr(10).join(doc_entries)}
-</documents>
-
-For each document return:
-1. {s_inst}
-2. **relevance**: 1-5 score:
- - 5 = Must read — directly about daily tools/work or from a favorite author/site
- - 4 = Highly relevant — strongly aligned with interests, or from a favorite source on any topic
- - 3 = Interesting — decent tech content, tangentially related
- - 2 = Low relevance — not aligned with interests
- - 1 = Skip — completely irrelevant or too superficial
-3. {r_inst}
-4. **action**: must_read / worth_reading / skim / archive / delete
-5. **tags**: 1-3 from: nix, go, rust, python, emacs, kubernetes, containers, ci-cd, git, security, homelab, networking, ai-llm, coding-agents, linux, open-source, privacy, productivity, pkm, career, culture, french, hardware, web, devtools, monitoring, tekton
-
-IMPORTANT:
-- Favorite authors/sites get minimum 4★ and "worth_reading", even on tangential topics.
-- French content from favorite French sources is scored on merit, not penalized for being French.
-- Be discriminating: generic listicles = 1-2★. Deep technical posts on relevant topics = 4-5★.
-
-Return ONLY valid JSON:
-{{"analyses":[{{"id":0,"summary":"...","relevance":4,"reason":"...","action":"worth_reading","tags":["nix","homelab"]}},...]}}\
-"""
+@click.group()
+def cli():
+ """Fetch, analyze, and triage Readwise Reader documents."""
+ # Group callback: make sure the data directory exists before a subcommand
+ # body starts reading or writing files under it.
+ ensure_data_dir()
-def cmd_analyze(args):
- model_cfg = MODELS[args.model].copy()
- if args.batch_size:
- model_cfg["batch_size"] = args.batch_size
+# ─── FETCH ──────────────────────────────────────────────────────────────
- if args.reset and CHECKPOINT_FILE.exists():
+@cli.command()
+@click.option("--locations", default="new,later", help="Comma-separated locations to fetch (default: new,later)")
+def fetch(locations):
+    """Fetch documents from Readwise Reader API."""
+    # NOTE: the docstring above is the text click prints for `fetch --help`.
+    import requests
+
+    # Drop blank entries ("new,,later", trailing comma) and repeated names so
+    # no request is made for an empty location and nothing is fetched twice.
+    locs = list(dict.fromkeys(name.strip() for name in locations.split(",") if name.strip()))
+    if not locs:
+        raise click.UsageError("--locations must name at least one location")
+
+    token = get_readwise_token()
+    headers = {"Authorization": f"Token {token}"}
+    all_docs = {}
+
+    for pos, loc in enumerate(locs):
+        click.echo(f"\nFetching '{loc}'...", err=True)
+        docs = []
+        cursor = None
+        page = 1
+        throttled = 0
+        while True:
+            params = {"location": loc, "limit": 100}
+            if cursor:
+                params["pageCursor"] = cursor
+            click.echo(f" page {page}...", err=True)
+            # A timeout keeps a stalled connection from hanging the command forever.
+            resp = requests.get(f"{READER_API_BASE}/list/", params=params, headers=headers, timeout=60)
+            if resp.status_code == 429 and throttled < MAX_RETRIES:
+                # Rate limited: wait as long as the server asks (60s when the
+                # Retry-After header is missing or not a number), then retry
+                # the same page instead of discarding everything fetched so far.
+                throttled += 1
+                retry_after = resp.headers.get("Retry-After", "")
+                wait = int(retry_after) if retry_after.isdigit() else 60
+                click.echo(f" ⏳ Rate limited, waiting {wait}s...", err=True)
+                time.sleep(wait)
+                continue
+            resp.raise_for_status()
+            throttled = 0
+            data = resp.json()
+            results = data.get("results", [])
+            docs.extend(results)
+            cursor = data.get("nextPageCursor")
+            click.echo(f" {len(results)} docs (total: {data.get('count', '?')})", err=True)
+            if not cursor:
+                break
+            page += 1
+            time.sleep(READER_RATE_DELAY)
+        all_docs[loc] = docs
+        click.echo(f" Total {loc}: {len(docs)}", err=True)
+        # Pause between locations, but not after the final one.
+        if pos < len(locs) - 1:
+            time.sleep(READER_RATE_DELAY)
+
+    timestamp = datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%SZ")
+    output = {
+        "fetched_at": timestamp,
+        "counts": {name: len(found) for name, found in all_docs.items()},
+        "documents": all_docs,
+    }
+    outfile = DATA_DIR / f"reader-{'-'.join(locs)}-{timestamp}.json"
+    outfile.write_text(json.dumps(output, indent=2, ensure_ascii=False))
+    # Re-point the "latest" symlink at the snapshot that was just written.
+    LATEST_LINK.unlink(missing_ok=True)
+    LATEST_LINK.symlink_to(outfile.name)
+    total = sum(len(found) for found in all_docs.values())
+    click.echo(f"\n✅ Saved {total} documents to {outfile}", err=True)
+
+
+# ─── ANALYZE ────────────────────────────────────────────────────────────
+
+@cli.command()
+@click.option("--model", "-m", type=click.Choice(list(MODELS.keys())), default="opus", help="LLM model")
+@click.option("--reset", is_flag=True, help="Reset checkpoint and re-analyze everything")
+@click.option("--batch-size", type=int, help="Override batch size")
+@click.option("--profile", "profile_path", type=click.Path(exists=False), default=str(PROFILE_FILE), help="Profile TOML path")
+def analyze(model, reset, batch_size, profile_path):
+ """Analyze documents with LLM for relevance scoring."""
+ cfg = MODELS[model].copy()
+ if batch_size: cfg["batch_size"] = batch_size
+
+ if reset and CHECKPOINT_FILE.exists():
CHECKPOINT_FILE.unlink()
- print("🔄 Checkpoint reset.", file=sys.stderr)
+ click.echo("🔄 Checkpoint reset.", err=True)
- profile = load_profile(args.profile)
+ profile = load_profile(Path(profile_path))
if profile:
- print(f"👤 Profile: {args.profile}", file=sys.stderr)
fav_count = len(profile.get("favorites.authors", [])) + len(profile.get("favorites.sites", []))
- print(f" {fav_count} favorite authors/sites", file=sys.stderr)
+ click.echo(f"👤 Profile: {profile_path} ({fav_count} favorites)", err=True)
profile_text, summary_style = build_prompt_context(profile)
- print(f"📝 Summary style: {summary_style}", file=sys.stderr)
+ click.echo(f"📝 Summary: {summary_style}", err=True)
+
+ if not LATEST_LINK.exists():
+ raise click.ClickException(f"No data found. Run 'readwise-reader fetch' first.")
with open(LATEST_LINK) as f:
data = json.load(f)
@@ -462,38 +354,36 @@ def cmd_analyze(args):
d["_location"] = loc
all_docs.append(d)
- print(f"📚 Total: {len(all_docs)} docs", file=sys.stderr)
- print(f"🤖 Model: {model_cfg['model_id']} ({model_cfg['backend']})", file=sys.stderr)
- print(f"📦 Batch: {model_cfg['batch_size']}, max output: {model_cfg['max_output_tokens']} tokens", file=sys.stderr)
-
+ # Load checkpoint — keyed by document ID, so survives re-fetches
analyzed = {}
if CHECKPOINT_FILE.exists():
with open(CHECKPOINT_FILE) as f:
analyzed = json.load(f)
- print(f"💾 Checkpoint: {len(analyzed)} done", file=sys.stderr)
to_analyze = [d for d in all_docs if d["id"] not in analyzed]
- print(f"🔍 Remaining: {len(to_analyze)}", file=sys.stderr)
- if to_analyze:
- bs = model_cfg["batch_size"]
+ click.echo(f"📚 Total: {len(all_docs)} | ✅ Already analyzed: {len(analyzed)} | 🔍 Remaining: {len(to_analyze)}", err=True)
+ click.echo(f"🤖 {cfg['model_id']} ({cfg['backend']}) | batch={cfg['batch_size']} | max_out={cfg['max_output_tokens']}", err=True)
+
+ if not to_analyze:
+ click.echo("Nothing new to analyze.", err=True)
+ else:
+ bs = cfg["batch_size"]
total_batches = (len(to_analyze) + bs - 1) // bs
for bn in range(total_batches):
batch = to_analyze[bn * bs: (bn + 1) * bs]
pct = len(analyzed) / len(all_docs) * 100
- print(f"\n[{bn+1}/{total_batches}] {len(batch)} docs ({pct:.0f}% done)...", file=sys.stderr)
+ click.echo(f"\n[{bn+1}/{total_batches}] {len(batch)} docs ({pct:.0f}%)...", err=True)
try:
- prompt = build_analysis_prompt(profile_text, summary_style, batch)
- raw = call_llm(model_cfg["backend"], model_cfg["model_id"], prompt, model_cfg["max_output_tokens"])
+ prompt = _build_analysis_prompt(profile_text, summary_style, batch)
+ raw = call_llm(cfg["backend"], cfg["model_id"], prompt, cfg["max_output_tokens"])
parsed = parse_json_response(raw)
-
matched = 0
for a in parsed.get("analyses", []):
idx = a.get("id")
- if idx is None:
- continue
+ if idx is None: continue
try:
idx = int(idx)
if 0 <= idx < len(batch):
@@ -505,20 +395,16 @@ def cmd_analyze(args):
"tags": a.get("tags", []),
}
matched += 1
- except (ValueError, IndexError):
- pass
- print(f" ✓ {matched}/{len(batch)}", file=sys.stderr)
+ except (ValueError, IndexError): pass
+ click.echo(f" ✓ {matched}/{len(batch)}", err=True)
except Exception as e:
- print(f" ✗ {e}", file=sys.stderr)
+ click.echo(f" ✗ {e}", err=True)
- with open(CHECKPOINT_FILE, "w") as f:
- json.dump(analyzed, f)
+ CHECKPOINT_FILE.write_text(json.dumps(analyzed))
+ if bn < total_batches - 1: time.sleep(cfg["rate_delay"])
- if bn < total_batches - 1:
- time.sleep(model_cfg["rate_delay"])
-
- # Build output
- print(f"\n📊 Analyzed: {len(analyzed)}/{len(all_docs)}", file=sys.stderr)
+ # Build enriched output
+ click.echo(f"\n📊 Analyzed: {len(analyzed)}/{len(all_docs)}", err=True)
enriched = {}
for loc in data["documents"]:
enriched[loc] = []
@@ -526,33 +412,185 @@ def cmd_analyze(args):
d["_analysis"] = analyzed.get(d["id"], {"summary": d.get("summary", ""), "relevance": 3, "reason": "Not analyzed", "action": "skim", "tags": []})
enriched[loc].append(d)
- output = {
- "fetched_at": data.get("fetched_at", ""),
- "analyzed_at": datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%SZ"),
- "model": model_cfg["model_id"],
- "counts": {loc: len(docs) for loc, docs in enriched.items()},
- "documents": enriched,
- }
- with open(ANALYZED_FILE, "w") as f:
- json.dump(output, f, indent=2, ensure_ascii=False)
- print(f"✅ Output: {ANALYZED_FILE}", file=sys.stderr)
+ output = {"fetched_at": data.get("fetched_at", ""), "analyzed_at": datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%SZ"),
+ "model": cfg["model_id"], "counts": {l: len(d) for l, d in enriched.items()}, "documents": enriched}
+ ANALYZED_FILE.write_text(json.dumps(output, indent=2, ensure_ascii=False))
+ click.echo(f"✅ {ANALYZED_FILE}", err=True)
# Stats
rels = Counter(analyzed[k]["relevance"] for k in analyzed if "relevance" in analyzed[k])
if rels:
- print(f"\nRelevance:", file=sys.stderr)
- for s in sorted(rels, reverse=True):
- print(f" {s}★: {rels[s]:4d} ({rels[s]*100//len(analyzed)}%)", file=sys.stderr)
+ click.echo(f"\nRelevance: " + " | ".join(f"{s}★={rels.get(s,0)}" for s in [5,4,3,2,1]), err=True)
acts = Counter(analyzed[k]["action"] for k in analyzed if "action" in analyzed[k])
if acts:
- print(f"Actions:", file=sys.stderr)
- for a, c in acts.most_common():
- print(f" {a:15s}: {c:4d}", file=sys.stderr)
+ click.echo("Actions: " + " | ".join(f"{a}={c}" for a,c in acts.most_common()), err=True)
-# ═══════════════════════════════════════════════════════════════════════
-# REPORT
-# ═══════════════════════════════════════════════════════════════════════
+def _build_analysis_prompt(profile_text, summary_style, docs_batch):
+    """Build the LLM prompt that asks for a relevance analysis of one batch.
+
+    profile_text is inserted verbatim inside <user_profile>; summary_style
+    selects the long instructions when it equals "detailed" and the short ones
+    otherwise; docs_batch is a list of document dicts. Each document is listed
+    under its index in the batch, which is the "id" the model is asked to echo
+    back. Returns the prompt as a single string.
+    """
+    entries = []
+    for i, d in enumerate(docs_batch):
+        # `or` fallbacks rather than .get() defaults: a key that is present
+        # with a null value would otherwise be rendered as the text "None".
+        title = d.get("title") or "Untitled"
+        author = d.get("author") or ""
+        source = d.get("site_name") or d.get("source") or ""
+        category = d.get("category") or ""
+        word_count = d.get("word_count") or 0
+        source_url = d.get("source_url") or ""
+        summary = (d.get("summary") or "")[:600]
+        # Empty summaries and the placeholder "comments" carry no information.
+        if summary.strip().lower() in ("comments", ""):
+            summary = "N/A — infer from title/author/URL"
+        entries.append(
+            f'[{i}] "{title}"\n'
+            f' Author: {author} | Source: {source} | '
+            f'{category} | {word_count}w\n'
+            f' URL: {source_url}\n Summary: {summary}'
+        )
+
+    if summary_style == "detailed":
+        s_inst = ("**summary**: 4-6 sentences. What is this actually about? Key argument, technique, insight? "
+                  "Include specific details. If summary is N/A, infer from title/author/URL.")
+        r_inst = ("**reason**: 2-3 sentences. Reference specific user interests that match or don't.")
+    else:
+        s_inst = "**summary**: 1-2 sentences about the actual content."
+        r_inst = "**reason**: 5-15 words."
+
+    return f"""Analyze these {len(docs_batch)} articles. Score each for relevance to this user.
+
+<user_profile>
+{profile_text}
+</user_profile>
+
+<documents>
+{chr(10).join(entries)}
+</documents>
+
+For each document return:
+1. {s_inst}
+2. **relevance**: 1-5 (5=must-read for this user, 1=skip)
+3. {r_inst}
+4. **action**: must_read / worth_reading / skim / archive / delete
+5. **tags**: 1-3 from: nix, go, rust, python, emacs, kubernetes, containers, ci-cd, git, security, homelab, networking, ai-llm, coding-agents, linux, open-source, privacy, productivity, pkm, career, culture, french, hardware, web, devtools, monitoring, tekton
+
+IMPORTANT:
+- Favorite authors/sites → minimum 4★ and worth_reading.
+- Be discriminating: generic listicles = 1-2★, deep technical = 4-5★.
+
+Return ONLY valid JSON:
+{{"analyses":[{{"id":0,"summary":"...","relevance":4,"reason":"...","action":"worth_reading","tags":["nix"]}},...]}}\
+"""
+
+
+# ─── ARCHIVE / DELETE ──────────────────────────────────────────────────
+
+@cli.command()
+@click.option("--max-relevance", type=int, default=2, help="Archive docs with relevance ≤ this (default: 2)")
+@click.option("--min-age", type=int, default=365, help="Only docs older than N days (default: 365)")
+# On/off pair: a lone is_flag option whose default is already True can never be
+# switched off from the command line, so --include-read provides the "off" side.
+@click.option("--unread-only/--include-read", default=True, help="Only unread docs, or also docs with reading progress (default: unread only)")
+@click.option("--dry-run", is_flag=True, help="Show what would be archived without doing it")
+@click.option("--yes", "-y", is_flag=True, help="Skip confirmation prompt")
+def archive(max_relevance, min_age, unread_only, dry_run, yes):
+    """Bulk-archive documents on Readwise Reader."""
+    _bulk_action("archive", max_relevance, min_age, unread_only, dry_run, yes)
+
+
+@cli.command()
+@click.option("--max-relevance", type=int, default=1, help="Delete docs with relevance ≤ this (default: 1)")
+@click.option("--min-age", type=int, default=365, help="Only docs older than N days (default: 365)")
+# On/off pair: a lone is_flag option whose default is already True can never be
+# switched off from the command line, so --include-read provides the "off" side.
+@click.option("--unread-only/--include-read", default=True, help="Only unread docs, or also docs with reading progress (default: unread only)")
+@click.option("--dry-run", is_flag=True, help="Show what would be deleted without doing it")
+@click.option("--yes", "-y", is_flag=True, help="Skip confirmation prompt")
+def delete(max_relevance, min_age, unread_only, dry_run, yes):
+    """Bulk-delete documents on Readwise Reader."""
+    _bulk_action("delete", max_relevance, min_age, unread_only, dry_run, yes)
+
+
+def _doc_age_days(doc, now):
+    """Return whole days since the document was saved; 0 when no usable timestamp exists."""
+    saved = doc.get("saved_at") or doc.get("created_at")
+    if not saved:
+        return 0
+    try:
+        saved_dt = datetime.fromisoformat(saved)
+    except (TypeError, ValueError):
+        # Unparseable timestamp: report age 0 so the document is only picked
+        # up when the caller asked for no minimum age at all.
+        return 0
+    if saved_dt.tzinfo is None:
+        # A naive value cannot be subtracted from the aware `now`; read it as UTC.
+        saved_dt = saved_dt.replace(tzinfo=timezone.utc)
+    return (now - saved_dt).days
+
+
+def _doc_relevance(doc):
+    """Return the stored relevance as an int, or 3 when it is missing or not numeric."""
+    try:
+        return int((doc.get("_analysis") or {}).get("relevance", 3))
+    except (TypeError, ValueError):
+        return 3
+
+
+def _select_candidates(documents, max_relevance, min_age, unread_only, now):
+    """Return the documents, across all locations, that pass every bulk-action filter."""
+    selected = []
+    for docs in documents.values():
+        for d in docs:
+            if _doc_relevance(d) > max_relevance:
+                continue
+            if _doc_age_days(d, now) < min_age:
+                continue
+            # reading_progress may be stored as null; count that as unread.
+            if unread_only and (d.get("reading_progress") or 0) > 0:
+                continue
+            selected.append(d)
+    return selected
+
+
+def _bulk_action(action, max_relevance, min_age, unread_only, dry_run, yes):
+    """Archive or delete every analyzed document that matches the filters.
+
+    action is "archive" or "delete". Documents are read from ANALYZED_FILE and
+    kept when relevance <= max_relevance, age in days >= min_age and, if
+    unread_only is set, reading progress is zero. A summary and a sample are
+    printed first; dry_run stops after that, and unless yes is set the user
+    must confirm before any request is sent.
+
+    Raises click.ClickException when ANALYZED_FILE does not exist and
+    ValueError for an unknown action.
+    """
+    import requests
+
+    if action not in ("archive", "delete"):
+        raise ValueError(f"Unknown action: {action}")
+
+    if not ANALYZED_FILE.exists():
+        raise click.ClickException("No analyzed data. Run 'readwise-reader analyze' first.")
+
+    with open(ANALYZED_FILE) as f:
+        data = json.load(f)
+
+    candidates = _select_candidates(
+        data["documents"], max_relevance, min_age, unread_only, datetime.now(timezone.utc)
+    )
+
+    if not candidates:
+        click.echo(f"No documents match criteria (relevance ≤ {max_relevance}, age ≥ {min_age}d, unread={unread_only})")
+        return
+
+    # Show summary
+    rels = Counter(_doc_relevance(d) for d in candidates)
+    click.echo(f"\n{'DRY RUN — ' if dry_run else ''}{action.upper()}: {len(candidates)} documents")
+    click.echo(f" Criteria: relevance ≤ {max_relevance}★, age ≥ {min_age}d, unread_only={unread_only}")
+    click.echo(" Relevance breakdown: " + ", ".join(f"{s}★={rels[s]}" for s in sorted(rels)))
+
+    # Show sample
+    click.echo("\n Sample (first 10):")
+    for d in candidates[:10]:
+        title = (d.get("title") or "Untitled")[:60]
+        click.echo(f" {_doc_relevance(d)}★ | {title}")
+    if len(candidates) > 10:
+        click.echo(f" ... and {len(candidates) - 10} more")
+
+    if dry_run:
+        click.echo("\n🔍 Dry run — no changes made.")
+        return
+
+    if not yes:
+        click.confirm(f"\n{action.capitalize()} {len(candidates)} documents?", abort=True)
+
+    token = get_readwise_token()
+    total = len(candidates)
+    done = 0
+
+    if action == "archive":
+        # Use bulk_update endpoint (50 per request)
+        batch_size = 50
+        for i in range(0, total, batch_size):
+            batch = candidates[i:i + batch_size]
+            updates = [{"id": d["id"], "location": "archive"} for d in batch]
+            resp = requests.patch(
+                f"{READER_API_BASE}/bulk_update/",
+                headers={"Authorization": f"Token {token}", "Content-Type": "application/json"},
+                json={"updates": updates},
+                timeout=60,
+            )
+            if resp.status_code in (200, 207):
+                results = resp.json().get("results", [])
+                ok = sum(1 for r in results if r.get("success"))
+                done += ok
+                if ok < len(batch):
+                    # Measure against the batch size so entries missing from
+                    # `results` are reported as failures too.
+                    click.echo(f" ⚠ {len(batch) - ok} failed in batch", err=True)
+            else:
+                click.echo(f" ✗ Batch failed: {resp.status_code} {resp.text[:200]}", err=True)
+            click.echo(f" Archived {done}/{total}...", err=True)
+            if i + batch_size < total:
+                time.sleep(READER_RATE_DELAY)
+        click.echo(f"\n✅ Archived {done} documents")
+    else:
+        # Delete is per-document (no bulk endpoint)
+        for i, d in enumerate(candidates):
+            resp = requests.delete(
+                f"{READER_API_BASE}/delete/{d['id']}/",
+                headers={"Authorization": f"Token {token}"},
+                timeout=60,
+            )
+            if resp.status_code == 204:
+                done += 1
+            else:
+                click.echo(f" ✗ Failed to delete {d['id']}: {resp.status_code}", err=True)
+            if (i + 1) % 20 == 0:
+                click.echo(f" Deleted {done}/{total}...", err=True)
+            # rate limit: 20/min; nothing follows the last document, so skip the wait there
+            if i + 1 < total:
+                time.sleep(READER_RATE_DELAY)
+        click.echo(f"\n✅ Deleted {done} documents")
+
+
+# ─── REPORT ─────────────────────────────────────────────────────────────
TAG_DISPLAY = {
"nix": ("❄️", "NixOS & Nix", "#7eb8da"), "emacs": ("📝", "Emacs & Org-mode", "#7f5ab6"),
@@ -572,49 +610,48 @@ TAG_DISPLAY = {
}
# Report sections, rendered in this order. Each entry is a
# (key, title, description, accent colour) tuple; the key is one of the strings
# returned by the categorize helper and is also used as the section's HTML id.
ACTION_GROUPS = [
- ("must_read", "⭐ Must Read", "AI rates these highly relevant to you.", "#f59e0b"),
- ("finish_reading", "🏃 Finish Reading — >50% Done", "You started these. Finish them.", "#2196f3"),
+ ("must_read", "⭐ Must Read", "AI rates these highly relevant.", "#f59e0b"),
+ ("finish_reading", "🏃 Finish — >50% Done", "You started these. Finish them.", "#2196f3"),
("archive_finished", "✅ Archive — Finished", "100% read. Archive.", "#4caf50"),
("keep_triage", "📚 Active Queue", "Review and decide.", "#9c27b0"),
- ("archive_old_unread", "📦 Old & Unread (still relevant)", "1yr+ unread but AI says 3★+.", "#ff9800"),
- ("archive_low_relevance", "🗑️ Old, Unread & Low Relevance", "1yr+ unread, ≤2★. Safe to purge.", "#f44336"),
- ("archive_old_barely", "🤔 Old & Barely Started", "1yr+ old, <10% read.", "#795548"),
+ ("archive_old_unread", "📦 Old & Unread (relevant)", "1yr+ unread but 3★+.", "#ff9800"),
+ ("archive_low_relevance", "🗑️ Old & Low Relevance", "1yr+ unread, ≤2★. Purge.", "#f44336"),
+ ("archive_old_barely", "🤔 Old & Barely Started", "1yr+, <10% read.", "#795548"),
]
-def categorize(d):
+def _categorize(d):
p = d.get("reading_progress", 0)
- age = d["_age"]
+ age = d.get("_age", 0)
rel = d.get("_analysis", {}).get("relevance", 3)
+ act = d.get("_analysis", {}).get("action", "")
if p >= 1.0: return "archive_finished"
if age > 365 and p == 0 and rel <= 2: return "archive_low_relevance"
if age > 365 and p == 0: return "archive_old_unread"
if age > 365 and p < 0.1: return "archive_old_barely"
if p > 0.5: return "finish_reading"
- if d.get("_analysis", {}).get("action") in ("must_read", "worth_reading") and rel >= 4: return "must_read"
+ if act in ("must_read", "worth_reading") and rel >= 4: return "must_read"
return "keep_triage"
-def primary_tag(d):
+def _primary_tag(d):
tags = d.get("_analysis", {}).get("tags", [])
- priority = ["nix","emacs","go","rust","python","tekton","kubernetes","containers","ci-cd","homelab","git",
- "coding-agents","ai-llm","security","linux","networking","monitoring","devtools","open-source",
- "privacy","productivity","pkm","career","culture","french","hardware","web"]
- for t in priority:
+ for t in ["nix","emacs","go","rust","python","tekton","kubernetes","containers","ci-cd","homelab","git",
+ "coding-agents","ai-llm","security","linux","networking","monitoring","devtools","open-source",
+ "privacy","productivity","pkm","career","culture","french","hardware","web"]:
if t in tags: return t
return tags[0] if tags else "other"
-def age_label(days):
- if days < 7: return f"{days}d"
- if days < 30: return f"{days//7}w"
- if days < 365: return f"{days//30}mo"
- return f"{days//365}y{(days%365)//30}mo"
-
-
-def cmd_report(args):
+@cli.command()
+@click.option("--no-open", is_flag=True, help="Don't auto-open in browser")
+def report(no_open):
+ """Generate interactive HTML triage report."""
src = ANALYZED_FILE if ANALYZED_FILE.exists() else LATEST_LINK
- print(f"Using: {src}", file=sys.stderr)
+ if not src.exists():
+ raise click.ClickException("No data. Run 'readwise-reader fetch' first.")
+ click.echo(f"Using: {src}", err=True)
+
with open(src) as f:
data = json.load(f)
@@ -625,63 +662,58 @@ def cmd_report(args):
d["_location"] = loc
saved = d.get("saved_at") or d.get("created_at")
d["_age"] = (now - datetime.fromisoformat(saved)).days if saved else 0
- d["_primary_tag"] = primary_tag(d)
+ d["_primary_tag"] = _primary_tag(d)
all_docs.append(d)
groups = defaultdict(list)
for d in all_docs:
- groups[categorize(d)].append(d)
+ groups[_categorize(d)].append(d)
has_analysis = any(d.get("_analysis", {}).get("reason", "") not in ("Not analyzed", "Analysis failed", "") for d in all_docs)
total = len(all_docs)
- # ── Build HTML (inlined for single-file tool) ───────────────────────
html_parts = [_report_head(total, now, has_analysis, groups, all_docs)]
for key, title, desc, color in ACTION_GROUPS:
docs = groups.get(key, [])
if not docs: continue
- docs.sort(key=lambda d: (-d.get("_analysis",{}).get("relevance",3), d["_age"]))
+ docs.sort(key=lambda d: (-d.get("_analysis", {}).get("relevance", 3), d.get("_age", 0)))
tag_groups = defaultdict(list)
for d in docs: tag_groups[d["_primary_tag"]].append(d)
- html_parts.append(f'<div class="group" id="{key}">')
- html_parts.append(f'<div class="group-hdr" style="background:{color}15;border-left:4px solid {color}" onclick="toggle(this)">')
- html_parts.append(f'<h2><span class="arrow">▼</span> {title}</h2><span class="badge">{len(docs)}</span></div>')
- html_parts.append(f'<div class="group-desc">{desc}</div><div class="group-body">')
+ html_parts.append(f'<div class="group" id="{key}">'
+ f'<div class="group-hdr" style="background:{color}15;border-left:4px solid {color}" onclick="toggle(this)">'
+ f'<h2><span class="arrow">▼</span> {title}</h2><span class="badge">{len(docs)}</span></div>'
+ f'<div class="group-desc">{desc}</div><div class="group-body">')
for tag, tdocs in sorted(tag_groups.items(), key=lambda x: -len(x[1])):
- icon, label, tc = TAG_DISPLAY.get(tag, ("📄", tag, "#94a3b8"))
- tdocs.sort(key=lambda d: (-d.get("_analysis",{}).get("relevance",3), -d.get("reading_progress",0)))
- html_parts.append(f'<div class="cluster"><div class="cluster-hdr" onclick="toggle(this)">')
- html_parts.append(f'<h3><span class="arrow">▼</span> {icon} {label}</h3><span class="cnt">{len(tdocs)}</span></div>')
+ icon, label, _ = TAG_DISPLAY.get(tag, ("📄", tag, "#94a3b8"))
+ tdocs.sort(key=lambda d: (-d.get("_analysis", {}).get("relevance", 3), -d.get("reading_progress", 0)))
- # Cluster insights
sources = Counter((d.get("site_name") or d.get("source") or "?") for d in tdocs)
- top_src = [(s,n) for s,n in sources.most_common(5) if n >= 2]
- atags = Counter()
- for d in tdocs:
- for t in d.get("_analysis",{}).get("tags",[]): atags[t] += 1
- wcs = [d.get("word_count",0) for d in tdocs if d.get("word_count")]
- rh = sum(wcs)/15000 if wcs else 0
- avg_rel = sum(d.get("_analysis",{}).get("relevance",3) for d in tdocs)/len(tdocs)
+ top_src = [(s, n) for s, n in sources.most_common(5) if n >= 2]
+ wcs = [d.get("word_count", 0) for d in tdocs if d.get("word_count")]
+ rh = sum(wcs) / 15000 if wcs else 0
+ avg_rel = sum(d.get("_analysis", {}).get("relevance", 3) for d in tdocs) / len(tdocs)
insights = []
- if top_src: insights.append("Sources: " + ", ".join(f"{s} ({n})" for s,n in top_src))
+ if top_src: insights.append("Sources: " + ", ".join(f"{s} ({n})" for s, n in top_src))
insights.append(f"Stats: {len(tdocs)} docs, ~{rh:.1f}h reading, avg {avg_rel:.1f}★")
- html_parts.append('<div class="cluster-insights">' + "<br>".join(html_mod.escape(i) for i in insights) + '</div>')
- html_parts.append('<ul class="doc-list">')
+ html_parts.append(f'<div class="cluster"><div class="cluster-hdr" onclick="toggle(this)">'
+ f'<h3><span class="arrow">▼</span> {icon} {label}</h3><span class="cnt">{len(tdocs)}</span></div>'
+ f'<div class="cluster-insights">{html_mod.escape(" | ".join(insights))}</div>'
+ f'<ul class="doc-list">')
for d in tdocs:
html_parts.append(_doc_html(d))
html_parts.append('</ul></div>')
html_parts.append('</div></div>')
html_parts.append(_report_foot())
-
REPORT_FILE.write_text("\n".join(html_parts))
- print(f"✅ Report: {REPORT_FILE}", file=sys.stderr)
- if not args.no_open:
+ click.echo(f"✅ {REPORT_FILE}", err=True)
+
+ if not no_open:
subprocess.Popen(["xdg-open", str(REPORT_FILE)], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
@@ -697,30 +729,40 @@ def _doc_html(d):
source_url = d.get("source_url") or ""
tags = a.get("tags", [])
loc = d["_location"]
- age = d["_age"]
+ age = d.get("_age", 0)
wc = d.get("word_count") or 0
- rc = {5:"#22c55e",4:"#84cc16",3:"#eab308",2:"#f97316",1:"#ef4444"}.get(rel,"#94a3b8")
- pc = "#4ade80" if p>=1 else "#60a5fa" if p>0.5 else "#f59e0b" if p>0 else "#475569"
- lc = "loc-new" if loc=="new" else "loc-later"
- th = "".join(f'<span class="tag">{t}</span>' for t in tags[:3])
- rt = f"{wc//250}min" if wc else ""
- sl = html_mod.escape(source_url)
- # Build short summary (first sentence) and full
- first_sentence = summary.split('. ')[0] + '.' if '. ' in summary else summary
- if len(first_sentence) > len(summary) - 5:
- first_sentence = summary # Don't truncate if it's basically the whole thing
+ doc_id = d.get("id", "")
- return (f'<li class="doc" data-title="{title.lower()}" data-rel="{rel}" data-tags="{",".join(tags)}">'
- f'<div class="doc-rel" style="color:{rc}" title="{reason}">{rel}★</div>'
- f'<div class="doc-prog"><div style="color:{pc}">{p:.0%}</div>'
- f'<div class="bar"><div class="fill" style="width:{p*100:.0f}%;background:{pc}"></div></div></div>'
- f'<div class="doc-info"><div class="doc-title"><a href="{url}" target="_blank">{title}</a></div>'
- f'<div class="doc-summary">'
- f'<div class="short" onclick="this.nextElementSibling.classList.toggle(\'open\')">{first_sentence} {"▸" if first_sentence != summary else ""}</div>'
- f'<div class="full">{summary}{f"<div class=reason>{reason}</div>" if reason else ""}</div>'
- f'</div>'
- f'<div class="doc-meta"><span class="loc {lc}">{loc}</span> {src} · {rt} · {age_label(age)} ago {th}</div></div>'
- f'<div class="doc-right">{f"<a href={chr(34)}{sl}{chr(34)} target={chr(34)}_blank{chr(34)}>↗</a>" if source_url else ""}</div></li>')
+ rc = {5: "#22c55e", 4: "#84cc16", 3: "#eab308", 2: "#f97316", 1: "#ef4444"}.get(rel, "#94a3b8")
+ pc = "#4ade80" if p >= 1 else "#60a5fa" if p > 0.5 else "#f59e0b" if p > 0 else "#475569"
+ lc = "loc-new" if loc == "new" else "loc-later"
+ th = "".join(f'<span class="tag">{t}</span>' for t in tags[:3])
+ rt = f"{wc // 250}min" if wc else ""
+ sl = html_mod.escape(source_url)
+
+ first = summary.split('. ')[0] + '.' if '. ' in summary else summary
+ if len(first) > len(summary) - 5: first = summary
+ has_more = first != summary
+
+ return (
+ f'<li class="doc" data-title="{title.lower()}" data-rel="{rel}" data-tags="{",".join(tags)}" data-id="{doc_id}">'
+ f'<div class="doc-rel" style="color:{rc}" title="{reason}">{rel}★</div>'
+ f'<div class="doc-prog"><div style="color:{pc}">{p:.0%}</div>'
+ f'<div class="bar"><div class="fill" style="width:{p*100:.0f}%;background:{pc}"></div></div></div>'
+ f'<div class="doc-info"><div class="doc-title"><a href="{url}" target="_blank">{title}</a></div>'
+ f'<div class="doc-summary">'
+ f'<div class="short" onclick="this.nextElementSibling.classList.toggle(\'open\')">{first}{"▸" if has_more else ""}</div>'
+ f'<div class="full">{summary}<div class="reason">{reason}</div></div>'
+ f'</div>'
+ f'<div class="doc-meta"><span class="loc {lc}">{loc}</span> {src} · {rt} · {age_label(age)} ago {th}</div></div>'
+ f'<div class="doc-right">'
+ f'<div class="doc-actions">'
+ f'<button class="act-btn act-archive" onclick="actOnDoc(\'{doc_id}\',\'archive\',this)" title="Archive">📦</button>'
+ f'<button class="act-btn act-delete" onclick="actOnDoc(\'{doc_id}\',\'delete\',this)" title="Delete">🗑️</button>'
+ f'</div>'
+ f'{"<a href=" + chr(34) + sl + chr(34) + " target=" + chr(34) + "_blank" + chr(34) + ">↗</a>" if source_url else ""}'
+ f'</div></li>'
+ )
def _report_head(total, now, has_analysis, groups, all_docs):
@@ -732,17 +774,17 @@ def _report_head(total, now, has_analysis, groups, all_docs):
relbar = ""
if has_analysis:
- rc = Counter(d.get("_analysis",{}).get("relevance",3) for d in all_docs)
- colors = {5:"#22c55e",4:"#84cc16",3:"#eab308",2:"#f97316",1:"#ef4444"}
- labels = {5:"Must read",4:"Relevant",3:"Interesting",2:"Low",1:"Skip"}
+ rc = Counter(d.get("_analysis", {}).get("relevance", 3) for d in all_docs)
+ colors = {5: "#22c55e", 4: "#84cc16", 3: "#eab308", 2: "#f97316", 1: "#ef4444"}
+ labels = {5: "Must read", 4: "Relevant", 3: "Interesting", 2: "Low", 1: "Skip"}
segs = "".join(f'<div class="seg" style="width:{rc.get(s,0)/total*100}%;background:{colors[s]}">{rc.get(s,0)}</div>' for s in [5,4,3,2,1] if rc.get(s,0))
legend = "".join(f'<span><span class="rel-dot" style="background:{colors[s]}"></span>{s}★ {labels[s]} ({rc.get(s,0)})</span>' for s in [5,4,3,2,1])
relbar = f'<div style="max-width:600px;margin:1rem auto 0"><div class="rel-bar">{segs}</div><div class="rel-legend">{legend}</div></div>'
atags = Counter()
for d in all_docs:
- for t in d.get("_analysis",{}).get("tags",[]): atags[t] += 1
- tag_opts = "".join(f'<option value="{t}">{t} ({c})</option>' for t,c in atags.most_common())
+ for t in d.get("_analysis", {}).get("tags", []): atags[t] += 1
+ tag_opts = "".join(f'<option value="{t}">{t} ({c})</option>' for t, c in atags.most_common())
toc = ""
for key, title, _, _ in ACTION_GROUPS:
@@ -752,7 +794,7 @@ def _report_head(total, now, has_analysis, groups, all_docs):
return f"""<!DOCTYPE html><html lang="en"><head><meta charset="utf-8"><meta name="viewport" content="width=device-width,initial-scale=1">
<title>Readwise Reader Triage</title>
<style>
-:root{{--bg:#0f172a;--surface:#1e293b;--surface2:#334155;--text:#e2e8f0;--dim:#94a3b8;--accent:#f59e0b;--link:#38bdf8}}
+:root{{--bg:#0f172a;--surface:#1e293b;--surface2:#334155;--text:#e2e8f0;--dim:#94a3b8;--accent:#f59e0b;--link:#38bdf8;--green:#4ade80;--red:#f87171}}
*{{box-sizing:border-box;margin:0;padding:0}}
body{{font-family:'Inter',-apple-system,system-ui,sans-serif;background:var(--bg);color:var(--text);line-height:1.6}}
.header{{background:linear-gradient(135deg,#1e293b,#312e81);padding:2rem;text-align:center;border-bottom:3px solid var(--accent)}}
@@ -764,13 +806,11 @@ body{{font-family:'Inter',-apple-system,system-ui,sans-serif;background:var(--bg
.rel-bar{{display:flex;height:28px;border-radius:6px;overflow:hidden;margin:1.5rem 0 .5rem}}
.rel-bar .seg{{display:flex;align-items:center;justify-content:center;font-size:.75rem;font-weight:600;color:#000}}
.rel-legend{{display:flex;gap:1rem;justify-content:center;font-size:.75rem;color:var(--dim);margin-bottom:1.5rem;flex-wrap:wrap}}
-.rel-legend span{{display:flex;align-items:center;gap:.3rem}}
-.rel-dot{{width:10px;height:10px;border-radius:50%;display:inline-block}}
+.rel-legend span{{display:flex;align-items:center;gap:.3rem}}.rel-dot{{width:10px;height:10px;border-radius:50%;display:inline-block}}
.toc{{background:var(--surface);border-radius:8px;padding:1.2rem;margin:1rem 0}}
.toc h2{{font-size:1rem;margin-bottom:.8rem}}
.toc-grid{{display:grid;grid-template-columns:repeat(auto-fill,minmax(280px,1fr));gap:.4rem}}
-.toc a{{color:var(--link);text-decoration:none;font-size:.85rem}}.toc a:hover{{text-decoration:underline}}
-.toc .cnt{{opacity:.5;margin-left:.3rem}}
+.toc a{{color:var(--link);text-decoration:none;font-size:.85rem}}.toc a:hover{{text-decoration:underline}}.toc .cnt{{opacity:.5;margin-left:.3rem}}
.group{{margin:1.5rem 0;border-radius:10px;border:1px solid rgba(255,255,255,.08);overflow:hidden}}
.group-hdr{{padding:1rem 1.2rem;display:flex;align-items:center;justify-content:space-between;cursor:pointer;user-select:none}}
.group-hdr h2{{font-size:1.15rem}}.badge{{background:rgba(0,0,0,.3);padding:.2rem .7rem;border-radius:14px;font-size:.85rem;font-weight:600}}
@@ -778,9 +818,9 @@ body{{font-family:'Inter',-apple-system,system-ui,sans-serif;background:var(--bg
.cluster{{margin:.4rem .8rem;background:rgba(0,0,0,.2);border-radius:8px;overflow:hidden}}
.cluster-hdr{{padding:.6rem 1rem;background:rgba(0,0,0,.15);display:flex;align-items:center;justify-content:space-between;cursor:pointer;user-select:none}}
.cluster-hdr h3{{font-size:.95rem}}.cnt{{background:rgba(255,255,255,.08);padding:.15rem .5rem;border-radius:10px;font-size:.78rem}}
-.cluster-insights{{padding:.5rem 1rem;font-size:.8rem;color:var(--dim);border-top:1px solid rgba(255,255,255,.04);line-height:1.8}}
+.cluster-insights{{padding:.5rem 1rem;font-size:.8rem;color:var(--dim);border-top:1px solid rgba(255,255,255,.04)}}
.doc-list{{list-style:none}}.doc{{padding:.5rem 1rem;border-top:1px solid rgba(255,255,255,.04);display:grid;grid-template-columns:2.8rem 2.5rem 1fr auto;gap:.6rem;align-items:center;transition:background .1s}}
-.doc:hover{{background:rgba(255,255,255,.03)}}
+.doc:hover{{background:rgba(255,255,255,.03)}}.doc.acted{{opacity:.3;text-decoration:line-through}}
.doc-rel{{text-align:center;font-weight:700;font-size:.9rem}}
.doc-prog{{text-align:center;font-size:.75rem}}.doc-prog .bar{{width:2.2rem;height:3px;background:rgba(255,255,255,.1);border-radius:2px;margin:2px auto 0;overflow:hidden}}
.doc-prog .fill{{height:100%;border-radius:2px}}
@@ -788,12 +828,15 @@ body{{font-family:'Inter',-apple-system,system-ui,sans-serif;background:var(--bg
.doc-title a{{color:var(--text);text-decoration:none}}.doc-title a:hover{{color:var(--link)}}
.doc-meta{{font-size:.73rem;color:var(--dim);white-space:nowrap;overflow:hidden;text-overflow:ellipsis}}
.doc-summary{{font-size:.78rem;color:var(--dim);margin-top:.15rem}}
-.doc-summary .short{{cursor:pointer}}
-.doc-summary .short:hover{{color:var(--text)}}
+.doc-summary .short{{cursor:pointer}}.doc-summary .short:hover{{color:var(--text)}}
.doc-summary .full{{display:none;margin-top:.3rem;line-height:1.5;color:var(--dim);border-left:2px solid rgba(255,255,255,.1);padding-left:.6rem}}
.doc-summary .full.open{{display:block}}
.doc-summary .reason{{font-size:.72rem;color:var(--accent);margin-top:.2rem;font-style:italic}}
-.doc-right{{text-align:right;font-size:.73rem;white-space:nowrap}}.doc-right a{{color:var(--link)}}
+.doc-right{{text-align:right;font-size:.73rem;white-space:nowrap;display:flex;flex-direction:column;align-items:flex-end;gap:.3rem}}.doc-right a{{color:var(--link)}}
+.doc-actions{{display:flex;gap:.2rem}}
+.act-btn{{background:none;border:1px solid rgba(255,255,255,.1);border-radius:4px;cursor:pointer;font-size:.7rem;padding:.1rem .3rem;opacity:.4;transition:opacity .15s}}
+.act-btn:hover{{opacity:1}}.act-archive:hover{{border-color:var(--accent)}}.act-delete:hover{{border-color:var(--red)}}
+.act-btn.done{{opacity:.2;pointer-events:none}}
.tag{{display:inline-block;padding:.05rem .35rem;border-radius:6px;font-size:.65rem;margin-right:.15rem;background:rgba(255,255,255,.06)}}
.loc{{display:inline-block;padding:.05rem .3rem;border-radius:6px;font-size:.65rem;font-weight:600}}
.loc-new{{background:#14532d;color:#86efac}}.loc-later{{background:#1e3a5f;color:#93c5fd}}
@@ -801,56 +844,54 @@ body{{font-family:'Inter',-apple-system,system-ui,sans-serif;background:var(--bg
.filter-bar{{background:var(--surface);padding:.8rem 1rem;border-radius:8px;margin:1rem 0;display:flex;gap:.5rem;flex-wrap:wrap;align-items:center}}
.filter-bar label{{font-size:.8rem;color:var(--dim)}}
.filter-bar select,.filter-bar input{{background:var(--surface2);color:var(--text);border:1px solid rgba(255,255,255,.1);border-radius:6px;padding:.3rem .6rem;font-size:.8rem}}
+.toast{{position:fixed;bottom:1rem;right:1rem;background:var(--surface);border:1px solid rgba(255,255,255,.1);border-radius:8px;padding:.6rem 1rem;font-size:.85rem;display:none;z-index:999;box-shadow:0 4px 12px rgba(0,0,0,.4)}}
+.toast.show{{display:block}}
@media(max-width:768px){{.doc{{grid-template-columns:2rem 2rem 1fr}}.doc-right{{display:none}}.stats{{gap:.5rem}}.stat{{padding:.4rem .8rem;min-width:70px}}.stat .n{{font-size:1.2rem}}}}
</style></head><body>
<div class="header"><h1>📚 Readwise Reader Triage</h1>
-<p class="sub">{now.strftime('%Y-%m-%d')} · {total} documents · {'AI-analyzed' if has_analysis else 'keyword-classified'}</p>
+<p class="sub">{now.strftime('%Y-%m-%d')} · {total} documents · {'AI-analyzed' if has_analysis else 'keyword-only'}</p>
<div class="stats">{stats}</div>{relbar}</div>
<div class="container">
<div class="toc"><h2>📑 Sections</h2><div class="toc-grid">{toc}</div></div>
<div class="filter-bar"><label>Filter:</label>
<input type="text" id="search" placeholder="Search titles/summaries..." oninput="filterDocs()">
<select id="relFilter" onchange="filterDocs()"><option value="">All relevance</option><option value="5">5★</option><option value="4">4★+</option><option value="3">3★+</option></select>
-<select id="tagFilter" onchange="filterDocs()"><option value="">All tags</option>{tag_opts}</select></div>"""
+<select id="tagFilter" onchange="filterDocs()"><option value="">All tags</option>{tag_opts}</select></div>
+<div id="toast" class="toast"></div>"""
def _report_foot():
return """<script>
function toggle(el){const b=el.parentElement.querySelector('.group-body,.doc-list');if(!b)return;const a=el.querySelector('.arrow');if(b.style.display==='none'){b.style.display='';a?.classList.remove('shut')}else{b.style.display='none';a?.classList.add('shut')}}
function filterDocs(){const q=document.getElementById('search').value.toLowerCase();const r=document.getElementById('relFilter').value;const t=document.getElementById('tagFilter').value;document.querySelectorAll('.doc').forEach(el=>{const title=(el.dataset.title||'');const summary=(el.querySelector('.doc-summary')?.textContent||'').toLowerCase();const rv=parseInt(el.dataset.rel||'3');const tags=el.dataset.tags||'';let s=true;if(q&&!title.includes(q)&&!summary.includes(q))s=false;if(r&&rv<parseInt(r))s=false;if(t&&!tags.includes(t))s=false;el.style.display=s?'':'none'})}
+function showToast(msg,ms){const t=document.getElementById('toast');t.textContent=msg;t.classList.add('show');setTimeout(()=>t.classList.remove('show'),ms||3000)}
+
+// Readwise API token — prompted once, stored in sessionStorage
+function getToken(){let t=sessionStorage.getItem('rw_token');if(t)return Promise.resolve(t);return new Promise(resolve=>{t=prompt('Enter your Readwise access token (from readwise.io/access_token):');if(t){sessionStorage.setItem('rw_token',t);resolve(t)}else{resolve(null)}})}
+
+async function actOnDoc(id,action,btn){
+ const token=await getToken();
+ if(!token){showToast('No token provided');return}
+ const row=btn.closest('.doc');
+ try{
+ if(action==='archive'){
+ const r=await fetch('https://readwise.io/api/v3/update/'+id+'/',{method:'PATCH',headers:{'Authorization':'Token '+token,'Content-Type':'application/json'},body:JSON.stringify({location:'archive'})});
+ if(!r.ok)throw new Error(r.status);
+ row.classList.add('acted');btn.classList.add('done');
+ showToast('📦 Archived');
+ }else if(action==='delete'){
+ if(!confirm('Delete this document permanently?'))return;
+ const r=await fetch('https://readwise.io/api/v3/delete/'+id+'/',{method:'DELETE',headers:{'Authorization':'Token '+token}});
+ if(r.status!==204)throw new Error(r.status);
+ row.classList.add('acted');btn.classList.add('done');
+ showToast('🗑️ Deleted');
+ }
+ }catch(e){showToast('❌ Failed: '+e.message,5000)}
+}
</script></div></body></html>"""
# ═══════════════════════════════════════════════════════════════════════
-# CLI
-# ═══════════════════════════════════════════════════════════════════════
-
-def main():
- parser = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
- sub = parser.add_subparsers(dest="command", required=True)
-
- p_fetch = sub.add_parser("fetch", help="Fetch documents from Readwise Reader API")
- p_fetch.add_argument("--locations", default="new,later", help="Comma-separated locations (default: new,later)")
-
- p_analyze = sub.add_parser("analyze", help="Analyze documents with LLM")
- p_analyze.add_argument("--model", "-m", choices=list(MODELS.keys()), default="opus", help="Model (default: opus)")
- p_analyze.add_argument("--reset", action="store_true", help="Reset checkpoint")
- p_analyze.add_argument("--batch-size", type=int, help="Override batch size")
- p_analyze.add_argument("--profile", type=Path, default=PROFILE_FILE, help=f"Profile TOML (default: {PROFILE_FILE})")
-
- p_report = sub.add_parser("report", help="Generate HTML triage report")
- p_report.add_argument("--no-open", action="store_true", help="Don't open in browser")
-
- args = parser.parse_args()
- DATA_DIR.mkdir(parents=True, exist_ok=True)
-
- if args.command == "fetch":
- cmd_fetch(args)
- elif args.command == "analyze":
- cmd_analyze(args)
- elif args.command == "report":
- cmd_report(args)
-
if __name__ == "__main__":
- main()
+ cli()