Commit 49b75a21475b

Vincent Demeester <vincent@sbr.pm>
2026-03-18 16:59:01
feat(emacs): add web capture templates with archiving
Added org-capture templates for web page capture using org-web-tools (readable content via eww-readable + pandoc) and monolith (single-file HTML archiving). Restructured link templates into a group with readable, archived, and combined variants. URL is sourced from org-protocol when available, otherwise prompts with clipboard as default.
1 parent 25cb994
Changed files (1)
dots
config
emacs
dots/config/emacs/init.el
@@ -1931,6 +1931,111 @@ Works with any org link type. Creates a new tab, then opens the link."
 				  ("sp" . "src python")
 				  ("v" . "verse"))))
 
+;;; Web capture helpers — used in org-capture templates
+;; These functions fetch web content for org-capture %(sexp) expansion.
+;; They require: org-web-tools (eww-readable + pandoc), monolith (single-file HTML archiver).
+
+(defconst vde/web-archive-dir
+  (expand-file-name "web-archive" org-directory)
+  "Directory, located under `org-directory', that holds monolith page archives.")
+
+(defun vde/web-archive--ensure-dir ()
+  "Create `vde/web-archive-dir', including missing parents, unless it is already a directory."
+  (let ((dir vde/web-archive-dir))
+    (when (not (file-directory-p dir))
+      (make-directory dir t))))
+
+(defun vde/web-archive--monolith (url)
+  "Archive URL with monolith, return path to saved file or nil on failure.
+Saves to `vde/web-archive-dir' as TIMESTAMP--HOST.html.  HOST is the
+host part of URL with every character outside [a-zA-Z0-9._-] replaced
+by \"_\", or \"unknown\" when URL has no host part.
+
+Failure means any of: a non-zero exit status, monolith being
+terminated by a signal, or monolith not being runnable at all.  Each
+case is reported with `message' and yields nil rather than an error."
+  (vde/web-archive--ensure-dir)
+  (let* ((timestamp (format-time-string "%Y%m%dT%H%M%S"))
+         (host (url-host (url-generic-parse-url url)))
+         ;; `url-host' may be nil or empty when URL has no host part;
+         ;; use a placeholder so the filename can still be built.
+         (safe-name (replace-regexp-in-string
+                     "[^a-zA-Z0-9._-]" "_"
+                     (if (and (stringp host) (not (string= host "")))
+                         host
+                       "unknown")))
+         (filename (format "%s--%s.html" timestamp safe-name))
+         (filepath (expand-file-name filename vde/web-archive-dir)))
+    (message "Archiving %s with monolith..." url)
+    ;; `call-process' signals a `file-error' (e.g. `file-missing') when the
+    ;; program cannot be started, and returns a string instead of a number
+    ;; when the process is killed by a signal.  Neither may reach `zerop',
+    ;; so catch the former and compare the status with `eql'.
+    ;; NOTE(review): flags are presumably isolate (-I), no JavaScript (-j)
+    ;; and a 30 second timeout (-t) -- confirm against monolith's --help.
+    (let ((status (condition-case err
+                      (call-process "monolith" nil nil nil
+                                    url "-o" filepath
+                                    "-I" "-j" "-t" "30")
+                    (file-error (error-message-string err)))))
+      (if (eql status 0)
+          (progn
+            (message "Archived to %s" filepath)
+            filepath)
+        (message "monolith failed for %s (%s)" url status)
+        nil))))
+
+(defun vde/web-capture--read-url ()
+  "Return the URL to use for a web capture.
+When `org-store-link-plist' carries an http(s) :link, as is the case
+for an org-protocol capture of a web page, return it without prompting.
+Otherwise prompt in the minibuffer, pre-filled with the URL returned by
+`org-web-tools--get-first-url' (if any)."
+  (require 'org-web-tools)
+  (let ((stored-link (and (boundp 'org-store-link-plist)
+                          (plist-get org-store-link-plist :link))))
+    ;; `org-store-link-plist' is a global that holds whatever link Org last
+    ;; recorded; that is not necessarily a web page, so only trust it when
+    ;; it looks like a web URL and prompt otherwise.
+    (if (and (stringp stored-link)
+             (string-match-p "\\`https?://" stored-link))
+        stored-link
+      ;; Query the clipboard only when a prompt is actually needed.
+      (read-string "URL: " (org-web-tools--get-first-url)))))
+
+(defun vde/org-capture-web-page-readable ()
+  "Build an Org entry holding the readable rendering of a web page.
+The URL is obtained from `vde/web-capture--read-url'.  When the
+readable conversion signals an error or yields nil, return a stub
+entry that links to the URL and notes the failure.  Intended for
+%(sexp) expansion in capture templates."
+  (require 'org-web-tools)
+  (let* ((url (vde/web-capture--read-url))
+         (entry (condition-case nil
+                    (org-web-tools--url-as-readable-org url)
+                  (error nil))))
+    (if entry
+        entry
+      ;; Fallback: the timestamp is only computed when it is needed.
+      (let ((stamp (format-time-string
+                    (org-time-stamp-format 'with-time 'inactive))))
+        (format "* [[%s][%s]] :website:\n\n%s\n\n(Failed to extract readable content)"
+                url url stamp)))))
+
+(defun vde/org-capture-web-page-archived ()
+  "Return Org entry for URL with monolith archive.
+The URL is obtained from `vde/web-capture--read-url'.  The heading is
+a link to the URL described by the page title; when the title cannot
+be fetched or is empty, the URL itself is used as the description.
+The body holds an inactive timestamp and either a file link to the
+archive written by `vde/web-archive--monolith' or \"(archive failed)\".
+Suitable for %(sexp) in capture templates."
+  (require 'org-web-tools)
+  (let* ((url (vde/web-capture--read-url))
+         ;; Best effort: any error while fetching or parsing gives nil.
+         (fetched-title
+          (ignore-errors
+            (let* ((dom (plz 'get url :as #'org-web-tools--sanitized-dom))
+                   (result (org-web-tools--eww-readable dom)))
+              (org-web-tools--cleanup-title (or (car result) "")))))
+         ;; An empty string is non-nil, so a plain `or' would never fall
+         ;; back to the URL for a page without a title.
+         (title (if (and (stringp fetched-title)
+                         (not (string= fetched-title "")))
+                    fetched-title
+                  url))
+         (link (org-link-make-string url title))
+         (timestamp (format-time-string (org-time-stamp-format 'with-time 'inactive)))
+         (archive-path (vde/web-archive--monolith url))
+         (archive-link (if archive-path
+                          (format "[[file:%s][Local archive]]" archive-path)
+                        "(archive failed)")))
+    (format "* %s :website:archive:\n\n%s\n\nArchive: %s\n" link timestamp archive-link)))
+
+(defun vde/org-capture-web-page-both ()
+  "Build an Org entry with both readable content and a monolith archive.
+The URL is obtained from `vde/web-capture--read-url'.  The entry
+starts with a heading linking to the page, an inactive timestamp and
+an \"Archive:\" line.  When the readable conversion succeeds, the
+converted text follows under an \"Article\" subheading with its own
+headings demoted; otherwise a failure note is appended.  Intended for
+%(sexp) expansion in capture templates."
+  (require 'org-web-tools)
+  (let* ((url (vde/web-capture--read-url))
+         ;; Each extraction step is best effort: an error simply yields nil.
+         (page-dom (ignore-errors
+                     (plz 'get url :as #'org-web-tools--sanitized-dom)))
+         (readable (and page-dom
+                        (ignore-errors (org-web-tools--eww-readable page-dom))))
+         (title (org-web-tools--cleanup-title (or (car readable) "")))
+         (html (cdr readable))
+         (org-text (and html
+                        (ignore-errors
+                          (org-web-tools--html-to-org-with-pandoc html))))
+         (description (if (string-empty-p title) url title))
+         (link (org-link-make-string url description))
+         (stamp (format-time-string
+                 (org-time-stamp-format 'with-time 'inactive)))
+         (archive (vde/web-archive--monolith url))
+         (archive-link (if (not archive)
+                           "(archive failed)"
+                         (format "[[file:%s][Local archive]]" archive)))
+         ;; Text shared by the success and the fallback entry.
+         (header (concat "* " link " :website:archive:" "\n\n"
+                         stamp "\n\n"
+                         "Archive: " archive-link "\n\n")))
+    (with-temp-buffer
+      (org-mode)
+      (cond
+       (org-text
+        (insert org-text)
+        ;; Demote before the header goes in, so only the article's own
+        ;; headings are shifted.
+        (org-web-tools--demote-headings-below 2)
+        (goto-char (point-min))
+        (insert header "** Article" "\n\n"))
+       (t
+        ;; Readable extraction failed: header plus a failure note.
+        (insert header "(Failed to extract readable content)\n")))
+      (buffer-string))))
+
 (use-package org-capture
   :commands (org-capture)
   :bind (("C-c o c" . org-capture))
@@ -1964,11 +2069,29 @@ Works with any org link type. Creates a new tab, then opens the link."
 		 :empty-lines 1)
 	       t)
   (add-to-list 'org-capture-templates
-	       `("l" "๐Ÿ”— Link" entry
+	       `("l" "๐Ÿ”— Links")
+	       t)
+  (add-to-list 'org-capture-templates
+	       `("ll" "๐Ÿ”— Link" entry
 		 (file ,org-inbox-file)
 		 "* %a\n%U\n%?\n%i"
 		 :empty-lines 1)
 	       t)
+  (add-to-list 'org-capture-templates
+	       `("lw" "๐ŸŒ Web page (readable)" entry
+		 (file ,org-inbox-file)
+		 "%(vde/org-capture-web-page-readable)")
+	       t)
+  (add-to-list 'org-capture-templates
+	       `("la" "๐Ÿ“ฆ Web page (archived)" entry
+		 (file ,org-inbox-file)
+		 "%(vde/org-capture-web-page-archived)")
+	       t)
+  (add-to-list 'org-capture-templates
+	       `("lb" "๐Ÿ“ฆ๐ŸŒ Web page (both)" entry
+		 (file ,org-inbox-file)
+		 "%(vde/org-capture-web-page-both)")
+	       t)
   (add-to-list 'org-capture-templates
 	       `("m" "โœ‰ Email Workflow")
 	       t)