Commit 6ff8871d44f5

Vincent Demeester <vincent@sbr.pm>
2025-12-18 10:10:15
feat(music-playlist-dl): Add download archive for episode deduplication
- Prevent re-downloading episodes even when files are renamed or moved
- Track episode IDs separately from filenames for reliable incremental sync
- Enable future beets integration without breaking deduplication

Signed-off-by: Vincent Demeester <vincent@sbr.pm>
1 parent a4b1953
tools/music-playlist-dl/config.yaml.example
@@ -63,18 +63,26 @@ yt_dlp_options:
 # ├── library/
 # │   ├── Above & Beyond/
 # │   │   └── Group Therapy/
+# │   │       ├── .downloaded.txt              # Archive file (tracks downloaded IDs)
 # │   │       ├── Group Therapy 657-abc123.m4a
 # │   │       └── Group Therapy 658-def456.m4a
 # │   ├── Armin van Buuren/
 # │   │   └── A State of Trance/
+# │   │       ├── .downloaded.txt
 # │   │       └── ASOT Episode 1255-xyz789.m4a
 # │   └── Tiësto/
 # │       └── CLUBLIFE/
+# │           ├── .downloaded.txt
 # │           └── CLUBLIFE Podcast 908-ghi012.m4a
 # └── playlist/
 #     ├── Above & Beyond - Group Therapy.m3u
 #     ├── Armin van Buuren - A State of Trance.m3u
 #     └── Tiësto - CLUBLIFE.m3u
+#
+# Download Archive Files:
+# Each show directory contains a .downloaded.txt file that tracks episode IDs.
+# This prevents re-downloading episodes even if files are renamed or moved.
+# Do not delete these files - they keep incremental downloads efficient:
+# without them, episodes whose files were renamed or moved are downloaded again.
 
 # Playlist Format
 # Playlists are standard M3U format with relative paths:
tools/music-playlist-dl/music-playlist-dl.py
@@ -66,7 +66,11 @@ def load_config(config_path: Path) -> Config:
 
 
 def build_yt_dlp_command(
-    url: str, output_template: str, artist: str, yt_dlp_options: dict
+    url: str,
+    output_template: str,
+    artist: str,
+    archive_file: Path,
+    yt_dlp_options: dict,
 ) -> List[str]:
     """Build yt-dlp command with options."""
     cmd = ["yt-dlp"]
@@ -85,6 +89,9 @@ def build_yt_dlp_command(
     if yt_dlp_options.get("embed_thumbnail", True):
         cmd.append("--embed-thumbnail")
 
+    # Download archive for deduplication
+    cmd.extend(["--download-archive", str(archive_file)])
+
     # Parse metadata
     cmd.extend(
         [
@@ -110,10 +117,11 @@ def download_mixcloud_show(
     output_dir.mkdir(parents=True, exist_ok=True)
 
     output_template = str(output_dir / "%(title)s-%(id)s.%(ext)s")
+    archive_file = output_dir / ".downloaded.txt"
 
     logging.info(f"Downloading {show.show} by {show.artist}...")
     cmd = build_yt_dlp_command(
-        url, output_template, show.artist, yt_dlp_options
+        url, output_template, show.artist, archive_file, yt_dlp_options
     )
 
     try:
@@ -132,10 +140,11 @@ def download_soundcloud_show(
     output_dir.mkdir(parents=True, exist_ok=True)
 
     output_template = str(output_dir / "%(title)s-%(id)s.%(ext)s")
+    archive_file = output_dir / ".downloaded.txt"
 
     logging.info(f"Downloading {show.show} by {show.artist}...")
     cmd = build_yt_dlp_command(
-        show.url, output_template, show.artist, yt_dlp_options
+        show.url, output_template, show.artist, archive_file, yt_dlp_options
     )
 
     try:
@@ -155,13 +164,15 @@ def generate_playlist(
         logging.warning(f"Show directory does not exist: {show_dir}")
         return
 
-    # Find all audio files
+    # Find all audio files (exclude archive file)
     audio_extensions = {".m4a", ".mp3", ".opus", ".ogg", ".flac"}
     audio_files = sorted(
         [
             f
             for f in show_dir.iterdir()
-            if f.is_file() and f.suffix.lower() in audio_extensions
+            if f.is_file()
+            and f.suffix.lower() in audio_extensions
+            and not f.name.startswith(".")  # Exclude hidden files
         ]
     )
 
tools/music-playlist-dl/README.md
@@ -9,6 +9,7 @@ This tool downloads episodic DJ podcasts/radio shows and organizes them by Artis
 ## Features
 
 - **Automated Downloads**: Download from Mixcloud and SoundCloud
+- **Smart Deduplication**: Track downloaded episodes to avoid re-downloading
 - **Organized Storage**: Files organized as `library/{artist}/{show}/`
 - **Playlist Generation**: Automatic M3U playlists in `playlist/` directory
 - **Metadata Support**: Proper artist and album tags
@@ -72,13 +73,16 @@ After running, your directory structure will look like:
 ├── library/
 │   ├── Above & Beyond/
 │   │   └── Group Therapy/
+│   │       ├── .downloaded.txt           # Download archive (tracks downloaded episodes)
 │   │       ├── Group Therapy 657-abc123.m4a
 │   │       └── Group Therapy 658-def456.m4a
 │   ├── Armin van Buuren/
 │   │   └── A State of Trance/
+│   │       ├── .downloaded.txt
 │   │       └── ASOT Episode 1255-xyz789.m4a
 │   └── Tiësto/
 │       └── CLUBLIFE/
+│           ├── .downloaded.txt
 │           └── CLUBLIFE Podcast 908-ghi012.m4a
 └── playlist/
     ├── Above & Beyond - Group Therapy.m3u
@@ -86,6 +90,12 @@ After running, your directory structure will look like:
     └── Tiësto - CLUBLIFE.m3u
 ```
 
+### Download Archive Files
+
+Each show directory contains a `.downloaded.txt` file that tracks which episodes have been downloaded. This prevents re-downloading existing episodes even if files are renamed or moved. The archive file contains episode IDs from Mixcloud/SoundCloud and is automatically managed by yt-dlp.
+
+**Do not delete these files** - they keep incremental downloads efficient: without the archive, any episode whose file has been renamed or moved would be downloaded again.
+
 ## Playlist Format
 
 Playlists are standard M3U format with relative paths from the playlist directory:
@@ -232,6 +242,8 @@ mv /neo/music/mixes/"Above & Beyond"/*.m4a "/neo/music/library/Above & Beyond/Gr
 
 ## Notes
 
+- Downloads are tracked via `.downloaded.txt` archive files per show
+- Episodes are only downloaded once, even if files are renamed or moved
 - Interrupted downloads resume from where they left off (uses yt-dlp's `-c` flag)
 - Failed downloads for individual shows don't stop the entire script
 - Playlists are regenerated on each run to include new episodes
tools/README.org
@@ -177,30 +177,57 @@
 - Interactive confirmation before applying changes
 - Dry-run and auto-confirm (--yolo/--no-confirm) modes
 
+* Go Tools
+
+** arr
+
+Unified CLI for managing *arr services (Sonarr, Radarr, Lidarr) and Jellyfin with Spotify playlist sync.
+
+See [[file:arr/README.md][arr/README.md]] for detailed documentation.
+
+** battery-monitor
+
+Battery monitoring daemon and notification system for laptops.
+
+** cliphist-cleanup
+
+Clipboard history cleanup utility for cliphist.
+
+** gh-pr
+
+GitHub pull request management tool for approving and managing PRs.
+
+* Python Tools
+
+** music-playlist-dl
+
+Automated downloader for electronic music podcasts from Mixcloud/SoundCloud with M3U playlist generation.
+
+See [[file:music-playlist-dl/README.md][music-playlist-dl/README.md]] for detailed documentation.
+
+** download-kiwix-zim
+
+Browse and download ZIM files from the Kiwix library using fzf.
+
+* Other Tools
+
+** claude-hooks
+
+Claude Code hooks for session management and tool output capture.
+
+** org-manager
+
+Org-mode management utilities for programmatic org file manipulation.
+
 * Directories
 
-** battery-monitor/
-
-Battery monitoring daemon and notification system.
-
-Go-based tool that monitors laptop battery levels and sends notifications
-when battery is low or charging state changes.
-
-*Building:*
-#+begin_src shell
-cd battery-monitor
-go build
-#+end_src
-
 ** emacs/
 
 Emacs configuration and custom packages.
 
-Personal Emacs setup including:
-- Custom themes
-- Package configurations
-- Org-mode settings
-- Development environment customizations
+** fedora-vm/
+
+Fedora VM setup and configuration scripts.
 
 * Notes