Commit 23f8cbecd6e6

Vincent Demeester <vincent@sbr.pm>
2025-12-18 11:43:34
feat(music-playlist-dl): Add optional beets integration for metadata
- Enable rich metadata management and smart playlists without file movement
- Support hierarchical tags (global defaults + per-show overrides)
- Preserve yt-dlp deduplication by importing files in-place

Signed-off-by: Vincent Demeester <vincent@sbr.pm>
1 parent 553fec6
Changed files (3)
tools/music-playlist-dl/config.yaml.example
@@ -6,18 +6,44 @@
 # - Playlists go to: {base_dir}/playlist/{artist} - {show}.m3u
 base_dir: /neo/music
 
+# Beets integration (optional, disabled by default)
+# Enable this to automatically import podcasts into the beets music library manager
+# for enhanced metadata management, smart playlists, and database queries
+beets:
+  enable: false  # Set to true to enable beets integration
+
+  # Import new downloads automatically after each run
+  import_after_download: true
+
+  # Write beets metadata to file tags (recommended)
+  write_tags: true
+
+  # Default tags applied to ALL shows
+  # These can be overridden per-show using beets_tags
+  default_tags:
+    albumtype: podcast
+    genre: Electronic
+    language: eng
+
 # Mixcloud shows to download
-# Format: Mixcloud handle, artist name, and show name
+# Format: Mixcloud handle, artist name, show name, and optional beets_tags
 mixcloud_shows:
   - handle: aboveandbeyond
     artist: Above & Beyond
     show: Group Therapy
+    beets_tags:  # Optional: override/extend default_tags
+      genre: Progressive Trance
+      comments: "Weekly trance radio show"
   - handle: ArminvanBuuren
     artist: Armin van Buuren
     show: A State of Trance
+    beets_tags:
+      genre: Trance
+      comments: "Longest-running trance show (since 2001)"
   - handle: CosmicGate
     artist: Cosmic Gate
     show: Wake Your Mind Radio
+    # No beets_tags - uses default_tags only
   - handle: FerryCorsten
     artist: Ferry Corsten
     show: Resonation Radio
@@ -120,3 +146,30 @@ yt_dlp_options:
 # Tiësto - CLUBLIFE
 #   Weekly club tracks since 2007
 #   https://podcasts.apple.com/us/podcast/clublife
+
+# Beets Integration Workflow
+#
+# Beets (https://beets.io/) is a music library manager that provides:
+# - Rich metadata management and tagging
+# - Smart playlists based on queries
+# - Database-driven music organization
+# - MusicBrainz integration (not used for podcasts)
+#
+# Setup:
+# 1. Enable beets integration by setting beets.enable: true above
+# 2. Run music-playlist-dl --import-existing to import all existing files once
+# 3. Future runs will automatically import new downloads
+#
+# Files stay in library/{artist}/{show}/ - beets imports in-place (doesn't move)
+# This preserves yt-dlp's download archive (.downloaded.txt) for deduplication
+#
+# Tag Hierarchy:
+# - default_tags: Applied to ALL shows (albumtype, genre, language, etc.)
+# - beets_tags: Per-show overrides/extensions (genre, comments, year, etc.)
+# - Always set: artist and album (from show configuration)
+#
+# Example queries after import:
+#   beet ls albumtype:podcast          # List all podcasts
+#   beet ls genre:Trance                # List all trance shows
+#   beet ls artist:"Armin van Buuren"  # List ASOT episodes
+#   beet write albumtype:podcast        # Write tags to files
tools/music-playlist-dl/music-playlist-dl.py
@@ -24,6 +24,11 @@ class MixcloudShow:
     handle: str
     artist: str
     show: str
+    beets_tags: dict = None  # Optional per-show metadata
+
+    def __post_init__(self):
+        if self.beets_tags is None:
+            self.beets_tags = {}
 
 
 @dataclass
@@ -33,6 +38,25 @@ class SoundcloudShow:
     url: str
     artist: str
     show: str
+    beets_tags: dict = None  # Optional per-show metadata
+
+    def __post_init__(self):
+        if self.beets_tags is None:
+            self.beets_tags = {}
+
+
+@dataclass
+class BeetsConfig:
+    """Settings for the optional beets integration.
+
+    Every field has a default, so ``BeetsConfig()`` describes a disabled
+    integration with no default tags.
+    """
+
+    # Master switch; nothing is imported into beets while this is False.
+    enable: bool = False
+    # Import show directories into beets at the end of a normal run.
+    import_after_download: bool = True
+    # Run ``beet write`` after an import so file tags match the database.
+    write_tags: bool = True
+    # Tags applied to every show; None is replaced by a fresh dict below.
+    default_tags: dict = None
+
+    def __post_init__(self):
+        """Swap a missing ``default_tags`` for a new empty dict."""
+        if self.default_tags is not None:
+            return
+        # Created per instance so configs never share one mutable dict.
+        self.default_tags = {}
 
 
 @dataclass
@@ -43,6 +67,7 @@ class Config:
     mixcloud_shows: List[MixcloudShow]
     soundcloud_shows: List[SoundcloudShow]
     yt_dlp_options: dict
+    beets: BeetsConfig
 
 
 def load_config(config_path: Path) -> Config:
@@ -57,11 +82,21 @@ def load_config(config_path: Path) -> Config:
         SoundcloudShow(**show) for show in data.get("soundcloud_shows", [])
     ]
 
+    # Load beets config if present
+    beets_data = data.get("beets", {})
+    beets_config = BeetsConfig(
+        enable=beets_data.get("enable", False),
+        import_after_download=beets_data.get("import_after_download", True),
+        write_tags=beets_data.get("write_tags", True),
+        default_tags=beets_data.get("default_tags", {}),
+    )
+
     return Config(
         base_dir=Path(data.get("base_dir", "/neo/music")),
         mixcloud_shows=mixcloud_shows,
         soundcloud_shows=soundcloud_shows,
         yt_dlp_options=data.get("yt_dlp_options", {}),
+        beets=beets_config,
     )
 
 
@@ -197,6 +232,62 @@ def generate_playlist(
             f.write(f"{relative_path}\n")
 
 
+def import_to_beets(
+    library_dir: Path,
+    artist: str,
+    show: str,
+    show_beets_tags: dict,
+    beets_config: BeetsConfig,
+) -> bool:
+    """Import one show directory into the beets library, in place.
+
+    Tags are merged with increasing priority: ``beets_config.default_tags``,
+    then ``show_beets_tags``, then ``artist``/``album`` taken from the show
+    itself. Each merged tag is passed to ``beet import`` as a ``--set``
+    option.
+
+    Args:
+        library_dir: Library root; the show is expected in
+            ``library_dir / artist / show``.
+        artist: Artist name, always stored as the ``artist`` tag.
+        show: Show name, always stored as the ``album`` tag.
+        show_beets_tags: Per-show tag overrides; ``None`` or an empty dict
+            means "default tags only".
+        beets_config: Global beets settings.
+
+    Returns:
+        True when the integration is disabled or the import succeeded.
+        False when the show directory is missing, ``beet`` cannot be
+        started, or ``beet import`` exits with an error. A failed tag write
+        is logged as a warning but does not turn a successful import into
+        a failure.
+    """
+    if not beets_config.enable:
+        return True  # Skip if disabled
+
+    show_dir = library_dir / artist / show
+    if not show_dir.exists():
+        logging.warning(f"Show directory does not exist: {show_dir}")
+        return False
+
+    # Later entries win: defaults < per-show tags < artist/album from the
+    # show config. ``or {}`` tolerates tag dicts that were left unset.
+    merged_tags = {
+        **(beets_config.default_tags or {}),
+        **(show_beets_tags or {}),
+        "artist": artist,
+        "album": show,
+    }
+
+    # -C: leave files where they are, -A: skip autotagging, -q: quiet.
+    cmd = ["beet", "import", "-C", "-A", "-q"]
+    for key, value in merged_tags.items():
+        cmd.extend(["--set", f"{key}={value}"])
+    cmd.append(str(show_dir))
+
+    try:
+        result = subprocess.run(
+            cmd, check=True, capture_output=True, text=True
+        )
+    except OSError as e:
+        # Raised when the beet executable is missing or cannot be executed;
+        # the integration is optional, so report it instead of crashing.
+        logging.warning(f"Failed to import {show} to beets: {e}")
+        return False
+    except subprocess.CalledProcessError as e:
+        logging.warning(f"Failed to import {show} to beets: {e.stderr}")
+        return False
+
+    logging.debug(f"Beets import output: {result.stdout}")
+
+    # Write metadata to file tags if enabled
+    if beets_config.write_tags:
+        # No -q here: it is an ``import`` option, not a ``write`` option.
+        write_cmd = ["beet", "write", f"album:{show}"]
+        try:
+            written = subprocess.run(
+                write_cmd, check=False, capture_output=True, text=True
+            )
+        except OSError as e:
+            logging.warning(f"Failed to write tags for {show}: {e}")
+        else:
+            if written.returncode != 0:
+                logging.warning(
+                    f"Failed to write tags for {show}: {written.stderr}"
+                )
+
+    logging.info(f"✓ Imported {show} to beets")
+    return True
+
+
 def main():
     """Main entry point."""
     parser = argparse.ArgumentParser(
@@ -211,6 +302,11 @@ def main():
     parser.add_argument(
         "--verbose", "-v", action="store_true", help="Enable verbose logging"
     )
+    parser.add_argument(
+        "--import-existing",
+        action="store_true",
+        help="Import all existing files to beets (run once after enabling)",
+    )
     args = parser.parse_args()
 
     # Setup logging
@@ -255,6 +351,50 @@ def main():
     for show in config.soundcloud_shows:
         generate_playlist(show.artist, show.show, library_dir, playlist_dir)
 
+    # Import to beets if enabled
+    if config.beets.enable:
+        if args.import_existing:
+            logging.info("=" * 60)
+            logging.info("Importing all existing files to beets...")
+            for show in config.mixcloud_shows:
+                import_to_beets(
+                    library_dir,
+                    show.artist,
+                    show.show,
+                    show.beets_tags,
+                    config.beets,
+                )
+            for show in config.soundcloud_shows:
+                import_to_beets(
+                    library_dir,
+                    show.artist,
+                    show.show,
+                    show.beets_tags,
+                    config.beets,
+                )
+            logging.info("Beets import complete!")
+            sys.exit(0)
+
+        elif config.beets.import_after_download:
+            logging.info("=" * 60)
+            logging.info("Importing new downloads to beets...")
+            for show in config.mixcloud_shows:
+                import_to_beets(
+                    library_dir,
+                    show.artist,
+                    show.show,
+                    show.beets_tags,
+                    config.beets,
+                )
+            for show in config.soundcloud_shows:
+                import_to_beets(
+                    library_dir,
+                    show.artist,
+                    show.show,
+                    show.beets_tags,
+                    config.beets,
+                )
+
     logging.info("=" * 60)
     logging.info("Download complete!")
 
tools/music-playlist-dl/README.md
@@ -12,7 +12,8 @@ This tool downloads episodic DJ podcasts/radio shows and organizes them by Artis
 - **Smart Deduplication**: Track downloaded episodes to avoid re-downloading
 - **Organized Storage**: Files organized as `library/{artist}/{show}/`
 - **Playlist Generation**: Automatic M3U playlists in `playlist/` directory
-- **Metadata Support**: Proper artist and album tags
+- **Beets Integration**: Optional integration with the beets music library manager for rich metadata and smart playlists
+- **Metadata Support**: Proper artist and album tags with hierarchical customization
 - **Resume Support**: Continue interrupted downloads
 - **Notification Support**: ntfy notifications on success/failure
 - **NixOS Integration**: Systemd timer for scheduled execution
@@ -108,6 +109,116 @@ Playlists are standard M3U format with relative paths from the playlist director
 
 This allows music players to correctly resolve the file paths regardless of where they're accessed from.
 
+## Beets Integration
+
+**Optional** integration with the [beets](https://beets.io/) music library manager for enhanced metadata management and smart playlists.
+
+### What is Beets?
+
+Beets is a powerful music library manager that provides:
+- Database-driven organization and querying
+- Rich metadata management
+- Smart playlists based on queries
+- Tag-based searching and filtering
+- Automatic metadata writing to files
+
+### Configuration
+
+Enable beets integration in your config file:
+
+```yaml
+beets:
+  enable: true  # Enable beets integration
+  import_after_download: true  # Auto-import new downloads
+  write_tags: true  # Write metadata to file tags
+
+  # Default tags applied to ALL shows
+  default_tags:
+    albumtype: podcast
+    genre: Electronic
+    language: eng
+
+# Per-show metadata overrides
+mixcloud_shows:
+  - handle: ArminvanBuuren
+    artist: Armin van Buuren
+    show: A State of Trance
+    beets_tags:  # Override/extend default_tags
+      genre: Trance
+      comments: "Longest-running trance show (since 2001)"
+```
+
+### Tag Hierarchy
+
+Tags are merged in priority order (highest to lowest):
+1. **Always set**: `artist`, `album` (from show config)
+2. **Per-show**: `beets_tags` (overrides defaults)
+3. **Global**: `default_tags`
+
+**Example:**
+```yaml
+default_tags:
+  genre: Electronic
+
+show:
+  artist: Armin van Buuren
+  beets_tags:
+    genre: Trance  # Overrides "Electronic"
+```
+
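+Result: `genre: Trance` (the per-show value wins) and `artist: Armin van Buuren`. With the full `default_tags` from the Configuration section above, `albumtype: podcast` and `language: eng` are applied as well, and `album` is always set to the show name.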
+
+### Migration Workflow
+
+When enabling beets for the first time with existing downloads:
+
+```bash
+# 1. Enable beets in config
+vim /neo/music/music-playlist-dl.yaml  # Set beets.enable: true
+
+# 2. Import all existing files once
+music-playlist-dl --import-existing
+
+# 3. Future runs automatically import new downloads
+music-playlist-dl
+```
+
+### How It Works
+
+- **Files stay in place**: Beets imports files without moving them (`-C` flag)
+- **No re-downloads**: Download archives (`.downloaded.txt`) remain valid
+- **Incremental imports**: New downloads are automatically imported
+- **Tag writing**: Metadata is embedded in file tags if `write_tags: true`
+
+### Querying Your Library
+
+After import, use beets to query and manage your podcast library:
+
+```bash
+# List all podcasts
+beet ls albumtype:podcast
+
+# List by genre
+beet ls genre:Trance
+
+# List specific show
+beet ls artist:"Armin van Buuren"
+
+# Count episodes
+beet stats albumtype:podcast
+
+# Update file tags from database
+beet write albumtype:podcast
+```
+
+### Benefits
+
+✅ **Rich metadata**: Genre, language, comments, custom fields
+✅ **Smart playlists**: Query-based dynamic playlists
+✅ **Database queries**: Fast searching and filtering
+✅ **No file movement**: Works with yt-dlp deduplication
+✅ **Optional**: Disabled by default, no breaking changes
+
 ## Usage
 
 ### Manual Execution
@@ -121,6 +232,9 @@ music-playlist-dl --config /path/to/config.yaml
 
 # Verbose output
 music-playlist-dl --verbose
+
+# Import existing files to beets (run once after enabling beets)
+music-playlist-dl --import-existing
 ```
 
 ### Systemd Service