Commit 27a501a647be

Vincent Demeester <vincent@sbr.pm>
2025-11-28 22:57:30
feat: Add interactive ZIM file browser and downloader
- Enable browsing all 3,395 offline content archives from Kiwix - Provide fzf-based multi-select interface for library management - Support efficient parallel downloads with aria2c Co-Authored-By: Claude <noreply@anthropic.com> Signed-off-by: Vincent Demeester <vincent@sbr.pm>
1 parent 889e72c
Changed files (5)
pkgs/default.nix
@@ -21,6 +21,7 @@ in
   manifest-tool = pkgs.callPackage ./manifest-tool { };
   gh-restart-failed = pkgs.callPackage ../tools/gh-restart-failed { };
   arr = pkgs.callPackage ../tools/arr { };
+  download-kiwix-zim = pkgs.callPackage ../tools/download-kiwix-zim { };
 
   chmouzies-ai = pkgs.callPackage ./chmouzies/ai.nix { };
   chmouzies-git = pkgs.callPackage ./chmouzies/git.nix { };
systems/sakhalin/home.nix
@@ -1,3 +1,7 @@
-_: {
+{ pkgs, ... }:
+{
   systemd.user.services.syncthing.Install.WantedBy = [ "multi-user.target" ];
+  home.packages = with pkgs; [
+    download-kiwix-zim
+  ];
 }
tools/download-kiwix-zim/default.nix
@@ -0,0 +1,54 @@
+# Package definition for the download-kiwix-zim script.
+#
+# The arguments are filled in by `pkgs.callPackage`: the Python toolchain,
+# `lib`, the `makeWrapper` hook, and the external tools the script invokes.
+{
+  python3,
+  lib,
+  makeWrapper,
+  fzf,
+  aria2,
+  wget,
+}:
+
+python3.pkgs.buildPythonApplication {
+  pname = "download-kiwix-zim";
+  version = "1.0.0";
+  # "other": the source is a plain script that installPhase below copies
+  # into place by hand.
+  format = "other";
+
+  # The directory containing this file (and the script itself).
+  src = ./.;
+
+  # Provides the `wrapProgram` shell function used in installPhase.
+  nativeBuildInputs = [ makeWrapper ];
+
+  # Runtime dependencies
+  # (the same tools are also put on the wrapper's PATH in installPhase)
+  buildInputs = [
+    fzf
+    aria2
+    wget
+  ];
+
+  # Copies the script to $out/bin, marks it executable, and wraps it so
+  # fzf, aria2 and wget are prepended to PATH when it runs.
+  installPhase = ''
+    mkdir -p $out/bin
+
+    # Install the script
+    cp download-kiwix-zim $out/bin/download-kiwix-zim
+    chmod +x $out/bin/download-kiwix-zim
+
+    # Wrap the script to ensure dependencies are in PATH
+    wrapProgram $out/bin/download-kiwix-zim \
+      --prefix PATH : ${
+        lib.makeBinPath [
+          fzf
+          aria2
+          wget
+        ]
+      }
+  '';
+
+  # Package metadata: descriptions and supported platforms.
+  meta = with lib; {
+    description = "Browse and download ZIM files from the Kiwix library using fzf";
+    longDescription = ''
+      Interactive tool to browse the Kiwix catalog and download offline
+      content archives (ZIM files) for Wikipedia and other educational
+      resources. Features multi-select with fzf and parallel downloads
+      with aria2c.
+    '';
+    platforms = platforms.unix;
+  };
+}
tools/download-kiwix-zim/download-kiwix-zim
@@ -0,0 +1,284 @@
+#!/usr/bin/env python3
+
+"""
+Browse and download ZIM files from the Kiwix library using fzf
+
+Usage:
+    download-kiwix-zim [OPTIONS] [DOWNLOAD_DIR]
+
+Options:
+    -l, --lang LANG     Filter by language code (e.g., eng, fra, spa)
+    -h, --help          Show help message
+
+Requirements:
+    - fzf
+    - aria2c or wget
+"""
+
+import argparse
+import shutil
+import subprocess
+import sys
+import urllib.parse
+import urllib.request
+import xml.etree.ElementTree as ET
+from pathlib import Path
+from typing import List, Dict
+
+# Endpoint of the Kiwix OPDS catalog; fetch_catalog() appends the query
+# parameters (count, lang) to this base URL.
+CATALOG_URL = "https://library.kiwix.org/catalog/v2/entries"
+
+
+def parse_args():
+    """Parse command line arguments."""
+    parser = argparse.ArgumentParser(
+        description="Browse and download ZIM files from the Kiwix library"
+    )
+    parser.add_argument(
+        "-l",
+        "--lang",
+        help="Filter by language code (e.g., eng, fra, spa)",
+        default="",
+    )
+    parser.add_argument(
+        "download_dir",
+        nargs="?",
+        default=str(Path.home() / "Downloads"),
+        help="Download directory (default: ~/Downloads)",
+    )
+    return parser.parse_args()
+
+
+def check_dependencies():
+    """Check if required tools are available."""
+    missing = []
+
+    if subprocess.run(["which", "fzf"], capture_output=True).returncode != 0:
+        missing.append("fzf")
+
+    has_aria2c = (
+        subprocess.run(["which", "aria2c"], capture_output=True).returncode
+        == 0
+    )
+    has_wget = (
+        subprocess.run(["which", "wget"], capture_output=True).returncode == 0
+    )
+
+    if not has_aria2c and not has_wget:
+        missing.append("aria2c or wget")
+
+    if missing:
+        deps = ", ".join(missing)
+        print(f"Error: Missing required dependencies: {deps}", file=sys.stderr)
+        sys.exit(1)
+
+
+def fetch_catalog(lang_filter: str = "") -> str:
+    """Fetch the Kiwix catalog XML."""
+    url = CATALOG_URL
+    # Request all entries (there are ~3,500 total)
+    params = ["count=5000"]
+    if lang_filter:
+        params.append(f"lang={lang_filter}")
+
+    if params:
+        url += "?" + "&".join(params)
+
+    print("Fetching Kiwix catalog...", file=sys.stderr)
+
+    try:
+        with urllib.request.urlopen(url) as response:
+            return response.read().decode("utf-8")
+    except Exception as e:
+        print(f"Error fetching catalog: {e}", file=sys.stderr)
+        sys.exit(1)
+
+
+def parse_catalog(xml_content: str) -> List[Dict[str, str]]:
+    """Parse the OPDS XML catalog and extract ZIM entries."""
+    entries = []
+
+    # Parse XML with namespace handling
+    try:
+        root = ET.fromstring(xml_content)
+    except ET.ParseError as e:
+        print(f"Error parsing XML: {e}", file=sys.stderr)
+        sys.exit(1)
+
+    # Define namespaces
+    namespaces = {
+        "atom": "http://www.w3.org/2005/Atom",
+        "dc": "http://purl.org/dc/terms/",
+    }
+
+    # Extract entries
+    for entry in root.findall("atom:entry", namespaces):
+        title = entry.find("atom:title", namespaces)
+        language = entry.find("atom:language", namespaces)
+        flavour = entry.find("atom:flavour", namespaces)
+        summary = entry.find("atom:summary", namespaces)
+
+        # Find the ZIM download link
+        zim_link = None
+        for link in entry.findall("atom:link", namespaces):
+            if link.get("type") == "application/x-zim":
+                zim_link = link
+                break
+
+        if title is not None and zim_link is not None:
+            url = zim_link.get("href", "")
+            size = int(zim_link.get("length", "0"))
+
+            # Format size
+            if size >= 1024 * 1024 * 1024:
+                size_human = f"{size / (1024 * 1024 * 1024):.1f}G"
+            elif size >= 1024 * 1024:
+                size_human = f"{size / (1024 * 1024):.0f}M"
+            else:
+                size_human = f"{size / 1024:.0f}K"
+
+            lang = language.text if language is not None else "unknown"
+            flav = (
+                flavour.text
+                if flavour is not None and flavour.text
+                else "standard"
+            )
+            summ = summary.text if summary is not None else ""
+
+            entries.append({
+                "title": title.text or "",
+                "language": lang,
+                "flavour": flav,
+                "summary": summ,
+                "size": size_human,
+                "url": url,
+            })
+
+    return entries
+
+
+def run_fzf(entries: List[Dict[str, str]]) -> List[Dict[str, str]]:
+    """Run fzf to select entries."""
+    if not entries:
+        print("No entries found", file=sys.stderr)
+        sys.exit(0)
+
+    # Format entries for fzf
+    lines = []
+    for entry in entries:
+        line = (
+            f"{entry['title']}\t[{entry['language']}]\t"
+            f"{entry['size']}\t{entry['flavour']}\t{entry['url']}"
+        )
+        lines.append(line)
+
+    # Run fzf
+    fzf_input = "\n".join(lines)
+
+    try:
+        preview_cmd = (
+            "echo {1} && echo && echo Language: {2} && "
+            "echo Size: {3} && echo Type: {4}"
+        )
+        header_msg = (
+            "Select ZIM file to download "
+            "(Tab for multi-select, Enter to confirm)"
+        )
+
+        result = subprocess.run(
+            [
+                "fzf",
+                "--delimiter=\t",
+                "--with-nth=1,2,3,4",
+                f"--preview={preview_cmd}",
+                "--preview-window=up:5",
+                f"--header={header_msg}",
+                "--multi",
+                "--bind=ctrl-a:select-all",
+                "--bind=ctrl-d:deselect-all",
+            ],
+            input=fzf_input,
+            capture_output=True,
+            text=True,
+        )
+
+        if result.returncode != 0:
+            print("No selection made", file=sys.stderr)
+            sys.exit(0)
+
+        # Parse selected lines
+        selected = []
+        for line in result.stdout.strip().split("\n"):
+            if line:
+                parts = line.split("\t")
+                if len(parts) >= 5:
+                    selected.append({
+                        "title": parts[0],
+                        "language": parts[1].strip("[]"),
+                        "size": parts[2],
+                        "flavour": parts[3],
+                        "url": parts[4],
+                    })
+
+        return selected
+
+    except FileNotFoundError:
+        print("Error: fzf not found", file=sys.stderr)
+        sys.exit(1)
+
+
+def download_zim(entry: Dict[str, str], download_dir: str):
+    """Download a ZIM file."""
+    Path(download_dir).mkdir(parents=True, exist_ok=True)
+
+    url = entry["url"]
+    title = entry['title']
+    lang = entry['language']
+    size = entry['size']
+    print(f"Downloading: {title} [{lang}] ({size})")
+    print(f"URL: {url}")
+
+    # Try aria2c first, then wget
+    has_aria = (
+        subprocess.run(["which", "aria2c"], capture_output=True).returncode
+        == 0
+    )
+    has_wget = (
+        subprocess.run(["which", "wget"], capture_output=True).returncode == 0
+    )
+
+    if has_aria:
+        subprocess.run(["aria2c", "-d", download_dir, "-x", "4", url])
+    elif has_wget:
+        subprocess.run(["wget", "-P", download_dir, url])
+    else:
+        print("Error: No download tool available", file=sys.stderr)
+        sys.exit(1)
+
+
+def main():
+    """Main function."""
+    args = parse_args()
+
+    check_dependencies()
+
+    xml_content = fetch_catalog(args.lang)
+
+    print("Parsing catalog...", file=sys.stderr)
+    entries = parse_catalog(xml_content)
+
+    print(f"Found {len(entries)} ZIM files", file=sys.stderr)
+
+    selected = run_fzf(entries)
+
+    if not selected:
+        print("No files selected", file=sys.stderr)
+        sys.exit(0)
+
+    print(f"\nDownloading {len(selected)} file(s) to: {args.download_dir}\n")
+
+    for entry in selected:
+        download_zim(entry, args.download_dir)
+
+    print("\nDownload(s) complete!")
+
+
+# Run the CLI only when this file is executed as a script, not on import.
+if __name__ == "__main__":
+    main()
tools/download-kiwix-zim/README.md
@@ -0,0 +1,94 @@
+# download-kiwix-zim
+
+Interactive tool to browse and download ZIM files from the Kiwix library.
+
+## Features
+
+- Browse **all offline content archives** in the Kiwix catalog (3,395 at the time of writing)
+- Filter by language (e.g., English, French, Spanish)
+- Interactive selection with fzf (multi-select support)
+- Fast parallel downloads with aria2c (4 connections)
+- Automatic fallback to wget if aria2c is unavailable
+- Human-readable file sizes (GB/MB/KB)
+- Preview mode showing language, size, and content type
+
+## Installation
+
+The package is available in this Nix flake:
+
+```bash
+# Build locally
+nix build .#download-kiwix-zim
+
+# Run directly
+nix run .#download-kiwix-zim
+
+# Install to profile
+nix profile install .#download-kiwix-zim
+```
+
+## Usage
+
+```bash
+# Browse all ZIM files
+download-kiwix-zim
+
+# Filter by language code
+download-kiwix-zim --lang eng
+
+# Download to specific directory
+download-kiwix-zim /mnt/gaia/kiwix
+
+# Combine options
+download-kiwix-zim --lang fra /mnt/gaia/kiwix
+```
+
+## fzf Keybindings
+
+- **Tab**: Select/deselect item
+- **Ctrl-A**: Select all
+- **Ctrl-D**: Deselect all
+- **Enter**: Confirm and download
+- **Esc**: Cancel
+
+## Language Codes
+
+Common language codes:
+- `eng` - English
+- `fra` - French
+- `spa` - Spanish
+- `deu` - German
+- `por` - Portuguese
+- `ara` - Arabic
+- `zho` - Chinese
+- `jpn` - Japanese
+
+## Example Output
+
+```
+Fetching Kiwix catalog...
+Parsing catalog...
+Found 3395 ZIM files
+
+Select ZIM file to download (Tab for multi-select, Enter to confirm)
+
+> Wikipedia [eng] 95.2G maxi
+  Wikivoyage [eng] 156M nopic
+  Wiktionary [eng] 4.2G maxi
+  Stack Exchange [eng] 23.1G all
+```
+
+## Dependencies
+
+All dependencies are automatically included in the Nix package:
+- Python 3
+- fzf (interactive selection)
+- aria2c (parallel downloads)
+- wget (fallback downloader)
+
+## API
+
+Uses the Kiwix OPDS v2 catalog API:
+- Endpoint: `https://library.kiwix.org/catalog/v2/entries`
+- Format: OPDS Atom XML feed
+- Documentation: https://wiki.kiwix.org/wiki/OPDS