main
  1#!/usr/bin/env python3
  2
  3"""
  4Browse and download ZIM files from the Kiwix library using fzf
  5
  6Usage:
  7    download-kiwix-zim.py [OPTIONS] [DOWNLOAD_DIR]
  8
  9Options:
 10    -l, --lang LANG     Filter by language code (e.g., eng, fra, spa)
 11    -h, --help          Show help message
 12
 13Requirements:
 14    - fzf
 15    - aria2c or wget
 16"""
 17
import argparse
import shutil
import subprocess
import sys
import urllib.parse
import urllib.request
import xml.etree.ElementTree as ET
from pathlib import Path
from typing import List, Dict
 25
# Base endpoint of the Kiwix catalog; fetch_catalog() appends the query
# string (entry count and optional language filter) to this URL.
CATALOG_URL = "https://library.kiwix.org/catalog/v2/entries"
 27
 28
 29def parse_args():
 30    """Parse command line arguments."""
 31    parser = argparse.ArgumentParser(
 32        description="Browse and download ZIM files from the Kiwix library"
 33    )
 34    parser.add_argument(
 35        "-l",
 36        "--lang",
 37        help="Filter by language code (e.g., eng, fra, spa)",
 38        default="",
 39    )
 40    parser.add_argument(
 41        "download_dir",
 42        nargs="?",
 43        default=str(Path.home() / "Downloads"),
 44        help="Download directory (default: ~/Downloads)",
 45    )
 46    return parser.parse_args()
 47
 48
 49def check_dependencies():
 50    """Check if required tools are available."""
 51    missing = []
 52
 53    if subprocess.run(["which", "fzf"], capture_output=True).returncode != 0:
 54        missing.append("fzf")
 55
 56    has_aria2c = (
 57        subprocess.run(["which", "aria2c"], capture_output=True).returncode
 58        == 0
 59    )
 60    has_wget = (
 61        subprocess.run(["which", "wget"], capture_output=True).returncode == 0
 62    )
 63
 64    if not has_aria2c and not has_wget:
 65        missing.append("aria2c or wget")
 66
 67    if missing:
 68        deps = ", ".join(missing)
 69        print(f"Error: Missing required dependencies: {deps}", file=sys.stderr)
 70        sys.exit(1)
 71
 72
 73def fetch_catalog(lang_filter: str = "") -> str:
 74    """Fetch the Kiwix catalog XML."""
 75    url = CATALOG_URL
 76    # Request all entries (there are ~3,500 total)
 77    params = ["count=5000"]
 78    if lang_filter:
 79        params.append(f"lang={lang_filter}")
 80
 81    if params:
 82        url += "?" + "&".join(params)
 83
 84    print("Fetching Kiwix catalog...", file=sys.stderr)
 85
 86    try:
 87        with urllib.request.urlopen(url) as response:
 88            return response.read().decode("utf-8")
 89    except Exception as e:
 90        print(f"Error fetching catalog: {e}", file=sys.stderr)
 91        sys.exit(1)
 92
 93
 94def parse_catalog(xml_content: str) -> List[Dict[str, str]]:
 95    """Parse the OPDS XML catalog and extract ZIM entries."""
 96    entries = []
 97
 98    # Parse XML with namespace handling
 99    try:
100        root = ET.fromstring(xml_content)
101    except ET.ParseError as e:
102        print(f"Error parsing XML: {e}", file=sys.stderr)
103        sys.exit(1)
104
105    # Define namespaces
106    namespaces = {
107        "atom": "http://www.w3.org/2005/Atom",
108        "dc": "http://purl.org/dc/terms/",
109    }
110
111    # Extract entries
112    for entry in root.findall("atom:entry", namespaces):
113        title = entry.find("atom:title", namespaces)
114        language = entry.find("atom:language", namespaces)
115        flavour = entry.find("atom:flavour", namespaces)
116        summary = entry.find("atom:summary", namespaces)
117
118        # Find the ZIM download link
119        zim_link = None
120        for link in entry.findall("atom:link", namespaces):
121            if link.get("type") == "application/x-zim":
122                zim_link = link
123                break
124
125        if title is not None and zim_link is not None:
126            url = zim_link.get("href", "")
127            size = int(zim_link.get("length", "0"))
128
129            # Format size
130            if size >= 1024 * 1024 * 1024:
131                size_human = f"{size / (1024 * 1024 * 1024):.1f}G"
132            elif size >= 1024 * 1024:
133                size_human = f"{size / (1024 * 1024):.0f}M"
134            else:
135                size_human = f"{size / 1024:.0f}K"
136
137            lang = language.text if language is not None else "unknown"
138            flav = (
139                flavour.text
140                if flavour is not None and flavour.text
141                else "standard"
142            )
143            summ = summary.text if summary is not None else ""
144
145            entries.append({
146                "title": title.text or "",
147                "language": lang,
148                "flavour": flav,
149                "summary": summ,
150                "size": size_human,
151                "url": url,
152            })
153
154    return entries
155
156
def run_fzf(entries: List[Dict[str, str]]) -> List[Dict[str, str]]:
    """Let the user pick catalog entries interactively with fzf.

    Each entry is shown as one tab-separated line (title, language, size,
    flavour). A hidden leading column holds the entry's position in
    ``entries`` so that selections are mapped back to the original dicts
    instead of being re-parsed from the displayed text.

    Args:
        entries: Catalog entries with at least the keys ``title``,
            ``language``, ``size`` and ``flavour``.

    Returns:
        The selected entries (the same dict objects that were passed in, so
        every original key such as ``summary`` and ``url`` is preserved), in
        the order fzf printed them.

    Exits with status 0 when there is nothing to choose from or the user
    made no selection, and with status 1 when fzf is missing or reports an
    error.
    """
    if not entries:
        print("No entries found", file=sys.stderr)
        sys.exit(0)

    def display(value) -> str:
        # fzf input is line- and tab-delimited, so embedded tabs/newlines
        # would shift columns; collapse all whitespace runs for display.
        return " ".join(str(value or "").split())

    lines = [
        f"{index}\t{display(entry['title'])}\t[{display(entry['language'])}]\t"
        f"{display(entry['size'])}\t{display(entry['flavour'])}"
        for index, entry in enumerate(entries)
    ]
    fzf_input = "\n".join(lines)

    # Field 1 is the hidden index; visible fields start at 2.
    preview_cmd = (
        "echo {2} && echo && echo Language: {3} && "
        "echo Size: {4} && echo Type: {5}"
    )
    header_msg = (
        "Select ZIM file to download "
        "(Tab for multi-select, Enter to confirm)"
    )

    try:
        # Only stdout is captured: stderr stays attached to the terminal so
        # fzf's own error messages (and, on versions that render there, its
        # interface) remain visible. UTF-8 is forced so non-ASCII titles do
        # not depend on the locale encoding.
        result = subprocess.run(
            [
                "fzf",
                "--delimiter=\t",
                "--with-nth=2..",
                f"--preview={preview_cmd}",
                "--preview-window=up:5",
                f"--header={header_msg}",
                "--multi",
                "--bind=ctrl-a:select-all",
                "--bind=ctrl-d:deselect-all",
            ],
            input=fzf_input,
            stdout=subprocess.PIPE,
            text=True,
            encoding="utf-8",
        )
    except FileNotFoundError:
        print("Error: fzf not found", file=sys.stderr)
        sys.exit(1)

    # fzf exit status: 1 = no match, 130 = aborted by the user; anything
    # else that is non-zero is a genuine failure and must not look like a
    # clean "nothing selected".
    if result.returncode in (1, 130):
        print("No selection made", file=sys.stderr)
        sys.exit(0)
    if result.returncode != 0:
        print(
            f"Error: fzf exited with status {result.returncode}",
            file=sys.stderr,
        )
        sys.exit(1)

    selected = []
    for line in result.stdout.splitlines():
        index_field = line.split("\t", 1)[0]
        try:
            selected.append(entries[int(index_field)])
        except (ValueError, IndexError):
            # Not a line we produced; ignore it rather than guess.
            continue

    return selected
225
226
def download_zim(entry: Dict[str, str], download_dir: str) -> bool:
    """Download one ZIM file into ``download_dir``.

    The directory is created if needed. ``aria2c`` is preferred (invoked
    with ``-x 4``); ``wget`` is used when aria2c is not on PATH.

    Args:
        entry: Catalog entry with the keys ``url``, ``title``, ``language``
            and ``size``.
        download_dir: Target directory for the downloaded file.

    Returns:
        ``True`` if the download tool exited with status 0, ``False``
        otherwise (the failure is also reported on stderr).

    Exits with status 1 when neither download tool is available.
    """
    Path(download_dir).mkdir(parents=True, exist_ok=True)

    url = entry["url"]
    title = entry["title"]
    lang = entry["language"]
    size = entry["size"]
    print(f"Downloading: {title} [{lang}] ({size})")
    # Flush so these lines appear before the child process's own output.
    print(f"URL: {url}", flush=True)

    # Try aria2c first, then wget
    if shutil.which("aria2c"):
        command = ["aria2c", "-d", download_dir, "-x", "4", url]
    elif shutil.which("wget"):
        command = ["wget", "-P", download_dir, url]
    else:
        print("Error: No download tool available", file=sys.stderr)
        sys.exit(1)

    status = subprocess.run(command).returncode
    if status != 0:
        print(
            f"Error: {command[0]} exited with status {status} for {url}",
            file=sys.stderr,
        )
        return False
    return True


def main():
    """Run the full flow: fetch the catalog, select entries, download them.

    Exits with status 0 when nothing was selected, and with status 1 when
    at least one download failed (after attempting all of them).
    """
    args = parse_args()

    check_dependencies()

    xml_content = fetch_catalog(args.lang)

    print("Parsing catalog...", file=sys.stderr)
    entries = parse_catalog(xml_content)

    print(f"Found {len(entries)} ZIM files", file=sys.stderr)

    selected = run_fzf(entries)

    if not selected:
        print("No files selected", file=sys.stderr)
        sys.exit(0)

    print(f"\nDownloading {len(selected)} file(s) to: {args.download_dir}\n")

    # Keep going after a failure so one bad file does not block the rest,
    # but remember it so the run is not reported as a success.
    failed = []
    for entry in selected:
        if not download_zim(entry, args.download_dir):
            failed.append(entry["title"])

    if failed:
        print(
            f"\n{len(failed)} of {len(selected)} download(s) failed:",
            file=sys.stderr,
        )
        for title in failed:
            print(f"  - {title}", file=sys.stderr)
        sys.exit(1)

    print("\nDownload(s) complete!")
281
282
if __name__ == "__main__":
    try:
        main()
    except KeyboardInterrupt:
        # Ctrl-C during the catalog fetch or a long download is a normal way
        # to stop; exit with the conventional 128+SIGINT status instead of
        # dumping a traceback.
        print("\nInterrupted", file=sys.stderr)
        sys.exit(130)