main
1#!/usr/bin/env python3
2
3"""
4Browse and download ZIM files from the Kiwix library using fzf
5
6Usage:
7 download-kiwix-zim.py [OPTIONS] [DOWNLOAD_DIR]
8
9Options:
10 -l, --lang LANG Filter by language code (e.g., eng, fra, spa)
11 -h, --help Show help message
12
13Requirements:
14 - fzf
15 - aria2c or wget
16"""
17
import argparse
import shutil
import subprocess
import sys
import urllib.parse
import urllib.request
import xml.etree.ElementTree as ET
from pathlib import Path
from typing import List, Dict
25
# Base URL of the Kiwix library catalog feed. fetch_catalog() appends the
# query string (entry count and optional language filter) to this value.
CATALOG_URL = "https://library.kiwix.org/catalog/v2/entries"
27
28
def parse_args():
    """Build the command line parser and return the parsed namespace.

    The namespace carries two attributes:

    * ``lang`` -- language code filter, empty string when not given.
    * ``download_dir`` -- target directory, ``~/Downloads`` by default.
    """
    default_dir = Path.home() / "Downloads"

    cli = argparse.ArgumentParser(
        description="Browse and download ZIM files from the Kiwix library"
    )
    cli.add_argument(
        "-l", "--lang",
        default="",
        help="Filter by language code (e.g., eng, fra, spa)",
    )
    cli.add_argument(
        "download_dir",
        nargs="?",
        default=str(default_dir),
        help="Download directory (default: ~/Downloads)",
    )

    namespace = cli.parse_args()
    return namespace
47
48
def check_dependencies():
    """Verify that the external tools this script relies on are installed.

    Requires ``fzf`` plus at least one downloader (``aria2c`` or ``wget``).
    Lookups use :func:`shutil.which`, so no helper process is spawned and
    the check also works on systems that do not ship a ``which`` binary.

    Exits with status 1 after printing the missing tools to stderr.
    """
    missing = []

    if shutil.which("fzf") is None:
        missing.append("fzf")

    # Either downloader is sufficient; only complain when both are absent.
    if not any(shutil.which(tool) for tool in ("aria2c", "wget")):
        missing.append("aria2c or wget")

    if missing:
        deps = ", ".join(missing)
        print(f"Error: Missing required dependencies: {deps}", file=sys.stderr)
        sys.exit(1)
71
72
def fetch_catalog(lang_filter: str = "", timeout: float = 30.0) -> str:
    """Fetch the Kiwix catalog XML.

    Args:
        lang_filter: Language code sent as the ``lang`` query parameter.
            An empty string requests every language.
        timeout: Seconds to wait on the network before giving up, so a
            stalled connection cannot hang the script indefinitely.

    Returns:
        The response body decoded as UTF-8.

    Exits with status 1 after printing the error to stderr when the
    request or the decoding fails.
    """
    # Request all entries (there are ~3,500 total)
    query = {"count": "5000"}
    if lang_filter:
        query["lang"] = lang_filter

    # urlencode() escapes reserved characters, so a user-supplied filter
    # containing "&", "=" or spaces cannot corrupt the query string.
    url = f"{CATALOG_URL}?{urllib.parse.urlencode(query)}"

    print("Fetching Kiwix catalog...", file=sys.stderr)

    try:
        with urllib.request.urlopen(url, timeout=timeout) as response:
            return response.read().decode("utf-8")
    except Exception as e:
        # CLI boundary: any failure here (network, HTTP, decoding) is
        # reported to the user and ends the run instead of a traceback.
        print(f"Error fetching catalog: {e}", file=sys.stderr)
        sys.exit(1)
92
93
def _format_size(size: int) -> str:
    """Render a byte count as a short string with a K, M or G suffix."""
    mib = 1024 * 1024
    gib = 1024 * mib
    if size >= gib:
        return f"{size / gib:.1f}G"
    if size >= mib:
        return f"{size / mib:.0f}M"
    return f"{size / 1024:.0f}K"


def _element_text(element, default: str) -> str:
    """Return the stripped text of *element*, or *default* if absent/empty."""
    if element is None or not element.text:
        return default
    return element.text.strip() or default


def parse_catalog(xml_content: str) -> List[Dict[str, str]]:
    """Parse the OPDS XML catalog and extract ZIM entries.

    An entry is kept only when it has a title and a link of type
    ``application/x-zim`` with a non-empty ``href``.

    Args:
        xml_content: The catalog document as a string.

    Returns:
        One dict per entry with the string keys ``title``, ``language``,
        ``flavour``, ``summary``, ``size`` (human readable) and ``url``.
        Missing or empty language, flavour and summary elements fall back
        to ``"unknown"``, ``"standard"`` and ``""`` respectively.

    Exits with status 1 after printing the error to stderr when the
    document is not well-formed XML.
    """
    try:
        root = ET.fromstring(xml_content)
    except ET.ParseError as e:
        print(f"Error parsing XML: {e}", file=sys.stderr)
        sys.exit(1)

    namespaces = {
        "atom": "http://www.w3.org/2005/Atom",
        "dc": "http://purl.org/dc/terms/",
    }

    entries = []
    for entry in root.findall("atom:entry", namespaces):
        title = entry.find("atom:title", namespaces)

        # The first link typed as a ZIM file is the download link.
        zim_link = next(
            (
                link
                for link in entry.findall("atom:link", namespaces)
                if link.get("type") == "application/x-zim"
            ),
            None,
        )
        if title is None or zim_link is None:
            continue

        url = zim_link.get("href", "")
        if not url:
            # Nothing to download, so the entry is useless to the picker.
            continue

        # A missing, empty or malformed length must not abort the whole
        # catalog; treat it as size 0 like an absent attribute.
        try:
            size = int(zim_link.get("length", "0"))
        except ValueError:
            size = 0

        entries.append({
            "title": _element_text(title, ""),
            "language": _element_text(
                entry.find("atom:language", namespaces), "unknown"
            ),
            "flavour": _element_text(
                entry.find("atom:flavour", namespaces), "standard"
            ),
            "summary": _element_text(
                entry.find("atom:summary", namespaces), ""
            ),
            "size": _format_size(size),
            "url": url,
        })

    return entries
155
156
def run_fzf(entries: List[Dict[str, str]]) -> List[Dict[str, str]]:
    """Let the user pick catalog entries interactively with fzf.

    Each entry becomes one tab-delimited line (title, language, size,
    flavour, url); the url column is hidden from the display. Selected
    lines are mapped back to the entry dicts they were built from, so the
    returned dicts are the original objects with all of their keys.
    Entries that render to the same line are shown once.

    Args:
        entries: Catalog entries with the keys ``title``, ``language``,
            ``size``, ``flavour`` and ``url``.

    Returns:
        The selected entries, in the order fzf printed them.

    Exits with status 0 when there is nothing to show or the user made no
    selection, and with status 1 when fzf is missing or reports an error.
    """
    if not entries:
        print("No entries found", file=sys.stderr)
        sys.exit(0)

    def clean(value) -> str:
        """Collapse whitespace so a field cannot inject a tab or newline."""
        return " ".join(str(value).split())

    # Map every rendered line to its entry; fzf echoes the selected lines
    # verbatim, which makes the lookup exact without re-parsing fields.
    by_line = {}
    for entry in entries:
        line = "\t".join((
            clean(entry["title"]),
            f"[{clean(entry['language'])}]",
            clean(entry["size"]),
            clean(entry["flavour"]),
            clean(entry["url"]),
        ))
        by_line.setdefault(line, entry)

    fzf_input = "\n".join(by_line)

    preview_cmd = (
        "echo {1} && echo && echo Language: {2} && "
        "echo Size: {3} && echo Type: {4}"
    )
    header_msg = (
        "Select ZIM file to download "
        "(Tab for multi-select, Enter to confirm)"
    )

    try:
        # Only stdout is piped: stderr stays attached to the terminal so
        # fzf's own error messages reach the user instead of being dropped.
        result = subprocess.run(
            [
                "fzf",
                "--delimiter=\t",
                "--with-nth=1,2,3,4",
                f"--preview={preview_cmd}",
                "--preview-window=up:5",
                f"--header={header_msg}",
                "--multi",
                "--bind=ctrl-a:select-all",
                "--bind=ctrl-d:deselect-all",
            ],
            input=fzf_input,
            stdout=subprocess.PIPE,
            text=True,
        )
    except FileNotFoundError:
        print("Error: fzf not found", file=sys.stderr)
        sys.exit(1)

    # fzf exit status: 1 = no match, 130 = aborted with Ctrl-C/Esc.
    if result.returncode in (1, 130):
        print("No selection made", file=sys.stderr)
        sys.exit(0)
    if result.returncode != 0:
        print(
            f"Error: fzf exited with status {result.returncode}",
            file=sys.stderr,
        )
        sys.exit(1)

    return [
        by_line[line]
        for line in result.stdout.split("\n")
        if line in by_line
    ]
225
226
def download_zim(entry: Dict[str, str], download_dir: str) -> bool:
    """Download a ZIM file with aria2c (preferred) or wget.

    Args:
        entry: Catalog entry; the keys ``url``, ``title``, ``language``
            and ``size`` are read.
        download_dir: Target directory, created if it does not exist.

    Returns:
        ``True`` when the downloader exited with status 0, ``False``
        otherwise (the failure is also reported on stderr).

    Exits with status 1 when neither aria2c nor wget is installed.
    """
    Path(download_dir).mkdir(parents=True, exist_ok=True)

    url = entry["url"]
    title = entry["title"]
    lang = entry["language"]
    size = entry["size"]
    print(f"Downloading: {title} [{lang}] ({size})")
    print(f"URL: {url}")

    # Try aria2c first, then wget. shutil.which() avoids spawning an
    # external "which" process for every file.
    if shutil.which("aria2c"):
        command = ["aria2c", "-d", download_dir, "-x", "4", url]
    elif shutil.which("wget"):
        command = ["wget", "-P", download_dir, url]
    else:
        print("Error: No download tool available", file=sys.stderr)
        sys.exit(1)

    result = subprocess.run(command)
    if result.returncode != 0:
        # Surface the failure instead of silently moving on.
        print(
            f"Error: {command[0]} exited with status "
            f"{result.returncode} for {url}",
            file=sys.stderr,
        )
        return False
    return True
254
255
def main():
    """Run the whole workflow: fetch, parse, select with fzf, download."""
    options = parse_args()
    check_dependencies()

    # Progress messages go to stderr; download status goes to stdout.
    catalog_xml = fetch_catalog(options.lang)
    print("Parsing catalog...", file=sys.stderr)
    zim_entries = parse_catalog(catalog_xml)
    print(f"Found {len(zim_entries)} ZIM files", file=sys.stderr)

    chosen = run_fzf(zim_entries)
    if not chosen:
        print("No files selected", file=sys.stderr)
        sys.exit(0)

    target_dir = options.download_dir
    print(f"\nDownloading {len(chosen)} file(s) to: {target_dir}\n")
    for zim in chosen:
        download_zim(zim, target_dir)

    print("\nDownload(s) complete!")
281
282
# Run the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()