main
1#!/usr/bin/env bash
2# Slack Public Channels Archive Script
3# Archives public channels incrementally and generates static HTML viewer
# Strict mode: stop on a failing command (errexit), on expansion of an unset
# variable (nounset), and when any stage of a pipeline fails (pipefail).
set -o errexit -o nounset -o pipefail

# --- Configuration ---
# Root of all persistent state; override with SLACK_ARCHIVE_DIR.
DATA_DIR=${SLACK_ARCHIVE_DIR:-/var/lib/slack-archive}

# Locations derived from the data root.
ARCHIVE_DIR="${DATA_DIR}/archive"
EXPORT_DIR="${DATA_DIR}/exports"
CHANNELS_FILE="${DATA_DIR}/public-channels.txt"
CHANNELS_JSON="${DATA_DIR}/channels.json"

# Destination of the generated HTML; override with SLACK_ARCHIVE_HTML_DIR.
HTML_DIR=${SLACK_ARCHIVE_HTML_DIR:-${DATA_DIR}/html}

# Maximum age of the cached channel list, in days (defaults to 7).
: "${CHANNEL_REFRESH_DAYS:=7}"
16
17# --- Functions ---
18
# Write a timestamped message to stdout.
# Arguments: $@ - message words; they are joined exactly as "$*" joins them.
log() {
  printf '[%s] %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$*"
}
22
# Report a fatal error and terminate the script.
# Arguments: $@ - error description
# Outputs:   the message, prefixed with "ERROR: ", via log on stderr
# Exits with status 1; never returns to the caller.
die() {
  local message="ERROR: $*"
  log "$message" >&2
  exit 1
}
27
# Verify that slackdump credentials are already cached on this machine.
# The cache is looked up under $XDG_CACHE_HOME (falling back to $HOME/.cache)
# in the slackdump/ subdirectory; the presence of provider.bin is the only
# thing checked.
# Returns 0 when the file exists; otherwise exits through die.
check_auth() {
  local cache_root=${XDG_CACHE_HOME:-$HOME/.cache}
  local creds_dir="$cache_root/slackdump"

  [[ -f "$creds_dir/provider.bin" ]] \
    || die "No cached credentials found. Run 'slackdump login' interactively first."

  log "Using cached credentials from $creds_dir"
  return 0
}
38
# Create the data, export and HTML directories if they do not exist yet.
# ARCHIVE_DIR is not created here: run_archive uses its existence to choose
# between resuming and starting a fresh archive.
setup_dirs() {
  local -a required=("$DATA_DIR" "$EXPORT_DIR" "$HTML_DIR")
  mkdir -p "${required[@]}"
}
42
# Make sure the cached list of public channel IDs exists and is recent.
# Globals:
#   CHANNELS_FILE        (written) one public channel ID per line
#   CHANNELS_JSON        (written) raw JSON printed by slackdump
#   CHANNEL_REFRESH_DAYS (read)    maximum age of the cache, in days
#   FORCE_REFRESH        (read)    "true" forces a refresh regardless of age
# The list is refetched when CHANNELS_FILE is missing, is older than
# CHANNEL_REFRESH_DAYS days, or FORCE_REFRESH is "true". Both files are
# replaced only after slackdump and jq have succeeded, so a failed refresh
# leaves the previous cache untouched.
# Exits through die when fetching, filtering or installing the list fails.
refresh_channel_list() {
  local should_refresh=false
  local -a age_test

  # find -mtime +N discards the fractional part of the age in days, so it only
  # matches files that are at least N+1 days old. Testing the age in minutes
  # makes "older than N days" mean what it says. Non-integer values fall back
  # to the plain -mtime test.
  if [[ "$CHANNEL_REFRESH_DAYS" =~ ^[0-9]+$ ]]; then
    age_test=(-mmin "+$((10#$CHANNEL_REFRESH_DAYS * 1440))")
  else
    age_test=(-mtime "+$CHANNEL_REFRESH_DAYS")
  fi

  if [[ ! -f "$CHANNELS_FILE" ]]; then
    log "Channel list not found, fetching..."
    should_refresh=true
  # find errors are deliberately left visible: an invalid CHANNEL_REFRESH_DAYS
  # would otherwise silently disable refreshing.
  elif [[ -n "$(find "$CHANNELS_FILE" "${age_test[@]}")" ]]; then
    log "Channel list older than $CHANNEL_REFRESH_DAYS days, refreshing..."
    should_refresh=true
  fi

  if [[ "$should_refresh" != "true" && "${FORCE_REFRESH:-}" != "true" ]]; then
    log "Using cached channel list ($(wc -l < "$CHANNELS_FILE") channels)"
    return 0
  fi

  log "Fetching channel list from Slack..."

  # Stage the new files next to their destinations (same filesystem, so the
  # final mv is a rename) and create them with ordinary redirections so they
  # get the usual umask-derived permissions.
  local tmp_json="${CHANNELS_JSON}.tmp.$$"
  local tmp_list="${CHANNELS_FILE}.tmp.$$"

  if ! slackdump list channels -format JSON -no-json > "$tmp_json"; then
    rm -f -- "$tmp_json"
    die "Failed to fetch the channel list from Slack"
  fi

  # Filter to public channels only (exclude private, DMs, group DMs)
  if ! jq -r '.[] | select(.is_private == false and .is_im == false and .is_mpim == false) | .id' \
    "$tmp_json" > "$tmp_list"; then
    rm -f -- "$tmp_json" "$tmp_list"
    die "Failed to extract public channels from the channel list JSON"
  fi

  if ! mv -f -- "$tmp_json" "$CHANNELS_JSON"; then
    rm -f -- "$tmp_json" "$tmp_list"
    die "Failed to install $CHANNELS_JSON"
  fi
  if ! mv -f -- "$tmp_list" "$CHANNELS_FILE"; then
    rm -f -- "$tmp_list"
    die "Failed to install $CHANNELS_FILE"
  fi

  local count
  count=$(wc -l < "$CHANNELS_FILE")
  log "Found $count public channels"
}
69
# Run slackdump against the archive directory.
# Globals: ARCHIVE_DIR (read), CHANNELS_FILE (read)
# When ARCHIVE_DIR already exists the previous archive is resumed; otherwise
# a fresh archive is started from the channel IDs listed in CHANNELS_FILE.
# Exits through die when a fresh archive is requested without a usable
# channel list.
run_archive() {
  if [[ -d "$ARCHIVE_DIR" ]]; then
    log "Resuming archive from previous state..."
    slackdump resume "$ARCHIVE_DIR"
    return
  fi

  # Refuse to start without an explicit, non-empty channel list.
  # NOTE(review): how slackdump treats an empty @file include list is not
  # verified here; presumably it would not be limited to public channels -
  # confirm against the slackdump documentation.
  [[ -s "$CHANNELS_FILE" ]] \
    || die "Channel list $CHANNELS_FILE is missing or empty. Run '$0 channels' first."

  log "Starting fresh archive..."
  slackdump archive -o "$ARCHIVE_DIR" @"$CHANNELS_FILE"
}
79
# Convert the slackdump archive into a dated export zip.
# Globals:
#   EXPORT_DIR, ARCHIVE_DIR (read)
#   EXPORT_FILE (written) - path of the zip, named after the current date;
#                           main passes it on to generate_html
convert_to_export() {
  local today
  today=$(date +%Y-%m-%d)
  EXPORT_FILE="${EXPORT_DIR}/slack-export-${today}.zip"

  local -a convert_args=(
    -f export
    -storage standard
    -o "$EXPORT_FILE"
    "$ARCHIVE_DIR"
  )

  log "Converting archive to export format: $EXPORT_FILE"
  slackdump convert "${convert_args[@]}"
}
85
# Render the static HTML viewer for an export and post-process the pages.
# Globals:   HTML_DIR (read) - directory the pages are generated into
# Arguments: $1 - export zip handed to slack-export-viewer
# Steps:
#   1. run slack-export-viewer in --html-only mode;
#   2. give empty <a href='URL'></a> links their URL as link text;
#   3. reverse the order of the message containers in every page so that
#      the newest message comes first.
# Steps 2 and 3 are cosmetic: a failure there is reported as a warning on
# stderr and does not abort the run.
generate_html() {
  local export_file="$1"
  log "Generating HTML viewer..."
  uvx slack-export-viewer -z "$export_file" --html-only -o "$HTML_DIR"

  # Fix empty anchor tags (slack-export-viewer bug)
  # Converts <a href='URL'></a> to <a href='URL'>URL</a>
  log "Fixing empty links..."
  find "$HTML_DIR" -type f -name "*.html" \
    -exec sed -i -E "s|<a href='([^']+)'></a>|<a href='\\1'>\\1</a>|g" {} + \
    || log "WARNING: could not fix empty links in every HTML file" >&2

  # Reverse message order (newest first).
  # The Python program receives all files in one invocation ({} +) instead of
  # one interpreter start per file. It must not contain single quotes because
  # it is passed as a single-quoted shell word.
  log "Reversing message order (newest first)..."
  find "$HTML_DIR" -type f -name "*.html" -exec python3 -c '
import html
import sys
from html.parser import HTMLParser
from pathlib import Path

# Elements whose text content the parser hands over verbatim (never decoded).
RAW_TEXT_TAGS = ("script", "style")


class MessageReverser(HTMLParser):
    """Copy a document through unchanged, except that the message-container
    divs found inside a div of class "messages" are emitted in reverse order
    when that div closes."""

    def __init__(self):
        # With convert_charrefs=True every text node arrives already decoded
        # (&lt; becomes <), so handle_data re-escapes it before writing.
        super().__init__(convert_charrefs=True)
        self.output = []              # finished document fragments
        self.message_containers = []  # completed containers of the open list
        self.current_container = []   # fragments of the container being read
        self.in_messages = False
        self.in_container = False
        self.messages_depth = 0       # open divs counted from the messages div
        self.container_depth = 0      # open divs counted from the container
        self.raw_text = False         # inside script/style content

    def emit(self, raw):
        """Route a fragment to the open container, or to the output."""
        if self.in_container:
            self.current_container.append(raw)
        else:
            self.output.append(raw)

    def handle_starttag(self, tag, attrs):
        raw = self.get_starttag_text()
        # A class attribute without a value is reported as None.
        class_attr = dict(attrs).get("class") or ""
        if tag in RAW_TEXT_TAGS:
            self.raw_text = True

        if tag == "div" and not self.in_container:
            if "messages" in class_attr.split():
                self.in_messages = True
                self.messages_depth = 1
                self.output.append(raw)
                return
            if self.in_messages:
                if "message-container" in class_attr:
                    self.in_container = True
                    self.container_depth = 1
                    self.current_container = [raw]
                    return
                self.messages_depth += 1
        elif tag == "div":
            self.container_depth += 1

        self.emit(raw)

    def handle_startendtag(self, tag, attrs):
        # Self-closing tags such as <br/> are copied verbatim. The inherited
        # implementation would also call handle_endtag and thereby add a
        # closing tag that was never in the source.
        self.emit(self.get_starttag_text())

    def handle_endtag(self, tag):
        raw = f"</{tag}>"
        if tag in RAW_TEXT_TAGS:
            self.raw_text = False

        if self.in_container:
            self.current_container.append(raw)
            if tag == "div":
                self.container_depth -= 1
                if self.container_depth == 0:
                    self.message_containers.append("".join(self.current_container))
                    self.current_container = []
                    self.in_container = False
            return

        if self.in_messages and tag == "div":
            self.messages_depth -= 1
            if self.messages_depth == 0:
                # Flush reversed containers just before the closing tag.
                self.output.extend(reversed(self.message_containers))
                self.message_containers = []
                self.in_messages = False
        self.output.append(raw)

    def handle_data(self, data):
        # Text was decoded by the parser; escape it again so that markup
        # characters in message text stay text. Script and style content is
        # delivered undecoded and is passed through untouched.
        self.emit(data if self.raw_text else html.escape(data, quote=False))

    def handle_comment(self, data):
        self.emit(f"<!--{data}-->")

    def handle_decl(self, decl):
        self.emit(f"<!{decl}>")

    def handle_pi(self, data):
        self.emit(f"<?{data}>")

    def unknown_decl(self, data):
        self.emit(f"<![{data}]>")

    def get_result(self):
        # Input with unclosed divs must not lose messages: append whatever
        # was collected but never flushed.
        leftovers = list(reversed(self.message_containers))
        leftovers.extend(self.current_container)
        return "".join(self.output + leftovers)


status = 0
for name in sys.argv[1:]:
    path = Path(name)
    try:
        parser = MessageReverser()
        # NOTE(review): assumes the viewer writes UTF-8 pages - confirm.
        parser.feed(path.read_text(encoding="utf-8"))
        parser.close()  # flush text the parser may still be buffering
        path.write_text(parser.get_result(), encoding="utf-8")
    except (OSError, UnicodeError) as exc:
        print(f"cannot reverse {path}: {exc}", file=sys.stderr)
        status = 1
sys.exit(status)
' {} + \
    || log "WARNING: could not reverse message order in every HTML file" >&2

  log "HTML generated at: $HTML_DIR"
}
208
# Serve the generated HTML with Python's built-in HTTP server (foreground).
# Globals:   HTML_DIR (read)
# Arguments: $1 - TCP port, an integer from 0 to 65535 (default: 8080)
# Exits through die when the port is not valid or HTML_DIR does not exist.
# NOTE(review): no bind address is passed, so http.server chooses the
# interface itself even though the log line mentions localhost - confirm
# that this exposure is intended.
serve_html() {
  local port="${1:-8080}"

  if ! [[ "$port" =~ ^[0-9]{1,5}$ ]] || ((10#$port > 65535)); then
    die "Invalid port: $port (expected an integer between 0 and 65535)"
  fi

  # Serving a directory that was never generated would only produce 404s.
  [[ -d "$HTML_DIR" ]] \
    || die "HTML directory not found: $HTML_DIR. Run '$0 archive' first."

  log "Starting server at http://localhost:$port"
  python3 -m http.server -d "$HTML_DIR" "$port"
}
214
215# --- Main ---
216
# Command dispatcher.
# Arguments:
#   $1 - command: archive (default), channels, html, serve, help
#   $2 - port, used by "serve" only (default: 8080)
# Exits through die on an unknown command, or when "html" finds no export.
main() {
  local cmd="${1:-archive}"

  case "$cmd" in
    archive)
      check_auth
      setup_dirs
      refresh_channel_list
      run_archive
      convert_to_export
      # EXPORT_FILE is set by convert_to_export.
      generate_html "$EXPORT_FILE"
      log "Done! View at: $HTML_DIR/index.html"
      ;;
    channels)
      check_auth
      setup_dirs
      FORCE_REFRESH=true refresh_channel_list
      ;;
    html)
      local latest_export=""
      # Only search when the directory exists: under "set -euo pipefail" a
      # failing find would otherwise end the script before the error message
      # below is reached.
      if [[ -d "$EXPORT_DIR" ]]; then
        # Newest zip by modification time. awk consumes the whole stream, so
        # sort can never be killed by SIGPIPE the way it can with "head -1".
        latest_export=$(
          find "$EXPORT_DIR" -maxdepth 1 -name "*.zip" -type f -printf '%T@ %p\n' \
            | sort -rn \
            | awk 'NR == 1 { sub(/^[^ ]+ /, ""); print }'
        ) || latest_export=""
      fi
      if [[ -z "$latest_export" ]]; then
        die "No export found. Run '$0 archive' first."
      fi
      generate_html "$latest_export"
      ;;
    serve)
      serve_html "${2:-8080}"
      ;;
    help | --help | -h)
      printf '%s\n' \
        "Usage: $0 [command]" \
        "" \
        "Commands:" \
        "  archive    Fetch channels, archive, convert, generate HTML (default)" \
        "  channels   Refresh channel list only" \
        "  html       Regenerate HTML from latest export" \
        "  serve      Start local HTTP server (port 8080 or specify)" \
        "  help       Show this help" \
        "" \
        "Environment:" \
        "  SLACK_ARCHIVE_DIR       Data directory (default: /var/lib/slack-archive)" \
        "  SLACK_ARCHIVE_HTML_DIR  HTML output directory (default: <data directory>/html)" \
        "  CHANNEL_REFRESH_DAYS    Days before the channel list is refreshed (default: 7)" \
        "  FORCE_REFRESH           Set to 'true' to refresh the channel list regardless of age"
      ;;
    *)
      die "Unknown command: $cmd. Run '$0 help' for usage."
      ;;
  esac
}
264
265main "$@"