main
  1#!/usr/bin/env bash
  2# Slack Public Channels Archive Script
  3# Archives public channels incrementally and generates static HTML viewer
  4set -euo pipefail
  5
  6# --- Configuration ---
  7DATA_DIR="${SLACK_ARCHIVE_DIR:-/var/lib/slack-archive}"
  8ARCHIVE_DIR="$DATA_DIR/archive"
  9EXPORT_DIR="$DATA_DIR/exports"
 10HTML_DIR="${SLACK_ARCHIVE_HTML_DIR:-$DATA_DIR/html}"
 11CHANNELS_FILE="$DATA_DIR/public-channels.txt"
 12CHANNELS_JSON="$DATA_DIR/channels.json"
 13
 14# How often to refresh channel list (in days)
 15CHANNEL_REFRESH_DAYS="${CHANNEL_REFRESH_DAYS:-7}"
 16
 17# --- Functions ---
 18
 19log() {
 20    echo "[$(date '+%Y-%m-%d %H:%M:%S')] $*"
 21}
 22
 23die() {
 24    log "ERROR: $*" >&2
 25    exit 1
 26}
 27
 28check_auth() {
 29    # Check for cached credentials
 30    local cache_dir="${XDG_CACHE_HOME:-$HOME/.cache}/slackdump"
 31    if [[ -f "$cache_dir/provider.bin" ]]; then
 32        log "Using cached credentials from $cache_dir"
 33        return 0
 34    fi
 35
 36    die "No cached credentials found. Run 'slackdump login' interactively first."
 37}
 38
 39setup_dirs() {
 40    mkdir -p "$DATA_DIR" "$EXPORT_DIR" "$HTML_DIR"
 41}
 42
 43refresh_channel_list() {
 44    local should_refresh=false
 45
 46    if [[ ! -f "$CHANNELS_FILE" ]]; then
 47        log "Channel list not found, fetching..."
 48        should_refresh=true
 49    elif [[ -n "$(find "$CHANNELS_FILE" -mtime +"$CHANNEL_REFRESH_DAYS" 2>/dev/null)" ]]; then
 50        log "Channel list older than $CHANNEL_REFRESH_DAYS days, refreshing..."
 51        should_refresh=true
 52    fi
 53
 54    if [[ "$should_refresh" == "true" ]] || [[ "${FORCE_REFRESH:-}" == "true" ]]; then
 55        log "Fetching channel list from Slack..."
 56        slackdump list channels -format JSON -no-json > "$CHANNELS_JSON"
 57
 58        # Filter to public channels only (exclude private, DMs, group DMs)
 59        jq -r '.[] | select(.is_private == false and .is_im == false and .is_mpim == false) | .id' \
 60            "$CHANNELS_JSON" > "$CHANNELS_FILE"
 61
 62        local count
 63        count=$(wc -l < "$CHANNELS_FILE")
 64        log "Found $count public channels"
 65    else
 66        log "Using cached channel list ($(wc -l < "$CHANNELS_FILE") channels)"
 67    fi
 68}
 69
 70run_archive() {
 71    if [[ -d "$ARCHIVE_DIR" ]]; then
 72        log "Resuming archive from previous state..."
 73        slackdump resume "$ARCHIVE_DIR"
 74    else
 75        log "Starting fresh archive..."
 76        slackdump archive -o "$ARCHIVE_DIR" @"$CHANNELS_FILE"
 77    fi
 78}
 79
 80convert_to_export() {
 81    EXPORT_FILE="$EXPORT_DIR/slack-export-$(date +%Y-%m-%d).zip"
 82    log "Converting archive to export format: $EXPORT_FILE"
 83    slackdump convert -f export -storage standard -o "$EXPORT_FILE" "$ARCHIVE_DIR"
 84}
 85
 86generate_html() {
 87    local export_file="$1"
 88    log "Generating HTML viewer..."
 89    uvx slack-export-viewer -z "$export_file" --html-only -o "$HTML_DIR"
 90
 91    # Fix empty anchor tags (slack-export-viewer bug)
 92    # Converts <a href='URL'></a> to <a href='URL'>URL</a>
 93    log "Fixing empty links..."
 94    find "$HTML_DIR" -name "*.html" -exec sed -i -E "s|<a href='([^']+)'></a>|<a href='\\1'>\\1</a>|g" {} \;
 95
 96    # Reverse message order (newest first)
 97    log "Reversing message order (newest first)..."
 98    find "$HTML_DIR" -name "*.html" -exec python3 -c '
 99import sys
100from pathlib import Path
101from html.parser import HTMLParser
102
103class MessageReverser(HTMLParser):
104    def __init__(self):
105        super().__init__()
106        self.output = []
107        self.in_messages = False
108        self.messages_depth = 0
109        self.message_containers = []
110        self.current_container = []
111        self.in_container = False
112        self.container_depth = 0
113
114    def handle_starttag(self, tag, attrs):
115        attrs_dict = dict(attrs)
116        class_attr = attrs_dict.get("class", "")
117
118        raw = self.get_starttag_text()
119
120        if tag == "div" and "messages" in class_attr.split():
121            self.in_messages = True
122            self.messages_depth = 1
123            self.output.append(raw)
124            return
125
126        if self.in_messages and not self.in_container:
127            if tag == "div" and "message-container" in class_attr:
128                self.in_container = True
129                self.container_depth = 1
130                self.current_container = [raw]
131                return
132            elif tag == "div":
133                self.messages_depth += 1
134
135        if self.in_container:
136            self.current_container.append(raw)
137            if tag == "div":
138                self.container_depth += 1
139        else:
140            self.output.append(raw)
141
142    def handle_endtag(self, tag):
143        raw = f"</{tag}>"
144
145        if self.in_container:
146            if tag == "div":
147                self.container_depth -= 1
148            self.current_container.append(raw)
149            if self.container_depth == 0:
150                self.message_containers.append("".join(self.current_container))
151                self.current_container = []
152                self.in_container = False
153        elif self.in_messages:
154            if tag == "div":
155                self.messages_depth -= 1
156            if self.messages_depth == 0:
157                # Flush reversed containers
158                for container in reversed(self.message_containers):
159                    self.output.append(container)
160                self.message_containers = []
161                self.in_messages = False
162            self.output.append(raw)
163        else:
164            self.output.append(raw)
165
166    def handle_data(self, data):
167        if self.in_container:
168            self.current_container.append(data)
169        else:
170            self.output.append(data)
171
172    def handle_entityref(self, name):
173        raw = f"&{name};"
174        if self.in_container:
175            self.current_container.append(raw)
176        else:
177            self.output.append(raw)
178
179    def handle_charref(self, name):
180        raw = f"&#{name};"
181        if self.in_container:
182            self.current_container.append(raw)
183        else:
184            self.output.append(raw)
185
186    def handle_comment(self, data):
187        raw = f"<!--{data}-->"
188        if self.in_container:
189            self.current_container.append(raw)
190        else:
191            self.output.append(raw)
192
193    def handle_decl(self, decl):
194        self.output.append(f"<!{decl}>")
195
196    def get_result(self):
197        return "".join(self.output)
198
199path = Path(sys.argv[1])
200content = path.read_text()
201parser = MessageReverser()
202parser.feed(content)
203path.write_text(parser.get_result())
204' {} \;
205
206    log "HTML generated at: $HTML_DIR"
207}
208
# Serve HTML_DIR with the Python built-in HTTP server (runs in the
# foreground until interrupted).
# Globals:   HTML_DIR (read)
# Arguments: $1 - TCP port to listen on (default: 8080)
serve_html() {
    local port="${1:-8080}"

    # Fail early with a clear message instead of starting a server for a
    # directory that does not exist yet.
    [[ -d "$HTML_DIR" ]] \
        || die "HTML directory not found: $HTML_DIR. Run '$0 archive' first."

    # NOTE(review): no bind address is passed, so http.server presumably
    # listens on all interfaces although the message says localhost -
    # confirm whether the archive should be reachable from the network.
    log "Starting server at http://localhost:$port"
    python3 -m http.server -d "$HTML_DIR" "$port"
}
214
215# --- Main ---
216
# Dispatch the requested sub-command.
# Arguments: $1 - command: archive (default), channels, html, serve, help
#            $2 - port for "serve" (default: 8080)
# Globals:   EXPORT_DIR, HTML_DIR (read); EXPORT_FILE (read after
#            convert_to_export has set it)
main() {
    local cmd="${1:-archive}"

    case "$cmd" in
        archive)
            check_auth
            setup_dirs
            refresh_channel_list
            run_archive
            convert_to_export
            generate_html "$EXPORT_FILE"
            log "Done! View at: $HTML_DIR/index.html"
            ;;
        channels)
            check_auth
            setup_dirs
            FORCE_REFRESH=true refresh_channel_list
            ;;
        html)
            local latest_export=""
            # Only search when the directory exists: with errexit and
            # pipefail a failing find would otherwise end the script before
            # the explanatory error below is printed.
            if [[ -d "$EXPORT_DIR" ]]; then
                # Pick the newest zip by modification time. tail reads all of
                # its input, so sort is never cut off by a closed pipe (which
                # pipefail would report as a failure) when there are many
                # exports.
                latest_export=$(find "$EXPORT_DIR" -maxdepth 1 -name "*.zip" -type f -printf '%T@ %p\n' 2>/dev/null \
                    | sort -n | tail -n 1 | cut -d' ' -f2-)
            fi
            if [[ -z "$latest_export" ]]; then
                die "No export found. Run '$0 archive' first."
            fi
            generate_html "$latest_export"
            ;;
        serve)
            serve_html "${2:-8080}"
            ;;
        help|--help|-h)
            printf '%s\n' \
                "Usage: $0 [command]" \
                "" \
                "Commands:" \
                "  archive   Fetch channels, archive, convert, generate HTML (default)" \
                "  channels  Refresh channel list only" \
                "  html      Regenerate HTML from latest export" \
                "  serve     Start local HTTP server (port 8080 or specify)" \
                "  help      Show this help" \
                "" \
                "Environment:" \
                "  SLACK_ARCHIVE_DIR       Data directory (default: /var/lib/slack-archive)" \
                "  SLACK_ARCHIVE_HTML_DIR  HTML output directory (default: <data directory>/html)" \
                "  CHANNEL_REFRESH_DAYS    Refresh the channel list when older than this many days (default: 7)" \
                "  FORCE_REFRESH           Set to \"true\" to refresh the channel list regardless of its age"
            ;;
        *)
            die "Unknown command: $cmd. Run '$0 help' for usage."
            ;;
    esac
}
264
265main "$@"