flake-update-20260201
  1package sources
  2
  3import (
  4	"bufio"
  5	"context"
  6	"os"
  7	"path/filepath"
  8	"regexp"
  9	"strings"
 10	"time"
 11
 12	"github.com/vdemeester/home/tools/review-tool/internal/activity"
 13	"github.com/vdemeester/home/tools/review-tool/internal/config"
 14)
 15
 16var (
 17	// ** DONE [#2] Some task title
 18	orgHeadingRe = regexp.MustCompile(`^(\*+)\s+(TODO|DONE|STRT|NEXT|WAIT|CANX|CANCELED|KILL)\s+(?:\[#\d\]\s+)?(.+)$`)
 19	// * Section heading (level 1, no TODO state)
 20	sectionHeadingRe = regexp.MustCompile(`^\*\s+([^*].*)$`)
 21	// - State "DONE"       from "TODO"       [2026-01-25 Sat 15:30]
 22	stateChangeRe = regexp.MustCompile(`^-\s+State\s+"(\w+)"\s+from\s+"(\w+)"\s+\[(\d{4}-\d{2}-\d{2}\s+\w{3}\s+\d{2}:\d{2})\]`)
 23	// CLOCK: [2026-01-22 Wed 09:00]--[2026-01-22 Wed 11:15] =>  2:15
 24	clockEntryRe = regexp.MustCompile(`CLOCK:\s+\[([^\]]+)\]--\[([^\]]+)\]\s+=>\s+(\d+:\d+)`)
 25	// Org-mode links: [[url][description]] or [[url]]
 26	orgLinkWithDescRe = regexp.MustCompile(`\[\[([^\]]+)\]\[([^\]]+)\]\]`)
 27	orgLinkBareRe     = regexp.MustCompile(`\[\[([^\]]+)\]\]`)
 28	// Org-mode tags at end of heading: :tag1:tag2:
 29	orgTagsRe = regexp.MustCompile(`\s+:[A-Za-z0-9_@#%:]+:\s*$`)
 30)
 31
// OrgSource fetches activity from org-mode files.
//
// It reads the files and the optional archive directory named in its
// configuration and reports DONE state changes and CLOCK lines as activity
// items.
type OrgSource struct {
	// cfg supplies the file list, the archive directory and the flags that
	// select which kinds of entries are collected.
	cfg *config.OrgConfig
}
 36
 37// NewOrgSource creates a new org-mode source.
 38func NewOrgSource(cfg *config.OrgConfig) *OrgSource {
 39	return &OrgSource{cfg: cfg}
 40}
 41
 42// Name returns the source identifier.
 43func (o *OrgSource) Name() string {
 44	return "org"
 45}
 46
 47// Validate checks if org files are accessible.
 48func (o *OrgSource) Validate() error {
 49	for _, f := range o.cfg.Files {
 50		if _, err := os.Stat(f); err != nil {
 51			return err
 52		}
 53	}
 54	return nil
 55}
 56
 57// Fetch retrieves org-mode activities within the time range.
 58func (o *OrgSource) Fetch(ctx context.Context, start, end time.Time) (*activity.Activity, error) {
 59	act := &activity.Activity{
 60		Source: "org",
 61		Items:  []activity.ActivityItem{},
 62	}
 63
 64	// Parse configured files
 65	for _, filePath := range o.cfg.Files {
 66		items, err := o.parseOrgFile(filePath, start, end)
 67		if err != nil {
 68			continue // Skip files that fail to parse
 69		}
 70		act.Items = append(act.Items, items...)
 71	}
 72
 73	// Parse archive directory if configured
 74	if o.cfg.ArchiveDir != "" {
 75		items, err := o.parseArchiveDir(o.cfg.ArchiveDir, start, end)
 76		if err == nil {
 77			act.Items = append(act.Items, items...)
 78		}
 79	}
 80
 81	return act, nil
 82}
 83
 84// parseArchiveDir scans an archive directory for org files with relevant state changes.
 85// It processes .org files and extensionless files (common for org-mode archives).
 86func (o *OrgSource) parseArchiveDir(dir string, start, end time.Time) ([]activity.ActivityItem, error) {
 87	var items []activity.ActivityItem
 88
 89	err := filepath.Walk(dir, func(path string, info os.FileInfo, err error) error {
 90		if err != nil {
 91			return nil // Skip errors
 92		}
 93
 94		if info.IsDir() {
 95			return nil
 96		}
 97
 98		// Skip known non-org files
 99		lower := strings.ToLower(path)
100		if shouldSkipArchiveFile(lower) {
101			return nil
102		}
103
104		fileItems, err := o.parseOrgFile(path, start, end)
105		if err != nil {
106			return nil
107		}
108
109		items = append(items, fileItems...)
110		return nil
111	})
112
113	return items, err
114}
115
// shouldSkipArchiveFile reports whether path names a file that is definitely
// not an org-mode file: editor backups and temp files, known binary or
// non-org extensions, and anything inside a legacy.bak directory.
//
// The comparison is case-sensitive against lowercase patterns, so callers
// pass the lowercased path.
func shouldSkipArchiveFile(path string) bool {
	// Markers that disqualify a path wherever they occur in it: "#" for
	// temp files, "legacy.bak" for the legacy directory.
	for _, marker := range [...]string{"#", "legacy.bak"} {
		if strings.Contains(path, marker) {
			return true
		}
	}

	// Endings of backup/temp files followed by known non-org extensions.
	endings := [...]string{
		"~", ".bak", ".swp",
		".html", ".htm", ".pdf", ".png", ".jpg", ".jpeg", ".gif", ".svg",
		".css", ".js", ".json", ".yaml", ".yml", ".xml",
		".tar", ".gz", ".xz", ".zip", ".bz2",
		".ttf", ".woff", ".woff2", ".eot",
		".md", ".txt", ".log", ".ico", ".gpg",
	}
	for i := range endings {
		if strings.HasSuffix(path, endings[i]) {
			return true
		}
	}

	return false
}
145
146func (o *OrgSource) parseOrgFile(filePath string, start, end time.Time) ([]activity.ActivityItem, error) {
147	file, err := os.Open(filePath)
148	if err != nil {
149		return nil, err
150	}
151	defer file.Close()
152
153	var items []activity.ActivityItem
154	var currentHeading string
155	var currentSection string          // Reset for each file
156	seenItems := make(map[string]bool) // Dedup by heading+timestamp
157
158	scanner := bufio.NewScanner(file)
159	for scanner.Scan() {
160		line := scanner.Text()
161
162		// Track section (level-1 heading without TODO state)
163		// Must be exactly one asterisk followed by space (not ** or more)
164		if strings.HasPrefix(line, "* ") && !strings.HasPrefix(line, "** ") {
165			// Only update section if this is a plain heading (not a TODO)
166			if !orgHeadingRe.MatchString(line) {
167				// Extract section name and convert any org links
168				sectionName := strings.TrimPrefix(line, "* ")
169				sectionName = strings.TrimSpace(sectionName)
170				// Remove any tags at the end (like :ARCHIVE:)
171				if idx := strings.LastIndex(sectionName, ":"); idx > 0 {
172					// Check if this looks like a tag (ends with :)
173					if strings.HasSuffix(sectionName, ":") {
174						tagStart := strings.LastIndex(sectionName[:idx], " ")
175						if tagStart > 0 {
176							sectionName = strings.TrimSpace(sectionName[:tagStart])
177						}
178					}
179				}
180				currentSection = convertOrgLinksToMarkdown(sectionName)
181			}
182		}
183
184		// Track current heading context
185		if matches := orgHeadingRe.FindStringSubmatch(line); len(matches) > 0 {
186			currentHeading = strings.TrimSpace(matches[3])
187		}
188
189		// Parse state changes (DONE from TODO, etc.)
190		if o.cfg.IncludeDone || o.cfg.IncludeStateChanges {
191			if matches := stateChangeRe.FindStringSubmatch(line); len(matches) > 0 {
192				toState := matches[1]
193				// Only capture transitions to DONE
194				if toState == "DONE" && currentHeading != "" {
195					ts, err := parseOrgTimestamp(matches[3])
196					if err != nil {
197						continue
198					}
199
200					// Filter by date range
201					if ts.Before(start) || ts.After(end) {
202						continue
203					}
204
205					key := currentHeading + ts.Format("2006-01-02-15:04")
206					if seenItems[key] {
207						continue
208					}
209					seenItems[key] = true
210
211					title := cleanOrgTitle(currentHeading)
212					metadata := map[string]string{
213						"file":       filePath,
214						"from_state": matches[2],
215					}
216					if currentSection != "" {
217						metadata["section"] = currentSection
218					}
219
220					items = append(items, activity.ActivityItem{
221						ID:        filePath + ":" + currentHeading,
222						Title:     title,
223						Type:      "todo_done",
224						Category:  activity.CategoryOrg,
225						Timestamp: ts,
226						Metadata:  metadata,
227					})
228				}
229			}
230		}
231
232		// Parse CLOCK entries
233		if o.cfg.IncludeClockEntries {
234			if matches := clockEntryRe.FindStringSubmatch(line); len(matches) > 0 && currentHeading != "" {
235				// Parse start time
236				ts, err := parseOrgTimestamp(matches[1])
237				if err != nil {
238					continue
239				}
240
241				// Filter by date range
242				if ts.Before(start) || ts.After(end) {
243					continue
244				}
245
246				duration := matches[3]
247
248				key := "clock:" + currentHeading + ts.Format("2006-01-02-15:04")
249				if seenItems[key] {
250					continue
251				}
252				seenItems[key] = true
253
254				title := cleanOrgTitle(currentHeading)
255				metadata := map[string]string{
256					"file":     filePath,
257					"duration": duration,
258				}
259				if currentSection != "" {
260					metadata["section"] = currentSection
261				}
262
263				items = append(items, activity.ActivityItem{
264					ID:        filePath + ":clock:" + currentHeading,
265					Title:     title,
266					Type:      "clock_entry",
267					Category:  activity.CategoryOrg,
268					Timestamp: ts,
269					Metadata:  metadata,
270				})
271			}
272		}
273	}
274
275	return items, nil
276}
277
// orgTimestampWeekdayRe matches the date of an org timestamp plus the token
// that follows it when that token does not start with a digit, i.e. the
// weekday name in whatever form it was written ("Sat", "sam.", "Sa").
var orgTimestampWeekdayRe = regexp.MustCompile(`^(\d{4}-\d{2}-\d{2})\s+[^\s\d]\S*`)

// parseOrgTimestamp parses the inside of an org timestamp (without brackets),
// for example "2026-01-25 Sat 15:30", "2026-01-25 Sat" or "2026-01-25".
// Surrounding whitespace is ignored.
//
// English weekday abbreviations are understood directly. Any other
// non-numeric weekday token is replaced by a placeholder and the parse is
// retried, since the weekday carries no information beyond the date.
//
// It returns the parsed time, or a non-nil error when s matches none of the
// supported layouts.
//
// NOTE(review): the value is produced by time.Parse and therefore carries the
// UTC location, while org writes local wall-clock time — confirm that callers
// build their date ranges accordingly.
func parseOrgTimestamp(s string) (time.Time, error) {
	layouts := [...]string{
		"2006-01-02 Mon 15:04",
		"2006-01-02 Mon",
		"2006-01-02 15:04",
		"2006-01-02",
	}

	s = strings.TrimSpace(s)
	candidates := [...]string{
		s,
		// Fallback for weekday names time.Parse does not know.
		orgTimestampWeekdayRe.ReplaceAllString(s, "${1} Mon"),
	}

	// Report the failure of the most specific layout on the original input;
	// it is the most useful one for diagnosing a malformed timestamp.
	var firstErr error
	for _, candidate := range candidates {
		for _, layout := range layouts {
			t, err := time.Parse(layout, candidate)
			if err == nil {
				return t, nil
			}
			if firstErr == nil {
				firstErr = err
			}
		}
	}

	return time.Time{}, firstErr
}
304
305// convertOrgLinksToMarkdown converts org-mode links to markdown format.
306// [[url][description]] -> [description](url)
307// [[url]] -> [url](url)
308func convertOrgLinksToMarkdown(s string) string {
309	// First, replace links with descriptions: [[url][desc]] -> [desc](url)
310	s = orgLinkWithDescRe.ReplaceAllString(s, "[$2]($1)")
311
312	// Then, replace bare links: [[url]] -> [url](url)
313	s = orgLinkBareRe.ReplaceAllStringFunc(s, func(match string) string {
314		// Extract the URL from [[url]]
315		url := orgLinkBareRe.FindStringSubmatch(match)[1]
316		return "[" + url + "](" + url + ")"
317	})
318
319	return s
320}
321
322// cleanOrgTitle removes org-mode artifacts from a title.
323// - Converts org links to markdown
324// - Removes trailing tags like :ARCHIVE:, :CANX:, etc.
325func cleanOrgTitle(s string) string {
326	// Remove trailing tags
327	s = orgTagsRe.ReplaceAllString(s, "")
328	// Convert links
329	s = convertOrgLinksToMarkdown(s)
330	return strings.TrimSpace(s)
331}