auto-update-daily-20260202
1package sources
2
import (
	"bufio"
	"context"
	"fmt"
	"os"
	"path/filepath"
	"regexp"
	"strings"
	"time"

	"github.com/vdemeester/home/tools/review-tool/internal/activity"
	"github.com/vdemeester/home/tools/review-tool/internal/config"
)
15
var (
	// orgHeadingRe matches a heading that carries a TODO keyword, e.g.
	//   ** DONE [#2] Some task title
	// Submatches: (1) the leading stars, (2) the keyword, (3) the title.
	// An optional priority cookie between keyword and title is consumed
	// without being captured; it may be a letter or a number ([#A], [#2],
	// [#10]) so that the cookie never ends up inside the title.
	orgHeadingRe = regexp.MustCompile(`^(\*+)\s+(TODO|DONE|STRT|NEXT|WAIT|CANX|CANCELED|KILL)\s+(?:\[#[A-Z0-9]+\]\s+)?(.+)$`)
	// sectionHeadingRe matches a single-star heading and captures its text,
	// e.g. "* Section heading". The pattern itself does not exclude TODO
	// keywords, and it is not referenced by the code visible in this file.
	sectionHeadingRe = regexp.MustCompile(`^\*\s+([^*].*)$`)
	// stateChangeRe matches a logged state transition, e.g.
	//   - State "DONE" from "TODO" [2026-01-25 Sat 15:30]
	// Submatches: (1) new state, (2) previous state, (3) timestamp text.
	stateChangeRe = regexp.MustCompile(`^-\s+State\s+"(\w+)"\s+from\s+"(\w+)"\s+\[(\d{4}-\d{2}-\d{2}\s+\w{3}\s+\d{2}:\d{2})\]`)
	// clockEntryRe matches a closed clock line, e.g.
	//   CLOCK: [2026-01-22 Wed 09:00]--[2026-01-22 Wed 11:15] => 2:15
	// Submatches: (1) start timestamp, (2) end timestamp, (3) duration.
	clockEntryRe = regexp.MustCompile(`CLOCK:\s+\[([^\]]+)\]--\[([^\]]+)\]\s+=>\s+(\d+:\d+)`)
	// orgLinkWithDescRe matches [[url][description]]; submatches are
	// (1) the url and (2) the description.
	orgLinkWithDescRe = regexp.MustCompile(`\[\[([^\]]+)\]\[([^\]]+)\]\]`)
	// orgLinkBareRe matches [[url]]; submatch (1) is the url.
	orgLinkBareRe = regexp.MustCompile(`\[\[([^\]]+)\]\]`)
	// orgTagsRe matches a trailing tag group such as " :tag1:tag2:" at the
	// end of a heading, including the whitespace that precedes it.
	orgTagsRe = regexp.MustCompile(`\s+:[A-Za-z0-9_@#%:]+:\s*$`)
)
31
32// OrgSource fetches activity from org-mode files.
33type OrgSource struct {
34 cfg *config.OrgConfig
35}
36
37// NewOrgSource creates a new org-mode source.
38func NewOrgSource(cfg *config.OrgConfig) *OrgSource {
39 return &OrgSource{cfg: cfg}
40}
41
42// Name returns the source identifier.
43func (o *OrgSource) Name() string {
44 return "org"
45}
46
47// Validate checks if org files are accessible.
48func (o *OrgSource) Validate() error {
49 for _, f := range o.cfg.Files {
50 if _, err := os.Stat(f); err != nil {
51 return err
52 }
53 }
54 return nil
55}
56
57// Fetch retrieves org-mode activities within the time range.
58func (o *OrgSource) Fetch(ctx context.Context, start, end time.Time) (*activity.Activity, error) {
59 act := &activity.Activity{
60 Source: "org",
61 Items: []activity.ActivityItem{},
62 }
63
64 // Parse configured files
65 for _, filePath := range o.cfg.Files {
66 items, err := o.parseOrgFile(filePath, start, end)
67 if err != nil {
68 continue // Skip files that fail to parse
69 }
70 act.Items = append(act.Items, items...)
71 }
72
73 // Parse archive directory if configured
74 if o.cfg.ArchiveDir != "" {
75 items, err := o.parseArchiveDir(o.cfg.ArchiveDir, start, end)
76 if err == nil {
77 act.Items = append(act.Items, items...)
78 }
79 }
80
81 return act, nil
82}
83
84// parseArchiveDir scans an archive directory for org files with relevant state changes.
85// It processes .org files and extensionless files (common for org-mode archives).
86func (o *OrgSource) parseArchiveDir(dir string, start, end time.Time) ([]activity.ActivityItem, error) {
87 var items []activity.ActivityItem
88
89 err := filepath.Walk(dir, func(path string, info os.FileInfo, err error) error {
90 if err != nil {
91 return nil // Skip errors
92 }
93
94 if info.IsDir() {
95 return nil
96 }
97
98 // Skip known non-org files
99 lower := strings.ToLower(path)
100 if shouldSkipArchiveFile(lower) {
101 return nil
102 }
103
104 fileItems, err := o.parseOrgFile(path, start, end)
105 if err != nil {
106 return nil
107 }
108
109 items = append(items, fileItems...)
110 return nil
111 })
112
113 return items, err
114}
115
// shouldSkipArchiveFile reports whether path names a file that is definitely
// not an org-mode file and should therefore not be parsed.
//
// A file is skipped when:
//   - any part of its path contains "legacy.bak",
//   - its name ends in "~" or contains "#" (backup, auto-save and lock
//     files), or
//   - its extension is a known backup, binary or non-org extension.
//
// All comparisons are case-sensitive against lower-case literals; the caller
// in parseArchiveDir lower-cases the path before calling.
func shouldSkipArchiveFile(path string) bool {
	// The legacy.bak directory is matched anywhere in the path.
	if strings.Contains(path, "legacy.bak") {
		return true
	}

	// Backup and temp files. The "#" test looks at the file name only, so
	// that a "#" in a parent directory (e.g. "c#/notes.org") does not hide
	// every file below it.
	if strings.HasSuffix(path, "~") || strings.Contains(filepath.Base(path), "#") {
		return true
	}

	// Known backup, binary and non-org extensions.
	switch filepath.Ext(path) {
	case ".bak", ".swp",
		".html", ".htm", ".pdf", ".png", ".jpg", ".jpeg", ".gif", ".svg",
		".css", ".js", ".json", ".yaml", ".yml", ".xml",
		".tar", ".gz", ".xz", ".zip", ".bz2",
		".ttf", ".woff", ".woff2", ".eot",
		".md", ".txt", ".log", ".ico", ".gpg":
		return true
	}

	return false
}
145
146func (o *OrgSource) parseOrgFile(filePath string, start, end time.Time) ([]activity.ActivityItem, error) {
147 file, err := os.Open(filePath)
148 if err != nil {
149 return nil, err
150 }
151 defer file.Close()
152
153 var items []activity.ActivityItem
154 var currentHeading string
155 var currentSection string // Reset for each file
156 seenItems := make(map[string]bool) // Dedup by heading+timestamp
157
158 scanner := bufio.NewScanner(file)
159 for scanner.Scan() {
160 line := scanner.Text()
161
162 // Track section (level-1 heading without TODO state)
163 // Must be exactly one asterisk followed by space (not ** or more)
164 if strings.HasPrefix(line, "* ") && !strings.HasPrefix(line, "** ") {
165 // Only update section if this is a plain heading (not a TODO)
166 if !orgHeadingRe.MatchString(line) {
167 // Extract section name and convert any org links
168 sectionName := strings.TrimPrefix(line, "* ")
169 sectionName = strings.TrimSpace(sectionName)
170 // Remove any tags at the end (like :ARCHIVE:)
171 if idx := strings.LastIndex(sectionName, ":"); idx > 0 {
172 // Check if this looks like a tag (ends with :)
173 if strings.HasSuffix(sectionName, ":") {
174 tagStart := strings.LastIndex(sectionName[:idx], " ")
175 if tagStart > 0 {
176 sectionName = strings.TrimSpace(sectionName[:tagStart])
177 }
178 }
179 }
180 currentSection = convertOrgLinksToMarkdown(sectionName)
181 }
182 }
183
184 // Track current heading context
185 if matches := orgHeadingRe.FindStringSubmatch(line); len(matches) > 0 {
186 currentHeading = strings.TrimSpace(matches[3])
187 }
188
189 // Parse state changes (DONE from TODO, etc.)
190 if o.cfg.IncludeDone || o.cfg.IncludeStateChanges {
191 if matches := stateChangeRe.FindStringSubmatch(line); len(matches) > 0 {
192 toState := matches[1]
193 // Only capture transitions to DONE
194 if toState == "DONE" && currentHeading != "" {
195 ts, err := parseOrgTimestamp(matches[3])
196 if err != nil {
197 continue
198 }
199
200 // Filter by date range
201 if ts.Before(start) || ts.After(end) {
202 continue
203 }
204
205 key := currentHeading + ts.Format("2006-01-02-15:04")
206 if seenItems[key] {
207 continue
208 }
209 seenItems[key] = true
210
211 title := cleanOrgTitle(currentHeading)
212 metadata := map[string]string{
213 "file": filePath,
214 "from_state": matches[2],
215 }
216 if currentSection != "" {
217 metadata["section"] = currentSection
218 }
219
220 items = append(items, activity.ActivityItem{
221 ID: filePath + ":" + currentHeading,
222 Title: title,
223 Type: "todo_done",
224 Category: activity.CategoryOrg,
225 Timestamp: ts,
226 Metadata: metadata,
227 })
228 }
229 }
230 }
231
232 // Parse CLOCK entries
233 if o.cfg.IncludeClockEntries {
234 if matches := clockEntryRe.FindStringSubmatch(line); len(matches) > 0 && currentHeading != "" {
235 // Parse start time
236 ts, err := parseOrgTimestamp(matches[1])
237 if err != nil {
238 continue
239 }
240
241 // Filter by date range
242 if ts.Before(start) || ts.After(end) {
243 continue
244 }
245
246 duration := matches[3]
247
248 key := "clock:" + currentHeading + ts.Format("2006-01-02-15:04")
249 if seenItems[key] {
250 continue
251 }
252 seenItems[key] = true
253
254 title := cleanOrgTitle(currentHeading)
255 metadata := map[string]string{
256 "file": filePath,
257 "duration": duration,
258 }
259 if currentSection != "" {
260 metadata["section"] = currentSection
261 }
262
263 items = append(items, activity.ActivityItem{
264 ID: filePath + ":clock:" + currentHeading,
265 Title: title,
266 Type: "clock_entry",
267 Category: activity.CategoryOrg,
268 Timestamp: ts,
269 Metadata: metadata,
270 })
271 }
272 }
273 }
274
275 return items, nil
276}
277
// orgDayNameRe matches an English day-name abbreviation together with the
// whitespace around it. Compiled once at package scope instead of on every
// parseOrgTimestamp call.
var orgDayNameRe = regexp.MustCompile(`\s+(Mon|Tue|Wed|Thu|Fri|Sat|Sun)\s+`)

// orgTimestampLayouts lists the accepted timestamp layouts, most specific
// first.
var orgTimestampLayouts = []string{
	"2006-01-02 Mon 15:04",
	"2006-01-02 Mon",
	"2006-01-02",
}

// parseOrgTimestamp parses the text of an org timestamp (without the
// surrounding brackets), such as "2026-01-25 Sat 15:30", "2026-01-25 Sat"
// or "2026-01-25". Leading and trailing whitespace is ignored.
//
// The input is tried as given first. If that fails, the day name and the
// whitespace around it are normalised to " Mon " and the layouts are tried
// again, which accepts tabs or other whitespace around the day name.
//
// It returns the parsed time as produced by time.Parse, or a non-nil error
// when no layout matches.
func parseOrgTimestamp(s string) (time.Time, error) {
	s = strings.TrimSpace(s)

	// try attempts every known layout against candidate.
	try := func(candidate string) (time.Time, bool) {
		for _, layout := range orgTimestampLayouts {
			if t, err := time.Parse(layout, candidate); err == nil {
				return t, true
			}
		}
		return time.Time{}, false
	}

	if t, ok := try(s); ok {
		return t, nil
	}
	if t, ok := try(orgDayNameRe.ReplaceAllString(s, " Mon ")); ok {
		return t, nil
	}

	// Report the failure: a zero time with a nil error would pass the
	// callers' error checks.
	return time.Time{}, fmt.Errorf("unrecognized org timestamp %q", s)
}
304
305// convertOrgLinksToMarkdown converts org-mode links to markdown format.
306// [[url][description]] -> [description](url)
307// [[url]] -> [url](url)
308func convertOrgLinksToMarkdown(s string) string {
309 // First, replace links with descriptions: [[url][desc]] -> [desc](url)
310 s = orgLinkWithDescRe.ReplaceAllString(s, "[$2]($1)")
311
312 // Then, replace bare links: [[url]] -> [url](url)
313 s = orgLinkBareRe.ReplaceAllStringFunc(s, func(match string) string {
314 // Extract the URL from [[url]]
315 url := orgLinkBareRe.FindStringSubmatch(match)[1]
316 return "[" + url + "](" + url + ")"
317 })
318
319 return s
320}
321
322// cleanOrgTitle removes org-mode artifacts from a title.
323// - Converts org links to markdown
324// - Removes trailing tags like :ARCHIVE:, :CANX:, etc.
325func cleanOrgTitle(s string) string {
326 // Remove trailing tags
327 s = orgTagsRe.ReplaceAllString(s, "")
328 // Convert links
329 s = convertOrgLinksToMarkdown(s)
330 return strings.TrimSpace(s)
331}