Commit 9eb67e685b5d

Vincent Demeester <vincent@sbr.pm>
2024-06-17 16:31:33
go-org-readwise: almost "full" implementation.
Signed-off-by: Vincent Demeester <vincent@sbr.pm>
1 parent 9064c33
Changed files (6)
tools/go-org-readwise/internal/org/org.go
@@ -1,119 +1,38 @@
 package org
 
-import (
-	"context"
-	"errors"
-	"fmt"
-	"os"
-	"regexp"
-	"strings"
-
-	"github.com/vdemeester/home/tools/go-org-readwise/internal/readwise"
-)
-
-const (
-	// denote-id-format "%Y%m%dT%H%M%S"
-	denoteDateFormat = "20060102T150405"
-	// punctionation that is removed from file names.
-	denoteExcludedPunctuationRegexpStr = "[][{}!@#$%^&*()=+'\"?,.|;:~`‘’“”/]*"
-)
-
-var (
-	denoteExcludedPunctuationRegexp = regexp.MustCompile(denoteExcludedPunctuationRegexpStr)
-	replaceHypensRegexp             = regexp.MustCompile("[-]+")
-)
-
-/*
-For each results:
-- Define a filename (denote naming — gonna be weird but meh) — from title + first highlight date
-- Detect if the file exists
-- If the file doesn't exist, create the file
-- If the file exist, append
-
-For the file format: org file with denote naming
-And use the update date to add new highlights
-*/
-
-func Sync(ctx context.Context, target string, results []readwise.Result) error {
-	for _, result := range results {
-		// FIXME: handle the case where tags where added after
-		// a sync. In that case, we want to try different
-		// titles (without tags, …) ; most likely we want to
-		// use a regexp to "detect" part of the thing.
-		filename := denoteFilename(result)
-		fmt.Println("file", filename)
-		if _, err := os.Stat(filename); err == nil {
-			// Append to the file
-			return errors.New("Not implemented")
-		} else if errors.Is(err, os.ErrNotExist) {
-			// Create the file
-		} else {
-			// Schrodinger: file may or may not exist. See err for details.
-			// Therefore, do *NOT* use !os.IsNotExist(err) to test for file existence
-			return err
-		}
-	}
-	return nil
+// Document is a "full" org-mode document, used for a new "readwise
+// document" containing highlights. "Full" here means everything that
+// is needed to sync from readwise to org, not a full representation
+// of an org file.
+type Document struct {
+	Title       string      // rendered as #+title by mainTemplate
+	Author      string      // rendered as #+author
+	Email       string      // not rendered by mainTemplate
+	Date        string      // rendered as #+date
+	FileTags    []string    // rendered as #+filetags when non-empty
+	Identifier  string      // rendered as #+identifier
+	Category    string      // rendered as #+category
+	URL         string      // rendered as a URL property when non-empty
+	ReadwiseURL string      // rendered as a READWISE_URL property
+	Summary     string      // free text placed before the highlights
+	Highlights  []Highlight // rendered under the "* Highlights" heading
 }
 
-// See https://protesilaos.com/emacs/denote#h:4e9c7512-84dc-4dfb-9fa9-e15d51178e5d
-// DATE==SIGNATURE--TITLE__KEYWORDS.EXTENSION
-// Examples:
-// - 20240611T100401--tuesday-11-june-2024__journal.org
-// - 20240511T100401==readwise--foo__bar_baz.org
-func denoteFilename(result readwise.Result) string {
-	var date, signature, title, keywords string
-	// The DATE field represents the date in year-month-day format
-	// followed by the capital letter T (for “time”) and the
-	// current time in hour-minute-second notation. The
-	// presentation is compact: 20220531T091625. The DATE serves
-	// as the unique identifier of each note and, as such, is also
-	// known as the file’s ID or identifier.
-	date = result.FirstHighlightDate().Format(denoteDateFormat)
-
-	// File names can include a string of alphanumeric characters
-	// in the SIGNATURE field. Signatures have no clearly defined
-	// purpose and are up to the user to define. One use-case is
-	// to use them to establish sequential relations between files
-	// (e.g. 1, 1a, 1b, 1b1, 1b2, …).
-	// We use signature to mark files synced from readwise.
-	signature = "==readwise"
-
-	// The TITLE field is the title of the note, as provided by
-	// the user. It automatically gets downcased by default and is
-	// also hyphenated (Sluggification of file name
-	// components). An entry about “Economics in the Euro Area”
-	// produces an economics-in-the-euro-area string for the TITLE
-	// of the file name.
-	title = sluggify(result.Title)
-
-	// The KEYWORDS field consists of one or more entries
-	// demarcated by an underscore (the separator is inserted
-	// automatically). Each keyword is a string provided by the
-	// user at the relevant prompt which broadly describes the
-	// contents of the entry.
-	if len(result.BookTags) > 0 {
-		tags := make([]string, len(result.BookTags))
-		for i, t := range result.BookTags {
-			tags[i] = sluggify(t.Name)
-		}
-		keywords = "__" + strings.Join(tags, "_")
-	}
-
-	return fmt.Sprintf("%s%s--%s%s.org", date, signature, title, keywords)
+// PartialDocument is a subset of org-mode used for an update of a
+// "readwise document", thus containing only the new highlights.
+type PartialDocument struct {
+	Date       string      // presumably the date of the update — TODO confirm
+	Highlights []Highlight // the highlights carried by the update
 }
 
-func sluggify(s string) string {
-	// Remove punctuation
-	s = denoteExcludedPunctuationRegexp.ReplaceAllString(s, "")
-	// Replace spaces with hypens
-	s = strings.ReplaceAll(s, " ", "-")
-	// Replace underscore with hypens
-	s = strings.ReplaceAll(s, "_", "-")
-	// Replace multiple hypens with a single one
-	s = replaceHypensRegexp.ReplaceAllString(s, "-")
-	// Remove any leading and trailing hypen
-	s = strings.TrimPrefix(s, "-")
-	s = strings.TrimSuffix(s, "-")
-	return s
+// Highlight represents a readwise highlight in org-mode.
+type Highlight struct {
+	ID           string   // link description in the highlight heading
+	URL          string   // link target in the highlight heading
+	Location     string   // not rendered by mainTemplate
+	LocationType string   // not rendered by mainTemplate
+	Date         string   // timestamp in the highlight heading
+	Note         string   // rendered under "*** Note" when non-empty
+	Text         string   // body of the highlight
+	Tags         []string // rendered as org tags when non-empty
 }
tools/go-org-readwise/internal/org/sync.go
@@ -0,0 +1,176 @@
+package org
+
+import (
+	"context"
+	"errors"
+	"fmt"
+	"os"
+	"path/filepath"
+	"regexp"
+	"strings"
+
+	"github.com/vdemeester/home/tools/go-org-readwise/internal/readwise"
+)
+
+const (
+	// denoteDateFormat is the Go layout matching denote-id-format "%Y%m%dT%H%M%S".
+	denoteDateFormat = "20060102T150405"
+	// orgDateFormat is the Go layout for org dates, e.g. 2024-06-17 Mon 12:05.
+	orgDateFormat = "2006-01-02 Mon 15:04"
+	// denoteExcludedPunctuationRegexpStr matches the punctuation that is removed from file names.
+	denoteExcludedPunctuationRegexpStr = "[][{}!@#$%^&*()=+'\"?,.|;:~`‘’“”/]*"
+)
+
+var (
+	// Both expressions are compiled once at package scope and used by
+	// sluggify: denoteExcludedPunctuationRegexp strips the excluded
+	// punctuation, replaceHypensRegexp matches a run of one or more hyphens.
+	denoteExcludedPunctuationRegexp = regexp.MustCompile(denoteExcludedPunctuationRegexpStr)
+	replaceHypensRegexp             = regexp.MustCompile("[-]+")
+)
+
+/*
+For each result:
+- Define a filename (denote naming — gonna be weird but meh) — from title + first highlight date
+- Detect if the file exists
+- If the file doesn't exist, create the file
+- If the file exists, append
+
+For the file format: org file with denote naming
+And use the update date to add new highlights
+*/
+
+// Sync writes one org file per readwise result into the target folder.
+//
+// The file name is built by denoteFilename and joined onto target. When
+// no file with that name exists yet, the result is converted to a
+// Document, rendered with convertDocument and written with mode 0o644.
+// Appending new highlights to an already existing file is not
+// implemented yet: the first such file makes Sync stop and return an
+// error. Any other os.Stat failure is returned as well. Underlying
+// errors are wrapped with %w, so errors.Is / errors.As keep working.
+//
+// ctx is accepted for future use and is not consulted yet.
+func Sync(ctx context.Context, target string, results []readwise.Result) error {
+	for _, result := range results {
+		// FIXME: handle the case where tags were added after
+		// a sync. In that case, we want to try different
+		// titles (without tags, …) ; most likely we want to
+		// use a regexp to "detect" part of the thing.
+		filename := filepath.Join(target, denoteFilename(result))
+		fmt.Println("file", filename)
+
+		_, statErr := os.Stat(filename)
+		switch {
+		case statErr == nil:
+			// The file exists: new highlights should be appended to it.
+			return fmt.Errorf("appending to %q: not implemented", filename)
+		case errors.Is(statErr, os.ErrNotExist):
+			// The file does not exist: create it with every highlight.
+			content, err := convertDocument(createNewOrgDocument(result))
+			if err != nil {
+				return fmt.Errorf("rendering %q: %w", filename, err)
+			}
+			if err := os.WriteFile(filename, content, 0o644); err != nil {
+				return fmt.Errorf("writing org file: %w", err)
+			}
+		default:
+			// Schrodinger: file may or may not exist. See the error for details.
+			// Therefore, do *NOT* use !os.IsNotExist(err) to test for file existence
+			return fmt.Errorf("checking org file: %w", statErr)
+		}
+	}
+	return nil
+}
+
+// createNewOrgDocument builds the Document for a readwise result that
+// has no org file yet.
+//
+// File tags are the sluggified names of the book tags, one per tag; they
+// stay nil when the result has no book tags. Date and Identifier are both
+// derived from the first highlight date, formatted with orgDateFormat and
+// denoteDateFormat respectively. Highlights are converted with
+// transformHighlights.
+func createNewOrgDocument(r readwise.Result) Document {
+	var filetags []string
+	if len(r.BookTags) > 0 {
+		// Size the slice to exactly the number of book tags: an extra slot
+		// would stay empty and render as an empty tag (":a:b::") in the
+		// #+filetags line.
+		filetags = make([]string, 0, len(r.BookTags))
+		for _, t := range r.BookTags {
+			filetags = append(filetags, sluggify(t.Name))
+		}
+	}
+	first := r.FirstHighlightDate()
+	return Document{
+		Title:       r.Title,
+		Author:      r.Author,
+		ReadwiseURL: r.ReadwiseURL,
+		URL:         r.SourceURL,
+		Email:       "", // Figure out how to get the email
+		Date:        first.Format(orgDateFormat),
+		Identifier:  first.Format(denoteDateFormat),
+		FileTags:    filetags,
+		Category:    r.Category,
+		Summary:     r.Summary,
+		Highlights:  transformHighlights(r.Highlights),
+	}
+}
+
+// transformHighlights converts readwise highlights into their org-mode
+// representation, keeping the input order.
+//
+// For each highlight the ID is formatted as a decimal string, URL is the
+// readwise URL of the highlight, Date is the highlight time formatted
+// with orgDateFormat, and Tags holds the sluggified tag names (nil when
+// the highlight has no tags). Location and LocationType are left empty.
+func transformHighlights(highlights []readwise.Highlight) []Highlight {
+	orgHighlights := make([]Highlight, len(highlights))
+	for i, h := range highlights {
+		var tags []string
+		if len(h.Tags) > 0 {
+			tags = make([]string, len(h.Tags))
+			for j, t := range h.Tags {
+				tags[j] = sluggify(t.Name)
+			}
+		}
+		orgHighlights[i] = Highlight{
+			ID:   fmt.Sprintf("%d", h.ID),
+			URL:  h.ReadwiseURL,
+			Date: h.HighlightedAt.Format(orgDateFormat),
+			Note: h.Note,
+			Text: h.Text,
+			// Store the tags so the template can render them in the heading.
+			Tags: tags,
+		}
+	}
+	return orgHighlights
+}
+
+// denoteFilename returns the denote-style file name for a readwise result.
+//
+// The layout is DATE==SIGNATURE--TITLE__KEYWORDS.EXTENSION, see
+// https://protesilaos.com/emacs/denote#h:4e9c7512-84dc-4dfb-9fa9-e15d51178e5d
+// Examples:
+// - 20240611T100401--tuesday-11-june-2024__journal.org
+// - 20240511T100401==readwise--foo__bar_baz.org
+//
+// The whole name is lower-cased before it is returned.
+func denoteFilename(result readwise.Result) string {
+	// DATE: the first highlight date in the compact denote identifier
+	// layout (e.g. 20220531T091625). It doubles as the note identifier.
+	identifier := result.FirstHighlightDate().Format(denoteDateFormat)
+
+	// SIGNATURE: free-form in denote; here it carries the category of
+	// the result.
+	sig := "==" + result.Category
+
+	// TITLE: the sluggified title, e.g. "Economics in the Euro Area"
+	// ends up as economics-in-the-euro-area once lower-cased below.
+	slug := sluggify(result.Title)
+
+	// KEYWORDS: the sluggified book tags joined with underscores and
+	// introduced by a double underscore; omitted when there are no tags.
+	var kw string
+	if n := len(result.BookTags); n > 0 {
+		names := make([]string, 0, n)
+		for _, tag := range result.BookTags {
+			names = append(names, sluggify(tag.Name))
+		}
+		kw = "__" + strings.Join(names, "_")
+	}
+
+	name := fmt.Sprintf("%s%s--%s%s.org", identifier, sig, slug, kw)
+	return strings.ToLower(name)
+}
+
+// sluggify turns s into a hyphen-separated slug: excluded punctuation is
+// dropped, spaces and underscores become hyphens, runs of hyphens are
+// collapsed into one, and a leading or trailing hyphen is removed. The
+// case of s is left untouched.
+func sluggify(s string) string {
+	// Drop the punctuation first so it cannot end up between hyphens.
+	cleaned := denoteExcludedPunctuationRegexp.ReplaceAllString(s, "")
+	// Spaces and underscores are both word separators.
+	hyphenated := strings.NewReplacer(" ", "-", "_", "-").Replace(cleaned)
+	// Collapse consecutive hyphens into a single one.
+	collapsed := replaceHypensRegexp.ReplaceAllString(hyphenated, "-")
+	// After collapsing, at most one hyphen can remain at either end.
+	return strings.TrimSuffix(strings.TrimPrefix(collapsed, "-"), "-")
+}
tools/go-org-readwise/internal/org/org_test.go → tools/go-org-readwise/internal/org/sync_test.go
File renamed without changes
tools/go-org-readwise/internal/org/write.go
@@ -0,0 +1,60 @@
+package org
+
+import (
+	"bytes"
+	"fmt"
+	"strings"
+	"text/template"
+)
+
+var (
+	// additionnalTemplate is the text/template source for a
+	// "New Highlights" heading dated with .Date. Its range over
+	// .Highlights has an empty body so far, and no code visible here
+	// references it yet.
+	additionnalTemplate = `* New Highlights on {{ .Date }}
+{{ range .Highlights }}
+{{ end }}`
+
+	// mainTemplate is the text/template source for a complete org
+	// document; convertDocument executes it with a Document. It relies on
+	// the "orgtags" template function to format file and highlight tags.
+	mainTemplate = `#+title: {{ .Title }}
+#+author: {{ .Author }}
+#+date: {{ .Date }}
+#+identifier: {{ .Identifier }}
+#+category: {{ .Category }}
+{{ if .FileTags }}#+filetags: {{ orgtags .FileTags }}{{ end }}
+#+property: READWISE_URL: {{ .ReadwiseURL }}
+{{ if .URL }}#+property: URL: {{ .URL }}{{ end }}
+
+{{ .Summary }}
+
+* Highlights
+{{ range $h := .Highlights -}}
+** [{{ $h.Date }}] Highlight [[{{ $h.URL }}][{{ $h.ID }}]]{{ if $h.Tags }} {{ orgtags $h.Tags }}{{ end }}
+{{ $h.Text }}
+{{ if $h.Note }}*** Note
+{{ $h.Note }}
+{{ end -}}
+{{ end -}}
+`
+)
+
+// orgtags formats tags the org-mode way, i.e. colon-delimited and
+// wrapped in colons (":a:b:"). It returns an empty string when there
+// are no tags.
+func orgtags(tags []string) string {
+	switch len(tags) {
+	case 0:
+		return ""
+	default:
+		// All operands are strings, so Sprint inserts no spaces.
+		return fmt.Sprint(":", strings.Join(tags, ":"), ":")
+	}
+}
+
+// convertDocument renders d as org-mode text using mainTemplate.
+//
+// The template is parsed on every call with the "orgtags" function
+// registered. It returns the rendered bytes, or a nil slice and a
+// wrapped error when parsing or executing the template fails.
+func convertDocument(d Document) ([]byte, error) {
+	funcMap := template.FuncMap{
+		"orgtags": orgtags,
+	}
+
+	tmpl, err := template.New("org").Funcs(funcMap).Parse(mainTemplate)
+	if err != nil {
+		return nil, fmt.Errorf("parsing org template: %w", err)
+	}
+	var buff bytes.Buffer
+	if err := tmpl.Execute(&buff, d); err != nil {
+		return nil, fmt.Errorf("rendering org document %q: %w", d.Title, err)
+	}
+	return buff.Bytes(), nil
+}
tools/go-org-readwise/internal/readwise/types.go
@@ -42,8 +42,7 @@ type Highlight struct {
 	Text          string    `json:"text"`
 	ID            int       `json:"id"`
 	Note          string    `json:"note"`
-	Location      int       `json:"location"`
-	LocationType  string    `json:"location_type"`
+	ReadwiseURL   string    `json:"readwise_url"`
 	HighlightedAt time.Time `json:"highlighted_at"`
 	BookID        int       `json:"book_id"`
 	URL           string    `json:"url"`
tools/go-org-readwise/main.go
@@ -14,11 +14,6 @@ import (
 )
 
 func main() {
-	// for _, n := range d.Nodes {
-	// 	fmt.Printf("%+v\n", n)
-	// }
-
-	os.Exit(1)
 	apiKeyFile := flag.String("apiKeyFile", "", "File to load the apiKey from. If empty, it will defer to the READWISE_KEY environment variable")
 	targetFolder := flag.String("targetFolder", "", "Folder to write highlights (in org file) into")
 	flag.Parse()