jorge/markup/smartify.go
Facundo Olano adb17ad9d2
run linter in github actions (#20)
* run linter in github actions

* try fixing go mod

* no install go

* again

* maybe this?

* separate install

* no v

* fix lint errors
2024-02-29 19:34:33 -03:00

80 lines
2.1 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

package markup
// Implements a naive version of smart quote replacement, see https://daringfireball.net/projects/smartypants/
// The quote replacement code was adapted from gojekyll's smartify filter to work on entire HTML documents
// https://github.com/osteele/gojekyll/blob/f1794a874890bfb601cae767a0cce15d672e9058/filters/smartify.go
// MIT License: https://github.com/osteele/gojekyll/blob/f1794a874890bfb601cae767a0cce15d672e9058/LICENSE
import (
"bytes"
"io"
"regexp"
"slices"
"strings"
"golang.org/x/net/html"
)
// SKIP_TAGS lists the HTML element names that smartifyHTMLNode skips entirely:
// a matching element is neither rewritten nor descended into, so any text
// inside it is never passed through smartifyString.
var SKIP_TAGS = []string{"pre", "code", "kbd", "script", "math"}
// Smartify applies smart-punctuation replacement to the text of an HTML
// document.
//
// When extension is anything other than ".html", contentReader is returned
// untouched. Otherwise the content is parsed as HTML, the text nodes outside of
// SKIP_TAGS elements are rewritten in place, and a reader over the re-rendered
// document is returned. If parsing or rendering fails, the returned reader is
// nil and the error is passed back to the caller.
func Smartify(extension string, contentReader io.Reader) (io.Reader, error) {
	if extension != ".html" {
		return contentReader, nil
	}

	node, err := html.Parse(contentReader)
	if err != nil {
		return nil, err
	}
	smartifyHTMLNode(node)

	var buf bytes.Buffer
	// Surface render failures instead of returning a possibly truncated
	// document together with a nil error.
	if err := html.Render(&buf, node); err != nil {
		return nil, err
	}
	return &buf, nil
}
// smartifyHTMLNode walks the children of node depth-first and rewrites the
// Data of every text node with smartifyString. Elements whose tag name appears
// in SKIP_TAGS are left alone together with everything beneath them.
func smartifyHTMLNode(node *html.Node) {
	for child := node.FirstChild; child != nil; child = child.NextSibling {
		switch {
		case child.Type == html.ElementNode && slices.Contains(SKIP_TAGS, child.Data):
			// Excluded element: do not touch it and do not descend.
		case child.Type == html.TextNode:
			child.Data = smartifyString(child.Data)
		default:
			smartifyHTMLNode(child)
		}
	}
}
// smartifyTransforms is the ordered list of regex rewrites that turn straight
// quotes into typographic ones. Each rule's match is replaced by repl, where
// $N refers to the N-th capture group. Order matters: the opening-quote rules
// must run before the generic closing/apostrophe rules that follow them.
var smartifyTransforms = []struct {
	match *regexp.Regexp
	repl  string
}{
	// TeX-style ``quoted'' text becomes “quoted”.
	{regexp.MustCompile("(^|[^[:alnum:]])``(.+?)''"), "$1“$2”"},
	// A ' at the start of the string or after a non-alphanumeric character
	// opens a quotation: left single quote (U+2018).
	{regexp.MustCompile(`(^|[^[:alnum:]])'`), "$1‘"},
	// Every remaining ' is a closing quote or an apostrophe: right single
	// quote (U+2019).
	{regexp.MustCompile(`'`), "’"},
	// A " at the start of the string or after a non-alphanumeric character
	// opens a quotation.
	{regexp.MustCompile(`(^|[^[:alnum:]])"`), "$1“"},
	// A " at the end of the string or before a non-alphanumeric character
	// closes a quotation.
	{regexp.MustCompile(`"($|[^[:alnum:]])`), "”$1"},
}
// smartifyReplacer performs the literal (non-regex) substitutions: ellipsis,
// copyright/registered/trademark symbols, and dashes. "---" is listed before
// "--" so that a triple dash becomes an em dash rather than an en dash
// followed by a stray hyphen.
var smartifyReplacer = strings.NewReplacer(
	"...", "…",
	"(c)", "©",
	"(r)", "®",
	"(tm)", "™",
	// moving the dashes to straight replacements instead of regex (which weren't accurate)
	// NOTE: go-org already does dash and ellipsis replacement, remove altogether if it causes issues
	"---", "—", // em dash (U+2014)
	"--", "–", // en dash (U+2013)
)
// smartifyString returns s with every rule in smartifyTransforms applied in
// order, followed by the literal substitutions of smartifyReplacer.
func smartifyString(s string) string {
	out := s
	for i := range smartifyTransforms {
		t := &smartifyTransforms[i]
		out = t.match.ReplaceAllString(out, t.repl)
	}
	return smartifyReplacer.Replace(out)
}