jorge/markup/smartify.go
facundo 4ef95bf910
Some checks failed
Test project / build (push) Has been cancelled
fix smartify replacement of quoted questions/exclamations
2024-09-24 11:30:52 -03:00

82 lines
2.2 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

package markup
// Implements a naive version of smart quote replacement, see https://daringfireball.net/projects/smartypants/
// The quote replacement code was adapted from gojekyll's smartify filter to work on entire HTML documents
// https://github.com/osteele/gojekyll/blob/f1794a874890bfb601cae767a0cce15d672e9058/filters/smartify.go
// MIT License: https://github.com/osteele/gojekyll/blob/f1794a874890bfb601cae767a0cce15d672e9058/LICENSE
import (
"bytes"
"io"
"regexp"
"slices"
"strings"
"golang.org/x/net/html"
)
// SKIP_TAGS lists the HTML elements whose contents are never smartified:
// smartifyHTMLNode does not descend into an element whose tag name appears here.
// "style" is included alongside "script" because CSS is raw text too; rewriting
// it would curl the quotes of string values and turn the leading "--" of custom
// properties into a dash.
var SKIP_TAGS = []string{"pre", "code", "kbd", "script", "style", "math"}
// Smartify applies smart punctuation replacement to an HTML document.
//
// When extension is anything other than ".html" the given reader is returned
// unchanged. Otherwise the content is parsed with html.Parse, the text nodes of
// the resulting tree are rewritten by smartifyHTMLNode, and the tree is
// rendered back into a new in-memory reader.
//
// A non-nil error is returned, together with a nil reader, when the content
// cannot be parsed or the modified tree cannot be rendered.
func Smartify(extension string, contentReader io.Reader) (io.Reader, error) {
	if extension != ".html" {
		return contentReader, nil
	}

	node, err := html.Parse(contentReader)
	if err != nil {
		return nil, err
	}
	smartifyHTMLNode(node)

	var buf bytes.Buffer
	// Render reports its own failures; don't hand back a partial document.
	if err := html.Render(&buf, node); err != nil {
		return nil, err
	}
	return &buf, nil
}
// smartifyHTMLNode walks the children of node depth-first and rewrites the
// data of every text node with smartifyString. Elements named in SKIP_TAGS are
// passed over entirely, so nothing nested inside them is touched.
func smartifyHTMLNode(node *html.Node) {
	for child := node.FirstChild; child != nil; child = child.NextSibling {
		switch {
		case child.Type == html.ElementNode && slices.Contains(SKIP_TAGS, child.Data):
			// Protected element: leave the whole subtree as it is.
		case child.Type == html.TextNode:
			child.Data = smartifyString(child.Data)
		default:
			smartifyHTMLNode(child)
		}
	}
}
// smartifyTransforms is the ordered list of regexp rewrites that turn straight
// quotes into typographic ones. The rules are applied in sequence, each over
// the output of the previous one, so their order matters.
//
// The typographic characters are spelled as escapes (\u2018 ‘, \u2019 ’,
// \u201c “, \u201d ”) because they are easily confused with, or stripped in
// favour of, their ASCII look-alikes.
var smartifyTransforms = []struct {
	match *regexp.Regexp
	repl  string
}{
	// ``text'' becomes a double-quoted span.
	{regexp.MustCompile("(^|[^[:alnum:]])``(.+?)''"), "$1\u201c$2\u201d"},
	// A ' at the start or after a non-alphanumeric opens a single quote...
	{regexp.MustCompile(`(^|[^[:alnum:]])'`), "$1\u2018"},
	// ...and every remaining ' is a closing quote or an apostrophe.
	{regexp.MustCompile(`'`), "\u2019"},
	// A " at the start or after a non-alphanumeric opens a double quote. '?' and
	// '!' are excluded so that the quote ending a quoted question or exclamation
	// is left for the closing rule below.
	{regexp.MustCompile(`(^|[^[:alnum:]?!])"`), "$1\u201c"},
	// A " at the end or before a non-alphanumeric closes a double quote.
	{regexp.MustCompile(`"($|[^[:alnum:]])`), "\u201d$1"},
	// Undo backslashed replacements: \' and \" come out as plain ASCII quotes.
	{regexp.MustCompile(`\\[\x{2018}\x{2019}]`), "'"},
	{regexp.MustCompile(`\\[\x{201c}\x{201d}]`), `"`},
}
// smartifyReplacer performs the literal (non-regexp) substitutions: ellipsis,
// copyright, registered and trademark symbols, and dashes.
//
// strings.Replacer compares the old strings in argument order, so "---" must
// stay ahead of "--" for a triple hyphen to become an em dash rather than an
// en dash followed by a hyphen.
var smartifyReplacer = strings.NewReplacer(
	"...", "…",
	"(c)", "©",
	"(r)", "®",
	"(tm)", "™",
	// The dashes are straight replacements instead of regexes (which weren't accurate).
	// NOTE: go-org already does dash and ellipsis replacement, remove altogether if it causes issues.
	"---", "\u2014", // em dash
	"--", "\u2013", // en dash
)
// smartifyString returns s with smart punctuation applied: every rule in
// smartifyTransforms is run in order, each over the previous result, and the
// outcome is then passed through smartifyReplacer.
func smartifyString(s string) string {
	out := s
	for i := range smartifyTransforms {
		transform := &smartifyTransforms[i]
		out = transform.match.ReplaceAllString(out, transform.repl)
	}
	return smartifyReplacer.Replace(out)
}