Add smart quotes replacement in output html (#14)

* first basic implementation with gojekyll port

* extract smartify file

* template struct comments

* rename templates package to markup

* move minify file to markup

* move html to markup

* move smartify to markup

* first stab at unit test

* better dash replacement

* do plain replacement of dashes
This commit is contained in:
Facundo Olano 2024-02-26 12:16:06 -03:00 committed by GitHub
parent dd94738555
commit 924e4629b2
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
11 changed files with 196 additions and 54 deletions

2
go.mod
View file

@ -3,6 +3,7 @@ module github.com/facundoolano/jorge
go 1.22.0
require (
github.com/alecthomas/kong v0.8.1
github.com/elliotchance/orderedmap/v2 v2.2.0
github.com/fsnotify/fsnotify v1.7.0
github.com/niklasfasching/go-org v1.7.0
@ -16,7 +17,6 @@ require (
)
require (
github.com/alecthomas/kong v0.8.1 // indirect
github.com/osteele/tuesday v1.0.3 // indirect
github.com/tdewolff/parse/v2 v2.7.11 // indirect
golang.org/x/sys v0.16.0 // indirect

6
go.sum
View file

@ -1,11 +1,17 @@
github.com/alecthomas/assert/v2 v2.1.0 h1:tbredtNcQnoSd3QBhQWI7QZ3XHOVkw1Moklp2ojoH/0=
github.com/alecthomas/assert/v2 v2.1.0/go.mod h1:b/+1DI2Q6NckYi+3mXyH3wFb8qG37K/DuK80n7WefXA=
github.com/alecthomas/kong v0.8.1 h1:acZdn3m4lLRobeh3Zi2S2EpnXTd1mOL6U7xVml+vfkY=
github.com/alecthomas/kong v0.8.1/go.mod h1:n1iCIO2xS46oE8ZfYCNDqdR0b0wZNrXAIAqro/2132U=
github.com/alecthomas/repr v0.1.0 h1:ENn2e1+J3k09gyj2shc0dHr/yjaWSHRlrJ4DPMevDqE=
github.com/alecthomas/repr v0.1.0/go.mod h1:2kn6fqh/zIyPLmm3ugklbEi5hg5wS435eygvNfaDQL8=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/elliotchance/orderedmap/v2 v2.2.0 h1:7/2iwO98kYT4XkOjA9mBEIwvi4KpGB4cyHeOFOnj4Vk=
github.com/elliotchance/orderedmap/v2 v2.2.0/go.mod h1:85lZyVbpGaGvHvnKa7Qhx7zncAdBIBq6u56Hb1PRU5Q=
github.com/fsnotify/fsnotify v1.7.0 h1:8JEhPFa5W2WU7YfeZzPNqzMP6Lwt7L2715Ggo0nosvA=
github.com/fsnotify/fsnotify v1.7.0/go.mod h1:40Bi/Hjc2AVfZrqy+aj+yEI+/bRxZnMJyTJwOpGvigM=
github.com/hexops/gotextdiff v1.0.3 h1:gitA9+qJrrTCsiCl7+kh75nPqQt1cx4ZkudSTLoUqJM=
github.com/hexops/gotextdiff v1.0.3/go.mod h1:pSWU5MAI3yDq+fZBTazCSJysOMbxWL1BSow5/V2vxeg=
github.com/niklasfasching/go-org v1.7.0 h1:vyMdcMWWTe/XmANk19F4k8XGBYg0GQ/gJGMimOjGMek=
github.com/niklasfasching/go-org v1.7.0/go.mod h1:WuVm4d45oePiE0eX25GqTDQIt/qPW1T9DGkRscqLW5o=
github.com/osteele/liquid v1.3.2 h1:G+MvVYt1HX2xuv99JgdrhV7zRVdlvFnNi8M5rN8gQmI=

View file

@ -1,4 +1,4 @@
package templates
package markup
import (
"bytes"

View file

@ -1,4 +1,4 @@
package site
package markup
import (
"bytes"

36
markup/minify.go Normal file
View file

@ -0,0 +1,36 @@
package markup
import (
"io"
"slices"
"github.com/tdewolff/minify/v2"
"github.com/tdewolff/minify/v2/css"
"github.com/tdewolff/minify/v2/html"
"github.com/tdewolff/minify/v2/js"
"github.com/tdewolff/minify/v2/xml"
)
// SUPPORTED_MINIFIERS lists the file extensions for which LoadMinifier
// registers a minify function. Minify passes any other extension through
// untouched.
var SUPPORTED_MINIFIERS = []string{".css", ".html", ".js", ".xml"}

// Minifier wraps a tdewolff *minify.M so callers do not depend on the
// minify package directly. Build one with LoadMinifier.
type Minifier struct {
	minifier *minify.M
}
// LoadMinifier builds a Minifier with the css, html, js and xml minify
// functions registered, each keyed by its file extension (not by mime type)
// so that Minify can dispatch on the target file's extension.
func LoadMinifier() Minifier {
	m := minify.New()
	m.AddFunc(".css", css.Minify)
	m.AddFunc(".html", html.Minify)
	m.AddFunc(".js", js.Minify)
	m.AddFunc(".xml", xml.Minify)
	return Minifier{minifier: m}
}
// Minify returns a reader that yields the minified version of contentReader
// when extension is one of SUPPORTED_MINIFIERS. For any other extension the
// original reader is returned unchanged.
func (m *Minifier) Minify(extension string, contentReader io.Reader) io.Reader {
	if slices.Contains(SUPPORTED_MINIFIERS, extension) {
		return m.minifier.Reader(extension, contentReader)
	}
	return contentReader
}

81
markup/smartify.go Normal file
View file

@ -0,0 +1,81 @@
package markup
// Implements a naive version of smart quote replacement, see https://daringfireball.net/projects/smartypants/
// The quote replacement code was adapted from gojekyll's smartify filter to work on entire HTML documents
// https://github.com/osteele/gojekyll/blob/f1794a874890bfb601cae767a0cce15d672e9058/filters/smartify.go
// MIT License: https://github.com/osteele/gojekyll/blob/f1794a874890bfb601cae767a0cce15d672e9058/LICENSE
import (
"bytes"
"io"
"regexp"
"slices"
"strings"
"golang.org/x/net/html"
)
// SKIP_TAGS lists the HTML elements whose contents must be left verbatim by
// the smart quote replacement: preformatted or code-like text, plus raw-text
// elements such as script and style, where replacing straight quotes with
// curly ones would corrupt the embedded JavaScript or CSS.
var SKIP_TAGS = []string{"pre", "code", "kbd", "script", "style", "math"}
// Smartify applies smart typographic replacements (curly quotes, dashes,
// ellipsis, etc.) to the text nodes of an HTML document.
//
// If extension is not ".html" the reader is returned untouched. Otherwise the
// whole document is parsed, its text nodes are rewritten in place (skipping
// the elements listed in SKIP_TAGS) and the tree is rendered back into an
// in-memory buffer. Since the result is a re-rendering of the parsed tree,
// the markup may come back normalized rather than byte-identical.
//
// Returns a nil reader and the error if either parsing or rendering fails.
func Smartify(extension string, contentReader io.Reader) (io.Reader, error) {
	if extension != ".html" {
		return contentReader, nil
	}

	node, err := html.Parse(contentReader)
	if err != nil {
		return nil, err
	}
	smartifyHTMLNode(node)

	var buf bytes.Buffer
	// Render can fail on trees it cannot serialize; surface that instead of
	// handing a partially written buffer to the caller.
	if err := html.Render(&buf, node); err != nil {
		return nil, err
	}
	return &buf, nil
}
// smartifyHTMLNode walks the children of node depth-first, rewriting the
// data of every text node with smartifyString. Elements whose tag is listed
// in SKIP_TAGS are left alone together with their whole subtree.
func smartifyHTMLNode(node *html.Node) {
	for child := node.FirstChild; child != nil; child = child.NextSibling {
		switch {
		case child.Type == html.TextNode:
			child.Data = smartifyString(child.Data)
		case child.Type == html.ElementNode && slices.Contains(SKIP_TAGS, child.Data):
			// verbatim element: do not descend
		default:
			smartifyHTMLNode(child)
		}
	}
}
// smartifyTransforms are the regexp based quote replacements, applied in
// order by smartifyString. Order matters: opening quotes (those at the start
// of the string or after a non-alphanumeric character) are handled before
// the catch-all rules that turn the remaining ones into closing quotes.
//
// Replacement characters are written as \u escapes so they survive tools that
// mangle non-ASCII source text, and capture groups use the ${n} form so the
// group number can never be merged with the character that follows it.
var smartifyTransforms = []struct {
	match *regexp.Regexp
	repl  string
}{
	// ``text'' -> left/right double quotes around text
	{regexp.MustCompile("(^|[^[:alnum:]])``(.+?)''"), "${1}\u201c${2}\u201d"},
	// opening single quote -> left single quote
	{regexp.MustCompile(`(^|[^[:alnum:]])'`), "${1}\u2018"},
	// any remaining single quote (closing quote or apostrophe) -> right single quote
	{regexp.MustCompile(`'`), "\u2019"},
	// opening double quote -> left double quote
	{regexp.MustCompile(`(^|[^[:alnum:]])"`), "${1}\u201c"},
	// closing double quote -> right double quote
	{regexp.MustCompile(`"($|[^[:alnum:]])`), "\u201d${1}"},
}
// smartifyReplacer performs the plain (non regexp) typographic replacements.
// The old strings are compared in argument order, so "---" must be listed
// before "--" for em dashes to win over en dashes.
//
// Dashes are straight replacements instead of regexes (which weren't accurate).
// NOTE: go-org already does dash and ellipsis replacement, remove altogether
// if it causes issues.
var smartifyReplacer = strings.NewReplacer(
	"...", "\u2026", // horizontal ellipsis
	"(c)", "\u00a9", // copyright sign
	"(r)", "\u00ae", // registered sign
	"(tm)", "\u2122", // trade mark sign
	"---", "\u2014", // em dash
	"--", "\u2013", // en dash
)

// NOTE(review): smartifyReplaceSpans is not referenced anywhere in this file;
// kept in case another file of the package uses it -- confirm and remove.
var smartifyReplaceSpans = map[string]string{}
// smartifyString returns s with every smartifyTransforms rule applied in
// order, followed by the plain replacements of smartifyReplacer.
func smartifyString(s string) string {
	result := s
	for i := range smartifyTransforms {
		transform := &smartifyTransforms[i]
		result = transform.match.ReplaceAllString(result, transform.repl)
	}
	return smartifyReplacer.Replace(result)
}

37
markup/smartify_test.go Normal file
View file

@ -0,0 +1,37 @@
package markup
import (
"io"
"strings"
"testing"
)
// TestSmartify checks that Smartify rewrites quotes, dashes and ellipsis in
// regular text nodes while leaving the contents of <script> untouched.
func TestSmartify(t *testing.T) {
	input := `<html>
<head>
<script type="text/javascript">
const url = 'http://localhost:4001/_events/';
const string = "joe's garage";
</script>
</head>
<body>
<p>the album is "Joe's Garage" --by Frank Zappa...</p>
</body>
</html>`

	output, err := Smartify(".html", strings.NewReader(input))
	// Stop here on failure: output is nil in that case and copying from it
	// would panic instead of reporting the actual error.
	if err != nil {
		t.Fatalf("Smartify returned an error: %v", err)
	}

	buf := new(strings.Builder)
	_, err = io.Copy(buf, output)
	assertEqual(t, err, nil)

	// The expected document is the re-rendered parse tree: the newline that
	// followed </body> in the input ends up inside the body element.
	assertEqual(t, buf.String(), `<html><head>
<script type="text/javascript">
const url = 'http://localhost:4001/_events/';
const string = "joe's garage";
</script>
</head>
<body>
<p>the album is “Joe’s Garage” –by Frank Zappa…</p>

</body></html>`)
}

View file

@ -1,4 +1,4 @@
package templates
package markup
import (
"bufio"
@ -33,6 +33,11 @@ func NewEngine(siteUrl string, includesDir string) *Engine {
return e
}
// Try to parse a liquid template at the given location.
// Files starting with front matter (--- surrounded yaml)
// are considered templates. If the given file is not headed by front matter
// return (nil, nil).
// The front matter contents are stored in the returned template's Metadata.
func Parse(engine *Engine, path string) (*Template, error) {
file, err := os.Open(path)
if err != nil {
@ -97,6 +102,9 @@ func (templ Template) Ext() string {
return ext
}
// Renders the liquid template with the given context as bindings.
// If the template source is org or md, convert them to html after the
// liquid rendering.
func (templ Template) Render(context map[string]interface{}) ([]byte, error) {
// liquid rendering
content, err := templ.liquidTemplate.Render(context)

View file

@ -1,4 +1,4 @@
package templates
package markup
import (
"os"

View file

@ -1,33 +0,0 @@
package site
import (
"io"
"slices"
"github.com/tdewolff/minify/v2"
"github.com/tdewolff/minify/v2/css"
"github.com/tdewolff/minify/v2/html"
"github.com/tdewolff/minify/v2/js"
"github.com/tdewolff/minify/v2/xml"
)
// SUPPORTED_MINIFIERS lists the file extensions that have a minify function
// registered by loadMinifier.
var SUPPORTED_MINIFIERS = []string{".css", ".html", ".js", ".xml"}

// Minifier is an alias of the tdewolff minify.M type.
type Minifier = minify.M
// loadMinifier stores a new minifier in site.minifier and registers the
// css, html, js and xml minify functions, keyed by file extension.
func (site *Site) loadMinifier() {
	// the freshly created minifier is copied by value into the site struct
	site.minifier = *minify.New()
	site.minifier.AddFunc(".css", css.Minify)
	site.minifier.AddFunc(".html", html.Minify)
	site.minifier.AddFunc(".js", js.Minify)
	site.minifier.AddFunc(".xml", xml.Minify)
}
// if enabled by config, minify web files
// minify returns a reader yielding the minified contents of contentReader.
// The reader is returned unchanged when minification is disabled in the site
// config or when extension is not one of SUPPORTED_MINIFIERS.
func (site *Site) minify(extension string, contentReader io.Reader) io.Reader {
	if !site.Config.Minify || !slices.Contains(SUPPORTED_MINIFIERS, extension) {
		return contentReader
	}
	return site.minifier.Reader(extension, contentReader)
}

View file

@ -14,7 +14,7 @@ import (
"time"
"github.com/facundoolano/jorge/config"
"github.com/facundoolano/jorge/templates"
"github.com/facundoolano/jorge/markup"
"gopkg.in/yaml.v3"
)
@ -22,26 +22,26 @@ const FILE_RW_MODE = 0777
type Site struct {
Config config.Config
layouts map[string]templates.Template
layouts map[string]markup.Template
posts []map[string]interface{}
pages []map[string]interface{}
tags map[string][]map[string]interface{}
data map[string]interface{}
templateEngine *templates.Engine
templates map[string]*templates.Template
templateEngine *markup.Engine
templates map[string]*markup.Template
minifier Minifier
minifier markup.Minifier
}
func Load(config config.Config) (*Site, error) {
site := Site{
layouts: make(map[string]templates.Template),
templates: make(map[string]*templates.Template),
layouts: make(map[string]markup.Template),
templates: make(map[string]*markup.Template),
Config: config,
tags: make(map[string][]map[string]interface{}),
data: make(map[string]interface{}),
templateEngine: templates.NewEngine(config.SiteUrl, config.IncludesDir),
templateEngine: markup.NewEngine(config.SiteUrl, config.IncludesDir),
}
if err := site.loadDataFiles(); err != nil {
@ -56,7 +56,7 @@ func Load(config config.Config) (*Site, error) {
return nil, err
}
site.loadMinifier()
site.minifier = markup.LoadMinifier()
return &site, nil
}
@ -74,7 +74,7 @@ func (site *Site) loadLayouts() error {
if !entry.IsDir() {
filename := entry.Name()
path := filepath.Join(site.Config.LayoutsDir, filename)
templ, err := templates.Parse(site.templateEngine, path)
templ, err := markup.Parse(site.templateEngine, path)
if err != nil {
return checkFileError(err)
}
@ -126,7 +126,7 @@ func (site *Site) loadTemplates() error {
err := filepath.WalkDir(site.Config.SrcDir, func(path string, entry fs.DirEntry, err error) error {
if !entry.IsDir() {
templ, err := templates.Parse(site.templateEngine, path)
templ, err := markup.Parse(site.templateEngine, path)
// if something fails or this is not a template, skip
if err != nil || templ == nil {
return checkFileError(err)
@ -265,18 +265,25 @@ func (site *Site) buildFile(path string) error {
contentReader = bytes.NewReader(content)
}
// post process file according to extension and config
targetExt := filepath.Ext(targetPath)
contentReader, err = markup.Smartify(targetExt, contentReader)
if err != nil {
return err
}
contentReader, err = site.injectLiveReload(targetExt, contentReader)
if err != nil {
return err
}
contentReader = site.minify(targetExt, contentReader)
if site.Config.Minify {
contentReader = site.minifier.Minify(targetExt, contentReader)
}
// write the file contents over to target
return writeToFile(targetPath, contentReader)
}
func (site *Site) render(templ *templates.Template) ([]byte, error) {
func (site *Site) render(templ *markup.Template) ([]byte, error) {
ctx := map[string]interface{}{
"site": map[string]interface{}{
"config": site.Config.AsContext(),
@ -344,7 +351,7 @@ func writeToFile(targetPath string, source io.Reader) error {
// Assuming the given template is a post, try to generate an excerpt of it.
// If it contains an `excerpt` key in its metadata use that, otherwise try
// to render it as HTML and extract the text of its first <p>
func getExcerpt(templ *templates.Template) string {
func getExcerpt(templ *markup.Template) string {
if excerpt, ok := templ.Metadata["excerpt"]; ok {
return excerpt.(string)
}
@ -361,7 +368,7 @@ func getExcerpt(templ *templates.Template) string {
if err != nil {
return ""
}
return ExtractFirstParagraph(bytes.NewReader(content))
return markup.ExtractFirstParagraph(bytes.NewReader(content))
}
// if live reload is enabled, inject the reload snippet to html files
@ -384,5 +391,5 @@ eventSource.onerror = function (event) {
console.error('An error occurred:', event)
};`
script := fmt.Sprintf(JS_SNIPPET, site.Config.SiteUrl)
return InjectScript(contentReader, script)
return markup.InjectScript(contentReader, script)
}