jorge/markup/html.go

94 lines
2.1 KiB
Go
Raw Permalink Normal View History

package markup
2024-02-19 18:03:06 -03:00
import (
	"bytes"
	"io"
	"strings"

	"golang.org/x/net/html"
)
// ExtractFirstParagraph parses the HTML read from htmlReader and returns the
// text content of the first <p> element in document order.
//
// It returns the empty string when the input cannot be parsed or when the
// document contains no <p> element; the parse error itself is discarded.
func ExtractFirstParagraph(htmlReader io.Reader) string {
	// Named doc (not html) so the imported html package is not shadowed
	// for the remainder of the function.
	doc, err := html.Parse(htmlReader)
	if err != nil {
		return ""
	}

	ptag := findFirstElement(doc, "p")
	if ptag == nil {
		return ""
	}
	return getTextContent(ptag)
}
// InjectScript parses the HTML document read from htmlReader, appends a
// <script type="text/javascript"> element containing jsCode to the document's
// <head>, and returns the re-serialized document as a new io.Reader.
//
// A non-nil error is returned when the input cannot be parsed or when the
// modified tree cannot be rendered; in both cases the returned reader is nil.
//
// NOTE(review): jsCode is placed in the script element as-is; whether the
// renderer guards against a "</script>" sequence inside it should be
// confirmed before passing untrusted code.
func InjectScript(htmlReader io.Reader, jsCode string) (io.Reader, error) {
	doc, err := html.Parse(htmlReader)
	if err != nil {
		return nil, err
	}

	// Build <script type="text/javascript">jsCode</script>.
	scriptNode := &html.Node{
		Type: html.ElementNode,
		Data: "script",
		Attr: []html.Attribute{
			{Key: "type", Val: "text/javascript"},
		},
	}
	scriptNode.AppendChild(&html.Node{
		Type: html.TextNode,
		Data: jsCode,
	})

	head := findFirstElement(doc, "head")
	if head == nil {
		// Defensive fallback: create the missing <head>. It belongs inside
		// <html>, so attach it there as the first child; only when there is
		// no <html> element either does it go directly under the document
		// node.
		head = &html.Node{
			Type: html.ElementNode,
			Data: "head",
		}
		parent := findFirstElement(doc, "html")
		if parent == nil {
			parent = doc
		}
		parent.InsertBefore(head, parent.FirstChild)
	}

	// The script goes last in <head>, after any existing children.
	head.AppendChild(scriptNode)

	// Serialize the modified tree; *bytes.Buffer satisfies io.Reader.
	var buf bytes.Buffer
	if err := html.Render(&buf, doc); err != nil {
		return nil, err
	}
	return &buf, nil
}
// findFirstElement searches the tree rooted at n, visiting n first and then
// each child subtree in sibling order, and returns the first element node
// whose tag name equals tagName. It returns nil when no such element exists.
func findFirstElement(n *html.Node, tagName string) *html.Node {
	isMatch := n.Type == html.ElementNode && n.Data == tagName
	if isMatch {
		return n
	}

	child := n.FirstChild
	for child != nil {
		found := findFirstElement(child, tagName)
		if found != nil {
			return found
		}
		child = child.NextSibling
	}
	return nil
}
// getTextContent returns the concatenation, in document order, of the Data of
// every text node in the subtree rooted at node (node itself included).
// Non-text nodes contribute nothing themselves; only their descendants do.
func getTextContent(node *html.Node) string {
	// A single builder is shared across the whole walk so that each text
	// fragment is copied once, instead of re-copying the accumulated string
	// at every level of recursion.
	var sb strings.Builder

	var walk func(n *html.Node)
	walk = func(n *html.Node) {
		if n.Type == html.TextNode {
			sb.WriteString(n.Data)
		}
		for c := n.FirstChild; c != nil; c = c.NextSibling {
			walk(c)
		}
	}
	walk(node)

	return sb.String()
}