2024-02-26 12:16:06 -03:00
|
|
|
package markup
|
2024-02-19 18:03:06 -03:00
|
|
|
|
|
|
|
import (
|
|
|
|
"bytes"
|
|
|
|
"io"
|
|
|
|
|
|
|
|
"golang.org/x/net/html"
|
|
|
|
)
|
|
|
|
|
|
|
|
// Find the first p tag in the given html document and return its text content.
|
|
|
|
func ExtractFirstParagraph(htmlReader io.Reader) string {
|
|
|
|
html, err := html.Parse(htmlReader)
|
|
|
|
if err != nil {
|
|
|
|
return ""
|
|
|
|
}
|
|
|
|
|
|
|
|
ptag := findFirstElement(html, "p")
|
|
|
|
if ptag == nil {
|
|
|
|
return ""
|
|
|
|
}
|
|
|
|
return getTextContent(ptag)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Inject a <script> tag with the given JavaScript code into provided the HTML document
|
|
|
|
// and return the updated document as a new io.Reader
|
|
|
|
func InjectScript(htmlReader io.Reader, jsCode string) (io.Reader, error) {
|
|
|
|
doc, err := html.Parse(htmlReader)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
scriptNode := &html.Node{
|
|
|
|
Type: html.ElementNode,
|
|
|
|
Data: "script",
|
|
|
|
Attr: []html.Attribute{
|
|
|
|
{Key: "type", Val: "text/javascript"},
|
|
|
|
},
|
|
|
|
}
|
|
|
|
|
|
|
|
// insert the script code inside the script tag
|
|
|
|
scriptTextNode := &html.Node{
|
|
|
|
Type: html.TextNode,
|
|
|
|
Data: jsCode,
|
|
|
|
}
|
|
|
|
scriptNode.AppendChild(scriptTextNode)
|
|
|
|
|
|
|
|
head := findFirstElement(doc, "head")
|
|
|
|
if head == nil {
|
|
|
|
// If <head> element not found, create one and append it to the document
|
|
|
|
head = &html.Node{
|
|
|
|
Type: html.ElementNode,
|
|
|
|
Data: "head",
|
|
|
|
}
|
|
|
|
doc.InsertBefore(head, doc.FirstChild)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Append the <script> element to the <head> element
|
|
|
|
head.AppendChild(scriptNode)
|
|
|
|
|
|
|
|
// Serialize the modified HTML document to a buffer
|
|
|
|
var buf bytes.Buffer
|
|
|
|
if err := html.Render(&buf, doc); err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
// Return a reader for the modified HTML content
|
|
|
|
return &buf, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// Finds the first occurrence of the specified element in the HTML document
|
|
|
|
func findFirstElement(n *html.Node, tagName string) *html.Node {
|
|
|
|
if n.Type == html.ElementNode && n.Data == tagName {
|
|
|
|
return n
|
|
|
|
}
|
|
|
|
for c := n.FirstChild; c != nil; c = c.NextSibling {
|
|
|
|
if element := findFirstElement(c, tagName); element != nil {
|
|
|
|
return element
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// Finds the <head> element in the HTML document
|
|
|
|
func getTextContent(node *html.Node) string {
|
|
|
|
var textContent string
|
|
|
|
if node.Type == html.TextNode {
|
|
|
|
textContent = node.Data
|
|
|
|
}
|
|
|
|
for c := node.FirstChild; c != nil; c = c.NextSibling {
|
|
|
|
textContent += getTextContent(c)
|
|
|
|
}
|
|
|
|
return textContent
|
|
|
|
}
|