content: Add a space before each text node to reduce the gap with the plain text
All checks were successful
continuous-integration/drone/push Build is passing
continuous-integration/drone/tag Build is passing

This commit is contained in:
nemunaire 2025-11-07 15:14:15 +07:00
commit 2172603ad5

View file

@@ -627,7 +627,7 @@ func (c *ContentAnalyzer) extractTextFromHTML(htmlContent string) string {
var extract func(*html.Node) var extract func(*html.Node)
extract = func(n *html.Node) { extract = func(n *html.Node) {
if n.Type == html.TextNode { if n.Type == html.TextNode {
text.WriteString(n.Data) text.WriteString(" " + n.Data)
} }
// Skip script and style tags // Skip script and style tags
if n.Type == html.ElementNode && (n.Data == "script" || n.Data == "style") { if n.Type == html.ElementNode && (n.Data == "script" || n.Data == "style") {
@@ -639,7 +639,7 @@ func (c *ContentAnalyzer) extractTextFromHTML(htmlContent string) string {
} }
extract(doc) extract(doc)
return text.String() return strings.TrimSpace(text.String())
} }
// calculateTextPlainConsistency compares plain text and HTML versions // calculateTextPlainConsistency compares plain text and HTML versions