content: Add a space before each text node to reduce the gap with the plain-text version
This commit is contained in:
parent
c91ab96642
commit
2172603ad5
1 changed file with 2 additions and 2 deletions
|
|
@@ -627,7 +627,7 @@ func (c *ContentAnalyzer) extractTextFromHTML(htmlContent string) string {
|
|||
var extract func(*html.Node)
|
||||
extract = func(n *html.Node) {
|
||||
if n.Type == html.TextNode {
|
||||
text.WriteString(n.Data)
|
||||
text.WriteString(" " + n.Data)
|
||||
}
|
||||
// Skip script and style tags
|
||||
if n.Type == html.ElementNode && (n.Data == "script" || n.Data == "style") {
|
||||
|
|
@@ -639,7 +639,7 @@ func (c *ContentAnalyzer) extractTextFromHTML(htmlContent string) string {
|
|||
}
|
||||
extract(doc)
|
||||
|
||||
return text.String()
|
||||
return strings.TrimSpace(text.String())
|
||||
}
|
||||
|
||||
// calculateTextPlainConsistency compares plain text and HTML versions
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue