content: Add a space before each text node to reduce the gap with the plain-text version
This commit is contained in:
parent
c91ab96642
commit
2172603ad5
1 changed file with 2 additions and 2 deletions
|
|
@ -627,7 +627,7 @@ func (c *ContentAnalyzer) extractTextFromHTML(htmlContent string) string {
|
||||||
var extract func(*html.Node)
|
var extract func(*html.Node)
|
||||||
extract = func(n *html.Node) {
|
extract = func(n *html.Node) {
|
||||||
if n.Type == html.TextNode {
|
if n.Type == html.TextNode {
|
||||||
text.WriteString(n.Data)
|
text.WriteString(" " + n.Data)
|
||||||
}
|
}
|
||||||
// Skip script and style tags
|
// Skip script and style tags
|
||||||
if n.Type == html.ElementNode && (n.Data == "script" || n.Data == "style") {
|
if n.Type == html.ElementNode && (n.Data == "script" || n.Data == "style") {
|
||||||
|
|
@ -639,7 +639,7 @@ func (c *ContentAnalyzer) extractTextFromHTML(htmlContent string) string {
|
||||||
}
|
}
|
||||||
extract(doc)
|
extract(doc)
|
||||||
|
|
||||||
return text.String()
|
return strings.TrimSpace(text.String())
|
||||||
}
|
}
|
||||||
|
|
||||||
// calculateTextPlainConsistency compares plain text and HTML versions
|
// calculateTextPlainConsistency compares plain text and HTML versions
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue