From 2172603ad58009cb3c7fca3efe6372206231d91a Mon Sep 17 00:00:00 2001 From: Pierre-Olivier Mercier Date: Fri, 7 Nov 2025 15:14:15 +0700 Subject: [PATCH] content: Add spaces behind each node to reduce gap with plain text --- pkg/analyzer/content.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pkg/analyzer/content.go b/pkg/analyzer/content.go index 87c423f..4a3b5b8 100644 --- a/pkg/analyzer/content.go +++ b/pkg/analyzer/content.go @@ -627,7 +627,7 @@ func (c *ContentAnalyzer) extractTextFromHTML(htmlContent string) string { var extract func(*html.Node) extract = func(n *html.Node) { if n.Type == html.TextNode { - text.WriteString(n.Data) + text.WriteString(" " + n.Data) } // Skip script and style tags if n.Type == html.ElementNode && (n.Data == "script" || n.Data == "style") { @@ -639,7 +639,7 @@ func (c *ContentAnalyzer) extractTextFromHTML(htmlContent string) string { } extract(doc) - return text.String() + return strings.TrimSpace(text.String()) } // calculateTextPlainConsistency compares plain text and HTML versions