From c557d4d96d36b29041437ad347d611b020f95267 Mon Sep 17 00:00:00 2001 From: Pierre-Olivier Mercier Date: Sat, 1 Nov 2025 18:08:06 +0700 Subject: [PATCH 1/2] Fix calculateTextPlainConsistency algorithm --- pkg/analyzer/content.go | 45 ++++++++++++++++++++++++++++------------- 1 file changed, 31 insertions(+), 14 deletions(-) diff --git a/pkg/analyzer/content.go b/pkg/analyzer/content.go index 87c423f..8f5d059 100644 --- a/pkg/analyzer/content.go +++ b/pkg/analyzer/content.go @@ -659,30 +659,47 @@ func (c *ContentAnalyzer) calculateTextPlainConsistency(plainText, htmlText stri return 0.0 } - // Count common words - commonWords := 0 - plainWordSet := make(map[string]bool) + // Count common words by building sets + plainWordSet := make(map[string]int) for _, word := range plainWords { - plainWordSet[word] = true + plainWordSet[word]++ } + htmlWordSet := make(map[string]int) for _, word := range htmlWords { - if plainWordSet[word] { - commonWords++ + htmlWordSet[word]++ + } + + // Count matches: for each unique word, count minimum occurrences in both texts + commonWords := 0 + for word, plainCount := range plainWordSet { + if htmlCount, exists := htmlWordSet[word]; exists { + // Count the minimum occurrences between both texts + if plainCount < htmlCount { + commonWords += plainCount + } else { + commonWords += htmlCount + } } } - // Calculate ratio (Jaccard similarity approximation) - maxWords := len(plainWords) - if len(htmlWords) > maxWords { - maxWords = len(htmlWords) - } - - if maxWords == 0 { + // Calculate ratio using total words from both texts (union approach) + // This provides a balanced measure: perfect match = 1.0, partial overlap = 0.3-0.8 + totalWords := len(plainWords) + len(htmlWords) + if totalWords == 0 { return 0.0 } - return float32(commonWords) / float32(maxWords) + // Divide by average word count for better scoring + avgWords := float32(totalWords) / 2.0 + ratio := float32(commonWords) / avgWords + + // Cap at 1.0 for perfect matches + if ratio > 1.0 { + ratio = 1.0 + } + + return ratio } // normalizeText normalizes text for comparison -- 2.52.0 From 5395d1535c5f2748f967bdf17f3660dbcc17d6ab Mon Sep 17 00:00:00 2001 From: Renovate Bot Date: Sat, 1 Nov 2025 13:10:36 +0000 Subject: [PATCH 2/2] Update dependency @types/node to v24.9.2 --- web/package-lock.json | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/web/package-lock.json b/web/package-lock.json index 01d6a6d..5aa6da8 100644 --- a/web/package-lock.json +++ b/web/package-lock.json @@ -1352,9 +1352,9 @@ "license": "MIT" }, "node_modules/@types/node": { - "version": "24.9.1", - "resolved": "https://registry.npmjs.org/@types/node/-/node-24.9.1.tgz", - "integrity": "sha512-QoiaXANRkSXK6p0Duvt56W208du4P9Uye9hWLWgGMDTEoKPhuenzNcC4vGUmrNkiOKTlIrBoyNQYNpSwfEZXSg==", + "version": "24.9.2", + "resolved": "https://registry.npmjs.org/@types/node/-/node-24.9.2.tgz", + "integrity": "sha512-uWN8YqxXxqFMX2RqGOrumsKeti4LlmIMIyV0lgut4jx7KQBcBiW6vkDtIBvHnHIquwNfJhk8v2OtmO8zXWHfPA==", "dev": true, "license": "MIT", "peer": true, -- 2.52.0