diff --git a/pkg/analyzer/content.go b/pkg/analyzer/content.go index 95e32aa..4a3b5b8 100644 --- a/pkg/analyzer/content.go +++ b/pkg/analyzer/content.go @@ -659,47 +659,30 @@ func (c *ContentAnalyzer) calculateTextPlainConsistency(plainText, htmlText stri return 0.0 } - // Count common words by building sets - plainWordSet := make(map[string]int) - for _, word := range plainWords { - plainWordSet[word]++ - } - - htmlWordSet := make(map[string]int) - for _, word := range htmlWords { - htmlWordSet[word]++ - } - - // Count matches: for each unique word, count minimum occurrences in both texts + // Count common words commonWords := 0 - for word, plainCount := range plainWordSet { - if htmlCount, exists := htmlWordSet[word]; exists { - // Count the minimum occurrences between both texts - if plainCount < htmlCount { - commonWords += plainCount - } else { - commonWords += htmlCount - } + plainWordSet := make(map[string]bool) + for _, word := range plainWords { + plainWordSet[word] = true + } + + for _, word := range htmlWords { + if plainWordSet[word] { + commonWords++ } } - // Calculate ratio using total words from both texts (union approach) - // This provides a balanced measure: perfect match = 1.0, partial overlap = 0.3-0.8 - totalWords := len(plainWords) + len(htmlWords) - if totalWords == 0 { + // Calculate ratio (Jaccard similarity approximation) + maxWords := len(plainWords) + if len(htmlWords) > maxWords { + maxWords = len(htmlWords) + } + + if maxWords == 0 { return 0.0 } - // Divide by average word count for better scoring - avgWords := float32(totalWords) / 2.0 - ratio := float32(commonWords) / avgWords - - // Cap at 1.0 for perfect matches - if ratio > 1.0 { - ratio = 1.0 - } - - return ratio + return float32(commonWords) / float32(maxWords) } // normalizeText normalizes text for comparison diff --git a/web/package-lock.json b/web/package-lock.json index 46186e5..10c565e 100644 --- a/web/package-lock.json +++ b/web/package-lock.json @@ -539,6 +539,19 @@ } } }, + "node_modules/@eslint/compat/node_modules/@eslint/core": { + "version": "0.17.0", + "resolved": "https://registry.npmjs.org/@eslint/core/-/core-0.17.0.tgz", + "integrity": "sha512-yL/sLrpmtDaFEiUj1osRP4TI2MDz1AddJL+jZ7KSqvBuliN4xqYY54IfdN8qD8Toa6g1iloph1fxQNkjOxrrpQ==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "@types/json-schema": "^7.0.15" + }, + "engines": { + "node": "^18.18.0 || ^20.9.0 || >=21.1.0" + } + }, "node_modules/@eslint/config-array": { "version": "0.21.1", "resolved": "https://registry.npmjs.org/@eslint/config-array/-/config-array-0.21.1.tgz", @@ -555,22 +568,22 @@ } }, "node_modules/@eslint/config-helpers": { - "version": "0.4.2", - "resolved": "https://registry.npmjs.org/@eslint/config-helpers/-/config-helpers-0.4.2.tgz", - "integrity": "sha512-gBrxN88gOIf3R7ja5K9slwNayVcZgK6SOUORm2uBzTeIEfeVaIhOpCtTox3P6R7o2jLFwLFTLnC7kU/RGcYEgw==", + "version": "0.4.1", + "resolved": "https://registry.npmjs.org/@eslint/config-helpers/-/config-helpers-0.4.1.tgz", + "integrity": "sha512-csZAzkNhsgwb0I/UAV6/RGFTbiakPCf0ZrGmrIxQpYvGZ00PhTkSnyKNolphgIvmnJeGw6rcGVEXfTzUnFuEvw==", "dev": true, "license": "Apache-2.0", "dependencies": { - "@eslint/core": "^0.17.0" + "@eslint/core": "^0.16.0" }, "engines": { "node": "^18.18.0 || ^20.9.0 || >=21.1.0" } }, "node_modules/@eslint/core": { - "version": "0.17.0", - "resolved": "https://registry.npmjs.org/@eslint/core/-/core-0.17.0.tgz", - "integrity": "sha512-yL/sLrpmtDaFEiUj1osRP4TI2MDz1AddJL+jZ7KSqvBuliN4xqYY54IfdN8qD8Toa6g1iloph1fxQNkjOxrrpQ==", + "version": "0.16.0", + "resolved": "https://registry.npmjs.org/@eslint/core/-/core-0.16.0.tgz", + "integrity": "sha512-nmC8/totwobIiFcGkDza3GIKfAw1+hLiYVrh3I1nIomQ8PEr5cxg34jnkmGawul/ep52wGRAcyeDCNtWKSOj4Q==", "dev": true, "license": "Apache-2.0", "dependencies": { @@ -618,9 +631,9 @@ } }, "node_modules/@eslint/js": { - "version": "9.39.0", - "resolved": "https://registry.npmjs.org/@eslint/js/-/js-9.39.0.tgz", - "integrity": "sha512-BIhe0sW91JGPiaF1mOuPy5v8NflqfjIcDNpC+LbW9f609WVRX1rArrhi6Z2ymvrAry9jw+5POTj4t2t62o8Bmw==", + "version": "9.38.0", + "resolved": "https://registry.npmjs.org/@eslint/js/-/js-9.38.0.tgz", + "integrity": "sha512-UZ1VpFvXf9J06YG9xQBdnzU+kthors6KjhMAl6f4gH4usHyh31rUf2DLGInT8RFYIReYXNSydgPY0V2LuWgl7A==", "dev": true, "license": "MIT", "engines": { @@ -641,13 +654,13 @@ } }, "node_modules/@eslint/plugin-kit": { - "version": "0.4.1", - "resolved": "https://registry.npmjs.org/@eslint/plugin-kit/-/plugin-kit-0.4.1.tgz", - "integrity": "sha512-43/qtrDUokr7LJqoF2c3+RInu/t4zfrpYdoSDfYyhg52rwLV6TnOvdG4fXm7IkSB3wErkcmJS9iEhjVtOSEjjA==", + "version": "0.4.0", + "resolved": "https://registry.npmjs.org/@eslint/plugin-kit/-/plugin-kit-0.4.0.tgz", + "integrity": "sha512-sB5uyeq+dwCWyPi31B2gQlVlo+j5brPlWx4yZBrEaRo/nhdDE8Xke1gsGgtiBdaBTxuTkceLVuVt/pclrasb0A==", "dev": true, "license": "Apache-2.0", "dependencies": { - "@eslint/core": "^0.17.0", + "@eslint/core": "^0.16.0", "levn": "^0.4.1" }, "engines": { @@ -2325,9 +2338,9 @@ } }, "node_modules/eslint": { - "version": "9.39.0", - "resolved": "https://registry.npmjs.org/eslint/-/eslint-9.39.0.tgz", - "integrity": "sha512-iy2GE3MHrYTL5lrCtMZ0X1KLEKKUjmK0kzwcnefhR66txcEmXZD2YWgR5GNdcEwkNx3a0siYkSvl0vIC+Svjmg==", + "version": "9.38.0", + "resolved": "https://registry.npmjs.org/eslint/-/eslint-9.38.0.tgz", + "integrity": "sha512-t5aPOpmtJcZcz5UJyY2GbvpDlsK5E8JqRqoKtfiKE3cNh437KIqfJr3A3AKf5k64NPx6d0G3dno6XDY05PqPtw==", "dev": true, "license": "MIT", "peer": true, @@ -2335,11 +2348,11 @@ "@eslint-community/eslint-utils": "^4.8.0", "@eslint-community/regexpp": "^4.12.1", "@eslint/config-array": "^0.21.1", - "@eslint/config-helpers": "^0.4.2", - "@eslint/core": "^0.17.0", + "@eslint/config-helpers": "^0.4.1", + "@eslint/core": "^0.16.0", "@eslint/eslintrc": "^3.3.1", - "@eslint/js": "9.39.0", - "@eslint/plugin-kit": "^0.4.1", + "@eslint/js": "9.38.0", + "@eslint/plugin-kit": "^0.4.0", "@humanfs/node": "^0.16.6", "@humanwhocodes/module-importer": "^1.0.1", "@humanwhocodes/retry": "^0.4.2",