diff --git a/go.mod b/go.mod index db2ac1d..b1c72ad 100644 --- a/go.mod +++ b/go.mod @@ -5,7 +5,6 @@ go 1.24.6 require ( github.com/JGLTechnologies/gin-rate-limit v1.5.6 github.com/emersion/go-smtp v0.24.0 - github.com/getkin/kin-openapi v0.133.0 github.com/gin-gonic/gin v1.11.0 github.com/google/uuid v1.6.0 github.com/oapi-codegen/runtime v1.1.2 @@ -16,7 +15,6 @@ require ( ) require ( - github.com/apapsch/go-jsonmerge/v2 v2.0.0 // indirect github.com/bytedance/sonic v1.14.0 // indirect github.com/bytedance/sonic/loader v0.3.0 // indirect github.com/cespare/xxhash/v2 v2.3.0 // indirect @@ -25,6 +23,7 @@ require ( github.com/dprotaso/go-yit v0.0.0-20220510233725-9ba8df137936 // indirect github.com/emersion/go-sasl v0.0.0-20241020182733-b788ff22d5a6 // indirect github.com/gabriel-vasile/mimetype v1.4.8 // indirect + github.com/getkin/kin-openapi v0.133.0 // indirect github.com/gin-contrib/sse v1.1.0 // indirect github.com/go-openapi/jsonpointer v0.21.0 // indirect github.com/go-openapi/swag v0.23.0 // indirect @@ -49,7 +48,7 @@ require ( github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect github.com/modern-go/reflect2 v1.0.2 // indirect github.com/mohae/deepcopy v0.0.0-20170929034955-c48cc78d4826 // indirect - github.com/oapi-codegen/oapi-codegen/v2 v2.5.0 // indirect + github.com/oapi-codegen/oapi-codegen/v2 v2.5.1 // indirect github.com/oasdiff/yaml v0.0.0-20250309154309-f31be36b4037 // indirect github.com/oasdiff/yaml3 v0.0.0-20250309153720-d2182401db90 // indirect github.com/pelletier/go-toml/v2 v2.2.4 // indirect diff --git a/go.sum b/go.sum index 266785d..d5915a7 100644 --- a/go.sum +++ b/go.sum @@ -1,9 +1,5 @@ github.com/JGLTechnologies/gin-rate-limit v1.5.6 h1:BrL2wXrF7SSqmB88YTGFVKMGVcjURMUeKqwQrlmzweI= github.com/JGLTechnologies/gin-rate-limit v1.5.6/go.mod h1:fwUuBegxLKm8+/4ST0zDFssRFTFaVZ7bH3ApK7iNZww= -github.com/RaveNoX/go-jsoncommentstrip v1.0.0/go.mod h1:78ihd09MekBnJnxpICcwzCMzGrKSKYe4AqU6PDYYpjk= -github.com/apapsch/go-jsonmerge/v2 v2.0.0 h1:axGnT1gRIfimI7gJifB699GoE/oq+F2MU7Dml6nw9rQ= -github.com/apapsch/go-jsonmerge/v2 v2.0.0/go.mod h1:lvDnEdqiQrp0O42VQGgmlKpxL1AP2+08jFMw88y4klk= -github.com/bmatcuk/doublestar v1.1.1/go.mod h1:UD6OnuiIn0yFxxA2le/rnRU1G4RaI4UvFv1sNto9p6w= github.com/bsm/ginkgo/v2 v2.12.0 h1:Ny8MWAHyOepLGlLKYmXG4IEkioBysk6GpaRTLC8zwWs= github.com/bsm/ginkgo/v2 v2.12.0/go.mod h1:SwYbGRRDovPVboqFv0tPTcG1sN61LM1Z4ARdbAV9g4c= github.com/bsm/gomega v1.27.10 h1:yeMWxP2pV2fG3FgAODIY8EiRE3dy0aeFYt4l7wh6yKA= @@ -98,7 +94,6 @@ github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8Hm github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y= github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= -github.com/juju/gnuflag v0.0.0-20171113085948-2ce1bb71843d/go.mod h1:2PavIy+JPciBPrBUjwbNvtwB6RQlve+hkpll6QSNmOE= github.com/klauspost/cpuid/v2 v2.3.0 h1:S4CRMLnYUhGeDFDqkGriYKdfoFlDnMtqTiI/sFzhA9Y= github.com/klauspost/cpuid/v2 v2.3.0/go.mod h1:hqwkgyIinND0mEev00jJYCxPNVRVXFQeu1XKlok6oO0= github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= @@ -126,8 +121,8 @@ github.com/mohae/deepcopy v0.0.0-20170929034955-c48cc78d4826/go.mod h1:TaXosZuwd github.com/nxadm/tail v1.4.4/go.mod h1:kenIhsEOeOJmVchQTgglprH7qJGnHDVpk1VPCcaMI8A= github.com/nxadm/tail v1.4.8 h1:nPr65rt6Y5JFSKQO7qToXr7pePgD6Gwiw05lkbyAQTE= github.com/nxadm/tail v1.4.8/go.mod h1:+ncqLTQzXmGhMZNUePPaPqPvBxHAIsmXswZKocGu+AU= -github.com/oapi-codegen/oapi-codegen/v2 v2.5.0 h1:iJvF8SdB/3/+eGOXEpsWkD8FQAHj6mqkb6Fnsoc8MFU= -github.com/oapi-codegen/oapi-codegen/v2 v2.5.0/go.mod h1:fwlMxUEMuQK5ih9aymrxKPQqNm2n8bdLk1ppjH+lr9w= +github.com/oapi-codegen/oapi-codegen/v2 v2.5.1 h1:5vHNY1uuPBRBWqB2Dp0G7YB03phxLQZupZTIZaeorjc= +github.com/oapi-codegen/oapi-codegen/v2 v2.5.1/go.mod h1:ro0npU1BWkcGpCgGD9QwPp44l5OIZ94tB3eabnT7DjQ= github.com/oapi-codegen/runtime v1.1.2 h1:P2+CubHq8fO4Q6fV1tqDBZHCwpVpvPg7oKiYzQgXIyI= github.com/oapi-codegen/runtime v1.1.2/go.mod h1:SK9X900oXmPWilYR5/WKPzt3Kqxn/uS/+lbpREv+eCg= github.com/oasdiff/yaml v0.0.0-20250309154309-f31be36b4037 h1:G7ERwszslrBzRxj//JalHPu/3yz+De2J+4aLtSRlHiY= @@ -166,7 +161,6 @@ github.com/speakeasy-api/jsonpath v0.6.0 h1:IhtFOV9EbXplhyRqsVhHoBmmYjblIRh5D1/g github.com/speakeasy-api/jsonpath v0.6.0/go.mod h1:ymb2iSkyOycmzKwbEAYPJV/yi2rSmvBCLZJcyD+VVWw= github.com/speakeasy-api/openapi-overlay v0.10.2 h1:VOdQ03eGKeiHnpb1boZCGm7x8Haj6gST0P3SGTX95GU= github.com/speakeasy-api/openapi-overlay v0.10.2/go.mod h1:n0iOU7AqKpNFfEt6tq7qYITC4f0yzVVdFw0S7hukemg= -github.com/spkg/bom v0.0.0-20160624110644-59b7046e48ad/go.mod h1:qLr4V1qq6nMqFKkMo8ZTx3f+BZEkzsRUY10Xsm2mwU0= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= diff --git a/pkg/analyzer/content.go b/pkg/analyzer/content.go index 87c423f..8f5d059 100644 --- a/pkg/analyzer/content.go +++ b/pkg/analyzer/content.go @@ -659,30 +659,47 @@ func (c *ContentAnalyzer) calculateTextPlainConsistency(plainText, htmlText stri return 0.0 } - // Count common words - commonWords := 0 - plainWordSet := make(map[string]bool) + // Count common words by building sets + plainWordSet := make(map[string]int) for _, word := range plainWords { - plainWordSet[word] = true + plainWordSet[word]++ } + htmlWordSet := make(map[string]int) for _, word := range htmlWords { - if plainWordSet[word] { - commonWords++ + htmlWordSet[word]++ + } + + // Count matches: for each unique word, count minimum occurrences in both texts + commonWords := 0 + for word, plainCount := range plainWordSet { + if htmlCount, exists := htmlWordSet[word]; exists { + // Count the minimum occurrences between both texts + if plainCount < htmlCount { + commonWords += plainCount + } else { + commonWords += htmlCount + } } } - // Calculate ratio (Jaccard similarity approximation) - maxWords := len(plainWords) - if len(htmlWords) > maxWords { - maxWords = len(htmlWords) - } - - if maxWords == 0 { + // Calculate ratio using total words from both texts (union approach) + // This provides a balanced measure: perfect match = 1.0, partial overlap = 0.3-0.8 + totalWords := len(plainWords) + len(htmlWords) + if totalWords == 0 { return 0.0 } - return float32(commonWords) / float32(maxWords) + // Divide by average word count for better scoring + avgWords := float32(totalWords) / 2.0 + ratio := float32(commonWords) / avgWords + + // Cap at 1.0 for perfect matches + if ratio > 1.0 { + ratio = 1.0 + } + + return ratio } // normalizeText normalizes text for comparison