content: fix false-positive suspicious URL detection for email addresses in link text

The domain regex in hasDomainMisalignment matched local-parts like
"john.doe" in "john.doe@example.com" as if they were domain names,
causing legitimate mailto and http links to be incorrectly flagged.
Normalize email addresses in link text to their domain part before
applying the regex.
This commit is contained in:
nemunaire 2026-06-06 17:30:24 +09:00
commit 57022129e3

View file

@@ -501,6 +501,11 @@ func (c *ContentAnalyzer) hasDomainMisalignment(href, linkText string) bool {
return false return false
} }
// Replace email addresses with just their domain part to avoid false positives
// e.g. "john.doe@example.com" → "example.com" so local-part dots don't look like domains
emailAddrRegex := regexp.MustCompile(`(?i)[a-z0-9._%+\-]+@([a-z0-9.\-]+\.[a-z]{2,})`)
linkText = emailAddrRegex.ReplaceAllString(linkText, "$1")
// Common generic link texts that shouldn't trigger warnings // Common generic link texts that shouldn't trigger warnings
genericTexts := []string{ genericTexts := []string{
"click here", "read more", "learn more", "download", "subscribe", "click here", "read more", "learn more", "download", "subscribe",