content: fix false-positive suspicious URL detection for email addresses in link text
The domain regex in hasDomainMisalignment matched local-parts like "john.doe" in "john.doe@example.com" as if they were domain names, causing legitimate mailto and http links to be incorrectly flagged. Normalize email addresses in link text to their domain part before applying the regex.
This commit is contained in:
parent
970cbc02a3
commit
57022129e3
1 changed files with 5 additions and 0 deletions
|
|
@@ -501,6 +501,11 @@ func (c *ContentAnalyzer) hasDomainMisalignment(href, linkText string) bool {
|
|||
return false
|
||||
}
|
||||
|
||||
// Replace email addresses with just their domain part to avoid false positives
|
||||
// e.g. "john.doe@example.com" → "example.com" so local-part dots don't look like domains
|
||||
emailAddrRegex := regexp.MustCompile(`(?i)[a-z0-9._%+\-]+@([a-z0-9.\-]+\.[a-z]{2,})`)
|
||||
linkText = emailAddrRegex.ReplaceAllString(linkText, "$1")
|
||||
|
||||
// Common generic link texts that shouldn't trigger warnings
|
||||
genericTexts := []string{
|
||||
"click here", "read more", "learn more", "download", "subscribe",
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue