Compare commits
2 commits
88553cd3c8
...
1c1d474870
| Author | SHA1 | Date | |
|---|---|---|---|
| 1c1d474870 | |||
| 521d5da84c |
2 changed files with 38 additions and 13 deletions
|
|
@ -27,6 +27,7 @@ import (
|
||||||
"net/http"
|
"net/http"
|
||||||
"net/url"
|
"net/url"
|
||||||
"regexp"
|
"regexp"
|
||||||
|
"slices"
|
||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
"unicode"
|
"unicode"
|
||||||
|
|
@ -37,8 +38,9 @@ import (
|
||||||
|
|
||||||
// ContentAnalyzer analyzes email content (HTML, links, images)
|
// ContentAnalyzer analyzes email content (HTML, links, images)
|
||||||
type ContentAnalyzer struct {
|
type ContentAnalyzer struct {
|
||||||
Timeout time.Duration
|
Timeout time.Duration
|
||||||
httpClient *http.Client
|
httpClient *http.Client
|
||||||
|
listUnsubscribeURLs []string // URLs from List-Unsubscribe header
|
||||||
}
|
}
|
||||||
|
|
||||||
// NewContentAnalyzer creates a new content analyzer with configurable timeout
|
// NewContentAnalyzer creates a new content analyzer with configurable timeout
|
||||||
|
|
@ -110,6 +112,9 @@ func (c *ContentAnalyzer) AnalyzeContent(email *EmailMessage) *ContentResults {
|
||||||
|
|
||||||
results.IsMultipart = len(email.Parts) > 1
|
results.IsMultipart = len(email.Parts) > 1
|
||||||
|
|
||||||
|
// Parse List-Unsubscribe header URLs for use in link detection
|
||||||
|
c.listUnsubscribeURLs = email.GetListUnsubscribeURLs()
|
||||||
|
|
||||||
// Get HTML and text parts
|
// Get HTML and text parts
|
||||||
htmlParts := email.GetHTMLParts()
|
htmlParts := email.GetHTMLParts()
|
||||||
textParts := email.GetTextParts()
|
textParts := email.GetTextParts()
|
||||||
|
|
@ -331,6 +336,11 @@ func (c *ContentAnalyzer) getAttr(n *html.Node, key string) string {
|
||||||
|
|
||||||
// isUnsubscribeLink checks if a link is an unsubscribe link
|
// isUnsubscribeLink checks if a link is an unsubscribe link
|
||||||
func (c *ContentAnalyzer) isUnsubscribeLink(href string, node *html.Node) bool {
|
func (c *ContentAnalyzer) isUnsubscribeLink(href string, node *html.Node) bool {
|
||||||
|
// First check: does the href match a URL from the List-Unsubscribe header?
|
||||||
|
if slices.Contains(c.listUnsubscribeURLs, href) {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
// Check href for unsubscribe keywords
|
// Check href for unsubscribe keywords
|
||||||
lowerHref := strings.ToLower(href)
|
lowerHref := strings.ToLower(href)
|
||||||
unsubKeywords := []string{"unsubscribe", "opt-out", "optout", "remove", "list-unsubscribe"}
|
unsubKeywords := []string{"unsubscribe", "opt-out", "optout", "remove", "list-unsubscribe"}
|
||||||
|
|
@ -439,7 +449,8 @@ func (c *ContentAnalyzer) hasDomainMisalignment(href, linkText string) bool {
|
||||||
// Extract the actual destination domain/email based on scheme
|
// Extract the actual destination domain/email based on scheme
|
||||||
var actualDomain string
|
var actualDomain string
|
||||||
|
|
||||||
if parsedURL.Scheme == "mailto" {
|
switch parsedURL.Scheme {
|
||||||
|
case "mailto":
|
||||||
// Extract email address from mailto: URL
|
// Extract email address from mailto: URL
|
||||||
// Format can be: mailto:user@domain.com or mailto:user@domain.com?subject=...
|
// Format can be: mailto:user@domain.com or mailto:user@domain.com?subject=...
|
||||||
mailtoAddr := parsedURL.Opaque
|
mailtoAddr := parsedURL.Opaque
|
||||||
|
|
@ -457,7 +468,8 @@ func (c *ContentAnalyzer) hasDomainMisalignment(href, linkText string) bool {
|
||||||
} else {
|
} else {
|
||||||
return false // Invalid mailto
|
return false // Invalid mailto
|
||||||
}
|
}
|
||||||
} else if parsedURL.Scheme == "http" || parsedURL.Scheme == "https" {
|
// Go switch cases never fall through implicitly, so a standalone
// `case "http":` would have an empty body and leave actualDomain unset for
// plain-http links. List both schemes on one case so they share the
// host-extraction code that follows.
case "http", "https":
|
||||||
// Check if URL has a host
|
// Check if URL has a host
|
||||||
if parsedURL.Host == "" {
|
if parsedURL.Host == "" {
|
||||||
return false
|
return false
|
||||||
|
|
@ -469,7 +481,7 @@ func (c *ContentAnalyzer) hasDomainMisalignment(href, linkText string) bool {
|
||||||
actualDomain = actualDomain[:idx]
|
actualDomain = actualDomain[:idx]
|
||||||
}
|
}
|
||||||
actualDomain = strings.ToLower(actualDomain)
|
actualDomain = strings.ToLower(actualDomain)
|
||||||
} else {
|
default:
|
||||||
// Skip checks for other URL schemes (tel, etc.)
|
// Skip checks for other URL schemes (tel, etc.)
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
@ -492,10 +504,8 @@ func (c *ContentAnalyzer) hasDomainMisalignment(href, linkText string) bool {
|
||||||
"email us", "contact us", "send email", "get in touch", "reach out",
|
"email us", "contact us", "send email", "get in touch", "reach out",
|
||||||
"contact", "email", "write to us",
|
"contact", "email", "write to us",
|
||||||
}
|
}
|
||||||
for _, generic := range genericTexts {
|
if slices.Contains(genericTexts, linkText) {
|
||||||
if linkText == generic {
|
return false
|
||||||
return false
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Extract domain-like patterns from link text using regex
|
// Extract domain-like patterns from link text using regex
|
||||||
|
|
@ -562,10 +572,8 @@ func (c *ContentAnalyzer) isSuspiciousURL(urlStr string, parsedURL *url.URL) boo
|
||||||
"bit.ly", "tinyurl.com", "goo.gl", "ow.ly", "t.co",
|
"bit.ly", "tinyurl.com", "goo.gl", "ow.ly", "t.co",
|
||||||
"buff.ly", "is.gd", "bl.ink", "short.io",
|
"buff.ly", "is.gd", "bl.ink", "short.io",
|
||||||
}
|
}
|
||||||
for _, shortener := range shorteners {
|
if slices.Contains(shorteners, strings.ToLower(parsedURL.Host)) {
|
||||||
if strings.ToLower(parsedURL.Host) == shortener {
|
return true
|
||||||
return true
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check for excessive subdomains (possible obfuscation)
|
// Check for excessive subdomains (possible obfuscation)
|
||||||
|
|
|
||||||
|
|
@ -301,3 +301,20 @@ func (e *EmailMessage) GetHeaderValue(key string) string {
|
||||||
func (e *EmailMessage) HasHeader(key string) bool {
|
func (e *EmailMessage) HasHeader(key string) bool {
|
||||||
return e.Header.Get(key) != ""
|
return e.Header.Get(key) != ""
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// GetListUnsubscribeURLs parses the List-Unsubscribe header and returns all URLs.
|
||||||
|
// The header format is: <url1>, <url2>, ...
|
||||||
|
func (e *EmailMessage) GetListUnsubscribeURLs() []string {
|
||||||
|
value := e.Header.Get("List-Unsubscribe")
|
||||||
|
if value == "" {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
var urls []string
|
||||||
|
for _, part := range strings.Split(value, ",") {
|
||||||
|
part = strings.TrimSpace(part)
|
||||||
|
if strings.HasPrefix(part, "<") && strings.HasSuffix(part, ">") {
|
||||||
|
urls = append(urls, part[1:len(part)-1])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return urls
|
||||||
|
}
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue