Compare commits

...
Sign in to create a new pull request.

3 commits

Author SHA1 Message Date
43aec8fdc0 Add multilingual unsubscribe keywords for link detection
All checks were successful
continuous-integration/drone/push Build is passing
The list comes from github.com/knadh/listmonk i18n strings

Bug: https://github.com/happyDomain/happydeliver/issues/8
2026-02-22 15:20:17 +07:00
1c1d474870 Use List-Unsubscribe header URLs for unsubscribe link detection
All checks were successful
continuous-integration/drone/push Build is passing
Bug: https://github.com/happyDomain/happydeliver/issues/8
2026-02-20 00:21:02 +07:00
521d5da84c Use modern Go slices.Contains and switch instead of if/else if 2026-02-19 23:15:07 +07:00
2 changed files with 39 additions and 14 deletions

View file

@ -27,6 +27,7 @@ import (
"net/http"
"net/url"
"regexp"
"slices"
"strings"
"time"
"unicode"
@ -37,8 +38,9 @@ import (
// ContentAnalyzer analyzes email content (HTML, links, images)
type ContentAnalyzer struct {
Timeout time.Duration
httpClient *http.Client
Timeout time.Duration
httpClient *http.Client
listUnsubscribeURLs []string // URLs from List-Unsubscribe header
}
// NewContentAnalyzer creates a new content analyzer with configurable timeout
@ -110,6 +112,9 @@ func (c *ContentAnalyzer) AnalyzeContent(email *EmailMessage) *ContentResults {
results.IsMultipart = len(email.Parts) > 1
// Parse List-Unsubscribe header URLs for use in link detection
c.listUnsubscribeURLs = email.GetListUnsubscribeURLs()
// Get HTML and text parts
htmlParts := email.GetHTMLParts()
textParts := email.GetTextParts()
@ -331,9 +336,14 @@ func (c *ContentAnalyzer) getAttr(n *html.Node, key string) string {
// isUnsubscribeLink checks if a link is an unsubscribe link
func (c *ContentAnalyzer) isUnsubscribeLink(href string, node *html.Node) bool {
// First check: does the href match a URL from the List-Unsubscribe header?
if slices.Contains(c.listUnsubscribeURLs, href) {
return true
}
// Check href for unsubscribe keywords
lowerHref := strings.ToLower(href)
unsubKeywords := []string{"unsubscribe", "opt-out", "optout", "remove", "list-unsubscribe"}
unsubKeywords := []string{"unsubscribe", "opt-out", "optout", "remove", "list-unsubscribe", "отписване", "desubscripció", "zrušit odběr", "dad-danysgrifio", "afmeld", "abmelden", "διαγραφή", "darse de baja", "poistu postituslistalta", "se désabonner", "ביטול רישום", "leiratkozás", "cancella iscrizione", "登録を取り消す", "구독 해지", "വരിക്കാരനല്ലാതാകുക", "uitschrijven", "meld av", "odsubskrybuj", "cancelar assinatura", "cancelar subscrição", "dezabonare", "отписаться", "avsluta prenumeration", "zrušiť odber", "odjava", "üyeliği sonlandır", "відписатися", "hủy đăng ký", "退订", "退訂"}
for _, keyword := range unsubKeywords {
if strings.Contains(lowerHref, keyword) {
return true
@ -439,7 +449,8 @@ func (c *ContentAnalyzer) hasDomainMisalignment(href, linkText string) bool {
// Extract the actual destination domain/email based on scheme
var actualDomain string
if parsedURL.Scheme == "mailto" {
switch parsedURL.Scheme {
case "mailto":
// Extract email address from mailto: URL
// Format can be: mailto:user@domain.com or mailto:user@domain.com?subject=...
mailtoAddr := parsedURL.Opaque
@ -457,7 +468,8 @@ func (c *ContentAnalyzer) hasDomainMisalignment(href, linkText string) bool {
} else {
return false // Invalid mailto
}
} else if parsedURL.Scheme == "http" || parsedURL.Scheme == "https" {
case "http", "https":
// Both schemes share a single case: Go switch cases never fall through
// implicitly, so a separate empty `case "http":` would skip host extraction.
// Check if URL has a host
if parsedURL.Host == "" {
return false
@ -469,7 +481,7 @@ func (c *ContentAnalyzer) hasDomainMisalignment(href, linkText string) bool {
actualDomain = actualDomain[:idx]
}
actualDomain = strings.ToLower(actualDomain)
} else {
default:
// Skip checks for other URL schemes (tel, etc.)
return false
}
@ -492,10 +504,8 @@ func (c *ContentAnalyzer) hasDomainMisalignment(href, linkText string) bool {
"email us", "contact us", "send email", "get in touch", "reach out",
"contact", "email", "write to us",
}
for _, generic := range genericTexts {
if linkText == generic {
return false
}
if slices.Contains(genericTexts, linkText) {
return false
}
// Extract domain-like patterns from link text using regex
@ -562,10 +572,8 @@ func (c *ContentAnalyzer) isSuspiciousURL(urlStr string, parsedURL *url.URL) boo
"bit.ly", "tinyurl.com", "goo.gl", "ow.ly", "t.co",
"buff.ly", "is.gd", "bl.ink", "short.io",
}
for _, shortener := range shorteners {
if strings.ToLower(parsedURL.Host) == shortener {
return true
}
if slices.Contains(shorteners, strings.ToLower(parsedURL.Host)) {
return true
}
// Check for excessive subdomains (possible obfuscation)

View file

@ -301,3 +301,20 @@ func (e *EmailMessage) GetHeaderValue(key string) string {
func (e *EmailMessage) HasHeader(key string) bool {
	// A header counts as present only when the lookup yields a non-empty value.
	if value := e.Header.Get(key); value == "" {
		return false
	}
	return true
}
// GetListUnsubscribeURLs parses the List-Unsubscribe header and returns every
// angle-bracket enclosed URL it contains, in header order.
// The header format is: <url1>, <url2>, ...
//
// The header is scanned for "<...>" pairs rather than split on commas, so a
// URL that itself contains a comma is kept intact and any text outside the
// brackets (such as a trailing comment) is ignored. Whitespace directly inside
// the brackets is trimmed and empty entries ("<>") are skipped, so the result
// never contains an empty string. It returns nil when the header is missing or
// holds no bracketed URL.
func (e *EmailMessage) GetListUnsubscribeURLs() []string {
	remaining := e.Header.Get("List-Unsubscribe")

	var urls []string
	for {
		// Jump to the next opening bracket; stop when none is left.
		_, afterOpen, found := strings.Cut(remaining, "<")
		if !found {
			break
		}
		// An unterminated "<..." is malformed: ignore it and stop.
		raw, afterClose, closed := strings.Cut(afterOpen, ">")
		if !closed {
			break
		}
		if u := strings.TrimSpace(raw); u != "" {
			urls = append(urls, u)
		}
		remaining = afterClose
	}
	return urls
}