Add multilingual unsubscribe keywords for link detection

The list comes from github.com/knadh/listmonk i18n strings. Bug: https://github.com/happyDomain/happydeliver/issues/8
Use List-Unsubscribe header URLs for unsubscribe link detection
2026-02-22 15:20:17 +07:00 · 2026-02-20 00:21:02 +07:00 · 2026-02-19 23:15:07 +07:00
2 changed files with 39 additions and 14 deletions
--- a/pkg/analyzer/content.go
+++ b/pkg/analyzer/content.go
@ -27,6 +27,7 @@ import (
 	"net/http"
 	"net/url"
 	"regexp"
 	"slices"
 	"strings"
 	"time"
 	"unicode"
@ -37,8 +38,9 @@ import (
 // ContentAnalyzer analyzes email content (HTML, links, images)
 type ContentAnalyzer struct {
-	Timeout    time.Duration
+	Timeout             time.Duration
-	httpClient *http.Client
+	httpClient          *http.Client
 	listUnsubscribeURLs []string // URLs from List-Unsubscribe header
 }
 // NewContentAnalyzer creates a new content analyzer with configurable timeout
@ -110,6 +112,9 @@ func (c *ContentAnalyzer) AnalyzeContent(email *EmailMessage) *ContentResults {
 	results.IsMultipart = len(email.Parts) > 1
 	// Parse List-Unsubscribe header URLs for use in link detection
 	c.listUnsubscribeURLs = email.GetListUnsubscribeURLs()
 	// Get HTML and text parts
 	htmlParts := email.GetHTMLParts()
 	textParts := email.GetTextParts()
@ -331,9 +336,14 @@ func (c *ContentAnalyzer) getAttr(n *html.Node, key string) string {
 // isUnsubscribeLink checks if a link is an unsubscribe link
 func (c *ContentAnalyzer) isUnsubscribeLink(href string, node *html.Node) bool {
 	// First check: does the href match a URL from the List-Unsubscribe header?
 	if slices.Contains(c.listUnsubscribeURLs, href) {
 		return true
 	}
 	// Check href for unsubscribe keywords
 	lowerHref := strings.ToLower(href)
-	unsubKeywords := []string{"unsubscribe", "opt-out", "optout", "remove", "list-unsubscribe"}
+	unsubKeywords := []string{"unsubscribe", "opt-out", "optout", "remove", "list-unsubscribe", "отписване", "desubscripció", "zrušit odběr", "dad-danysgrifio", "afmeld", "abmelden", "διαγραφή", "darse de baja", "poistu postituslistalta", "se désabonner", "ביטול רישום", "leiratkozás", "cancella iscrizione", "登録を取り消す", "구독 해지", "വരിക്കാരനല്ലാതാകുക", "uitschrijven", "meld av", "odsubskrybuj", "cancelar assinatura", "cancelar subscrição", "dezabonare", "отписаться", "avsluta prenumeration", "zrušiť odber", "odjava", "üyeliği sonlandır", "відписатися", "hủy đăng ký", "退订", "退訂"}
 	for _, keyword := range unsubKeywords {
 		if strings.Contains(lowerHref, keyword) {
 			return true
@ -439,7 +449,8 @@ func (c *ContentAnalyzer) hasDomainMisalignment(href, linkText string) bool {
 	// Extract the actual destination domain/email based on scheme
 	var actualDomain string
-	if parsedURL.Scheme == "mailto" {
+	switch parsedURL.Scheme {
 	case "mailto":
 		// Extract email address from mailto: URL
 		// Format can be: mailto:user@domain.com or mailto:user@domain.com?subject=...
 		mailtoAddr := parsedURL.Opaque
@ -457,7 +468,8 @@ func (c *ContentAnalyzer) hasDomainMisalignment(href, linkText string) bool {
 		} else {
 			return false // Invalid mailto
 		}
-	} else if parsedURL.Scheme == "http" || parsedURL.Scheme == "https" {
+	case "http":
 	case "https":
 		// Check if URL has a host
 		if parsedURL.Host == "" {
 			return false
@ -469,7 +481,7 @@ func (c *ContentAnalyzer) hasDomainMisalignment(href, linkText string) bool {
 			actualDomain = actualDomain[:idx]
 		}
 		actualDomain = strings.ToLower(actualDomain)
-	} else {
+	default:
 		// Skip checks for other URL schemes (tel, etc.)
 		return false
 	}
@ -492,10 +504,8 @@ func (c *ContentAnalyzer) hasDomainMisalignment(href, linkText string) bool {
 		"email us", "contact us", "send email", "get in touch", "reach out",
 		"contact", "email", "write to us",
 	}
-	for _, generic := range genericTexts {
+	if slices.Contains(genericTexts, linkText) {
-		if linkText == generic {
+		return false
 			return false
 		}
 	}
 	// Extract domain-like patterns from link text using regex
@ -562,10 +572,8 @@ func (c *ContentAnalyzer) isSuspiciousURL(urlStr string, parsedURL *url.URL) boo
 		"bit.ly", "tinyurl.com", "goo.gl", "ow.ly", "t.co",
 		"buff.ly", "is.gd", "bl.ink", "short.io",
 	}
-	for _, shortener := range shorteners {
+	if slices.Contains(shorteners, strings.ToLower(parsedURL.Host)) {
-		if strings.ToLower(parsedURL.Host) == shortener {
+		return true
 			return true
 		}
 	}
 	// Check for excessive subdomains (possible obfuscation)
--- a/pkg/analyzer/parser.go
+++ b/pkg/analyzer/parser.go
@ -301,3 +301,20 @@ func (e *EmailMessage) GetHeaderValue(key string) string {
 func (e *EmailMessage) HasHeader(key string) bool {
 	// A header counts as present only when its value is non-empty.
 	value := e.Header.Get(key)
 	return len(value) > 0
 }
 // GetListUnsubscribeURLs parses the List-Unsubscribe header and returns every
 // URL found between angle brackets, in header order.
 //
 // The header format is: <url1>, <url2>, ...
 // An entry may be followed by a comment, as in "<url> (Web)", and a URL may
 // itself contain a comma, so the value is scanned for "<...>" pairs instead
 // of being split on commas. Whitespace inside the brackets (for example left
 // over from header folding) is removed, and empty entries are skipped.
 //
 // It returns nil when the header is absent or holds no bracketed URL.
 func (e *EmailMessage) GetListUnsubscribeURLs() []string {
 	rest := e.Header.Get("List-Unsubscribe")
 	var urls []string
 	for {
 		_, afterOpen, found := strings.Cut(rest, "<")
 		if !found {
 			return urls
 		}
 		inner, afterClose, found := strings.Cut(afterOpen, ">")
 		if !found {
 			// Unterminated bracket: nothing usable remains in the header.
 			return urls
 		}
 		// Drop any whitespace embedded in the URL; skip "<>" and "<   >".
 		if u := strings.Join(strings.Fields(inner), ""); u != "" {
 			urls = append(urls, u)
 		}
 		rest = afterClose
 	}
 }
Author	SHA1	Message	Date
Pierre-Olivier Mercier	43aec8fdc0	Add multilingual unsubscribe keywords for link detection All checks were successful continuous-integration/drone/push Build is passing Details The list comes from github.com/knadh/listmonk i18n strings Bug: https://github.com/happyDomain/happydeliver/issues/8	2026-02-22 15:20:17 +07:00
Pierre-Olivier Mercier	1c1d474870	Use List-Unsubscribe header URLs for unsubscribe link detection All checks were successful continuous-integration/drone/push Build is passing Details Bug: https://github.com/happyDomain/happydeliver/issues/8	2026-02-20 00:21:02 +07:00
Pierre-Olivier Mercier	521d5da84c	Use modern Go slices.Contains and switch instead of if/else if	2026-02-19 23:15:07 +07:00