Use List-Unsubscribe header URLs for unsubscribe link detection
All checks were successful
continuous-integration/drone/push Build is passing
All checks were successful
continuous-integration/drone/push Build is passing
Bug: https://github.com/happyDomain/happydeliver/issues/8
This commit is contained in:
parent
521d5da84c
commit
1c1d474870
2 changed files with 29 additions and 2 deletions
|
|
@ -27,6 +27,7 @@ import (
|
|||
"net/http"
|
||||
"net/url"
|
||||
"regexp"
|
||||
"slices"
|
||||
"strings"
|
||||
"time"
|
||||
"unicode"
|
||||
|
|
@ -37,8 +38,9 @@ import (
|
|||
|
||||
// ContentAnalyzer analyzes email content (HTML, links, images).
type ContentAnalyzer struct {
	// Timeout is the analyzer's configurable timeout.
	// NOTE(review): presumably applied to requests made through httpClient;
	// confirm in NewContentAnalyzer.
	Timeout time.Duration

	// httpClient is the HTTP client owned by the analyzer.
	httpClient *http.Client

	// listUnsubscribeURLs holds the URLs taken from the List-Unsubscribe
	// header of the message being analyzed. AnalyzeContent overwrites it for
	// every message and isUnsubscribeLink compares link hrefs against it.
	// NOTE(review): this is per-message state stored on the analyzer, so
	// concurrent AnalyzeContent calls on one analyzer would race on it.
	listUnsubscribeURLs []string
}
|
||||
|
||||
// NewContentAnalyzer creates a new content analyzer with configurable timeout
|
||||
|
|
@ -110,6 +112,9 @@ func (c *ContentAnalyzer) AnalyzeContent(email *EmailMessage) *ContentResults {
|
|||
|
||||
results.IsMultipart = len(email.Parts) > 1
|
||||
|
||||
// Parse List-Unsubscribe header URLs for use in link detection
|
||||
c.listUnsubscribeURLs = email.GetListUnsubscribeURLs()
|
||||
|
||||
// Get HTML and text parts
|
||||
htmlParts := email.GetHTMLParts()
|
||||
textParts := email.GetTextParts()
|
||||
|
|
@ -331,6 +336,11 @@ func (c *ContentAnalyzer) getAttr(n *html.Node, key string) string {
|
|||
|
||||
// isUnsubscribeLink checks if a link is an unsubscribe link
|
||||
func (c *ContentAnalyzer) isUnsubscribeLink(href string, node *html.Node) bool {
|
||||
// First check: does the href match a URL from the List-Unsubscribe header?
|
||||
if slices.Contains(c.listUnsubscribeURLs, href) {
|
||||
return true
|
||||
}
|
||||
|
||||
// Check href for unsubscribe keywords
|
||||
lowerHref := strings.ToLower(href)
|
||||
unsubKeywords := []string{"unsubscribe", "opt-out", "optout", "remove", "list-unsubscribe"}
|
||||
|
|
|
|||
|
|
@ -301,3 +301,20 @@ func (e *EmailMessage) GetHeaderValue(key string) string {
|
|||
// HasHeader reports whether e.Header.Get returns a non-empty value for key.
// A header that is present but has an empty value is reported as absent.
func (e *EmailMessage) HasHeader(key string) bool {
|
||||
return e.Header.Get(key) != ""
|
||||
}
|
||||
|
||||
// GetListUnsubscribeURLs parses the List-Unsubscribe header and returns all URLs.
|
||||
// The header format is: <url1>, <url2>, ...
|
||||
func (e *EmailMessage) GetListUnsubscribeURLs() []string {
|
||||
value := e.Header.Get("List-Unsubscribe")
|
||||
if value == "" {
|
||||
return nil
|
||||
}
|
||||
var urls []string
|
||||
for _, part := range strings.Split(value, ",") {
|
||||
part = strings.TrimSpace(part)
|
||||
if strings.HasPrefix(part, "<") && strings.HasSuffix(part, ">") {
|
||||
urls = append(urls, part[1:len(part)-1])
|
||||
}
|
||||
}
|
||||
return urls
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue