Compare commits
3 commits
a9e6cd601e
...
e4fdc7e1d6
| Author | SHA1 | Date | |
|---|---|---|---|
| e4fdc7e1d6 | |||
| 24cd68a39e | |||
| 878efb9bfd |
4 changed files with 128 additions and 5 deletions
|
|
@ -27,6 +27,7 @@ import (
|
|||
"net/http"
|
||||
"net/url"
|
||||
"regexp"
|
||||
"slices"
|
||||
"strings"
|
||||
"time"
|
||||
"unicode"
|
||||
|
|
@ -37,8 +38,10 @@ import (
|
|||
|
||||
// ContentAnalyzer analyzes email content (HTML, links, images)
|
||||
type ContentAnalyzer struct {
|
||||
Timeout time.Duration
|
||||
httpClient *http.Client
|
||||
Timeout time.Duration
|
||||
httpClient *http.Client
|
||||
listUnsubscribeURLs []string // URLs from List-Unsubscribe header
|
||||
hasOneClickUnsubscribe bool // True if List-Unsubscribe-Post: List-Unsubscribe=One-Click
|
||||
}
|
||||
|
||||
// NewContentAnalyzer creates a new content analyzer with configurable timeout
|
||||
|
|
@ -110,6 +113,13 @@ func (c *ContentAnalyzer) AnalyzeContent(email *EmailMessage) *ContentResults {
|
|||
|
||||
results.IsMultipart = len(email.Parts) > 1
|
||||
|
||||
// Parse List-Unsubscribe header URLs for use in link detection
|
||||
c.listUnsubscribeURLs = email.GetListUnsubscribeURLs()
|
||||
|
||||
// Check for one-click unsubscribe support
|
||||
listUnsubscribePost := email.Header.Get("List-Unsubscribe-Post")
|
||||
c.hasOneClickUnsubscribe = strings.EqualFold(strings.TrimSpace(listUnsubscribePost), "List-Unsubscribe=One-Click")
|
||||
|
||||
// Get HTML and text parts
|
||||
htmlParts := email.GetHTMLParts()
|
||||
textParts := email.GetTextParts()
|
||||
|
|
@ -331,9 +341,14 @@ func (c *ContentAnalyzer) getAttr(n *html.Node, key string) string {
|
|||
|
||||
// isUnsubscribeLink checks if a link is an unsubscribe link
|
||||
func (c *ContentAnalyzer) isUnsubscribeLink(href string, node *html.Node) bool {
|
||||
// First check: does the href match a URL from the List-Unsubscribe header?
|
||||
if slices.Contains(c.listUnsubscribeURLs, href) {
|
||||
return true
|
||||
}
|
||||
|
||||
// Check href for unsubscribe keywords
|
||||
lowerHref := strings.ToLower(href)
|
||||
unsubKeywords := []string{"unsubscribe", "opt-out", "optout", "remove", "list-unsubscribe"}
|
||||
unsubKeywords := []string{"unsubscribe", "opt-out", "optout", "remove", "list-unsubscribe", "отписване", "desubscripció", "zrušit odběr", "dad-danysgrifio", "afmeld", "abmelden", "διαγραφή", "darse de baja", "poistu postituslistalta", "se désabonner", "ביטול רישום", "leiratkozás", "cancella iscrizione", "登録を取り消す", "구독 해지", "വരിക്കാരനല്ലാതാകുക", "uitschrijven", "meld av", "odsubskrybuj", "cancelar assinatura", "cancelar subscrição", "dezabonare", "отписаться", "avsluta prenumeration", "zrušiť odber", "odjava", "üyeliği sonlandır", "відписатися", "hủy đăng ký", "退订", "退訂"}
|
||||
for _, keyword := range unsubKeywords {
|
||||
if strings.Contains(lowerHref, keyword) {
|
||||
return true
|
||||
|
|
@ -722,6 +737,7 @@ func (c *ContentAnalyzer) GenerateContentAnalysis(results *ContentResults) *api.
|
|||
HasHtml: api.PtrTo(results.HTMLContent != ""),
|
||||
HasPlaintext: api.PtrTo(results.TextContent != ""),
|
||||
HasUnsubscribeLink: api.PtrTo(results.HasUnsubscribe),
|
||||
UnsubscribeMethods: &[]api.ContentAnalysisUnsubscribeMethods{},
|
||||
}
|
||||
|
||||
// Calculate text-to-image ratio (inverse of image-to-text)
|
||||
|
|
@ -868,8 +884,19 @@ func (c *ContentAnalyzer) GenerateContentAnalysis(results *ContentResults) *api.
|
|||
|
||||
// Unsubscribe methods
|
||||
if results.HasUnsubscribe {
|
||||
methods := []api.ContentAnalysisUnsubscribeMethods{api.Link}
|
||||
analysis.UnsubscribeMethods = &methods
|
||||
*analysis.UnsubscribeMethods = append(*analysis.UnsubscribeMethods, api.Link)
|
||||
}
|
||||
|
||||
for _, url := range c.listUnsubscribeURLs {
|
||||
if strings.HasPrefix(url, "mailto:") {
|
||||
*analysis.UnsubscribeMethods = append(*analysis.UnsubscribeMethods, api.Mailto)
|
||||
} else if strings.HasPrefix(url, "http:") || strings.HasPrefix(url, "https:") {
|
||||
*analysis.UnsubscribeMethods = append(*analysis.UnsubscribeMethods, api.ListUnsubscribeHeader)
|
||||
}
|
||||
}
|
||||
|
||||
if slices.Contains(*analysis.UnsubscribeMethods, api.ListUnsubscribeHeader) && c.hasOneClickUnsubscribe {
|
||||
*analysis.UnsubscribeMethods = append(*analysis.UnsubscribeMethods, api.OneClick)
|
||||
}
|
||||
|
||||
return analysis
|
||||
|
|
|
|||
|
|
@ -144,6 +144,74 @@ func TestIsUnsubscribeLink(t *testing.T) {
|
|||
linkText: "Read more",
|
||||
expected: false,
|
||||
},
|
||||
// Multilingual keyword detection - URL path
|
||||
{
|
||||
name: "German abmelden in URL",
|
||||
href: "https://example.com/abmelden?id=42",
|
||||
linkText: "Click here",
|
||||
expected: true,
|
||||
},
|
||||
{
|
||||
name: "French se-desabonner slug in URL (no accent/space - not detected by keyword)",
|
||||
href: "https://example.com/se-desabonner?id=42",
|
||||
linkText: "Click here",
|
||||
expected: false,
|
||||
},
|
||||
// Multilingual keyword detection - link text
|
||||
{
|
||||
name: "German Abmelden in link text",
|
||||
href: "https://example.com/manage?id=42&lang=de",
|
||||
linkText: "Abmelden",
|
||||
expected: true,
|
||||
},
|
||||
{
|
||||
name: "French Se désabonner in link text",
|
||||
href: "https://example.com/manage?id=42&lang=fr",
|
||||
linkText: "Se désabonner",
|
||||
expected: true,
|
||||
},
|
||||
{
|
||||
name: "Russian Отписаться in link text",
|
||||
href: "https://example.com/manage?id=42&lang=ru",
|
||||
linkText: "Отписаться",
|
||||
expected: true,
|
||||
},
|
||||
{
|
||||
name: "Chinese 退订 in link text",
|
||||
href: "https://example.com/manage?id=42&lang=zh",
|
||||
linkText: "退订",
|
||||
expected: true,
|
||||
},
|
||||
{
|
||||
name: "Japanese 登録を取り消す in link text",
|
||||
href: "https://example.com/manage?id=42&lang=ja",
|
||||
linkText: "登録を取り消す",
|
||||
expected: true,
|
||||
},
|
||||
{
|
||||
name: "Korean 구독 해지 in link text",
|
||||
href: "https://example.com/manage?id=42&lang=ko",
|
||||
linkText: "구독 해지",
|
||||
expected: true,
|
||||
},
|
||||
{
|
||||
name: "Dutch Uitschrijven in link text",
|
||||
href: "https://example.com/manage?id=42&lang=nl",
|
||||
linkText: "Uitschrijven",
|
||||
expected: true,
|
||||
},
|
||||
{
|
||||
name: "Polish Odsubskrybuj in link text",
|
||||
href: "https://example.com/manage?id=42&lang=pl",
|
||||
linkText: "Odsubskrybuj",
|
||||
expected: true,
|
||||
},
|
||||
{
|
||||
name: "Turkish Üyeliği sonlandır in link text",
|
||||
href: "https://example.com/manage?id=42&lang=tr",
|
||||
linkText: "Üyeliği sonlandır",
|
||||
expected: true,
|
||||
},
|
||||
}
|
||||
|
||||
analyzer := NewContentAnalyzer(5 * time.Second)
|
||||
|
|
|
|||
|
|
@ -301,3 +301,20 @@ func (e *EmailMessage) GetHeaderValue(key string) string {
|
|||
func (e *EmailMessage) HasHeader(key string) bool {
|
||||
return e.Header.Get(key) != ""
|
||||
}
|
||||
|
||||
// GetListUnsubscribeURLs parses the List-Unsubscribe header and returns all URLs.
|
||||
// The header format is: <url1>, <url2>, ...
|
||||
func (e *EmailMessage) GetListUnsubscribeURLs() []string {
|
||||
value := e.Header.Get("List-Unsubscribe")
|
||||
if value == "" {
|
||||
return nil
|
||||
}
|
||||
var urls []string
|
||||
for _, part := range strings.Split(value, ",") {
|
||||
part = strings.TrimSpace(part)
|
||||
if strings.HasPrefix(part, "<") && strings.HasSuffix(part, ">") {
|
||||
urls = append(urls, part[1:len(part)-1])
|
||||
}
|
||||
}
|
||||
return urls
|
||||
}
|
||||
|
|
|
|||
|
|
@ -422,6 +422,17 @@
|
|||
});
|
||||
}
|
||||
|
||||
// One-click unsubscribe check
|
||||
const unsubscribeMethods = report.content_analysis?.unsubscribe_methods;
|
||||
if (unsubscribeMethods && unsubscribeMethods.length > 0 && !unsubscribeMethods.includes("one-click")) {
|
||||
segments.push({ text: ". This email could benefit from " });
|
||||
segments.push({
|
||||
text: "one-click unsubscribe",
|
||||
highlight: { color: "warning", bold: true },
|
||||
link: "#content-details",
|
||||
});
|
||||
}
|
||||
|
||||
// Content/spam assessment
|
||||
const spamAssassin = report.spamassassin;
|
||||
const contentScore = report.summary?.content_score || 0;
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue