Add multilingual unsubscribe keywords for link detection
The list comes from the github.com/knadh/listmonk i18n strings. Bug: https://github.com/happyDomain/happydeliver/issues/8
This commit is contained in:
parent
6b983f0506
commit
96e83ff70d
2 changed files with 69 additions and 1 deletion
|
|
@ -343,7 +343,7 @@ func (c *ContentAnalyzer) isUnsubscribeLink(href string, node *html.Node) bool {
|
|||
|
||||
// Check href for unsubscribe keywords
|
||||
lowerHref := strings.ToLower(href)
|
||||
unsubKeywords := []string{"unsubscribe", "opt-out", "optout", "remove", "list-unsubscribe"}
|
||||
unsubKeywords := []string{"unsubscribe", "opt-out", "optout", "remove", "list-unsubscribe", "отписване", "desubscripció", "zrušit odběr", "dad-danysgrifio", "afmeld", "abmelden", "διαγραφή", "darse de baja", "poistu postituslistalta", "se désabonner", "ביטול רישום", "leiratkozás", "cancella iscrizione", "登録を取り消す", "구독 해지", "വരിക്കാരനല്ലാതാകുക", "uitschrijven", "meld av", "odsubskrybuj", "cancelar assinatura", "cancelar subscrição", "dezabonare", "отписаться", "avsluta prenumeration", "zrušiť odber", "odjava", "üyeliği sonlandır", "відписатися", "hủy đăng ký", "退订", "退訂"}
|
||||
for _, keyword := range unsubKeywords {
|
||||
if strings.Contains(lowerHref, keyword) {
|
||||
return true
|
||||
|
|
|
|||
|
|
@ -144,6 +144,74 @@ func TestIsUnsubscribeLink(t *testing.T) {
|
|||
linkText: "Read more",
|
||||
expected: false,
|
||||
},
|
||||
// Multilingual keyword detection - URL path
|
||||
{
|
||||
name: "German abmelden in URL",
|
||||
href: "https://example.com/abmelden?id=42",
|
||||
linkText: "Click here",
|
||||
expected: true,
|
||||
},
|
||||
{
|
||||
name: "French se-desabonner slug in URL (no accent/space - not detected by keyword)",
|
||||
href: "https://example.com/se-desabonner?id=42",
|
||||
linkText: "Click here",
|
||||
expected: false,
|
||||
},
|
||||
// Multilingual keyword detection - link text
|
||||
{
|
||||
name: "German Abmelden in link text",
|
||||
href: "https://example.com/manage?id=42&lang=de",
|
||||
linkText: "Abmelden",
|
||||
expected: true,
|
||||
},
|
||||
{
|
||||
name: "French Se désabonner in link text",
|
||||
href: "https://example.com/manage?id=42&lang=fr",
|
||||
linkText: "Se désabonner",
|
||||
expected: true,
|
||||
},
|
||||
{
|
||||
name: "Russian Отписаться in link text",
|
||||
href: "https://example.com/manage?id=42&lang=ru",
|
||||
linkText: "Отписаться",
|
||||
expected: true,
|
||||
},
|
||||
{
|
||||
name: "Chinese 退订 in link text",
|
||||
href: "https://example.com/manage?id=42&lang=zh",
|
||||
linkText: "退订",
|
||||
expected: true,
|
||||
},
|
||||
{
|
||||
name: "Japanese 登録を取り消す in link text",
|
||||
href: "https://example.com/manage?id=42&lang=ja",
|
||||
linkText: "登録を取り消す",
|
||||
expected: true,
|
||||
},
|
||||
{
|
||||
name: "Korean 구독 해지 in link text",
|
||||
href: "https://example.com/manage?id=42&lang=ko",
|
||||
linkText: "구독 해지",
|
||||
expected: true,
|
||||
},
|
||||
{
|
||||
name: "Dutch Uitschrijven in link text",
|
||||
href: "https://example.com/manage?id=42&lang=nl",
|
||||
linkText: "Uitschrijven",
|
||||
expected: true,
|
||||
},
|
||||
{
|
||||
name: "Polish Odsubskrybuj in link text",
|
||||
href: "https://example.com/manage?id=42&lang=pl",
|
||||
linkText: "Odsubskrybuj",
|
||||
expected: true,
|
||||
},
|
||||
{
|
||||
name: "Turkish Üyeliği sonlandır in link text",
|
||||
href: "https://example.com/manage?id=42&lang=tr",
|
||||
linkText: "Üyeliği sonlandır",
|
||||
expected: true,
|
||||
},
|
||||
}
|
||||
|
||||
analyzer := NewContentAnalyzer(5 * time.Second)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue