content: treat unreplaced template placeholders as invalid links
URLs like "{unsubscribe}" parse without error in Go, so they were
reported as valid links and even counted as unsubscribe methods just
because they contain the word "unsubscribe".
Detect common un-substituted merge-field syntaxes ({x}, {{x}}, ${x},
*|X|*, %x%, [x], %7Bx%7D), mark such links invalid (Broken status),
exclude them from unsubscribe method detection, and surface them via a
new unreplaced_template content issue.
This commit is contained in:
parent
c3194de6aa
commit
f88701681f
3 changed files with 172 additions and 8 deletions
|
|
@ -265,7 +265,7 @@ components:
|
||||||
properties:
|
properties:
|
||||||
type:
|
type:
|
||||||
type: string
|
type: string
|
||||||
enum: [broken_html, missing_alt, excessive_images, obfuscated_url, suspicious_link, dangerous_html]
|
enum: [broken_html, missing_alt, excessive_images, obfuscated_url, suspicious_link, dangerous_html, unreplaced_template]
|
||||||
description: Type of content issue
|
description: Type of content issue
|
||||||
example: "missing_alt"
|
example: "missing_alt"
|
||||||
severity:
|
severity:
|
||||||
|
|
|
||||||
|
|
@ -83,6 +83,27 @@ type ContentResults struct {
|
||||||
HarmfullIssues []string
|
HarmfullIssues []string
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// templatePlaceholderRegex matches unreplaced templating tokens that remain when a
|
||||||
|
// merge field was not substituted before sending. It covers the common syntaxes:
|
||||||
|
// - single/double curly braces: {unsubscribe}, {{unsubscribe_url}}
|
||||||
|
// - dollar braces: ${unsubscribe}
|
||||||
|
// - Mailchimp merge tags: *|UNSUB|*
|
||||||
|
// - percent tags: %unsubscribe%, %%unsubscribe%%
|
||||||
|
// - square bracket tags: [unsubscribe]
|
||||||
|
// - URL-encoded curly braces: %7Bunsubscribe%7D
|
||||||
|
//
|
||||||
|
// The percent-tag alternative requires the body to contain at least one non-hex
|
||||||
|
// character ([g-z_.-]). Percent-encoded octets (e.g. "%C3%A9", "%E2%80%A6") only
|
||||||
|
// ever place hex digits between "%" signs, so this distinguishes real merge tags
|
||||||
|
// from ordinary percent-encoding and avoids flagging internationalized URLs.
|
||||||
|
var templatePlaceholderRegex = regexp.MustCompile(`(?i)\{\{?[^{}]*\}?\}|\$\{[^}]*\}|\*\|[^|]*\|\*|%{1,2}[\w.\-]*[g-z_.\-][\w.\-]*%{1,2}|\[[a-z][\w.\-]*\]|%7b[^%]*%7d`)
|
||||||
|
|
||||||
|
// isTemplatePlaceholderURL reports whether a URL still contains an unreplaced
|
||||||
|
// templating placeholder, meaning the merge field was never substituted.
|
||||||
|
func isTemplatePlaceholderURL(urlStr string) bool {
|
||||||
|
return templatePlaceholderRegex.MatchString(urlStr)
|
||||||
|
}
|
||||||
|
|
||||||
// HasPlaintext returns true if the email has plain text content
|
// HasPlaintext returns true if the email has plain text content
|
||||||
func (r *ContentResults) HasPlaintext() bool {
|
func (r *ContentResults) HasPlaintext() bool {
|
||||||
return r.TextContent != ""
|
return r.TextContent != ""
|
||||||
|
|
@ -96,6 +117,7 @@ type LinkCheck struct {
|
||||||
Error string
|
Error string
|
||||||
IsSafe bool
|
IsSafe bool
|
||||||
Warning string
|
Warning string
|
||||||
|
IsTemplate bool // URL still contains an unreplaced templating placeholder (e.g. "{unsubscribe}")
|
||||||
}
|
}
|
||||||
|
|
||||||
// ImageCheck represents an image validation result
|
// ImageCheck represents an image validation result
|
||||||
|
|
@ -342,6 +364,13 @@ func (c *ContentAnalyzer) getAttr(n *html.Node, key string) string {
|
||||||
|
|
||||||
// isUnsubscribeLink checks if a link is an unsubscribe link
|
// isUnsubscribeLink checks if a link is an unsubscribe link
|
||||||
func (c *ContentAnalyzer) isUnsubscribeLink(href string, node *html.Node) bool {
|
func (c *ContentAnalyzer) isUnsubscribeLink(href string, node *html.Node) bool {
|
||||||
|
// An href with an unreplaced template placeholder (e.g. "{unsubscribe}") is not a
|
||||||
|
// working link, so it must not count as a valid unsubscribe method even though it
|
||||||
|
// literally contains the word "unsubscribe".
|
||||||
|
if isTemplatePlaceholderURL(href) {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
// First check: does the href match a URL from the List-Unsubscribe header?
|
// First check: does the href match a URL from the List-Unsubscribe header?
|
||||||
if slices.Contains(c.listUnsubscribeURLs, href) {
|
if slices.Contains(c.listUnsubscribeURLs, href) {
|
||||||
return true
|
return true
|
||||||
|
|
@ -387,6 +416,15 @@ func (c *ContentAnalyzer) validateLink(urlStr string) LinkCheck {
|
||||||
IsSafe: true,
|
IsSafe: true,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Detect unreplaced templating placeholders (e.g. "{unsubscribe}"). Such a URL
|
||||||
|
// is not a real link: the merge field was never substituted before sending.
|
||||||
|
if isTemplatePlaceholderURL(urlStr) {
|
||||||
|
check.Valid = false
|
||||||
|
check.IsTemplate = true
|
||||||
|
check.Error = "URL contains an unreplaced template placeholder (merge field was not substituted before sending)"
|
||||||
|
return check
|
||||||
|
}
|
||||||
|
|
||||||
// Parse URL
|
// Parse URL
|
||||||
parsedURL, err := url.Parse(urlStr)
|
parsedURL, err := url.Parse(urlStr)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
|
@ -798,6 +836,21 @@ func (c *ContentAnalyzer) GenerateContentAnalysis(results *ContentResults) *mode
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Add unreplaced template placeholder issues
|
||||||
|
for _, link := range results.Links {
|
||||||
|
if !link.IsTemplate {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
location := link.URL
|
||||||
|
htmlIssues = append(htmlIssues, model.ContentIssue{
|
||||||
|
Type: model.UnreplacedTemplate,
|
||||||
|
Severity: model.ContentIssueSeverityHigh,
|
||||||
|
Message: fmt.Sprintf("Link contains an unreplaced template placeholder: %s", link.URL),
|
||||||
|
Location: &location,
|
||||||
|
Advice: utils.PtrTo("Ensure all merge fields and template placeholders are substituted before sending"),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
// Add suspicious URL issues
|
// Add suspicious URL issues
|
||||||
for _, suspURL := range results.SuspiciousURLs {
|
for _, suspURL := range results.SuspiciousURLs {
|
||||||
htmlIssues = append(htmlIssues, model.ContentIssue{
|
htmlIssues = append(htmlIssues, model.ContentIssue{
|
||||||
|
|
@ -838,7 +891,10 @@ func (c *ContentAnalyzer) GenerateContentAnalysis(results *ContentResults) *mode
|
||||||
links := make([]model.LinkCheck, 0, len(results.Links))
|
links := make([]model.LinkCheck, 0, len(results.Links))
|
||||||
for _, link := range results.Links {
|
for _, link := range results.Links {
|
||||||
status := model.Valid
|
status := model.Valid
|
||||||
if link.Status >= 400 {
|
if !link.Valid {
|
||||||
|
// Link could not be parsed/validated (e.g. unreplaced template placeholder)
|
||||||
|
status = model.Broken
|
||||||
|
} else if link.Status >= 400 {
|
||||||
status = model.Broken
|
status = model.Broken
|
||||||
} else if !link.IsSafe {
|
} else if !link.IsSafe {
|
||||||
status = model.Suspicious
|
status = model.Suspicious
|
||||||
|
|
|
||||||
|
|
@ -24,10 +24,12 @@ package analyzer
|
||||||
import (
|
import (
|
||||||
"net/mail"
|
"net/mail"
|
||||||
"net/url"
|
"net/url"
|
||||||
|
"slices"
|
||||||
"strings"
|
"strings"
|
||||||
"testing"
|
"testing"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"git.happydns.org/happyDeliver/internal/model"
|
||||||
"golang.org/x/net/html"
|
"golang.org/x/net/html"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -212,6 +214,25 @@ func TestIsUnsubscribeLink(t *testing.T) {
|
||||||
linkText: "Üyeliği sonlandır",
|
linkText: "Üyeliği sonlandır",
|
||||||
expected: true,
|
expected: true,
|
||||||
},
|
},
|
||||||
|
// Unreplaced template placeholders must NOT count as unsubscribe methods
|
||||||
|
{
|
||||||
|
name: "Curly brace template placeholder",
|
||||||
|
href: "{unsubscribe}",
|
||||||
|
linkText: "Unsubscribe",
|
||||||
|
expected: false,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "Double curly brace template placeholder",
|
||||||
|
href: "{{unsubscribe_url}}",
|
||||||
|
linkText: "Unsubscribe",
|
||||||
|
expected: false,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "Mailchimp merge tag placeholder",
|
||||||
|
href: "*|UNSUB|*",
|
||||||
|
linkText: "Unsubscribe here",
|
||||||
|
expected: false,
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
analyzer := NewContentAnalyzer(5 * time.Second)
|
analyzer := NewContentAnalyzer(5 * time.Second)
|
||||||
|
|
@ -979,3 +1000,90 @@ func TestHasDomainMisalignment(t *testing.T) {
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestIsTemplatePlaceholderURL(t *testing.T) {
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
url string
|
||||||
|
expected bool
|
||||||
|
}{
|
||||||
|
{name: "Single curly braces", url: "{unsubscribe}", expected: true},
|
||||||
|
{name: "Double curly braces", url: "{{unsubscribe_url}}", expected: true},
|
||||||
|
{name: "Dollar braces", url: "${unsubscribe}", expected: true},
|
||||||
|
{name: "Mailchimp merge tag", url: "*|UNSUB|*", expected: true},
|
||||||
|
{name: "Percent tag", url: "%unsubscribe%", expected: true},
|
||||||
|
{name: "Double percent tag", url: "%%unsubscribe%%", expected: true},
|
||||||
|
{name: "Square bracket tag", url: "[unsubscribe]", expected: true},
|
||||||
|
{name: "URL-encoded curly braces", url: "https://example.com/u/%7Btoken%7D", expected: true},
|
||||||
|
{name: "Placeholder embedded in URL", url: "https://example.com/unsub?id={{recipient_id}}", expected: true},
|
||||||
|
{name: "Normal https URL", url: "https://example.com/unsubscribe?id=123", expected: false},
|
||||||
|
{name: "Normal URL with percent-encoded space", url: "https://example.com/path%20name", expected: false},
|
||||||
|
{name: "Percent-encoded accented char (é)", url: "https://example.com/caf%C3%A9/unsubscribe", expected: false},
|
||||||
|
{name: "Percent-encoded UTF-8 ellipsis", url: "https://example.com/path?q=%E2%80%A6", expected: false},
|
||||||
|
{name: "Percent-encoded Cyrillic", url: "https://example.com/r?u=%D0%BF%D1%80%D0%B8%D0%B2", expected: false},
|
||||||
|
{name: "Adjacent percent-encoded octets (all hex)", url: "https://example.com/%aa%bb", expected: false},
|
||||||
|
{name: "Percent escape then literal hex letters", url: "https://example.com/x%def%20y", expected: false},
|
||||||
|
{name: "Short percent tag with non-hex letter", url: "%id%", expected: true},
|
||||||
|
{name: "Mailto URL", url: "mailto:unsubscribe@example.com", expected: false},
|
||||||
|
{name: "IPv6 URL (square brackets, not a tag)", url: "http://[::1]/unsubscribe", expected: false},
|
||||||
|
{name: "IPv6 URL with hex-letter host", url: "http://[fe80::1]/unsubscribe", expected: false},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
if got := isTemplatePlaceholderURL(tt.url); got != tt.expected {
|
||||||
|
t.Errorf("isTemplatePlaceholderURL(%q) = %v, want %v", tt.url, got, tt.expected)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestValidateLink_TemplatePlaceholderIsInvalid(t *testing.T) {
|
||||||
|
analyzer := NewContentAnalyzer(5 * time.Second)
|
||||||
|
|
||||||
|
check := analyzer.validateLink("{unsubscribe}")
|
||||||
|
if check.Valid {
|
||||||
|
t.Errorf("validateLink(%q).Valid = true, want false", "{unsubscribe}")
|
||||||
|
}
|
||||||
|
if check.Error == "" {
|
||||||
|
t.Errorf("validateLink(%q).Error is empty, want a template placeholder error", "{unsubscribe}")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestGenerateContentAnalysis_TemplateLinkNotUnsubscribe(t *testing.T) {
|
||||||
|
analyzer := NewContentAnalyzer(5 * time.Second)
|
||||||
|
|
||||||
|
results := &ContentResults{
|
||||||
|
HTMLContent: "<html><body><a href=\"{unsubscribe}\">Unsubscribe</a></body></html>",
|
||||||
|
Links: []LinkCheck{{URL: "{unsubscribe}", Valid: false, IsTemplate: true, IsSafe: true, Error: "template"}},
|
||||||
|
HasUnsubscribe: false,
|
||||||
|
}
|
||||||
|
|
||||||
|
analysis := analyzer.GenerateContentAnalysis(results)
|
||||||
|
|
||||||
|
// The link must be reported as broken, not valid
|
||||||
|
if analysis.Links == nil || len(*analysis.Links) != 1 {
|
||||||
|
t.Fatalf("expected 1 link in analysis, got %v", analysis.Links)
|
||||||
|
}
|
||||||
|
if (*analysis.Links)[0].Status != model.Broken {
|
||||||
|
t.Errorf("template link status = %q, want %q", (*analysis.Links)[0].Status, model.Broken)
|
||||||
|
}
|
||||||
|
|
||||||
|
// It must not be counted as an unsubscribe method
|
||||||
|
if analysis.UnsubscribeMethods != nil && slices.Contains(*analysis.UnsubscribeMethods, model.Link) {
|
||||||
|
t.Errorf("template link wrongly counted as an unsubscribe method: %v", *analysis.UnsubscribeMethods)
|
||||||
|
}
|
||||||
|
|
||||||
|
// An unreplaced template issue must be reported
|
||||||
|
foundIssue := false
|
||||||
|
if analysis.HtmlIssues != nil {
|
||||||
|
for _, issue := range *analysis.HtmlIssues {
|
||||||
|
if issue.Type == model.UnreplacedTemplate {
|
||||||
|
foundIssue = true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if !foundIssue {
|
||||||
|
t.Errorf("expected an unreplaced_template content issue, got %v", analysis.HtmlIssues)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue