diff --git a/api/schemas.yaml b/api/schemas.yaml index 042a3b3..6e12a01 100644 --- a/api/schemas.yaml +++ b/api/schemas.yaml @@ -265,7 +265,7 @@ components: properties: type: type: string - enum: [broken_html, missing_alt, excessive_images, obfuscated_url, suspicious_link, dangerous_html] + enum: [broken_html, missing_alt, excessive_images, obfuscated_url, suspicious_link, dangerous_html, unreplaced_template] description: Type of content issue example: "missing_alt" severity: diff --git a/pkg/analyzer/content.go b/pkg/analyzer/content.go index 3e29a7a..2cf08a8 100644 --- a/pkg/analyzer/content.go +++ b/pkg/analyzer/content.go @@ -83,6 +83,27 @@ type ContentResults struct { HarmfullIssues []string } +// templatePlaceholderRegex matches unreplaced templating tokens that remain when a +// merge field was not substituted before sending. It covers the common syntaxes: +// - single/double curly braces: {unsubscribe}, {{unsubscribe_url}} +// - dollar braces: ${unsubscribe} +// - Mailchimp merge tags: *|UNSUB|* +// - percent tags: %unsubscribe%, %%unsubscribe%% +// - square bracket tags: [unsubscribe] +// - URL-encoded curly braces: %7Bunsubscribe%7D +// +// The percent-tag alternative requires the body to contain at least one non-hex +// character ([g-z_.-]). Percent-encoded octets (e.g. "%C3%A9", "%E2%80%A6") only +// ever place hex digits between "%" signs, so this distinguishes real merge tags +// from ordinary percent-encoding and avoids flagging internationalized URLs. +var templatePlaceholderRegex = regexp.MustCompile(`(?i)\{\{?[^{}]*\}?\}|\$\{[^}]*\}|\*\|[^|]*\|\*|%{1,2}[\w.\-]*[g-z_.\-][\w.\-]*%{1,2}|\[[a-z][\w.\-]*\]|%7b[^%]*%7d`) + +// isTemplatePlaceholderURL reports whether a URL still contains an unreplaced +// templating placeholder, meaning the merge field was never substituted. +func isTemplatePlaceholderURL(urlStr string) bool { + return templatePlaceholderRegex.MatchString(urlStr) +} + // HasPlaintext returns true if the email has plain text content func (r *ContentResults) HasPlaintext() bool { return r.TextContent != "" @@ -90,12 +111,13 @@ func (r *ContentResults) HasPlaintext() bool { // LinkCheck represents a link validation result type LinkCheck struct { - URL string - Valid bool - Status int - Error string - IsSafe bool - Warning string + URL string + Valid bool + Status int + Error string + IsSafe bool + Warning string + IsTemplate bool // URL still contains an unreplaced templating placeholder (e.g. "{unsubscribe}") } // ImageCheck represents an image validation result @@ -342,6 +364,13 @@ func (c *ContentAnalyzer) getAttr(n *html.Node, key string) string { // isUnsubscribeLink checks if a link is an unsubscribe link func (c *ContentAnalyzer) isUnsubscribeLink(href string, node *html.Node) bool { + // An href with an unreplaced template placeholder (e.g. "{unsubscribe}") is not a + // working link, so it must not count as a valid unsubscribe method even though it + // literally contains the word "unsubscribe". + if isTemplatePlaceholderURL(href) { + return false + } + // First check: does the href match a URL from the List-Unsubscribe header? if slices.Contains(c.listUnsubscribeURLs, href) { return true @@ -387,6 +416,15 @@ func (c *ContentAnalyzer) validateLink(urlStr string) LinkCheck { IsSafe: true, } + // Detect unreplaced templating placeholders (e.g. "{unsubscribe}"). Such a URL + // is not a real link: the merge field was never substituted before sending. + if isTemplatePlaceholderURL(urlStr) { + check.Valid = false + check.IsTemplate = true + check.Error = "URL contains an unreplaced template placeholder (merge field was not substituted before sending)" + return check + } + // Parse URL parsedURL, err := url.Parse(urlStr) if err != nil { @@ -798,6 +836,21 @@ func (c *ContentAnalyzer) GenerateContentAnalysis(results *ContentResults) *mode }) } + // Add unreplaced template placeholder issues + for _, link := range results.Links { + if !link.IsTemplate { + continue + } + location := link.URL + htmlIssues = append(htmlIssues, model.ContentIssue{ + Type: model.UnreplacedTemplate, + Severity: model.ContentIssueSeverityHigh, + Message: fmt.Sprintf("Link contains an unreplaced template placeholder: %s", link.URL), + Location: &location, + Advice: utils.PtrTo("Ensure all merge fields and template placeholders are substituted before sending"), + }) + } + // Add suspicious URL issues for _, suspURL := range results.SuspiciousURLs { htmlIssues = append(htmlIssues, model.ContentIssue{ @@ -838,7 +891,10 @@ func (c *ContentAnalyzer) GenerateContentAnalysis(results *ContentResults) *mode links := make([]model.LinkCheck, 0, len(results.Links)) for _, link := range results.Links { status := model.Valid - if link.Status >= 400 { + if !link.Valid { + // Link could not be parsed/validated (e.g. unreplaced template placeholder) + status = model.Broken + } else if link.Status >= 400 { status = model.Broken } else if !link.IsSafe { status = model.Suspicious diff --git a/pkg/analyzer/content_test.go b/pkg/analyzer/content_test.go index 4ad01a8..e030a2a 100644 --- a/pkg/analyzer/content_test.go +++ b/pkg/analyzer/content_test.go @@ -24,10 +24,12 @@ package analyzer import ( "net/mail" "net/url" + "slices" "strings" "testing" "time" + "git.happydns.org/happyDeliver/internal/model" "golang.org/x/net/html" ) @@ -212,6 +214,25 @@ func TestIsUnsubscribeLink(t *testing.T) { linkText: "Üyeliği sonlandır", expected: true, }, + // Unreplaced template placeholders must NOT count as unsubscribe methods + { + name: "Curly brace template placeholder", + href: "{unsubscribe}", + linkText: "Unsubscribe", + expected: false, + }, + { + name: "Double curly brace template placeholder", + href: "{{unsubscribe_url}}", + linkText: "Unsubscribe", + expected: false, + }, + { + name: "Mailchimp merge tag placeholder", + href: "*|UNSUB|*", + linkText: "Unsubscribe here", + expected: false, + }, } analyzer := NewContentAnalyzer(5 * time.Second) @@ -979,3 +1000,90 @@ func TestHasDomainMisalignment(t *testing.T) { }) } } + +func TestIsTemplatePlaceholderURL(t *testing.T) { + tests := []struct { + name string + url string + expected bool + }{ + {name: "Single curly braces", url: "{unsubscribe}", expected: true}, + {name: "Double curly braces", url: "{{unsubscribe_url}}", expected: true}, + {name: "Dollar braces", url: "${unsubscribe}", expected: true}, + {name: "Mailchimp merge tag", url: "*|UNSUB|*", expected: true}, + {name: "Percent tag", url: "%unsubscribe%", expected: true}, + {name: "Double percent tag", url: "%%unsubscribe%%", expected: true}, + {name: "Square bracket tag", url: "[unsubscribe]", expected: true}, + {name: "URL-encoded curly braces", url: "https://example.com/u/%7Btoken%7D", expected: true}, + {name: "Placeholder embedded in URL", url: "https://example.com/unsub?id={{recipient_id}}", expected: true}, + {name: "Normal https URL", url: "https://example.com/unsubscribe?id=123", expected: false}, + {name: "Normal URL with percent-encoded space", url: "https://example.com/path%20name", expected: false}, + {name: "Percent-encoded accented char (é)", url: "https://example.com/caf%C3%A9/unsubscribe", expected: false}, + {name: "Percent-encoded UTF-8 ellipsis", url: "https://example.com/path?q=%E2%80%A6", expected: false}, + {name: "Percent-encoded Cyrillic", url: "https://example.com/r?u=%D0%BF%D1%80%D0%B8%D0%B2", expected: false}, + {name: "Adjacent percent-encoded octets (all hex)", url: "https://example.com/%aa%bb", expected: false}, + {name: "Percent escape then literal hex letters", url: "https://example.com/x%def%20y", expected: false}, + {name: "Short percent tag with non-hex letter", url: "%id%", expected: true}, + {name: "Mailto URL", url: "mailto:unsubscribe@example.com", expected: false}, + {name: "IPv6 URL (square brackets, not a tag)", url: "http://[::1]/unsubscribe", expected: false}, + {name: "IPv6 URL with hex-letter host", url: "http://[fe80::1]/unsubscribe", expected: false}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := isTemplatePlaceholderURL(tt.url); got != tt.expected { + t.Errorf("isTemplatePlaceholderURL(%q) = %v, want %v", tt.url, got, tt.expected) + } + }) + } +} + +func TestValidateLink_TemplatePlaceholderIsInvalid(t *testing.T) { + analyzer := NewContentAnalyzer(5 * time.Second) + + check := analyzer.validateLink("{unsubscribe}") + if check.Valid { + t.Errorf("validateLink(%q).Valid = true, want false", "{unsubscribe}") + } + if check.Error == "" { + t.Errorf("validateLink(%q).Error is empty, want a template placeholder error", "{unsubscribe}") + } +} + +func TestGenerateContentAnalysis_TemplateLinkNotUnsubscribe(t *testing.T) { + analyzer := NewContentAnalyzer(5 * time.Second) + + results := &ContentResults{ + HTMLContent: "Unsubscribe", + Links: []LinkCheck{{URL: "{unsubscribe}", Valid: false, IsTemplate: true, IsSafe: true, Error: "template"}}, + HasUnsubscribe: false, + } + + analysis := analyzer.GenerateContentAnalysis(results) + + // The link must be reported as broken, not valid + if analysis.Links == nil || len(*analysis.Links) != 1 { + t.Fatalf("expected 1 link in analysis, got %v", analysis.Links) + } + if (*analysis.Links)[0].Status != model.Broken { + t.Errorf("template link status = %q, want %q", (*analysis.Links)[0].Status, model.Broken) + } + + // It must not be counted as an unsubscribe method + if analysis.UnsubscribeMethods != nil && slices.Contains(*analysis.UnsubscribeMethods, model.Link) { + t.Errorf("template link wrongly counted as an unsubscribe method: %v", *analysis.UnsubscribeMethods) + } + + // An unreplaced template issue must be reported + foundIssue := false + if analysis.HtmlIssues != nil { + for _, issue := range *analysis.HtmlIssues { + if issue.Type == model.UnreplacedTemplate { + foundIssue = true + } + } + } + if !foundIssue { + t.Errorf("expected an unreplaced_template content issue, got %v", analysis.HtmlIssues) + } +}