// This file is part of the happyDeliver (R) project. // Copyright (c) 2025 happyDomain // Authors: Pierre-Olivier Mercier, et al. // // This program is offered under a commercial and under the AGPL license. // For commercial licensing, contact us at . // // For AGPL licensing: // This program is free software: you can redistribute it and/or modify // it under the terms of the GNU Affero General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU Affero General Public License for more details. // // You should have received a copy of the GNU Affero General Public License // along with this program. If not, see . package analyzer import ( "net/mail" "net/url" "strings" "testing" "time" "golang.org/x/net/html" ) func TestNewContentAnalyzer(t *testing.T) { tests := []struct { name string timeout time.Duration expectedTimeout time.Duration }{ { name: "Default timeout", timeout: 0, expectedTimeout: 10 * time.Second, }, { name: "Custom timeout", timeout: 5 * time.Second, expectedTimeout: 5 * time.Second, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { analyzer := NewContentAnalyzer(tt.timeout) if analyzer.Timeout != tt.expectedTimeout { t.Errorf("Timeout = %v, want %v", analyzer.Timeout, tt.expectedTimeout) } if analyzer.httpClient == nil { t.Error("httpClient should not be nil") } }) } } func TestExtractTextFromHTML(t *testing.T) { tests := []struct { name string html string expectedText string }{ { name: "Simple text", html: "

Hello World

", expectedText: "Hello World", }, { name: "Multiple elements", html: "

Title

Paragraph

", expectedText: "TitleParagraph", }, { name: "With script tag", html: "

Text

More

", expectedText: "TextMore", }, { name: "With style tag", html: "

Text

More

", expectedText: "TextMore", }, { name: "Empty HTML", html: "", expectedText: "", }, } analyzer := NewContentAnalyzer(5 * time.Second) for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { text := analyzer.extractTextFromHTML(tt.html) if text != tt.expectedText { t.Errorf("extractTextFromHTML() = %q, want %q", text, tt.expectedText) } }) } } func TestIsUnsubscribeLink(t *testing.T) { tests := []struct { name string href string linkText string expected bool }{ { name: "Unsubscribe in URL", href: "https://example.com/unsubscribe?id=123", linkText: "Click here", expected: true, }, { name: "Unsubscribe in text", href: "https://example.com/action?id=123", linkText: "Unsubscribe from this list", expected: true, }, { name: "Opt-out in URL", href: "https://example.com/optout", linkText: "Click here", expected: true, }, { name: "Remove in text", href: "https://example.com/action", linkText: "Remove me from list", expected: true, }, { name: "Normal link", href: "https://example.com/article", linkText: "Read more", expected: false, }, } analyzer := NewContentAnalyzer(5 * time.Second) for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { // Create a simple text node for testing html := "" + tt.linkText + "" doc, _ := parseHTML(html) linkNode := findFirstLink(doc) if linkNode == nil { t.Fatal("Failed to parse test HTML") } result := analyzer.isUnsubscribeLink(tt.href, linkNode) if result != tt.expected { t.Errorf("isUnsubscribeLink(%q, %q) = %v, want %v", tt.href, tt.linkText, result, tt.expected) } }) } } func TestIsSuspiciousURL(t *testing.T) { tests := []struct { name string url string expected bool }{ { name: "Normal HTTPS URL", url: "https://example.com/page", expected: false, }, { name: "URL with IP address", url: "https://192.168.1.1/page", expected: true, }, { name: "URL with IPv6", url: "https://[2001:db8::1]/page", expected: true, }, { name: "URL shortener - bit.ly", url: "https://bit.ly/abc123", expected: true, }, { name: "URL shortener - tinyurl", url: "https://tinyurl.com/abc123", expected: true, }, { name: "Excessive subdomains", url: "https://a.b.c.d.e.example.com/page", expected: true, }, { name: "URL with @ (phishing)", url: "https://user@example.com/page", expected: true, }, { name: "Normal subdomain", url: "https://mail.example.com/page", expected: false, }, } analyzer := NewContentAnalyzer(5 * time.Second) for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { parsedURL, err := parseURL(tt.url) if err != nil { t.Fatalf("Failed to parse URL: %v", err) } result := analyzer.isSuspiciousURL(tt.url, parsedURL) if result != tt.expected { t.Errorf("isSuspiciousURL(%q) = %v, want %v", tt.url, result, tt.expected) } }) } } func TestIsIPAddress(t *testing.T) { tests := []struct { name string host string expected bool }{ { name: "IPv4 address", host: "192.168.1.1", expected: true, }, { name: "IPv4 with port", host: "192.168.1.1:8080", expected: true, }, { name: "IPv6 address", host: "2001:db8::1", expected: true, }, { name: "Domain name", host: "example.com", expected: false, }, { name: "Subdomain", host: "mail.example.com", expected: false, }, { name: "Localhost", host: "localhost", expected: false, }, } analyzer := NewContentAnalyzer(5 * time.Second) for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { result := analyzer.isIPAddress(tt.host) if result != tt.expected { t.Errorf("isIPAddress(%q) = %v, want %v", tt.host, result, tt.expected) } }) } } func TestCalculateTextPlainConsistency(t *testing.T) { tests := []struct { name string plainText string htmlText string expectedMinRatio float32 expectedMaxRatio float32 }{ { name: "Identical content", plainText: "Hello World Test", htmlText: "

Hello World Test

", expectedMinRatio: 0.8, expectedMaxRatio: 1.0, }, { name: "Similar content", plainText: "Hello World", htmlText: "

Hello World Extra

", expectedMinRatio: 0.3, expectedMaxRatio: 0.8, }, { name: "Different content", plainText: "Completely different", htmlText: "

Nothing alike here

", expectedMinRatio: 0.0, expectedMaxRatio: 0.3, }, { name: "Empty plain text", plainText: "", htmlText: "

Some text

", expectedMinRatio: 0.0, expectedMaxRatio: 0.0, }, } analyzer := NewContentAnalyzer(5 * time.Second) for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { ratio := analyzer.calculateTextPlainConsistency(tt.plainText, tt.htmlText) if ratio < tt.expectedMinRatio || ratio > tt.expectedMaxRatio { t.Errorf("calculateTextPlainConsistency() = %v, want between %v and %v", ratio, tt.expectedMinRatio, tt.expectedMaxRatio) } }) } } func TestNormalizeText(t *testing.T) { tests := []struct { name string input string expected string }{ { name: "Uppercase to lowercase", input: "Hello WORLD", expected: "hello world", }, { name: "Multiple spaces", input: "Hello World", expected: "hello world", }, { name: "Tabs and newlines", input: "Hello\t\nWorld", expected: "hello world", }, { name: "Leading and trailing spaces", input: " Hello World ", expected: "hello world", }, } analyzer := NewContentAnalyzer(5 * time.Second) for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { result := analyzer.normalizeText(tt.input) if result != tt.expected { t.Errorf("normalizeText(%q) = %q, want %q", tt.input, result, tt.expected) } }) } } func TestAnalyzeContent_HTMLParsing(t *testing.T) { tests := []struct { name string email *EmailMessage expectValid bool expectLinks int expectImages int }{ { name: "Valid HTML with links and images", email: &EmailMessage{ Header: make(mail.Header), Parts: []MessagePart{ { ContentType: "text/html", IsHTML: true, Content: `

Hello World

Link Test `, }, }, }, expectValid: true, expectLinks: 1, expectImages: 1, }, { name: "Multiple links", email: &EmailMessage{ Header: make(mail.Header), Parts: []MessagePart{ { ContentType: "text/html", IsHTML: true, Content: ` Link 1 Link 2 Link 3 `, }, }, }, expectValid: true, expectLinks: 3, expectImages: 0, }, { name: "Plain text only", email: &EmailMessage{ Header: make(mail.Header), Parts: []MessagePart{ { ContentType: "text/plain", IsText: true, Content: "Plain text email", }, }, }, expectValid: false, expectLinks: 0, expectImages: 0, }, } analyzer := NewContentAnalyzer(5 * time.Second) for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { results := analyzer.AnalyzeContent(tt.email) if results == nil { t.Fatal("Expected results, got nil") } if results.HTMLValid != tt.expectValid { t.Errorf("HTMLValid = %v, want %v", results.HTMLValid, tt.expectValid) } if len(results.Links) != tt.expectLinks { t.Errorf("Got %d links, want %d", len(results.Links), tt.expectLinks) } if len(results.Images) != tt.expectImages { t.Errorf("Got %d images, want %d", len(results.Images), tt.expectImages) } }) } } func TestAnalyzeContent_UnsubscribeDetection(t *testing.T) { tests := []struct { name string html string expectUnsubscribe bool expectCount int }{ { name: "With unsubscribe link", html: `

Email content

Unsubscribe `, expectUnsubscribe: true, expectCount: 1, }, { name: "Multiple unsubscribe links", html: ` Unsubscribe Opt out `, expectUnsubscribe: true, expectCount: 2, }, { name: "No unsubscribe link", html: `

Email content

Read more `, expectUnsubscribe: false, expectCount: 0, }, } analyzer := NewContentAnalyzer(5 * time.Second) for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { email := &EmailMessage{ Header: make(mail.Header), Parts: []MessagePart{ { ContentType: "text/html", IsHTML: true, Content: tt.html, }, }, } results := analyzer.AnalyzeContent(email) if results.HasUnsubscribe != tt.expectUnsubscribe { t.Errorf("HasUnsubscribe = %v, want %v", results.HasUnsubscribe, tt.expectUnsubscribe) } if len(results.UnsubscribeLinks) != tt.expectCount { t.Errorf("Got %d unsubscribe links, want %d", len(results.UnsubscribeLinks), tt.expectCount) } }) } } func TestAnalyzeContent_ImageAltAttributes(t *testing.T) { tests := []struct { name string html string expectImages int expectWithAlt int }{ { name: "Images with alt", html: ` Description 1 Description 2 `, expectImages: 2, expectWithAlt: 2, }, { name: "Images without alt", html: ` `, expectImages: 2, expectWithAlt: 0, }, { name: "Mixed images", html: ` Description `, expectImages: 2, expectWithAlt: 1, }, } analyzer := NewContentAnalyzer(5 * time.Second) for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { email := &EmailMessage{ Header: make(mail.Header), Parts: []MessagePart{ { ContentType: "text/html", IsHTML: true, Content: tt.html, }, }, } results := analyzer.AnalyzeContent(email) if len(results.Images) != tt.expectImages { t.Errorf("Got %d images, want %d", len(results.Images), tt.expectImages) } withAlt := 0 for _, img := range results.Images { if img.HasAlt { withAlt++ } } if withAlt != tt.expectWithAlt { t.Errorf("Got %d images with alt, want %d", withAlt, tt.expectWithAlt) } }) } } // Helper functions for testing func parseHTML(htmlStr string) (*html.Node, error) { return html.Parse(strings.NewReader(htmlStr)) } func findFirstLink(n *html.Node) *html.Node { if n.Type == html.ElementNode && n.Data == "a" { return n } for c := n.FirstChild; c != nil; c = c.NextSibling { if result := findFirstLink(c); result != nil { return result } } return nil } func parseURL(urlStr string) (*url.URL, error) { return url.Parse(urlStr) }