diff --git a/pkg/analyzer/content.go b/pkg/analyzer/content.go index 87c423f..3150d50 100644 --- a/pkg/analyzer/content.go +++ b/pkg/analyzer/content.go @@ -220,18 +220,6 @@ func (c *ContentAnalyzer) traverseHTML(n *html.Node, results *ContentResults) { // Validate link linkCheck := c.validateLink(href) - - // Check for domain misalignment (phishing detection) - linkText := c.getNodeText(n) - if c.hasDomainMisalignment(href, linkText) { - linkCheck.IsSafe = false - if linkCheck.Warning == "" { - linkCheck.Warning = "Link text domain does not match actual URL domain (possible phishing)" - } else { - linkCheck.Warning += "; Link text domain does not match actual URL domain (possible phishing)" - } - } - results.Links = append(results.Links, linkCheck) // Check for suspicious URLs @@ -427,131 +415,8 @@ func (c *ContentAnalyzer) validateLink(urlStr string) LinkCheck { return check } -// hasDomainMisalignment checks if the link text contains a different domain than the actual URL -// This is a common phishing technique (e.g., text shows "paypal.com" but links to "evil.com") -func (c *ContentAnalyzer) hasDomainMisalignment(href, linkText string) bool { - // Parse the actual URL - parsedURL, err := url.Parse(href) - if err != nil { - return false - } - - // Extract the actual destination domain/email based on scheme - var actualDomain string - - if parsedURL.Scheme == "mailto" { - // Extract email address from mailto: URL - // Format can be: mailto:user@domain.com or mailto:user@domain.com?subject=... - mailtoAddr := parsedURL.Opaque - - // Remove query parameters if present - if idx := strings.Index(mailtoAddr, "?"); idx != -1 { - mailtoAddr = mailtoAddr[:idx] - } - - mailtoAddr = strings.TrimSpace(strings.ToLower(mailtoAddr)) - - // Extract domain from email address - if idx := strings.Index(mailtoAddr, "@"); idx != -1 { - actualDomain = mailtoAddr[idx+1:] - } else { - return false // Invalid mailto - } - } else if parsedURL.Scheme == "http" || parsedURL.Scheme == "https" { - // Check if URL has a host - if parsedURL.Host == "" { - return false - } - - // Extract the actual URL's domain (remove port if present) - actualDomain = parsedURL.Host - if idx := strings.LastIndex(actualDomain, ":"); idx != -1 { - actualDomain = actualDomain[:idx] - } - actualDomain = strings.ToLower(actualDomain) - } else { - // Skip checks for other URL schemes (tel, etc.) - return false - } - - // Normalize link text - linkText = strings.TrimSpace(linkText) - linkText = strings.ToLower(linkText) - - // Skip if link text is empty, too short, or just generic text like "click here" - if linkText == "" || len(linkText) < 4 { - return false - } - - // Common generic link texts that shouldn't trigger warnings - genericTexts := []string{ - "click here", "read more", "learn more", "download", "subscribe", - "unsubscribe", "view online", "view in browser", "click", "here", - "update", "verify", "confirm", "continue", "get started", - // mailto-specific generic texts - "email us", "contact us", "send email", "get in touch", "reach out", - "contact", "email", "write to us", - } - for _, generic := range genericTexts { - if linkText == generic { - return false - } - } - - // Extract domain-like patterns from link text using regex - // Matches patterns like "example.com", "www.example.com", "http://example.com" - domainRegex := regexp.MustCompile(`(?i)(?:https?://)?(?:www\.)?([a-z0-9][-a-z0-9]*\.)+[a-z]{2,}`) - matches := domainRegex.FindAllString(linkText, -1) - - if len(matches) == 0 { - return false - } - - // Check each domain-like pattern found in the text - for _, textDomain := range matches { - // Normalize the text domain - textDomain = strings.ToLower(textDomain) - textDomain = strings.TrimPrefix(textDomain, "http://") - textDomain = strings.TrimPrefix(textDomain, "https://") - textDomain = strings.TrimPrefix(textDomain, "www.") - - // Remove trailing slashes and paths - if idx := strings.Index(textDomain, "/"); idx != -1 { - textDomain = textDomain[:idx] - } - - // Compare domains - they should match or the actual URL should be a subdomain of the text domain - if textDomain != actualDomain { - // Check if actual domain is a subdomain of text domain - if !strings.HasSuffix(actualDomain, "."+textDomain) && !strings.HasSuffix(actualDomain, textDomain) { - // Check if they share the same base domain (last 2 parts) - textParts := strings.Split(textDomain, ".") - actualParts := strings.Split(actualDomain, ".") - - if len(textParts) >= 2 && len(actualParts) >= 2 { - textBase := strings.Join(textParts[len(textParts)-2:], ".") - actualBase := strings.Join(actualParts[len(actualParts)-2:], ".") - - if textBase != actualBase { - return true // Domain mismatch detected! - } - } else { - return true // Domain mismatch detected! - } - } - } - } - - return false -} - // isSuspiciousURL checks if a URL looks suspicious func (c *ContentAnalyzer) isSuspiciousURL(urlStr string, parsedURL *url.URL) bool { - // Skip checks for mailto: URLs - if parsedURL.Scheme == "mailto" { - return false - } - // Check for IP address instead of domain if c.isIPAddress(parsedURL.Host) { return true diff --git a/pkg/analyzer/content_test.go b/pkg/analyzer/content_test.go index 0aa7ff9..78a27e9 100644 --- a/pkg/analyzer/content_test.go +++ b/pkg/analyzer/content_test.go @@ -213,16 +213,6 @@ func TestIsSuspiciousURL(t *testing.T) { url: "https://mail.example.com/page", expected: false, }, - { - name: "Mailto with @ symbol", - url: "mailto:support@example.com", - expected: false, - }, - { - name: "Mailto with multiple @ symbols", - url: "mailto:user@subdomain@example.com", - expected: false, - }, } analyzer := NewContentAnalyzer(5 * time.Second) @@ -638,276 +628,3 @@ func findFirstLink(n *html.Node) *html.Node { func parseURL(urlStr string) (*url.URL, error) { return url.Parse(urlStr) } - -func TestHasDomainMisalignment(t *testing.T) { - tests := []struct { - name string - href string - linkText string - expected bool - reason string - }{ - // Phishing cases - should return true - { - name: "Obvious phishing - different domains", - href: "https://evil.com/page", - linkText: "Click here to verify your paypal.com account", - expected: true, - reason: "Link text shows 'paypal.com' but URL points to 'evil.com'", - }, - { - name: "Domain in link text differs from URL", - href: "http://attacker.net", - linkText: "Visit google.com for more info", - expected: true, - reason: "Link text shows 'google.com' but URL points to 'attacker.net'", - }, - { - name: "URL shown in text differs from actual URL", - href: "https://phishing-site.xyz/login", - linkText: "https://www.bank.example.com/secure", - expected: true, - reason: "Full URL in text doesn't match actual destination", - }, - { - name: "Similar but different domain", - href: "https://paypa1.com/login", - linkText: "Login to your paypal.com account", - expected: true, - reason: "Typosquatting: 'paypa1.com' vs 'paypal.com'", - }, - { - name: "Subdomain spoofing", - href: "https://paypal.com.evil.com/login", - linkText: "Verify your paypal.com account", - expected: true, - reason: "Domain is 'evil.com', not 'paypal.com'", - }, - { - name: "Multiple domains in text, none match", - href: "https://badsite.com", - linkText: "Transfer from bank.com to paypal.com", - expected: true, - reason: "Neither 'bank.com' nor 'paypal.com' matches 'badsite.com'", - }, - - // Legitimate cases - should return false - { - name: "Exact domain match", - href: "https://example.com/page", - linkText: "Visit example.com for more information", - expected: false, - reason: "Domains match exactly", - }, - { - name: "Legitimate subdomain", - href: "https://mail.google.com/inbox", - linkText: "Check your google.com email", - expected: false, - reason: "Subdomain of the mentioned domain", - }, - { - name: "www prefix variation", - href: "https://www.example.com/page", - linkText: "Visit example.com", - expected: false, - reason: "www prefix is acceptable variation", - }, - { - name: "Generic link text - click here", - href: "https://anywhere.com", - linkText: "click here", - expected: false, - reason: "Generic text doesn't contain a domain", - }, - { - name: "Generic link text - read more", - href: "https://example.com/article", - linkText: "Read more", - expected: false, - reason: "Generic text doesn't contain a domain", - }, - { - name: "Generic link text - learn more", - href: "https://example.com/info", - linkText: "Learn More", - expected: false, - reason: "Generic text doesn't contain a domain (case insensitive)", - }, - { - name: "No domain in link text", - href: "https://example.com/page", - linkText: "Click to continue", - expected: false, - reason: "Link text has no domain reference", - }, - { - name: "Short link text", - href: "https://example.com", - linkText: "Go", - expected: false, - reason: "Text too short to contain meaningful domain", - }, - { - name: "Empty link text", - href: "https://example.com", - linkText: "", - expected: false, - reason: "Empty text cannot contain domain", - }, - { - name: "Mailto link - matching domain", - href: "mailto:support@example.com", - linkText: "Email support@example.com", - expected: false, - reason: "Mailto email matches text email", - }, - { - name: "Mailto link - domain mismatch (phishing)", - href: "mailto:attacker@evil.com", - linkText: "Contact support@paypal.com for help", - expected: true, - reason: "Mailto domain 'evil.com' doesn't match text domain 'paypal.com'", - }, - { - name: "Mailto link - generic text", - href: "mailto:info@example.com", - linkText: "Contact us", - expected: false, - reason: "Generic text without domain reference", - }, - { - name: "Mailto link - same domain different user", - href: "mailto:sales@example.com", - linkText: "Email support@example.com", - expected: false, - reason: "Both emails share the same domain", - }, - { - name: "Mailto link - text shows only domain", - href: "mailto:info@example.com", - linkText: "Write to example.com", - expected: false, - reason: "Text domain matches mailto domain", - }, - { - name: "Mailto link - domain in text doesn't match", - href: "mailto:scam@phishing.net", - linkText: "Reply to customer-service@amazon.com", - expected: true, - reason: "Mailto domain 'phishing.net' doesn't match 'amazon.com' in text", - }, - { - name: "Tel link", - href: "tel:+1234567890", - linkText: "Call example.com support", - expected: false, - reason: "Non-HTTP(S) links are excluded", - }, - { - name: "Same base domain with different subdomains", - href: "https://www.example.com/page", - linkText: "Visit blog.example.com", - expected: false, - reason: "Both share same base domain 'example.com'", - }, - { - name: "URL with path matches domain in text", - href: "https://example.com/section/page", - linkText: "Go to example.com", - expected: false, - reason: "Domain matches, path doesn't matter", - }, - { - name: "Generic text - subscribe", - href: "https://newsletter.example.com/signup", - linkText: "Subscribe", - expected: false, - reason: "Generic call-to-action text", - }, - { - name: "Generic text - unsubscribe", - href: "https://example.com/unsubscribe?id=123", - linkText: "Unsubscribe", - expected: false, - reason: "Generic unsubscribe text", - }, - { - name: "Generic text - download", - href: "https://files.example.com/document.pdf", - linkText: "Download", - expected: false, - reason: "Generic action text", - }, - { - name: "Descriptive text without domain", - href: "https://shop.example.com/products", - linkText: "View our latest products", - expected: false, - reason: "No domain mentioned in text", - }, - - // Edge cases - { - name: "Domain-like text but not valid domain", - href: "https://example.com", - linkText: "Save up to 50.00 dollars", - expected: false, - reason: "50.00 looks like domain but isn't", - }, - { - name: "Text with http prefix matching domain", - href: "https://example.com/page", - linkText: "Visit http://example.com", - expected: false, - reason: "Domains match despite different protocols in display", - }, - { - name: "Port in URL should not affect matching", - href: "https://example.com:8080/page", - linkText: "Go to example.com", - expected: false, - reason: "Port number doesn't affect domain matching", - }, - { - name: "Whitespace in link text", - href: "https://example.com", - linkText: " example.com ", - expected: false, - reason: "Whitespace should be trimmed", - }, - { - name: "Multiple spaces in generic text", - href: "https://example.com", - linkText: "click here", - expected: false, - reason: "Generic text with extra spaces", - }, - { - name: "Anchor fragment in URL", - href: "https://example.com/page#section", - linkText: "example.com section", - expected: false, - reason: "Fragment doesn't affect domain matching", - }, - { - name: "Query parameters in URL", - href: "https://example.com/page?utm_source=email", - linkText: "Visit example.com", - expected: false, - reason: "Query params don't affect domain matching", - }, - } - - analyzer := NewContentAnalyzer(5 * time.Second) - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - result := analyzer.hasDomainMisalignment(tt.href, tt.linkText) - if result != tt.expected { - t.Errorf("hasDomainMisalignment(%q, %q) = %v, want %v\nReason: %s", - tt.href, tt.linkText, result, tt.expected, tt.reason) - } - }) - } -} diff --git a/web/src/lib/components/SummaryCard.svelte b/web/src/lib/components/SummaryCard.svelte index 1267f8b..ffce5ba 100644 --- a/web/src/lib/components/SummaryCard.svelte +++ b/web/src/lib/components/SummaryCard.svelte @@ -323,13 +323,6 @@ }); if (bimiResult.details && bimiResult.details.indexOf("declined") == 0) { segments.push({ text: " declined to participate" }); - } else if (bimiResult?.result !== "fail") { - segments.push({ text: " but" }); - segments.push({ - text: "has issues", - highlight: { color: "danger", bold: true }, - link: "#authentication-bimi", - }); } else { segments.push({ text: " for brand indicator display" }); }