diff --git a/pkg/analyzer/content.go b/pkg/analyzer/content.go index 3150d50..87c423f 100644 --- a/pkg/analyzer/content.go +++ b/pkg/analyzer/content.go @@ -220,6 +220,18 @@ func (c *ContentAnalyzer) traverseHTML(n *html.Node, results *ContentResults) { // Validate link linkCheck := c.validateLink(href) + + // Check for domain misalignment (phishing detection) + linkText := c.getNodeText(n) + if c.hasDomainMisalignment(href, linkText) { + linkCheck.IsSafe = false + if linkCheck.Warning == "" { + linkCheck.Warning = "Link text domain does not match actual URL domain (possible phishing)" + } else { + linkCheck.Warning += "; Link text domain does not match actual URL domain (possible phishing)" + } + } + results.Links = append(results.Links, linkCheck) // Check for suspicious URLs @@ -415,8 +427,131 @@ func (c *ContentAnalyzer) validateLink(urlStr string) LinkCheck { return check } +// hasDomainMisalignment checks if the link text contains a different domain than the actual URL +// This is a common phishing technique (e.g., text shows "paypal.com" but links to "evil.com") +func (c *ContentAnalyzer) hasDomainMisalignment(href, linkText string) bool { + // Parse the actual URL + parsedURL, err := url.Parse(href) + if err != nil { + return false + } + + // Extract the actual destination domain/email based on scheme + var actualDomain string + + if parsedURL.Scheme == "mailto" { + // Extract email address from mailto: URL + // Format can be: mailto:user@domain.com or mailto:user@domain.com?subject=... + mailtoAddr := parsedURL.Opaque + + // Remove query parameters if present + if idx := strings.Index(mailtoAddr, "?"); idx != -1 { + mailtoAddr = mailtoAddr[:idx] + } + + mailtoAddr = strings.TrimSpace(strings.ToLower(mailtoAddr)) + + // Extract domain from email address + if idx := strings.Index(mailtoAddr, "@"); idx != -1 { + actualDomain = mailtoAddr[idx+1:] + } else { + return false // Invalid mailto + } + } else if parsedURL.Scheme == "http" || parsedURL.Scheme == "https" { + // Check if URL has a host + if parsedURL.Host == "" { + return false + } + + // Extract the actual URL's domain (remove port if present) + actualDomain = parsedURL.Host + if idx := strings.LastIndex(actualDomain, ":"); idx != -1 { + actualDomain = actualDomain[:idx] + } + actualDomain = strings.ToLower(actualDomain) + } else { + // Skip checks for other URL schemes (tel, etc.) + return false + } + + // Normalize link text + linkText = strings.TrimSpace(linkText) + linkText = strings.ToLower(linkText) + + // Skip if link text is empty, too short, or just generic text like "click here" + if linkText == "" || len(linkText) < 4 { + return false + } + + // Common generic link texts that shouldn't trigger warnings + genericTexts := []string{ + "click here", "read more", "learn more", "download", "subscribe", + "unsubscribe", "view online", "view in browser", "click", "here", + "update", "verify", "confirm", "continue", "get started", + // mailto-specific generic texts + "email us", "contact us", "send email", "get in touch", "reach out", + "contact", "email", "write to us", + } + for _, generic := range genericTexts { + if linkText == generic { + return false + } + } + + // Extract domain-like patterns from link text using regex + // Matches patterns like "example.com", "www.example.com", "http://example.com" + domainRegex := regexp.MustCompile(`(?i)(?:https?://)?(?:www\.)?([a-z0-9][-a-z0-9]*\.)+[a-z]{2,}`) + matches := domainRegex.FindAllString(linkText, -1) + + if len(matches) == 0 { + return false + } + + // Check each domain-like pattern found in the text + for _, textDomain := range matches { + // Normalize the text domain + textDomain = strings.ToLower(textDomain) + textDomain = strings.TrimPrefix(textDomain, "http://") + textDomain = strings.TrimPrefix(textDomain, "https://") + textDomain = strings.TrimPrefix(textDomain, "www.") + + // Remove trailing slashes and paths + if idx := strings.Index(textDomain, "/"); idx != -1 { + textDomain = textDomain[:idx] + } + + // Compare domains - they should match or the actual URL should be a subdomain of the text domain + if textDomain != actualDomain { + // Check if actual domain is a subdomain of text domain + if !strings.HasSuffix(actualDomain, "."+textDomain) && !strings.HasSuffix(actualDomain, textDomain) { + // Check if they share the same base domain (last 2 parts) + textParts := strings.Split(textDomain, ".") + actualParts := strings.Split(actualDomain, ".") + + if len(textParts) >= 2 && len(actualParts) >= 2 { + textBase := strings.Join(textParts[len(textParts)-2:], ".") + actualBase := strings.Join(actualParts[len(actualParts)-2:], ".") + + if textBase != actualBase { + return true // Domain mismatch detected! + } + } else { + return true // Domain mismatch detected! + } + } + } + } + + return false +} + // isSuspiciousURL checks if a URL looks suspicious func (c *ContentAnalyzer) isSuspiciousURL(urlStr string, parsedURL *url.URL) bool { + // Skip checks for mailto: URLs + if parsedURL.Scheme == "mailto" { + return false + } + // Check for IP address instead of domain if c.isIPAddress(parsedURL.Host) { return true diff --git a/pkg/analyzer/content_test.go b/pkg/analyzer/content_test.go index 78a27e9..0aa7ff9 100644 --- a/pkg/analyzer/content_test.go +++ b/pkg/analyzer/content_test.go @@ -213,6 +213,16 @@ func TestIsSuspiciousURL(t *testing.T) { url: "https://mail.example.com/page", expected: false, }, + { + name: "Mailto with @ symbol", + url: "mailto:support@example.com", + expected: false, + }, + { + name: "Mailto with multiple @ symbols", + url: "mailto:user@subdomain@example.com", + expected: false, + }, } analyzer := NewContentAnalyzer(5 * time.Second) @@ -628,3 +638,276 @@ func findFirstLink(n *html.Node) *html.Node { func parseURL(urlStr string) (*url.URL, error) { return url.Parse(urlStr) } + +func TestHasDomainMisalignment(t *testing.T) { + tests := []struct { + name string + href string + linkText string + expected bool + reason string + }{ + // Phishing cases - should return true + { + name: "Obvious phishing - different domains", + href: "https://evil.com/page", + linkText: "Click here to verify your paypal.com account", + expected: true, + reason: "Link text shows 'paypal.com' but URL points to 'evil.com'", + }, + { + name: "Domain in link text differs from URL", + href: "http://attacker.net", + linkText: "Visit google.com for more info", + expected: true, + reason: "Link text shows 'google.com' but URL points to 'attacker.net'", + }, + { + name: "URL shown in text differs from actual URL", + href: "https://phishing-site.xyz/login", + linkText: "https://www.bank.example.com/secure", + expected: true, + reason: "Full URL in text doesn't match actual destination", + }, + { + name: "Similar but different domain", + href: "https://paypa1.com/login", + linkText: "Login to your paypal.com account", + expected: true, + reason: "Typosquatting: 'paypa1.com' vs 'paypal.com'", + }, + { + name: "Subdomain spoofing", + href: "https://paypal.com.evil.com/login", + linkText: "Verify your paypal.com account", + expected: true, + reason: "Domain is 'evil.com', not 'paypal.com'", + }, + { + name: "Multiple domains in text, none match", + href: "https://badsite.com", + linkText: "Transfer from bank.com to paypal.com", + expected: true, + reason: "Neither 'bank.com' nor 'paypal.com' matches 'badsite.com'", + }, + + // Legitimate cases - should return false + { + name: "Exact domain match", + href: "https://example.com/page", + linkText: "Visit example.com for more information", + expected: false, + reason: "Domains match exactly", + }, + { + name: "Legitimate subdomain", + href: "https://mail.google.com/inbox", + linkText: "Check your google.com email", + expected: false, + reason: "Subdomain of the mentioned domain", + }, + { + name: "www prefix variation", + href: "https://www.example.com/page", + linkText: "Visit example.com", + expected: false, + reason: "www prefix is acceptable variation", + }, + { + name: "Generic link text - click here", + href: "https://anywhere.com", + linkText: "click here", + expected: false, + reason: "Generic text doesn't contain a domain", + }, + { + name: "Generic link text - read more", + href: "https://example.com/article", + linkText: "Read more", + expected: false, + reason: "Generic text doesn't contain a domain", + }, + { + name: "Generic link text - learn more", + href: "https://example.com/info", + linkText: "Learn More", + expected: false, + reason: "Generic text doesn't contain a domain (case insensitive)", + }, + { + name: "No domain in link text", + href: "https://example.com/page", + linkText: "Click to continue", + expected: false, + reason: "Link text has no domain reference", + }, + { + name: "Short link text", + href: "https://example.com", + linkText: "Go", + expected: false, + reason: "Text too short to contain meaningful domain", + }, + { + name: "Empty link text", + href: "https://example.com", + linkText: "", + expected: false, + reason: "Empty text cannot contain domain", + }, + { + name: "Mailto link - matching domain", + href: "mailto:support@example.com", + linkText: "Email support@example.com", + expected: false, + reason: "Mailto email matches text email", + }, + { + name: "Mailto link - domain mismatch (phishing)", + href: "mailto:attacker@evil.com", + linkText: "Contact support@paypal.com for help", + expected: true, + reason: "Mailto domain 'evil.com' doesn't match text domain 'paypal.com'", + }, + { + name: "Mailto link - generic text", + href: "mailto:info@example.com", + linkText: "Contact us", + expected: false, + reason: "Generic text without domain reference", + }, + { + name: "Mailto link - same domain different user", + href: "mailto:sales@example.com", + linkText: "Email support@example.com", + expected: false, + reason: "Both emails share the same domain", + }, + { + name: "Mailto link - text shows only domain", + href: "mailto:info@example.com", + linkText: "Write to example.com", + expected: false, + reason: "Text domain matches mailto domain", + }, + { + name: "Mailto link - domain in text doesn't match", + href: "mailto:scam@phishing.net", + linkText: "Reply to customer-service@amazon.com", + expected: true, + reason: "Mailto domain 'phishing.net' doesn't match 'amazon.com' in text", + }, + { + name: "Tel link", + href: "tel:+1234567890", + linkText: "Call example.com support", + expected: false, + reason: "Non-HTTP(S) links are excluded", + }, + { + name: "Same base domain with different subdomains", + href: "https://www.example.com/page", + linkText: "Visit blog.example.com", + expected: false, + reason: "Both share same base domain 'example.com'", + }, + { + name: "URL with path matches domain in text", + href: "https://example.com/section/page", + linkText: "Go to example.com", + expected: false, + reason: "Domain matches, path doesn't matter", + }, + { + name: "Generic text - subscribe", + href: "https://newsletter.example.com/signup", + linkText: "Subscribe", + expected: false, + reason: "Generic call-to-action text", + }, + { + name: "Generic text - unsubscribe", + href: "https://example.com/unsubscribe?id=123", + linkText: "Unsubscribe", + expected: false, + reason: "Generic unsubscribe text", + }, + { + name: "Generic text - download", + href: "https://files.example.com/document.pdf", + linkText: "Download", + expected: false, + reason: "Generic action text", + }, + { + name: "Descriptive text without domain", + href: "https://shop.example.com/products", + linkText: "View our latest products", + expected: false, + reason: "No domain mentioned in text", + }, + + // Edge cases + { + name: "Domain-like text but not valid domain", + href: "https://example.com", + linkText: "Save up to 50.00 dollars", + expected: false, + reason: "50.00 looks like domain but isn't", + }, + { + name: "Text with http prefix matching domain", + href: "https://example.com/page", + linkText: "Visit http://example.com", + expected: false, + reason: "Domains match despite different protocols in display", + }, + { + name: "Port in URL should not affect matching", + href: "https://example.com:8080/page", + linkText: "Go to example.com", + expected: false, + reason: "Port number doesn't affect domain matching", + }, + { + name: "Whitespace in link text", + href: "https://example.com", + linkText: " example.com ", + expected: false, + reason: "Whitespace should be trimmed", + }, + { + name: "Multiple spaces in generic text", + href: "https://example.com", + linkText: "click here", + expected: false, + reason: "Generic text with extra spaces", + }, + { + name: "Anchor fragment in URL", + href: "https://example.com/page#section", + linkText: "example.com section", + expected: false, + reason: "Fragment doesn't affect domain matching", + }, + { + name: "Query parameters in URL", + href: "https://example.com/page?utm_source=email", + linkText: "Visit example.com", + expected: false, + reason: "Query params don't affect domain matching", + }, + } + + analyzer := NewContentAnalyzer(5 * time.Second) + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := analyzer.hasDomainMisalignment(tt.href, tt.linkText) + if result != tt.expected { + t.Errorf("hasDomainMisalignment(%q, %q) = %v, want %v\nReason: %s", + tt.href, tt.linkText, result, tt.expected, tt.reason) + } + }) + } +} diff --git a/web/package-lock.json b/web/package-lock.json index f1c42fd..0911c63 100644 --- a/web/package-lock.json +++ b/web/package-lock.json @@ -1339,9 +1339,9 @@ "license": "MIT" }, "node_modules/@types/node": { - "version": "22.18.12", - "resolved": "https://registry.npmjs.org/@types/node/-/node-22.18.12.tgz", - "integrity": "sha512-BICHQ67iqxQGFSzfCFTT7MRQ5XcBjG5aeKh5Ok38UBbPe5fxTyE+aHFxwVrGyr8GNlqFMLKD1D3P2K/1ks8tog==", + "version": "22.18.13", + "resolved": "https://registry.npmjs.org/@types/node/-/node-22.18.13.tgz", + "integrity": "sha512-Bo45YKIjnmFtv6I1TuC8AaHBbqXtIo+Om5fE4QiU1Tj8QR/qt+8O3BAtOimG5IFmwaWiPmB3Mv3jtYzBA4Us2A==", "dev": true, "license": "MIT", "peer": true, @@ -3132,6 +3132,19 @@ "node": ">=8.6" } }, + "node_modules/micromatch/node_modules/picomatch": { + "version": "2.3.1", + "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-2.3.1.tgz", + "integrity": "sha512-JU3teHTNjmE2VCGFzuY8EXzCDVwEqB2a8fsIvwaStHhAWJEeVd1o1QD80CU6+ZdEXXSLbSsuLwJjkCBWqRQUVA==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=8.6" + }, + "funding": { + "url": "https://github.com/sponsors/jonschlinkert" + } + }, "node_modules/minimatch": { "version": "3.1.2", "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.2.tgz", @@ -3383,13 +3396,14 @@ "license": "ISC" }, "node_modules/picomatch": { - "version": "2.3.1", - "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-2.3.1.tgz", - "integrity": "sha512-JU3teHTNjmE2VCGFzuY8EXzCDVwEqB2a8fsIvwaStHhAWJEeVd1o1QD80CU6+ZdEXXSLbSsuLwJjkCBWqRQUVA==", + "version": "4.0.3", + "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.3.tgz", + "integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==", "dev": true, "license": "MIT", + "peer": true, "engines": { - "node": ">=8.6" + "node": ">=12" }, "funding": { "url": "https://github.com/sponsors/jonschlinkert" @@ -4002,19 +4016,6 @@ "url": "https://github.com/sponsors/SuperchupuDev" } }, - "node_modules/tinyglobby/node_modules/picomatch": { - "version": "4.0.3", - "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.3.tgz", - "integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=12" - }, - "funding": { - "url": "https://github.com/sponsors/jonschlinkert" - } - }, "node_modules/tinypool": { "version": "1.1.1", "resolved": "https://registry.npmjs.org/tinypool/-/tinypool-1.1.1.tgz", @@ -4270,19 +4271,6 @@ "url": "https://opencollective.com/vitest" } }, - "node_modules/vite/node_modules/picomatch": { - "version": "4.0.3", - "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.3.tgz", - "integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=12" - }, - "funding": { - "url": "https://github.com/sponsors/jonschlinkert" - } - }, "node_modules/vitefu": { "version": "1.1.1", "resolved": "https://registry.npmjs.org/vitefu/-/vitefu-1.1.1.tgz", @@ -4376,19 +4364,6 @@ } } }, - "node_modules/vitest/node_modules/picomatch": { - "version": "4.0.3", - "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.3.tgz", - "integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=12" - }, - "funding": { - "url": "https://github.com/sponsors/jonschlinkert" - } - }, "node_modules/vitest/node_modules/tinyexec": { "version": "0.3.2", "resolved": "https://registry.npmjs.org/tinyexec/-/tinyexec-0.3.2.tgz", @@ -4462,21 +4437,6 @@ "url": "https://github.com/sponsors/sindresorhus" } }, - "node_modules/yaml": { - "version": "2.8.1", - "resolved": "https://registry.npmjs.org/yaml/-/yaml-2.8.1.tgz", - "integrity": "sha512-lcYcMxX2PO9XMGvAJkJ3OsNMw+/7FKes7/hgerGUYWIoWu5j/+YQqcZr5JnPZWzOsEBgMbSbiSTn/dv/69Mkpw==", - "dev": true, - "license": "ISC", - "optional": true, - "peer": true, - "bin": { - "yaml": "bin.mjs" - }, - "engines": { - "node": ">= 14.6" - } - }, "node_modules/yocto-queue": { "version": "0.1.0", "resolved": "https://registry.npmjs.org/yocto-queue/-/yocto-queue-0.1.0.tgz", diff --git a/web/src/lib/components/AuthenticationCard.svelte b/web/src/lib/components/AuthenticationCard.svelte index 285b045..0b36dd0 100644 --- a/web/src/lib/components/AuthenticationCard.svelte +++ b/web/src/lib/components/AuthenticationCard.svelte @@ -197,6 +197,7 @@
X-Google-DKIM + {authentication.x_google_dkim.result} @@ -227,6 +228,7 @@
X-Aligned-From + {authentication.x_aligned_from.result} diff --git a/web/src/lib/components/SummaryCard.svelte b/web/src/lib/components/SummaryCard.svelte index cf08c2c..1267f8b 100644 --- a/web/src/lib/components/SummaryCard.svelte +++ b/web/src/lib/components/SummaryCard.svelte @@ -130,6 +130,17 @@ } } + // SPF DNS record check + const spfRecord = report.dns_results?.spf_record; + if (spfRecord && !spfRecord.valid && spfRecord.record) { + segments.push({ text: ". Your SPF record is " }); + segments.push({ + text: "invalid", + highlight: { color: "danger", bold: true }, + link: "#dns-spf", + }); + } + // IP Reverse DNS (iprev) check const iprevResult = report.authentication?.iprev; if (iprevResult) { @@ -217,6 +228,28 @@ } } + // DKIM DNS record check + const dkimRecords = report.dns_results?.dkim_records; + if (dkimRecords && Object.keys(dkimRecords).length > 0) { + const invalidDkimKeys = Object.entries(dkimRecords) + .filter(([_, record]) => !record.valid && record.record) + .map(([key, _]) => key); + + if (invalidDkimKeys.length > 0) { + segments.push({ text: ". Your DKIM record" }); + if (invalidDkimKeys.length > 1) { + segments.push({ text: "s are " }); + } else { + segments.push({ text: " is " }); + } + segments.push({ + text: "invalid", + highlight: { color: "danger", bold: true }, + link: "#dns-dkim", + }); + } + } + // DMARC policy check const dmarcRecord = report.dns_results?.dmarc_record; if (dmarcRecord) { @@ -235,9 +268,9 @@ segments.push({ text: "none", highlight: { monospace: true, bold: true } }); segments.push({ text: "' policy", highlight: { bold: true } }); } else if (!dmarcRecord.valid) { - segments.push({ text: ". Your DMARC record has " }); + segments.push({ text: ". Your DMARC record is " }); segments.push({ - text: "issues", + text: "invalid", highlight: { color: "danger", bold: true }, link: "#dns-dmarc", }); @@ -290,6 +323,13 @@ }); if (bimiResult.details && bimiResult.details.indexOf("declined") == 0) { segments.push({ text: " declined to participate" }); + } else if (bimiResult?.result !== "fail") { + segments.push({ text: " but" }); + segments.push({ + text: "has issues", + highlight: { color: "danger", bold: true }, + link: "#authentication-bimi", + }); } else { segments.push({ text: " for brand indicator display" }); }