All checks were successful
continuous-integration/drone/push Build is passing
Detect the List-Unsubscribe-Post: List-Unsubscribe=One-Click header (RFC 8058) and expose it as the 'one-click' unsubscribe method in the content analysis. When unsubscribe methods are present but one-click is absent, the summary card now shows a warning nudging senders to adopt it.
986 lines
28 KiB
Go
986 lines
28 KiB
Go
// This file is part of the happyDeliver (R) project.
|
||
// Copyright (c) 2025 happyDomain
|
||
// Authors: Pierre-Olivier Mercier, et al.
|
||
//
|
||
// This program is offered under a commercial and under the AGPL license.
|
||
// For commercial licensing, contact us at <contact@happydomain.org>.
|
||
//
|
||
// For AGPL licensing:
|
||
// This program is free software: you can redistribute it and/or modify
|
||
// it under the terms of the GNU Affero General Public License as published by
|
||
// the Free Software Foundation, either version 3 of the License, or
|
||
// (at your option) any later version.
|
||
//
|
||
// This program is distributed in the hope that it will be useful,
|
||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||
// GNU Affero General Public License for more details.
|
||
//
|
||
// You should have received a copy of the GNU Affero General Public License
|
||
// along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||
|
||
package analyzer
|
||
|
||
import (
|
||
"context"
|
||
"fmt"
|
||
"net/http"
|
||
"net/url"
|
||
"regexp"
|
||
"slices"
|
||
"strings"
|
||
"time"
|
||
"unicode"
|
||
|
||
"git.happydns.org/happyDeliver/internal/api"
|
||
"golang.org/x/net/html"
|
||
)
|
||
|
||
// ContentAnalyzer analyzes email content (HTML, links, images)
type ContentAnalyzer struct {
	// Timeout bounds every outbound HTTP check (client timeout and the
	// per-request context in validateLink).
	Timeout time.Duration
	// httpClient is configured once in NewContentAnalyzer and reused for
	// all link validations (redirects capped at 10).
	httpClient *http.Client
	// Per-message state populated by AnalyzeContent before traversal.
	listUnsubscribeURLs    []string // URLs from List-Unsubscribe header
	hasOneClickUnsubscribe bool     // True if List-Unsubscribe-Post: List-Unsubscribe=One-Click
}
|
||
|
||
// NewContentAnalyzer creates a new content analyzer with configurable timeout
|
||
func NewContentAnalyzer(timeout time.Duration) *ContentAnalyzer {
|
||
if timeout == 0 {
|
||
timeout = 10 * time.Second // Default timeout
|
||
}
|
||
return &ContentAnalyzer{
|
||
Timeout: timeout,
|
||
httpClient: &http.Client{
|
||
Timeout: timeout,
|
||
CheckRedirect: func(req *http.Request, via []*http.Request) error {
|
||
// Allow up to 10 redirects
|
||
if len(via) >= 10 {
|
||
return fmt.Errorf("too many redirects")
|
||
}
|
||
return nil
|
||
},
|
||
},
|
||
}
|
||
}
|
||
|
||
// ContentResults represents content analysis results
type ContentResults struct {
	IsMultipart      bool         // Message contains more than one MIME part
	HTMLValid        bool         // HTML parsed without error
	HTMLErrors       []string     // Parse error messages when HTMLValid is false
	Links            []LinkCheck  // Links collected from HTML and plain text
	Images           []ImageCheck // <img> tags collected from HTML
	HasUnsubscribe   bool         // An unsubscribe link was detected in the body
	UnsubscribeLinks []string     // hrefs of the detected unsubscribe links
	TextContent      string       // Concatenation of all plain text parts
	HTMLContent      string       // HTML body (last analyzed part wins)
	TextPlainRatio   float32      // Ratio of plain text to HTML consistency
	ImageTextRatio   float32      // Ratio of images to text
	SuspiciousURLs   []string     // URLs flagged unsafe (shorteners, IPs, misaligned domains)
	ContentIssues    []string     // Low-severity findings (e.g. external stylesheets)
	// HarmfullIssues lists dangerous-tag findings (script, iframe, form, ...).
	// The misspelled name is kept as-is for compatibility with existing callers.
	HarmfullIssues []string
}
|
||
|
||
// HasPlaintext returns true if the email has plain text content
|
||
func (r *ContentResults) HasPlaintext() bool {
|
||
return r.TextContent != ""
|
||
}
|
||
|
||
// LinkCheck represents a link validation result
type LinkCheck struct {
	URL     string // Link target as found in the message
	Valid   bool   // URL parsed and (for http/https) the request could be attempted
	Status  int    // HTTP status from the HEAD probe; 0 when unreachable/unchecked
	Error   string // Hard failure description (bad URL, >=400 status)
	IsSafe  bool   // False when the URL looks suspicious or misaligned with its text
	Warning string // Soft finding (unverifiable link, phishing suspicion)
}
|
||
|
||
// ImageCheck represents an image validation result
type ImageCheck struct {
	Src      string // Value of the src attribute (may be empty)
	HasAlt   bool   // An alt attribute with non-empty text is present
	AltText  string // The alt attribute content
	Valid    bool   // src attribute is non-empty
	Error    string // Set when the image is malformed (e.g. missing src)
	IsBroken bool   // Reserved for fetch-based checks; not set by traverseHTML
}
|
||
|
||
// AnalyzeContent performs content analysis on email message
|
||
func (c *ContentAnalyzer) AnalyzeContent(email *EmailMessage) *ContentResults {
|
||
results := &ContentResults{}
|
||
|
||
results.IsMultipart = len(email.Parts) > 1
|
||
|
||
// Parse List-Unsubscribe header URLs for use in link detection
|
||
c.listUnsubscribeURLs = email.GetListUnsubscribeURLs()
|
||
|
||
// Check for one-click unsubscribe support
|
||
listUnsubscribePost := email.Header.Get("List-Unsubscribe-Post")
|
||
c.hasOneClickUnsubscribe = strings.EqualFold(strings.TrimSpace(listUnsubscribePost), "List-Unsubscribe=One-Click")
|
||
|
||
// Get HTML and text parts
|
||
htmlParts := email.GetHTMLParts()
|
||
textParts := email.GetTextParts()
|
||
|
||
// Analyze HTML parts
|
||
if len(htmlParts) > 0 {
|
||
for _, part := range htmlParts {
|
||
c.analyzeHTML(part.Content, results)
|
||
}
|
||
}
|
||
|
||
// Analyze text parts
|
||
if len(textParts) > 0 {
|
||
for _, part := range textParts {
|
||
results.TextContent += part.Content
|
||
}
|
||
// Extract and validate links from plain text
|
||
c.analyzeTextLinks(results.TextContent, results)
|
||
}
|
||
|
||
// Check plain text/HTML consistency
|
||
if len(htmlParts) > 0 && len(textParts) > 0 {
|
||
results.TextPlainRatio = c.calculateTextPlainConsistency(results.TextContent, results.HTMLContent)
|
||
} else if !results.IsMultipart {
|
||
results.TextPlainRatio = 1.0
|
||
}
|
||
|
||
return results
|
||
}
|
||
|
||
// analyzeTextLinks extracts and validates URLs from plain text
|
||
func (c *ContentAnalyzer) analyzeTextLinks(textContent string, results *ContentResults) {
|
||
// Regular expression to match URLs in plain text
|
||
// Matches http://, https://, and www. URLs
|
||
urlRegex := regexp.MustCompile(`(?i)\b(?:https?://|www\.)[^\s<>"{}|\\^\[\]` + "`" + `]+`)
|
||
|
||
matches := urlRegex.FindAllString(textContent, -1)
|
||
|
||
for _, match := range matches {
|
||
// Normalize URL (add http:// if missing)
|
||
urlStr := match
|
||
if strings.HasPrefix(strings.ToLower(urlStr), "www.") {
|
||
urlStr = "http://" + urlStr
|
||
}
|
||
|
||
// Check if this URL already exists in results.Links (from HTML analysis)
|
||
exists := false
|
||
for _, link := range results.Links {
|
||
if link.URL == urlStr {
|
||
exists = true
|
||
break
|
||
}
|
||
}
|
||
|
||
// Only validate if not already checked
|
||
if !exists {
|
||
linkCheck := c.validateLink(urlStr)
|
||
results.Links = append(results.Links, linkCheck)
|
||
|
||
// Check for suspicious URLs
|
||
if !linkCheck.IsSafe {
|
||
results.SuspiciousURLs = append(results.SuspiciousURLs, urlStr)
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
// analyzeHTML parses and analyzes HTML content
|
||
func (c *ContentAnalyzer) analyzeHTML(htmlContent string, results *ContentResults) {
|
||
results.HTMLContent = htmlContent
|
||
|
||
// Parse HTML
|
||
doc, err := html.Parse(strings.NewReader(htmlContent))
|
||
if err != nil {
|
||
results.HTMLValid = false
|
||
results.HTMLErrors = append(results.HTMLErrors, fmt.Sprintf("Failed to parse HTML: %v", err))
|
||
return
|
||
}
|
||
|
||
results.HTMLValid = true
|
||
|
||
// Traverse HTML tree
|
||
c.traverseHTML(doc, results)
|
||
|
||
// Calculate image-to-text ratio
|
||
if results.HTMLContent != "" {
|
||
textLength := len(c.extractTextFromHTML(htmlContent))
|
||
imageCount := len(results.Images)
|
||
if textLength > 0 {
|
||
results.ImageTextRatio = float32(imageCount) / float32(textLength) * 1000 // Images per 1000 chars
|
||
}
|
||
}
|
||
}
|
||
|
||
// traverseHTML recursively traverses HTML nodes, collecting links and images
// into results and flagging tags that are dangerous or problematic in email
// (script, iframe, object/embed/applet, form, base, meta refresh, external
// stylesheets).
func (c *ContentAnalyzer) traverseHTML(n *html.Node, results *ContentResults) {
	if n.Type == html.ElementNode {
		switch n.Data {
		case "a":
			// Extract and validate links
			href := c.getAttr(n, "href")
			if href != "" {
				// Check for unsubscribe links (header match or keyword in
				// href/anchor text).
				if c.isUnsubscribeLink(href, n) {
					results.HasUnsubscribe = true
					results.UnsubscribeLinks = append(results.UnsubscribeLinks, href)
				}

				// Validate link (parse + optional HEAD probe).
				linkCheck := c.validateLink(href)

				// Check for domain misalignment (phishing detection): the
				// anchor text shows one domain but the href points elsewhere.
				linkText := c.getNodeText(n)
				if c.hasDomainMisalignment(href, linkText) {
					linkCheck.IsSafe = false
					if linkCheck.Warning == "" {
						linkCheck.Warning = "Link text domain does not match actual URL domain (possible phishing)"
					} else {
						linkCheck.Warning += "; Link text domain does not match actual URL domain (possible phishing)"
					}
				}

				results.Links = append(results.Links, linkCheck)

				// Check for suspicious URLs
				if !linkCheck.IsSafe {
					results.SuspiciousURLs = append(results.SuspiciousURLs, href)
				}
			}

		case "img":
			// Extract and validate images; record alt-text presence for the
			// accessibility score.
			src := c.getAttr(n, "src")
			alt := c.getAttr(n, "alt")

			imageCheck := ImageCheck{
				Src:     src,
				HasAlt:  alt != "",
				AltText: alt,
				Valid:   src != "",
			}

			if src == "" {
				imageCheck.Error = "Image missing src attribute"
			}

			results.Images = append(results.Images, imageCheck)

		case "script":
			// JavaScript in emails is a security risk and typically blocked
			results.HarmfullIssues = append(results.HarmfullIssues, "Dangerous <script> tag detected - JavaScript is blocked by most email clients")

		case "iframe":
			// Iframes are security risks and blocked by most email clients
			src := c.getAttr(n, "src")
			issue := "Dangerous <iframe> tag detected"
			if src != "" {
				issue += fmt.Sprintf(" with src='%s'", src)
			}
			results.HarmfullIssues = append(results.HarmfullIssues, issue+" - iframes are blocked by most email clients")

		case "object", "embed", "applet":
			// Legacy embedding tags, security risks
			results.HarmfullIssues = append(results.HarmfullIssues, fmt.Sprintf("Dangerous <%s> tag detected - legacy embedding tags are security risks and blocked by email clients", n.Data))

		case "form":
			// Forms in emails can be phishing vectors
			action := c.getAttr(n, "action")
			issue := "Suspicious <form> tag detected"
			if action != "" {
				issue += fmt.Sprintf(" with action='%s'", action)
			}
			results.HarmfullIssues = append(results.HarmfullIssues, issue+" - forms can be phishing vectors and are often blocked")

		case "base":
			// Base tag can be used for phishing by redirecting relative URLs
			href := c.getAttr(n, "href")
			issue := "Potentially dangerous <base> tag detected"
			if href != "" {
				issue += fmt.Sprintf(" with href='%s'", href)
			}
			results.HarmfullIssues = append(results.HarmfullIssues, issue+" - can redirect all relative URLs")

		case "meta":
			// Check for suspicious meta redirects
			httpEquiv := c.getAttr(n, "http-equiv")
			if strings.ToLower(httpEquiv) == "refresh" {
				content := c.getAttr(n, "content")
				results.HarmfullIssues = append(results.HarmfullIssues, fmt.Sprintf("Suspicious <meta http-equiv='refresh'> tag detected with content='%s' - can be used for phishing redirects", content))
			}

		case "link":
			// Check for external stylesheet links (potential privacy/tracking concerns)
			rel := c.getAttr(n, "rel")
			href := c.getAttr(n, "href")
			if strings.Contains(strings.ToLower(rel), "stylesheet") && href != "" {
				if strings.HasPrefix(href, "http://") || strings.HasPrefix(href, "https://") {
					results.ContentIssues = append(results.ContentIssues, fmt.Sprintf("External stylesheet link detected: %s - may cause rendering issues or privacy concerns", href))
				}
			}
		}
	}

	// Traverse children (depth-first over the whole tree).
	for child := n.FirstChild; child != nil; child = child.NextSibling {
		c.traverseHTML(child, results)
	}
}
|
||
|
||
// getAttr gets an attribute value from an HTML node
|
||
func (c *ContentAnalyzer) getAttr(n *html.Node, key string) string {
|
||
for _, attr := range n.Attr {
|
||
if attr.Key == key {
|
||
return attr.Val
|
||
}
|
||
}
|
||
return ""
|
||
}
|
||
|
||
// isUnsubscribeLink checks if a link is an unsubscribe link
|
||
func (c *ContentAnalyzer) isUnsubscribeLink(href string, node *html.Node) bool {
|
||
// First check: does the href match a URL from the List-Unsubscribe header?
|
||
if slices.Contains(c.listUnsubscribeURLs, href) {
|
||
return true
|
||
}
|
||
|
||
// Check href for unsubscribe keywords
|
||
lowerHref := strings.ToLower(href)
|
||
unsubKeywords := []string{"unsubscribe", "opt-out", "optout", "remove", "list-unsubscribe", "отписване", "desubscripció", "zrušit odběr", "dad-danysgrifio", "afmeld", "abmelden", "διαγραφή", "darse de baja", "poistu postituslistalta", "se désabonner", "ביטול רישום", "leiratkozás", "cancella iscrizione", "登録を取り消す", "구독 해지", "വരിക്കാരനല്ലാതാകുക", "uitschrijven", "meld av", "odsubskrybuj", "cancelar assinatura", "cancelar subscrição", "dezabonare", "отписаться", "avsluta prenumeration", "zrušiť odber", "odjava", "üyeliği sonlandır", "відписатися", "hủy đăng ký", "退订", "退訂"}
|
||
for _, keyword := range unsubKeywords {
|
||
if strings.Contains(lowerHref, keyword) {
|
||
return true
|
||
}
|
||
}
|
||
|
||
// Check link text for unsubscribe keywords
|
||
text := c.getNodeText(node)
|
||
lowerText := strings.ToLower(text)
|
||
for _, keyword := range unsubKeywords {
|
||
if strings.Contains(lowerText, keyword) {
|
||
return true
|
||
}
|
||
}
|
||
|
||
return false
|
||
}
|
||
|
||
// getNodeText extracts text content from a node
|
||
func (c *ContentAnalyzer) getNodeText(n *html.Node) string {
|
||
if n.Type == html.TextNode {
|
||
return n.Data
|
||
}
|
||
var text string
|
||
for child := n.FirstChild; child != nil; child = child.NextSibling {
|
||
text += c.getNodeText(child)
|
||
}
|
||
return text
|
||
}
|
||
|
||
// validateLink validates a URL and checks if it's accessible.
// Non-http(s) schemes are accepted without a network probe; http(s) URLs get
// a HEAD request bounded by c.Timeout. Network failures are reported as a
// warning rather than an error, since external links may legitimately block
// automated probes.
func (c *ContentAnalyzer) validateLink(urlStr string) LinkCheck {
	check := LinkCheck{
		URL:    urlStr,
		IsSafe: true,
	}

	// Parse URL
	parsedURL, err := url.Parse(urlStr)
	if err != nil {
		check.Valid = false
		check.Error = fmt.Sprintf("Invalid URL: %v", err)
		return check
	}

	// Check URL safety (shorteners, IP hosts, obfuscation)
	if c.isSuspiciousURL(urlStr, parsedURL) {
		check.IsSafe = false
		check.Warning = "URL appears suspicious (obfuscated, shortened, or unusual)"
	}

	// Only check HTTP/HTTPS links; mailto:, tel:, etc. are valid as-is.
	if parsedURL.Scheme != "http" && parsedURL.Scheme != "https" {
		check.Valid = true
		return check
	}

	// Check if link is accessible (with timeout)
	ctx, cancel := context.WithTimeout(context.Background(), c.Timeout)
	defer cancel()

	req, err := http.NewRequestWithContext(ctx, "HEAD", urlStr, nil)
	if err != nil {
		check.Valid = false
		check.Error = fmt.Sprintf("Failed to create request: %v", err)
		return check
	}

	// Set a reasonable user agent
	req.Header.Set("User-Agent", "happyDeliver/1.0 (Email Deliverability Tester)")

	resp, err := c.httpClient.Do(req)
	if err != nil {
		// Don't fail on timeout/connection errors for external links.
		// Just mark as warning; Status 0 signals "not verified".
		check.Valid = true
		check.Status = 0
		check.Warning = fmt.Sprintf("Could not verify link: %v", err)
		return check
	}
	defer resp.Body.Close()

	check.Status = resp.StatusCode
	check.Valid = true

	// Check for error status codes
	if resp.StatusCode >= 400 {
		check.Error = fmt.Sprintf("Link returns %d status", resp.StatusCode)
	}

	return check
}
|
||
|
||
// hasDomainMisalignment checks if the link text contains a different domain than the actual URL
|
||
// This is a common phishing technique (e.g., text shows "paypal.com" but links to "evil.com")
|
||
func (c *ContentAnalyzer) hasDomainMisalignment(href, linkText string) bool {
|
||
// Parse the actual URL
|
||
parsedURL, err := url.Parse(href)
|
||
if err != nil {
|
||
return false
|
||
}
|
||
|
||
// Extract the actual destination domain/email based on scheme
|
||
var actualDomain string
|
||
|
||
switch parsedURL.Scheme {
|
||
case "mailto":
|
||
// Extract email address from mailto: URL
|
||
// Format can be: mailto:user@domain.com or mailto:user@domain.com?subject=...
|
||
mailtoAddr := parsedURL.Opaque
|
||
|
||
// Remove query parameters if present
|
||
if idx := strings.Index(mailtoAddr, "?"); idx != -1 {
|
||
mailtoAddr = mailtoAddr[:idx]
|
||
}
|
||
|
||
mailtoAddr = strings.TrimSpace(strings.ToLower(mailtoAddr))
|
||
|
||
// Extract domain from email address
|
||
if idx := strings.Index(mailtoAddr, "@"); idx != -1 {
|
||
actualDomain = mailtoAddr[idx+1:]
|
||
} else {
|
||
return false // Invalid mailto
|
||
}
|
||
case "http":
|
||
case "https":
|
||
// Check if URL has a host
|
||
if parsedURL.Host == "" {
|
||
return false
|
||
}
|
||
|
||
// Extract the actual URL's domain (remove port if present)
|
||
actualDomain = parsedURL.Host
|
||
if idx := strings.LastIndex(actualDomain, ":"); idx != -1 {
|
||
actualDomain = actualDomain[:idx]
|
||
}
|
||
actualDomain = strings.ToLower(actualDomain)
|
||
default:
|
||
// Skip checks for other URL schemes (tel, etc.)
|
||
return false
|
||
}
|
||
|
||
// Normalize link text
|
||
linkText = strings.TrimSpace(linkText)
|
||
linkText = strings.ToLower(linkText)
|
||
|
||
// Skip if link text is empty, too short, or just generic text like "click here"
|
||
if linkText == "" || len(linkText) < 4 {
|
||
return false
|
||
}
|
||
|
||
// Common generic link texts that shouldn't trigger warnings
|
||
genericTexts := []string{
|
||
"click here", "read more", "learn more", "download", "subscribe",
|
||
"unsubscribe", "view online", "view in browser", "click", "here",
|
||
"update", "verify", "confirm", "continue", "get started",
|
||
// mailto-specific generic texts
|
||
"email us", "contact us", "send email", "get in touch", "reach out",
|
||
"contact", "email", "write to us",
|
||
}
|
||
if slices.Contains(genericTexts, linkText) {
|
||
return false
|
||
}
|
||
|
||
// Extract domain-like patterns from link text using regex
|
||
// Matches patterns like "example.com", "www.example.com", "http://example.com"
|
||
domainRegex := regexp.MustCompile(`(?i)(?:https?://)?(?:www\.)?([a-z0-9][-a-z0-9]*\.)+[a-z]{2,}`)
|
||
matches := domainRegex.FindAllString(linkText, -1)
|
||
|
||
if len(matches) == 0 {
|
||
return false
|
||
}
|
||
|
||
// Check each domain-like pattern found in the text
|
||
for _, textDomain := range matches {
|
||
// Normalize the text domain
|
||
textDomain = strings.ToLower(textDomain)
|
||
textDomain = strings.TrimPrefix(textDomain, "http://")
|
||
textDomain = strings.TrimPrefix(textDomain, "https://")
|
||
textDomain = strings.TrimPrefix(textDomain, "www.")
|
||
|
||
// Remove trailing slashes and paths
|
||
if idx := strings.Index(textDomain, "/"); idx != -1 {
|
||
textDomain = textDomain[:idx]
|
||
}
|
||
|
||
// Compare domains - they should match or the actual URL should be a subdomain of the text domain
|
||
if textDomain != actualDomain {
|
||
// Check if actual domain is a subdomain of text domain
|
||
if !strings.HasSuffix(actualDomain, "."+textDomain) && !strings.HasSuffix(actualDomain, textDomain) {
|
||
// Check if they share the same base domain (last 2 parts)
|
||
textParts := strings.Split(textDomain, ".")
|
||
actualParts := strings.Split(actualDomain, ".")
|
||
|
||
if len(textParts) >= 2 && len(actualParts) >= 2 {
|
||
textBase := strings.Join(textParts[len(textParts)-2:], ".")
|
||
actualBase := strings.Join(actualParts[len(actualParts)-2:], ".")
|
||
|
||
if textBase != actualBase {
|
||
return true // Domain mismatch detected!
|
||
}
|
||
} else {
|
||
return true // Domain mismatch detected!
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
return false
|
||
}
|
||
|
||
// isSuspiciousURL checks if a URL looks suspicious
|
||
func (c *ContentAnalyzer) isSuspiciousURL(urlStr string, parsedURL *url.URL) bool {
|
||
// Skip checks for mailto: URLs
|
||
if parsedURL.Scheme == "mailto" {
|
||
return false
|
||
}
|
||
|
||
// Check for IP address instead of domain
|
||
if c.isIPAddress(parsedURL.Host) {
|
||
return true
|
||
}
|
||
|
||
// Check for URL shorteners (common ones)
|
||
shorteners := []string{
|
||
"bit.ly", "tinyurl.com", "goo.gl", "ow.ly", "t.co",
|
||
"buff.ly", "is.gd", "bl.ink", "short.io",
|
||
}
|
||
if slices.Contains(shorteners, strings.ToLower(parsedURL.Host)) {
|
||
return true
|
||
}
|
||
|
||
// Check for excessive subdomains (possible obfuscation)
|
||
parts := strings.Split(parsedURL.Host, ".")
|
||
if len(parts) > 4 {
|
||
return true
|
||
}
|
||
|
||
// Check for URL obfuscation techniques
|
||
if strings.Count(urlStr, "@") > 0 { // @ in URL (possible phishing)
|
||
return true
|
||
}
|
||
|
||
// Check for suspicious characters in domain
|
||
if strings.ContainsAny(parsedURL.Host, "[]()<>") {
|
||
return true
|
||
}
|
||
|
||
return false
|
||
}
|
||
|
||
// isIPAddress checks if a string is an IP address
|
||
func (c *ContentAnalyzer) isIPAddress(host string) bool {
|
||
// Remove port if present
|
||
if idx := strings.LastIndex(host, ":"); idx != -1 {
|
||
host = host[:idx]
|
||
}
|
||
|
||
// Simple check for IPv4
|
||
parts := strings.Split(host, ".")
|
||
if len(parts) == 4 {
|
||
for _, part := range parts {
|
||
// Check if all characters are digits
|
||
for _, ch := range part {
|
||
if !unicode.IsDigit(ch) {
|
||
return false
|
||
}
|
||
}
|
||
}
|
||
return true
|
||
}
|
||
|
||
// Check for IPv6 (contains colons)
|
||
if strings.Contains(host, ":") {
|
||
return true
|
||
}
|
||
|
||
return false
|
||
}
|
||
|
||
// extractTextFromHTML extracts plain text from HTML
|
||
func (c *ContentAnalyzer) extractTextFromHTML(htmlContent string) string {
|
||
doc, err := html.Parse(strings.NewReader(htmlContent))
|
||
if err != nil {
|
||
return ""
|
||
}
|
||
|
||
var text strings.Builder
|
||
var extract func(*html.Node)
|
||
extract = func(n *html.Node) {
|
||
if n.Type == html.TextNode {
|
||
text.WriteString(" " + n.Data)
|
||
}
|
||
// Skip script and style tags
|
||
if n.Type == html.ElementNode && (n.Data == "script" || n.Data == "style") {
|
||
return
|
||
}
|
||
for child := n.FirstChild; child != nil; child = child.NextSibling {
|
||
extract(child)
|
||
}
|
||
}
|
||
extract(doc)
|
||
|
||
return strings.TrimSpace(text.String())
|
||
}
|
||
|
||
// calculateTextPlainConsistency compares plain text and HTML versions
|
||
func (c *ContentAnalyzer) calculateTextPlainConsistency(plainText, htmlText string) float32 {
|
||
// Extract text from HTML
|
||
htmlPlainText := c.extractTextFromHTML(htmlText)
|
||
|
||
// Normalize both texts
|
||
plainNorm := c.normalizeText(plainText)
|
||
htmlNorm := c.normalizeText(htmlPlainText)
|
||
|
||
// Calculate similarity using simple word overlap
|
||
plainWords := strings.Fields(plainNorm)
|
||
htmlWords := strings.Fields(htmlNorm)
|
||
|
||
if len(plainWords) == 0 || len(htmlWords) == 0 {
|
||
return 0.0
|
||
}
|
||
|
||
// Count common words by building sets
|
||
plainWordSet := make(map[string]int)
|
||
for _, word := range plainWords {
|
||
plainWordSet[word]++
|
||
}
|
||
|
||
htmlWordSet := make(map[string]int)
|
||
for _, word := range htmlWords {
|
||
htmlWordSet[word]++
|
||
}
|
||
|
||
// Count matches: for each unique word, count minimum occurrences in both texts
|
||
commonWords := 0
|
||
for word, plainCount := range plainWordSet {
|
||
if htmlCount, exists := htmlWordSet[word]; exists {
|
||
// Count the minimum occurrences between both texts
|
||
if plainCount < htmlCount {
|
||
commonWords += plainCount
|
||
} else {
|
||
commonWords += htmlCount
|
||
}
|
||
}
|
||
}
|
||
|
||
// Calculate ratio using total words from both texts (union approach)
|
||
// This provides a balanced measure: perfect match = 1.0, partial overlap = 0.3-0.8
|
||
totalWords := len(plainWords) + len(htmlWords)
|
||
if totalWords == 0 {
|
||
return 0.0
|
||
}
|
||
|
||
// Divide by average word count for better scoring
|
||
avgWords := float32(totalWords) / 2.0
|
||
ratio := float32(commonWords) / avgWords
|
||
|
||
// Cap at 1.0 for perfect matches
|
||
if ratio > 1.0 {
|
||
ratio = 1.0
|
||
}
|
||
|
||
return ratio
|
||
}
|
||
|
||
// normalizeText normalizes text for comparison
|
||
func (c *ContentAnalyzer) normalizeText(text string) string {
|
||
// Convert to lowercase
|
||
text = strings.ToLower(text)
|
||
|
||
// Remove extra whitespace
|
||
text = strings.TrimSpace(text)
|
||
text = regexp.MustCompile(`\s+`).ReplaceAllString(text, " ")
|
||
|
||
return text
|
||
}
|
||
|
||
// GenerateContentAnalysis creates structured content analysis from results,
// mapping internal ContentResults into the public api.ContentAnalysis shape:
// issue lists, link/image checks, and detected unsubscribe methods (including
// RFC 8058 one-click). Returns nil when results is nil.
func (c *ContentAnalyzer) GenerateContentAnalysis(results *ContentResults) *api.ContentAnalysis {
	if results == nil {
		return nil
	}

	analysis := &api.ContentAnalysis{
		HasHtml:            api.PtrTo(results.HTMLContent != ""),
		HasPlaintext:       api.PtrTo(results.TextContent != ""),
		HasUnsubscribeLink: api.PtrTo(results.HasUnsubscribe),
		UnsubscribeMethods: &[]api.ContentAnalysisUnsubscribeMethods{},
	}

	// Calculate text-to-image ratio (inverse of image-to-text)
	if len(results.Images) > 0 && results.HTMLContent != "" {
		textLen := float32(len(c.extractTextFromHTML(results.HTMLContent)))
		if textLen > 0 {
			ratio := textLen / float32(len(results.Images))
			analysis.TextToImageRatio = &ratio
		}
	}

	// Build HTML issues
	htmlIssues := []api.ContentIssue{}

	// Add HTML parsing errors
	if !results.HTMLValid && len(results.HTMLErrors) > 0 {
		for _, errMsg := range results.HTMLErrors {
			htmlIssues = append(htmlIssues, api.ContentIssue{
				Type:     api.BrokenHtml,
				Severity: api.ContentIssueSeverityHigh,
				Message:  errMsg,
				Advice:   api.PtrTo("Fix HTML structure errors to improve email rendering across clients"),
			})
		}
	}

	// Add missing alt text issues (aggregated into a single issue)
	if len(results.Images) > 0 {
		missingAltCount := 0
		for _, img := range results.Images {
			if !img.HasAlt {
				missingAltCount++
			}
		}
		if missingAltCount > 0 {
			htmlIssues = append(htmlIssues, api.ContentIssue{
				Type:     api.MissingAlt,
				Severity: api.ContentIssueSeverityMedium,
				Message:  fmt.Sprintf("%d image(s) missing alt attributes", missingAltCount),
				Advice:   api.PtrTo("Add descriptive alt text to all images for better accessibility and deliverability"),
			})
		}
	}

	// Add excessive images issue (threshold: >10 images per 1000 chars)
	if results.ImageTextRatio > 10.0 {
		htmlIssues = append(htmlIssues, api.ContentIssue{
			Type:     api.ExcessiveImages,
			Severity: api.ContentIssueSeverityMedium,
			Message:  "Email is excessively image-heavy",
			Advice:   api.PtrTo("Reduce the number of images relative to text content"),
		})
	}

	// Add suspicious URL issues, one per flagged URL.
	// NOTE(review): &suspURL takes the loop variable's address; this is only
	// correct on Go 1.22+ per-iteration loop variables — on earlier
	// toolchains every issue would point at the last URL. Confirm go.mod.
	for _, suspURL := range results.SuspiciousURLs {
		htmlIssues = append(htmlIssues, api.ContentIssue{
			Type:     api.SuspiciousLink,
			Severity: api.ContentIssueSeverityHigh,
			Message:  "Suspicious URL detected",
			Location: &suspURL,
			Advice:   api.PtrTo("Avoid URL shorteners, IP addresses, and obfuscated URLs in emails"),
		})
	}

	// Add harmful HTML tag issues
	for _, harmfulIssue := range results.HarmfullIssues {
		htmlIssues = append(htmlIssues, api.ContentIssue{
			Type:     api.DangerousHtml,
			Severity: api.ContentIssueSeverityCritical,
			Message:  harmfulIssue,
			Advice:   api.PtrTo("Remove dangerous HTML tags like <script>, <iframe>, <object>, <embed>, <applet>, <form>, and <base> from email content"),
		})
	}

	// Add general content issues (like external stylesheets)
	for _, contentIssue := range results.ContentIssues {
		htmlIssues = append(htmlIssues, api.ContentIssue{
			Type:     api.BrokenHtml,
			Severity: api.ContentIssueSeverityLow,
			Message:  contentIssue,
			Advice:   api.PtrTo("Use inline CSS instead of external stylesheets for better email compatibility"),
		})
	}

	if len(htmlIssues) > 0 {
		analysis.HtmlIssues = &htmlIssues
	}

	// Convert links to their API representation
	if len(results.Links) > 0 {
		links := make([]api.LinkCheck, 0, len(results.Links))
		for _, link := range results.Links {
			// Map internal state onto a single status enum: broken beats
			// suspicious beats timeout (warning with no error) beats valid.
			status := api.Valid
			if link.Status >= 400 {
				status = api.Broken
			} else if !link.IsSafe {
				status = api.Suspicious
			} else if link.Warning != "" {
				status = api.Timeout
			}

			apiLink := api.LinkCheck{
				Url:    link.URL,
				Status: status,
			}

			// Status 0 means "never probed"; omit the HTTP code then.
			if link.Status > 0 {
				apiLink.HttpCode = api.PtrTo(link.Status)
			}

			// Check if it's a URL shortener (reuses the general suspicion
			// heuristic, so IsShortened is true for any suspicious URL).
			parsedURL, err := url.Parse(link.URL)
			if err == nil {
				isShortened := c.isSuspiciousURL(link.URL, parsedURL)
				apiLink.IsShortened = api.PtrTo(isShortened)
			}

			links = append(links, apiLink)
		}
		analysis.Links = &links
	}

	// Convert images to their API representation
	if len(results.Images) > 0 {
		images := make([]api.ImageCheck, 0, len(results.Images))
		for _, img := range results.Images {
			apiImg := api.ImageCheck{
				HasAlt: img.HasAlt,
			}
			if img.Src != "" {
				apiImg.Src = &img.Src
			}
			if img.AltText != "" {
				apiImg.AltText = &img.AltText
			}
			// Simple heuristic: tracking pixels are typically 1x1.
			// NOTE(review): dimension detection is not implemented, so this
			// is currently always false — a stub, not a real check.
			apiImg.IsTrackingPixel = api.PtrTo(false)

			images = append(images, apiImg)
		}
		analysis.Images = &images
	}

	// Unsubscribe methods: body link, mailto and/or header URL.
	if results.HasUnsubscribe {
		*analysis.UnsubscribeMethods = append(*analysis.UnsubscribeMethods, api.Link)
	}

	for _, url := range c.listUnsubscribeURLs {
		if strings.HasPrefix(url, "mailto:") {
			*analysis.UnsubscribeMethods = append(*analysis.UnsubscribeMethods, api.Mailto)
		} else if strings.HasPrefix(url, "http:") || strings.HasPrefix(url, "https:") {
			*analysis.UnsubscribeMethods = append(*analysis.UnsubscribeMethods, api.ListUnsubscribeHeader)
		}
	}

	// RFC 8058 one-click requires an http(s) List-Unsubscribe URL plus the
	// List-Unsubscribe-Post header detected in AnalyzeContent.
	if slices.Contains(*analysis.UnsubscribeMethods, api.ListUnsubscribeHeader) && c.hasOneClickUnsubscribe {
		*analysis.UnsubscribeMethods = append(*analysis.UnsubscribeMethods, api.OneClick)
	}

	return analysis
}
|
||
|
||
// CalculateContentScore calculates the content score on a 0-100 scale and
// returns it with its letter grade. (The previous doc comment claimed 0-20
// points; the code clamps to 0-100.) Returns (0, "") for nil results.
//
// Breakdown: 10 base + 10 valid HTML (or text-only) + 10 plain text part +
// up to 25 for links + up to 15 for image alt text + 15 text consistency +
// up to 15 image/text ratio, minus penalties for suspicious URLs and
// dangerous HTML tags.
func (c *ContentAnalyzer) CalculateContentScore(results *ContentResults) (int, string) {
	if results == nil {
		return 0, ""
	}

	// Base score every analyzed message starts from.
	var score int = 10

	// HTML validity or text alone (10 points)
	if results.HTMLValid || (!results.IsMultipart && results.HasPlaintext()) {
		score += 10
	}

	// Requires plain text alternative (10 points)
	if results.HasPlaintext() {
		score += 10
	}

	// Links (25 points)
	// NOTE(review): a link-free email earns 25 while an email whose links are
	// all valid earns only 20 — confirm this asymmetry is intentional.
	if len(results.Links) > 0 {
		brokenLinks := 0
		for _, link := range results.Links {
			if link.Status >= 400 {
				brokenLinks++
			}
		}
		// Proportional credit for working links (integer division).
		score += 20 * (len(results.Links) - brokenLinks) / len(results.Links)
		// Too much links, 10 points penalty
		if len(results.Links) > 30 {
			score -= 10
		}
	} else {
		// No links is better, less suspiscous
		score += 25
	}

	// Images (15 points) — proportional credit for images with alt text.
	if len(results.Images) > 0 {
		noAltCount := 0
		for _, img := range results.Images {
			if !img.HasAlt {
				noAltCount++
			}
		}
		score += 15 * (len(results.Images) - noAltCount) / len(results.Images)
	} else {
		// No images is Ok
		score += 15
	}

	// Text consistency (15 points) — plain text matches the HTML version.
	if results.TextPlainRatio >= 0.3 {
		score += 15
	}

	// Image ratio (15 points) — images per 1000 chars of visible text.
	if results.ImageTextRatio <= 5.0 {
		score += 15
	} else if results.ImageTextRatio <= 10.0 {
		score += 7
	}

	// Penalize suspicious URLs (deduct up to 5 points)
	if len(results.SuspiciousURLs) > 0 {
		score -= min(len(results.SuspiciousURLs), 5)
	}

	// Penalize harmful HTML tags (deduct 20 points per harmful tag, max 40 points)
	if len(results.HarmfullIssues) > 0 {
		score -= min(len(results.HarmfullIssues)*20, 40)
	}

	// Ensure score is between 0 and 100
	if score < 0 {
		score = 0
	}
	if score > 100 {
		score = 100
	}

	return score, ScoreToGrade(score)
}
|