Header values for Date and email address fields are now parsed

This commit is contained in:
nemunaire 2025-10-24 10:46:54 +07:00
commit 29cb2cf1f9
2 changed files with 253 additions and 18 deletions

View file

@ -24,6 +24,7 @@ package analyzer
import (
"fmt"
"net"
"net/mail"
"regexp"
"strings"
"time"
@ -153,6 +154,30 @@ func (h *HeaderAnalyzer) isValidMessageID(messageID string) bool {
return len(parts[0]) > 0 && len(parts[1]) > 0
}
// emailDateTrailingCommentRe matches a trailing parenthesised comment such as
// "(UTC)" or "(CEST)", together with the whitespace around it. It is compiled
// once at package scope so parseEmailDate does not rebuild it on every call.
var emailDateTrailingCommentRe = regexp.MustCompile(`\s*\([^)]+\)\s*$`)

// parseEmailDate attempts to parse an email date string using common email
// date formats.
//
// Surrounding whitespace and a single trailing parenthesised comment (for
// example a timezone name like "(UTC)") are removed before parsing. The
// layouts are tried in order and the first successful parse is returned.
// When no layout matches, the zero time.Time and a non-nil error carrying the
// cleaned-up input are returned.
func (h *HeaderAnalyzer) parseEmailDate(dateStr string) (time.Time, error) {
	dateStr = emailDateTrailingCommentRe.ReplaceAllString(strings.TrimSpace(dateStr), "")

	// Layouts seen in Date and Received headers, most specific first.
	layouts := []string{
		time.RFC1123Z, // "Mon, 02 Jan 2006 15:04:05 -0700"
		time.RFC1123,  // "Mon, 02 Jan 2006 15:04:05 MST"
		"Mon, 2 Jan 2006 15:04:05 -0700",
		"Mon, 2 Jan 2006 15:04:05 MST",
		"2 Jan 2006 15:04:05 -0700",
	}

	for _, layout := range layouts {
		if parsed, err := time.Parse(layout, dateStr); err == nil {
			return parsed, nil
		}
	}

	return time.Time{}, fmt.Errorf("unable to parse date string: %s", dateStr)
}
// isNoReplyAddress checks if a header check represents a no-reply email address
func (h *HeaderAnalyzer) isNoReplyAddress(headerCheck api.HeaderCheck) bool {
if !headerCheck.Present || headerCheck.Value == nil {
@ -176,6 +201,39 @@ func (h *HeaderAnalyzer) isNoReplyAddress(headerCheck api.HeaderCheck) bool {
return false
}
// validateAddressHeader validates an email address header using the net/mail
// parser and returns the normalized address string.
//
// The value is first tried as a single address. If that fails it is parsed as
// an address list (headers such as To and Cc can carry several addresses) and
// the formatted entries are joined with ", ". When the list cannot be parsed
// either, an empty string and the error from mail.ParseAddressList are
// returned.
func (h *HeaderAnalyzer) validateAddressHeader(value string) (string, error) {
	if addr, err := mail.ParseAddress(value); err == nil {
		return h.formatAddress(addr), nil
	}

	addrs, err := mail.ParseAddressList(value)
	if err != nil {
		return "", err
	}

	formatted := make([]string, 0, len(addrs))
	for _, addr := range addrs {
		formatted = append(formatted, h.formatAddress(addr))
	}
	return strings.Join(formatted, ", "), nil
}

// formatAddress formats a mail.Address as "Name <email>", or as just "email"
// when the address has no display name.
func (h *HeaderAnalyzer) formatAddress(addr *mail.Address) string {
	if addr.Name == "" {
		return addr.Address
	}
	return fmt.Sprintf("%s <%s>", addr.Name, addr.Address)
}
// GenerateHeaderAnalysis creates structured header analysis from email
func (h *HeaderAnalyzer) GenerateHeaderAnalysis(email *EmailMessage) *api.HeaderAnalysis {
if email == nil {
@ -262,7 +320,20 @@ func (h *HeaderAnalyzer) checkHeader(email *EmailMessage, headerName string, imp
headerIssues = append(headerIssues, "Invalid Message-ID format (should be <id@domain>)")
}
case "Date":
// Could add date validation here
// Validate date format
if _, err := h.parseEmailDate(value); err != nil {
valid = false
headerIssues = append(headerIssues, fmt.Sprintf("Invalid date format: %v", err))
}
case "From", "To", "Cc", "Bcc", "Reply-To", "Sender", "Resent-From", "Resent-To", "Return-Path":
// Parse address header using net/mail and get normalized address
if normalizedAddr, err := h.validateAddressHeader(value); err != nil {
valid = false
headerIssues = append(headerIssues, fmt.Sprintf("Invalid email address format: %v", err))
} else {
// Use the normalized address as the value
check.Value = &normalizedAddr
}
}
check.Valid = &valid
@ -516,23 +587,9 @@ func (h *HeaderAnalyzer) parseReceivedHeader(receivedValue string) *api.Received
if matches := timestampRegex.FindStringSubmatch(normalized); len(matches) > 1 {
timestampStr := strings.TrimSpace(matches[1])
// Remove timezone name in parentheses if present
timestampStr = regexp.MustCompile(`\s*\([^)]+\)\s*$`).ReplaceAllString(timestampStr, "")
// Try parsing with common email date formats
formats := []string{
time.RFC1123Z, // "Mon, 02 Jan 2006 15:04:05 -0700"
time.RFC1123, // "Mon, 02 Jan 2006 15:04:05 MST"
"Mon, 2 Jan 2006 15:04:05 -0700",
"Mon, 2 Jan 2006 15:04:05 MST",
"2 Jan 2006 15:04:05 -0700",
}
for _, format := range formats {
if parsedTime, err := time.Parse(format, timestampStr); err == nil {
hop.Timestamp = &parsedTime
break
}
// Use the dedicated date parsing function
if parsedTime, err := h.parseEmailDate(timestampStr); err == nil {
hop.Timestamp = &parsedTime
}
}

View file

@ -724,6 +724,184 @@ func TestGenerateHeaderAnalysis_WithReceivedChain(t *testing.T) {
}
}
// TestHeaderAnalyzer_ParseEmailDate feeds parseEmailDate a table of date
// strings. Accepted inputs are checked against the expected year, month and
// day; rejected inputs only need to produce a non-nil error.
func TestHeaderAnalyzer_ParseEmailDate(t *testing.T) {
	type dateCase struct {
		name        string
		dateStr     string
		expectError bool
		expectYear  int
		expectMonth int
		expectDay   int
	}

	cases := []dateCase{
		// Inputs that must parse.
		{name: "RFC1123Z format", dateStr: "Mon, 02 Jan 2006 15:04:05 -0700", expectYear: 2006, expectMonth: 1, expectDay: 2},
		{name: "RFC1123 format", dateStr: "Mon, 02 Jan 2006 15:04:05 MST", expectYear: 2006, expectMonth: 1, expectDay: 2},
		{name: "Single digit day", dateStr: "Mon, 2 Jan 2006 15:04:05 -0700", expectYear: 2006, expectMonth: 1, expectDay: 2},
		{name: "Without day of week", dateStr: "2 Jan 2006 15:04:05 -0700", expectYear: 2006, expectMonth: 1, expectDay: 2},
		{name: "With timezone name in parentheses", dateStr: "Mon, 01 Jan 2024 12:00:00 +0000 (UTC)", expectYear: 2024, expectMonth: 1, expectDay: 1},
		{name: "With timezone name in parentheses 2", dateStr: "Sun, 19 Oct 2025 09:40:33 +0000 (UTC)", expectYear: 2025, expectMonth: 10, expectDay: 19},
		{name: "With CEST timezone", dateStr: "Fri, 24 Oct 2025 04:17:25 +0200 (CEST)", expectYear: 2025, expectMonth: 10, expectDay: 24},

		// Inputs that must be rejected.
		{name: "Invalid date format", dateStr: "not a date", expectError: true},
		{name: "Empty string", dateStr: "", expectError: true},
		{name: "ISO 8601 format (should fail)", dateStr: "2024-01-01T12:00:00Z", expectError: true},
	}

	ha := NewHeaderAnalyzer()

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			got, err := ha.parseEmailDate(tc.dateStr)

			if tc.expectError {
				if err == nil {
					t.Errorf("parseEmailDate(%q) expected error, got nil", tc.dateStr)
				}
				return
			}

			// Nothing further to compare if the parse unexpectedly failed.
			if err != nil {
				t.Errorf("parseEmailDate(%q) unexpected error: %v", tc.dateStr, err)
				return
			}

			if year := got.Year(); year != tc.expectYear {
				t.Errorf("Year = %d, want %d", year, tc.expectYear)
			}
			if month := got.Month(); int(month) != tc.expectMonth {
				t.Errorf("Month = %d, want %d", month, tc.expectMonth)
			}
			if day := got.Day(); day != tc.expectDay {
				t.Errorf("Day = %d, want %d", day, tc.expectDay)
			}
		})
	}
}
// TestCheckHeader_DateValidation calls checkHeader on a message whose only
// header is Date and compares the reported validity and the number of issues
// with the expectations for each case.
func TestCheckHeader_DateValidation(t *testing.T) {
	cases := []struct {
		name              string
		dateValue         string
		expectedValid     bool
		expectedIssuesLen int
	}{
		{name: "Valid RFC1123Z date", dateValue: "Mon, 02 Jan 2006 15:04:05 -0700", expectedValid: true, expectedIssuesLen: 0},
		{name: "Valid date with timezone name", dateValue: "Mon, 01 Jan 2024 12:00:00 +0000 (UTC)", expectedValid: true, expectedIssuesLen: 0},
		{name: "Invalid date format", dateValue: "2024-01-01", expectedValid: false, expectedIssuesLen: 1},
		{name: "Invalid date string", dateValue: "not a date", expectedValid: false, expectedIssuesLen: 1},
		{name: "Empty date", dateValue: "", expectedValid: false, expectedIssuesLen: 1},
	}

	ha := NewHeaderAnalyzer()

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			headers := createHeaderWithFields(map[string]string{
				"Date": tc.dateValue,
			})
			email := &EmailMessage{Header: headers}

			check := ha.checkHeader(email, "Date", "required")

			// Valid is only compared when checkHeader populated it; a nil
			// pointer is not reported as a failure here.
			if valid := check.Valid; valid != nil && *valid != tc.expectedValid {
				t.Errorf("Valid = %v, want %v", *valid, tc.expectedValid)
			}

			// A nil Issues pointer counts as zero issues.
			var gotIssues int
			if check.Issues != nil {
				gotIssues = len(*check.Issues)
			}
			if gotIssues != tc.expectedIssuesLen {
				t.Errorf("Issues length = %d, want %d (issues: %v)", gotIssues, tc.expectedIssuesLen, check.Issues)
			}
		})
	}
}
// Helper functions for testing
func strPtr(s string) *string {
return &s