Header values for Date and email-address headers are now parsed
This commit is contained in:
parent
7ed347c86e
commit
29cb2cf1f9
2 changed files with 253 additions and 18 deletions
|
|
@ -24,6 +24,7 @@ package analyzer
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"net"
|
"net"
|
||||||
|
"net/mail"
|
||||||
"regexp"
|
"regexp"
|
||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
|
@ -153,6 +154,30 @@ func (h *HeaderAnalyzer) isValidMessageID(messageID string) bool {
|
||||||
return len(parts[0]) > 0 && len(parts[1]) > 0
|
return len(parts[0]) > 0 && len(parts[1]) > 0
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// parseEmailDate attempts to parse an email date string using common email date formats
|
||||||
|
// Returns the parsed time and an error if parsing fails
|
||||||
|
func (h *HeaderAnalyzer) parseEmailDate(dateStr string) (time.Time, error) {
|
||||||
|
// Remove timezone name in parentheses if present
|
||||||
|
dateStr = regexp.MustCompile(`\s*\([^)]+\)\s*$`).ReplaceAllString(strings.TrimSpace(dateStr), "")
|
||||||
|
|
||||||
|
// Try parsing with common email date formats
|
||||||
|
formats := []string{
|
||||||
|
time.RFC1123Z, // "Mon, 02 Jan 2006 15:04:05 -0700"
|
||||||
|
time.RFC1123, // "Mon, 02 Jan 2006 15:04:05 MST"
|
||||||
|
"Mon, 2 Jan 2006 15:04:05 -0700",
|
||||||
|
"Mon, 2 Jan 2006 15:04:05 MST",
|
||||||
|
"2 Jan 2006 15:04:05 -0700",
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, format := range formats {
|
||||||
|
if parsedTime, err := time.Parse(format, dateStr); err == nil {
|
||||||
|
return parsedTime, nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return time.Time{}, fmt.Errorf("unable to parse date string: %s", dateStr)
|
||||||
|
}
|
||||||
|
|
||||||
// isNoReplyAddress checks if a header check represents a no-reply email address
|
// isNoReplyAddress checks if a header check represents a no-reply email address
|
||||||
func (h *HeaderAnalyzer) isNoReplyAddress(headerCheck api.HeaderCheck) bool {
|
func (h *HeaderAnalyzer) isNoReplyAddress(headerCheck api.HeaderCheck) bool {
|
||||||
if !headerCheck.Present || headerCheck.Value == nil {
|
if !headerCheck.Present || headerCheck.Value == nil {
|
||||||
|
|
@ -176,6 +201,39 @@ func (h *HeaderAnalyzer) isNoReplyAddress(headerCheck api.HeaderCheck) bool {
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// validateAddressHeader validates email address header using net/mail parser
|
||||||
|
// and returns the normalized address string in "Name <email>" format
|
||||||
|
func (h *HeaderAnalyzer) validateAddressHeader(value string) (string, error) {
|
||||||
|
// Try to parse as a single address first
|
||||||
|
if addr, err := mail.ParseAddress(value); err == nil {
|
||||||
|
return h.formatAddress(addr), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// If single address parsing fails, try parsing as an address list
|
||||||
|
// (for headers like To, Cc that can contain multiple addresses)
|
||||||
|
if addrs, err := mail.ParseAddressList(value); err != nil {
|
||||||
|
return "", err
|
||||||
|
} else {
|
||||||
|
// Join multiple addresses with ", "
|
||||||
|
result := ""
|
||||||
|
for i, addr := range addrs {
|
||||||
|
if i > 0 {
|
||||||
|
result += ", "
|
||||||
|
}
|
||||||
|
result += h.formatAddress(addr)
|
||||||
|
}
|
||||||
|
return result, nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// formatAddress formats a mail.Address as "Name <email>" or just "email" if no name
|
||||||
|
func (h *HeaderAnalyzer) formatAddress(addr *mail.Address) string {
|
||||||
|
if addr.Name != "" {
|
||||||
|
return fmt.Sprintf("%s <%s>", addr.Name, addr.Address)
|
||||||
|
}
|
||||||
|
return addr.Address
|
||||||
|
}
|
||||||
|
|
||||||
// GenerateHeaderAnalysis creates structured header analysis from email
|
// GenerateHeaderAnalysis creates structured header analysis from email
|
||||||
func (h *HeaderAnalyzer) GenerateHeaderAnalysis(email *EmailMessage) *api.HeaderAnalysis {
|
func (h *HeaderAnalyzer) GenerateHeaderAnalysis(email *EmailMessage) *api.HeaderAnalysis {
|
||||||
if email == nil {
|
if email == nil {
|
||||||
|
|
@ -262,7 +320,20 @@ func (h *HeaderAnalyzer) checkHeader(email *EmailMessage, headerName string, imp
|
||||||
headerIssues = append(headerIssues, "Invalid Message-ID format (should be <id@domain>)")
|
headerIssues = append(headerIssues, "Invalid Message-ID format (should be <id@domain>)")
|
||||||
}
|
}
|
||||||
case "Date":
|
case "Date":
|
||||||
// Could add date validation here
|
// Validate date format
|
||||||
|
if _, err := h.parseEmailDate(value); err != nil {
|
||||||
|
valid = false
|
||||||
|
headerIssues = append(headerIssues, fmt.Sprintf("Invalid date format: %v", err))
|
||||||
|
}
|
||||||
|
case "From", "To", "Cc", "Bcc", "Reply-To", "Sender", "Resent-From", "Resent-To", "Return-Path":
|
||||||
|
// Parse address header using net/mail and get normalized address
|
||||||
|
if normalizedAddr, err := h.validateAddressHeader(value); err != nil {
|
||||||
|
valid = false
|
||||||
|
headerIssues = append(headerIssues, fmt.Sprintf("Invalid email address format: %v", err))
|
||||||
|
} else {
|
||||||
|
// Use the normalized address as the value
|
||||||
|
check.Value = &normalizedAddr
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
check.Valid = &valid
|
check.Valid = &valid
|
||||||
|
|
@ -516,23 +587,9 @@ func (h *HeaderAnalyzer) parseReceivedHeader(receivedValue string) *api.Received
|
||||||
if matches := timestampRegex.FindStringSubmatch(normalized); len(matches) > 1 {
|
if matches := timestampRegex.FindStringSubmatch(normalized); len(matches) > 1 {
|
||||||
timestampStr := strings.TrimSpace(matches[1])
|
timestampStr := strings.TrimSpace(matches[1])
|
||||||
|
|
||||||
// Remove timezone name in parentheses if present
|
// Use the dedicated date parsing function
|
||||||
timestampStr = regexp.MustCompile(`\s*\([^)]+\)\s*$`).ReplaceAllString(timestampStr, "")
|
if parsedTime, err := h.parseEmailDate(timestampStr); err == nil {
|
||||||
|
hop.Timestamp = &parsedTime
|
||||||
// Try parsing with common email date formats
|
|
||||||
formats := []string{
|
|
||||||
time.RFC1123Z, // "Mon, 02 Jan 2006 15:04:05 -0700"
|
|
||||||
time.RFC1123, // "Mon, 02 Jan 2006 15:04:05 MST"
|
|
||||||
"Mon, 2 Jan 2006 15:04:05 -0700",
|
|
||||||
"Mon, 2 Jan 2006 15:04:05 MST",
|
|
||||||
"2 Jan 2006 15:04:05 -0700",
|
|
||||||
}
|
|
||||||
|
|
||||||
for _, format := range formats {
|
|
||||||
if parsedTime, err := time.Parse(format, timestampStr); err == nil {
|
|
||||||
hop.Timestamp = &parsedTime
|
|
||||||
break
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -724,6 +724,184 @@ func TestGenerateHeaderAnalysis_WithReceivedChain(t *testing.T) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestHeaderAnalyzer_ParseEmailDate(t *testing.T) {
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
dateStr string
|
||||||
|
expectError bool
|
||||||
|
expectYear int
|
||||||
|
expectMonth int
|
||||||
|
expectDay int
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "RFC1123Z format",
|
||||||
|
dateStr: "Mon, 02 Jan 2006 15:04:05 -0700",
|
||||||
|
expectError: false,
|
||||||
|
expectYear: 2006,
|
||||||
|
expectMonth: 1,
|
||||||
|
expectDay: 2,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "RFC1123 format",
|
||||||
|
dateStr: "Mon, 02 Jan 2006 15:04:05 MST",
|
||||||
|
expectError: false,
|
||||||
|
expectYear: 2006,
|
||||||
|
expectMonth: 1,
|
||||||
|
expectDay: 2,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "Single digit day",
|
||||||
|
dateStr: "Mon, 2 Jan 2006 15:04:05 -0700",
|
||||||
|
expectError: false,
|
||||||
|
expectYear: 2006,
|
||||||
|
expectMonth: 1,
|
||||||
|
expectDay: 2,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "Without day of week",
|
||||||
|
dateStr: "2 Jan 2006 15:04:05 -0700",
|
||||||
|
expectError: false,
|
||||||
|
expectYear: 2006,
|
||||||
|
expectMonth: 1,
|
||||||
|
expectDay: 2,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "With timezone name in parentheses",
|
||||||
|
dateStr: "Mon, 01 Jan 2024 12:00:00 +0000 (UTC)",
|
||||||
|
expectError: false,
|
||||||
|
expectYear: 2024,
|
||||||
|
expectMonth: 1,
|
||||||
|
expectDay: 1,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "With timezone name in parentheses 2",
|
||||||
|
dateStr: "Sun, 19 Oct 2025 09:40:33 +0000 (UTC)",
|
||||||
|
expectError: false,
|
||||||
|
expectYear: 2025,
|
||||||
|
expectMonth: 10,
|
||||||
|
expectDay: 19,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "With CEST timezone",
|
||||||
|
dateStr: "Fri, 24 Oct 2025 04:17:25 +0200 (CEST)",
|
||||||
|
expectError: false,
|
||||||
|
expectYear: 2025,
|
||||||
|
expectMonth: 10,
|
||||||
|
expectDay: 24,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "Invalid date format",
|
||||||
|
dateStr: "not a date",
|
||||||
|
expectError: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "Empty string",
|
||||||
|
dateStr: "",
|
||||||
|
expectError: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "ISO 8601 format (should fail)",
|
||||||
|
dateStr: "2024-01-01T12:00:00Z",
|
||||||
|
expectError: true,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
analyzer := NewHeaderAnalyzer()
|
||||||
|
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
result, err := analyzer.parseEmailDate(tt.dateStr)
|
||||||
|
|
||||||
|
if tt.expectError {
|
||||||
|
if err == nil {
|
||||||
|
t.Errorf("parseEmailDate(%q) expected error, got nil", tt.dateStr)
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if err != nil {
|
||||||
|
t.Errorf("parseEmailDate(%q) unexpected error: %v", tt.dateStr, err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
if result.Year() != tt.expectYear {
|
||||||
|
t.Errorf("Year = %d, want %d", result.Year(), tt.expectYear)
|
||||||
|
}
|
||||||
|
if int(result.Month()) != tt.expectMonth {
|
||||||
|
t.Errorf("Month = %d, want %d", result.Month(), tt.expectMonth)
|
||||||
|
}
|
||||||
|
if result.Day() != tt.expectDay {
|
||||||
|
t.Errorf("Day = %d, want %d", result.Day(), tt.expectDay)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestCheckHeader_DateValidation(t *testing.T) {
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
dateValue string
|
||||||
|
expectedValid bool
|
||||||
|
expectedIssuesLen int
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "Valid RFC1123Z date",
|
||||||
|
dateValue: "Mon, 02 Jan 2006 15:04:05 -0700",
|
||||||
|
expectedValid: true,
|
||||||
|
expectedIssuesLen: 0,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "Valid date with timezone name",
|
||||||
|
dateValue: "Mon, 01 Jan 2024 12:00:00 +0000 (UTC)",
|
||||||
|
expectedValid: true,
|
||||||
|
expectedIssuesLen: 0,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "Invalid date format",
|
||||||
|
dateValue: "2024-01-01",
|
||||||
|
expectedValid: false,
|
||||||
|
expectedIssuesLen: 1,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "Invalid date string",
|
||||||
|
dateValue: "not a date",
|
||||||
|
expectedValid: false,
|
||||||
|
expectedIssuesLen: 1,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "Empty date",
|
||||||
|
dateValue: "",
|
||||||
|
expectedValid: false,
|
||||||
|
expectedIssuesLen: 1,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
analyzer := NewHeaderAnalyzer()
|
||||||
|
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
email := &EmailMessage{
|
||||||
|
Header: createHeaderWithFields(map[string]string{
|
||||||
|
"Date": tt.dateValue,
|
||||||
|
}),
|
||||||
|
}
|
||||||
|
|
||||||
|
check := analyzer.checkHeader(email, "Date", "required")
|
||||||
|
|
||||||
|
if check.Valid != nil && *check.Valid != tt.expectedValid {
|
||||||
|
t.Errorf("Valid = %v, want %v", *check.Valid, tt.expectedValid)
|
||||||
|
}
|
||||||
|
|
||||||
|
issuesLen := 0
|
||||||
|
if check.Issues != nil {
|
||||||
|
issuesLen = len(*check.Issues)
|
||||||
|
}
|
||||||
|
if issuesLen != tt.expectedIssuesLen {
|
||||||
|
t.Errorf("Issues length = %d, want %d (issues: %v)", issuesLen, tt.expectedIssuesLen, check.Issues)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Helper functions for testing
|
// Helper functions for testing
|
||||||
func strPtr(s string) *string {
|
func strPtr(s string) *string {
|
||||||
return &s
|
return &s
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue