checker-smtp/checker/collect.go

563 lines
16 KiB
Go

package checker
import (
"context"
"crypto/tls"
"encoding/json"
"errors"
"fmt"
"net"
"strconv"
"strings"
"time"
"github.com/miekg/dns"
sdk "git.happydns.org/checker-sdk-go/checker"
)
const (
	// defaultEHLOName is the name announced in EHLO/HELO when the caller
	// does not supply a "helo_name" option.
	defaultEHLOName = "mx-checker.happydomain.org"

	// smtpPort is the TCP port every MX endpoint is probed on.
	smtpPort = 25
)
// mxServiceBody is a local copy of the JSON shape of svcs.MXs in
// happyDomain. It is declared here, instead of importing the happyDomain
// server package, so the build surface stays small (checker-srv follows
// the same pattern).
type mxServiceBody struct {
	// MXs lists the MX records carried by the service, under the "mx" key.
	MXs []struct {
		// Hdr carries the record header; only the owner name is decoded.
		Hdr struct {
			Name string `json:"Name"`
		} `json:"Hdr"`
		// Preference is the MX preference value of the record.
		Preference uint16 `json:"Preference"`
		// Mx is the mail exchanger host name of the record.
		Mx string `json:"Mx"`
	} `json:"mx"`
}
// Collect runs the SMTP check for one domain and returns the gathered
// *SMTPData.
//
// Options read from opts:
//   - "domain" (required): domain under test; surrounding whitespace and a
//     trailing dot are stripped before validation.
//   - "helo_name": name announced in EHLO/HELO, defaultEHLOName when empty.
//   - "timeout": per-operation timeout in seconds; values below 1 fall
//     back to 12.
//   - "test_null_sender", "test_postmaster", "test_open_relay": toggles
//     for the RCPT-level probes, all defaulting to true.
//   - "test_probe_address": recipient used by the open-relay probe.
//   - "service": optional JSON service body listing the MX records.
//
// An error is returned only when the domain or the HELO name is missing or
// invalid. DNS failures are recorded inside the result (MX.Error and the
// per-record ResolveError) and the result is still returned.
func (p *smtpProvider) Collect(ctx context.Context, opts sdk.CheckerOptions) (any, error) {
	// Recipient used whenever the configured probe address is missing,
	// malformed, or located inside the domain being tested.
	const fallbackRcpt = "postmaster@example.com"

	rawDomain, _ := sdk.GetOption[string](opts, "domain")
	domain := strings.TrimSuffix(strings.TrimSpace(rawDomain), ".")
	switch {
	case domain == "":
		return nil, fmt.Errorf("domain is required")
	case !isValidHostname(domain):
		return nil, fmt.Errorf("invalid domain %q", domain)
	}

	rawHelo, _ := sdk.GetOption[string](opts, "helo_name")
	helo := strings.TrimSpace(rawHelo)
	if helo == "" {
		helo = defaultEHLOName
	}
	if !isValidHostname(helo) {
		return nil, fmt.Errorf("invalid helo_name %q", helo)
	}

	secs := sdk.GetFloatOption(opts, "timeout", 12)
	if secs < 1 {
		secs = 12
	}
	perEndpoint := time.Duration(secs * float64(time.Second))

	// Everything a probe needs except the (target, ip) pair, which is
	// filled in per endpoint further down.
	probeArgs := probeInputs{
		domain:         domain,
		heloName:       helo,
		timeout:        perEndpoint,
		testNull:       sdk.GetBoolOption(opts, "test_null_sender", true),
		testPostmaster: sdk.GetBoolOption(opts, "test_postmaster", true),
		testOpenRelay:  sdk.GetBoolOption(opts, "test_open_relay", true),
	}

	rcpt, _ := sdk.GetOption[string](opts, "test_probe_address")
	rcpt = strings.TrimSpace(rcpt)
	if rcpt == "" || !isValidMailbox(rcpt) {
		rcpt = fallbackRcpt
	}
	// A recipient inside the domain under test must never be used: the
	// server accepting it would be reported as a false-positive open relay.
	if rcptDomain, _, ok := splitMail(rcpt); ok && strings.EqualFold(rcptDomain, domain) {
		rcpt = fallbackRcpt
	}
	probeArgs.openRelayRcpt = rcpt

	data := &SMTPData{
		Domain: domain,
		RunAt:  time.Now().UTC().Format(time.RFC3339),
	}

	resolver := net.DefaultResolver
	// One deadline shared by the MX lookup and every per-target lookup.
	lookupCtx, cancel := context.WithTimeout(ctx, perEndpoint)
	defer cancel()

	// The service body, when supplied, wins (authoritative, already parsed
	// from the zone); otherwise query DNS for the MX set.
	var mxTargets []mxTargetRaw
	if body, ok := sdk.GetOption[json.RawMessage](opts, "service"); ok && len(body) > 0 {
		mxTargets = parseServiceBody(body)
	}
	if len(mxTargets) == 0 {
		found, err := lookupMX(lookupCtx, resolver, domain)
		mxTargets = found
		if err != nil {
			data.MX.Error = err.Error()
		}
	}

	// RFC 7505 null MX: a single record with preference 0 and a root (or
	// empty) target means the domain accepts no mail; nothing to probe.
	if len(mxTargets) == 1 {
		only := mxTargets[0]
		if only.Preference == 0 && (only.Target == "" || only.Target == ".") {
			data.MX.NullMX = true
			return data, nil
		}
	}

	// Implicit MX (RFC 5321 § 5.1): with no MX records and no lookup
	// error, the bare domain itself is the mail exchanger.
	if len(mxTargets) == 0 && data.MX.Error == "" {
		data.MX.ImplicitMX = true
		mxTargets = []mxTargetRaw{{Preference: 0, Target: domain}}
	}

	for _, t := range mxTargets {
		target := strings.TrimSuffix(t.Target, ".")
		if target == "" {
			continue
		}
		rec := MXRecord{Preference: t.Preference, Target: target}

		if literal := net.ParseIP(target); literal != nil {
			// The target is an IP literal: record it as its own address,
			// no DNS involved.
			rec.IsIPLiteral = true
			if v4 := literal.To4(); v4 != nil {
				rec.IPv4 = append(rec.IPv4, v4.String())
			} else {
				rec.IPv6 = append(rec.IPv6, literal.String())
			}
		} else {
			// Detect CNAME (RFC 5321 § 5.1 forbids MX → CNAME): a
			// canonical name differing from the target means an alias.
			if cname, err := resolver.LookupCNAME(lookupCtx, target); err == nil {
				canon := strings.TrimSuffix(cname, ".")
				if canon != "" && !strings.EqualFold(canon, target) {
					rec.IsCNAME = true
					rec.CNAMEChain = []string{target, canon}
				}
			}

			addrs, err := resolver.LookupIPAddr(lookupCtx, target)
			if err != nil {
				rec.ResolveError = err.Error()
			}
			for _, a := range addrs {
				if v4 := a.IP.To4(); v4 != nil {
					rec.IPv4 = append(rec.IPv4, v4.String())
				} else {
					rec.IPv6 = append(rec.IPv6, a.IP.String())
				}
			}
		}
		data.MX.Records = append(data.MX.Records, rec)
	}

	// probe runs one endpoint probe and appends its result.
	probe := func(target, ip string, v6 bool) {
		args := probeArgs
		args.target, args.ip, args.isV6 = target, ip, v6
		data.Endpoints = append(data.Endpoints, probeEndpoint(ctx, args))
	}

	// Probe every (target, ip) pair: per record, IPv4 addresses first,
	// then IPv6.
	for _, rec := range data.MX.Records {
		for _, ip := range rec.IPv4 {
			probe(rec.Target, ip, false)
		}
		for _, ip := range rec.IPv6 {
			probe(rec.Target, ip, true)
		}
	}

	computeCoverage(data)
	return data, nil
}
// mxTargetRaw is one MX record reduced to the two values the collector
// needs, whether it came from the service body or from a live DNS lookup.
type mxTargetRaw struct {
	// Preference is the MX preference value.
	Preference uint16
	// Target is the mail exchanger name as received; it may still carry a
	// trailing dot.
	Target string
}
// parseServiceBody pulls the MX list out of a happyDomain svcs.MXs payload.
//
// raw may be either the bare service body or the ServiceMessage envelope
// that wraps it under a "Service" key. The result is nil when the payload
// cannot be decoded, and empty when it decodes but lists no MX records; in
// both cases the caller falls back to a live DNS lookup.
func parseServiceBody(raw json.RawMessage) []mxTargetRaw {
	// happyDomain wraps the body in ServiceMessage{Type, Service:<body>}.
	var envelope struct {
		Type    string          `json:"_svctype"`
		Service json.RawMessage `json:"Service"`
	}

	// Assume the bare body, and switch to the wrapped one only when the
	// envelope decodes and actually carries a Service member.
	body := raw
	if json.Unmarshal(raw, &envelope) == nil && len(envelope.Service) > 0 {
		body = envelope.Service
	}

	var parsed mxServiceBody
	if json.Unmarshal(body, &parsed) != nil {
		return nil
	}

	targets := make([]mxTargetRaw, len(parsed.MXs))
	for i := range parsed.MXs {
		targets[i] = mxTargetRaw{
			Preference: parsed.MXs[i].Preference,
			Target:     parsed.MXs[i].Mx,
		}
	}
	return targets
}
// lookupMX queries the MX records of domain through r.
//
// It returns:
//   - the records (trailing dot stripped from each host) on success;
//   - (nil, nil) when the resolver reports "not found", so the caller can
//     take the implicit-MX path;
//   - a single synthetic {Preference: 0, Target: "."} record when the
//     error text contains "cannot unmarshal DNS message", which this code
//     treats as the RFC 7505 null-MX sentinel;
//   - (nil, err) for any other failure.
func lookupMX(ctx context.Context, r *net.Resolver, domain string) ([]mxTargetRaw, error) {
	records, err := r.LookupMX(ctx, dns.Fqdn(domain))
	if err == nil {
		targets := make([]mxTargetRaw, len(records))
		for i, m := range records {
			targets[i] = mxTargetRaw{
				Preference: m.Pref,
				Target:     strings.TrimSuffix(m.Host, "."),
			}
		}
		return targets, nil
	}

	var dnsErr *net.DNSError
	switch {
	case errors.As(err, &dnsErr) && dnsErr.IsNotFound:
		// NXDOMAIN / no data: not an error for our purposes.
		return nil, nil
	case strings.Contains(err.Error(), "cannot unmarshal DNS message"):
		// Per the original author, net.LookupMX fails this way on the
		// null-MX sentinel because "." does not pass host validation, so
		// the sentinel is re-created by hand for the caller to detect.
		// NOTE(review): confirm this still matches the error produced by
		// the Go release in use.
		return []mxTargetRaw{{Preference: 0, Target: "."}}, nil
	default:
		return nil, err
	}
}
// probeInputs bundles everything probeEndpoint needs to test a single
// (MX target, IP address) pair.
type probeInputs struct {
	// target is the MX host name the address belongs to.
	target string
	// ip is the address to connect to, in textual form.
	ip string
	// domain is the domain under test; the null-sender and postmaster
	// probes address postmaster@domain.
	domain string
	// heloName is the name announced in EHLO/HELO.
	heloName string
	// isV6 reports whether ip is an IPv6 address.
	isV6 bool
	// timeout bounds each individual network step of the probe.
	timeout time.Duration
	// testNull enables the null-sender (MAIL FROM:<>) probe.
	testNull bool
	// testPostmaster enables the postmaster recipient probe.
	testPostmaster bool
	// testOpenRelay enables the open-relay probe.
	testOpenRelay bool
	// openRelayRcpt is the recipient used by the open-relay probe.
	openRelayRcpt string
}
// probeEndpoint runs the full probe against one (MX target, IP) pair and
// returns what was observed. In order it:
//
//  1. looks up the PTR of the address and forward-confirms it (FCrDNS);
//  2. connects to the address on smtpPort and reads the banner;
//  3. sends EHLO, falling back to HELO when EHLO gets a 5xx reply;
//  4. upgrades with STARTTLS when offered and repeats EHLO over TLS;
//  5. runs the enabled RCPT-level probes (null sender, postmaster, open
//     relay).
//
// Failures never abort the caller: they are reported through ep.Error (and
// ep.PTRError for reverse DNS) and whatever was collected so far is
// returned. Each network step is bounded by in.timeout.
//
// The result is a named return value on purpose: the deferred ElapsedMS
// assignment must write into the value the caller receives. With an
// unnamed result the write would land in a local copy after the return
// value had already been taken, leaving ElapsedMS at zero.
func probeEndpoint(ctx context.Context, in probeInputs) (ep EndpointProbe) {
	start := time.Now()
	ep = EndpointProbe{
		Target:  in.target,
		Port:    smtpPort,
		IP:      in.ip,
		IsIPv6:  in.isV6,
		Address: net.JoinHostPort(in.ip, strconv.Itoa(smtpPort)),
	}
	defer func() { ep.ElapsedMS = time.Since(start).Milliseconds() }()

	// Reverse DNS: orthogonal to the SMTP connection, so we run it even
	// if the connection later fails.
	ptrCtx, ptrCancel := context.WithTimeout(ctx, in.timeout)
	names, err := net.DefaultResolver.LookupAddr(ptrCtx, in.ip)
	ptrCancel()
	switch {
	case err != nil:
		ep.PTRError = err.Error()
	case len(names) == 0:
		ep.PTRError = "no PTR records"
	default:
		ep.PTR = strings.TrimSuffix(names[0], ".")
		// FCrDNS: PTR's forward lookup must include our IP.
		fwdCtx, fwdCancel := context.WithTimeout(ctx, in.timeout)
		ips, ferr := net.DefaultResolver.LookupIPAddr(fwdCtx, ep.PTR)
		fwdCancel()
		if ferr == nil {
			wantIP := net.ParseIP(in.ip)
			for _, a := range ips {
				if a.IP.String() == in.ip || a.IP.Equal(wantIP) {
					ep.FCrDNSPass = true
					break
				}
			}
		}
	}

	dialCtx, cancel := context.WithTimeout(ctx, in.timeout)
	defer cancel()
	dialer := &net.Dialer{}
	conn, err := dialer.DialContext(dialCtx, "tcp", ep.Address)
	if err != nil {
		ep.Error = "tcp: " + err.Error()
		return ep
	}
	ep.TCPConnected = true
	// Best effort: a failure to arm the deadline is not worth aborting for.
	_ = conn.SetDeadline(time.Now().Add(in.timeout))

	sc := newSMTPConn(conn, in.timeout)
	// One defer covers both the plaintext and post-STARTTLS cases: after
	// swap() the smtpConn owns the tls.Conn whose Close propagates to the
	// underlying TCP fd, so a separate `defer conn.Close()` would only
	// double-close the same descriptor.
	defer sc.close()

	// Read the banner (220).
	code, text, _, err := sc.readResponse()
	if err != nil {
		ep.Error = "banner: " + err.Error()
		return ep
	}
	ep.BannerReceived = true
	ep.BannerCode = code
	ep.BannerLine = strings.TrimSpace(strings.ReplaceAll(text, "\n", " | "))
	ep.BannerHostname = parseBanner(text)
	if code != 220 {
		ep.Error = fmt.Sprintf("banner: unexpected code %d", code)
		return ep
	}

	// EHLO (fall back to HELO on 5xx).
	ehloCode, _, lines, err := sc.cmd("EHLO " + in.heloName)
	if err != nil {
		ep.Error = "ehlo: " + err.Error()
		return ep
	}
	// Never index into a reply without checking it is there, matching
	// the length checks on the HELO and post-TLS EHLO replies.
	if len(lines) == 0 || len(lines[0]) == 0 {
		ep.Error = "ehlo: empty response"
		return ep
	}
	switch lines[0][0] {
	case '2':
		// Accepted; extensions are parsed below.
	case '5':
		// Try HELO.
		_, heloText, heloLines, herr := sc.cmd("HELO " + in.heloName)
		if herr != nil || len(heloLines) == 0 || len(heloLines[0]) == 0 || heloLines[0][0] != '2' {
			ep.Error = "ehlo/helo both rejected"
			return ep
		}
		ep.EHLOReceived = true
		ep.EHLOFallbackHELO = true
		// The server's name is the first word of the HELO reply, not of
		// the EHLO rejection that triggered the fallback.
		ep.EHLOHostname = strings.TrimSpace(strings.SplitN(heloText, " ", 2)[0])
		return ep
	default:
		// Anything else (e.g. a 4xx temporary failure) is not a greeting;
		// do not record it as a successful EHLO.
		ep.Error = fmt.Sprintf("ehlo: unexpected code %d", ehloCode)
		return ep
	}

	ep.EHLOReceived = true
	greeting, exts := parseEHLO(lines)
	ep.EHLOHostname = greeting
	ep.Extensions = exts
	idx := buildExtensions(exts)
	ep.STARTTLSOffered = idx.has("STARTTLS")
	ep.HasPipelining = idx.has("PIPELINING")
	ep.Has8BITMIME = idx.has("8BITMIME")
	ep.HasSMTPUTF8 = idx.has("SMTPUTF8")
	ep.HasCHUNKING = idx.has("CHUNKING")
	ep.HasDSN = idx.has("DSN")
	ep.HasENHANCEDCODE = idx.has("ENHANCEDSTATUSCODES")
	ep.SizeLimit = idx.parseSize()
	ep.AUTHPreTLS = idx.parseAuth()

	// STARTTLS.
	if ep.STARTTLSOffered {
		code, _, _, terr := sc.cmd("STARTTLS")
		switch {
		case terr != nil:
			ep.Error = "starttls: " + terr.Error()
			return ep
		case code != 220:
			ep.Error = fmt.Sprintf("starttls: unexpected code %d", code)
			// Don't bail; still run transactional probes over plaintext
			// so the operator sees what the server does without TLS.
		default:
			tlsConn := tls.Client(conn, tlsProbeConfig(in.target))
			_ = tlsConn.SetDeadline(time.Now().Add(in.timeout))
			if herr := tlsConn.Handshake(); herr != nil {
				ep.Error = "tls-handshake: " + herr.Error()
				return ep
			}
			ep.STARTTLSUpgraded = true
			state := tlsConn.ConnectionState()
			ep.TLSVersion = tls.VersionName(state.Version)
			ep.TLSCipher = tls.CipherSuiteName(state.CipherSuite)
			sc.swap(tlsConn)

			// Re-EHLO over TLS (mandatory per RFC 3207).
			_, _, lines2, eerr := sc.cmd("EHLO " + in.heloName)
			if eerr == nil && len(lines2) > 0 && len(lines2[0]) > 0 && lines2[0][0] == '2' {
				_, exts2 := parseEHLO(lines2)
				ep.PostTLSExtensions = exts2
				idx2 := buildExtensions(exts2)
				ep.AUTHPostTLS = idx2.parseAuth()
				// Union the feature flags: some servers only advertise
				// 8BITMIME, PIPELINING, etc. after STARTTLS.
				if !ep.HasPipelining {
					ep.HasPipelining = idx2.has("PIPELINING")
				}
				if !ep.Has8BITMIME {
					ep.Has8BITMIME = idx2.has("8BITMIME")
				}
				if !ep.HasSMTPUTF8 {
					ep.HasSMTPUTF8 = idx2.has("SMTPUTF8")
				}
				if !ep.HasCHUNKING {
					ep.HasCHUNKING = idx2.has("CHUNKING")
				}
				if !ep.HasDSN {
					ep.HasDSN = idx2.has("DSN")
				}
				if !ep.HasENHANCEDCODE {
					ep.HasENHANCEDCODE = idx2.has("ENHANCEDSTATUSCODES")
				}
				if ep.SizeLimit == 0 {
					ep.SizeLimit = idx2.parseSize()
				}
			}
		}
	}

	// RCPT-level probes. Each runs in its own MAIL/RSET pair so an earlier
	// reject does not mask later ones. runRCPT returns the reply code and
	// text of the decisive command: MAIL FROM when it is not answered with
	// 250, RCPT TO otherwise. A transport error yields code -1 and the
	// error text.
	runRCPT := func(from, to string) (int, string) {
		code, text, _, err := sc.cmd("MAIL FROM:<" + from + ">")
		if err != nil {
			return -1, err.Error()
		}
		if code != 250 {
			reply := strings.TrimSpace(text)
			// Best-effort reset; its outcome does not affect the verdict.
			sc.cmd("RSET")
			return code, reply
		}
		code, text, _, err = sc.cmd("RCPT TO:<" + to + ">")
		// Always close the transaction, even when RCPT failed.
		sc.cmd("RSET")
		if err != nil {
			return -1, err.Error()
		}
		return code, strings.TrimSpace(text)
	}

	if in.testNull {
		c, t := runRCPT("", "postmaster@"+in.domain)
		ok := c >= 200 && c < 300
		ep.NullSenderAccepted = &ok
		ep.NullSenderResponse = fmt.Sprintf("%d %s", c, t)
	}
	if in.testPostmaster {
		from := "checker@" + in.heloName
		c, t := runRCPT(from, "postmaster@"+in.domain)
		ok := c >= 200 && c < 300
		ep.PostmasterAccepted = &ok
		ep.PostmasterResponse = fmt.Sprintf("%d %s", c, t)
	}
	if in.testOpenRelay && in.openRelayRcpt != "" {
		from := "checker@" + in.heloName
		c, t := runRCPT(from, in.openRelayRcpt)
		// For this probe an accepted recipient means the server relays.
		ok := c >= 200 && c < 300
		ep.OpenRelay = &ok
		ep.OpenRelayResponse = fmt.Sprintf("%d %s", c, t)
		ep.OpenRelayRecipient = in.openRelayRcpt
	}
	return ep
}
// splitMail cuts addr at its last "@" and returns the part after it
// (domain) and the part before it (local). ok is false, and both strings
// are empty, when there is no "@" or when either side would be empty.
func splitMail(addr string) (domain, local string, ok bool) {
	at := strings.LastIndexByte(addr, '@')
	if at < 1 || at+1 >= len(addr) {
		return "", "", false
	}
	local, domain = addr[:at], addr[at+1:]
	return domain, local, true
}
// isValidHostname reports whether s is a plausible DNS host name that is
// safe to concatenate into an SMTP command line.
//
// It rejects anything that could smuggle SMTP commands (CR, LF, spaces,
// angle brackets, any non-ASCII byte): only ASCII letters, digits, '-' and
// '.' are allowed. It also enforces the structural limits of a DNS name:
// at most 253 bytes overall, and every dot-separated label between 1 and
// 63 bytes long. Inputs such as ".", "a..b" or ".example.com" are
// therefore rejected. A single trailing dot (fully-qualified form) is
// accepted.
//
// We use it on every user-supplied value that ends up concatenated into an
// SMTP command line.
func isValidHostname(s string) bool {
	if s == "" || len(s) > 253 {
		return false
	}
	// Allow the fully-qualified spelling, but nothing made of dots only.
	name := strings.TrimSuffix(s, ".")
	if name == "" {
		return false
	}

	labelLen := 0
	for i := 0; i < len(name); i++ {
		c := name[i]
		switch {
		case c == '.':
			// A dot closes a label; an empty label is never valid.
			if labelLen == 0 {
				return false
			}
			labelLen = 0
		case c >= 'a' && c <= 'z',
			c >= 'A' && c <= 'Z',
			c >= '0' && c <= '9',
			c == '-':
			labelLen++
			if labelLen > 63 {
				return false
			}
		default:
			return false
		}
	}
	// The name must not end on an empty label (covers "example.com..").
	return labelLen > 0
}
// isValidMailbox reports whether s is an acceptable mailbox, using a
// deliberately conservative subset of the RFC 5321 addr-spec:
//
//   - s is split at its last "@"; both sides must be non-empty;
//   - the local part is at most 64 bytes of printable ASCII (no space, no
//     control or non-ASCII bytes) and contains none of the SMTP
//     metacharacters < > ( ) [ ] , ; : " \ @;
//   - the part after "@" must satisfy isValidHostname.
//
// Quoted local-parts are not allowed.
func isValidMailbox(s string) bool {
	// Bytes that are never accepted in the local part.
	const forbidden = `<>()[],;:"\@`

	at := strings.LastIndexByte(s, '@')
	if at < 1 || at+1 == len(s) {
		return false
	}
	local, host := s[:at], s[at+1:]
	if len(local) > 64 {
		return false
	}
	for _, c := range []byte(local) {
		printable := c > 0x20 && c < 0x7f
		if !printable || strings.IndexByte(forbidden, c) >= 0 {
			return false
		}
	}
	return isValidHostname(host)
}
// computeCoverage summarises data.Endpoints into data.Coverage.
//
// The Any* and Has* flags are set as soon as one endpoint qualifies; HasIPv4
// and HasIPv6 only count endpoints whose TCP connection succeeded. The
// All* flags are true only when every endpoint qualifies. When there are
// no endpoints at all, Coverage is left untouched.
func computeCoverage(data *SMTPData) {
	if len(data.Endpoints) == 0 {
		return
	}

	// refused reports whether a probe was run (non-nil) and was rejected.
	refused := func(verdict *bool) bool { return verdict != nil && !*verdict }

	everySTARTTLS, everyAccepts := true, true
	for i := range data.Endpoints {
		ep := &data.Endpoints[i]

		switch {
		case !ep.TCPConnected:
			// Unreachable endpoints do not count towards address-family
			// coverage.
		case ep.IsIPv6:
			data.Coverage.AnyReachable = true
			data.Coverage.HasIPv6 = true
		default:
			data.Coverage.AnyReachable = true
			data.Coverage.HasIPv4 = true
		}

		if ep.BannerReceived {
			data.Coverage.AnyBanner = true
		}
		if ep.EHLOReceived {
			data.Coverage.AnyEHLO = true
		}

		if ep.STARTTLSUpgraded {
			data.Coverage.AnySTARTTLS = true
		} else {
			everySTARTTLS = false
		}

		// An endpoint "accepts mail" when it answered EHLO and neither the
		// null-sender probe nor the postmaster probe, if run, was refused.
		if !ep.EHLOReceived || refused(ep.NullSenderAccepted) || refused(ep.PostmasterAccepted) {
			everyAccepts = false
		}
	}

	data.Coverage.AllSTARTTLS = everySTARTTLS
	data.Coverage.AllAcceptMail = everyAccepts
}