checker-email-autoconfig/checker/collect.go

450 lines
13 KiB
Go

package checker
import (
"context"
"crypto/tls"
"encoding/xml"
"errors"
"fmt"
"io"
"net"
"net/http"
"net/url"
"strings"
"sync"
"time"
sdk "git.happydns.org/checker-sdk-go/checker"
)
// maxBodyBytes is the largest response body (256 KiB) that fetch hands back
// to its callers; longer bodies are cut to this size and flagged as an error.
// Real autoconfig/autodiscover documents are tiny; anything bigger is
// misconfigured or hostile.
const maxBodyBytes = 256 * 1024
// Collect gathers the mail-client autoconfiguration data published for one
// domain and returns it as a *Data.
//
// Options read from opts:
//   - "domain_name" (required): the domain to probe; surrounding whitespace
//     and a trailing dot are removed before validation.
//   - "probeEmail": local part of the probe address, default "test".
//   - "httpTimeout": timeout in seconds (fractions allowed), default 8;
//     non-positive values fall back to 8 seconds.
//   - "ispdbURL": base URL of the ISP database, default
//     https://autoconfig.thunderbird.net/v1.1/ ; a trailing slash is added
//     when missing.
//   - "userAgent": User-Agent header sent with every probe.
//   - "tryISPDB" (default true), "tryHTTPAutoconfig" (default false) and
//     "tryAutodiscoverPost" (default true) toggle the matching probes.
//
// An error is returned only when an option is invalid. Lookup and probe
// failures are recorded inside the returned data instead.
func (p *autoconfigProvider) Collect(ctx context.Context, opts sdk.CheckerOptions) (any, error) {
	domain, _ := sdk.GetOption[string](opts, "domain_name")
	domain = strings.TrimSuffix(strings.TrimSpace(domain), ".")
	if domain == "" {
		return nil, fmt.Errorf("domain_name is required")
	}
	domain, err := validateDomain(domain)
	if err != nil {
		return nil, err
	}

	localPart, _ := sdk.GetOption[string](opts, "probeEmail")
	if localPart == "" {
		localPart = "test"
	}
	email := localPart + "@" + domain

	// Scale in floating point BEFORE converting to time.Duration. Converting
	// first truncates the value to whole seconds, so 0.5 would become 0 (and
	// then silently fall back to the default) and 1.9 would become 1s.
	httpTimeout := time.Duration(sdk.GetFloatOption(opts, "httpTimeout", 8) * float64(time.Second))
	if httpTimeout <= 0 {
		httpTimeout = 8 * time.Second
	}

	ispdbURL, _ := sdk.GetOption[string](opts, "ispdbURL")
	if ispdbURL == "" {
		ispdbURL = "https://autoconfig.thunderbird.net/v1.1/"
	}
	if !strings.HasSuffix(ispdbURL, "/") {
		ispdbURL += "/"
	}
	ispdbParsed, err := url.Parse(ispdbURL)
	if err != nil || (ispdbParsed.Scheme != "http" && ispdbParsed.Scheme != "https") || ispdbParsed.Host == "" {
		return nil, fmt.Errorf("invalid ispdbURL: must be an absolute http(s) URL")
	}
	// The ISPDB host goes through the same hostname rules as the probed
	// domain, which also rejects IP literals.
	if _, err := validateDomain(ispdbParsed.Hostname()); err != nil {
		return nil, fmt.Errorf("invalid ispdbURL host: %w", err)
	}

	userAgent, _ := sdk.GetOption[string](opts, "userAgent")
	if userAgent == "" {
		userAgent = "happyDomain-autoconfig/1.0 (+https://happydomain.org)"
	}

	tryISPDB := sdk.GetBoolOption(opts, "tryISPDB", true)
	tryHTTPAutoconfig := sdk.GetBoolOption(opts, "tryHTTPAutoconfig", false)
	tryAutodiscover := sdk.GetBoolOption(opts, "tryAutodiscoverPost", true)

	client := newHTTPClient(httpTimeout)
	data := &Data{
		Domain:      domain,
		Email:       email,
		CollectedAt: time.Now().UTC(),
	}

	// SRV and Autodiscover are independent of MX; run them in background.
	// Each goroutine writes only its own fields of data, and wg.Wait below
	// runs before data is returned.
	var wg sync.WaitGroup
	wg.Add(1)
	go func() {
		defer wg.Done()
		data.SRV = collectSRV(ctx, domain, httpTimeout)
	}()
	if tryAutodiscover {
		wg.Add(1)
		go func() {
			defer wg.Done()
			data.Autodiscover = collectAutodiscover(ctx, client, userAgent, domain, email)
			// The first probe that parsed becomes the headline result.
			for _, probe := range data.Autodiscover {
				if probe.Parsed != nil {
					data.AutodiscoverResult = probe.Parsed
					break
				}
			}
		}()
	}

	// MX lookup: result feeds into collectAutoconfig below.
	mxResolveCtx, cancel := context.WithTimeout(ctx, httpTimeout)
	mx, mxErr := net.DefaultResolver.LookupMX(mxResolveCtx, domain)
	cancel()
	if mxErr != nil {
		data.MXError = mxErr.Error()
	}
	for _, r := range mx {
		data.MX = append(data.MX, MXRecord{
			Host:       strings.TrimSuffix(r.Host, "."),
			Preference: r.Pref,
		})
	}

	// Runs synchronously: needs MX, but overlaps the SRV/Autodiscover goroutines.
	data.Autoconfig = collectAutoconfig(ctx, client, userAgent, domain, email, data.MX, tryISPDB, tryHTTPAutoconfig, ispdbURL)
	for _, probe := range data.Autoconfig {
		if probe.Parsed != nil {
			data.ClientConfig = probe.Parsed
			data.ClientConfigSource = probe.Source
			break
		}
	}

	wg.Wait()
	return data, nil
}
// newHTTPClient returns the HTTP client used for probing. The given timeout
// bounds the whole request as well as the TLS handshake and the wait for
// response headers. Redirects are followed until five requests have been
// made; after that the last response is returned as-is.
func newHTTPClient(timeout time.Duration) *http.Client {
	const maxRedirects = 5

	// Certificate validation is deliberately left enabled: a bad certificate
	// shows up as a soft probe error so the rule engine can flag it.
	transport := &http.Transport{
		Proxy:                 http.ProxyFromEnvironment,
		TLSClientConfig:       &tls.Config{MinVersion: tls.VersionTLS12},
		TLSHandshakeTimeout:   timeout,
		ResponseHeaderTimeout: timeout,
		ExpectContinueTimeout: time.Second,
		IdleConnTimeout:       30 * time.Second,
		MaxIdleConns:          8,
	}

	stopAtLimit := func(_ *http.Request, via []*http.Request) error {
		if len(via) < maxRedirects {
			return nil
		}
		return http.ErrUseLastResponse
	}

	return &http.Client{
		Timeout:       timeout,
		Transport:     transport,
		CheckRedirect: stopAtLimit,
	}
}
// fetch performs one HTTP request and describes the outcome in a ProbeResult.
//
// body may be nil; contentType is sent only when non-empty. The returned byte
// slice is the response body, cut to at most maxBodyBytes. It is nil when the
// request could not be built or sent, or when reading the body failed. A body
// longer than maxBodyBytes is still returned (truncated), but the result's
// Error field is set so callers can tell.
func fetch(ctx context.Context, client *http.Client, userAgent, method, rawURL string, body io.Reader, contentType string) (ProbeResult, []byte) {
	began := time.Now()
	out := ProbeResult{URL: rawURL, Method: method}

	request, err := http.NewRequestWithContext(ctx, method, rawURL, body)
	if err != nil {
		out.DurationMs = time.Since(began).Milliseconds()
		out.Error = err.Error()
		return out, nil
	}

	headers := request.Header
	headers.Set("User-Agent", userAgent)
	headers.Set("Accept", "application/xml, text/xml, */*;q=0.8")
	if contentType != "" {
		headers.Set("Content-Type", contentType)
	}

	response, err := client.Do(request)
	// The duration covers the exchange up to the response headers, not the
	// body download that follows.
	out.DurationMs = time.Since(began).Milliseconds()
	if err != nil {
		message := err.Error()
		out.Error = message
		// Certificate verification failures are additionally reported in
		// their own field.
		if certErr := (*tls.CertificateVerificationError)(nil); errors.As(err, &certErr) {
			out.TLSError = message
		}
		return out, nil
	}
	defer response.Body.Close()

	finalURL := response.Request.URL.String()
	out.StatusCode = response.StatusCode
	out.ContentType = response.Header.Get("Content-Type")
	out.FinalURL = finalURL
	out.Redirected = finalURL != rawURL

	if state := response.TLS; state != nil {
		out.TLSServerName = state.ServerName
		if certs := state.PeerCertificates; len(certs) > 0 {
			leaf := certs[0]
			out.TLSSubject = leaf.Subject.CommonName
			out.TLSIssuer = leaf.Issuer.CommonName
			out.TLSNotAfter = leaf.NotAfter.UTC().Format(time.RFC3339)
		}
	}

	// Read one byte past the cap so an oversized body can be told apart from
	// one that is exactly maxBodyBytes long.
	payload, readErr := io.ReadAll(io.LimitReader(response.Body, maxBodyBytes+1))
	if readErr != nil {
		out.Error = readErr.Error()
		return out, nil
	}
	out.BodyBytes = len(payload)
	if len(payload) <= maxBodyBytes {
		return out, payload
	}
	out.Error = fmt.Sprintf("response truncated at %d bytes", maxBodyBytes)
	return out, payload[:maxBodyBytes]
}
// collectAutoconfig probes every autoconfig endpoint that applies to domain
// and returns one AutoconfigProbe per endpoint, in the order the endpoints
// were queued: autoconfig subdomain, well-known path, optional plain-HTTP
// autoconfig, optional ISPDB, then the MX-derived fallbacks. All probes run
// concurrently.
func collectAutoconfig(ctx context.Context, client *http.Client, userAgent, domain, email string, mx []MXRecord, tryISPDB, tryHTTPAutoconfig bool, ispdbURL string) []AutoconfigProbe {
	type endpoint struct {
		source string
		url    string
	}

	var endpoints []endpoint
	queue := func(source, rawURL string) {
		endpoints = append(endpoints, endpoint{source: source, url: rawURL})
	}

	address := url.QueryEscape(email)
	queue("autoconfig", fmt.Sprintf("https://autoconfig.%s/mail/config-v1.1.xml?emailaddress=%s", domain, address))
	queue("wellknown", fmt.Sprintf("https://%s/.well-known/autoconfig/mail/config-v1.1.xml?emailaddress=%s", domain, address))
	if tryHTTPAutoconfig {
		queue("http-autoconfig", fmt.Sprintf("http://autoconfig.%s/mail/config-v1.1.xml?emailaddress=%s", domain, address))
	}
	if tryISPDB {
		queue("ispdb", ispdbURL+domain)
	}

	// MX fallback catches gmail/MS365-hosted domains. Bucksch suggests
	// iterating every MX; the lowest-preference one is enough in practice.
	mxParent := pickMXParent(mx)
	if mxParent != "" && mxParent != domain {
		queue("mx-autoconfig", fmt.Sprintf("https://autoconfig.%s/mail/config-v1.1.xml?emailaddress=%s", mxParent, address))
		if tryISPDB {
			queue("mx-ispdb", ispdbURL+mxParent)
		}
	}

	// Every goroutine owns exactly one slot of results, so no locking is
	// needed and the output order matches the queue order.
	results := make([]AutoconfigProbe, len(endpoints))
	var pending sync.WaitGroup
	pending.Add(len(endpoints))
	for slot := range endpoints {
		go func(slot int, ep endpoint) {
			defer pending.Done()
			results[slot] = runAutoconfigProbe(ctx, client, userAgent, ep.source, ep.url)
		}(slot, endpoints[slot])
	}
	pending.Wait()
	return results
}
// runAutoconfigProbe GETs rawURL and, when the request succeeded with a 2xx
// status and a non-empty body, parses that body as a client configuration.
// A parse failure is stored in the result's ParseError; otherwise the parsed
// configuration is attached to the probe.
func runAutoconfigProbe(ctx context.Context, client *http.Client, userAgent, source, rawURL string) AutoconfigProbe {
	result, payload := fetch(ctx, client, userAgent, http.MethodGet, rawURL, nil, "")
	probe := AutoconfigProbe{Source: source, Result: result}

	usable := result.Error == "" &&
		result.StatusCode >= 200 && result.StatusCode < 300 &&
		len(payload) > 0
	if !usable {
		return probe
	}

	if cfg, err := parseClientConfig(payload); err != nil {
		probe.Result.ParseError = err.Error()
	} else {
		probe.Parsed = cfg
	}
	return probe
}
// validateDomain lower-cases domain and checks that it is a plain hostname
// that is safe to interpolate into URLs: 1..253 bytes, not an IP literal, at
// least two dot-separated labels, each 1..63 bytes of [a-z0-9-] and neither
// starting nor ending with '-'. It returns the lower-cased name, or an error
// naming the first rule that was violated. IP-range filtering is left to the
// network layer.
func validateDomain(domain string) (string, error) {
	name := strings.ToLower(domain)

	switch {
	case name == "" || len(name) > 253:
		return "", fmt.Errorf("invalid domain name: length must be 1..253")
	case net.ParseIP(name) != nil:
		return "", fmt.Errorf("invalid domain name: IP literals are not accepted")
	case !strings.Contains(name, "."):
		return "", fmt.Errorf("invalid domain name: must contain at least one dot")
	}

	for _, label := range strings.Split(name, ".") {
		if size := len(label); size < 1 || size > 63 {
			return "", fmt.Errorf("invalid domain name: label length must be 1..63")
		}
		if strings.HasPrefix(label, "-") || strings.HasSuffix(label, "-") {
			return "", fmt.Errorf("invalid domain name: label %q cannot start or end with '-'", label)
		}
		for _, b := range []byte(label) {
			switch {
			case 'a' <= b && b <= 'z', '0' <= b && b <= '9', b == '-':
				continue
			}
			return "", fmt.Errorf("invalid domain name: label %q contains forbidden character", label)
		}
	}
	return name, nil
}
// pickMXParent finds the MX record with the numerically lowest preference
// (the first one wins on ties) and returns registrableDomain of its host.
// It returns the empty string when mx is empty.
func pickMXParent(mx []MXRecord) string {
	if len(mx) == 0 {
		return ""
	}
	lowest := 0
	for i := 1; i < len(mx); i++ {
		if mx[i].Preference < mx[lowest].Preference {
			lowest = i
		}
	}
	return registrableDomain(mx[lowest].Host)
}
// registrableDomain approximates a Public Suffix List lookup. The host is
// lower-cased and stripped of a trailing dot, then reduced to its last two
// labels, or to its last three when the second-to-last label has at most
// three characters and the last label has exactly two (e.g. example.co.uk).
// Hosts with fewer than two labels are returned unchanged. Good enough for
// the gmail / MS365 MX-fallback case we actually care about.
func registrableDomain(host string) string {
	name := strings.ToLower(host)
	name = strings.TrimSuffix(name, ".")

	labels := strings.Split(name, ".")
	count := len(labels)
	if count < 2 {
		return name
	}

	keep := 2
	if count >= 3 {
		// Very rough country-code second-level heuristic.
		second, top := labels[count-2], labels[count-1]
		if len(second) <= 3 && len(top) == 2 {
			keep = 3
		}
	}
	return strings.Join(labels[count-keep:], ".")
}
// ── RFC 6186 SRV ─────────────────────────────────────────────────────────────
// rfc6186Services lists the SRV owner-name prefixes that collectSRV looks up
// under the probed domain. collectSRV reports its records grouped in this
// order.
// NOTE(review): "_submissions._tcp" and "_autodiscover._tcp" look like they
// come from RFC 8314 and Microsoft Autodiscover rather than RFC 6186 itself —
// confirm before relying on the variable name.
var rfc6186Services = []string{
"_imaps._tcp",
"_imap._tcp",
"_pop3s._tcp",
"_pop3._tcp",
"_submissions._tcp",
"_submission._tcp",
"_autodiscover._tcp",
}
// collectSRV resolves every service in rfc6186Services under domain, one
// concurrent lookup per service with its own timeout, and returns all records
// found, grouped in rfc6186Services order. A service whose lookup fails
// contributes no records; the lookup error itself is not reported.
func collectSRV(ctx context.Context, domain string, timeout time.Duration) []SRVRecord {
	// lookup resolves one service and converts the answers to SRVRecords.
	lookup := func(service string) []SRVRecord {
		lookupCtx, cancel := context.WithTimeout(ctx, timeout)
		defer cancel()

		// Empty service and proto make LookupSRV query the name as given.
		_, answers, err := net.DefaultResolver.LookupSRV(lookupCtx, "", "", service+"."+domain)
		if err != nil {
			return nil
		}

		var found []SRVRecord
		for _, answer := range answers {
			host := strings.TrimSuffix(answer.Target, ".")
			found = append(found, SRVRecord{
				Service:  service,
				Target:   host,
				Port:     answer.Port,
				Priority: answer.Priority,
				Weight:   answer.Weight,
				// RFC 2782 "service not provided at this domain" sentinel.
				Skip: host == "" || host == ".",
			})
		}
		return found
	}

	// Each goroutine fills only its own slot, which keeps the service order
	// stable without any locking.
	perService := make([][]SRVRecord, len(rfc6186Services))
	var pending sync.WaitGroup
	pending.Add(len(rfc6186Services))
	for slot, service := range rfc6186Services {
		go func(slot int, service string) {
			defer pending.Done()
			perService[slot] = lookup(service)
		}(slot, service)
	}
	pending.Wait()

	var merged []SRVRecord
	for _, records := range perService {
		merged = append(merged, records...)
	}
	return merged
}
// ── Microsoft Autodiscover (POX) ─────────────────────────────────────────────
// autodiscoverRequestTemplate is the XML request body POSTed to Autodiscover
// endpoints. Its single %s verb is filled by collectAutodiscover with the
// XML-escaped probe e-mail address.
const autodiscoverRequestTemplate = `<?xml version="1.0" encoding="utf-8"?>
<Autodiscover xmlns="http://schemas.microsoft.com/exchange/autodiscover/outlook/requestschema/2006">
<Request>
<EMailAddress>%s</EMailAddress>
<AcceptableResponseSchema>http://schemas.microsoft.com/exchange/autodiscover/outlook/responseschema/2006a</AcceptableResponseSchema>
</Request>
</Autodiscover>`
// collectAutodiscover POSTs an Autodiscover request for email to the
// autodiscover.<domain> subdomain and to the domain root, concurrently, and
// returns one AutodiscoverProbe per endpoint in that order ("subdomain",
// then "root").
func collectAutodiscover(ctx context.Context, client *http.Client, userAgent, domain, email string) []AutodiscoverProbe {
	endpoints := [...]struct{ source, url string }{
		{"subdomain", fmt.Sprintf("https://autodiscover.%s/autodiscover/autodiscover.xml", domain)},
		{"root", fmt.Sprintf("https://%s/autodiscover/autodiscover.xml", domain)},
	}
	// The address is XML-escaped before it is placed inside the request.
	payload := fmt.Sprintf(autodiscoverRequestTemplate, xmlEscape(email))

	results := make([]AutodiscoverProbe, len(endpoints))
	var pending sync.WaitGroup
	pending.Add(len(endpoints))
	for slot, ep := range endpoints {
		go func(slot int, source, rawURL string) {
			defer pending.Done()
			results[slot] = runAutodiscoverProbe(ctx, client, userAgent, source, rawURL, payload)
		}(slot, ep.source, ep.url)
	}
	pending.Wait()
	return results
}
// runAutodiscoverProbe POSTs requestBody to rawURL as text/xml and, when the
// request succeeded with a 2xx status and a non-empty body, parses that body
// as an Autodiscover response. A parse failure is stored in the result's
// ParseError; otherwise the parsed response is attached to the probe.
func runAutodiscoverProbe(ctx context.Context, client *http.Client, userAgent, source, rawURL, requestBody string) AutodiscoverProbe {
	result, payload := fetch(ctx, client, userAgent, http.MethodPost, rawURL, strings.NewReader(requestBody), "text/xml; charset=utf-8")
	probe := AutodiscoverProbe{Source: source, Result: result}

	usable := result.Error == "" &&
		result.StatusCode >= 200 && result.StatusCode < 300 &&
		len(payload) > 0
	if !usable {
		return probe
	}

	if response, err := parseAutodiscoverResponse(payload); err != nil {
		probe.Result.ParseError = err.Error()
	} else {
		probe.Parsed = response
	}
	return probe
}
// xmlEscape returns s with XML special characters escaped so it can be
// placed inside element text.
func xmlEscape(s string) string {
	escaped := new(strings.Builder)
	// The error is discarded as in every other call site of this helper;
	// the destination is an in-memory builder.
	_ = xml.EscapeText(escaped, []byte(s))
	return escaped.String()
}