checker-alias/checker/collect.go

561 lines
17 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// This file is part of the happyDomain (R) project.
// Copyright (c) 2026 happyDomain
// Authors: Pierre-Olivier Mercier, et al.
package checker
import (
"context"
"encoding/json"
"fmt"
"strings"
"github.com/miekg/dns"
sdk "git.happydns.org/checker-sdk-go/checker"
)
// Collect runs the alias checks for the owner name derived from opts and
// returns the observations as an *AliasData: findings, the resolution chain,
// and the coexistence / DNSSEC results.
//
// An error is returned only when the owner name cannot be derived from opts.
// A failure to locate the zone apex is reported as an "alias_no_apex" finding
// inside the returned data, with a nil error.
func (p *aliasProvider) Collect(ctx context.Context, opts sdk.CheckerOptions) (any, error) {
	owner, err := resolveOwner(opts)
	if err != nil {
		return nil, err
	}

	// Tunables, each read with its default.
	var (
		maxChain       = sdk.GetIntOption(opts, "maxChainLength", 8)
		minTTL         = uint32(sdk.GetIntOption(opts, "minTargetTTL", 60))
		requireTarget  = sdk.GetBoolOption(opts, "requireResolvableTarget", true)
		allowApexCNAME = sdk.GetBoolOption(opts, "allowApexCNAME", false)
		recognizeApex  = sdk.GetBoolOption(opts, "recognizeApexFlattening", true)
	)

	result := &AliasData{Owner: owner}

	// Step 1: zone apex and its authoritative servers. Without them none of
	// the remaining steps can run.
	apex, servers, err := findApex(ctx, owner)
	if err != nil {
		noApex := AliasFinding{
			Code:     "alias_no_apex",
			Severity: SeverityCrit,
			Message:  fmt.Sprintf("could not locate zone apex of %s: %v", owner, err),
			Subject:  owner,
			Hint:     "Check that the parent delegation exists and that the zone is published.",
		}
		result.Findings = append(result.Findings, noApex)
		return result, nil
	}
	result.Apex, result.AuthServers = apex, servers
	result.OwnerIsApex = lowerFQDN(owner) == lowerFQDN(apex)

	// Step 2: DNAME records between the owner and the apex.
	result.DNAMESubstitutions = collectDNAMEs(ctx, servers, owner, apex)

	// Step 3: follow the CNAME/DNAME chain starting at the owner.
	walker := chainCtx{
		data:         result,
		maxLen:       maxChain,
		minTTL:       minTTL,
		servers:      servers,
		apex:         apex,
		seenOwners:   make(map[string]bool),
		recFallback:  systemResolver(),
		followTarget: requireTarget,
	}
	walker.walk(ctx, owner)

	// Step 4: apex-only checks. These run before the coexistence check
	// because that check reads data.ApexFlattening, which checkApex sets.
	if result.OwnerIsApex {
		checkApex(ctx, result, servers, apex, allowApexCNAME, recognizeApex)
	}

	// Step 5: other record types next to a CNAME at the owner (any level).
	checkCoexistence(ctx, result, servers, owner, allowApexCNAME, recognizeApex)

	// Step 6: DNSSEC coverage of the alias.
	checkDNSSEC(ctx, result, servers, apex, owner)

	// Step 7: chain-wide validations on the collected hops.
	validateChain(result, maxChain, minTTL, requireTarget)

	return result, nil
}
// resolveOwner derives the lower-cased FQDN to check from the auto-filled
// options.
//
// The "service" option takes precedence: when it decodes to a CNAME record
// with a non-empty owner name, that name is returned. A missing or
// undecodable service payload is not an error; the function then falls back
// to "subdomain" + "domain_name".
//
// An empty subdomain, "@", or a subdomain consisting only of a trailing dot
// designates the domain itself. An error is returned when "domain_name" is
// absent or empty.
func resolveOwner(opts sdk.CheckerOptions) (string, error) {
	if svcMsg, ok := sdk.GetOption[serviceMessage](opts, "service"); ok && len(svcMsg.Service) > 0 {
		var c cnameService
		if err := json.Unmarshal(svcMsg.Service, &c); err == nil && c.Record != nil && c.Record.Hdr.Name != "" {
			return lowerFQDN(c.Record.Hdr.Name), nil
		}
	}

	// The presence flags are deliberately ignored: an absent option and an
	// empty one are handled the same way by the emptiness checks below.
	parent, _ := sdk.GetOption[string](opts, "domain_name")
	if parent == "" {
		return "", fmt.Errorf("missing 'domain_name' option")
	}
	parent = strings.TrimSuffix(parent, ".")

	// Trim before testing for emptiness so that "." or "@." cannot produce a
	// name with an empty leading label such as ".example.com".
	sub, _ := sdk.GetOption[string](opts, "subdomain")
	sub = strings.TrimSuffix(sub, ".")
	if sub == "" || sub == "@" {
		return lowerFQDN(parent), nil
	}
	return lowerFQDN(sub + "." + parent), nil
}
// chainCtx carries the mutable state of a chain walk. Collect builds one per
// run and calls walk on it.
type chainCtx struct {
// data is the result being filled in; walk appends hops and findings to it
// and sets FinalTarget, Rcode, FinalA and FinalAAAA.
data *AliasData
// maxLen is the number of hops walk follows before it reports
// "alias_chain_too_long".
maxLen int
// minTTL is the threshold below which a hop's TTL triggers "alias_low_ttl".
minTTL uint32
// servers are the servers queried for names inside apex; Collect fills
// them from findApex.
servers []string
// apex is the zone apex of the owner; walk uses it to decide whether a
// target is still inside the zone.
apex string
// seenOwners records every name already visited, for loop detection.
seenOwners map[string]bool
// recFallback is the resolver used when no authoritative server list is
// available; Collect fills it from systemResolver().
recFallback string
// followTarget mirrors the "requireResolvableTarget" option.
// NOTE(review): not read by any method visible in this file; confirm
// whether it is still needed.
followTarget bool
}
// walk follows CNAME/DNAME hops starting from name. Each iteration queries
// the CNAME RRset of the current name and appends one hop to data.Chain;
// data.FinalTarget is set to the name at which the walk stopped.
//
// The walk stops when:
//   - a name is visited a second time (finding "alias_loop");
//   - more than maxLen hops were followed ("alias_chain_too_long");
//   - the query fails or yields no message ("alias_query_failed");
//   - the server answers with a non-success rcode ("alias_rcode", also
//     stored in data.Rcode);
//   - the name carries no CNAME: it is recorded as the terminal KindTarget
//     hop and its addresses are fetched by resolveFinal.
//
// A hop whose TTL is below minTTL adds an "alias_low_ttl" warning without
// stopping the walk.
func (c *chainCtx) walk(ctx context.Context, name string) {
	// stop records the finding that ends the walk and the name it ended at.
	stop := func(at string, f AliasFinding) {
		c.data.Findings = append(c.data.Findings, f)
		c.data.FinalTarget = at
	}

	current := lowerFQDN(name)
	currentServers := c.servers
	// The loop runs one iteration past maxLen so that an over-long chain is
	// reported at the first name beyond the limit instead of ending silently.
	for i := 0; i <= c.maxLen+1; i++ {
		if c.seenOwners[current] {
			stop(current, AliasFinding{
				Code:     "alias_loop",
				Severity: SeverityCrit,
				Message:  fmt.Sprintf("chain loops back to %s", current),
				Subject:  current,
				Hint:     "Break the loop by pointing the last CNAME at an A/AAAA-bearing name.",
			})
			return
		}
		c.seenOwners[current] = true

		if i > c.maxLen {
			stop(current, AliasFinding{
				Code:     "alias_chain_too_long",
				Severity: SeverityCrit,
				Message:  fmt.Sprintf("chain exceeds %d hops at %s; many resolvers will give up", c.maxLen, current),
				Subject:  current,
				Hint:     "Flatten intermediate CNAMEs so that the chain is at most a few hops long.",
			})
			return
		}

		q := dns.Question{Name: current, Qtype: dns.TypeCNAME, Qclass: dns.ClassINET}
		r, server, err := c.queryFor(ctx, current, currentServers, q)
		if err == nil && r == nil {
			// Every other query site in this file guards against a nil
			// message; treat it as a failed query instead of dereferencing it.
			err = fmt.Errorf("empty response")
		}
		if err != nil {
			stop(current, AliasFinding{
				Code:     "alias_query_failed",
				Severity: SeverityWarn,
				Message:  fmt.Sprintf("CNAME query for %s failed: %v", current, err),
				Subject:  current,
			})
			return
		}

		if r.Rcode != dns.RcodeSuccess {
			c.data.Rcode = rcodeText(r.Rcode)
			stop(current, AliasFinding{
				Code:     "alias_rcode",
				Severity: SeverityCrit,
				Message:  fmt.Sprintf("server answered %s for %s", c.data.Rcode, current),
				Subject:  current,
				Hint:     "Ensure the zone publishes the expected record; NXDOMAIN/SERVFAIL mid-chain breaks the alias.",
			})
			return
		}

		cname, synthesizedFromDNAME, ttl := extractCNAME(r, current)
		if cname == "" {
			// No CNAME at this name: terminal hop, resolve A/AAAA.
			c.data.Chain = append(c.data.Chain, ChainHop{
				Owner:  current,
				Kind:   KindTarget,
				Server: server,
			})
			c.data.FinalTarget = current
			c.resolveFinal(ctx, current, currentServers)
			return
		}

		target := lowerFQDN(cname)
		kind := KindCNAME
		if synthesizedFromDNAME {
			kind = KindDNAME
		}
		c.data.Chain = append(c.data.Chain, ChainHop{
			Owner:       current,
			Kind:        kind,
			Target:      target,
			TTL:         ttl,
			Server:      server,
			Synthesized: synthesizedFromDNAME,
		})

		if ttl < c.minTTL {
			c.data.Findings = append(c.data.Findings, AliasFinding{
				Code:     "alias_low_ttl",
				Severity: SeverityWarn,
				Message:  fmt.Sprintf("hop %s → %s has TTL %ds (< %d)", current, target, ttl, c.minTTL),
				Subject:  current,
				Hint:     "Raise the CNAME TTL to improve cache efficiency (5-15 minutes is a common floor).",
			})
		}

		// Pick the servers for the next hop. Targets inside the apex reuse
		// the zone's servers; foreign targets need the servers of their own
		// zone, with the fallback resolver as last resort.
		if isSubdomain(target, c.apex) {
			currentServers = c.servers
		} else {
			// NOTE(review): queryFor and resolveFinal only take the
			// recursiveExchange path for an empty server list, so the
			// fallback resolver is queried through queryAtAuth here;
			// confirm that this is intended.
			currentServers = []string{c.recFallback}
			// Only look up name servers when a zone was actually found;
			// findZoneOf returns "" on failure.
			if zone := findZoneOf(ctx, target); zone != "" {
				if ns, err := resolveZoneNSAddrs(ctx, zone); err == nil && len(ns) > 0 {
					currentServers = ns
				}
			}
		}
		current = target
	}
}
// queryFor sends q to the given servers through queryAtAuth. When the server
// list is empty (useful for foreign targets) the question goes to the
// recursive fallback resolver instead, and that resolver is reported as the
// answering server. The name parameter is not used.
func (c *chainCtx) queryFor(ctx context.Context, name string, servers []string, q dns.Question) (*dns.Msg, string, error) {
	if len(servers) > 0 {
		return queryAtAuth(ctx, servers, q)
	}
	resp, err := recursiveExchange(ctx, c.recFallback, q)
	return resp, c.recFallback, err
}
// extractCNAME scans the answer section of r for the first CNAME whose owner
// equals owner (case-insensitive, trailing dot normalised) and returns its
// target and TTL. fromDNAME reports whether the same answer section also
// holds a DNAME record, in which case the CNAME is taken to be synthesized.
// When no CNAME matches, it returns "", false, 0.
func extractCNAME(r *dns.Msg, owner string) (target string, fromDNAME bool, ttl uint32) {
	wanted := dns.Fqdn(owner)
	matched := false
	for _, rr := range r.Answer {
		switch rec := rr.(type) {
		case *dns.CNAME:
			// Only the first matching CNAME counts.
			if !matched && strings.EqualFold(dns.Fqdn(rec.Hdr.Name), wanted) {
				matched = true
				target, ttl = rec.Target, rec.Hdr.Ttl
			}
		case *dns.DNAME:
			fromDNAME = true
		}
	}
	if target == "" {
		return "", false, 0
	}
	return target, fromDNAME, ttl
}
// resolveFinal queries the A and then the AAAA records of name and appends
// the addresses found to data.FinalA and data.FinalAAAA. The lookups are
// best effort: a failed query simply contributes no address. A non-success
// rcode on the A answer is stored in data.Rcode.
func (c *chainCtx) resolveFinal(ctx context.Context, name string, servers []string) {
	// ask sends one question for name, through the recursive fallback when
	// no server list is available, and returns nil on error.
	ask := func(qtype uint16) *dns.Msg {
		q := dns.Question{Name: dns.Fqdn(name), Qtype: qtype, Qclass: dns.ClassINET}
		var (
			resp *dns.Msg
			err  error
		)
		if len(servers) == 0 {
			resp, err = recursiveExchange(ctx, c.recFallback, q)
		} else {
			resp, _, err = queryAtAuth(ctx, servers, q)
		}
		if err != nil {
			return nil
		}
		return resp
	}

	if resp := ask(dns.TypeA); resp != nil {
		if resp.Rcode != dns.RcodeSuccess {
			c.data.Rcode = rcodeText(resp.Rcode)
		}
		for _, rr := range resp.Answer {
			if rec, ok := rr.(*dns.A); ok {
				c.data.FinalA = append(c.data.FinalA, rec.A.String())
			}
		}
	}

	if resp := ask(dns.TypeAAAA); resp != nil {
		for _, rr := range resp.Answer {
			if rec, ok := rr.(*dns.AAAA); ok {
				c.data.FinalAAAA = append(c.data.FinalAAAA, rec.AAAA.String())
			}
		}
	}
}
// findZoneOf returns the zone apex that findApex reports for name, or "" when
// findApex fails. It is used to locate the authoritative servers of foreign
// chain targets.
func findZoneOf(ctx context.Context, name string) string {
	if zone, _, err := findApex(ctx, name); err == nil {
		return zone
	}
	return ""
}
// collectDNAMEs asks servers for a DNAME record at owner and at each of its
// ancestors below apex (apex itself is not queried) and returns one KindDNAME
// hop per DNAME record found. Names whose query fails, yields no message, or
// returns a non-success rcode are skipped.
func collectDNAMEs(ctx context.Context, servers []string, owner, apex string) []ChainHop {
	ownerLabels := dns.SplitDomainName(owner)
	// Number of names to probe: one per label the owner has beyond the apex.
	// A non-positive value means there is nothing to probe.
	depth := len(ownerLabels) - len(dns.SplitDomainName(apex))

	var hops []ChainHop
	for start := 0; start < depth; start++ {
		candidate := dns.Fqdn(strings.Join(ownerLabels[start:], "."))
		resp, server, err := queryAtAuth(ctx, servers, dns.Question{
			Name:   candidate,
			Qtype:  dns.TypeDNAME,
			Qclass: dns.ClassINET,
		})
		if err != nil || resp == nil || resp.Rcode != dns.RcodeSuccess {
			continue
		}
		for _, rr := range resp.Answer {
			rec, ok := rr.(*dns.DNAME)
			if !ok {
				continue
			}
			hops = append(hops, ChainHop{
				Owner:  lowerFQDN(rec.Hdr.Name),
				Kind:   KindDNAME,
				Target: lowerFQDN(rec.Target),
				TTL:    rec.Hdr.Ttl,
				Server: server,
			})
		}
	}
	return hops
}
// checkApex inspects the zone apex. A KindCNAME hop owned by the apex in
// data.Chain yields an "alias_cname_at_apex" finding (critical, or a warning
// when allowApexCNAME is set). Otherwise, A/AAAA records served at the apex
// are taken as provider-side ALIAS/ANAME flattening: data.ApexFlattening is
// set and, when recognizeApex is set, an informational finding is added.
func checkApex(ctx context.Context, data *AliasData, servers []string, apex string, allowApexCNAME, recognizeApex bool) {
	// Does the apex answer with any address record?
	addrAtApex := false
	for _, qtype := range [...]uint16{dns.TypeA, dns.TypeAAAA} {
		resp, _, err := queryAtAuth(ctx, servers, dns.Question{Name: apex, Qtype: qtype, Qclass: dns.ClassINET})
		if err != nil || resp == nil {
			continue
		}
		for _, rr := range resp.Answer {
			_, isA := rr.(*dns.A)
			_, isAAAA := rr.(*dns.AAAA)
			if isA || isAAAA {
				addrAtApex = true
			}
		}
	}

	// Did the chain walk record a CNAME owned by the apex?
	cnameAtApex := false
	for i := range data.Chain {
		hop := &data.Chain[i]
		if hop.Kind == KindCNAME && lowerFQDN(hop.Owner) == lowerFQDN(apex) {
			cnameAtApex = true
			break
		}
	}

	if cnameAtApex {
		severity := SeverityWarn
		if !allowApexCNAME {
			severity = SeverityCrit
		}
		data.Findings = append(data.Findings, AliasFinding{
			Code:     "alias_cname_at_apex",
			Severity: severity,
			Message:  fmt.Sprintf("CNAME at apex %s conflicts with the SOA/NS records a zone apex must carry (RFC 1912 §2.4)", apex),
			Subject:  apex,
			Hint:     "Use the provider's ALIAS/ANAME flattening, an HTTP redirect, or move content to a sub-label such as www.",
		})
		// Flattening is only considered when there is no CNAME at the apex.
		return
	}
	if !addrAtApex {
		return
	}

	// Addresses at the apex without a CNAME: treated as ALIAS/ANAME flattening.
	data.ApexFlattening = true
	if !recognizeApex {
		return
	}
	data.Findings = append(data.Findings, AliasFinding{
		Code:     "alias_apex_flattening",
		Severity: SeverityInfo,
		Message:  fmt.Sprintf("apex %s serves A/AAAA directly (provider-side ALIAS/ANAME flattening)", apex),
		Subject:  apex,
		Hint:     "Keep the upstream target's TTL in mind: apex A/AAAA will only update as fast as the provider re-flattens.",
	})
}
// checkCoexistence reports record types that exist next to a CNAME at owner;
// a name carrying a CNAME must not carry other data (RFC 1034 §3.6.2,
// RFC 2181 §10.1).
//
// Nothing is checked unless data.Chain holds a KindCNAME hop owned by owner.
// Otherwise a fixed list of common types (A, AAAA, MX, TXT, NS, SRV, CAA) is
// queried at owner. Every type that is answered with a record owned by owner
// is appended to data.Coexisting and produces an "alias_coexisting_rrset"
// finding. Types are reported in the fixed order of that list, so the output
// is deterministic across runs.
//
// Findings are critical, except at the apex when allowApexCNAME is set, where
// they are downgraded to warnings. A/AAAA at the apex are skipped entirely
// when recognizeApex is set and data.ApexFlattening was recorded.
func checkCoexistence(ctx context.Context, data *AliasData, servers []string, owner string, allowApexCNAME, recognizeApex bool) {
	hasCNAME := false
	for _, h := range data.Chain {
		if h.Kind == KindCNAME && lowerFQDN(h.Owner) == lowerFQDN(owner) {
			hasCNAME = true
			break
		}
	}
	if !hasCNAME {
		return
	}

	// A handful of common sibling types; the slice order is the report order.
	siblings := []uint16{
		dns.TypeA, dns.TypeAAAA, dns.TypeMX, dns.TypeTXT,
		dns.TypeNS, dns.TypeSRV, dns.TypeCAA,
	}

	isApex := lowerFQDN(owner) == lowerFQDN(data.Apex)
	sev := SeverityCrit
	if isApex && allowApexCNAME {
		sev = SeverityWarn
	}

	for _, qt := range siblings {
		q := dns.Question{Name: owner, Qtype: qt, Qclass: dns.ClassINET}
		r, _, err := queryAtAuth(ctx, servers, q)
		if err != nil || r == nil {
			continue
		}

		// The answer to any type may also carry the CNAME itself (or one
		// synthesized from a DNAME); only a record of the queried type owned
		// by owner counts as a sibling.
		var ttl uint32
		found := false
		for _, rr := range r.Answer {
			hdr := rr.Header()
			if hdr.Rrtype != qt {
				continue
			}
			if !strings.EqualFold(dns.Fqdn(hdr.Name), dns.Fqdn(owner)) {
				continue
			}
			ttl, found = hdr.Ttl, true
			break
		}
		if !found {
			continue
		}

		// Known exception: A/AAAA served at the apex through provider-side
		// flattening are not reported when the caller asked to recognize it.
		// NOTE(review): in this file data.ApexFlattening is only set by
		// checkApex when no CNAME was found at the apex, so this exemption
		// may never trigger; confirm.
		if isApex && (qt == dns.TypeA || qt == dns.TypeAAAA) && recognizeApex && data.ApexFlattening {
			continue
		}

		t := dns.TypeToString[qt]
		data.Coexisting = append(data.Coexisting, CoexistingRRset{Type: t, TTL: ttl})
		data.Findings = append(data.Findings, AliasFinding{
			Code:     "alias_coexisting_rrset",
			Severity: sev,
			Message:  fmt.Sprintf("%s and CNAME both exist at %s (RFC 1034 §3.6.2 / RFC 2181 §10.1)", t, owner),
			Subject:  owner,
			Hint:     "Remove the sibling record or move it under a different label; a name cannot simultaneously carry a CNAME and other data.",
		})
	}
}
// checkDNSSEC verifies that, if the zone is signed, the alias at owner is
// covered by a signature.
//
// The zone counts as signed when the apex answers a DNSKEY query with at
// least one DNSKEY record; the result is stored in data.ZoneSigned. For a
// signed zone the CNAME at owner is queried and data.CNAMESigned records
// whether the answer is covered:
//   - a CNAME owned by owner is covered by an RRSIG(CNAME) at owner;
//   - a CNAME synthesized from a DNAME never carries its own RRSIG
//     (RFC 6672 §5.3.1), so it counts as covered when the DNAME in the same
//     answer is signed.
//
// A missing signature adds a critical "alias_cname_not_signed" finding. Any
// query failure ends the check silently.
func checkDNSSEC(ctx context.Context, data *AliasData, servers []string, apex, owner string) {
	qk := dns.Question{Name: apex, Qtype: dns.TypeDNSKEY, Qclass: dns.ClassINET}
	r, _, err := queryAtAuthTCP(ctx, servers, qk)
	if err != nil || r == nil || r.Rcode != dns.RcodeSuccess {
		return
	}
	signed := false
	for _, rr := range r.Answer {
		if _, ok := rr.(*dns.DNSKEY); ok {
			signed = true
			break
		}
	}
	data.ZoneSigned = signed
	if !signed {
		return
	}

	// Query the CNAME and look for the signatures returned alongside it.
	// NOTE(review): whether the DO bit is set depends on queryAtAuthTCP,
	// which is not visible here; confirm it requests DNSSEC records.
	q := dns.Question{Name: owner, Qtype: dns.TypeCNAME, Qclass: dns.ClassINET}
	r, _, err = queryAtAuthTCP(ctx, servers, q)
	if err != nil || r == nil {
		return
	}

	ownerFQDN := dns.Fqdn(owner)
	atOwner := func(name string) bool {
		return strings.EqualFold(dns.Fqdn(name), ownerFQDN)
	}

	var sawCNAME, sawDNAME, cnameSig, dnameSig bool
	for _, rr := range r.Answer {
		switch v := rr.(type) {
		case *dns.CNAME:
			// Later CNAMEs of a chased chain belong to other owners.
			if atOwner(v.Hdr.Name) {
				sawCNAME = true
			}
		case *dns.DNAME:
			sawDNAME = true
		case *dns.RRSIG:
			switch v.TypeCovered {
			case dns.TypeCNAME:
				if atOwner(v.Hdr.Name) {
					cnameSig = true
				}
			case dns.TypeDNAME:
				dnameSig = true
			}
		}
	}
	if !sawCNAME {
		return
	}

	covered := cnameSig || (sawDNAME && dnameSig)
	data.CNAMESigned = covered
	if covered {
		return
	}

	msg := fmt.Sprintf("zone %s is DNSSEC-signed but CNAME at %s has no RRSIG", apex, owner)
	if sawDNAME {
		msg = fmt.Sprintf("zone %s is DNSSEC-signed but the DNAME synthesizing the CNAME at %s has no RRSIG", apex, owner)
	}
	data.Findings = append(data.Findings, AliasFinding{
		Code:     "alias_cname_not_signed",
		Severity: SeverityCrit,
		Message:  msg,
		Subject:  owner,
		Hint:     "Re-sign the zone or verify your signer covers the alias RRset; unsigned answers in a signed zone SERVFAIL at validating resolvers.",
	})
}
// validateChain enforces chain-wide invariants on the hops collected in
// data.Chain and appends the resulting findings to data.Findings. An empty
// chain is left untouched.
//
// Two checks are made:
//   - when the last hop is a KindTarget and neither data.FinalA nor
//     data.FinalAAAA holds an address, "alias_target_unresolvable" is added
//     (critical when requireTarget is set, a warning otherwise);
//   - every owner that appears on more than one CNAME/DNAME hop gets an
//     "alias_multiple_records" finding. These are emitted in the order the
//     owners first appear in the chain, so the output is deterministic.
//
// maxChain and minTTL are accepted for interface compatibility and are not
// used by this function.
func validateChain(data *AliasData, maxChain int, minTTL uint32, requireTarget bool) {
	if len(data.Chain) == 0 {
		return
	}

	// Target resolvability.
	if last := data.Chain[len(data.Chain)-1]; last.Kind == KindTarget {
		if len(data.FinalA) == 0 && len(data.FinalAAAA) == 0 {
			sev := SeverityWarn
			if requireTarget {
				sev = SeverityCrit
			}
			rcode := data.Rcode
			if rcode == "" {
				rcode = "no A/AAAA"
			}
			data.Findings = append(data.Findings, AliasFinding{
				Code:     "alias_target_unresolvable",
				Severity: sev,
				Message:  fmt.Sprintf("final target %s does not resolve to an address (%s)", last.Owner, rcode),
				Subject:  last.Owner,
				Hint:     "Point the alias at a name that publishes at least one A or AAAA record, or fix the upstream zone.",
			})
		}
	}

	// Multiple CNAME/DNAME hops with the same owner (malformed zone). The
	// owners are remembered in first-appearance order because ranging over
	// the count map directly would report them in random order.
	counts := make(map[string]int, len(data.Chain))
	var order []string
	for _, h := range data.Chain {
		if h.Kind != KindCNAME && h.Kind != KindDNAME {
			continue
		}
		if counts[h.Owner] == 0 {
			order = append(order, h.Owner)
		}
		counts[h.Owner]++
	}
	for _, o := range order {
		n := counts[o]
		if n <= 1 {
			continue
		}
		data.Findings = append(data.Findings, AliasFinding{
			Code:     "alias_multiple_records",
			Severity: SeverityCrit,
			Message:  fmt.Sprintf("%s carries %d CNAME/DNAME records in the chain; only one is legal per owner", o, n),
			Subject:  o,
			Hint:     "Keep a single CNAME per name; remove duplicates at the authoritative zone.",
		})
	}
}