checker-alias/checker/collect.go
Pierre-Olivier Mercier c5c13960d5
All checks were successful
continuous-integration/drone/push Build is passing
continuous-integration/drone/tag Build is passing
checker: add dname_coexistence rule and refactor sibling probing
Extract querySiblings from observeCoexistence so both CNAME and DNAME
coexistence checks share the same parallel RRset scan. Add
observeDNAMECoexistence (called from Collect) that populates
AliasData.DNAMECoexistence for each DNAME node in DNAMESubstitutions.
Add the dname_coexistence rule (RFC 6672 §2.3) that flags any sibling
RRsets at a DNAME owner as CRIT, with matching tests.
2026-05-16 21:36:20 +08:00

526 lines
14 KiB
Go

package checker
import (
"context"
"encoding/json"
"fmt"
"strings"
"sync"
"github.com/miekg/dns"
sdk "git.happydns.org/checker-sdk-go/checker"
)
// Collect gathers the alias observations for the owner name described by opts
// and returns them as an *AliasData.
//
// An error is returned only when the owner name cannot be derived from opts.
// A failed apex lookup is not an error: its message is stored in
// ApexLookupError and the partially filled data is returned.
func (p *aliasProvider) Collect(ctx context.Context, opts sdk.CheckerOptions) (any, error) {
	owner, err := resolveOwner(opts)
	if err != nil {
		return nil, err
	}

	hopLimit := sdk.GetIntOption(opts, "maxChainLength", defaultMaxChainLength)
	result := &AliasData{Owner: owner}
	recursor := systemResolver()

	zone, authServers, err := findApex(ctx, owner, recursor)
	if err != nil {
		result.ApexLookupError = err.Error()
		return result, nil
	}

	result.Apex = zone
	result.AuthServers = authServers
	result.OwnerIsApex = lowerFQDN(owner) == lowerFQDN(zone)
	result.DNAMESubstitutions = collectDNAMEs(ctx, authServers, owner, zone)

	// Follow the CNAME/DNAME chain first: the observers below read what the
	// walk recorded (Chain, OwnerHasCNAME).
	walker := &chainCtx{
		data:        result,
		maxLen:      hopLimit,
		servers:     authServers,
		apex:        zone,
		seenOwners:  map[string]bool{},
		recFallback: recursor,
	}
	walker.walk(ctx, owner)

	if result.OwnerIsApex {
		observeApex(ctx, result, authServers, zone)
	}
	observeCoexistence(ctx, result, authServers, owner)
	observeDNAMECoexistence(ctx, result, authServers)
	observeDNSSEC(ctx, result, authServers, zone, owner)

	return result, nil
}
// resolveOwner derives the lower-cased FQDN to check from opts.
//
// The "service" option wins when it decodes to a record: the record's owner
// name is authoritative. Otherwise the name is assembled from "subdomain" and
// "domain_name", which serves ad-hoc forms; in that path a missing
// "domain_name" is an error, and an empty or "@" subdomain means the domain
// itself.
func resolveOwner(opts sdk.CheckerOptions) (string, error) {
	domain, _ := sdk.GetOption[string](opts, "domain_name")
	domain = strings.TrimSuffix(domain, ".")

	svc, found := sdk.GetOption[serviceMessage](opts, "service")
	if found && len(svc.Service) != 0 {
		var payload cnameService
		decodeErr := json.Unmarshal(svc.Service, &payload)
		if decodeErr == nil && payload.Record != nil {
			// svc.Domain is the subdomain relative to the apex, and the
			// record's Hdr.Name is relative to that mount point: resolve the
			// mount point first, then hang the record name off it.
			mount := sdk.JoinRelative(strings.TrimSuffix(svc.Domain, "."), domain)
			return lowerFQDN(sdk.JoinRelative(payload.Record.Hdr.Name, mount)), nil
		}
	}

	label, _ := sdk.GetOption[string](opts, "subdomain")
	switch {
	case domain == "":
		return "", fmt.Errorf("missing 'domain_name' option")
	case label == "", label == "@":
		return lowerFQDN(domain), nil
	}
	return lowerFQDN(strings.TrimSuffix(label, ".") + "." + domain), nil
}
// chainCtx carries the state shared by the methods that walk an alias chain
// (walk, reanchor, queryFor, resolveFinal).
type chainCtx struct {
// data receives everything the walk observes (Chain, FinalTarget,
// ChainTerminated, OwnerHasCNAME, FinalA/FinalAAAA/FinalRcode).
data *AliasData
// maxLen is the hop budget; walk reports TermTooLong once it is exceeded.
maxLen int
// servers is the server set used for the first query of the walk.
servers []string
// apex is the zone the walk starts in.
apex string
// seenOwners marks names already visited, for loop detection.
seenOwners map[string]bool
// recFallback is the resolver handed to findApex and recursiveExchange
// when no authoritative server set is available.
recFallback string
}
// walk follows the CNAME chain (including DNAME-synthesized CNAMEs) starting
// at name and records the result on c.data: one ChainHop per step, the
// FinalTarget, and a ChainTerminated reason.
//
// The walk ends with:
//   - TermLoop when a name is reached a second time;
//   - TermTooLong when more than maxLen iterations were needed;
//   - TermQueryErr when a query or a re-anchor after a hop fails;
//   - TermRcode when a server answers with a non-success rcode;
//   - TermOK when a name without a CNAME is reached; its addresses are then
//     resolved through resolveFinal.
//
// Each iteration queries the current server set. After every hop, and when a
// response looks like a referral, the walk re-anchors on the zone that holds
// the next name so delegated child zones are asked directly.
func (c *chainCtx) walk(ctx context.Context, name string) {
	current := lowerFQDN(name)
	currentServers := c.servers
	currentZone := c.apex
	for i := 0; i <= c.maxLen+1; i++ {
		if c.seenOwners[current] {
			c.data.ChainTerminated = ChainTermination{
				Reason:  TermLoop,
				Subject: current,
				Detail:  fmt.Sprintf("chain loops back to %s", current),
			}
			c.data.FinalTarget = current
			return
		}
		c.seenOwners[current] = true
		if i > c.maxLen {
			c.data.ChainTerminated = ChainTermination{
				Reason:  TermTooLong,
				Subject: current,
				Detail:  fmt.Sprintf("chain exceeds %d hops at %s", c.maxLen, current),
			}
			c.data.FinalTarget = current
			return
		}
		q := dns.Question{Name: current, Qtype: dns.TypeCNAME, Qclass: dns.ClassINET}
		r, server, err := c.queryFor(ctx, currentServers, q)
		if err != nil {
			c.data.ChainTerminated = ChainTermination{
				Reason:  TermQueryErr,
				Subject: current,
				Detail:  err.Error(),
			}
			c.data.FinalTarget = current
			return
		}
		if r.Rcode != dns.RcodeSuccess {
			rcode := rcodeText(r.Rcode)
			c.data.ChainTerminated = ChainTermination{
				Reason:  TermRcode,
				Subject: current,
				Rcode:   rcode,
				Detail:  fmt.Sprintf("server answered %s for %s", rcode, current),
			}
			c.data.FinalTarget = current
			return
		}
		cname, synthesizedFromDNAME, ttl := extractCNAME(r, current)
		if cname == "" {
			// A NOERROR with NS in Authority is a referral to a child zone:
			// re-anchor on that zone and re-query before declaring a target.
			if isReferral(r, current) {
				zone, ns, zerr := c.reanchor(ctx, current)
				if zerr == nil && len(ns) > 0 && zone != currentZone {
					currentZone = zone
					currentServers = ns
					// The same name is queried again on the next iteration.
					// Un-mark it, otherwise the loop check at the top would
					// report this retry as a CNAME loop. The retry still
					// consumes one iteration of the hop budget, which bounds
					// repeated re-anchoring.
					delete(c.seenOwners, current)
					continue
				}
			}
			c.data.Chain = append(c.data.Chain, ChainHop{
				Owner:  current,
				Kind:   KindTarget,
				Server: server,
			})
			c.data.FinalTarget = current
			c.data.ChainTerminated = ChainTermination{Reason: TermOK}
			c.resolveFinal(ctx, current, currentServers)
			return
		}
		// Only a CNAME actually published at the checked owner counts; a
		// DNAME-synthesized one is not a record at that name.
		if current == c.data.Owner && !synthesizedFromDNAME {
			c.data.OwnerHasCNAME = true
		}
		target := lowerFQDN(cname)
		kind := KindCNAME
		if synthesizedFromDNAME {
			kind = KindDNAME
		}
		c.data.Chain = append(c.data.Chain, ChainHop{
			Owner:       current,
			Kind:        kind,
			Target:      target,
			TTL:         ttl,
			Server:      server,
			Synthesized: synthesizedFromDNAME,
		})
		// Re-anchor for the next hop. Even within the original apex, the
		// target may live in a delegated child zone whose CNAMEs are not
		// answered by the parent's auth set.
		zone, ns, zerr := c.reanchor(ctx, target)
		if zerr != nil {
			c.data.ChainTerminated = ChainTermination{
				Reason:  TermQueryErr,
				Subject: target,
				Detail:  fmt.Sprintf("re-anchor for %s failed: %v", target, zerr),
			}
			c.data.FinalTarget = target
			return
		}
		if len(ns) == 0 {
			// No nameserver address for the target zone: use the fallback
			// resolver as the only server for the next query.
			currentServers = []string{c.recFallback}
		} else {
			currentServers = ns
		}
		currentZone = zone
		current = target
	}
}
// reanchor locates the zone apex for name and the addresses of that zone's
// nameservers. Failures are handed back to the caller instead of being hidden
// behind the fallback resolver; when only the address lookup fails, the zone
// is still returned alongside the error.
func (c *chainCtx) reanchor(ctx context.Context, name string) (string, []string, error) {
	apex, _, apexErr := findApex(ctx, name, c.recFallback)
	if apexErr != nil {
		return "", nil, apexErr
	}

	addrs, nsErr := resolveZoneNSAddrs(ctx, apex)
	if nsErr != nil {
		addrs = nil
	}
	return apex, addrs, nsErr
}
// isReferral reports whether r has the shape of a delegation response for
// owner: rcode NOERROR, an empty Answer section, and at least one NS record in
// Authority whose owner name is owner itself or one of its ancestors.
func isReferral(r *dns.Msg, owner string) bool {
	switch {
	case r == nil:
		return false
	case r.Rcode != dns.RcodeSuccess, len(r.Answer) != 0:
		return false
	}

	name := lowerFQDN(owner)
	for _, rr := range r.Ns {
		nsRR, isNS := rr.(*dns.NS)
		if !isNS {
			continue
		}
		cut := lowerFQDN(nsRR.Hdr.Name)
		if name == cut || strings.HasSuffix(name, "."+cut) {
			return true
		}
	}
	return false
}
// queryFor sends q to the given server set, or to the fallback resolver via
// recursiveExchange when the set is empty. It returns the response, the server
// that produced it, and any query error.
func (c *chainCtx) queryFor(ctx context.Context, servers []string, q dns.Question) (*dns.Msg, string, error) {
	if len(servers) > 0 {
		return queryAtAuth(ctx, "", servers, q, false)
	}
	resp, err := recursiveExchange(ctx, c.recFallback, q)
	return resp, c.recFallback, err
}
// extractCNAME returns the target and TTL of the first CNAME in the Answer
// section owned by owner (compared case-insensitively as FQDNs). It returns
// zero values when there is none.
//
// fromDNAME is true when the same Answer section also carries a DNAME record.
// The walker uses this to tag the hop: a synthesized CNAME is not itself a
// zone-published CNAME.
func extractCNAME(r *dns.Msg, owner string) (target string, fromDNAME bool, ttl uint32) {
	want := dns.Fqdn(owner)
	for _, rr := range r.Answer {
		rec, isCNAME := rr.(*dns.CNAME)
		if !isCNAME || !strings.EqualFold(dns.Fqdn(rec.Hdr.Name), want) {
			continue
		}
		target, ttl = rec.Target, rec.Hdr.Ttl
		break
	}
	if target == "" {
		return "", false, 0
	}

	for _, rr := range r.Answer {
		if _, isDNAME := rr.(*dns.DNAME); isDNAME {
			return target, true, ttl
		}
	}
	return target, false, ttl
}
// resolveFinal looks up the A and AAAA records of name concurrently and
// appends the addresses to c.data.FinalA / c.data.FinalAAAA. Queries go to
// servers when the set is non-empty and to the fallback resolver otherwise.
// A failed query contributes nothing; a non-success rcode is stored in
// c.data.FinalRcode.
func (c *chainCtx) resolveFinal(ctx context.Context, name string, servers []string) {
	type lookup struct {
		addrs []string
		rcode string
	}

	ask := func(qtype uint16) lookup {
		var out lookup
		question := dns.Question{Name: dns.Fqdn(name), Qtype: qtype, Qclass: dns.ClassINET}

		var resp *dns.Msg
		var err error
		if len(servers) == 0 {
			resp, err = recursiveExchange(ctx, c.recFallback, question)
		} else {
			resp, _, err = queryAtAuth(ctx, "", servers, question, false)
		}
		if err != nil || resp == nil {
			return out
		}

		if resp.Rcode != dns.RcodeSuccess {
			out.rcode = rcodeText(resp.Rcode)
		}
		// Keep only records of the type that was asked for.
		for _, rr := range resp.Answer {
			if a, isA := rr.(*dns.A); isA {
				if qtype == dns.TypeA {
					out.addrs = append(out.addrs, a.A.String())
				}
			} else if aaaa, isAAAA := rr.(*dns.AAAA); isAAAA {
				if qtype == dns.TypeAAAA {
					out.addrs = append(out.addrs, aaaa.AAAA.String())
				}
			}
		}
		return out
	}

	// Slot 0 holds the A lookup, slot 1 the AAAA lookup; each goroutine writes
	// only its own slot.
	qtypes := [2]uint16{dns.TypeA, dns.TypeAAAA}
	var answers [2]lookup
	var wg sync.WaitGroup
	for slot, qtype := range qtypes {
		wg.Add(1)
		go func(slot int, qtype uint16) {
			defer wg.Done()
			answers[slot] = ask(qtype)
		}(slot, qtype)
	}
	wg.Wait()

	v4, v6 := answers[0], answers[1]
	c.data.FinalA = append(c.data.FinalA, v4.addrs...)
	c.data.FinalAAAA = append(c.data.FinalAAAA, v6.addrs...)

	// Surface either rcode; A wins when both fail because A is the more common
	// resolver-driven lookup and operators usually act on it first.
	if v4.rcode != "" {
		c.data.FinalRcode = v4.rcode
	} else if v6.rcode != "" {
		c.data.FinalRcode = v6.rcode
	}
}
// collectDNAMEs queries, in parallel, the DNAME RRset at owner and at each of
// its ancestors strictly below apex, and returns one ChainHop per DNAME record
// found. Hops are ordered from owner upwards. Names whose query fails or
// answers with a non-success rcode contribute nothing.
func collectDNAMEs(ctx context.Context, servers []string, owner, apex string) []ChainHop {
	ownerLabels := dns.SplitDomainName(owner)
	depth := len(ownerLabels) - len(dns.SplitDomainName(apex))
	if depth < 0 {
		depth = 0
	}

	// One slot per level so goroutines never touch each other's results.
	perLevel := make([][]ChainHop, depth)
	var wg sync.WaitGroup
	for level := 0; level < depth; level++ {
		wg.Add(1)
		go func(level int) {
			defer wg.Done()
			qname := dns.Fqdn(strings.Join(ownerLabels[level:], "."))
			question := dns.Question{Name: qname, Qtype: dns.TypeDNAME, Qclass: dns.ClassINET}
			resp, server, err := queryAtAuth(ctx, "", servers, question, false)
			if err != nil || resp == nil || resp.Rcode != dns.RcodeSuccess {
				return
			}
			for _, rr := range resp.Answer {
				dname, isDNAME := rr.(*dns.DNAME)
				if !isDNAME {
					continue
				}
				perLevel[level] = append(perLevel[level], ChainHop{
					Owner:  lowerFQDN(dname.Hdr.Name),
					Kind:   KindDNAME,
					Target: lowerFQDN(dname.Target),
					TTL:    dname.Hdr.Ttl,
					Server: server,
				})
			}
		}(level)
	}
	wg.Wait()

	var hops []ChainHop
	for level := range perLevel {
		hops = append(hops, perLevel[level]...)
	}
	return hops
}
// observeApex fills the apex-related fields of data: whether the apex answers
// with A and AAAA records (queried in parallel), whether the walked chain
// holds a CNAME hop owned by the apex, and ApexFlattening, which is set when
// addresses are served at the apex without such a CNAME hop.
func observeApex(ctx context.Context, data *AliasData, servers []string, apex string) {
	// answers reports whether a query for qtype at the apex returns at least
	// one record of that type; failed queries count as "no".
	answers := func(qtype uint16) bool {
		question := dns.Question{Name: apex, Qtype: qtype, Qclass: dns.ClassINET}
		resp, _, err := queryAtAuth(ctx, "", servers, question, false)
		if err != nil || resp == nil {
			return false
		}
		for _, rr := range resp.Answer {
			if rr.Header().Rrtype == qtype {
				return true
			}
		}
		return false
	}

	var (
		v4, v6 bool
		wg     sync.WaitGroup
	)
	wg.Add(2)
	go func() {
		defer wg.Done()
		v4 = answers(dns.TypeA)
	}()
	go func() {
		defer wg.Done()
		v6 = answers(dns.TypeAAAA)
	}()
	wg.Wait()

	data.ApexHasA, data.ApexHasAAAA = v4, v6

	apexName := lowerFQDN(apex)
	for i := range data.Chain {
		if hop := data.Chain[i]; hop.Kind == KindCNAME && hop.Owner == apexName {
			data.ApexHasCNAME = true
			break
		}
	}

	if !data.ApexHasCNAME && (v4 || v6) {
		data.ApexFlattening = true
	}
}
// querySiblings returns the RRsets of common types (A, AAAA, MX, TXT, NS, SRV,
// CAA) that sit alongside a CNAME or DNAME at owner. Each type is queried in
// parallel; a type whose query fails is treated as absent.
//
// Answers are filtered on owner+type: a DNAME-synthesized CNAME or a record
// for another name would otherwise count as a sibling. The TTL reported for a
// type is that of the first matching record.
//
// The result follows the fixed candidate order above, so repeated runs against
// the same zone yield identical output. It is nil when no sibling is found.
func querySiblings(ctx context.Context, servers []string, owner string) []CoexistingRRset {
	candidates := []uint16{
		dns.TypeA, dns.TypeAAAA, dns.TypeMX, dns.TypeTXT,
		dns.TypeNS, dns.TypeSRV, dns.TypeCAA,
	}

	// One slot per candidate type: each goroutine writes only its own slot, so
	// no lock is needed and the output order does not depend on scheduling or
	// map iteration.
	type hit struct {
		found bool
		ttl   uint32
	}
	hits := make([]hit, len(candidates))
	ownerFQDN := dns.Fqdn(owner)

	var wg sync.WaitGroup
	wg.Add(len(candidates))
	for slot, qt := range candidates {
		go func(slot int, qt uint16) {
			defer wg.Done()
			q := dns.Question{Name: owner, Qtype: qt, Qclass: dns.ClassINET}
			r, _, err := queryAtAuth(ctx, "", servers, q, false)
			if err != nil || r == nil {
				return
			}
			for _, rr := range r.Answer {
				hdr := rr.Header()
				if hdr.Rrtype != qt {
					continue
				}
				if !strings.EqualFold(dns.Fqdn(hdr.Name), ownerFQDN) {
					continue
				}
				hits[slot] = hit{found: true, ttl: hdr.Ttl}
				return
			}
		}(slot, qt)
	}
	wg.Wait()

	var out []CoexistingRRset
	for slot, h := range hits {
		if !h.found {
			continue
		}
		out = append(out, CoexistingRRset{Type: dns.TypeToString[candidates[slot]], TTL: h.ttl})
	}
	return out
}
// observeCoexistence records in data.Coexisting the RRsets found next to the
// CNAME at owner. Nothing is queried unless the chain walk flagged a CNAME at
// the owner (data.OwnerHasCNAME).
func observeCoexistence(ctx context.Context, data *AliasData, servers []string, owner string) {
	if data.OwnerHasCNAME {
		data.Coexisting = querySiblings(ctx, servers, owner)
	}
}
// observeDNAMECoexistence probes, in parallel, every DNAME owner listed in
// data.DNAMESubstitutions for sibling RRsets and stores the non-empty findings
// in data.DNAMECoexistence, keyed by DNAME owner. The field is left untouched
// when no DNAME owner has siblings.
func observeDNAMECoexistence(ctx context.Context, data *AliasData, servers []string) {
	hops := data.DNAMESubstitutions
	if len(hops) == 0 {
		return
	}

	var (
		mu sync.Mutex
		wg sync.WaitGroup
	)
	found := make(map[string][]CoexistingRRset, len(hops))
	for i := range hops {
		wg.Add(1)
		go func(dnameOwner string) {
			defer wg.Done()
			rrsets := querySiblings(ctx, servers, dnameOwner)
			if len(rrsets) == 0 {
				return
			}
			// The map is shared between goroutines; guard the write.
			mu.Lock()
			defer mu.Unlock()
			found[dnameOwner] = rrsets
		}(hops[i].Owner)
	}
	wg.Wait()

	if len(found) != 0 {
		data.DNAMECoexistence = found
	}
}
// observeDNSSEC records whether the zone at apex publishes DNSKEY records
// (data.ZoneSigned) and, for a signed zone, whether the CNAME RRset at owner
// is served together with an RRSIG covering it (data.CNAMESigCheckDone and
// data.CNAMESigned).
//
// Query failures leave the corresponding fields untouched. Both queries are
// retried over TCP when the first response is truncated.
//
// Only records owned by owner are considered, consistent with extractCNAME and
// querySiblings. A CNAME that arrives next to a DNAME is synthesized from it
// and is legitimately unsigned (RFC 6672 §5.3.1: the DNAME carries the
// signature), so it is not reported as an unsigned CNAME.
func observeDNSSEC(ctx context.Context, data *AliasData, servers []string, apex, owner string) {
	// NOTE(review): the final argument of queryAtAuth is presumably the
	// "DNSSEC OK" switch; confirm against its definition.
	qk := dns.Question{Name: apex, Qtype: dns.TypeDNSKEY, Qclass: dns.ClassINET}
	r, _, err := queryAtAuth(ctx, "", servers, qk, true)
	// DNSKEY responses can exceed the UDP buffer; retry over TCP on truncation.
	if err == nil && r != nil && r.Truncated {
		r, _, err = queryAtAuth(ctx, "tcp", servers, qk, true)
	}
	if err != nil || r == nil || r.Rcode != dns.RcodeSuccess {
		return
	}
	signed := false
	for _, rr := range r.Answer {
		if _, ok := rr.(*dns.DNSKEY); ok {
			signed = true
			break
		}
	}
	data.ZoneSigned = signed
	if !signed {
		return
	}

	q := dns.Question{Name: owner, Qtype: dns.TypeCNAME, Qclass: dns.ClassINET}
	r, _, err = queryAtAuth(ctx, "", servers, q, true)
	if err == nil && r != nil && r.Truncated {
		r, _, err = queryAtAuth(ctx, "tcp", servers, q, true)
	}
	if err != nil || r == nil {
		return
	}

	ownerFQDN := dns.Fqdn(owner)
	atOwner := func(name string) bool {
		return strings.EqualFold(dns.Fqdn(name), ownerFQDN)
	}
	var sawCNAME, sawSig, sawDNAME bool
	for _, rr := range r.Answer {
		switch v := rr.(type) {
		case *dns.DNAME:
			sawDNAME = true
		case *dns.CNAME:
			if atOwner(v.Hdr.Name) {
				sawCNAME = true
			}
		case *dns.RRSIG:
			if v.TypeCovered == dns.TypeCNAME && atOwner(v.Hdr.Name) {
				sawSig = true
			}
		}
	}
	if !sawCNAME {
		return
	}
	// An unsigned CNAME synthesized from a DNAME is expected; do not record it
	// as a failed signature check.
	if sawDNAME && !sawSig {
		return
	}
	data.CNAMESigCheckDone = true
	data.CNAMESigned = sawSig
}