checker-resolver-propagation/checker/rules_consensus.go

245 lines
8.1 KiB
Go

package checker
import (
"context"
"fmt"
"sort"
sdk "git.happydns.org/checker-sdk-go/checker"
)
// consensusRule reports, for each probed RRset, how far the resolver set
// agrees on a single answer. Its Evaluate method produces one state per RRset
// and is where the "partial propagation" situation (more than one distinct
// NOERROR signature observed) is reported.
type consensusRule struct{}

// Name returns the identifier of the rule.
func (r *consensusRule) Name() string {
	const ruleName = "resolver_propagation.consensus"
	return ruleName
}

// Description returns a one-sentence, human-readable summary of the rule.
func (r *consensusRule) Description() string {
	const summary = "Checks that public resolvers agree on a single answer for each probed RRset."
	return summary
}
// Evaluate produces one CheckState per RRset, visited in sorted key order.
//
// If loadData reports an error state, that state is returned alone. If the
// loaded data holds no resolvers or no RRsets, a single StatusUnknown
// "skipped" state is returned. Otherwise each RRset yields:
//   - an info state when it has no consensus signature and no answer groups;
//   - a CodePartialPropagation warning when more than one distinct NOERROR
//     signature is backed by at least one known, unfiltered resolver;
//   - a StatusOK state in every other case.
func (r *consensusRule) Evaluate(ctx context.Context, obs sdk.ObservationGetter, _ sdk.CheckerOptions) []sdk.CheckState {
	data, errSt := loadData(ctx, obs)
	if errSt != nil {
		return []sdk.CheckState{*errSt}
	}
	if len(data.Resolvers) == 0 || len(data.RRsets) == 0 {
		skipped := sdk.CheckState{
			Status:  sdk.StatusUnknown,
			Code:    "resolver_propagation.consensus.skipped",
			Message: "no resolver probes available",
		}
		return []sdk.CheckState{skipped}
	}

	// hasUnfiltered reports whether at least one of the given resolver IDs
	// refers to a known resolver that is not marked as filtered.
	hasUnfiltered := func(ids []string) bool {
		for _, id := range ids {
			if res := data.Resolvers[id]; res != nil && !res.Filtered {
				return true
			}
		}
		return false
	}

	names := sortedRRsetKeys(data)
	// The RRset map is non-empty here and every iteration appends exactly
	// one state, so the final length is known up front.
	results := make([]sdk.CheckState, 0, len(names))
	for _, name := range names {
		view := data.RRsets[name]

		// Collect the distinct NOERROR signatures that count, i.e. those
		// returned by at least one unfiltered resolver.
		distinct := make(map[string]struct{})
		for _, group := range view.Groups {
			if group.Rcode == "NOERROR" && hasUnfiltered(group.Resolvers) {
				distinct[group.Signature] = struct{}{}
			}
		}

		if view.ConsensusSig == "" && len(view.Groups) == 0 {
			msg := fmt.Sprintf("no resolver returned a usable answer for %s", name)
			results = append(results, infoState("resolver_propagation.consensus.no_data", name, msg))
		} else if n := len(distinct); n > 1 {
			msg := fmt.Sprintf("%d distinct answers seen across public resolvers for %s, change is mid-propagation", n, name)
			results = append(results, warnState(CodePartialPropagation, name, msg))
		} else {
			var okState sdk.CheckState
			okState.Status = sdk.StatusOK
			okState.Code = "resolver_propagation.consensus.ok"
			okState.Subject = name
			okState.Message = fmt.Sprintf("all %d probed resolver(s) agree on %s", len(view.Agreeing), name)
			results = append(results, okState)
		}
	}
	return results
}
// authoritativeMatchRule compares the consensus of the public resolvers with
// the answer served by the zone's own authoritative servers.
type authoritativeMatchRule struct{}

// Name returns the identifier of the rule.
func (r *authoritativeMatchRule) Name() string {
	const ruleName = "resolver_propagation.matches_authoritative"
	return ruleName
}

// Description returns a one-sentence, human-readable summary of the rule.
func (r *authoritativeMatchRule) Description() string {
	const summary = "Checks that the public consensus matches the answer served by the zone's authoritative nameservers."
	return summary
}
// Evaluate emits one CheckState for every RRset that carries an expected
// (authoritative) answer, visited in sorted key order. RRsets whose Expected
// field is empty are ignored.
//
// If loadData reports an error state, that state is returned alone. For each
// considered RRset the result is:
//   - critical when there is no consensus signature at all;
//   - critical (CodeAnswerDrift) when MatchesExpected is false;
//   - StatusOK otherwise.
//
// When no RRset has an expected answer, a single StatusUnknown "skipped"
// state is returned instead.
func (r *authoritativeMatchRule) Evaluate(ctx context.Context, obs sdk.ObservationGetter, _ sdk.CheckerOptions) []sdk.CheckState {
	data, errSt := loadData(ctx, obs)
	if errSt != nil {
		return []sdk.CheckState{*errSt}
	}

	var results []sdk.CheckState
	for _, name := range sortedRRsetKeys(data) {
		view := data.RRsets[name]
		if view.Expected == "" {
			continue
		}

		if view.ConsensusSig == "" {
			msg := fmt.Sprintf("no public resolver returned a usable answer for %s (authoritative answer is known)", name)
			results = append(results, critState("resolver_propagation.matches_authoritative.no_consensus", name, msg))
		} else if !view.MatchesExpected {
			msg := fmt.Sprintf("consensus of public resolvers for %s differs from the authoritative answer, wait for TTL expiry or force a flush", name)
			results = append(results, critState(CodeAnswerDrift, name, msg))
		} else {
			var okState sdk.CheckState
			okState.Status = sdk.StatusOK
			okState.Code = "resolver_propagation.matches_authoritative.ok"
			okState.Subject = name
			okState.Message = fmt.Sprintf("public consensus for %s matches the authoritative answer", name)
			results = append(results, okState)
		}
	}

	// Every RRset with an expected answer appends exactly one state above,
	// so an empty result means no RRset had an expected answer.
	if len(results) == 0 {
		skipped := sdk.CheckState{
			Status:  sdk.StatusUnknown,
			Code:    "resolver_propagation.matches_authoritative.skipped",
			Message: "authoritative nameservers were unreachable; cannot compare consensus to ground truth",
		}
		return []sdk.CheckState{skipped}
	}
	return results
}
// nxdomainRule flags RRsets for which some, but not all, of the resolvers
// answer NXDOMAIN.
type nxdomainRule struct{}

// Name returns the identifier of the rule.
func (r *nxdomainRule) Name() string {
	const ruleName = "resolver_propagation.nxdomain"
	return ruleName
}

// Description returns a one-sentence, human-readable summary of the rule.
func (r *nxdomainRule) Description() string {
	const summary = "Flags RRsets for which some resolvers return NXDOMAIN while others return NOERROR."
	return summary
}
// Evaluate emits a critical CodeUnexpectedNXDOMAIN state for every RRset on
// which at least one resolver, but fewer resolvers than the total number in
// data.Resolvers, answered NXDOMAIN. RRsets are visited in sorted key order.
//
// If loadData reports an error state, that state is returned alone. When no
// RRset is flagged, a single passing state is returned.
//
// NOTE(review): the "not all" test compares the NXDOMAIN count with the total
// number of known resolvers, not with the number of resolvers that answered
// NOERROR — confirm this matches the intent stated in Description.
func (r *nxdomainRule) Evaluate(ctx context.Context, obs sdk.ObservationGetter, _ sdk.CheckerOptions) []sdk.CheckState {
	data, errSt := loadData(ctx, obs)
	if errSt != nil {
		return []sdk.CheckState{*errSt}
	}

	var results []sdk.CheckState
	for _, name := range sortedRRsetKeys(data) {
		// Gather the IDs of every resolver that sits in an NXDOMAIN group.
		var affected []string
		for _, group := range data.RRsets[name].Groups {
			if group.Rcode != "NXDOMAIN" {
				continue
			}
			affected = append(affected, group.Resolvers...)
		}

		count := len(affected)
		if count == 0 || count >= len(data.Resolvers) {
			continue
		}
		msg := fmt.Sprintf("%s resolved as NXDOMAIN on %d resolver(s): %s", name, count, firstN(affected, 6))
		results = append(results, critState(CodeUnexpectedNXDOMAIN, name, msg))
	}

	if len(results) > 0 {
		return results
	}
	return []sdk.CheckState{
		passState("resolver_propagation.nxdomain.ok", "No resolver unexpectedly returns NXDOMAIN."),
	}
}
// servfailRule flags RRsets for which at least one resolver answers SERVFAIL.
type servfailRule struct{}

// Name returns the identifier of the rule.
func (r *servfailRule) Name() string {
	const ruleName = "resolver_propagation.servfail"
	return ruleName
}

// Description returns a one-sentence, human-readable summary of the rule.
func (r *servfailRule) Description() string {
	const summary = "Flags RRsets for which any resolver returns SERVFAIL (usually DNSSEC or reachability failure)."
	return summary
}
// Evaluate emits a critical CodeUnexpectedSERVFAIL state for every RRset that
// has at least one resolver in a SERVFAIL answer group. RRsets are visited in
// sorted key order.
//
// If loadData reports an error state, that state is returned alone. When no
// RRset is flagged, a single passing state is returned.
func (r *servfailRule) Evaluate(ctx context.Context, obs sdk.ObservationGetter, _ sdk.CheckerOptions) []sdk.CheckState {
	data, errSt := loadData(ctx, obs)
	if errSt != nil {
		return []sdk.CheckState{*errSt}
	}

	var results []sdk.CheckState
	for _, name := range sortedRRsetKeys(data) {
		// Gather the IDs of every resolver that sits in a SERVFAIL group.
		var failing []string
		for _, group := range data.RRsets[name].Groups {
			if group.Rcode != "SERVFAIL" {
				continue
			}
			failing = append(failing, group.Resolvers...)
		}

		if len(failing) == 0 {
			continue
		}
		msg := fmt.Sprintf("%s returned SERVFAIL on %d resolver(s): %s", name, len(failing), firstN(failing, 6))
		results = append(results, critState(CodeUnexpectedSERVFAIL, name, msg))
	}

	if len(results) > 0 {
		return results
	}
	return []sdk.CheckState{
		passState("resolver_propagation.servfail.ok", "No resolver returns SERVFAIL."),
	}
}
// regionalSplitRule flags regions whose resolvers all agree with each other
// on an answer that is not the global consensus.
type regionalSplitRule struct{}

// Name returns the identifier of the rule.
func (r *regionalSplitRule) Name() string {
	const ruleName = "resolver_propagation.regional_split"
	return ruleName
}

// Description returns a one-sentence, human-readable summary of the rule.
func (r *regionalSplitRule) Description() string {
	const summary = "Flags regions in which every resolver agrees on an answer that differs from the global consensus."
	return summary
}
// Evaluate emits a CodeRegionalSplit warning for every (region, RRset) pair in
// which all known, unfiltered resolvers of the region returned the same
// non-empty signature and that signature differs from the RRset's consensus
// signature. RRsets are visited in sorted key order and, within an RRset,
// regions in sorted order.
//
// If loadData reports an error state, that state is returned alone. When
// nothing is flagged, a single passing state is returned.
func (r *regionalSplitRule) Evaluate(ctx context.Context, obs sdk.ObservationGetter, _ sdk.CheckerOptions) []sdk.CheckState {
	data, errSt := loadData(ctx, obs)
	if errSt != nil {
		return []sdk.CheckState{*errSt}
	}

	var results []sdk.CheckState
	for _, name := range sortedRRsetKeys(data) {
		view := data.RRsets[name]

		// Tally, per region, how many unfiltered resolvers returned each
		// signature. Unknown or filtered resolvers are left out.
		perRegion := make(map[string]map[string]int)
		for _, group := range view.Groups {
			for _, id := range group.Resolvers {
				res := data.Resolvers[id]
				if res == nil || res.Filtered {
					continue
				}
				tally, seen := perRegion[res.Region]
				if !seen {
					tally = make(map[string]int)
					perRegion[res.Region] = tally
				}
				tally[group.Signature]++
			}
		}

		// Sort the region names so the emitted states have a stable order.
		regionNames := make([]string, 0, len(perRegion))
		for regionName := range perRegion {
			regionNames = append(regionNames, regionName)
		}
		sort.Strings(regionNames)

		for _, regionName := range regionNames {
			tally := perRegion[regionName]
			if len(tally) != 1 {
				// The region itself is not unanimous; nothing to report.
				continue
			}
			// Exactly one entry: pull out the region's sole signature.
			var sole string
			for sig := range tally {
				sole = sig
			}
			if sole == "" || sole == view.ConsensusSig {
				continue
			}
			msg := fmt.Sprintf("all %s resolvers agree on an answer that differs from the global consensus for %s",
				regionLabel(regionName), name)
			results = append(results, warnState(CodeRegionalSplit, regionName+" "+name, msg))
		}
	}

	if len(results) > 0 {
		return results
	}
	return []sdk.CheckState{
		passState("resolver_propagation.regional_split.ok", "No region is split from the global consensus."),
	}
}
// sortedRRsetKeys returns the keys of data.RRsets in ascending string order,
// giving callers a deterministic iteration order over the map.
func sortedRRsetKeys(data *ResolverPropagationData) []string {
	names := make([]string, len(data.RRsets))
	next := 0
	for name := range data.RRsets {
		names[next] = name
		next++
	}
	sort.Strings(names)
	return names
}