Treat unreachable resolvers as unknown, not warnings

An unreachable resolver tells us nothing about propagation: it's neither
a disagreement nor an error in the zone. Surface it as a distinct
Unreachable state so it stops polluting warning counts and the
Disagreeing/Errored buckets.
This commit is contained in:
nemunaire 2026-05-25 16:21:41 +08:00
commit d4c44e879b
5 changed files with 28 additions and 17 deletions

View file

@ -116,7 +116,7 @@ type regionRow struct {
Reachable int
Agreeing int
Disagreeing int
Errored int
Unreachable int
}
type resolverRow struct {
@ -283,28 +283,32 @@ func buildReportView(d *ResolverPropagationData, findings []Finding) *reportView
r.Resolvers++
if rv.Reachable {
r.Reachable++
} else {
r.Unreachable++
}
if rv.Reachable && !rv.Filtered {
ok := true
unreachableProbe := false
disagrees := false
for key, p := range rv.Probes {
if p == nil || p.Error != "" {
r.Errored++
ok = false
break
unreachableProbe = true
continue
}
cv := d.RRsets[key]
if cv == nil || cv.ConsensusSig == "" {
continue
}
if p.Signature != cv.ConsensusSig {
ok = false
break
disagrees = true
}
}
if ok {
r.Agreeing++
} else {
switch {
case disagrees:
r.Disagreeing++
case unreachableProbe:
r.Unreachable++
default:
r.Agreeing++
}
}
}
@ -664,7 +668,7 @@ const reportTemplateHTML = `<!DOCTYPE html>
<h2>Per-region view</h2>
<table>
<thead><tr><th>Region</th><th>Reachable</th><th>Agreeing</th><th>Disagreeing</th><th>Errored</th></tr></thead>
<thead><tr><th>Region</th><th>Reachable</th><th>Agreeing</th><th>Disagreeing</th><th>Unreachable</th></tr></thead>
<tbody>
{{range .Regions}}
<tr>
@ -672,7 +676,7 @@ const reportTemplateHTML = `<!DOCTYPE html>
<td>{{.Reachable}} / {{.Resolvers}}</td>
<td><span class="ok">{{.Agreeing}}</span></td>
<td>{{if .Disagreeing}}<span class="warn">{{.Disagreeing}}</span>{{else}}0{{end}}</td>
<td>{{if .Errored}}<span class="crit">{{.Errored}}</span>{{else}}0{{end}}</td>
<td>{{if .Unreachable}}<span class="muted">{{.Unreachable}}</span>{{else}}0{{end}}</td>
</tr>
{{end}}
</tbody>
@ -690,7 +694,7 @@ const reportTemplateHTML = `<!DOCTYPE html>
</td>
<td>{{.Region}}</td>
<td>{{.Transport}}</td>
<td>{{if .Reachable}}{{.AvgMs}}{{else}}<span class="crit">unreachable</span>{{end}}</td>
<td>{{if .Reachable}}{{.AvgMs}}{{else}}<span class="muted">unreachable</span>{{end}}</td>
<td>
{{range .Probes}}
<details>

View file

@ -57,3 +57,7 @@ func warnState(code, subject, message string) sdk.CheckState {
// critState builds an sdk.CheckState whose Status is sdk.StatusCrit,
// carrying the given code, subject and message unchanged.
func critState(code, subject, message string) sdk.CheckState {
	return sdk.CheckState{
		Status:  sdk.StatusCrit,
		Code:    code,
		Subject: subject,
		Message: message,
	}
}
// unknownState builds an sdk.CheckState whose Status is sdk.StatusUnknown,
// carrying the given code, subject and message unchanged.
func unknownState(code, subject, message string) sdk.CheckState {
	var state sdk.CheckState
	state.Status = sdk.StatusUnknown
	state.Code = code
	state.Subject = subject
	state.Message = message
	return state
}

View file

@ -91,8 +91,8 @@ func (r *authoritativeMatchRule) Evaluate(ctx context.Context, obs sdk.Observati
anyExpected = true
switch {
case v.ConsensusSig == "":
states = append(states, critState("resolver_propagation.matches_authoritative.no_consensus", key,
fmt.Sprintf("no public resolver returned a usable answer for %s (authoritative answer is known)", key)))
states = append(states, unknownState("resolver_propagation.matches_authoritative.no_consensus", key,
fmt.Sprintf("no public resolver returned a usable answer for %s (authoritative answer is known); resolvers unreachable from the checker", key)))
case !v.MatchesExpected:
states = append(states, critState(CodeAnswerDrift, key,
fmt.Sprintf("consensus of public resolvers for %s differs from the authoritative answer, wait for TTL expiry or force a flush", key)))

View file

@ -71,7 +71,7 @@ func (r *resolverLatencyRule) Evaluate(ctx context.Context, obs sdk.ObservationG
var states []sdk.CheckState
for _, rv := range data.Resolvers {
if !rv.Reachable {
states = append(states, warnState(CodeResolverUnreachable, rv.ID,
states = append(states, unknownState(CodeResolverUnreachable, rv.ID,
fmt.Sprintf("resolver %s (%s, %s) did not answer any query", rv.Name, rv.IP, rv.Transport)))
continue
}

View file

@ -326,8 +326,11 @@ func TestResolverLatencyRule(t *testing.T) {
if _, ok := codes[CodeResolverHighLatency]; !ok {
t.Errorf("want high latency for 'slow', got %+v", st)
}
if _, ok := codes[CodeResolverUnreachable]; !ok {
unreach, ok := codes[CodeResolverUnreachable]
if !ok {
t.Errorf("want unreachable for 'absent', got %+v", st)
} else if unreach[0].Status != sdk.StatusUnknown {
t.Errorf("unreachable should be unknown (not a warning), got status %v", unreach[0].Status)
}
}