checker: report transient mid-chain and final rcodes as Unknown, not Crit/Warn
SERVFAIL/REFUSED from every auth server means the record could not be observed, not that the zone published a negative answer. Mark such rcodes as transient on TermRcode terminations and on final A/AAAA lookups, so that chainRcodeRule reports Unknown instead of flapping the check into Crit/Warn. Definitive NXDOMAIN answers still drive Crit (mid-chain) and Warn (final).
This commit is contained in:
parent
da6def100c
commit
65687ce375
4 changed files with 65 additions and 16 deletions
|
|
@ -125,18 +125,36 @@ func (chainRcodeRule) Evaluate(ctx context.Context, obs sdk.ObservationGetter, _
|
|||
}
|
||||
var out []sdk.CheckState
|
||||
if data.ChainTerminated.Reason == TermRcode {
|
||||
// A transient rcode (SERVFAIL/REFUSED from every auth server) means we could
|
||||
// not observe the record, not that the zone published a negative answer:
|
||||
// report it as Unknown so a flaky server does not flap the check into Crit.
|
||||
// A definitive NXDOMAIN mid-chain is a real break and stays Crit.
|
||||
status := sdk.StatusCrit
|
||||
hint := "Ensure the zone publishes the expected record; NXDOMAIN mid-chain breaks the alias."
|
||||
if data.ChainTerminated.Transient {
|
||||
status = sdk.StatusUnknown
|
||||
hint = "Check authoritative-server reachability; SERVFAIL/REFUSED from every server leaves the alias state undetermined."
|
||||
}
|
||||
out = append(out, withHint(sdk.CheckState{
|
||||
Status: sdk.StatusCrit,
|
||||
Status: status,
|
||||
Subject: data.ChainTerminated.Subject,
|
||||
Message: fmt.Sprintf("server answered %s mid-chain", data.ChainTerminated.Rcode),
|
||||
}, "Ensure the zone publishes the expected record; NXDOMAIN/SERVFAIL mid-chain breaks the alias."))
|
||||
}, hint))
|
||||
}
|
||||
if data.FinalRcode != "" && data.FinalRcode != "NOERROR" {
|
||||
// Same distinction for the final A/AAAA lookup: a SERVFAIL/REFUSED could not
|
||||
// be observed (Unknown), a definitive rcode is a real publication gap (Warn).
|
||||
status := sdk.StatusWarn
|
||||
hint := "Check the upstream zone's A/AAAA publication."
|
||||
if data.FinalRcodeTransient {
|
||||
status = sdk.StatusUnknown
|
||||
hint = "Check the upstream auth servers' reachability; the final A/AAAA state could not be determined."
|
||||
}
|
||||
out = append(out, withHint(sdk.CheckState{
|
||||
Status: sdk.StatusWarn,
|
||||
Status: status,
|
||||
Subject: data.FinalTarget,
|
||||
Message: fmt.Sprintf("final A lookup for %s returned %s", data.FinalTarget, data.FinalRcode),
|
||||
}, "Check the upstream zone's A/AAAA publication."))
|
||||
}, hint))
|
||||
}
|
||||
if len(out) == 0 {
|
||||
return okState(data.Owner, "all chain and final lookups returned NOERROR")
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue