diff --git a/checker/collect.go b/checker/collect.go index 580f900..f631ace 100644 --- a/checker/collect.go +++ b/checker/collect.go @@ -137,15 +137,11 @@ func (c *chainCtx) walk(ctx context.Context, name string) { if r.Rcode != dns.RcodeSuccess { rcode := rcodeText(r.Rcode) - // A SERVFAIL/REFUSED from every auth server means we could not observe - // the record, not that the zone published a negative answer; mark it - // transient so the rule reports Unknown instead of Crit. c.data.ChainTerminated = ChainTermination{ - Reason: TermRcode, - Subject: current, - Rcode: rcode, - Detail: fmt.Sprintf("server answered %s for %s", rcode, current), - Transient: isTransientRcode(r.Rcode), + Reason: TermRcode, + Subject: current, + Rcode: rcode, + Detail: fmt.Sprintf("server answered %s for %s", rcode, current), } c.data.FinalTarget = current return @@ -284,9 +280,8 @@ func extractCNAME(r *dns.Msg, owner string) (target string, fromDNAME bool, ttl func (c *chainCtx) resolveFinal(ctx context.Context, name string, servers []string) { type result struct { - addrs []string - rcode string - transient bool + addrs []string + rcode string } query := func(qtype uint16) result { @@ -306,7 +301,6 @@ func (c *chainCtx) resolveFinal(ctx context.Context, name string, servers []stri var res result if r.Rcode != dns.RcodeSuccess { res.rcode = rcodeText(r.Rcode) - res.transient = isTransientRcode(r.Rcode) } for _, rr := range r.Answer { switch v := rr.(type) { @@ -338,10 +332,8 @@ func (c *chainCtx) resolveFinal(ctx context.Context, name string, servers []stri switch { case aRes.rcode != "": c.data.FinalRcode = aRes.rcode - c.data.FinalRcodeTransient = aRes.transient case aaaaRes.rcode != "": c.data.FinalRcode = aaaaRes.rcode - c.data.FinalRcodeTransient = aaaaRes.transient } } diff --git a/checker/rules_chain.go b/checker/rules_chain.go index 1cd31aa..4f7cba0 100644 --- a/checker/rules_chain.go +++ b/checker/rules_chain.go @@ -125,36 +125,18 @@ func (chainRcodeRule) Evaluate(ctx context.Context, obs sdk.ObservationGetter, _ } var out []sdk.CheckState if data.ChainTerminated.Reason == TermRcode { - // A transient rcode (SERVFAIL/REFUSED from every auth server) means we could - // not observe the record, not that the zone published a negative answer: - // report it as Unknown so a flaky server does not flap the check into Crit. - // A definitive NXDOMAIN mid-chain is a real break and stays Crit. - status := sdk.StatusCrit - hint := "Ensure the zone publishes the expected record; NXDOMAIN mid-chain breaks the alias." - if data.ChainTerminated.Transient { - status = sdk.StatusUnknown - hint = "Check authoritative-server reachability; SERVFAIL/REFUSED from every server leaves the alias state undetermined." - } out = append(out, withHint(sdk.CheckState{ - Status: status, + Status: sdk.StatusCrit, Subject: data.ChainTerminated.Subject, Message: fmt.Sprintf("server answered %s mid-chain", data.ChainTerminated.Rcode), - }, hint)) + }, "Ensure the zone publishes the expected record; NXDOMAIN/SERVFAIL mid-chain breaks the alias.")) } if data.FinalRcode != "" && data.FinalRcode != "NOERROR" { - // Same distinction for the final A/AAAA lookup: a SERVFAIL/REFUSED could not - // be observed (Unknown), a definitive rcode is a real publication gap (Warn). - status := sdk.StatusWarn - hint := "Check the upstream zone's A/AAAA publication." - if data.FinalRcodeTransient { - status = sdk.StatusUnknown - hint = "Check the upstream auth servers' reachability; the final A/AAAA state could not be determined." - } out = append(out, withHint(sdk.CheckState{ - Status: status, + Status: sdk.StatusWarn, Subject: data.FinalTarget, Message: fmt.Sprintf("final A lookup for %s returned %s", data.FinalTarget, data.FinalRcode), - }, hint)) + }, "Check the upstream zone's A/AAAA publication.")) } if len(out) == 0 { return okState(data.Owner, "all chain and final lookups returned NOERROR") diff --git a/checker/rules_test.go b/checker/rules_test.go index c1ba05f..afd087b 100644 --- a/checker/rules_test.go +++ b/checker/rules_test.go @@ -162,32 +162,14 @@ func TestChainRcodeRule(t *testing.T) { d.ChainTerminated = ChainTermination{Reason: TermRcode, Subject: "gone.example.com.", Rcode: "NXDOMAIN"} assertSingle(t, run(chainRcodeRule{}, d, nil), sdk.StatusCrit) }) - t.Run("mid-chain transient SERVFAIL", func(t *testing.T) { - // SERVFAIL from every auth server could not be observed: Unknown, not Crit. - d := apexKnownData() - d.ChainTerminated = ChainTermination{Reason: TermRcode, Subject: "flaky.example.com.", Rcode: "SERVFAIL", Transient: true} - assertSingle(t, run(chainRcodeRule{}, d, nil), sdk.StatusUnknown) - }) - t.Run("final definitive rcode", func(t *testing.T) { - d := apexKnownData() - d.ChainTerminated.Reason = TermOK - d.FinalTarget = "target.example." - d.FinalRcode = "NXDOMAIN" - states := run(chainRcodeRule{}, d, nil) - if len(states) != 1 || states[0].Status != sdk.StatusWarn { - t.Fatalf("want single WARN, got %+v", states) - } - }) - t.Run("final transient rcode", func(t *testing.T) { - // SERVFAIL on the final lookup could not be observed: Unknown, not Warn. + t.Run("final rcode", func(t *testing.T) { d := apexKnownData() d.ChainTerminated.Reason = TermOK d.FinalTarget = "target.example." d.FinalRcode = "SERVFAIL" - d.FinalRcodeTransient = true states := run(chainRcodeRule{}, d, nil) - if len(states) != 1 || states[0].Status != sdk.StatusUnknown { - t.Fatalf("want single UNKNOWN, got %+v", states) + if len(states) != 1 || states[0].Status != sdk.StatusWarn { + t.Fatalf("want single WARN, got %+v", states) } }) } diff --git a/checker/types.go b/checker/types.go index ba9d064..44a2356 100644 --- a/checker/types.go +++ b/checker/types.go @@ -47,10 +47,9 @@ type ChainTermination struct { Subject string `json:"subject,omitempty"` Detail string `json:"detail,omitempty"` Rcode string `json:"rcode,omitempty"` // only with TermRcode - // Transient is meaningful with TermQueryErr and TermRcode: true when the failure - // could not be observed as a definitive answer (a transport/resolver fault, or a - // SERVFAIL/REFUSED from every auth server), false when it stems from definitive - // evidence such as a target with no locatable apex or an authoritative NXDOMAIN. + // Transient is meaningful with TermQueryErr: true when the query could not be + // completed because of a transport/resolver fault (could not observe), false + // when it stems from definitive evidence such as a target with no locatable apex. Transient bool `json:"transient,omitempty"` } @@ -74,11 +73,7 @@ type AliasData struct { FinalTarget string `json:"final_target,omitempty"` FinalA []string `json:"final_a,omitempty"` FinalAAAA []string `json:"final_aaaa,omitempty"` - // FinalRcode is the non-NOERROR rcode of the final A/AAAA lookup, if any; - // FinalRcodeTransient is true when it was a SERVFAIL/REFUSED (could not observe) - // rather than a definitive negative answer. - FinalRcode string `json:"final_rcode,omitempty"` - FinalRcodeTransient bool `json:"final_rcode_transient,omitempty"` + FinalRcode string `json:"final_rcode,omitempty"` // Coexisting is populated only when Owner has a CNAME. Coexisting []CoexistingRRset `json:"coexisting,omitempty"`