apexLookupRule mapped every findApex failure to Crit, including transport and resolver faults like "lookup nemunai.re on 127.0.0.11:53: server misbehaving" — a flaky recursive resolver, not a broken delegation. That made the check flap into Crit whenever the resolver hiccuped, the same class of false negative the chain path already fixed. Mark apex-lookup failures that stem from a transport/resolver fault (resolveZoneNSAddrs net errors, recursiveExchange transport errors, and SERVFAIL/REFUSED seen during the SOA walk) as transient via a typed error, surface it as ApexLookupTransient, and have apexLookupRule report Unknown for those. Definitive failures (NXDOMAIN-only walk, no resolvable NS) still drive Crit.
109 lines
3.3 KiB
Go
109 lines
3.3 KiB
Go
package checker
|
|
|
|
import (
|
|
"context"
|
|
"errors"
|
|
"fmt"
|
|
"net"
|
|
"testing"
|
|
|
|
"github.com/miekg/dns"
|
|
)
|
|
|
|
func TestIsTransientRcode(t *testing.T) {
|
|
transient := []int{dns.RcodeServerFailure, dns.RcodeRefused}
|
|
for _, rc := range transient {
|
|
if !isTransientRcode(rc) {
|
|
t.Errorf("rcode %s should be transient", rcodeText(rc))
|
|
}
|
|
}
|
|
final := []int{dns.RcodeSuccess, dns.RcodeNameError, dns.RcodeNotImplemented}
|
|
for _, rc := range final {
|
|
if isTransientRcode(rc) {
|
|
t.Errorf("rcode %s should not be transient", rcodeText(rc))
|
|
}
|
|
}
|
|
}
|
|
|
|
func TestIsTransientApexError(t *testing.T) {
|
|
wrapped := transientApexError{errors.New("server misbehaving")}
|
|
if !isTransientApexError(wrapped) {
|
|
t.Errorf("transientApexError should be classified as transient")
|
|
}
|
|
if !isTransientApexError(fmt.Errorf("wrapped: %w", wrapped)) {
|
|
t.Errorf("error wrapping a transientApexError should be transient")
|
|
}
|
|
if isTransientApexError(errors.New("could not locate apex of example.com.")) {
|
|
t.Errorf("plain error should not be classified as transient")
|
|
}
|
|
if isTransientApexError(nil) {
|
|
t.Errorf("nil error should not be classified as transient")
|
|
}
|
|
}
|
|
|
|
// startTestServer spins up a UDP DNS server that answers every query with the
|
|
// given handler, returning its address and a shutdown func.
|
|
func startTestServer(t *testing.T, handler dns.HandlerFunc) (string, func()) {
|
|
t.Helper()
|
|
mux := dns.NewServeMux()
|
|
mux.HandleFunc(".", handler)
|
|
pc, err := net.ListenPacket("udp", "127.0.0.1:0")
|
|
if err != nil {
|
|
t.Fatalf("listen: %v", err)
|
|
}
|
|
srv := &dns.Server{PacketConn: pc, Handler: mux}
|
|
go srv.ActivateAndServe()
|
|
return pc.LocalAddr().String(), func() { srv.Shutdown() }
|
|
}
|
|
|
|
func answerWith(rcode int, withAnswer bool) dns.HandlerFunc {
|
|
return func(w dns.ResponseWriter, r *dns.Msg) {
|
|
m := new(dns.Msg)
|
|
m.SetReply(r)
|
|
m.Rcode = rcode
|
|
if withAnswer && len(r.Question) > 0 {
|
|
rr, _ := dns.NewRR(r.Question[0].Name + " 300 IN CNAME target.example.")
|
|
if rr != nil {
|
|
m.Answer = append(m.Answer, rr)
|
|
}
|
|
}
|
|
w.WriteMsg(m)
|
|
}
|
|
}
|
|
|
|
func TestQueryAtAuthFailsOverTransientRcode(t *testing.T) {
|
|
q := dns.Question{Name: "example.com.", Qtype: dns.TypeCNAME, Qclass: dns.ClassINET}
|
|
|
|
t.Run("prefers definitive answer over SERVFAIL", func(t *testing.T) {
|
|
bad, stopBad := startTestServer(t, answerWith(dns.RcodeServerFailure, false))
|
|
defer stopBad()
|
|
good, stopGood := startTestServer(t, answerWith(dns.RcodeSuccess, true))
|
|
defer stopGood()
|
|
|
|
r, server, err := queryAtAuth(context.Background(), "", []string{bad, good}, q, false)
|
|
if err != nil {
|
|
t.Fatalf("unexpected error: %v", err)
|
|
}
|
|
if r.Rcode != dns.RcodeSuccess {
|
|
t.Fatalf("got rcode %s, want NOERROR", rcodeText(r.Rcode))
|
|
}
|
|
if server != good {
|
|
t.Fatalf("answered by %s, want the healthy server %s", server, good)
|
|
}
|
|
})
|
|
|
|
t.Run("returns transient response when every server fails", func(t *testing.T) {
|
|
s1, stop1 := startTestServer(t, answerWith(dns.RcodeServerFailure, false))
|
|
defer stop1()
|
|
s2, stop2 := startTestServer(t, answerWith(dns.RcodeRefused, false))
|
|
defer stop2()
|
|
|
|
r, _, err := queryAtAuth(context.Background(), "", []string{s1, s2}, q, false)
|
|
if err != nil {
|
|
t.Fatalf("unexpected error: %v", err)
|
|
}
|
|
if !isTransientRcode(r.Rcode) {
|
|
t.Fatalf("got rcode %s, want a transient rcode preserved", rcodeText(r.Rcode))
|
|
}
|
|
})
|
|
}
|