checker-alias/checker/dns_test.go
Pierre-Olivier Mercier af0dceca6c checker: fail over to other auth servers on SERVFAIL/REFUSED
queryAtAuth already failed over on transport errors but treated any DNS
response as final, so a SERVFAIL from the first auth server terminated the
chain as Crit even when a sibling server would answer NOERROR. This made
the check flap against a flaky server. Treat SERVFAIL/REFUSED as transient
and try the remaining servers, returning a definitive answer when any
server gives one and only falling back to the transient response (or the
last transport error) when every server fails.
2026-06-18 09:47:28 +09:00

91 lines
2.7 KiB
Go

package checker
import (
"context"
"net"
"testing"
"github.com/miekg/dns"
)
// TestIsTransientRcode checks the classification made by isTransientRcode:
// SERVFAIL and REFUSED must be reported as transient, while NOERROR, NXDOMAIN
// and NOTIMP must not.
func TestIsTransientRcode(t *testing.T) {
	cases := []struct {
		rcode     int
		transient bool
	}{
		{dns.RcodeServerFailure, true},
		{dns.RcodeRefused, true},
		{dns.RcodeSuccess, false},
		{dns.RcodeNameError, false},
		{dns.RcodeNotImplemented, false},
	}

	for _, tc := range cases {
		got := isTransientRcode(tc.rcode)
		switch {
		case tc.transient && !got:
			t.Errorf("rcode %s should be transient", rcodeText(tc.rcode))
		case !tc.transient && got:
			t.Errorf("rcode %s should not be transient", rcodeText(tc.rcode))
		}
	}
}
// startTestServer spins up a UDP DNS server on a loopback ephemeral port that
// answers every query with the given handler, returning its address and a
// shutdown func.
//
// The helper blocks until the server signals that it has started. Without
// that wait the shutdown func could run before the serving goroutine had
// marked the server as started; Shutdown would then return an error without
// stopping anything, leaving the goroutine and socket behind. A failure to
// start serving aborts the test, and a failing Shutdown is reported as a test
// error instead of being dropped.
func startTestServer(t *testing.T, handler dns.HandlerFunc) (string, func()) {
	t.Helper()

	mux := dns.NewServeMux()
	mux.HandleFunc(".", handler)

	pc, err := net.ListenPacket("udp", "127.0.0.1:0")
	if err != nil {
		t.Fatalf("listen: %v", err)
	}

	started := make(chan struct{})
	srv := &dns.Server{
		PacketConn:        pc,
		Handler:           mux,
		NotifyStartedFunc: func() { close(started) },
	}

	// Buffered so the serving goroutine can always hand over its result and
	// exit, even when nobody reads it after a successful start.
	serveErr := make(chan error, 1)
	go func() { serveErr <- srv.ActivateAndServe() }()

	select {
	case <-started:
	case err := <-serveErr:
		// The server gave up before serving; release the socket ourselves.
		// The close error is irrelevant because the test is aborted anyway.
		_ = pc.Close()
		t.Fatalf("serve: %v", err)
	}

	return pc.LocalAddr().String(), func() {
		if err := srv.Shutdown(); err != nil {
			t.Errorf("shutdown: %v", err)
		}
	}
}
// answerWith returns a handler that replies to every request with the given
// rcode. When withAnswer is true and the request carries a question, the reply
// also includes a CNAME record (TTL 300) from the first question's name to
// target.example.; a record that cannot be built is silently left out.
func answerWith(rcode int, withAnswer bool) dns.HandlerFunc {
	return func(w dns.ResponseWriter, req *dns.Msg) {
		resp := new(dns.Msg)
		resp.SetReply(req)
		resp.Rcode = rcode

		if withAnswer && len(req.Question) > 0 {
			record := req.Question[0].Name + " 300 IN CNAME target.example."
			// Best effort: the parse error is ignored and a nil record skipped.
			if cname, _ := dns.NewRR(record); cname != nil {
				resp.Answer = append(resp.Answer, cname)
			}
		}

		w.WriteMsg(resp)
	}
}
// TestQueryAtAuthFailsOverTransientRcode runs queryAtAuth against pairs of
// local test servers and checks how transient rcodes are handled:
//
//   - when the first server answers SERVFAIL and the second NOERROR, the
//     NOERROR response from the second server must be returned;
//   - when both servers answer with a transient rcode (SERVFAIL, REFUSED),
//     no error is expected and the returned response must still carry a
//     transient rcode.
func TestQueryAtAuthFailsOverTransientRcode(t *testing.T) {
	question := dns.Question{
		Name:   "example.com.",
		Qtype:  dns.TypeCNAME,
		Qclass: dns.ClassINET,
	}

	t.Run("prefers definitive answer over SERVFAIL", func(t *testing.T) {
		failing, stopFailing := startTestServer(t, answerWith(dns.RcodeServerFailure, false))
		defer stopFailing()
		healthy, stopHealthy := startTestServer(t, answerWith(dns.RcodeSuccess, true))
		defer stopHealthy()

		// The failing server comes first so a fail-over is required.
		servers := []string{failing, healthy}
		resp, answeredBy, err := queryAtAuth(context.Background(), "", servers, question, false)
		switch {
		case err != nil:
			t.Fatalf("unexpected error: %v", err)
		case resp.Rcode != dns.RcodeSuccess:
			t.Fatalf("got rcode %s, want NOERROR", rcodeText(resp.Rcode))
		case answeredBy != healthy:
			t.Fatalf("answered by %s, want the healthy server %s", answeredBy, healthy)
		}
	})

	t.Run("returns transient response when every server fails", func(t *testing.T) {
		servfail, stopServfail := startTestServer(t, answerWith(dns.RcodeServerFailure, false))
		defer stopServfail()
		refused, stopRefused := startTestServer(t, answerWith(dns.RcodeRefused, false))
		defer stopRefused()

		servers := []string{servfail, refused}
		resp, _, err := queryAtAuth(context.Background(), "", servers, question, false)
		switch {
		case err != nil:
			t.Fatalf("unexpected error: %v", err)
		case !isTransientRcode(resp.Rcode):
			t.Fatalf("got rcode %s, want a transient rcode preserved", rcodeText(resp.Rcode))
		}
	})
}