Initial commit

This commit is contained in:
nemunaire 2026-04-26 11:06:47 +07:00
commit 7ca2fb60c6
24 changed files with 3098 additions and 0 deletions

250
checker/collect.go Normal file
View file

@ -0,0 +1,250 @@
package checker
import (
"context"
"encoding/json"
"fmt"
"sort"
"strings"
"sync"
"github.com/miekg/dns"
sdk "git.happydns.org/checker-sdk-go/checker"
)
// Collect performs the raw observation pass for the zone: it queries every
// authoritative name server and records what each one answered. It makes no
// judgement (no severity, no pass/fail, no findings); the rules in rules.go
// are responsible for turning the returned ObservationData into CheckStates.
//
// The returned value is a *ObservationData. An error is returned only when
// the service payload or the zone name cannot be determined from opts.
func (p *authoritativeConsistencyProvider) Collect(ctx context.Context, opts sdk.CheckerOptions) (any, error) {
	service, err := loadService(opts)
	if err != nil {
		return nil, err
	}
	zoneName, err := loadZone(opts, service)
	if err != nil {
		return nil, err
	}

	wantEDNS := sdk.GetBoolOption(opts, "checkEDNS", true)
	askParent := sdk.GetBoolOption(opts, "useParentNS", true)

	obs := &ObservationData{
		Zone:       dns.Fqdn(zoneName),
		DeclaredNS: normalizeNSList(service.NameServers),
		Results:    make(map[string]*NSResult),
	}
	if soa := service.SOA; soa != nil {
		obs.HasSOA = true
		obs.DeclaredSerial = soa.Serial
	}

	// Raw parent-side view of the delegation; a failure is recorded, not fatal.
	if askParent {
		delegated, perr := parentReferral(ctx, obs.Zone)
		if perr == nil {
			obs.ParentNS = delegated
		} else {
			obs.ParentQueryError = perr.Error()
		}
	}

	// Probe every name known from either the service or the parent.
	obs.Probed = unionStrings(obs.DeclaredNS, obs.ParentNS)
	if len(obs.Probed) == 0 {
		// Empty probe set: left for the rules to report.
		return obs, nil
	}

	// Probes run concurrently but never more than maxConcurrentProbes at a
	// time, so an Origin declaring a huge (malicious or misconfigured) NS
	// list cannot make the checker spawn an unbounded number of goroutines
	// and sockets.
	const maxConcurrentProbes = 16
	var (
		slots   = make(chan struct{}, maxConcurrentProbes)
		pending sync.WaitGroup
		guard   sync.Mutex // protects obs.Results
	)
	for _, target := range obs.Probed {
		pending.Add(1)
		slots <- struct{}{} // blocks while all slots are taken
		go func(name string) {
			defer pending.Done()
			defer func() { <-slots }()
			outcome := probeNS(ctx, obs.Zone, name, wantEDNS)
			guard.Lock()
			obs.Results[name] = outcome
			guard.Unlock()
		}(target)
	}
	pending.Wait()
	return obs, nil
}
// probeNS performs every probe against a single NS hostname. It resolves the
// name, then iterates over its addresses, running for each one a UDP SOA
// query, a TCP SOA query, an optional EDNS0 probe and an NS RRset query.
//
// The "canonical" SOA recorded for the NS is the one from the first address
// that answered authoritatively (AA flag set). A SOA taken from a
// non-authoritative answer is kept only as long as no authoritative answer
// has been seen, so a lame address listed first cannot mask the view of a
// healthy one. Every address still contributes to the reachability flags and
// to the error list. This avoids dual-homed servers appearing twice in the
// drift matrix while still catching IPv4/IPv6-specific failures.
//
// The returned NSResult is never nil; resolution failures are reported in
// its ResolveError field.
func probeNS(ctx context.Context, zone, nsName string, checkEDNS bool) *NSResult {
	res := &NSResult{Name: nsName}
	addrs, err := resolveHost(ctx, nsName)
	if err != nil {
		res.ResolveError = err.Error()
		return res
	}
	if len(addrs) == 0 {
		res.ResolveError = "no A/AAAA records"
		return res
	}
	res.Addresses = addrs

	// soaIsAuthoritative tells whether res.SOA came from an AA answer, so a
	// later authoritative answer may replace a non-authoritative one.
	soaIsAuthoritative := false
	for _, addr := range addrs {
		srv := hostPort(addr, "53")
		soa, aa, rtt, qerr := querySOA(ctx, "", srv, zone)
		if qerr != nil {
			res.appendError("UDP %s: %v", addr, qerr)
			continue
		}
		res.UDPReachable = true
		if res.LatencyMs == 0 {
			res.LatencyMs = rtt.Milliseconds()
		}
		if aa {
			res.Authoritative = true
		}
		// First authoritative answer wins; that is the canonical view of
		// this NS. Subsequent addresses only contribute reachability/errors.
		if soa != nil && (res.SOA == nil || (aa && !soaIsAuthoritative)) {
			res.SOA = soa
			res.Serial = soa.Serial
			soaIsAuthoritative = aa
		}
		// TCP probe against the same address.
		if _, _, _, terr := querySOA(ctx, "tcp", srv, zone); terr != nil {
			res.appendError("TCP %s: %v", addr, terr)
		} else {
			res.TCPReachable = true
		}
		// EDNS0 probe against the same address.
		if checkEDNS {
			if eerr := probeEDNS0(ctx, srv, zone); eerr != nil {
				res.appendError("EDNS0 %s: %v", addr, eerr)
			} else {
				res.EDNSSupported = true
			}
		}
		// NS RRset as seen by this server; the first non-failing answer is kept.
		if nss, nerr := queryNSAt(ctx, srv, zone); nerr == nil && len(res.NSRRset) == 0 {
			sort.Strings(nss)
			res.NSRRset = nss
		}
	}
	return res
}
// loadService decodes the origin payload carried by the auto-filled
// "service" option. The option must be present and its type must be empty,
// abstract.Origin or abstract.NSOnlyOrigin; anything else is rejected.
func loadService(opts sdk.CheckerOptions) (*originService, error) {
	msg, found := sdk.GetOption[serviceMessage](opts, "service")
	if !found {
		return nil, fmt.Errorf("missing 'service' option")
	}

	accepted := msg.Type == "" || msg.Type == "abstract.Origin" || msg.Type == "abstract.NSOnlyOrigin"
	if !accepted {
		return nil, fmt.Errorf("service is %s, expected abstract.Origin or abstract.NSOnlyOrigin", msg.Type)
	}

	origin := new(originService)
	if err := json.Unmarshal(msg.Service, origin); err != nil {
		return nil, fmt.Errorf("decoding origin service: %w", err)
	}
	return origin, nil
}
// loadZone determines the zone name, fully qualified. A non-empty
// "domain_name" option takes precedence; otherwise the owner name of the
// service's SOA record is used. An error is returned when neither is set.
func loadZone(opts sdk.CheckerOptions, svc *originService) (string, error) {
	name, ok := sdk.GetOption[string](opts, "domain_name")
	if ok && name != "" {
		return dns.Fqdn(name), nil
	}
	if svc.SOA != nil {
		if owner := svc.SOA.Header().Name; owner != "" {
			return dns.Fqdn(owner), nil
		}
	}
	return "", fmt.Errorf("no zone name provided (missing 'domain_name' option and SOA header)")
}
// normalizeNSList returns the targets of the given NS records, lowercased,
// fully qualified and sorted. Nil records are skipped; duplicates are kept.
func normalizeNSList(ns []*dns.NS) []string {
	names := make([]string, 0, len(ns))
	for i := range ns {
		if rec := ns[i]; rec != nil {
			names = append(names, strings.ToLower(dns.Fqdn(rec.Ns)))
		}
	}
	sort.Strings(names)
	return names
}
// unionStrings merges a and b into one sorted slice in which every distinct
// string appears exactly once. The result is nil when both inputs are empty.
func unionStrings(a, b []string) []string {
	var merged []string
	known := make(map[string]bool, len(a)+len(b))
	for _, list := range [][]string{a, b} {
		for _, item := range list {
			if known[item] {
				continue
			}
			known[item] = true
			merged = append(merged, item)
		}
	}
	sort.Strings(merged)
	return merged
}
// diffStringSets compares two sets of names, ignoring case and a trailing
// dot. missing lists the names of want absent from got; extra lists the names
// of got absent from want. Both results are sorted, lowercased and stripped
// of their trailing dot, and are nil when empty.
func diffStringSets(want, got []string) (missing, extra []string) {
	// canon builds the normalized set of a list of names.
	canon := func(names []string) map[string]bool {
		set := make(map[string]bool, len(names))
		for _, n := range names {
			set[strings.ToLower(strings.TrimSuffix(n, "."))] = true
		}
		return set
	}
	wanted, seen := canon(want), canon(got)

	for name := range wanted {
		if !seen[name] {
			missing = append(missing, name)
		}
	}
	for name := range seen {
		if !wanted[name] {
			extra = append(extra, name)
		}
	}
	sort.Strings(missing)
	sort.Strings(extra)
	return missing, extra
}
// serialLess reports whether serial a precedes serial b under RFC 1982
// serial number arithmetic, which tolerates 32-bit wraparound. Equal serials
// and serials exactly 2^31 apart (undefined by the RFC) yield false.
func serialLess(a, b uint32) bool {
	const half = uint32(1) << 31
	if a == b {
		return false
	}
	// The unsigned subtraction wraps, giving the forward distance a -> b.
	return b-a < half
}

112
checker/collect_test.go Normal file
View file

@ -0,0 +1,112 @@
package checker
import (
"reflect"
"testing"
"github.com/miekg/dns"
)
// TestSerialLess checks serialLess on plain ordering, on wraparound in both
// directions and on the 2^31 boundary.
func TestSerialLess(t *testing.T) {
	type serialCase struct {
		name string
		a, b uint32
		want bool
	}
	cases := []serialCase{
		{"equal", 100, 100, false},
		{"a<b small", 100, 200, true},
		{"a>b small", 200, 100, false},
		{"wrap b ahead", 0xFFFFFFFE, 1, true},
		{"wrap a ahead", 1, 0xFFFFFFFE, false},
		{"zero<one", 0, 1, true},
		{"max distance same direction", 0, 1<<31 - 1, true},
		{"max distance other direction", 1<<31 - 1, 0, false},
		{"undefined boundary equal-half", 0, 1 << 31, false},
	}
	for i := range cases {
		c := cases[i]
		t.Run(c.name, func(t *testing.T) {
			got := serialLess(c.a, c.b)
			if got != c.want {
				t.Errorf("serialLess(%d,%d) = %v, want %v", c.a, c.b, got, c.want)
			}
		})
	}
}
// TestUnionStrings checks that unionStrings sorts and de-duplicates its
// inputs. A nil result and an empty result are treated as equivalent.
func TestUnionStrings(t *testing.T) {
	type unionCase struct {
		name    string
		a, b    []string
		wantOut []string
	}
	cases := []unionCase{
		{"both empty", nil, nil, nil},
		{"only a", []string{"x", "a"}, nil, []string{"a", "x"}},
		{"only b", nil, []string{"b", "a"}, []string{"a", "b"}},
		{"overlap", []string{"a", "b"}, []string{"b", "c"}, []string{"a", "b", "c"}},
		{"duplicates within a", []string{"a", "a"}, []string{"b"}, []string{"a", "b"}},
	}
	for i := range cases {
		c := cases[i]
		t.Run(c.name, func(t *testing.T) {
			got := unionStrings(c.a, c.b)
			bothEmpty := len(got) == 0 && len(c.wantOut) == 0
			if !bothEmpty && !reflect.DeepEqual(got, c.wantOut) {
				t.Errorf("got %v, want %v", got, c.wantOut)
			}
		})
	}
}
// TestDiffStringSets checks that diffStringSets reports missing and extra
// names while ignoring case and trailing dots.
func TestDiffStringSets(t *testing.T) {
	declared := []string{"ns1.example.com.", "ns2.example.com.", "NS3.Example.com"}
	observed := []string{"ns2.example.com", "ns3.example.com.", "ns4.example.com"}

	missing, extra := diffStringSets(declared, observed)

	wantMissing := []string{"ns1.example.com"}
	if !reflect.DeepEqual(missing, wantMissing) {
		t.Errorf("missing = %v, want [ns1.example.com]", missing)
	}
	wantExtra := []string{"ns4.example.com"}
	if !reflect.DeepEqual(extra, wantExtra) {
		t.Errorf("extra = %v, want [ns4.example.com]", extra)
	}
}
// TestDiffStringSets_Equal checks that two sets differing only by case and
// trailing dots produce no diff.
func TestDiffStringSets_Equal(t *testing.T) {
	left := []string{"a.example.", "b.example."}
	right := []string{"A.example", "b.EXAMPLE."}

	missing, extra := diffStringSets(left, right)
	if len(missing)+len(extra) > 0 {
		t.Errorf("equal sets should produce no diff, got missing=%v extra=%v", missing, extra)
	}
}
// TestNormalizeNSList checks that normalizeNSList lowercases, qualifies and
// sorts NS targets, skips nil records and keeps duplicates.
func TestNormalizeNSList(t *testing.T) {
	records := []*dns.NS{
		{Ns: "NS2.Example.COM"},
		nil,
		{Ns: "ns1.example.com."},
		{Ns: "NS1.example.com"},
	}
	want := []string{"ns1.example.com.", "ns1.example.com.", "ns2.example.com."}

	if got := normalizeNSList(records); !reflect.DeepEqual(got, want) {
		t.Errorf("got %v, want %v", got, want)
	}
}
// TestHostPort checks hostPort on IPv4 literals, IPv6 literals and host
// names with or without a trailing dot.
func TestHostPort(t *testing.T) {
	type hostPortCase struct {
		host, port, want string
	}
	cases := []hostPortCase{
		{"192.0.2.1", "53", "192.0.2.1:53"},
		{"2001:db8::1", "53", "[2001:db8::1]:53"},
		{"ns.example.com.", "53", "ns.example.com:53"},
		{"ns.example.com", "5353", "ns.example.com:5353"},
	}
	for i := range cases {
		c := cases[i]
		t.Run(c.host, func(t *testing.T) {
			got := hostPort(c.host, c.port)
			if got != c.want {
				t.Errorf("hostPort(%q,%q) = %q, want %q", c.host, c.port, got, c.want)
			}
		})
	}
}

123
checker/definition.go Normal file
View file

@ -0,0 +1,123 @@
// This file is part of the happyDomain (R) project.
// Copyright (c) 2026 happyDomain
// Authors: Pierre-Olivier Mercier, et al.
//
// This program is offered under a commercial and under the AGPL license.
// For commercial licensing, contact us at <contact@happydomain.org>.
//
// For AGPL licensing:
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
package checker
import (
"time"
sdk "git.happydns.org/checker-sdk-go/checker"
)
// Version is the checker version reported in CheckerDefinition.Version.
// It defaults to "built-in". NOTE(review): it is a variable rather than a
// constant, presumably so that release builds can override it (for example
// with -ldflags "-X"); confirm against the build scripts.
var Version = "built-in"
// Definition describes the authoritative-consistency checker to the host:
// its identity, the services it applies to, the options it accepts (user,
// domain and service scoped), its rules and its scheduling interval.
func (p *authoritativeConsistencyProvider) Definition() *sdk.CheckerDefinition {
	// Options the user can tune.
	userOpts := []sdk.CheckerOptionDocumentation{
		{
			Id:          "requireTCP",
			Type:        "bool",
			Label:       "Require DNS over TCP",
			Description: "When enabled, an authoritative server that fails to answer over TCP is flagged as critical (otherwise as warning). TCP/53 is required by RFC 7766 and by DNSSEC.",
			Default:     true,
		},
		{
			Id:          "checkEDNS",
			Type:        "bool",
			Label:       "Check EDNS0 support",
			Description: "Probe each name server for EDNS0 (RFC 6891). Servers that drop or mishandle EDNS0 break DNSSEC and large answers.",
			Default:     true,
		},
		{
			Id:          "checkLatency",
			Type:        "bool",
			Label:       "Measure response latency",
			Description: "Measure response time of every name server and warn on slow responders.",
			Default:     true,
		},
		{
			Id:          "latencyThresholdMs",
			Type:        "uint",
			Label:       "Latency warning threshold (ms)",
			Description: "Response times above this value trigger a slow-server warning.",
			Default:     float64(500),
		},
		{
			Id:          "useParentNS",
			Type:        "bool",
			Label:       "Cross-check with parent delegation",
			Description: "Query the parent zone for the delegation NS RRset and compare it to the service's declared name servers. Drifts are reported so the user can reconcile.",
			Default:     true,
		},
		{
			Id:          "warnOnStaleSaved",
			Type:        "bool",
			Label:       "Warn when live serial is older than the saved one",
			Description: "When the saved SOA serial in happyDomain is newer than what the authoritative servers publish, report a warning, typically an un-pushed change.",
			Default:     true,
		},
		{
			Id:          "minNameServers",
			Type:        "uint",
			Label:       "Minimum number of name servers",
			Description: "Below this count, a warning is emitted (RFC 1034 recommends at least 2).",
			Default:     float64(2),
		},
	}

	// Options filled in automatically by the host.
	domainOpts := []sdk.CheckerOptionDocumentation{
		{
			Id:       "domain_name",
			Label:    "Zone name",
			AutoFill: sdk.AutoFillDomainName,
		},
	}
	serviceOpts := []sdk.CheckerOptionDocumentation{
		{
			Id:       "service",
			Label:    "Origin service",
			AutoFill: sdk.AutoFillService,
		},
	}

	// The checker only applies to origin services.
	availability := sdk.CheckerAvailability{
		ApplyToService: true,
		LimitToServices: []string{
			"abstract.Origin",
			"abstract.NSOnlyOrigin",
		},
	}

	def := &sdk.CheckerDefinition{
		ID:              "authoritative-consistency",
		Name:            "Authoritative consistency",
		Version:         Version,
		Availability:    availability,
		ObservationKeys: []sdk.ObservationKey{ObservationKey},
		HasHTMLReport:   true,
		HasMetrics:      true,
		Options: sdk.CheckerOptionsDocumentation{
			UserOpts:    userOpts,
			DomainOpts:  domainOpts,
			ServiceOpts: serviceOpts,
		},
		Rules: Rules(),
		Interval: &sdk.CheckIntervalSpec{
			Min:     1 * time.Minute,
			Max:     6 * time.Hour,
			Default: 10 * time.Minute,
		},
	}
	return def
}

213
checker/dns.go Normal file
View file

@ -0,0 +1,213 @@
package checker
import (
"context"
"fmt"
"net"
"strings"
"time"
"github.com/miekg/dns"
)
// dnsTimeout is the per-query deadline used by every helper here. It is
// applied as the dns.Client timeout in dnsExchange, where a nearer context
// deadline shortens it further.
const dnsTimeout = 5 * time.Second
// dnsExchange sends a single query to the given server using the requested
// transport ("" for UDP, "tcp"). The server address must already include a
// port. RecursionDesired is forced off: this checker only talks to
// authoritative servers. When edns is true an OPT record advertising a
// 4096-byte UDP buffer with the DO bit set is attached.
//
// It returns the response and the round-trip time measured by the DNS
// client. A context that is already cancelled or past its deadline fails
// immediately with the context's error, without any packet being sent.
// Otherwise each exchange is bounded by dnsTimeout or by the context
// deadline, whichever comes first.
func dnsExchange(ctx context.Context, proto, server string, q dns.Question, edns bool) (*dns.Msg, time.Duration, error) {
	// Do not touch the network for a caller that has already given up;
	// this also covers an expired deadline, which would otherwise fall
	// back to the full dnsTimeout.
	if err := ctx.Err(); err != nil {
		return nil, 0, err
	}

	client := dns.Client{Net: proto, Timeout: dnsTimeout}
	m := new(dns.Msg)
	m.Id = dns.Id()
	m.Question = []dns.Question{q}
	m.RecursionDesired = false
	if edns {
		m.SetEdns0(4096, true)
	}
	// Never wait longer than the caller's remaining budget.
	if deadline, ok := ctx.Deadline(); ok {
		if d := time.Until(deadline); d > 0 && d < client.Timeout {
			client.Timeout = d
		}
	}
	r, rtt, err := client.ExchangeContext(ctx, m, server)
	if err != nil {
		return nil, rtt, err
	}
	if r == nil {
		return nil, rtt, fmt.Errorf("nil response from %s", server)
	}
	return r, rtt, nil
}
// hostPort returns "host:port" suitable for dialing. A trailing dot on host
// (fully-qualified name) is removed, and any host containing a colon — plain,
// zone-scoped ("fe80::1%eth0") or IPv4-mapped ("::ffff:192.0.2.1") IPv6
// literals — is wrapped in square brackets.
func hostPort(host, port string) string {
	return net.JoinHostPort(strings.TrimSuffix(host, "."), port)
}
// resolveHost looks up the addresses of host with a zero-value net.Resolver,
// after removing any trailing dot from the name. On failure the lookup error
// is returned unchanged together with a nil slice.
func resolveHost(ctx context.Context, host string) ([]string, error) {
	name := strings.TrimSuffix(host, ".")
	found, lookupErr := new(net.Resolver).LookupHost(ctx, name)
	if lookupErr != nil {
		return nil, lookupErr
	}
	return found, nil
}
// querySOA sends a SOA query for zone to server over proto ("" for UDP,
// "tcp"). It returns the first SOA record found, looking at the Answer
// section and then at the Authority section, along with the AA flag of the
// response and the round-trip time. An exchange failure, a non-success Rcode
// or a response carrying no SOA at all is reported as an error; the AA flag
// and RTT are still returned whenever a response was received.
func querySOA(ctx context.Context, proto, server, zone string) (soa *dns.SOA, aa bool, rtt time.Duration, err error) {
	question := dns.Question{Name: dns.Fqdn(zone), Qtype: dns.TypeSOA, Qclass: dns.ClassINET}
	resp, elapsed, xerr := dnsExchange(ctx, proto, server, question, false)
	if xerr != nil {
		return nil, false, elapsed, xerr
	}
	if resp.Rcode != dns.RcodeSuccess {
		return nil, resp.Authoritative, elapsed, fmt.Errorf("server answered %s", dns.RcodeToString[resp.Rcode])
	}
	// Answer first; some authoritative servers put the SOA in the
	// Authority section instead, so that one is scanned second.
	for _, section := range [][]dns.RR{resp.Answer, resp.Ns} {
		for _, rr := range section {
			if rec, isSOA := rr.(*dns.SOA); isSOA {
				return rec, resp.Authoritative, elapsed, nil
			}
		}
	}
	return nil, resp.Authoritative, elapsed, fmt.Errorf("no SOA in answer section")
}
// queryNSAt sends an NS query for zone to server over UDP and returns the
// targets of the NS records found in the Answer section, lowercased and
// fully qualified, in response order. The result is nil when the answer
// holds no NS record. Exchange failures and non-success Rcodes are errors.
func queryNSAt(ctx context.Context, server, zone string) ([]string, error) {
	question := dns.Question{Name: dns.Fqdn(zone), Qtype: dns.TypeNS, Qclass: dns.ClassINET}
	resp, _, err := dnsExchange(ctx, "", server, question, false)
	if err != nil {
		return nil, err
	}
	if code := resp.Rcode; code != dns.RcodeSuccess {
		return nil, fmt.Errorf("server answered %s", dns.RcodeToString[code])
	}

	var targets []string
	for i := range resp.Answer {
		rec, isNS := resp.Answer[i].(*dns.NS)
		if !isNS {
			continue
		}
		targets = append(targets, strings.ToLower(dns.Fqdn(rec.Ns)))
	}
	return targets, nil
}
// probeEDNS0 verifies that server handles an EDNS0-enabled SOA query for
// zone. It returns nil when the server is compliant and an error describing
// the problem otherwise: no answer on either transport, FORMERR, any other
// non-success Rcode, or a response lacking the OPT record.
//
// The query goes over UDP first. If that exchange fails outright (timeout,
// network error) it is retried over TCP, because some middleboxes drop UDP
// packets carrying an OPT record while letting TCP/53 through, and RFC 7766
// requires authoritative servers to accept TCP. A server that handles EDNS0
// correctly over TCP is still considered compliant.
func probeEDNS0(ctx context.Context, server, zone string) error {
	question := dns.Question{Name: dns.Fqdn(zone), Qtype: dns.TypeSOA, Qclass: dns.ClassINET}

	resp, _, udpErr := dnsExchange(ctx, "", server, question, true)
	if udpErr != nil {
		// UDP gave nothing; give TCP a chance. If it fails as well, both
		// errors are reported so the UDP cause is not lost.
		var tcpErr error
		resp, _, tcpErr = dnsExchange(ctx, "tcp", server, question, true)
		if tcpErr != nil {
			return fmt.Errorf("EDNS0 query failed over UDP (%v) and TCP (%w)", udpErr, tcpErr)
		}
	}

	switch {
	case resp.Rcode == dns.RcodeFormatError:
		return fmt.Errorf("server returned FORMERR on EDNS0 query")
	case resp.Rcode != dns.RcodeSuccess:
		return fmt.Errorf("server answered %s on EDNS0 query", dns.RcodeToString[resp.Rcode])
	case resp.IsEdns0() == nil:
		// RFC 6891 requires the OPT pseudo-RR to be echoed in the response.
		return fmt.Errorf("server stripped the EDNS0 OPT record from its response")
	}
	return nil
}
// parentReferral resolves the parent zone of zone via the system resolver,
// then asks each of the parent's authoritative servers (every address of
// every server, in order) for the NS delegation of zone. The first server
// that returns a non-empty referral wins.
//
// The result is a de-duplicated, lowercase, FQDN list of delegated NS names,
// taken from the NS records owned by zone in the Answer and Authority
// sections. When no server yields a delegation, the most recent failure is
// returned; a zone with fewer than two labels has no parent to query and is
// rejected up front.
func parentReferral(ctx context.Context, zone string) ([]string, error) {
	zone = dns.Fqdn(zone)
	labels := dns.SplitDomainName(zone)
	if len(labels) < 2 {
		return nil, fmt.Errorf("zone %q has no parent", zone)
	}
	parent := dns.Fqdn(strings.Join(labels[1:], "."))
	resolver := net.Resolver{}
	nss, err := resolver.LookupNS(ctx, strings.TrimSuffix(parent, "."))
	if err != nil {
		return nil, fmt.Errorf("resolving NS of parent zone %q: %w", parent, err)
	}

	apex := strings.TrimSuffix(zone, ".")
	seen := map[string]bool{}
	var out []string
	// collect appends to out the targets of the NS records owned by zone,
	// skipping names already seen.
	collect := func(records []dns.RR) {
		for _, rr := range records {
			t, ok := rr.(*dns.NS)
			if !ok || !strings.EqualFold(strings.TrimSuffix(t.Header().Name, "."), apex) {
				continue
			}
			name := strings.ToLower(dns.Fqdn(t.Ns))
			if !seen[name] {
				seen[name] = true
				out = append(out, name)
			}
		}
	}

	var lastErr error
	for _, ns := range nss {
		addrs, rerr := resolver.LookupHost(ctx, strings.TrimSuffix(ns.Host, "."))
		if rerr != nil {
			lastErr = rerr
			continue
		}
		if len(addrs) == 0 {
			// Record a real error rather than overwriting lastErr with
			// nil, which would hide an earlier failure.
			lastErr = fmt.Errorf("parent %s has no A/AAAA records", ns.Host)
			continue
		}
		for _, a := range addrs {
			srv := hostPort(a, "53")
			q := dns.Question{Name: zone, Qtype: dns.TypeNS, Qclass: dns.ClassINET}
			r, _, qerr := dnsExchange(ctx, "", srv, q, true)
			if qerr != nil {
				lastErr = qerr
				continue
			}
			if r.Rcode != dns.RcodeSuccess {
				lastErr = fmt.Errorf("parent %s answered %s", ns.Host, dns.RcodeToString[r.Rcode])
				continue
			}
			collect(r.Answer)
			collect(r.Ns)
			if len(out) > 0 {
				return out, nil
			}
		}
	}
	if lastErr != nil {
		return nil, lastErr
	}
	return nil, fmt.Errorf("no parent server returned a delegation for %s", zone)
}

120
checker/evaluate.go Normal file
View file

@ -0,0 +1,120 @@
package checker
import (
"encoding/json"
"fmt"
"time"
sdk "git.happydns.org/checker-sdk-go/checker"
)
// ExtractMetrics turns the stored observation into time-series metrics so
// the host can feed Prometheus / Grafana dashboards. Implements
// sdk.CheckerMetricsReporter.
//
// Per-NS metrics (up, tcp, latency, serial) and the zone-wide count of
// distinct serials are computed from the raw observation. Severity counters
// come exclusively from ctx.States(): they are never re-derived from the raw
// data, and are omitted altogether when the host supplies no states.
//
// Every metric is stamped with collectedAt. An error is returned only when
// the observation cannot be decoded.
func (p *authoritativeConsistencyProvider) ExtractMetrics(ctx sdk.ReportContext, collectedAt time.Time) ([]sdk.CheckMetric, error) {
	var data ObservationData
	if err := json.Unmarshal(ctx.Data(), &data); err != nil {
		return nil, fmt.Errorf("checker: decoding observation: %w", err)
	}
	var out []sdk.CheckMetric

	// One "reachability" pair per NS, easy to alert on individual servers.
	for name, r := range data.Results {
		if r == nil {
			// A null entry in the stored observation carries nothing to
			// report and must not crash the metrics path.
			continue
		}
		labels := map[string]string{"zone": data.Zone, "ns": name}
		up := float64(0)
		if r.UDPReachable && r.Authoritative {
			up = 1
		}
		out = append(out, sdk.CheckMetric{
			Name:      "authoritative_consistency_ns_up",
			Value:     up,
			Labels:    labels,
			Timestamp: collectedAt,
		})
		tcp := float64(0)
		if r.TCPReachable {
			tcp = 1
		}
		out = append(out, sdk.CheckMetric{
			Name:      "authoritative_consistency_ns_tcp",
			Value:     tcp,
			Labels:    labels,
			Timestamp: collectedAt,
		})
		// Latency and serial are emitted only when a value was recorded.
		if r.LatencyMs > 0 {
			out = append(out, sdk.CheckMetric{
				Name:      "authoritative_consistency_ns_latency_ms",
				Value:     float64(r.LatencyMs),
				Unit:      "ms",
				Labels:    labels,
				Timestamp: collectedAt,
			})
		}
		if r.Serial > 0 {
			out = append(out, sdk.CheckMetric{
				Name:      "authoritative_consistency_ns_serial",
				Value:     float64(r.Serial),
				Labels:    labels,
				Timestamp: collectedAt,
			})
		}
	}

	// Zone-wide metrics.
	uniqueSerials := map[uint32]struct{}{}
	for _, r := range data.Results {
		if r != nil && r.Serial != 0 {
			uniqueSerials[r.Serial] = struct{}{}
		}
	}
	out = append(out, sdk.CheckMetric{
		Name:      "authoritative_consistency_unique_serials",
		Value:     float64(len(uniqueSerials)),
		Labels:    map[string]string{"zone": data.Zone},
		Timestamp: collectedAt,
	})

	// Severity counters sourced from ctx.States(). When the host does not
	// provide states (e.g. a metrics-only render path), no counter is
	// emitted at all; we do NOT re-derive them from the raw data.
	states := ctx.States()
	if states != nil {
		var crit, warn, info int
		for _, st := range states {
			switch st.Status {
			case sdk.StatusCrit:
				crit++
			case sdk.StatusWarn:
				warn++
			case sdk.StatusInfo:
				info++
			}
		}
		for _, x := range []struct {
			name string
			val  int
		}{
			{"authoritative_consistency_findings_critical", crit},
			{"authoritative_consistency_findings_warning", warn},
			{"authoritative_consistency_findings_info", info},
		} {
			out = append(out, sdk.CheckMetric{
				Name:      x.name,
				Value:     float64(x.val),
				Labels:    map[string]string{"zone": data.Zone},
				Timestamp: collectedAt,
			})
		}
	}
	return out, nil
}

16
checker/provider.go Normal file
View file

@ -0,0 +1,16 @@
package checker
import (
sdk "git.happydns.org/checker-sdk-go/checker"
)
// Provider builds a fresh authoritative-consistency observation provider
// and returns it as an sdk.ObservationProvider.
func Provider() sdk.ObservationProvider {
	return new(authoritativeConsistencyProvider)
}
// authoritativeConsistencyProvider is the field-less type returned by
// Provider. Its methods (Key, Collect, Definition, ExtractMetrics,
// GetHTMLReport) are spread over the files of this package.
type authoritativeConsistencyProvider struct{}
// Key returns the package-level ObservationKey identifying the observation
// produced by this provider.
func (p *authoritativeConsistencyProvider) Key() sdk.ObservationKey {
return ObservationKey
}

567
checker/report.go Normal file
View file

@ -0,0 +1,567 @@
package checker
import (
"encoding/json"
"fmt"
"html/template"
"sort"
"strings"
sdk "git.happydns.org/checker-sdk-go/checker"
)
// remediationHints maps a finding code to a short, actionable remediation
// note shown in the HTML report. The text is deliberately concrete: the user
// should know what to do next without leaving the page.
var remediationHints = map[string]string{
	CodeSerialDrift:           "Some authoritative servers are lagging behind. On the hidden primary, trigger a NOTIFY (rndc notify / nsd-control notify / knotc zone-notify); if that doesn't help, check that the primary is reachable from the secondaries (port 53 TCP for AXFR/IXFR) and that their zone file isn't frozen.",
	CodeSerialStaleVsSaved:    "You edited the zone in happyDomain but the changes have not been pushed to your DNS provider yet. Open the zone and click \"Apply changes\": the provider's API will receive the new serial and propagate it.",
	CodeSerialAheadOfSaved:    "The zone was modified outside happyDomain. Re-import the zone from the provider so happyDomain's view is up to date.",
	CodeNSUnreachable:         "This server did not answer any query. Check that the host is up and that UDP/TCP 53 is not filtered by a firewall.",
	CodeNSUDPFailed:           "UDP/53 is filtered or the server is down. Verify the service, firewall and any upstream load balancer. A DNS server that cannot be reached over UDP is effectively offline.",
	CodeNSTCPFailed:           "TCP/53 is required by RFC 7766 and by DNSSEC: truncated UDP answers fall back to TCP. Check your firewall and any middleboxes (many consumer firewalls block TCP/53 by default).",
	CodeNSUnresolvable:        "This NS hostname has no A or AAAA record. Add glue at the registrar if it is in-bailiwick, or point it to a resolvable hostname otherwise.",
	CodeLame:                  "This server answers but is not authoritative for the zone; it has no copy of the zone file. Either configure the zone on it, or remove it from the NS RRset to stop resolvers from wasting queries on it.",
	CodeNoSOA:                 "The server claims authority but does not return a SOA record. Check the zone is fully loaded (no parse error in the zone file, no uncommitted transaction).",
	CodeNSRRsetDrift:          "The NS RRset differs between authoritative servers. Force a zone transfer from the primary to the lagging server(s), or align the NS records manually.",
	CodeNSRRsetMismatchConfig: "The NS records served by the zone do not match what you configured in happyDomain. Either update the service to match reality, or push the declared NS list to your DNS provider.",
	CodeParentDrift:           "The NS RRset at the parent zone (your registrar) does not match the NS declared here. Log into your registrar and reconcile the delegation.",
	CodeParentQueryFailed:     "The parent delegation could not be resolved. The cross-check with the parent is skipped for this run; verify the zone name and that its parent is reachable.",
	CodeSOAFieldsDrift:        "The SOA RDATA (MNAME, RNAME, TTL fields) differs between authoritative servers. This usually means a secondary still serves an old zone file. Force a fresh AXFR.",
	CodeSlowNS:                "This server answers slowly. It still works, but users on distant networks will see sluggish resolution. Consider an anycast upgrade or moving the server closer to your audience.",
	CodeEDNSUnsupported:       "This server does not correctly handle EDNS0 (RFC 6891). DNSSEC validation and large answers will fail. Upgrade the DNS software or, on a firewall, allow DNS packets larger than 512 bytes and the OPT record.",
	CodeTooFewNS:              "A zone with a single NS is fragile. RFC 1034 recommends at least two, ideally on separate networks.",
	CodeNoNS:                  "No authoritative servers were discovered. The zone cannot be served in its current state.",
}
// reportNS is one row of the "Per-server probe" table of the HTML report.
type reportNS struct {
	// Name is the NS host name; Addresses its comma-joined addresses.
	Name, Addresses string
	// UDP, TCP and AA mirror the UDP-reachable, TCP-reachable and
	// authoritative flags of the probe result.
	UDP, TCP, AA bool
	// Serial is the SOA serial recorded for the server (0 when none).
	Serial uint32
	// Latency is the recorded latency in milliseconds (0 when none).
	Latency int64
	// EDNS mirrors the EDNS0-supported flag of the probe result.
	// NOTE(review): the Bad* flags are not referenced by the visible part
	// of this file; presumably they mark cells to highlight — confirm.
	EDNS, BadUDP, BadTCP, BadAA, BadEDNS bool
	// Errors holds the probe error messages listed under the server name.
	Errors []string
}
// reportFinding is one entry of the "Findings" card of the HTML report.
type reportFinding struct {
	// Code identifies the finding, Severity is the label shown in its badge,
	// Message its title, Server the name server it concerns (optional) and
	// Hint the remediation note (optional).
	Code, Severity, Message, Server, Hint string
	// Class is the CSS class suffix used as "finding-<Class>" and
	// "badge-<Class>" in the template.
	Class string
}
// reportSerialGroup is one row of the "Serial consistency" table: a SOA
// serial and the servers publishing it.
type reportSerialGroup struct {
// Serial is the SOA serial shared by the group.
Serial uint32
// Servers lists the name servers publishing Serial.
Servers []string
// Majority marks the group rendered with the "consensus" label.
Majority bool
}
// reportData is the view model the HTML report template is written against.
type reportData struct {
	// Zone is the zone name shown in the title and headline.
	Zone string
	// HasSOA tells whether a saved SOA exists; DeclaredSerial is its serial.
	HasSOA         bool
	DeclaredSerial uint32
	// DeclaredNS and ParentNS are the NS names declared in the service and
	// returned by the parent delegation.
	DeclaredNS, ParentNS []string
	// ParentError is the parent query error, if any. Headline,
	// HeadlineClass ("headline-<class>" CSS suffix) and HeadlineHint drive
	// the top card.
	ParentError, Headline, HeadlineClass, HeadlineHint string
	// Totals counts findings per level; levels with a zero count are not
	// rendered.
	Totals map[string]int
	// NS holds the per-server rows.
	NS []reportNS
	// SerialGroups holds the serial table rows, displayed only when
	// ShowSerialTable is set.
	SerialGroups    []reportSerialGroup
	ShowSerialTable bool
	// Findings lists the entries of the "Findings" card.
	Findings []reportFinding
}
// htmlTemplate is the html/template behind the HTML report. It is parsed
// once when the package is initialised; template.Must panics if the embedded
// markup does not parse. Its fields match reportData (presumably executed by
// GetHTMLReport — the call is outside the visible part of this file).
//
// Helper functions available inside the template:
//   - join: joins a string slice with ", ".
//   - boolBadge: renders an "OK" or "KO" pill for a boolean.
//   - naBadge: like boolBadge, but renders a neutral "—" pill when the
//     second argument (relevant) is false.
var htmlTemplate = template.Must(
template.New("authoritative-consistency").
Funcs(template.FuncMap{
"join": func(s []string) string { return strings.Join(s, ", ") },
"boolBadge": func(ok bool) template.HTML {
if ok {
return template.HTML(`<span class="pill pill-ok">OK</span>`)
}
return template.HTML(`<span class="pill pill-bad">KO</span>`)
},
"naBadge": func(ok bool, relevant bool) template.HTML {
if !relevant {
return template.HTML(`<span class="pill pill-na">—</span>`)
}
if ok {
return template.HTML(`<span class="pill pill-ok">OK</span>`)
}
return template.HTML(`<span class="pill pill-bad">KO</span>`)
},
}).
Parse(`<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Authoritative consistency: {{.Zone}}</title>
<style>
*, *::before, *::after { box-sizing: border-box; }
:root {
font-family: system-ui, -apple-system, BlinkMacSystemFont, "Segoe UI", sans-serif;
font-size: 14px;
line-height: 1.5;
color: #1f2937;
background: #f3f4f6;
}
body { margin: 0; padding: 1rem; }
code { font-family: ui-monospace, monospace; font-size: .9em; }
.card {
background: #fff;
border-radius: 10px;
padding: 1rem 1.25rem;
margin-bottom: .75rem;
box-shadow: 0 1px 3px rgba(0,0,0,.08);
}
.card h2 {
font-size: .95rem;
font-weight: 700;
margin: 0 0 .6rem;
color: #374151;
letter-spacing: .02em;
text-transform: uppercase;
}
.headline {
border-left: 4px solid #9ca3af;
padding-left: .8rem;
}
.headline h1 { margin: 0 0 .25rem; font-size: 1.15rem; font-weight: 700; }
.headline .sub { color: #6b7280; font-size: .85rem; }
.headline .hint { margin-top: .5rem; font-size: .9rem; color: #374151; }
.headline-crit { border-left-color: #dc2626; }
.headline-warn { border-left-color: #d97706; }
.headline-info { border-left-color: #2563eb; }
.headline-ok { border-left-color: #16a34a; }
.totals { display: flex; gap: .35rem; flex-wrap: wrap; margin-top: .5rem; }
.badge {
display: inline-flex; align-items: center;
padding: .18em .55em;
border-radius: 9999px;
font-size: .72rem; font-weight: 700;
letter-spacing: .02em; white-space: nowrap;
}
.badge-crit { background: #fee2e2; color: #991b1b; }
.badge-warn { background: #fef3c7; color: #92400e; }
.badge-info { background: #dbeafe; color: #1e40af; }
.badge-ok { background: #dcfce7; color: #166534; }
table { width: 100%; border-collapse: collapse; font-size: .88rem; }
th, td {
padding: .45rem .6rem;
border-bottom: 1px solid #f3f4f6;
text-align: left;
vertical-align: top;
}
th { color: #6b7280; font-weight: 600; font-size: .75rem; text-transform: uppercase; letter-spacing: .03em; }
tr:last-child td { border-bottom: none; }
.ns-name { font-weight: 600; }
.ns-addrs { color: #6b7280; font-size: .75rem; }
.pill {
display: inline-block; padding: .1em .5em;
border-radius: 9999px;
font-size: .72rem; font-weight: 700;
}
.pill-ok { background: #dcfce7; color: #166534; }
.pill-bad { background: #fee2e2; color: #991b1b; }
.pill-na { background: #f3f4f6; color: #6b7280; }
.serial-row td { padding-top: .3rem; padding-bottom: .3rem; }
.serial-majority { font-weight: 700; color: #166534; }
.serial-lag { color: #991b1b; }
.serial-ahead { color: #1e40af; }
.finding {
border-left: 4px solid #d1d5db;
padding: .5rem .8rem;
margin-bottom: .5rem;
background: #fafafa;
border-radius: 4px;
}
.finding-crit { border-left-color: #dc2626; background: #fef2f2; }
.finding-warn { border-left-color: #d97706; background: #fffbeb; }
.finding-info { border-left-color: #2563eb; background: #eff6ff; }
.finding .title { font-weight: 600; margin-bottom: .2rem; }
.finding .server { font-size: .78rem; color: #6b7280; }
.finding .hint { margin-top: .4rem; font-size: .85rem; color: #374151; }
.small { color: #6b7280; font-size: .82rem; }
.muted { color: #9ca3af; }
</style>
</head>
<body>
<div class="card headline headline-{{.HeadlineClass}}">
<h1>{{.Headline}}</h1>
<div class="sub"><code>{{.Zone}}</code>{{if .HasSOA}}, saved SOA serial <code>{{.DeclaredSerial}}</code>{{end}}</div>
<div class="totals">
{{- range $lvl, $n := .Totals}}{{if $n}}
<span class="badge badge-{{$lvl}}">{{$lvl}}&nbsp;{{$n}}</span>
{{end}}{{end}}
</div>
{{if .HeadlineHint}}<div class="hint">{{.HeadlineHint}}</div>{{end}}
</div>
{{if .ShowSerialTable}}
<div class="card">
<h2>Serial consistency</h2>
<table>
<thead>
<tr><th>SOA serial</th><th>Servers</th></tr>
</thead>
<tbody>
{{- range .SerialGroups}}
<tr class="serial-row">
<td>
<code>{{.Serial}}</code>
{{if .Majority}}<span class="serial-majority"> consensus</span>{{end}}
</td>
<td>{{join .Servers}}</td>
</tr>
{{- end}}
</tbody>
</table>
{{if .DeclaredSerial}}<div class="small" style="margin-top:.5rem">Saved in happyDomain: <code>{{.DeclaredSerial}}</code></div>{{end}}
</div>
{{end}}
<div class="card">
<h2>Per-server probe</h2>
<table>
<thead>
<tr>
<th>Name server</th>
<th>UDP/53</th>
<th>TCP/53</th>
<th>Authoritative</th>
<th>Serial</th>
<th>Latency</th>
<th>EDNS0</th>
</tr>
</thead>
<tbody>
{{- range .NS}}
<tr>
<td>
<div class="ns-name">{{.Name}}</div>
{{if .Addresses}}<div class="ns-addrs">{{.Addresses}}</div>{{end}}
{{- range .Errors}}<div class="ns-addrs"> {{.}}</div>{{end}}
</td>
<td>{{boolBadge .UDP}}</td>
<td>{{boolBadge .TCP}}</td>
<td>{{boolBadge .AA}}</td>
<td>{{if .Serial}}<code>{{.Serial}}</code>{{else}}<span class="muted"></span>{{end}}</td>
<td>{{if .Latency}}{{.Latency}} ms{{else}}<span class="muted"></span>{{end}}</td>
<td>{{naBadge .EDNS .UDP}}</td>
</tr>
{{- end}}
</tbody>
</table>
</div>
{{if .DeclaredNS}}
<div class="card">
<h2>Declared vs observed NS</h2>
<table>
<tbody>
<tr><td class="small">Declared in service</td><td><code>{{join .DeclaredNS}}</code></td></tr>
{{if .ParentNS}}<tr><td class="small">Parent delegation</td><td><code>{{join .ParentNS}}</code></td></tr>{{end}}
{{if .ParentError}}<tr><td class="small">Parent query</td><td class="serial-lag">{{.ParentError}}</td></tr>{{end}}
</tbody>
</table>
</div>
{{end}}
<div class="card">
<h2>Findings</h2>
{{if .Findings}}
{{- range .Findings}}
<div class="finding finding-{{.Class}}">
<div class="title">
<span class="badge badge-{{.Class}}">{{.Severity}}</span>
{{.Message}}
</div>
{{if .Server}}<div class="server">on <code>{{.Server}}</code></div>{{end}}
{{if .Hint}}<div class="hint">💡 {{.Hint}}</div>{{end}}
</div>
{{- end}}
{{else}}
<div class="small">No issue detected. Every authoritative server agrees on the zone.</div>
{{end}}
</div>
</body>
</html>`),
)
// GetHTMLReport implements sdk.CheckerHTMLReporter.
//
// It decodes the ObservationData carried by ctx, builds the reportData view
// model (per-NS rows, SOA serial groups, findings and headline) and renders
// it through htmlTemplate. An error is returned when the observation cannot
// be unmarshalled or when the template fails to execute.
func (p *authoritativeConsistencyProvider) GetHTMLReport(ctx sdk.ReportContext) (string, error) {
	var data ObservationData
	if err := json.Unmarshal(ctx.Data(), &data); err != nil {
		return "", fmt.Errorf("checker: unmarshal observation: %w", err)
	}

	rd := reportData{
		Zone:           data.Zone,
		HasSOA:         data.HasSOA,
		DeclaredSerial: data.DeclaredSerial,
		DeclaredNS:     data.DeclaredNS,
		ParentNS:       data.ParentNS,
		ParentError:    data.ParentQueryError,
		Totals:         map[string]int{"crit": 0, "warn": 0, "info": 0},
	}

	// Per-NS rows: preserve probed order.
	for _, name := range data.Probed {
		r := data.Results[name]
		if r == nil {
			rd.NS = append(rd.NS, reportNS{Name: name, Errors: []string{"no probe result"}})
			continue
		}
		rd.NS = append(rd.NS, reportNS{
			Name:      name,
			Addresses: strings.Join(r.Addresses, ", "),
			UDP:       r.UDPReachable,
			TCP:       r.TCPReachable,
			AA:        r.Authoritative,
			Serial:    r.Serial,
			Latency:   r.LatencyMs,
			EDNS:      r.EDNSSupported,
			Errors:    r.Errors,
		})
	}

	// Serial groups: only meaningful when SOA is declared.
	if data.HasSOA {
		groups := map[uint32][]string{}
		for _, name := range data.Probed {
			r := data.Results[name]
			if r == nil || !r.Authoritative || r.SOA == nil {
				continue
			}
			groups[r.Serial] = append(groups[r.Serial], name)
		}
		if len(groups) > 0 {
			rd.ShowSerialTable = len(groups) > 1 || data.DeclaredSerial != 0
			serials := make([]uint32, 0, len(groups))
			for s := range groups {
				serials = append(serials, s)
			}
			// Largest group first. The keys come out of a map in random
			// order and sort.Slice is not stable, so equal-sized groups are
			// ordered by serial to keep the table (and the group flagged
			// as majority) identical from one render to the next.
			sort.Slice(serials, func(i, j int) bool {
				ni, nj := len(groups[serials[i]]), len(groups[serials[j]])
				if ni != nj {
					return ni > nj
				}
				return serials[i] < serials[j]
			})
			majority := serials[0]
			for _, s := range serials {
				srv := groups[s]
				sort.Strings(srv)
				rd.SerialGroups = append(rd.SerialGroups, reportSerialGroup{
					Serial:   s,
					Servers:  srv,
					Majority: s == majority && len(groups) > 1,
				})
			}
		}
	}

	// Findings: read exclusively from ctx.States(). When the host did
	// not pass any states (e.g. a data-only render), the Findings section
	// stays empty and the report shows just the raw per-NS observations.
	states := ctx.States()
	for _, st := range states {
		sev := statusToSeverity(st.Status)
		if sev == "" {
			// Skip states that statusToSeverity does not map to a
			// severity; the findings section only lists problems.
			continue
		}
		rf := reportFinding{
			Code:     st.Code,
			Severity: strings.ToUpper(string(sev)),
			Message:  st.Message,
			Server:   st.Subject,
		}
		if st.Meta != nil {
			if fix, ok := st.Meta["fix"].(string); ok {
				rf.Hint = fix
			}
		}
		switch sev {
		case SeverityCrit:
			rf.Class = "crit"
			rd.Totals["crit"]++
		case SeverityWarn:
			rf.Class = "warn"
			rd.Totals["warn"]++
		case SeverityInfo:
			rf.Class = "info"
			rd.Totals["info"]++
		}
		rd.Findings = append(rd.Findings, rf)
	}

	// Headline: summarise the most severe issue so the user sees the
	// remediation hint up top, without having to scroll through every
	// finding. When states are absent we render a neutral headline and
	// skip the hint: the page becomes a data-only view of the raw per-NS
	// observations.
	if len(states) == 0 {
		rd.Headline = fmt.Sprintf("Raw authoritative-consistency observation for %s", data.Zone)
		rd.HeadlineClass = "info"
		rd.HeadlineHint = ""
	} else {
		rd.Headline, rd.HeadlineClass, rd.HeadlineHint = headlineFromStates(&data, states, rd.Findings)
	}

	var buf strings.Builder
	if err := htmlTemplate.Execute(&buf, rd); err != nil {
		return "", fmt.Errorf("checker: rendering HTML: %w", err)
	}
	return buf.String(), nil
}
// statusToSeverity translates an SDK status into the local Severity used when
// rendering findings. Only the Crit, Warn and Info statuses have a
// counterpart; every other status (for instance OK or Unknown) yields the
// empty Severity, which callers treat as "not a finding".
func statusToSeverity(s sdk.Status) Severity {
	var sev Severity
	switch s {
	case sdk.StatusCrit:
		sev = SeverityCrit
	case sdk.StatusWarn:
		sev = SeverityWarn
	case sdk.StatusInfo:
		sev = SeverityInfo
	}
	return sev
}
// headlineFromStates chooses the headline (title, CSS class, remediation
// hint) shown at the top of the report, using the rule-emitted states as the
// source of truth. States that statusToSeverity maps to no severity are
// ignored.
//
// Codes are tried in this order and the first one present wins:
// unreachable (UDP failed / unreachable) → lame → serial drift →
// stale-vs-saved → NS RRset drift → NS RRset vs config → parent drift →
// SOA fields drift → TCP failed → EDNS unsupported → serial ahead of saved →
// slow NS. When none of these codes is present, a generic "N issue(s)"
// headline is returned if renderedFindings is non-empty, otherwise an
// "all good" headline.
func headlineFromStates(data *ObservationData, states []sdk.CheckState, renderedFindings []reportFinding) (title, class, hint string) {
	seen := make(map[string]struct{}, len(states))
	for i := range states {
		if statusToSeverity(states[i].Status) != "" {
			seen[states[i].Code] = struct{}{}
		}
	}

	ordered := [...]string{
		CodeNSUDPFailed,
		CodeNSUnreachable,
		CodeLame,
		CodeSerialDrift,
		CodeSerialStaleVsSaved,
		CodeNSRRsetDrift,
		CodeNSRRsetMismatchConfig,
		CodeParentDrift,
		CodeSOAFieldsDrift,
		CodeNSTCPFailed,
		CodeEDNSUnsupported,
		CodeSerialAheadOfSaved,
		CodeSlowNS,
	}
	for _, code := range ordered {
		if _, found := seen[code]; found {
			return headlineCopyFor(code, data)
		}
	}

	switch {
	case len(renderedFindings) > 0:
		// Findings exist but none carries a code with dedicated copy.
		return fmt.Sprintf("%d issue(s) detected", len(renderedFindings)), "warn", "See the findings list below for details."
	case data.HasSOA:
		return "Zone is propagated consistently on every name server", "ok", fmt.Sprintf("Serial %d is served identically by all %d probed servers.", mostCommonSerial(data), len(data.Probed))
	default:
		return "Every declared name server is reachable and authoritative", "ok", ""
	}
}
// headlineCopyFor returns the headline title, CSS class and remediation hint
// associated with a finding code. The hint is looked up in remediationHints
// under the same code. Codes without dedicated copy get a generic warning
// headline and no hint. The data argument is currently unused.
func headlineCopyFor(code string, data *ObservationData) (title, class, hint string) {
	switch code {
	case CodeNSUDPFailed, CodeNSUnreachable:
		title, class = "One or more name servers are unreachable", "crit"
	case CodeLame:
		title, class = "Lame delegation detected", "crit"
	case CodeSerialDrift:
		title, class = "Zone is not fully propagated: SOA serials disagree", "crit"
	case CodeSerialStaleVsSaved:
		title, class = "Pending changes have not reached the authoritative servers", "warn"
	case CodeNSRRsetDrift:
		title, class = "NS RRset differs between servers", "warn"
	case CodeNSRRsetMismatchConfig:
		title, class = "NS RRset served does not match the configured one", "warn"
	case CodeParentDrift:
		title, class = "Parent delegation does not match the configured NS list", "warn"
	case CodeSOAFieldsDrift:
		title, class = "SOA fields disagree between servers", "warn"
	case CodeNSTCPFailed:
		title, class = "TCP/53 is not answered by every server", "warn"
	case CodeEDNSUnsupported:
		title, class = "EDNS0 is not supported by every server", "warn"
	case CodeSerialAheadOfSaved:
		title, class = "Live serial is ahead of happyDomain's saved value", "info"
	case CodeSlowNS:
		title, class = "At least one name server responds slowly", "info"
	default:
		return "Issues detected", "warn", ""
	}
	return title, class, remediationHints[code]
}
// mostCommonSerial returns the SOA serial served by the largest group of
// authoritative servers. Results that are nil, non-authoritative or carry no
// SOA are ignored. When several serials are served by equally many servers,
// the numerically lowest one is returned so the result does not depend on
// map iteration order. Returns 0 when no result qualifies. Only meaningful
// when HasSOA is true.
func mostCommonSerial(data *ObservationData) uint32 {
	counts := map[uint32]int{}
	for _, r := range data.Results {
		if r == nil || !r.Authoritative || r.SOA == nil {
			continue
		}
		counts[r.Serial]++
	}
	var best uint32
	var bestN int
	for s, n := range counts {
		// Strictly larger group wins; on a tie keep the lowest serial so
		// repeated calls on the same data always agree.
		if n > bestN || (n == bestN && s < best) {
			best = s
			bestN = n
		}
	}
	return best
}

104
checker/report_test.go Normal file
View file

@ -0,0 +1,104 @@
package checker
import (
"testing"
sdk "git.happydns.org/checker-sdk-go/checker"
)
// TestSeverityToStatus checks that each local Severity maps to the expected
// SDK status and that an unrecognised severity is reported as StatusOK.
func TestSeverityToStatus(t *testing.T) {
	type row struct {
		sev  Severity
		want sdk.Status
	}
	table := []row{
		{sev: SeverityCrit, want: sdk.StatusCrit},
		{sev: SeverityWarn, want: sdk.StatusWarn},
		{sev: SeverityInfo, want: sdk.StatusInfo},
		{sev: Severity("nonsense"), want: sdk.StatusOK},
	}
	for i := range table {
		got := severityToStatus(table[i].sev)
		if got == table[i].want {
			continue
		}
		t.Errorf("severityToStatus(%q) = %v, want %v", table[i].sev, got, table[i].want)
	}
}
// TestStatusToSeverity checks the SDK-status to Severity mapping, including
// that OK and Unknown map to the empty Severity.
func TestStatusToSeverity(t *testing.T) {
	type row struct {
		status sdk.Status
		want   Severity
	}
	table := []row{
		{status: sdk.StatusCrit, want: SeverityCrit},
		{status: sdk.StatusWarn, want: SeverityWarn},
		{status: sdk.StatusInfo, want: SeverityInfo},
		{status: sdk.StatusOK, want: ""},
		{status: sdk.StatusUnknown, want: ""},
	}
	for i := range table {
		got := statusToSeverity(table[i].status)
		if got == table[i].want {
			continue
		}
		t.Errorf("statusToSeverity(%v) = %q, want %q", table[i].status, got, table[i].want)
	}
}
func TestMostCommonSerial(t *testing.T) {
d := &ObservationData{
Results: map[string]*NSResult{
"a.": {Authoritative: true, SOA: mkSOA(10), Serial: 10},
"b.": {Authoritative: true, SOA: mkSOA(10), Serial: 10},
"c.": {Authoritative: true, SOA: mkSOA(11), Serial: 11},
"d.": {Authoritative: false, SOA: mkSOA(99), Serial: 99}, // ignored
"e.": {Authoritative: true, SOA: nil, Serial: 0}, // ignored
"f.": nil, // ignored
},
}
if got := mostCommonSerial(d); got != 10 {
t.Errorf("mostCommonSerial = %d, want 10", got)
}
}
func TestFindingsToStates_AttachesHint(t *testing.T) {
in := []Finding{
{Code: CodeNoNS, Severity: SeverityCrit, Message: "x"},
{Code: "no_such_code", Severity: SeverityWarn, Message: "y"},
}
got := findingsToStates(in)
if len(got) != 2 {
t.Fatalf("want 2 states, got %d", len(got))
}
if got[0].Status != sdk.StatusCrit || got[0].Code != CodeNoNS {
t.Errorf("state[0] = %#v", got[0])
}
if got[0].Meta == nil || got[0].Meta["fix"] == nil {
t.Errorf("state[0] should carry remediation hint, got %#v", got[0].Meta)
}
if got[1].Meta != nil {
t.Errorf("state[1] should have no Meta, got %#v", got[1].Meta)
}
}
// TestHeadlineFromStates_AllGood checks that a lone OK state with no rendered
// findings produces an "ok" headline class.
func TestHeadlineFromStates_AllGood(t *testing.T) {
	obs := &ObservationData{
		HasSOA: true,
		Probed: []string{"a.", "b."},
		Results: map[string]*NSResult{
			"a.": {Authoritative: true, SOA: mkSOA(7), Serial: 7},
			"b.": {Authoritative: true, SOA: mkSOA(7), Serial: 7},
		},
	}
	okOnly := []sdk.CheckState{{Status: sdk.StatusOK, Code: "ok"}}

	if _, class, _ := headlineFromStates(obs, okOnly, nil); class != "ok" {
		t.Errorf("class = %q, want ok", class)
	}
}
// TestHeadlineFromStates_PrioritisesCrit checks that a lame-delegation state
// drives the headline even when it is listed between lower-priority states.
func TestHeadlineFromStates_PrioritisesCrit(t *testing.T) {
	obs := &ObservationData{HasSOA: true}
	states := []sdk.CheckState{
		{Status: sdk.StatusWarn, Code: CodeSlowNS},
		{Status: sdk.StatusCrit, Code: CodeLame},
		{Status: sdk.StatusWarn, Code: CodeNSRRsetDrift},
	}
	findings := []reportFinding{{Class: "crit"}, {Class: "warn"}, {Class: "warn"}}

	title, class, _ := headlineFromStates(obs, states, findings)
	if title == "" || class != "crit" {
		t.Errorf("expected crit headline, got class=%q title=%q", class, title)
	}
}

90
checker/rule.go Normal file
View file

@ -0,0 +1,90 @@
package checker
import (
"context"
"fmt"
sdk "git.happydns.org/checker-sdk-go/checker"
)
// Rules lists every CheckRule exposed by the authoritative-consistency
// checker. Each rule addresses a single concern, so the UI can show what
// passed and what failed rule by rule instead of decoding one monolithic
// code field.
func Rules() []sdk.CheckRule {
	rules := []sdk.CheckRule{
		new(nsDeclaredRule),
		new(parentDelegationRule),
		new(nsResolvableRule),
		new(nsReachableRule),
		new(authoritativeRule),
		new(ednsRule),
		new(latencyRule),
		new(serialConsistencyRule),
		new(serialVsSavedRule),
		new(soaFieldsConsistencyRule),
		new(nsRRsetConsistencyRule),
	}
	return rules
}
// loadObservation retrieves the authoritative-consistency observation stored
// under ObservationKey. On success it returns the decoded data and a nil
// state. On failure it returns nil data and a StatusError CheckState that the
// calling rule should emit as its sole result.
func loadObservation(ctx context.Context, obs sdk.ObservationGetter) (*ObservationData, *sdk.CheckState) {
	data := new(ObservationData)
	err := obs.Get(ctx, ObservationKey, data)
	if err == nil {
		return data, nil
	}
	failure := sdk.CheckState{
		Status:  sdk.StatusError,
		Message: fmt.Sprintf("Failed to get observation: %v", err),
		Code:    "authoritative_consistency_error",
	}
	return nil, &failure
}
// findingsToStates turns each Finding into a CheckState, preserving order.
// When remediationHints holds a non-empty hint for the finding's code, it is
// stored under Meta["fix"] so the HTML reporter can display it without
// re-deriving it; otherwise Meta is left nil.
func findingsToStates(findings []Finding) []sdk.CheckState {
	states := make([]sdk.CheckState, len(findings))
	for i, f := range findings {
		states[i] = sdk.CheckState{
			Status:  severityToStatus(f.Severity),
			Message: f.Message,
			Code:    f.Code,
			Subject: f.Server,
		}
		// A missing map entry reads as "", which is treated like an
		// explicitly empty hint.
		if fix := remediationHints[f.Code]; fix != "" {
			states[i].Meta = map[string]any{"fix": fix}
		}
	}
	return states
}
// severityToStatus maps a local Severity to the matching SDK status. Any
// severity other than Crit, Warn or Info is reported as StatusOK.
func severityToStatus(sev Severity) sdk.Status {
	if sev == SeverityCrit {
		return sdk.StatusCrit
	}
	if sev == SeverityWarn {
		return sdk.StatusWarn
	}
	if sev == SeverityInfo {
		return sdk.StatusInfo
	}
	return sdk.StatusOK
}
// passState builds a StatusOK CheckState carrying the given code and message.
func passState(code, message string) sdk.CheckState {
	var st sdk.CheckState
	st.Status = sdk.StatusOK
	st.Code = code
	st.Message = message
	return st
}
// notTestedState builds a StatusUnknown CheckState carrying the given code
// and message; rules use it when a check was skipped.
func notTestedState(code, message string) sdk.CheckState {
	var st sdk.CheckState
	st.Status = sdk.StatusUnknown
	st.Code = code
	st.Message = message
	return st
}

View file

@ -0,0 +1,294 @@
package checker
import (
"context"
"fmt"
"sort"
"strings"
"github.com/miekg/dns"
sdk "git.happydns.org/checker-sdk-go/checker"
)
// serialConsistencyRule verifies that all authoritative name servers agree
// on the SOA serial.
type serialConsistencyRule struct{}

// Name returns the rule identifier.
func (r *serialConsistencyRule) Name() string {
	return "authoritative_consistency.serial_consistency"
}

// Description returns a human-readable summary of the rule.
func (r *serialConsistencyRule) Description() string {
	return "Verifies that every authoritative name server returns the same SOA serial (detects incomplete zone transfer)."
}

// Evaluate loads the observation and reports serial drift. The rule is
// skipped (Unknown state) when the zone declares no SOA, and passes when
// collectSerialDrift yields no finding.
func (r *serialConsistencyRule) Evaluate(ctx context.Context, obs sdk.ObservationGetter, _ sdk.CheckerOptions) []sdk.CheckState {
	data, errSt := loadObservation(ctx, obs)
	switch {
	case errSt != nil:
		return []sdk.CheckState{*errSt}
	case !data.HasSOA:
		return []sdk.CheckState{notTestedState("authoritative_consistency.serial_consistency.skipped", "Zone does not declare a SOA record.")}
	}

	if drift := collectSerialDrift(data); len(drift) > 0 {
		return findingsToStates(drift)
	}
	return []sdk.CheckState{passState("authoritative_consistency.serial_consistency.ok", "Every authoritative name server returns the same SOA serial.")}
}
// collectSerialDrift groups the probed authoritative servers by the SOA
// serial they returned. It yields a single critical finding listing every
// serial (ascending) with its servers (sorted) when at least two distinct
// serials are seen, and nil otherwise. Servers with no result, a
// non-authoritative answer or no SOA are left out.
func collectSerialDrift(data *ObservationData) []Finding {
	serversBySerial := make(map[uint32][]string)
	for _, name := range data.Probed {
		res := data.Results[name]
		if res != nil && res.Authoritative && res.SOA != nil {
			serversBySerial[res.Serial] = append(serversBySerial[res.Serial], name)
		}
	}
	if len(serversBySerial) <= 1 {
		return nil
	}

	ordered := make([]uint32, 0, len(serversBySerial))
	for serial := range serversBySerial {
		ordered = append(ordered, serial)
	}
	sort.Slice(ordered, func(a, b int) bool { return ordered[a] < ordered[b] })

	parts := make([]string, len(ordered))
	for i, serial := range ordered {
		names := serversBySerial[serial]
		sort.Strings(names)
		parts[i] = fmt.Sprintf("serial %d: %s", serial, strings.Join(names, ", "))
	}

	drift := Finding{
		Code:     CodeSerialDrift,
		Severity: SeverityCrit,
		Message:  "SOA serial drift between authoritative servers: " + strings.Join(parts, "; "),
	}
	return []Finding{drift}
}
// serialVsSavedRule compares the serials served live with the serial saved
// in happyDomain.
type serialVsSavedRule struct{}

// Name returns the rule identifier.
func (r *serialVsSavedRule) Name() string {
	return "authoritative_consistency.serial_vs_saved"
}

// Description returns a human-readable summary of the rule.
func (r *serialVsSavedRule) Description() string {
	return "Compares the live SOA serial with the one saved in happyDomain (detects un-pushed edits and out-of-band changes)."
}

// Evaluate loads the observation and reports servers whose serial is behind
// or ahead of the saved one. The rule is skipped when there is no SOA or no
// saved serial. The "warnOnStaleSaved" option (default true) controls whether
// stale servers produce a finding.
func (r *serialVsSavedRule) Evaluate(ctx context.Context, obs sdk.ObservationGetter, opts sdk.CheckerOptions) []sdk.CheckState {
	data, errSt := loadObservation(ctx, obs)
	switch {
	case errSt != nil:
		return []sdk.CheckState{*errSt}
	case !data.HasSOA || data.DeclaredSerial == 0:
		return []sdk.CheckState{notTestedState("authoritative_consistency.serial_vs_saved.skipped", "No saved serial to compare against.")}
	}

	reportStale := sdk.GetBoolOption(opts, "warnOnStaleSaved", true)
	if diffs := collectSerialVsSaved(data, reportStale); len(diffs) > 0 {
		return findingsToStates(diffs)
	}
	okMessage := fmt.Sprintf("Live serials match the saved value %d.", data.DeclaredSerial)
	return []sdk.CheckState{passState("authoritative_consistency.serial_vs_saved.ok", okMessage)}
}
// collectSerialVsSaved compares each authoritative server's live serial with
// data.DeclaredSerial using serialLess. Servers behind the saved serial yield
// one warning finding (only when warn is true); servers ahead of it yield one
// info finding. Returns nil when the saved serial is 0 or nothing differs.
func collectSerialVsSaved(data *ObservationData, warn bool) []Finding {
	if data.DeclaredSerial == 0 {
		return nil
	}
	saved := data.DeclaredSerial

	var stale, ahead []string
	for _, name := range data.Probed {
		res := data.Results[name]
		if res == nil || !res.Authoritative || res.SOA == nil {
			continue
		}
		if serialLess(res.Serial, saved) {
			stale = append(stale, name)
		} else if serialLess(saved, res.Serial) {
			ahead = append(ahead, name)
		}
	}

	var findings []Finding
	if warn && len(stale) > 0 {
		sort.Strings(stale)
		msg := fmt.Sprintf(
			"saved serial %d is newer than live serial on %s; changes have not propagated yet or have not been applied to the provider",
			saved, strings.Join(stale, ", "),
		)
		findings = append(findings, Finding{Code: CodeSerialStaleVsSaved, Severity: SeverityWarn, Message: msg})
	}
	if len(ahead) > 0 {
		sort.Strings(ahead)
		msg := fmt.Sprintf(
			"live serial on %s is ahead of the saved serial %d; the zone was modified outside happyDomain",
			strings.Join(ahead, ", "), saved,
		)
		findings = append(findings, Finding{Code: CodeSerialAheadOfSaved, Severity: SeverityInfo, Message: msg})
	}
	return findings
}
// soaFieldsConsistencyRule verifies that all name servers return the same
// SOA RDATA (MNAME/RNAME/refresh/retry/expire/minimum).
type soaFieldsConsistencyRule struct{}

// Name returns the rule identifier.
func (r *soaFieldsConsistencyRule) Name() string {
	return "authoritative_consistency.soa_fields_consistency"
}

// Description returns a human-readable summary of the rule.
func (r *soaFieldsConsistencyRule) Description() string {
	return "Verifies that every authoritative name server returns the same SOA RDATA (MNAME, RNAME, refresh, retry, expire, minimum)."
}

// Evaluate loads the observation and reports SOA field drift. The rule is
// skipped (Unknown state) when the zone declares no SOA, and passes when
// collectSOAFieldsDrift yields no finding.
func (r *soaFieldsConsistencyRule) Evaluate(ctx context.Context, obs sdk.ObservationGetter, _ sdk.CheckerOptions) []sdk.CheckState {
	data, errSt := loadObservation(ctx, obs)
	switch {
	case errSt != nil:
		return []sdk.CheckState{*errSt}
	case !data.HasSOA:
		return []sdk.CheckState{notTestedState("authoritative_consistency.soa_fields_consistency.skipped", "Zone does not declare a SOA record.")}
	}

	if drift := collectSOAFieldsDrift(data); len(drift) > 0 {
		return findingsToStates(drift)
	}
	return []sdk.CheckState{passState("authoritative_consistency.soa_fields_consistency.ok", "Every authoritative name server returns the same SOA RDATA.")}
}
// collectSOAFieldsDrift groups the probed servers by the SOA RDATA they
// returned, ignoring the serial (serial drift is reported by its own rule).
// MNAME and RNAME are compared case-insensitively and without the trailing
// dot. It yields a single warning finding describing every distinct RDATA
// group when at least two groups exist, and nil otherwise.
//
// Groups are listed largest first; equally sized groups are ordered by their
// first (alphabetically smallest) server name so the message is stable across
// runs.
func collectSOAFieldsDrift(data *ObservationData) []Finding {
	// soaSig is the comparable signature of a SOA record, serial excluded.
	type soaSig struct {
		mname, rname    string
		refresh, retry  uint32
		expire, minimum uint32
	}
	sig := func(s *dns.SOA) soaSig {
		return soaSig{
			mname:   strings.ToLower(strings.TrimSuffix(s.Ns, ".")),
			rname:   strings.ToLower(strings.TrimSuffix(s.Mbox, ".")),
			refresh: s.Refresh,
			retry:   s.Retry,
			expire:  s.Expire,
			minimum: s.Minttl,
		}
	}

	groups := map[soaSig][]string{}
	for _, ns := range data.Probed {
		r := data.Results[ns]
		if r == nil || r.SOA == nil {
			continue
		}
		k := sig(r.SOA)
		groups[k] = append(groups[k], ns)
	}
	if len(groups) < 2 {
		return nil
	}

	keys := make([]soaSig, 0, len(groups))
	for k, srv := range groups {
		// Sorted in place so srv[0] can serve as a tie-breaker below.
		sort.Strings(srv)
		keys = append(keys, k)
	}
	// Map iteration order is random and sort.Slice is not stable, so a
	// size-only comparison would shuffle equally sized groups between runs.
	sort.Slice(keys, func(i, j int) bool {
		a, b := groups[keys[i]], groups[keys[j]]
		if len(a) != len(b) {
			return len(a) > len(b)
		}
		return a[0] < b[0]
	})

	lines := make([]string, 0, len(keys))
	for _, k := range keys {
		lines = append(lines, fmt.Sprintf(
			"mname=%s rname=%s refresh=%d retry=%d expire=%d minimum=%d → %s",
			k.mname, k.rname, k.refresh, k.retry, k.expire, k.minimum, strings.Join(groups[k], ", "),
		))
	}
	return []Finding{{
		Code:     CodeSOAFieldsDrift,
		Severity: SeverityWarn,
		Message:  "SOA fields differ between authoritative servers: " + strings.Join(lines, "; "),
	}}
}
// nsRRsetConsistencyRule verifies that authoritative servers agree on the NS
// RRset and that the agreed RRset matches the declared list.
type nsRRsetConsistencyRule struct{}

// Name returns the rule identifier.
func (r *nsRRsetConsistencyRule) Name() string {
	return "authoritative_consistency.ns_rrset_consistency"
}

// Description returns a human-readable summary of the rule.
func (r *nsRRsetConsistencyRule) Description() string {
	return "Verifies every authoritative name server returns the same NS RRset, and that this RRset matches the NS declared in the service."
}

// Evaluate loads the observation and converts the findings of
// collectNSRRsetDrift into states, or emits a single pass state when there
// are none.
func (r *nsRRsetConsistencyRule) Evaluate(ctx context.Context, obs sdk.ObservationGetter, _ sdk.CheckerOptions) []sdk.CheckState {
	data, errSt := loadObservation(ctx, obs)
	if errSt != nil {
		return []sdk.CheckState{*errSt}
	}

	if drift := collectNSRRsetDrift(data); len(drift) > 0 {
		return findingsToStates(drift)
	}
	return []sdk.CheckState{passState("authoritative_consistency.ns_rrset_consistency.ok", "NS RRset is consistent across authoritative servers and matches the declared list.")}
}
// collectNSRRsetDrift groups the probed authoritative servers by the NS RRset
// they returned and produces up to two warning findings:
//
//   - CodeNSRRsetDrift when servers return at least two distinct RRsets;
//   - CodeNSRRsetMismatchConfig when the RRset returned by the largest group
//     differs from data.DeclaredNS (skipped when nothing is declared).
//
// Servers with no result, a non-authoritative answer or an empty NS RRset are
// ignored; nil is returned when no server qualifies. RRsets are compared as
// sets: the order in which a server listed its NS records is irrelevant.
// Groups are ranked largest first, ties broken by the RRset text, so both the
// message and the choice of the majority RRset are stable across runs.
func collectNSRRsetDrift(data *ObservationData) []Finding {
	groups := map[string][]string{}
	for _, ns := range data.Probed {
		r := data.Results[ns]
		if r == nil || !r.Authoritative || len(r.NSRRset) == 0 {
			continue
		}
		// Sort a copy so that two servers returning the same records in a
		// different order land in the same group, without mutating the
		// observation.
		canon := append([]string(nil), r.NSRRset...)
		sort.Strings(canon)
		k := strings.Join(canon, "|")
		groups[k] = append(groups[k], ns)
	}
	if len(groups) == 0 {
		return nil
	}

	keys := make([]string, 0, len(groups))
	for k, srv := range groups {
		sort.Strings(srv)
		keys = append(keys, k)
	}
	// Map iteration order is random and sort.Slice is not stable: without
	// the secondary key, equally sized groups would be ranked differently
	// from one run to the next.
	sort.Slice(keys, func(i, j int) bool {
		ni, nj := len(groups[keys[i]]), len(groups[keys[j]])
		if ni != nj {
			return ni > nj
		}
		return keys[i] < keys[j]
	})

	var findings []Finding
	if len(keys) > 1 {
		lines := make([]string, 0, len(keys))
		for _, k := range keys {
			lines = append(lines, fmt.Sprintf("NS RRset [%s] → %s", strings.ReplaceAll(k, "|", ", "), strings.Join(groups[k], ", ")))
		}
		findings = append(findings, Finding{
			Code:     CodeNSRRsetDrift,
			Severity: SeverityWarn,
			Message:  "NS RRset differs between authoritative servers: " + strings.Join(lines, "; "),
		})
	}

	if len(data.DeclaredNS) == 0 {
		return findings
	}

	// keys[0] is the RRset served by the most servers.
	majority := strings.Split(keys[0], "|")
	missing, extra := diffStringSets(data.DeclaredNS, majority)
	if len(missing) > 0 || len(extra) > 0 {
		findings = append(findings, Finding{
			Code:     CodeNSRRsetMismatchConfig,
			Severity: SeverityWarn,
			Message: fmt.Sprintf(
				"NS RRset served by authoritative servers does not match declared service: missing=%v extra=%v",
				missing, extra,
			),
		})
	}
	return findings
}

View file

@ -0,0 +1,219 @@
package checker
import (
"strings"
"testing"
"github.com/miekg/dns"
)
// mkSOA builds an example.com. SOA record with fixed RDATA and the given
// serial, for use as a test fixture.
func mkSOA(serial uint32) *dns.SOA {
	soa := new(dns.SOA)
	soa.Hdr = dns.RR_Header{Name: "example.com.", Rrtype: dns.TypeSOA}
	soa.Ns = "ns1.example.com."
	soa.Mbox = "hostmaster.example.com."
	soa.Serial = serial
	soa.Refresh = 3600
	soa.Retry = 600
	soa.Expire = 86400
	soa.Minttl = 300
	return soa
}
// TestCollectSerialDrift_NoDrift checks that two authoritative servers with
// the same serial produce no finding.
func TestCollectSerialDrift_NoDrift(t *testing.T) {
	const ns1, ns2 = "ns1.example.com.", "ns2.example.com."
	obs := &ObservationData{
		Probed: []string{ns1, ns2},
		Results: map[string]*NSResult{
			ns1: {Authoritative: true, SOA: mkSOA(10), Serial: 10},
			ns2: {Authoritative: true, SOA: mkSOA(10), Serial: 10},
		},
	}

	got := collectSerialDrift(obs)
	if len(got) != 0 {
		t.Errorf("expected no findings, got %v", got)
	}
}
// TestCollectSerialDrift_Drift checks that differing serials yield one
// critical drift finding naming both serials, and that a non-authoritative
// server is kept out of the message.
func TestCollectSerialDrift_Drift(t *testing.T) {
	const ns1, ns2, ns3 = "ns1.example.com.", "ns2.example.com.", "ns3.example.com."
	obs := &ObservationData{
		Probed: []string{ns1, ns2, ns3},
		Results: map[string]*NSResult{
			ns1: {Authoritative: true, SOA: mkSOA(10), Serial: 10},
			ns2: {Authoritative: true, SOA: mkSOA(11), Serial: 11},
			// Not authoritative: must be ignored.
			ns3: {Authoritative: false, SOA: mkSOA(99), Serial: 99},
		},
	}

	got := collectSerialDrift(obs)
	if len(got) != 1 || got[0].Code != CodeSerialDrift || got[0].Severity != SeverityCrit {
		t.Fatalf("unexpected findings: %#v", got)
	}

	msg := got[0].Message
	if !(strings.Contains(msg, "serial 10") && strings.Contains(msg, "serial 11")) {
		t.Errorf("message missing serials: %q", msg)
	}
	if strings.Contains(msg, "99") {
		t.Errorf("non-authoritative server should not appear: %q", msg)
	}
}
// TestCollectSerialVsSaved is a table-driven test of collectSerialVsSaved:
// matching serials, stale servers (with and without the warning enabled),
// servers ahead of the saved serial, and a mix of both.
func TestCollectSerialVsSaved(t *testing.T) {
	type scenario struct {
		label      string
		declared   uint32
		live       map[string]uint32
		warnStale  bool
		codes      []string
		severities []Severity
	}
	scenarios := []scenario{
		{
			label:     "matches saved",
			declared:  50,
			live:      map[string]uint32{"ns1.": 50, "ns2.": 50},
			warnStale: true,
		},
		{
			label:      "saved newer than live -> stale",
			declared:   50,
			live:       map[string]uint32{"ns1.": 49, "ns2.": 50},
			warnStale:  true,
			codes:      []string{CodeSerialStaleVsSaved},
			severities: []Severity{SeverityWarn},
		},
		{
			label:     "saved newer but warn disabled",
			declared:  50,
			live:      map[string]uint32{"ns1.": 49},
			warnStale: false,
		},
		{
			label:      "live ahead of saved -> info",
			declared:   50,
			live:       map[string]uint32{"ns1.": 51},
			warnStale:  true,
			codes:      []string{CodeSerialAheadOfSaved},
			severities: []Severity{SeverityInfo},
		},
		{
			label:      "mixed",
			declared:   50,
			live:       map[string]uint32{"ns1.": 49, "ns2.": 51},
			warnStale:  true,
			codes:      []string{CodeSerialStaleVsSaved, CodeSerialAheadOfSaved},
			severities: []Severity{SeverityWarn, SeverityInfo},
		},
	}

	for _, sc := range scenarios {
		t.Run(sc.label, func(t *testing.T) {
			// Build an observation where every listed server is
			// authoritative and serves the given serial.
			obs := &ObservationData{DeclaredSerial: sc.declared, Results: map[string]*NSResult{}}
			for name, serial := range sc.live {
				obs.Probed = append(obs.Probed, name)
				obs.Results[name] = &NSResult{Authoritative: true, SOA: mkSOA(serial), Serial: serial}
			}

			findings := collectSerialVsSaved(obs, sc.warnStale)
			if len(findings) != len(sc.codes) {
				t.Fatalf("got %d findings, want %d: %#v", len(findings), len(sc.codes), findings)
			}

			sevByCode := map[string]Severity{}
			for _, f := range findings {
				sevByCode[f.Code] = f.Severity
			}
			for idx, code := range sc.codes {
				sev, present := sevByCode[code]
				if present && sev == sc.severities[idx] {
					continue
				}
				t.Errorf("missing or wrong-severity %s: got %v", code, sevByCode)
			}
		})
	}
}
// TestCollectSOAFieldsDrift checks that a differing refresh value produces one
// SOA-fields-drift finding mentioning both refresh values, while a server
// that only differs by serial is grouped with the matching RDATA.
func TestCollectSOAFieldsDrift(t *testing.T) {
	base := mkSOA(10)
	otherRefresh := mkSOA(10)
	otherRefresh.Refresh = 9999 // different RDATA
	otherSerial := mkSOA(11)    // same RDATA as base, different serial only

	obs := &ObservationData{
		Probed: []string{"ns1.", "ns2.", "ns3."},
		Results: map[string]*NSResult{
			"ns1.": {SOA: base},
			"ns2.": {SOA: otherRefresh},
			"ns3.": {SOA: otherSerial},
		},
	}

	got := collectSOAFieldsDrift(obs)
	if len(got) != 1 || got[0].Code != CodeSOAFieldsDrift {
		t.Fatalf("expected one SOAFieldsDrift finding, got %#v", got)
	}

	// Two RDATA buckets are expected: ns1+ns3 together, ns2 alone.
	msg := got[0].Message
	if !(strings.Contains(msg, "refresh=3600") && strings.Contains(msg, "refresh=9999")) {
		t.Errorf("message missing refresh values: %q", msg)
	}
}
// TestCollectSOAFieldsDrift_NoDriftWhenOnlySerialDiffers checks that two SOA
// records differing only by serial are not reported as field drift.
func TestCollectSOAFieldsDrift_NoDriftWhenOnlySerialDiffers(t *testing.T) {
	obs := &ObservationData{
		Probed: []string{"ns1.", "ns2."},
		Results: map[string]*NSResult{
			"ns1.": {SOA: mkSOA(10)},
			"ns2.": {SOA: mkSOA(11)},
		},
	}

	got := collectSOAFieldsDrift(obs)
	if len(got) != 0 {
		t.Errorf("serial-only difference should not be flagged here: %v", got)
	}
}
// TestCollectNSRRsetDrift_Consistent checks that identical NS RRsets matching
// the declared list produce no finding.
func TestCollectNSRRsetDrift_Consistent(t *testing.T) {
	const ns1, ns2 = "ns1.example.com.", "ns2.example.com."
	shared := []string{ns1, ns2}
	obs := &ObservationData{
		Probed:     []string{ns1, ns2},
		DeclaredNS: shared,
		Results: map[string]*NSResult{
			ns1: {Authoritative: true, NSRRset: shared},
			ns2: {Authoritative: true, NSRRset: shared},
		},
	}

	got := collectNSRRsetDrift(obs)
	if len(got) != 0 {
		t.Errorf("expected no findings, got %v", got)
	}
}
// TestCollectNSRRsetDrift_Drift checks that servers returning different NS
// RRsets produce an NSRRsetDrift finding.
func TestCollectNSRRsetDrift_Drift(t *testing.T) {
	const ns1, ns2 = "ns1.example.com.", "ns2.example.com."
	obs := &ObservationData{
		Probed:     []string{ns1, ns2},
		DeclaredNS: []string{ns1, ns2},
		Results: map[string]*NSResult{
			ns1: {Authoritative: true, NSRRset: []string{ns1, ns2}},
			ns2: {Authoritative: true, NSRRset: []string{ns1}},
		},
	}

	seen := map[string]bool{}
	for _, f := range collectNSRRsetDrift(obs) {
		seen[f.Code] = true
	}
	if !seen[CodeNSRRsetDrift] {
		t.Errorf("expected NSRRsetDrift, got %v", seen)
	}
}
// TestCollectNSRRsetDrift_MismatchConfig checks that an RRset differing from
// the declared list yields an NSRRsetMismatchConfig finding whose message
// names both the missing and the extra server.
func TestCollectNSRRsetDrift_MismatchConfig(t *testing.T) {
	obs := &ObservationData{
		Probed:     []string{"ns1.example.com."},
		DeclaredNS: []string{"ns1.example.com.", "ns2.example.com."},
		Results: map[string]*NSResult{
			"ns1.example.com.": {Authoritative: true, NSRRset: []string{"ns1.example.com.", "ns3.example.com."}},
		},
	}

	findings := collectNSRRsetDrift(obs)
	matched := false
	for _, f := range findings {
		if f.Code != CodeNSRRsetMismatchConfig {
			continue
		}
		matched = true
		if !(strings.Contains(f.Message, "ns2.example.com") && strings.Contains(f.Message, "ns3.example.com")) {
			t.Errorf("message missing missing/extra entries: %q", f.Message)
		}
	}
	if !matched {
		t.Errorf("expected NSRRsetMismatchConfig in %v", findings)
	}
}

104
checker/rules_discovery.go Normal file
View file

@ -0,0 +1,104 @@
package checker
import (
"context"
"fmt"
sdk "git.happydns.org/checker-sdk-go/checker"
)
// nsDeclaredRule checks that the service declares enough NS records and that
// at least one NS could be probed.
type nsDeclaredRule struct{}

// Name returns the rule identifier.
func (r *nsDeclaredRule) Name() string { return "authoritative_consistency.ns_declared" }

// Description returns a human-readable summary of the rule.
func (r *nsDeclaredRule) Description() string {
	return "Verifies the service declares at least the recommended number of name servers and that at least one name server could be discovered."
}

// Evaluate loads the observation and reports:
//
//   - CodeNoNS (critical) when no name server is available to probe. Exactly
//     one such finding is emitted: the "parent cross-check is disabled"
//     wording when nothing is declared and the "useParentNS" option is off,
//     otherwise the "could not be discovered" wording when the probed list is
//     empty;
//   - CodeTooFewNS (warning) when at least one but fewer than
//     "minNameServers" (default 2) name servers are declared.
//
// A single pass state is returned when neither applies.
func (r *nsDeclaredRule) Evaluate(ctx context.Context, obs sdk.ObservationGetter, opts sdk.CheckerOptions) []sdk.CheckState {
	data, errSt := loadObservation(ctx, obs)
	if errSt != nil {
		return []sdk.CheckState{*errSt}
	}

	minNS := sdk.GetIntOption(opts, "minNameServers", 2)
	useParentNS := sdk.GetBoolOption(opts, "useParentNS", true)

	var findings []Finding

	// Both conditions describe the same root cause (nothing to probe), so
	// they are mutually exclusive: reporting both would show the user two
	// critical findings with the same code for one problem.
	switch {
	case len(data.DeclaredNS) == 0 && !useParentNS:
		findings = append(findings, Finding{
			Code:     CodeNoNS,
			Severity: SeverityCrit,
			Message:  "no name servers declared in the service and parent cross-check is disabled",
		})
	case len(data.Probed) == 0:
		findings = append(findings, Finding{
			Code:     CodeNoNS,
			Severity: SeverityCrit,
			Message:  "no authoritative name servers could be discovered (declared list empty and parent query empty)",
		})
	}

	if len(data.DeclaredNS) > 0 && len(data.DeclaredNS) < minNS {
		findings = append(findings, Finding{
			Code:     CodeTooFewNS,
			Severity: SeverityWarn,
			Message:  fmt.Sprintf("only %d name server(s) declared, RFC 1034 recommends at least %d", len(data.DeclaredNS), minNS),
		})
	}

	if len(findings) == 0 {
		return []sdk.CheckState{passState("authoritative_consistency.ns_declared.ok", fmt.Sprintf("%d name server(s) declared", len(data.DeclaredNS)))}
	}
	return findingsToStates(findings)
}
// parentDelegationRule verifies that the delegation published by the parent
// zone agrees with the NS list declared in the service.
type parentDelegationRule struct{}

// Name returns the rule identifier.
func (r *parentDelegationRule) Name() string {
	return "authoritative_consistency.parent_delegation"
}

// Description returns a human-readable summary of the rule.
func (r *parentDelegationRule) Description() string {
	return "Cross-checks the NS RRset returned by the parent zone's referral with the NS declared in the service."
}

// Evaluate returns, in order of precedence: a skipped state when the
// "useParentNS" option is off; a warning when the parent query failed; a
// skipped state when no parent NS was observed; a warning when both lists are
// non-empty and differ; a pass state otherwise.
func (r *parentDelegationRule) Evaluate(ctx context.Context, obs sdk.ObservationGetter, opts sdk.CheckerOptions) []sdk.CheckState {
	if enabled := sdk.GetBoolOption(opts, "useParentNS", true); !enabled {
		return []sdk.CheckState{notTestedState("authoritative_consistency.parent_delegation.skipped", "Parent delegation cross-check disabled by option.")}
	}

	data, errSt := loadObservation(ctx, obs)
	if errSt != nil {
		return []sdk.CheckState{*errSt}
	}

	if data.ParentQueryError != "" {
		return findingsToStates([]Finding{{
			Code:     CodeParentQueryFailed,
			Severity: SeverityWarn,
			Message:  fmt.Sprintf("parent delegation query failed: %s", data.ParentQueryError),
		}})
	}

	if len(data.ParentNS) == 0 {
		return []sdk.CheckState{notTestedState("authoritative_consistency.parent_delegation.skipped", "No parent delegation observed.")}
	}

	// The comparison only makes sense when something is declared.
	if len(data.DeclaredNS) > 0 {
		missing, extra := diffStringSets(data.DeclaredNS, data.ParentNS)
		if len(missing) > 0 || len(extra) > 0 {
			return findingsToStates([]Finding{{
				Code:     CodeParentDrift,
				Severity: SeverityWarn,
				Message: fmt.Sprintf(
					"NS RRset at parent does not match declared service: missing=%v extra=%v",
					missing, extra,
				),
			}})
		}
	}

	return []sdk.CheckState{passState("authoritative_consistency.parent_delegation.ok", "Parent delegation matches the declared NS list.")}
}

View file

@ -0,0 +1,193 @@
package checker
import (
"context"
"encoding/json"
"maps"
"testing"
sdk "git.happydns.org/checker-sdk-go/checker"
)
// stubObs is a test double providing the subset of sdk.ObservationGetter that
// the rules rely on. It serves a fixed observation, or a fixed error.
type stubObs struct {
	data *ObservationData
	err  error
}

// Get returns the configured error if any; otherwise it copies the stored
// observation into dst through a JSON round-trip.
func (s stubObs) Get(_ context.Context, _ sdk.ObservationKey, dst any) error {
	if s.err != nil {
		return s.err
	}
	encoded, marshalErr := json.Marshal(s.data)
	if marshalErr == nil {
		return json.Unmarshal(encoded, dst)
	}
	return marshalErr
}

// GetRelated always reports no related observations.
func (s stubObs) GetRelated(_ context.Context, _ sdk.ObservationKey) ([]sdk.RelatedObservation, error) {
	return nil, nil
}
// mkOpts builds a fresh sdk.CheckerOptions holding a copy of kv. A nil kv
// yields an empty, non-nil options value.
func mkOpts(kv map[string]any) sdk.CheckerOptions {
	opts := make(sdk.CheckerOptions, len(kv))
	maps.Copy(opts, kv)
	return opts
}
// TestNSDeclaredRule runs nsDeclaredRule against observations that declare
// two name servers, a single one, and none at all.
func TestNSDeclaredRule(t *testing.T) {
	rule := &nsDeclaredRule{}
	ctx := context.Background()

	t.Run("ok with two NS", func(t *testing.T) {
		observed := &ObservationData{
			DeclaredNS: []string{"ns1.example.com.", "ns2.example.com."},
			Probed:     []string{"ns1.example.com.", "ns2.example.com."},
		}
		got := rule.Evaluate(ctx, stubObs{data: observed}, mkOpts(nil))
		if len(got) == 1 && got[0].Status == sdk.StatusOK {
			return
		}
		t.Errorf("expected OK, got %#v", got)
	})

	t.Run("too few NS", func(t *testing.T) {
		observed := &ObservationData{
			DeclaredNS: []string{"ns1.example.com."},
			Probed:     []string{"ns1.example.com."},
		}
		opts := mkOpts(map[string]any{"minNameServers": 2})
		got := rule.Evaluate(ctx, stubObs{data: observed}, opts)
		if len(got) == 1 && got[0].Code == CodeTooFewNS {
			return
		}
		t.Errorf("expected TooFewNS, got %#v", got)
	})

	t.Run("no NS at all", func(t *testing.T) {
		opts := mkOpts(map[string]any{"useParentNS": false})
		got := rule.Evaluate(ctx, stubObs{data: &ObservationData{}}, opts)
		// Other states may accompany the finding; only its presence matters.
		found := false
		for i := range got {
			if got[i].Code == CodeNoNS {
				found = true
				break
			}
		}
		if !found {
			t.Errorf("expected NoNS finding, got %#v", got)
		}
	})
}
// TestNSReachableRule checks how nsReachableRule grades transport failures
// for a single probed name server: a UDP failure is always critical, while a
// TCP-only failure is critical or a warning depending on "requireTCP".
func TestNSReachableRule(t *testing.T) {
	rule := &nsReachableRule{}

	// evaluate runs the rule against an observation holding one probed
	// server ("ns1.") with the given probe result.
	evaluate := func(res *NSResult, opts map[string]any) []sdk.CheckState {
		d := &ObservationData{
			Probed:  []string{"ns1."},
			Results: map[string]*NSResult{"ns1.": res},
		}
		return rule.Evaluate(context.Background(), stubObs{data: d}, mkOpts(opts))
	}

	t.Run("UDP fail is critical", func(t *testing.T) {
		states := evaluate(&NSResult{UDPReachable: false, TCPReachable: false}, nil)
		if len(states) != 1 || states[0].Code != CodeNSUDPFailed || states[0].Status != sdk.StatusCrit {
			t.Errorf("expected critical UDP fail, got %#v", states)
		}
	})

	t.Run("TCP fail crit when requireTCP", func(t *testing.T) {
		states := evaluate(
			&NSResult{UDPReachable: true, TCPReachable: false},
			map[string]any{"requireTCP": true},
		)
		if len(states) != 1 || states[0].Code != CodeNSTCPFailed || states[0].Status != sdk.StatusCrit {
			t.Errorf("expected critical TCP fail, got %#v", states)
		}
	})

	t.Run("TCP fail warn when not required", func(t *testing.T) {
		states := evaluate(
			&NSResult{UDPReachable: true, TCPReachable: false},
			map[string]any{"requireTCP": false},
		)
		// Assert the code as well as the status, so that an unrelated
		// warn-level finding cannot satisfy this case.
		if len(states) != 1 || states[0].Code != CodeNSTCPFailed || states[0].Status != sdk.StatusWarn {
			t.Errorf("expected TCP fail warning, got %#v", states)
		}
	})
}
func TestAuthoritativeRule_Lame(t *testing.T) {
rule := &authoritativeRule{}
d := &ObservationData{
Zone: "example.com.",
HasSOA: true,
Probed: []string{"ns1."},
Results: map[string]*NSResult{
"ns1.": {UDPReachable: true, Authoritative: false},
},
}
states := rule.Evaluate(context.Background(), stubObs{data: d}, mkOpts(nil))
if len(states) != 1 || states[0].Code != CodeLame {
t.Errorf("expected lame finding, got %#v", states)
}
}
func TestLatencyRule(t *testing.T) {
rule := &latencyRule{}
d := &ObservationData{
Probed: []string{"fast.", "slow."},
Results: map[string]*NSResult{
"fast.": {UDPReachable: true, LatencyMs: 50},
"slow.": {UDPReachable: true, LatencyMs: 1000},
},
}
states := rule.Evaluate(context.Background(), stubObs{data: d}, mkOpts(map[string]any{"latencyThresholdMs": 500}))
if len(states) != 1 || states[0].Code != CodeSlowNS || states[0].Subject != "slow." {
t.Errorf("expected single slow finding for slow., got %#v", states)
}
}
func TestParentDelegationRule_Drift(t *testing.T) {
rule := &parentDelegationRule{}
d := &ObservationData{
DeclaredNS: []string{"ns1.example.com.", "ns2.example.com."},
ParentNS: []string{"ns1.example.com.", "ns3.example.com."},
}
states := rule.Evaluate(context.Background(), stubObs{data: d}, mkOpts(nil))
if len(states) != 1 || states[0].Code != CodeParentDrift {
t.Errorf("expected ParentDrift, got %#v", states)
}
}
func TestParentDelegationRule_QueryFailed(t *testing.T) {
rule := &parentDelegationRule{}
d := &ObservationData{ParentQueryError: "boom"}
states := rule.Evaluate(context.Background(), stubObs{data: d}, mkOpts(nil))
if len(states) != 1 || states[0].Code != CodeParentQueryFailed {
t.Errorf("expected ParentQueryFailed, got %#v", states)
}
}
// TestRulesRegistry checks the invariants of the rule registry: it is not
// empty, and every rule has a non-empty, unique name and a non-empty
// description.
func TestRulesRegistry(t *testing.T) {
	registered := Rules()
	if len(registered) == 0 {
		t.Fatal("Rules() returned empty list")
	}

	names := make(map[string]bool, len(registered))
	for _, rule := range registered {
		name := rule.Name()
		if name == "" {
			t.Error("rule with empty name")
		}
		if _, dup := names[name]; dup {
			t.Errorf("duplicate rule name: %s", name)
		}
		names[name] = true

		if desc := rule.Description(); desc == "" {
			t.Errorf("rule %s has empty description", name)
		}
	}
}

216
checker/rules_per_ns.go Normal file
View file

@ -0,0 +1,216 @@
package checker
import (
"context"
"fmt"
sdk "git.happydns.org/checker-sdk-go/checker"
)
// nsResolvableRule reports every probed name server whose hostname could not
// be resolved to an address during collection.
type nsResolvableRule struct{}

// Name returns the identifier of the rule.
func (r *nsResolvableRule) Name() string {
	return "authoritative_consistency.ns_resolvable"
}

// Description returns a one-sentence summary of what the rule verifies.
func (r *nsResolvableRule) Description() string {
	return "Verifies that every authoritative name server hostname resolves to at least one A or AAAA address."
}

// Evaluate emits one critical finding per probed name server that carries a
// resolution error, or a single passing state when there is none. Probed
// names without a recorded result are ignored. When the observation cannot
// be loaded, the error state from loadObservation is returned instead.
func (r *nsResolvableRule) Evaluate(ctx context.Context, obs sdk.ObservationGetter, _ sdk.CheckerOptions) []sdk.CheckState {
	observed, failure := loadObservation(ctx, obs)
	if failure != nil {
		return []sdk.CheckState{*failure}
	}

	var issues []Finding
	for _, server := range observed.Probed {
		probe := observed.Results[server]
		if probe == nil || probe.ResolveError == "" {
			continue
		}
		issues = append(issues, Finding{
			Code:     CodeNSUnresolvable,
			Severity: SeverityCrit,
			Message:  fmt.Sprintf("cannot resolve %s: %s", server, probe.ResolveError),
			Server:   server,
		})
	}

	if len(issues) > 0 {
		return findingsToStates(issues)
	}
	return []sdk.CheckState{passState("authoritative_consistency.ns_resolvable.ok", "Every probed name server resolves to at least one address.")}
}
// nsReachableRule reports probed name servers that did not answer over
// UDP/53 or TCP/53.
type nsReachableRule struct{}

// Name returns the identifier of the rule.
func (r *nsReachableRule) Name() string {
	return "authoritative_consistency.ns_reachable"
}

// Description returns a one-sentence summary of what the rule verifies.
func (r *nsReachableRule) Description() string {
	return "Verifies that every authoritative name server answers over UDP/53 and TCP/53."
}

// Evaluate inspects every probed name server that has a result and no
// resolution error. A server that is unreachable over UDP yields a critical
// finding and its TCP status is not examined. A server reachable over UDP
// but not over TCP yields a finding that is critical when the "requireTCP"
// option is true (the default) and a warning otherwise. A single passing
// state is returned when nothing is reported.
func (r *nsReachableRule) Evaluate(ctx context.Context, obs sdk.ObservationGetter, opts sdk.CheckerOptions) []sdk.CheckState {
	observed, failure := loadObservation(ctx, obs)
	if failure != nil {
		return []sdk.CheckState{*failure}
	}
	requireTCP := sdk.GetBoolOption(opts, "requireTCP", true)

	var issues []Finding
	for _, server := range observed.Probed {
		probe := observed.Results[server]
		if probe == nil || probe.ResolveError != "" {
			continue
		}
		switch {
		case !probe.UDPReachable:
			issues = append(issues, Finding{
				Code:     CodeNSUDPFailed,
				Severity: SeverityCrit,
				Message:  fmt.Sprintf("%s did not answer any SOA query over UDP/53", server),
				Server:   server,
			})
		case !probe.TCPReachable:
			issue := Finding{Code: CodeNSTCPFailed, Server: server}
			if requireTCP {
				issue.Severity = SeverityCrit
				issue.Message = fmt.Sprintf("%s did not answer over TCP/53 (required by RFC 7766 and DNSSEC)", server)
			} else {
				issue.Severity = SeverityWarn
				issue.Message = fmt.Sprintf("%s did not answer over TCP/53", server)
			}
			issues = append(issues, issue)
		}
	}

	if len(issues) > 0 {
		return findingsToStates(issues)
	}
	return []sdk.CheckState{passState("authoritative_consistency.ns_reachable.ok", "Every probed name server is reachable over UDP/53 and TCP/53.")}
}
// authoritativeRule reports UDP-reachable name servers that are not
// authoritative for the zone, or that are authoritative but returned no SOA
// although the service declares one.
type authoritativeRule struct{}

// Name returns the identifier of the rule.
func (r *authoritativeRule) Name() string {
	return "authoritative_consistency.authoritative"
}

// Description returns a one-sentence summary of what the rule verifies.
func (r *authoritativeRule) Description() string {
	return "Verifies that every reachable name server is authoritative for the zone (no lame delegation) and returns a SOA."
}

// Evaluate looks at every probed name server that answered over UDP. A
// non-authoritative server yields a critical lame-delegation finding. An
// authoritative server without a SOA yields a critical finding only when the
// observation has HasSOA set. A single passing state is returned when
// nothing is reported.
func (r *authoritativeRule) Evaluate(ctx context.Context, obs sdk.ObservationGetter, _ sdk.CheckerOptions) []sdk.CheckState {
	observed, failure := loadObservation(ctx, obs)
	if failure != nil {
		return []sdk.CheckState{*failure}
	}

	var issues []Finding
	for _, server := range observed.Probed {
		probe := observed.Results[server]
		if probe == nil || !probe.UDPReachable {
			continue
		}
		switch {
		case !probe.Authoritative:
			issues = append(issues, Finding{
				Code:     CodeLame,
				Severity: SeverityCrit,
				Message:  fmt.Sprintf("%s is not authoritative for %s (lame delegation)", server, observed.Zone),
				Server:   server,
			})
		case observed.HasSOA && probe.SOA == nil:
			issues = append(issues, Finding{
				Code:     CodeNoSOA,
				Severity: SeverityCrit,
				Message:  fmt.Sprintf("%s is authoritative but returned no SOA for %s", server, observed.Zone),
				Server:   server,
			})
		}
	}

	if len(issues) > 0 {
		return findingsToStates(issues)
	}
	return []sdk.CheckState{passState("authoritative_consistency.authoritative.ok", "Every reachable name server is authoritative for the zone.")}
}
// ednsRule reports UDP-reachable name servers whose probe result does not
// record EDNS0 support.
type ednsRule struct{}

// Name returns the identifier of the rule.
func (r *ednsRule) Name() string {
	return "authoritative_consistency.edns"
}

// Description returns a one-sentence summary of what the rule verifies.
func (r *ednsRule) Description() string {
	return "Verifies that every reachable name server correctly handles EDNS0 queries (required by DNSSEC and for large answers)."
}

// Evaluate returns a not-tested state when the "checkEDNS" option is false
// (it defaults to true). Otherwise it emits one warning per UDP-reachable
// name server without EDNS0 support, or a single passing state when there is
// none.
func (r *ednsRule) Evaluate(ctx context.Context, obs sdk.ObservationGetter, opts sdk.CheckerOptions) []sdk.CheckState {
	if enabled := sdk.GetBoolOption(opts, "checkEDNS", true); !enabled {
		return []sdk.CheckState{notTestedState("authoritative_consistency.edns.skipped", "EDNS0 check disabled by option.")}
	}

	observed, failure := loadObservation(ctx, obs)
	if failure != nil {
		return []sdk.CheckState{*failure}
	}

	var issues []Finding
	for _, server := range observed.Probed {
		probe := observed.Results[server]
		if probe == nil || !probe.UDPReachable || probe.EDNSSupported {
			continue
		}
		issues = append(issues, Finding{
			Code:     CodeEDNSUnsupported,
			Severity: SeverityWarn,
			Message:  fmt.Sprintf("%s does not correctly handle EDNS0 (breaks DNSSEC and large answers)", server),
			Server:   server,
		})
	}

	if len(issues) > 0 {
		return findingsToStates(issues)
	}
	return []sdk.CheckState{passState("authoritative_consistency.edns.ok", "Every reachable name server handles EDNS0 correctly.")}
}
// latencyRule reports UDP-reachable name servers whose recorded latency is
// above a configurable threshold.
type latencyRule struct{}

// Name returns the identifier of the rule.
func (r *latencyRule) Name() string {
	return "authoritative_consistency.latency"
}

// Description returns a one-sentence summary of what the rule verifies.
func (r *latencyRule) Description() string {
	return "Flags authoritative name servers whose response latency exceeds the configured threshold."
}

// Evaluate returns a not-tested state when the "checkLatency" option is
// false (it defaults to true). Otherwise it emits one informational finding
// per UDP-reachable name server whose LatencyMs is strictly greater than the
// "latencyThresholdMs" option (default 500), or a single passing state when
// there is none.
func (r *latencyRule) Evaluate(ctx context.Context, obs sdk.ObservationGetter, opts sdk.CheckerOptions) []sdk.CheckState {
	if enabled := sdk.GetBoolOption(opts, "checkLatency", true); !enabled {
		return []sdk.CheckState{notTestedState("authoritative_consistency.latency.skipped", "Latency check disabled by option.")}
	}

	observed, failure := loadObservation(ctx, obs)
	if failure != nil {
		return []sdk.CheckState{*failure}
	}
	limit := int64(sdk.GetIntOption(opts, "latencyThresholdMs", 500))

	var issues []Finding
	for _, server := range observed.Probed {
		probe := observed.Results[server]
		if probe == nil || !probe.UDPReachable {
			continue
		}
		// A latency exactly equal to the limit is still acceptable.
		if probe.LatencyMs <= limit {
			continue
		}
		issues = append(issues, Finding{
			Code:     CodeSlowNS,
			Severity: SeverityInfo,
			Message:  fmt.Sprintf("%s responded in %d ms (above %d ms threshold)", server, probe.LatencyMs, limit),
			Server:   server,
		})
	}

	if len(issues) > 0 {
		return findingsToStates(issues)
	}
	return []sdk.CheckState{passState("authoritative_consistency.latency.ok", "Every reachable name server responded within the configured threshold.")}
}

208
checker/types.go Normal file
View file

@ -0,0 +1,208 @@
package checker
import (
"encoding/json"
"fmt"
"github.com/miekg/dns"
)
// maxNSResultErrors caps the per-NS error list so a flaky server with many
// addresses cannot bloat the JSON observation payload. Once the cap is
// reached, further errors are dropped and a single sentinel entry records the
// number of suppressed messages. The cap is enforced by NSResult.appendError,
// so NSResult.Errors can hold up to maxNSResultErrors messages plus that one
// sentinel.
const maxNSResultErrors = 16
// ObservationKey is the key that identifies this checker's ObservationData
// payload among observations.
// NOTE(review): the code that stores and looks up by this key lives outside
// this file — confirm against the callers.
const ObservationKey = "authoritative-consistency"
// Severity classifies a finding emitted by the authoritative-consistency checker.
// Its string value is what appears in the "severity" JSON field of a Finding.
type Severity string
// Severity levels, listed from least to most severe.
const (
// SeverityInfo marks informational findings (e.g. a slow name server).
SeverityInfo Severity = "info"
// SeverityWarn marks findings that deserve attention (e.g. missing EDNS0
// support, or TCP failures when TCP is not required).
SeverityWarn Severity = "warn"
// SeverityCrit marks critical findings (e.g. an unresolvable, unreachable or
// lame name server).
SeverityCrit Severity = "crit"
)
// Finding codes: stable machine-readable identifiers used by the UI to
// localize and link to remediation docs. Every code shares the
// "authoritative_consistency_" prefix. Only the codes whose emitting rule
// lives in this package's per-NS and parent-delegation rules are annotated
// below.
const (
CodeSerialDrift = "authoritative_consistency_serial_drift"
CodeSerialStaleVsSaved = "authoritative_consistency_serial_stale_vs_saved"
CodeSerialAheadOfSaved = "authoritative_consistency_serial_ahead_of_saved"
CodeNSUnreachable = "authoritative_consistency_ns_unreachable"
// CodeNSUDPFailed: the name server did not answer over UDP/53.
CodeNSUDPFailed = "authoritative_consistency_ns_udp_failed"
// CodeNSTCPFailed: the name server answered over UDP/53 but not TCP/53.
CodeNSTCPFailed = "authoritative_consistency_ns_tcp_failed"
// CodeNSUnresolvable: the name server hostname could not be resolved.
CodeNSUnresolvable = "authoritative_consistency_ns_unresolvable"
// CodeLame: the name server is reachable but not authoritative for the zone.
CodeLame = "authoritative_consistency_lame"
// CodeNoSOA: the name server is authoritative but returned no SOA.
CodeNoSOA = "authoritative_consistency_no_soa"
CodeNSRRsetDrift = "authoritative_consistency_ns_rrset_drift"
CodeNSRRsetMismatchConfig = "authoritative_consistency_ns_rrset_mismatch_config"
// CodeParentDrift: the parent's NS set differs from the declared NS list.
CodeParentDrift = "authoritative_consistency_parent_drift"
// CodeParentQueryFailed: the parent delegation query returned an error.
CodeParentQueryFailed = "authoritative_consistency_parent_query_failed"
CodeSOAFieldsDrift = "authoritative_consistency_soa_fields_drift"
// CodeSlowNS: the name server's latency is above the configured threshold.
CodeSlowNS = "authoritative_consistency_slow_ns"
// CodeEDNSUnsupported: the name server does not handle EDNS0 correctly.
CodeEDNSUnsupported = "authoritative_consistency_edns_unsupported"
CodeTooFewNS = "authoritative_consistency_too_few_ns"
CodeNoNS = "authoritative_consistency_no_ns"
)
// Finding describes a single observation produced while running the
// checker testsuite. Rules build Findings and hand them to findingsToStates
// to obtain the sdk.CheckState values they return.
type Finding struct {
// Code is a stable machine-readable identifier (e.g.
// "authoritative_consistency_serial_drift").
Code string `json:"code"`
// Severity grades the finding.
Severity Severity `json:"severity"`
// Message is a human-readable explanation.
Message string `json:"message"`
// Server is the name server the finding applies to, when the issue is
// scoped to a specific NS. Empty for zone-wide findings.
// NOTE(review): the rule tests expect it to end up as the CheckState
// subject — confirm in findingsToStates.
Server string `json:"server,omitempty"`
// Addr is the IP:port actually queried when the issue was raised. Useful
// to distinguish IPv4/IPv6 problems on the same NS name.
Addr string `json:"addr,omitempty"`
}
// NSResult is the per-name-server view of the zone, populated during Collect.
// It carries every signal the evaluator needs to decide whether the zone is
// propagated correctly, plus what the UI needs to render an actionable
// report.
type NSResult struct {
// Name is the NS hostname (FQDN, lowercase).
Name string `json:"name"`
// Addresses is the list of A/AAAA addresses tried for this NS.
Addresses []string `json:"addresses,omitempty"`
// ResolveError is set when no address could be resolved for this NS.
// Rules treat a non-empty value as "unresolvable".
ResolveError string `json:"resolve_error,omitempty"`
// UDPReachable is true when the NS answered at least once over UDP/53.
UDPReachable bool `json:"udp_reachable"`
// TCPReachable is true when the NS answered at least once over TCP/53.
TCPReachable bool `json:"tcp_reachable"`
// Authoritative is true when at least one authoritative (AA=1) answer
// was received for the zone.
Authoritative bool `json:"authoritative"`
// Serial is the SOA serial returned by this NS (0 when not reachable or
// the answer does not carry a SOA).
Serial uint32 `json:"serial,omitempty"`
// SOA is the full SOA RR returned by this NS, useful for per-field
// comparison in the report. Nil when no SOA was obtained.
SOA *dns.SOA `json:"soa,omitempty"`
// NSRRset is the NS RRset this server returns for the zone (lowercase
// FQDNs).
NSRRset []string `json:"ns_rrset,omitempty"`
// EDNSSupported is true when the NS answered correctly to an EDNS0
// query.
EDNSSupported bool `json:"edns_supported"`
// LatencyMs is the duration (milliseconds) of the SOA query used for
// the reachability test. 0 when not reachable.
LatencyMs int64 `json:"latency_ms,omitempty"`
// Errors collects low-level query errors encountered while probing this
// NS. Exposed to help operators debug network/firewall issues. Capped
// at maxNSResultErrors entries; appendError is the only intended writer.
Errors []string `json:"errors,omitempty"`
// suppressedErrors counts the messages that were dropped after the cap
// was reached. Reflected back into Errors as a sentinel line so the
// operator knows the list is truncated. Being unexported, it is not part
// of the JSON encoding of the result.
suppressedErrors int
}
// appendError records a probe error on the NS result, deduplicating identical
// messages and capping the total to maxNSResultErrors. Suppressed entries are
// summarised in a single trailing sentinel of the form
// "(N more error(s) suppressed)".
//
// format and args are passed to fmt.Sprintf to build the message. A message
// already present in Errors is ignored. Once the cap is reached, every new
// message only increments the suppressed counter and refreshes the sentinel.
func (n *NSResult) appendError(format string, args ...any) {
	msg := fmt.Sprintf(format, args...)
	for _, e := range n.Errors {
		if e == msg {
			return
		}
	}

	if len(n.Errors) < maxNSResultErrors {
		n.Errors = append(n.Errors, msg)
		return
	}

	n.suppressedErrors++
	sentinel := fmt.Sprintf("(%d more error(s) suppressed)", n.suppressedErrors)
	// The counter, not the text of the last entry, tells whether a sentinel
	// already exists: a genuine error message may itself start with '(' and
	// must not be overwritten.
	if n.suppressedErrors > 1 && len(n.Errors) > 0 {
		n.Errors[len(n.Errors)-1] = sentinel
		return
	}
	n.Errors = append(n.Errors, sentinel)
}
// ObservationData is the observation payload stored by the checker. It
// carries the raw observed state from each authoritative server together
// with the declared and parent NS lists. It also has a Findings field; see
// the note on that field.
type ObservationData struct {
// Zone is the FQDN of the zone under test.
Zone string `json:"zone"`
// HasSOA indicates whether the service declares a SOA record (Origin
// versus NSOnlyOrigin). Drives which tests run.
HasSOA bool `json:"has_soa"`
// DeclaredSerial is the SOA serial saved in happyDomain for this zone.
// Zero when the service is an NSOnlyOrigin.
DeclaredSerial uint32 `json:"declared_serial,omitempty"`
// DeclaredNS is the list of NS hostnames declared by the service,
// lowercased and FQDN-normalized.
DeclaredNS []string `json:"declared_ns,omitempty"`
// ParentNS is the list of NS hostnames returned by the parent zone's
// referral, when parent discovery is enabled. Empty when the parent
// query is disabled or failed (see ParentQueryError).
ParentNS []string `json:"parent_ns,omitempty"`
// ParentQueryError is set when the parent referral query failed.
ParentQueryError string `json:"parent_query_error,omitempty"`
// Probed is the final list of NS names that were actually probed
// (union of DeclaredNS and ParentNS, de-duplicated).
Probed []string `json:"probed,omitempty"`
// Results holds the per-NS probe results, keyed by NS hostname.
Results map[string]*NSResult `json:"results,omitempty"`
// Findings is the list of issues / observations produced by the run,
// ordered by (severity desc, code asc, server asc).
// NOTE(review): Collect is documented as gathering raw data only, with
// findings derived by the rules — confirm whether anything still writes
// this field. The tag has no omitempty, so a nil slice is encoded as null.
Findings []Finding `json:"findings"`
}
// originService is the minimal local mirror of happyDomain's
// `services/abstract.Origin` type. It is duplicated on purpose so that this
// checker does not have to import the (heavy) happyDomain server module
// just to decode the service payload. github.com/miekg/dns marshals
// dns.SOA / dns.NS to JSON in the same shape happyDomain uses.
type originService struct {
// SOA is the declared SOA record; nil when the service declares none.
SOA *dns.SOA `json:"soa,omitempty"`
// NameServers is the list of NS records declared by the service.
NameServers []*dns.NS `json:"ns"`
}
// serviceMessage is the minimal local mirror of happyDomain's ServiceMessage
// envelope. We only need the embedded service JSON and the type tag; the
// rest of the meta fields are ignored.
type serviceMessage struct {
// Type is the service type tag, read from the "_svctype" key.
Type string `json:"_svctype"`
// Domain is read from the "_domain" key.
Domain string `json:"_domain"`
// Service is the embedded service payload, kept as raw JSON so it can be
// decoded separately.
Service json.RawMessage `json:"Service"`
}