Initial commit

This commit is contained in:
nemunaire 2026-04-26 11:49:13 +07:00
commit 2d98ed1b5d
33 changed files with 4644 additions and 0 deletions

2
.gitignore vendored Normal file
View file

@ -0,0 +1,2 @@
checker-resolver-propagation
checker-resolver-propagation.so

17
Dockerfile Normal file
View file

@ -0,0 +1,17 @@
FROM golang:1.25-alpine AS builder
ARG CHECKER_VERSION=custom-build
WORKDIR /src
COPY go.mod go.sum ./
RUN go mod download
COPY . .
RUN CGO_ENABLED=0 go build -tags standalone -ldflags "-X main.Version=${CHECKER_VERSION}" -o /checker-resolver-propagation .
FROM scratch
COPY --from=builder /checker-resolver-propagation /checker-resolver-propagation
USER 65534:65534
EXPOSE 8080
HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
CMD ["/checker-resolver-propagation", "-healthcheck"]
ENTRYPOINT ["/checker-resolver-propagation"]

21
LICENSE Normal file
View file

@ -0,0 +1,21 @@
MIT License
Copyright (c) 2026 The happyDomain Authors
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the “Software”), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

28
Makefile Normal file
View file

@ -0,0 +1,28 @@
CHECKER_NAME := checker-resolver-propagation
CHECKER_IMAGE := happydomain/$(CHECKER_NAME)
CHECKER_VERSION ?= custom-build
CHECKER_SOURCES := main.go $(wildcard checker/*.go)
GO_LDFLAGS := -X main.Version=$(CHECKER_VERSION)
.PHONY: all plugin docker test clean
all: $(CHECKER_NAME)
$(CHECKER_NAME): $(CHECKER_SOURCES)
go build -tags standalone -ldflags "$(GO_LDFLAGS)" -o $@ .
plugin: $(CHECKER_NAME).so
$(CHECKER_NAME).so: $(CHECKER_SOURCES) $(wildcard plugin/*.go)
go build -buildmode=plugin -ldflags "$(GO_LDFLAGS)" -o $@ ./plugin/
docker:
docker build --build-arg CHECKER_VERSION=$(CHECKER_VERSION) -t $(CHECKER_IMAGE) .
test:
go test -tags standalone ./...
clean:
rm -f $(CHECKER_NAME) $(CHECKER_NAME).so

106
README.md Normal file
View file

@ -0,0 +1,106 @@
# checker-resolver-propagation
Worldwide DNS propagation checker for [happyDomain](https://www.happydomain.org/).
Probes a curated catalog of public recursive resolvers (Cloudflare,
Google, Quad9, OpenDNS, Yandex, regional ISPs, …) across multiple
transports (UDP, TCP, DoT, DoH) and regions, then compares their
answers to the zone's authoritative nameservers to detect propagation
gaps, regional splits, SOA serial drift, stale caches, DNSSEC
validation failures, SERVFAIL/NXDOMAIN inconsistencies, and resolver
filtering.
## Usage
### Standalone HTTP server
```bash
# Build and run
make
./checker-resolver-propagation -listen :8080
```
The server exposes:
- `GET /health`: health check
- `POST /collect`: collect propagation observations (happyDomain external checker protocol)
- `POST /evaluate`: run the evaluation rules against an observation
- `POST /report`: extract metrics / HTML report from an observation
### Docker
```bash
make docker
docker run -p 8080:8080 happydomain/checker-resolver-propagation
```
### happyDomain plugin
```bash
make plugin
# produces checker-resolver-propagation.so, loadable by happyDomain as a Go plugin
```
The plugin exposes a `NewCheckerPlugin` symbol returning the checker
definition and observation provider, which happyDomain registers in its
global registries at load time.
### Versioning
The binary, plugin, and Docker image embed a version string overridable
at build time:
```bash
make CHECKER_VERSION=1.2.3
make plugin CHECKER_VERSION=1.2.3
make docker CHECKER_VERSION=1.2.3
```
### happyDomain remote endpoint
Set the `endpoint` admin option for the resolver-propagation checker to
the URL of the running checker-resolver-propagation server (e.g.,
`http://checker-resolver-propagation:8080`). happyDomain will delegate
observation collection to this endpoint.
This checker applies to **service**-level checks and is restricted to
the `abstract.Origin` and `abstract.NSOnlyOrigin` services (the zone
apex / NS configuration).
## Options
| Id | Type | Default | Description |
|-----------------------|--------|-------------------------------|------------------------------------------------------------------------------------------------------------------------|
| `recordTypes` | string | `SOA,NS,A,AAAA,MX,TXT,CAA` | Comma-separated list of RR types to probe at the apex (and at each `subdomains` entry). |
| `subdomains` | string | `www` | Comma-separated list of owner names to probe in addition to the apex (e.g. `www,mail,@`). Empty = apex only. |
| `includeFiltered` | bool | `false` | Probe filtering resolvers (malware/family/adblock). Their answers routinely diverge by design. |
| `region` | string | `all` | Restrict to a region: `all`, `global`, `na`, `eu`, `asia`, `ru`, `me`. |
| `transports` | string | `udp` | Comma-separated transports to probe: `udp`, `tcp`, `dot`, `doh`. Encrypted transports are only used where published. |
| `resolverAllowlist` | string | | Comma-separated resolver IDs or IPs to probe exclusively (e.g. `cloudflare,google,9.9.9.9`). Empty = catalog selection.|
| `latencyThresholdMs`  | uint   | `500`                         | Resolvers whose average response time exceeds this value (in milliseconds) emit an info finding.                       |
| `runTimeoutSeconds` | uint | `30` | Hard wall-clock budget for one propagation run. Slower resolvers report as unreachable. |
## Rules
Each rule emits a finding code. Severity can be affected by the options above.
| Code | Default severity | Condition |
|-------------------------------|------------------|-----------|
| `rprop_no_resolvers` | critical | The current option set selects no resolver from the catalog. |
| `rprop_all_resolvers_down` | critical | Every selected resolver failed to answer (likely no DNS connectivity from the checker host). |
| `rprop_resolver_unreachable` | warning | An individual resolver failed to answer within the run budget. |
| `rprop_resolver_high_latency` | info | A resolver's average response time exceeds `latencyThresholdMs`. |
| `rprop_resolver_filtered_hit` | info | A filtered resolver returned a different answer than the consensus (typical blocklist behaviour). Only when `includeFiltered` is enabled. |
| `rprop_partial_propagation` | warning | Public resolvers disagree on the answer for a probed RRset. |
| `rprop_answer_drift` | critical | The public consensus differs from the answer served by the zone's authoritative nameservers. |
| `rprop_unexpected_nxdomain` | critical | Some resolvers return NXDOMAIN while others return NOERROR for the same RRset. |
| `rprop_unexpected_servfail` | critical | A resolver returns SERVFAIL (usually a DNSSEC or reachability failure). |
| `rprop_regional_split` | warning | Every resolver of a region agrees on an answer that differs from the global consensus. |
| `rprop_serial_drift` | warning | Unfiltered resolvers disagree on the SOA serial. |
| `rprop_stale_cache` | info | A resolver still serves an SOA serial below the one last observed by happyDomain. |
| `rprop_dnssec_failure` | critical | A validating resolver fails to validate the zone's DNSSEC chain (returns SERVFAIL with AD/CD semantics). |
| `rprop_dnssec_not_validated` | info | A validating resolver answered without setting AD on a signed zone. |
## License
Licensed under the **MIT License** (see `LICENSE`).

422
checker/collect.go Normal file
View file

@ -0,0 +1,422 @@
package checker
import (
"context"
"encoding/json"
"fmt"
"log"
"net"
"strconv"
"strings"
"sync"
"time"
"github.com/miekg/dns"
sdk "git.happydns.org/checker-sdk-go/checker"
)
// Collect gathers raw DNS answers from each selected public resolver plus the
// zone's own authoritative ground-truth. It performs no judgement: rules
// derive consensus, drift, splits, latency, and DNSSEC verdicts from the
// observation.
//
// Options read from opts: "service", "domain_name", "includeFiltered",
// "region", "transports", "recordTypes", "subdomains", "runTimeoutSeconds"
// and "resolverAllowlist". The returned value is a *ResolverPropagationData.
// An error is returned when the service or zone cannot be determined, or when
// "recordTypes" contains no known RR type.
func (p *resolverPropagationProvider) Collect(ctx context.Context, opts sdk.CheckerOptions) (any, error) {
	svc, err := loadService(opts)
	if err != nil {
		return nil, err
	}
	zone, err := loadZone(opts, svc)
	if err != nil {
		return nil, err
	}

	const defaultRunTimeoutS = 30

	includeFiltered := sdk.GetBoolOption(opts, "includeFiltered", false)
	region := getStringOpt(opts, "region", "all")
	transportsOpt := getStringOpt(opts, "transports", "udp")
	recordTypesOpt := getStringOpt(opts, "recordTypes", "SOA,NS,A,AAAA,MX,TXT,CAA")
	// NOTE(review): the README lists `www` as the default for "subdomains";
	// the fallback used here is empty (apex only) — confirm which is intended.
	subdomainsOpt := getStringOpt(opts, "subdomains", "")
	runTimeoutS := sdk.GetIntOption(opts, "runTimeoutSeconds", defaultRunTimeoutS)
	allowlistOpt := getStringOpt(opts, "resolverAllowlist", "")

	// A zero or negative budget would hand every probe an already-expired
	// context and make all resolvers look unreachable; use the default.
	if runTimeoutS <= 0 {
		runTimeoutS = defaultRunTimeoutS
	}

	// Normalise and de-duplicate the transports so that e.g. "udp,UDP" does
	// not probe each resolver twice and overwrite the same resolver view.
	var transports []Transport
	seenTransport := map[Transport]bool{}
	for _, tname := range parseCSV(transportsOpt) {
		tr := Transport(strings.ToLower(tname))
		if !seenTransport[tr] {
			seenTransport[tr] = true
			transports = append(transports, tr)
		}
	}
	if len(transports) == 0 {
		transports = []Transport{TransportUDP}
	}

	qtypes := parseQTypes(recordTypesOpt)
	if len(qtypes) == 0 {
		return nil, fmt.Errorf("no valid record types in %q", recordTypesOpt)
	}
	extraNames := parseCSV(subdomainsOpt)
	allowlist := parseCSV(allowlistOpt)

	// Build the list of owner names to probe: the apex first, then each
	// distinct extra name.
	names := []string{dns.Fqdn(zone)}
	seenName := map[string]bool{names[0]: true}
	for _, sd := range extraNames {
		full := joinSubdomain(sd, zone)
		if !seenName[full] {
			seenName[full] = true
			names = append(names, full)
		}
	}

	resolvers := selectedResolvers(includeFiltered, region, allowlist)
	data := &ResolverPropagationData{
		Zone:      dns.Fqdn(zone),
		Names:     names,
		Types:     qtypeNames(qtypes),
		Resolvers: map[string]*ResolverView{},
		RRsets:    map[string]*RRsetView{},
	}
	if svc.SOA != nil {
		data.DeclaredSerial = svc.SOA.Serial
	}

	// If the selection matches no resolvers, simply return the (empty)
	// payload. Rules classify "no resolvers matched" as their own concern.
	if len(resolvers) == 0 {
		data.Stats = computeBasicStats(data)
		return data, nil
	}

	runCtx, cancel := context.WithTimeout(ctx, time.Duration(runTimeoutS)*time.Second)
	defer cancel()
	started := time.Now()

	// Ground truth from the zone's own authoritative servers.
	expected := collectExpected(runCtx, zone, svc, names, qtypes)
	for _, n := range names {
		for _, qt := range qtypes {
			key := rrsetKey(n, dns.TypeToString[qt])
			v := &RRsetView{
				Name: strings.ToLower(dns.Fqdn(n)),
				Type: dns.TypeToString[qt],
			}
			if e, ok := expected[key]; ok {
				v.Expected = e.sig
				v.ExpectedRecords = e.records
			}
			data.RRsets[key] = v
		}
	}

	// Fan out probes across resolvers × transports × RRsets. Encrypted
	// transports are only scheduled for resolvers that publish an endpoint;
	// unknown transport names are ignored.
	type probeJob struct {
		r  Resolver
		tr Transport
	}
	var jobs []probeJob
	for _, r := range resolvers {
		for _, tr := range transports {
			switch tr {
			case TransportUDP, TransportTCP:
				jobs = append(jobs, probeJob{r: r, tr: tr})
			case TransportDoT:
				if r.DoTHost != "" {
					jobs = append(jobs, probeJob{r: r, tr: tr})
				}
			case TransportDoH:
				if r.DoHURL != "" {
					jobs = append(jobs, probeJob{r: r, tr: tr})
				}
			}
		}
	}

	// At most maxConcurrent jobs run at once; sem is the counting semaphore.
	const maxConcurrent = 32
	sem := make(chan struct{}, maxConcurrent)
	var wg sync.WaitGroup
	var mu sync.Mutex // guards data.Resolvers
	for _, job := range jobs {
		job := job
		wg.Add(1)
		sem <- struct{}{}
		go func() {
			defer wg.Done()
			defer func() { <-sem }()

			// UDP keeps the bare resolver ID; other transports get a
			// "|<transport>" suffix so each combination has its own view.
			rid := job.r.ID
			if job.tr != TransportUDP {
				rid = fmt.Sprintf("%s|%s", job.r.ID, job.tr)
			}
			view := &ResolverView{
				ID:        rid,
				Name:      job.r.Name,
				IP:        job.r.IP,
				Region:    job.r.Region,
				Filtered:  job.r.Filtered,
				Transport: job.tr,
				Probes:    map[string]*RRProbe{},
			}
			for _, n := range names {
				for _, qt := range qtypes {
					probe := runProbe(runCtx, job.r, job.tr, n, qt)
					key := rrsetKey(n, dns.TypeToString[qt])
					view.Probes[key] = probe
					// One successful exchange is enough to call the
					// resolver reachable.
					if probe.Error == "" {
						view.Reachable = true
					}
				}
			}
			mu.Lock()
			data.Resolvers[rid] = view
			mu.Unlock()
		}()
	}
	wg.Wait()

	data.RunDurationMs = time.Since(started).Milliseconds()
	data.Stats = computeBasicStats(data)
	return data, nil
}
// runProbe sends one question (name, qtype) to resolver r over transport tr
// and converts the outcome into an RRProbe. On a query error only Transport
// and Error are set. Signature, Records and MinTTL are filled exclusively for
// NOERROR responses.
func runProbe(ctx context.Context, r Resolver, tr Transport, name string, qtype uint16) *RRProbe {
	res, err := queryResolver(ctx, r, tr, name, qtype)
	if err != nil {
		return &RRProbe{Transport: tr, Error: err.Error()}
	}

	probe := &RRProbe{
		Transport: tr,
		Rcode:     rcodeToString(res.Rcode),
		AD:        res.AD,
		LatencyMs: res.Latency.Milliseconds(),
	}
	if res.Rcode != dns.RcodeSuccess {
		return probe
	}
	probe.Signature, probe.Records, probe.MinTTL = signatureFromRRs(res.Answer, name, qtype)
	return probe
}
// expectedEntry is the ground-truth answer for one RRset as obtained from the
// zone's authoritative servers. queryAuthoritative leaves both fields at their
// zero value when the authoritative server replies with a non-success rcode.
type expectedEntry struct {
	// sig is the signature computed by signatureFromRRs over the
	// authoritative answer section.
	sig string
	// records is the record list returned by signatureFromRRs alongside sig.
	records []string
}
// collectExpected builds the ground-truth map (keyed by rrsetKey) by asking
// the zone's authoritative servers for every name × type combination.
//
// The name servers declared on svc are used when present; otherwise the NS set
// is looked up through the system resolver. Name servers whose address lookup
// fails are skipped. The returned map is empty when no NS or no address could
// be found, and lacks a key whenever queryAuthoritative returns nil.
func collectExpected(ctx context.Context, zone string, svc *originService, names []string, qtypes []uint16) map[string]*expectedEntry {
	expected := map[string]*expectedEntry{}
	var sysResolver net.Resolver

	// Step 1: name-server host names, from the service or from the DNS.
	var hosts []string
	for _, rr := range svc.NameServers {
		if rr != nil {
			hosts = append(hosts, strings.ToLower(dns.Fqdn(rr.Ns)))
		}
	}
	if len(hosts) == 0 {
		found, err := sysResolver.LookupNS(ctx, strings.TrimSuffix(zone, "."))
		if err != nil {
			log.Printf("collectExpected: NS lookup failed for %q: %v", zone, err)
			return expected
		}
		for _, ns := range found {
			hosts = append(hosts, strings.ToLower(dns.Fqdn(ns.Host)))
		}
	}

	// Step 2: host names -> "ip:53" targets. Lookup failures are tolerated.
	var targets []string
	for _, host := range hosts {
		ips, err := sysResolver.LookupHost(ctx, strings.TrimSuffix(host, "."))
		if err != nil {
			continue
		}
		for _, ip := range ips {
			targets = append(targets, net.JoinHostPort(ip, "53"))
		}
	}
	if len(targets) == 0 {
		return expected
	}

	// Step 3: one authoritative query per RRset.
	for _, owner := range names {
		for _, qt := range qtypes {
			entry := queryAuthoritative(ctx, targets, owner, qt)
			if entry == nil {
				continue
			}
			expected[rrsetKey(owner, dns.TypeToString[qt])] = entry
		}
	}
	return expected
}
// queryAuthoritative asks the given servers ("host:port"), in order, for
// name/qtype with recursion disabled and returns the first authoritative
// answer.
//
// Servers that fail to respond or that answer without the AA bit are skipped.
// An authoritative reply with a non-success rcode yields an empty entry; nil
// is returned when no server produced an authoritative reply.
//
// The query is first sent over UDP. miekg/dns does not fall back to TCP on its
// own, so a truncated (TC) reply is retried over TCP against the same server;
// otherwise a partial answer section would be recorded as the ground truth.
func queryAuthoritative(ctx context.Context, servers []string, name string, qtype uint16) *expectedEntry {
	q := dns.Question{Name: dns.Fqdn(name), Qtype: qtype, Qclass: dns.ClassINET}
	m := new(dns.Msg)
	m.Id = dns.Id()
	m.Question = []dns.Question{q}
	m.RecursionDesired = false
	m.SetEdns0(ednsUDPSize, false)

	udpClient := dns.Client{Timeout: dnsTimeout}
	tcpClient := dns.Client{Net: "tcp", Timeout: dnsTimeout}
	for _, srv := range servers {
		r, _, err := udpClient.ExchangeContext(ctx, m, srv)
		if err == nil && r != nil && r.Truncated {
			// Partial answer: fetch the complete response over TCP.
			r, _, err = tcpClient.ExchangeContext(ctx, m, srv)
		}
		if err != nil || r == nil {
			continue
		}
		if !r.Authoritative {
			continue
		}
		if r.Rcode != dns.RcodeSuccess {
			return &expectedEntry{}
		}
		sig, recs, _ := signatureFromRRs(r.Answer, name, qtype)
		return &expectedEntry{sig: sig, records: recs}
	}
	return nil
}
// computeBasicStats tallies the judgement-free counters of an observation:
// total, reachable, filtered and unfiltered resolver views, plus the number of
// distinct Region values (stored in CountriesCovered). The agreement metric
// (UnfilteredAgreeing) is not computed here; deriveView fills it in once a
// consensus exists.
func computeBasicStats(data *ResolverPropagationData) Stats {
	var stats Stats
	stats.TotalResolvers = len(data.Resolvers)

	seenRegions := make(map[string]struct{})
	for _, view := range data.Resolvers {
		seenRegions[view.Region] = struct{}{}
		if view.Reachable {
			stats.ReachableResolvers++
		}
		switch {
		case view.Filtered:
			stats.FilteredProbed++
		default:
			stats.UnfilteredProbed++
		}
	}
	stats.CountriesCovered = len(seenRegions)
	return stats
}
// loadService decodes the origin service attached to opts under "service".
//
// When no service is attached (standalone / interactive use) an empty
// originService is returned so that collectExpected falls back to looking up
// the NS set through the system resolver. A service whose type is neither
// empty, "abstract.Origin" nor "abstract.NSOnlyOrigin" is rejected, as is a
// payload that fails to decode as JSON.
func loadService(opts sdk.CheckerOptions) (*originService, error) {
	msg, found := sdk.GetOption[serviceMessage](opts, "service")
	if !found {
		return &originService{}, nil
	}

	if t := msg.Type; t != "" && t != "abstract.Origin" && t != "abstract.NSOnlyOrigin" {
		return nil, fmt.Errorf("service is %s, expected abstract.Origin or abstract.NSOnlyOrigin", t)
	}

	origin := new(originService)
	if err := json.Unmarshal(msg.Service, origin); err != nil {
		return nil, fmt.Errorf("decoding origin service: %w", err)
	}
	return origin, nil
}
// loadZone returns the fully-qualified zone name to probe. The "domain_name"
// option wins when it is set and non-empty; otherwise the owner name of the
// service's SOA record is used. An error is returned when neither is
// available.
func loadZone(opts sdk.CheckerOptions, svc *originService) (string, error) {
	name, ok := sdk.GetOption[string](opts, "domain_name")
	switch {
	case ok && name != "":
		return dns.Fqdn(name), nil
	case svc.SOA != nil && svc.SOA.Header().Name != "":
		return dns.Fqdn(svc.SOA.Header().Name), nil
	}
	return "", fmt.Errorf("no zone name provided (missing 'domain_name' option and SOA header)")
}
// getStringOpt reads the string option key from opts, returning dflt when the
// option is absent or set to the empty string.
func getStringOpt(opts sdk.CheckerOptions, key, dflt string) string {
	value, present := sdk.GetOption[string](opts, key)
	if !present || value == "" {
		return dflt
	}
	return value
}
// parseCSV splits a comma-separated list, trimming surrounding whitespace from
// every item and dropping items that end up empty. An empty input yields nil;
// a non-empty input made only of separators yields an empty, non-nil slice.
func parseCSV(s string) []string {
	if s == "" {
		return nil
	}
	fields := make([]string, 0, strings.Count(s, ",")+1)
	for rest, more := s, true; more; {
		var item string
		item, rest, more = strings.Cut(rest, ",")
		if item = strings.TrimSpace(item); item != "" {
			fields = append(fields, item)
		}
	}
	return fields
}
// parseQTypes turns a comma-separated list of RR type mnemonics into their
// numeric codes, matching case-insensitively. Unknown mnemonics and repeated
// types are dropped; first-occurrence order is preserved. The result is nil
// when nothing valid was found.
func parseQTypes(s string) []uint16 {
	var (
		types []uint16
		known = map[uint16]struct{}{}
	)
	for _, token := range parseCSV(s) {
		code, valid := dns.StringToType[strings.ToUpper(token)]
		if !valid {
			continue
		}
		if _, dup := known[code]; dup {
			continue
		}
		known[code] = struct{}{}
		types = append(types, code)
	}
	return types
}
// qtypeNames maps numeric RR types to their mnemonics via dns.TypeToString,
// keeping the input order. A type missing from that table maps to "".
func qtypeNames(qtypes []uint16) []string {
	labels := make([]string, 0, len(qtypes))
	for _, code := range qtypes {
		labels = append(labels, dns.TypeToString[code])
	}
	return labels
}
// joinSubdomain resolves one "subdomains" entry against zone:
//   - "" or "@" designates the apex and returns the fully-qualified zone;
//   - an entry ending in "." is already absolute and is returned lower-cased;
//   - anything else is prepended to the zone and lower-cased.
//
// Surrounding whitespace in sd is ignored.
func joinSubdomain(sd, zone string) string {
	apex := dns.Fqdn(zone)
	switch label := strings.TrimSpace(sd); {
	case label == "", label == "@":
		return apex
	case strings.HasSuffix(label, "."):
		return strings.ToLower(label)
	default:
		return strings.ToLower(label + "." + apex)
	}
}
// extractSerial parses the serial out of the first entry of records, which is
// expected to hold at least seven whitespace-separated fields with the serial
// in third position (presumably SOA RDATA in presentation format — confirm
// against signatureFromRRs). It returns 0 when records is empty, the entry is
// too short, or the serial is not a valid unsigned 32-bit decimal.
func extractSerial(records []string) uint32 {
	if len(records) == 0 {
		return 0
	}
	if tokens := strings.Fields(records[0]); len(tokens) >= 7 {
		if serial, err := strconv.ParseUint(tokens[2], 10, 32); err == nil {
			return uint32(serial)
		}
	}
	return 0
}
// isValidatingResolver reports whether id belongs to the hardcoded allowlist
// of resolvers whose AD bit is considered trustworthy. Only the part of id
// before the first "|" is compared, so transport-suffixed IDs such as
// "cloudflare|tcp" match their base resolver.
func isValidatingResolver(id string) bool {
	base, _, _ := strings.Cut(id, "|")
	switch base {
	case "cloudflare", "cloudflare-malware", "cloudflare-family":
	case "google":
	case "quad9", "quad9-unfiltered":
	case "adguard", "adguard-unfiltered", "adguard-family":
	case "cleanbrowsing-family", "cleanbrowsing-adult":
	default:
		return false
	}
	return true
}
// firstN renders items as a short "x, y, z (+N more)" display list: every
// item when there are at most n of them, otherwise the first n followed by
// the count of omitted items.
func firstN(items []string, n int) string {
	hidden := len(items) - n
	if hidden <= 0 {
		return strings.Join(items, ", ")
	}
	return fmt.Sprintf("%s (+%d more)", strings.Join(items[:n], ", "), hidden)
}

243
checker/collect_test.go Normal file
View file

@ -0,0 +1,243 @@
package checker
import (
"encoding/json"
"reflect"
"sort"
"testing"
"github.com/miekg/dns"
sdk "git.happydns.org/checker-sdk-go/checker"
)
// TestParseCSV covers empty input, single and multiple items, whitespace and
// empty-item trimming, and separator-only input.
func TestParseCSV(t *testing.T) {
	for _, tc := range []struct {
		input    string
		expected []string
	}{
		{input: "", expected: nil},
		{input: "a", expected: []string{"a"}},
		{input: "a,b,c", expected: []string{"a", "b", "c"}},
		{input: " a , ,b ,", expected: []string{"a", "b"}},
		{input: ",,,", expected: []string{}},
	} {
		actual := parseCSV(tc.input)
		if len(actual) != len(tc.expected) {
			t.Errorf("parseCSV(%q) len = %d, want %d", tc.input, len(actual), len(tc.expected))
			continue
		}
		for idx, item := range actual {
			if item != tc.expected[idx] {
				t.Errorf("parseCSV(%q)[%d] = %q, want %q", tc.input, idx, item, tc.expected[idx])
			}
		}
	}
}
func TestParseQTypes(t *testing.T) {
got := parseQTypes("a,aaaa,MX,TxT,bogus,A") // A duplicated; bogus skipped
want := []uint16{dns.TypeA, dns.TypeAAAA, dns.TypeMX, dns.TypeTXT}
if !reflect.DeepEqual(got, want) {
t.Errorf("parseQTypes = %v, want %v", got, want)
}
if got := parseQTypes(""); got != nil {
t.Errorf("parseQTypes(\"\") = %v, want nil", got)
}
if got := parseQTypes("nope,onlybad"); got != nil {
t.Errorf("parseQTypes(bad) = %v, want nil", got)
}
}
func TestQtypeNames(t *testing.T) {
got := qtypeNames([]uint16{dns.TypeA, dns.TypeMX})
want := []string{"A", "MX"}
if !reflect.DeepEqual(got, want) {
t.Errorf("qtypeNames = %v, want %v", got, want)
}
}
// TestJoinSubdomain covers apex aliases, relative labels, case folding,
// already-absolute names and whitespace trimming.
func TestJoinSubdomain(t *testing.T) {
	type testCase struct {
		label    string
		zone     string
		expected string
	}
	table := []testCase{
		{label: "", zone: "example.com", expected: "example.com."},
		{label: "@", zone: "example.com.", expected: "example.com."},
		{label: "www", zone: "example.com", expected: "www.example.com."},
		{label: "WWW", zone: "Example.Com", expected: "www.example.com."},
		// Already fully qualified: used as-is.
		{label: "foo.example.org.", zone: "example.com", expected: "foo.example.org."},
		{label: " www ", zone: "example.com", expected: "www.example.com."},
	}
	for i := range table {
		tc := table[i]
		actual := joinSubdomain(tc.label, tc.zone)
		if actual != tc.expected {
			t.Errorf("joinSubdomain(%q,%q) = %q, want %q", tc.label, tc.zone, actual, tc.expected)
		}
	}
}
// TestExtractSerial covers nil input, a well-formed record, a record with too
// few fields, a non-numeric serial and a serial overflowing uint32.
func TestExtractSerial(t *testing.T) {
	type testCase struct {
		records  []string
		expected uint32
	}
	table := []testCase{
		{records: nil, expected: 0},
		{records: []string{"ns. hostmaster. 2024010101 7200 3600 1209600 3600"}, expected: 2024010101},
		{records: []string{"too few fields"}, expected: 0},
		{records: []string{"ns. hm. notanumber 1 2 3 4"}, expected: 0},
		// Does not fit in a uint32.
		{records: []string{"ns. hm. 99999999999999999 1 2 3 4"}, expected: 0},
	}
	for i := range table {
		tc := table[i]
		actual := extractSerial(tc.records)
		if actual != tc.expected {
			t.Errorf("extractSerial(%v) = %d, want %d", tc.records, actual, tc.expected)
		}
	}
}
// TestFirstN checks the three shapes of firstN output: list shorter than the
// limit, list longer than the limit, and nil list.
func TestFirstN(t *testing.T) {
	for _, tc := range []struct {
		label    string
		items    []string
		limit    int
		expected string
	}{
		{label: "under", items: []string{"a", "b"}, limit: 5, expected: "a, b"},
		{label: "over", items: []string{"a", "b", "c", "d"}, limit: 2, expected: "a, b (+2 more)"},
		{label: "nil", items: nil, limit: 3, expected: ""},
	} {
		if actual := firstN(tc.items, tc.limit); actual != tc.expected {
			t.Errorf("%s: %q", tc.label, actual)
		}
	}
}
// TestIsValidatingResolver checks a sample of allowlisted and non-allowlisted
// resolver IDs, including a transport-suffixed ID.
func TestIsValidatingResolver(t *testing.T) {
	validating := []string{"cloudflare", "google", "quad9", "adguard"}
	for i := range validating {
		if name := validating[i]; !isValidatingResolver(name) {
			t.Errorf("%s should validate", name)
		}
	}

	nonValidating := []string{"opendns", "yandex", "ntt-jp", ""}
	for i := range nonValidating {
		if name := nonValidating[i]; isValidatingResolver(name) {
			t.Errorf("%s should NOT validate", name)
		}
	}

	// The "|<transport>" suffix (e.g. "cloudflare|tcp") must be ignored.
	if ok := isValidatingResolver("cloudflare|tcp"); !ok {
		t.Errorf("transport-suffixed ID should still validate")
	}
}
func TestComputeBasicStats(t *testing.T) {
data := &ResolverPropagationData{
Resolvers: map[string]*ResolverView{
"a": {Region: "eu", Reachable: true},
"b": {Region: "eu", Reachable: false, Filtered: true},
"c": {Region: "global", Reachable: true},
"d": {Region: "na", Reachable: true, Filtered: true},
},
}
s := computeBasicStats(data)
if s.TotalResolvers != 4 {
t.Errorf("total = %d", s.TotalResolvers)
}
if s.ReachableResolvers != 3 {
t.Errorf("reachable = %d", s.ReachableResolvers)
}
if s.FilteredProbed != 2 || s.UnfilteredProbed != 2 {
t.Errorf("split filtered=%d unfiltered=%d", s.FilteredProbed, s.UnfilteredProbed)
}
if s.CountriesCovered != 3 {
t.Errorf("regions = %d", s.CountriesCovered)
}
}
// TestGetStringOpt checks that a set option is returned as-is while an empty
// or missing option yields the default.
func TestGetStringOpt(t *testing.T) {
	options := sdk.CheckerOptions{"a": "x", "b": ""}
	for _, tc := range []struct {
		key      string
		expected string
	}{
		{key: "a", expected: "x"},
		{key: "b", expected: "d"},
		{key: "missing", expected: "d"},
	} {
		if actual := getStringOpt(options, tc.key, "d"); actual != tc.expected {
			t.Errorf("%s = %q", tc.key, actual)
		}
	}
}
func TestLoadService(t *testing.T) {
// Missing service: tolerated (standalone / interactive use). Returns
// an empty payload so collectExpected falls back to the system resolver.
if svc, err := loadService(sdk.CheckerOptions{}); err != nil {
t.Errorf("unexpected error for missing service: %v", err)
} else if svc == nil || svc.SOA != nil || len(svc.NameServers) != 0 {
t.Errorf("want empty service, got %+v", svc)
}
// Wrong type.
bad := serviceMessage{Type: "abstract.NotOrigin", Service: json.RawMessage(`{}`)}
if _, err := loadService(sdk.CheckerOptions{"service": bad}); err == nil {
t.Errorf("want error for wrong service type")
}
// Valid Origin payload.
msg := serviceMessage{
Type: "abstract.Origin",
Service: json.RawMessage(`{"soa":{"Hdr":{"Name":"example.com.","Rrtype":6,"Class":1,"Ttl":3600},"Ns":"ns.example.com.","Mbox":"hm.example.com.","Serial":42,"Refresh":3600,"Retry":600,"Expire":86400,"Minttl":300}}`),
}
svc, err := loadService(sdk.CheckerOptions{"service": msg})
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if svc.SOA == nil || svc.SOA.Serial != 42 {
t.Errorf("got SOA = %+v", svc.SOA)
}
// Empty type is accepted.
emptyType := serviceMessage{Type: "", Service: json.RawMessage(`{}`)}
if _, err := loadService(sdk.CheckerOptions{"service": emptyType}); err != nil {
t.Errorf("empty type should be allowed: %v", err)
}
// Malformed JSON in Service.
bad2 := serviceMessage{Type: "abstract.Origin", Service: json.RawMessage(`not-json`)}
if _, err := loadService(sdk.CheckerOptions{"service": bad2}); err == nil {
t.Errorf("want decode error")
}
}
func TestLoadZone(t *testing.T) {
// From explicit option.
z, err := loadZone(sdk.CheckerOptions{"domain_name": "example.com"}, &originService{})
if err != nil || z != "example.com." {
t.Errorf("explicit: %q %v", z, err)
}
// Fallback to SOA header.
soa := &dns.SOA{Hdr: dns.RR_Header{Name: "fallback.test."}}
z, err = loadZone(sdk.CheckerOptions{}, &originService{SOA: soa})
if err != nil || z != "fallback.test." {
t.Errorf("fallback: %q %v", z, err)
}
// No source available.
if _, err := loadZone(sdk.CheckerOptions{}, &originService{}); err == nil {
t.Errorf("want error when nothing supplies a zone")
}
}
// TestNamesAreDeduplicated is a smoke test for the de-duplication loop in
// Collect: it rebuilds the owner-name list the same way Collect does and
// confirms that repeated or apex-equivalent entries are not added twice.
func TestNamesAreDeduplicated(t *testing.T) {
	apex := dns.Fqdn("example.com")
	owners := []string{apex}
	known := map[string]bool{apex: true}

	extras := []string{"@", "www", "www", "mail"}
	for i := range extras {
		candidate := joinSubdomain(extras[i], apex)
		if known[candidate] {
			continue
		}
		known[candidate] = true
		owners = append(owners, candidate)
	}

	sort.Strings(owners)
	expected := []string{"example.com.", "mail.example.com.", "www.example.com."}
	if !reflect.DeepEqual(owners, expected) {
		t.Errorf("names = %v, want %v", owners, expected)
	}
}

123
checker/consensus.go Normal file
View file

@ -0,0 +1,123 @@
package checker
import (
"sort"
)
// deriveView (re)computes, in place, every derived field of data: for each
// RRset the answer groups, the consensus signature, the agreeing/dissenting
// resolver lists and MatchesExpected; then Stats.UnfilteredAgreeing.
//
// Idempotent: rules and report both call it; both must see the same grouping.
// To guarantee that, no result depends on Go's randomised map iteration
// order: resolvers are visited in sorted key order and every tie has an
// explicit tie-break.
//
// Consensus selection, in order of precedence:
//  1. groups backed by at least one unfiltered resolver beat groups backed
//     only by filtered resolvers (filtered resolvers never vote);
//  2. among voted groups, NOERROR beats any other rcode;
//  3. more unfiltered votes win;
//  4. remaining ties go to the group with more resolvers, then to the
//     lexically smaller signature.
//
// Groups are keyed by signature only. Probes built by runProbe carry an empty
// signature for every non-NOERROR rcode, so such answers share one group whose
// Rcode/Records are those of the first resolver in sorted key order.
func deriveView(data *ResolverPropagationData) {
	if data == nil {
		return
	}

	// Fixed visiting order for resolvers, shared by all RRsets.
	resolverKeys := make([]string, 0, len(data.Resolvers))
	for k := range data.Resolvers {
		resolverKeys = append(resolverKeys, k)
	}
	sort.Strings(resolverKeys)

	type group struct {
		sig       string
		rcode     string
		records   []string
		resolvers []string
		votes     int // number of unfiltered resolvers in the group
	}
	// rank orders groups by eligibility: unvoted < voted non-NOERROR < voted NOERROR.
	rank := func(g *group) int {
		switch {
		case g.votes == 0:
			return 0
		case g.rcode != "NOERROR":
			return 1
		default:
			return 2
		}
	}

	for key, view := range data.RRsets {
		// Reset derived fields so repeated calls stay idempotent.
		view.Groups = nil
		view.ConsensusSig = ""
		view.Agreeing = nil
		view.Dissenting = nil
		view.MatchesExpected = false

		bySig := map[string]*group{}
		var groups []*group
		for _, rk := range resolverKeys {
			rv := data.Resolvers[rk]
			p := rv.Probes[key]
			if p == nil || p.Error != "" {
				continue
			}
			g := bySig[p.Signature]
			if g == nil {
				g = &group{sig: p.Signature, rcode: p.Rcode, records: p.Records}
				bySig[p.Signature] = g
				groups = append(groups, g)
			}
			g.resolvers = append(g.resolvers, rv.ID)
			if !rv.Filtered {
				g.votes++
			}
		}

		// Largest group first; equal sizes are ordered by signature so the
		// published order is stable across calls.
		for _, g := range groups {
			sort.Strings(g.resolvers)
		}
		sort.Slice(groups, func(i, j int) bool {
			if len(groups[i].resolvers) != len(groups[j].resolvers) {
				return len(groups[i].resolvers) > len(groups[j].resolvers)
			}
			return groups[i].sig < groups[j].sig
		})

		// Pick the winning group. A pointer (rather than the signature) is
		// tracked because the empty signature is a legitimate winner.
		// Strict comparisons keep the earlier (larger / lexically smaller)
		// group on ties.
		var win *group
		for _, g := range groups {
			if win == nil {
				win = g
				continue
			}
			rg, rw := rank(g), rank(win)
			if rg > rw || (rg == rw && g.votes > win.votes) {
				win = g
			}
		}
		if win != nil {
			view.ConsensusSig = win.sig
		}

		for _, g := range groups {
			view.Groups = append(view.Groups, SignatureGroup{
				Signature: g.sig,
				Records:   g.records,
				Resolvers: g.resolvers,
				Rcode:     g.rcode,
			})
			if g == win {
				view.Agreeing = append(view.Agreeing, g.resolvers...)
			} else {
				view.Dissenting = append(view.Dissenting, g.resolvers...)
			}
		}
		sort.Strings(view.Agreeing)
		sort.Strings(view.Dissenting)

		// Without a ground truth there is nothing to match against.
		if view.Expected != "" {
			view.MatchesExpected = view.ConsensusSig == view.Expected
		}
	}

	// Recompute UnfilteredAgreeing from the consensus we just built: a
	// reachable, unfiltered resolver agrees when none of its successful
	// probes differs from a non-empty consensus signature.
	agree := 0
	for _, rv := range data.Resolvers {
		if rv.Filtered || !rv.Reachable {
			continue
		}
		ok := true
		for key, p := range rv.Probes {
			if p == nil || p.Error != "" {
				continue
			}
			v := data.RRsets[key]
			if v == nil || v.ConsensusSig == "" {
				continue
			}
			if p.Signature != v.ConsensusSig {
				ok = false
				break
			}
		}
		if ok {
			agree++
		}
	}
	data.Stats.UnfilteredAgreeing = agree
}

146
checker/consensus_test.go Normal file
View file

@ -0,0 +1,146 @@
package checker
import (
"reflect"
"testing"
)
// TestDeriveView_Nil checks that deriveView tolerates a nil observation.
func TestDeriveView_Nil(t *testing.T) {
	deriveView(nil) // must not panic
}
func TestDeriveView_PicksMajoritySignature(t *testing.T) {
key := "example.com./A"
data := &ResolverPropagationData{
Resolvers: map[string]*ResolverView{
"a": mkResolver("a", "eu", false, true, map[string]*RRProbe{key: mkProbe("NOERROR", "1.1.1.1")}),
"b": mkResolver("b", "global", false, true, map[string]*RRProbe{key: mkProbe("NOERROR", "1.1.1.1")}),
"c": mkResolver("c", "na", false, true, map[string]*RRProbe{key: mkProbe("NOERROR", "9.9.9.9")}),
},
RRsets: map[string]*RRsetView{
key: {Name: "example.com.", Type: "A"},
},
}
deriveView(data)
v := data.RRsets[key]
if v.ConsensusSig != "1.1.1.1" {
t.Errorf("consensus = %q", v.ConsensusSig)
}
if !reflect.DeepEqual(v.Agreeing, []string{"a", "b"}) {
t.Errorf("agreeing = %v", v.Agreeing)
}
if !reflect.DeepEqual(v.Dissenting, []string{"c"}) {
t.Errorf("dissenting = %v", v.Dissenting)
}
if data.Stats.UnfilteredAgreeing != 2 {
t.Errorf("unfilteredAgreeing = %d", data.Stats.UnfilteredAgreeing)
}
}
func TestDeriveView_FilteredResolverDoesNotVote(t *testing.T) {
key := "example.com./A"
data := &ResolverPropagationData{
Resolvers: map[string]*ResolverView{
"good": mkResolver("good", "eu", false, true, map[string]*RRProbe{key: mkProbe("NOERROR", "1.1.1.1")}),
"filt": mkResolver("filt", "eu", true, true, map[string]*RRProbe{key: mkProbe("NOERROR", "0.0.0.0")}),
"filt2": mkResolver("filt2", "eu", true, true, map[string]*RRProbe{key: mkProbe("NOERROR", "0.0.0.0")}),
"filt3": mkResolver("filt3", "eu", true, true, map[string]*RRProbe{key: mkProbe("NOERROR", "0.0.0.0")}),
},
RRsets: map[string]*RRsetView{
key: {Name: "example.com.", Type: "A"},
},
}
deriveView(data)
if data.RRsets[key].ConsensusSig != "1.1.1.1" {
t.Errorf("filtered resolvers should not win: %q", data.RRsets[key].ConsensusSig)
}
}
func TestDeriveView_ExpectedMatch(t *testing.T) {
key := "example.com./A"
data := &ResolverPropagationData{
Resolvers: map[string]*ResolverView{
"a": mkResolver("a", "eu", false, true, map[string]*RRProbe{key: mkProbe("NOERROR", "1.1.1.1")}),
},
RRsets: map[string]*RRsetView{
key: {Name: "example.com.", Type: "A", Expected: "1.1.1.1"},
},
}
deriveView(data)
if !data.RRsets[key].MatchesExpected {
t.Errorf("expected match should be true")
}
// Drift case.
data.RRsets[key].Expected = "9.9.9.9"
data.RRsets[key].MatchesExpected = false
deriveView(data)
if data.RRsets[key].MatchesExpected {
t.Errorf("expected match should be false on drift")
}
}
func TestDeriveView_Idempotent(t *testing.T) {
key := "example.com./A"
data := &ResolverPropagationData{
Resolvers: map[string]*ResolverView{
"a": mkResolver("a", "eu", false, true, map[string]*RRProbe{key: mkProbe("NOERROR", "1.1.1.1")}),
"b": mkResolver("b", "eu", false, true, map[string]*RRProbe{key: mkProbe("NOERROR", "1.1.1.1")}),
},
RRsets: map[string]*RRsetView{key: {Name: "example.com.", Type: "A"}},
}
deriveView(data)
first := *data.RRsets[key]
deriveView(data)
second := *data.RRsets[key]
if !reflect.DeepEqual(first.Groups, second.Groups) ||
first.ConsensusSig != second.ConsensusSig ||
!reflect.DeepEqual(first.Agreeing, second.Agreeing) {
t.Errorf("deriveView is not idempotent: %+v vs %+v", first, second)
}
}
func TestDeriveView_SkipsErrorProbes(t *testing.T) {
key := "example.com./A"
data := &ResolverPropagationData{
Resolvers: map[string]*ResolverView{
"a": mkResolver("a", "eu", false, true, map[string]*RRProbe{key: {Error: "timeout", Transport: TransportUDP}}),
"b": mkResolver("b", "eu", false, true, map[string]*RRProbe{key: mkProbe("NOERROR", "1.1.1.1")}),
},
RRsets: map[string]*RRsetView{key: {Name: "example.com.", Type: "A"}},
}
deriveView(data)
if data.RRsets[key].ConsensusSig != "1.1.1.1" {
t.Errorf("err probe shouldn't be counted: %q", data.RRsets[key].ConsensusSig)
}
}
func TestDeriveView_DissenterDoesNotAgree(t *testing.T) {
// Resolver "c" probes two RRsets and disagrees on one ⇒ should not be
// counted in UnfilteredAgreeing.
k1, k2 := "ex./A", "ex./MX"
data := &ResolverPropagationData{
Resolvers: map[string]*ResolverView{
"a": mkResolver("a", "eu", false, true, map[string]*RRProbe{
k1: mkProbe("NOERROR", "1.1.1.1"),
k2: mkProbe("NOERROR", "10 mx."),
}),
"b": mkResolver("b", "eu", false, true, map[string]*RRProbe{
k1: mkProbe("NOERROR", "1.1.1.1"),
k2: mkProbe("NOERROR", "10 mx."),
}),
"c": mkResolver("c", "eu", false, true, map[string]*RRProbe{
k1: mkProbe("NOERROR", "1.1.1.1"),
k2: mkProbe("NOERROR", "20 nope."), // dissents
}),
},
RRsets: map[string]*RRsetView{
k1: {Name: "ex.", Type: "A"},
k2: {Name: "ex.", Type: "MX"},
},
}
deriveView(data)
if data.Stats.UnfilteredAgreeing != 2 {
t.Errorf("UnfilteredAgreeing = %d, want 2", data.Stats.UnfilteredAgreeing)
}
}

111
checker/definition.go Normal file
View file

@ -0,0 +1,111 @@
package checker
import (
"time"
sdk "git.happydns.org/checker-sdk-go/checker"
)
// Version is the checker version reported in CheckerDefinition.Version.
// It defaults to "built-in" and, being a variable, can be overridden by the
// embedding program or at link time.
// NOTE(review): the Dockerfile injects "-X main.Version=...", i.e. the main
// package's symbol, not this one — confirm main forwards its value here.
var Version = "built-in"
// Definition describes the resolver-propagation checker to the host: its
// identity, the services it applies to, the options it accepts (user-, domain-
// and service-level), its rules and its scheduling interval.
func (p *resolverPropagationProvider) Definition() *sdk.CheckerDefinition {
	// Options the user can tune, in display order.
	recordTypes := sdk.CheckerOptionDocumentation{
		Id:          "recordTypes",
		Type:        "string",
		Label:       "Record types to probe",
		Description: "Comma-separated list of RR types. The checker probes every listed type at the zone apex (and at each 'subdomains' entry).",
		Default:     "SOA,NS,A,AAAA,MX,TXT,CAA",
	}
	subdomains := sdk.CheckerOptionDocumentation{
		Id:          "subdomains",
		Type:        "string",
		Label:       "Extra subdomains to probe",
		Description: "Comma-separated list of owner names to probe in addition to the zone apex (e.g. \"www,mail,@\"). Leave empty to only probe the apex.",
		Default:     "www",
	}
	includeFiltered := sdk.CheckerOptionDocumentation{
		Id:          "includeFiltered",
		Type:        "bool",
		Label:       "Include filtered resolvers",
		Description: "Probe filtering resolvers (malware/family/adblock). Their answers routinely disagree with the consensus by design; enable only when diagnosing a blocklist hit.",
		Default:     false,
	}
	region := sdk.CheckerOptionDocumentation{
		Id:          "region",
		Type:        "string",
		Label:       "Restrict to region",
		Description: "Only probe resolvers from the given region. Use 'all' for a worldwide run.",
		Choices:     []string{"all", "global", "na", "eu", "asia", "ru", "me"},
		Default:     "all",
	}
	transports := sdk.CheckerOptionDocumentation{
		Id:          "transports",
		Type:        "string",
		Label:       "Transports",
		Description: "Comma-separated list of transports to probe. 'udp' is the baseline; 'tcp', 'dot' and 'doh' add coverage. Encrypted transports are only used for resolvers that publish an endpoint.",
		Default:     "udp",
	}
	resolverAllowlist := sdk.CheckerOptionDocumentation{
		Id:          "resolverAllowlist",
		Type:        "string",
		Label:       "Resolver allowlist (advanced)",
		Description: "Comma-separated list of resolver IDs or IPs to probe exclusively. Leave empty to use the catalog selection. Example: \"cloudflare,google,9.9.9.9\".",
		Default:     "",
	}
	latencyThreshold := sdk.CheckerOptionDocumentation{
		Id:          "latencyThresholdMs",
		Type:        "uint",
		Label:       "Latency warning threshold (ms)",
		Description: "Resolvers averaging above this value produce an info finding.",
		Default:     float64(500),
	}
	runTimeout := sdk.CheckerOptionDocumentation{
		Id:          "runTimeoutSeconds",
		Type:        "uint",
		Label:       "Run timeout (seconds)",
		Description: "Hard wall-clock budget for one propagation run. Slow resolvers beyond this simply report as unreachable.",
		Default:     float64(30),
	}

	// Options filled in automatically by the host.
	domainName := sdk.CheckerOptionDocumentation{
		Id:       "domain_name",
		Label:    "Zone name",
		AutoFill: sdk.AutoFillDomainName,
	}
	service := sdk.CheckerOptionDocumentation{
		Id:       "service",
		Label:    "Origin service",
		AutoFill: sdk.AutoFillService,
	}

	options := sdk.CheckerOptionsDocumentation{
		UserOpts: []sdk.CheckerOptionDocumentation{
			recordTypes,
			subdomains,
			includeFiltered,
			region,
			transports,
			resolverAllowlist,
			latencyThreshold,
			runTimeout,
		},
		DomainOpts:  []sdk.CheckerOptionDocumentation{domainName},
		ServiceOpts: []sdk.CheckerOptionDocumentation{service},
	}

	availability := sdk.CheckerAvailability{
		ApplyToService: true,
		LimitToServices: []string{
			"abstract.Origin",
			"abstract.NSOnlyOrigin",
		},
	}

	interval := &sdk.CheckIntervalSpec{
		Min:     5 * time.Minute,
		Max:     24 * time.Hour,
		Default: 30 * time.Minute,
	}

	return &sdk.CheckerDefinition{
		ID:              "resolver-propagation",
		Name:            "Worldwide DNS propagation",
		Version:         Version,
		Availability:    availability,
		ObservationKeys: []sdk.ObservationKey{ObservationKeyResolverPropagation},
		HasHTMLReport:   true,
		HasMetrics:      true,
		Options:         options,
		Rules:           Rules(),
		Interval:        interval,
	}
}

View file

@ -0,0 +1,49 @@
package checker
import (
"testing"
sdk "git.happydns.org/checker-sdk-go/checker"
)
// TestDefinitionSmoke is a sanity check on the provider's definition: it must
// exist, carry an ID and a name, advertise the HTML report and metrics, expose
// at least one rule, document the "recordTypes" user option and be limited to
// specific services.
func TestDefinitionSmoke(t *testing.T) {
	definer, isDefiner := Provider().(sdk.CheckerDefinitionProvider)
	if !isDefiner {
		t.Fatalf("Provider does not implement CheckerDefinitionProvider")
	}

	def := definer.Definition()
	if def == nil {
		t.Fatalf("nil definition")
	}
	if def.Name == "" || def.ID == "" {
		t.Errorf("missing ID/Name: %+v", def)
	}
	if !(def.HasHTMLReport && def.HasMetrics) {
		t.Errorf("expected HasHTMLReport and HasMetrics: %+v", def)
	}
	if len(def.Rules) == 0 {
		t.Errorf("definition exposes no rules")
	}

	// The record-type option must be offered to users.
	found := false
	for i := range def.Options.UserOpts {
		if def.Options.UserOpts[i].Id == "recordTypes" {
			found = true
			break
		}
	}
	if !found {
		t.Errorf("missing recordTypes user option")
	}

	// The checker must be restricted to a set of services.
	if len(def.Availability.LimitToServices) == 0 {
		t.Errorf("expected LimitToServices to be set")
	}
}
// TestProviderKey checks that the provider reports the resolver-propagation
// observation key.
func TestProviderKey(t *testing.T) {
	got := Provider().Key()
	if got != ObservationKeyResolverPropagation {
		t.Errorf("unexpected observation key")
	}
}

245
checker/dns.go Normal file
View file

@ -0,0 +1,245 @@
package checker
import (
"bytes"
"context"
"crypto/tls"
"encoding/base64"
"fmt"
"io"
"net"
"net/http"
"net/url"
"sort"
"strings"
"time"
"github.com/miekg/dns"
)
// dnsTimeout is the per-exchange timeout used by every transport.
// Slower than this, a public resolver is unreachable or too flaky to be useful.
const dnsTimeout = 5 * time.Second
// ednsUDPSize is the UDP payload size advertised in the EDNS0 OPT record.
// 4096 is the de-facto ceiling for unfragmented EDNS0 responses on the public Internet.
// NOTE(review): DNS Flag Day 2020 recommends 1232 bytes to stay clear of IP
// fragmentation — confirm 4096 is the intended value.
const ednsUDPSize = 4096
// maxDoHResponseBytes caps how much of a DoH response body is read.
// Bound DoH reads so a hostile server can't stream junk indefinitely.
const maxDoHResponseBytes = 64 * 1024
// dohClient is the HTTP client used for every DoH exchange.
// Shared so concurrent probes reuse connections and TLS state.
// The overall timeout leaves 2s of headroom on top of dnsTimeout; TLS 1.2 is
// the minimum accepted protocol version.
var dohClient = &http.Client{
Timeout: dnsTimeout + 2*time.Second,
Transport: &http.Transport{
TLSClientConfig: &tls.Config{
MinVersion: tls.VersionTLS12,
},
TLSHandshakeTimeout: dnsTimeout,
ResponseHeaderTimeout: dnsTimeout,
ExpectContinueTimeout: 1 * time.Second,
// Keep-alives stay enabled (the zero value); spelled out to make the
// connection-reuse intent explicit.
DisableKeepAlives: false,
MaxIdleConnsPerHost: 4,
},
}
// queryResult is the transport-independent outcome of one DNS exchange.
// Flatter than *dns.Msg so the collector stays protocol-agnostic.
type queryResult struct {
// Rcode is the response code copied from the reply header.
Rcode int
// Answer is the answer section of the reply.
Answer []dns.RR
// AD mirrors the reply's Authenticated Data flag.
AD bool
// Latency is the measured duration of the exchange.
Latency time.Duration
}
// queryResolver sends one question (name, qtype, class IN) to resolver r over
// transport tr and returns the flattened reply.
//
// The query forces RD=1 (recurse), CD=0 (let the resolver validate DNSSEC) and
// AD=1 (signal validation back), and carries an EDNS0 OPT record advertising
// ednsUDPSize with the DO bit set.
//
// An error is returned when the transport is unknown, when the resolver has no
// endpoint for the requested encrypted transport, or when the exchange fails.
func queryResolver(ctx context.Context, r Resolver, tr Transport, name string, qtype uint16) (*queryResult, error) {
	m := new(dns.Msg)
	m.Id = dns.Id()
	m.Question = []dns.Question{{Name: dns.Fqdn(name), Qtype: qtype, Qclass: dns.ClassINET}}
	m.RecursionDesired = true
	m.CheckingDisabled = false
	m.AuthenticatedData = true
	m.SetEdns0(ednsUDPSize, true)

	switch tr {
	case TransportUDP:
		// JoinHostPort brackets IPv6 literals; plain "ip:53" concatenation
		// produces an undialable address for them.
		return exchangeUDPOrTCP(ctx, m, net.JoinHostPort(r.IP, "53"), "udp")
	case TransportTCP:
		return exchangeUDPOrTCP(ctx, m, net.JoinHostPort(r.IP, "53"), "tcp")
	case TransportDoT:
		if r.DoTHost == "" {
			return nil, fmt.Errorf("no DoT endpoint for %s", r.ID)
		}
		return exchangeDoT(ctx, m, r.IP, r.DoTHost)
	case TransportDoH:
		if r.DoHURL == "" {
			return nil, fmt.Errorf("no DoH endpoint for %s", r.ID)
		}
		return exchangeDoH(ctx, m, r.DoHURL)
	default:
		return nil, fmt.Errorf("unknown transport %q", tr)
	}
}
// exchangeUDPOrTCP performs a plain DNS exchange with server ("host:port")
// over proto ("udp" or "tcp"). The client timeout is dnsTimeout, shortened to
// the time left before the context deadline when that is sooner.
//
// A truncated UDP reply triggers one retry over TCP; if the retry fails, the
// truncated UDP reply is returned as-is rather than an error.
func exchangeUDPOrTCP(ctx context.Context, m *dns.Msg, server, proto string) (*queryResult, error) {
	// flatten converts a reply and its round-trip time into a queryResult.
	flatten := func(msg *dns.Msg, elapsed time.Duration) *queryResult {
		return &queryResult{
			Rcode:   msg.Rcode,
			Answer:  msg.Answer,
			AD:      msg.AuthenticatedData,
			Latency: elapsed,
		}
	}

	timeout := dnsTimeout
	if deadline, hasDeadline := ctx.Deadline(); hasDeadline {
		if remaining := time.Until(deadline); remaining > 0 && remaining < timeout {
			timeout = remaining
		}
	}
	primary := dns.Client{Net: proto, Timeout: timeout}

	reply, rtt, err := primary.ExchangeContext(ctx, m, server)
	switch {
	case err != nil:
		return nil, err
	case reply == nil:
		return nil, fmt.Errorf("nil response from %s", server)
	}

	// Truncated UDP answers force a retry over TCP per RFC 5966.
	if reply.Truncated && proto == "udp" {
		retry := dns.Client{Net: "tcp", Timeout: dnsTimeout}
		full, fullRTT, retryErr := retry.ExchangeContext(ctx, m, server)
		if retryErr == nil && full != nil {
			return flatten(full, fullRTT), nil
		}
	}
	return flatten(reply, rtt), nil
}
// exchangeDoT performs a DNS-over-TLS exchange on port 853. The connection is
// dialled to ip, while sni is the server name set in the TLS configuration
// (TLS 1.2 minimum). The client timeout is dnsTimeout, shortened to the time
// left before the context deadline when that is sooner.
func exchangeDoT(ctx context.Context, m *dns.Msg, ip, sni string) (*queryResult, error) {
	tlsCfg := &tls.Config{
		ServerName: sni,
		MinVersion: tls.VersionTLS12,
	}
	client := dns.Client{Net: "tcp-tls", Timeout: dnsTimeout, TLSConfig: tlsCfg}
	if deadline, hasDeadline := ctx.Deadline(); hasDeadline {
		remaining := time.Until(deadline)
		if remaining > 0 && remaining < client.Timeout {
			client.Timeout = remaining
		}
	}

	reply, rtt, err := client.ExchangeContext(ctx, m, net.JoinHostPort(ip, "853"))
	switch {
	case err != nil:
		return nil, err
	case reply == nil:
		return nil, fmt.Errorf("nil response from %s", ip)
	}

	result := &queryResult{
		Rcode:   reply.Rcode,
		Answer:  reply.Answer,
		AD:      reply.AuthenticatedData,
		Latency: rtt,
	}
	return result, nil
}
// exchangeDoH performs a DNS-over-HTTPS exchange against endpoint using the
// shared dohClient.
//
// The query is sent with GET (per RFC 8484) and message ID 0 so HTTP caches
// can merge equivalent queries; note that this overwrites m.Id on the caller's
// message. Latency covers the request and the complete read of the body.
//
// An error is returned when the message cannot be packed, the endpoint URL is
// invalid, the HTTP exchange fails, the status is not 200, the content type is
// not application/dns-message, the body exceeds maxDoHResponseBytes, or the
// body is not a valid DNS message.
func exchangeDoH(ctx context.Context, m *dns.Msg, endpoint string) (*queryResult, error) {
	// Id=0 lets HTTP caches merge equivalent queries.
	m.Id = 0
	packed, err := m.Pack()
	if err != nil {
		return nil, fmt.Errorf("packing message: %w", err)
	}

	u, err := url.Parse(endpoint)
	if err != nil {
		return nil, fmt.Errorf("invalid DoH endpoint %q: %w", endpoint, err)
	}
	// Preserve any query parameters already present on the endpoint.
	q := u.Query()
	q.Set("dns", base64.RawURLEncoding.EncodeToString(packed))
	u.RawQuery = q.Encode()

	req, err := http.NewRequestWithContext(ctx, http.MethodGet, u.String(), nil)
	if err != nil {
		return nil, err
	}
	req.Header.Set("Accept", "application/dns-message")
	req.Header.Set("User-Agent", "happyDomain-checker-resolver-propagation/"+Version)

	start := time.Now()
	resp, err := dohClient.Do(req)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("DoH HTTP %d", resp.StatusCode)
	}
	// Media types are case-insensitive, so compare a normalized copy; the
	// prefix match tolerates trailing parameters such as a charset.
	ct := resp.Header.Get("Content-Type")
	if !strings.HasPrefix(strings.ToLower(strings.TrimSpace(ct)), "application/dns-message") {
		return nil, fmt.Errorf("DoH unexpected content-type %q", ct)
	}

	// Read one byte past the cap so an oversized body is reported as such
	// instead of being silently truncated into an unpack failure.
	var buf bytes.Buffer
	if _, err := io.Copy(&buf, io.LimitReader(resp.Body, maxDoHResponseBytes+1)); err != nil {
		return nil, err
	}
	if buf.Len() > maxDoHResponseBytes {
		return nil, fmt.Errorf("DoH response exceeds %d bytes", maxDoHResponseBytes)
	}
	latency := time.Since(start)

	r := new(dns.Msg)
	if err := r.Unpack(buf.Bytes()); err != nil {
		return nil, fmt.Errorf("unpacking DoH response: %w", err)
	}
	return &queryResult{
		Rcode:   r.Rcode,
		Answer:  r.Answer,
		AD:      r.AuthenticatedData,
		Latency: latency,
	}, nil
}
// canonicalRR reduces a record to a comparable RDATA string: it takes the
// record's zone-file text, drops the four leading header fields (owner, TTL,
// class, type), joins what remains with single spaces and lowercases it.
// A nil record, or one with nothing after the header, yields "".
//
// NOTE(review): lowercasing and whitespace splitting apply to the whole RDATA,
// including quoted TXT content, so answers that differ only in letter case or
// in internal spacing compare as equal — confirm this is acceptable for
// case-sensitive payloads.
func canonicalRR(rr dns.RR) string {
	if rr == nil {
		return ""
	}

	// Number of whitespace-separated header fields preceding the RDATA.
	const headerFields = 4

	tokens := strings.Fields(rr.String())
	if len(tokens) > headerFields {
		joined := strings.Join(tokens[headerFields:], " ")
		// Lowercase so case-only drift in hostnames doesn't read as disagreement.
		return strings.ToLower(strings.TrimSpace(joined))
	}
	return ""
}
// signatureFromRRs builds a deterministic signature for cross-resolver
// comparison from the records in rrs that belong to the RRset (owner, qtype).
//
// Records whose owner (compared case-insensitively, as FQDNs) or type does not
// match are ignored, as are nil entries and records whose canonical form is
// empty. The remaining canonical RDATA strings are sorted, so the order of the
// RRset in the reply is irrelevant.
//
// It returns the signature (sorted records joined with "|"), the sorted
// records themselves (nil when nothing matched) and the smallest TTL among the
// retained records (0 when nothing matched).
func signatureFromRRs(rrs []dns.RR, owner string, qtype uint16) (sig string, records []string, minTTL uint32) {
	ownerFQDN := dns.Fqdn(owner)

	// haveTTL tracks whether minTTL was set yet. Using minTTL == 0 as the
	// "unset" marker would let a later, larger TTL overwrite a genuine TTL of 0.
	haveTTL := false
	for _, rr := range rrs {
		if rr == nil {
			continue
		}
		h := rr.Header()
		if h == nil || h.Rrtype != qtype {
			continue
		}
		if !strings.EqualFold(dns.Fqdn(h.Name), ownerFQDN) {
			continue
		}
		c := canonicalRR(rr)
		if c == "" {
			continue
		}
		records = append(records, c)
		if !haveTTL || h.Ttl < minTTL {
			minTTL = h.Ttl
			haveTTL = true
		}
	}

	sort.Strings(records)
	return strings.Join(records, "|"), records, minTTL
}
// rcodeToString returns the mnemonic miekg/dns knows for response code c
// (e.g. "NOERROR"), or "RCODE<n>" when the code has no entry in its table.
func rcodeToString(c int) string {
	name, known := dns.RcodeToString[c]
	if !known {
		return fmt.Sprintf("RCODE%d", c)
	}
	return name
}

305
checker/dns_test.go Normal file
View file

@ -0,0 +1,305 @@
package checker
import (
"context"
"net"
"strings"
"sync"
"testing"
"time"
"github.com/miekg/dns"
)
// mustRR parses s as a zone-file resource record and aborts the test when
// parsing fails.
func mustRR(t *testing.T, s string) dns.RR {
	t.Helper()
	parsed, parseErr := dns.NewRR(s)
	if parseErr == nil {
		return parsed
	}
	t.Fatalf("dns.NewRR(%q): %v", s, parseErr)
	return parsed
}
// TestCanonicalRR checks the canonical RDATA produced for a nil record and for
// A, NS, MX and TXT records, including lowercasing of mixed-case host names.
func TestCanonicalRR(t *testing.T) {
	if out := canonicalRR(nil); out != "" {
		t.Errorf("nil RR: want empty, got %q", out)
	}

	type sample struct {
		input    string
		expected string
	}
	samples := []sample{
		{input: "example.com. 300 IN A 192.0.2.1", expected: "192.0.2.1"},
		{input: "Example.Com. 300 IN NS Ns1.Example.Com.", expected: "ns1.example.com."},
		{input: "example.com. 60 IN MX 10 mail.example.com.", expected: "10 mail.example.com."},
		{input: "example.com. 30 IN TXT \"v=spf1 -all\"", expected: "\"v=spf1 -all\""},
	}
	for _, s := range samples {
		got := canonicalRR(mustRR(t, s.input))
		if got == s.expected {
			continue
		}
		t.Errorf("canonicalRR(%q) = %q, want %q", s.input, got, s.expected)
	}
}
// TestSignatureFromRRs checks that only records matching the requested owner
// and type are kept, that they come back sorted with the smallest TTL, that
// the owner comparison ignores case, and that empty input yields zero values.
func TestSignatureFromRRs(t *testing.T) {
	var rrs []dns.RR
	for _, line := range []string{
		"example.com. 300 IN A 192.0.2.2",
		"example.com. 60 IN A 192.0.2.1",
		"example.com. 300 IN AAAA 2001:db8::1",      // wrong type
		"other.example.com. 300 IN A 198.51.100.1", // wrong owner
	} {
		rrs = append(rrs, mustRR(t, line))
	}

	sig, recs, ttl := signatureFromRRs(rrs, "example.com", dns.TypeA)
	if sig != "192.0.2.1|192.0.2.2" {
		t.Errorf("sig = %q", sig)
	}
	wantRecs := []string{"192.0.2.1", "192.0.2.2"}
	if len(recs) != len(wantRecs) || recs[0] != wantRecs[0] || recs[1] != wantRecs[1] {
		t.Errorf("records = %v", recs)
	}
	if ttl != 60 {
		t.Errorf("minTTL = %d, want 60", ttl)
	}

	// The owner is matched case-insensitively.
	upperSig, _, _ := signatureFromRRs(rrs, "EXAMPLE.com.", dns.TypeA)
	if upperSig != sig {
		t.Errorf("owner case sensitivity: %q vs %q", upperSig, sig)
	}

	// No input records.
	emptySig, emptyRecs, emptyTTL := signatureFromRRs(nil, "x", dns.TypeA)
	if emptySig != "" || emptyRecs != nil || emptyTTL != 0 {
		t.Errorf("empty input: %q %v %d", emptySig, emptyRecs, emptyTTL)
	}
}
func TestSignatureDeterministic(t *testing.T) {
a := []dns.RR{
mustRR(t, "x. 30 IN A 1.1.1.1"),
mustRR(t, "x. 30 IN A 2.2.2.2"),
}
b := []dns.RR{
mustRR(t, "x. 30 IN A 2.2.2.2"),
mustRR(t, "x. 30 IN A 1.1.1.1"),
}
sa, _, _ := signatureFromRRs(a, "x", dns.TypeA)
sb, _, _ := signatureFromRRs(b, "x", dns.TypeA)
if sa != sb {
t.Errorf("ordering changed sig: %q vs %q", sa, sb)
}
}
// TestRcodeToString checks the mnemonic of three well-known response codes and
// the "RCODE<n>" fallback for a code without one.
func TestRcodeToString(t *testing.T) {
	type sample struct {
		code     int
		expected string
	}
	for _, s := range []sample{
		{code: dns.RcodeSuccess, expected: "NOERROR"},
		{code: dns.RcodeNameError, expected: "NXDOMAIN"},
		{code: dns.RcodeServerFailure, expected: "SERVFAIL"},
		{code: 42, expected: "RCODE42"},
	} {
		got := rcodeToString(s.code)
		if got == s.expected {
			continue
		}
		t.Errorf("rcodeToString(%d) = %q, want %q", s.code, got, s.expected)
	}
}
// startUDPServer brings up a tiny miekg/dns UDP server bound to a free
// loopback port, returning its address and a stop func. The handler is called
// for every query and decides what to write back.
//
// The function returns only once the server has signalled that it is serving
// (via NotifyStartedFunc), so callers may query it immediately. The test is
// aborted if the server exits before starting or does not start within two
// seconds. The stop func shuts the server down and waits for its goroutine.
func startUDPServer(t *testing.T, handler dns.HandlerFunc) (string, func()) {
	t.Helper()
	pc, err := net.ListenPacket("udp", "127.0.0.1:0")
	if err != nil {
		t.Fatalf("listen: %v", err)
	}

	started := make(chan struct{})
	srv := &dns.Server{
		PacketConn:        pc,
		Handler:           handler,
		NotifyStartedFunc: func() { close(started) },
	}
	done := make(chan struct{})
	go func() {
		defer close(done)
		// The serve error is not interesting here: a premature exit is
		// detected through the done channel below.
		_ = srv.ActivateAndServe()
	}()

	// Wait for an explicit readiness signal instead of sleeping: a fixed
	// delay is both slower than needed and racy on a loaded machine.
	select {
	case <-started:
	case <-done:
		_ = pc.Close()
		t.Fatalf("dns server exited before it started serving")
	case <-time.After(2 * time.Second):
		_ = pc.Close()
		t.Fatalf("dns server did not start in time")
	}

	return pc.LocalAddr().String(), func() {
		_ = srv.Shutdown()
		<-done
	}
}
func TestExchangeUDPOrTCP_Success(t *testing.T) {
addr, stop := startUDPServer(t, func(w dns.ResponseWriter, m *dns.Msg) {
resp := new(dns.Msg)
resp.SetReply(m)
resp.Authoritative = true
resp.Answer = []dns.RR{mustRR(t, "example.com. 60 IN A 192.0.2.10")}
_ = w.WriteMsg(resp)
})
defer stop()
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
defer cancel()
m := new(dns.Msg)
m.SetQuestion("example.com.", dns.TypeA)
res, err := exchangeUDPOrTCP(ctx, m, addr, "udp")
if err != nil {
t.Fatalf("exchange: %v", err)
}
if res.Rcode != dns.RcodeSuccess {
t.Errorf("rcode = %d", res.Rcode)
}
if len(res.Answer) != 1 {
t.Fatalf("answers: %v", res.Answer)
}
}
func TestQueryResolver_UnknownTransport(t *testing.T) {
_, err := queryResolver(context.Background(), Resolver{ID: "x", IP: "127.0.0.1"}, Transport("xyz"), "x.", dns.TypeA)
if err == nil || !strings.Contains(err.Error(), "unknown transport") {
t.Errorf("want unknown transport error, got %v", err)
}
}
func TestQueryResolver_MissingDoTEndpoint(t *testing.T) {
_, err := queryResolver(context.Background(), Resolver{ID: "x", IP: "127.0.0.1"}, TransportDoT, "x.", dns.TypeA)
if err == nil || !strings.Contains(err.Error(), "no DoT endpoint") {
t.Errorf("want missing DoT err, got %v", err)
}
}
func TestQueryResolver_MissingDoHEndpoint(t *testing.T) {
_, err := queryResolver(context.Background(), Resolver{ID: "x", IP: "127.0.0.1"}, TransportDoH, "x.", dns.TypeA)
if err == nil || !strings.Contains(err.Error(), "no DoH endpoint") {
t.Errorf("want missing DoH err, got %v", err)
}
}
func TestRunProbe_TransportError(t *testing.T) {
// Missing DoT host on the resolver: queryResolver returns an error,
// runProbe converts it into RRProbe.Error.
p := runProbe(context.Background(), Resolver{ID: "x", IP: "127.0.0.1"}, TransportDoT, "ex.", dns.TypeA)
if p.Error == "" {
t.Errorf("expected error for missing DoT host")
}
if p.Transport != TransportDoT {
t.Errorf("transport = %v", p.Transport)
}
}
func TestQueryAuthoritative(t *testing.T) {
addr, stop := startUDPServer(t, func(w dns.ResponseWriter, m *dns.Msg) {
resp := new(dns.Msg)
resp.SetReply(m)
resp.Authoritative = true
resp.Answer = []dns.RR{mustRR(t, "ex. 60 IN A 5.6.7.8")}
_ = w.WriteMsg(resp)
})
defer stop()
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
defer cancel()
e := queryAuthoritative(ctx, []string{addr}, "ex.", dns.TypeA)
if e == nil {
t.Fatal("nil entry")
}
if e.sig != "5.6.7.8" {
t.Errorf("sig = %q", e.sig)
}
}
func TestQueryAuthoritative_NotAuthoritative(t *testing.T) {
addr, stop := startUDPServer(t, func(w dns.ResponseWriter, m *dns.Msg) {
resp := new(dns.Msg)
resp.SetReply(m)
resp.Authoritative = false
resp.Answer = []dns.RR{mustRR(t, "ex. 60 IN A 5.6.7.8")}
_ = w.WriteMsg(resp)
})
defer stop()
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
defer cancel()
if e := queryAuthoritative(ctx, []string{addr}, "ex.", dns.TypeA); e != nil {
t.Errorf("non-authoritative answer should be ignored, got %+v", e)
}
}
func TestQueryAuthoritative_NXDOMAIN(t *testing.T) {
addr, stop := startUDPServer(t, func(w dns.ResponseWriter, m *dns.Msg) {
resp := new(dns.Msg)
resp.SetReply(m)
resp.Authoritative = true
resp.Rcode = dns.RcodeNameError
_ = w.WriteMsg(resp)
})
defer stop()
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
defer cancel()
e := queryAuthoritative(ctx, []string{addr}, "ex.", dns.TypeA)
if e == nil {
t.Fatal("want non-nil entry for NXDOMAIN")
}
if e.sig != "" {
t.Errorf("NXDOMAIN should give empty sig: %q", e.sig)
}
}
// TestExchangeUDP_TruncationFallsBackToTCP checks that a truncated UDP reply
// makes exchangeUDPOrTCP retry over TCP against the same address and return
// the TCP answer.
func TestExchangeUDP_TruncationFallsBackToTCP(t *testing.T) {
// The UDP side always replies with TC=1 and no answer; a separate TCP
// listener on the same host:port returns the full answer. Two dns.Server
// values are used, one per protocol, each with its own handler, so the test
// can tell which transport produced the result.
pcUDP, err := net.ListenPacket("udp", "127.0.0.1:0")
if err != nil {
t.Fatalf("udp listen: %v", err)
}
defer pcUDP.Close()
addr := pcUDP.LocalAddr().String()
host, port, err := net.SplitHostPort(addr)
if err != nil {
t.Fatalf("split: %v", err)
}
// TCP needs to share the same port; bind a TCP listener on it.
// NOTE(review): the port was picked for UDP only; if it is already taken
// for TCP this Listen fails and the test aborts.
tcpL, err := net.Listen("tcp", net.JoinHostPort(host, port))
if err != nil {
t.Fatalf("tcp listen: %v", err)
}
defer tcpL.Close()
// UDP handler: truncated, empty reply.
udpHandler := dns.HandlerFunc(func(w dns.ResponseWriter, m *dns.Msg) {
resp := new(dns.Msg)
resp.SetReply(m)
resp.Truncated = true
_ = w.WriteMsg(resp)
})
// TCP handler: complete reply with one A record.
tcpHandler := dns.HandlerFunc(func(w dns.ResponseWriter, m *dns.Msg) {
resp := new(dns.Msg)
resp.SetReply(m)
resp.Answer = []dns.RR{mustRR(t, "ex. 60 IN A 1.2.3.4")}
_ = w.WriteMsg(resp)
})
udpSrv := &dns.Server{PacketConn: pcUDP, Handler: udpHandler}
tcpSrv := &dns.Server{Listener: tcpL, Handler: tcpHandler}
var wg sync.WaitGroup
wg.Add(2)
go func() { defer wg.Done(); _ = udpSrv.ActivateAndServe() }()
go func() { defer wg.Done(); _ = tcpSrv.ActivateAndServe() }()
// Shut both servers down and wait for their goroutines before returning.
defer func() {
_ = udpSrv.Shutdown()
_ = tcpSrv.Shutdown()
wg.Wait()
}()
// Give both servers a moment to start serving.
// NOTE(review): a fixed sleep is racy on a loaded machine; the servers'
// NotifyStartedFunc hook would give a deterministic readiness signal.
time.Sleep(30 * time.Millisecond)
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
defer cancel()
m := new(dns.Msg)
m.SetQuestion("ex.", dns.TypeA)
res, err := exchangeUDPOrTCP(ctx, m, addr, "udp")
if err != nil {
t.Fatalf("exchange: %v", err)
}
// Only the TCP handler returns an answer, so one answer proves the fallback.
if len(res.Answer) != 1 {
t.Fatalf("expected TCP fallback to populate answer, got %v", res.Answer)
}
}

123
checker/interactive.go Normal file
View file

@ -0,0 +1,123 @@
//go:build standalone
package checker
import (
"errors"
"net/http"
"strconv"
"strings"
sdk "git.happydns.org/checker-sdk-go/checker"
)
// RenderForm lists the fields of the standalone form, in display order: the
// required zone name followed by the optional tuning knobs.
func (p *resolverPropagationProvider) RenderForm() []sdk.CheckerOptionField {
	zone := sdk.CheckerOptionField{
		Id:          "domain_name",
		Type:        "string",
		Label:       "Zone name",
		Placeholder: "example.com",
		Required:    true,
		Description: "Apex of the zone to probe across public resolvers.",
	}
	recordTypes := sdk.CheckerOptionField{
		Id:          "recordTypes",
		Type:        "string",
		Label:       "Record types to probe",
		Placeholder: "SOA,NS,A,AAAA,MX,TXT,CAA",
		Description: "Comma-separated list of RR types. Probed at the apex (and at each 'subdomains' entry).",
	}
	subdomains := sdk.CheckerOptionField{
		Id:          "subdomains",
		Type:        "string",
		Label:       "Extra subdomains to probe",
		Placeholder: "www",
		Description: "Comma-separated list of owner names to probe in addition to the apex (e.g. \"www,mail,@\").",
	}
	includeFiltered := sdk.CheckerOptionField{
		Id:          "includeFiltered",
		Type:        "bool",
		Label:       "Include filtered resolvers",
		Description: "Probe filtering resolvers (malware/family/adblock). Their answers routinely disagree with the consensus by design.",
	}
	region := sdk.CheckerOptionField{
		Id:          "region",
		Type:        "string",
		Label:       "Restrict to region",
		Placeholder: "all",
		Description: "One of: all, global, na, eu, asia, ru, me.",
		Choices:     []string{"all", "global", "na", "eu", "asia", "ru", "me"},
	}
	transports := sdk.CheckerOptionField{
		Id:          "transports",
		Type:        "string",
		Label:       "Transports",
		Placeholder: "udp",
		Description: "Comma-separated list of transports to probe: udp, tcp, dot, doh.",
	}
	allowlist := sdk.CheckerOptionField{
		Id:          "resolverAllowlist",
		Type:        "string",
		Label:       "Resolver allowlist (advanced)",
		Placeholder: "cloudflare,google,9.9.9.9",
		Description: "Comma-separated list of resolver IDs or IPs to probe exclusively. Leave empty to use the catalog selection.",
	}
	latency := sdk.CheckerOptionField{
		Id:          "latencyThresholdMs",
		Type:        "uint",
		Label:       "Latency warning threshold (ms)",
		Placeholder: "500",
		Description: "Resolvers averaging above this value produce an info finding.",
	}
	runTimeout := sdk.CheckerOptionField{
		Id:          "runTimeoutSeconds",
		Type:        "uint",
		Label:       "Run timeout (seconds)",
		Placeholder: "30",
		Description: "Hard wall-clock budget for one propagation run.",
	}

	return []sdk.CheckerOptionField{
		zone,
		recordTypes,
		subdomains,
		includeFiltered,
		region,
		transports,
		allowlist,
		latency,
		runTimeout,
	}
}
// ParseForm converts the submitted standalone form into checker options.
//
// "domain_name" is mandatory; surrounding whitespace and one trailing dot are
// removed, and the value must be non-empty afterwards (so a bare "." is
// rejected rather than stored as an empty zone name). The free-text options
// are copied when non-empty. "includeFiltered" accepts the usual boolean
// spellings; any other value is ignored and the option is left unset. The two
// numeric options must parse as non-negative integers fitting in 32 bits and
// are stored as float64.
//
// An error is returned when the zone name is missing or a numeric option is
// malformed.
func (p *resolverPropagationProvider) ParseForm(r *http.Request) (sdk.CheckerOptions, error) {
	// Trim the trailing dot before the emptiness check so that "." does not
	// slip through as an empty zone name.
	name := strings.TrimSuffix(strings.TrimSpace(r.FormValue("domain_name")), ".")
	if name == "" {
		return nil, errors.New("domain_name is required")
	}
	opts := sdk.CheckerOptions{
		"domain_name": name,
	}

	for _, key := range []string{
		"recordTypes", "subdomains", "region",
		"transports", "resolverAllowlist",
	} {
		if v := strings.TrimSpace(r.FormValue(key)); v != "" {
			opts[key] = v
		}
	}

	// Unrecognised spellings are deliberately ignored, leaving the option
	// unset.
	switch strings.ToLower(strings.TrimSpace(r.FormValue("includeFiltered"))) {
	case "1", "true", "on", "yes":
		opts["includeFiltered"] = true
	case "0", "false", "off", "no":
		opts["includeFiltered"] = false
	}

	for _, key := range []string{"latencyThresholdMs", "runTimeoutSeconds"} {
		v := strings.TrimSpace(r.FormValue(key))
		if v == "" {
			continue
		}
		n, err := strconv.ParseUint(v, 10, 32)
		if err != nil {
			return nil, errors.New(key + " must be a non-negative integer")
		}
		// Stored as float64, the same numeric type the option defaults use.
		opts[key] = float64(n)
	}
	return opts, nil
}

137
checker/metrics.go Normal file
View file

@ -0,0 +1,137 @@
package checker
import (
"encoding/json"
"fmt"
"time"
sdk "git.happydns.org/checker-sdk-go/checker"
)
// ExtractMetrics implements sdk.CheckerMetricsReporter. It decodes the raw
// observation carried by ctx, derives the consensus view on the fly, and emits
// time-series for dashboards, all stamped with collectedAt:
//
//   - zone-level roll-ups (resolver counts, agreeing resolvers, regions
//     covered, run duration);
//   - the declared SOA serial and, per unfiltered resolver with a usable SOA
//     answer, the observed serial, plus the number of resolvers lagging behind
//     the declared serial (only when a serial is declared);
//   - per-resolver availability and average latency over its successful
//     probes;
//   - the number of distinct answer groups per RRset.
//
// Only the observation payload is read; the rule states carried by ctx are not
// consulted. An error is returned when the payload cannot be decoded.
//
// The per-resolver and per-RRset samples follow map iteration order, so their
// relative order is not stable between calls.
func (p *resolverPropagationProvider) ExtractMetrics(ctx sdk.ReportContext, collectedAt time.Time) ([]sdk.CheckMetric, error) {
	var data ResolverPropagationData
	if err := json.Unmarshal(ctx.Data(), &data); err != nil {
		return nil, fmt.Errorf("resolver-propagation: decoding observation: %w", err)
	}
	deriveView(&data)

	zone := data.Zone
	out := make([]sdk.CheckMetric, 0, 8+3*len(data.Resolvers)+len(data.RRsets))

	// zoneMetric builds a sample labelled with the zone only; every sample
	// gets its own label map.
	zoneMetric := func(name string, value float64) sdk.CheckMetric {
		return sdk.CheckMetric{
			Name:      name,
			Value:     value,
			Labels:    map[string]string{"zone": zone},
			Timestamp: collectedAt,
		}
	}

	out = append(out,
		zoneMetric("resolver_propagation_resolvers_total", float64(data.Stats.TotalResolvers)),
		zoneMetric("resolver_propagation_resolvers_reachable", float64(data.Stats.ReachableResolvers)),
		zoneMetric("resolver_propagation_unfiltered_agreeing", float64(data.Stats.UnfilteredAgreeing)),
		zoneMetric("resolver_propagation_regions_covered", float64(data.Stats.CountriesCovered)),
		zoneMetric("resolver_propagation_run_duration_ms", float64(data.RunDurationMs)),
	)

	if data.DeclaredSerial != 0 {
		out = append(out, zoneMetric("resolver_propagation_declared_serial", float64(data.DeclaredSerial)))
	}

	// Observed SOA serial per unfiltered resolver, and how many lag behind.
	soaKey := rrsetKey(zone, "SOA")
	var staleResolvers int
	for id, rv := range data.Resolvers {
		// Defensive: a JSON null decodes to a nil map entry.
		if rv == nil || rv.Filtered {
			continue
		}
		probe := rv.Probes[soaKey]
		if probe == nil || probe.Error != "" || probe.Rcode != "NOERROR" {
			continue
		}
		serial := extractSerial(probe.Records)
		if serial == 0 {
			continue
		}
		out = append(out, sdk.CheckMetric{
			Name:  "resolver_propagation_observed_serial",
			Value: float64(serial),
			Labels: map[string]string{
				"zone":     zone,
				"resolver": id,
			},
			Timestamp: collectedAt,
		})
		if data.DeclaredSerial != 0 && serial < data.DeclaredSerial {
			staleResolvers++
		}
	}
	if data.DeclaredSerial != 0 {
		out = append(out, zoneMetric("resolver_propagation_serial_drift_resolvers", float64(staleResolvers)))
	}

	// Availability and average latency per resolver.
	for id, rv := range data.Resolvers {
		if rv == nil {
			continue
		}
		labels := map[string]string{
			"zone":      zone,
			"resolver":  id,
			"ip":        rv.IP,
			"region":    rv.Region,
			"transport": string(rv.Transport),
		}
		up := float64(0)
		if rv.Reachable {
			up = 1
		}
		out = append(out, sdk.CheckMetric{
			Name:  "resolver_propagation_resolver_up",
			Value: up, Labels: labels, Timestamp: collectedAt,
		})

		// Failed probes carry no meaningful latency and are left out of the
		// average.
		var total, n int64
		for _, probe := range rv.Probes {
			if probe == nil || probe.Error != "" {
				continue
			}
			total += probe.LatencyMs
			n++
		}
		if n > 0 {
			out = append(out, sdk.CheckMetric{
				Name:  "resolver_propagation_resolver_latency_ms",
				Value: float64(total) / float64(n), Unit: "ms",
				Labels: labels, Timestamp: collectedAt,
			})
		}
	}

	// Number of distinct answer groups per RRset.
	for key, v := range data.RRsets {
		if v == nil {
			continue
		}
		out = append(out, sdk.CheckMetric{
			Name:  "resolver_propagation_rrset_signatures",
			Value: float64(len(v.Groups)),
			Labels: map[string]string{
				"zone":  zone,
				"rrset": key,
			},
			Timestamp: collectedAt,
		})
	}
	return out, nil
}

97
checker/metrics_test.go Normal file
View file

@ -0,0 +1,97 @@
package checker
import (
"encoding/json"
"testing"
"time"
sdk "git.happydns.org/checker-sdk-go/checker"
)
func TestExtractMetrics(t *testing.T) {
key := "ex./A"
soaKey := "ex./SOA"
data := &ResolverPropagationData{
Zone: "ex.",
Names: []string{"ex."},
Types: []string{"A", "SOA"},
RunDurationMs: 1234,
DeclaredSerial: 100,
Resolvers: map[string]*ResolverView{
"a": mkResolver("a", "eu", false, true, map[string]*RRProbe{
key: {Rcode: "NOERROR", Signature: "1.1.1.1", LatencyMs: 50, Transport: TransportUDP},
soaKey: {Rcode: "NOERROR", Records: []string{"ns.ex. hm.ex. 100 3600 600 86400 300"}, LatencyMs: 50, Transport: TransportUDP},
}),
"b": mkResolver("b", "eu", false, false, map[string]*RRProbe{
key: {Error: "timeout", Transport: TransportUDP},
soaKey: {Rcode: "NOERROR", Records: []string{"ns.ex. hm.ex. 90 3600 600 86400 300"}, LatencyMs: 80, Transport: TransportUDP},
}),
},
RRsets: map[string]*RRsetView{
key: {Name: "ex.", Type: "A"},
soaKey: {Name: "ex.", Type: "SOA"},
},
Stats: Stats{
TotalResolvers: 2,
ReachableResolvers: 1,
CountriesCovered: 1,
},
}
raw, err := json.Marshal(data)
if err != nil {
t.Fatalf("marshal: %v", err)
}
states := []sdk.CheckState{
{Status: sdk.StatusCrit, Code: "x"},
{Status: sdk.StatusWarn, Code: "y"},
{Status: sdk.StatusInfo, Code: "z"},
{Status: sdk.StatusInfo, Code: "z2"},
}
ctx := sdk.NewReportContext(raw, nil, states)
prov := &resolverPropagationProvider{}
metrics, err := prov.ExtractMetrics(ctx, time.Unix(0, 0))
if err != nil {
t.Fatalf("ExtractMetrics: %v", err)
}
want := map[string]float64{
"resolver_propagation_resolvers_total": 2,
"resolver_propagation_resolvers_reachable": 1,
"resolver_propagation_run_duration_ms": 1234,
"resolver_propagation_declared_serial": 100,
"resolver_propagation_serial_drift_resolvers": 1,
}
got := map[string]float64{}
for _, m := range metrics {
// Keep the first sample per name (most are zone-only labels).
if _, ok := got[m.Name]; !ok {
got[m.Name] = m.Value
}
}
for name, v := range want {
if got[name] != v {
t.Errorf("metric %s = %v, want %v", name, got[name], v)
}
}
// resolver_up should appear once per resolver.
var ups int
for _, m := range metrics {
if m.Name == "resolver_propagation_resolver_up" {
ups++
}
}
if ups != 2 {
t.Errorf("resolver_up samples = %d, want 2", ups)
}
}
// TestExtractMetrics_BadPayload checks that an observation that is not valid
// JSON makes ExtractMetrics return an error.
func TestExtractMetrics_BadPayload(t *testing.T) {
	reportCtx := sdk.StaticReportContext(json.RawMessage(`not-json`))
	provider := new(resolverPropagationProvider)

	_, err := provider.ExtractMetrics(reportCtx, time.Now())
	if err == nil {
		t.Errorf("want decode error")
	}
}

16
checker/provider.go Normal file
View file

@ -0,0 +1,16 @@
package checker
import (
sdk "git.happydns.org/checker-sdk-go/checker"
)
// Provider constructs a fresh resolver-propagation observation provider; each
// call returns a new value.
func Provider() sdk.ObservationProvider {
	return new(resolverPropagationProvider)
}
// resolverPropagationProvider is the checker's provider type. It has no
// fields; its behaviour lives entirely in its methods.
type resolverPropagationProvider struct{}
// Key returns the observation key of the resolver-propagation checker.
func (p *resolverPropagationProvider) Key() sdk.ObservationKey {
return ObservationKeyResolverPropagation
}

733
checker/report.go Normal file
View file

@ -0,0 +1,733 @@
package checker
import (
"bytes"
"encoding/json"
"fmt"
"html/template"
"sort"
"strings"
sdk "git.happydns.org/checker-sdk-go/checker"
)
// GetHTMLReport implements sdk.CheckerHTMLReporter.
//
// It decodes the observation carried by ctx (an empty payload is accepted and
// treated as an empty observation), derives the consensus view, converts the
// rule states into findings and renders the result through reportTmpl.
//
// The report reads top-down by decreasing importance:
//  1. a "Fix these first" banner listing the common failures (drift,
//     DNSSEC, NXDOMAIN, SERVFAIL, regional split, etc.) with a plain-English
//     remediation for each;
//  2. a per-RRset consensus table showing which answers dominate and which
//     resolvers disagree: the meat of the check;
//  3. a per-region matrix (consensus / drift / error per region × RRset);
//  4. a detailed per-resolver table for operators who want the raw data.
//
// An error is returned when the payload cannot be decoded or the template
// fails to execute.
func (p *resolverPropagationProvider) GetHTMLReport(ctx sdk.ReportContext) (string, error) {
	var data ResolverPropagationData
	raw := ctx.Data()
	if len(raw) != 0 {
		err := json.Unmarshal(raw, &data)
		if err != nil {
			return "", fmt.Errorf("parse resolver-propagation data: %w", err)
		}
	}
	deriveView(&data)

	var page bytes.Buffer
	model := buildReportView(&data, statesToFindings(ctx.States()))
	if err := reportTmpl.Execute(&page, model); err != nil {
		return "", err
	}
	return page.String(), nil
}
// topFailureOrder is the priority used by the "Fix these first" banner.
// Items at the top reflect more impactful / more actionable issues so the
// reader has a triage path. The banner is built by walking this list in order,
// so a finding code only shows up there if it is listed here.
var topFailureOrder = []string{
CodeAllResolversDown,
CodeUnexpectedSERVFAIL,
CodeDNSSECFailure,
CodeAnswerDrift,
CodeUnexpectedNXDOMAIN,
CodeSerialDrift,
CodeRegionalSplit,
CodePartialPropagation,
CodeDNSSECUnvalidated,
CodeStaleCache,
CodeResolverRewrote,
CodeResolverUnreachable,
CodeResolverHighLatency,
CodeResolverFilteredHit,
CodeNoResolvers,
}
// reportView is the flattened shape the HTML template consumes.
type reportView struct {
// Zone is the zone name copied from the observation.
Zone string
// OverallStatus is the banner title ("Critical issues", "Warnings",
// "Informational" or "OK").
OverallStatus string
// OverallClass is the banner CSS class ("banner-crit", "banner-warn",
// "banner-info" or "banner-ok").
OverallClass string
// OverallMessage is the one-sentence summary shown in the banner.
OverallMessage string
// Stats is copied from the observation.
Stats Stats
// TopFailures holds one card per finding code, in topFailureOrder.
TopFailures []topFailure
// OtherFindings holds findings not shown as top failures.
OtherFindings []Finding
// RRsets, Regions and Resolvers presumably back the per-RRset, per-region
// and per-resolver tables of the report — TODO confirm against the template.
RRsets []rrsetRow
Regions []regionRow
Resolvers []resolverRow
}
// topFailure is one card of the "Fix these first" banner, summarising all
// findings that share a code.
type topFailure struct {
// Code is the finding code the card stands for.
Code string
// Severity, Message and Remedy are taken from the first finding recorded
// for the code.
Severity string
Message string
Remedy string
// Count is the number of findings sharing the code.
Count int
// Class is the CSS class, "severity-" followed by the severity.
Class string
Headline string // short, human-readable label for the card
}
// rrsetRow is the template-facing summary of one RRset.
// NOTE(review): the code filling these fields is outside this view; the field
// notes below are inferred from names and types — confirm against the builder.
type rrsetRow struct {
// Key presumably is the RRset key in "name/TYPE" form; Name and Type its
// two components.
Key string
Name string
Type string
// MatchesExpected, Expected and HasExpected presumably describe the
// comparison with the expected (declared) answer, when one is known.
MatchesExpected bool
Expected []string
HasExpected bool
// Groups presumably lists the distinct answers observed for the RRset.
Groups []groupRow
// Agreeing and Dissenting presumably count resolvers relative to the
// consensus answer.
Agreeing int
Dissenting int
// StatusClass and StatusLabel presumably are the CSS class and caption of
// the row's status badge.
StatusClass string
StatusLabel string
}
// groupRow is the template-facing form of one answer group of an RRset.
// NOTE(review): field meanings are inferred from names — confirm against the
// code that builds the rows.
type groupRow struct {
// Rcode and Records presumably are the response code and records shared by
// the group.
Rcode string
Records []string
// Resolvers presumably lists the resolvers that returned this answer.
Resolvers []string
// IsConsensus presumably marks the group holding the consensus answer.
IsConsensus bool
}
// regionRow is the template-facing roll-up of one region.
// NOTE(review): field meanings are inferred from names — confirm against the
// code that builds the rows.
type regionRow struct {
// Region presumably is the region identifier and Label its display name.
Region string
Label string
// The counters presumably tally the region's resolvers by outcome.
Resolvers int
Reachable int
Agreeing int
Disagreeing int
Errored int
}
// resolverRow is the template-facing detail of one resolver.
// NOTE(review): field meanings are inferred from names — confirm against the
// code that builds the rows.
type resolverRow struct {
ID string
Name string
IP string
Region string
Transport string
Filtered bool
Reachable bool
// AvgMs presumably is the resolver's average probe latency in milliseconds.
AvgMs int64
// Probes presumably holds one row per RRset probed on this resolver.
Probes []probeRow
}
// probeRow is the result of one resolver for one RRset key, as shown in the
// expandable details of the per-resolver table.
type probeRow struct {
	// Key is the RRset key the probe was made for.
	Key string
	Rcode string
	Records []string
	MinTTL uint32
	AD bool
	// AgreesWithConsensus is only set when a consensus signature exists for Key;
	// it stays false otherwise.
	AgreesWithConsensus bool
	// Error is non-empty when the probe failed; the template then shows it
	// instead of the records.
	Error string
	LatencyMs int64
}
// buildReportView flattens the observation d and the rule findings into the
// reportView consumed by the HTML template.
//
// d must be non-nil; GetHTMLReport runs deriveView on it first. findings may
// be in any order: severities are compared explicitly rather than relying on
// the caller's sort. The result is deterministic for a given input, because
// maps are either walked in sorted key order or reduced with logic that does
// not depend on iteration order.
func buildReportView(d *ResolverPropagationData, findings []Finding) *reportView {
	v := &reportView{
		Zone:  d.Zone,
		Stats: d.Stats,
	}
	fillReportBanner(v, d, findings)
	fillReportFindings(v, findings)
	v.RRsets = reportRRsetRows(d)
	v.Regions = reportRegionRows(d)
	v.Resolvers = reportResolverRows(d)
	return v
}

// fillReportBanner sets the overall banner of v from the worst severity found
// in findings.
func fillReportBanner(v *reportView, d *ResolverPropagationData, findings []Finding) {
	// Track the highest rank over all findings so that the input order cannot
	// hide a warning behind an earlier informational note.
	worst := 0
	for _, f := range findings {
		if r := severityRank(f.Severity); r > worst {
			worst = r
		}
	}
	switch worst {
	case severityRank(SeverityCrit):
		v.OverallStatus = "Critical issues"
		v.OverallClass = "banner-crit"
		v.OverallMessage = fmt.Sprintf("%s is not propagating correctly across public resolvers.", d.Zone)
	case severityRank(SeverityWarn):
		v.OverallStatus = "Warnings"
		v.OverallClass = "banner-warn"
		v.OverallMessage = fmt.Sprintf("%s is propagating, but some resolvers or resource sets disagree.", d.Zone)
	case severityRank(SeverityInfo):
		v.OverallStatus = "Informational"
		v.OverallClass = "banner-info"
		v.OverallMessage = fmt.Sprintf("%s looks healthy; a few advisory notes below.", d.Zone)
	default:
		v.OverallStatus = "OK"
		v.OverallClass = "banner-ok"
		v.OverallMessage = fmt.Sprintf("%s is propagated consistently across %d of %d unfiltered resolvers.",
			d.Zone, d.Stats.UnfilteredAgreeing, d.Stats.UnfilteredProbed)
	}
}

// fillReportFindings splits findings into the "Fix these first" cards (one per
// code listed in topFailureOrder) and the "Other findings" table.
func fillReportFindings(v *reportView, findings []Finding) {
	byCode := make(map[string][]Finding)
	for _, f := range findings {
		byCode[f.Code] = append(byCode[f.Code], f)
	}

	promoted := make(map[string]bool, len(topFailureOrder))
	for _, code := range topFailureOrder {
		list := byCode[code]
		if len(list) == 0 {
			continue
		}
		promoted[code] = true
		// Keep the most severe occurrence; on ties the earliest one wins.
		top := list[0]
		for _, f := range list[1:] {
			if severityRank(f.Severity) > severityRank(top.Severity) {
				top = f
			}
		}
		v.TopFailures = append(v.TopFailures, topFailure{
			Code:     code,
			Severity: string(top.Severity),
			Message:  top.Message,
			Remedy:   top.Remedy,
			Count:    len(list),
			Class:    "severity-" + string(top.Severity),
			Headline: headlineFor(code),
		})
	}

	// Anything else → "Other findings". Walk the slice (not the map) so the
	// relative order of equally severe findings is stable across runs.
	for _, f := range findings {
		if !promoted[f.Code] {
			v.OtherFindings = append(v.OtherFindings, f)
		}
	}
	sort.SliceStable(v.OtherFindings, func(i, j int) bool {
		return severityRank(v.OtherFindings[i].Severity) > severityRank(v.OtherFindings[j].Severity)
	})
}

// reportRRsetRows builds the per-RRset table, sorted by RRset key ("name/type").
func reportRRsetRows(d *ResolverPropagationData) []rrsetRow {
	keys := make([]string, 0, len(d.RRsets))
	for k := range d.RRsets {
		keys = append(keys, k)
	}
	sort.Strings(keys)

	var rows []rrsetRow
	for _, k := range keys {
		rv := d.RRsets[k]
		if rv == nil {
			// A null entry in the decoded payload carries nothing to render.
			continue
		}
		hasExpected := rv.Expected != ""
		row := rrsetRow{
			Key:             k,
			Name:            rv.Name,
			Type:            rv.Type,
			MatchesExpected: rv.MatchesExpected,
			Expected:        rv.ExpectedRecords,
			HasExpected:     hasExpected,
			Agreeing:        len(rv.Agreeing),
			Dissenting:      len(rv.Dissenting),
		}
		for _, g := range rv.Groups {
			row.Groups = append(row.Groups, groupRow{
				Rcode:       g.Rcode,
				Records:     g.Records,
				Resolvers:   g.Resolvers,
				IsConsensus: g.Signature == rv.ConsensusSig,
			})
		}
		switch {
		case hasExpected && !rv.MatchesExpected:
			row.StatusClass = "pill-crit"
			row.StatusLabel = "drift"
		case len(rv.Groups) > 1:
			row.StatusClass = "pill-warn"
			row.StatusLabel = "partial"
		case len(rv.Groups) == 1:
			row.StatusClass = "pill-ok"
			row.StatusLabel = "consensus"
		default:
			row.StatusClass = "pill-info"
			row.StatusLabel = "no data"
		}
		rows = append(rows, row)
	}
	return rows
}

// reportRegionRows builds the per-region rollup, sorted by display label.
//
// Every resolver counts towards Resolvers and, when reachable, Reachable.
// Reachable unfiltered resolvers are additionally classified: a resolver with
// at least one missing or failed probe counts as Errored and Disagreeing, one
// whose answers differ from a known consensus counts as Disagreeing, and all
// others count as Agreeing.
func reportRegionRows(d *ResolverPropagationData) []regionRow {
	byRegion := map[string]*regionRow{}
	for _, rv := range d.Resolvers {
		if rv == nil {
			continue
		}
		r, ok := byRegion[rv.Region]
		if !ok {
			r = &regionRow{Region: rv.Region, Label: regionLabel(rv.Region)}
			byRegion[rv.Region] = r
		}
		r.Resolvers++
		if !rv.Reachable {
			continue
		}
		r.Reachable++
		if rv.Filtered {
			continue
		}
		// Inspect every probe instead of stopping at the first problem: map
		// iteration order is random, so an early exit would make the Errored
		// counter depend on which probe happens to be visited first.
		errored, differs := false, false
		for key, p := range rv.Probes {
			if p == nil || p.Error != "" {
				errored = true
				continue
			}
			cv := d.RRsets[key]
			if cv != nil && cv.ConsensusSig != "" && p.Signature != cv.ConsensusSig {
				differs = true
			}
		}
		if errored {
			r.Errored++
		}
		if errored || differs {
			r.Disagreeing++
		} else {
			r.Agreeing++
		}
	}

	var rows []regionRow
	for _, r := range byRegion {
		rows = append(rows, *r)
	}
	sort.Slice(rows, func(i, j int) bool {
		if rows[i].Label != rows[j].Label {
			return rows[i].Label < rows[j].Label
		}
		// Tie-break on the raw tag so equal labels still sort deterministically.
		return rows[i].Region < rows[j].Region
	})
	return rows
}

// reportResolverRows builds the per-resolver table, sorted by resolver map
// key, with each resolver's probes sorted by RRset key.
func reportResolverRows(d *ResolverPropagationData) []resolverRow {
	ids := make([]string, 0, len(d.Resolvers))
	for id := range d.Resolvers {
		ids = append(ids, id)
	}
	sort.Strings(ids)

	var rows []resolverRow
	for _, id := range ids {
		rv := d.Resolvers[id]
		if rv == nil {
			continue
		}
		keys := make([]string, 0, len(rv.Probes))
		for k := range rv.Probes {
			keys = append(keys, k)
		}
		sort.Strings(keys)

		var total, n int64
		probes := make([]probeRow, 0, len(keys))
		for _, k := range keys {
			p := rv.Probes[k]
			if p == nil {
				// The region rollup treats a missing probe as an error; show
				// it the same way here instead of dereferencing nil.
				probes = append(probes, probeRow{Key: k, Error: "no result recorded"})
				continue
			}
			pr := probeRow{
				Key:       k,
				Rcode:     p.Rcode,
				Records:   p.Records,
				MinTTL:    p.MinTTL,
				AD:        p.AD,
				Error:     p.Error,
				LatencyMs: p.LatencyMs,
			}
			if cv := d.RRsets[k]; cv != nil && cv.ConsensusSig != "" {
				pr.AgreesWithConsensus = p.Signature == cv.ConsensusSig
			}
			// Only successful probes contribute to the average latency.
			if p.Error == "" {
				total += p.LatencyMs
				n++
			}
			probes = append(probes, pr)
		}
		avg := int64(0)
		if n > 0 {
			avg = total / n
		}
		rows = append(rows, resolverRow{
			ID:        rv.ID,
			Name:      rv.Name,
			IP:        rv.IP,
			Region:    regionLabel(rv.Region),
			Transport: string(rv.Transport),
			Filtered:  rv.Filtered,
			Reachable: rv.Reachable,
			AvgMs:     avg,
			Probes:    probes,
		})
	}
	return rows
}
// headlineFor returns the short card title shown for a finding code in the
// "Fix these first" section. Codes without a dedicated title are returned
// unchanged. The wording is kept here (not in rules) so user-facing text lives
// in one layer.
func headlineFor(code string) string {
	// Start from the fallback and overwrite it when the code is known.
	title := code
	switch code {
	case CodeAllResolversDown:
		title = "No resolver could be reached"
	case CodeUnexpectedSERVFAIL:
		title = "A resolver returns SERVFAIL"
	case CodeDNSSECFailure:
		title = "DNSSEC validation fails"
	case CodeAnswerDrift:
		title = "Public resolvers disagree with your authoritative answer"
	case CodeUnexpectedNXDOMAIN:
		title = "A resolver sees your zone as non-existent"
	case CodeSerialDrift:
		title = "SOA serial differs between resolvers"
	case CodeRegionalSplit:
		title = "A whole region sees a different answer"
	case CodePartialPropagation:
		title = "Change is mid-propagation"
	case CodeDNSSECUnvalidated:
		title = "Validating resolver did not set AD"
	case CodeStaleCache:
		title = "Resolvers still serve the previous SOA serial"
	case CodeResolverRewrote:
		title = "Resolver rewrote the answer"
	case CodeResolverUnreachable:
		title = "Resolver unreachable from the checker"
	case CodeResolverHighLatency:
		title = "Slow resolver"
	case CodeResolverFilteredHit:
		title = "Filtered resolver is blocking your zone"
	case CodeNoResolvers:
		title = "No resolver matched the current selection"
	}
	return title
}
// statesToFindings converts rule states into report findings.
//
// This is a view-layer translation only: rules own severity, code and message;
// the report adds the remedy text and decides whether the state's subject is a
// resolver or an RRset. States whose status has no severity are dropped. The
// result is sorted by descending severity, then code, RRset and resolver, and
// is nil when no state is kept.
func statesToFindings(states []sdk.CheckState) []Finding {
	var findings []Finding
	for _, state := range states {
		severity, reportable := severityFromStatus(state.Status)
		if !reportable {
			continue
		}
		finding := Finding{
			Code:     state.Code,
			Severity: severity,
			Message:  state.Message,
			Remedy:   remedyFor(state.Code),
		}
		switch {
		case isResolverScopedCode(state.Code):
			finding.Resolver = state.Subject
		case strings.Contains(state.Subject, "/"):
			// A subject with a slash is an RRset key.
			finding.RRset = state.Subject
		}
		findings = append(findings, finding)
	}

	sort.SliceStable(findings, func(a, b int) bool {
		left, right := &findings[a], &findings[b]
		if lr, rr := severityRank(left.Severity), severityRank(right.Severity); lr != rr {
			return lr > rr
		}
		switch {
		case left.Code != right.Code:
			return left.Code < right.Code
		case left.RRset != right.RRset:
			return left.RRset < right.RRset
		default:
			return left.Resolver < right.Resolver
		}
	})
	return findings
}
// severityFromStatus maps an SDK status onto the report severity scale.
// The boolean is false, and the severity empty, for any status other than
// critical, warning or info.
func severityFromStatus(s sdk.Status) (Severity, bool) {
	if s == sdk.StatusCrit {
		return SeverityCrit, true
	}
	if s == sdk.StatusWarn {
		return SeverityWarn, true
	}
	if s == sdk.StatusInfo {
		return SeverityInfo, true
	}
	return "", false
}
// isResolverScopedCode reports whether findings with this code have a resolver
// (rather than an RRset) as their subject.
func isResolverScopedCode(code string) bool {
	resolverCodes := [...]string{
		CodeResolverUnreachable,
		CodeResolverTimeout,
		CodeResolverRewrote,
		CodeResolverFilteredHit,
		CodeResolverHighLatency,
		CodeDNSSECFailure,
		CodeDNSSECUnvalidated,
	}
	for _, candidate := range resolverCodes {
		if candidate == code {
			return true
		}
	}
	return false
}
// remedyFor returns the "what to do" hint shown next to a finding, or the
// empty string when the code has no hint. The wording lives here, not in
// rules: severity is judgment, copy is presentation.
func remedyFor(code string) string {
	var hint string
	switch code {
	case CodeNoResolvers:
		hint = "loosen the region filter or reset the allowlist in the checker options"
	case CodeAllResolversDown:
		hint = "retry later, or verify the checker host's outgoing UDP/53 connectivity"
	case CodeSerialDrift:
		hint = "usually transient caching right after a zone push"
	case CodeStaleCache:
		hint = "the resolvers cached the previous zone version"
	case CodeDNSSECFailure:
		hint = "check that the DS record at the parent matches the DNSKEY at the zone apex"
	case CodeDNSSECUnvalidated:
		hint = "enable DNSSEC signing at your provider to get full validation downstream"
	case CodeRegionalSplit:
		hint = "possible GeoDNS misconfiguration or regional censorship"
	case CodePartialPropagation:
		hint = "wait up to the previous TTL for the old cached answer to expire everywhere"
	case CodeAnswerDrift:
		hint = "wait for the old TTL to expire or force a flush on the affected resolvers"
	case CodeUnexpectedNXDOMAIN:
		hint = "a resolver returning NXDOMAIN while others return NOERROR usually means a poisoned cache or lame delegation"
	case CodeUnexpectedSERVFAIL:
		hint = "check DNSSEC signatures and that every authoritative NS is reachable over UDP and TCP"
	case CodeResolverUnreachable:
		hint = "the resolver might be blocking the checker's traffic, firewalled, or temporarily down"
	case CodeResolverRewrote:
		hint = "the resolver appears to rewrite answers; users relying on it will see a different zone"
	case CodeResolverFilteredHit:
		hint = "normal for a filtered resolver when the zone is on a blocklist"
	case CodeResolverHighLatency:
		hint = "usually reflects the checker-to-resolver network path"
	}
	return hint
}
// severityRank orders severities for sorting; higher = more severe.
// Critical ranks 3, warning 2, info 1 and anything else 0.
func severityRank(s Severity) int {
	if s == SeverityCrit {
		return 3
	}
	if s == SeverityWarn {
		return 2
	}
	if s == SeverityInfo {
		return 1
	}
	return 0
}
// reportFuncs exposes small helpers to the template so it can stay concise.
//
//   - join: strings.Join with the separator first, so a template can write
//     {{join ", " .Items}}.
//
// The template's use of len needs no entry here: it is a predefined template
// function that already handles slices. An override restricted to []string
// would only narrow it.
var reportFuncs = template.FuncMap{
	"join": func(sep string, s []string) string { return strings.Join(s, sep) },
}
// reportTmpl is the parsed HTML report template. It is built once at package
// initialisation; template.Must panics if reportTemplateHTML does not parse.
var reportTmpl = template.Must(
	template.New("report").
		Funcs(reportFuncs).
		Parse(reportTemplateHTML),
)
// reportTemplateHTML is the source of the HTML report. It is executed with a
// *reportView as its root value and uses the "join" helper from reportFuncs.
// The page is self-contained (inline CSS, no scripts) and follows the viewer's
// light/dark colour scheme.
const reportTemplateHTML = `<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<title>Resolver propagation report {{.Zone}}</title>
<style>
:root {
color-scheme: light dark;
--bg: #f6f7fb;
--fg: #1c1f2a;
--muted: #6c7488;
--card: #ffffff;
--border: #e3e6ef;
--ok: #1e8a4a;
--info: #2563eb;
--warn: #b7791f;
--crit: #c0392b;
--code-bg: #f0f2f8;
}
@media (prefers-color-scheme: dark) {
:root {
--bg: #11131a;
--fg: #eceff6;
--muted: #96a0b5;
--card: #181b24;
--border: #252a36;
--code-bg: #1f2330;
}
}
body { margin: 0; padding: 24px; font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Ubuntu, sans-serif; background: var(--bg); color: var(--fg); line-height: 1.45; }
h1 { font-size: 1.5rem; margin: 0 0 4px 0; }
h2 { font-size: 1.1rem; margin: 24px 0 12px 0; border-bottom: 1px solid var(--border); padding-bottom: 6px; }
h3 { font-size: 0.95rem; margin: 16px 0 6px 0; }
.muted { color: var(--muted); }
.banner { padding: 14px 16px; border-radius: 10px; margin-bottom: 16px; }
.banner-ok { background: rgba(30,138,74,.1); border-left: 4px solid var(--ok); }
.banner-info { background: rgba(37,99,235,.1); border-left: 4px solid var(--info); }
.banner-warn { background: rgba(183,121,31,.12); border-left: 4px solid var(--warn); }
.banner-crit { background: rgba(192,57,43,.12); border-left: 4px solid var(--crit); }
.banner strong { display:block; font-size: 1.05rem; margin-bottom: 2px; }
.stats { display: flex; flex-wrap: wrap; gap: 12px; margin: 16px 0; }
.stat { background: var(--card); border:1px solid var(--border); border-radius:8px; padding: 10px 14px; min-width: 120px; }
.stat .n { font-size: 1.4rem; font-weight: 600; }
.stat .l { color: var(--muted); font-size: .8rem; text-transform: uppercase; letter-spacing: .04em; }
.card { background: var(--card); border: 1px solid var(--border); border-radius: 10px; padding: 14px 16px; margin-bottom: 12px; }
.card.severity-crit { border-left: 4px solid var(--crit); }
.card.severity-warn { border-left: 4px solid var(--warn); }
.card.severity-info { border-left: 4px solid var(--info); }
.card-title { display:flex; justify-content:space-between; align-items:center; gap: 12px; }
.card-title .title { font-weight: 600; }
.count-pill { background: var(--code-bg); border-radius: 999px; padding: 2px 10px; font-size: .8rem; }
.remedy { background: var(--code-bg); padding: 8px 10px; border-radius: 6px; margin-top: 8px; font-size: .9rem; }
table { width: 100%; border-collapse: collapse; margin: 8px 0 16px 0; font-size: .9rem; }
th, td { text-align: left; padding: 6px 8px; border-bottom: 1px solid var(--border); vertical-align: top; }
th { font-weight: 600; color: var(--muted); text-transform: uppercase; font-size: .75rem; letter-spacing: .04em; }
code, pre { font-family: ui-monospace, SFMono-Regular, Menlo, monospace; }
code { background: var(--code-bg); padding: 1px 5px; border-radius: 4px; font-size: .85em; }
pre { background: var(--code-bg); padding: 10px; border-radius: 6px; overflow-x: auto; font-size: .8em; }
.pill { display:inline-block; padding: 1px 8px; border-radius: 999px; font-size: .75rem; font-weight: 600; }
.pill-ok { background: rgba(30,138,74,.15); color: var(--ok); }
.pill-warn { background: rgba(183,121,31,.18); color: var(--warn); }
.pill-crit { background: rgba(192,57,43,.18); color: var(--crit); }
.pill-info { background: rgba(37,99,235,.15); color: var(--info); }
.group-consensus { background: rgba(30,138,74,.07); }
.tag { display:inline-block; padding: 1px 6px; border-radius: 4px; font-size: .7rem; margin-right: 4px; background: var(--code-bg); color: var(--muted); }
.filtered { opacity: .7; }
details > summary { cursor: pointer; color: var(--muted); font-size: .85rem; }
.ok { color: var(--ok); }
.crit { color: var(--crit); }
.warn { color: var(--warn); }
.info { color: var(--info); }
.records { margin: 0; padding-left: 16px; font-family: ui-monospace, monospace; font-size: .8rem; }
.resolver-list { font-size: .75rem; color: var(--muted); }
</style>
</head>
<body>
<h1>Worldwide DNS propagation <code>{{.Zone}}</code></h1>
<div class="muted">Probe across public recursive resolvers; consensus compared to the zone's own authoritative answer.</div>
<div class="banner {{.OverallClass}}">
<strong>{{.OverallStatus}}</strong>
{{.OverallMessage}}
</div>
<div class="stats">
<div class="stat"><div class="n">{{.Stats.ReachableResolvers}} / {{.Stats.TotalResolvers}}</div><div class="l">Resolvers reachable</div></div>
<div class="stat"><div class="n">{{.Stats.UnfilteredAgreeing}} / {{.Stats.UnfilteredProbed}}</div><div class="l">Unfiltered agreeing</div></div>
<div class="stat"><div class="n">{{.Stats.CountriesCovered}}</div><div class="l">Regions covered</div></div>
{{if .Stats.FilteredProbed}}<div class="stat"><div class="n">{{.Stats.FilteredProbed}}</div><div class="l">Filtered probed</div></div>{{end}}
</div>
{{if .TopFailures}}
<h2>Fix these first</h2>
{{range .TopFailures}}
<div class="card {{.Class}}">
<div class="card-title">
<span class="title">{{.Headline}}</span>
<span class="count-pill">{{.Count}}× · <span class="{{.Severity}}">{{.Severity}}</span></span>
</div>
<div>{{.Message}}</div>
{{if .Remedy}}<div class="remedy"><strong>What to do:</strong> {{.Remedy}}</div>{{end}}
</div>
{{end}}
{{end}}
<h2>Per-RRset consensus</h2>
<table>
<thead><tr><th>Record</th><th>Status</th><th>Expected (authoritative)</th><th>What resolvers see</th></tr></thead>
<tbody>
{{range .RRsets}}
<tr>
<td><code>{{.Name}}</code><br><span class="tag">{{.Type}}</span></td>
<td><span class="pill {{.StatusClass}}">{{.StatusLabel}}</span><br>
<span class="muted">{{.Agreeing}} ok · {{.Dissenting}} diff</span></td>
<td>
{{if .HasExpected}}
{{if .Expected}}<ul class="records">{{range .Expected}}<li>{{.}}</li>{{end}}</ul>{{else}}<span class="muted">(no data / NODATA)</span>{{end}}
{{else}}
<span class="muted">(auth unreachable)</span>
{{end}}
</td>
<td>
{{range .Groups}}
<div class="{{if .IsConsensus}}group-consensus{{end}}" style="margin-bottom:8px; padding:6px 8px; border-radius:6px;">
<span class="pill {{if eq .Rcode "NOERROR"}}pill-ok{{else if eq .Rcode "NXDOMAIN"}}pill-crit{{else if eq .Rcode "SERVFAIL"}}pill-crit{{else}}pill-warn{{end}}">{{.Rcode}}</span>
{{if .IsConsensus}}<span class="pill pill-info">consensus</span>{{end}}
{{if .Records}}<ul class="records">{{range .Records}}<li>{{.}}</li>{{end}}</ul>{{else}}<span class="muted">(empty)</span>{{end}}
<div class="resolver-list">{{len .Resolvers}} resolver(s): {{join ", " .Resolvers}}</div>
</div>
{{end}}
</td>
</tr>
{{end}}
</tbody>
</table>
<h2>Per-region view</h2>
<table>
<thead><tr><th>Region</th><th>Reachable</th><th>Agreeing</th><th>Disagreeing</th><th>Errored</th></tr></thead>
<tbody>
{{range .Regions}}
<tr>
<td>{{.Label}}</td>
<td>{{.Reachable}} / {{.Resolvers}}</td>
<td><span class="ok">{{.Agreeing}}</span></td>
<td>{{if .Disagreeing}}<span class="warn">{{.Disagreeing}}</span>{{else}}0{{end}}</td>
<td>{{if .Errored}}<span class="crit">{{.Errored}}</span>{{else}}0{{end}}</td>
</tr>
{{end}}
</tbody>
</table>
<h2>Per-resolver details</h2>
<table>
<thead><tr><th>Resolver</th><th>Region</th><th>Transport</th><th>Avg&nbsp;ms</th><th>Answers</th></tr></thead>
<tbody>
{{range .Resolvers}}
<tr class="{{if .Filtered}}filtered{{end}}">
<td>
<strong>{{.Name}}</strong>{{if .Filtered}} <span class="tag">filtered</span>{{end}}<br>
<span class="muted"><code>{{.IP}}</code> · {{.ID}}</span>
</td>
<td>{{.Region}}</td>
<td>{{.Transport}}</td>
<td>{{if .Reachable}}{{.AvgMs}}{{else}}<span class="crit">unreachable</span>{{end}}</td>
<td>
{{range .Probes}}
<details>
<summary>
<code>{{.Key}}</code>
<span class="pill {{if .Error}}pill-crit{{else if eq .Rcode "NOERROR"}}{{if .AgreesWithConsensus}}pill-ok{{else}}pill-warn{{end}}{{else}}pill-crit{{end}}">
{{if .Error}}error{{else}}{{.Rcode}}{{if .AgreesWithConsensus}} · agrees{{else}} · differs{{end}}{{end}}
</span>
{{if .AD}}<span class="tag">AD</span>{{end}}
<span class="muted">{{.LatencyMs}}ms{{if .MinTTL}} · TTL {{.MinTTL}}{{end}}</span>
</summary>
{{if .Error}}<pre>{{.Error}}</pre>{{else if .Records}}<ul class="records">{{range .Records}}<li>{{.}}</li>{{end}}</ul>{{else}}<span class="muted">(empty answer)</span>{{end}}
</details>
{{end}}
</td>
</tr>
{{end}}
</tbody>
</table>
{{if .OtherFindings}}
<h2>Other findings</h2>
<table>
<thead><tr><th>Severity</th><th>Code</th><th>Message</th><th>Remedy</th></tr></thead>
<tbody>
{{range .OtherFindings}}
<tr>
<td><span class="{{.Severity}}">{{.Severity}}</span></td>
<td><code>{{.Code}}</code></td>
<td>{{.Message}}</td>
<td class="muted">{{.Remedy}}</td>
</tr>
{{end}}
</tbody>
</table>
{{end}}
</body>
</html>
`

128
checker/report_test.go Normal file
View file

@ -0,0 +1,128 @@
package checker
import (
"encoding/json"
"strings"
"testing"
sdk "git.happydns.org/checker-sdk-go/checker"
)
func TestStatesToFindings(t *testing.T) {
states := []sdk.CheckState{
{Status: sdk.StatusOK, Code: "ok"},
{Status: sdk.StatusInfo, Code: "i", Subject: "ex./A", Message: "info"},
{Status: sdk.StatusWarn, Code: "w", Subject: "ex./A", Message: "warn"},
{Status: sdk.StatusCrit, Code: "c", Subject: "ex./A", Message: "crit"},
}
got := statesToFindings(states)
if len(got) != 3 {
t.Errorf("expected 3 findings (OK skipped), got %d: %+v", len(got), got)
}
severities := map[Severity]bool{}
for _, f := range got {
severities[f.Severity] = true
}
if !severities[SeverityCrit] || !severities[SeverityWarn] || !severities[SeverityInfo] {
t.Errorf("missing some severities: %+v", got)
}
}
// TestSeverityFromStatus pins the status → severity mapping, including the
// "not reportable" result for an OK status.
func TestSeverityFromStatus(t *testing.T) {
	type expectation struct {
		status     sdk.Status
		severity   Severity
		reportable bool
	}
	table := []expectation{
		{sdk.StatusCrit, SeverityCrit, true},
		{sdk.StatusWarn, SeverityWarn, true},
		{sdk.StatusInfo, SeverityInfo, true},
		{sdk.StatusOK, "", false},
	}
	for i := range table {
		exp := table[i]
		severity, reportable := severityFromStatus(exp.status)
		if severity == exp.severity && reportable == exp.reportable {
			continue
		}
		t.Errorf("severityFromStatus(%v) = (%v,%v), want (%v,%v)", exp.status, severity, reportable, exp.severity, exp.reportable)
	}
}
// TestSeverityRank checks the strict ordering crit > warn > info.
func TestSeverityRank(t *testing.T) {
	crit := severityRank(SeverityCrit)
	warn := severityRank(SeverityWarn)
	info := severityRank(SeverityInfo)

	if !(crit > warn) {
		t.Errorf("crit should outrank warn")
	}
	if !(warn > info) {
		t.Errorf("warn should outrank info")
	}
}
// TestRemedyFor checks that a known code carries a hint and that an unknown
// code is handled without failing (its hint may be empty).
func TestRemedyFor(t *testing.T) {
	// A known code must come with a non-empty hint.
	hint := remedyFor(CodeAnswerDrift)
	if hint == "" {
		t.Errorf("expected remedy for %q", CodeAnswerDrift)
	}

	// An unknown code only has to be accepted; its result is not constrained.
	_ = remedyFor("totally-bogus-code-xyz")
}
// TestIsResolverScopedCode checks one resolver-scoped and one RRset-scoped code.
func TestIsResolverScopedCode(t *testing.T) {
	resolverScoped := isResolverScopedCode(CodeResolverUnreachable)
	if !resolverScoped {
		t.Errorf("resolver code should be scoped")
	}

	rrsetScoped := isResolverScopedCode(CodeAnswerDrift)
	if rrsetScoped {
		t.Errorf("rrset code should not be scoped")
	}
}
func TestGetHTMLReport_Smoke(t *testing.T) {
key := "ex./A"
data := &ResolverPropagationData{
Zone: "ex.",
Names: []string{"ex."},
Types: []string{"A"},
Resolvers: map[string]*ResolverView{
"a": mkResolver("a", "eu", false, true, map[string]*RRProbe{
key: {Rcode: "NOERROR", Signature: "1.1.1.1", Records: []string{"1.1.1.1"}, LatencyMs: 30, Transport: TransportUDP},
}),
"b": mkResolver("b", "global", false, true, map[string]*RRProbe{
key: {Rcode: "NOERROR", Signature: "1.1.1.1", Records: []string{"1.1.1.1"}, LatencyMs: 40, Transport: TransportUDP},
}),
},
RRsets: map[string]*RRsetView{key: {Name: "ex.", Type: "A"}},
Stats: Stats{TotalResolvers: 2, ReachableResolvers: 2, UnfilteredProbed: 2},
}
raw, err := json.Marshal(data)
if err != nil {
t.Fatalf("marshal: %v", err)
}
ctx := sdk.NewReportContext(raw, nil, []sdk.CheckState{
{Status: sdk.StatusOK, Code: "ok", Message: "fine"},
})
prov := &resolverPropagationProvider{}
html, err := prov.GetHTMLReport(ctx)
if err != nil {
t.Fatalf("GetHTMLReport: %v", err)
}
for _, want := range []string{"ex.", "1.1.1.1", "Europe", "Global"} {
if !strings.Contains(html, want) {
t.Errorf("HTML report missing %q", want)
}
}
}
func TestGetHTMLReport_BadPayload(t *testing.T) {
ctx := sdk.StaticReportContext(json.RawMessage(`not-json`))
prov := &resolverPropagationProvider{}
if _, err := prov.GetHTMLReport(ctx); err == nil {
t.Errorf("want decode error")
}
}
func TestGetHTMLReport_EmptyPayload(t *testing.T) {
ctx := sdk.StaticReportContext(nil)
prov := &resolverPropagationProvider{}
if _, err := prov.GetHTMLReport(ctx); err != nil {
t.Errorf("empty payload should not error, got %v", err)
}
}

174
checker/resolvers.go Normal file
View file

@ -0,0 +1,174 @@
package checker
// Resolver describes one public recursive resolver of the static catalog.
//
// Region is a coarse bucket for the report, not a geolocation claim (Anycast → "global").
type Resolver struct {
	// ID is a stable identifier, exposed in JSON/metrics.
	ID string
	// Name is the human-readable provider + flavor (shown in the report).
	Name string
	// IP is the plain-text UDP/TCP address (without port).
	IP string
	// Region tags the resolver geographically. One of: global, na, eu, asia,
	// ru, me, oceania, sa, africa.
	Region string
	// Filtered marks resolvers that intentionally rewrite or block answers
	// (malware / adult / ad / family filters). These are expected to differ
	// from the consensus on some zones and therefore kept out of the default
	// unfiltered probe set.
	Filtered bool
	// DoHURL is the RFC 8484 endpoint, when the provider publishes one.
	DoHURL string
	// DoTHost is the DNS-over-TLS server name (SNI target), when available.
	// DoT always runs on port 853 against this same hostname.
	DoTHost string
}
// allResolvers is the static catalog of resolvers the checker can probe.
// selectedResolvers filters it and keeps this declaration order.
//
// Derived from happydomain3/web/src/lib/resolver.ts; regions are best-effort from AS paths and provider docs.
var allResolvers = []Resolver{
	// ── Unfiltered / Anycast global ──────────────────────────────────────
	{ID: "cloudflare", Name: "Cloudflare DNS", IP: "1.1.1.1", Region: "global",
		DoHURL: "https://cloudflare-dns.com/dns-query", DoTHost: "cloudflare-dns.com"},
	{ID: "google", Name: "Google Public DNS", IP: "8.8.8.8", Region: "global",
		DoHURL: "https://dns.google/dns-query", DoTHost: "dns.google"},
	{ID: "quad9-unfiltered", Name: "Quad9 (no blocklist)", IP: "9.9.9.10", Region: "global",
		DoHURL: "https://dns10.quad9.net/dns-query", DoTHost: "dns10.quad9.net"},
	{ID: "opendns", Name: "OpenDNS", IP: "208.67.222.222", Region: "global"},
	{ID: "he", Name: "Hurricane Electric", IP: "74.82.42.42", Region: "global"},
	{ID: "dns-sb", Name: "DNS.SB", IP: "185.222.222.222", Region: "global",
		DoHURL: "https://doh.dns.sb/dns-query", DoTHost: "dns.sb"},
	{ID: "adguard-unfiltered", Name: "AdGuard (non-filtering)", IP: "94.140.14.140", Region: "global",
		DoHURL: "https://unfiltered.adguard-dns.com/dns-query", DoTHost: "unfiltered.adguard-dns.com"},
	// ── North America ──
	{ID: "level3", Name: "Level3", IP: "4.2.2.1", Region: "na"},
	{ID: "verisign", Name: "Verisign", IP: "64.6.64.6", Region: "na"},
	{ID: "comodo", Name: "Comodo Secure DNS", IP: "8.26.56.26", Region: "na"},
	{ID: "norton", Name: "Norton ConnectSafe", IP: "199.85.126.10", Region: "na"},
	{ID: "safeserve", Name: "Namecheap SafeServe", IP: "198.54.117.10", Region: "na"},
	{ID: "dyn", Name: "Dyn", IP: "216.146.35.35", Region: "na"},
	{ID: "neustar", Name: "Neustar / DNS Advantage", IP: "156.154.70.1", Region: "na"},
	{ID: "smartviper", Name: "SmartViper", IP: "208.76.50.50", Region: "na"},
	{ID: "alternate", Name: "Alternate DNS", IP: "23.253.163.53", Region: "na"},
	{ID: "strongdns", Name: "StrongDNS", IP: "216.131.65.63", Region: "na"},
	// ── Europe ──
	{ID: "dns-watch", Name: "DNS.WATCH (DE)", IP: "84.200.69.80", Region: "eu"},
	{ID: "freedns", Name: "FreeDNS (AT)", IP: "37.235.1.174", Region: "eu"},
	{ID: "freenom", Name: "Freenom World (NL)", IP: "80.80.80.80", Region: "eu"},
	{ID: "uncensored", Name: "UncensoredDNS (DK)", IP: "91.239.100.100", Region: "eu"},
	{ID: "fdn", Name: "French Data Network (FR)", IP: "80.67.169.12", Region: "eu"},
	{ID: "fooldns", Name: "FoolDNS (IT)", IP: "87.118.111.215", Region: "eu"},
	{ID: "puntcat", Name: "puntCAT (ES)", IP: "109.69.8.51", Region: "eu"},
	{ID: "opennic", Name: "OpenNIC", IP: "185.121.177.177", Region: "eu"},
	{ID: "dns4eu-unfiltered", Name: "DNS4EU (unfiltered)", IP: "86.54.11.100", Region: "eu",
		DoHURL: "https://unfiltered.joindns4.eu/dns-query", DoTHost: "unfiltered.joindns4.eu"},
	{ID: "dns4all", Name: "DNS4ALL", IP: "194.0.5.3", Region: "eu"},
	// ── Asia (East & SE) ──
	{ID: "ntt-jp", Name: "NTT (JP)", IP: "129.250.35.250", Region: "asia"},
	{ID: "alidns", Name: "AliDNS (CN)", IP: "223.5.5.5", Region: "asia"},
	{ID: "cnnic-sdns", Name: "CNNIC SDNS (CN)", IP: "1.2.4.8", Region: "asia"},
	{ID: "dnspod", Name: "DNSPod (CN)", IP: "119.29.29.29", Region: "asia"},
	{ID: "onedns", Name: "oneDNS (CN)", IP: "114.215.126.16", Region: "asia"},
	{ID: "cloudxns", Name: "CloudXNS (CN)", IP: "124.251.124.251", Region: "asia"},
	{ID: "114dns", Name: "114DNS (CN)", IP: "114.114.114.114", Region: "asia"},
	{ID: "dnspai", Name: "DNSpai (CN)", IP: "101.226.4.6", Region: "asia"},
	{ID: "quad101", Name: "Quad101 (TW)", IP: "101.101.101.101", Region: "asia"},
	{ID: "hinet", Name: "HiNet (TW)", IP: "168.95.1.1", Region: "asia"},
	// ── Russia ──
	{ID: "yandex", Name: "Yandex.DNS", IP: "77.88.8.8", Region: "ru",
		DoTHost: "common.dot.dns.yandex.net"},
	// ── Middle East ──
	{ID: "greenteam", Name: "GreenTeam DNS (IL)", IP: "81.218.119.11", Region: "me"},
	// ── Filtered (opt-in) ─────────────────────────────────────────────────
	{ID: "cloudflare-malware", Name: "Cloudflare (malware blocking)", IP: "1.1.1.2", Region: "global", Filtered: true,
		DoHURL: "https://security.cloudflare-dns.com/dns-query", DoTHost: "security.cloudflare-dns.com"},
	{ID: "cloudflare-family", Name: "Cloudflare (malware + adult)", IP: "1.1.1.3", Region: "global", Filtered: true,
		DoHURL: "https://family.cloudflare-dns.com/dns-query", DoTHost: "family.cloudflare-dns.com"},
	{ID: "quad9", Name: "Quad9 (blocklist)", IP: "9.9.9.9", Region: "global", Filtered: true,
		DoHURL: "https://dns.quad9.net/dns-query", DoTHost: "dns.quad9.net"},
	{ID: "adguard", Name: "AdGuard (default)", IP: "94.140.14.14", Region: "global", Filtered: true,
		DoHURL: "https://dns.adguard-dns.com/dns-query", DoTHost: "dns.adguard-dns.com"},
	{ID: "adguard-family", Name: "AdGuard (family protection)", IP: "94.140.14.15", Region: "global", Filtered: true,
		DoHURL: "https://family.adguard-dns.com/dns-query", DoTHost: "family.adguard-dns.com"},
	// NOTE(review): the two filtered Yandex entries below reuse the DoT host of
	// the unfiltered "yandex" entry — confirm against the provider's
	// documentation whether the safe/family services publish their own host.
	{ID: "yandex-safe", Name: "Yandex Safe", IP: "77.88.8.2", Region: "ru", Filtered: true,
		DoTHost: "common.dot.dns.yandex.net"},
	{ID: "yandex-family", Name: "Yandex Family", IP: "77.88.8.3", Region: "ru", Filtered: true,
		DoTHost: "common.dot.dns.yandex.net"},
	{ID: "dns-advantage-threat", Name: "DNS Advantage Threat", IP: "156.154.70.2", Region: "na", Filtered: true},
	{ID: "dns-advantage-family", Name: "DNS Advantage Family", IP: "156.154.70.3", Region: "na", Filtered: true},
	{ID: "dns-advantage-business", Name: "DNS Advantage Business", IP: "156.154.70.4", Region: "na", Filtered: true},
	{ID: "cleanbrowsing-family", Name: "CleanBrowsing Family", IP: "185.228.168.168", Region: "global", Filtered: true,
		DoHURL: "https://doh.cleanbrowsing.org/doh/family-filter/", DoTHost: "family-filter-dns.cleanbrowsing.org"},
	{ID: "cleanbrowsing-adult", Name: "CleanBrowsing Adult", IP: "185.228.168.10", Region: "global", Filtered: true,
		DoHURL: "https://doh.cleanbrowsing.org/doh/adult-filter/", DoTHost: "adult-filter-dns.cleanbrowsing.org"},
	{ID: "dns4eu-protective", Name: "DNS4EU Protective", IP: "86.54.11.1", Region: "eu", Filtered: true,
		DoHURL: "https://protective.joindns4.eu/dns-query", DoTHost: "protective.joindns4.eu"},
	{ID: "dns4eu-child", Name: "DNS4EU Child Protection", IP: "86.54.11.12", Region: "eu", Filtered: true,
		DoHURL: "https://child.joindns4.eu/dns-query", DoTHost: "child.joindns4.eu"},
	{ID: "dns4eu-adblock", Name: "DNS4EU Ad-blocking", IP: "86.54.11.13", Region: "eu", Filtered: true,
		DoHURL: "https://ads.joindns4.eu/dns-query", DoTHost: "ads.joindns4.eu"},
}
// selectedResolvers returns the catalog entries to probe, in catalog order.
//
// A non-empty allowlist takes precedence: every resolver whose ID or IP is
// listed is returned, and the filter and region knobs are then ignored.
// Otherwise filtered resolvers are kept only when includeFiltered is set, and
// region restricts the result to one region unless it is "" or "all".
// The result is nil when nothing matches.
func selectedResolvers(includeFiltered bool, region string, allowlist []string) []Resolver {
	var picked []Resolver

	if len(allowlist) == 0 {
		anyRegion := region == "" || region == "all"
		for _, candidate := range allResolvers {
			switch {
			case candidate.Filtered && !includeFiltered:
				// Opt-in only.
			case !anyRegion && candidate.Region != region:
				// Outside the requested region.
			default:
				picked = append(picked, candidate)
			}
		}
		return picked
	}

	wanted := make(map[string]bool, len(allowlist))
	for _, entry := range allowlist {
		wanted[entry] = true
	}
	for _, candidate := range allResolvers {
		if wanted[candidate.ID] || wanted[candidate.IP] {
			picked = append(picked, candidate)
		}
	}
	return picked
}
// regionLabel returns the display name of a region tag. Unknown tags,
// including the empty string, are returned unchanged.
func regionLabel(region string) string {
	names := [...][2]string{
		{"global", "Global / Anycast"},
		{"na", "North America"},
		{"eu", "Europe"},
		{"asia", "Asia"},
		{"ru", "Russia"},
		{"me", "Middle East"},
		{"oceania", "Oceania"},
		{"sa", "South America"},
		{"africa", "Africa"},
	}
	for _, entry := range names {
		if entry[0] == region {
			return entry[1]
		}
	}
	return region
}

105
checker/resolvers_test.go Normal file
View file

@ -0,0 +1,105 @@
package checker
import (
"strings"
"testing"
)
// TestSelectedResolvers_DefaultExcludesFiltered checks that the default
// selection is non-empty and contains no filtered resolver.
func TestSelectedResolvers_DefaultExcludesFiltered(t *testing.T) {
	selection := selectedResolvers(false, "all", nil)
	if len(selection) == 0 {
		t.Fatalf("default selection is empty")
	}
	for i := range selection {
		if !selection[i].Filtered {
			continue
		}
		t.Errorf("filtered resolver %q leaked into default selection", selection[i].ID)
	}
}
// TestSelectedResolvers_IncludeFiltered checks that opting in to filtered
// resolvers strictly enlarges the selection.
func TestSelectedResolvers_IncludeFiltered(t *testing.T) {
	withFiltered := len(selectedResolvers(true, "all", nil))
	withoutFiltered := len(selectedResolvers(false, "all", nil))
	if !(withFiltered > withoutFiltered) {
		t.Errorf("includeFiltered=true should add resolvers, got %d vs %d", withFiltered, withoutFiltered)
	}
}
// TestSelectedResolvers_RegionFilter checks that a region filter only lets
// resolvers of that region through.
func TestSelectedResolvers_RegionFilter(t *testing.T) {
	european := selectedResolvers(false, "eu", nil)
	if len(european) == 0 {
		t.Fatalf("eu selection is empty")
	}
	for i := range european {
		if european[i].Region == "eu" {
			continue
		}
		t.Errorf("non-eu resolver %q (%s) leaked in", european[i].ID, european[i].Region)
	}
}
// TestSelectedResolvers_AllowlistByID checks that allowlist entries match by
// ID as well as by IP, and that nothing else is selected.
func TestSelectedResolvers_AllowlistByID(t *testing.T) {
	selection := selectedResolvers(false, "all", []string{"cloudflare", "9.9.9.10"})

	ids := make(map[string]bool)
	for i := range selection {
		ids[selection[i].ID] = true
	}
	bothFound := ids["cloudflare"] && ids["quad9-unfiltered"]
	if !bothFound {
		t.Errorf("allowlist failed: %v", ids)
	}
	if n := len(selection); n != 2 {
		t.Errorf("expected exactly 2 resolvers, got %d", n)
	}
}
// TestSelectedResolvers_AllowlistOverridesFilteredAndRegion checks that an
// allowlisted resolver is returned even when the other knobs would exclude it.
func TestSelectedResolvers_AllowlistOverridesFilteredAndRegion(t *testing.T) {
	// quad9 is Filtered + global; allowlist must still pick it.
	selection := selectedResolvers(false, "eu", []string{"quad9"})

	onlyQuad9 := len(selection) == 1 && selection[0].ID == "quad9"
	if !onlyQuad9 {
		t.Errorf("allowlist should override filtered/region, got %v", selection)
	}
}
// TestRegionLabel checks every known region code against its display label
// and verifies that unknown codes (and the empty string) pass through as-is.
func TestRegionLabel(t *testing.T) {
	table := []struct {
		code string
		want string
	}{
		{"global", "Global / Anycast"},
		{"na", "North America"},
		{"eu", "Europe"},
		{"asia", "Asia"},
		{"ru", "Russia"},
		{"me", "Middle East"},
		{"oceania", "Oceania"},
		{"sa", "South America"},
		{"africa", "Africa"},
		{"unknown", "unknown"},
		{"", ""},
	}
	for _, tc := range table {
		got := regionLabel(tc.code)
		if got == tc.want {
			continue
		}
		t.Errorf("regionLabel(%q) = %q, want %q", tc.code, got, tc.want)
	}
}
// TestAllResolversCatalogIntegrity sanity-checks the static allResolvers
// catalog: every entry needs a non-empty ID and IP, IDs must be unique and
// must not contain the reserved '|' separator. Entries that declare a DoH
// URL without a DoT host are only logged, not failed.
func TestAllResolversCatalogIntegrity(t *testing.T) {
	seen := make(map[string]bool)
	for _, res := range allResolvers {
		id := res.ID
		if id == "" {
			t.Errorf("resolver with empty ID: %+v", res)
		}
		if res.IP == "" {
			t.Errorf("resolver %q has empty IP", id)
		}
		if strings.ContainsRune(id, '|') {
			t.Errorf("resolver ID %q contains reserved separator '|'", id)
		}
		if seen[id] {
			t.Errorf("duplicate resolver ID %q", id)
		}
		seen[id] = true

		// DoH without DoT is allowed; surface it in verbose output only.
		if res.DoHURL != "" && res.DoTHost == "" {
			t.Logf("resolver %q has DoH but no DoT", id)
		}
	}
}

59
checker/rules.go Normal file
View file

@ -0,0 +1,59 @@
package checker
import (
"context"
"fmt"
sdk "git.happydns.org/checker-sdk-go/checker"
)
// Rules lists all CheckRule implementations provided by the
// resolver-propagation checker. Every rule addresses a single concern, which
// lets the UI and metrics consumers treat them independently.
func Rules() []sdk.CheckRule {
	rules := []sdk.CheckRule{
		new(resolverSelectionRule),
		new(resolversReachableRule),
		new(consensusRule),
		new(authoritativeMatchRule),
		new(nxdomainRule),
		new(servfailRule),
		new(regionalSplitRule),
		new(serialDriftRule),
		new(staleCacheRule),
		new(dnssecRule),
		new(resolverLatencyRule),
		new(filteredHitRule),
	}
	return rules
}
// loadData reads the resolver-propagation observation through obs and runs
// deriveView on it so callers receive a payload with the derived view already
// filled in.
//
// On success it returns the payload and a nil state. If the observation
// cannot be fetched it returns a nil payload together with a StatusError
// CheckState that rules can return as-is.
func loadData(ctx context.Context, obs sdk.ObservationGetter) (*ResolverPropagationData, *sdk.CheckState) {
	data := new(ResolverPropagationData)
	err := obs.Get(ctx, ObservationKeyResolverPropagation, data)
	if err != nil {
		failure := sdk.CheckState{
			Status:  sdk.StatusError,
			Message: fmt.Sprintf("failed to load resolver-propagation observation: %v", err),
			Code:    "resolver_propagation_error",
		}
		return nil, &failure
	}
	deriveView(data)
	return data, nil
}
// passState builds a StatusOK CheckState carrying the given code and message
// (no subject).
func passState(code, message string) sdk.CheckState {
	var st sdk.CheckState
	st.Status = sdk.StatusOK
	st.Code = code
	st.Message = message
	return st
}
// infoState builds a StatusInfo CheckState for the given subject.
func infoState(code, subject, message string) sdk.CheckState {
	var st sdk.CheckState
	st.Status = sdk.StatusInfo
	st.Code = code
	st.Subject = subject
	st.Message = message
	return st
}
// warnState builds a StatusWarn CheckState for the given subject.
func warnState(code, subject, message string) sdk.CheckState {
	var st sdk.CheckState
	st.Status = sdk.StatusWarn
	st.Code = code
	st.Subject = subject
	st.Message = message
	return st
}
// critState builds a StatusCrit CheckState for the given subject.
func critState(code, subject, message string) sdk.CheckState {
	var st sdk.CheckState
	st.Status = sdk.StatusCrit
	st.Code = code
	st.Subject = subject
	st.Message = message
	return st
}

245
checker/rules_consensus.go Normal file
View file

@ -0,0 +1,245 @@
package checker
import (
"context"
"fmt"
"sort"
sdk "git.happydns.org/checker-sdk-go/checker"
)
// consensusRule produces one state per probed RRset describing whether the
// resolvers converge on a single answer. Seeing several distinct NOERROR
// signatures among unfiltered resolvers is reported as partial propagation.
type consensusRule struct{}

// Name returns the rule identifier.
func (r *consensusRule) Name() string {
	return "resolver_propagation.consensus"
}

// Description returns a short human-readable summary of the rule.
func (r *consensusRule) Description() string {
	return "Checks that public resolvers agree on a single answer for each probed RRset."
}

// Evaluate loads the observation and emits exactly one state per RRset, in
// sorted key order:
//   - info when the RRset has neither a consensus signature nor any group;
//   - warn (CodePartialPropagation) when more than one NOERROR signature is
//     backed by at least one unfiltered resolver;
//   - OK otherwise.
//
// When no resolver or no RRset is present a single StatusUnknown state is
// returned; a load failure is returned as the error state from loadData.
func (r *consensusRule) Evaluate(ctx context.Context, obs sdk.ObservationGetter, _ sdk.CheckerOptions) []sdk.CheckState {
	data, errSt := loadData(ctx, obs)
	if errSt != nil {
		return []sdk.CheckState{*errSt}
	}
	if len(data.Resolvers) == 0 || len(data.RRsets) == 0 {
		skipped := sdk.CheckState{
			Status:  sdk.StatusUnknown,
			Code:    "resolver_propagation.consensus.skipped",
			Message: "no resolver probes available",
		}
		return []sdk.CheckState{skipped}
	}

	keys := sortedRRsetKeys(data)
	states := make([]sdk.CheckState, 0, len(keys))
	for _, key := range keys {
		view := data.RRsets[key]

		// Collect the NOERROR signatures vouched for by at least one
		// unfiltered resolver; filtered resolvers alone do not count.
		distinct := make(map[string]bool)
		for _, group := range view.Groups {
			if group.Rcode != "NOERROR" {
				continue
			}
			for _, id := range group.Resolvers {
				if res := data.Resolvers[id]; res != nil && !res.Filtered {
					distinct[group.Signature] = true
					break
				}
			}
		}

		if view.ConsensusSig == "" && len(view.Groups) == 0 {
			states = append(states, infoState("resolver_propagation.consensus.no_data", key,
				fmt.Sprintf("no resolver returned a usable answer for %s", key)))
			continue
		}
		if n := len(distinct); n > 1 {
			states = append(states, warnState(CodePartialPropagation, key,
				fmt.Sprintf("%d distinct answers seen across public resolvers for %s, change is mid-propagation",
					n, key)))
			continue
		}
		states = append(states, sdk.CheckState{
			Status:  sdk.StatusOK,
			Code:    "resolver_propagation.consensus.ok",
			Subject: key,
			Message: fmt.Sprintf("all %d probed resolver(s) agree on %s", len(view.Agreeing), key),
		})
	}
	return states
}
// authoritativeMatchRule compares, per RRset, the public consensus with the
// answer obtained from the zone's own authoritative servers.
type authoritativeMatchRule struct{}

// Name returns the rule identifier.
func (r *authoritativeMatchRule) Name() string {
	return "resolver_propagation.matches_authoritative"
}

// Description returns a short human-readable summary of the rule.
func (r *authoritativeMatchRule) Description() string {
	return "Checks that the public consensus matches the answer served by the zone's authoritative nameservers."
}

// Evaluate emits one state for every RRset that has a non-empty Expected
// value, in sorted key order: critical when there is no consensus signature,
// critical (CodeAnswerDrift) when MatchesExpected is false, OK otherwise.
// RRsets without an Expected value are ignored; if none has one, a single
// StatusUnknown state is returned instead.
func (r *authoritativeMatchRule) Evaluate(ctx context.Context, obs sdk.ObservationGetter, _ sdk.CheckerOptions) []sdk.CheckState {
	data, errSt := loadData(ctx, obs)
	if errSt != nil {
		return []sdk.CheckState{*errSt}
	}

	var states []sdk.CheckState
	for _, key := range sortedRRsetKeys(data) {
		view := data.RRsets[key]
		if view.Expected == "" {
			continue
		}

		var st sdk.CheckState
		if view.ConsensusSig == "" {
			st = critState("resolver_propagation.matches_authoritative.no_consensus", key,
				fmt.Sprintf("no public resolver returned a usable answer for %s (authoritative answer is known)", key))
		} else if !view.MatchesExpected {
			st = critState(CodeAnswerDrift, key,
				fmt.Sprintf("consensus of public resolvers for %s differs from the authoritative answer, wait for TTL expiry or force a flush", key))
		} else {
			st = sdk.CheckState{
				Status:  sdk.StatusOK,
				Code:    "resolver_propagation.matches_authoritative.ok",
				Subject: key,
				Message: fmt.Sprintf("public consensus for %s matches the authoritative answer", key),
			}
		}
		states = append(states, st)
	}

	// Every RRset with an Expected value contributes exactly one state, so an
	// empty result means no RRset had ground truth to compare against.
	if len(states) == 0 {
		return []sdk.CheckState{{
			Status:  sdk.StatusUnknown,
			Code:    "resolver_propagation.matches_authoritative.skipped",
			Message: "authoritative nameservers were unreachable; cannot compare consensus to ground truth",
		}}
	}
	return states
}
// nxdomainRule flags RRsets for which some resolvers answer NXDOMAIN while
// at least one other resolver answers NOERROR.
type nxdomainRule struct{}

// Name returns the rule identifier.
func (r *nxdomainRule) Name() string { return "resolver_propagation.nxdomain" }

// Description returns a short human-readable summary of the rule.
func (r *nxdomainRule) Description() string {
	return "Flags RRsets for which some resolvers return NXDOMAIN while others return NOERROR."
}

// Evaluate walks the RRsets in sorted key order and emits one critical state
// (CodeUnexpectedNXDOMAIN) per RRset that has both an NXDOMAIN group and a
// non-empty NOERROR group. When nothing is flagged a single OK state is
// returned; a load failure is returned as the error state from loadData.
//
// The NOERROR requirement matches the rule's Description. Comparing the
// NXDOMAIN count to the total number of selected resolvers instead would
// also fire when the only "disagreeing" resolvers are ones that simply did
// not answer, turning a uniform NXDOMAIN into a false critical finding.
func (r *nxdomainRule) Evaluate(ctx context.Context, obs sdk.ObservationGetter, _ sdk.CheckerOptions) []sdk.CheckState {
	data, errSt := loadData(ctx, obs)
	if errSt != nil {
		return []sdk.CheckState{*errSt}
	}
	var states []sdk.CheckState
	for _, key := range sortedRRsetKeys(data) {
		v := data.RRsets[key]
		var nxList []string
		sawNOERROR := false
		for _, g := range v.Groups {
			switch g.Rcode {
			case "NXDOMAIN":
				nxList = append(nxList, g.Resolvers...)
			case "NOERROR":
				if len(g.Resolvers) > 0 {
					sawNOERROR = true
				}
			}
		}
		if len(nxList) > 0 && sawNOERROR {
			states = append(states, critState(CodeUnexpectedNXDOMAIN, key,
				fmt.Sprintf("%s resolved as NXDOMAIN on %d resolver(s): %s", key, len(nxList), firstN(nxList, 6))))
		}
	}
	if len(states) == 0 {
		return []sdk.CheckState{passState("resolver_propagation.nxdomain.ok",
			"No resolver unexpectedly returns NXDOMAIN.")}
	}
	return states
}
// servfailRule reports every RRset for which at least one resolver answered
// SERVFAIL.
type servfailRule struct{}

// Name returns the rule identifier.
func (r *servfailRule) Name() string {
	return "resolver_propagation.servfail"
}

// Description returns a short human-readable summary of the rule.
func (r *servfailRule) Description() string {
	return "Flags RRsets for which any resolver returns SERVFAIL (usually DNSSEC or reachability failure)."
}

// Evaluate walks the RRsets in sorted key order and emits one critical state
// (CodeUnexpectedSERVFAIL) per RRset that has resolvers in a SERVFAIL group.
// When nothing is flagged a single OK state is returned; a load failure is
// returned as the error state from loadData.
func (r *servfailRule) Evaluate(ctx context.Context, obs sdk.ObservationGetter, _ sdk.CheckerOptions) []sdk.CheckState {
	data, errSt := loadData(ctx, obs)
	if errSt != nil {
		return []sdk.CheckState{*errSt}
	}

	var findings []sdk.CheckState
	for _, key := range sortedRRsetKeys(data) {
		// Gather the resolvers of every SERVFAIL group of this RRset.
		var failing []string
		for _, group := range data.RRsets[key].Groups {
			if group.Rcode != "SERVFAIL" {
				continue
			}
			failing = append(failing, group.Resolvers...)
		}
		if len(failing) == 0 {
			continue
		}
		msg := fmt.Sprintf("%s returned SERVFAIL on %d resolver(s): %s", key, len(failing), firstN(failing, 6))
		findings = append(findings, critState(CodeUnexpectedSERVFAIL, key, msg))
	}

	if len(findings) > 0 {
		return findings
	}
	return []sdk.CheckState{passState("resolver_propagation.servfail.ok",
		"No resolver returns SERVFAIL.")}
}
// regionalSplitRule reports regions whose unfiltered resolvers all return
// one and the same answer while that answer is not the global consensus.
type regionalSplitRule struct{}

// Name returns the rule identifier.
func (r *regionalSplitRule) Name() string {
	return "resolver_propagation.regional_split"
}

// Description returns a short human-readable summary of the rule.
func (r *regionalSplitRule) Description() string {
	return "Flags regions in which every resolver agrees on an answer that differs from the global consensus."
}

// Evaluate counts, for each RRset, the answer signatures seen per region
// among unfiltered resolvers. A region is flagged with a warning
// (CodeRegionalSplit) when it shows exactly one signature, that signature is
// non-empty, and it differs from the RRset's ConsensusSig. RRsets and regions
// are visited in sorted order. When nothing is flagged a single OK state is
// returned; a load failure is returned as the error state from loadData.
func (r *regionalSplitRule) Evaluate(ctx context.Context, obs sdk.ObservationGetter, _ sdk.CheckerOptions) []sdk.CheckState {
	data, errSt := loadData(ctx, obs)
	if errSt != nil {
		return []sdk.CheckState{*errSt}
	}

	var findings []sdk.CheckState
	for _, key := range sortedRRsetKeys(data) {
		view := data.RRsets[key]

		// region -> signature -> number of unfiltered resolvers returning it.
		perRegion := make(map[string]map[string]int)
		for _, group := range view.Groups {
			for _, id := range group.Resolvers {
				res := data.Resolvers[id]
				if res == nil || res.Filtered {
					continue
				}
				bucket := perRegion[res.Region]
				if bucket == nil {
					bucket = make(map[string]int)
					perRegion[res.Region] = bucket
				}
				bucket[group.Signature]++
			}
		}

		names := make([]string, 0, len(perRegion))
		for name := range perRegion {
			names = append(names, name)
		}
		sort.Strings(names)

		for _, region := range names {
			bucket := perRegion[region]
			if len(bucket) != 1 {
				// The region itself disagrees; not a clean regional split.
				continue
			}
			var unanimous string
			for sig := range bucket {
				unanimous = sig
			}
			if unanimous == "" || unanimous == view.ConsensusSig {
				continue
			}
			findings = append(findings, warnState(CodeRegionalSplit, region+" "+key,
				fmt.Sprintf("all %s resolvers agree on an answer that differs from the global consensus for %s",
					regionLabel(region), key)))
		}
	}

	if len(findings) > 0 {
		return findings
	}
	return []sdk.CheckState{passState("resolver_propagation.regional_split.ok",
		"No region is split from the global consensus.")}
}
// sortedRRsetKeys returns the keys of data.RRsets in ascending lexical
// order, giving rules a stable iteration order over the RRset map.
func sortedRRsetKeys(data *ResolverPropagationData) []string {
	names := make([]string, len(data.RRsets))
	next := 0
	for name := range data.RRsets {
		names[next] = name
		next++
	}
	sort.Strings(names)
	return names
}

139
checker/rules_resolvers.go Normal file
View file

@ -0,0 +1,139 @@
package checker
import (
	"context"
	"fmt"
	"sort"

	sdk "git.happydns.org/checker-sdk-go/checker"
)
// resolverSelectionRule reports a critical state when the observation holds
// no resolver at all, i.e. there was nothing to probe.
type resolverSelectionRule struct{}

// Name returns the rule identifier.
func (r *resolverSelectionRule) Name() string {
	return "resolver_propagation.selection"
}

// Description returns a short human-readable summary of the rule.
func (r *resolverSelectionRule) Description() string {
	return "Checks that the current option set selects at least one public resolver."
}

// Evaluate returns a single state: critical (CodeNoResolvers, subject is the
// zone) when data.Resolvers is empty, OK with the resolver count otherwise.
// A load failure is returned as the error state from loadData.
func (r *resolverSelectionRule) Evaluate(ctx context.Context, obs sdk.ObservationGetter, _ sdk.CheckerOptions) []sdk.CheckState {
	data, errSt := loadData(ctx, obs)
	if errSt != nil {
		return []sdk.CheckState{*errSt}
	}

	selected := len(data.Resolvers)
	if selected > 0 {
		msg := fmt.Sprintf("%d resolver(s) selected for probing", selected)
		return []sdk.CheckState{passState("resolver_propagation.selection.ok", msg)}
	}
	return []sdk.CheckState{critState(CodeNoResolvers, data.Zone,
		"no resolvers match the current selection (region / filtered / allowlist), loosen the region filter or reset the allowlist")}
}
// resolversReachableRule reports a critical state when none of the selected
// resolvers is marked reachable.
type resolversReachableRule struct{}

// Name returns the rule identifier.
func (r *resolversReachableRule) Name() string {
	return "resolver_propagation.reachable"
}

// Description returns a short human-readable summary of the rule.
func (r *resolversReachableRule) Description() string {
	return "Checks that at least one selected resolver answered a query (detects a checker host with no DNS connectivity)."
}

// Evaluate returns a single state: StatusUnknown when no resolver is in the
// selection, OK (with the reachable/total counters from data.Stats) as soon
// as one resolver has Reachable set, and critical (CodeAllResolversDown)
// otherwise. A load failure is returned as the error state from loadData.
func (r *resolversReachableRule) Evaluate(ctx context.Context, obs sdk.ObservationGetter, _ sdk.CheckerOptions) []sdk.CheckState {
	data, errSt := loadData(ctx, obs)
	if errSt != nil {
		return []sdk.CheckState{*errSt}
	}
	if len(data.Resolvers) == 0 {
		skipped := sdk.CheckState{
			Status:  sdk.StatusUnknown,
			Code:    "resolver_propagation.reachable.skipped",
			Message: "no resolver in selection",
		}
		return []sdk.CheckState{skipped}
	}

	anyReachable := false
	for _, res := range data.Resolvers {
		if res.Reachable {
			anyReachable = true
			break
		}
	}
	if !anyReachable {
		return []sdk.CheckState{critState(CodeAllResolversDown, data.Zone,
			"no public resolver answered, the checker host may be offline, or DNS traffic is blocked on its network")}
	}

	msg := fmt.Sprintf("%d/%d resolver(s) answered at least one query",
		data.Stats.ReachableResolvers, data.Stats.TotalResolvers)
	return []sdk.CheckState{passState("resolver_propagation.reachable.ok", msg)}
}
// resolverLatencyRule flags resolvers with high average latency, and
// resolvers that did not answer at all.
type resolverLatencyRule struct{}

// Name returns the rule identifier.
func (r *resolverLatencyRule) Name() string { return "resolver_propagation.latency" }

// Description returns a short human-readable summary of the rule.
func (r *resolverLatencyRule) Description() string {
	return "Flags resolvers whose average response time exceeds the configured threshold."
}

// Evaluate inspects every resolver in ascending ID order and emits:
//   - a warning (CodeResolverUnreachable) for a resolver that is not
//     Reachable;
//   - an info state (CodeResolverHighLatency) when the integer average of
//     LatencyMs over its error-free probes exceeds the threshold.
//
// The threshold is read from the "latencyThresholdMs" option and defaults to
// 500. When nothing is flagged a single OK state is returned; a load failure
// is returned as the error state from loadData.
//
// Resolvers are visited through a sorted key list because Go map iteration
// order is unspecified; without it the order of the returned states would
// change from run to run. Nil resolver or probe entries are skipped rather
// than dereferenced, matching the nil guards used by the sibling rules.
func (r *resolverLatencyRule) Evaluate(ctx context.Context, obs sdk.ObservationGetter, opts sdk.CheckerOptions) []sdk.CheckState {
	data, errSt := loadData(ctx, obs)
	if errSt != nil {
		return []sdk.CheckState{*errSt}
	}
	threshold := int64(sdk.GetIntOption(opts, "latencyThresholdMs", 500))

	ids := make([]string, 0, len(data.Resolvers))
	for id := range data.Resolvers {
		ids = append(ids, id)
	}
	sort.Strings(ids)

	var states []sdk.CheckState
	for _, id := range ids {
		rv := data.Resolvers[id]
		if rv == nil {
			continue
		}
		if !rv.Reachable {
			states = append(states, warnState(CodeResolverUnreachable, rv.ID,
				fmt.Sprintf("resolver %s (%s, %s) did not answer any query", rv.Name, rv.IP, rv.Transport)))
			continue
		}
		// Average only over probes that actually got an answer.
		var total, n int64
		for _, p := range rv.Probes {
			if p == nil || p.Error != "" {
				continue
			}
			total += p.LatencyMs
			n++
		}
		if n == 0 {
			continue
		}
		if avg := total / n; avg > threshold {
			states = append(states, infoState(CodeResolverHighLatency, rv.ID,
				fmt.Sprintf("%s answered in %d ms on average (threshold %d ms)", rv.Name, avg, threshold)))
		}
	}
	if len(states) == 0 {
		return []sdk.CheckState{passState("resolver_propagation.latency.ok",
			"All reachable resolvers respond within the latency threshold.")}
	}
	return states
}
// filteredHitRule notes when a filtered resolver returns a different answer
// than the consensus (i.e. a likely blocklist hit).
type filteredHitRule struct{}

// Name returns the rule identifier.
func (r *filteredHitRule) Name() string { return "resolver_propagation.filtered_hit" }

// Description returns a short human-readable summary of the rule.
func (r *filteredHitRule) Description() string {
	return "Reports filtered resolvers returning a different answer than the consensus (typical blocklist behaviour)."
}

// Evaluate looks at every error-free NOERROR probe of every filtered resolver
// and emits an info state (CodeResolverFilteredHit) when the probe signature
// differs from the ConsensusSig of the matching RRset. RRsets that are
// unknown or have no consensus are skipped. When nothing is flagged a single
// OK state is returned; a load failure is returned as the error state from
// loadData.
//
// Both the resolver map and each resolver's probe map are walked through
// sorted key lists: Go map iteration order is unspecified, so ranging over
// the maps directly would return the findings in a different order on every
// run. Nil resolver entries are skipped rather than dereferenced.
func (r *filteredHitRule) Evaluate(ctx context.Context, obs sdk.ObservationGetter, _ sdk.CheckerOptions) []sdk.CheckState {
	data, errSt := loadData(ctx, obs)
	if errSt != nil {
		return []sdk.CheckState{*errSt}
	}

	ids := make([]string, 0, len(data.Resolvers))
	for id := range data.Resolvers {
		ids = append(ids, id)
	}
	sort.Strings(ids)

	var states []sdk.CheckState
	for _, id := range ids {
		rv := data.Resolvers[id]
		if rv == nil || !rv.Filtered {
			continue
		}

		keys := make([]string, 0, len(rv.Probes))
		for key := range rv.Probes {
			keys = append(keys, key)
		}
		sort.Strings(keys)

		for _, key := range keys {
			p := rv.Probes[key]
			if p == nil || p.Error != "" || p.Rcode != "NOERROR" {
				continue
			}
			view := data.RRsets[key]
			if view == nil || view.ConsensusSig == "" {
				continue
			}
			if p.Signature != view.ConsensusSig {
				states = append(states, infoState(CodeResolverFilteredHit, rv.ID+" "+key,
					fmt.Sprintf("%s (filtered) returned a different answer than the consensus for %s, likely a blocklist hit", rv.Name, key)))
			}
		}
	}
	if len(states) == 0 {
		return []sdk.CheckState{passState("resolver_propagation.filtered_hit.ok",
			"No filtered resolver deviates from the consensus (no blocklist hit detected).")}
	}
	return states
}

144
checker/rules_soa.go Normal file
View file

@ -0,0 +1,144 @@
package checker
import (
"context"
"fmt"
"sort"
"strings"
sdk "git.happydns.org/checker-sdk-go/checker"
)
// serialDriftRule reports when unfiltered resolvers return different SOA
// serials for the zone.
type serialDriftRule struct{}

// Name returns the rule identifier.
func (r *serialDriftRule) Name() string {
	return "resolver_propagation.serial_drift"
}

// Description returns a short human-readable summary of the rule.
func (r *serialDriftRule) Description() string {
	return "Flags disagreement on the SOA serial across unfiltered resolvers."
}

// Evaluate groups unfiltered resolvers by the serial extracted from their
// error-free NOERROR SOA probe (a zero serial is ignored). It returns a
// single state: StatusUnknown when the SOA RRset is absent, OK when fewer
// than two distinct serials were seen, and a warning (CodeSerialDrift)
// listing each serial with its resolvers otherwise. A load failure is
// returned as the error state from loadData.
func (r *serialDriftRule) Evaluate(ctx context.Context, obs sdk.ObservationGetter, _ sdk.CheckerOptions) []sdk.CheckState {
	data, errSt := loadData(ctx, obs)
	if errSt != nil {
		return []sdk.CheckState{*errSt}
	}

	soaKey := rrsetKey(data.Zone, "SOA")
	if data.RRsets[soaKey] == nil {
		skipped := sdk.CheckState{
			Status:  sdk.StatusUnknown,
			Code:    "resolver_propagation.serial_drift.skipped",
			Message: "SOA was not probed",
		}
		return []sdk.CheckState{skipped}
	}

	bySerial := make(map[uint32][]string)
	for _, res := range data.Resolvers {
		if res.Filtered {
			continue
		}
		probe := res.Probes[soaKey]
		usable := probe != nil && probe.Error == "" && probe.Rcode == "NOERROR"
		if !usable {
			continue
		}
		serial := extractSerial(probe.Records)
		if serial == 0 {
			continue
		}
		bySerial[serial] = append(bySerial[serial], res.ID)
	}

	if len(bySerial) < 2 {
		return []sdk.CheckState{passState("resolver_propagation.serial_drift.ok",
			"SOA serial is consistent across unfiltered resolvers.")}
	}

	// Sort both the resolver lists and the rendered parts so the message does
	// not depend on map iteration order.
	parts := make([]string, 0, len(bySerial))
	for serial, ids := range bySerial {
		sort.Strings(ids)
		parts = append(parts, fmt.Sprintf("serial %d on %s", serial, firstN(ids, 6)))
	}
	sort.Strings(parts)

	msg := "SOA serial differs across resolvers, " + strings.Join(parts, "; ")
	return []sdk.CheckState{warnState(CodeSerialDrift, soaKey, msg)}
}
// staleCacheRule flags resolvers still serving a serial below the declared one.
type staleCacheRule struct{}

// Name returns the rule identifier.
func (r *staleCacheRule) Name() string { return "resolver_propagation.stale_cache" }

// Description returns a short human-readable summary of the rule.
func (r *staleCacheRule) Description() string {
	return "Flags resolvers still serving an SOA serial below the one saved by happyDomain."
}

// Evaluate compares the serial extracted from each unfiltered resolver's
// error-free NOERROR SOA probe with data.DeclaredSerial (a zero extracted
// serial is ignored). It returns a single state: StatusUnknown when no
// declared serial is available or the SOA RRset is absent, OK when no
// resolver is behind, and an info state (CodeStaleCache) listing the lagging
// resolvers in sorted order otherwise. A load failure is returned as the
// error state from loadData.
//
// "Below" is decided with RFC 1982 serial number arithmetic, the ordering
// defined for SOA serials: the 32-bit difference is interpreted as a signed
// value. A plain unsigned comparison gives the wrong answer once the serial
// wraps around (e.g. declared 5 vs. cached 4294967290).
func (r *staleCacheRule) Evaluate(ctx context.Context, obs sdk.ObservationGetter, _ sdk.CheckerOptions) []sdk.CheckState {
	data, errSt := loadData(ctx, obs)
	if errSt != nil {
		return []sdk.CheckState{*errSt}
	}
	if data.DeclaredSerial == 0 {
		return []sdk.CheckState{{Status: sdk.StatusUnknown,
			Code:    "resolver_propagation.stale_cache.skipped",
			Message: "no declared SOA serial available for comparison"}}
	}
	soaKey := rrsetKey(data.Zone, "SOA")
	if data.RRsets[soaKey] == nil {
		return []sdk.CheckState{{Status: sdk.StatusUnknown,
			Code:    "resolver_propagation.stale_cache.skipped",
			Message: "SOA was not probed"}}
	}
	var below []string
	for _, rv := range data.Resolvers {
		if rv == nil || rv.Filtered {
			continue
		}
		p := rv.Probes[soaKey]
		if p == nil || p.Error != "" || p.Rcode != "NOERROR" {
			continue
		}
		s := extractSerial(p.Records)
		// RFC 1982: s precedes the declared serial when the wrapped
		// difference, read as a signed 32-bit integer, is negative.
		if s != 0 && int32(s-data.DeclaredSerial) < 0 {
			below = append(below, rv.ID)
		}
	}
	if len(below) == 0 {
		return []sdk.CheckState{passState("resolver_propagation.stale_cache.ok",
			"No resolver is still serving an outdated SOA serial.")}
	}
	sort.Strings(below)
	return []sdk.CheckState{infoState(CodeStaleCache, soaKey,
		fmt.Sprintf("%d resolver(s) still return a serial below the declared one (%d): %s",
			len(below), data.DeclaredSerial, firstN(below, 6)))}
}
// dnssecRule flags DNSSEC failures (SERVFAIL or missing AD) at the zone apex
// on resolvers known to validate.
type dnssecRule struct{}

// Name returns the rule identifier.
func (r *dnssecRule) Name() string { return "resolver_propagation.dnssec" }

// Description returns a short human-readable summary of the rule.
func (r *dnssecRule) Description() string {
	return "Checks that validating resolvers successfully validate the zone's DNSSEC chain."
}

// Evaluate inspects the SOA probe of every unfiltered resolver for which
// isValidatingResolver reports true, in ascending ID order. Probes that are
// missing or carry an error are skipped. A SERVFAIL answer yields a critical
// state (CodeDNSSECFailure); a NOERROR answer without the AD flag yields an
// info state (CodeDNSSECUnvalidated). When nothing is flagged a single OK
// state is returned; a load failure is returned as the error state from
// loadData.
//
// Resolvers are visited through a sorted key list because Go map iteration
// order is unspecified; ranging over the map directly would shuffle the
// returned states between runs. Nil resolver entries are skipped rather than
// dereferenced.
func (r *dnssecRule) Evaluate(ctx context.Context, obs sdk.ObservationGetter, _ sdk.CheckerOptions) []sdk.CheckState {
	data, errSt := loadData(ctx, obs)
	if errSt != nil {
		return []sdk.CheckState{*errSt}
	}
	soaKey := rrsetKey(data.Zone, "SOA")

	ids := make([]string, 0, len(data.Resolvers))
	for id := range data.Resolvers {
		ids = append(ids, id)
	}
	sort.Strings(ids)

	var states []sdk.CheckState
	for _, id := range ids {
		rv := data.Resolvers[id]
		if rv == nil || rv.Filtered || !isValidatingResolver(rv.ID) {
			continue
		}
		soa := rv.Probes[soaKey]
		if soa == nil || soa.Error != "" {
			continue
		}
		switch soa.Rcode {
		case "SERVFAIL":
			states = append(states, critState(CodeDNSSECFailure, rv.ID,
				fmt.Sprintf("%s returned SERVFAIL for %s, typically a broken DNSSEC chain", rv.Name, data.Zone)))
		case "NOERROR":
			if !soa.AD {
				states = append(states, infoState(CodeDNSSECUnvalidated, rv.ID,
					fmt.Sprintf("%s did not set AD=1 for %s, zone may not be DNSSEC-signed, or signature is broken", rv.Name, data.Zone)))
			}
		}
	}
	if len(states) == 0 {
		return []sdk.CheckState{passState("resolver_propagation.dnssec.ok",
			"Validating resolvers report no DNSSEC issue.")}
	}
	return states
}

348
checker/rules_test.go Normal file
View file

@ -0,0 +1,348 @@
package checker
import (
"context"
"errors"
"strings"
"testing"
sdk "git.happydns.org/checker-sdk-go/checker"
)
// TestRules_AllAreUniqueAndNamed checks that every rule returned by Rules
// has a non-empty name and description, that names are unique, and that at
// least ten rules are registered.
func TestRules_AllAreUniqueAndNamed(t *testing.T) {
	names := make(map[string]bool)
	for _, rule := range Rules() {
		name := rule.Name()
		if name == "" {
			t.Errorf("rule with empty name: %T", rule)
		}
		if rule.Description() == "" {
			t.Errorf("rule %s has empty description", name)
		}
		if names[name] {
			t.Errorf("duplicate rule name: %s", name)
		}
		names[name] = true
	}
	if n := len(names); n < 10 {
		t.Errorf("expected many rules, got %d", n)
	}
}
// TestLoadData_ObsError checks that loadData returns a nil payload and a
// StatusError state when the observation getter fails.
func TestLoadData_ObsError(t *testing.T) {
	failing := &errObs{err: errors.New("boom")}
	payload, state := loadData(context.Background(), failing)
	if payload != nil {
		t.Errorf("data should be nil on error")
	}
	if !(state != nil && state.Status == sdk.StatusError) {
		t.Errorf("want error state, got %+v", state)
	}
}
// runRule evaluates rule r against the canned payload data (served through a
// fakeObs) with the given options and returns the resulting states.
func runRule(t *testing.T, r sdk.CheckRule, data *ResolverPropagationData, opts sdk.CheckerOptions) []sdk.CheckState {
	t.Helper()
	obs := newFakeObs(data)
	ctx := context.Background()
	return r.Evaluate(ctx, obs, opts)
}
func TestResolverSelectionRule(t *testing.T) {
// Empty resolver map → crit.
st := runRule(t, &resolverSelectionRule{}, &ResolverPropagationData{Zone: "ex."}, nil)
if len(st) != 1 || st[0].Status != sdk.StatusCrit || st[0].Code != CodeNoResolvers {
t.Errorf("empty: %+v", st)
}
// Non-empty → ok.
data := &ResolverPropagationData{Resolvers: map[string]*ResolverView{"a": {ID: "a"}}}
st = runRule(t, &resolverSelectionRule{}, data, nil)
if len(st) != 1 || st[0].Status != sdk.StatusOK {
t.Errorf("ok: %+v", st)
}
}
func TestResolversReachableRule(t *testing.T) {
// No resolvers → unknown.
st := runRule(t, &resolversReachableRule{}, &ResolverPropagationData{}, nil)
if len(st) != 1 || st[0].Status != sdk.StatusUnknown {
t.Errorf("empty: %+v", st)
}
// All unreachable → crit.
data := &ResolverPropagationData{
Zone: "ex.",
Resolvers: map[string]*ResolverView{
"a": {ID: "a", Reachable: false},
},
}
st = runRule(t, &resolversReachableRule{}, data, nil)
if len(st) != 1 || st[0].Status != sdk.StatusCrit || st[0].Code != CodeAllResolversDown {
t.Errorf("all-down: %+v", st)
}
// One reachable → ok.
data.Resolvers["a"].Reachable = true
data.Stats.ReachableResolvers = 1
data.Stats.TotalResolvers = 1
st = runRule(t, &resolversReachableRule{}, data, nil)
if len(st) != 1 || st[0].Status != sdk.StatusOK {
t.Errorf("reach: %+v", st)
}
}
func TestConsensusRule_PartialPropagation(t *testing.T) {
key := "ex./A"
data := &ResolverPropagationData{
Resolvers: map[string]*ResolverView{
"a": mkResolver("a", "eu", false, true, map[string]*RRProbe{key: mkProbe("NOERROR", "1.1.1.1")}),
"b": mkResolver("b", "na", false, true, map[string]*RRProbe{key: mkProbe("NOERROR", "9.9.9.9")}),
},
RRsets: map[string]*RRsetView{key: {Name: "ex.", Type: "A"}},
}
st := runRule(t, &consensusRule{}, data, nil)
codes := statesByCode(st)
if _, ok := codes[CodePartialPropagation]; !ok {
t.Errorf("want partial propagation, got %+v", st)
}
}
func TestConsensusRule_AllAgree(t *testing.T) {
key := "ex./A"
data := &ResolverPropagationData{
Resolvers: map[string]*ResolverView{
"a": mkResolver("a", "eu", false, true, map[string]*RRProbe{key: mkProbe("NOERROR", "1.1.1.1")}),
"b": mkResolver("b", "na", false, true, map[string]*RRProbe{key: mkProbe("NOERROR", "1.1.1.1")}),
},
RRsets: map[string]*RRsetView{key: {Name: "ex.", Type: "A"}},
}
st := runRule(t, &consensusRule{}, data, nil)
if len(st) != 1 || st[0].Status != sdk.StatusOK {
t.Errorf("want OK, got %+v", st)
}
}
func TestAuthoritativeMatchRule(t *testing.T) {
key := "ex./A"
mkData := func(expected, returned string) *ResolverPropagationData {
return &ResolverPropagationData{
Resolvers: map[string]*ResolverView{
"a": mkResolver("a", "eu", false, true, map[string]*RRProbe{key: mkProbe("NOERROR", returned)}),
},
RRsets: map[string]*RRsetView{key: {Name: "ex.", Type: "A", Expected: expected}},
}
}
// Match.
st := runRule(t, &authoritativeMatchRule{}, mkData("1.1.1.1", "1.1.1.1"), nil)
if len(st) != 1 || st[0].Status != sdk.StatusOK {
t.Errorf("match: %+v", st)
}
// Drift.
st = runRule(t, &authoritativeMatchRule{}, mkData("1.1.1.1", "9.9.9.9"), nil)
if len(st) != 1 || st[0].Code != CodeAnswerDrift {
t.Errorf("drift: %+v", st)
}
// No expected anywhere → skipped.
skipped := &ResolverPropagationData{
Resolvers: map[string]*ResolverView{"a": mkResolver("a", "eu", false, true, map[string]*RRProbe{key: mkProbe("NOERROR", "1")})},
RRsets: map[string]*RRsetView{key: {Name: "ex.", Type: "A"}}, // no Expected
}
st = runRule(t, &authoritativeMatchRule{}, skipped, nil)
if len(st) != 1 || st[0].Status != sdk.StatusUnknown {
t.Errorf("skipped: %+v", st)
}
}
func TestNXDOMAINRule(t *testing.T) {
key := "ex./A"
// Some resolvers say NXDOMAIN, others NOERROR.
data := &ResolverPropagationData{
Resolvers: map[string]*ResolverView{
"a": mkResolver("a", "eu", false, true, map[string]*RRProbe{key: mkProbe("NOERROR", "1.1.1.1")}),
"nx": mkResolver("nx", "eu", false, true, map[string]*RRProbe{key: mkProbe("NXDOMAIN", "")}),
},
RRsets: map[string]*RRsetView{key: {Name: "ex.", Type: "A"}},
}
st := runRule(t, &nxdomainRule{}, data, nil)
if _, ok := statesByCode(st)[CodeUnexpectedNXDOMAIN]; !ok {
t.Errorf("want NXDOMAIN finding, got %+v", st)
}
// All same NXDOMAIN ⇒ rule does NOT fire (it's an "unexpected" rule).
for _, rv := range data.Resolvers {
rv.Probes[key] = mkProbe("NXDOMAIN", "")
}
st = runRule(t, &nxdomainRule{}, data, nil)
if _, ok := statesByCode(st)[CodeUnexpectedNXDOMAIN]; ok {
t.Errorf("uniform NXDOMAIN should not trigger, got %+v", st)
}
}
func TestSERVFAILRule(t *testing.T) {
key := "ex./A"
data := &ResolverPropagationData{
Resolvers: map[string]*ResolverView{
"a": mkResolver("a", "eu", false, true, map[string]*RRProbe{key: mkProbe("NOERROR", "1.1.1.1")}),
"sf": mkResolver("sf", "eu", false, true, map[string]*RRProbe{key: mkProbe("SERVFAIL", "")}),
},
RRsets: map[string]*RRsetView{key: {Name: "ex.", Type: "A"}},
}
st := runRule(t, &servfailRule{}, data, nil)
if _, ok := statesByCode(st)[CodeUnexpectedSERVFAIL]; !ok {
t.Errorf("want SERVFAIL finding, got %+v", st)
}
}
func TestRegionalSplitRule(t *testing.T) {
key := "ex./A"
// EU resolvers all see "9.9.9.9", global resolvers see "1.1.1.1" (consensus).
data := &ResolverPropagationData{
Resolvers: map[string]*ResolverView{
"g1": mkResolver("g1", "global", false, true, map[string]*RRProbe{key: mkProbe("NOERROR", "1.1.1.1")}),
"g2": mkResolver("g2", "global", false, true, map[string]*RRProbe{key: mkProbe("NOERROR", "1.1.1.1")}),
"g3": mkResolver("g3", "global", false, true, map[string]*RRProbe{key: mkProbe("NOERROR", "1.1.1.1")}),
"eu1": mkResolver("eu1", "eu", false, true, map[string]*RRProbe{key: mkProbe("NOERROR", "9.9.9.9")}),
"eu2": mkResolver("eu2", "eu", false, true, map[string]*RRProbe{key: mkProbe("NOERROR", "9.9.9.9")}),
},
RRsets: map[string]*RRsetView{key: {Name: "ex.", Type: "A"}},
}
st := runRule(t, &regionalSplitRule{}, data, nil)
if _, ok := statesByCode(st)[CodeRegionalSplit]; !ok {
t.Errorf("want regional split, got %+v", st)
}
}
// TestSerialDriftRule covers the three outcomes of serialDriftRule: a
// CodeSerialDrift warning when resolvers disagree on the serial, OK when
// they agree, and unknown when the SOA RRset is absent.
func TestSerialDriftRule(t *testing.T) {
	soaKey := rrsetKey("ex.", "SOA")
	// soaProbe builds a NOERROR SOA probe whose single record carries serial.
	soaProbe := func(serial string) *RRProbe {
		return &RRProbe{
			Rcode:     "NOERROR",
			Records:   []string{"ns. hm. " + serial + " 1 2 3 4"},
			Transport: TransportUDP,
		}
	}
	resolver := func(id, serial string) *ResolverView {
		return mkResolver(id, "eu", false, true, map[string]*RRProbe{soaKey: soaProbe(serial)})
	}
	rule := &serialDriftRule{}

	data := &ResolverPropagationData{
		Zone: "ex.",
		Resolvers: map[string]*ResolverView{
			"a": resolver("a", "100"),
			"b": resolver("b", "100"),
			"c": resolver("c", "99"),
		},
		RRsets: map[string]*RRsetView{soaKey: {Name: "ex.", Type: "SOA"}},
	}
	got := runRule(t, rule, data, nil)
	if !(len(got) == 1 && got[0].Code == CodeSerialDrift) {
		t.Errorf("want serial drift, got %+v", got)
	}

	// Bring the lagging resolver up to date: all serials agree.
	data.Resolvers["c"].Probes[soaKey] = soaProbe("100")
	got = runRule(t, rule, data, nil)
	if !(len(got) == 1 && got[0].Status == sdk.StatusOK) {
		t.Errorf("want ok, got %+v", got)
	}

	// Drop the SOA RRset: the rule is skipped.
	delete(data.RRsets, soaKey)
	got = runRule(t, rule, data, nil)
	if !(len(got) == 1 && got[0].Status == sdk.StatusUnknown) {
		t.Errorf("want skipped, got %+v", got)
	}
}
// TestStaleCacheRule covers the three outcomes of staleCacheRule: unknown
// when no declared serial is set, a CodeStaleCache info state naming the
// lagging resolver when one serves an older serial, and OK when every
// resolver is up to date.
//
// Every access to st[0] is preceded by a length check so that an unexpectedly
// empty result fails the test with a message instead of panicking.
func TestStaleCacheRule(t *testing.T) {
	soaKey := rrsetKey("ex.", "SOA")
	mk := func(serial string) *RRProbe {
		return &RRProbe{Rcode: "NOERROR", Records: []string{"ns. hm. " + serial + " 1 2 3 4"}, Transport: TransportUDP}
	}

	// No declared serial → skipped.
	data := &ResolverPropagationData{Zone: "ex.", RRsets: map[string]*RRsetView{soaKey: {Type: "SOA"}}}
	st := runRule(t, &staleCacheRule{}, data, nil)
	if len(st) == 0 || st[0].Status != sdk.StatusUnknown {
		t.Errorf("no declared: %+v", st)
	}

	// Below declared → info.
	data = &ResolverPropagationData{
		Zone:           "ex.",
		DeclaredSerial: 100,
		Resolvers: map[string]*ResolverView{
			"old": mkResolver("old", "eu", false, true, map[string]*RRProbe{soaKey: mk("99")}),
			"new": mkResolver("new", "eu", false, true, map[string]*RRProbe{soaKey: mk("100")}),
		},
		RRsets: map[string]*RRsetView{soaKey: {Type: "SOA"}},
	}
	st = runRule(t, &staleCacheRule{}, data, nil)
	if len(st) != 1 || st[0].Code != CodeStaleCache {
		// Fatal: the message check below indexes st[0].
		t.Fatalf("stale: %+v", st)
	}
	if !strings.Contains(st[0].Message, "old") {
		t.Errorf("stale msg should name resolver: %q", st[0].Message)
	}

	// All up-to-date.
	data.Resolvers["old"].Probes[soaKey] = mk("100")
	st = runRule(t, &staleCacheRule{}, data, nil)
	if len(st) == 0 || st[0].Status != sdk.StatusOK {
		t.Errorf("ok: %+v", st)
	}
}
// TestDNSSECRule feeds dnssecRule four resolvers with different SOA probe
// outcomes and expects both a CodeDNSSECFailure and a CodeDNSSECUnvalidated
// state in the result.
func TestDNSSECRule(t *testing.T) {
	soaKey := rrsetKey("ex.", "SOA")
	// resolver builds an unfiltered, reachable "global" resolver whose only
	// probe is the given SOA probe.
	resolver := func(id string, soa *RRProbe) *ResolverView {
		return mkResolver(id, "global", false, true, map[string]*RRProbe{soaKey: soa})
	}

	data := &ResolverPropagationData{
		Zone: "ex.",
		Resolvers: map[string]*ResolverView{
			// Validating, AD set: nothing to report.
			"cloudflare": resolver("cloudflare", &RRProbe{Rcode: "NOERROR", AD: true}),
			// Validating, SERVFAIL: DNSSEC failure.
			"google": resolver("google", &RRProbe{Rcode: "SERVFAIL"}),
			// Validating, NOERROR without AD: unvalidated info.
			"quad9": resolver("quad9", &RRProbe{Rcode: "NOERROR", AD: false}),
			// Non-validating resolver: ignored by the rule.
			"opendns": resolver("opendns", &RRProbe{Rcode: "SERVFAIL"}),
		},
		RRsets: map[string]*RRsetView{soaKey: {Type: "SOA"}},
	}

	got := runRule(t, &dnssecRule{}, data, nil)
	byCode := statesByCode(got)
	if _, found := byCode[CodeDNSSECFailure]; !found {
		t.Errorf("want DNSSEC failure, got %+v", got)
	}
	if _, found := byCode[CodeDNSSECUnvalidated]; !found {
		t.Errorf("want DNSSEC unvalidated, got %+v", got)
	}
}
func TestResolverLatencyRule(t *testing.T) {
key := "ex./A"
data := &ResolverPropagationData{
Resolvers: map[string]*ResolverView{
"slow": mkResolver("slow", "eu", false, true, map[string]*RRProbe{key: {Rcode: "NOERROR", LatencyMs: 1500, Transport: TransportUDP}}),
"fast": mkResolver("fast", "eu", false, true, map[string]*RRProbe{key: {Rcode: "NOERROR", LatencyMs: 30, Transport: TransportUDP}}),
"absent": mkResolver("absent", "eu", false, false, map[string]*RRProbe{key: {Error: "timeout", Transport: TransportUDP}}),
},
}
st := runRule(t, &resolverLatencyRule{}, data, sdk.CheckerOptions{"latencyThresholdMs": 500})
codes := statesByCode(st)
if _, ok := codes[CodeResolverHighLatency]; !ok {
t.Errorf("want high latency for 'slow', got %+v", st)
}
if _, ok := codes[CodeResolverUnreachable]; !ok {
t.Errorf("want unreachable for 'absent', got %+v", st)
}
}
func TestFilteredHitRule(t *testing.T) {
key := "ex./A"
data := &ResolverPropagationData{
Resolvers: map[string]*ResolverView{
"clean1": mkResolver("clean1", "eu", false, true, map[string]*RRProbe{key: mkProbe("NOERROR", "1.1.1.1")}),
"clean2": mkResolver("clean2", "eu", false, true, map[string]*RRProbe{key: mkProbe("NOERROR", "1.1.1.1")}),
"filt": mkResolver("filt", "eu", true, true, map[string]*RRProbe{key: mkProbe("NOERROR", "0.0.0.0")}),
},
RRsets: map[string]*RRsetView{key: {Name: "ex.", Type: "A"}},
}
st := runRule(t, &filteredHitRule{}, data, nil)
if _, ok := statesByCode(st)[CodeResolverFilteredHit]; !ok {
t.Errorf("want filtered hit, got %+v", st)
}
}

View file

@ -0,0 +1,65 @@
package checker
import (
"context"
"encoding/json"
"fmt"
sdk "git.happydns.org/checker-sdk-go/checker"
)
// fakeObs is a minimal ObservationGetter stand-in for tests. Whatever key is
// requested it serves the same canned payload (typically a
// ResolverPropagationData), or fails with err when err is set.
type fakeObs struct {
	payload any
	err     error
}

// newFakeObs returns a fakeObs serving payload, with no preset error.
func newFakeObs(payload any) *fakeObs {
	return &fakeObs{payload: payload}
}

// Get ignores the context and the key. It returns the preset error when there
// is one; otherwise it copies the payload into dest through a JSON
// marshal/unmarshal round trip, so dest receives what the payload's JSON
// encoding carries.
func (f *fakeObs) Get(_ context.Context, _ sdk.ObservationKey, dest any) error {
	if preset := f.err; preset != nil {
		return preset
	}

	encoded, marshalErr := json.Marshal(f.payload)
	if marshalErr != nil {
		return fmt.Errorf("fakeObs marshal: %w", marshalErr)
	}
	return json.Unmarshal(encoded, dest)
}

// GetRelated reports no related observations and never fails.
func (f *fakeObs) GetRelated(_ context.Context, _ sdk.ObservationKey) ([]sdk.RelatedObservation, error) {
	var none []sdk.RelatedObservation
	return none, nil
}
// errObs is an observation getter whose Get unconditionally returns err;
// tests use it to drive the error-handling branches of rules.
type errObs struct {
	err error
}

// Get ignores all of its arguments and returns the stored error.
func (e *errObs) Get(_ context.Context, _ sdk.ObservationKey, _ any) error {
	return e.err
}

// GetRelated reports no related observations and never fails.
func (e *errObs) GetRelated(_ context.Context, _ sdk.ObservationKey) ([]sdk.RelatedObservation, error) {
	var none []sdk.RelatedObservation
	return none, nil
}
// mkProbe is a test helper returning a UDP RRProbe with the given rcode,
// signature and (optional) records. Records stays nil when none are passed.
func mkProbe(rcode, sig string, records ...string) *RRProbe {
	probe := new(RRProbe)
	probe.Rcode = rcode
	probe.Signature = sig
	probe.Records = records
	probe.Transport = TransportUDP
	return probe
}
// mkResolver is a test helper building a UDP ResolverView around the given
// probe map. The id doubles as the display name and the IP is fixed to the
// placeholder "0.0.0.0".
func mkResolver(id, region string, filtered, reachable bool, probes map[string]*RRProbe) *ResolverView {
	view := &ResolverView{
		ID:        id,
		Name:      id,
		IP:        "0.0.0.0",
		Region:    region,
		Filtered:  filtered,
		Reachable: reachable,
		Transport: TransportUDP,
		Probes:    probes,
	}
	return view
}
// statesByCode groups check states by their Code so tests can look a code up
// directly. States sharing a code keep their input order; the returned map is
// never nil and only contains codes that occur at least once.
func statesByCode(states []sdk.CheckState) map[string][]sdk.CheckState {
	byCode := make(map[string][]sdk.CheckState)
	for i := range states {
		code := states[i].Code
		byCode[code] = append(byCode[code], states[i])
	}
	return byCode
}

215
checker/types.go Normal file
View file

@ -0,0 +1,215 @@
package checker
import (
"encoding/json"
"github.com/miekg/dns"
)
// ObservationKeyResolverPropagation is the observation key used to store data
// produced by this checker.
//
// NOTE(review): the payload stored under this key is presumably
// ResolverPropagationData (described in this file as the top-level
// observation payload); confirm against the collector.
const ObservationKeyResolverPropagation = "resolver_propagation"
// Severity classifies a finding. It is the type of Finding.Severity and is
// serialized as its lowercase string value.
type Severity string
// Known severities. NOTE(review): the names suggest increasing importance
// (info < warn < crit); nothing in this file orders them.
const (
SeverityInfo Severity = "info"
SeverityWarn Severity = "warn"
SeverityCrit Severity = "crit"
)
// Finding codes: stable machine-readable identifiers surfaced in the UI.
//
// Every value carries the "rprop_" prefix. The constants are untyped strings.
// Note that a constant's name does not always mirror its value (for example
// CodeDNSSECUnvalidated is "rprop_dnssec_not_validated" and CodeResolverRewrote
// is "rprop_resolver_rewrote_answer").
const (
// Zone-wide.
CodeNoResolvers = "rprop_no_resolvers"
CodeAllResolversDown = "rprop_all_resolvers_down"
CodeSerialDrift = "rprop_serial_drift"
CodeStaleCache = "rprop_stale_cache"
CodeDNSSECFailure = "rprop_dnssec_failure"
CodeDNSSECUnvalidated = "rprop_dnssec_not_validated"
CodeRegionalSplit = "rprop_regional_split"
CodePartialPropagation = "rprop_partial_propagation"
CodeAnswerDrift = "rprop_answer_drift"
CodeUnexpectedNXDOMAIN = "rprop_unexpected_nxdomain"
CodeUnexpectedSERVFAIL = "rprop_unexpected_servfail"
// Per-resolver.
CodeResolverUnreachable = "rprop_resolver_unreachable"
CodeResolverTimeout = "rprop_resolver_timeout"
CodeResolverRewrote = "rprop_resolver_rewrote_answer"
CodeResolverFilteredHit = "rprop_resolver_filtered_hit"
CodeResolverHighLatency = "rprop_resolver_high_latency"
)
// Transport identifies the protocol used to reach a resolver. It appears both
// on ResolverView and on each RRProbe, serialized as its lowercase string
// value.
type Transport string
// Known transports. NOTE(review): "dot" and "doh" presumably stand for
// DNS-over-TLS and DNS-over-HTTPS respectively.
const (
TransportUDP Transport = "udp"
TransportTCP Transport = "tcp"
TransportDoT Transport = "dot"
TransportDoH Transport = "doh"
)
// Finding is a single observation produced during collection.
//
// Code, Severity and Message are always serialized; the scoping fields and the
// remedy are dropped from the JSON output when empty.
type Finding struct {
// Code is the machine-readable identifier of the finding (presumably one
// of the Code* constants declared in this file).
Code string `json:"code"`
// Severity classifies the finding (see the Severity* constants).
Severity Severity `json:"severity"`
// Message is the text describing the finding.
Message string `json:"message"`
// Resolver is the resolver ID when the finding is scoped to one.
Resolver string `json:"resolver,omitempty"`
// RRset is "name/TYPE" when the finding is scoped to one RR set.
RRset string `json:"rrset,omitempty"`
// Remedy is a short, user-facing sentence describing what to do.
Remedy string `json:"remedy,omitempty"`
}
// RRProbe is the observation for a single (resolver, RRset) pair.
//
// Every field is tagged omitempty, so zero values (empty strings, AD=false,
// a zero TTL or latency) are absent from the JSON encoding.
type RRProbe struct {
// Rcode is the response rcode in text form (NOERROR / NXDOMAIN /
// SERVFAIL / REFUSED / …). Empty when the probe failed before a
// response was parsed.
Rcode string `json:"rcode,omitempty"`
// Signature is the sorted, TTL-stripped RDATA joined with "|". Two
// resolvers agree on an answer iff their signatures are equal.
Signature string `json:"signature,omitempty"`
// Records is the list of record RDATA strings as returned by the
// resolver, sorted.
Records []string `json:"records,omitempty"`
// MinTTL is the smallest TTL across the RRset. Useful to spot stale
// caches (TTL close to 0 means the resolver just refreshed).
// NOTE(review): a caching resolver normally serves an entry with a
// decreasing TTL, so a value close to 0 would rather point at an entry
// about to expire than at one just refreshed; confirm the intended reading.
MinTTL uint32 `json:"min_ttl,omitempty"`
// AD indicates the resolver set the AD bit on the response (DNSSEC
// validated). Only meaningful on AD-capable resolvers.
AD bool `json:"ad,omitempty"`
// LatencyMs is the observed round-trip time in milliseconds.
LatencyMs int64 `json:"latency_ms,omitempty"`
// Transport is the protocol used for this probe.
Transport Transport `json:"transport,omitempty"`
// Error describes a transport/protocol failure. Set means the probe
// did not complete and Rcode/Signature are empty.
Error string `json:"error,omitempty"`
}
// ResolverView aggregates every probe performed against a single resolver.
type ResolverView struct {
// ID is the resolver identifier; ResolverPropagationData.Resolvers is
// documented as keyed by it.
ID string `json:"id"`
// Name is the resolver's display name.
Name string `json:"name"`
// IP is the resolver address, as a string.
IP string `json:"ip"`
// Region is the resolver's region label (the tests in this repository
// use values such as "global" and "eu").
Region string `json:"region"`
// Filtered flags the resolver as a filtering one. NOTE(review):
// presumably a resolver that deliberately alters or blocks answers —
// RRsetView computes its consensus from unfiltered resolvers only.
Filtered bool `json:"filtered,omitempty"`
// Transport is the protocol used to reach this resolver.
Transport Transport `json:"transport"`
// Reachable is true when at least one probe against this resolver
// produced a valid response (any rcode, including NXDOMAIN).
Reachable bool `json:"reachable"`
// Probes is one RRProbe per "name/TYPE" string.
Probes map[string]*RRProbe `json:"probes,omitempty"`
}
// RRsetView is the cross-resolver picture of a single (name, type): which
// signatures were seen, which resolvers returned each signature, and which
// one we pick as "consensus". The consensus is the most-returned signature
// from unfiltered, reachable resolvers.
type RRsetView struct {
// Name is the owner name of the RRset.
Name string `json:"name"`
// Type is the RR type in text form (e.g. "A", "SOA").
Type string `json:"type"`
// Expected is the signature computed from the user's declared zone. Used
// to distinguish "resolvers disagree with each other" from "resolvers
// agree but are wrong".
Expected string `json:"expected,omitempty"`
// ExpectedRecords presumably lists the records behind Expected — TODO confirm.
ExpectedRecords []string `json:"expected_records,omitempty"`
// Groups buckets resolvers by signature.
Groups []SignatureGroup `json:"groups,omitempty"`
// ConsensusSig is the signature returned by the majority of unfiltered
// reachable resolvers.
ConsensusSig string `json:"consensus_sig,omitempty"`
// Agreeing / Dissenting are resolver IDs relative to the consensus.
Agreeing []string `json:"agreeing,omitempty"`
Dissenting []string `json:"dissenting,omitempty"`
// MatchesExpected is true when the consensus matches the expected
// signature. When Expected is empty we skip this check.
// Unlike most fields here it has no omitempty, so false is serialized
// explicitly.
MatchesExpected bool `json:"matches_expected"`
}
// SignatureGroup is one bucket in RRsetView: a signature + its records + the
// resolvers that returned it.
type SignatureGroup struct {
// Signature is the answer signature shared by every resolver in the group.
Signature string `json:"signature"`
// Records is the record list behind the signature.
Records []string `json:"records,omitempty"`
// Resolvers lists the resolvers that returned this signature
// (presumably by resolver ID — TODO confirm).
Resolvers []string `json:"resolvers,omitempty"`
// Rcode is the response rcode associated with the group, in text form.
Rcode string `json:"rcode,omitempty"`
}
// ResolverPropagationData is the top-level observation payload.
//
// Zone, Names, Types and Stats are always serialized; the remaining fields are
// dropped from the JSON output when empty or zero.
type ResolverPropagationData struct {
// Zone is the zone under test (the tests in this repository use the
// fully-qualified form, e.g. "ex.").
Zone string `json:"zone"`
// Names lists the owner names probed: apex + user-provided subdomains.
Names []string `json:"names"`
// Types lists the RR types probed (text: "A", "AAAA", "MX", …).
Types []string `json:"types"`
// Resolvers is the per-resolver view, keyed by resolver ID.
Resolvers map[string]*ResolverView `json:"resolvers,omitempty"`
// RRsets is the per-RRset cross-resolver view, keyed by "name/TYPE".
RRsets map[string]*RRsetView `json:"rrsets,omitempty"`
// DeclaredSerial is the SOA serial saved by happyDomain (when available).
DeclaredSerial uint32 `json:"declared_serial,omitempty"`
// RunDurationMs is the wall-clock duration of the probe round.
RunDurationMs int64 `json:"run_duration_ms,omitempty"`
// Stats summarizes the run.
Stats Stats `json:"stats"`
}
// Stats is a rollup of resolver health, useful for the dashboard.
//
// All counters are always serialized (no omitempty).
// NOTE(review): the code that fills these counters is not part of this file;
// the per-field meaning is only suggested by the field names (e.g. filtered
// vs. unfiltered resolvers probed, unfiltered resolvers agreeing with the
// consensus) and should be confirmed against the collector.
type Stats struct {
TotalResolvers int `json:"total_resolvers"`
ReachableResolvers int `json:"reachable_resolvers"`
UnfilteredProbed int `json:"unfiltered_probed"`
FilteredProbed int `json:"filtered_probed"`
CountriesCovered int `json:"countries_covered"`
UnfilteredAgreeing int `json:"unfiltered_agreeing"`
}
// originService mirrors happyDomain's abstract.Origin payload (same shape as
// checker-propagation). We only need the NS list + SOA to detect "this zone
// is supposed to exist".
type originService struct {
// SOA is the zone's SOA record; nil (and omitted from JSON) when absent.
SOA *dns.SOA `json:"soa,omitempty"`
// NameServers is the zone's NS record list, serialized under "ns".
NameServers []*dns.NS `json:"ns"`
}
// serviceMessage mirrors happyDomain's ServiceMessage envelope.
type serviceMessage struct {
// Type is read from the "_svctype" key (presumably the service type
// discriminator — TODO confirm).
Type string `json:"_svctype"`
// Domain is read from the "_domain" key.
Domain string `json:"_domain"`
// Service holds the raw, still-encoded service body so that its decoding
// can be deferred (e.g. into originService).
Service json.RawMessage `json:"Service"`
}
// rrsetKey returns the "name/TYPE" identifier under which an RRset is indexed.
// The owner name is passed through dns.Fqdn first; as exercised by
// TestRRsetKey this appends a trailing dot when missing and leaves the
// letter case untouched.
func rrsetKey(name, typ string) string {
	owner := dns.Fqdn(name)
	return owner + "/" + typ
}

20
checker/types_test.go Normal file
View file

@ -0,0 +1,20 @@
package checker
import "testing"
// TestRRsetKey pins the key format produced by rrsetKey: the owner name is
// made fully qualified, its case is left as-is, and the type is appended
// after a "/".
func TestRRsetKey(t *testing.T) {
	type keyCase struct {
		name string
		typ  string
		want string
	}
	table := []keyCase{
		{name: "example.com", typ: "A", want: "example.com./A"},
		{name: "example.com.", typ: "A", want: "example.com./A"},
		// case is preserved (Fqdn doesn't downcase)
		{name: "WWW.Example.Com", typ: "AAAA", want: "WWW.Example.Com./AAAA"},
		{name: ".", typ: "SOA", want: "./SOA"},
		{name: "sub.example.com", typ: "MX", want: "sub.example.com./MX"},
	}

	for i := range table {
		tc := table[i]
		got := rrsetKey(tc.name, tc.typ)
		if got == tc.want {
			continue
		}
		t.Errorf("rrsetKey(%q,%q) = %q, want %q", tc.name, tc.typ, got, tc.want)
	}
}

16
go.mod Normal file
View file

@ -0,0 +1,16 @@
module git.happydns.org/checker-resolver-propagation
go 1.25.0
require (
git.happydns.org/checker-sdk-go v1.5.0
github.com/miekg/dns v1.1.72
)
require (
golang.org/x/mod v0.31.0 // indirect
golang.org/x/net v0.48.0 // indirect
golang.org/x/sync v0.19.0 // indirect
golang.org/x/sys v0.39.0 // indirect
golang.org/x/tools v0.40.0 // indirect
)

16
go.sum Normal file
View file

@ -0,0 +1,16 @@
git.happydns.org/checker-sdk-go v1.5.0 h1:5uD5Cm6xJ+lwnhbJ09iCXGHbYS9zRh+Yh0NeBHkAPBY=
git.happydns.org/checker-sdk-go v1.5.0/go.mod h1:aNAcfYFfbhvH9kJhE0Njp5GX0dQbxdRB0rJ0KvSC5nI=
github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/miekg/dns v1.1.72 h1:vhmr+TF2A3tuoGNkLDFK9zi36F2LS+hKTRW0Uf8kbzI=
github.com/miekg/dns v1.1.72/go.mod h1:+EuEPhdHOsfk6Wk5TT2CzssZdqkmFhf8r+aVyDEToIs=
golang.org/x/mod v0.31.0 h1:HaW9xtz0+kOcWKwli0ZXy79Ix+UW/vOfmWI5QVd2tgI=
golang.org/x/mod v0.31.0/go.mod h1:43JraMp9cGx1Rx3AqioxrbrhNsLl2l/iNAvuBkrezpg=
golang.org/x/net v0.48.0 h1:zyQRTTrjc33Lhh0fBgT/H3oZq9WuvRR5gPC70xpDiQU=
golang.org/x/net v0.48.0/go.mod h1:+ndRgGjkh8FGtu1w1FGbEC31if4VrNVMuKTgcAAnQRY=
golang.org/x/sync v0.19.0 h1:vV+1eWNmZ5geRlYjzm2adRgW2/mcpevXNg50YZtPCE4=
golang.org/x/sync v0.19.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI=
golang.org/x/sys v0.39.0 h1:CvCKL8MeisomCi6qNZ+wbb0DN9E5AATixKsvNtMoMFk=
golang.org/x/sys v0.39.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=
golang.org/x/tools v0.40.0 h1:yLkxfA+Qnul4cs9QA3KnlFu0lVmd8JJfoq+E41uSutA=
golang.org/x/tools v0.40.0/go.mod h1:Ik/tzLRlbscWpqqMRjyWYDisX8bG13FrdXp3o4Sr9lc=

28
main.go Normal file
View file

@ -0,0 +1,28 @@
package main
import (
"flag"
"log"
resolverpropagation "git.happydns.org/checker-resolver-propagation/checker"
"git.happydns.org/checker-sdk-go/checker/server"
)
// listenAddr is the value of the -listen flag: the address main hands to the
// server's ListenAndServe. It defaults to ":8080".
var listenAddr = flag.String("listen", ":8080", "HTTP listen address")
// Version is the standalone binary's version. It defaults to "custom-build"
// and is meant to be overridden by the CI at link time:
//
// go build -ldflags "-X main.Version=1.2.3" .
//
// main copies it into the checker package before building the provider.
var Version = "custom-build"
// main parses the command-line flags, stamps the checker package with the
// binary's Version, then serves the resolver-propagation provider over HTTP
// on the -listen address. It exits through log.Fatalf when the server
// returns an error.
func main() {
	flag.Parse()

	// The version must be set before the provider is built.
	resolverpropagation.Version = Version

	err := server.New(resolverpropagation.Provider()).ListenAndServe(*listenAddr)
	if err != nil {
		log.Fatalf("server error: %v", err)
	}
}

18
plugin/plugin.go Normal file
View file

@ -0,0 +1,18 @@
// Command plugin is the happyDomain plugin entrypoint for the
// resolver-propagation checker.
package main
import (
	"fmt"

	resolverpropagation "git.happydns.org/checker-resolver-propagation/checker"
	sdk "git.happydns.org/checker-sdk-go/checker"
)
// Version is the plugin's version. It defaults to "custom-build" and is
// copied into the checker package by NewCheckerPlugin.
var Version = "custom-build"
// NewCheckerPlugin is the symbol resolved by happyDomain when loading the .so.
//
// It stamps the checker package with this plugin's Version, builds the
// provider and returns it together with its checker definition.
//
// An error is returned when the provider does not implement
// sdk.CheckerDefinitionProvider. The assertion uses the comma-ok form so that
// such a mismatch is reported to the caller instead of panicking inside the
// process that loaded the plugin.
func NewCheckerPlugin() (*sdk.CheckerDefinition, sdk.ObservationProvider, error) {
	resolverpropagation.Version = Version

	prvd := resolverpropagation.Provider()
	defProvider, ok := prvd.(sdk.CheckerDefinitionProvider)
	if !ok {
		return nil, nil, fmt.Errorf("resolver-propagation provider %T does not implement sdk.CheckerDefinitionProvider", prvd)
	}
	return defProvider.Definition(), prvd, nil
}