diff --git a/.drone-manifest.yml b/.drone-manifest.yml new file mode 100644 index 0000000..3fd8f9c --- /dev/null +++ b/.drone-manifest.yml @@ -0,0 +1,22 @@ +image: happydomain/checker-resolver-propagation:{{#if build.tag}}{{trimPrefix "v" build.tag}}{{else}}latest{{/if}} +{{#if build.tags}} +tags: +{{#each build.tags}} + - {{this}} +{{/each}} +{{/if}} +manifests: + - image: happydomain/checker-resolver-propagation:{{#if build.tag}}{{trimPrefix "v" build.tag}}-{{/if}}linux-amd64 + platform: + architecture: amd64 + os: linux + - image: happydomain/checker-resolver-propagation:{{#if build.tag}}{{trimPrefix "v" build.tag}}-{{/if}}linux-arm64 + platform: + architecture: arm64 + os: linux + variant: v8 + - image: happydomain/checker-resolver-propagation:{{#if build.tag}}{{trimPrefix "v" build.tag}}-{{/if}}linux-arm + platform: + architecture: arm + os: linux + variant: v7 diff --git a/.drone.yml b/.drone.yml new file mode 100644 index 0000000..c6dcce9 --- /dev/null +++ b/.drone.yml @@ -0,0 +1,187 @@ +--- +kind: pipeline +type: docker +name: build-amd64 + +platform: + os: linux + arch: amd64 + +steps: + - name: checker build + image: golang:1-alpine + commands: + - apk add --no-cache git make + - make + environment: + CHECKER_VERSION: "${DRONE_BRANCH}-${DRONE_COMMIT}" + CGO_ENABLED: 0 + when: + event: + exclude: + - tag + + - name: checker build tag + image: golang:1-alpine + commands: + - apk add --no-cache git make + - make + environment: + CHECKER_VERSION: "${DRONE_SEMVER}" + CGO_ENABLED: 0 + when: + event: + - tag + + - name: publish on Docker Hub + image: plugins/docker + settings: + repo: happydomain/checker-resolver-propagation + auto_tag: true + auto_tag_suffix: ${DRONE_STAGE_OS}-${DRONE_STAGE_ARCH} + dockerfile: Dockerfile + build_args: + - CHECKER_VERSION=${DRONE_BRANCH}-${DRONE_COMMIT} + username: + from_secret: docker_username + password: + from_secret: docker_password + when: + event: + exclude: + - tag + + - name: publish on Docker Hub (tag) + image: 
plugins/docker + settings: + repo: happydomain/checker-resolver-propagation + auto_tag: true + auto_tag_suffix: ${DRONE_STAGE_OS}-${DRONE_STAGE_ARCH} + dockerfile: Dockerfile + build_args: + - CHECKER_VERSION=${DRONE_SEMVER} + username: + from_secret: docker_username + password: + from_secret: docker_password + when: + event: + - tag + +trigger: + branch: + exclude: + - renovate/* + event: + - cron + - push + - tag + +--- +kind: pipeline +type: docker +name: build-arm64 + +platform: + os: linux + arch: arm64 + +steps: + - name: checker build + image: golang:1-alpine + commands: + - apk add --no-cache git make + - make + environment: + CHECKER_VERSION: "${DRONE_BRANCH}-${DRONE_COMMIT}" + CGO_ENABLED: 0 + when: + event: + exclude: + - tag + + - name: checker build tag + image: golang:1-alpine + commands: + - apk add --no-cache git make + - make + environment: + CHECKER_VERSION: "${DRONE_SEMVER}" + CGO_ENABLED: 0 + when: + event: + - tag + + - name: publish on Docker Hub + image: plugins/docker + settings: + repo: happydomain/checker-resolver-propagation + auto_tag: true + auto_tag_suffix: ${DRONE_STAGE_OS}-${DRONE_STAGE_ARCH} + dockerfile: Dockerfile + build_args: + - CHECKER_VERSION=${DRONE_BRANCH}-${DRONE_COMMIT} + username: + from_secret: docker_username + password: + from_secret: docker_password + when: + event: + exclude: + - tag + + - name: publish on Docker Hub (tag) + image: plugins/docker + settings: + repo: happydomain/checker-resolver-propagation + auto_tag: true + auto_tag_suffix: ${DRONE_STAGE_OS}-${DRONE_STAGE_ARCH} + dockerfile: Dockerfile + build_args: + - CHECKER_VERSION=${DRONE_SEMVER} + username: + from_secret: docker_username + password: + from_secret: docker_password + when: + event: + - tag + +trigger: + event: + - cron + - push + - tag + +--- +kind: pipeline +name: docker-manifest + +platform: + os: linux + arch: arm64 + +steps: + - name: publish on Docker Hub + image: plugins/manifest + settings: + auto_tag: true + ignore_missing: true + spec: 
.drone-manifest.yml + username: + from_secret: docker_username + password: + from_secret: docker_password + +trigger: + branch: + exclude: + - renovate/* + event: + - cron + - push + - tag + +depends_on: + - build-amd64 + - build-arm64 diff --git a/README.md b/README.md index 66f6de4..6078f6f 100644 --- a/README.md +++ b/README.md @@ -71,7 +71,7 @@ apex / NS configuration). | Id | Type | Default | Description | |-----------------------|--------|-------------------------------|------------------------------------------------------------------------------------------------------------------------| -| `recordTypes` | string | `SOA,NS,A,AAAA,MX,TXT,CAA` | Comma-separated list of RR types to probe at the apex (and at each `subdomains` entry). | +| `recordTypes` | string | _derived from zone_ | Comma-separated list of RR types to probe at every owner. Leave empty to derive the list from the working zone (SOA/NS at the apex plus whatever RR types are defined on each owner). | | `subdomains` | string | `www` | Comma-separated list of owner names to probe in addition to the apex (e.g. `www,mail,@`). Empty = apex only. | | `includeFiltered` | bool | `false` | Probe filtering resolvers (malware/family/adblock). Their answers routinely diverge by design. | | `region` | string | `all` | Restrict to a region: `all`, `global`, `na`, `eu`, `asia`, `ru`, `me`. | @@ -82,24 +82,20 @@ apex / NS configuration). ## Rules -Each rule emits a finding code. Severity can be affected by the options above. - -| Code | Default severity | Condition | -|-------------------------------|------------------|-----------| -| `rprop_no_resolvers` | critical | The current option set selects no resolver from the catalog. | -| `rprop_all_resolvers_down` | critical | Every selected resolver failed to answer (likely no DNS connectivity from the checker host). | -| `rprop_resolver_unreachable` | warning | An individual resolver failed to answer within the run budget. 
| -| `rprop_resolver_high_latency` | info | A resolver's average response time exceeds `latencyThresholdMs`. | -| `rprop_resolver_filtered_hit` | info | A filtered resolver returned a different answer than the consensus (typical blocklist behaviour). Only when `includeFiltered` is enabled. | -| `rprop_partial_propagation` | warning | Public resolvers disagree on the answer for a probed RRset. | -| `rprop_answer_drift` | critical | The public consensus differs from the answer served by the zone's authoritative nameservers. | -| `rprop_unexpected_nxdomain` | critical | Some resolvers return NXDOMAIN while others return NOERROR for the same RRset. | -| `rprop_unexpected_servfail` | critical | A resolver returns SERVFAIL (usually a DNSSEC or reachability failure). | -| `rprop_regional_split` | warning | Every resolver of a region agrees on an answer that differs from the global consensus. | -| `rprop_serial_drift` | warning | Unfiltered resolvers disagree on the SOA serial. | -| `rprop_stale_cache` | info | A resolver still serves an SOA serial below the one last observed by happyDomain. | -| `rprop_dnssec_failure` | critical | A validating resolver fails to validate the zone's DNSSEC chain (returns SERVFAIL with AD/CD semantics). | -| `rprop_dnssec_not_validated` | info | A validating resolver answered without setting AD on a signed zone. | +| Code | Description | Severity | +|----------------------------------------------|-------------------------------------------------------------------------------------------------------------------|---------------------| +| `resolver_propagation.selection` | Checks that the current option set selects at least one public resolver. | CRITICAL | +| `resolver_propagation.reachable` | Checks that at least one selected resolver answered a query. | CRITICAL | +| `resolver_propagation.latency` | Flags resolvers that are unreachable or whose average response time exceeds the configured threshold. 
| WARNING | +| `resolver_propagation.filtered_hit` | Reports filtered resolvers returning a different answer than the consensus (typical blocklist behaviour). | INFO | +| `resolver_propagation.consensus` | Checks that public resolvers agree on a single answer for each probed RRset. | WARNING | +| `resolver_propagation.matches_authoritative` | Checks that the public consensus matches the answer served by the zone's authoritative nameservers. | CRITICAL | +| `resolver_propagation.nxdomain` | Flags RRsets for which some resolvers return NXDOMAIN while others return NOERROR. | CRITICAL | +| `resolver_propagation.servfail` | Flags RRsets for which any resolver returns SERVFAIL (usually DNSSEC or reachability failure). | CRITICAL | +| `resolver_propagation.regional_split` | Flags regions in which every resolver agrees on an answer that differs from the global consensus. | WARNING | +| `resolver_propagation.serial_drift` | Flags disagreement on the SOA serial across unfiltered resolvers. | WARNING | +| `resolver_propagation.stale_cache` | Flags resolvers still serving an SOA serial below the one saved by happyDomain. | INFO | +| `resolver_propagation.dnssec` | Checks that validating resolvers successfully validate the zone's DNSSEC chain. 
| CRITICAL | ## License diff --git a/checker/collect.go b/checker/collect.go index defe6dd..e635696 100644 --- a/checker/collect.go +++ b/checker/collect.go @@ -34,7 +34,7 @@ func (p *resolverPropagationProvider) Collect(ctx context.Context, opts sdk.Chec includeFiltered := sdk.GetBoolOption(opts, "includeFiltered", false) region := getStringOpt(opts, "region", "all") transportsOpt := getStringOpt(opts, "transports", "udp") - recordTypesOpt := getStringOpt(opts, "recordTypes", "SOA,NS,A,AAAA,MX,TXT,CAA") + recordTypesOpt := getStringOpt(opts, "recordTypes", "") subdomainsOpt := getStringOpt(opts, "subdomains", "") runTimeoutS := sdk.GetIntOption(opts, "runTimeoutSeconds", 30) allowlistOpt := getStringOpt(opts, "resolverAllowlist", "") @@ -44,15 +44,12 @@ func (p *resolverPropagationProvider) Collect(ctx context.Context, opts sdk.Chec if len(transports) == 0 { transports = []string{string(TransportUDP)} } - qtypes := parseQTypes(recordTypesOpt) - if len(qtypes) == 0 { - return nil, fmt.Errorf("no valid record types in %q", recordTypesOpt) - } extraNames := parseCSV(subdomainsOpt) allowlist := parseCSV(allowlistOpt) // Build the list of owner names to probe. - names := []string{dns.Fqdn(zone)} + apex := dns.Fqdn(zone) + names := []string{apex} seenName := map[string]bool{names[0]: true} for _, sd := range extraNames { full := joinSubdomain(sd, zone) @@ -62,12 +59,20 @@ func (p *resolverPropagationProvider) Collect(ctx context.Context, opts sdk.Chec } } + // Pick the RR types to probe at each owner. An explicit recordTypes + // option overrides everything; otherwise we derive the per-owner type + // set from the working zone (when the host auto-filled it). 
+ ownerQTypes, typeUnion, err := resolveQTypes(opts, recordTypesOpt, apex, names) + if err != nil { + return nil, err + } + resolvers := selectedResolvers(includeFiltered, region, allowlist) data := &ResolverPropagationData{ Zone: dns.Fqdn(zone), Names: names, - Types: qtypeNames(qtypes), + Types: qtypeNames(typeUnion), Resolvers: map[string]*ResolverView{}, RRsets: map[string]*RRsetView{}, } @@ -88,10 +93,10 @@ func (p *resolverPropagationProvider) Collect(ctx context.Context, opts sdk.Chec started := time.Now() // Ground truth from the zone's own authoritative servers. - expected := collectExpected(runCtx, zone, svc, names, qtypes) + expected := collectExpectedPerOwner(runCtx, zone, svc, ownerQTypes) for _, n := range names { - for _, qt := range qtypes { + for _, qt := range ownerQTypes[n] { key := rrsetKey(n, dns.TypeToString[qt]) v := &RRsetView{ Name: strings.ToLower(dns.Fqdn(n)), @@ -158,7 +163,7 @@ func (p *resolverPropagationProvider) Collect(ctx context.Context, opts sdk.Chec } for _, n := range names { - for _, qt := range qtypes { + for _, qt := range ownerQTypes[n] { probe := runProbe(runCtx, job.r, job.tr, n, qt) key := rrsetKey(n, dns.TypeToString[qt]) view.Probes[key] = probe @@ -207,7 +212,7 @@ type expectedEntry struct { records []string } -func collectExpected(ctx context.Context, zone string, svc *originService, names []string, qtypes []uint16) map[string]*expectedEntry { +func collectExpectedPerOwner(ctx context.Context, zone string, svc *originService, ownerQTypes map[string][]uint16) map[string]*expectedEntry { out := map[string]*expectedEntry{} var nsHosts []string @@ -244,7 +249,7 @@ func collectExpected(ctx context.Context, zone string, svc *originService, names return out } - for _, n := range names { + for n, qtypes := range ownerQTypes { for _, qt := range qtypes { key := rrsetKey(n, dns.TypeToString[qt]) if e := queryAuthoritative(ctx, authAddrs, n, qt); e != nil { diff --git a/checker/definition.go b/checker/definition.go index 
99376a0..dac7fb4 100644 --- a/checker/definition.go +++ b/checker/definition.go @@ -32,8 +32,7 @@ func (p *resolverPropagationProvider) Definition() *sdk.CheckerDefinition { Id: "recordTypes", Type: "string", Label: "Record types to probe", - Description: "Comma-separated list of RR types. The checker probes every listed type at the zone apex (and at each 'subdomains' entry).", - Default: "SOA,NS,A,AAAA,MX,TXT,CAA", + Description: "Comma-separated list of RR types to probe at every owner (apex + each 'subdomains' entry). Leave empty to derive the list from the working zone (SOA/NS at the apex plus whatever RR types are actually defined on each owner).", }, { Id: "subdomains", @@ -92,6 +91,12 @@ func (p *resolverPropagationProvider) Definition() *sdk.CheckerDefinition { Label: "Zone name", AutoFill: sdk.AutoFillDomainName, }, + { + Id: "zone", + Label: "Zone", + AutoFill: sdk.AutoFillZone, + Hide: true, + }, }, ServiceOpts: []sdk.CheckerOptionDocumentation{ { diff --git a/checker/report.go b/checker/report.go index ed7d851..572d952 100644 --- a/checker/report.go +++ b/checker/report.go @@ -116,7 +116,7 @@ type regionRow struct { Reachable int Agreeing int Disagreeing int - Errored int + Unreachable int } type resolverRow struct { @@ -283,28 +283,32 @@ func buildReportView(d *ResolverPropagationData, findings []Finding) *reportView r.Resolvers++ if rv.Reachable { r.Reachable++ + } else { + r.Unreachable++ } if rv.Reachable && !rv.Filtered { - ok := true + unreachableProbe := false + disagrees := false for key, p := range rv.Probes { if p == nil || p.Error != "" { - r.Errored++ - ok = false - break + unreachableProbe = true + continue } cv := d.RRsets[key] if cv == nil || cv.ConsensusSig == "" { continue } if p.Signature != cv.ConsensusSig { - ok = false - break + disagrees = true } } - if ok { - r.Agreeing++ - } else { + switch { + case disagrees: r.Disagreeing++ + case unreachableProbe: + r.Unreachable++ + default: + r.Agreeing++ } } } @@ -664,7 +668,7 @@ const 
reportTemplateHTML = `
| Region | Reachable | Agreeing | Disagreeing | Errored |
|---|---|---|---|---|
| Region | Reachable | Agreeing | Disagreeing | Unreachable |
| {{.Reachable}} / {{.Resolvers}} | {{.Agreeing}} | {{if .Disagreeing}}{{.Disagreeing}}{{else}}0{{end}} | -{{if .Errored}}{{.Errored}}{{else}}0{{end}} | +{{if .Unreachable}}{{.Unreachable}}{{else}}0{{end}} | {{.Region}} | {{.Transport}} | -{{if .Reachable}}{{.AvgMs}}{{else}}unreachable{{end}} | +{{if .Reachable}}{{.AvgMs}}{{else}}unreachable{{end}} |
{{range .Probes}}
diff --git a/checker/rules.go b/checker/rules.go
index 9ef20bf..6c7d822 100644
--- a/checker/rules.go
+++ b/checker/rules.go
@@ -57,3 +57,7 @@ func warnState(code, subject, message string) sdk.CheckState {
func critState(code, subject, message string) sdk.CheckState {
return sdk.CheckState{Status: sdk.StatusCrit, Message: message, Code: code, Subject: subject}
}
+
+// unknownState builds a CheckState whose status is sdk.StatusUnknown and that
+// carries the given finding code, subject and human-readable message.
+func unknownState(code, subject, message string) sdk.CheckState {
+	state := sdk.CheckState{Status: sdk.StatusUnknown}
+	state.Message = message
+	state.Code = code
+	state.Subject = subject
+	return state
+}
diff --git a/checker/rules_consensus.go b/checker/rules_consensus.go
index eb45834..3f7ff96 100644
--- a/checker/rules_consensus.go
+++ b/checker/rules_consensus.go
@@ -91,8 +91,8 @@ func (r *authoritativeMatchRule) Evaluate(ctx context.Context, obs sdk.Observati
anyExpected = true
switch {
case v.ConsensusSig == "":
- states = append(states, critState("resolver_propagation.matches_authoritative.no_consensus", key,
- fmt.Sprintf("no public resolver returned a usable answer for %s (authoritative answer is known)", key)))
+ states = append(states, unknownState("resolver_propagation.matches_authoritative.no_consensus", key,
+ fmt.Sprintf("no public resolver returned a usable answer for %s (authoritative answer is known); resolvers unreachable from the checker", key)))
case !v.MatchesExpected:
states = append(states, critState(CodeAnswerDrift, key,
fmt.Sprintf("consensus of public resolvers for %s differs from the authoritative answer, wait for TTL expiry or force a flush", key)))
diff --git a/checker/rules_resolvers.go b/checker/rules_resolvers.go
index 8fa8b17..995140d 100644
--- a/checker/rules_resolvers.go
+++ b/checker/rules_resolvers.go
@@ -71,7 +71,7 @@ func (r *resolverLatencyRule) Evaluate(ctx context.Context, obs sdk.ObservationG
var states []sdk.CheckState
for _, rv := range data.Resolvers {
if !rv.Reachable {
- states = append(states, warnState(CodeResolverUnreachable, rv.ID,
+ states = append(states, unknownState(CodeResolverUnreachable, rv.ID,
fmt.Sprintf("resolver %s (%s, %s) did not answer any query", rv.Name, rv.IP, rv.Transport)))
continue
}
diff --git a/checker/rules_test.go b/checker/rules_test.go
index e14d999..1007fc7 100644
--- a/checker/rules_test.go
+++ b/checker/rules_test.go
@@ -326,8 +326,11 @@ func TestResolverLatencyRule(t *testing.T) {
if _, ok := codes[CodeResolverHighLatency]; !ok {
t.Errorf("want high latency for 'slow', got %+v", st)
}
- if _, ok := codes[CodeResolverUnreachable]; !ok {
+ unreach, ok := codes[CodeResolverUnreachable]
+ if !ok {
t.Errorf("want unreachable for 'absent', got %+v", st)
+ } else if unreach[0].Status != sdk.StatusUnknown {
+ t.Errorf("unreachable should be unknown (not a warning), got status %v", unreach[0].Status)
}
}
diff --git a/checker/zone.go b/checker/zone.go
new file mode 100644
index 0000000..1e3e853
--- /dev/null
+++ b/checker/zone.go
@@ -0,0 +1,188 @@
+package checker
+
+import (
+ "encoding/json"
+ "sort"
+ "strings"
+
+ "github.com/miekg/dns"
+
+ sdk "git.happydns.org/checker-sdk-go/checker"
+)
+
+// rawZone is the minimal subset of happyDomain's Zone JSON that this checker
+// decodes in order to derive the RR types actually present at each owner.
+// It mirrors the shape used by sibling checkers (see checker-legacy-records).
+type rawZone struct {
+ // DomainName is decoded from the optional "domain_name" key.
+ DomainName string `json:"domain_name,omitempty"`
+ // Services is decoded from the "services" key. resolveQTypes joins each
+ // map key with the apex to obtain the owner name the services belong to.
+ Services map[string][]rawService `json:"services"`
+}
+
+// rawService is one service envelope found under rawZone.Services.
+// NOTE(review): Type and Domain are presumably the service type identifier
+// and the owner the service is attached to; confirm against the host schema.
+type rawService struct {
+ Type string `json:"_svctype"`
+ Domain string `json:"_domain"`
+ // Service is kept as undecoded JSON so the body can be inspected later
+ // without knowing its concrete Go type.
+ Service json.RawMessage `json:"Service"`
+}
+
+// fallbackQTypes is the legacy default list of RR types (SOA, NS, A, AAAA,
+// MX, TXT, CAA). resolveQTypes applies it to every owner when no working
+// zone is available and the user did not set recordTypes explicitly.
+// It is a shared package-level slice: treat it as read-only.
+var fallbackQTypes = []uint16{
+ dns.TypeSOA, dns.TypeNS, dns.TypeA, dns.TypeAAAA,
+ dns.TypeMX, dns.TypeTXT, dns.TypeCAA,
+}
+
+// resolveQTypes returns the RR types to probe at each owner name plus the
+// union across all owners (for reporting/metrics).
+//
+// Precedence:
+//  1. Explicit "recordTypes" option → apply that list to every owner.
+//  2. Auto-filled "zone" option → derive per-owner types from the zone's
+//     services. The apex always carries SOA+NS even if the zone payload
+//     omits them. Owners with no derivable types fall back to A,AAAA so
+//     the probe still surfaces NXDOMAIN drift for user-requested
+//     subdomains that are not present in the zone.
+//  3. Neither → use the legacy default at every owner.
+//
+// Owner names are matched case-insensitively and regardless of a trailing
+// dot, since DNS names compare that way. The returned map is keyed by the
+// entries of names exactly as they were passed in.
+//
+// An error is returned only when recordTypes is set but contains no valid
+// RR type. A missing or undecodable zone is not an error.
+func resolveQTypes(opts sdk.CheckerOptions, recordTypesOpt, apex string, names []string) (map[string][]uint16, []uint16, error) {
+	if recordTypesOpt != "" {
+		qts := parseQTypes(recordTypesOpt)
+		if len(qts) == 0 {
+			return nil, nil, &invalidTypesError{raw: recordTypesOpt}
+		}
+		return uniformOwnerQTypes(names, qts), qts, nil
+	}
+
+	// Best effort: a missing or undecodable zone payload must not abort the
+	// run, so the decode error is deliberately dropped and the legacy
+	// default list is used instead.
+	zone, _ := readWorkingZone(opts)
+	if zone == nil {
+		// Hand out a copy so callers can never mutate the package default.
+		qts := append([]uint16(nil), fallbackQTypes...)
+		return uniformOwnerQTypes(names, qts), qts, nil
+	}
+
+	// canon gives the comparison form of an owner name: fully qualified and
+	// lower-cased. Keying the owner sets by it keeps the names list, the
+	// zone's service owners and the apex lookup consistent with each other.
+	canon := func(name string) string { return strings.ToLower(dns.Fqdn(name)) }
+
+	owner := make(map[string]map[uint16]bool, len(names))
+	for _, n := range names {
+		owner[canon(n)] = map[uint16]bool{}
+	}
+
+	for sub, services := range zone.Services {
+		set, ok := owner[canon(joinSubdomain(sub, apex))]
+		if !ok {
+			// Owner exists in the zone but was not requested for probing.
+			continue
+		}
+		for _, svc := range services {
+			for _, qt := range typesFromService(svc) {
+				set[qt] = true
+			}
+		}
+	}
+
+	// SOA + NS at apex are foundational; the rules depend on them.
+	if set, ok := owner[canon(apex)]; ok {
+		set[dns.TypeSOA] = true
+		set[dns.TypeNS] = true
+	}
+
+	out := make(map[string][]uint16, len(names))
+	unionSet := map[uint16]bool{}
+	for _, n := range names {
+		set := owner[canon(n)]
+		if len(set) == 0 {
+			// Owner present in the probe list but unknown to the zone:
+			// keep a minimal probe so a missing-record finding can fire.
+			set = map[uint16]bool{dns.TypeA: true, dns.TypeAAAA: true}
+		}
+		qts := sortedTypes(set)
+		out[n] = qts
+		for _, qt := range qts {
+			unionSet[qt] = true
+		}
+	}
+	return out, sortedTypes(unionSet), nil
+}
+
+// uniformOwnerQTypes assigns the same list of RR types to every owner in
+// names. The qts slice is stored as-is (shared, not copied) under each key.
+func uniformOwnerQTypes(names []string, qts []uint16) map[string][]uint16 {
+	perOwner := make(map[string][]uint16, len(names))
+	for i := 0; i < len(names); i++ {
+		perOwner[names[i]] = qts
+	}
+	return perOwner
+}
+
+// sortedTypes returns every key of set in ascending numeric order. Keys are
+// included regardless of the boolean stored under them; the result is never
+// nil, even for an empty or nil set.
+func sortedTypes(set map[uint16]bool) []uint16 {
+	types := make([]uint16, len(set))
+	next := 0
+	for qt := range set {
+		types[next] = qt
+		next++
+	}
+	sort.Slice(types, func(a, b int) bool { return types[b] > types[a] })
+	return types
+}
+
+// readWorkingZone parses the "zone" auto-fill option. The host may pass
+// the value either as a native struct (in-process plugin) or as a JSON
+// object (HTTP path); we round-trip through JSON in both cases for a
+// single decoding path. A missing zone is not an error — standalone /
+// HTTP callers may simply not provide one.
+func readWorkingZone(opts sdk.CheckerOptions) (*rawZone, error) {
+ v, ok := opts["zone"]
+ if !ok || v == nil {
+ return nil, nil
+ }
+ raw, err := json.Marshal(v)
+ if err != nil {
+ return nil, err
+ }
+ z := &rawZone{}
+ if err := json.Unmarshal(raw, z); err != nil {
+ return nil, err
+ }
+ return z, nil
+}
+
+// typesFromService extracts every RR type referenced by a service body.
+// happyDomain service envelopes are opaque to us (the registry is in the
+// host), so we scan the JSON for any nested "Rrtype": |