diff --git a/.drone-manifest.yml b/.drone-manifest.yml deleted file mode 100644 index 3fd8f9c..0000000 --- a/.drone-manifest.yml +++ /dev/null @@ -1,22 +0,0 @@ -image: happydomain/checker-resolver-propagation:{{#if build.tag}}{{trimPrefix "v" build.tag}}{{else}}latest{{/if}} -{{#if build.tags}} -tags: -{{#each build.tags}} - - {{this}} -{{/each}} -{{/if}} -manifests: - - image: happydomain/checker-resolver-propagation:{{#if build.tag}}{{trimPrefix "v" build.tag}}-{{/if}}linux-amd64 - platform: - architecture: amd64 - os: linux - - image: happydomain/checker-resolver-propagation:{{#if build.tag}}{{trimPrefix "v" build.tag}}-{{/if}}linux-arm64 - platform: - architecture: arm64 - os: linux - variant: v8 - - image: happydomain/checker-resolver-propagation:{{#if build.tag}}{{trimPrefix "v" build.tag}}-{{/if}}linux-arm - platform: - architecture: arm - os: linux - variant: v7 diff --git a/.drone.yml b/.drone.yml deleted file mode 100644 index c6dcce9..0000000 --- a/.drone.yml +++ /dev/null @@ -1,187 +0,0 @@ ---- -kind: pipeline -type: docker -name: build-amd64 - -platform: - os: linux - arch: amd64 - -steps: - - name: checker build - image: golang:1-alpine - commands: - - apk add --no-cache git make - - make - environment: - CHECKER_VERSION: "${DRONE_BRANCH}-${DRONE_COMMIT}" - CGO_ENABLED: 0 - when: - event: - exclude: - - tag - - - name: checker build tag - image: golang:1-alpine - commands: - - apk add --no-cache git make - - make - environment: - CHECKER_VERSION: "${DRONE_SEMVER}" - CGO_ENABLED: 0 - when: - event: - - tag - - - name: publish on Docker Hub - image: plugins/docker - settings: - repo: happydomain/checker-resolver-propagation - auto_tag: true - auto_tag_suffix: ${DRONE_STAGE_OS}-${DRONE_STAGE_ARCH} - dockerfile: Dockerfile - build_args: - - CHECKER_VERSION=${DRONE_BRANCH}-${DRONE_COMMIT} - username: - from_secret: docker_username - password: - from_secret: docker_password - when: - event: - exclude: - - tag - - - name: publish on Docker Hub (tag) - image: 
plugins/docker - settings: - repo: happydomain/checker-resolver-propagation - auto_tag: true - auto_tag_suffix: ${DRONE_STAGE_OS}-${DRONE_STAGE_ARCH} - dockerfile: Dockerfile - build_args: - - CHECKER_VERSION=${DRONE_SEMVER} - username: - from_secret: docker_username - password: - from_secret: docker_password - when: - event: - - tag - -trigger: - branch: - exclude: - - renovate/* - event: - - cron - - push - - tag - ---- -kind: pipeline -type: docker -name: build-arm64 - -platform: - os: linux - arch: arm64 - -steps: - - name: checker build - image: golang:1-alpine - commands: - - apk add --no-cache git make - - make - environment: - CHECKER_VERSION: "${DRONE_BRANCH}-${DRONE_COMMIT}" - CGO_ENABLED: 0 - when: - event: - exclude: - - tag - - - name: checker build tag - image: golang:1-alpine - commands: - - apk add --no-cache git make - - make - environment: - CHECKER_VERSION: "${DRONE_SEMVER}" - CGO_ENABLED: 0 - when: - event: - - tag - - - name: publish on Docker Hub - image: plugins/docker - settings: - repo: happydomain/checker-resolver-propagation - auto_tag: true - auto_tag_suffix: ${DRONE_STAGE_OS}-${DRONE_STAGE_ARCH} - dockerfile: Dockerfile - build_args: - - CHECKER_VERSION=${DRONE_BRANCH}-${DRONE_COMMIT} - username: - from_secret: docker_username - password: - from_secret: docker_password - when: - event: - exclude: - - tag - - - name: publish on Docker Hub (tag) - image: plugins/docker - settings: - repo: happydomain/checker-resolver-propagation - auto_tag: true - auto_tag_suffix: ${DRONE_STAGE_OS}-${DRONE_STAGE_ARCH} - dockerfile: Dockerfile - build_args: - - CHECKER_VERSION=${DRONE_SEMVER} - username: - from_secret: docker_username - password: - from_secret: docker_password - when: - event: - - tag - -trigger: - event: - - cron - - push - - tag - ---- -kind: pipeline -name: docker-manifest - -platform: - os: linux - arch: arm64 - -steps: - - name: publish on Docker Hub - image: plugins/manifest - settings: - auto_tag: true - ignore_missing: true - spec: 
.drone-manifest.yml - username: - from_secret: docker_username - password: - from_secret: docker_password - -trigger: - branch: - exclude: - - renovate/* - event: - - cron - - push - - tag - -depends_on: - - build-amd64 - - build-arm64 diff --git a/README.md b/README.md index 6078f6f..66f6de4 100644 --- a/README.md +++ b/README.md @@ -71,7 +71,7 @@ apex / NS configuration). | Id | Type | Default | Description | |-----------------------|--------|-------------------------------|------------------------------------------------------------------------------------------------------------------------| -| `recordTypes` | string | _derived from zone_ | Comma-separated list of RR types to probe at every owner. Leave empty to derive the list from the working zone (SOA/NS at the apex plus whatever RR types are defined on each owner). | +| `recordTypes` | string | `SOA,NS,A,AAAA,MX,TXT,CAA` | Comma-separated list of RR types to probe at the apex (and at each `subdomains` entry). | | `subdomains` | string | `www` | Comma-separated list of owner names to probe in addition to the apex (e.g. `www,mail,@`). Empty = apex only. | | `includeFiltered` | bool | `false` | Probe filtering resolvers (malware/family/adblock). Their answers routinely diverge by design. | | `region` | string | `all` | Restrict to a region: `all`, `global`, `na`, `eu`, `asia`, `ru`, `me`. | @@ -82,20 +82,24 @@ apex / NS configuration). ## Rules -| Code | Description | Severity | -|----------------------------------------------|-------------------------------------------------------------------------------------------------------------------|---------------------| -| `resolver_propagation.selection` | Checks that the current option set selects at least one public resolver. | CRITICAL | -| `resolver_propagation.reachable` | Checks that at least one selected resolver answered a query. 
| CRITICAL | -| `resolver_propagation.latency` | Flags resolvers that are unreachable or whose average response time exceeds the configured threshold. | WARNING | -| `resolver_propagation.filtered_hit` | Reports filtered resolvers returning a different answer than the consensus (typical blocklist behaviour). | INFO | -| `resolver_propagation.consensus` | Checks that public resolvers agree on a single answer for each probed RRset. | WARNING | -| `resolver_propagation.matches_authoritative` | Checks that the public consensus matches the answer served by the zone's authoritative nameservers. | CRITICAL | -| `resolver_propagation.nxdomain` | Flags RRsets for which some resolvers return NXDOMAIN while others return NOERROR. | CRITICAL | -| `resolver_propagation.servfail` | Flags RRsets for which any resolver returns SERVFAIL (usually DNSSEC or reachability failure). | CRITICAL | -| `resolver_propagation.regional_split` | Flags regions in which every resolver agrees on an answer that differs from the global consensus. | WARNING | -| `resolver_propagation.serial_drift` | Flags disagreement on the SOA serial across unfiltered resolvers. | WARNING | -| `resolver_propagation.stale_cache` | Flags resolvers still serving an SOA serial below the one saved by happyDomain. | INFO | -| `resolver_propagation.dnssec` | Checks that validating resolvers successfully validate the zone's DNSSEC chain. | CRITICAL | +Each rule emits a finding code. Severity can be affected by the options above. + +| Code | Default severity | Condition | +|-------------------------------|------------------|-----------| +| `rprop_no_resolvers` | critical | The current option set selects no resolver from the catalog. | +| `rprop_all_resolvers_down` | critical | Every selected resolver failed to answer (likely no DNS connectivity from the checker host). | +| `rprop_resolver_unreachable` | warning | An individual resolver failed to answer within the run budget. 
| +| `rprop_resolver_high_latency` | info | A resolver's average response time exceeds `latencyThresholdMs`. | +| `rprop_resolver_filtered_hit` | info | A filtered resolver returned a different answer than the consensus (typical blocklist behaviour). Only when `includeFiltered` is enabled. | +| `rprop_partial_propagation` | warning | Public resolvers disagree on the answer for a probed RRset. | +| `rprop_answer_drift` | critical | The public consensus differs from the answer served by the zone's authoritative nameservers. | +| `rprop_unexpected_nxdomain` | critical | Some resolvers return NXDOMAIN while others return NOERROR for the same RRset. | +| `rprop_unexpected_servfail` | critical | A resolver returns SERVFAIL (usually a DNSSEC or reachability failure). | +| `rprop_regional_split` | warning | Every resolver of a region agrees on an answer that differs from the global consensus. | +| `rprop_serial_drift` | warning | Unfiltered resolvers disagree on the SOA serial. | +| `rprop_stale_cache` | info | A resolver still serves an SOA serial below the one last observed by happyDomain. | +| `rprop_dnssec_failure` | critical | A validating resolver fails to validate the zone's DNSSEC chain (returns SERVFAIL with AD/CD semantics). | +| `rprop_dnssec_not_validated` | info | A validating resolver answered without setting AD on a signed zone. 
| ## License diff --git a/checker/collect.go b/checker/collect.go index e635696..defe6dd 100644 --- a/checker/collect.go +++ b/checker/collect.go @@ -34,7 +34,7 @@ func (p *resolverPropagationProvider) Collect(ctx context.Context, opts sdk.Chec includeFiltered := sdk.GetBoolOption(opts, "includeFiltered", false) region := getStringOpt(opts, "region", "all") transportsOpt := getStringOpt(opts, "transports", "udp") - recordTypesOpt := getStringOpt(opts, "recordTypes", "") + recordTypesOpt := getStringOpt(opts, "recordTypes", "SOA,NS,A,AAAA,MX,TXT,CAA") subdomainsOpt := getStringOpt(opts, "subdomains", "") runTimeoutS := sdk.GetIntOption(opts, "runTimeoutSeconds", 30) allowlistOpt := getStringOpt(opts, "resolverAllowlist", "") @@ -44,12 +44,15 @@ func (p *resolverPropagationProvider) Collect(ctx context.Context, opts sdk.Chec if len(transports) == 0 { transports = []string{string(TransportUDP)} } + qtypes := parseQTypes(recordTypesOpt) + if len(qtypes) == 0 { + return nil, fmt.Errorf("no valid record types in %q", recordTypesOpt) + } extraNames := parseCSV(subdomainsOpt) allowlist := parseCSV(allowlistOpt) // Build the list of owner names to probe. - apex := dns.Fqdn(zone) - names := []string{apex} + names := []string{dns.Fqdn(zone)} seenName := map[string]bool{names[0]: true} for _, sd := range extraNames { full := joinSubdomain(sd, zone) @@ -59,20 +62,12 @@ func (p *resolverPropagationProvider) Collect(ctx context.Context, opts sdk.Chec } } - // Pick the RR types to probe at each owner. An explicit recordTypes - // option overrides everything; otherwise we derive the per-owner type - // set from the working zone (when the host auto-filled it). 
- ownerQTypes, typeUnion, err := resolveQTypes(opts, recordTypesOpt, apex, names) - if err != nil { - return nil, err - } - resolvers := selectedResolvers(includeFiltered, region, allowlist) data := &ResolverPropagationData{ Zone: dns.Fqdn(zone), Names: names, - Types: qtypeNames(typeUnion), + Types: qtypeNames(qtypes), Resolvers: map[string]*ResolverView{}, RRsets: map[string]*RRsetView{}, } @@ -93,10 +88,10 @@ func (p *resolverPropagationProvider) Collect(ctx context.Context, opts sdk.Chec started := time.Now() // Ground truth from the zone's own authoritative servers. - expected := collectExpectedPerOwner(runCtx, zone, svc, ownerQTypes) + expected := collectExpected(runCtx, zone, svc, names, qtypes) for _, n := range names { - for _, qt := range ownerQTypes[n] { + for _, qt := range qtypes { key := rrsetKey(n, dns.TypeToString[qt]) v := &RRsetView{ Name: strings.ToLower(dns.Fqdn(n)), @@ -163,7 +158,7 @@ func (p *resolverPropagationProvider) Collect(ctx context.Context, opts sdk.Chec } for _, n := range names { - for _, qt := range ownerQTypes[n] { + for _, qt := range qtypes { probe := runProbe(runCtx, job.r, job.tr, n, qt) key := rrsetKey(n, dns.TypeToString[qt]) view.Probes[key] = probe @@ -212,7 +207,7 @@ type expectedEntry struct { records []string } -func collectExpectedPerOwner(ctx context.Context, zone string, svc *originService, ownerQTypes map[string][]uint16) map[string]*expectedEntry { +func collectExpected(ctx context.Context, zone string, svc *originService, names []string, qtypes []uint16) map[string]*expectedEntry { out := map[string]*expectedEntry{} var nsHosts []string @@ -249,7 +244,7 @@ func collectExpectedPerOwner(ctx context.Context, zone string, svc *originServic return out } - for n, qtypes := range ownerQTypes { + for _, n := range names { for _, qt := range qtypes { key := rrsetKey(n, dns.TypeToString[qt]) if e := queryAuthoritative(ctx, authAddrs, n, qt); e != nil { diff --git a/checker/definition.go b/checker/definition.go index 
dac7fb4..99376a0 100644 --- a/checker/definition.go +++ b/checker/definition.go @@ -32,7 +32,8 @@ func (p *resolverPropagationProvider) Definition() *sdk.CheckerDefinition { Id: "recordTypes", Type: "string", Label: "Record types to probe", - Description: "Comma-separated list of RR types to probe at every owner (apex + each 'subdomains' entry). Leave empty to derive the list from the working zone (SOA/NS at the apex plus whatever RR types are actually defined on each owner).", + Description: "Comma-separated list of RR types. The checker probes every listed type at the zone apex (and at each 'subdomains' entry).", + Default: "SOA,NS,A,AAAA,MX,TXT,CAA", }, { Id: "subdomains", @@ -91,12 +92,6 @@ func (p *resolverPropagationProvider) Definition() *sdk.CheckerDefinition { Label: "Zone name", AutoFill: sdk.AutoFillDomainName, }, - { - Id: "zone", - Label: "Zone", - AutoFill: sdk.AutoFillZone, - Hide: true, - }, }, ServiceOpts: []sdk.CheckerOptionDocumentation{ { diff --git a/checker/report.go b/checker/report.go index 572d952..ed7d851 100644 --- a/checker/report.go +++ b/checker/report.go @@ -116,7 +116,7 @@ type regionRow struct { Reachable int Agreeing int Disagreeing int - Unreachable int + Errored int } type resolverRow struct { @@ -283,32 +283,28 @@ func buildReportView(d *ResolverPropagationData, findings []Finding) *reportView r.Resolvers++ if rv.Reachable { r.Reachable++ - } else { - r.Unreachable++ } if rv.Reachable && !rv.Filtered { - unreachableProbe := false - disagrees := false + ok := true for key, p := range rv.Probes { if p == nil || p.Error != "" { - unreachableProbe = true - continue + r.Errored++ + ok = false + break } cv := d.RRsets[key] if cv == nil || cv.ConsensusSig == "" { continue } if p.Signature != cv.ConsensusSig { - disagrees = true + ok = false + break } } - switch { - case disagrees: - r.Disagreeing++ - case unreachableProbe: - r.Unreachable++ - default: + if ok { r.Agreeing++ + } else { + r.Disagreeing++ } } } @@ -668,7 +664,7 @@ const 
reportTemplateHTML = `
| Region | Reachable | Agreeing | Disagreeing | Unreachable |
|---|---|---|---|---|
| Region | Reachable | Agreeing | Disagreeing | Errored |
| {{.Reachable}} / {{.Resolvers}} | {{.Agreeing}} | {{if .Disagreeing}}{{.Disagreeing}}{{else}}0{{end}} | -{{if .Unreachable}}{{.Unreachable}}{{else}}0{{end}} | +{{if .Errored}}{{.Errored}}{{else}}0{{end}} | {{.Region}} | {{.Transport}} | -{{if .Reachable}}{{.AvgMs}}{{else}}unreachable{{end}} | +{{if .Reachable}}{{.AvgMs}}{{else}}unreachable{{end}} |
{{range .Probes}}
diff --git a/checker/rules.go b/checker/rules.go
index 6c7d822..9ef20bf 100644
--- a/checker/rules.go
+++ b/checker/rules.go
@@ -57,7 +57,3 @@ func warnState(code, subject, message string) sdk.CheckState {
func critState(code, subject, message string) sdk.CheckState { // wraps the arguments in a CheckState whose Status is sdk.StatusCrit
return sdk.CheckState{Status: sdk.StatusCrit, Message: message, Code: code, Subject: subject}
}
-
-func unknownState(code, subject, message string) sdk.CheckState { // wraps the arguments in a CheckState whose Status is sdk.StatusUnknown
- return sdk.CheckState{Status: sdk.StatusUnknown, Message: message, Code: code, Subject: subject}
-}
diff --git a/checker/rules_consensus.go b/checker/rules_consensus.go
index 3f7ff96..eb45834 100644
--- a/checker/rules_consensus.go
+++ b/checker/rules_consensus.go
@@ -91,8 +91,8 @@ func (r *authoritativeMatchRule) Evaluate(ctx context.Context, obs sdk.Observati
anyExpected = true
switch {
case v.ConsensusSig == "":
- states = append(states, unknownState("resolver_propagation.matches_authoritative.no_consensus", key,
- fmt.Sprintf("no public resolver returned a usable answer for %s (authoritative answer is known); resolvers unreachable from the checker", key)))
+ states = append(states, critState("resolver_propagation.matches_authoritative.no_consensus", key,
+ fmt.Sprintf("no public resolver returned a usable answer for %s (authoritative answer is known)", key)))
case !v.MatchesExpected:
states = append(states, critState(CodeAnswerDrift, key,
fmt.Sprintf("consensus of public resolvers for %s differs from the authoritative answer, wait for TTL expiry or force a flush", key)))
diff --git a/checker/rules_resolvers.go b/checker/rules_resolvers.go
index 995140d..8fa8b17 100644
--- a/checker/rules_resolvers.go
+++ b/checker/rules_resolvers.go
@@ -71,7 +71,7 @@ func (r *resolverLatencyRule) Evaluate(ctx context.Context, obs sdk.ObservationG
var states []sdk.CheckState
for _, rv := range data.Resolvers {
if !rv.Reachable {
- states = append(states, unknownState(CodeResolverUnreachable, rv.ID,
+ states = append(states, warnState(CodeResolverUnreachable, rv.ID,
fmt.Sprintf("resolver %s (%s, %s) did not answer any query", rv.Name, rv.IP, rv.Transport)))
continue
}
diff --git a/checker/rules_test.go b/checker/rules_test.go
index 1007fc7..e14d999 100644
--- a/checker/rules_test.go
+++ b/checker/rules_test.go
@@ -326,11 +326,8 @@ func TestResolverLatencyRule(t *testing.T) {
if _, ok := codes[CodeResolverHighLatency]; !ok {
t.Errorf("want high latency for 'slow', got %+v", st)
}
- unreach, ok := codes[CodeResolverUnreachable]
- if !ok {
+ if _, ok := codes[CodeResolverUnreachable]; !ok {
t.Errorf("want unreachable for 'absent', got %+v", st)
- } else if unreach[0].Status != sdk.StatusUnknown {
- t.Errorf("unreachable should be unknown (not a warning), got status %v", unreach[0].Status)
}
}
diff --git a/checker/zone.go b/checker/zone.go
deleted file mode 100644
index 1e3e853..0000000
--- a/checker/zone.go
+++ /dev/null
@@ -1,188 +0,0 @@
-package checker
-
-import (
- "encoding/json"
- "sort"
- "strings"
-
- "github.com/miekg/dns"
-
- sdk "git.happydns.org/checker-sdk-go/checker"
-)
-
-// rawZone is the minimal slice of happyDomain's Zone JSON we consume to
-// derive the RR types actually present at each owner. It mirrors the
-// shape used by sibling checkers (see checker-legacy-records).
-type rawZone struct {
- DomainName string `json:"domain_name,omitempty"` // NOTE(review): not read by resolveQTypes; confirm whether anything else uses it
- Services map[string][]rawService `json:"services"` // keyed by subdomain; resolveQTypes joins each key with the apex via joinSubdomain
-}
-
-type rawService struct { // one service entry as listed under rawZone.Services
- Type string `json:"_svctype"`
- Domain string `json:"_domain"`
- Service json.RawMessage `json:"Service"` // service body kept as raw JSON; this struct does not decode it
-}
-
-// fallbackQTypes is the legacy default type list, applied to every owner when
-// no working zone is available and recordTypes was not set explicitly.
-var fallbackQTypes = []uint16{
- dns.TypeSOA, dns.TypeNS, dns.TypeA, dns.TypeAAAA,
- dns.TypeMX, dns.TypeTXT, dns.TypeCAA,
-}
-
-// resolveQTypes returns the RR types to probe at each owner name plus the
-// union across all owners (for reporting/metrics). The error is non-nil only
-// when recordTypes is set but parseQTypes returns no type for it.
-//
-// Precedence:
-// 1. Explicit "recordTypes" option → apply that list to every owner.
-// 2. Auto-filled "zone" option → derive per-owner types from the zone's
-//    services. The apex always carries SOA+NS even if the payload omits them.
-//    Owners with no derivable types fall back to A,AAAA so the probe still
-//    surfaces NXDOMAIN drift for requested subdomains absent from the zone.
-// 3. Neither, or the zone cannot be decoded → legacy default at every owner.
-func resolveQTypes(opts sdk.CheckerOptions, recordTypesOpt, apex string, names []string) (map[string][]uint16, []uint16, error) {
- if recordTypesOpt != "" {
- qts := parseQTypes(recordTypesOpt)
- if len(qts) == 0 {
- return nil, nil, &invalidTypesError{raw: recordTypesOpt}
- }
- return uniformOwnerQTypes(names, qts), qts, nil
- }
-
- zone, _ := readWorkingZone(opts) // decode error is discarded: a nil zone simply takes the fallback path below
- if zone == nil {
- return uniformOwnerQTypes(names, fallbackQTypes), append([]uint16(nil), fallbackQTypes...), nil // the union is a copy; the per-owner map shares fallbackQTypes itself
- }
-
- owner := map[string]map[uint16]bool{} // per-owner type set, keyed by the entries of names verbatim
- for _, n := range names {
- owner[n] = map[uint16]bool{}
- }
-
- for sub, services := range zone.Services {
- full := joinSubdomain(sub, apex)
- set, ok := owner[full]
- if !ok {
- continue // owner is not in the probe list: its services are ignored
- }
- for _, svc := range services {
- for _, qt := range typesFromService(svc) {
- set[qt] = true
- }
- }
- }
-
- // SOA + NS at apex are foundational; the rules depend on them.
- apexLower := strings.ToLower(dns.Fqdn(apex)) // NOTE(review): owner is keyed by names verbatim; if names holds the apex in another case/form this lookup misses and SOA/NS are not added. Confirm with caller.
- if set, ok := owner[apexLower]; ok {
- set[dns.TypeSOA] = true
- set[dns.TypeNS] = true
- }
-
- out := make(map[string][]uint16, len(names))
- unionSet := map[uint16]bool{}
- for _, n := range names {
- set := owner[n]
- if len(set) == 0 {
- // Owner present in the probe list but unknown to the zone:
- // keep a minimal probe so a missing-record finding can fire.
- set = map[uint16]bool{dns.TypeA: true, dns.TypeAAAA: true}
- }
- qts := sortedTypes(set) // sorted so the per-owner list does not depend on map iteration order
- out[n] = qts
- for _, qt := range qts {
- unionSet[qt] = true
- }
- }
- return out, sortedTypes(unionSet), nil
-}
-
-func uniformOwnerQTypes(names []string, qts []uint16) map[string][]uint16 { // maps every entry of names to the same type list
- out := make(map[string][]uint16, len(names))
- for _, n := range names {
- out[n] = qts // no copy: all owners share the caller's slice
- }
- return out
-}
-
-func sortedTypes(set map[uint16]bool) []uint16 { // returns the keys of set in ascending numeric order
- out := make([]uint16, 0, len(set))
- for q := range set { // every key is taken, whatever its bool value
- out = append(out, q)
- }
- sort.Slice(out, func(i, j int) bool { return out[i] < out[j] })
- return out
-}
-
-// readWorkingZone parses the "zone" auto-fill option. The host may pass the
-// value either as a native struct (in-process plugin) or as a JSON object
-// (HTTP path); both are round-tripped through JSON for a single decoding
-// path. A missing or nil option returns (nil, nil), since standalone / HTTP
-// callers may not provide one; marshal/unmarshal failures return (nil, err).
-func readWorkingZone(opts sdk.CheckerOptions) (*rawZone, error) {
- v, ok := opts["zone"]
- if !ok || v == nil {
- return nil, nil
- }
- raw, err := json.Marshal(v) // re-encode whatever shape the host supplied
- if err != nil {
- return nil, err
- }
- z := &rawZone{}
- if err := json.Unmarshal(raw, z); err != nil {
- return nil, err
- }
- return z, nil
-}
-
-// typesFromService extracts every RR type referenced by a service body.
-// happyDomain service envelopes are opaque to us (the registry is in the
-// host), so we scan the JSON for any nested "Rrtype": |