commit e6eb2e081e847825fc5387be3c5cfeb5595652a9 Author: Pierre-Olivier Mercier Date: Wed Apr 8 04:18:58 2026 +0700 Initial commit diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..d214794 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +checker-delegation +*.so diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..591dc3d --- /dev/null +++ b/Dockerfile @@ -0,0 +1,14 @@ +FROM golang:1.25-alpine AS builder + +ARG CHECKER_VERSION=custom-build + +WORKDIR /src +COPY go.mod go.sum ./ +RUN go mod download +COPY . . +RUN CGO_ENABLED=0 go build -tags standalone -ldflags "-X main.Version=${CHECKER_VERSION}" -o /checker-delegation . + +FROM scratch +COPY --from=builder /checker-delegation /checker-delegation +EXPOSE 8080 +ENTRYPOINT ["/checker-delegation"] diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..07d44d8 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2026 The happyDomain Authors + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the “Software”), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..a257412 --- /dev/null +++ b/Makefile @@ -0,0 +1,28 @@ +CHECKER_NAME := checker-delegation +CHECKER_IMAGE := happydomain/$(CHECKER_NAME) +CHECKER_VERSION ?= custom-build + +CHECKER_SOURCES := main.go $(wildcard checker/*.go) + +GO_LDFLAGS := -X main.Version=$(CHECKER_VERSION) + +.PHONY: all plugin docker test clean + +all: $(CHECKER_NAME) + +$(CHECKER_NAME): $(CHECKER_SOURCES) + go build -tags standalone -ldflags "$(GO_LDFLAGS)" -o $@ . + +plugin: $(CHECKER_NAME).so + +$(CHECKER_NAME).so: $(CHECKER_SOURCES) $(wildcard plugin/*.go) + go build -buildmode=plugin -ldflags "$(GO_LDFLAGS)" -o $@ ./plugin/ + +docker: + docker build --build-arg CHECKER_VERSION=$(CHECKER_VERSION) -t $(CHECKER_IMAGE) . + +test: + go test -tags standalone ./... + +clean: + rm -f $(CHECKER_NAME) $(CHECKER_NAME).so diff --git a/README.md b/README.md new file mode 100644 index 0000000..baa57dc --- /dev/null +++ b/README.md @@ -0,0 +1,122 @@ +# checker-delegation + +DNS delegation checker for [happyDomain](https://www.happydomain.org/). + +Audits the delegation of a zone: NS consistency between parent and child, +glue correctness, DS / DNSKEY hand-off, TCP reachability, SOA serial drift, +and authoritativeness of each delegated server. Applies to services of type +`abstract.Delegation`. 
+ +## Usage + +### Standalone HTTP server + +```bash +# Build and run +make +./checker-delegation -listen :8080 +``` + +The server exposes: + +- `GET /health`, health check +- `POST /collect`, collect delegation observations (happyDomain external checker protocol) + +### Docker + +```bash +make docker +docker run -p 8080:8080 happydomain/checker-delegation +``` + +### happyDomain plugin + +```bash +make plugin +# produces checker-delegation.so, loadable by happyDomain as a Go plugin +``` + +The plugin exposes a `NewCheckerPlugin` symbol returning the checker +definition and observation provider, which happyDomain registers in its +global registries at load time. + +### Versioning + +The binary, plugin, and Docker image embed a version string overridable +at build time: + +```bash +make CHECKER_VERSION=1.2.3 +make plugin CHECKER_VERSION=1.2.3 +make docker CHECKER_VERSION=1.2.3 +``` + +### happyDomain remote endpoint + +Set the `endpoint` admin option for the delegation checker to the URL of +the running checker-delegation server (e.g., +`http://checker-delegation:8080`). happyDomain will delegate observation +collection to this endpoint. + +## Options + +| Option | Type | Default | Description | +|---------------------|------|---------|---------------------------------------------------------------------------------------------------| +| `requireDS` | bool | `false` | When enabled, missing DS records at the parent are treated as critical (otherwise informational). | +| `requireTCP` | bool | `true` | When enabled, name servers that fail to answer over TCP are reported as critical (otherwise warning). | +| `minNameServers` | uint | `2` | Below this count, the delegation is reported as a warning (RFC 1034 recommends at least 2). | +| `allowGlueMismatch` | bool | `false` | When disabled, glue/address mismatches between parent and child are reported as critical. 
| + +## Protocol + +### POST /collect + +Request: +```json +{ + "key": "delegation", + "target": {"userId": "...", "domainId": "..."}, + "options": { + "domain_name": "example.com.", + "subdomain": "www", + "service": { "_svctype": "abstract.Delegation", "Service": { "ns": [...], "ds": [...] } } + } +} +``` + +Response: +```json +{ + "data": { + "delegated_fqdn": "www.example.com.", + "parent_zone": "example.com.", + "parent_ns": ["a.iana-servers.net.", "b.iana-servers.net."], + "advertised_ns": ["ns1.example.net.", "ns2.example.net."], + "advertised_glue": {}, + "parent_ds": [], + "child_serials": {"ns1.example.net.:53": 2026042401}, + "findings": [ + { + "code": "delegation_ns_mismatch", + "severity": "crit", + "message": "NS RRset at parent does not match declared service: missing=[ns3.example.net] extra=[]", + "server": "a.iana-servers.net.:53" + } + ] + } +} +``` + +Findings carry a stable `code` (e.g. `delegation_lame`, +`delegation_missing_glue`, `delegation_ds_mismatch`, +`delegation_soa_serial_drift`, `delegation_dnskey_no_match`, …) so that +downstream rules can match on them deterministically. + +## License + +This project is licensed under the **MIT License** (see `LICENSE`), in +line with the rest of the happyDomain checker ecosystem. + +The third-party Apache-2.0 attributions for `checker-sdk-go` are recorded +in `NOTICE` and must accompany any binary or source redistribution of this +project. diff --git a/checker/collect.go b/checker/collect.go new file mode 100644 index 0000000..ae5a840 --- /dev/null +++ b/checker/collect.go @@ -0,0 +1,223 @@ +package checker + +import ( + "context" + "encoding/json" + "fmt" + "strings" + + "github.com/miekg/dns" + + sdk "git.happydns.org/checker-sdk-go/checker" +) + +// Collect runs the delegation probe and returns a *DelegationData populated +// with raw facts only. All judgment (severity, option-driven thresholds, +// pass/fail) is deferred to the rules in rule.go. 
+// +// The collector resolves the parent zone's authoritative servers, asks each +// of them for the delegation of the target FQDN, then turns around and +// queries every delegated server using ONLY the NS names + glue learned +// from the parent. The child zone is never used as a source of truth. +func (p *delegationProvider) Collect(ctx context.Context, opts sdk.CheckerOptions) (any, error) { + svc, err := loadService(opts) + if err != nil { + return nil, err + } + + parentZone, subdomain := loadNames(opts) + if subdomain == "" { + return nil, fmt.Errorf("missing 'subdomain' option") + } + if parentZone == "" { + return nil, fmt.Errorf("missing 'domain_name' option") + } + + delegatedFQDN := dns.Fqdn(strings.TrimSuffix(subdomain, ".") + "." + strings.TrimSuffix(parentZone, ".") + ".") + + data := &DelegationData{ + DelegatedFQDN: delegatedFQDN, + ParentZone: dns.Fqdn(parentZone), + DeclaredNS: normalizeNSList(svc.NameServers), + } + for _, d := range svc.DS { + if d == nil { + continue + } + data.DeclaredDS = append(data.DeclaredDS, NewDSRecord(d)) + } + + // Resolve parent's authoritative servers. + _, parentServers, err := findParentZone(ctx, delegatedFQDN, parentZone) + if err != nil { + data.ParentDiscoveryError = err.Error() + return data, nil + } + data.ParentNS = parentServers + + // Phase A: query every parent server. Record raw outcomes only. 
+ for _, ps := range parentServers { + view := ParentView{Server: ps} + + ns, glue, _, qerr := queryDelegation(ctx, ps, delegatedFQDN) + if qerr != nil { + view.UDPNSError = qerr.Error() + } else { + view.NS = ns + view.Glue = glue + } + + if _, _, _, terr := queryDelegationTCP(ctx, ps, delegatedFQDN); terr != nil { + view.TCPNSError = terr.Error() + } + + dsRRs, sigs, dserr := queryDS(ctx, ps, delegatedFQDN) + if dserr != nil { + view.DSQueryError = dserr.Error() + } else { + for _, d := range dsRRs { + view.DS = append(view.DS, NewDSRecord(d)) + } + for _, sig := range sigs { + view.DSRRSIGs = append(view.DSRRSIGs, DSRRSIGObservation{ + Inception: sig.Inception, + Expiration: sig.Expiration, + }) + } + } + + data.ParentViews = append(data.ParentViews, view) + } + + // Pick the first view that actually returned an NS RRset as the + // source of truth for Phase B. If none succeeded, skip Phase B; the + // rules will flag the absence of child data. + var primary *ParentView + for i := range data.ParentViews { + if data.ParentViews[i].UDPNSError == "" && len(data.ParentViews[i].NS) > 0 { + primary = &data.ParentViews[i] + break + } + } + if primary == nil { + return data, nil + } + + // Phase B: query each child name server using only parent-supplied data. + for _, nsName := range primary.NS { + child := ChildNSView{NSName: nsName} + addrs := primary.Glue[nsName] + if len(addrs) == 0 { + // Out-of-bailiwick: resolve via the system resolver. 
+ resolved, rerr := resolveHost(ctx, nsName) + if rerr != nil { + child.ResolveError = rerr.Error() + data.Children = append(data.Children, child) + continue + } + addrs = resolved + } + + for _, addr := range addrs { + srv := hostPort(addr, "53") + av := ChildAddressView{Address: addr, Server: srv} + + soa, aa, qerr := querySOA(ctx, "", srv, delegatedFQDN) + if qerr != nil { + av.UDPError = qerr.Error() + av.Authoritative = aa + child.Addresses = append(child.Addresses, av) + continue + } + av.Authoritative = aa + if soa != nil { + av.SOASerial = soa.Serial + av.SOASerialKnown = true + } + + if _, _, terr := querySOA(ctx, "tcp", srv, delegatedFQDN); terr != nil { + av.TCPError = terr.Error() + } + + childNS, nerr := queryNSAt(ctx, srv, delegatedFQDN) + if nerr != nil { + av.ChildNSError = nerr.Error() + } else { + av.ChildNS = childNS + } + + if isInBailiwick(nsName, delegatedFQDN) { + addrsAt, _ := queryAddrsAt(ctx, srv, nsName) + av.ChildGlueAddrs = addrsAt + } + + // Only bother probing DNSKEY when the parent has at least one + // DS to match against. The rule confirms this precondition. + parentHasDS := false + for _, pv := range data.ParentViews { + if len(pv.DS) > 0 { + parentHasDS = true + break + } + } + if parentHasDS { + keys, kerr := queryDNSKEY(ctx, srv, delegatedFQDN) + if kerr != nil { + av.DNSKEYError = kerr.Error() + } else { + for _, k := range keys { + av.DNSKEYs = append(av.DNSKEYs, NewDNSKEYRecord(k)) + } + } + } + + child.Addresses = append(child.Addresses, av) + } + + data.Children = append(data.Children, child) + } + + return data, nil +} + +// queryDelegationTCP is the TCP variant of queryDelegation. It is split out +// so the per-server observations keep their UDP/TCP roles distinct. 
+func queryDelegationTCP(ctx context.Context, parentServer, fqdn string) (ns []string, glue map[string][]string, msg *dns.Msg, err error) { + q := dns.Question{Name: dns.Fqdn(fqdn), Qtype: dns.TypeNS, Qclass: dns.ClassINET} + msg, err = dnsExchange(ctx, "tcp", parentServer, q, true) + if err != nil { + return nil, nil, nil, err + } + if msg.Rcode != dns.RcodeSuccess { + return nil, nil, msg, fmt.Errorf("parent answered %s", dns.RcodeToString[msg.Rcode]) + } + return +} + +// loadService extracts the abstract.Delegation payload from the auto-filled +// "service" option. We parse it into our local minimal type so this checker +// does not have to import the full happyDomain server module. +func loadService(opts sdk.CheckerOptions) (*delegationService, error) { + svc, ok := sdk.GetOption[serviceMessage](opts, "service") + if !ok { + return nil, fmt.Errorf("missing 'service' option") + } + if svc.Type != "" && svc.Type != "abstract.Delegation" { + return nil, fmt.Errorf("service is %s, expected abstract.Delegation", svc.Type) + } + var d delegationService + if err := json.Unmarshal(svc.Service, &d); err != nil { + return nil, fmt.Errorf("decoding delegation service: %w", err) + } + return &d, nil +} + +func loadNames(opts sdk.CheckerOptions) (parentZone, subdomain string) { + if v, ok := sdk.GetOption[string](opts, "domain_name"); ok { + parentZone = v + } + if v, ok := sdk.GetOption[string](opts, "subdomain"); ok { + subdomain = v + } + return +} diff --git a/checker/definition.go b/checker/definition.go new file mode 100644 index 0000000..4d9a173 --- /dev/null +++ b/checker/definition.go @@ -0,0 +1,83 @@ +package checker + +import ( + "time" + + sdk "git.happydns.org/checker-sdk-go/checker" +) + +// Version is the checker version reported in CheckerDefinition.Version. +var Version = "built-in" + +// Definition returns the CheckerDefinition for the delegation checker. 
+func Definition() *sdk.CheckerDefinition { + return &sdk.CheckerDefinition{ + ID: "delegation", + Name: "DNS delegation", + Version: Version, + Availability: sdk.CheckerAvailability{ + ApplyToService: true, + LimitToServices: []string{"abstract.Delegation"}, + }, + ObservationKeys: []sdk.ObservationKey{ObservationKeyDelegation}, + Options: sdk.CheckerOptionsDocumentation{ + UserOpts: []sdk.CheckerOptionDocumentation{ + { + Id: "requireDS", + Type: "bool", + Label: "Require DS at parent", + Description: "When enabled, missing DS records at the parent are treated as a critical issue (otherwise informational).", + Default: false, + }, + { + Id: "requireTCP", + Type: "bool", + Label: "Require DNS over TCP", + Description: "When enabled, name servers that fail to answer over TCP are reported as critical (otherwise as warning).", + Default: true, + }, + { + Id: "minNameServers", + Type: "uint", + Label: "Minimum number of name servers", + Description: "Below this count, the delegation is reported as a warning (RFC 1034 recommends at least 2).", + Default: float64(2), + }, + { + Id: "allowGlueMismatch", + Type: "bool", + Label: "Allow glue mismatches", + Description: "When disabled, glue/address mismatches between parent and child are reported as critical.", + Default: false, + }, + }, + DomainOpts: []sdk.CheckerOptionDocumentation{ + { + Id: "domain_name", + Label: "Parent domain name", + AutoFill: sdk.AutoFillDomainName, + }, + { + Id: "subdomain", + Label: "Subdomain", + AutoFill: sdk.AutoFillSubdomain, + }, + }, + ServiceOpts: []sdk.CheckerOptionDocumentation{ + { + Id: "service", + Label: "Service", + AutoFill: sdk.AutoFillService, + }, + }, + }, + Rules: Rules(), + HasHTMLReport: true, + HasMetrics: true, + Interval: &sdk.CheckIntervalSpec{ + Min: 5 * time.Minute, + Max: 24 * time.Hour, + Default: 1 * time.Hour, + }, + } +} diff --git a/checker/dns.go b/checker/dns.go new file mode 100644 index 0000000..bd5b730 --- /dev/null +++ b/checker/dns.go @@ -0,0 +1,277 @@ 
+package checker + +import ( + "context" + "fmt" + "net" + "strings" + "time" + + "github.com/miekg/dns" +) + +// year68 mirrors the constant from miekg/dns used to wrap RRSIG validity +// periods: 2^31 seconds (≈68 years), half of the 32-bit timestamp range. +const year68 = int64(1 << 31) + +// dnsTimeout is the per-query deadline used by every helper here. +const dnsTimeout = 5 * time.Second + +// dnsExchange sends a single query to the given server using the requested +// transport ("" for UDP, "tcp"). The server address must already include a +// port. The exchange is bound to ctx and capped at dnsTimeout. RecursionDesired +// is forced off: this checker only talks to authoritative servers. +func dnsExchange(ctx context.Context, proto, server string, q dns.Question, edns bool) (*dns.Msg, error) { + client := dns.Client{Net: proto, Timeout: dnsTimeout} + + m := new(dns.Msg) + m.Id = dns.Id() + m.Question = []dns.Question{q} + m.RecursionDesired = false + if edns { + m.SetEdns0(4096, true) + } + + deadline, ok := ctx.Deadline() + if ok { + if d := time.Until(deadline); d > 0 && d < client.Timeout { + client.Timeout = d + } + } + + r, _, err := client.ExchangeContext(ctx, m, server) + if err != nil { + return nil, err + } + if r == nil { + return nil, fmt.Errorf("nil response from %s", server) + } + return r, nil +} + +// hostPort returns "host:port", correctly bracketing IPv6 literals. +func hostPort(host, port string) string { + if ip := net.ParseIP(host); ip != nil && ip.To4() == nil { + return "[" + host + "]:" + port + } + host = strings.TrimSuffix(host, ".") + return host + ":" + port +} + +// resolveHost resolves an NS hostname to its A and AAAA addresses using the +// system resolver. It is used as a fallback when no glue is provided by the +// parent for an out-of-bailiwick NS.
+func resolveHost(ctx context.Context, host string) ([]string, error) { + var resolver net.Resolver + addrs, err := resolver.LookupHost(ctx, strings.TrimSuffix(host, ".")) + if err != nil { + return nil, err + } + return addrs, nil +} + +// findParentZone determines the parent zone of fqdn and returns the FQDN of +// that zone along with its authoritative server addresses (resolved from +// its NS RRset through the system resolver). Without a usable hint (empty or +// root), the parent is taken to be fqdn minus its leftmost label; no SOA +// lookup is made to confirm that a zone cut actually exists at that name. +// +// If hintParent is non-empty, it is used as the assumed parent and we only +// resolve its NS; this matches happyDomain's data model where the parent +// zone is known. +func findParentZone(ctx context.Context, fqdn, hintParent string) (zone string, servers []string, err error) { + zone = dns.Fqdn(hintParent) + if zone == "" || zone == "." { + // No usable hint: strip the leftmost label of fqdn. + labels := dns.SplitDomainName(fqdn) + if len(labels) == 0 { + return "", nil, fmt.Errorf("cannot derive parent of %q", fqdn) + } + zone = dns.Fqdn(strings.Join(labels[1:], ".")) + } + + servers, err = resolveZoneNSAddrs(ctx, zone) + if err != nil { + return "", nil, fmt.Errorf("resolving NS of parent zone %q: %w", zone, err) + } + if len(servers) == 0 { + return "", nil, fmt.Errorf("parent zone %q has no resolvable NS", zone) + } + return zone, servers, nil +} + +// resolveZoneNSAddrs returns one "address:53" entry for every address of +// every NS of the given zone, as seen by the system resolver. It is used to +// discover the parent's authoritative servers.
+func resolveZoneNSAddrs(ctx context.Context, zone string) ([]string, error) { + var resolver net.Resolver + nss, err := resolver.LookupNS(ctx, strings.TrimSuffix(zone, ".")) + if err != nil { + return nil, err + } + + var out []string + for _, ns := range nss { + addrs, err := resolver.LookupHost(ctx, strings.TrimSuffix(ns.Host, ".")) + if err != nil || len(addrs) == 0 { + continue + } + for _, a := range addrs { + out = append(out, hostPort(a, "53")) + } + } + return out, nil +} + +// queryDelegation queries the given parent server for the NS RRset of fqdn +// and extracts the advertised NS names plus every A/AAAA record, kept as +// glue, from the Answer, Authority and Additional sections. The query is +// sent without RD; the expected response is the classical "referral" packet. +func queryDelegation(ctx context.Context, parentServer, fqdn string) (ns []string, glue map[string][]string, msg *dns.Msg, err error) { + q := dns.Question{Name: dns.Fqdn(fqdn), Qtype: dns.TypeNS, Qclass: dns.ClassINET} + + msg, err = dnsExchange(ctx, "", parentServer, q, true) + if err != nil { + return nil, nil, nil, err + } + if msg.Rcode != dns.RcodeSuccess { + return nil, nil, msg, fmt.Errorf("parent answered %s", dns.RcodeToString[msg.Rcode]) + } + + glue = map[string][]string{} + + collect := func(records []dns.RR) { + for _, rr := range records { + switch t := rr.(type) { + case *dns.NS: + if strings.EqualFold(strings.TrimSuffix(t.Header().Name, "."), strings.TrimSuffix(fqdn, ".")) { + ns = append(ns, strings.ToLower(dns.Fqdn(t.Ns))) + } + case *dns.A: + name := strings.ToLower(dns.Fqdn(t.Header().Name)) + glue[name] = append(glue[name], t.A.String()) + case *dns.AAAA: + name := strings.ToLower(dns.Fqdn(t.Header().Name)) + glue[name] = append(glue[name], t.AAAA.String()) + } + } + } + collect(msg.Answer) + collect(msg.Ns) + collect(msg.Extra) + return +} + +// queryDS asks the parent server over TCP for the DS RRset of fqdn and +// returns the DS records plus any RRSIGs found in the Answer section.
+func queryDS(ctx context.Context, parentServer, fqdn string) (ds []*dns.DS, sigs []*dns.RRSIG, err error) { + q := dns.Question{Name: dns.Fqdn(fqdn), Qtype: dns.TypeDS, Qclass: dns.ClassINET} + + r, err := dnsExchange(ctx, "tcp", parentServer, q, true) + if err != nil { + return nil, nil, err + } + if r.Rcode != dns.RcodeSuccess { + return nil, nil, fmt.Errorf("parent answered %s for DS", dns.RcodeToString[r.Rcode]) + } + + for _, rr := range r.Answer { + switch t := rr.(type) { + case *dns.DS: + ds = append(ds, t) + case *dns.RRSIG: + sigs = append(sigs, t) + } + } + return +} + +// querySOA asks the given authoritative server for the SOA of fqdn and +// returns the SOA record plus the AA flag from the response header. +func querySOA(ctx context.Context, proto, server, fqdn string) (soa *dns.SOA, aa bool, err error) { + q := dns.Question{Name: dns.Fqdn(fqdn), Qtype: dns.TypeSOA, Qclass: dns.ClassINET} + r, err := dnsExchange(ctx, proto, server, q, false) + if err != nil { + return nil, false, err + } + if r.Rcode != dns.RcodeSuccess { + return nil, r.Authoritative, fmt.Errorf("server answered %s", dns.RcodeToString[r.Rcode]) + } + for _, rr := range r.Answer { + if t, ok := rr.(*dns.SOA); ok { + return t, r.Authoritative, nil + } + } + return nil, r.Authoritative, fmt.Errorf("no SOA in answer section") +} + +// queryNSAt asks the given authoritative server for the NS RRset of fqdn. 
+func queryNSAt(ctx context.Context, server, fqdn string) ([]string, error) { + q := dns.Question{Name: dns.Fqdn(fqdn), Qtype: dns.TypeNS, Qclass: dns.ClassINET} + r, err := dnsExchange(ctx, "", server, q, false) + if err != nil { + return nil, err + } + if r.Rcode != dns.RcodeSuccess { + return nil, fmt.Errorf("server answered %s", dns.RcodeToString[r.Rcode]) + } + var out []string + for _, rr := range r.Answer { + if t, ok := rr.(*dns.NS); ok { + out = append(out, strings.ToLower(dns.Fqdn(t.Ns))) + } + } + return out, nil +} + +// queryAddrsAt asks an authoritative server for the A and AAAA records of +// host (typically an in-bailiwick NS hostname). +func queryAddrsAt(ctx context.Context, server, host string) ([]string, error) { + var out []string + for _, qt := range []uint16{dns.TypeA, dns.TypeAAAA} { + r, err := dnsExchange(ctx, "", server, dns.Question{Name: dns.Fqdn(host), Qtype: qt, Qclass: dns.ClassINET}, false) + if err != nil { + continue + } + if r.Rcode != dns.RcodeSuccess { + continue + } + for _, rr := range r.Answer { + switch t := rr.(type) { + case *dns.A: + out = append(out, t.A.String()) + case *dns.AAAA: + out = append(out, t.AAAA.String()) + } + } + } + return out, nil +} + +// queryDNSKEY asks the given child server for the DNSKEY RRset of fqdn. +func queryDNSKEY(ctx context.Context, server, fqdn string) ([]*dns.DNSKEY, error) { + q := dns.Question{Name: dns.Fqdn(fqdn), Qtype: dns.TypeDNSKEY, Qclass: dns.ClassINET} + r, err := dnsExchange(ctx, "tcp", server, q, true) + if err != nil { + return nil, err + } + if r.Rcode != dns.RcodeSuccess { + return nil, fmt.Errorf("server answered %s for DNSKEY", dns.RcodeToString[r.Rcode]) + } + var out []*dns.DNSKEY + for _, rr := range r.Answer { + if t, ok := rr.(*dns.DNSKEY); ok { + out = append(out, t) + } + } + return out, nil +} + +// dsEqual returns true when two DS records refer to the same key material. 
+func dsEqual(a, b *dns.DS) bool { + return a.KeyTag == b.KeyTag && + a.Algorithm == b.Algorithm && + a.DigestType == b.DigestType && + strings.EqualFold(a.Digest, b.Digest) +} + diff --git a/checker/provider.go b/checker/provider.go new file mode 100644 index 0000000..9994bd2 --- /dev/null +++ b/checker/provider.go @@ -0,0 +1,40 @@ +package checker + +import ( + "fmt" + + sdk "git.happydns.org/checker-sdk-go/checker" +) + +// Provider returns a new delegation observation provider. +func Provider() sdk.ObservationProvider { + return &delegationProvider{} +} + +type delegationProvider struct{} + +func (p *delegationProvider) Key() sdk.ObservationKey { + return ObservationKeyDelegation +} + +// Definition implements sdk.CheckerDefinitionProvider so the SDK server can +// expose /definition without an extra argument. +func (p *delegationProvider) Definition() *sdk.CheckerDefinition { + return Definition() +} + +// ValidateOptions implements sdk.OptionsValidator. Option values cross all +// rules, so validating them once on the provider avoids duplicating the +// check in every rule. +func (p *delegationProvider) ValidateOptions(opts sdk.CheckerOptions) error { + if v, ok := opts["minNameServers"]; ok { + f, ok := v.(float64) + if !ok { + return fmt.Errorf("minNameServers must be a number") + } + if f < 1 { + return fmt.Errorf("minNameServers must be >= 1") + } + } + return nil +} diff --git a/checker/rule.go b/checker/rule.go new file mode 100644 index 0000000..33c5e9e --- /dev/null +++ b/checker/rule.go @@ -0,0 +1,990 @@ +package checker + +import ( + "context" + "fmt" + "strings" + "time" + + "github.com/miekg/dns" + + sdk "git.happydns.org/checker-sdk-go/checker" +) + +// Rules returns the full rule set for the delegation checker. Each rule +// reads the same shared observation (DelegationData) and emits one +// CheckState per subject it evaluates. 
+func Rules() []sdk.CheckRule { + return []sdk.CheckRule{ + &minNameServersRule{}, + &parentDiscoveredRule{}, + &parentNSQueryRule{}, + &parentTCPRule{}, + &nsMatchesDeclaredRule{}, + &inBailiwickGlueRule{}, + &unnecessaryGlueRule{}, + &dsQueryRule{}, + &dsMatchesDeclaredRule{}, + &dsPresentAtParentRule{}, + &dsRRSIGValidityRule{}, + &nsResolvableRule{}, + &childReachableRule{}, + &childAuthoritativeRule{}, + &childSOASerialDriftRule{}, + &childTCPRule{}, + &childNSMatchesParentRule{}, + &childGlueMatchesParentRule{}, + &dnskeyQueryRule{}, + &dnskeyMatchesDSRule{}, + &nsHasAuthoritativeAnswerRule{}, + } +} + +// loadData reads the delegation observation or returns an error state. +func loadData(ctx context.Context, obs sdk.ObservationGetter, code string) (*DelegationData, []sdk.CheckState) { + var data DelegationData + if err := obs.Get(ctx, ObservationKeyDelegation, &data); err != nil { + return nil, []sdk.CheckState{{ + Status: sdk.StatusError, + Message: fmt.Sprintf("Failed to get delegation data: %v", err), + Code: code, + }} + } + return &data, nil +} + +// primaryParentView returns the first ParentView that actually returned an +// NS RRset. It mirrors the "source of truth" choice made by Collect for +// Phase B. 
+func primaryParentView(views []ParentView) *ParentView { + for i := range views { + if views[i].UDPNSError == "" && len(views[i].NS) > 0 { + return &views[i] + } + } + return nil +} + +// ───────────────────────── checker-wide rules ───────────────────────── + +type minNameServersRule struct{} + +func (r *minNameServersRule) Name() string { return "delegation_min_name_servers" } +func (r *minNameServersRule) Description() string { + return "Checks that enough name servers are declared for the delegation (RFC 1034 recommends at least 2)" +} +func (r *minNameServersRule) Evaluate(ctx context.Context, obs sdk.ObservationGetter, opts sdk.CheckerOptions) []sdk.CheckState { + data, errState := loadData(ctx, obs, "delegation_too_few_ns") + if errState != nil { + return errState + } + minNS := sdk.GetIntOption(opts, "minNameServers", 2) + if len(data.DeclaredNS) < minNS { + return []sdk.CheckState{{ + Status: sdk.StatusWarn, + Code: "delegation_too_few_ns", + Message: fmt.Sprintf("only %d name server(s) declared, at least %d recommended", len(data.DeclaredNS), minNS), + Meta: map[string]any{"declared": len(data.DeclaredNS), "minimum": minNS}, + }} + } + return []sdk.CheckState{{ + Status: sdk.StatusOK, + Code: "delegation_too_few_ns", + Message: fmt.Sprintf("%d name server(s) declared", len(data.DeclaredNS)), + }} +} + +type parentDiscoveredRule struct{} + +func (r *parentDiscoveredRule) Name() string { return "delegation_parent_discovered" } +func (r *parentDiscoveredRule) Description() string { + return "Verifies that the parent zone's authoritative servers could be discovered" +} +func (r *parentDiscoveredRule) Evaluate(ctx context.Context, obs sdk.ObservationGetter, opts sdk.CheckerOptions) []sdk.CheckState { + data, errState := loadData(ctx, obs, "delegation_no_parent_ns") + if errState != nil { + return errState + } + if data.ParentDiscoveryError != "" { + return []sdk.CheckState{{ + Status: sdk.StatusCrit, + Code: "delegation_no_parent_ns", + Message: 
data.ParentDiscoveryError, + }} + } + if len(data.ParentNS) == 0 { + return []sdk.CheckState{{ + Status: sdk.StatusCrit, + Code: "delegation_no_parent_ns", + Message: "parent zone has no resolvable authoritative servers", + }} + } + return []sdk.CheckState{{ + Status: sdk.StatusOK, + Code: "delegation_no_parent_ns", + Message: fmt.Sprintf("%d parent authoritative server(s) discovered", len(data.ParentNS)), + }} +} + +// ───────────────────────── parent-side rules ───────────────────────── + +type parentNSQueryRule struct{} + +func (r *parentNSQueryRule) Name() string { return "delegation_parent_ns_query" } +func (r *parentNSQueryRule) Description() string { + return "Verifies that every parent authoritative server answers the NS query for the delegated FQDN" +} +func (r *parentNSQueryRule) Evaluate(ctx context.Context, obs sdk.ObservationGetter, opts sdk.CheckerOptions) []sdk.CheckState { + data, errState := loadData(ctx, obs, "delegation_parent_query_failed") + if errState != nil { + return errState + } + if len(data.ParentViews) == 0 { + return []sdk.CheckState{{ + Status: sdk.StatusUnknown, + Code: "delegation_parent_query_failed", + Message: "no parent server was queried", + }} + } + out := make([]sdk.CheckState, 0, len(data.ParentViews)) + for _, v := range data.ParentViews { + st := sdk.CheckState{Code: "delegation_parent_query_failed", Subject: v.Server} + switch { + case v.UDPNSError != "": + st.Status = sdk.StatusCrit + st.Message = fmt.Sprintf("parent NS query failed: %s", v.UDPNSError) + case len(v.NS) == 0: + st.Status = sdk.StatusCrit + st.Message = "parent returned an empty NS RRset" + default: + st.Status = sdk.StatusOK + st.Message = fmt.Sprintf("%d NS record(s) returned", len(v.NS)) + } + out = append(out, st) + } + return out +} + +type parentTCPRule struct{} + +func (r *parentTCPRule) Name() string { return "delegation_parent_tcp" } +func (r *parentTCPRule) Description() string { + return "Verifies that every parent authoritative server answers 
the NS query over TCP" +} +func (r *parentTCPRule) Evaluate(ctx context.Context, obs sdk.ObservationGetter, opts sdk.CheckerOptions) []sdk.CheckState { + data, errState := loadData(ctx, obs, "delegation_parent_tcp_failed") + if errState != nil { + return errState + } + if len(data.ParentViews) == 0 { + return []sdk.CheckState{{ + Status: sdk.StatusUnknown, + Code: "delegation_parent_tcp_failed", + Message: "no parent server was queried", + }} + } + requireTCP := sdk.GetBoolOption(opts, "requireTCP", true) + failStatus := sdk.StatusCrit + if !requireTCP { + failStatus = sdk.StatusWarn + } + out := make([]sdk.CheckState, 0, len(data.ParentViews)) + for _, v := range data.ParentViews { + st := sdk.CheckState{Code: "delegation_parent_tcp_failed", Subject: v.Server} + if v.TCPNSError != "" { + st.Status = failStatus + st.Message = fmt.Sprintf("parent NS query over TCP failed: %s", v.TCPNSError) + } else { + st.Status = sdk.StatusOK + st.Message = "TCP reachable" + } + out = append(out, st) + } + return out +} + +type nsMatchesDeclaredRule struct{} + +func (r *nsMatchesDeclaredRule) Name() string { return "delegation_ns_matches_declared" } +func (r *nsMatchesDeclaredRule) Description() string { + return "Verifies that the NS RRset served by the parent matches the service's declared name servers" +} +func (r *nsMatchesDeclaredRule) Evaluate(ctx context.Context, obs sdk.ObservationGetter, opts sdk.CheckerOptions) []sdk.CheckState { + data, errState := loadData(ctx, obs, "delegation_ns_mismatch") + if errState != nil { + return errState + } + var out []sdk.CheckState + for _, v := range data.ParentViews { + if v.UDPNSError != "" || len(v.NS) == 0 { + continue + } + missing, extra := diffStringSets(data.DeclaredNS, v.NS) + st := sdk.CheckState{Code: "delegation_ns_mismatch", Subject: v.Server} + if len(missing) > 0 || len(extra) > 0 { + st.Status = sdk.StatusCrit + st.Message = fmt.Sprintf("NS RRset does not match declared: missing=%v extra=%v", missing, extra) + st.Meta = 
map[string]any{"missing": missing, "extra": extra} + } else { + st.Status = sdk.StatusOK + st.Message = "NS RRset matches the declared service" + } + out = append(out, st) + } + if len(out) == 0 { + return []sdk.CheckState{{ + Status: sdk.StatusUnknown, + Code: "delegation_ns_mismatch", + Message: "no parent server returned an NS RRset", + }} + } + return out +} + +type inBailiwickGlueRule struct{} + +func (r *inBailiwickGlueRule) Name() string { return "delegation_in_bailiwick_glue" } +func (r *inBailiwickGlueRule) Description() string { + return "Verifies that every in-bailiwick NS hostname has glue records at the parent" +} +func (r *inBailiwickGlueRule) Evaluate(ctx context.Context, obs sdk.ObservationGetter, opts sdk.CheckerOptions) []sdk.CheckState { + data, errState := loadData(ctx, obs, "delegation_missing_glue") + if errState != nil { + return errState + } + var out []sdk.CheckState + for _, v := range data.ParentViews { + if v.UDPNSError != "" { + continue + } + for _, n := range v.NS { + if !isInBailiwick(n, data.DelegatedFQDN) { + continue + } + subject := fmt.Sprintf("%s@%s", n, v.Server) + if len(v.Glue[n]) == 0 { + out = append(out, sdk.CheckState{ + Status: sdk.StatusCrit, + Code: "delegation_missing_glue", + Subject: subject, + Message: "in-bailiwick NS has no glue", + }) + } else { + out = append(out, sdk.CheckState{ + Status: sdk.StatusOK, + Code: "delegation_missing_glue", + Subject: subject, + Message: fmt.Sprintf("%d glue address(es)", len(v.Glue[n])), + }) + } + } + } + if len(out) == 0 { + return []sdk.CheckState{{ + Status: sdk.StatusInfo, + Code: "delegation_missing_glue", + Message: "no in-bailiwick NS to evaluate", + }} + } + return out +} + +type unnecessaryGlueRule struct{} + +func (r *unnecessaryGlueRule) Name() string { return "delegation_unnecessary_glue" } +func (r *unnecessaryGlueRule) Description() string { + return "Flags out-of-bailiwick NS hostnames for which the parent still returns glue" +} +func (r *unnecessaryGlueRule) 
Evaluate(ctx context.Context, obs sdk.ObservationGetter, opts sdk.CheckerOptions) []sdk.CheckState { + data, errState := loadData(ctx, obs, "delegation_unnecessary_glue") + if errState != nil { + return errState + } + var out []sdk.CheckState + for _, v := range data.ParentViews { + if v.UDPNSError != "" { + continue + } + for _, n := range v.NS { + if isInBailiwick(n, data.DelegatedFQDN) { + continue + } + subject := fmt.Sprintf("%s@%s", n, v.Server) + if len(v.Glue[n]) > 0 { + out = append(out, sdk.CheckState{ + Status: sdk.StatusWarn, + Code: "delegation_unnecessary_glue", + Subject: subject, + Message: "out-of-bailiwick NS has glue records at the parent", + }) + } else { + out = append(out, sdk.CheckState{ + Status: sdk.StatusOK, + Code: "delegation_unnecessary_glue", + Subject: subject, + Message: "no glue (expected)", + }) + } + } + } + if len(out) == 0 { + return []sdk.CheckState{{ + Status: sdk.StatusInfo, + Code: "delegation_unnecessary_glue", + Message: "no out-of-bailiwick NS to evaluate", + }} + } + return out +} + +type dsQueryRule struct{} + +func (r *dsQueryRule) Name() string { return "delegation_ds_query" } +func (r *dsQueryRule) Description() string { + return "Verifies that every parent authoritative server answers the DS query for the delegated FQDN" +} +func (r *dsQueryRule) Evaluate(ctx context.Context, obs sdk.ObservationGetter, opts sdk.CheckerOptions) []sdk.CheckState { + data, errState := loadData(ctx, obs, "delegation_ds_query_failed") + if errState != nil { + return errState + } + if len(data.ParentViews) == 0 { + return []sdk.CheckState{{ + Status: sdk.StatusUnknown, + Code: "delegation_ds_query_failed", + Message: "no parent server was queried", + }} + } + out := make([]sdk.CheckState, 0, len(data.ParentViews)) + for _, v := range data.ParentViews { + st := sdk.CheckState{Code: "delegation_ds_query_failed", Subject: v.Server} + if v.DSQueryError != "" { + st.Status = sdk.StatusWarn + st.Message = fmt.Sprintf("DS query failed: %s", 
v.DSQueryError) + } else { + st.Status = sdk.StatusOK + st.Message = fmt.Sprintf("%d DS record(s) returned", len(v.DS)) + } + out = append(out, st) + } + return out +} + +type dsMatchesDeclaredRule struct{} + +func (r *dsMatchesDeclaredRule) Name() string { return "delegation_ds_matches_declared" } +func (r *dsMatchesDeclaredRule) Description() string { + return "Verifies that the DS RRset served by the parent matches the service's declared DS records" +} +func (r *dsMatchesDeclaredRule) Evaluate(ctx context.Context, obs sdk.ObservationGetter, opts sdk.CheckerOptions) []sdk.CheckState { + data, errState := loadData(ctx, obs, "delegation_ds_mismatch") + if errState != nil { + return errState + } + declared := dsRecordsToMiekg(data.DeclaredDS) + var out []sdk.CheckState + for _, v := range data.ParentViews { + if v.DSQueryError != "" { + continue + } + got := dsRecordsToMiekg(v.DS) + if len(declared) == 0 && len(got) == 0 { + continue + } + missing, extra := diffDS(declared, got) + st := sdk.CheckState{Code: "delegation_ds_mismatch", Subject: v.Server} + if len(missing) == 0 && len(extra) == 0 { + st.Status = sdk.StatusOK + st.Message = "DS RRset matches the declared service" + } else { + if len(declared) == 0 { + st.Status = sdk.StatusWarn + } else { + st.Status = sdk.StatusCrit + } + st.Message = fmt.Sprintf("DS RRset does not match declared: missing=%d extra=%d", len(missing), len(extra)) + st.Meta = map[string]any{"missing": len(missing), "extra": len(extra)} + } + out = append(out, st) + } + if len(out) == 0 { + return []sdk.CheckState{{ + Status: sdk.StatusInfo, + Code: "delegation_ds_mismatch", + Message: "no DS data to compare", + }} + } + return out +} + +type dsPresentAtParentRule struct{} + +func (r *dsPresentAtParentRule) Name() string { return "delegation_ds_present_at_parent" } +func (r *dsPresentAtParentRule) Description() string { + return "Flags the case where the service declares DS records but the parent serves none" +} +func (r 
*dsPresentAtParentRule) Evaluate(ctx context.Context, obs sdk.ObservationGetter, opts sdk.CheckerOptions) []sdk.CheckState { + data, errState := loadData(ctx, obs, "delegation_ds_missing") + if errState != nil { + return errState + } + if len(data.DeclaredDS) == 0 { + return []sdk.CheckState{{ + Status: sdk.StatusInfo, + Code: "delegation_ds_missing", + Message: "service declares no DS records", + }} + } + anyDS := false + for _, v := range data.ParentViews { + if v.DSQueryError == "" && len(v.DS) > 0 { + anyDS = true + break + } + } + if anyDS { + return []sdk.CheckState{{ + Status: sdk.StatusOK, + Code: "delegation_ds_missing", + Message: "parent serves DS records for the delegation", + }} + } + status := sdk.StatusInfo + if sdk.GetBoolOption(opts, "requireDS", false) { + status = sdk.StatusCrit + } + return []sdk.CheckState{{ + Status: status, + Code: "delegation_ds_missing", + Message: "service declares DS records but parent serves none", + }} +} + +type dsRRSIGValidityRule struct{} + +func (r *dsRRSIGValidityRule) Name() string { return "delegation_ds_rrsig_validity" } +func (r *dsRRSIGValidityRule) Description() string { + return "Verifies that every RRSIG covering the DS RRset is inside its validity window" +} +func (r *dsRRSIGValidityRule) Evaluate(ctx context.Context, obs sdk.ObservationGetter, opts sdk.CheckerOptions) []sdk.CheckState { + data, errState := loadData(ctx, obs, "delegation_ds_rrsig_invalid") + if errState != nil { + return errState + } + now := time.Now() + var out []sdk.CheckState + for _, v := range data.ParentViews { + if v.DSQueryError != "" || len(v.DSRRSIGs) == 0 { + continue + } + worst := sdk.StatusOK + var reason string + for _, sig := range v.DSRRSIGs { + probe := &dns.RRSIG{Inception: sig.Inception, Expiration: sig.Expiration} + if !probe.ValidityPeriod(now) { + worst = sdk.StatusCrit + reason = rrsigReason(sig, now) + break + } + } + st := sdk.CheckState{Code: "delegation_ds_rrsig_invalid", Subject: v.Server, Status: worst} + if 
worst == sdk.StatusOK { + st.Message = "DS RRSIG within validity window" + } else { + st.Message = fmt.Sprintf("DS RRSIG: %s", reason) + } + out = append(out, st) + } + if len(out) == 0 { + return []sdk.CheckState{{ + Status: sdk.StatusInfo, + Code: "delegation_ds_rrsig_invalid", + Message: "no DS RRSIG to evaluate", + }} + } + return out +} + +func rrsigReason(sig DSRRSIGObservation, now time.Time) string { + utc := now.UTC().Unix() + modi := (int64(sig.Inception) - utc) / year68 + ti := int64(sig.Inception) + modi*year68 + mode := (int64(sig.Expiration) - utc) / year68 + te := int64(sig.Expiration) + mode*year68 + if ti > utc { + return "signature not yet valid" + } else if utc > te { + return "signature expired" + } + return "signature outside its validity window" +} + +// ───────────────────────── child-side rules ───────────────────────── + +type nsResolvableRule struct{} + +func (r *nsResolvableRule) Name() string { return "delegation_ns_resolvable" } +func (r *nsResolvableRule) Description() string { + return "Verifies that every out-of-bailiwick NS hostname resolves to at least one address" +} +func (r *nsResolvableRule) Evaluate(ctx context.Context, obs sdk.ObservationGetter, opts sdk.CheckerOptions) []sdk.CheckState { + data, errState := loadData(ctx, obs, "delegation_ns_unresolvable") + if errState != nil { + return errState + } + var out []sdk.CheckState + for _, c := range data.Children { + if isInBailiwick(c.NSName, data.DelegatedFQDN) { + continue + } + st := sdk.CheckState{Code: "delegation_ns_unresolvable", Subject: c.NSName} + if c.ResolveError != "" { + st.Status = sdk.StatusCrit + st.Message = fmt.Sprintf("cannot resolve NS: %s", c.ResolveError) + } else { + st.Status = sdk.StatusOK + st.Message = fmt.Sprintf("%d address(es)", len(c.Addresses)) + } + out = append(out, st) + } + if len(out) == 0 { + return []sdk.CheckState{{ + Status: sdk.StatusInfo, + Code: "delegation_ns_unresolvable", + Message: "no out-of-bailiwick NS to resolve", + }} + } + 
return out +} + +type childReachableRule struct{} + +func (r *childReachableRule) Name() string { return "delegation_child_reachable" } +func (r *childReachableRule) Description() string { + return "Verifies that every delegated name server address answers over UDP" +} +func (r *childReachableRule) Evaluate(ctx context.Context, obs sdk.ObservationGetter, opts sdk.CheckerOptions) []sdk.CheckState { + data, errState := loadData(ctx, obs, "delegation_unreachable") + if errState != nil { + return errState + } + var out []sdk.CheckState + for _, c := range data.Children { + for _, a := range c.Addresses { + subject := fmt.Sprintf("%s (%s)", c.NSName, a.Address) + st := sdk.CheckState{Code: "delegation_unreachable", Subject: subject} + if a.UDPError != "" { + st.Status = sdk.StatusCrit + st.Message = fmt.Sprintf("UDP SOA query failed: %s", a.UDPError) + } else { + st.Status = sdk.StatusOK + st.Message = "UDP SOA query succeeded" + } + out = append(out, st) + } + } + if len(out) == 0 { + return []sdk.CheckState{{ + Status: sdk.StatusUnknown, + Code: "delegation_unreachable", + Message: "no delegated server address to probe", + }} + } + return out +} + +type childAuthoritativeRule struct{} + +func (r *childAuthoritativeRule) Name() string { return "delegation_child_authoritative" } +func (r *childAuthoritativeRule) Description() string { + return "Verifies that every reachable delegated server answers authoritatively (AA bit) for the zone" +} +func (r *childAuthoritativeRule) Evaluate(ctx context.Context, obs sdk.ObservationGetter, opts sdk.CheckerOptions) []sdk.CheckState { + data, errState := loadData(ctx, obs, "delegation_lame") + if errState != nil { + return errState + } + var out []sdk.CheckState + for _, c := range data.Children { + for _, a := range c.Addresses { + if a.UDPError != "" { + continue + } + subject := fmt.Sprintf("%s (%s)", c.NSName, a.Address) + st := sdk.CheckState{Code: "delegation_lame", Subject: subject} + if !a.Authoritative { + st.Status = 
sdk.StatusCrit
				st.Message = "server is not authoritative for the zone"
			} else {
				st.Status = sdk.StatusOK
				st.Message = "authoritative answer"
			}
			out = append(out, st)
		}
	}
	if len(out) == 0 {
		return []sdk.CheckState{{
			Status:  sdk.StatusUnknown,
			Code:    "delegation_lame",
			Message: "no reachable delegated server to probe",
		}}
	}
	return out
}

type childSOASerialDriftRule struct{}

func (r *childSOASerialDriftRule) Name() string { return "delegation_child_soa_serial_drift" }
func (r *childSOASerialDriftRule) Description() string {
	return "Verifies that all reachable addresses of a name server agree on the SOA serial"
}

// Evaluate emits one state per delegated NS for which at least one address
// reported an SOA serial: OK when every such address reported the same
// serial, Warn when they disagree. NS entries without any known serial are
// skipped; if that leaves nothing to report, a single Unknown state is
// returned instead.
func (r *childSOASerialDriftRule) Evaluate(ctx context.Context, obs sdk.ObservationGetter, opts sdk.CheckerOptions) []sdk.CheckState {
	data, errState := loadData(ctx, obs, "delegation_soa_serial_drift")
	if errState != nil {
		return errState
	}
	var out []sdk.CheckState
	for _, c := range data.Children {
		// Collect the distinct serials in the order the addresses were
		// observed. Building the list while walking c.Addresses (rather
		// than ranging over the set afterwards) keeps the message stable
		// across runs: Go does not define map iteration order.
		seen := make(map[uint32]struct{}, len(c.Addresses))
		serials := make([]string, 0, len(c.Addresses))
		for _, a := range c.Addresses {
			if !a.SOASerialKnown {
				continue
			}
			if _, dup := seen[a.SOASerial]; dup {
				continue
			}
			seen[a.SOASerial] = struct{}{}
			serials = append(serials, fmt.Sprintf("%d", a.SOASerial))
		}
		if len(serials) == 0 {
			continue
		}
		st := sdk.CheckState{Code: "delegation_soa_serial_drift", Subject: c.NSName}
		if len(serials) > 1 {
			st.Status = sdk.StatusWarn
			st.Message = fmt.Sprintf("SOA serial drift across addresses: %s", strings.Join(serials, ", "))
		} else {
			st.Status = sdk.StatusOK
			st.Message = "all addresses agree on SOA serial"
		}
		out = append(out, st)
	}
	if len(out) == 0 {
		return []sdk.CheckState{{
			Status:  sdk.StatusUnknown,
			Code:    "delegation_soa_serial_drift",
			Message: "no SOA serial observed",
		}}
	}
	return out
}

type childTCPRule struct{}

func (r *childTCPRule) Name() string { return "delegation_child_tcp" }
func (r *childTCPRule) Description() string {
	return "Verifies that every reachable delegated server also answers
over TCP" +} +func (r *childTCPRule) Evaluate(ctx context.Context, obs sdk.ObservationGetter, opts sdk.CheckerOptions) []sdk.CheckState { + data, errState := loadData(ctx, obs, "delegation_tcp_failed") + if errState != nil { + return errState + } + requireTCP := sdk.GetBoolOption(opts, "requireTCP", true) + failStatus := sdk.StatusCrit + if !requireTCP { + failStatus = sdk.StatusWarn + } + var out []sdk.CheckState + for _, c := range data.Children { + for _, a := range c.Addresses { + if a.UDPError != "" { + continue + } + subject := fmt.Sprintf("%s (%s)", c.NSName, a.Address) + st := sdk.CheckState{Code: "delegation_tcp_failed", Subject: subject} + if a.TCPError != "" { + st.Status = failStatus + st.Message = fmt.Sprintf("TCP SOA query failed: %s", a.TCPError) + } else { + st.Status = sdk.StatusOK + st.Message = "TCP reachable" + } + out = append(out, st) + } + } + if len(out) == 0 { + return []sdk.CheckState{{ + Status: sdk.StatusUnknown, + Code: "delegation_tcp_failed", + Message: "no reachable delegated server to probe", + }} + } + return out +} + +type childNSMatchesParentRule struct{} + +func (r *childNSMatchesParentRule) Name() string { return "delegation_child_ns_matches_parent" } +func (r *childNSMatchesParentRule) Description() string { + return "Verifies that the NS RRset served by each delegated server agrees with the parent's view" +} +func (r *childNSMatchesParentRule) Evaluate(ctx context.Context, obs sdk.ObservationGetter, opts sdk.CheckerOptions) []sdk.CheckState { + data, errState := loadData(ctx, obs, "delegation_ns_drift") + if errState != nil { + return errState + } + primary := primaryParentView(data.ParentViews) + if primary == nil { + return []sdk.CheckState{{ + Status: sdk.StatusUnknown, + Code: "delegation_ns_drift", + Message: "no parent NS RRset to compare against", + }} + } + var out []sdk.CheckState + for _, c := range data.Children { + for _, a := range c.Addresses { + if a.UDPError != "" || a.ChildNSError != "" { + continue + } + 
subject := fmt.Sprintf("%s (%s)", c.NSName, a.Address) + missing, extra := diffStringSets(primary.NS, a.ChildNS) + st := sdk.CheckState{Code: "delegation_ns_drift", Subject: subject} + if len(missing) > 0 || len(extra) > 0 { + st.Status = sdk.StatusWarn + st.Message = fmt.Sprintf("child NS RRset differs from parent: missing=%v extra=%v", missing, extra) + st.Meta = map[string]any{"missing": missing, "extra": extra} + } else { + st.Status = sdk.StatusOK + st.Message = "child NS RRset matches parent" + } + out = append(out, st) + } + } + if len(out) == 0 { + return []sdk.CheckState{{ + Status: sdk.StatusUnknown, + Code: "delegation_ns_drift", + Message: "no child NS RRset observed", + }} + } + return out +} + +type childGlueMatchesParentRule struct{} + +func (r *childGlueMatchesParentRule) Name() string { return "delegation_child_glue_matches_parent" } +func (r *childGlueMatchesParentRule) Description() string { + return "Verifies that the addresses served by the child for in-bailiwick NS names match the parent glue" +} +func (r *childGlueMatchesParentRule) Evaluate(ctx context.Context, obs sdk.ObservationGetter, opts sdk.CheckerOptions) []sdk.CheckState { + data, errState := loadData(ctx, obs, "delegation_glue_mismatch") + if errState != nil { + return errState + } + primary := primaryParentView(data.ParentViews) + if primary == nil { + return []sdk.CheckState{{ + Status: sdk.StatusUnknown, + Code: "delegation_glue_mismatch", + Message: "no parent glue to compare against", + }} + } + allow := sdk.GetBoolOption(opts, "allowGlueMismatch", false) + failStatus := sdk.StatusCrit + if allow { + failStatus = sdk.StatusWarn + } + var out []sdk.CheckState + for _, c := range data.Children { + if !isInBailiwick(c.NSName, data.DelegatedFQDN) { + continue + } + for _, a := range c.Addresses { + if a.UDPError != "" { + continue + } + subject := fmt.Sprintf("%s (%s)", c.NSName, a.Address) + missing, _ := diffStringSets(primary.Glue[c.NSName], a.ChildGlueAddrs) + st := 
sdk.CheckState{Code: "delegation_glue_mismatch", Subject: subject} + if len(missing) > 0 { + st.Status = failStatus + st.Message = fmt.Sprintf("child addresses for %s differ from parent glue: missing=%v", c.NSName, missing) + st.Meta = map[string]any{"missing": missing} + } else { + st.Status = sdk.StatusOK + st.Message = "child glue matches parent" + } + out = append(out, st) + } + } + if len(out) == 0 { + return []sdk.CheckState{{ + Status: sdk.StatusInfo, + Code: "delegation_glue_mismatch", + Message: "no in-bailiwick NS to compare", + }} + } + return out +} + +// ───────────────────────── DNSSEC rules ───────────────────────── + +func parentHasAnyDS(views []ParentView) bool { + for _, v := range views { + if len(v.DS) > 0 { + return true + } + } + return false +} + +type dnskeyQueryRule struct{} + +func (r *dnskeyQueryRule) Name() string { return "delegation_dnskey_query" } +func (r *dnskeyQueryRule) Description() string { + return "Verifies that the delegated servers answer DNSKEY queries when the parent publishes DS records" +} +func (r *dnskeyQueryRule) Evaluate(ctx context.Context, obs sdk.ObservationGetter, opts sdk.CheckerOptions) []sdk.CheckState { + data, errState := loadData(ctx, obs, "delegation_dnskey_query_failed") + if errState != nil { + return errState + } + if !parentHasAnyDS(data.ParentViews) { + return []sdk.CheckState{{ + Status: sdk.StatusUnknown, + Code: "delegation_dnskey_query_failed", + Message: "parent has no DS records, DNSKEY probe skipped", + }} + } + var out []sdk.CheckState + for _, c := range data.Children { + for _, a := range c.Addresses { + if a.UDPError != "" { + continue + } + subject := fmt.Sprintf("%s (%s)", c.NSName, a.Address) + st := sdk.CheckState{Code: "delegation_dnskey_query_failed", Subject: subject} + if a.DNSKEYError != "" { + st.Status = sdk.StatusWarn + st.Message = fmt.Sprintf("DNSKEY query failed: %s", a.DNSKEYError) + } else { + st.Status = sdk.StatusOK + st.Message = fmt.Sprintf("%d DNSKEY record(s) 
returned", len(a.DNSKEYs)) + } + out = append(out, st) + } + } + if len(out) == 0 { + return []sdk.CheckState{{ + Status: sdk.StatusUnknown, + Code: "delegation_dnskey_query_failed", + Message: "no reachable child server to probe", + }} + } + return out +} + +type dnskeyMatchesDSRule struct{} + +func (r *dnskeyMatchesDSRule) Name() string { return "delegation_dnskey_matches_ds" } +func (r *dnskeyMatchesDSRule) Description() string { + return "Verifies that at least one DNSKEY served by the child hashes to one of the DS records at the parent" +} +func (r *dnskeyMatchesDSRule) Evaluate(ctx context.Context, obs sdk.ObservationGetter, opts sdk.CheckerOptions) []sdk.CheckState { + data, errState := loadData(ctx, obs, "delegation_dnskey_no_match") + if errState != nil { + return errState + } + if !parentHasAnyDS(data.ParentViews) { + return []sdk.CheckState{{ + Status: sdk.StatusUnknown, + Code: "delegation_dnskey_no_match", + Message: "parent has no DS records, DNSKEY/DS match skipped", + }} + } + // Collect the DS records from whichever parent view has them. + var parentDS []*dns.DS + for _, v := range data.ParentViews { + if len(v.DS) > 0 { + parentDS = dsRecordsToMiekg(v.DS) + break + } + } + var out []sdk.CheckState + for _, c := range data.Children { + var keys []*dns.DNSKEY + probed := false + for _, a := range c.Addresses { + if len(a.DNSKEYs) > 0 { + probed = true + keys = append(keys, dnskeysToMiekg(a.DNSKEYs)...) 
+ } + } + if !probed { + continue + } + st := sdk.CheckState{Code: "delegation_dnskey_no_match", Subject: c.NSName} + if dsMatchesAnyKey(parentDS, keys) { + st.Status = sdk.StatusOK + st.Message = "at least one DNSKEY matches a parent DS record" + } else { + st.Status = sdk.StatusCrit + st.Message = "no DNSKEY served by this NS matches any parent DS record" + } + out = append(out, st) + } + if len(out) == 0 { + return []sdk.CheckState{{ + Status: sdk.StatusUnknown, + Code: "delegation_dnskey_no_match", + Message: "no DNSKEY observed at any child server", + }} + } + return out +} + +type nsHasAuthoritativeAnswerRule struct{} + +func (r *nsHasAuthoritativeAnswerRule) Name() string { + return "delegation_ns_has_authoritative_answer" +} +func (r *nsHasAuthoritativeAnswerRule) Description() string { + return "Verifies that every delegated NS produced at least one authoritative answer across all its addresses" +} +func (r *nsHasAuthoritativeAnswerRule) Evaluate(ctx context.Context, obs sdk.ObservationGetter, opts sdk.CheckerOptions) []sdk.CheckState { + data, errState := loadData(ctx, obs, "delegation_no_authoritative_answer") + if errState != nil { + return errState + } + var out []sdk.CheckState + for _, c := range data.Children { + if len(c.Addresses) == 0 { + continue + } + sawAA := false + for _, a := range c.Addresses { + if a.UDPError == "" && a.Authoritative { + sawAA = true + break + } + } + st := sdk.CheckState{Code: "delegation_no_authoritative_answer", Subject: c.NSName} + if sawAA { + st.Status = sdk.StatusOK + st.Message = "at least one address answered authoritatively" + } else { + st.Status = sdk.StatusCrit + st.Message = "no address of this NS answered authoritatively" + } + out = append(out, st) + } + if len(out) == 0 { + return []sdk.CheckState{{ + Status: sdk.StatusUnknown, + Code: "delegation_no_authoritative_answer", + Message: "no delegated NS to probe", + }} + } + return out +} diff --git a/checker/types.go b/checker/types.go new file mode 100644 
index 0000000..b091445 --- /dev/null +++ b/checker/types.go @@ -0,0 +1,176 @@ +package checker + +import ( + "encoding/json" + + "github.com/miekg/dns" +) + +// ObservationKeyDelegation is the observation key for delegation data. +const ObservationKeyDelegation = "delegation" + +// DelegationData is the raw, judgment-free observation produced by Collect. +// Every field records what was observed (or the error string returned by +// the probe) but never classifies severity. Rules consume this data and +// emit CheckStates. +type DelegationData struct { + // DelegatedFQDN is the FQDN of the delegated zone (subdomain + parent). + DelegatedFQDN string `json:"delegated_fqdn"` + + // ParentZone is the FQDN of the parent zone that delegates DelegatedFQDN. + ParentZone string `json:"parent_zone"` + + // DeclaredNS lists the NS hostnames declared by the service definition, + // lowercased and FQDN-normalized. + DeclaredNS []string `json:"declared_ns,omitempty"` + + // DeclaredDS lists the DS records declared by the service definition. + DeclaredDS []DSRecord `json:"declared_ds,omitempty"` + + // ParentDiscoveryError captures why the parent zone's authoritative + // servers could not be resolved, when that step failed outright. + ParentDiscoveryError string `json:"parent_discovery_error,omitempty"` + + // ParentNS lists the parent zone's authoritative server addresses that + // were queried (host:port entries). + ParentNS []string `json:"parent_ns,omitempty"` + + // ParentViews holds one entry per queried parent server describing + // what it returned for the delegation of DelegatedFQDN. + ParentViews []ParentView `json:"parent_views,omitempty"` + + // Children holds one entry per NS name learned from the first + // successful parent view, with the per-address probes performed + // against that NS. + Children []ChildNSView `json:"children,omitempty"` +} + +// ParentView captures everything one specific parent server returned +// while being probed for the delegation. 
type ParentView struct {
	// Server identifies the parent server this view was collected from;
	// the parent-side rules report it as the CheckState subject.
	Server string `json:"server"`

	// UDPNSError is the error string of the NS query; empty when the
	// query succeeded.
	// NOTE(review): the field name suggests the query is sent over UDP;
	// confirm against the collector.
	UDPNSError string `json:"udp_ns_error,omitempty"`

	// TCPNSError is the error string of the NS query over TCP; empty when
	// the query succeeded.
	TCPNSError string `json:"tcp_ns_error,omitempty"`

	// NS is the NS RRset this server returned for the delegation.
	NS []string `json:"ns,omitempty"`

	// Glue maps an NS hostname listed in NS to the glue addresses this
	// server returned for it.
	Glue map[string][]string `json:"glue,omitempty"`

	// DSQueryError is the error string of the DS query; empty when the
	// query succeeded.
	DSQueryError string `json:"ds_query_error,omitempty"`

	// DS is the DS RRset this server returned (may be empty).
	DS []DSRecord `json:"ds,omitempty"`

	// DSRRSIGs holds the validity windows of the RRSIGs covering the DS
	// RRset.
	DSRRSIGs []DSRRSIGObservation `json:"ds_rrsigs,omitempty"`
}

// ChildNSView holds the observations for a single delegated NS hostname,
// possibly probed across several addresses.
type ChildNSView struct {
	// NSName is the NS hostname; rules report it as the CheckState subject.
	NSName string `json:"ns_name"`

	// ResolveError is the error string from resolving NSName; empty when
	// resolution succeeded.
	ResolveError string `json:"resolve_error,omitempty"`

	// Addresses holds one entry per address probed for this NS.
	Addresses []ChildAddressView `json:"addresses,omitempty"`
}

// ChildAddressView captures the probes performed against a single
// (NS, IP address) pair.
type ChildAddressView struct {
	// Address is the probed address; rules show it next to the NS name in
	// the CheckState subject.
	Address string `json:"address"`

	// Server is presumably the endpoint that was dialed for Address.
	// TODO confirm against the collector.
	Server string `json:"server"`

	// UDPError is the error string of the UDP SOA query; empty when the
	// query succeeded. Most child-side rules skip an address when it is set.
	UDPError string `json:"udp_error,omitempty"`

	// Authoritative records whether the server answered authoritatively
	// (AA bit) for the zone.
	Authoritative bool `json:"authoritative"`

	// SOASerial is the SOA serial observed at this address. It is only
	// meaningful when SOASerialKnown is true.
	SOASerial uint32 `json:"soa_serial,omitempty"`

	// SOASerialKnown reports whether SOASerial holds an observed value.
	SOASerialKnown bool `json:"soa_serial_known,omitempty"`

	// TCPError is the error string of the TCP SOA query; empty when the
	// query succeeded.
	TCPError string `json:"tcp_error,omitempty"`

	// ChildNS is the NS RRset served by this address for the zone.
	ChildNS []string `json:"child_ns,omitempty"`

	// ChildNSError is the error string of the child NS query; empty when
	// the query succeeded.
	ChildNSError string `json:"child_ns_error,omitempty"`

	// ChildGlueAddrs lists the addresses the child serves for this NS
	// name; they are compared against the parent glue.
	ChildGlueAddrs []string `json:"child_glue_addrs,omitempty"`

	// DNSKEYError is the error string of the DNSKEY query; empty when the
	// query succeeded.
	DNSKEYError string `json:"dnskey_error,omitempty"`

	// DNSKEYs is the DNSKEY RRset served by this address.
	DNSKEYs []DNSKEYRecord `json:"dnskeys,omitempty"`
}

// DSRecord mirrors a DS RR in a form that is both human-readable (Text)
// and directly comparable (the structured fields).
type DSRecord struct {
	// Text is the presentation form of the record, as produced by
	// (*dns.DS).String in NewDSRecord.
	Text string `json:"text"`

	// KeyTag, Algorithm, DigestType and Digest mirror the fields of the
	// same name on dns.DS.
	KeyTag     uint16 `json:"keytag"`
	Algorithm  uint8  `json:"algorithm"`
	DigestType uint8  `json:"digest_type"`
	Digest     string `json:"digest"`
}

// ToMiekg rebuilds a *dns.DS from the stored fields.
+func (d DSRecord) ToMiekg() *dns.DS { + return &dns.DS{ + KeyTag: d.KeyTag, + Algorithm: d.Algorithm, + DigestType: d.DigestType, + Digest: d.Digest, + } +} + +// NewDSRecord converts a miekg *dns.DS into the wire-friendly DSRecord. +func NewDSRecord(d *dns.DS) DSRecord { + return DSRecord{ + Text: d.String(), + KeyTag: d.KeyTag, + Algorithm: d.Algorithm, + DigestType: d.DigestType, + Digest: d.Digest, + } +} + +// DNSKEYRecord preserves the fields needed to recompute DS digests. +type DNSKEYRecord struct { + Name string `json:"name"` + Flags uint16 `json:"flags"` + Protocol uint8 `json:"protocol"` + Algorithm uint8 `json:"algorithm"` + PublicKey string `json:"public_key"` +} + +// ToMiekg rebuilds a *dns.DNSKEY from the stored fields so the rule can +// call k.ToDS(digestType). +func (k DNSKEYRecord) ToMiekg() *dns.DNSKEY { + name := k.Name + if name == "" { + name = "." + } + return &dns.DNSKEY{ + Hdr: dns.RR_Header{Name: dns.Fqdn(name), Rrtype: dns.TypeDNSKEY, Class: dns.ClassINET}, + Flags: k.Flags, + Protocol: k.Protocol, + Algorithm: k.Algorithm, + PublicKey: k.PublicKey, + } +} + +// NewDNSKEYRecord converts a miekg *dns.DNSKEY into the wire-friendly form. +func NewDNSKEYRecord(k *dns.DNSKEY) DNSKEYRecord { + return DNSKEYRecord{ + Name: k.Hdr.Name, + Flags: k.Flags, + Protocol: k.Protocol, + Algorithm: k.Algorithm, + PublicKey: k.PublicKey, + } +} + +// DSRRSIGObservation records an RRSIG covering a DS RRset; rules decide +// whether the current clock falls inside the validity window. +type DSRRSIGObservation struct { + Inception uint32 `json:"inception"` + Expiration uint32 `json:"expiration"` +} + +// delegationService is the minimal local mirror of happyDomain's +// `services/abstract.Delegation` type. It is duplicated on purpose so that +// this checker does not have to import the (heavy) happyDomain server module +// just to decode the service payload. 
+type delegationService struct { + NameServers []*dns.NS `json:"ns"` + DS []*dns.DS `json:"ds"` +} + +// serviceMessage is the minimal local mirror of happyDomain's ServiceMessage +// envelope. We only need the embedded service JSON; the rest of the meta +// fields are ignored. +type serviceMessage struct { + Type string `json:"_svctype"` + Domain string `json:"_domain"` + Service json.RawMessage `json:"Service"` +} diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..a589c05 --- /dev/null +++ b/go.mod @@ -0,0 +1,16 @@ +module git.happydns.org/checker-delegation + +go 1.25.0 + +require ( + git.happydns.org/checker-sdk-go v1.4.0 + github.com/miekg/dns v1.1.72 +) + +require ( + golang.org/x/mod v0.31.0 // indirect + golang.org/x/net v0.48.0 // indirect + golang.org/x/sync v0.19.0 // indirect + golang.org/x/sys v0.39.0 // indirect + golang.org/x/tools v0.40.0 // indirect +) diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..fc3939b --- /dev/null +++ b/go.sum @@ -0,0 +1,16 @@ +git.happydns.org/checker-sdk-go v1.4.0 h1:sO8EnF3suhNgYLRsbmCZWJOymH/oNMrOUqj3FEzJArs= +git.happydns.org/checker-sdk-go v1.4.0/go.mod h1:aNAcfYFfbhvH9kJhE0Njp5GX0dQbxdRB0rJ0KvSC5nI= +github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= +github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/miekg/dns v1.1.72 h1:vhmr+TF2A3tuoGNkLDFK9zi36F2LS+hKTRW0Uf8kbzI= +github.com/miekg/dns v1.1.72/go.mod h1:+EuEPhdHOsfk6Wk5TT2CzssZdqkmFhf8r+aVyDEToIs= +golang.org/x/mod v0.31.0 h1:HaW9xtz0+kOcWKwli0ZXy79Ix+UW/vOfmWI5QVd2tgI= +golang.org/x/mod v0.31.0/go.mod h1:43JraMp9cGx1Rx3AqioxrbrhNsLl2l/iNAvuBkrezpg= +golang.org/x/net v0.48.0 h1:zyQRTTrjc33Lhh0fBgT/H3oZq9WuvRR5gPC70xpDiQU= +golang.org/x/net v0.48.0/go.mod h1:+ndRgGjkh8FGtu1w1FGbEC31if4VrNVMuKTgcAAnQRY= +golang.org/x/sync v0.19.0 h1:vV+1eWNmZ5geRlYjzm2adRgW2/mcpevXNg50YZtPCE4= +golang.org/x/sync v0.19.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI= 
+golang.org/x/sys v0.39.0 h1:CvCKL8MeisomCi6qNZ+wbb0DN9E5AATixKsvNtMoMFk= +golang.org/x/sys v0.39.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= +golang.org/x/tools v0.40.0 h1:yLkxfA+Qnul4cs9QA3KnlFu0lVmd8JJfoq+E41uSutA= +golang.org/x/tools v0.40.0/go.mod h1:Ik/tzLRlbscWpqqMRjyWYDisX8bG13FrdXp3o4Sr9lc= diff --git a/main.go b/main.go new file mode 100644 index 0000000..465e3cf --- /dev/null +++ b/main.go @@ -0,0 +1,28 @@ +package main + +import ( + "flag" + "log" + + delegation "git.happydns.org/checker-delegation/checker" + "git.happydns.org/checker-sdk-go/checker/server" +) + +var listenAddr = flag.String("listen", ":8080", "HTTP listen address") + +// Version is the standalone binary's version. It defaults to "custom-build" +// and is meant to be overridden by the CI at link time: +// +// go build -ldflags "-X main.Version=1.2.3" . +var Version = "custom-build" + +func main() { + flag.Parse() + + delegation.Version = Version + + srv := server.New(delegation.Provider()) + if err := srv.ListenAndServe(*listenAddr); err != nil { + log.Fatalf("server error: %v", err) + } +} diff --git a/plugin/plugin.go b/plugin/plugin.go new file mode 100644 index 0000000..7d2e37d --- /dev/null +++ b/plugin/plugin.go @@ -0,0 +1,22 @@ +// Command plugin is the happyDomain plugin entrypoint for the delegation +// checker. It is built as a Go plugin (`go build -buildmode=plugin`) and +// loaded at runtime by happyDomain. +package main + +import ( + delegation "git.happydns.org/checker-delegation/checker" + sdk "git.happydns.org/checker-sdk-go/checker" +) + +// Version is the plugin's version. It defaults to "custom-build" and is +// meant to be overridden by the CI at link time: +// +// go build -buildmode=plugin -ldflags "-X main.Version=1.2.3" -o checker-delegation.so ./plugin +var Version = "custom-build" + +// NewCheckerPlugin is the symbol resolved by happyDomain when loading the +// .so file. 
+func NewCheckerPlugin() (*sdk.CheckerDefinition, sdk.ObservationProvider, error) { + delegation.Version = Version + return delegation.Definition(), delegation.Provider(), nil +}