From 30caf67389924bd9b9f0fdfdb12f41610fc9bb7a Mon Sep 17 00:00:00 2001 From: Pierre-Olivier Mercier Date: Tue, 28 Apr 2026 10:38:01 +0700 Subject: [PATCH] Initial commit --- .gitignore | 2 + Dockerfile | 17 ++ LICENSE | 21 ++ Makefile | 28 +++ NOTICE | 26 +++ README.md | 65 ++++++ checker/collect.go | 397 +++++++++++++++++++++++++++++++++ checker/definition.go | 76 +++++++ checker/provider.go | 16 ++ checker/report.go | 273 +++++++++++++++++++++++ checker/rule.go | 366 ++++++++++++++++++++++++++++++ checker/rules_test.go | 502 ++++++++++++++++++++++++++++++++++++++++++ checker/types.go | 147 +++++++++++++ contract/entry.go | 86 ++++++++ go.mod | 16 ++ go.sum | 16 ++ main.go | 29 +++ plugin/plugin.go | 15 ++ 18 files changed, 2098 insertions(+) create mode 100644 .gitignore create mode 100644 Dockerfile create mode 100644 LICENSE create mode 100644 Makefile create mode 100644 NOTICE create mode 100644 README.md create mode 100644 checker/collect.go create mode 100644 checker/definition.go create mode 100644 checker/provider.go create mode 100644 checker/report.go create mode 100644 checker/rule.go create mode 100644 checker/rules_test.go create mode 100644 checker/types.go create mode 100644 contract/entry.go create mode 100644 go.mod create mode 100644 go.sum create mode 100644 main.go create mode 100644 plugin/plugin.go diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..6ef6a07 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +checker-dangling +checker-dangling.so diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..c64dd34 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,17 @@ +FROM golang:1.25-alpine AS builder + +ARG CHECKER_VERSION=custom-build + +WORKDIR /src +COPY go.mod go.sum ./ +RUN go mod download +COPY . . +RUN CGO_ENABLED=0 go build -ldflags "-X main.Version=${CHECKER_VERSION}" -o /checker-dangling . 
+ +FROM scratch +COPY --from=builder /checker-dangling /checker-dangling +USER 65534:65534 +EXPOSE 8080 +HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \ + CMD ["/checker-dangling", "-healthcheck"] +ENTRYPOINT ["/checker-dangling"] diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..07d44d8 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2026 The happyDomain Authors + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the “Software”), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..40bc7d6 --- /dev/null +++ b/Makefile @@ -0,0 +1,28 @@ +CHECKER_NAME := checker-dangling +CHECKER_IMAGE := happydomain/$(CHECKER_NAME) +CHECKER_VERSION ?= custom-build + +CHECKER_SOURCES := main.go $(wildcard checker/*.go) $(wildcard contract/*.go) + +GO_LDFLAGS := -X main.Version=$(CHECKER_VERSION) + +.PHONY: all plugin docker test clean + +all: $(CHECKER_NAME) + +$(CHECKER_NAME): $(CHECKER_SOURCES) + go build -ldflags "$(GO_LDFLAGS)" -o $@ . 
+ +plugin: $(CHECKER_NAME).so + +$(CHECKER_NAME).so: $(CHECKER_SOURCES) $(wildcard plugin/*.go) + go build -buildmode=plugin -ldflags "$(GO_LDFLAGS)" -o $@ ./plugin/ + +docker: + docker build --build-arg CHECKER_VERSION=$(CHECKER_VERSION) -t $(CHECKER_IMAGE) . + +test: + go test ./... + +clean: + rm -f $(CHECKER_NAME) $(CHECKER_NAME).so diff --git a/NOTICE b/NOTICE new file mode 100644 index 0000000..2f80849 --- /dev/null +++ b/NOTICE @@ -0,0 +1,26 @@ +checker-dangling +Copyright (c) 2026 The happyDomain Authors + +This product is licensed under the MIT License (see LICENSE). + +------------------------------------------------------------------------------- +Third-party notices +------------------------------------------------------------------------------- + +This product includes software developed as part of the checker-sdk-go +project (https://git.happydns.org/happyDomain/checker-sdk-go), licensed +under the Apache License, Version 2.0: + + checker-sdk-go + Copyright 2020-2026 The happyDomain Authors + + This product includes software developed as part of the happyDomain + project (https://happydomain.org). + + Portions of this code were originally written for the happyDomain + server (licensed under AGPL-3.0 and a commercial license) and are + made available there under the Apache License, Version 2.0 to enable + a permissively licensed ecosystem of checker plugins. + +You may obtain a copy of the Apache License 2.0 at: + http://www.apache.org/licenses/LICENSE-2.0 diff --git a/README.md b/README.md new file mode 100644 index 0000000..b4e7ec2 --- /dev/null +++ b/README.md @@ -0,0 +1,65 @@ +# checker-dangling + +A happyDomain checker that scans a working zone for **dangling subdomains**: +records (`CNAME` / `MX` / `SRV` / `NS`) whose targets resolve to NXDOMAIN, +or whose external registrable domain is expired, in `pendingDelete`, or +recently re-registered.
This is the attack class popularised by Ars +Technica in 2017, where universities ended up serving porn from CNAMEs +that pointed at decommissioned third-party services after malicious +actors re-registered the lapsed targets. + +It runs in three deployment modes (standalone HTTP binary, Go plugin, +Docker image), like every other checker in the happyDomain ecosystem. + +## How it works + +The checker walks every service in the working zone (`AutoFillZone`) and +extracts pointer records from `svcs.CNAME`, `svcs.SpecialCNAME`, +`svcs.MXs`, `svcs.UnknownSRV`, and `svcs.Orphan` bodies (the latter +covering bare `NS`/`CNAME`/`MX` records when no dedicated service is +attached). For each (owner, rrtype, target) triple it: + +1. Classifies the target as in-zone or external relative to the zone's + eTLD+1 (via `golang.org/x/net/publicsuffix`). +2. Performs a single, time-bounded DNS resolution to detect immediate + breakage (`nxdomain`, `servfail`, `no_answer`, `timeout`). +3. Publishes a `DiscoveryEntry` per pointer: + - `dangling.external-target.v1` for external pointers — companion + checkers (notably the host's `domain_expiry`) subscribe to this + type and run RDAP/WHOIS on the registrable domain. + - `dangling.in-zone-target.v1` for same-registrable pointers — used + as a join key for future reachability checkers (alias / ping / + http) that may consume it. + +## Verdict matrix + +| Signal | Severity | Source | +|--------------------------------------------------------------|----------|-------------------------| +| Target NXDOMAIN | Critical | local DNS resolution | +| Target SERVFAIL | Warning | local DNS resolution | +| Target NOERROR with empty answer | Info | local DNS resolution | +| Registrable domain expired | Critical | `whois` related obs. | +| Registrable status `pendingDelete` / `redemptionPeriod` | Critical | `whois` related obs. | +| Registrable registered within the last 90 days | Warning | `whois` related obs. 
| + +The rule emits one `CheckState` per impacted owner and ranks them by +descending severity so the report's "Fix this first" card always +matches the rule output. + +## Companion: `domain_expiry` + +For the WHOIS-driven signals to fire, the host's existing +`domain_expiry` checker must be extended to subscribe to +`dangling.external-target.v1` entries via `AutoFillDiscoveryEntries`, +run RDAP per registrable domain, and publish a per-Ref `whois` +observation. Without that subscription the checker still works as a +DNS-only dangling detector. + +## Build + +```sh +make # standalone binary +make plugin # .so plugin for happyDomain +make docker # Docker image +make test # run the unit tests +``` diff --git a/checker/collect.go b/checker/collect.go new file mode 100644 index 0000000..517c657 --- /dev/null +++ b/checker/collect.go @@ -0,0 +1,397 @@ +package checker + +import ( + "context" + "encoding/json" + "errors" + "fmt" + "net" + "sort" + "strings" + "time" + + "github.com/miekg/dns" + "golang.org/x/net/publicsuffix" + + contract "git.happydns.org/checker-dangling/contract" + sdk "git.happydns.org/checker-sdk-go/checker" +) + +// resolverTimeout caps each individual lookup so a slow / blackholed +// authoritative server cannot stall a zone scan. Set conservatively: +// the host can re-run the check at any time, and a deadline beats a +// hang. +const resolverTimeout = 4 * time.Second + +// resolveHost is the function used to classify a target. It is a +// package-level variable so tests can stub it deterministically without +// reaching the network. +var resolveHost = defaultResolveHost + +// Collect walks the working zone, extracts every pointer record +// (CNAME / MX / SRV / NS), classifies each target as in-zone or +// external relative to the zone's registrable domain, and resolves +// each target on the live DNS to detect immediate breakage. 
+func (p *danglingProvider) Collect(ctx context.Context, opts sdk.CheckerOptions) (any, error) { + if err := ctx.Err(); err != nil { + return nil, err + } + + zone, err := readZone(opts) + if err != nil { + return nil, err + } + + zoneApex := strings.TrimSuffix(zone.DomainName, ".") + if zoneApex == "" { + if name, ok := sdk.GetOption[string](opts, "domain_name"); ok { + zoneApex = strings.TrimSuffix(name, ".") + } + } + zoneRegistrable, _ := publicsuffix.EffectiveTLDPlusOne(zoneApex) + + skipResolution, _ := sdk.GetOption[bool](opts, "skip_resolution") + + data := &DanglingData{Zone: zoneApex} + + // Sort subdomains for deterministic output. + subs := make([]string, 0, len(zone.Services)) + for s := range zone.Services { + subs = append(subs, s) + } + sort.Strings(subs) + + // Track unique (owner, rrtype, target) so duplicate services do + // not produce duplicate findings. + seen := map[string]bool{} + + for _, sub := range subs { + if err := ctx.Err(); err != nil { + return nil, err + } + for _, svc := range zone.Services[sub] { + data.ServicesScanned++ + pts, perr := extractPointers(sub, zoneApex, svc) + if perr != nil { + data.CollectErrors = append(data.CollectErrors, + fmt.Sprintf("%s/%s: %v", displaySubdomain(sub), svc.Type, perr)) + continue + } + for _, pt := range pts { + key := pt.Owner + "|" + pt.Rrtype + "|" + pt.Target + if seen[key] { + continue + } + seen[key] = true + classifyExternal(&pt, zoneRegistrable) + if skipResolution { + pt.Resolution = "skipped" + } else { + pt.Resolution, pt.ResolutionDetail = resolveHost(ctx, pt.Target) + } + data.Pointers = append(data.Pointers, pt) + } + } + } + + return data, nil +} + +// DiscoverEntries publishes one DiscoveryEntry per external pointer so +// a subscriber (typically domain_expiry) can RDAP/WHOIS each target's +// registrable domain. 
In-zone pointers also get an entry so future +// reachability checkers can subscribe; this checker does not currently +// rely on observations attached to those entries. +func (p *danglingProvider) DiscoverEntries(data any) ([]sdk.DiscoveryEntry, error) { + d, ok := data.(*DanglingData) + if !ok || d == nil { + return nil, nil + } + out := make([]sdk.DiscoveryEntry, 0, len(d.Pointers)) + for _, pt := range d.Pointers { + if pt.External && pt.Registrable != "" { + entry, err := contract.NewExternalEntry(contract.ExternalTarget{ + Owner: pt.Owner, + Rrtype: pt.Rrtype, + Target: pt.Target, + Registrable: pt.Registrable, + }) + if err != nil { + return nil, err + } + out = append(out, entry) + continue + } + entry, err := contract.NewInZoneEntry(contract.InZoneTarget{ + Owner: pt.Owner, + Rrtype: pt.Rrtype, + Target: pt.Target, + Registrable: pt.Registrable, + }) + if err != nil { + return nil, err + } + out = append(out, entry) + } + return out, nil +} + +// readZone normalises the zone option (native struct or JSON object). +func readZone(opts sdk.CheckerOptions) (*rawZone, error) { + v, ok := opts["zone"] + if !ok || v == nil { + return nil, fmt.Errorf("missing 'zone' option (AutoFillZone): the host did not provide a working zone") + } + raw, err := json.Marshal(v) + if err != nil { + return nil, fmt.Errorf("re-marshal zone option: %w", err) + } + z := &rawZone{} + if err := json.Unmarshal(raw, z); err != nil { + return nil, fmt.Errorf("decode zone option: %w", err) + } + return z, nil +} + +// extractPointers walks one service body and returns every +// (owner, rrtype, target) triple it carries. It is best-effort: +// services that do not match any known pointer shape return (nil, nil) +// so the common case of a pure A/AAAA/TXT zone produces no noise in +// CollectErrors. 
+func extractPointers(sub, apex string, svc rawService) ([]Pointer, error) { + if len(svc.Service) == 0 { + return nil, nil + } + owner := ownerFQDN(svc.Domain, sub, apex) + + switch svc.Type { + case "svcs.CNAME", "svcs.SpecialCNAME": + var b cnameBody + if err := json.Unmarshal(svc.Service, &b); err != nil { + return nil, fmt.Errorf("decode cname body: %w", err) + } + target := normaliseTarget(b.Record.Target, owner, apex) + if target == "" { + return nil, nil + } + ptOwner := preferRRName(b.Record.Hdr.Name, owner) + return []Pointer{{ + Owner: ptOwner, + Subdomain: sub, + Rrtype: "CNAME", + Target: target, + ServiceType: svc.Type, + }}, nil + + case "svcs.MXs": + var b mxsBody + if err := json.Unmarshal(svc.Service, &b); err != nil { + return nil, fmt.Errorf("decode mxs body: %w", err) + } + out := make([]Pointer, 0, len(b.MXs)) + for _, r := range b.MXs { + target := normaliseTarget(r.Mx, owner, apex) + if target == "" { + continue + } + out = append(out, Pointer{ + Owner: preferRRName(r.Hdr.Name, owner), + Subdomain: sub, + Rrtype: "MX", + Target: target, + ServiceType: svc.Type, + }) + } + return out, nil + + case "svcs.UnknownSRV": + var b srvsBody + if err := json.Unmarshal(svc.Service, &b); err != nil { + return nil, fmt.Errorf("decode srv body: %w", err) + } + out := make([]Pointer, 0, len(b.Records)) + for _, r := range b.Records { + target := normaliseTarget(r.Target, owner, apex) + if target == "" { + continue + } + out = append(out, Pointer{ + Owner: preferRRName(r.Hdr.Name, owner), + Subdomain: sub, + Rrtype: "SRV", + Target: target, + ServiceType: svc.Type, + }) + } + return out, nil + + case "svcs.Orphan": + var b orphanRecord + if err := json.Unmarshal(svc.Service, &b); err != nil { + return nil, fmt.Errorf("decode orphan body: %w", err) + } + ptOwner := preferRRName(b.Record.Hdr.Name, owner) + switch b.Record.Hdr.Rrtype { + case dns.TypeNS: + target := normaliseTarget(b.Record.Ns, ptOwner, apex) + if target == "" { + return nil, nil + } + return 
[]Pointer{{ + Owner: ptOwner, + Subdomain: sub, + Rrtype: "NS", + Target: target, + ServiceType: svc.Type, + }}, nil + case dns.TypeCNAME: + target := normaliseTarget(b.Record.Target, ptOwner, apex) + if target == "" { + return nil, nil + } + return []Pointer{{ + Owner: ptOwner, + Subdomain: sub, + Rrtype: "CNAME", + Target: target, + ServiceType: svc.Type, + }}, nil + case dns.TypeMX: + target := normaliseTarget(b.Record.Mx, ptOwner, apex) + if target == "" { + return nil, nil + } + return []Pointer{{ + Owner: ptOwner, + Subdomain: sub, + Rrtype: "MX", + Target: target, + ServiceType: svc.Type, + }}, nil + } + return nil, nil + } + + return nil, nil +} + +// classifyExternal sets pt.External and pt.Registrable based on +// publicsuffix-derived eTLD+1. When publicsuffix cannot resolve an +// eTLD+1 (e.g. internal TLD), we fall back to suffix-comparing the +// target against the zone's registrable name. This fallback is +// imprecise for sub-zones (a target under the parent registrable will +// be treated as in-zone), but it is only reached for non-PSL names. +func classifyExternal(pt *Pointer, zoneRegistrable string) { + target := strings.TrimSuffix(pt.Target, ".") + if target == "" { + return + } + reg, err := publicsuffix.EffectiveTLDPlusOne(target) + if err != nil { + // Fall back to suffix comparison when target is not a + // PSL-known name (e.g. ".internal", ".lan"). 
+ suffix := strings.TrimSuffix(zoneRegistrable, ".") + if suffix == "" || (target != suffix && !strings.HasSuffix(target, "."+suffix)) { + pt.External = true + } + return + } + pt.Registrable = reg + if zoneRegistrable == "" || !strings.EqualFold(reg, zoneRegistrable) { + pt.External = true + } +} + +// defaultResolveHost performs a single A/AAAA lookup on target and +// classifies the outcome into one of: +// +// - "ok" – at least one A/AAAA returned +// - "no_answer" – NOERROR but the server returned no addresses +// - "nxdomain" – authoritative NXDOMAIN +// - "servfail" – upstream resolver returned SERVFAIL +// - "timeout" – the lookup did not complete in time +// - "error" – any other resolution error +func defaultResolveHost(ctx context.Context, target string) (verdict, detail string) { + target = strings.TrimSuffix(target, ".") + if target == "" { + return "skipped", "empty target" + } + cctx, cancel := context.WithTimeout(ctx, resolverTimeout) + defer cancel() + + ips, err := net.DefaultResolver.LookupHost(cctx, target) + if err == nil { + if len(ips) == 0 { + return "no_answer", "" + } + return "ok", "" + } + + var dnsErr *net.DNSError + if errors.As(err, &dnsErr) { + switch { + case dnsErr.IsNotFound: + return "nxdomain", dnsErr.Err + case dnsErr.IsTimeout: + return "timeout", dnsErr.Err + case strings.Contains(strings.ToLower(dnsErr.Err), "servfail"): + return "servfail", dnsErr.Err + default: + return "error", dnsErr.Err + } + } + return "error", err.Error() +} + +// ownerFQDN returns the FQDN of the service's owner. We prefer the +// service's _domain field (already an FQDN with trailing dot in +// happyDomain's wire shape) and fall back to subdomain+apex. +func ownerFQDN(svcDomain, sub, apex string) string { + if svcDomain != "" { + return strings.TrimSuffix(svcDomain, ".") + } + if apex == "" { + return sub + } + if sub == "" || sub == "@" { + return apex + } + return sub + "." 
+ apex +} + +// preferRRName returns the RR header Name when present (it is the +// authoritative owner for the record), otherwise the service-derived +// owner. +func preferRRName(rrName, fallback string) string { + rrName = strings.TrimSuffix(rrName, ".") + if rrName != "" { + return rrName + } + return fallback +} + +// normaliseTarget yields the FQDN form of a record target. happyDomain +// stores within-zone targets relative to the zone, and external targets +// fully-qualified. We accept both shapes. +func normaliseTarget(target, owner, apex string) string { + t := strings.TrimSpace(target) + if t == "" { + return "" + } + if trimmed, ok := strings.CutSuffix(t, "."); ok { + return trimmed + } + // Relative: anchor under the zone apex (or the owner when apex is + // empty, which only happens in tests that omit the domain name). + if apex != "" { + return t + "." + apex + } + return t + "." + owner +} + +func displaySubdomain(s string) string { + if s == "" || s == "@" { + return "@" + } + return s +} diff --git a/checker/definition.go b/checker/definition.go new file mode 100644 index 0000000..0d3cfa0 --- /dev/null +++ b/checker/definition.go @@ -0,0 +1,76 @@ +package checker + +import ( + "time" + + sdk "git.happydns.org/checker-sdk-go/checker" +) + +// Version is overridden at build time via -ldflags by main.go and +// plugin/plugin.go. Use SetVersion from entrypoints rather than +// assigning to it directly. +var Version = "built-in" + +// SetVersion updates the package-level Version reported in the +// CheckerDefinition. Empty values are ignored so an entrypoint that +// forgets its own ldflags does not erase the default. +func SetVersion(v string) { + if v != "" { + Version = v + } +} + +// Definition exposes the checker to the happyDomain host. +// +// The checker is zone-scoped: it inspects every pointer service in a +// single pass so the report consolidates findings by owner instead of +// fanning one observation out per service. 
+func Definition() *sdk.CheckerDefinition { + def := &sdk.CheckerDefinition{ + ID: "dangling", + Name: "Dangling subdomains", + Version: Version, + Availability: sdk.CheckerAvailability{ + ApplyToZone: true, + }, + ObservationKeys: []sdk.ObservationKey{ObservationKeyDangling}, + Options: sdk.CheckerOptionsDocumentation{ + DomainOpts: []sdk.CheckerOptionDocumentation{ + { + Id: "domain_name", + Type: "string", + Label: "Domain name", + AutoFill: sdk.AutoFillDomainName, + Hide: true, + }, + { + Id: "zone", + Type: "string", + Label: "Zone", + AutoFill: sdk.AutoFillZone, + Hide: true, + }, + }, + RunOpts: []sdk.CheckerOptionDocumentation{ + { + Id: "skip_resolution", + Type: "bool", + Label: "Skip live DNS resolution", + Description: "When set, the checker only reports the static structure of pointer records. Useful for offline analysis; defaults to false.", + Default: false, + }, + }, + }, + Rules: []sdk.CheckRule{ + &danglingRule{}, + }, + HasHTMLReport: true, + Interval: &sdk.CheckIntervalSpec{ + Min: 15 * time.Minute, + Max: 7 * 24 * time.Hour, + Default: 12 * time.Hour, + }, + } + def.BuildRulesInfo() + return def +} diff --git a/checker/provider.go b/checker/provider.go new file mode 100644 index 0000000..ee90bd2 --- /dev/null +++ b/checker/provider.go @@ -0,0 +1,16 @@ +package checker + +import ( + sdk "git.happydns.org/checker-sdk-go/checker" +) + +// Provider returns the dangling-records observation provider. 
+func Provider() sdk.ObservationProvider { + return &danglingProvider{} +} + +type danglingProvider struct{} + +func (p *danglingProvider) Key() sdk.ObservationKey { return ObservationKeyDangling } + +func (p *danglingProvider) Definition() *sdk.CheckerDefinition { return Definition() } diff --git a/checker/report.go b/checker/report.go new file mode 100644 index 0000000..dc11523 --- /dev/null +++ b/checker/report.go @@ -0,0 +1,273 @@ +package checker + +import ( + "bytes" + "encoding/json" + "fmt" + "html/template" + + sdk "git.happydns.org/checker-sdk-go/checker" +) + +// GetHTMLReport renders the dangling-records observation as a +// self-contained HTML page. The report shows one card per impacted +// owner, sorted by descending severity, with the failing pointer and +// the human-readable reason behind each trigger. +func (p *danglingProvider) GetHTMLReport(ctx sdk.ReportContext) (string, error) { + var data DanglingData + if raw := ctx.Data(); len(raw) > 0 { + if err := json.Unmarshal(raw, &data); err != nil { + return "", fmt.Errorf("parse dangling-records data: %w", err) + } + } + + view := buildReportView(&data, ctx.States()) + + buf := &bytes.Buffer{} + if err := reportTmpl.Execute(buf, view); err != nil { + return "", err + } + return buf.String(), nil +} + +type reportView struct { + Zone string + ServicesScanned int + Pointers int + OverallText string + OverallClass string + Top *ownerCard + Others []ownerCard + CollectErrors []string +} + +type ownerCard struct { + Owner string + Severity string + SeverityCSS string + Triggers []SignalTrigger +} + +func buildReportView(data *DanglingData, states []sdk.CheckState) *reportView { + v := &reportView{ + Zone: data.Zone, + ServicesScanned: data.ServicesScanned, + Pointers: len(data.Pointers), + CollectErrors: data.CollectErrors, + } + + cards := cardsFromStates(states) + if len(cards) == 0 { + // Honour an Error state from the rule so the banner does not + // masquerade as OK when the observation could not 
be loaded. + if errState, ok := firstErrorState(states); ok { + v.OverallText = errState.Message + v.OverallClass = "status-crit" + return v + } + v.OverallText = fmt.Sprintf("No dangling subdomain detected across %d service(s).", data.ServicesScanned) + v.OverallClass = "status-ok" + return v + } + + v.Top = &cards[0] + v.Others = cards[1:] + v.OverallText, v.OverallClass = overallLabel(cards[0].SeverityCSS) + return v +} + +// cardsFromStates rebuilds the per-owner cards from the CheckState +// slice the host has already produced. We rely on Meta.triggers (set by +// danglingRule.Evaluate) so the report and the rule never disagree on +// what to show. +func cardsFromStates(states []sdk.CheckState) []ownerCard { + out := make([]ownerCard, 0, len(states)) + for _, st := range states { + if st.Code == "dangling_clean" || st.Code == "dangling_observation_error" { + continue + } + card := ownerCard{ + Owner: st.Subject, + } + if sev, ok := st.Meta["severity"].(string); ok { + card.Severity = severityLabel(sev) + card.SeverityCSS = sev + } + // Triggers may have been round-tripped through JSON if the host + // crossed an HTTP boundary; handle both shapes. 
+ switch v := st.Meta["triggers"].(type) { + case []SignalTrigger: + card.Triggers = v + case []any: + skipped := 0 + for _, item := range v { + b, err := json.Marshal(item) + if err != nil { + skipped++ + continue + } + var t SignalTrigger + if err := json.Unmarshal(b, &t); err != nil { + skipped++ + continue + } + card.Triggers = append(card.Triggers, t) + } + if skipped > 0 { + card.Triggers = append(card.Triggers, SignalTrigger{ + Reason: fmt.Sprintf("%d trigger(s) could not be rendered.", skipped), + }) + } + } + out = append(out, card) + } + return out +} + +func firstErrorState(states []sdk.CheckState) (sdk.CheckState, bool) { + for i := range states { + if states[i].Status == sdk.StatusError { + return states[i], true + } + } + return sdk.CheckState{}, false +} + +func severityLabel(css string) string { + switch css { + case "critical": + return "Critical" + case "warning": + return "Warning" + case "info": + return "Informational" + default: + return "" + } +} + +func overallLabel(severityCSS string) (text, css string) { + switch severityCSS { + case "critical": + return "Dangling subdomains require urgent attention", "status-crit" + case "warning": + return "Dangling subdomains should be reviewed", "status-warn" + case "info": + return "Informational pointer issues found", "status-info" + default: + return "Dangling subdomains detected", "status-warn" + } +} + +var reportTmpl = template.Must(template.New("dangling-records-report").Parse(reportTemplate)) + +const reportTemplate = ` + + + +Dangling subdomains — {{if .Zone}}{{.Zone}}{{else}}zone report{{end}} + + + +

Dangling subdomains

+
{{if .Zone}}Zone: {{.Zone}} · {{end}}{{.ServicesScanned}} service(s) scanned · {{.Pointers}} pointer(s) inspected
+ +
+
+
{{.OverallText}}
+
+
+ + {{if .Top}} +

Fix this first

+
+

+ {{.Top.Owner}} + {{.Top.Severity}} +

+ {{if .Top.Triggers}} + + + + {{range .Top.Triggers}} + + + + + + {{end}} + +
PointerTargetWhy
{{.Rrtype}}{{.Target}}{{.Reason}}{{if .Detail}} ({{.Detail}}){{end}}
+ {{end}} +
+ {{end}} + + {{if .Others}} +

Other dangling subdomains

+ {{range .Others}} +
+

+ {{.Owner}} + {{.Severity}} +

+ {{if .Triggers}} + + {{end}} +
+ {{end}} + {{end}} + + {{if .CollectErrors}} +
+ {{len .CollectErrors}} service(s) skipped during scan + +
+ {{end}} + + +` diff --git a/checker/rule.go b/checker/rule.go new file mode 100644 index 0000000..b811a05 --- /dev/null +++ b/checker/rule.go @@ -0,0 +1,366 @@ +package checker + +import ( + "context" + "encoding/json" + "fmt" + "sort" + "strings" + "time" + + contract "git.happydns.org/checker-dangling/contract" + sdk "git.happydns.org/checker-sdk-go/checker" +) + +// recentRegistrationDays defines how recently a registrable domain +// must have been (re-)registered for the rule to flag it as a likely +// takeover candidate. The Ars Technica scenario hinges on attackers +// re-registering a freshly-released domain; surfacing recently-changed +// registrations is what turns a passing NXDOMAIN-free lookup into an +// audit signal. +const recentRegistrationDays = 90 + +// danglingRule is the single rule for v1: it walks the observation's +// pointer list, joins it with the related "whois" observations +// produced by domain_expiry on the entries we published, and emits one +// CheckState per impacted owner. +type danglingRule struct{} + +func (r *danglingRule) Name() string { return "dangling_records" } + +func (r *danglingRule) Description() string { + return "Detects subdomains whose CNAME / MX / SRV / NS targets resolve to NXDOMAIN, or whose external registrable domain is expired or recently re-registered. Combines local DNS resolution with WHOIS observations published by companion checkers." 
+} + +func (r *danglingRule) Evaluate(ctx context.Context, obs sdk.ObservationGetter, opts sdk.CheckerOptions) []sdk.CheckState { + var data DanglingData + if err := obs.Get(ctx, ObservationKeyDangling, &data); err != nil { + return []sdk.CheckState{{ + Status: sdk.StatusError, + Message: fmt.Sprintf("failed to load dangling-records observation: %v", err), + RuleName: r.Name(), + Code: "dangling_observation_error", + }} + } + + whoisByRef, whoisLoadErrors := loadWHOIS(ctx, obs) + + // Group findings by owner so we report once per impacted subdomain + // even when multiple pointers under the same owner trigger a rule. + byOwner := map[string]*ownerFindings{} + for i := range data.Pointers { + pt := &data.Pointers[i] + triggers := evaluatePointer(pt, whoisByRef) + if len(triggers) == 0 { + continue + } + f, ok := byOwner[pt.Owner] + if !ok { + f = &ownerFindings{Owner: pt.Owner, Subdomain: pt.Subdomain} + byOwner[pt.Owner] = f + } + f.Triggers = append(f.Triggers, triggers...) + if sev := scoreSeverity(triggers); sev > f.WorstSeverity { + f.WorstSeverity = sev + } + } + + out := make([]sdk.CheckState, 0, len(byOwner)+1) + if whoisLoadErrors > 0 { + out = append(out, sdk.CheckState{ + Status: sdk.StatusInfo, + Message: fmt.Sprintf("%d related WHOIS observation(s) could not be parsed; takeover signals may be incomplete.", whoisLoadErrors), + RuleName: r.Name(), + Code: "dangling_whois_load_warning", + }) + } + + if len(byOwner) == 0 { + out = append(out, sdk.CheckState{ + Status: sdk.StatusOK, + Message: fmt.Sprintf("No dangling subdomain detected (%d service(s) scanned, %d pointer(s) inspected)", data.ServicesScanned, len(data.Pointers)), + RuleName: r.Name(), + Code: "dangling_clean", + }) + return out + } + + for _, f := range sortFindings(byOwner) { + out = append(out, sdk.CheckState{ + Status: severityToStatus(f.WorstSeverity), + Message: buildOwnerMessage(f), + RuleName: r.Name(), + Code: codeForSeverity(f.WorstSeverity), + Subject: displayOwner(f), + Meta: 
map[string]any{ + "owner": f.Owner, + "subdomain": f.Subdomain, + "triggers": f.Triggers, + "severity": f.WorstSeverity.String(), + }, + }) + } + return out +} + +// Severity is the rule's internal grading. Higher value = more urgent. +type Severity int + +const ( + SeverityNone Severity = iota + SeverityInfo + SeverityWarn + SeverityCrit +) + +func (s Severity) String() string { + switch s { + case SeverityCrit: + return "critical" + case SeverityWarn: + return "warning" + case SeverityInfo: + return "info" + default: + return "none" + } +} + +// SignalTrigger captures one reason the rule flagged an owner. Stored +// in the per-owner Meta so the report can render a concise list of +// "why this is dangling". +type SignalTrigger struct { + Rrtype string `json:"rrtype"` + Target string `json:"target"` + Reason string `json:"reason"` + Detail string `json:"detail,omitempty"` + Severity Severity `json:"severity"` +} + +type ownerFindings struct { + Owner string + Subdomain string + Triggers []SignalTrigger + WorstSeverity Severity +} + +// evaluatePointer applies the v1 verdict matrix to a single pointer: +// +// - Resolution == "nxdomain" → critical (broken pointer). +// - Resolution == "servfail" → warning (likely lame upstream, may +// also indicate decommissioning). +// - Resolution == "no_answer" → info (NOERROR with empty answer +// section is rarely the operator's intent for a pointer). +// - WHOIS Status contains "pendingDelete"/"redemptionPeriod" → critical. +// - WHOIS ExpiryDate already in the past → critical. +// - WHOIS shows a registration < recentRegistrationDays old → warning +// (possible re-registration; surface for review). +// +// Multiple triggers on the same pointer are reported individually so +// the report can explain "why" without ambiguity. 
+func evaluatePointer(pt *Pointer, whoisByRef map[string]*whoisFacts) []SignalTrigger { + var out []SignalTrigger + + switch pt.Resolution { + case "nxdomain": + out = append(out, SignalTrigger{ + Rrtype: pt.Rrtype, Target: pt.Target, + Reason: "Target does not resolve (NXDOMAIN). The record points at a host that no longer exists.", + Detail: pt.ResolutionDetail, Severity: SeverityCrit, + }) + case "servfail": + out = append(out, SignalTrigger{ + Rrtype: pt.Rrtype, Target: pt.Target, + Reason: "Target lookup returned SERVFAIL. The authoritative server may be misconfigured or the delegation broken.", + Detail: pt.ResolutionDetail, Severity: SeverityWarn, + }) + case "no_answer": + out = append(out, SignalTrigger{ + Rrtype: pt.Rrtype, Target: pt.Target, + Reason: "Target resolves to no address (NOERROR with empty answer). Rarely the operator's intent for a pointer record.", + Severity: SeverityInfo, + }) + } + + // WHOIS-driven checks only apply to external targets we successfully + // classified into a registrable domain. + if pt.External && pt.Registrable != "" { + if facts, ok := whoisByRef[contract.Ref(pt.Owner, pt.Rrtype, pt.Target)]; ok && facts != nil { + out = append(out, evaluateWHOIS(pt, facts)...) + } + } + + return out +} + +// whoisFacts is the minimal shape we need from a related "whois" +// observation: ExpiryDate to detect expiration, Status to spot +// registry-side states like pendingDelete, and CreationDate (when +// reported by the upstream RDAP probe) to flag fresh re-registrations. 
+type whoisFacts struct { + ExpiryDate time.Time `json:"expiryDate"` + CreationDate time.Time `json:"creationDate,omitzero"` + Status []string `json:"status,omitempty"` +} + +func evaluateWHOIS(pt *Pointer, f *whoisFacts) []SignalTrigger { + var out []SignalTrigger + now := time.Now() + + var atRiskStatuses []string + for _, s := range f.Status { + ls := strings.ToLower(s) + if strings.Contains(ls, "pendingdelete") || strings.Contains(ls, "redemptionperiod") { + atRiskStatuses = append(atRiskStatuses, s) + } + } + if len(atRiskStatuses) > 0 { + out = append(out, SignalTrigger{ + Rrtype: pt.Rrtype, Target: pt.Target, + Reason: fmt.Sprintf("Target's registrable domain (%s) is in registry state %s. It may be deleted soon and re-registered by anyone.", pt.Registrable, strings.Join(atRiskStatuses, ", ")), + Severity: SeverityCrit, + }) + } + + if !f.ExpiryDate.IsZero() && f.ExpiryDate.Before(now) { + out = append(out, SignalTrigger{ + Rrtype: pt.Rrtype, Target: pt.Target, + Reason: fmt.Sprintf("Target's registrable domain (%s) expired on %s.", pt.Registrable, f.ExpiryDate.Format("2006-01-02")), + Severity: SeverityCrit, + }) + } + + if !f.CreationDate.IsZero() { + age := now.Sub(f.CreationDate) + if age < time.Duration(recentRegistrationDays)*24*time.Hour && age > 0 { + out = append(out, SignalTrigger{ + Rrtype: pt.Rrtype, Target: pt.Target, + Reason: fmt.Sprintf("Target's registrable domain (%s) was registered %d days ago, after the original target was likely decommissioned. Verify the new owner is intentional.", pt.Registrable, int(age.Hours()/24)), + Severity: SeverityWarn, + }) + } + } + + return out +} + +// ExternalWhoisObservationKey names the observation produced by the +// companion checker that subscribes to dangling.external-target.v1 +// entries and runs RDAP/WHOIS per registrable domain. Kept in sync +// with happydomain3/checkers/external_expiry.go. 
+const ExternalWhoisObservationKey = "external_whois" + +// loadWHOIS resolves related observations of key external_whois into a +// per-Ref index. A non-fatal error is silently swallowed: WHOIS data +// is best-effort context and its absence must not turn the whole rule +// into an Error state. +// +// The companion checker is expected to return a map[Ref]facts under +// each related observation; we also accept a single-fact payload keyed +// directly by the entry Ref (host-side flattening case). +func loadWHOIS(ctx context.Context, obs sdk.ObservationGetter) (map[string]*whoisFacts, int) { + out := map[string]*whoisFacts{} + related, err := obs.GetRelated(ctx, ExternalWhoisObservationKey) + if err != nil { + return out, 0 + } + parseErrors := 0 + for _, ro := range related { + // Try the per-Ref map shape first (the convention the host's + // external_whois provider uses, mirrored from checker-tls). + var asMap struct { + Facts map[string]whoisFacts `json:"facts"` + } + if err := json.Unmarshal(ro.Data, &asMap); err == nil && len(asMap.Facts) > 0 { + for ref, f := range asMap.Facts { + ff := f + out[ref] = &ff + } + continue + } + // Fallback: a single-fact payload, keyed by the related Ref. 
+ var f whoisFacts + if err := json.Unmarshal(ro.Data, &f); err != nil { + parseErrors++ + continue + } + out[ro.Ref] = &f + } + return out, parseErrors +} + +func severityToStatus(s Severity) sdk.Status { + switch s { + case SeverityCrit: + return sdk.StatusCrit + case SeverityWarn: + return sdk.StatusWarn + case SeverityInfo: + return sdk.StatusInfo + default: + return sdk.StatusOK + } +} + +func scoreSeverity(triggers []SignalTrigger) Severity { + worst := SeverityNone + for _, t := range triggers { + if t.Severity > worst { + worst = t.Severity + } + } + return worst +} + +func codeForSeverity(s Severity) string { + switch s { + case SeverityCrit: + return "dangling_critical" + case SeverityWarn: + return "dangling_warning" + case SeverityInfo: + return "dangling_info" + default: + return "dangling_clean" + } +} + +func buildOwnerMessage(f *ownerFindings) string { + first := f.Triggers[0] + if len(f.Triggers) == 1 { + return fmt.Sprintf("%s — %s", displayOwner(f), first.Reason) + } + return fmt.Sprintf("%s — %s (and %d more signal%s)", displayOwner(f), first.Reason, + len(f.Triggers)-1, plural(len(f.Triggers)-1)) +} + +func displayOwner(f *ownerFindings) string { + if f.Owner != "" { + return f.Owner + } + return displaySubdomain(f.Subdomain) +} + +func plural(n int) string { + if n == 1 { + return "" + } + return "s" +} + +// sortFindings yields a stable, severity-first ordering of the +// per-owner findings so the report's "fix this first" card always +// matches the rule output. 
+func sortFindings(byOwner map[string]*ownerFindings) []*ownerFindings { + out := make([]*ownerFindings, 0, len(byOwner)) + for _, f := range byOwner { + out = append(out, f) + } + sort.SliceStable(out, func(i, j int) bool { + if out[i].WorstSeverity != out[j].WorstSeverity { + return out[i].WorstSeverity > out[j].WorstSeverity + } + return out[i].Owner < out[j].Owner + }) + return out +} diff --git a/checker/rules_test.go b/checker/rules_test.go new file mode 100644 index 0000000..334eb09 --- /dev/null +++ b/checker/rules_test.go @@ -0,0 +1,502 @@ +package checker + +import ( + "context" + "encoding/json" + "fmt" + "strings" + "testing" + "time" + + "github.com/miekg/dns" + + contract "git.happydns.org/checker-dangling/contract" + sdk "git.happydns.org/checker-sdk-go/checker" +) + +// --- test helpers --------------------------------------------------------- + +// stubResolver lets a single test override the resolution outcome per +// target without touching the real network. The outer test wires it +// in/out via a t.Cleanup so the package-level variable stays clean. +func stubResolver(t *testing.T, table map[string]struct{ verdict, detail string }) { + t.Helper() + prev := resolveHost + resolveHost = func(_ context.Context, target string) (string, string) { + target = strings.TrimSuffix(target, ".") + if v, ok := table[target]; ok { + return v.verdict, v.detail + } + // Default: target resolves cleanly. Tests pin behaviour they + // care about; everything else should be a "boring OK". 
+ return "ok", "" + } + t.Cleanup(func() { resolveHost = prev }) +} + +func cnameSvc(target string) rawService { + body, _ := json.Marshal(map[string]any{ + "cname": map[string]any{ + "Hdr": map[string]any{"Name": ""}, + "Target": target, + }, + }) + return rawService{Type: "svcs.CNAME", Domain: "", Service: body} +} + +func mxSvc(targets ...string) rawService { + mxs := make([]map[string]any, 0, len(targets)) + for _, t := range targets { + mxs = append(mxs, map[string]any{ + "Hdr": map[string]any{"Name": ""}, + "Mx": t, + "Preference": 10, + }) + } + body, _ := json.Marshal(map[string]any{"mx": mxs}) + return rawService{Type: "svcs.MXs", Domain: "", Service: body} +} + +func srvSvc(target string) rawService { + body, _ := json.Marshal(map[string]any{ + "srv": []map[string]any{{ + "Hdr": map[string]any{"Name": ""}, + "Target": target, + }}, + }) + return rawService{Type: "svcs.UnknownSRV", Domain: "", Service: body} +} + +func nsOrphan(host string) rawService { + body, _ := json.Marshal(map[string]any{ + "record": map[string]any{ + "Hdr": map[string]any{"Name": "", "Rrtype": dns.TypeNS}, + "Ns": host, + }, + }) + return rawService{Type: "svcs.Orphan", Domain: "", Service: body} +} + +// modernNonPointer mimics a service that carries no pointer (e.g. an +// abstract.Server with A/AAAA records). The collector should ignore it +// silently, contributing only to ServicesScanned. 
+func modernNonPointer() rawService { + body, _ := json.Marshal(map[string]any{"A": map[string]any{}}) + return rawService{Type: "abstract.Server", Domain: "", Service: body} +} + +func runCollect(t *testing.T, zone *rawZone, opts sdk.CheckerOptions) *DanglingData { + t.Helper() + if opts == nil { + opts = sdk.CheckerOptions{} + } + raw, err := json.Marshal(zone) + if err != nil { + t.Fatalf("marshal zone: %v", err) + } + var jsonZone map[string]any + if err := json.Unmarshal(raw, &jsonZone); err != nil { + t.Fatalf("unmarshal zone: %v", err) + } + opts["zone"] = jsonZone + if _, ok := opts["domain_name"]; !ok && zone.DomainName != "" { + opts["domain_name"] = zone.DomainName + } + + out, err := (&danglingProvider{}).Collect(context.Background(), opts) + if err != nil { + t.Fatalf("Collect: %v", err) + } + d, ok := out.(*DanglingData) + if !ok { + t.Fatalf("Collect returned %T, want *DanglingData", out) + } + return d +} + +func mustMarshal(t *testing.T, v any) []byte { + t.Helper() + b, err := json.Marshal(v) + if err != nil { + t.Fatalf("marshal: %v", err) + } + return b +} + +// staticObs serves a single observation by key plus a fixed map of +// related observations keyed by ObservationKey. Mirrors the helper +// used by checker-legacy-records, extended to cover GetRelated. 
+type staticObs struct { + key sdk.ObservationKey + payload []byte + related map[sdk.ObservationKey][]sdk.RelatedObservation +} + +func (s staticObs) Get(_ context.Context, key sdk.ObservationKey, dest any) error { + if key != s.key { + return fmt.Errorf("staticObs: unexpected observation key %q (have %q)", key, s.key) + } + return json.Unmarshal(s.payload, dest) +} + +func (s staticObs) GetRelated(_ context.Context, key sdk.ObservationKey) ([]sdk.RelatedObservation, error) { + return s.related[key], nil +} + +// --- collect tests -------------------------------------------------------- + +func TestCollect_CleanZone_NoPointers(t *testing.T) { + stubResolver(t, nil) + z := &rawZone{ + DomainName: "example.com", + Services: map[string][]rawService{ + "": {modernNonPointer()}, + "www": {modernNonPointer()}, + }, + } + data := runCollect(t, z, nil) + if data.ServicesScanned != 2 { + t.Errorf("ServicesScanned = %d, want 2", data.ServicesScanned) + } + if len(data.Pointers) != 0 { + t.Errorf("Pointers = %+v, want empty", data.Pointers) + } +} + +func TestCollect_DetectsCNAMEMXSRV_NS(t *testing.T) { + stubResolver(t, nil) + z := &rawZone{ + DomainName: "example.com", + Services: map[string][]rawService{ + "www": {cnameSvc("target.example.net.")}, + "": {mxSvc("mail.example.org."), nsOrphan("ns1.someprovider.net.")}, + "_sip._tcp": {srvSvc("sipserver.example.io.")}, + }, + } + data := runCollect(t, z, nil) + if got := len(data.Pointers); got != 4 { + t.Fatalf("Pointers count = %d, want 4: %+v", got, data.Pointers) + } + want := map[string]bool{"CNAME": false, "MX": false, "NS": false, "SRV": false} + for _, p := range data.Pointers { + if !p.External { + t.Errorf("expected pointer to external target to be flagged External: %+v", p) + } + if p.Registrable == "" { + t.Errorf("expected non-empty Registrable for external target: %+v", p) + } + want[p.Rrtype] = true + } + for k, ok := range want { + if !ok { + t.Errorf("missing pointer of type %s", k) + } + } +} + +func 
TestCollect_InZoneTargetIsNotExternal(t *testing.T) { + stubResolver(t, nil) + z := &rawZone{ + DomainName: "example.com", + Services: map[string][]rawService{ + "www": {cnameSvc("aliased.example.com.")}, + }, + } + data := runCollect(t, z, nil) + if len(data.Pointers) != 1 { + t.Fatalf("want 1 pointer, got %d", len(data.Pointers)) + } + if data.Pointers[0].External { + t.Errorf("same-registrable target must not be External: %+v", data.Pointers[0]) + } +} + +func TestCollect_MissingZoneOptionFails(t *testing.T) { + _, err := (&danglingProvider{}).Collect(context.Background(), sdk.CheckerOptions{}) + if err == nil { + t.Fatal("expected error when 'zone' option is missing, got nil") + } +} + +// --- DiscoverEntries ------------------------------------------------------ + +func TestDiscoverEntries_PublishesExternalAndInZone(t *testing.T) { + stubResolver(t, nil) + z := &rawZone{ + DomainName: "example.com", + Services: map[string][]rawService{ + "alias-ext": {cnameSvc("provider.example.net.")}, + "alias-in": {cnameSvc("internal.example.com.")}, + }, + } + data := runCollect(t, z, nil) + + entries, err := (&danglingProvider{}).DiscoverEntries(data) + if err != nil { + t.Fatalf("DiscoverEntries: %v", err) + } + if len(entries) != 2 { + t.Fatalf("want 2 entries, got %d: %+v", len(entries), entries) + } + var sawExternal, sawInZone bool + for _, e := range entries { + switch e.Type { + case contract.ExternalTargetType: + sawExternal = true + case contract.InZoneTargetType: + sawInZone = true + default: + t.Errorf("unexpected entry Type %q", e.Type) + } + } + if !sawExternal || !sawInZone { + t.Errorf("entry types missing: external=%v inzone=%v", sawExternal, sawInZone) + } +} + +// --- Evaluate matrix ------------------------------------------------------ + +func TestEvaluate_NXDOMAINIsCritical(t *testing.T) { + stubResolver(t, map[string]struct{ verdict, detail string }{ + "gone.example.net": {"nxdomain", "no such host"}, + }) + z := &rawZone{ + DomainName: 
"example.com", + Services: map[string][]rawService{ + "old": {cnameSvc("gone.example.net.")}, + }, + } + data := runCollect(t, z, nil) + obs := staticObs{key: ObservationKeyDangling, payload: mustMarshal(t, data)} + states := (&danglingRule{}).Evaluate(context.Background(), obs, sdk.CheckerOptions{}) + + if len(states) != 1 || states[0].Status != sdk.StatusCrit { + t.Fatalf("want 1 critical state, got %+v", states) + } + if !strings.Contains(states[0].Message, "old.example.com") { + t.Errorf("message should name the impacted owner: %q", states[0].Message) + } +} + +func TestEvaluate_ServfailIsWarning(t *testing.T) { + stubResolver(t, map[string]struct{ verdict, detail string }{ + "flaky.example.net": {"servfail", "lookup servfail"}, + }) + z := &rawZone{ + DomainName: "example.com", + Services: map[string][]rawService{ + "www": {cnameSvc("flaky.example.net.")}, + }, + } + data := runCollect(t, z, nil) + states := (&danglingRule{}).Evaluate(context.Background(), + staticObs{key: ObservationKeyDangling, payload: mustMarshal(t, data)}, + sdk.CheckerOptions{}) + if len(states) != 1 || states[0].Status != sdk.StatusWarn { + t.Fatalf("want 1 warning state, got %+v", states) + } +} + +func TestEvaluate_WhoisExpiredIsCritical(t *testing.T) { + stubResolver(t, nil) // target resolves OK on DNS — only WHOIS is bad. 
+ z := &rawZone{ + DomainName: "example.com", + Services: map[string][]rawService{ + "promo": {cnameSvc("brand.attackertarget.net.")}, + }, + } + data := runCollect(t, z, nil) + + expired := whoisFacts{ExpiryDate: time.Now().Add(-30 * 24 * time.Hour)} + ref := contract.Ref("promo.example.com", "CNAME", "brand.attackertarget.net") + related := map[sdk.ObservationKey][]sdk.RelatedObservation{ + ExternalWhoisObservationKey: {{ + CheckerID: "domain-expiry", + Key: ExternalWhoisObservationKey, + Data: mustMarshal(t, expired), + Ref: ref, + }}, + } + + states := (&danglingRule{}).Evaluate(context.Background(), + staticObs{key: ObservationKeyDangling, payload: mustMarshal(t, data), related: related}, + sdk.CheckerOptions{}) + + if len(states) != 1 || states[0].Status != sdk.StatusCrit { + t.Fatalf("want 1 critical state, got %+v", states) + } + if !strings.Contains(states[0].Message, "expired") { + t.Errorf("message should mention expired registrable: %q", states[0].Message) + } +} + +func TestEvaluate_WhoisPendingDeleteIsCritical(t *testing.T) { + stubResolver(t, nil) + z := &rawZone{ + DomainName: "example.com", + Services: map[string][]rawService{ + "shop": {cnameSvc("brand.dropping.net.")}, + }, + } + data := runCollect(t, z, nil) + + facts := whoisFacts{ + ExpiryDate: time.Now().Add(30 * 24 * time.Hour), + Status: []string{"clientTransferProhibited", "pendingDelete"}, + } + related := map[sdk.ObservationKey][]sdk.RelatedObservation{ + ExternalWhoisObservationKey: {{ + CheckerID: "domain-expiry", + Key: ExternalWhoisObservationKey, + Data: mustMarshal(t, facts), + Ref: contract.Ref("shop.example.com", "CNAME", "brand.dropping.net"), + }}, + } + states := (&danglingRule{}).Evaluate(context.Background(), + staticObs{key: ObservationKeyDangling, payload: mustMarshal(t, data), related: related}, + sdk.CheckerOptions{}) + if len(states) != 1 || states[0].Status != sdk.StatusCrit { + t.Fatalf("want 1 critical state, got %+v", states) + } +} + +func 
TestEvaluate_RecentRegistrationIsWarning(t *testing.T) { + stubResolver(t, nil) + z := &rawZone{ + DomainName: "example.com", + Services: map[string][]rawService{ + "legacy": {cnameSvc("brand.recently-grabbed.net.")}, + }, + } + data := runCollect(t, z, nil) + + facts := whoisFacts{ + ExpiryDate: time.Now().Add(365 * 24 * time.Hour), + CreationDate: time.Now().Add(-15 * 24 * time.Hour), + } + related := map[sdk.ObservationKey][]sdk.RelatedObservation{ + ExternalWhoisObservationKey: {{ + CheckerID: "domain-expiry", + Key: ExternalWhoisObservationKey, + Data: mustMarshal(t, facts), + Ref: contract.Ref("legacy.example.com", "CNAME", "brand.recently-grabbed.net"), + }}, + } + states := (&danglingRule{}).Evaluate(context.Background(), + staticObs{key: ObservationKeyDangling, payload: mustMarshal(t, data), related: related}, + sdk.CheckerOptions{}) + if len(states) != 1 || states[0].Status != sdk.StatusWarn { + t.Fatalf("want 1 warning state, got %+v", states) + } +} + +func TestEvaluate_CleanZoneReturnsOK(t *testing.T) { + stubResolver(t, nil) + z := &rawZone{ + DomainName: "example.com", + Services: map[string][]rawService{ + "www": {cnameSvc("aliased.example.com.")}, // in-zone, OK + }, + } + data := runCollect(t, z, nil) + states := (&danglingRule{}).Evaluate(context.Background(), + staticObs{key: ObservationKeyDangling, payload: mustMarshal(t, data)}, + sdk.CheckerOptions{}) + if len(states) != 1 || states[0].Status != sdk.StatusOK { + t.Fatalf("want single OK state, got %+v", states) + } +} + +func TestEvaluate_RanksCriticalAboveWarning(t *testing.T) { + stubResolver(t, map[string]struct{ verdict, detail string }{ + "flaky.example.net": {"servfail", ""}, + "gone.example.net": {"nxdomain", ""}, + }) + z := &rawZone{ + DomainName: "example.com", + Services: map[string][]rawService{ + "a": {cnameSvc("flaky.example.net.")}, + "b": {cnameSvc("gone.example.net.")}, + }, + } + data := runCollect(t, z, nil) + states := (&danglingRule{}).Evaluate(context.Background(), + 
staticObs{key: ObservationKeyDangling, payload: mustMarshal(t, data)}, + sdk.CheckerOptions{}) + + if len(states) != 2 { + t.Fatalf("want 2 states, got %d: %+v", len(states), states) + } + if states[0].Status != sdk.StatusCrit { + t.Errorf("first state must be critical (NXDOMAIN), got %v", states[0].Status) + } + if states[1].Status != sdk.StatusWarn { + t.Errorf("second state must be warning (SERVFAIL), got %v", states[1].Status) + } +} + +// --- Report --------------------------------------------------------------- + +type staticReportCtx struct { + data []byte + states []sdk.CheckState + related map[sdk.ObservationKey][]sdk.RelatedObservation +} + +func (s staticReportCtx) Data() json.RawMessage { return s.data } +func (s staticReportCtx) Related(k sdk.ObservationKey) []sdk.RelatedObservation { + return s.related[k] +} +func (s staticReportCtx) States() []sdk.CheckState { return s.states } + +func TestReport_OKBannerWhenNoFindings(t *testing.T) { + stubResolver(t, nil) + z := &rawZone{ + DomainName: "example.com", + Services: map[string][]rawService{ + "www": {cnameSvc("aliased.example.com.")}, + }, + } + data := runCollect(t, z, nil) + html, err := (&danglingProvider{}).GetHTMLReport(staticReportCtx{ + data: mustMarshal(t, data), + states: []sdk.CheckState{{Status: sdk.StatusOK, Code: "dangling_clean"}}, + }) + if err != nil { + t.Fatalf("GetHTMLReport: %v", err) + } + if !strings.Contains(html, "status-ok") { + t.Errorf("report missing OK banner") + } +} + +func TestReport_TopCardReflectsCriticalOwner(t *testing.T) { + stubResolver(t, map[string]struct{ verdict, detail string }{ + "gone.example.net": {"nxdomain", ""}, + }) + z := &rawZone{ + DomainName: "example.com", + Services: map[string][]rawService{ + "old": {cnameSvc("gone.example.net.")}, + }, + } + data := runCollect(t, z, nil) + rule := &danglingRule{} + states := rule.Evaluate(context.Background(), + staticObs{key: ObservationKeyDangling, payload: mustMarshal(t, data)}, + sdk.CheckerOptions{}) + + 
html, err := (&danglingProvider{}).GetHTMLReport(staticReportCtx{ + data: mustMarshal(t, data), + states: states, + }) + if err != nil { + t.Fatalf("GetHTMLReport: %v", err) + } + if !strings.Contains(html, "Fix this first") { + t.Errorf("report missing 'Fix this first' card") + } + if !strings.Contains(html, "old.example.com") { + t.Errorf("report does not name the impacted owner") + } +} diff --git a/checker/types.go b/checker/types.go new file mode 100644 index 0000000..2c2e1f1 --- /dev/null +++ b/checker/types.go @@ -0,0 +1,147 @@ +// Package checker implements the happyDomain "dangling records" +// checker: it walks the working zone, identifies every pointer record +// (CNAME / MX / SRV / NS) whose target lives outside the zone, performs +// a light DNS resolution to detect immediate breakage (NXDOMAIN), and +// publishes DiscoveryEntry records so a companion checker (typically +// the host's domain_expiry) can verify each external registrable domain +// via RDAP/WHOIS. The rule layer joins both signals to surface +// subdomains at risk of takeover (the "dangling CNAME" attack class +// publicised by Ars Technica in 2017). +package checker + +import ( + "encoding/json" +) + +const ObservationKeyDangling = "dangling_records" + +// DanglingData is the raw observation payload. It carries one Pointer +// entry per (owner, rrtype, target) triple found in the zone, including +// targets resolved to their DNS verdict. Aggregation by owner happens +// in the rule layer. +type DanglingData struct { + // Zone is the zone apex, without trailing dot. Empty when the host + // did not provide a domain_name option. + Zone string `json:"zone,omitempty"` + + // ServicesScanned counts every service inspected (matches the same + // field in checker-legacy-records, anchoring the report). + ServicesScanned int `json:"services_scanned"` + + // Pointers lists every pointer record encountered. One entry per + // distinct (owner, rrtype, target). 
External pointers carry a + // non-empty Registrable; in-zone pointers leave it empty so the + // rule does not request RDAP on the user's own apex. + Pointers []Pointer `json:"pointers,omitempty"` + + // CollectErrors records non-fatal problems encountered during the + // zone walk, surfaced in the report so silent skips do not + // masquerade as a clean pass. + CollectErrors []string `json:"collect_errors,omitempty"` +} + +// Pointer is the unit of observation: one (owner, rrtype, target) seen +// in the zone, plus the result of the local DNS resolution. +type Pointer struct { + // Owner is the FQDN that carries the pointer record (CNAME owner, + // MX/SRV owner, NS apex, …). No trailing dot. + Owner string `json:"owner"` + + // Subdomain is Owner relative to the zone apex. "" means apex + // (rendered as "@" in the report). + Subdomain string `json:"subdomain"` + + // Rrtype is the textual record type ("CNAME", "MX", "SRV", "NS"). + Rrtype string `json:"rrtype"` + + // Target is the FQDN the record points at. No trailing dot. + Target string `json:"target"` + + // External is true when Target's registrable domain differs from + // the zone's registrable domain (the takeover-risk case). + External bool `json:"external"` + + // Registrable is the eTLD+1 of Target. Empty when External is false + // or when public-suffix lookup failed. + Registrable string `json:"registrable,omitempty"` + + // ServiceType is the happyDomain service that exposed the record + // ("svcs.CNAME", "svcs.MXs", …). Useful for navigating users back + // to the right edit screen in the report. + ServiceType string `json:"service_type,omitempty"` + + // Resolution is the verdict of the local DNS lookup of Target: + // "ok", "nxdomain", "no_answer", "servfail", "timeout", "skipped". + // "skipped" is used when the collector chose not to resolve (for + // example, because lookups are disabled at runtime). 
+ Resolution string `json:"resolution"` + + // ResolutionDetail is a free-form sentence describing the + // resolution outcome (e.g. the underlying error string). Optional. + ResolutionDetail string `json:"resolution_detail,omitempty"` +} + +// rawZone is the minimal slice of happyDomain's *Zone JSON we consume. +// Like checker-legacy-records, we redeclare just the fields we need so +// this checker compiles without depending on the happyDomain module. +type rawZone struct { + DomainName string `json:"domain_name,omitempty"` + Services map[string][]rawService `json:"services"` +} + +type rawService struct { + Type string `json:"_svctype"` + Domain string `json:"_domain"` + Service json.RawMessage `json:"Service"` +} + +// Below: minimal JSON shapes for each service body we extract pointers +// from. We only need fields that point at a host name, so the +// definitions are deliberately partial. + +type cnameBody struct { + Record struct { + Hdr struct { + Name string `json:"Name"` + } `json:"Hdr"` + Target string `json:"Target"` + } `json:"cname"` +} + +type mxRecord struct { + Hdr struct { + Name string `json:"Name"` + } `json:"Hdr"` + Mx string `json:"Mx"` +} + +type mxsBody struct { + MXs []mxRecord `json:"mx"` +} + +type srvRecord struct { + Hdr struct { + Name string `json:"Name"` + } `json:"Hdr"` + Target string `json:"Target"` +} + +type srvsBody struct { + Records []srvRecord `json:"srv"` +} + +// orphanRecord covers the body shape used by svcs.Orphan when the +// embedded RR is a CNAME, NS, MX, or SRV. We sniff Hdr.Rrtype before +// committing to a specific decoder. +type orphanRecord struct { + Record struct { + Hdr struct { + Name string `json:"Name"` + Rrtype uint16 `json:"Rrtype"` + } `json:"Hdr"` + // Optional fields, populated for the relevant rrtype. 
+ Target string `json:"Target,omitempty"` + Mx string `json:"Mx,omitempty"` + Ns string `json:"Ns,omitempty"` + } `json:"record"` +} diff --git a/contract/entry.go b/contract/entry.go new file mode 100644 index 0000000..d94a780 --- /dev/null +++ b/contract/entry.go @@ -0,0 +1,86 @@ +// Package contract defines the DiscoveryEntry types published by +// checker-dangling and consumed by companion checkers (notably +// domain-expiry, which subscribes to ExternalTargetType to perform RDAP +// on the target's registrable domain). +// +// This package is deliberately tiny and dependency-light so that any +// consumer can import it without dragging the whole checker in. +package contract + +import ( + "encoding/json" + "fmt" + + sdk "git.happydns.org/checker-sdk-go/checker" +) + +// ExternalTargetType is the DiscoveryEntry.Type for an out-of-zone +// pointer target (CNAME/MX/SRV/NS host) whose registrable domain is +// distinct from the zone apex. Consumers subscribed to this type are +// expected to look up the registrable domain (RDAP/WHOIS) and publish a +// "whois" observation per entry Ref. +const ExternalTargetType = "dangling.external-target.v1" + +// InZoneTargetType is the DiscoveryEntry.Type for a pointer target that +// resolves within the same zone or the same registrable domain. It is +// declared so future probing checkers (ping/http/alias) can subscribe to +// it for in-zone reachability checks. v1 of checker-dangling does not +// itself rely on observations attached to in-zone entries. +const InZoneTargetType = "dangling.in-zone-target.v1" + +// ExternalTarget is the payload of an ExternalTargetType entry. +// +// Owner is the FQDN whose pointer is at risk (e.g. "old-promo.example.com."). +// Pointer captures the type+target verbatim so a consumer can refer to +// the precise record when reporting findings. Registrable is the eTLD+1 +// (or PSL-derived equivalent) that an RDAP probe should query. 
+type ExternalTarget struct { + Owner string `json:"owner"` + Rrtype string `json:"rrtype"` // "CNAME", "MX", "SRV", "NS" + Target string `json:"target"` // FQDN, no trailing dot + Registrable string `json:"registrable"` +} + +// InZoneTarget mirrors ExternalTarget for in-zone or same-registrable +// pointers. Registrable is set when known so a subscriber can decide to +// skip records that point at the user's own domain. +type InZoneTarget struct { + Owner string `json:"owner"` + Rrtype string `json:"rrtype"` + Target string `json:"target"` + Registrable string `json:"registrable,omitempty"` +} + +// Ref builds the canonical, stable Ref for a (owner, rrtype, target) +// triple. Callers must use this on both the producer and consumer side +// so RelatedObservation.Ref correlates with the right entry. +func Ref(owner, rrtype, target string) string { + return fmt.Sprintf("%s|%s|%s", owner, rrtype, target) +} + +// NewExternalEntry builds a DiscoveryEntry of type ExternalTargetType +// with the canonical Ref. +func NewExternalEntry(t ExternalTarget) (sdk.DiscoveryEntry, error) { + payload, err := json.Marshal(t) + if err != nil { + return sdk.DiscoveryEntry{}, fmt.Errorf("marshal external target: %w", err) + } + return sdk.DiscoveryEntry{ + Type: ExternalTargetType, + Ref: Ref(t.Owner, t.Rrtype, t.Target), + Payload: payload, + }, nil +} + +// NewInZoneEntry builds a DiscoveryEntry of type InZoneTargetType. 
+func NewInZoneEntry(t InZoneTarget) (sdk.DiscoveryEntry, error) { + payload, err := json.Marshal(t) + if err != nil { + return sdk.DiscoveryEntry{}, fmt.Errorf("marshal in-zone target: %w", err) + } + return sdk.DiscoveryEntry{ + Type: InZoneTargetType, + Ref: Ref(t.Owner, t.Rrtype, t.Target), + Payload: payload, + }, nil +} diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..23b279c --- /dev/null +++ b/go.mod @@ -0,0 +1,16 @@ +module git.happydns.org/checker-dangling + +go 1.25.0 + +require ( + git.happydns.org/checker-sdk-go v1.5.0 + github.com/miekg/dns v1.1.72 + golang.org/x/net v0.48.0 +) + +require ( + golang.org/x/mod v0.31.0 // indirect + golang.org/x/sync v0.19.0 // indirect + golang.org/x/sys v0.39.0 // indirect + golang.org/x/tools v0.40.0 // indirect +) diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..2a80023 --- /dev/null +++ b/go.sum @@ -0,0 +1,16 @@ +git.happydns.org/checker-sdk-go v1.5.0 h1:5uD5Cm6xJ+lwnhbJ09iCXGHbYS9zRh+Yh0NeBHkAPBY= +git.happydns.org/checker-sdk-go v1.5.0/go.mod h1:aNAcfYFfbhvH9kJhE0Njp5GX0dQbxdRB0rJ0KvSC5nI= +github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= +github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/miekg/dns v1.1.72 h1:vhmr+TF2A3tuoGNkLDFK9zi36F2LS+hKTRW0Uf8kbzI= +github.com/miekg/dns v1.1.72/go.mod h1:+EuEPhdHOsfk6Wk5TT2CzssZdqkmFhf8r+aVyDEToIs= +golang.org/x/mod v0.31.0 h1:HaW9xtz0+kOcWKwli0ZXy79Ix+UW/vOfmWI5QVd2tgI= +golang.org/x/mod v0.31.0/go.mod h1:43JraMp9cGx1Rx3AqioxrbrhNsLl2l/iNAvuBkrezpg= +golang.org/x/net v0.48.0 h1:zyQRTTrjc33Lhh0fBgT/H3oZq9WuvRR5gPC70xpDiQU= +golang.org/x/net v0.48.0/go.mod h1:+ndRgGjkh8FGtu1w1FGbEC31if4VrNVMuKTgcAAnQRY= +golang.org/x/sync v0.19.0 h1:vV+1eWNmZ5geRlYjzm2adRgW2/mcpevXNg50YZtPCE4= +golang.org/x/sync v0.19.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI= +golang.org/x/sys v0.39.0 h1:CvCKL8MeisomCi6qNZ+wbb0DN9E5AATixKsvNtMoMFk= +golang.org/x/sys v0.39.0/go.mod 
h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= +golang.org/x/tools v0.40.0 h1:yLkxfA+Qnul4cs9QA3KnlFu0lVmd8JJfoq+E41uSutA= +golang.org/x/tools v0.40.0/go.mod h1:Ik/tzLRlbscWpqqMRjyWYDisX8bG13FrdXp3o4Sr9lc= diff --git a/main.go b/main.go new file mode 100644 index 0000000..001e536 --- /dev/null +++ b/main.go @@ -0,0 +1,29 @@ +// Command checker-dangling is the standalone HTTP server entrypoint +// for the dangling/orphan-target checker. +package main + +import ( + "flag" + "log" + + dangling "git.happydns.org/checker-dangling/checker" + "git.happydns.org/checker-sdk-go/checker/server" +) + +// Version is overridden at build time: +// +// go build -ldflags "-X main.Version=1.2.3" . +var Version = "custom-build" + +var listenAddr = flag.String("listen", ":8080", "HTTP listen address") + +func main() { + flag.Parse() + + dangling.SetVersion(Version) + + srv := server.New(dangling.Provider()) + if err := srv.ListenAndServe(*listenAddr); err != nil { + log.Fatalf("server error: %v", err) + } +} diff --git a/plugin/plugin.go b/plugin/plugin.go new file mode 100644 index 0000000..074c6ab --- /dev/null +++ b/plugin/plugin.go @@ -0,0 +1,15 @@ +// Command plugin is the happyDomain plugin entrypoint for the dangling +// records checker. It is built as a Go plugin and loaded at runtime. +package main + +import ( + dangling "git.happydns.org/checker-dangling/checker" + sdk "git.happydns.org/checker-sdk-go/checker" +) + +var Version = "custom-build" + +func NewCheckerPlugin() (*sdk.CheckerDefinition, sdk.ObservationProvider, error) { + dangling.SetVersion(Version) + return dangling.Definition(), dangling.Provider(), nil +}