commit a6dbcef0f9f6ba5daff08cf6ac7b3c1868e8c7ce Author: Pierre-Olivier Mercier Date: Fri Apr 24 10:33:26 2026 +0700 Initial commit diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..c10bb91 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +checker-dane +checker-dane.so diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..36b5ba0 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,15 @@ +FROM golang:1.25-alpine AS builder + +ARG CHECKER_VERSION=custom-build + +WORKDIR /src +COPY go.mod go.sum ./ +RUN go mod download +COPY . . +RUN CGO_ENABLED=0 go build -tags standalone -ldflags "-X main.Version=${CHECKER_VERSION}" -o /checker-dane . + +FROM scratch +COPY --from=builder /checker-dane /checker-dane +USER 65534:65534 +EXPOSE 8080 +ENTRYPOINT ["/checker-dane"] diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..07d44d8 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2026 The happyDomain Authors + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the “Software”), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..1d966d8 --- /dev/null +++ b/Makefile @@ -0,0 +1,28 @@ +CHECKER_NAME := checker-dane +CHECKER_IMAGE := happydomain/$(CHECKER_NAME) +CHECKER_VERSION ?= custom-build + +CHECKER_SOURCES := main.go $(wildcard checker/*.go) + +GO_LDFLAGS := -X main.Version=$(CHECKER_VERSION) + +.PHONY: all plugin docker test clean + +all: $(CHECKER_NAME) + +$(CHECKER_NAME): $(CHECKER_SOURCES) + go build -tags standalone -ldflags "$(GO_LDFLAGS)" -o $@ . + +plugin: $(CHECKER_NAME).so + +$(CHECKER_NAME).so: $(CHECKER_SOURCES) $(wildcard plugin/*.go) + go build -buildmode=plugin -ldflags "$(GO_LDFLAGS)" -o $@ ./plugin/ + +docker: + docker build --build-arg CHECKER_VERSION=$(CHECKER_VERSION) -t $(CHECKER_IMAGE) . + +test: + go test -tags standalone ./... + +clean: + rm -f $(CHECKER_NAME) $(CHECKER_NAME).so diff --git a/README.md b/README.md new file mode 100644 index 0000000..e1294a9 --- /dev/null +++ b/README.md @@ -0,0 +1,68 @@ +# checker-dane + +DANE / TLSA checker for [happyDomain](https://www.happydomain.org/). + +Bound to the `svcs.TLSAs` service: groups the user's TLSA records by +`(port, proto, base)`, publishes one `tls.endpoint.v1` discovery entry +per endpoint so [`checker-tls`](https://git.happydns.org/checker-tls) +probes them, then matches each TLSA against the observed certificate +chain per RFC 6698. 
+ +## Usage + +### Standalone HTTP server + +```bash +# Build and run +make +./checker-dane -listen :8080 +``` + +The server exposes: + +- `GET /health`, health check +- `POST /collect`, collect DANE observations (happyDomain external checker protocol) + +### Docker + +```bash +make docker +docker run -p 8080:8080 happydomain/checker-dane +``` + +### happyDomain plugin + +```bash +make plugin +# produces checker-dane.so, loadable by happyDomain as a Go plugin +``` + +The plugin exposes a `NewCheckerPlugin` symbol returning the checker +definition and observation provider, which happyDomain registers in its +global registries at load time. + +### Versioning + +The binary, plugin, and Docker image embed a version string overridable +at build time: + +```bash +make CHECKER_VERSION=1.2.3 +make plugin CHECKER_VERSION=1.2.3 +make docker CHECKER_VERSION=1.2.3 +``` + +### happyDomain remote endpoint + +Set the `endpoint` admin option for the DANE checker to the URL of the +running checker-dane server (e.g., `http://checker-dane:8080`). +happyDomain will delegate observation collection to this endpoint. + +## Behavior + +- **Usage 0 (PKIX-TA) / 1 (PKIX-EE)**: TLSA match + publicly trusted PKIX chain required. +- **Usage 2 (DANE-TA) / 3 (DANE-EE)**: TLSA acts as the trust anchor; PKIX validity is informational. +- **Selector** 0 (Cert) / 1 (SPKI) and **MatchingType** 0/1/2 (Full / SHA-256 / SHA-512) + are matched against the chain slot implied by the usage. +- Common STARTTLS ports (25, 110, 143, 389, 587, 5222, 5269) are auto-mapped; + override via the `starttls` option keyed by `"<port>/<proto>"`. 
diff --git a/checker/collect.go b/checker/collect.go new file mode 100644 index 0000000..e444d65 --- /dev/null +++ b/checker/collect.go @@ -0,0 +1,282 @@ +package checker + +import ( + "context" + "encoding/json" + "fmt" + "regexp" + "sort" + "strconv" + "strings" + "time" + + sdk "git.happydns.org/checker-sdk-go/checker" + tlscontract "git.happydns.org/checker-tls/contract" +) + +// tlsaOwner matches the "_<port>._<proto>.<base>" TLSA owner-name pattern. +// The base group is whatever the happyDomain analyzer bucketed the TLSAs +// under; when empty, the TLSAs live directly under the zone apex. +var tlsaOwner = regexp.MustCompile(`^_(\d+)\._(tcp|udp)(?:\.(.*))?$`) + +// tlsaOwnerName builds the canonical "_<port>._<proto>.<base>" owner name. +// When base is empty (TLSA records sit directly at the zone apex of an +// otherwise-unspecified host), the trailing label is omitted so the result +// is still a syntactically valid relative name rather than "_443._tcp.". +func tlsaOwnerName(port uint16, proto, base string) string { + base = strings.TrimSuffix(base, ".") + if base == "" { + return fmt.Sprintf("_%d._%s", port, proto) + } + return fmt.Sprintf("_%d._%s.%s", port, proto, base) +} + +// starttlsKey is the "<port>/<proto>" lookup key used in OptionSTARTTLS. +func starttlsKey(port uint16, proto string) string { + return fmt.Sprintf("%d/%s", port, proto) +} + +// serviceMessage mirrors the on-wire happydns.ServiceMessage shape, kept +// local so this module does not depend on happyDomain core. Same pattern +// as checker-caa/checker/collect.go. +type serviceMessage struct { + Type string `json:"_svctype"` + Domain string `json:"_domain"` + Service json.RawMessage `json:"Service"` +} + +// tlsasPayload mirrors the JSON shape of svcs.TLSAs (services/tlsa.go). +type tlsasPayload struct { + Records []tlsaRecord `json:"tlsa"` +} + +// tlsaRecord decodes one dns.TLSA as serialized by miekg/dns. 
The Hdr.Name +// is how we learn which endpoint each record applies to; Certificate is +// already a lowercase-hex string as miekg/dns emits it. +type tlsaRecord struct { + Hdr struct { + Name string `json:"Name"` + } `json:"Hdr"` + Usage uint8 `json:"Usage"` + Selector uint8 `json:"Selector"` + MatchingType uint8 `json:"MatchingType"` + Certificate string `json:"Certificate"` +} + +// defaultSTARTTLS maps common ports to the STARTTLS service name checker-tls +// expects. Endpoints not covered default to direct TLS; the user can override +// explicitly via the OptionSTARTTLS map. +var defaultSTARTTLS = map[uint16]string{ + 25: "smtp", + 110: "pop3", + 143: "imap", + 389: "ldap", + 587: "submission", + 5222: "xmpp-client", + 5269: "xmpp-server", +} + +// Collect walks the bound TLSAs service, groups records by (port, proto, +// base), emits one tls.endpoint.v1 discovery entry per group so checker-tls +// probes each of them, and returns DANEData with the user's TLSA records. +// No TLSA matching happens here; that's the rule's job: it reads the TLS +// chain via obs.GetRelated on the next evaluation. +func (p *daneProvider) Collect(ctx context.Context, opts sdk.CheckerOptions) (any, error) { + if err := ctx.Err(); err != nil { + return nil, err + } + svc, err := serviceFromOptions(opts) + if err != nil { + return nil, err + } + if svc.Type != serviceType { + return nil, fmt.Errorf("service is %q, expected %q", svc.Type, serviceType) + } + + var pl tlsasPayload + if err := json.Unmarshal(svc.Service, &pl); err != nil { + return nil, fmt.Errorf("decode TLSAs service: %w", err) + } + + apex, _ := sdk.GetOption[string](opts, OptionDomain) + apex = strings.TrimSuffix(apex, ".") + subdomain, _ := sdk.GetOption[string](opts, OptionSubdomain) + subdomain = strings.TrimSuffix(subdomain, ".") + + // STARTTLS overrides: map of "port/proto" → service name. 
+ var starttlsOverride map[string]string + if v, ok := opts[OptionSTARTTLS]; ok { + raw, _ := json.Marshal(v) + _ = json.Unmarshal(raw, &starttlsOverride) + } + + // Group records by endpoint key. + type key struct { + Port uint16 + Proto string + Base string // base host, fully-qualified without trailing dot + } + groups := map[key][]TLSARecord{} + var invalid []InvalidRecord + for _, r := range pl.Records { + owner := strings.TrimSuffix(r.Hdr.Name, ".") + m := tlsaOwner.FindStringSubmatch(owner) + if len(m) != 4 { + invalid = append(invalid, InvalidRecord{ + Owner: owner, + Reason: "owner name does not match _<port>._<proto>[.<base>]", + }) + continue + } + port64, err := strconv.ParseUint(m[1], 10, 16) + if err != nil || port64 == 0 { + invalid = append(invalid, InvalidRecord{ + Owner: owner, + Reason: fmt.Sprintf("port %q out of range (1-65535)", m[1]), + }) + continue + } + base := m[3] + // Resolve base relative to the apex: TLSA owners in the service + // are typically stored relative to the service's subdomain + // bucket. Fall back to the apex when unspecified. + base = joinName(base, subdomain, apex) + if base == "" { + invalid = append(invalid, InvalidRecord{ + Owner: owner, + Reason: "could not resolve a host name (apex and subdomain both empty)", + }) + continue + } + + k := key{Port: uint16(port64), Proto: m[2], Base: base} + groups[k] = append(groups[k], TLSARecord{ + Usage: r.Usage, + Selector: r.Selector, + MatchingType: r.MatchingType, + Certificate: strings.ToLower(strings.TrimSpace(r.Certificate)), + }) + } + + // Deterministic output ordering keeps diffs quiet across runs. 
+ keys := make([]key, 0, len(groups)) + for k := range groups { + keys = append(keys, k) + } + sort.Slice(keys, func(i, j int) bool { + if keys[i].Base != keys[j].Base { + return keys[i].Base < keys[j].Base + } + if keys[i].Port != keys[j].Port { + return keys[i].Port < keys[j].Port + } + return keys[i].Proto < keys[j].Proto + }) + + targets := make([]TargetResult, 0, len(keys)) + for _, k := range keys { + starttls := defaultSTARTTLS[k.Port] + if v, ok := starttlsOverride[starttlsKey(k.Port, k.Proto)]; ok { + starttls = v + } + t := TargetResult{ + Owner: tlsaOwnerName(k.Port, k.Proto, k.Base), + Host: k.Base, + Port: k.Port, + Proto: k.Proto, + STARTTLS: starttls, + Records: groups[k], + } + t.Ref = tlscontract.Ref(endpointFromTarget(t)) + targets = append(targets, t) + } + + data := &DANEData{ + Targets: targets, + Invalid: invalid, + CollectedAt: time.Now().UTC(), + } + if v, ok := opts[OptionDNSSECValidated]; ok { + if b, ok := v.(bool); ok { + data.DNSSECValidated = &b + } + } + return data, nil +} + +// endpointFromTarget builds the TLSEndpoint for a collected target. +func endpointFromTarget(t TargetResult) tlscontract.TLSEndpoint { + return tlscontract.TLSEndpoint{ + Host: t.Host, + Port: t.Port, + SNI: t.Host, + STARTTLS: t.STARTTLS, + // RFC 7672 §2.2: when a TLSA record exists for an SMTP service, the + // receiving MTA MUST use STARTTLS. The whole point of DANE on port 25 + // is to defeat STARTTLS-stripping downgrade attacks, so the presence + // of TLSA records here flips the connection from opportunistic to + // mandatory. + RequireSTARTTLS: t.STARTTLS != "", + } +} + +// DiscoverEntries publishes one tls.endpoint.v1 entry per target so +// checker-tls probes them in its next cycle. Implements sdk.DiscoveryPublisher. 
+func (p *daneProvider) DiscoverEntries(data any) ([]sdk.DiscoveryEntry, error) { + d, ok := data.(*DANEData) + if !ok || d == nil { + return nil, nil + } + out := make([]sdk.DiscoveryEntry, 0, len(d.Targets)) + for _, t := range d.Targets { + entry, err := tlscontract.NewEntry(endpointFromTarget(t)) + if err != nil { + return nil, err + } + out = append(out, entry) + } + return out, nil +} + +// serviceFromOptions extracts and decodes the happyDomain service payload. +func serviceFromOptions(opts sdk.CheckerOptions) (*serviceMessage, error) { + v, ok := opts[OptionService] + if !ok { + return nil, fmt.Errorf("service option missing") + } + raw, err := json.Marshal(v) + if err != nil { + return nil, fmt.Errorf("marshal service option: %w", err) + } + var svc serviceMessage + if err := json.Unmarshal(raw, &svc); err != nil { + return nil, fmt.Errorf("decode service option: %w", err) + } + return &svc, nil +} + +// joinName resolves a possibly-relative TLSA base name against the service's +// subdomain bucket and the zone apex, returning a fully-qualified host name +// without trailing dot. An empty base means "the subdomain/apex itself". +func joinName(base, subdomain, apex string) string { + base = strings.TrimSuffix(base, ".") + // Absolute match to apex: return apex; otherwise treat as relative. + if base == "" { + if subdomain != "" { + return strings.TrimSuffix(subdomain+"."+apex, ".") + } + return apex + } + // If base already ends with apex (fully qualified), keep as-is. + if apex != "" && (base == apex || strings.HasSuffix(base, "."+apex)) { + return base + } + // Otherwise, base is relative to the subdomain bucket (or apex). + if subdomain != "" { + return strings.TrimSuffix(base+"."+subdomain+"."+apex, ".") + } + if apex != "" { + return base + "." 
+ apex + } + return base +} diff --git a/checker/collect_test.go b/checker/collect_test.go new file mode 100644 index 0000000..72c732d --- /dev/null +++ b/checker/collect_test.go @@ -0,0 +1,226 @@ +package checker + +import ( + "context" + "encoding/json" + "testing" + + sdk "git.happydns.org/checker-sdk-go/checker" +) + +func makeOpts(t *testing.T, apex, subdomain string, records []map[string]any, starttls map[string]string) sdk.CheckerOptions { + t.Helper() + svc := map[string]any{ + "_svctype": serviceType, + "_domain": apex, + "Service": map[string]any{"tlsa": records}, + } + opts := sdk.CheckerOptions{ + OptionDomain: apex, + OptionService: svc, + } + if subdomain != "" { + opts[OptionSubdomain] = subdomain + } + if starttls != nil { + opts[OptionSTARTTLS] = starttls + } + return opts +} + +func tlsaRR(owner string, usage, selector, mtype int, cert string) map[string]any { + return map[string]any{ + "Hdr": map[string]any{"Name": owner}, + "Usage": usage, + "Selector": selector, + "MatchingType": mtype, + "Certificate": cert, + } +} + +func TestCollect_GroupsByEndpoint(t *testing.T) { + t.Parallel() + opts := makeOpts(t, "example.com.", "", []map[string]any{ + tlsaRR("_443._tcp.example.com.", 3, 1, 1, "AABB"), + tlsaRR("_443._tcp.example.com.", 3, 1, 1, "CCDD"), + tlsaRR("_25._tcp.mail.example.com.", 3, 1, 1, "EEFF"), + }, nil) + p := &daneProvider{} + out, err := p.Collect(context.Background(), opts) + if err != nil { + t.Fatalf("err=%v", err) + } + d := out.(*DANEData) + if len(d.Targets) != 2 { + t.Fatalf("targets=%d want 2", len(d.Targets)) + } + // Sorted by base alphabetically: example.com < mail.example.com. 
+ if d.Targets[0].Host != "example.com" || d.Targets[0].Port != 443 { + t.Errorf("sort[0]: %+v", d.Targets[0]) + } + if d.Targets[1].Host != "mail.example.com" || d.Targets[1].Port != 25 { + t.Errorf("sort[1]: %+v", d.Targets[1]) + } + // Two records on the 443 endpoint + if len(d.Targets[0].Records) != 2 { + t.Errorf("443 records=%d want 2", len(d.Targets[0].Records)) + } + // Certificate hex was lowercased + if d.Targets[0].Records[0].Certificate != "aabb" { + t.Errorf("expected lowercased cert, got %q", d.Targets[0].Records[0].Certificate) + } +} + +func TestCollect_DefaultSTARTTLS(t *testing.T) { + t.Parallel() + opts := makeOpts(t, "example.com", "", []map[string]any{ + tlsaRR("_25._tcp.mail.example.com", 3, 1, 1, "00"), + tlsaRR("_443._tcp.example.com", 3, 1, 1, "00"), + tlsaRR("_587._tcp.mail.example.com", 3, 1, 1, "00"), + }, nil) + out, err := (&daneProvider{}).Collect(context.Background(), opts) + if err != nil { + t.Fatal(err) + } + d := out.(*DANEData) + got := map[uint16]string{} + for _, t := range d.Targets { + got[t.Port] = t.STARTTLS + } + if got[25] != "smtp" { + t.Errorf("port 25 starttls=%q want smtp", got[25]) + } + if got[443] != "" { + t.Errorf("port 443 starttls=%q want empty (direct TLS)", got[443]) + } + if got[587] != "submission" { + t.Errorf("port 587 starttls=%q want submission", got[587]) + } +} + +func TestCollect_STARTTLSOverride(t *testing.T) { + t.Parallel() + opts := makeOpts(t, "example.com", "", []map[string]any{ + tlsaRR("_25._tcp.mail.example.com", 3, 1, 1, "00"), + }, map[string]string{"25/tcp": "lmtp"}) + out, err := (&daneProvider{}).Collect(context.Background(), opts) + if err != nil { + t.Fatal(err) + } + d := out.(*DANEData) + if d.Targets[0].STARTTLS != "lmtp" { + t.Errorf("override: starttls=%q want lmtp", d.Targets[0].STARTTLS) + } +} + +func TestCollect_MalformedOwnerSurfaced(t *testing.T) { + t.Parallel() + opts := makeOpts(t, "example.com", "", []map[string]any{ + tlsaRR("totally-invalid", 3, 1, 1, "00"), + 
tlsaRR("_99999._tcp.example.com", 3, 1, 1, "00"), // port > 65535 + tlsaRR("_443._tcp.example.com", 3, 1, 1, "AA"), + }, nil) + out, err := (&daneProvider{}).Collect(context.Background(), opts) + if err != nil { + t.Fatal(err) + } + d := out.(*DANEData) + if len(d.Targets) != 1 { + t.Errorf("expected one well-formed target, got %d", len(d.Targets)) + } + if len(d.Invalid) != 2 { + t.Errorf("expected 2 invalid entries, got %d (%+v)", len(d.Invalid), d.Invalid) + } +} + +func TestCollect_BaseRelativeToSubdomain(t *testing.T) { + t.Parallel() + opts := makeOpts(t, "example.com", "mail", []map[string]any{ + // Owner has no base, so the records live on the subdomain itself. + tlsaRR("_25._tcp", 3, 1, 1, "AA"), + }, nil) + out, err := (&daneProvider{}).Collect(context.Background(), opts) + if err != nil { + t.Fatal(err) + } + d := out.(*DANEData) + if len(d.Targets) != 1 { + t.Fatalf("targets=%d", len(d.Targets)) + } + if d.Targets[0].Host != "mail.example.com" { + t.Errorf("host=%q want mail.example.com", d.Targets[0].Host) + } + if d.Targets[0].Owner != "_25._tcp.mail.example.com" { + t.Errorf("owner=%q", d.Targets[0].Owner) + } +} + +func TestCollect_WrongServiceType(t *testing.T) { + t.Parallel() + svc := map[string]any{ + "_svctype": "svcs.NotTLSAs", + "Service": map[string]any{"tlsa": []any{}}, + } + opts := sdk.CheckerOptions{OptionDomain: "example.com", OptionService: svc} + if _, err := (&daneProvider{}).Collect(context.Background(), opts); err == nil { + t.Error("expected error on wrong service type") + } +} + +func TestCollect_MissingService(t *testing.T) { + t.Parallel() + opts := sdk.CheckerOptions{OptionDomain: "example.com"} + if _, err := (&daneProvider{}).Collect(context.Background(), opts); err == nil { + t.Error("expected error on missing service") + } +} + +func TestCollect_DiscoverEntries(t *testing.T) { + t.Parallel() + opts := makeOpts(t, "example.com", "", []map[string]any{ + tlsaRR("_443._tcp.example.com", 3, 1, 1, "AA"), + 
tlsaRR("_25._tcp.mail.example.com", 3, 1, 1, "BB"), + }, nil) + p := &daneProvider{} + data, err := p.Collect(context.Background(), opts) + if err != nil { + t.Fatal(err) + } + entries, err := p.DiscoverEntries(data) + if err != nil { + t.Fatalf("err=%v", err) + } + if len(entries) != 2 { + t.Errorf("entries=%d want 2", len(entries)) + } + + // Nil/wrong type returns nil, nil (defensive). + if got, err := p.DiscoverEntries(nil); err != nil || got != nil { + t.Errorf("nil: got=%v err=%v", got, err) + } + if got, err := p.DiscoverEntries("not a *DANEData"); err != nil || got != nil { + t.Errorf("wrong type: got=%v err=%v", got, err) + } +} + +func TestCollect_DeterministicOutput(t *testing.T) { + t.Parallel() + opts := makeOpts(t, "example.com", "", []map[string]any{ + tlsaRR("_25._tcp.b.example.com", 3, 1, 1, "AA"), + tlsaRR("_25._tcp.a.example.com", 3, 1, 1, "BB"), + tlsaRR("_443._tcp.a.example.com", 3, 1, 1, "CC"), + }, nil) + var prev []byte + for i := range 3 { + out, err := (&daneProvider{}).Collect(context.Background(), opts) + if err != nil { + t.Fatal(err) + } + // Compare only Targets: CollectedAt is a wall-clock timestamp. + b, _ := json.Marshal(out.(*DANEData).Targets) + if i > 0 && string(b) != string(prev) { + t.Errorf("non-deterministic targets:\n%s\nvs\n%s", prev, b) + } + prev = b + } +} diff --git a/checker/definition.go b/checker/definition.go new file mode 100644 index 0000000..9dba3cb --- /dev/null +++ b/checker/definition.go @@ -0,0 +1,68 @@ +package checker + +import ( + "time" + + sdk "git.happydns.org/checker-sdk-go/checker" + tls "git.happydns.org/checker-tls/checker" +) + +// Version defaults to "built-in"; standalone and plugin builds override it +// via -ldflags "-X .../checker.Version=...". +var Version = "built-in" + +// serviceType is the happyDomain service type this checker binds to. +const serviceType = "svcs.TLSAs" + +// Definition satisfies sdk.CheckerDefinitionProvider. 
+func (p *daneProvider) Definition() *sdk.CheckerDefinition { + return &sdk.CheckerDefinition{ + ID: "dane", + Name: "DANE / TLSA", + Version: Version, + Availability: sdk.CheckerAvailability{ + ApplyToService: true, + LimitToServices: []string{serviceType}, + }, + ObservationKeys: []sdk.ObservationKey{ObservationKeyDANE}, + HasHTMLReport: true, + Options: sdk.CheckerOptionsDocumentation{ + UserOpts: []sdk.CheckerOptionDocumentation{ + { + Id: OptionProbeTimeoutMs, + Type: "number", + Label: "Probe timeout (ms)", + Description: "Forwarded to checker-tls for each DANE endpoint.", + Default: float64(tls.DefaultProbeTimeoutMs), + }, + }, + RunOpts: []sdk.CheckerOptionDocumentation{ + { + Id: OptionDomain, + Type: "string", + Label: "Domain", + AutoFill: sdk.AutoFillDomainName, + Required: true, + }, + { + Id: OptionSubdomain, + Type: "string", + Label: "Subdomain", + AutoFill: sdk.AutoFillSubdomain, + }, + { + Id: OptionService, + Label: "TLSAs service", + AutoFill: sdk.AutoFillService, + Hide: true, + }, + }, + }, + Rules: Rules(), + Interval: &sdk.CheckIntervalSpec{ + Min: 6 * time.Hour, + Max: 7 * 24 * time.Hour, + Default: 24 * time.Hour, + }, + } +} diff --git a/checker/interactive.go b/checker/interactive.go new file mode 100644 index 0000000..1a15f96 --- /dev/null +++ b/checker/interactive.go @@ -0,0 +1,211 @@ +//go:build standalone + +package checker + +import ( + "context" + "encoding/json" + "errors" + "fmt" + "net" + "net/http" + "os" + "strconv" + "strings" + "time" + + "github.com/miekg/dns" + + sdk "git.happydns.org/checker-sdk-go/checker" + tls "git.happydns.org/checker-tls/checker" +) + +// resolverEnvVar names the environment variable that points at the +// DNSSEC-validating resolver this checker queries. The operator MUST point +// this at a trusted, validating resolver (typically 127.0.0.1:53 backed by +// Unbound, BIND, or Knot Resolver). DANE without DNSSEC validation is a +// downgrade primitive: an on-path attacker can forge TLSA responses. 
To +// fail loudly rather than silently insecure, lookupTLSA returns an error +// when no validating resolver is configured. +const resolverEnvVar = "DANE_CHECKER_RESOLVER" + +// dnsClientTimeout bounds each TLSA exchange so a black-holing resolver +// cannot tie up server goroutines indefinitely on the public listener. +const dnsClientTimeout = 5 * time.Second + +// tlsaLookup fetches TLSA records for owner via the system resolver and +// reports whether the resolver cryptographically validated the answer +// (AD bit set). It is a package variable so tests can swap it for a +// fixture. The context bounds the underlying DNS exchange so a slow or +// hung resolver cannot outlive the originating HTTP request on the +// public listener. +var tlsaLookup = lookupTLSA + +// RenderForm lets a human run this checker standalone. The form only +// collects the endpoint coordinates; the expected TLSA records are read +// from DNS by ParseForm and the live certificate is fetched in-process by +// the SDK running checker-tls as a sibling (see RelatedProviders). +func (p *daneProvider) RenderForm() []sdk.CheckerOptionField { + return []sdk.CheckerOptionField{ + {Id: OptionDomain, Type: "string", Label: "Domain", Placeholder: "example.com", Required: true}, + {Id: "port", Type: "uint", Label: "Port", Default: float64(443), Required: true}, + {Id: "proto", Type: "string", Label: "Protocol", Choices: []string{"tcp", "udp"}, Default: "tcp"}, + { + Id: "starttls", + Type: "string", + Label: "STARTTLS override", + Description: "Leave empty to auto-derive from port (25→smtp, 587→submission, 143→imap, …).", + }, + { + Id: OptionProbeTimeoutMs, + Type: "uint", + Label: "Probe timeout (ms)", + Default: float64(tls.DefaultProbeTimeoutMs), + Description: "Forwarded to checker-tls for the live probe.", + }, + } +} + +// ParseForm turns the submitted endpoint into the same CheckerOptions +// shape happyDomain would feed Collect. 
The TLSA RRset expected by +// Collect is resolved live from DNS at _<port>._<proto>.<domain>; if +// nothing is published there, no validation is possible and the form is +// re-rendered with the error. +func (p *daneProvider) ParseForm(r *http.Request) (sdk.CheckerOptions, error) { + domain := strings.TrimSuffix(strings.TrimSpace(r.FormValue(OptionDomain)), ".") + if domain == "" { + return nil, errors.New("domain is required") + } + portStr := strings.TrimSpace(r.FormValue("port")) + if portStr == "" { + return nil, errors.New("port is required") + } + port64, err := strconv.ParseUint(portStr, 10, 16) + if err != nil || port64 == 0 { + return nil, fmt.Errorf("invalid port %q: must be 1-65535", portStr) + } + port := uint16(port64) + proto := strings.TrimSpace(r.FormValue("proto")) + if proto == "" { + proto = "tcp" + } + if proto != "tcp" && proto != "udp" { + return nil, fmt.Errorf("invalid protocol %q: must be tcp or udp", proto) + } + + owner := tlsaOwnerName(port, proto, domain) + records, validated, err := tlsaLookup(r.Context(), owner) + if err != nil { + return nil, fmt.Errorf("TLSA lookup for %s: %w", owner, err) + } + if len(records) == 0 { + return nil, fmt.Errorf("no TLSA records found at %s", owner) + } + + tlsaEntries := make([]map[string]any, 0, len(records)) + for _, t := range records { + tlsaEntries = append(tlsaEntries, map[string]any{ + "Hdr": map[string]any{"Name": owner}, + "Usage": t.Usage, + "Selector": t.Selector, + "MatchingType": t.MatchingType, + "Certificate": strings.ToLower(t.Certificate), + }) + } + body, err := json.Marshal(map[string]any{"tlsa": tlsaEntries}) + if err != nil { + return nil, fmt.Errorf("marshal TLSAs service: %w", err) + } + + opts := sdk.CheckerOptions{ + OptionDomain: domain, + OptionService: serviceMessage{ + Type: serviceType, + Domain: domain, + Service: body, + }, + } + + if s := strings.TrimSpace(r.FormValue("starttls")); s != "" { + opts[OptionSTARTTLS] = map[string]string{ + starttlsKey(port, proto): s, + } + } + if v := 
strings.TrimSpace(r.FormValue(OptionProbeTimeoutMs)); v != "" { + if n, err := strconv.Atoi(v); err == nil && n > 0 { + opts[OptionProbeTimeoutMs] = float64(n) + } + } + opts[OptionDNSSECValidated] = validated + return opts, nil +} + +// RelatedProviders declares checker-tls as the sibling the SDK should run +// in-process during the interactive flow. The SDK harvests the discovery +// entries this checker publishes via DiscoverEntries and auto-fills +// checker-tls's OptionEndpoints (the option tagged +// sdk.AutoFillDiscoveryEntries in its definition), so the probe map the +// rule reads via GetRelated is populated with live data. +func (p *daneProvider) RelatedProviders() []sdk.ObservationProvider { + return []sdk.ObservationProvider{tls.Provider()} +} + +// lookupTLSA queries the configured DNSSEC-validating resolver for TLSA +// records at owner. The second return reports whether the resolver +// cryptographically validated the response (AD bit set). Callers must +// treat unvalidated answers as untrusted: a DANE "match" against +// records that lack DNSSEC protection is meaningless because an on-path +// attacker could have injected them. The records are still returned so +// the absence of validation surfaces as a check rule failure rather +// than a hard error that aborts the whole evaluation. +func lookupTLSA(ctx context.Context, owner string) ([]*dns.TLSA, bool, error) { + resolver, err := interactiveResolver() + if err != nil { + return nil, false, err + } + msg := new(dns.Msg) + msg.SetQuestion(dns.Fqdn(owner), dns.TypeTLSA) + msg.RecursionDesired = true + // AuthenticatedData = true asks the resolver to set AD on validated + // answers; SetEdns0 with do=true requests DNSSEC RRs. 
+ msg.AuthenticatedData = true + msg.SetEdns0(4096, true) + + c := &dns.Client{Timeout: dnsClientTimeout} + in, _, err := c.ExchangeContext(ctx, msg, resolver) + if err != nil { + return nil, false, err + } + if in.Rcode != dns.RcodeSuccess && in.Rcode != dns.RcodeNameError { + return nil, false, fmt.Errorf("rcode %s", dns.RcodeToString[in.Rcode]) + } + var out []*dns.TLSA + for _, rr := range in.Answer { + if t, ok := rr.(*dns.TLSA); ok { + out = append(out, t) + } + } + return out, in.AuthenticatedData, nil +} + +// interactiveResolver returns the address of the trusted, DNSSEC-validating +// resolver this checker should use. It refuses to silently fall back to a +// public plaintext resolver: that path is a downgrade vector and would make +// every "validation" trivially spoofable on a hostile network. The operator +// must opt in by setting DANE_CHECKER_RESOLVER (e.g. "127.0.0.1:53") or +// providing an /etc/resolv.conf entry that explicitly points at a local +// validating resolver. +func interactiveResolver() (string, error) { + if v := strings.TrimSpace(os.Getenv(resolverEnvVar)); v != "" { + // Accept either "host" (port defaults to 53) or "host:port". + if _, _, err := net.SplitHostPort(v); err != nil { + v = net.JoinHostPort(v, "53") + } + return v, nil + } + cfg, err := dns.ClientConfigFromFile("/etc/resolv.conf") + if err != nil || len(cfg.Servers) == 0 { + return "", fmt.Errorf("no DNSSEC-validating resolver configured: set %s to a trusted validator (e.g. 
127.0.0.1:53)", resolverEnvVar) + } + return net.JoinHostPort(cfg.Servers[0], cfg.Port), nil +} diff --git a/checker/interactive_test.go b/checker/interactive_test.go new file mode 100644 index 0000000..496a58c --- /dev/null +++ b/checker/interactive_test.go @@ -0,0 +1,150 @@ +//go:build standalone + +package checker + +import ( + "context" + "encoding/json" + "net/http" + "net/http/httptest" + "net/url" + "strings" + "testing" + + "github.com/miekg/dns" +) + +// stubTLSA returns a synthetic TLSA RR with the given fields, avoiding the +// textual-parse boilerplate of dns.NewRR. +func stubTLSA(owner string, usage, selector, matching uint8, cert string) *dns.TLSA { + return &dns.TLSA{ + Hdr: dns.RR_Header{Name: dns.Fqdn(owner), Rrtype: dns.TypeTLSA, Class: dns.ClassINET, Ttl: 3600}, + Usage: usage, + Selector: selector, + MatchingType: matching, + Certificate: cert, + } +} + +func withStubLookup(t *testing.T, records []*dns.TLSA, err error) { + t.Helper() + withStubLookupValidated(t, records, true, err) +} + +func withStubLookupValidated(t *testing.T, records []*dns.TLSA, validated bool, err error) { + t.Helper() + prev := tlsaLookup + tlsaLookup = func(_ context.Context, _ string) ([]*dns.TLSA, bool, error) { + return records, validated, err + } + t.Cleanup(func() { tlsaLookup = prev }) +} + +func postForm(values url.Values) *http.Request { + req := httptest.NewRequest("POST", "/check", strings.NewReader(values.Encode())) + req.Header.Set("Content-Type", "application/x-www-form-urlencoded") + req.ParseForm() + return req +} + +func TestParseForm_PopulatesServiceFromDNS(t *testing.T) { + withStubLookup(t, []*dns.TLSA{ + stubTLSA("_443._tcp.example.com", 3, 1, 1, "DEADBEEF"), + stubTLSA("_443._tcp.example.com", 2, 0, 1, "cafebabe"), + }, nil) + + p := &daneProvider{} + opts, err := p.ParseForm(postForm(url.Values{ + "domain_name": {"example.com"}, + "port": {"443"}, + "proto": {"tcp"}, + })) + if err != nil { + t.Fatalf("ParseForm: %v", err) + } + svc, ok := 
opts[OptionService].(serviceMessage) + if !ok { + t.Fatalf("service option has wrong type: %#v", opts[OptionService]) + } + if svc.Type != serviceType { + t.Errorf("service type = %q, want %q", svc.Type, serviceType) + } + if svc.Domain != "example.com" { + t.Errorf("service domain = %q, want example.com", svc.Domain) + } + + var body struct { + TLSA []struct { + Hdr struct { + Name string + } + Usage uint8 + Selector uint8 + MatchingType uint8 + Certificate string + } `json:"tlsa"` + } + if err := json.Unmarshal(svc.Service, &body); err != nil { + t.Fatalf("decode service body: %v", err) + } + if len(body.TLSA) != 2 { + t.Fatalf("got %d TLSA entries, want 2", len(body.TLSA)) + } + if body.TLSA[0].Certificate != "deadbeef" { + t.Errorf("expected lowercased cert, got %q", body.TLSA[0].Certificate) + } + if body.TLSA[0].Hdr.Name != "_443._tcp.example.com" { + t.Errorf("owner = %q, want _443._tcp.example.com", body.TLSA[0].Hdr.Name) + } +} + +func TestParseForm_NoRecordsIsError(t *testing.T) { + withStubLookup(t, nil, nil) + + p := &daneProvider{} + _, err := p.ParseForm(postForm(url.Values{ + "domain_name": {"example.com"}, + "port": {"443"}, + "proto": {"tcp"}, + })) + if err == nil { + t.Fatal("expected error when no TLSA records found, got nil") + } + if !strings.Contains(err.Error(), "no TLSA records") { + t.Errorf("unexpected error %v", err) + } +} + +func TestParseForm_StartTLSOverride(t *testing.T) { + withStubLookup(t, []*dns.TLSA{stubTLSA("_25._tcp.mail.example.com", 3, 1, 1, "aa")}, nil) + + p := &daneProvider{} + opts, err := p.ParseForm(postForm(url.Values{ + "domain_name": {"mail.example.com"}, + "port": {"25"}, + "proto": {"tcp"}, + "starttls": {"smtp"}, + })) + if err != nil { + t.Fatalf("ParseForm: %v", err) + } + override, ok := opts[OptionSTARTTLS].(map[string]string) + if !ok { + t.Fatalf("starttls option type = %T", opts[OptionSTARTTLS]) + } + if override["25/tcp"] != "smtp" { + t.Errorf("override[25/tcp] = %q, want smtp", override["25/tcp"]) + } 
+} + +func TestParseForm_InvalidPort(t *testing.T) { + p := &daneProvider{} + _, err := p.ParseForm(postForm(url.Values{ + "domain_name": {"example.com"}, + "port": {"0"}, + "proto": {"tcp"}, + })) + if err == nil { + t.Fatal("expected error for port 0") + } +} diff --git a/checker/match_test.go b/checker/match_test.go new file mode 100644 index 0000000..281d203 --- /dev/null +++ b/checker/match_test.go @@ -0,0 +1,417 @@ +package checker + +import ( + "crypto/sha256" + "crypto/sha512" + "encoding/base64" + "encoding/hex" + "strings" + "testing" + + tls "git.happydns.org/checker-tls/checker" +) + +// fakeCert builds a CertInfo whose hashes are precomputed from given +// pseudo-DER and pseudo-SPKI byte slices. Real DER is unnecessary: the +// matching logic only operates on bytes/hex. +func fakeCert(der, spki []byte) tls.CertInfo { + cs256 := sha256.Sum256(der) + cs512 := sha512.Sum512(der) + ss256 := sha256.Sum256(spki) + ss512 := sha512.Sum512(spki) + return tls.CertInfo{ + DERBase64: base64.StdEncoding.EncodeToString(der), + SPKIDERBase64: base64.StdEncoding.EncodeToString(spki), + CertSHA256: hex.EncodeToString(cs256[:]), + CertSHA512: hex.EncodeToString(cs512[:]), + SPKISHA256: hex.EncodeToString(ss256[:]), + SPKISHA512: hex.EncodeToString(ss512[:]), + } +} + +func TestTLSAOwnerRegex(t *testing.T) { + t.Parallel() + cases := []struct { + in string + ok bool + port, proto, bs string + }{ + {"_443._tcp.example.com", true, "443", "tcp", "example.com"}, + {"_25._tcp.mail.example.com", true, "25", "tcp", "mail.example.com"}, + {"_853._udp", true, "853", "udp", ""}, + {"_443._sctp.example.com", false, "", "", ""}, + {"443._tcp.example.com", false, "", "", ""}, + {"_abc._tcp.example.com", false, "", "", ""}, + {"_443.tcp.example.com", false, "", "", ""}, + } + for _, tc := range cases { + m := tlsaOwner.FindStringSubmatch(tc.in) + if (m != nil) != tc.ok { + t.Errorf("%q: match=%v want=%v", tc.in, m != nil, tc.ok) + continue + } + if !tc.ok { + continue + } + if m[1] != 
tc.port || m[2] != tc.proto || m[3] != tc.bs { + t.Errorf("%q: got (%q,%q,%q) want (%q,%q,%q)", tc.in, m[1], m[2], m[3], tc.port, tc.proto, tc.bs) + } + } +} + +func TestTLSAOwnerName(t *testing.T) { + t.Parallel() + cases := []struct { + port uint16 + proto string + base string + want string + }{ + {443, "tcp", "example.com", "_443._tcp.example.com"}, + {25, "tcp", "mail.example.com", "_25._tcp.mail.example.com"}, + } + for _, tc := range cases { + got := tlsaOwnerName(tc.port, tc.proto, tc.base) + if got != tc.want { + t.Errorf("tlsaOwnerName(%d,%q,%q)=%q want %q", tc.port, tc.proto, tc.base, got, tc.want) + } + } + + // Empty base: trailing label is omitted so the result is still a + // syntactically valid relative name rather than "_443._tcp.". + if got := tlsaOwnerName(443, "tcp", ""); got != "_443._tcp" { + t.Errorf("empty base: got %q want %q", got, "_443._tcp") + } + if got := tlsaOwnerName(443, "tcp", "example.com."); got != "_443._tcp.example.com" { + t.Errorf("trailing dot stripped: got %q", got) + } +} + +func TestStarttlsKey(t *testing.T) { + t.Parallel() + if got := starttlsKey(25, "tcp"); got != "25/tcp" { + t.Errorf("got %q want 25/tcp", got) + } +} + +func TestJoinName(t *testing.T) { + t.Parallel() + cases := []struct { + name string + base, sub, apex string + want string + }{ + {"empty base, no sub", "", "", "example.com", "example.com"}, + {"empty base with sub", "", "mail", "example.com", "mail.example.com"}, + {"absolute base equal apex", "example.com", "", "example.com", "example.com"}, + {"absolute base ending in apex", "mail.example.com", "", "example.com", "mail.example.com"}, + {"absolute base ending in apex with sub", "host.sub.example.com", "sub", "example.com", "host.sub.example.com"}, + {"relative base with sub", "host", "sub", "example.com", "host.sub.example.com"}, + {"relative base no sub", "host", "", "example.com", "host.example.com"}, + {"trailing dot", "host.", "", "example.com", "host.example.com"}, + {"empty everything", "", 
"", "", ""}, + // Brittle short-apex case (the "com" apex). Pinned to current + // behaviour: HasSuffix(".com") makes "example.com" already + // fully-qualified, so it is returned unchanged. + {"short apex collision", "example.com", "", "com", "example.com"}, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + got := joinName(tc.base, tc.sub, tc.apex) + if got != tc.want { + t.Errorf("got %q want %q", got, tc.want) + } + }) + } +} + +func TestRecordCandidate_Selectors(t *testing.T) { + t.Parallel() + der := []byte("der-bytes") + spki := []byte("spki-bytes") + c := fakeCert(der, spki) + + derHex := hex.EncodeToString(der) + spkiHex := hex.EncodeToString(spki) + + cases := []struct { + name string + rec TLSARecord + want string + }{ + {"cert/full", TLSARecord{Selector: SelectorCert, MatchingType: MatchingFull}, derHex}, + {"cert/sha256", TLSARecord{Selector: SelectorCert, MatchingType: MatchingSHA256}, c.CertSHA256}, + {"cert/sha512", TLSARecord{Selector: SelectorCert, MatchingType: MatchingSHA512}, c.CertSHA512}, + {"spki/full", TLSARecord{Selector: SelectorSPKI, MatchingType: MatchingFull}, spkiHex}, + {"spki/sha256", TLSARecord{Selector: SelectorSPKI, MatchingType: MatchingSHA256}, c.SPKISHA256}, + {"spki/sha512", TLSARecord{Selector: SelectorSPKI, MatchingType: MatchingSHA512}, c.SPKISHA512}, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + got, err := recordCandidate(tc.rec, c) + if err != nil { + t.Fatalf("err=%v", err) + } + if got != tc.want { + t.Errorf("got %q want %q", got, tc.want) + } + }) + } +} + +func TestRecordCandidate_Errors(t *testing.T) { + t.Parallel() + c := fakeCert([]byte("d"), []byte("s")) + if _, err := recordCandidate(TLSARecord{Selector: 9, MatchingType: MatchingSHA256}, c); err == nil { + t.Error("expected error on unknown selector") + } + if _, err := recordCandidate(TLSARecord{Selector: SelectorCert, MatchingType: 9}, c); err == nil { + t.Error("expected error on unknown matching type for 
cert") + } + if _, err := recordCandidate(TLSARecord{Selector: SelectorSPKI, MatchingType: 9}, c); err == nil { + t.Error("expected error on unknown matching type for spki") + } + bad := tls.CertInfo{DERBase64: "!!!not-base64!!!"} + if _, err := recordCandidate(TLSARecord{Selector: SelectorCert, MatchingType: MatchingFull}, bad); err == nil { + t.Error("expected base64 decode error") + } +} + +func TestDecodeFullDER_SizeLimit(t *testing.T) { + t.Parallel() + huge := strings.Repeat("A", maxFullDERBytes+10) // base64; decoded is ~3/4 of len + if _, err := decodeFullDER(huge, "test"); err == nil { + t.Error("expected size-limit error") + } + small := base64.StdEncoding.EncodeToString([]byte("hello")) + got, err := decodeFullDER(small, "test") + if err != nil { + t.Fatalf("err=%v", err) + } + if string(got) != "hello" { + t.Errorf("got %q want hello", got) + } +} + +func TestMatchRecord_LeafSelectsByUsage(t *testing.T) { + t.Parallel() + leaf := fakeCert([]byte("leaf-der"), []byte("leaf-spki")) + mid := fakeCert([]byte("mid-der"), []byte("mid-spki")) + probe := &tls.TLSProbe{Chain: []tls.CertInfo{leaf, mid}} + + // usage 3 (DANE-EE) matches leaf SHA-256 SPKI + rec := TLSARecord{Usage: UsageDANEEE, Selector: SelectorSPKI, MatchingType: MatchingSHA256, Certificate: leaf.SPKISHA256} + if ok, why := matchRecord(rec, probe); !ok { + t.Errorf("DANE-EE leaf SPKI sha256: ok=false reason=%q", why) + } + // usage 3 with intermediate hash should NOT match (wrong slot) + rec.Certificate = mid.SPKISHA256 + if ok, _ := matchRecord(rec, probe); ok { + t.Error("DANE-EE matching against intermediate SPKI should fail") + } + + // usage 2 (DANE-TA) matches intermediate + rec = TLSARecord{Usage: UsageDANETA, Selector: SelectorCert, MatchingType: MatchingSHA256, Certificate: mid.CertSHA256} + if ok, why := matchRecord(rec, probe); !ok { + t.Errorf("DANE-TA intermediate cert sha256: ok=false reason=%q", why) + } + + // usage 1 (PKIX-EE) matches leaf cert hash + rec = TLSARecord{Usage: 
UsagePKIXEE, Selector: SelectorCert, MatchingType: MatchingSHA256, Certificate: leaf.CertSHA256} + if ok, why := matchRecord(rec, probe); !ok { + t.Errorf("PKIX-EE leaf cert sha256: ok=false reason=%q", why) + } + + // usage 0 (PKIX-TA) matches intermediate + rec = TLSARecord{Usage: UsagePKIXTA, Selector: SelectorSPKI, MatchingType: MatchingSHA256, Certificate: mid.SPKISHA256} + if ok, why := matchRecord(rec, probe); !ok { + t.Errorf("PKIX-TA intermediate spki sha256: ok=false reason=%q", why) + } +} + +func TestMatchRecord_NoChain(t *testing.T) { + t.Parallel() + if ok, why := matchRecord(TLSARecord{Usage: UsageDANEEE}, &tls.TLSProbe{}); ok || why == "" { + t.Errorf("empty chain: ok=%v reason=%q", ok, why) + } +} + +func TestMatchRecord_TASelfSignedFallback(t *testing.T) { + t.Parallel() + // When the chain has only a leaf, usage 0/2 falls back to matching the + // leaf as a degenerate TA so the user gets feedback. + leaf := fakeCert([]byte("leaf"), []byte("spki")) + probe := &tls.TLSProbe{Chain: []tls.CertInfo{leaf}} + rec := TLSARecord{Usage: UsageDANETA, Selector: SelectorSPKI, MatchingType: MatchingSHA256, Certificate: leaf.SPKISHA256} + if ok, why := matchRecord(rec, probe); !ok { + t.Errorf("self-signed TA fallback: ok=false reason=%q", why) + } +} + +func TestMatchRecord_UnsupportedUsage(t *testing.T) { + t.Parallel() + leaf := fakeCert([]byte("leaf"), []byte("spki")) + probe := &tls.TLSProbe{Chain: []tls.CertInfo{leaf}} + if ok, why := matchRecord(TLSARecord{Usage: 9}, probe); ok || !strings.Contains(why, "unsupported") { + t.Errorf("usage 9: ok=%v reason=%q", ok, why) + } +} + +func TestMatchRecord_FullDER(t *testing.T) { + t.Parallel() + der := []byte("the-actual-cert-der") + leaf := fakeCert(der, []byte("ignored")) + probe := &tls.TLSProbe{Chain: []tls.CertInfo{leaf}} + rec := TLSARecord{ + Usage: UsageDANEEE, + Selector: SelectorCert, + MatchingType: MatchingFull, + Certificate: hex.EncodeToString(der), + } + if ok, why := matchRecord(rec, probe); !ok 
{ + t.Errorf("Full DER match failed: %q", why) + } +} + +func TestSummarizeMatches(t *testing.T) { + t.Parallel() + leaf := fakeCert([]byte("leaf"), []byte("ls")) + probe := &tls.TLSProbe{Chain: []tls.CertInfo{leaf}} + t1 := TargetResult{Records: []TLSARecord{ + {Usage: UsageDANEEE, Selector: SelectorSPKI, MatchingType: MatchingSHA256, Certificate: leaf.SPKISHA256}, // ok + {Usage: UsageDANEEE, Selector: SelectorSPKI, MatchingType: MatchingSHA256, Certificate: "deadbeef"}, // miss + {Usage: UsageDANEEE, Selector: SelectorCert, MatchingType: MatchingSHA256, Certificate: leaf.CertSHA256}, // ok + }} + s := summarizeMatches(t1, probe) + if s.matched != 2 || s.unmatched != 1 || s.firstUnmatchedIdx != 1 { + t.Errorf("got matched=%d unmatched=%d firstIdx=%d", s.matched, s.unmatched, s.firstUnmatchedIdx) + } + + if got := summarizeMatches(t1, nil); got.matched != 0 || got.firstUnmatchedIdx != -1 { + t.Errorf("nil probe: %+v", got) + } +} + +func TestSummarizeMatches_BadFirstSlotDoesNotAbort(t *testing.T) { + t.Parallel() + // An undecodable Full record at slot 0 shouldn't prevent later valid + // records from matching: regression test for the per-slot continue. 
+ leaf := fakeCert([]byte("leaf"), []byte("spki")) + probe := &tls.TLSProbe{Chain: []tls.CertInfo{leaf}} + bad := TargetResult{Records: []TLSARecord{ + {Usage: UsageDANEEE, Selector: SelectorCert, MatchingType: MatchingFull, Certificate: "00"}, // hex won't match decoded DER + {Usage: UsageDANEEE, Selector: SelectorSPKI, MatchingType: MatchingSHA256, Certificate: leaf.SPKISHA256}, + }} + s := summarizeMatches(bad, probe) + if s.matched != 1 { + t.Errorf("expected 1 match (the second record), got %d (unmatched=%d)", s.matched, s.unmatched) + } +} + +func TestHasPKIXUsage(t *testing.T) { + t.Parallel() + if hasPKIXUsage(TargetResult{Records: []TLSARecord{{Usage: UsageDANEEE}}}) { + t.Error("DANE-EE only: expected false") + } + if !hasPKIXUsage(TargetResult{Records: []TLSARecord{{Usage: UsagePKIXEE}}}) { + t.Error("PKIX-EE: expected true") + } + if !hasPKIXUsage(TargetResult{Records: []TLSARecord{{Usage: UsageDANETA}, {Usage: UsagePKIXTA}}}) { + t.Error("contains PKIX-TA: expected true") + } + if hasPKIXUsage(TargetResult{}) { + t.Error("empty: expected false") + } +} + +func TestSuspiciousUsage(t *testing.T) { + t.Parallel() + leaf := fakeCert([]byte("leaf"), []byte("ls")) + mid := fakeCert([]byte("mid"), []byte("ms")) + probe := &tls.TLSProbe{Chain: []tls.CertInfo{leaf, mid}} + + // Record declared as EE but hash matches intermediate => suspicious. + tgt := TargetResult{Records: []TLSARecord{{ + Usage: UsageDANEEE, Selector: SelectorSPKI, MatchingType: MatchingSHA256, + Certificate: mid.SPKISHA256, + }}} + if got := suspiciousUsage(tgt, probe); got == "" { + t.Error("expected suspicious-usage warning") + } + + // Record declared as EE matching the leaf is fine. + tgt.Records[0].Certificate = leaf.SPKISHA256 + if got := suspiciousUsage(tgt, probe); got != "" { + t.Errorf("unexpected warning: %q", got) + } + + // Single-cert chain: rule is silent. 
+ if got := suspiciousUsage(tgt, &tls.TLSProbe{Chain: []tls.CertInfo{leaf}}); got != "" { + t.Errorf("single-cert chain should be silent, got %q", got) + } +} + +func TestProposedTLSA(t *testing.T) { + t.Parallel() + leaf := fakeCert([]byte("leaf"), []byte("spki")) + probe := &tls.TLSProbe{Chain: []tls.CertInfo{leaf}} + + // No record published: defaults to 3 1 1. + t1 := TargetResult{Owner: "_443._tcp.example.com", Records: nil} + got := proposedTLSA(t1, probe) + if !strings.Contains(got, "TLSA 3 1 1 ") || !strings.Contains(got, leaf.SPKISHA256) { + t.Errorf("default proposal: %q", got) + } + + // Existing record uses Full → suggestion downgrades to SHA-256. + t2 := TargetResult{Owner: "_443._tcp.example.com", Records: []TLSARecord{{Usage: UsageDANEEE, Selector: SelectorCert, MatchingType: MatchingFull}}} + got = proposedTLSA(t2, probe) + if !strings.Contains(got, "TLSA 3 0 1 ") { + t.Errorf("Full→SHA256 collapse: %q", got) + } + + // No probe: empty. + if got := proposedTLSA(t1, nil); got != "" { + t.Errorf("no probe: got %q", got) + } +} + +func TestHandshakeFix(t *testing.T) { + t.Parallel() + got := handshakeFix(TargetResult{Host: "mail.example.com", Port: 25, STARTTLS: "smtp"}) + if !strings.Contains(got, "-starttls smtp") || !strings.Contains(got, "-connect mail.example.com:25") { + t.Errorf("smtp fix: %q", got) + } + got = handshakeFix(TargetResult{Host: "example.com", Port: 443}) + if strings.Contains(got, "-starttls") || !strings.Contains(got, "-connect example.com:443") { + t.Errorf("direct fix: %q", got) + } +} + +func TestTruncHex(t *testing.T) { + t.Parallel() + if truncHex("abc") != "abc" { + t.Error("short") + } + long := strings.Repeat("a", 20) + if got := truncHex(long); got != "aaaaaaaaaaaa…" { + t.Errorf("long: %q", got) + } +} + +func TestProbeUsable(t *testing.T) { + t.Parallel() + leaf := fakeCert([]byte("l"), []byte("s")) + if probeUsable(nil) { + t.Error("nil") + } + if probeUsable(&tls.TLSProbe{}) { + t.Error("empty chain") + } + if 
probeUsable(&tls.TLSProbe{Chain: []tls.CertInfo{leaf}, Error: "boom"}) { + t.Error("error set") + } + if !probeUsable(&tls.TLSProbe{Chain: []tls.CertInfo{leaf}}) { + t.Error("good probe") + } +} diff --git a/checker/provider.go b/checker/provider.go new file mode 100644 index 0000000..9e462f6 --- /dev/null +++ b/checker/provider.go @@ -0,0 +1,14 @@ +package checker + +import sdk "git.happydns.org/checker-sdk-go/checker" + +// Provider returns a new DANE observation provider. +func Provider() sdk.ObservationProvider { + return &daneProvider{} +} + +type daneProvider struct{} + +func (p *daneProvider) Key() sdk.ObservationKey { + return ObservationKeyDANE +} diff --git a/checker/report.go b/checker/report.go new file mode 100644 index 0000000..675388e --- /dev/null +++ b/checker/report.go @@ -0,0 +1,292 @@ +package checker + +import ( + "bytes" + "encoding/json" + "fmt" + "html/template" + "sort" + + sdk "git.happydns.org/checker-sdk-go/checker" + tls "git.happydns.org/checker-tls/checker" +) + +// GetHTMLReport implements sdk.CheckerHTMLReporter. The report opens with a +// diagnosis-first section that lists the most common DANE failure modes +// actually detected on the user's targets, each with a one-shot remediation +// snippet; a per-target table follows for reference. 
+func (p *daneProvider) GetHTMLReport(ctx sdk.ReportContext) (string, error) { + var data DANEData + if err := json.Unmarshal(ctx.Data(), &data); err != nil { + return "", fmt.Errorf("decode DANE data: %w", err) + } + + probes := indexProbes(ctx.Related(tls.ObservationKeyTLSProbes)) + + rows := make([]reportRow, 0, len(data.Targets)) + for _, t := range data.Targets { + probe := probes[t.Ref] + status, cls := targetStatus(t, probe) + leaf := "—" + if probe != nil && len(probe.Chain) > 0 { + leaf = probe.Chain[0].Subject + } else if probe != nil && probe.Error != "" { + leaf = "handshake error" + } + rows = append(rows, reportRow{ + Owner: t.Owner, + Host: t.Host, + Port: t.Port, + Proto: t.Proto, + STARTTLS: t.STARTTLS, + RecordCount: len(t.Records), + StatusLabel: status, + StatusClass: cls, + Leaf: leaf, + }) + } + + view := reportView{ + CollectedAt: data.CollectedAt.Format("2006-01-02 15:04 MST"), + TargetCount: len(data.Targets), + Diagnoses: diagnose(data, probes), + Rows: rows, + CSS: template.CSS(reportCSS), + } + + var b bytes.Buffer + if err := reportTemplate.Execute(&b, view); err != nil { + return "", fmt.Errorf("render DANE report: %w", err) + } + return b.String(), nil +} + +// reportView is the rendering payload passed to reportTemplate. Pre-computing +// the per-row status label/class and leaf string keeps the template free of +// branching beyond simple range/if. +type reportView struct { + CollectedAt string + TargetCount int + Diagnoses []diagnosis + Rows []reportRow + CSS template.CSS +} + +type reportRow struct { + Owner string + Host string + Port uint16 + Proto string + STARTTLS string + RecordCount int + StatusLabel string + StatusClass string + Leaf string +} + +// diagnosis is a single actionable hint surfaced at the top of the report. 
+type diagnosis struct { + Severity string // crit | warn | info + Title string + Detail string + Fix string // ready-to-apply snippet (shell or zone fragment) +} + +// diagnose scans every target and produces the minimum set of high-signal +// cards users need to act on. Priority ordering (most-common first): +// +// 1. no_match: TLSA records do not cover the live cert (post-rotation miss). +// 2. handshake_failed: endpoint unreachable or TLS broken, DANE can't be +// validated at all. +// 3. pkix_chain_invalid: usage 0/1 published but public chain is broken. +// 4. usage_3_matches_issuer: DANE-EE selector matches an intermediate +// the record is probably miscategorized (usage 2 was intended). +// 5. no_probe_yet: quiet informational to avoid false alarms on first run. +func diagnose(data DANEData, probes map[string]*tls.TLSProbe) []diagnosis { + var out []diagnosis + + for _, t := range data.Targets { + probe := probes[t.Ref] + switch { + case probe == nil: + out = append(out, diagnosis{ + Severity: SeverityInfo, + Title: fmt.Sprintf("Waiting for first TLS probe on %s:%d", t.Host, t.Port), + Detail: "checker-tls has not yet probed this endpoint. This is normal immediately after publishing a new TLSA record; status will clear on the next cycle.", + }) + case !probeUsable(probe): + out = append(out, diagnosis{ + Severity: SeverityCrit, + Title: fmt.Sprintf("Cannot reach %s:%d to validate DANE", t.Host, t.Port), + Detail: "TLS handshake failed, DANE publishes hashes for a certificate nobody can see. 
Either the service is down, the port is blocked, or STARTTLS negotiation is broken.", + Fix: handshakeFix(t), + }) + default: + if summarizeMatches(t, probe).matched == 0 && len(t.Records) > 0 { + out = append(out, diagnosis{ + Severity: SeverityCrit, + Title: fmt.Sprintf("No TLSA record matches the live certificate on %s:%d", t.Host, t.Port), + Detail: "This is the most common DANE outage cause: the certificate was rotated without rolling over the TLSA RRset, and validating resolvers are now rejecting the connection. Publish a TLSA record for the new certificate before removing the old one.", + Fix: proposedTLSA(t, probe), + }) + } + if hasPKIXUsage(t) && (probe.ChainValid == nil || !*probe.ChainValid) { + out = append(out, diagnosis{ + Severity: SeverityCrit, + Title: fmt.Sprintf("Usage 0/1 needs a publicly-trusted chain on %s:%d", t.Host, t.Port), + Detail: "TLSA usages 0 (PKIX-TA) and 1 (PKIX-EE) require the certificate chain to validate against system roots. Either re-issue through a publicly-trusted CA or switch to usage 2 / 3, which skip PKIX.", + }) + } + if warn := suspiciousUsage(t, probe); warn != "" { + out = append(out, diagnosis{ + Severity: SeverityWarn, + Title: fmt.Sprintf("Suspicious TLSA usage on %s:%d", t.Host, t.Port), + Detail: warn, + }) + } + } + } + + // Stable: crit first, then warn, then info; preserving encounter order + // within each group keeps the table and the cards aligned. + sort.SliceStable(out, func(i, j int) bool { + return sevRank(out[i].Severity) < sevRank(out[j].Severity) + }) + return out +} + +func sevRank(s string) int { + switch s { + case SeverityCrit: + return 0 + case SeverityWarn: + return 1 + default: + return 2 + } +} + +// hasPKIXUsage reports whether any TLSA record at this target demands PKIX +// validation (usage 0 or 1). 
+func hasPKIXUsage(t TargetResult) bool { + for _, r := range t.Records { + if r.Usage == UsagePKIXTA || r.Usage == UsagePKIXEE { + return true + } + } + return false +} + +// proposedTLSA renders a ready-to-paste replacement RR computed from the +// live chain. The (usage, selector, matching) triplet is taken from the +// user's first existing record so the suggestion stays consistent with +// their published profile (e.g. a deployment standardised on usage 2 keeps +// usage 2). When no record is published yet, fall back to the DANE-EE + +// SPKI + SHA-256 triplet most Let's Encrypt deployers settle on. +func proposedTLSA(t TargetResult, p *tls.TLSProbe) string { + if p == nil || len(p.Chain) == 0 { + return "" + } + tmpl := TLSARecord{Usage: UsageDANEEE, Selector: SelectorSPKI, MatchingType: MatchingSHA256} + if len(t.Records) > 0 { + r := t.Records[0] + tmpl.Usage = r.Usage + tmpl.Selector = r.Selector + tmpl.MatchingType = r.MatchingType + // Suggesting Full (matching type 0) inline as a zone fragment is + // not useful: collapse to SHA-256 of the same selector, which is + // what operators publish in practice. + if tmpl.MatchingType == MatchingFull { + tmpl.MatchingType = MatchingSHA256 + } + } + + slot := p.Chain[0] + if (tmpl.Usage == UsagePKIXTA || tmpl.Usage == UsageDANETA) && len(p.Chain) > 1 { + slot = p.Chain[1] + } + hex, err := recordCandidate(tmpl, slot) + if err != nil || hex == "" { + return "" + } + return fmt.Sprintf("%s IN TLSA %d %d %d %s", t.Owner, tmpl.Usage, tmpl.Selector, tmpl.MatchingType, hex) +} + +// handshakeFix proposes a STARTTLS-aware first step when the probe failed. 
+func handshakeFix(t TargetResult) string { + if t.STARTTLS != "" { + return fmt.Sprintf("openssl s_client -connect %s:%d -starttls %s -servername %s", t.Host, t.Port, t.STARTTLS, t.Host) + } + return fmt.Sprintf("openssl s_client -connect %s:%d -servername %s", t.Host, t.Port, t.Host) +} + +func targetStatus(t TargetResult, p *tls.TLSProbe) (label, class string) { + if p == nil { + return "Waiting for probe", "unknown" + } + if !probeUsable(p) { + return "Handshake failed", "crit" + } + if len(t.Records) == 0 { + return "No records", "info" + } + matched := summarizeMatches(t, p).matched + if matched == 0 { + return "No match", "crit" + } + return fmt.Sprintf("%d/%d match", matched, len(t.Records)), "ok" +} + +var reportTemplate = template.Must(template.New("dane").Parse(` + + + +DANE report + + +
+

DANE / TLSA

+

Collected {{.CollectedAt}} · {{.TargetCount}} endpoint(s).

+{{with .Diagnoses}}
+

Action required

+{{range .}}
+

{{.Title}}

+

{{.Detail}}

+{{with .Fix}}
{{.}}
{{end}} +
+{{end}}
+{{end}}
+

Endpoints

+ + + +{{range .Rows}} + + + + + +{{end}} +
EndpointStatusRecordsObserved leaf
{{.Owner}}
{{.Proto}} → {{.Host}}:{{.Port}}{{with .STARTTLS}} · STARTTLS {{.}}{{end}}
{{.StatusLabel}}{{.RecordCount}}{{.Leaf}}
+
+
`)) + +const reportCSS = `body{font-family:system-ui,sans-serif;margin:0;background:#fafbfc;color:#1b1f23;} +main{max-width:980px;margin:0 auto;padding:1.5rem;} +h1{margin:0 0 .25rem 0;} +.meta{color:#586069;margin:0 0 1.5rem 0;} +section{margin-bottom:2rem;} +h2{border-bottom:1px solid #e1e4e8;padding-bottom:.25rem;} +.finding{border-left:4px solid;padding:.75rem 1rem;margin:.75rem 0;background:#fff;border-radius:4px;} +.finding h3{margin:0 0 .25rem 0;font-size:1rem;} +.finding.sev-crit{border-color:#d73a49;} +.finding.sev-warn{border-color:#dbab09;} +.finding.sev-info{border-color:#0366d6;} +.fix{background:#1b1f23;color:#fafbfc;padding:.5rem .75rem;border-radius:4px;overflow-x:auto;font-size:.85rem;} +table{width:100%;border-collapse:collapse;background:#fff;} +th,td{padding:.5rem .75rem;border-bottom:1px solid #e1e4e8;text-align:left;vertical-align:top;} +tr.status-crit td:nth-child(2){color:#d73a49;font-weight:600;} +tr.status-ok td:nth-child(2){color:#22863a;font-weight:600;} +tr.status-unknown td:nth-child(2){color:#586069;} +code{font-size:.85rem;} +small{color:#586069;}` diff --git a/checker/rule.go b/checker/rule.go new file mode 100644 index 0000000..c4d63a4 --- /dev/null +++ b/checker/rule.go @@ -0,0 +1,277 @@ +package checker + +import ( + "context" + "encoding/base64" + "encoding/hex" + "encoding/json" + "fmt" + "strings" + + sdk "git.happydns.org/checker-sdk-go/checker" + tls "git.happydns.org/checker-tls/checker" +) + +// Rules returns the full list of CheckRules exposed by the DANE checker. +// Each rule covers exactly one concern so the UI can show per-concern +// status rather than a single monolithic rule that multiplexes many codes. 
+func Rules() []sdk.CheckRule { + return []sdk.CheckRule{ + &hasRecordsRule{}, + &dnssecValidatedRule{}, + &probeAvailableRule{}, + &handshakeOKRule{}, + &recordsMatchChainRule{}, + &pkixChainValidRule{}, + &usageCoherentRule{}, + } +} + +// ruleContext bundles the data rules typically need: the checker's own +// observation plus the map of related TLS probes keyed by endpoint Ref. +type ruleContext struct { + data DANEData + probes map[string]*tls.TLSProbe + // relatedErr is a non-fatal error encountered while loading related + // probes (e.g. the cross-checker lineage was unreachable). Rules + // surface it as an error state so operators can spot misconfiguration. + relatedErr error + // err is a fatal error loading the checker's own observation. + err error +} + +// loadRuleContext fetches the DANE observation and the related TLS probes. +// Rules call this once and then filter on the fields they care about. +func loadRuleContext(ctx context.Context, obs sdk.ObservationGetter) *ruleContext { + rc := &ruleContext{} + if err := obs.Get(ctx, ObservationKeyDANE, &rc.data); err != nil { + rc.err = err + return rc + } + rc.probes, rc.relatedErr = relatedTLSProbes(ctx, obs) + return rc +} + +// observationErrorState is the canonical short-circuit state emitted when a +// rule cannot load the DANE observation at all. +func observationErrorState(err error) sdk.CheckState { + return sdk.CheckState{ + Status: sdk.StatusError, + Message: fmt.Sprintf("Failed to read %s: %v", ObservationKeyDANE, err), + Code: "dane_observation_error", + } +} + +// targetMeta builds the common Meta map for per-endpoint states. +func targetMeta(t TargetResult) map[string]any { + return map[string]any{ + "host": t.Host, + "port": t.Port, + "proto": t.Proto, + "owner": t.Owner, + "starttls": t.STARTTLS, + "records": len(t.Records), + } +} + +// targetSubject is the human-readable subject tag used on per-endpoint states. 
+func targetSubject(t TargetResult) string { + return fmt.Sprintf("%s:%d (%s)", t.Host, t.Port, t.Proto) +} + +// probeUsable reports whether p carries a successfully-observed certificate +// chain. Rules that need to compare against the chain skip endpoints where +// this is false; the missing/failed cases are surfaced by probeAvailableRule +// and handshakeOKRule respectively, so other rules stay focused. +func probeUsable(p *tls.TLSProbe) bool { + return p != nil && p.Error == "" && len(p.Chain) > 0 +} + +// matchSummary aggregates per-target match outcomes so callers don't redo the +// per-record loop. firstUnmatchedIdx is -1 when every record matched. +type matchSummary struct { + matched, unmatched int + firstUnmatchedIdx int + firstUnmatchedReason string +} + +// summarizeMatches walks t.Records once and reports how many matched p's +// chain, plus the first unmatched index and reason for messaging. +func summarizeMatches(t TargetResult, p *tls.TLSProbe) matchSummary { + s := matchSummary{firstUnmatchedIdx: -1} + if p == nil { + return s + } + for i, rec := range t.Records { + ok, reason := matchRecord(rec, p) + if ok { + s.matched++ + continue + } + s.unmatched++ + if s.firstUnmatchedIdx < 0 { + s.firstUnmatchedIdx = i + s.firstUnmatchedReason = reason + } + } + return s +} + +// matchRecord returns true when rec matches some certificate at the chain +// slot implied by rec.Usage. reason explains the miss on a false return. +// +// Slot selection: +// +// - Usage 1 (PKIX-EE) and 3 (DANE-EE): leaf only. +// - Usage 0 (PKIX-TA) and 2 (DANE-TA): intermediates + the root the +// server presented (if any). We match against every non-leaf cert the +// server sent, because some deployments publish the root and some the +// intermediate; either is a valid TA reference for the connection's +// path. 
+func matchRecord(rec TLSARecord, p *tls.TLSProbe) (bool, string) { + if len(p.Chain) == 0 { + return false, "no certificates observed on the endpoint" + } + var slots []tls.CertInfo + switch rec.Usage { + case UsagePKIXEE, UsageDANEEE: + slots = p.Chain[:1] + case UsagePKIXTA, UsageDANETA: + if len(p.Chain) > 1 { + slots = p.Chain[1:] + } else { + // Self-signed / bundle with only a leaf: allow matching against + // the leaf as a degenerate TA so the user gets a hash comparison + // rather than a silent "no slot". + slots = p.Chain[:1] + } + default: + return false, fmt.Sprintf("unsupported TLSA usage %d", rec.Usage) + } + + var lastErr string + for _, c := range slots { + got, err := recordCandidate(rec, c) + if err != nil { + lastErr = err.Error() + continue + } + if strings.EqualFold(got, rec.Certificate) { + return true, "" + } + } + if lastErr != "" { + return false, lastErr + } + return false, fmt.Sprintf("expected %s, got none matching in chain", truncHex(rec.Certificate)) +} + +// maxFullDERBytes caps the size of a "Full" (MatchingType 0) DER payload +// that this checker is willing to base64-decode and hex-encode. Real X.509 +// certificates rarely exceed 8 KiB; 64 KiB leaves comfortable headroom for +// pathological-but-legitimate chains while preventing a hostile probe +// payload from forcing arbitrary heap allocations during evaluation. +const maxFullDERBytes = 64 * 1024 + +// decodeFullDER base64-decodes b after rejecting payloads whose decoded size +// would exceed maxFullDERBytes, so an attacker-controlled probe cannot make +// the rule allocate unbounded memory before the hex comparison. +func decodeFullDER(b string, what string) ([]byte, error) { + // base64 decoded length is at most ceil(len(b)/4)*3; bail out cheaply + // before allocating the destination buffer. 
+ if len(b)/4*3 > maxFullDERBytes { + return nil, fmt.Errorf("%s exceeds %d bytes", what, maxFullDERBytes) + } + der, err := base64.StdEncoding.DecodeString(b) + if err != nil { + return nil, fmt.Errorf("decode %s: %w", what, err) + } + if len(der) > maxFullDERBytes { + return nil, fmt.Errorf("%s exceeds %d bytes", what, maxFullDERBytes) + } + return der, nil +} + +// recordCandidate returns the hex value the TLSA record should match for +// the (selector, matching_type) pair against this certificate slot. For +// matching_type 0 (Full), both sides are compared as hex-encoded DER. +func recordCandidate(rec TLSARecord, c tls.CertInfo) (string, error) { + var source string + switch rec.Selector { + case SelectorCert: + switch rec.MatchingType { + case MatchingFull: + der, err := decodeFullDER(c.DERBase64, "cert DER") + if err != nil { + return "", err + } + source = hex.EncodeToString(der) + case MatchingSHA256: + source = c.CertSHA256 + case MatchingSHA512: + source = c.CertSHA512 + default: + return "", fmt.Errorf("unsupported matching type %d", rec.MatchingType) + } + case SelectorSPKI: + switch rec.MatchingType { + case MatchingFull: + spki, err := decodeFullDER(c.SPKIDERBase64, "SPKI DER") + if err != nil { + return "", err + } + source = hex.EncodeToString(spki) + case MatchingSHA256: + source = c.SPKISHA256 + case MatchingSHA512: + source = c.SPKISHA512 + default: + return "", fmt.Errorf("unsupported matching type %d", rec.MatchingType) + } + default: + return "", fmt.Errorf("unsupported selector %d", rec.Selector) + } + return source, nil +} + +// parseTLSProbeMap decodes one related-observation payload into its constituent +// probes, keyed by endpoint Ref. Returns nil on decode error (caller skips). 
+func parseTLSProbeMap(data []byte) map[string]tls.TLSProbe { + var payload struct { + Probes map[string]tls.TLSProbe `json:"probes"` + } + if err := json.Unmarshal(data, &payload); err != nil { + return nil + } + return payload.Probes +} + +// relatedTLSProbes indexes TLS probes fetched via GetRelated by endpoint Ref. +func relatedTLSProbes(ctx context.Context, obs sdk.ObservationGetter) (map[string]*tls.TLSProbe, error) { + related, err := obs.GetRelated(ctx, tls.ObservationKeyTLSProbes) + if err != nil { + return nil, fmt.Errorf("related TLS probes unavailable: %w", err) + } + return indexProbes(related), nil +} + +// indexProbes flattens a slice of related TLS-probe observations into a probe +// map keyed by endpoint Ref. Shared by the rule path (relatedTLSProbes) and +// the report path (GetHTMLReport), which receive the same RelatedObservation +// type from different SDK entry points. +func indexProbes(related []sdk.RelatedObservation) map[string]*tls.TLSProbe { + out := map[string]*tls.TLSProbe{} + for _, ro := range related { + for k, v := range parseTLSProbeMap(ro.Data) { + out[k] = &v + } + } + return out +} + +func truncHex(s string) string { + if len(s) > 12 { + return s[:12] + "…" + } + return s +} diff --git a/checker/rules_handshake.go b/checker/rules_handshake.go new file mode 100644 index 0000000..2047ea1 --- /dev/null +++ b/checker/rules_handshake.go @@ -0,0 +1,57 @@ +package checker + +import ( + "context" + + sdk "git.happydns.org/checker-sdk-go/checker" +) + +// handshakeOKRule reports whether the TLS handshake succeeded on every +// endpoint that has been probed. A failing handshake means DANE cannot be +// validated regardless of what TLSA records are published. +type handshakeOKRule struct{} + +func (r *handshakeOKRule) Name() string { return "dane.handshake_ok" } +func (r *handshakeOKRule) Description() string { + return "Verifies the TLS handshake succeeds on every DANE endpoint so the presented chain can be compared to TLSA records." 
+} + +func (r *handshakeOKRule) Evaluate(ctx context.Context, obs sdk.ObservationGetter, _ sdk.CheckerOptions) []sdk.CheckState { + rc := loadRuleContext(ctx, obs) + if rc.err != nil { + return []sdk.CheckState{observationErrorState(rc.err)} + } + var out []sdk.CheckState + tested := 0 + for _, t := range rc.data.Targets { + probe := rc.probes[t.Ref] + if probe == nil { + continue // covered by probeAvailableRule + } + tested++ + if !probeUsable(probe) { + out = append(out, sdk.CheckState{ + Status: sdk.StatusCrit, + Code: "dane_handshake_failed", + Subject: targetSubject(t), + Message: "TLS handshake failed, cannot validate DANE: " + probe.Error, + Meta: targetMeta(t), + }) + } + } + if len(out) == 0 { + if tested == 0 { + return []sdk.CheckState{{ + Status: sdk.StatusUnknown, + Code: "dane_handshake_ok_skipped", + Message: "No probed endpoint to evaluate (waiting for checker-tls).", + }} + } + return []sdk.CheckState{{ + Status: sdk.StatusOK, + Code: "dane_handshake_ok", + Message: "TLS handshake succeeds on every probed endpoint.", + }} + } + return out +} diff --git a/checker/rules_match.go b/checker/rules_match.go new file mode 100644 index 0000000..b8a7f0e --- /dev/null +++ b/checker/rules_match.go @@ -0,0 +1,80 @@ +package checker + +import ( + "context" + "fmt" + + sdk "git.happydns.org/checker-sdk-go/checker" +) + +// recordsMatchChainRule is the core DANE check: for every endpoint whose +// handshake succeeded, at least one declared TLSA record must match the +// certificate chain presented by the server (RFC 6698 §2.1 OR semantics). +// +// This is the most common DANE outage vector, a certificate rotation +// without a matching TLSA rollover, so it deserves its own rule and its +// own per-endpoint states. 
+type recordsMatchChainRule struct{} + +func (r *recordsMatchChainRule) Name() string { return "dane.records_match_chain" } +func (r *recordsMatchChainRule) Description() string { + return "Verifies that at least one TLSA record matches the certificate chain presented by each endpoint." +} + +func (r *recordsMatchChainRule) Evaluate(ctx context.Context, obs sdk.ObservationGetter, _ sdk.CheckerOptions) []sdk.CheckState { + rc := loadRuleContext(ctx, obs) + if rc.err != nil { + return []sdk.CheckState{observationErrorState(rc.err)} + } + var out []sdk.CheckState + tested := 0 + for _, t := range rc.data.Targets { + probe := rc.probes[t.Ref] + if !probeUsable(probe) { + continue // covered by probeAvailableRule / handshakeOKRule + } + if len(t.Records) == 0 { + continue // covered by hasRecordsRule + } + tested++ + subj := targetSubject(t) + meta := targetMeta(t) + + s := summarizeMatches(t, probe) + meta["matched"] = s.matched + meta["unmatched"] = s.unmatched + + if s.matched > 0 { + out = append(out, sdk.CheckState{ + Status: sdk.StatusOK, + Code: "dane_match_ok", + Subject: subj, + Message: fmt.Sprintf("%d/%d TLSA record(s) match the presented certificate chain.", s.matched, s.matched+s.unmatched), + Meta: meta, + }) + continue + } + msg := "No TLSA record matches the presented certificate chain." 
+ if s.firstUnmatchedReason != "" { + msg += " " + s.firstUnmatchedReason + } + meta["first_unmatched_index"] = s.firstUnmatchedIdx + out = append(out, sdk.CheckState{ + Status: sdk.StatusCrit, + Code: "dane_no_match", + Subject: subj, + Message: msg, + Meta: meta, + }) + } + if len(out) == 0 { + if tested == 0 { + return []sdk.CheckState{{ + Status: sdk.StatusUnknown, + Code: "dane_records_match_chain_skipped", + Message: "No usable probe/records pair to evaluate.", + }} + } + } + return out +} diff --git a/checker/rules_pkix.go b/checker/rules_pkix.go new file mode 100644 index 0000000..8977e7a --- /dev/null +++ b/checker/rules_pkix.go @@ -0,0 +1,61 @@ +package checker + +import ( + "context" + + sdk "git.happydns.org/checker-sdk-go/checker" +) + +// pkixChainValidRule reports whether endpoints that publish PKIX-dependent +// TLSA usages (0 or 1) also present a certificate chain that validates +// against the system trust store. DANE usages 2/3 are unaffected and +// skipped entirely by this rule. +type pkixChainValidRule struct{} + +func (r *pkixChainValidRule) Name() string { return "dane.pkix_chain_valid" } +func (r *pkixChainValidRule) Description() string { + return "When TLSA usages 0 or 1 are published, verifies the certificate chain also validates against system trust roots." 
+} + +func (r *pkixChainValidRule) Evaluate(ctx context.Context, obs sdk.ObservationGetter, _ sdk.CheckerOptions) []sdk.CheckState { + rc := loadRuleContext(ctx, obs) + if rc.err != nil { + return []sdk.CheckState{observationErrorState(rc.err)} + } + var out []sdk.CheckState + tested := 0 + for _, t := range rc.data.Targets { + probe := rc.probes[t.Ref] + if !probeUsable(probe) { + continue + } + if !hasPKIXUsage(t) { + continue + } + tested++ + if probe.ChainValid == nil || !*probe.ChainValid { + out = append(out, sdk.CheckState{ + Status: sdk.StatusCrit, + Code: "dane_pkix_chain_invalid", + Subject: targetSubject(t), + Message: "Usage 0/1 requires a publicly-trusted chain, but the certificate chain did not validate against system roots.", + Meta: targetMeta(t), + }) + } + } + if len(out) == 0 { + if tested == 0 { + return []sdk.CheckState{{ + Status: sdk.StatusUnknown, + Code: "dane_pkix_chain_valid_skipped", + Message: "No endpoint publishes PKIX-dependent TLSA usages (0/1).", + }} + } + return []sdk.CheckState{{ + Status: sdk.StatusOK, + Code: "dane_pkix_chain_valid_ok", + Message: "Every endpoint with PKIX-dependent usages presents a publicly-trusted chain.", + }} + } + return out +} diff --git a/checker/rules_probe.go b/checker/rules_probe.go new file mode 100644 index 0000000..4e35333 --- /dev/null +++ b/checker/rules_probe.go @@ -0,0 +1,61 @@ +package checker + +import ( + "context" + + sdk "git.happydns.org/checker-sdk-go/checker" +) + +// probeAvailableRule reports whether the downstream TLS checker has probed +// every endpoint we published. Absent probes are common immediately after a +// new TLSA record is published and should not flap the service red. +type probeAvailableRule struct{} + +func (r *probeAvailableRule) Name() string { return "dane.probe_available" } +func (r *probeAvailableRule) Description() string { + return "Verifies a TLS probe is available for every DANE endpoint so the chain can be compared to TLSA records." 
+} + +func (r *probeAvailableRule) Evaluate(ctx context.Context, obs sdk.ObservationGetter, _ sdk.CheckerOptions) []sdk.CheckState { + rc := loadRuleContext(ctx, obs) + if rc.err != nil { + return []sdk.CheckState{observationErrorState(rc.err)} + } + if rc.relatedErr != nil { + return []sdk.CheckState{{ + Status: sdk.StatusError, + Code: "dane_observation_warning", + Message: rc.relatedErr.Error(), + }} + } + if len(rc.data.Targets) == 0 { + return []sdk.CheckState{{ + Status: sdk.StatusUnknown, + Code: "dane_probe_available_skipped", + Message: "No DANE endpoints to probe.", + }} + } + out := make([]sdk.CheckState, 0, len(rc.data.Targets)) + for _, t := range rc.data.Targets { + subj := targetSubject(t) + meta := targetMeta(t) + if rc.probes[t.Ref] == nil { + out = append(out, sdk.CheckState{ + Status: sdk.StatusUnknown, + Code: "dane_no_probe", + Subject: subj, + Message: "No TLS probe available yet for this endpoint; re-evaluate after the next checker-tls cycle.", + Meta: meta, + }) + continue + } + out = append(out, sdk.CheckState{ + Status: sdk.StatusOK, + Code: "dane_probe_available_ok", + Subject: subj, + Message: "TLS probe available for this endpoint.", + Meta: meta, + }) + } + return out +} diff --git a/checker/rules_records.go b/checker/rules_records.go new file mode 100644 index 0000000..0a4ba39 --- /dev/null +++ b/checker/rules_records.go @@ -0,0 +1,99 @@ +package checker + +import ( + "context" + "fmt" + "strings" + + sdk "git.happydns.org/checker-sdk-go/checker" +) + +// hasRecordsRule reports whether the TLSAs service declares any TLSA record +// at all. Without records there is nothing for DANE to validate. +type hasRecordsRule struct{} + +func (r *hasRecordsRule) Name() string { return "dane.has_records" } +func (r *hasRecordsRule) Description() string { + return "Verifies that at least one TLSA record is declared on the service." 
+} + +func (r *hasRecordsRule) Evaluate(ctx context.Context, obs sdk.ObservationGetter, _ sdk.CheckerOptions) []sdk.CheckState { + rc := loadRuleContext(ctx, obs) + if rc.err != nil { + return []sdk.CheckState{observationErrorState(rc.err)} + } + var states []sdk.CheckState + for _, inv := range rc.data.Invalid { + states = append(states, sdk.CheckState{ + Status: sdk.StatusError, + Code: "dane_invalid_owner", + Subject: inv.Owner, + Message: fmt.Sprintf("TLSA record %q is unusable: %s", inv.Owner, inv.Reason), + Meta: map[string]any{"owner": inv.Owner, "reason": inv.Reason}, + }) + } + if len(rc.data.Targets) == 0 { + if len(states) > 0 { + // Records exist but none are usable; flag the aggregate too so + // the UI doesn't only show per-record errors. + owners := make([]string, 0, len(rc.data.Invalid)) + for _, inv := range rc.data.Invalid { + owners = append(owners, inv.Owner) + } + states = append(states, sdk.CheckState{ + Status: sdk.StatusError, + Code: "dane_no_usable_records", + Message: fmt.Sprintf("No usable TLSA records (all %d declared records are malformed: %s).", len(rc.data.Invalid), strings.Join(owners, ", ")), + }) + return states + } + return []sdk.CheckState{{ + Status: sdk.StatusUnknown, + Code: "dane_no_records", + Message: "No TLSA records declared on this service.", + }} + } + states = append(states, sdk.CheckState{ + Status: sdk.StatusOK, + Code: "dane_has_records_ok", + Message: "TLSA records are declared for all bound endpoints.", + Meta: map[string]any{"endpoints": len(rc.data.Targets)}, + }) + return states +} + +// dnssecValidatedRule reports whether the TLSA records this checker is +// evaluating were fetched over a DNSSEC-validated path. Without DNSSEC, +// DANE is a downgrade primitive: an on-path attacker can forge TLSA +// answers and any "match" the rest of the rules report is meaningless. 
+// The rule only emits when the collector recorded a validation status: +// in managed mode the records come from the user's authoritative zone +// config and DNSSEC posture is checked by a different checker. +type dnssecValidatedRule struct{} + +func (r *dnssecValidatedRule) Name() string { return "dane.dnssec_validated" } +func (r *dnssecValidatedRule) Description() string { + return "Verifies the TLSA records were fetched via a DNSSEC-validating resolver (AD bit set)." +} + +func (r *dnssecValidatedRule) Evaluate(ctx context.Context, obs sdk.ObservationGetter, _ sdk.CheckerOptions) []sdk.CheckState { + rc := loadRuleContext(ctx, obs) + if rc.err != nil { + return []sdk.CheckState{observationErrorState(rc.err)} + } + if rc.data.DNSSECValidated == nil { + return nil + } + if *rc.data.DNSSECValidated { + return []sdk.CheckState{{ + Status: sdk.StatusOK, + Code: "dane_dnssec_validated", + Message: "TLSA records were fetched over a DNSSEC-validated path (AD bit set).", + }} + } + return []sdk.CheckState{{ + Status: sdk.StatusError, + Code: "dane_dnssec_unvalidated", + Message: "TLSA records were fetched without DNSSEC validation (resolver did not set the AD bit). DANE matches are not trustworthy without DNSSEC.", + }} +} diff --git a/checker/rules_test.go b/checker/rules_test.go new file mode 100644 index 0000000..4e05b37 --- /dev/null +++ b/checker/rules_test.go @@ -0,0 +1,276 @@ +package checker + +import ( + "context" + "encoding/json" + "errors" + "testing" + + sdk "git.happydns.org/checker-sdk-go/checker" + tls "git.happydns.org/checker-tls/checker" + tlscontract "git.happydns.org/checker-tls/contract" +) + +// mockObs is a lightweight ObservationGetter for rule unit tests. 
+type mockObs struct { + dane *DANEData + daneErr error + probes map[string]tls.TLSProbe + relatedErr error +} + +func (m *mockObs) Get(_ context.Context, key sdk.ObservationKey, dest any) error { + if m.daneErr != nil { + return m.daneErr + } + if key != ObservationKeyDANE || m.dane == nil { + return errors.New("not found") + } + b, err := json.Marshal(m.dane) + if err != nil { + return err + } + return json.Unmarshal(b, dest) +} + +func (m *mockObs) GetRelated(_ context.Context, key sdk.ObservationKey) ([]sdk.RelatedObservation, error) { + if m.relatedErr != nil { + return nil, m.relatedErr + } + if key != tls.ObservationKeyTLSProbes || m.probes == nil { + return nil, nil + } + payload := struct { + Probes map[string]tls.TLSProbe `json:"probes"` + }{Probes: m.probes} + b, _ := json.Marshal(payload) + return []sdk.RelatedObservation{{ + CheckerID: "tls", + Key: tls.ObservationKeyTLSProbes, + Data: b, + }}, nil +} + +func makeTarget(host string, port uint16, recs []TLSARecord) TargetResult { + t := TargetResult{ + Owner: tlsaOwnerName(port, "tcp", host), + Host: host, + Port: port, + Proto: "tcp", + Records: recs, + } + t.Ref = tlscontract.Ref(tlscontract.TLSEndpoint{Host: host, Port: port, SNI: host}) + return t +} + +func TestHasRecordsRule(t *testing.T) { + t.Parallel() + r := &hasRecordsRule{} + + // No records, no invalid → unknown + obs := &mockObs{dane: &DANEData{}} + st := r.Evaluate(context.Background(), obs, nil) + if len(st) != 1 || st[0].Code != "dane_no_records" { + t.Errorf("no records: %+v", st) + } + + // Records present → ok + obs = &mockObs{dane: &DANEData{Targets: []TargetResult{makeTarget("a.example.com", 443, []TLSARecord{{}})}}} + st = r.Evaluate(context.Background(), obs, nil) + if len(st) != 1 || st[0].Code != "dane_has_records_ok" { + t.Errorf("ok: %+v", st) + } + + // Invalid records, no targets → error states + obs = &mockObs{dane: &DANEData{Invalid: []InvalidRecord{{Owner: "_x._tcp", Reason: "bad port"}}}} + st = 
r.Evaluate(context.Background(), obs, nil) + if len(st) < 2 { + t.Fatalf("expected per-record + aggregate, got %+v", st) + } + if st[0].Code != "dane_invalid_owner" || st[len(st)-1].Code != "dane_no_usable_records" { + t.Errorf("invalid only: %+v", st) + } + + // Observation read error + obs = &mockObs{daneErr: errors.New("boom")} + st = r.Evaluate(context.Background(), obs, nil) + if len(st) != 1 || st[0].Code != "dane_observation_error" { + t.Errorf("err: %+v", st) + } +} + +func TestProbeAvailableRule(t *testing.T) { + t.Parallel() + r := &probeAvailableRule{} + tgt := makeTarget("a.example.com", 443, []TLSARecord{{Usage: UsageDANEEE}}) + + // Probe present + leaf := fakeCert([]byte("l"), []byte("s")) + obs := &mockObs{ + dane: &DANEData{Targets: []TargetResult{tgt}}, + probes: map[string]tls.TLSProbe{tgt.Ref: {Chain: []tls.CertInfo{leaf}}}, + } + st := r.Evaluate(context.Background(), obs, nil) + if len(st) != 1 || st[0].Code != "dane_probe_available_ok" { + t.Errorf("ok: %+v", st) + } + + // Probe absent + obs.probes = map[string]tls.TLSProbe{} + st = r.Evaluate(context.Background(), obs, nil) + if len(st) != 1 || st[0].Code != "dane_no_probe" { + t.Errorf("missing: %+v", st) + } + + // No targets at all + obs = &mockObs{dane: &DANEData{}} + st = r.Evaluate(context.Background(), obs, nil) + if len(st) != 1 || st[0].Code != "dane_probe_available_skipped" { + t.Errorf("empty: %+v", st) + } + + // Related-fetch error surfaces as warning state. + obs = &mockObs{dane: &DANEData{Targets: []TargetResult{tgt}}, relatedErr: errors.New("upstream down")} + st = r.Evaluate(context.Background(), obs, nil) + if len(st) != 1 || st[0].Code != "dane_observation_warning" { + t.Errorf("relatedErr: %+v", st) + } +} + +func TestHandshakeOKRule(t *testing.T) { + t.Parallel() + r := &handshakeOKRule{} + tgt := makeTarget("a.example.com", 443, []TLSARecord{{Usage: UsageDANEEE}}) + leaf := fakeCert([]byte("l"), []byte("s")) + + // All good. 
+ obs := &mockObs{ + dane: &DANEData{Targets: []TargetResult{tgt}}, + probes: map[string]tls.TLSProbe{tgt.Ref: {Chain: []tls.CertInfo{leaf}}}, + } + st := r.Evaluate(context.Background(), obs, nil) + if len(st) != 1 || st[0].Code != "dane_handshake_ok" { + t.Errorf("ok: %+v", st) + } + + // Handshake failed. + obs.probes = map[string]tls.TLSProbe{tgt.Ref: {Error: "tls: bad cert"}} + st = r.Evaluate(context.Background(), obs, nil) + if len(st) != 1 || st[0].Code != "dane_handshake_failed" { + t.Errorf("failed: %+v", st) + } +} + +func TestRecordsMatchChainRule(t *testing.T) { + t.Parallel() + r := &recordsMatchChainRule{} + leaf := fakeCert([]byte("leaf"), []byte("ls")) + + tgt := makeTarget("a.example.com", 443, []TLSARecord{ + {Usage: UsageDANEEE, Selector: SelectorSPKI, MatchingType: MatchingSHA256, Certificate: leaf.SPKISHA256}, + }) + obs := &mockObs{ + dane: &DANEData{Targets: []TargetResult{tgt}}, + probes: map[string]tls.TLSProbe{tgt.Ref: {Chain: []tls.CertInfo{leaf}}}, + } + st := r.Evaluate(context.Background(), obs, nil) + if len(st) != 1 || st[0].Code != "dane_match_ok" { + t.Errorf("match ok: %+v", st) + } + + // Same target, wrong cert hash → no match (crit). + tgt.Records[0].Certificate = "deadbeef" + obs.dane = &DANEData{Targets: []TargetResult{tgt}} + st = r.Evaluate(context.Background(), obs, nil) + if len(st) != 1 || st[0].Code != "dane_no_match" { + t.Errorf("no match: %+v", st) + } + + // No probe usable → skipped. + obs.probes = map[string]tls.TLSProbe{} + st = r.Evaluate(context.Background(), obs, nil) + if len(st) != 1 || st[0].Code != "dane_records_match_chain_skipped" { + t.Errorf("skipped: %+v", st) + } +} + +func TestPKIXChainValidRule(t *testing.T) { + t.Parallel() + r := &pkixChainValidRule{} + leaf := fakeCert([]byte("l"), []byte("s")) + bTrue, bFalse := true, false + + // PKIX usage + valid chain → ok. 
+ tgt := makeTarget("a.example.com", 443, []TLSARecord{{Usage: UsagePKIXEE}}) + obs := &mockObs{ + dane: &DANEData{Targets: []TargetResult{tgt}}, + probes: map[string]tls.TLSProbe{tgt.Ref: {Chain: []tls.CertInfo{leaf}, ChainValid: &bTrue}}, + } + st := r.Evaluate(context.Background(), obs, nil) + if len(st) != 1 || st[0].Code != "dane_pkix_chain_valid_ok" { + t.Errorf("ok: %+v", st) + } + + // PKIX usage + invalid chain → crit. + obs.probes = map[string]tls.TLSProbe{tgt.Ref: {Chain: []tls.CertInfo{leaf}, ChainValid: &bFalse}} + st = r.Evaluate(context.Background(), obs, nil) + if len(st) != 1 || st[0].Code != "dane_pkix_chain_invalid" { + t.Errorf("invalid: %+v", st) + } + + // DANE-only usages → skipped (rule does not apply). + tgt.Records = []TLSARecord{{Usage: UsageDANEEE}} + obs.dane = &DANEData{Targets: []TargetResult{tgt}} + obs.probes = map[string]tls.TLSProbe{tgt.Ref: {Chain: []tls.CertInfo{leaf}}} + st = r.Evaluate(context.Background(), obs, nil) + if len(st) != 1 || st[0].Code != "dane_pkix_chain_valid_skipped" { + t.Errorf("skipped: %+v", st) + } +} + +func TestUsageCoherentRule(t *testing.T) { + t.Parallel() + r := &usageCoherentRule{} + leaf := fakeCert([]byte("l"), []byte("ls")) + mid := fakeCert([]byte("m"), []byte("ms")) + + // EE record whose hash matches the intermediate → warn. + tgt := makeTarget("a.example.com", 443, []TLSARecord{{ + Usage: UsageDANEEE, Selector: SelectorSPKI, MatchingType: MatchingSHA256, + Certificate: mid.SPKISHA256, + }}) + obs := &mockObs{ + dane: &DANEData{Targets: []TargetResult{tgt}}, + probes: map[string]tls.TLSProbe{tgt.Ref: {Chain: []tls.CertInfo{leaf, mid}}}, + } + st := r.Evaluate(context.Background(), obs, nil) + if len(st) != 1 || st[0].Code != "dane_usage_incoherent" { + t.Errorf("incoherent: %+v", st) + } + + // EE matching leaf → ok. 
+ tgt.Records[0].Certificate = leaf.SPKISHA256 + obs.dane = &DANEData{Targets: []TargetResult{tgt}} + st = r.Evaluate(context.Background(), obs, nil) + if len(st) != 1 || st[0].Code != "dane_usage_coherent_ok" { + t.Errorf("coherent ok: %+v", st) + } + + // Single-cert chain → skipped. + obs.probes = map[string]tls.TLSProbe{tgt.Ref: {Chain: []tls.CertInfo{leaf}}} + st = r.Evaluate(context.Background(), obs, nil) + if len(st) != 1 || st[0].Code != "dane_usage_coherent_skipped" { + t.Errorf("skipped: %+v", st) + } +} + +func TestRules_ObservationError(t *testing.T) { + t.Parallel() + obs := &mockObs{daneErr: errors.New("read failed")} + for _, rule := range Rules() { + st := rule.Evaluate(context.Background(), obs, nil) + if len(st) == 0 || st[0].Code != "dane_observation_error" { + t.Errorf("%s: expected observation_error, got %+v", rule.Name(), st) + } + } +} diff --git a/checker/rules_usage.go b/checker/rules_usage.go new file mode 100644 index 0000000..86eac94 --- /dev/null +++ b/checker/rules_usage.go @@ -0,0 +1,86 @@ +package checker + +import ( + "context" + "strings" + + sdk "git.happydns.org/checker-sdk-go/checker" + tls "git.happydns.org/checker-tls/checker" +) + +// usageCoherentRule flags TLSA records whose declared usage contradicts the +// chain slot their hash actually matches, typically a record published as +// usage 1 or 3 (end-entity) whose hash in fact matches an intermediate. +// That is almost always a publisher error: the intended usage was 0 or 2. +type usageCoherentRule struct{} + +func (r *usageCoherentRule) Name() string { return "dane.usage_coherent" } +func (r *usageCoherentRule) Description() string { + return "Flags TLSA records whose declared usage does not match the chain slot they actually hash (e.g. usage 3 matching an intermediate)." 
+} + +func (r *usageCoherentRule) Evaluate(ctx context.Context, obs sdk.ObservationGetter, _ sdk.CheckerOptions) []sdk.CheckState { + rc := loadRuleContext(ctx, obs) + if rc.err != nil { + return []sdk.CheckState{observationErrorState(rc.err)} + } + var out []sdk.CheckState + tested := 0 + for _, t := range rc.data.Targets { + probe := rc.probes[t.Ref] + if !probeUsable(probe) || len(probe.Chain) < 2 { + continue + } + tested++ + warn := suspiciousUsage(t, probe) + if warn != "" { + out = append(out, sdk.CheckState{ + Status: sdk.StatusWarn, + Code: "dane_usage_incoherent", + Subject: targetSubject(t), + Message: warn, + Meta: targetMeta(t), + }) + } + } + if len(out) == 0 { + if tested == 0 { + return []sdk.CheckState{{ + Status: sdk.StatusUnknown, + Code: "dane_usage_coherent_skipped", + Message: "No multi-cert chain probed yet; cannot assess usage coherence.", + }} + } + return []sdk.CheckState{{ + Status: sdk.StatusOK, + Code: "dane_usage_coherent_ok", + Message: "End-entity TLSA records match end-entity certificates on every probed chain.", + }} + } + return out +} + +// suspiciousUsage returns a human-readable hint when a record hash matches a +// chain slot that contradicts its declared usage (e.g. usage 3 whose hash +// actually matches the intermediate), almost always a publisher error. Used +// by both usageCoherentRule and the HTML report. +func suspiciousUsage(t TargetResult, p *tls.TLSProbe) string { + if p == nil || len(p.Chain) < 2 { + return "" + } + for _, r := range t.Records { + if r.Usage != UsageDANEEE && r.Usage != UsagePKIXEE { + continue + } + for _, c := range p.Chain[1:] { + cand, err := recordCandidate(r, c) + if err != nil { + continue + } + if strings.EqualFold(cand, r.Certificate) { + return "A record declared with usage 1/3 (end-entity) actually matches an intermediate certificate. It should probably use usage 0 or 2 (trust-anchor) instead." 
+ } + } + } + return "" +} diff --git a/checker/types.go b/checker/types.go new file mode 100644 index 0000000..2190b2b --- /dev/null +++ b/checker/types.go @@ -0,0 +1,128 @@ +// Package checker implements the DANE/TLSA checker for happyDomain. +// +// This checker is bound to the svcs.TLSAs service. Collect takes the TLSA +// records the user published (or plans to publish) for the service, derives +// one TLS endpoint per distinct (port, proto, base name), and declares those +// endpoints as tls.endpoint.v1 discovery entries. checker-tls then probes +// them; on the next evaluation, this checker reads the related TLS probes +// via obs.GetRelated and verifies each TLSA record matches the certificate +// chain the probe observed. +// +// The user-visible contract matches what DANE deployers expect: +// +// - Usage 0 (PKIX-TA) / 1 (PKIX-EE): also require the PKIX chain to be +// publicly trusted. +// - Usage 2 (DANE-TA) / 3 (DANE-EE): trust the TLSA as the anchor; PKIX +// validity is informational. +// - Selector 0 (Cert) / 1 (SPKI) and matching types 0/1/2 (Full/SHA-256/ +// SHA-512) are matched against the chain slot implied by the usage. +package checker + +import "time" + +// ObservationKeyDANE is the observation key this checker writes. +const ObservationKeyDANE = "dane_checks" + +// Option ids on CheckerOptions. +const ( + // OptionService is auto-filled by the happyDomain host with the + // svcs.TLSAs service payload this checker is bound to. + OptionService = "service" + + // OptionDomain is auto-filled with the domain apex. TLSA owner names + // in the service are relative to this apex. + OptionDomain = "domain_name" + + // OptionSubdomain is the optional sub-zone under which the TLSAs + // service lives (matches the svcs.TLSAs analyzer's subdomain bucket). + OptionSubdomain = "subdomain" + + // OptionProbeTimeoutMs is how long each underlying TLS probe is allowed. + // Passed through to checker-tls verbatim via the discovery entry options. 
+ OptionProbeTimeoutMs = "probeTimeoutMs" + + // OptionSTARTTLS is an optional per-endpoint STARTTLS hint keyed by + // "/" → RFC 6335 service name (e.g. "25/tcp" → "smtp", + // "587/tcp" → "submission"). Common ports auto-map via a built-in table. + OptionSTARTTLS = "starttls" + + // OptionDNSSECValidated reports whether the TLSA records the host + // submitted to this checker came from a DNSSEC-validated lookup. + // Only set by the standalone interactive flow; absent in managed mode + // where TLSA records come from the user's authoritative zone config. + OptionDNSSECValidated = "dnssec_validated" +) + +// Severity constants mirror checker-tls. +const ( + SeverityCrit = "crit" + SeverityWarn = "warn" + SeverityInfo = "info" +) + +// TLSA field enum constants (RFC 6698 §2.1). +const ( + UsagePKIXTA uint8 = 0 + UsagePKIXEE uint8 = 1 + UsageDANETA uint8 = 2 + UsageDANEEE uint8 = 3 + + SelectorCert uint8 = 0 + SelectorSPKI uint8 = 1 + + MatchingFull uint8 = 0 + MatchingSHA256 uint8 = 1 + MatchingSHA512 uint8 = 2 +) + +// DANEData is the full payload the checker writes under ObservationKeyDANE. +type DANEData struct { + // Targets is one entry per (port, proto, basename) triplet extracted + // from the TLSAs service. + Targets []TargetResult `json:"targets"` + // Invalid lists TLSA records that could not be parsed into a usable + // endpoint (malformed owner name, out-of-range port, etc.). They are + // surfaced by hasRecordsRule so a misconfigured zone fails loudly + // instead of silently passing as "no records". + Invalid []InvalidRecord `json:"invalid,omitempty"` + // DNSSECValidated reflects whether the resolver that fetched the TLSA + // records set the AD bit. Only populated by the standalone interactive + // flow (lookupTLSA); nil in managed mode where records come from the + // user's zone config and DNSSEC posture is checked elsewhere. 
+ DNSSECValidated *bool `json:"dnssec_validated,omitempty"` + CollectedAt time.Time `json:"collected_at"` +} + +// InvalidRecord describes a TLSA record dropped during Collect. +type InvalidRecord struct { + Owner string `json:"owner"` + Reason string `json:"reason"` +} + +// TargetResult groups all TLSA records declared on a single endpoint and +// carries enough context to render an actionable HTML row per endpoint. +type TargetResult struct { + // Owner is the fully qualified DANE owner name (_._.). + Owner string `json:"owner"` + // Host is the connection target (typically the base name the TLSA + // records live under, or its MX/SRV target when relevant). + Host string `json:"host"` + Port uint16 `json:"port"` + Proto string `json:"proto"` + STARTTLS string `json:"starttls,omitempty"` + + // Ref ties this target to the tls.endpoint.v1 discovery entry the + // checker emitted, so the rule can pick the matching RelatedObservation. + Ref string `json:"ref"` + + // Records are the TLSA records declared for this endpoint. + Records []TLSARecord `json:"records"` +} + +// TLSARecord is a user-facing view of a single dns.TLSA record. 
+type TLSARecord struct { + Usage uint8 `json:"usage"` + Selector uint8 `json:"selector"` + MatchingType uint8 `json:"matching_type"` + Certificate string `json:"certificate"` // lowercase hex +} diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..e910279 --- /dev/null +++ b/go.mod @@ -0,0 +1,17 @@ +module git.happydns.org/checker-dane + +go 1.25.0 + +require ( + git.happydns.org/checker-sdk-go v1.4.0 + git.happydns.org/checker-tls v0.6.2 + github.com/miekg/dns v1.1.72 +) + +require ( + golang.org/x/mod v0.31.0 // indirect + golang.org/x/net v0.48.0 // indirect + golang.org/x/sync v0.19.0 // indirect + golang.org/x/sys v0.39.0 // indirect + golang.org/x/tools v0.40.0 // indirect +) diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..826c1e6 --- /dev/null +++ b/go.sum @@ -0,0 +1,18 @@ +git.happydns.org/checker-sdk-go v1.4.0 h1:sO8EnF3suhNgYLRsbmCZWJOymH/oNMrOUqj3FEzJArs= +git.happydns.org/checker-sdk-go v1.4.0/go.mod h1:aNAcfYFfbhvH9kJhE0Njp5GX0dQbxdRB0rJ0KvSC5nI= +git.happydns.org/checker-tls v0.6.2 h1:8oKia1XlD+tklyqrwzmUgFH1Kw8VLSLLF9suZ7Qr14E= +git.happydns.org/checker-tls v0.6.2/go.mod h1:9tpnxg0iOwS+7If64DRG1jqYonUAgxOBuxwfF5mVkL4= +github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= +github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/miekg/dns v1.1.72 h1:vhmr+TF2A3tuoGNkLDFK9zi36F2LS+hKTRW0Uf8kbzI= +github.com/miekg/dns v1.1.72/go.mod h1:+EuEPhdHOsfk6Wk5TT2CzssZdqkmFhf8r+aVyDEToIs= +golang.org/x/mod v0.31.0 h1:HaW9xtz0+kOcWKwli0ZXy79Ix+UW/vOfmWI5QVd2tgI= +golang.org/x/mod v0.31.0/go.mod h1:43JraMp9cGx1Rx3AqioxrbrhNsLl2l/iNAvuBkrezpg= +golang.org/x/net v0.48.0 h1:zyQRTTrjc33Lhh0fBgT/H3oZq9WuvRR5gPC70xpDiQU= +golang.org/x/net v0.48.0/go.mod h1:+ndRgGjkh8FGtu1w1FGbEC31if4VrNVMuKTgcAAnQRY= +golang.org/x/sync v0.19.0 h1:vV+1eWNmZ5geRlYjzm2adRgW2/mcpevXNg50YZtPCE4= +golang.org/x/sync v0.19.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI= +golang.org/x/sys 
v0.39.0 h1:CvCKL8MeisomCi6qNZ+wbb0DN9E5AATixKsvNtMoMFk= +golang.org/x/sys v0.39.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= +golang.org/x/tools v0.40.0 h1:yLkxfA+Qnul4cs9QA3KnlFu0lVmd8JJfoq+E41uSutA= +golang.org/x/tools v0.40.0/go.mod h1:Ik/tzLRlbscWpqqMRjyWYDisX8bG13FrdXp3o4Sr9lc= diff --git a/main.go b/main.go new file mode 100644 index 0000000..f0fdce8 --- /dev/null +++ b/main.go @@ -0,0 +1,23 @@ +package main + +import ( + "flag" + "log" + + dane "git.happydns.org/checker-dane/checker" + "git.happydns.org/checker-sdk-go/checker/server" +) + +var Version = "custom-build" + +var listenAddr = flag.String("listen", ":8080", "HTTP listen address") + +func main() { + flag.Parse() + dane.Version = Version + + srv := server.New(dane.Provider()) + if err := srv.ListenAndServe(*listenAddr); err != nil { + log.Fatalf("server error: %v", err) + } +} diff --git a/plugin/plugin.go b/plugin/plugin.go new file mode 100644 index 0000000..71afa4f --- /dev/null +++ b/plugin/plugin.go @@ -0,0 +1,16 @@ +// Command plugin is the happyDomain plugin entrypoint for the DANE/TLSA +// checker. Built with -buildmode=plugin and loaded at runtime. +package main + +import ( + dane "git.happydns.org/checker-dane/checker" + sdk "git.happydns.org/checker-sdk-go/checker" +) + +var Version = "custom-build" + +func NewCheckerPlugin() (*sdk.CheckerDefinition, sdk.ObservationProvider, error) { + dane.Version = Version + prvd := dane.Provider() + return prvd.(sdk.CheckerDefinitionProvider).Definition(), prvd, nil +}