commit 1b4dacff1f8cfbbd0b0653fed1add075ae731f11 Author: Pierre-Olivier Mercier Date: Fri Apr 24 10:33:26 2026 +0700 Initial commit diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..c10bb91 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +checker-dane +checker-dane.so diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..d407a62 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,14 @@ +FROM golang:1.25-alpine AS builder + +ARG CHECKER_VERSION=custom-build + +WORKDIR /src +COPY go.mod go.sum ./ +RUN go mod download +COPY . . +RUN CGO_ENABLED=0 go build -tags standalone -ldflags "-X main.Version=${CHECKER_VERSION}" -o /checker-dane . + +FROM scratch +COPY --from=builder /checker-dane /checker-dane +EXPOSE 8080 +ENTRYPOINT ["/checker-dane"] diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..07d44d8 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2026 The happyDomain Authors + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the “Software”), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..1d966d8 --- /dev/null +++ b/Makefile @@ -0,0 +1,28 @@ +CHECKER_NAME := checker-dane +CHECKER_IMAGE := happydomain/$(CHECKER_NAME) +CHECKER_VERSION ?= custom-build + +CHECKER_SOURCES := main.go $(wildcard checker/*.go) + +GO_LDFLAGS := -X main.Version=$(CHECKER_VERSION) + +.PHONY: all plugin docker test clean + +all: $(CHECKER_NAME) + +$(CHECKER_NAME): $(CHECKER_SOURCES) + go build -tags standalone -ldflags "$(GO_LDFLAGS)" -o $@ . + +plugin: $(CHECKER_NAME).so + +$(CHECKER_NAME).so: $(CHECKER_SOURCES) $(wildcard plugin/*.go) + go build -buildmode=plugin -ldflags "$(GO_LDFLAGS)" -o $@ ./plugin/ + +docker: + docker build --build-arg CHECKER_VERSION=$(CHECKER_VERSION) -t $(CHECKER_IMAGE) . + +test: + go test -tags standalone ./... + +clean: + rm -f $(CHECKER_NAME) $(CHECKER_NAME).so diff --git a/README.md b/README.md new file mode 100644 index 0000000..e1294a9 --- /dev/null +++ b/README.md @@ -0,0 +1,68 @@ +# checker-dane + +DANE / TLSA checker for [happyDomain](https://www.happydomain.org/). + +Bound to the `svcs.TLSAs` service: groups the user's TLSA records by +`(port, proto, base)`, publishes one `tls.endpoint.v1` discovery entry +per endpoint so [`checker-tls`](https://git.happydns.org/checker-tls) +probes them, then matches each TLSA against the observed certificate +chain per RFC 6698. 
+ +## Usage + +### Standalone HTTP server + +```bash +# Build and run +make +./checker-dane -listen :8080 +``` + +The server exposes: + +- `GET /health`, health check +- `POST /collect`, collect DANE observations (happyDomain external checker protocol) + +### Docker + +```bash +make docker +docker run -p 8080:8080 happydomain/checker-dane +``` + +### happyDomain plugin + +```bash +make plugin +# produces checker-dane.so, loadable by happyDomain as a Go plugin +``` + +The plugin exposes a `NewCheckerPlugin` symbol returning the checker +definition and observation provider, which happyDomain registers in its +global registries at load time. + +### Versioning + +The binary, plugin, and Docker image embed a version string overridable +at build time: + +```bash +make CHECKER_VERSION=1.2.3 +make plugin CHECKER_VERSION=1.2.3 +make docker CHECKER_VERSION=1.2.3 +``` + +### happyDomain remote endpoint + +Set the `endpoint` admin option for the DANE checker to the URL of the +running checker-dane server (e.g., `http://checker-dane:8080`). +happyDomain will delegate observation collection to this endpoint. + +## Behavior + +- **Usage 0 (PKIX-TA) / 1 (PKIX-EE)**: TLSA match + publicly trusted PKIX chain required. +- **Usage 2 (DANE-TA) / 3 (DANE-EE)**: TLSA acts as the trust anchor; PKIX validity is informational. +- **Selector** 0 (Cert) / 1 (SPKI) and **MatchingType** 0/1/2 (Full / SHA-256 / SHA-512) + are matched against the chain slot implied by the usage. +- Common STARTTLS ports (25, 110, 143, 389, 587, 5222, 5269) are auto-mapped; + override via the `starttls` option keyed by `"<port>/<proto>"`. 
diff --git a/checker/collect.go b/checker/collect.go new file mode 100644 index 0000000..31a399e --- /dev/null +++ b/checker/collect.go @@ -0,0 +1,243 @@ +package checker + +import ( + "context" + "encoding/json" + "fmt" + "regexp" + "sort" + "strconv" + "strings" + "time" + + sdk "git.happydns.org/checker-sdk-go/checker" + tlscontract "git.happydns.org/checker-tls/contract" +) + +// tlsaOwner matches the "_<port>._<proto>.<base>" TLSA owner-name pattern. +// The base group is whatever the happyDomain analyzer bucketed the TLSAs +// under; when empty, the TLSAs live directly under the zone apex. +var tlsaOwner = regexp.MustCompile(`^_(\d+)\._(tcp|udp)(?:\.(.*))?$`) + +// tlsaOwnerName builds the canonical "_<port>._<proto>.<base>" owner name. +func tlsaOwnerName(port uint16, proto, base string) string { + return fmt.Sprintf("_%d._%s.%s", port, proto, base) +} + +// starttlsKey is the "<port>/<proto>" lookup key used in OptionSTARTTLS. +func starttlsKey(port uint16, proto string) string { + return fmt.Sprintf("%d/%s", port, proto) +} + +// serviceMessage mirrors the on-wire happydns.ServiceMessage shape, kept +// local so this module does not depend on happyDomain core. Same pattern +// as checker-caa/checker/collect.go. +type serviceMessage struct { + Type string `json:"_svctype"` + Domain string `json:"_domain"` + Service json.RawMessage `json:"Service"` +} + +// tlsasPayload mirrors the JSON shape of svcs.TLSAs (services/tlsa.go). +type tlsasPayload struct { + Records []tlsaRecord `json:"tlsa"` +} + +// tlsaRecord decodes one dns.TLSA as serialized by miekg/dns. The Hdr.Name +// is how we learn which endpoint each record applies to; Certificate is +// already a lowercase-hex string as miekg/dns emits it. 
+type tlsaRecord struct { + Hdr struct { + Name string `json:"Name"` + } `json:"Hdr"` + Usage uint8 `json:"Usage"` + Selector uint8 `json:"Selector"` + MatchingType uint8 `json:"MatchingType"` + Certificate string `json:"Certificate"` +} + +// defaultSTARTTLS maps common ports to the STARTTLS service name checker-tls +// expects. Endpoints not covered default to direct TLS; the user can override +// explicitly via the OptionSTARTTLS map. +var defaultSTARTTLS = map[uint16]string{ + 25: "smtp", + 110: "pop3", + 143: "imap", + 389: "ldap", + 587: "submission", + 5222: "xmpp-client", + 5269: "xmpp-server", +} + +// Collect walks the bound TLSAs service, groups records by (port, proto, +// base), emits one tls.endpoint.v1 discovery entry per group so checker-tls +// probes each of them, and returns DANEData with the user's TLSA records. +// No TLSA matching happens here; that's the rule's job: it reads the TLS +// chain via obs.GetRelated on the next evaluation. +func (p *daneProvider) Collect(ctx context.Context, opts sdk.CheckerOptions) (any, error) { + svc, err := serviceFromOptions(opts) + if err != nil { + return nil, err + } + if svc.Type != serviceType { + return nil, fmt.Errorf("service is %q, expected %q", svc.Type, serviceType) + } + + var pl tlsasPayload + if err := json.Unmarshal(svc.Service, &pl); err != nil { + return nil, fmt.Errorf("decode TLSAs service: %w", err) + } + + apex, _ := sdk.GetOption[string](opts, OptionDomain) + apex = strings.TrimSuffix(apex, ".") + subdomain, _ := sdk.GetOption[string](opts, OptionSubdomain) + subdomain = strings.TrimSuffix(subdomain, ".") + + // STARTTLS overrides: map of "port/proto" → service name. + var starttlsOverride map[string]string + if v, ok := opts[OptionSTARTTLS]; ok { + raw, _ := json.Marshal(v) + _ = json.Unmarshal(raw, &starttlsOverride) + } + + // Group records by endpoint key. 
+ type key struct { + Port uint16 + Proto string + Base string // base host, fully-qualified without trailing dot + } + groups := map[key][]TLSARecord{} + for _, r := range pl.Records { + m := tlsaOwner.FindStringSubmatch(strings.TrimSuffix(r.Hdr.Name, ".")) + if len(m) != 4 { + continue + } + port64, err := strconv.ParseUint(m[1], 10, 16) + if err != nil { + continue + } + base := m[3] + // Resolve base relative to the apex: TLSA owners in the service + // are typically stored relative to the service's subdomain + // bucket. Fall back to the apex when unspecified. + base = joinName(base, subdomain, apex) + + k := key{Port: uint16(port64), Proto: m[2], Base: base} + groups[k] = append(groups[k], TLSARecord{ + Usage: r.Usage, + Selector: r.Selector, + MatchingType: r.MatchingType, + Certificate: strings.ToLower(strings.TrimSpace(r.Certificate)), + }) + } + + // Deterministic output ordering keeps diffs quiet across runs. + keys := make([]key, 0, len(groups)) + for k := range groups { + keys = append(keys, k) + } + sort.Slice(keys, func(i, j int) bool { + if keys[i].Base != keys[j].Base { + return keys[i].Base < keys[j].Base + } + if keys[i].Port != keys[j].Port { + return keys[i].Port < keys[j].Port + } + return keys[i].Proto < keys[j].Proto + }) + + targets := make([]TargetResult, 0, len(keys)) + for _, k := range keys { + starttls := defaultSTARTTLS[k.Port] + if v, ok := starttlsOverride[starttlsKey(k.Port, k.Proto)]; ok { + starttls = v + } + t := TargetResult{ + Owner: tlsaOwnerName(k.Port, k.Proto, k.Base), + Host: k.Base, + Port: k.Port, + Proto: k.Proto, + STARTTLS: starttls, + Records: groups[k], + } + t.Ref = tlscontract.Ref(endpointFromTarget(t)) + targets = append(targets, t) + } + + return &DANEData{ + Targets: targets, + CollectedAt: time.Now().UTC(), + }, nil +} + +// endpointFromTarget builds the TLSEndpoint for a collected target. 
+func endpointFromTarget(t TargetResult) tlscontract.TLSEndpoint { + return tlscontract.TLSEndpoint{ + Host: t.Host, + Port: t.Port, + SNI: t.Host, + STARTTLS: t.STARTTLS, + RequireSTARTTLS: t.STARTTLS != "" && t.Port != 25, // SMTP on 25 stays opportunistic + } +} + +// DiscoverEntries publishes one tls.endpoint.v1 entry per target so +// checker-tls probes them in its next cycle. Implements sdk.DiscoveryPublisher. +func (p *daneProvider) DiscoverEntries(data any) ([]sdk.DiscoveryEntry, error) { + d, ok := data.(*DANEData) + if !ok || d == nil { + return nil, nil + } + out := make([]sdk.DiscoveryEntry, 0, len(d.Targets)) + for _, t := range d.Targets { + entry, err := tlscontract.NewEntry(endpointFromTarget(t)) + if err != nil { + return nil, err + } + out = append(out, entry) + } + return out, nil +} + +// serviceFromOptions extracts and decodes the happyDomain service payload. +func serviceFromOptions(opts sdk.CheckerOptions) (*serviceMessage, error) { + v, ok := opts[OptionService] + if !ok { + return nil, fmt.Errorf("service option missing") + } + raw, err := json.Marshal(v) + if err != nil { + return nil, fmt.Errorf("marshal service option: %w", err) + } + var svc serviceMessage + if err := json.Unmarshal(raw, &svc); err != nil { + return nil, fmt.Errorf("decode service option: %w", err) + } + return &svc, nil +} + +// joinName resolves a possibly-relative TLSA base name against the service's +// subdomain bucket and the zone apex, returning a fully-qualified host name +// without trailing dot. An empty base means "the subdomain/apex itself". +func joinName(base, subdomain, apex string) string { + base = strings.TrimSuffix(base, ".") + // Absolute match to apex: return apex; otherwise treat as relative. + if base == "" { + if subdomain != "" { + return strings.TrimSuffix(subdomain+"."+apex, ".") + } + return apex + } + // If base already ends with apex (fully qualified), keep as-is. 
+ if apex != "" && (base == apex || strings.HasSuffix(base, "."+apex)) { + return base + } + // Otherwise, base is relative to the subdomain bucket (or apex). + if subdomain != "" { + return strings.TrimSuffix(base+"."+subdomain+"."+apex, ".") + } + if apex != "" { + return base + "." + apex + } + return base +} diff --git a/checker/definition.go b/checker/definition.go new file mode 100644 index 0000000..9dba3cb --- /dev/null +++ b/checker/definition.go @@ -0,0 +1,68 @@ +package checker + +import ( + "time" + + sdk "git.happydns.org/checker-sdk-go/checker" + tls "git.happydns.org/checker-tls/checker" +) + +// Version defaults to "built-in"; standalone and plugin builds override it +// via -ldflags "-X .../checker.Version=...". +var Version = "built-in" + +// serviceType is the happyDomain service type this checker binds to. +const serviceType = "svcs.TLSAs" + +// Definition satisfies sdk.CheckerDefinitionProvider. +func (p *daneProvider) Definition() *sdk.CheckerDefinition { + return &sdk.CheckerDefinition{ + ID: "dane", + Name: "DANE / TLSA", + Version: Version, + Availability: sdk.CheckerAvailability{ + ApplyToService: true, + LimitToServices: []string{serviceType}, + }, + ObservationKeys: []sdk.ObservationKey{ObservationKeyDANE}, + HasHTMLReport: true, + Options: sdk.CheckerOptionsDocumentation{ + UserOpts: []sdk.CheckerOptionDocumentation{ + { + Id: OptionProbeTimeoutMs, + Type: "number", + Label: "Probe timeout (ms)", + Description: "Forwarded to checker-tls for each DANE endpoint.", + Default: float64(tls.DefaultProbeTimeoutMs), + }, + }, + RunOpts: []sdk.CheckerOptionDocumentation{ + { + Id: OptionDomain, + Type: "string", + Label: "Domain", + AutoFill: sdk.AutoFillDomainName, + Required: true, + }, + { + Id: OptionSubdomain, + Type: "string", + Label: "Subdomain", + AutoFill: sdk.AutoFillSubdomain, + }, + { + Id: OptionService, + Label: "TLSAs service", + AutoFill: sdk.AutoFillService, + Hide: true, + }, + }, + }, + Rules: Rules(), + Interval: 
&sdk.CheckIntervalSpec{ + Min: 6 * time.Hour, + Max: 7 * 24 * time.Hour, + Default: 24 * time.Hour, + }, + } +} diff --git a/checker/interactive.go b/checker/interactive.go new file mode 100644 index 0000000..47cf8d2 --- /dev/null +++ b/checker/interactive.go @@ -0,0 +1,167 @@ +//go:build standalone + +package checker + +import ( + "encoding/json" + "errors" + "fmt" + "net" + "net/http" + "strconv" + "strings" + + "github.com/miekg/dns" + + sdk "git.happydns.org/checker-sdk-go/checker" + tls "git.happydns.org/checker-tls/checker" +) + +// tlsaLookup fetches TLSA records for owner via the system resolver. +// It is a package variable so tests can swap it for a fixture. +var tlsaLookup = lookupTLSA + +// RenderForm lets a human run this checker standalone. The form only +// collects the endpoint coordinates; the expected TLSA records are read +// from DNS by ParseForm and the live certificate is fetched in-process by +// the SDK running checker-tls as a sibling (see RelatedProviders). +func (p *daneProvider) RenderForm() []sdk.CheckerOptionField { + return []sdk.CheckerOptionField{ + {Id: OptionDomain, Type: "string", Label: "Domain", Placeholder: "example.com", Required: true}, + {Id: "port", Type: "uint", Label: "Port", Default: float64(443), Required: true}, + {Id: "proto", Type: "string", Label: "Protocol", Choices: []string{"tcp", "udp"}, Default: "tcp"}, + { + Id: "starttls", + Type: "string", + Label: "STARTTLS override", + Description: "Leave empty to auto-derive from port (25→smtp, 587→submission, 143→imap, …).", + }, + { + Id: OptionProbeTimeoutMs, + Type: "uint", + Label: "Probe timeout (ms)", + Default: float64(tls.DefaultProbeTimeoutMs), + Description: "Forwarded to checker-tls for the live probe.", + }, + } +} + +// ParseForm turns the submitted endpoint into the same CheckerOptions +// shape happyDomain would feed Collect. 
The TLSA RRset expected by +// Collect is resolved live from DNS at _._.; if +// nothing is published there, no validation is possible and the form is +// re-rendered with the error. +func (p *daneProvider) ParseForm(r *http.Request) (sdk.CheckerOptions, error) { + domain := strings.TrimSuffix(strings.TrimSpace(r.FormValue(OptionDomain)), ".") + if domain == "" { + return nil, errors.New("domain is required") + } + portStr := strings.TrimSpace(r.FormValue("port")) + if portStr == "" { + return nil, errors.New("port is required") + } + port64, err := strconv.ParseUint(portStr, 10, 16) + if err != nil || port64 == 0 { + return nil, fmt.Errorf("invalid port %q: must be 1-65535", portStr) + } + port := uint16(port64) + proto := strings.TrimSpace(r.FormValue("proto")) + if proto == "" { + proto = "tcp" + } + if proto != "tcp" && proto != "udp" { + return nil, fmt.Errorf("invalid protocol %q: must be tcp or udp", proto) + } + + owner := tlsaOwnerName(port, proto, domain) + records, err := tlsaLookup(owner) + if err != nil { + return nil, fmt.Errorf("TLSA lookup for %s: %w", owner, err) + } + if len(records) == 0 { + return nil, fmt.Errorf("no TLSA records found at %s", owner) + } + + tlsaEntries := make([]map[string]any, 0, len(records)) + for _, t := range records { + tlsaEntries = append(tlsaEntries, map[string]any{ + "Hdr": map[string]any{"Name": owner}, + "Usage": t.Usage, + "Selector": t.Selector, + "MatchingType": t.MatchingType, + "Certificate": strings.ToLower(t.Certificate), + }) + } + body, err := json.Marshal(map[string]any{"tlsa": tlsaEntries}) + if err != nil { + return nil, fmt.Errorf("marshal TLSAs service: %w", err) + } + + opts := sdk.CheckerOptions{ + OptionDomain: domain, + OptionService: serviceMessage{ + Type: serviceType, + Domain: domain, + Service: body, + }, + } + + if s := strings.TrimSpace(r.FormValue("starttls")); s != "" { + opts[OptionSTARTTLS] = map[string]string{ + starttlsKey(port, proto): s, + } + } + if v := 
strings.TrimSpace(r.FormValue(OptionProbeTimeoutMs)); v != "" { + if n, err := strconv.Atoi(v); err == nil && n > 0 { + opts[OptionProbeTimeoutMs] = float64(n) + } + } + return opts, nil +} + +// RelatedProviders declares checker-tls as the sibling the SDK should run +// in-process during the interactive flow. The SDK harvests the discovery +// entries this checker publishes via DiscoverEntries and auto-fills +// checker-tls's OptionEndpoints (the option tagged +// sdk.AutoFillDiscoveryEntries in its definition), so the probe map the +// rule reads via GetRelated is populated with live data. +func (p *daneProvider) RelatedProviders() []sdk.ObservationProvider { + return []sdk.ObservationProvider{tls.Provider()} +} + +// lookupTLSA queries the system resolver for TLSA records at owner. +// Falls back to 1.1.1.1 when /etc/resolv.conf is unreadable. +func lookupTLSA(owner string) ([]*dns.TLSA, error) { + resolver, err := interactiveResolver() + if err != nil { + return nil, err + } + msg := new(dns.Msg) + msg.SetQuestion(dns.Fqdn(owner), dns.TypeTLSA) + msg.RecursionDesired = true + msg.SetEdns0(4096, true) + + c := new(dns.Client) + in, _, err := c.Exchange(msg, resolver) + if err != nil { + return nil, err + } + if in.Rcode != dns.RcodeSuccess && in.Rcode != dns.RcodeNameError { + return nil, fmt.Errorf("rcode %s", dns.RcodeToString[in.Rcode]) + } + var out []*dns.TLSA + for _, rr := range in.Answer { + if t, ok := rr.(*dns.TLSA); ok { + out = append(out, t) + } + } + return out, nil +} + +func interactiveResolver() (string, error) { + cfg, err := dns.ClientConfigFromFile("/etc/resolv.conf") + if err != nil || len(cfg.Servers) == 0 { + return net.JoinHostPort("1.1.1.1", "53"), nil + } + return net.JoinHostPort(cfg.Servers[0], cfg.Port), nil +} diff --git a/checker/interactive_test.go b/checker/interactive_test.go new file mode 100644 index 0000000..4f97732 --- /dev/null +++ b/checker/interactive_test.go @@ -0,0 +1,144 @@ +//go:build standalone + +package checker + 
+import ( + "encoding/json" + "net/http" + "net/http/httptest" + "net/url" + "strings" + "testing" + + "github.com/miekg/dns" +) + +// stubTLSA returns a synthetic TLSA RR with the given fields, avoiding the +// textual-parse boilerplate of dns.NewRR. +func stubTLSA(owner string, usage, selector, matching uint8, cert string) *dns.TLSA { + return &dns.TLSA{ + Hdr: dns.RR_Header{Name: dns.Fqdn(owner), Rrtype: dns.TypeTLSA, Class: dns.ClassINET, Ttl: 3600}, + Usage: usage, + Selector: selector, + MatchingType: matching, + Certificate: cert, + } +} + +func withStubLookup(t *testing.T, records []*dns.TLSA, err error) { + t.Helper() + prev := tlsaLookup + tlsaLookup = func(owner string) ([]*dns.TLSA, error) { + return records, err + } + t.Cleanup(func() { tlsaLookup = prev }) +} + +func postForm(values url.Values) *http.Request { + req := httptest.NewRequest("POST", "/check", strings.NewReader(values.Encode())) + req.Header.Set("Content-Type", "application/x-www-form-urlencoded") + req.ParseForm() + return req +} + +func TestParseForm_PopulatesServiceFromDNS(t *testing.T) { + withStubLookup(t, []*dns.TLSA{ + stubTLSA("_443._tcp.example.com", 3, 1, 1, "DEADBEEF"), + stubTLSA("_443._tcp.example.com", 2, 0, 1, "cafebabe"), + }, nil) + + p := &daneProvider{} + opts, err := p.ParseForm(postForm(url.Values{ + "domain_name": {"example.com"}, + "port": {"443"}, + "proto": {"tcp"}, + })) + if err != nil { + t.Fatalf("ParseForm: %v", err) + } + svc, ok := opts[OptionService].(serviceMessage) + if !ok { + t.Fatalf("service option has wrong type: %#v", opts[OptionService]) + } + if svc.Type != serviceType { + t.Errorf("service type = %q, want %q", svc.Type, serviceType) + } + if svc.Domain != "example.com" { + t.Errorf("service domain = %q, want example.com", svc.Domain) + } + + var body struct { + TLSA []struct { + Hdr struct { + Name string + } + Usage uint8 + Selector uint8 + MatchingType uint8 + Certificate string + } `json:"tlsa"` + } + if err := json.Unmarshal(svc.Service, 
&body); err != nil { + t.Fatalf("decode service body: %v", err) + } + if len(body.TLSA) != 2 { + t.Fatalf("got %d TLSA entries, want 2", len(body.TLSA)) + } + if body.TLSA[0].Certificate != "deadbeef" { + t.Errorf("expected lowercased cert, got %q", body.TLSA[0].Certificate) + } + if body.TLSA[0].Hdr.Name != "_443._tcp.example.com" { + t.Errorf("owner = %q, want _443._tcp.example.com", body.TLSA[0].Hdr.Name) + } +} + +func TestParseForm_NoRecordsIsError(t *testing.T) { + withStubLookup(t, nil, nil) + + p := &daneProvider{} + _, err := p.ParseForm(postForm(url.Values{ + "domain_name": {"example.com"}, + "port": {"443"}, + "proto": {"tcp"}, + })) + if err == nil { + t.Fatal("expected error when no TLSA records found, got nil") + } + if !strings.Contains(err.Error(), "no TLSA records") { + t.Errorf("unexpected error %v", err) + } +} + +func TestParseForm_StartTLSOverride(t *testing.T) { + withStubLookup(t, []*dns.TLSA{stubTLSA("_25._tcp.mail.example.com", 3, 1, 1, "aa")}, nil) + + p := &daneProvider{} + opts, err := p.ParseForm(postForm(url.Values{ + "domain_name": {"mail.example.com"}, + "port": {"25"}, + "proto": {"tcp"}, + "starttls": {"smtp"}, + })) + if err != nil { + t.Fatalf("ParseForm: %v", err) + } + override, ok := opts[OptionSTARTTLS].(map[string]string) + if !ok { + t.Fatalf("starttls option type = %T", opts[OptionSTARTTLS]) + } + if override["25/tcp"] != "smtp" { + t.Errorf("override[25/tcp] = %q, want smtp", override["25/tcp"]) + } +} + +func TestParseForm_InvalidPort(t *testing.T) { + p := &daneProvider{} + _, err := p.ParseForm(postForm(url.Values{ + "domain_name": {"example.com"}, + "port": {"0"}, + "proto": {"tcp"}, + })) + if err == nil { + t.Fatal("expected error for port 0") + } +} diff --git a/checker/provider.go b/checker/provider.go new file mode 100644 index 0000000..9e462f6 --- /dev/null +++ b/checker/provider.go @@ -0,0 +1,14 @@ +package checker + +import sdk "git.happydns.org/checker-sdk-go/checker" + +// Provider returns a new DANE 
observation provider. +func Provider() sdk.ObservationProvider { + return &daneProvider{} +} + +type daneProvider struct{} + +func (p *daneProvider) Key() sdk.ObservationKey { + return ObservationKeyDANE +} diff --git a/checker/report.go b/checker/report.go new file mode 100644 index 0000000..27efbd6 --- /dev/null +++ b/checker/report.go @@ -0,0 +1,268 @@ +package checker + +import ( + "bytes" + "encoding/json" + "fmt" + "html/template" + "sort" + + sdk "git.happydns.org/checker-sdk-go/checker" + tls "git.happydns.org/checker-tls/checker" +) + +// GetHTMLReport implements sdk.CheckerHTMLReporter. The report opens with a +// diagnosis-first section that lists the most common DANE failure modes +// actually detected on the user's targets, each with a one-shot remediation +// snippet; a per-target table follows for reference. +func (p *daneProvider) GetHTMLReport(ctx sdk.ReportContext) (string, error) { + var data DANEData + if err := json.Unmarshal(ctx.Data(), &data); err != nil { + return "", fmt.Errorf("decode DANE data: %w", err) + } + + probes := indexProbes(ctx.Related(tls.ObservationKeyTLSProbes)) + + rows := make([]reportRow, 0, len(data.Targets)) + for _, t := range data.Targets { + probe := probes[t.Ref] + status, cls := targetStatus(t, probe) + leaf := "—" + if probe != nil && len(probe.Chain) > 0 { + leaf = probe.Chain[0].Subject + } else if probe != nil && probe.Error != "" { + leaf = "handshake error" + } + rows = append(rows, reportRow{ + Owner: t.Owner, + Host: t.Host, + Port: t.Port, + Proto: t.Proto, + STARTTLS: t.STARTTLS, + RecordCount: len(t.Records), + StatusLabel: status, + StatusClass: cls, + Leaf: leaf, + }) + } + + view := reportView{ + CollectedAt: data.CollectedAt.Format("2006-01-02 15:04 MST"), + TargetCount: len(data.Targets), + Diagnoses: diagnose(data, probes), + Rows: rows, + CSS: template.CSS(reportCSS), + } + + var b bytes.Buffer + if err := reportTemplate.Execute(&b, view); err != nil { + return "", fmt.Errorf("render DANE report: 
%w", err) + } + return b.String(), nil +} + +// reportView is the rendering payload passed to reportTemplate. Pre-computing +// the per-row status label/class and leaf string keeps the template free of +// branching beyond simple range/if. +type reportView struct { + CollectedAt string + TargetCount int + Diagnoses []diagnosis + Rows []reportRow + CSS template.CSS +} + +type reportRow struct { + Owner string + Host string + Port uint16 + Proto string + STARTTLS string + RecordCount int + StatusLabel string + StatusClass string + Leaf string +} + +// diagnosis is a single actionable hint surfaced at the top of the report. +type diagnosis struct { + Severity string // crit | warn | info + Title string + Detail string + Fix string // ready-to-apply snippet (shell or zone fragment) +} + +// diagnose scans every target and produces the minimum set of high-signal +// cards users need to act on. Priority ordering (most-common first): +// +// 1. no_match: TLSA records do not cover the live cert (post-rotation miss). +// 2. handshake_failed: endpoint unreachable or TLS broken, DANE can't be +// validated at all. +// 3. pkix_chain_invalid: usage 0/1 published but public chain is broken. +// 4. usage_3_matches_issuer: DANE-EE selector matches an intermediate +// the record is probably miscategorized (usage 2 was intended). +// 5. no_probe_yet: quiet informational to avoid false alarms on first run. +func diagnose(data DANEData, probes map[string]*tls.TLSProbe) []diagnosis { + var out []diagnosis + + for _, t := range data.Targets { + probe := probes[t.Ref] + switch { + case probe == nil: + out = append(out, diagnosis{ + Severity: SeverityInfo, + Title: fmt.Sprintf("Waiting for first TLS probe on %s:%d", t.Host, t.Port), + Detail: "checker-tls has not yet probed this endpoint. 
This is normal immediately after publishing a new TLSA record; status will clear on the next cycle.", + }) + case !probeUsable(probe): + out = append(out, diagnosis{ + Severity: SeverityCrit, + Title: fmt.Sprintf("Cannot reach %s:%d to validate DANE", t.Host, t.Port), + Detail: "TLS handshake failed, DANE publishes hashes for a certificate nobody can see. Either the service is down, the port is blocked, or STARTTLS negotiation is broken.", + Fix: handshakeFix(t), + }) + default: + if summarizeMatches(t, probe).matched == 0 && len(t.Records) > 0 { + out = append(out, diagnosis{ + Severity: SeverityCrit, + Title: fmt.Sprintf("No TLSA record matches the live certificate on %s:%d", t.Host, t.Port), + Detail: "This is the most common DANE outage cause: the certificate was rotated without rolling over the TLSA RRset, and validating resolvers are now rejecting the connection. Publish a TLSA record for the new certificate before removing the old one.", + Fix: proposedTLSA(t, probe), + }) + } + if hasPKIXUsage(t) && (probe.ChainValid == nil || !*probe.ChainValid) { + out = append(out, diagnosis{ + Severity: SeverityCrit, + Title: fmt.Sprintf("Usage 0/1 needs a publicly-trusted chain on %s:%d", t.Host, t.Port), + Detail: "TLSA usages 0 (PKIX-TA) and 1 (PKIX-EE) require the certificate chain to validate against system roots. Either re-issue through a publicly-trusted CA or switch to usage 2 / 3, which skip PKIX.", + }) + } + if warn := suspiciousUsage(t, probe); warn != "" { + out = append(out, diagnosis{ + Severity: SeverityWarn, + Title: fmt.Sprintf("Suspicious TLSA usage on %s:%d", t.Host, t.Port), + Detail: warn, + }) + } + } + } + + // Stable: crit first, then warn, then info; preserving encounter order + // within each group keeps the table and the cards aligned. 
+ sort.SliceStable(out, func(i, j int) bool { + return sevRank(out[i].Severity) < sevRank(out[j].Severity) + }) + return out +} + +func sevRank(s string) int { + switch s { + case SeverityCrit: + return 0 + case SeverityWarn: + return 1 + default: + return 2 + } +} + +// hasPKIXUsage reports whether any TLSA record at this target demands PKIX +// validation (usage 0 or 1). +func hasPKIXUsage(t TargetResult) bool { + for _, r := range t.Records { + if r.Usage == UsagePKIXTA || r.Usage == UsagePKIXEE { + return true + } + } + return false +} + +// proposedTLSA renders a ready-to-paste replacement RR using the most common +// DANE-EE + SPKI + SHA-256 triplet computed from the live leaf. This is the +// profile Let's Encrypt users are pushed towards because it survives any +// cert rotation that keeps the same key pair. +func proposedTLSA(t TargetResult, p *tls.TLSProbe) string { + if p == nil || len(p.Chain) == 0 { + return "" + } + return fmt.Sprintf("%s IN TLSA 3 1 1 %s", t.Owner, p.Chain[0].SPKISHA256) +} + +// handshakeFix proposes a STARTTLS-aware first step when the probe failed. +func handshakeFix(t TargetResult) string { + if t.STARTTLS != "" { + return fmt.Sprintf("openssl s_client -connect %s:%d -starttls %s -servername %s", t.Host, t.Port, t.STARTTLS, t.Host) + } + return fmt.Sprintf("openssl s_client -connect %s:%d -servername %s", t.Host, t.Port, t.Host) +} + +func targetStatus(t TargetResult, p *tls.TLSProbe) (label, class string) { + if p == nil { + return "Waiting for probe", "unknown" + } + if !probeUsable(p) { + return "Handshake failed", "crit" + } + if len(t.Records) == 0 { + return "No records", "info" + } + matched := summarizeMatches(t, p).matched + if matched == 0 { + return "No match", "crit" + } + return fmt.Sprintf("%d/%d match", matched, len(t.Records)), "ok" +} + +var reportTemplate = template.Must(template.New("dane").Parse(` + + + +DANE report + + +
+

DANE / TLSA

+

Collected {{.CollectedAt}} · {{.TargetCount}} endpoint(s).

+{{with .Diagnoses}}
+

Action required

+{{range .}}
+

{{.Title}}

+

{{.Detail}}

+{{with .Fix}}
{{.}}
{{end}} +
+{{end}}
+{{end}}
+

Endpoints

+ + + +{{range .Rows}} + + + + + +{{end}} +
EndpointStatusRecordsObserved leaf
{{.Owner}}
{{.Proto}} → {{.Host}}:{{.Port}}{{with .STARTTLS}} · STARTTLS {{.}}{{end}}
{{.StatusLabel}}{{.RecordCount}}{{.Leaf}}
+
+
`)) + +const reportCSS = `body{font-family:system-ui,sans-serif;margin:0;background:#fafbfc;color:#1b1f23;} +main{max-width:980px;margin:0 auto;padding:1.5rem;} +h1{margin:0 0 .25rem 0;} +.meta{color:#586069;margin:0 0 1.5rem 0;} +section{margin-bottom:2rem;} +h2{border-bottom:1px solid #e1e4e8;padding-bottom:.25rem;} +.finding{border-left:4px solid;padding:.75rem 1rem;margin:.75rem 0;background:#fff;border-radius:4px;} +.finding h3{margin:0 0 .25rem 0;font-size:1rem;} +.finding.sev-crit{border-color:#d73a49;} +.finding.sev-warn{border-color:#dbab09;} +.finding.sev-info{border-color:#0366d6;} +.fix{background:#1b1f23;color:#fafbfc;padding:.5rem .75rem;border-radius:4px;overflow-x:auto;font-size:.85rem;} +table{width:100%;border-collapse:collapse;background:#fff;} +th,td{padding:.5rem .75rem;border-bottom:1px solid #e1e4e8;text-align:left;vertical-align:top;} +tr.status-crit td:nth-child(2){color:#d73a49;font-weight:600;} +tr.status-ok td:nth-child(2){color:#22863a;font-weight:600;} +tr.status-unknown td:nth-child(2){color:#586069;} +code{font-size:.85rem;} +small{color:#586069;}` diff --git a/checker/rule.go b/checker/rule.go new file mode 100644 index 0000000..f2078ba --- /dev/null +++ b/checker/rule.go @@ -0,0 +1,245 @@ +package checker + +import ( + "context" + "encoding/base64" + "encoding/hex" + "encoding/json" + "fmt" + "strings" + + sdk "git.happydns.org/checker-sdk-go/checker" + tls "git.happydns.org/checker-tls/checker" +) + +// Rules returns the full list of CheckRules exposed by the DANE checker. +// Each rule covers exactly one concern so the UI can show per-concern +// status rather than a single monolithic rule that multiplexes many codes. 
+func Rules() []sdk.CheckRule { + return []sdk.CheckRule{ + &hasRecordsRule{}, + &probeAvailableRule{}, + &handshakeOKRule{}, + &recordsMatchChainRule{}, + &pkixChainValidRule{}, + &usageCoherentRule{}, + } +} + +// ruleContext bundles the data rules typically need: the checker's own +// observation plus the map of related TLS probes keyed by endpoint Ref. +type ruleContext struct { + data DANEData + probes map[string]*tls.TLSProbe + // warn is a non-fatal issue encountered while loading related probes + // (e.g. the cross-checker lineage was unreachable). Rules surface it + // as an error state so operators can spot misconfiguration. + warn string + // err is a fatal error loading the checker's own observation. + err error +} + +// loadRuleContext fetches the DANE observation and the related TLS probes. +// Rules call this once and then filter on the fields they care about. +func loadRuleContext(ctx context.Context, obs sdk.ObservationGetter) *ruleContext { + rc := &ruleContext{} + if err := obs.Get(ctx, ObservationKeyDANE, &rc.data); err != nil { + rc.err = err + return rc + } + rc.probes, rc.warn = relatedTLSProbes(ctx, obs) + return rc +} + +// observationErrorState is the canonical short-circuit state emitted when a +// rule cannot load the DANE observation at all. +func observationErrorState(err error) sdk.CheckState { + return sdk.CheckState{ + Status: sdk.StatusError, + Message: fmt.Sprintf("Failed to read %s: %v", ObservationKeyDANE, err), + Code: "dane_observation_error", + } +} + +// targetMeta builds the common Meta map for per-endpoint states. +func targetMeta(t TargetResult) map[string]any { + return map[string]any{ + "host": t.Host, + "port": t.Port, + "proto": t.Proto, + "owner": t.Owner, + "starttls": t.STARTTLS, + "records": len(t.Records), + } +} + +// targetSubject is the human-readable subject tag used on per-endpoint states. 
+func targetSubject(t TargetResult) string { + return fmt.Sprintf("%s:%d (%s)", t.Host, t.Port, t.Proto) +} + +// probeUsable reports whether p carries a successfully-observed certificate +// chain. Rules that need to compare against the chain skip endpoints where +// this is false; the missing/failed cases are surfaced by probeAvailableRule +// and handshakeOKRule respectively, so other rules stay focused. +func probeUsable(p *tls.TLSProbe) bool { + return p != nil && p.Error == "" && len(p.Chain) > 0 +} + +// matchSummary aggregates per-target match outcomes so callers don't redo the +// per-record loop. firstUnmatchedIdx is -1 when every record matched. +type matchSummary struct { + matched, unmatched int + firstUnmatchedIdx int + firstUnmatchedReason string +} + +// summarizeMatches walks t.Records once and reports how many matched p's +// chain, plus the first unmatched index and reason for messaging. +func summarizeMatches(t TargetResult, p *tls.TLSProbe) matchSummary { + s := matchSummary{firstUnmatchedIdx: -1} + if p == nil { + return s + } + for i, rec := range t.Records { + ok, reason := matchRecord(rec, p) + if ok { + s.matched++ + continue + } + s.unmatched++ + if s.firstUnmatchedIdx < 0 { + s.firstUnmatchedIdx = i + s.firstUnmatchedReason = reason + } + } + return s +} + +// matchRecord returns true when rec matches some certificate at the chain +// slot implied by rec.Usage. reason explains the miss on a false return. +// +// Slot selection: +// +// - Usage 1 (PKIX-EE) and 3 (DANE-EE): leaf only. +// - Usage 0 (PKIX-TA) and 2 (DANE-TA): intermediates + the root the +// server presented (if any). We match against every non-leaf cert the +// server sent, because some deployments publish the root and some the +// intermediate; either is a valid TA reference for the connection's +// path. 
+func matchRecord(rec TLSARecord, p *tls.TLSProbe) (bool, string) { + if len(p.Chain) == 0 { + return false, "no certificates observed on the endpoint" + } + var slots []tls.CertInfo + switch rec.Usage { + case UsagePKIXEE, UsageDANEEE: + slots = p.Chain[:1] + case UsagePKIXTA, UsageDANETA: + if len(p.Chain) > 1 { + slots = p.Chain[1:] + } else { + // Self-signed / bundle with only a leaf: allow matching against + // the leaf as a degenerate TA so the user gets a hash comparison + // rather than a silent "no slot". + slots = p.Chain[:1] + } + default: + return false, fmt.Sprintf("unsupported TLSA usage %d", rec.Usage) + } + + for _, c := range slots { + got, err := recordCandidate(rec, c) + if err != nil { + return false, err.Error() + } + if strings.EqualFold(got, rec.Certificate) { + return true, "" + } + } + return false, fmt.Sprintf("expected %s, got none matching in chain", truncHex(rec.Certificate)) +} + +// recordCandidate returns the hex value the TLSA record should match for +// the (selector, matching_type) pair against this certificate slot. For +// matching_type 0 (Full), both sides are compared as hex-encoded DER. 
+func recordCandidate(rec TLSARecord, c tls.CertInfo) (string, error) { + var source string + switch rec.Selector { + case SelectorCert: + switch rec.MatchingType { + case MatchingFull: + der, err := base64.StdEncoding.DecodeString(c.DERBase64) + if err != nil { + return "", fmt.Errorf("decode cert DER: %w", err) + } + source = hex.EncodeToString(der) + case MatchingSHA256: + source = c.CertSHA256 + case MatchingSHA512: + source = c.CertSHA512 + default: + return "", fmt.Errorf("unsupported matching type %d", rec.MatchingType) + } + case SelectorSPKI: + switch rec.MatchingType { + case MatchingFull: + spki, err := base64.StdEncoding.DecodeString(c.SPKIDERBase64) + if err != nil { + return "", fmt.Errorf("decode SPKI DER: %w", err) + } + source = hex.EncodeToString(spki) + case MatchingSHA256: + source = c.SPKISHA256 + case MatchingSHA512: + source = c.SPKISHA512 + default: + return "", fmt.Errorf("unsupported matching type %d", rec.MatchingType) + } + default: + return "", fmt.Errorf("unsupported selector %d", rec.Selector) + } + return source, nil +} + +// parseTLSProbeMap decodes one related-observation payload into its constituent +// probes, keyed by endpoint Ref. Returns nil on decode error (caller skips). +func parseTLSProbeMap(data []byte) map[string]tls.TLSProbe { + var payload struct { + Probes map[string]tls.TLSProbe `json:"probes"` + } + if err := json.Unmarshal(data, &payload); err != nil { + return nil + } + return payload.Probes +} + +// relatedTLSProbes indexes TLS probes fetched via GetRelated by endpoint Ref. +func relatedTLSProbes(ctx context.Context, obs sdk.ObservationGetter) (map[string]*tls.TLSProbe, string) { + related, err := obs.GetRelated(ctx, tls.ObservationKeyTLSProbes) + if err != nil { + return nil, "related TLS probes unavailable: " + err.Error() + } + return indexProbes(related), "" +} + +// indexProbes flattens a slice of related TLS-probe observations into a probe +// map keyed by endpoint Ref. 
Shared by the rule path (relatedTLSProbes) and +// the report path (GetHTMLReport), which receive the same RelatedObservation +// type from different SDK entry points. +func indexProbes(related []sdk.RelatedObservation) map[string]*tls.TLSProbe { + out := map[string]*tls.TLSProbe{} + for _, ro := range related { + for k, v := range parseTLSProbeMap(ro.Data) { + out[k] = &v + } + } + return out +} + +func truncHex(s string) string { + if len(s) > 12 { + return s[:12] + "…" + } + return s +} diff --git a/checker/rules_handshake.go b/checker/rules_handshake.go new file mode 100644 index 0000000..2047ea1 --- /dev/null +++ b/checker/rules_handshake.go @@ -0,0 +1,57 @@ +package checker + +import ( + "context" + + sdk "git.happydns.org/checker-sdk-go/checker" +) + +// handshakeOKRule reports whether the TLS handshake succeeded on every +// endpoint that has been probed. A failing handshake means DANE cannot be +// validated regardless of what TLSA records are published. +type handshakeOKRule struct{} + +func (r *handshakeOKRule) Name() string { return "dane.handshake_ok" } +func (r *handshakeOKRule) Description() string { + return "Verifies the TLS handshake succeeds on every DANE endpoint so the presented chain can be compared to TLSA records." 
+} + +func (r *handshakeOKRule) Evaluate(ctx context.Context, obs sdk.ObservationGetter, _ sdk.CheckerOptions) []sdk.CheckState { + rc := loadRuleContext(ctx, obs) + if rc.err != nil { + return []sdk.CheckState{observationErrorState(rc.err)} + } + var out []sdk.CheckState + tested := 0 + for _, t := range rc.data.Targets { + probe := rc.probes[t.Ref] + if probe == nil { + continue // covered by probeAvailableRule + } + tested++ + if !probeUsable(probe) { + out = append(out, sdk.CheckState{ + Status: sdk.StatusCrit, + Code: "dane_handshake_failed", + Subject: targetSubject(t), + Message: "TLS handshake failed, cannot validate DANE: " + probe.Error, + Meta: targetMeta(t), + }) + } + } + if len(out) == 0 { + if tested == 0 { + return []sdk.CheckState{{ + Status: sdk.StatusUnknown, + Code: "dane_handshake_ok_skipped", + Message: "No probed endpoint to evaluate (waiting for checker-tls).", + }} + } + return []sdk.CheckState{{ + Status: sdk.StatusOK, + Code: "dane_handshake_ok", + Message: "TLS handshake succeeds on every probed endpoint.", + }} + } + return out +} diff --git a/checker/rules_match.go b/checker/rules_match.go new file mode 100644 index 0000000..b8a7f0e --- /dev/null +++ b/checker/rules_match.go @@ -0,0 +1,80 @@ +package checker + +import ( + "context" + "fmt" + + sdk "git.happydns.org/checker-sdk-go/checker" +) + +// recordsMatchChainRule is the core DANE check: for every endpoint whose +// handshake succeeded, at least one declared TLSA record must match the +// certificate chain presented by the server (RFC 6698 §2.1 OR semantics). +// +// This is the most common DANE outage vector, a certificate rotation +// without a matching TLSA rollover, so it deserves its own rule and its +// own per-endpoint states. 
+type recordsMatchChainRule struct{} + +func (r *recordsMatchChainRule) Name() string { return "dane.records_match_chain" } +func (r *recordsMatchChainRule) Description() string { + return "Verifies that at least one TLSA record matches the certificate chain presented by each endpoint." +} + +func (r *recordsMatchChainRule) Evaluate(ctx context.Context, obs sdk.ObservationGetter, _ sdk.CheckerOptions) []sdk.CheckState { + rc := loadRuleContext(ctx, obs) + if rc.err != nil { + return []sdk.CheckState{observationErrorState(rc.err)} + } + var out []sdk.CheckState + tested := 0 + for _, t := range rc.data.Targets { + probe := rc.probes[t.Ref] + if !probeUsable(probe) { + continue // covered by probeAvailableRule / handshakeOKRule + } + if len(t.Records) == 0 { + continue // covered by hasRecordsRule + } + tested++ + subj := targetSubject(t) + meta := targetMeta(t) + + s := summarizeMatches(t, probe) + meta["matched"] = s.matched + meta["unmatched"] = s.unmatched + + if s.matched > 0 { + out = append(out, sdk.CheckState{ + Status: sdk.StatusOK, + Code: "dane_match_ok", + Subject: subj, + Message: fmt.Sprintf("%d/%d TLSA record(s) match the presented certificate chain.", s.matched, s.matched+s.unmatched), + Meta: meta, + }) + continue + } + msg := "No TLSA record matches the presented certificate chain." 
+ if s.firstUnmatchedReason != "" { + msg += " " + s.firstUnmatchedReason + } + meta["first_unmatched_index"] = s.firstUnmatchedIdx + out = append(out, sdk.CheckState{ + Status: sdk.StatusCrit, + Code: "dane_no_match", + Subject: subj, + Message: msg, + Meta: meta, + }) + } + if len(out) == 0 { + if tested == 0 { + return []sdk.CheckState{{ + Status: sdk.StatusUnknown, + Code: "dane_records_match_chain_skipped", + Message: "No usable probe/records pair to evaluate.", + }} + } + } + return out +} diff --git a/checker/rules_pkix.go b/checker/rules_pkix.go new file mode 100644 index 0000000..8977e7a --- /dev/null +++ b/checker/rules_pkix.go @@ -0,0 +1,61 @@ +package checker + +import ( + "context" + + sdk "git.happydns.org/checker-sdk-go/checker" +) + +// pkixChainValidRule reports whether endpoints that publish PKIX-dependent +// TLSA usages (0 or 1) also present a certificate chain that validates +// against the system trust store. DANE usages 2/3 are unaffected and +// skipped entirely by this rule. +type pkixChainValidRule struct{} + +func (r *pkixChainValidRule) Name() string { return "dane.pkix_chain_valid" } +func (r *pkixChainValidRule) Description() string { + return "When TLSA usages 0 or 1 are published, verifies the certificate chain also validates against system trust roots." 
+} + +func (r *pkixChainValidRule) Evaluate(ctx context.Context, obs sdk.ObservationGetter, _ sdk.CheckerOptions) []sdk.CheckState { + rc := loadRuleContext(ctx, obs) + if rc.err != nil { + return []sdk.CheckState{observationErrorState(rc.err)} + } + var out []sdk.CheckState + tested := 0 + for _, t := range rc.data.Targets { + probe := rc.probes[t.Ref] + if !probeUsable(probe) { + continue + } + if !hasPKIXUsage(t) { + continue + } + tested++ + if probe.ChainValid == nil || !*probe.ChainValid { + out = append(out, sdk.CheckState{ + Status: sdk.StatusCrit, + Code: "dane_pkix_chain_invalid", + Subject: targetSubject(t), + Message: "Usage 0/1 requires a publicly-trusted chain, but the certificate chain did not validate against system roots.", + Meta: targetMeta(t), + }) + } + } + if len(out) == 0 { + if tested == 0 { + return []sdk.CheckState{{ + Status: sdk.StatusUnknown, + Code: "dane_pkix_chain_valid_skipped", + Message: "No endpoint publishes PKIX-dependent TLSA usages (0/1).", + }} + } + return []sdk.CheckState{{ + Status: sdk.StatusOK, + Code: "dane_pkix_chain_valid_ok", + Message: "Every endpoint with PKIX-dependent usages presents a publicly-trusted chain.", + }} + } + return out +} diff --git a/checker/rules_probe.go b/checker/rules_probe.go new file mode 100644 index 0000000..c9b322c --- /dev/null +++ b/checker/rules_probe.go @@ -0,0 +1,61 @@ +package checker + +import ( + "context" + + sdk "git.happydns.org/checker-sdk-go/checker" +) + +// probeAvailableRule reports whether the downstream TLS checker has probed +// every endpoint we published. Absent probes are common immediately after a +// new TLSA record is published and should not flap the service red. +type probeAvailableRule struct{} + +func (r *probeAvailableRule) Name() string { return "dane.probe_available" } +func (r *probeAvailableRule) Description() string { + return "Verifies a TLS probe is available for every DANE endpoint so the chain can be compared to TLSA records." 
+} + +func (r *probeAvailableRule) Evaluate(ctx context.Context, obs sdk.ObservationGetter, _ sdk.CheckerOptions) []sdk.CheckState { + rc := loadRuleContext(ctx, obs) + if rc.err != nil { + return []sdk.CheckState{observationErrorState(rc.err)} + } + if rc.warn != "" { + return []sdk.CheckState{{ + Status: sdk.StatusError, + Code: "dane_observation_warning", + Message: rc.warn, + }} + } + if len(rc.data.Targets) == 0 { + return []sdk.CheckState{{ + Status: sdk.StatusOK, + Code: "dane_probe_available_skipped", + Message: "No DANE endpoints to probe.", + }} + } + out := make([]sdk.CheckState, 0, len(rc.data.Targets)) + for _, t := range rc.data.Targets { + subj := targetSubject(t) + meta := targetMeta(t) + if rc.probes[t.Ref] == nil { + out = append(out, sdk.CheckState{ + Status: sdk.StatusUnknown, + Code: "dane_no_probe", + Subject: subj, + Message: "No TLS probe available yet for this endpoint; re-evaluate after the next checker-tls cycle.", + Meta: meta, + }) + continue + } + out = append(out, sdk.CheckState{ + Status: sdk.StatusOK, + Code: "dane_probe_available_ok", + Subject: subj, + Message: "TLS probe available for this endpoint.", + Meta: meta, + }) + } + return out +} diff --git a/checker/rules_records.go b/checker/rules_records.go new file mode 100644 index 0000000..d84c039 --- /dev/null +++ b/checker/rules_records.go @@ -0,0 +1,36 @@ +package checker + +import ( + "context" + + sdk "git.happydns.org/checker-sdk-go/checker" +) + +// hasRecordsRule reports whether the TLSAs service declares any TLSA record +// at all. Without records there is nothing for DANE to validate. +type hasRecordsRule struct{} + +func (r *hasRecordsRule) Name() string { return "dane.has_records" } +func (r *hasRecordsRule) Description() string { + return "Verifies that at least one TLSA record is declared on the service." 
+} + +func (r *hasRecordsRule) Evaluate(ctx context.Context, obs sdk.ObservationGetter, _ sdk.CheckerOptions) []sdk.CheckState { + rc := loadRuleContext(ctx, obs) + if rc.err != nil { + return []sdk.CheckState{observationErrorState(rc.err)} + } + if len(rc.data.Targets) == 0 { + return []sdk.CheckState{{ + Status: sdk.StatusUnknown, + Code: "dane_no_records", + Message: "No TLSA records declared on this service.", + }} + } + return []sdk.CheckState{{ + Status: sdk.StatusOK, + Code: "dane_has_records_ok", + Message: "TLSA records are declared for all bound endpoints.", + Meta: map[string]any{"endpoints": len(rc.data.Targets)}, + }} +} diff --git a/checker/rules_usage.go b/checker/rules_usage.go new file mode 100644 index 0000000..86eac94 --- /dev/null +++ b/checker/rules_usage.go @@ -0,0 +1,86 @@ +package checker + +import ( + "context" + "strings" + + sdk "git.happydns.org/checker-sdk-go/checker" + tls "git.happydns.org/checker-tls/checker" +) + +// usageCoherentRule flags TLSA records whose declared usage contradicts the +// chain slot their hash actually matches, typically a record published as +// usage 1 or 3 (end-entity) whose hash in fact matches an intermediate. +// That is almost always a publisher error: the intended usage was 0 or 2. +type usageCoherentRule struct{} + +func (r *usageCoherentRule) Name() string { return "dane.usage_coherent" } +func (r *usageCoherentRule) Description() string { + return "Flags TLSA records whose declared usage does not match the chain slot they actually hash (e.g. usage 3 matching an intermediate)." 
+} + +func (r *usageCoherentRule) Evaluate(ctx context.Context, obs sdk.ObservationGetter, _ sdk.CheckerOptions) []sdk.CheckState { + rc := loadRuleContext(ctx, obs) + if rc.err != nil { + return []sdk.CheckState{observationErrorState(rc.err)} + } + var out []sdk.CheckState + tested := 0 + for _, t := range rc.data.Targets { + probe := rc.probes[t.Ref] + if !probeUsable(probe) || len(probe.Chain) < 2 { + continue + } + tested++ + warn := suspiciousUsage(t, probe) + if warn != "" { + out = append(out, sdk.CheckState{ + Status: sdk.StatusWarn, + Code: "dane_usage_incoherent", + Subject: targetSubject(t), + Message: warn, + Meta: targetMeta(t), + }) + } + } + if len(out) == 0 { + if tested == 0 { + return []sdk.CheckState{{ + Status: sdk.StatusUnknown, + Code: "dane_usage_coherent_skipped", + Message: "No multi-cert chain probed yet; cannot assess usage coherence.", + }} + } + return []sdk.CheckState{{ + Status: sdk.StatusOK, + Code: "dane_usage_coherent_ok", + Message: "End-entity TLSA records match end-entity certificates on every probed chain.", + }} + } + return out +} + +// suspiciousUsage returns a human-readable hint when a record hash matches a +// chain slot that contradicts its declared usage (e.g. usage 3 whose hash +// actually matches the intermediate), almost always a publisher error. Used +// by both usageCoherentRule and the HTML report. +func suspiciousUsage(t TargetResult, p *tls.TLSProbe) string { + if p == nil || len(p.Chain) < 2 { + return "" + } + for _, r := range t.Records { + if r.Usage != UsageDANEEE && r.Usage != UsagePKIXEE { + continue + } + for _, c := range p.Chain[1:] { + cand, err := recordCandidate(r, c) + if err != nil { + continue + } + if strings.EqualFold(cand, r.Certificate) { + return "A record declared with usage 1/3 (end-entity) actually matches an intermediate certificate. It should probably use usage 0 or 2 (trust-anchor) instead." 
+ } + } + } + return "" +} diff --git a/checker/types.go b/checker/types.go new file mode 100644 index 0000000..75bfcbc --- /dev/null +++ b/checker/types.go @@ -0,0 +1,106 @@ +// Package checker implements the DANE/TLSA checker for happyDomain. +// +// This checker is bound to the svcs.TLSAs service. Collect takes the TLSA +// records the user published (or plans to publish) for the service, derives +// one TLS endpoint per distinct (port, proto, base name), and declares those +// endpoints as tls.endpoint.v1 discovery entries. checker-tls then probes +// them; on the next evaluation, this checker reads the related TLS probes +// via obs.GetRelated and verifies each TLSA record matches the certificate +// chain the probe observed. +// +// The user-visible contract matches what DANE deployers expect: +// +// - Usage 0 (PKIX-TA) / 1 (PKIX-EE): also require the PKIX chain to be +// publicly trusted. +// - Usage 2 (DANE-TA) / 3 (DANE-EE): trust the TLSA as the anchor; PKIX +// validity is informational. +// - Selector 0 (Cert) / 1 (SPKI) and matching types 0/1/2 (Full/SHA-256/ +// SHA-512) are matched against the chain slot implied by the usage. +package checker + +import "time" + +// ObservationKeyDANE is the observation key this checker writes. +const ObservationKeyDANE = "dane_checks" + +// Option ids on CheckerOptions. +const ( + // OptionService is auto-filled by the happyDomain host with the + // svcs.TLSAs service payload this checker is bound to. + OptionService = "service" + + // OptionDomain is auto-filled with the domain apex. TLSA owner names + // in the service are relative to this apex. + OptionDomain = "domain_name" + + // OptionSubdomain is the optional sub-zone under which the TLSAs + // service lives (matches the svcs.TLSAs analyzer's subdomain bucket). + OptionSubdomain = "subdomain" + + // OptionProbeTimeoutMs is how long each underlying TLS probe is allowed. + // Passed through to checker-tls verbatim via the discovery entry options. 
+ OptionProbeTimeoutMs = "probeTimeoutMs" + + // OptionSTARTTLS is an optional per-endpoint STARTTLS hint keyed by + // "/" → RFC 6335 service name (e.g. "25/tcp" → "smtp", + // "587/tcp" → "submission"). Common ports auto-map via a built-in table. + OptionSTARTTLS = "starttls" +) + +// Severity constants mirror checker-tls. +const ( + SeverityCrit = "crit" + SeverityWarn = "warn" + SeverityInfo = "info" +) + +// TLSA field enum constants (RFC 6698 §2.1). +const ( + UsagePKIXTA uint8 = 0 + UsagePKIXEE uint8 = 1 + UsageDANETA uint8 = 2 + UsageDANEEE uint8 = 3 + + SelectorCert uint8 = 0 + SelectorSPKI uint8 = 1 + + MatchingFull uint8 = 0 + MatchingSHA256 uint8 = 1 + MatchingSHA512 uint8 = 2 +) + +// DANEData is the full payload the checker writes under ObservationKeyDANE. +type DANEData struct { + // Targets is one entry per (port, proto, basename) triplet extracted + // from the TLSAs service. + Targets []TargetResult `json:"targets"` + CollectedAt time.Time `json:"collected_at"` +} + +// TargetResult groups all TLSA records declared on a single endpoint and +// carries enough context to render an actionable HTML row per endpoint. +type TargetResult struct { + // Owner is the fully qualified DANE owner name (_._.). + Owner string `json:"owner"` + // Host is the connection target (typically the base name the TLSA + // records live under, or its MX/SRV target when relevant). + Host string `json:"host"` + Port uint16 `json:"port"` + Proto string `json:"proto"` + STARTTLS string `json:"starttls,omitempty"` + + // Ref ties this target to the tls.endpoint.v1 discovery entry the + // checker emitted, so the rule can pick the matching RelatedObservation. + Ref string `json:"ref"` + + // Records are the TLSA records declared for this endpoint. + Records []TLSARecord `json:"records"` +} + +// TLSARecord is a user-facing view of a single dns.TLSA record. 
+type TLSARecord struct { + Usage uint8 `json:"usage"` + Selector uint8 `json:"selector"` + MatchingType uint8 `json:"matching_type"` + Certificate string `json:"certificate"` // lowercase hex +} diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..318b0a6 --- /dev/null +++ b/go.mod @@ -0,0 +1,17 @@ +module git.happydns.org/checker-dane + +go 1.25.0 + +require ( + git.happydns.org/checker-sdk-go v1.4.0 + git.happydns.org/checker-tls v0.6.1 + github.com/miekg/dns v1.1.72 +) + +require ( + golang.org/x/mod v0.31.0 // indirect + golang.org/x/net v0.48.0 // indirect + golang.org/x/sync v0.19.0 // indirect + golang.org/x/sys v0.39.0 // indirect + golang.org/x/tools v0.40.0 // indirect +) diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..f00160f --- /dev/null +++ b/go.sum @@ -0,0 +1,18 @@ +git.happydns.org/checker-sdk-go v1.4.0 h1:sO8EnF3suhNgYLRsbmCZWJOymH/oNMrOUqj3FEzJArs= +git.happydns.org/checker-sdk-go v1.4.0/go.mod h1:aNAcfYFfbhvH9kJhE0Njp5GX0dQbxdRB0rJ0KvSC5nI= +git.happydns.org/checker-tls v0.6.1 h1:YJp9Q+1aJZ6wATyUZbRh67ZtERN6Mp4Sje8ld2dNFuo= +git.happydns.org/checker-tls v0.6.1/go.mod h1:9tpnxg0iOwS+7If64DRG1jqYonUAgxOBuxwfF5mVkL4= +github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= +github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/miekg/dns v1.1.72 h1:vhmr+TF2A3tuoGNkLDFK9zi36F2LS+hKTRW0Uf8kbzI= +github.com/miekg/dns v1.1.72/go.mod h1:+EuEPhdHOsfk6Wk5TT2CzssZdqkmFhf8r+aVyDEToIs= +golang.org/x/mod v0.31.0 h1:HaW9xtz0+kOcWKwli0ZXy79Ix+UW/vOfmWI5QVd2tgI= +golang.org/x/mod v0.31.0/go.mod h1:43JraMp9cGx1Rx3AqioxrbrhNsLl2l/iNAvuBkrezpg= +golang.org/x/net v0.48.0 h1:zyQRTTrjc33Lhh0fBgT/H3oZq9WuvRR5gPC70xpDiQU= +golang.org/x/net v0.48.0/go.mod h1:+ndRgGjkh8FGtu1w1FGbEC31if4VrNVMuKTgcAAnQRY= +golang.org/x/sync v0.19.0 h1:vV+1eWNmZ5geRlYjzm2adRgW2/mcpevXNg50YZtPCE4= +golang.org/x/sync v0.19.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI= +golang.org/x/sys 
v0.39.0 h1:CvCKL8MeisomCi6qNZ+wbb0DN9E5AATixKsvNtMoMFk= +golang.org/x/sys v0.39.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= +golang.org/x/tools v0.40.0 h1:yLkxfA+Qnul4cs9QA3KnlFu0lVmd8JJfoq+E41uSutA= +golang.org/x/tools v0.40.0/go.mod h1:Ik/tzLRlbscWpqqMRjyWYDisX8bG13FrdXp3o4Sr9lc= diff --git a/main.go b/main.go new file mode 100644 index 0000000..f0fdce8 --- /dev/null +++ b/main.go @@ -0,0 +1,23 @@ +package main + +import ( + "flag" + "log" + + dane "git.happydns.org/checker-dane/checker" + "git.happydns.org/checker-sdk-go/checker/server" +) + +var Version = "custom-build" + +var listenAddr = flag.String("listen", ":8080", "HTTP listen address") + +func main() { + flag.Parse() + dane.Version = Version + + srv := server.New(dane.Provider()) + if err := srv.ListenAndServe(*listenAddr); err != nil { + log.Fatalf("server error: %v", err) + } +} diff --git a/plugin/plugin.go b/plugin/plugin.go new file mode 100644 index 0000000..71afa4f --- /dev/null +++ b/plugin/plugin.go @@ -0,0 +1,16 @@ +// Command plugin is the happyDomain plugin entrypoint for the DANE/TLSA +// checker. Built with -buildmode=plugin and loaded at runtime. +package main + +import ( + dane "git.happydns.org/checker-dane/checker" + sdk "git.happydns.org/checker-sdk-go/checker" +) + +var Version = "custom-build" + +func NewCheckerPlugin() (*sdk.CheckerDefinition, sdk.ObservationProvider, error) { + dane.Version = Version + prvd := dane.Provider() + return prvd.(sdk.CheckerDefinitionProvider).Definition(), prvd, nil +}