Initial commit
Generic SRV records checker for happyDomain.
For each SRV record attached to an svcs.UnknownSRV service, the checker
resolves every target and probes reachability:
- DNS resolution (A/AAAA), CNAME detection (RFC 2782 violation),
null-target detection (RFC 2782: a target of "." means the service is decidedly not available)
- TCP connect to target:port for _tcp SRVs
- UDP probe for _udp SRVs, using ICMP port-unreachable detection
The checker also publishes TLS endpoints (host, port, SNI) for every
SRV target hitting a well-known direct-TLS port (443, 465, 636, 853,
993, 995, 5061, 5223, …) via the EndpointDiscoverer SDK interface, so
a downstream TLS checker can pick them up.
The HTML report groups records as cards and surfaces the most common
failure scenarios (DNS failure, CNAME target, TCP unreachable,
null-target) at the top with remediation guidance.
This commit is contained in:
commit
90f1b4943f
27 changed files with 2809 additions and 0 deletions
228
checker/collect.go
Normal file
228
checker/collect.go
Normal file
|
|
@ -0,0 +1,228 @@
|
|||
// This file is part of the happyDomain (R) project.
|
||||
// Copyright (c) 2020-2026 happyDomain
|
||||
// Authors: Pierre-Olivier Mercier, et al.
|
||||
//
|
||||
// This program is offered under a commercial and under the AGPL license.
|
||||
// For commercial licensing, contact us at <contact@happydomain.org>.
|
||||
//
|
||||
// For AGPL licensing:
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Affero General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU Affero General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Affero General Public License
|
||||
// along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
package checker
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"net"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
sdk "git.happydns.org/checker-sdk-go/checker"
|
||||
happydns "git.happydns.org/happyDomain/model"
|
||||
)
|
||||
|
||||
// unknownSRVPayload is the minimal JSON shape decoded from the body of an
// svcs.UnknownSRV service: a list of SRV records under the "srv" key, each
// carrying its owner name (Hdr.Name) and the four SRV RDATA fields.
//
// The records are decoded by hand (instead of importing miekg/dns) so the
// checker stays light and its build surface minimal.
type unknownSRVPayload struct {
	Records []struct {
		// Hdr holds the subset of the resource-record header we need.
		Hdr struct {
			// Name is the record owner name.
			Name string `json:"Name"`
		} `json:"Hdr"`
		// Target is the host providing the service.
		Target string `json:"Target"`
		// Port is the port on Target where the service is offered.
		Port uint16 `json:"Port"`
		// Priority and Weight are the SRV selection fields, kept as-is.
		Priority uint16 `json:"Priority"`
		Weight   uint16 `json:"Weight"`
	} `json:"srv"`
}
|
||||
|
||||
func (p *srvProvider) Collect(ctx context.Context, opts sdk.CheckerOptions) (any, error) {
|
||||
svcMsg, ok := sdk.GetOption[happydns.ServiceMessage](opts, "service")
|
||||
if !ok {
|
||||
return p.collectFallback(ctx, opts)
|
||||
}
|
||||
if svcMsg.Type != "svcs.UnknownSRV" {
|
||||
return nil, fmt.Errorf("service type is %q, expected svcs.UnknownSRV", svcMsg.Type)
|
||||
}
|
||||
|
||||
var payload unknownSRVPayload
|
||||
if err := json.Unmarshal(svcMsg.Service, &payload); err != nil {
|
||||
return nil, fmt.Errorf("failed to decode UnknownSRV: %w", err)
|
||||
}
|
||||
if len(payload.Records) == 0 {
|
||||
return nil, fmt.Errorf("service contains no SRV records")
|
||||
}
|
||||
|
||||
subdomain, _ := sdk.GetOption[string](opts, "subdomain")
|
||||
domain, _ := sdk.GetOption[string](opts, "domain")
|
||||
|
||||
serviceDomain := strings.TrimSuffix(subdomain, ".")
|
||||
if domain != "" {
|
||||
if serviceDomain != "" {
|
||||
serviceDomain += "." + strings.TrimSuffix(domain, ".")
|
||||
} else {
|
||||
serviceDomain = strings.TrimSuffix(domain, ".")
|
||||
}
|
||||
}
|
||||
|
||||
tcpTimeout := durationOpt(opts, "tcpTimeout", 3000)
|
||||
udpTimeout := durationOpt(opts, "udpTimeout", 2000)
|
||||
|
||||
data := &SRVData{
|
||||
ServiceDomain: serviceDomain,
|
||||
Records: make([]SRVRecord, 0, len(payload.Records)),
|
||||
}
|
||||
|
||||
for _, r := range payload.Records {
|
||||
owner := strings.TrimSuffix(r.Hdr.Name, ".")
|
||||
svc, proto := parseOwner(owner, serviceDomain)
|
||||
|
||||
rec := SRVRecord{
|
||||
Service: svc,
|
||||
Proto: proto,
|
||||
Owner: owner,
|
||||
Target: strings.TrimSuffix(r.Target, "."),
|
||||
Port: r.Port,
|
||||
Priority: r.Priority,
|
||||
Weight: r.Weight,
|
||||
}
|
||||
resolveAndProbe(ctx, &rec, tcpTimeout, udpTimeout)
|
||||
data.Records = append(data.Records, rec)
|
||||
}
|
||||
|
||||
return data, nil
|
||||
}
|
||||
|
||||
// Owners that don't match _svc._proto have no proto we can trust, so we
|
||||
// skip probing rather than silently defaulting to TCP and reporting a misleading status.
|
||||
func resolveAndProbe(ctx context.Context, rec *SRVRecord, tcpTimeout, udpTimeout time.Duration) {
|
||||
// RFC 2782: "." target means "service decidedly not available".
|
||||
if rec.Target == "" || rec.Target == "." {
|
||||
rec.IsNullTarget = true
|
||||
return
|
||||
}
|
||||
|
||||
// CNAME detection (RFC 2782 §"Usage rules": target MUST be a name that
|
||||
// resolves to A/AAAA records directly, not a CNAME).
|
||||
if cname, err := net.DefaultResolver.LookupCNAME(ctx, rec.Target); err == nil {
|
||||
canon := strings.TrimSuffix(cname, ".")
|
||||
if canon != "" && !strings.EqualFold(canon, rec.Target) {
|
||||
rec.IsCNAME = true
|
||||
rec.CNAMEChain = []string{rec.Target, canon}
|
||||
}
|
||||
}
|
||||
|
||||
ips, err := net.DefaultResolver.LookupIPAddr(ctx, rec.Target)
|
||||
if err != nil {
|
||||
rec.ResolveError = err.Error()
|
||||
return
|
||||
}
|
||||
for _, ip := range ips {
|
||||
rec.Addresses = append(rec.Addresses, ip.IP.String())
|
||||
}
|
||||
|
||||
for _, addr := range rec.Addresses {
|
||||
hostport := net.JoinHostPort(addr, strconv.Itoa(int(rec.Port)))
|
||||
switch rec.Proto {
|
||||
case protoTCP:
|
||||
rec.Probes = append(rec.Probes, probeTCP(ctx, hostport, tcpTimeout))
|
||||
case protoUDP:
|
||||
rec.Probes = append(rec.Probes, probeUDP(ctx, hostport, udpTimeout))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// parseOwner extracts the service and protocol labels from an SRV owner name
// of the form "_service._proto[.<rest>]", after removing a trailing
// ".<serviceDomain>" suffix from owner when present.
//
// DNS names are case-insensitive, and RFC 2782 states the same for the
// Service and Proto labels, so the suffix is matched case-insensitively and
// both returned labels are lowercased. Without this, an owner such as
// "_sip._TCP.Example.com" would yield a protocol that never matches the
// lowercase network names used for probing.
//
// Returns ("", "") when the owner does not match: callers must treat that as
// "unknown" and skip proto-specific probing rather than guessing.
func parseOwner(owner, serviceDomain string) (svc, proto string) {
	rest := owner
	suffix := "." + serviceDomain
	// Case-insensitive equivalent of strings.TrimSuffix(owner, suffix). The
	// suffix starts with an ASCII dot, so a match always cuts on a label
	// boundary.
	if cut := len(rest) - len(suffix); cut >= 0 && strings.EqualFold(rest[cut:], suffix) {
		rest = rest[:cut]
	}

	labels := strings.Split(rest, ".")
	if len(labels) < 2 {
		return "", ""
	}
	if !strings.HasPrefix(labels[0], "_") || !strings.HasPrefix(labels[1], "_") {
		return "", ""
	}
	svc = strings.ToLower(strings.TrimPrefix(labels[0], "_"))
	proto = strings.ToLower(strings.TrimPrefix(labels[1], "_"))
	return svc, proto
}
|
||||
|
||||
func durationOpt(opts sdk.CheckerOptions, key string, defMs int) time.Duration {
|
||||
ms := defMs
|
||||
if v, ok := opts[key]; ok {
|
||||
switch n := v.(type) {
|
||||
case float64:
|
||||
ms = int(n)
|
||||
case int:
|
||||
ms = n
|
||||
}
|
||||
}
|
||||
if ms < 100 {
|
||||
ms = 100
|
||||
}
|
||||
if ms > 60000 {
|
||||
ms = 60000
|
||||
}
|
||||
return time.Duration(ms) * time.Millisecond
|
||||
}
|
||||
|
||||
func probeTCP(ctx context.Context, hostport string, timeout time.Duration) ProbeResult {
|
||||
pr := ProbeResult{Address: hostport, Proto: protoTCP}
|
||||
start := time.Now()
|
||||
ctx, cancel := context.WithTimeout(ctx, timeout)
|
||||
defer cancel()
|
||||
conn, err := (&net.Dialer{}).DialContext(ctx, protoTCP, hostport)
|
||||
pr.LatencyMs = float64(time.Since(start).Microseconds()) / 1000.0
|
||||
if err != nil {
|
||||
pr.Error = err.Error()
|
||||
return pr
|
||||
}
|
||||
_ = conn.Close()
|
||||
pr.Connected = true
|
||||
return pr
|
||||
}
|
||||
|
||||
func probeUDP(ctx context.Context, hostport string, timeout time.Duration) ProbeResult {
|
||||
pr := ProbeResult{Address: hostport, Proto: protoUDP}
|
||||
ctx, cancel := context.WithTimeout(ctx, timeout)
|
||||
defer cancel()
|
||||
conn, err := (&net.Dialer{}).DialContext(ctx, protoUDP, hostport)
|
||||
if err != nil {
|
||||
pr.Error = err.Error()
|
||||
return pr
|
||||
}
|
||||
defer conn.Close()
|
||||
|
||||
// Send a single zero byte. If the host has nothing listening and returns
|
||||
// ICMP port-unreachable, a subsequent Read will fail with "connection
|
||||
// refused". Silent drops (firewalled) remain indistinguishable from a
|
||||
// working service, report as "reachable (no response)".
|
||||
_ = conn.SetDeadline(time.Now().Add(timeout))
|
||||
if _, err := conn.Write([]byte{0}); err != nil {
|
||||
pr.Error = err.Error()
|
||||
return pr
|
||||
}
|
||||
buf := make([]byte, 1)
|
||||
_, err = conn.Read(buf)
|
||||
if err != nil {
|
||||
if ne, ok := err.(net.Error); ok && ne.Timeout() {
|
||||
// No ICMP unreachable came back: host probably accepts UDP,
|
||||
// or packets are silently dropped. Treat as "reachable".
|
||||
pr.Connected = true
|
||||
pr.Error = "no UDP response (host may still be reachable)"
|
||||
return pr
|
||||
}
|
||||
pr.Error = err.Error()
|
||||
return pr
|
||||
}
|
||||
pr.Connected = true
|
||||
return pr
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue