checker-tls/checker/collect.go
Pierre-Olivier Mercier 03af5615ae
All checks were successful
continuous-integration/drone/tag Build is passing
continuous-integration/drone/push Build is passing
checker: implement ShareKey to mutualise TLS probes across targets
A TLS probe result depends only on the set of endpoints actually dialed and
the probe knobs, never on which domain or service published them: the
observation is a map keyed by each endpoint's contract Ref
(host|port|effective SNI|STARTTLS|require). Implement sdk.ObservationSharer so
the host dials a host:port once and serves every target that resolves to the
same endpoint set, instead of re-handshaking per record. This is the
highest-value case among the checkers, since dane, xmpp, srv, dav, … all
funnel their endpoints into this single checker.

The share key sorts the endpoint Refs and folds in the probe timeout and the
cipher-enumeration flag, since both change what is collected (a tighter
timeout can fail a slow handshake; enumeration adds the Enum block). An empty
or unparseable entry set yields "" so the host falls back to per-target
caching.
2026-06-18 15:27:54 +09:00

129 lines
4.3 KiB
Go

package checker
import (
"context"
"crypto/sha256"
"encoding/hex"
"fmt"
"log"
"sort"
"strings"
"sync"
"time"
sdk "git.happydns.org/checker-sdk-go/checker"
"git.happydns.org/checker-tls/contract"
)
// Collect probes every TLS endpoint listed under OptionEndpoints and returns
// the results as a *TLSData whose Probes map is keyed by each entry's Ref.
//
// Options read from opts:
//   - OptionEndpoints: the discovery entries to probe; a missing option is
//     reported as an error.
//   - OptionProbeTimeoutMs: timeout handed to each probe, in milliseconds;
//     non-positive values fall back to DefaultProbeTimeoutMs.
//   - OptionEnumerateCiphers: when true, endpoints whose handshake succeeded
//     are additionally passed to enumerateEndpoint.
//
// Entries rejected by contract.ParseEntries are logged and discarded. At most
// MaxConcurrentProbes probes run concurrently. If ctx is done while entries
// are still waiting for a slot, those entries are skipped (and logged) and the
// probes gathered so far are returned with a nil error.
func (p *tlsProvider) Collect(ctx context.Context, opts sdk.CheckerOptions) (any, error) {
	raw, ok := sdk.GetOption[[]sdk.DiscoveryEntry](opts, OptionEndpoints)
	if !ok {
		return nil, fmt.Errorf("no discovery entries in options: did the host wire AutoFillDiscoveryEntries?")
	}

	timeoutMs := sdk.GetIntOption(opts, OptionProbeTimeoutMs, DefaultProbeTimeoutMs)
	if timeoutMs <= 0 {
		timeoutMs = DefaultProbeTimeoutMs
	}
	timeout := time.Duration(timeoutMs) * time.Millisecond
	enumerate := sdk.GetBoolOption(opts, OptionEnumerateCiphers, false)

	entries, warnings := contract.ParseEntries(raw)
	for _, w := range warnings {
		log.Printf("checker-tls: discarding malformed entry: %v", w)
	}

	// An empty entry set is not an error: it is the steady state on any
	// target where no producer has published yet, and the first run after
	// a fresh publication when the producer hasn't finished its own cycle.
	// The rule surfaces this as StatusUnknown rather than StatusError so a
	// freshly-enrolled domain doesn't flap red.
	if len(entries) == 0 {
		return &TLSData{Probes: map[string]TLSProbe{}, CollectedAt: time.Now()}, nil
	}

	probes := make(map[string]TLSProbe, len(entries))
	var mu sync.Mutex // guards probes
	var wg sync.WaitGroup
	// sem bounds the number of in-flight probes to MaxConcurrentProbes.
	sem := make(chan struct{}, MaxConcurrentProbes)

dispatch:
	for i, e := range entries {
		select {
		case sem <- struct{}{}:
		case <-ctx.Done():
			// Best effort: keep what was already dispatched, but make the
			// truncation visible instead of dropping endpoints silently.
			log.Printf("checker-tls: context done (%v), skipping %d of %d endpoints",
				ctx.Err(), len(entries)-i, len(entries))
			break dispatch
		}

		// Pin a per-iteration copy so the goroutine below never observes a
		// later entry, whatever loop-variable semantics the module's Go
		// version selects (shared before Go 1.22).
		e := e

		wg.Add(1)
		go func() {
			defer wg.Done()
			defer func() { <-sem }()

			pr := probe(ctx, e.Endpoint, timeout)
			log.Printf("checker-tls: %s %s:%d → tls=%s handshake_ok=%t elapsed=%dms err=%q",
				pr.Type, pr.Host, pr.Port, pr.TLSVersion, pr.TLSHandshakeOK, pr.ElapsedMS, pr.Error)

			// Enumeration only makes sense once a handshake has succeeded.
			if enumerate && pr.TLSHandshakeOK {
				enumRes, skipReason := enumerateEndpoint(ctx, e.Endpoint, enumerationBudget)
				switch {
				case enumRes != nil && enumRes.Skipped != "":
					// A result carrying a Skipped reason is still attached
					// to the probe, but logged as an error.
					pr.Enum = enumRes
					log.Printf("checker-tls: enum %s:%d → error: %s (duration=%dms)",
						pr.Host, pr.Port, enumRes.Skipped, enumRes.DurationMS)
				case enumRes != nil:
					pr.Enum = enumRes
					log.Printf("checker-tls: enum %s:%d → versions=%d duration=%dms",
						pr.Host, pr.Port, len(enumRes.Versions), enumRes.DurationMS)
				case skipReason != "":
					// No result at all: only the reason is logged.
					log.Printf("checker-tls: enum %s:%d → skipped: %s",
						pr.Host, pr.Port, skipReason)
				}
			}

			mu.Lock()
			probes[e.Ref] = pr
			mu.Unlock()
		}()
	}
	wg.Wait()

	return &TLSData{
		Probes:      probes,
		CollectedAt: time.Now(),
	}, nil
}
// ShareKey implements sdk.ObservationSharer. A TLS probe result depends only on
// the set of endpoints actually dialed and the probe knobs, never on which
// domain or service published them: the observation is a map keyed by each
// endpoint's contract Ref (host|port|effective SNI|STARTTLS|require), so two
// targets that resolve to the same endpoint set produce identical probes. This
// lets the host dial a host:port once and serve every target that points at it
// instead of re-handshaking per record — the highest-value case here, since
// dane, xmpp, srv, dav, … all funnel endpoints into this single checker.
//
// The key is built from the sorted, de-duplicated Refs: Collect stores probes
// in a map keyed by Ref, so an entry list that repeats a Ref yields the same
// observation as one that lists it once and must therefore hash identically.
//
// The probe timeout and cipher-enumeration flag are folded in because they
// change what is collected (a tighter timeout can fail a slow handshake;
// enumeration adds the Enum block). Inputs that yield no probable endpoint
// return "" so the host falls back to the default per-target caching.
//
// The returned key is "tls:" followed by the hex encoding of the first 8 bytes
// of a SHA-256 digest; the error result is always nil.
func (p *tlsProvider) ShareKey(opts sdk.CheckerOptions) (string, error) {
	raw, ok := sdk.GetOption[[]sdk.DiscoveryEntry](opts, OptionEndpoints)
	if !ok {
		return "", nil
	}

	// Parse warnings are deliberately ignored here: Collect logs them, and
	// only the entries that survive parsing influence the observation.
	entries, _ := contract.ParseEntries(raw)
	if len(entries) == 0 {
		return "", nil
	}

	refs := make([]string, 0, len(entries))
	for _, e := range entries {
		refs = append(refs, e.Ref)
	}
	// Sorting makes the key independent of publication order.
	sort.Strings(refs)

	// Drop adjacent duplicates in place so the key reflects the endpoint
	// *set*; after sorting, equal Refs are always neighbours.
	uniq := refs[:0]
	for _, r := range refs {
		if len(uniq) == 0 || uniq[len(uniq)-1] != r {
			uniq = append(uniq, r)
		}
	}

	// Normalise the timeout exactly as Collect does, so that every value
	// Collect treats as "use the default" maps to the same key.
	timeoutMs := sdk.GetIntOption(opts, OptionProbeTimeoutMs, DefaultProbeTimeoutMs)
	if timeoutMs <= 0 {
		timeoutMs = DefaultProbeTimeoutMs
	}
	enumerate := sdk.GetBoolOption(opts, OptionEnumerateCiphers, false)

	h := sha256.Sum256(fmt.Appendf(nil, "%d|%t|%s", timeoutMs, enumerate, strings.Join(uniq, ",")))
	return "tls:" + hex.EncodeToString(h[:8]), nil
}