Split monolithic rule into per-test rules, collect gathers facts only

This commit is contained in:
nemunaire 2026-04-25 23:14:42 +07:00
commit 4177fcdc7b
14 changed files with 758 additions and 259 deletions

View file

@ -119,7 +119,7 @@ Observation data written under `tls_probes`:
}
```
The map is keyed by `contract.Ref(ep)` the same value the host exposes
The map is keyed by `contract.Ref(ep)`, the same value the host exposes
on the lineage side so that a consumer knows which probe corresponds to
which entry it originally published.
@ -129,14 +129,14 @@ existing downstream parsers.
## Issues reported
- `tcp_unreachable` dial failed.
- `handshake_failed` TLS handshake or STARTTLS upgrade failed.
- `starttls_not_offered` server didn't advertise STARTTLS. Severity is
- `tcp_unreachable`, dial failed.
- `handshake_failed`, TLS handshake or STARTTLS upgrade failed.
- `starttls_not_offered`, server didn't advertise STARTTLS. Severity is
`crit` when `TLSEndpoint.RequireSTARTTLS` is `true`, `warn` otherwise.
- `chain_invalid` leaf does not chain to a system-trusted root.
- `hostname_mismatch` cert SANs don't cover the SNI.
- `expired` / `expiring_soon` cert expiry posture.
- `weak_tls_version` negotiated TLS < 1.2.
- `chain_invalid`, leaf does not chain to a system-trusted root.
- `hostname_mismatch`, cert SANs don't cover the SNI.
- `expired` / `expiring_soon`, cert expiry posture.
- `weak_tls_version`, negotiated TLS < 1.2.
## Options

View file

@ -47,8 +47,8 @@ func (p *tlsProvider) Collect(ctx context.Context, opts sdk.CheckerOptions) (any
defer wg.Done()
defer func() { <-sem }()
pr := probe(ctx, e.Endpoint, timeout)
log.Printf("checker-tls: %s %s:%d → tls=%s issues=%d elapsed=%dms err=%q",
pr.Type, pr.Host, pr.Port, pr.TLSVersion, len(pr.Issues), pr.ElapsedMS, pr.Error)
log.Printf("checker-tls: %s %s:%d → tls=%s handshake_ok=%t elapsed=%dms err=%q",
pr.Type, pr.Host, pr.Port, pr.TLSVersion, pr.TLSHandshakeOK, pr.ElapsedMS, pr.Error)
mu.Lock()
probes[e.Ref] = pr
mu.Unlock()

View file

@ -40,9 +40,7 @@ func Definition() *sdk.CheckerDefinition {
},
},
},
Rules: []sdk.CheckRule{
Rule(),
},
Rules: Rules(),
Interval: &sdk.CheckIntervalSpec{
Min: 6 * time.Hour,
Max: 7 * 24 * time.Hour,

View file

@ -58,8 +58,11 @@ func probeTypeString(ep contract.TLSEndpoint) string {
// probe performs a TLS handshake (or STARTTLS upgrade + handshake) on the
// given endpoint and returns a populated TLSProbe. It never returns an error:
// transport/handshake failures are recorded on the probe so the caller can
// still surface them in the report.
// transport/handshake failures are recorded on the probe as raw fields so
// rules can classify them.
//
// This function MUST NOT decide severity or pass/fail: it only gathers
// observation data. All judgement happens in CheckRules (see rules_*.go).
func probe(ctx context.Context, ep contract.TLSEndpoint, timeout time.Duration) TLSProbe {
start := time.Now()
host := strings.TrimSuffix(ep.Host, ".")
@ -70,11 +73,13 @@ func probe(ctx context.Context, ep contract.TLSEndpoint, timeout time.Duration)
}
p := TLSProbe{
Host: host,
Port: ep.Port,
Endpoint: addr,
Type: probeTypeString(ep),
SNI: sni,
Host: host,
Port: ep.Port,
Endpoint: addr,
Type: probeTypeString(ep),
SNI: sni,
RequireSTARTTLS: ep.RequireSTARTTLS,
STARTTLSDialect: ep.STARTTLS,
}
dialCtx, cancel := context.WithTimeout(ctx, timeout)
@ -83,13 +88,8 @@ func probe(ctx context.Context, ep contract.TLSEndpoint, timeout time.Duration)
d := &net.Dialer{}
conn, err := d.DialContext(dialCtx, "tcp", addr)
if err != nil {
p.TCPError = err.Error()
p.Error = "dial: " + err.Error()
p.Issues = append(p.Issues, Issue{
Code: "tcp_unreachable",
Severity: SeverityCrit,
Message: fmt.Sprintf("Cannot open TCP connection to %s: %v", addr, err),
Fix: "Check DNS, firewall, and that the service listens on this port.",
})
p.ElapsedMS = time.Since(start).Milliseconds()
return p
}
@ -101,23 +101,28 @@ func probe(ctx context.Context, ep contract.TLSEndpoint, timeout time.Duration)
tlsConn, err := handshake(conn, ep, sni)
if err != nil {
p.HandshakeError = err.Error()
p.Error = err.Error()
p.Issues = append(p.Issues, classifyHandshakeError(ep, err))
if ep.STARTTLS != "" && isStartTLSUnsupported(err) {
p.STARTTLSNotOffered = true
}
if errors.Is(err, errUnsupportedStartTLSProto) {
p.STARTTLSUnsupportedProto = true
}
p.ElapsedMS = time.Since(start).Milliseconds()
return p
}
defer tlsConn.Close()
p.TLSHandshakeOK = true
state := tlsConn.ConnectionState()
p.TLSVersionNum = state.Version
p.TLSVersion = tls.VersionName(state.Version)
p.CipherSuite = tls.CipherSuiteName(state.CipherSuite)
p.CipherSuiteID = state.CipherSuite
if len(state.PeerCertificates) == 0 {
p.Issues = append(p.Issues, Issue{
Code: "no_peer_cert",
Severity: SeverityCrit,
Message: "Server presented no certificate.",
})
p.NoPeerCert = true
p.ElapsedMS = time.Since(start).Milliseconds()
return p
}
@ -130,16 +135,16 @@ func probe(ctx context.Context, ep contract.TLSEndpoint, timeout time.Duration)
p.IssuerAKI = strings.ToUpper(hex.EncodeToString(leaf.AuthorityKeyId))
}
p.Subject = leaf.Subject.CommonName
p.DNSNames = append(p.DNSNames, leaf.DNSNames...)
p.DNSNames = leaf.DNSNames
p.Chain = buildChain(state.PeerCertificates)
hostnameMatch := leaf.VerifyHostname(sni) == nil
p.HostnameMatch = &hostnameMatch
// Chain verification against system roots, using intermediates presented
// by the server. We run this independently from Go's tls.Config
// verification so we can report a dedicated "chain invalid" issue rather
// than failing the whole handshake.
// by the server. Running it separately from tls.Config verification
// means we can record it as a raw observation rather than aborting the
// handshake, rules classify it afterwards.
intermediates := x509.NewCertPool()
for _, c := range state.PeerCertificates[1:] {
intermediates.AddCert(c)
@ -152,48 +157,8 @@ func probe(ctx context.Context, ep contract.TLSEndpoint, timeout time.Duration)
})
chainValid := verifyErr == nil
p.ChainValid = &chainValid
if !chainValid {
msg := "Invalid certificate chain"
if verifyErr != nil {
msg = "Invalid certificate chain: " + verifyErr.Error()
}
p.Issues = append(p.Issues, Issue{
Code: "chain_invalid",
Severity: SeverityCrit,
Message: msg,
Fix: "Serve the full intermediate chain and ensure the root is trusted.",
})
}
if !hostnameMatch {
p.Issues = append(p.Issues, Issue{
Code: "hostname_mismatch",
Severity: SeverityCrit,
Message: fmt.Sprintf("Certificate does not cover %q (SANs: %s)", sni, strings.Join(leaf.DNSNames, ", ")),
Fix: "Re-issue the certificate with a matching SAN.",
})
}
if leaf.NotAfter.Before(now) {
p.Issues = append(p.Issues, Issue{
Code: "expired",
Severity: SeverityCrit,
Message: "Certificate expired on " + leaf.NotAfter.Format(time.RFC3339),
Fix: "Renew the certificate.",
})
} else if leaf.NotAfter.Sub(now) < 14*24*time.Hour {
p.Issues = append(p.Issues, Issue{
Code: "expiring_soon",
Severity: SeverityWarn,
Message: "Certificate expires in less than 14 days (" + leaf.NotAfter.Format(time.RFC3339) + ")",
Fix: "Renew before expiry.",
})
}
if state.Version < tls.VersionTLS12 {
p.Issues = append(p.Issues, Issue{
Code: "weak_tls_version",
Severity: SeverityWarn,
Message: "Negotiated TLS version " + p.TLSVersion + " is below the recommended TLS 1.2.",
Fix: "Disable TLS 1.0/1.1 on the server.",
})
if verifyErr != nil {
p.ChainVerifyErr = verifyErr.Error()
}
p.ElapsedMS = time.Since(start).Milliseconds()
@ -202,8 +167,8 @@ func probe(ctx context.Context, ep contract.TLSEndpoint, timeout time.Duration)
// handshake performs STARTTLS upgrade (when ep.STARTTLS is non-empty) and
// then a TLS handshake. InsecureSkipVerify is true on purpose: we verify
// the chain separately in probe so an invalid chain becomes a structured
// Issue rather than aborting the handshake.
// the chain separately in probe so an invalid chain becomes a raw
// observation rather than aborting the handshake.
func handshake(conn net.Conn, ep contract.TLSEndpoint, sni string) (*tls.Conn, error) {
cfg := &tls.Config{
ServerName: sni,
@ -220,7 +185,7 @@ func handshake(conn net.Conn, ep contract.TLSEndpoint, sni string) (*tls.Conn, e
up, ok := starttlsUpgraders[ep.STARTTLS]
if !ok {
return nil, fmt.Errorf("unsupported starttls protocol %q", ep.STARTTLS)
return nil, fmt.Errorf("%w: %q", errUnsupportedStartTLSProto, ep.STARTTLS)
}
if err := up(conn, sni); err != nil {
return nil, fmt.Errorf("starttls-%s: %w", ep.STARTTLS, err)
@ -232,34 +197,10 @@ func handshake(conn net.Conn, ep contract.TLSEndpoint, sni string) (*tls.Conn, e
return tlsConn, nil
}
// classifyHandshakeError converts a dial/handshake error into a structured
// Issue, distinguishing "server doesn't offer STARTTLS" (which is opportunistic
// for some endpoints) from hard failures.
func classifyHandshakeError(ep contract.TLSEndpoint, err error) Issue {
msg := err.Error()
if ep.STARTTLS != "" && isStartTLSUnsupported(err) {
sev := SeverityWarn
if ep.RequireSTARTTLS {
sev = SeverityCrit
}
return Issue{
Code: "starttls_not_offered",
Severity: sev,
Message: fmt.Sprintf("Server on %s:%d does not advertise STARTTLS: %s", ep.Host, ep.Port, msg),
Fix: "Enable STARTTLS on the server or publish a direct-TLS endpoint.",
}
}
return Issue{
Code: "handshake_failed",
Severity: SeverityCrit,
Message: fmt.Sprintf("TLS handshake failed on %s:%d: %s", ep.Host, ep.Port, msg),
Fix: "Inspect the server's TLS configuration and certificate.",
}
}
var errStartTLSNotOffered = errors.New("starttls not advertised by server")
var (
errStartTLSNotOffered = errors.New("starttls not advertised by server")
errUnsupportedStartTLSProto = errors.New("unsupported starttls protocol")
)
func isStartTLSUnsupported(err error) bool {
return errors.Is(err, errStartTLSNotOffered)

View file

@ -60,11 +60,8 @@ func TestProbe_TCPUnreachable(t *testing.T) {
Port: uint16(addr.Port),
}, 1*time.Second)
if probe.Error == "" {
t.Errorf("expected an error for unreachable port")
}
if len(probe.Issues) == 0 || probe.Issues[0].Code != "tcp_unreachable" {
t.Errorf("expected tcp_unreachable issue, got %+v", probe.Issues)
if probe.TCPError == "" {
t.Errorf("expected a TCP error for unreachable port")
}
}

View file

@ -8,140 +8,81 @@ import (
sdk "git.happydns.org/checker-sdk-go/checker"
)
// Rule returns the rule that aggregates per-endpoint TLS probe outcomes into
// a single status for this checker run.
func Rule() sdk.CheckRule {
return &tlsRule{}
// Rules returns the full list of CheckRules exposed by the TLS checker.
// Each rule covers a single concern (reachability, handshake, chain, hostname,
// expiry, TLS version, STARTTLS advertisement, cipher suite, …) so the UI can
// surface a passing-list rather than a single aggregated code.
func Rules() []sdk.CheckRule {
return []sdk.CheckRule{
&endpointsDiscoveredRule{},
&reachabilityRule{},
&tlsHandshakeRule{},
&starttlsAdvertisedRule{},
&starttlsSupportedRule{},
&peerCertificateRule{},
&chainValidityRule{},
&hostnameMatchRule{},
&expiryRule{},
&tlsVersionRule{},
&cipherSuiteRule{},
}
}
type tlsRule struct{}
func (r *tlsRule) Name() string { return "tls_posture" }
func (r *tlsRule) Description() string {
return "Summarises TLS handshake, certificate validity, hostname match and expiry across all probed endpoints"
}
func (r *tlsRule) ValidateOptions(opts sdk.CheckerOptions) error {
return nil
}
func (r *tlsRule) Evaluate(ctx context.Context, obs sdk.ObservationGetter, opts sdk.CheckerOptions) []sdk.CheckState {
// loadData fetches the TLS observation. On error, returns a single error
// state the caller should emit.
func loadData(ctx context.Context, obs sdk.ObservationGetter) (*TLSData, *sdk.CheckState) {
var data TLSData
if err := obs.Get(ctx, ObservationKeyTLSProbes, &data); err != nil {
return []sdk.CheckState{{
return nil, &sdk.CheckState{
Status: sdk.StatusError,
Message: fmt.Sprintf("Failed to read tls_probes: %v", err),
Code: "tls_observation_error",
}}
}
// Steady state when no producer has published entries for this target
// yet (or when the last producer run cleared them). Report Unknown so
// we don't flap red during the eventual-consistency window between a
// fresh enrollment and the first producer cycle.
if len(data.Probes) == 0 {
return []sdk.CheckState{{
Status: sdk.StatusUnknown,
Message: "No TLS endpoints have been discovered for this target yet",
Code: "tls_no_endpoints",
}}
Message: fmt.Sprintf("failed to load tls_probes observation: %v", err),
Code: "tls.observation_error",
}
}
return &data, nil
}
// sortedRefs returns the probe refs in deterministic order. Rules iterate
// this sorted list so CheckState output is stable.
func sortedRefs(data *TLSData) []string {
refs := make([]string, 0, len(data.Probes))
for ref := range data.Probes {
refs = append(refs, ref)
}
sort.Strings(refs)
out := make([]sdk.CheckState, 0, len(refs))
for _, ref := range refs {
p := data.Probes[ref]
out = append(out, evaluateProbe(p))
}
return out
return refs
}
// evaluateProbe distills a single TLSProbe into a CheckState. Subject is the
// probed endpoint so the host can correlate states across runs and surface
// them per-target in the UI. Message describes the finding only -- the UI
// renders Subject separately.
func evaluateProbe(p TLSProbe) sdk.CheckState {
subject := fmt.Sprintf("%s://%s", p.Type, p.Endpoint)
meta := map[string]any{
"type": p.Type,
"host": p.Host,
"port": p.Port,
"sni": p.SNI,
"issues": len(p.Issues),
// subjectOf formats the UI-facing subject for a single probe.
func subjectOf(p TLSProbe) string {
return fmt.Sprintf("%s://%s", p.Type, p.Endpoint)
}
// metaOf returns a compact meta map to attach to a CheckState.
func metaOf(p TLSProbe) map[string]any {
m := map[string]any{
"type": p.Type,
"host": p.Host,
"port": p.Port,
"sni": p.SNI,
}
if p.TLSVersion != "" {
meta["tls_version"] = p.TLSVersion
}
if !p.NotAfter.IsZero() {
meta["not_after"] = p.NotAfter
}
worst, critMsg, warnMsg := summarize(p.Issues)
switch worst {
case SeverityCrit:
return sdk.CheckState{
Status: sdk.StatusCrit,
Message: critMsg,
Code: "tls_critical",
Subject: subject,
Meta: meta,
}
case SeverityWarn:
return sdk.CheckState{
Status: sdk.StatusWarn,
Message: warnMsg,
Code: "tls_warning",
Subject: subject,
Meta: meta,
}
default:
msg := "TLS endpoint OK"
if p.TLSVersion != "" {
msg = fmt.Sprintf("TLS endpoint OK (%s)", p.TLSVersion)
}
return sdk.CheckState{
Status: sdk.StatusOK,
Message: msg,
Code: "tls_ok",
Subject: subject,
Meta: meta,
}
m["tls_version"] = p.TLSVersion
}
return m
}
// summarize walks the issues once and returns (worst severity, first
// critical message, first warning message). Picking the messages during the
// same pass avoids a second iteration in the caller.
func summarize(issues []Issue) (worst, firstCrit, firstWarn string) {
for _, is := range issues {
msg := is.Message
if msg == "" {
msg = is.Code
}
switch is.Severity {
case SeverityCrit:
worst = SeverityCrit
if firstCrit == "" {
firstCrit = msg
}
case SeverityWarn:
if worst == "" || worst == SeverityInfo {
worst = SeverityWarn
}
if firstWarn == "" {
firstWarn = msg
}
case SeverityInfo:
if worst == "" {
worst = SeverityInfo
}
}
}
return
// passState / infoState / unknownState helpers.
func passState(code, message string) sdk.CheckState {
return sdk.CheckState{Status: sdk.StatusOK, Code: code, Message: message}
}
func unknownState(code, message string) sdk.CheckState {
return sdk.CheckState{Status: sdk.StatusUnknown, Code: code, Message: message}
}
// emptyCaseState returns a single state describing "no probes to evaluate".
// Rules call this when len(data.Probes) == 0 to avoid returning an empty
// slice (see CheckRule.Evaluate contract).
func emptyCaseState(code string) sdk.CheckState {
return unknownState(code, "No TLS endpoints have been discovered for this target yet.")
}

View file

@ -0,0 +1,233 @@
package checker
import (
"context"
"fmt"
"strings"
"time"
sdk "git.happydns.org/checker-sdk-go/checker"
)
// peerCertificateRule flags successful handshakes in which the server sent
// no certificate. This is distinct from chain validity: if no cert was sent,
// hostname/chain/expiry cannot be evaluated.
type peerCertificateRule struct{}
func (r *peerCertificateRule) Name() string { return "tls.peer_certificate_present" }
func (r *peerCertificateRule) Description() string {
return "Verifies the server presented a certificate during the TLS handshake."
}
func (r *peerCertificateRule) Evaluate(ctx context.Context, obs sdk.ObservationGetter, _ sdk.CheckerOptions) []sdk.CheckState {
data, errSt := loadData(ctx, obs)
if errSt != nil {
return []sdk.CheckState{*errSt}
}
if len(data.Probes) == 0 {
return []sdk.CheckState{emptyCaseState("tls.peer_certificate_present.no_endpoints")}
}
var out []sdk.CheckState
anyHandshake := false
for _, ref := range sortedRefs(data) {
p := data.Probes[ref]
if !p.TLSHandshakeOK {
continue
}
anyHandshake = true
if !p.NoPeerCert {
continue
}
out = append(out, sdk.CheckState{
Status: sdk.StatusCrit,
Code: "tls.peer_certificate_present.missing",
Subject: subjectOf(p),
Message: fmt.Sprintf("Server on %s completed the handshake but presented no certificate.", p.Endpoint),
Meta: metaOf(p),
})
}
if !anyHandshake {
return []sdk.CheckState{unknownState(
"tls.peer_certificate_present.skipped",
"No endpoint completed a TLS handshake.",
)}
}
if len(out) == 0 {
return []sdk.CheckState{passState(
"tls.peer_certificate_present.ok",
"Every endpoint presented a certificate.",
)}
}
return out
}
// chainValidityRule flags invalid certificate chains.
type chainValidityRule struct{}
func (r *chainValidityRule) Name() string { return "tls.chain_validity" }
func (r *chainValidityRule) Description() string {
return "Verifies the presented certificate chain validates against the system trust store."
}
func (r *chainValidityRule) Evaluate(ctx context.Context, obs sdk.ObservationGetter, _ sdk.CheckerOptions) []sdk.CheckState {
data, errSt := loadData(ctx, obs)
if errSt != nil {
return []sdk.CheckState{*errSt}
}
if len(data.Probes) == 0 {
return []sdk.CheckState{emptyCaseState("tls.chain_validity.no_endpoints")}
}
var out []sdk.CheckState
any := false
for _, ref := range sortedRefs(data) {
p := data.Probes[ref]
if p.ChainValid == nil {
continue
}
any = true
if *p.ChainValid {
continue
}
msg := "Invalid certificate chain"
if p.ChainVerifyErr != "" {
msg = "Invalid certificate chain: " + p.ChainVerifyErr
}
out = append(out, sdk.CheckState{
Status: sdk.StatusCrit,
Code: "tls.chain_validity.invalid",
Subject: subjectOf(p),
Message: msg,
Meta: metaOf(p),
})
}
if !any {
return []sdk.CheckState{unknownState(
"tls.chain_validity.skipped",
"No endpoint yielded a certificate chain to verify.",
)}
}
if len(out) == 0 {
return []sdk.CheckState{passState(
"tls.chain_validity.ok",
"Every presented chain validates against the system trust store.",
)}
}
return out
}
// hostnameMatchRule flags endpoints whose leaf cert does not cover the SNI
// the probe used.
type hostnameMatchRule struct{}
func (r *hostnameMatchRule) Name() string { return "tls.hostname_match" }
func (r *hostnameMatchRule) Description() string {
return "Verifies the leaf certificate covers the probed hostname (SNI)."
}
func (r *hostnameMatchRule) Evaluate(ctx context.Context, obs sdk.ObservationGetter, _ sdk.CheckerOptions) []sdk.CheckState {
data, errSt := loadData(ctx, obs)
if errSt != nil {
return []sdk.CheckState{*errSt}
}
if len(data.Probes) == 0 {
return []sdk.CheckState{emptyCaseState("tls.hostname_match.no_endpoints")}
}
var out []sdk.CheckState
any := false
for _, ref := range sortedRefs(data) {
p := data.Probes[ref]
if p.HostnameMatch == nil {
continue
}
any = true
if *p.HostnameMatch {
continue
}
out = append(out, sdk.CheckState{
Status: sdk.StatusCrit,
Code: "tls.hostname_match.mismatch",
Subject: subjectOf(p),
Message: fmt.Sprintf("Certificate does not cover %q (SANs: %s)", p.SNI, strings.Join(p.DNSNames, ", ")),
Meta: metaOf(p),
})
}
if !any {
return []sdk.CheckState{unknownState(
"tls.hostname_match.skipped",
"No endpoint yielded a certificate to hostname-match.",
)}
}
if len(out) == 0 {
return []sdk.CheckState{passState(
"tls.hostname_match.ok",
"Every certificate covers its probed SNI.",
)}
}
return out
}
// expiryRule flags expired or near-expiry certificates.
type expiryRule struct{}
func (r *expiryRule) Name() string { return "tls.expiry" }
func (r *expiryRule) Description() string {
return "Flags expired or soon-to-expire leaf certificates."
}
func (r *expiryRule) Evaluate(ctx context.Context, obs sdk.ObservationGetter, _ sdk.CheckerOptions) []sdk.CheckState {
data, errSt := loadData(ctx, obs)
if errSt != nil {
return []sdk.CheckState{*errSt}
}
if len(data.Probes) == 0 {
return []sdk.CheckState{emptyCaseState("tls.expiry.no_endpoints")}
}
now := time.Now()
var out []sdk.CheckState
any := false
for _, ref := range sortedRefs(data) {
p := data.Probes[ref]
if p.NotAfter.IsZero() {
continue
}
any = true
meta := metaOf(p)
meta["not_after"] = p.NotAfter
if p.NotAfter.Before(now) {
out = append(out, sdk.CheckState{
Status: sdk.StatusCrit,
Code: "tls.expiry.expired",
Subject: subjectOf(p),
Message: "Certificate expired on " + p.NotAfter.Format(time.RFC3339),
Meta: meta,
})
continue
}
if p.NotAfter.Sub(now) < ExpiringSoonThreshold {
out = append(out, sdk.CheckState{
Status: sdk.StatusWarn,
Code: "tls.expiry.expiring_soon",
Subject: subjectOf(p),
Message: "Certificate expires in less than 14 days (" + p.NotAfter.Format(time.RFC3339) + ")",
Meta: meta,
})
}
}
if !any {
return []sdk.CheckState{unknownState(
"tls.expiry.skipped",
"No endpoint yielded a certificate with an expiry to check.",
)}
}
if len(out) == 0 {
return []sdk.CheckState{passState(
"tls.expiry.ok",
"Every leaf certificate is valid for more than 14 days.",
)}
}
return out
}

View file

@ -0,0 +1,34 @@
package checker
import (
"context"
sdk "git.happydns.org/checker-sdk-go/checker"
)
// endpointsDiscoveredRule surfaces the "no producer has published endpoints
// for this target yet" steady state. Kept as its own rule so it does not
// contaminate per-endpoint findings when discovery is in flight.
type endpointsDiscoveredRule struct{}
func (r *endpointsDiscoveredRule) Name() string { return "tls.endpoints_discovered" }
func (r *endpointsDiscoveredRule) Description() string {
return "Verifies that at least one TLS endpoint has been discovered for this target."
}
func (r *endpointsDiscoveredRule) Evaluate(ctx context.Context, obs sdk.ObservationGetter, _ sdk.CheckerOptions) []sdk.CheckState {
data, errSt := loadData(ctx, obs)
if errSt != nil {
return []sdk.CheckState{*errSt}
}
if len(data.Probes) == 0 {
return []sdk.CheckState{unknownState(
"tls.endpoints_discovered.none",
"No TLS endpoints have been discovered for this target yet.",
)}
}
return []sdk.CheckState{passState(
"tls.endpoints_discovered.ok",
"TLS endpoints were discovered for this target.",
)}
}

View file

@ -0,0 +1,60 @@
package checker
import (
"context"
"fmt"
sdk "git.happydns.org/checker-sdk-go/checker"
)
// tlsHandshakeRule flags reachable endpoints on which the TLS handshake
// failed. STARTTLS-specific shortfalls (server not advertising the upgrade)
// are surfaced by starttlsAdvertisedRule / starttlsSupportedRule instead,
// so this rule skips them.
type tlsHandshakeRule struct{}
func (r *tlsHandshakeRule) Name() string { return "tls.handshake" }
func (r *tlsHandshakeRule) Description() string {
return "Verifies the TLS handshake completes on every reachable endpoint."
}
func (r *tlsHandshakeRule) Evaluate(ctx context.Context, obs sdk.ObservationGetter, _ sdk.CheckerOptions) []sdk.CheckState {
data, errSt := loadData(ctx, obs)
if errSt != nil {
return []sdk.CheckState{*errSt}
}
if len(data.Probes) == 0 {
return []sdk.CheckState{emptyCaseState("tls.handshake.no_endpoints")}
}
var out []sdk.CheckState
for _, ref := range sortedRefs(data) {
p := data.Probes[ref]
if p.TCPError != "" {
continue // reachability covers this.
}
if p.STARTTLSNotOffered || p.STARTTLSUnsupportedProto {
continue // starttls-specific rules cover these.
}
if p.TLSHandshakeOK {
continue
}
if p.HandshakeError == "" {
continue
}
out = append(out, sdk.CheckState{
Status: sdk.StatusCrit,
Code: "tls.handshake.failed",
Subject: subjectOf(p),
Message: fmt.Sprintf("TLS handshake failed on %s: %s", p.Endpoint, p.HandshakeError),
Meta: metaOf(p),
})
}
if len(out) == 0 {
return []sdk.CheckState{passState(
"tls.handshake.ok",
"TLS handshake succeeded on every reachable endpoint.",
)}
}
return out
}

105
checker/rules_protocol.go Normal file
View file

@ -0,0 +1,105 @@
package checker
import (
"context"
"crypto/tls"
"fmt"
sdk "git.happydns.org/checker-sdk-go/checker"
)
// tlsVersionRule flags endpoints negotiating a protocol version below the
// recommended TLS 1.2 floor.
type tlsVersionRule struct{}
func (r *tlsVersionRule) Name() string { return "tls.version" }
func (r *tlsVersionRule) Description() string {
return "Flags endpoints negotiating a TLS version below the recommended TLS 1.2."
}
func (r *tlsVersionRule) Evaluate(ctx context.Context, obs sdk.ObservationGetter, _ sdk.CheckerOptions) []sdk.CheckState {
data, errSt := loadData(ctx, obs)
if errSt != nil {
return []sdk.CheckState{*errSt}
}
if len(data.Probes) == 0 {
return []sdk.CheckState{emptyCaseState("tls.version.no_endpoints")}
}
var out []sdk.CheckState
any := false
for _, ref := range sortedRefs(data) {
p := data.Probes[ref]
if p.TLSVersionNum == 0 {
continue
}
any = true
if p.TLSVersionNum >= tls.VersionTLS12 {
continue
}
out = append(out, sdk.CheckState{
Status: sdk.StatusWarn,
Code: "tls.version.weak",
Subject: subjectOf(p),
Message: fmt.Sprintf("Negotiated TLS version %s is below the recommended TLS 1.2.", p.TLSVersion),
Meta: metaOf(p),
})
}
if !any {
return []sdk.CheckState{unknownState(
"tls.version.skipped",
"No endpoint completed a TLS handshake.",
)}
}
if len(out) == 0 {
return []sdk.CheckState{passState(
"tls.version.ok",
"Every endpoint negotiates TLS 1.2 or higher.",
)}
}
return out
}
// cipherSuiteRule reports the negotiated cipher suite for visibility.
// It does not currently classify suites as weak/strong: go's crypto/tls
// refuses to negotiate the known-weak suites anyway. The rule exists so the
// UI can expose the suite in the passing-list rather than leaving it buried
// in the raw observation.
type cipherSuiteRule struct{}
func (r *cipherSuiteRule) Name() string { return "tls.cipher_suite" }
func (r *cipherSuiteRule) Description() string {
return "Reports the cipher suite negotiated on each endpoint."
}
func (r *cipherSuiteRule) Evaluate(ctx context.Context, obs sdk.ObservationGetter, _ sdk.CheckerOptions) []sdk.CheckState {
data, errSt := loadData(ctx, obs)
if errSt != nil {
return []sdk.CheckState{*errSt}
}
if len(data.Probes) == 0 {
return []sdk.CheckState{emptyCaseState("tls.cipher_suite.no_endpoints")}
}
var out []sdk.CheckState
for _, ref := range sortedRefs(data) {
p := data.Probes[ref]
if p.CipherSuite == "" {
continue
}
out = append(out, sdk.CheckState{
Status: sdk.StatusInfo,
Code: "tls.cipher_suite.negotiated",
Subject: subjectOf(p),
Message: fmt.Sprintf("Cipher suite %s negotiated.", p.CipherSuite),
Meta: metaOf(p),
})
}
if len(out) == 0 {
return []sdk.CheckState{unknownState(
"tls.cipher_suite.skipped",
"No endpoint completed a TLS handshake.",
)}
}
return out
}

View file

@ -0,0 +1,48 @@
package checker
import (
"context"
"fmt"
sdk "git.happydns.org/checker-sdk-go/checker"
)
// reachabilityRule flags endpoints that did not accept a TCP connection.
type reachabilityRule struct{}
func (r *reachabilityRule) Name() string { return "tls.reachability" }
func (r *reachabilityRule) Description() string {
return "Verifies that every discovered TLS endpoint accepts a TCP connection."
}
func (r *reachabilityRule) Evaluate(ctx context.Context, obs sdk.ObservationGetter, _ sdk.CheckerOptions) []sdk.CheckState {
data, errSt := loadData(ctx, obs)
if errSt != nil {
return []sdk.CheckState{*errSt}
}
if len(data.Probes) == 0 {
return []sdk.CheckState{emptyCaseState("tls.reachability.no_endpoints")}
}
var out []sdk.CheckState
for _, ref := range sortedRefs(data) {
p := data.Probes[ref]
if p.TCPError == "" {
continue
}
out = append(out, sdk.CheckState{
Status: sdk.StatusCrit,
Code: "tls.reachability.tcp_unreachable",
Subject: subjectOf(p),
Message: fmt.Sprintf("Cannot open TCP connection to %s: %s", p.Endpoint, p.TCPError),
Meta: metaOf(p),
})
}
if len(out) == 0 {
return []sdk.CheckState{passState(
"tls.reachability.ok",
"All discovered endpoints accepted a TCP connection.",
)}
}
return out
}

108
checker/rules_starttls.go Normal file
View file

@ -0,0 +1,108 @@
package checker
import (
"context"
"fmt"
sdk "git.happydns.org/checker-sdk-go/checker"
)
// starttlsAdvertisedRule flags STARTTLS endpoints whose server did not
// advertise the upgrade. Severity depends on RequireSTARTTLS: opportunistic
// STARTTLS degrades to a warning; mandatory STARTTLS is critical.
type starttlsAdvertisedRule struct{}
func (r *starttlsAdvertisedRule) Name() string { return "tls.starttls_advertised" }
func (r *starttlsAdvertisedRule) Description() string {
return "Verifies that STARTTLS endpoints advertise the upgrade capability."
}
func (r *starttlsAdvertisedRule) Evaluate(ctx context.Context, obs sdk.ObservationGetter, _ sdk.CheckerOptions) []sdk.CheckState {
data, errSt := loadData(ctx, obs)
if errSt != nil {
return []sdk.CheckState{*errSt}
}
if len(data.Probes) == 0 {
return []sdk.CheckState{emptyCaseState("tls.starttls_advertised.no_endpoints")}
}
var out []sdk.CheckState
anySTARTTLS := false
for _, ref := range sortedRefs(data) {
p := data.Probes[ref]
if p.STARTTLSDialect == "" {
continue
}
anySTARTTLS = true
if !p.STARTTLSNotOffered {
continue
}
status := sdk.StatusWarn
if p.RequireSTARTTLS {
status = sdk.StatusCrit
}
out = append(out, sdk.CheckState{
Status: status,
Code: "tls.starttls_advertised.missing",
Subject: subjectOf(p),
Message: fmt.Sprintf("Server on %s does not advertise STARTTLS.", p.Endpoint),
Meta: metaOf(p),
})
}
if !anySTARTTLS {
return []sdk.CheckState{unknownState(
"tls.starttls_advertised.not_applicable",
"No STARTTLS endpoint in the discovered set.",
)}
}
if len(out) == 0 {
return []sdk.CheckState{passState(
"tls.starttls_advertised.ok",
"STARTTLS is advertised on every STARTTLS endpoint.",
)}
}
return out
}
// starttlsSupportedRule flags endpoints whose STARTTLS dialect is not
// implemented by this checker. A misconfigured discovery entry (typo, new
// protocol) should be visible as its own concern rather than blending into
// generic handshake failures.
type starttlsSupportedRule struct{}
func (r *starttlsSupportedRule) Name() string { return "tls.starttls_dialect_supported" }
func (r *starttlsSupportedRule) Description() string {
return "Verifies that discovered STARTTLS dialects are implemented by the checker."
}
func (r *starttlsSupportedRule) Evaluate(ctx context.Context, obs sdk.ObservationGetter, _ sdk.CheckerOptions) []sdk.CheckState {
data, errSt := loadData(ctx, obs)
if errSt != nil {
return []sdk.CheckState{*errSt}
}
if len(data.Probes) == 0 {
return []sdk.CheckState{emptyCaseState("tls.starttls_dialect_supported.no_endpoints")}
}
var out []sdk.CheckState
for _, ref := range sortedRefs(data) {
p := data.Probes[ref]
if !p.STARTTLSUnsupportedProto {
continue
}
out = append(out, sdk.CheckState{
Status: sdk.StatusError,
Code: "tls.starttls_dialect_supported.unknown",
Subject: subjectOf(p),
Message: fmt.Sprintf("Unsupported STARTTLS dialect %q for %s.", p.STARTTLSDialect, p.Endpoint),
Meta: metaOf(p),
})
}
if len(out) == 0 {
return []sdk.CheckState{passState(
"tls.starttls_dialect_supported.ok",
"Every STARTTLS dialect encountered is implemented.",
)}
}
return out
}

View file

@ -22,33 +22,67 @@ const (
MaxConcurrentProbes = 32
)
// Severity values used in Issue.Severity (lowercase, ascii).
const (
SeverityCrit = "crit"
SeverityWarn = "warn"
SeverityInfo = "info"
)
// TLSData is the full collected payload written under ObservationKeyTLSProbes.
type TLSData struct {
Probes map[string]TLSProbe `json:"probes"`
CollectedAt time.Time `json:"collected_at"`
}
// TLSProbe captures the outcome of probing a single endpoint. Field names
// mirror what consumers already parse (checker-xmpp's tlsProbeView).
// TLSProbe captures the outcome of probing a single endpoint.
//
// Only raw observation fields live here. Judgement (severity, pass/fail,
// human-facing messages) is derived from these fields by CheckRules.
type TLSProbe struct {
Host string `json:"host"`
Port uint16 `json:"port"`
Endpoint string `json:"endpoint"`
Type string `json:"type"`
SNI string `json:"sni,omitempty"`
TLSVersion string `json:"tls_version,omitempty"`
CipherSuite string `json:"cipher_suite,omitempty"`
HostnameMatch *bool `json:"hostname_match,omitempty"`
ChainValid *bool `json:"chain_valid,omitempty"`
NotAfter time.Time `json:"not_after,omitempty"`
Issuer string `json:"issuer,omitempty"`
Host string `json:"host"`
Port uint16 `json:"port"`
Endpoint string `json:"endpoint"`
Type string `json:"type"`
SNI string `json:"sni,omitempty"`
// RequireSTARTTLS is copied from the discovery entry so rules can tell
// whether a missing STARTTLS advertisement is a hard or soft failure.
RequireSTARTTLS bool `json:"require_starttls,omitempty"`
// STARTTLSDialect mirrors contract.TLSEndpoint.STARTTLS verbatim. An
// empty value means direct TLS.
STARTTLSDialect string `json:"starttls_dialect,omitempty"`
// Raw error strings. Exactly one of TCPError or HandshakeError is set
// when the probe failed before gathering handshake data.
TCPError string `json:"tcp_error,omitempty"`
HandshakeError string `json:"handshake_error,omitempty"`
// STARTTLSNotOffered is true when HandshakeError was produced because
// the server did not advertise STARTTLS (errStartTLSNotOffered).
STARTTLSNotOffered bool `json:"starttls_not_offered,omitempty"`
// STARTTLSUnsupportedProto is true when the STARTTLS dialect is not
// implemented by this checker.
STARTTLSUnsupportedProto bool `json:"starttls_unsupported_proto,omitempty"`
// TLSHandshakeOK is true when a TLS handshake completed. It is
// independent from chain validity.
TLSHandshakeOK bool `json:"tls_handshake_ok,omitempty"`
// TLSVersionNum is the numeric TLS version negotiated (uint16 from
// crypto/tls). Zero means no handshake occurred. Kept as an unsigned
// integer so rules can compare against tls.VersionTLS12 without
// re-parsing a string.
TLSVersionNum uint16 `json:"tls_version_num,omitempty"`
TLSVersion string `json:"tls_version,omitempty"`
CipherSuite string `json:"cipher_suite,omitempty"`
CipherSuiteID uint16 `json:"cipher_suite_id,omitempty"`
// NoPeerCert is true when the handshake succeeded but the server sent
// no certificate.
NoPeerCert bool `json:"no_peer_cert,omitempty"`
HostnameMatch *bool `json:"hostname_match,omitempty"`
ChainValid *bool `json:"chain_valid,omitempty"`
ChainVerifyErr string `json:"chain_verify_err,omitempty"`
NotAfter time.Time `json:"not_after,omitempty"`
Issuer string `json:"issuer,omitempty"`
// IssuerDN is the leaf's issuer as an RFC 2253 DN string, suitable for
// matching the CCADB CAA Identifiers CSV "Subject" column when the AKI
// lookup misses.
@ -65,8 +99,11 @@ type TLSProbe struct {
// DANE consumers can match without re-handshaking or re-parsing.
Chain []CertInfo `json:"chain,omitempty"`
ElapsedMS int64 `json:"elapsed_ms,omitempty"`
Error string `json:"error,omitempty"`
Issues []Issue `json:"issues,omitempty"`
// Error is a compatibility summary of whichever raw error applies.
// Left for any external consumer still inspecting it; rules should
// look at TCPError / HandshakeError instead.
Error string `json:"error,omitempty"`
}
// CertInfo describes one certificate in the presented chain together with
@ -101,10 +138,7 @@ type CertInfo struct {
SPKIDERBase64 string `json:"spki_der_base64,omitempty"`
}
// Issue is a single TLS finding surfaced to the consumer.
type Issue struct {
Code string `json:"code"`
Severity string `json:"severity"`
Message string `json:"message,omitempty"`
Fix string `json:"fix,omitempty"`
}
// Expiry thresholds shared by rules.
const (
ExpiringSoonThreshold = 14 * 24 * time.Hour
)

View file

@ -123,7 +123,7 @@ type Entry struct {
}
// ParseEntries filters entries to those of Type and decodes each payload.
// Entries of other types are ignored silently they belong to other
// Entries of other types are ignored silently, they belong to other
// contracts. Entries of this type whose Payload fails to unmarshal are
// skipped and returned as warnings so a single malformed payload cannot
// starve the checker of the rest of its workload.