checker-tls/checker/prober.go

231 lines
6.8 KiB
Go

package checker
import (
"context"
"crypto/tls"
"crypto/x509"
"errors"
"fmt"
"net"
"strconv"
"strings"
"time"
"git.happydns.org/checker-tls/contract"
)
// probeTypeString derives the TLSProbe.Type label for an endpoint: "tls" when
// ep.STARTTLS is empty (direct TLS), "starttls-<proto>" otherwise.
//
// The contract models direct TLS and STARTTLS as separate fields; this
// flattens them back into the single string that observation consumers
// already parse, so the wire format of tls_probes does not change.
func probeTypeString(ep contract.TLSEndpoint) string {
	switch proto := ep.STARTTLS; proto {
	case "":
		return "tls"
	default:
		return "starttls-" + proto
	}
}
// probe opens a TCP connection to ep, performs a TLS handshake (preceded by a
// STARTTLS upgrade when ep.STARTTLS is set) and returns a populated TLSProbe.
//
// It never returns an error: dial and handshake failures are recorded in the
// probe's Error and Issues fields so the caller can still surface them in the
// report. ElapsedMS is always filled in, whichever path returns.
//
// timeout bounds the whole exchange (dial, optional STARTTLS upgrade and
// handshake). Cancelling ctx also aborts an exchange that is already in
// flight.
//
// The handshake itself does not verify the certificate; the checks below run
// afterwards so each problem becomes its own Issue:
//   - chain_invalid: the leaf does not chain to a trusted root
//   - hostname_mismatch: the leaf does not cover the SNI name
//   - expired / expiring_soon: NotAfter is past, or less than 14 days away
//   - weak_tls_version: the negotiated protocol is older than TLS 1.2
func probe(ctx context.Context, ep contract.TLSEndpoint, timeout time.Duration) TLSProbe {
	start := time.Now()

	// A trailing dot (fully-qualified form) is dropped before the name is
	// used for dialing, SNI and reporting.
	host := strings.TrimSuffix(ep.Host, ".")
	addr := net.JoinHostPort(host, strconv.Itoa(int(ep.Port)))
	sni := ep.SNI
	if sni == "" {
		sni = host
	}

	p := TLSProbe{
		Host:     host,
		Port:     ep.Port,
		Endpoint: addr,
		Type:     probeTypeString(ep),
		SNI:      sni,
	}
	// finish stamps the elapsed time on the probe. Every exit path returns
	// through it so the measurement cannot be forgotten.
	finish := func() TLSProbe {
		p.ElapsedMS = time.Since(start).Milliseconds()
		return p
	}

	dialCtx, cancel := context.WithTimeout(ctx, timeout)
	defer cancel()

	d := &net.Dialer{}
	conn, err := d.DialContext(dialCtx, "tcp", addr)
	if err != nil {
		p.Error = "dial: " + err.Error()
		p.Issues = append(p.Issues, Issue{
			Code:     "tcp_unreachable",
			Severity: SeverityCrit,
			Message:  fmt.Sprintf("Cannot open TCP connection to %s: %v", addr, err),
			Fix:      "Check DNS, firewall, and that the service listens on this port.",
		})
		return finish()
	}
	defer conn.Close()

	// Carry the timeout over to the connection so the STARTTLS exchange and
	// the handshake are bounded too. Best effort: a failure to set the
	// deadline would only surface as a failed read/write below.
	if deadline, ok := dialCtx.Deadline(); ok {
		_ = conn.SetDeadline(deadline)
	}
	// Connection I/O does not observe ctx by itself. Without this hook a
	// cancelled ctx would leave the exchange blocked until the deadline;
	// expiring the deadline makes any pending read/write fail right away.
	stop := context.AfterFunc(dialCtx, func() {
		_ = conn.SetDeadline(time.Now())
	})
	defer stop()

	tlsConn, err := handshake(conn, ep, sni)
	if err != nil {
		p.Error = err.Error()
		p.Issues = append(p.Issues, classifyHandshakeError(ep, err))
		return finish()
	}
	defer tlsConn.Close()

	state := tlsConn.ConnectionState()
	p.TLSVersion = tls.VersionName(state.Version)
	p.CipherSuite = tls.CipherSuiteName(state.CipherSuite)

	if len(state.PeerCertificates) == 0 {
		p.Issues = append(p.Issues, Issue{
			Code:     "no_peer_cert",
			Severity: SeverityCrit,
			Message:  "Server presented no certificate.",
		})
		return finish()
	}

	leaf := state.PeerCertificates[0]
	p.NotAfter = leaf.NotAfter
	p.Issuer = leaf.Issuer.CommonName
	p.Subject = leaf.Subject.CommonName
	p.DNSNames = append(p.DNSNames, leaf.DNSNames...)

	hostnameMatch := leaf.VerifyHostname(sni) == nil
	p.HostnameMatch = &hostnameMatch

	// Chain verification against the system roots (Roots is left nil), using
	// the intermediates presented by the server. It runs here rather than in
	// the handshake so a bad chain becomes a dedicated issue instead of
	// failing the whole probe.
	//
	// DNSName is deliberately not set: Verify would otherwise reject the leaf
	// on a hostname mismatch and report it as a chain failure, duplicating
	// the hostname_mismatch issue below and hiding whether the chain itself
	// is sound.
	intermediates := x509.NewCertPool()
	for _, c := range state.PeerCertificates[1:] {
		intermediates.AddCert(c)
	}
	now := time.Now()
	_, verifyErr := leaf.Verify(x509.VerifyOptions{
		Intermediates: intermediates,
		CurrentTime:   now,
	})
	chainValid := verifyErr == nil
	p.ChainValid = &chainValid
	if !chainValid {
		p.Issues = append(p.Issues, Issue{
			Code:     "chain_invalid",
			Severity: SeverityCrit,
			Message:  "Invalid certificate chain: " + verifyErr.Error(),
			Fix:      "Serve the full intermediate chain and ensure the root is trusted.",
		})
	}

	if !hostnameMatch {
		p.Issues = append(p.Issues, Issue{
			Code:     "hostname_mismatch",
			Severity: SeverityCrit,
			Message:  fmt.Sprintf("Certificate does not cover %q (SANs: %s)", sni, strings.Join(leaf.DNSNames, ", ")),
			Fix:      "Re-issue the certificate with a matching SAN.",
		})
	}

	switch {
	case leaf.NotAfter.Before(now):
		p.Issues = append(p.Issues, Issue{
			Code:     "expired",
			Severity: SeverityCrit,
			Message:  "Certificate expired on " + leaf.NotAfter.Format(time.RFC3339),
			Fix:      "Renew the certificate.",
		})
	case leaf.NotAfter.Sub(now) < 14*24*time.Hour:
		p.Issues = append(p.Issues, Issue{
			Code:     "expiring_soon",
			Severity: SeverityWarn,
			Message:  "Certificate expires in less than 14 days (" + leaf.NotAfter.Format(time.RFC3339) + ")",
			Fix:      "Renew before expiry.",
		})
	}

	if state.Version < tls.VersionTLS12 {
		p.Issues = append(p.Issues, Issue{
			Code:     "weak_tls_version",
			Severity: SeverityWarn,
			Message:  "Negotiated TLS version " + p.TLSVersion + " is below the recommended TLS 1.2.",
			Fix:      "Disable TLS 1.0/1.1 on the server.",
		})
	}

	return finish()
}
// handshake upgrades conn to TLS and returns the established *tls.Conn.
// When ep.STARTTLS is non-empty, the matching entry of starttlsUpgraders is
// run on the plaintext connection first; a protocol with no entry is an error.
//
// InsecureSkipVerify is true on purpose: probe verifies the chain separately
// so an invalid chain becomes a structured Issue rather than aborting the
// handshake.
//
// MinVersion is lowered to TLS 1.0 for the same reason. A crypto/tls client
// refuses anything older than TLS 1.2 by default, which would turn a
// legacy-only server into a generic handshake failure and make probe's
// "weak_tls_version" check unreachable.
//
// Returned errors are prefixed with the failing stage ("starttls-<proto>",
// "tls-handshake" or "tls-handshake-after-starttls") and wrap the cause, so
// callers can still match it with errors.Is / errors.As.
func handshake(conn net.Conn, ep contract.TLSEndpoint, sni string) (*tls.Conn, error) {
	stage := "tls-handshake"
	if ep.STARTTLS != "" {
		up, ok := starttlsUpgraders[ep.STARTTLS]
		if !ok {
			return nil, fmt.Errorf("unsupported starttls protocol %q", ep.STARTTLS)
		}
		if err := up(conn, sni); err != nil {
			return nil, fmt.Errorf("starttls-%s: %w", ep.STARTTLS, err)
		}
		stage = "tls-handshake-after-starttls"
	}

	cfg := &tls.Config{
		ServerName:         sni,
		InsecureSkipVerify: true,             // deliberate: the chain is verified by probe
		MinVersion:         tls.VersionTLS10, // allow legacy versions so they can be reported
	}
	tlsConn := tls.Client(conn, cfg)
	if err := tlsConn.Handshake(); err != nil {
		return nil, fmt.Errorf("%s: %w", stage, err)
	}
	return tlsConn, nil
}
// classifyHandshakeError maps an error from the STARTTLS/handshake stage to a
// structured Issue.
//
// For a STARTTLS endpoint whose error matches isStartTLSUnsupported, the
// result is "starttls_not_offered": a warning by default (STARTTLS is
// opportunistic for some endpoints), critical when ep.RequireSTARTTLS is set.
// Every other error is a critical "handshake_failed". The text of err is
// embedded in the message in both cases.
func classifyHandshakeError(ep contract.TLSEndpoint, err error) Issue {
	detail := err.Error()

	// Hard failure: either a direct-TLS endpoint, or a STARTTLS endpoint
	// that failed for a reason other than the server not offering it.
	if ep.STARTTLS == "" || !isStartTLSUnsupported(err) {
		return Issue{
			Code:     "handshake_failed",
			Severity: SeverityCrit,
			Message:  fmt.Sprintf("TLS handshake failed on %s:%d: %s", ep.Host, ep.Port, detail),
			Fix:      "Inspect the server's TLS configuration and certificate.",
		}
	}

	severity := SeverityWarn
	if ep.RequireSTARTTLS {
		severity = SeverityCrit
	}
	return Issue{
		Code:     "starttls_not_offered",
		Severity: severity,
		Message:  fmt.Sprintf("Server on %s:%d does not advertise STARTTLS: %s", ep.Host, ep.Port, detail),
		Fix:      "Enable STARTTLS on the server or publish a direct-TLS endpoint.",
	}
}
// errStartTLSNotOffered is the sentinel matched by isStartTLSUnsupported to
// recognise a server that does not advertise STARTTLS.
var errStartTLSNotOffered = errors.New("starttls not advertised by server")

// isStartTLSUnsupported reports whether err is errStartTLSNotOffered or wraps
// it anywhere in its chain. A nil err yields false.
func isStartTLSUnsupported(err error) bool {
	notOffered := errors.Is(err, errStartTLSNotOffered)
	return notOffered
}