checker-dane/checker/report.go

282 lines
9.7 KiB
Go

package checker
import (
"bytes"
"encoding/json"
"fmt"
"html"
"sort"
"strings"
sdk "git.happydns.org/checker-sdk-go/checker"
tls "git.happydns.org/checker-tls/checker"
)
// GetHTMLReport implements sdk.CheckerHTMLReporter. The report opens with a
// diagnosis-first section that lists the most common DANE failure modes
// actually detected on the user's targets, each with a one-shot remediation
// snippet; a per-target table follows for reference.
//
// It decodes the DANE observation from ctx.Data(), indexes the related TLS
// probe observations by their map key, and renders a complete HTML document.
// The only error returned is a failure to decode the DANE payload. Every
// dynamic value is HTML-escaped before being written.
func (p *daneProvider) GetHTMLReport(ctx sdk.ReportContext) (string, error) {
	var data DANEData
	if err := json.Unmarshal(ctx.Data(), &data); err != nil {
		return "", fmt.Errorf("decode DANE data: %w", err)
	}

	// Merge every related TLS probe observation into one lookup table; targets
	// are matched against it through their Ref.
	probes := map[string]*tls.TLSProbe{}
	for _, ro := range ctx.Related(tls.ObservationKeyTLSProbes) {
		for k, v := range parseTLSProbeMap(ro.Data) {
			// Take the address of a per-iteration copy. Before Go 1.22 the
			// range variable is shared by all iterations, so storing &v would
			// make every entry point at the last probe visited.
			probe := v
			probes[k] = &probe
		}
	}

	var b bytes.Buffer
	b.WriteString(`<!DOCTYPE html><html><head><meta charset="utf-8"><title>DANE report</title>`)
	b.WriteString(reportCSS)
	b.WriteString(`</head><body><main>`)
	fmt.Fprintf(&b, `<h1>DANE / TLSA</h1><p class="meta">Collected %s · %d endpoint(s).</p>`,
		html.EscapeString(data.CollectedAt.Format("2006-01-02 15:04 MST")), len(data.Targets))

	// Diagnosis cards come first and are omitted entirely when there is
	// nothing to report.
	diag := diagnose(data, probes)
	if len(diag) > 0 {
		b.WriteString(`<section class="diagnosis"><h2>Action required</h2>`)
		for _, d := range diag {
			fmt.Fprintf(&b,
				`<article class="finding sev-%s"><h3>%s</h3><p>%s</p>`,
				html.EscapeString(d.Severity),
				html.EscapeString(d.Title),
				html.EscapeString(d.Detail))
			if d.Fix != "" {
				fmt.Fprintf(&b, `<pre class="fix">%s</pre>`, html.EscapeString(d.Fix))
			}
			b.WriteString(`</article>`)
		}
		b.WriteString(`</section>`)
	}

	// Reference table: one row per target, in the order they were collected.
	b.WriteString(`<section class="targets"><h2>Endpoints</h2><table><thead><tr><th>Endpoint</th><th>Status</th><th>Records</th><th>Observed leaf</th></tr></thead><tbody>`)
	for _, t := range data.Targets {
		probe := probes[t.Ref]
		status, cls := targetStatus(t, probe)

		// Show the leaf subject when a chain was observed, otherwise say why
		// there is none (or a dash when no probe exists yet).
		leaf := "—"
		if probe != nil && len(probe.Chain) > 0 {
			leaf = probe.Chain[0].Subject
		} else if probe != nil && probe.Error != "" {
			leaf = "handshake error"
		}

		fmt.Fprintf(&b,
			`<tr class="status-%s"><td><code>%s</code><br><small>%s → %s:%d%s</small></td><td>%s</td><td>%d</td><td>%s</td></tr>`,
			html.EscapeString(cls),
			html.EscapeString(t.Owner),
			html.EscapeString(t.Proto),
			html.EscapeString(t.Host),
			t.Port,
			starttlsLabel(t.STARTTLS), // already escaped by starttlsLabel
			html.EscapeString(status),
			len(t.Records),
			html.EscapeString(leaf),
		)
	}
	b.WriteString(`</tbody></table></section>`)
	b.WriteString(`</main></body></html>`)
	return b.String(), nil
}
// diagnosis is a single actionable hint surfaced at the top of the report.
// GetHTMLReport renders each one as a card: Title as the heading, Detail as
// the paragraph, and Fix (when non-empty) as a preformatted snippet.
type diagnosis struct {
	// Severity is crit | warn | info. It is used as the card's "sev-" CSS
	// class suffix and as the sort key in diagnose.
	Severity string
	// Title is the one-line headline of the card.
	Title string
	// Detail is the explanatory paragraph shown under the title.
	Detail string
	// Fix is a ready-to-apply snippet (shell or zone fragment); empty when
	// there is nothing to suggest.
	Fix string
}
// countMatched returns the number of TLSA records in t that match probe's
// chain. A nil probe matches nothing.
func countMatched(t TargetResult, p *tls.TLSProbe) int {
	if p == nil {
		return 0
	}
	n := 0
	for _, r := range t.Records {
		// Only the boolean verdict is needed for counting; matchRecord's
		// second result is deliberately discarded.
		if ok, _ := matchRecord(r, p); ok {
			n++
		}
	}
	return n
}

// diagnose scans every target and produces the minimum set of high-signal
// cards users need to act on. For each target, depending on the TLS probe
// found under the target's Ref, it emits:
//
//   - no_probe_yet (info): no probe is available yet; kept quiet to avoid
//     false alarms on first run.
//   - handshake_failed (crit): the probe carries an error or an empty chain,
//     so DANE can't be validated at all.
//   - no_match (crit): records are published but none of them covers the
//     live cert (typically a post-rotation miss); the card carries a proposed
//     replacement record.
//   - pkix_chain_invalid (crit): usage 0/1 is published but the probe does
//     not report a valid public chain (ChainValid is nil or false).
//   - usage_3_matches_issuer (warn): an end-entity record matches a non-leaf
//     certificate, so the record is probably miscategorized (a trust-anchor
//     usage was intended).
//
// The first two are exclusive outcomes; the last three are checked
// independently and may all fire for the same target. The result is stably
// sorted crit, then warn, then info.
func diagnose(data DANEData, probes map[string]*tls.TLSProbe) []diagnosis {
	var out []diagnosis
	for _, t := range data.Targets {
		probe := probes[t.Ref]
		switch {
		case probe == nil:
			out = append(out, diagnosis{
				Severity: SeverityInfo,
				Title:    fmt.Sprintf("Waiting for first TLS probe on %s:%d", t.Host, t.Port),
				Detail:   "checker-tls has not yet probed this endpoint. This is normal immediately after publishing a new TLSA record; status will clear on the next cycle.",
			})
		case probe.Error != "" || len(probe.Chain) == 0:
			out = append(out, diagnosis{
				Severity: SeverityCrit,
				Title:    fmt.Sprintf("Cannot reach %s:%d to validate DANE", t.Host, t.Port),
				Detail:   "TLS handshake failed — DANE publishes hashes for a certificate nobody can see. Either the service is down, the port is blocked, or STARTTLS negotiation is broken.",
				Fix:      handshakeFix(t),
			})
		default:
			// A usable chain was observed: run the record-level checks.
			if countMatched(t, probe) == 0 && len(t.Records) > 0 {
				out = append(out, diagnosis{
					Severity: SeverityCrit,
					Title:    fmt.Sprintf("No TLSA record matches the live certificate on %s:%d", t.Host, t.Port),
					Detail:   "This is the most common DANE outage cause: the certificate was rotated without rolling over the TLSA RRset, and validating resolvers are now rejecting the connection. Publish a TLSA record for the new certificate before removing the old one.",
					Fix:      proposedTLSA(t, probe),
				})
			}
			// An unknown (nil) ChainValid is treated the same as an invalid chain.
			if hasPKIXUsage(t) && (probe.ChainValid == nil || !*probe.ChainValid) {
				out = append(out, diagnosis{
					Severity: SeverityCrit,
					Title:    fmt.Sprintf("Usage 0/1 needs a publicly-trusted chain on %s:%d", t.Host, t.Port),
					Detail:   "TLSA usages 0 (PKIX-TA) and 1 (PKIX-EE) require the certificate chain to validate against system roots. Either re-issue through a publicly-trusted CA or switch to usage 2 / 3, which skip PKIX.",
				})
			}
			if warn := suspiciousUsage(t, probe); warn != "" {
				out = append(out, diagnosis{
					Severity: SeverityWarn,
					Title:    fmt.Sprintf("Suspicious TLSA usage on %s:%d", t.Host, t.Port),
					Detail:   warn,
				})
			}
		}
	}
	// Stable: crit first, then warn, then info; preserving encounter order
	// within each group keeps the table and the cards aligned.
	sort.SliceStable(out, func(i, j int) bool {
		return sevRank(out[i].Severity) < sevRank(out[j].Severity)
	})
	return out
}
// sevRank maps a severity string to its sort position: SeverityCrit sorts
// first (0), SeverityWarn second (1), and anything else, including
// SeverityInfo and unknown values, last (2).
func sevRank(s string) int {
	if s == SeverityCrit {
		return 0
	}
	if s == SeverityWarn {
		return 1
	}
	return 2
}
// hasPKIXUsage reports whether at least one TLSA record of the target uses a
// PKIX-constrained usage (UsagePKIXTA or UsagePKIXEE, i.e. usage 0 or 1).
// A target without records yields false.
func hasPKIXUsage(t TargetResult) bool {
	for _, rec := range t.Records {
		switch rec.Usage {
		case UsagePKIXTA, UsagePKIXEE:
			return true
		}
	}
	return false
}
// suspiciousUsage returns a human-readable hint when a record hash matches a
// chain slot that contradicts its declared usage (e.g. usage 3 whose hash
// actually matches the intermediate), almost always a publisher error.
//
// Only end-entity records (UsageDANEEE, UsagePKIXEE) are inspected, and they
// are compared against every certificate after the leaf. The hint for the
// first contradiction found is returned; an empty string means nothing
// suspicious was detected, including when p is nil or the chain holds fewer
// than two certificates.
func suspiciousUsage(t TargetResult, p *tls.TLSProbe) string {
	// Tolerate a missing probe like the sibling helpers do, and bail out when
	// there is no non-leaf certificate to compare against.
	if p == nil || len(p.Chain) < 2 {
		return ""
	}
	for _, r := range t.Records {
		if r.Usage != UsageDANEEE && r.Usage != UsagePKIXEE {
			continue
		}
		// Compare against non-leaf certs; any match there means the user
		// published the wrong usage.
		for _, c := range p.Chain[1:] {
			cand, err := recordCandidate(r, c)
			if err != nil {
				// No candidate could be derived for this certificate, so it
				// cannot be shown to match; move on to the next one.
				continue
			}
			if strings.EqualFold(cand, r.Certificate) {
				return "A record declared with usage 1/3 (end-entity) actually matches an intermediate certificate. It should probably use usage 0 or 2 (trust-anchor) instead."
			}
		}
	}
	return ""
}
// proposedTLSA builds a ready-to-paste TLSA resource record for the target's
// owner name from the leaf certificate the probe observed. It always proposes
// the "3 1 1" triplet (DANE-EE + SPKI + SHA-256), the profile Let's Encrypt
// users are pushed towards because it survives any cert rotation that keeps
// the same key pair.
//
// An empty string is returned when there is no probe or no chain to derive
// the record from.
func proposedTLSA(t TargetResult, p *tls.TLSProbe) string {
	if p != nil && len(p.Chain) > 0 {
		leaf := p.Chain[0]
		return fmt.Sprintf("%s IN TLSA 3 1 1 %s", t.Owner, leaf.SPKISHA256)
	}
	return ""
}
// handshakeFix suggests an openssl s_client command line for reproducing a
// failed probe by hand. The target host doubles as the SNI name, and the
// -starttls flag is included only when the target declares a STARTTLS
// protocol.
func handshakeFix(t TargetResult) string {
	host, port := t.Host, t.Port
	if t.STARTTLS == "" {
		return fmt.Sprintf("openssl s_client -connect %s:%d -servername %s", host, port, host)
	}
	return fmt.Sprintf("openssl s_client -connect %s:%d -starttls %s -servername %s", host, port, t.STARTTLS, host)
}
// targetStatus summarizes one target for the endpoints table. It returns the
// human-readable label and the class suffix used for the row's "status-" CSS
// class. Checks run in this order:
//
//   - no probe:                     "Waiting for probe", "unknown"
//   - probe error or empty chain:   "Handshake failed", "crit"
//   - no TLSA records:              "No records", "info"
//   - no record matches the chain:  "No match", "crit"
//   - otherwise:                    "<matched>/<total> match", "ok"
func targetStatus(t TargetResult, p *tls.TLSProbe) (label, class string) {
	switch {
	case p == nil:
		return "Waiting for probe", "unknown"
	case p.Error != "" || len(p.Chain) == 0:
		return "Handshake failed", "crit"
	case len(t.Records) == 0:
		return "No records", "info"
	}
	if hits := countMatched(t, p); hits > 0 {
		return fmt.Sprintf("%d/%d match", hits, len(t.Records)), "ok"
	}
	return "No match", "crit"
}
// starttlsLabel renders the optional STARTTLS suffix of an endpoint line. The
// protocol name is HTML-escaped here, so the result can be written into markup
// as-is. An empty protocol yields an empty string.
func starttlsLabel(s string) string {
	if s != "" {
		return " · STARTTLS " + html.EscapeString(s)
	}
	return ""
}
// reportCSS is the inline stylesheet GetHTMLReport writes into the report's
// <head>. The .finding.sev-* rules style the diagnosis cards (class suffix
// taken from diagnosis.Severity) and the tr.status-* rules colour the status
// column of the endpoints table (class suffix returned by targetStatus).
// NOTE(review): targetStatus can also return the "info" class, for which no
// tr.status-info rule is defined here, so those rows keep the default styling.
const reportCSS = `<style>
body{font-family:system-ui,sans-serif;margin:0;background:#fafbfc;color:#1b1f23;}
main{max-width:980px;margin:0 auto;padding:1.5rem;}
h1{margin:0 0 .25rem 0;}
.meta{color:#586069;margin:0 0 1.5rem 0;}
section{margin-bottom:2rem;}
h2{border-bottom:1px solid #e1e4e8;padding-bottom:.25rem;}
.finding{border-left:4px solid;padding:.75rem 1rem;margin:.75rem 0;background:#fff;border-radius:4px;}
.finding h3{margin:0 0 .25rem 0;font-size:1rem;}
.finding.sev-crit{border-color:#d73a49;}
.finding.sev-warn{border-color:#dbab09;}
.finding.sev-info{border-color:#0366d6;}
.fix{background:#1b1f23;color:#fafbfc;padding:.5rem .75rem;border-radius:4px;overflow-x:auto;font-size:.85rem;}
table{width:100%;border-collapse:collapse;background:#fff;}
th,td{padding:.5rem .75rem;border-bottom:1px solid #e1e4e8;text-align:left;vertical-align:top;}
tr.status-crit td:nth-child(2){color:#d73a49;font-weight:600;}
tr.status-ok td:nth-child(2){color:#22863a;font-weight:600;}
tr.status-unknown td:nth-child(2){color:#586069;}
code{font-size:.85rem;}
small{color:#586069;}
</style>`