checker-tls/checker/rule.go

package checker

import (
	"context"
	"fmt"

	sdk "git.happydns.org/checker-sdk-go/checker"
)

// Rule constructs the TLS posture rule, which condenses the per-endpoint TLS
// probe results of a checker run into one overall status.
func Rule() sdk.CheckRule {
	return new(tlsRule)
}

// tlsRule is the field-less rule value returned by Rule; everything it does
// is implemented in its methods.
type tlsRule struct{}

// Name returns the fixed identifier of this rule.
func (r *tlsRule) Name() string {
	const ruleName = "tls_posture"
	return ruleName
}

// Description returns a one-line, human-readable summary of what the rule
// reports on.
func (r *tlsRule) Description() string {
	const summary = "Summarises TLS handshake, certificate validity, hostname match and expiry across all probed endpoints"
	return summary
}

// ValidateOptions accepts any option set: the options are never inspected
// and the result is always nil.
func (r *tlsRule) ValidateOptions(opts sdk.CheckerOptions) error {
	_ = opts // deliberately unused; nothing here depends on the options
	return nil
}

// Evaluate reads the TLS probe observation from obs and collapses the
// per-endpoint results into one CheckState:
//
//   - StatusError when the observation cannot be read;
//   - StatusUnknown when the observation holds no probes;
//   - StatusCrit when at least one endpoint's worst issue is critical;
//   - StatusWarn when no endpoint is critical but at least one has a warning;
//   - StatusOK otherwise.
//
// The Crit, Warn and OK states carry the per-severity endpoint counts in
// Meta, and the Crit/Warn messages quote the first affected endpoint seen
// while iterating. opts is not consulted.
func (r *tlsRule) Evaluate(ctx context.Context, obs sdk.ObservationGetter, opts sdk.CheckerOptions) sdk.CheckState {
	var data TLSData
	err := obs.Get(ctx, ObservationKeyTLSProbes, &data)
	if err != nil {
		return sdk.CheckState{
			Status:  sdk.StatusError,
			Message: fmt.Sprintf("Failed to read tls_probes: %v", err),
			Code:    "tls_observation_error",
		}
	}

	total := len(data.Probes)
	if total == 0 {
		// An empty probe list is the normal state before any producer has
		// published entries for this target (or after a producer run
		// cleared them). Answering Unknown rather than a failure keeps the
		// check from flapping red between enrollment and the first
		// producer cycle.
		return sdk.CheckState{
			Status:  sdk.StatusUnknown,
			Message: "No TLS endpoints have been discovered for this target yet",
			Code:    "tls_no_endpoints",
		}
	}

	var (
		healthy, warned, critical int
		critExample, warnExample  string
	)
	for _, probe := range data.Probes {
		worst, critMsg, warnMsg := summarize(probe.Issues)

		// Each endpoint is counted exactly once, under its worst severity.
		if worst == SeverityCrit {
			critical++
			if critExample == "" {
				critExample = fmt.Sprintf("%s (%s)", probe.Endpoint, critMsg)
			}
			continue
		}
		if worst == SeverityWarn {
			warned++
			if warnExample == "" {
				warnExample = fmt.Sprintf("%s (%s)", probe.Endpoint, warnMsg)
			}
			continue
		}
		// Endpoints with no issues, or with nothing worse than a warning
		// or critical issue, count as OK.
		healthy++
	}

	meta := map[string]any{
		"probes": total,
		"ok":     healthy,
		"warn":   warned,
		"crit":   critical,
	}

	// Critical outranks warning, which outranks OK.
	if critical > 0 {
		return sdk.CheckState{
			Status:  sdk.StatusCrit,
			Message: fmt.Sprintf("%d/%d TLS endpoint(s) have critical issues: %s", critical, total, critExample),
			Code:    "tls_critical",
			Meta:    meta,
		}
	}
	if warned > 0 {
		return sdk.CheckState{
			Status:  sdk.StatusWarn,
			Message: fmt.Sprintf("%d/%d TLS endpoint(s) have warnings: %s", warned, total, warnExample),
			Code:    "tls_warning",
			Meta:    meta,
		}
	}
	return sdk.CheckState{
		Status:  sdk.StatusOK,
		Message: fmt.Sprintf("%d TLS endpoint(s) OK", total),
		Code:    "tls_ok",
		Meta:    meta,
	}
}

// summarize makes a single pass over issues and reports three things: the
// highest severity encountered (critical over warning over info, or "" when
// no issue carries one of those severities), the text of the first critical
// issue, and the text of the first warning. An issue's text is its Message,
// falling back to its Code when Message is empty. Collecting the texts in
// the same pass saves the caller a second walk over the slice.
func summarize(issues []Issue) (worst, firstCrit, firstWarn string) {
	for _, issue := range issues {
		text := issue.Message
		if text == "" {
			text = issue.Code
		}

		if issue.Severity == SeverityCrit {
			// Critical always wins, whatever was seen before.
			worst = SeverityCrit
			if firstCrit == "" {
				firstCrit = text
			}
			continue
		}

		if issue.Severity == SeverityWarn {
			// A warning may only upgrade "nothing yet" or info; it must
			// never downgrade an earlier critical.
			if worst == "" || worst == SeverityInfo {
				worst = SeverityWarn
			}
			if firstWarn == "" {
				firstWarn = text
			}
			continue
		}

		// Info only registers when nothing has been recorded so far.
		if issue.Severity == SeverityInfo && worst == "" {
			worst = SeverityInfo
		}
	}
	return worst, firstCrit, firstWarn
}