Add the plumbing that lets a checker receive (at evaluation, report
rendering, and metrics extraction) observations produced by other
checkers on DiscoveryEntry records it originally published.
Surface changes:
- RelatedObservation struct: one downstream observation, tagged with
the producing CheckerID and the Ref matching the DiscoveryEntry
it covers.
- ObservationGetter gains GetRelated(ctx, key), so rules can opt in
to cross-checker composition. mapObservationGetter (remote
/evaluate path) returns empty; the host owns lineage resolution.
- ReportContext interface: Data() + Related(key). Reporters consume
it instead of a raw json.RawMessage, which collapses the former
legacy/Ctx duplicate and gives each reporter method one uniform signature:
    GetHTMLReport(ctx ReportContext) (string, error)
    ExtractMetrics(ctx ReportContext, t time.Time) ([]CheckMetric, error)
- NewReportContext(data, related) and StaticReportContext(data) build
fixed-payload contexts for entry points without an ObservationContext.
- ExternalReportRequest gains a Related map so the host can ship
pre-composed lineage to a remote checker over /report. The SDK's
/report handler threads it through to the reporter via
NewReportContext, closing the wire gap that previously forced
remote reports to a StaticReportContext with no related data.
Tests cover the Related map round-trip end-to-end via a peeking provider.
512 lines
20 KiB
Go
512 lines
20 KiB
Go
// Copyright 2020-2026 The happyDomain Authors
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
// Package checker provides the public types and helpers for writing
|
|
// happyDomain checker plugins. It is the stable API surface that all
|
|
// external checkers should depend on.
|
|
package checker
|
|
|
|
import (
|
|
"context"
|
|
"encoding/json"
|
|
"fmt"
|
|
"time"
|
|
)
|
|
|
|
// CheckScopeType represents the scope level of a check target.
type CheckScopeType int

// Scope levels. The values are sequential and start at zero:
// CheckScopeAdmin is 0, CheckScopeUser 1, CheckScopeDomain 2,
// CheckScopeZone 3 and CheckScopeService 4.
//
// iota is anchored on the first constant so that the numbering is derived
// from declaration order alone; the resulting values are identical to the
// previous declaration, which pinned the first constant to a literal 0 and
// only started iota on the second one.
const (
	CheckScopeAdmin CheckScopeType = iota
	CheckScopeUser
	CheckScopeDomain
	CheckScopeZone
	CheckScopeService
)
|
|
|
|
// Auto-fill identifiers. Each constant names one piece of execution context;
// they appear to be the values intended for CheckerOptionField.AutoFill
// (TODO confirm against the host).
const (
	AutoFillDomainName  = "domain_name"
	AutoFillSubdomain   = "subdomain"
	AutoFillZone        = "zone"
	AutoFillServiceType = "service_type"
	AutoFillService     = "service"

	// AutoFillDiscoveryEntries is filled with the DiscoveryEntry records
	// that other checkers published on the same target. No filtering by
	// Type is done by the host: each consumer keeps the contracts it
	// understands and skips everything else.
	AutoFillDiscoveryEntries = "discovery_entries"
)
|
|
|
|
// CheckTarget identifies the resource a check applies to. Identifiers are
|
|
// passed as opaque strings so the SDK stays self-contained and does not
|
|
// depend on any happyDomain-specific identifier type. The host is free to
|
|
// parse them into its own representation at the boundary.
|
|
type CheckTarget struct {
|
|
UserId string `json:"userId,omitempty"`
|
|
DomainId string `json:"domainId,omitempty"`
|
|
ServiceId string `json:"serviceId,omitempty"`
|
|
ServiceType string `json:"serviceType,omitempty"`
|
|
}
|
|
|
|
// Scope returns the most specific scope level of this target.
|
|
func (t CheckTarget) Scope() CheckScopeType {
|
|
if t.ServiceId != "" {
|
|
return CheckScopeService
|
|
}
|
|
if t.DomainId != "" {
|
|
return CheckScopeDomain
|
|
}
|
|
return CheckScopeUser
|
|
}
|
|
|
|
// String returns a stable, unambiguous string representation of the target.
|
|
// All three fields are always present (even when empty) so that different
|
|
// targets never produce the same string.
|
|
func (t CheckTarget) String() string {
|
|
return t.UserId + "/" + t.DomainId + "/" + t.ServiceId
|
|
}
|
|
|
|
// CheckerAvailability states the scopes on which a checker is able to run.
//
// The ApplyTo* flags enable the checker for the matching scope. The LimitTo*
// lists presumably narrow it further to specific providers or services
// (TODO confirm the matching rules on the host side).
type CheckerAvailability struct {
	ApplyToDomain    bool     `json:"applyToDomain,omitempty"`
	ApplyToZone      bool     `json:"applyToZone,omitempty"`
	ApplyToService   bool     `json:"applyToService,omitempty"`
	LimitToProviders []string `json:"limitToProviders,omitempty"`
	LimitToServices  []string `json:"limitToServices,omitempty"`
}
|
|
|
|
// CheckerOptions carries the runtime options of one checker execution,
// keyed by option identifier.
type CheckerOptions map[string]any
|
|
|
|
// CheckerOptionField documents one option accepted by a checker. Its fields
// mirror happyDomain's generic Field type, which lets the host re-export it
// as a type alias and keep its existing form-rendering code untouched.
type CheckerOptionField struct {
	// Id identifies the option; it is the key used in CheckerOptions.
	Id string `json:"id" binding:"required"`

	// Type is the option's type, spelled as a string
	// (e.g. "string", "number", "uint", "bool").
	Type string `json:"type" binding:"required"`

	// Label is the title displayed to the user.
	Label string `json:"label,omitempty"`

	// Placeholder is the hint displayed inside the input.
	Placeholder string `json:"placeholder,omitempty"`

	// Default is the value applied when the user leaves the option unset.
	Default any `json:"default,omitempty"`

	// Choices lists the values offered by a dropdown option.
	Choices []string `json:"choices,omitempty"`

	// Required tells whether the option has to be filled.
	Required bool `json:"required,omitempty"`

	// Secret marks an option that holds sensitive information
	// (API keys, tokens, …).
	Secret bool `json:"secret,omitempty"`

	// Hide marks an option that should not be shown to the user.
	Hide bool `json:"hide,omitempty"`

	// Textarea requests a multi-line input.
	Textarea bool `json:"textarea,omitempty"`

	// Description is a help sentence explaining the option.
	Description string `json:"description,omitempty"`

	// AutoFill marks an option that the host populates by itself from the
	// execution context (e.g. domain name, service payload).
	AutoFill string `json:"autoFill,omitempty"`

	// NoOverride means that, once the option is set at a given scope, more
	// specific scopes cannot override its value.
	NoOverride bool `json:"noOverride,omitempty"`
}
|
|
|
|
// CheckerOptionDocumentation describes a single checker option.
|
|
type CheckerOptionDocumentation = CheckerOptionField
|
|
|
|
// CheckerOptionsDocumentation describes all options a checker accepts, organized by level.
|
|
type CheckerOptionsDocumentation struct {
|
|
AdminOpts []CheckerOptionDocumentation `json:"adminOpts,omitempty"`
|
|
UserOpts []CheckerOptionDocumentation `json:"userOpts,omitempty"`
|
|
DomainOpts []CheckerOptionDocumentation `json:"domainOpts,omitempty"`
|
|
ServiceOpts []CheckerOptionDocumentation `json:"serviceOpts,omitempty"`
|
|
RunOpts []CheckerOptionDocumentation `json:"runOpts,omitempty"`
|
|
}
|
|
|
|
// Status is the outcome of a check evaluation.
type Status int

// Known statuses, numbered from 0 (StatusUnknown) to 5 (StatusError).
const (
	StatusUnknown Status = iota
	StatusOK
	StatusInfo
	StatusWarn
	StatusCrit
	StatusError
)

// String gives the human-readable name of the status ("UNKNOWN", "OK",
// "INFO", "WARN", "CRIT" or "ERROR"). Any value outside the declared
// constants is rendered as "Status(N)", N being its numeric value.
func (s Status) String() string {
	names := [...]string{
		StatusUnknown: "UNKNOWN",
		StatusOK:      "OK",
		StatusInfo:    "INFO",
		StatusWarn:    "WARN",
		StatusCrit:    "CRIT",
		StatusError:   "ERROR",
	}
	// Negative or too-large values have no entry in the table.
	if s < 0 || int(s) >= len(names) {
		return fmt.Sprintf("Status(%d)", int(s))
	}
	return names[s]
}
|
|
|
|
// CheckState is the result of evaluating a single rule.
|
|
type CheckState struct {
|
|
Status Status `json:"status"`
|
|
Message string `json:"message"`
|
|
Code string `json:"code,omitempty"`
|
|
Meta map[string]any `json:"meta,omitempty"`
|
|
}
|
|
|
|
// CheckMetric is one metric sample emitted by a check: a named value with an
// optional unit and labels, stamped with the time it refers to.
//
// NOTE(review): Value carries binding:"required"; validators that treat the
// zero value as "missing" would then reject a legitimate 0 — confirm against
// the validator used by the host.
type CheckMetric struct {
	Name      string            `json:"name" binding:"required"`
	Value     float64           `json:"value" binding:"required"`
	Unit      string            `json:"unit,omitempty"`
	Labels    map[string]string `json:"labels,omitempty"`
	Timestamp time.Time         `json:"timestamp" binding:"required" format:"date-time"`
}
|
|
|
|
// ObservationKey names a type of observation data. It is an alias of string,
// so plain strings can be used wherever a key is expected.
type ObservationKey = string
|
|
|
|
// CheckIntervalSpec gives the scheduling bounds of a checker: the smallest
// and largest allowed interval, and the one used by default. The durations
// are serialized as integers.
type CheckIntervalSpec struct {
	Min     time.Duration `json:"min" swaggertype:"integer"`
	Max     time.Duration `json:"max" swaggertype:"integer"`
	Default time.Duration `json:"default" swaggertype:"integer"`
}
|
|
|
|
// ObservationProvider collects a specific type of data for a target.
|
|
type ObservationProvider interface {
|
|
Key() ObservationKey
|
|
Collect(ctx context.Context, opts CheckerOptions) (any, error)
|
|
}
|
|
|
|
// CheckRuleInfo is the JSON-serializable description of a rule, for API/UI listing.
|
|
type CheckRuleInfo struct {
|
|
Name string `json:"name"`
|
|
Description string `json:"description"`
|
|
Options *CheckerOptionsDocumentation `json:"options,omitempty"`
|
|
}
|
|
|
|
// CheckRule evaluates observations and produces a CheckState.
|
|
type CheckRule interface {
|
|
Name() string
|
|
Description() string
|
|
Evaluate(ctx context.Context, obs ObservationGetter, opts CheckerOptions) CheckState
|
|
}
|
|
|
|
// CheckRuleWithOptions is an optional interface that rules can implement
|
|
// to declare their own options documentation for API/UI grouping.
|
|
type CheckRuleWithOptions interface {
|
|
CheckRule
|
|
Options() CheckerOptionsDocumentation
|
|
}
|
|
|
|
// ObservationGetter provides access to observation data (used by CheckRule).
|
|
// Get unmarshals observation data into dest (like json.Unmarshal).
|
|
//
|
|
// GetRelated returns observations produced by other checkers on DiscoveryEntry
|
|
// records originally published by the current target. It is the core of
|
|
// cross-checker composition: a checker that published some entries via its
|
|
// DiscoveryPublisher can, during rule evaluation, fetch the latest
|
|
// observations that cover those entries and fold them into its own states.
|
|
//
|
|
// GetRelated returns an empty slice (not an error) when there is nothing
|
|
// to relate (no entries originally published, no downstream observation
|
|
// yet, no downstream checker registered for the entry type, …). Callers
|
|
// handle that as "no related data", typically skipping optional sections.
|
|
type ObservationGetter interface {
|
|
Get(ctx context.Context, key ObservationKey, dest any) error
|
|
GetRelated(ctx context.Context, key ObservationKey) ([]RelatedObservation, error)
|
|
}
|
|
|
|
// RelatedObservation is a single observation, produced by some other checker,
|
|
// that covers a DiscoveryEntry originally published by the current target.
|
|
//
|
|
// Data carries the raw JSON payload; consumers parse it according to the
|
|
// producer's schema, which they are expected to know via external agreement
|
|
// (typically a shared contract package imported by both producer and
|
|
// consumer).
|
|
type RelatedObservation struct {
|
|
// CheckerID identifies the producer of this observation.
|
|
CheckerID string `json:"checkerId"`
|
|
|
|
// Key is the observation key the producer filled.
|
|
Key ObservationKey `json:"key"`
|
|
|
|
// Data is the raw JSON payload as persisted by the producer.
|
|
Data json.RawMessage `json:"data"`
|
|
|
|
// CollectedAt is when the producer ran its Collect.
|
|
CollectedAt time.Time `json:"collectedAt"`
|
|
|
|
// Ref matches DiscoveryEntry.Ref of the entry this observation covers.
|
|
// Opaque to the SDK; meaningful within the producer/consumer contract.
|
|
Ref string `json:"ref"`
|
|
}
|
|
|
|
// CheckAggregator combines multiple CheckStates into a single result.
|
|
type CheckAggregator interface {
|
|
Aggregate(states []CheckState) CheckState
|
|
}
|
|
|
|
// ReportContext carries both the primary observation payload and any
|
|
// observations produced by other checkers that cover the same discovery
|
|
// entries. Hosts build a ReportContext and hand it to reporter methods.
|
|
//
|
|
// The method set is deliberately tiny: a single primary payload (Data) and
|
|
// a query for related observations by key (Related). Hosts return nil from
|
|
// Related when there is nothing to relate; reporters must tolerate that.
|
|
type ReportContext interface {
|
|
Data() json.RawMessage
|
|
Related(key ObservationKey) []RelatedObservation
|
|
}
|
|
|
|
// NewReportContext returns a ReportContext backed by a primary payload and
|
|
// a pre-resolved map of related observations by key. The SDK's /report HTTP
|
|
// handler uses this to wrap ExternalReportRequest contents; hosts and tests
|
|
// can use it whenever they already have the related observations in memory.
|
|
//
|
|
// Passing a nil or empty related map is fine; Related(key) will then return
|
|
// nil, just like StaticReportContext.
|
|
func NewReportContext(data json.RawMessage, related map[ObservationKey][]RelatedObservation) ReportContext {
|
|
return fixedReportContext{data: data, related: related}
|
|
}
|
|
|
|
// StaticReportContext is a shorthand for NewReportContext(data, nil): a
|
|
// ReportContext with a primary payload and no related observations.
|
|
// Intended for tests and ad-hoc callers that have no lineage to supply.
|
|
func StaticReportContext(data json.RawMessage) ReportContext {
|
|
return fixedReportContext{data: data}
|
|
}
|
|
|
|
type fixedReportContext struct {
|
|
data json.RawMessage
|
|
related map[ObservationKey][]RelatedObservation
|
|
}
|
|
|
|
func (f fixedReportContext) Data() json.RawMessage { return f.data }
|
|
func (f fixedReportContext) Related(key ObservationKey) []RelatedObservation {
|
|
if f.related == nil {
|
|
return nil
|
|
}
|
|
return f.related[key]
|
|
}
|
|
|
|
// CheckerHTMLReporter is an optional interface that observation providers can
|
|
// implement to render their stored data as a full HTML document (for iframe embedding).
|
|
// Detect support with a type assertion: _, ok := provider.(CheckerHTMLReporter)
|
|
//
|
|
// The ReportContext carries the primary observation payload plus any
|
|
// downstream observations produced on DiscoveryEntry records this checker
|
|
// published. Implementations that do not need related observations can
|
|
// simply consume ctx.Data().
|
|
type CheckerHTMLReporter interface {
|
|
GetHTMLReport(ctx ReportContext) (string, error)
|
|
}
|
|
|
|
// CheckerMetricsReporter is an optional interface that observation providers can
|
|
// implement to extract time-series metrics from their stored data.
|
|
// Detect support with a type assertion: _, ok := provider.(CheckerMetricsReporter)
|
|
//
|
|
// As with CheckerHTMLReporter, the ReportContext exposes related
|
|
// observations for cross-checker composition.
|
|
type CheckerMetricsReporter interface {
|
|
ExtractMetrics(ctx ReportContext, collectedAt time.Time) ([]CheckMetric, error)
|
|
}
|
|
|
|
// CheckerDefinitionProvider is an optional interface that observation providers can
|
|
// implement to expose their checker definition. Used by the SDK server to serve
|
|
// /definition and /evaluate endpoints without requiring a separate argument.
|
|
// Detect support with a type assertion: _, ok := provider.(CheckerDefinitionProvider)
|
|
type CheckerDefinitionProvider interface {
|
|
// Definition returns the checker definition for this provider.
|
|
Definition() *CheckerDefinition
|
|
}
|
|
|
|
// CheckerDefinition is the complete definition of a checker, registered via init().
|
|
type CheckerDefinition struct {
|
|
ID string `json:"id"`
|
|
Name string `json:"name"`
|
|
Version string `json:"version,omitempty"`
|
|
Availability CheckerAvailability `json:"availability"`
|
|
Options CheckerOptionsDocumentation `json:"options"`
|
|
RulesInfo []CheckRuleInfo `json:"rules"`
|
|
Rules []CheckRule `json:"-"`
|
|
Aggregator CheckAggregator `json:"-"`
|
|
Interval *CheckIntervalSpec `json:"interval,omitempty"`
|
|
HasHTMLReport bool `json:"has_html_report,omitempty"`
|
|
HasMetrics bool `json:"has_metrics,omitempty"`
|
|
ObservationKeys []ObservationKey `json:"observationKeys,omitempty"`
|
|
}
|
|
|
|
// BuildRulesInfo populates RulesInfo from the Rules slice.
|
|
func (d *CheckerDefinition) BuildRulesInfo() {
|
|
d.RulesInfo = make([]CheckRuleInfo, len(d.Rules))
|
|
for i, rule := range d.Rules {
|
|
info := CheckRuleInfo{
|
|
Name: rule.Name(),
|
|
Description: rule.Description(),
|
|
}
|
|
if rwo, ok := rule.(CheckRuleWithOptions); ok {
|
|
opts := rwo.Options()
|
|
info.Options = &opts
|
|
}
|
|
d.RulesInfo[i] = info
|
|
}
|
|
}
|
|
|
|
// OptionsValidator is an optional interface that checkers (or their rules/providers)
|
|
// can implement to perform domain-specific validation of checker options.
|
|
type OptionsValidator interface {
|
|
ValidateOptions(opts CheckerOptions) error
|
|
}
|
|
|
|
// ExternalCollectRequest is sent to POST /collect on a remote checker endpoint.
|
|
type ExternalCollectRequest struct {
|
|
Key ObservationKey `json:"key"`
|
|
Target CheckTarget `json:"target"`
|
|
Options CheckerOptions `json:"options"`
|
|
}
|
|
|
|
// ExternalCollectResponse is returned by POST /collect on a remote checker endpoint.
|
|
type ExternalCollectResponse struct {
|
|
Data json.RawMessage `json:"data,omitempty"`
|
|
Entries []DiscoveryEntry `json:"entries,omitempty"`
|
|
Error string `json:"error,omitempty"`
|
|
}
|
|
|
|
// DiscoveryEntry is one "thing worth probing" that a checker declares as a
// by-product of its collection, so that other checkers can consume it
// without re-parsing raw observations.
//
// Payload is an opaque byte string for the SDK. Producer and consumer
// checkers settle on its schema through a separate contract (typically a
// small shared Go package imported by both). The SDK therefore stays free of
// protocol-specific concepts, and new entry families (TLS endpoint, HTTP
// probe, ACME challenge, DNSSEC key, …) can be introduced without changing
// it.
//
// happyDomain ingests entries into a separate index. Every new collection
// from the same source atomically replaces the set of entries previously
// published for the same (producer, target) pair.
type DiscoveryEntry struct {
	// Type is the name of the contract that Payload follows, e.g.
	// "tls.endpoint" or "http.probe". Producers and consumers match on this
	// string, which the SDK does not interpret. A reverse-DNS-ish
	// convention is recommended so that independent contracts do not
	// collide.
	Type string `json:"type"`

	// Ref is a stable identifier of the entry, chosen by the producer. The
	// host relies on it to dedupe entries across repeated collections and
	// to link related observations back to this entry
	// (RelatedObservation.Ref). Two producers may share the same Ref space:
	// the host namespaces them by (producer, target).
	Ref string `json:"ref"`

	// Payload is the entry-specific data, formatted as defined by the
	// contract named in Type. The SDK treats it as opaque.
	Payload json.RawMessage `json:"payload"`
}
|
|
|
|
// DiscoveryPublisher is an optional interface an ObservationProvider can
|
|
// co-implement to declare DiscoveryEntry records derived from the value it
|
|
// just collected.
|
|
//
|
|
// The host invokes DiscoverEntries immediately after Collect, passing the
|
|
// native Go value returned by Collect (no JSON round-trip). Implementations
|
|
// should therefore type-assert data to their concrete collection type and
|
|
// marshal each contract payload themselves.
|
|
type DiscoveryPublisher interface {
|
|
DiscoverEntries(data any) ([]DiscoveryEntry, error)
|
|
}
|
|
|
|
// ExternalEvaluateRequest is sent to POST /evaluate on a remote checker endpoint.
|
|
type ExternalEvaluateRequest struct {
|
|
Observations map[ObservationKey]json.RawMessage `json:"observations"`
|
|
Options CheckerOptions `json:"options"`
|
|
EnabledRules map[string]bool `json:"enabledRules,omitempty"`
|
|
}
|
|
|
|
// ExternalEvaluateResponse is returned by POST /evaluate on a remote checker endpoint.
|
|
type ExternalEvaluateResponse struct {
|
|
States []CheckState `json:"states"`
|
|
Error string `json:"error,omitempty"`
|
|
}
|
|
|
|
// ExternalReportRequest is sent to POST /report on a remote checker endpoint.
|
|
//
|
|
// Related carries observations produced by other checkers on DiscoveryEntry
|
|
// records originally published by the target of this report, that is, the
|
|
// cross-checker lineage that ObservationGetter.GetRelated would expose in
|
|
// the in-process path. The host composes it before making the HTTP request;
|
|
// when absent, the remote checker receives a context that reports no
|
|
// related observations (equivalent to StaticReportContext).
|
|
type ExternalReportRequest struct {
|
|
Key ObservationKey `json:"key"`
|
|
Data json.RawMessage `json:"data"`
|
|
Related map[ObservationKey][]RelatedObservation `json:"related,omitempty"`
|
|
}
|
|
|
|
// HealthResponse is the body a remote checker endpoint answers to
// GET /health. It exposes lightweight runtime signals, so that a scheduler
// can choose the least busy worker among equivalent checker instances.
//
// LoadAvg follows /proc/loadavg semantics: it holds the 1, 5 and 15-minute
// exponentially weighted moving averages of the InFlight request count,
// sampled every 5 seconds. Dividing it by NumCPU gives a saturation
// estimate.
type HealthResponse struct {
	// Status is a coarse liveness indicator. It is currently always "ok";
	// "degraded" is reserved for future use.
	Status string `json:"status"`

	// Uptime is the (fractional) number of seconds elapsed since the
	// server started.
	Uptime float64 `json:"uptime_seconds"`

	// NumCPU is what runtime.NumCPU() returns on this worker.
	NumCPU int `json:"num_cpu"`

	// InFlight counts the work requests (/collect, /evaluate, /report)
	// being processed right now. /health and /definition are excluded.
	InFlight int64 `json:"inflight"`

	// TotalRequests counts the work requests served since the server
	// started. /health and /definition are excluded.
	TotalRequests uint64 `json:"total_requests"`

	// LoadAvg contains the 1, 5 and 15-minute EWMAs of InFlight.
	LoadAvg [3]float64 `json:"loadavg"`
}
|