From 660fda9c3aa96c25dbdcfda7433e7e3ebbf9fcb5 Mon Sep 17 00:00:00 2001 From: Pierre-Olivier Mercier Date: Sun, 26 Apr 2026 10:52:30 +0700 Subject: [PATCH 1/6] server: add -healthcheck flag for scratch-image Docker probes Registered on the default FlagSet; ListenAndServe intercepts it and exits 0/1 after probing /health on the -listen address. Lets checker-* Dockerfiles add HEALTHCHECK without any main.go change, even though the runtime image is scratch (no shell, no curl, no wget). --- checker/server/healthcheck.go | 81 ++++++++++++++++++++++++++++++ checker/server/healthcheck_test.go | 72 ++++++++++++++++++++++++++ checker/server/server.go | 13 +++++ 3 files changed, 166 insertions(+) create mode 100644 checker/server/healthcheck.go create mode 100644 checker/server/healthcheck_test.go diff --git a/checker/server/healthcheck.go b/checker/server/healthcheck.go new file mode 100644 index 0000000..bb79d22 --- /dev/null +++ b/checker/server/healthcheck.go @@ -0,0 +1,81 @@ +// Copyright 2020-2026 The happyDomain Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package server + +import ( + "context" + "flag" + "fmt" + "net" + "net/http" + "strings" + "time" +) + +// healthcheckMode is registered on the default flag set so any consumer that +// calls flag.Parse() before ListenAndServe (the standard pattern in our +// checker mains) gets the behaviour for free. 
When set, ListenAndServe +// performs a short-lived HTTP probe against /health on the configured listen +// address and exits 0/1 instead of starting the server. This lets the same +// binary act as its own Docker HEALTHCHECK probe for scratch images, where +// no shell, curl or wget is available. +var healthcheckMode = flag.Bool( + "healthcheck", + false, + "probe /health on the server's listen address and exit 0 if healthy, 1 "+ + "otherwise (intended as a Docker HEALTHCHECK for scratch-based images)", +) + +// runHealthcheck performs a GET against http://<addr>/health with a short +// timeout. Returns nil on a 2xx response, an error otherwise. A bind address +// like ":8080" or "0.0.0.0:8080" is rewritten to dial the loopback interface +// so the probe targets the local process. +func runHealthcheck(addr string) error { + host, port, err := net.SplitHostPort(normalizeHealthcheckAddr(addr)) + if err != nil { + return fmt.Errorf("invalid listen addr %q: %w", addr, err) + } + if host == "" || host == "0.0.0.0" || host == "::" { + host = "127.0.0.1" + } + url := fmt.Sprintf("http://%s/health", net.JoinHostPort(host, port)) + + ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) + defer cancel() + req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil) + if err != nil { + return err + } + client := &http.Client{Timeout: 2 * time.Second} + resp, err := client.Do(req) + if err != nil { + return err + } + defer resp.Body.Close() + if resp.StatusCode/100 != 2 { + return fmt.Errorf("unhealthy: HTTP %d", resp.StatusCode) + } + return nil +} + +func normalizeHealthcheckAddr(a string) string { + if strings.HasPrefix(a, ":") { + return "127.0.0.1" + a + } + if strings.HasPrefix(a, "[::]:") { + return "[::1]:" + strings.TrimPrefix(a, "[::]:") + } + return a +} diff --git a/checker/server/healthcheck_test.go b/checker/server/healthcheck_test.go new file mode 100644 index 0000000..daa4bc5 --- /dev/null +++ b/checker/server/healthcheck_test.go @@ -0,0
+1,72 @@ +// Copyright 2020-2026 The happyDomain Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package server + +import ( + "net/http" + "net/http/httptest" + "strings" + "testing" +) + +func TestRunHealthcheck_OK(t *testing.T) { + mux := http.NewServeMux() + mux.HandleFunc("/health", func(w http.ResponseWriter, _ *http.Request) { + w.WriteHeader(http.StatusOK) + }) + srv := httptest.NewServer(mux) + defer srv.Close() + + addr := strings.TrimPrefix(srv.URL, "http://") + if err := runHealthcheck(addr); err != nil { + t.Fatalf("runHealthcheck(%s) returned error: %v", addr, err) + } +} + +func TestRunHealthcheck_NonOK(t *testing.T) { + mux := http.NewServeMux() + mux.HandleFunc("/health", func(w http.ResponseWriter, _ *http.Request) { + w.WriteHeader(http.StatusServiceUnavailable) + }) + srv := httptest.NewServer(mux) + defer srv.Close() + + addr := strings.TrimPrefix(srv.URL, "http://") + if err := runHealthcheck(addr); err == nil { + t.Fatalf("runHealthcheck against 503 returned nil; want error") + } +} + +func TestRunHealthcheck_Unreachable(t *testing.T) { + // Port 1 (tcpmux) on loopback: privileged and rarely bound, so nothing should be listening.
+ if err := runHealthcheck("127.0.0.1:1"); err == nil { + t.Fatalf("runHealthcheck against unreachable port returned nil; want error") + } +} + +func TestNormalizeHealthcheckAddr(t *testing.T) { + cases := map[string]string{ + ":8080": "127.0.0.1:8080", + "127.0.0.1:8080": "127.0.0.1:8080", + "0.0.0.0:8080": "0.0.0.0:8080", + "[::1]:8080": "[::1]:8080", + "[::]:8080": "[::1]:8080", + } + for in, want := range cases { + if got := normalizeHealthcheckAddr(in); got != want { + t.Errorf("normalizeHealthcheckAddr(%q) = %q, want %q", in, got, want) + } + } +} diff --git a/checker/server/server.go b/checker/server/server.go index 26f50ee..caae01f 100644 --- a/checker/server/server.go +++ b/checker/server/server.go @@ -26,6 +26,7 @@ import ( "log" "math" "net/http" + "os" "runtime" "strings" "sync" @@ -170,7 +171,19 @@ func (s *Server) HandleFunc(pattern string, handler func(http.ResponseWriter, *h // ListenAndServe does not stop the background load-average sampler on return; // call Close to stop it. This is not required for process-scoped usage but is // recommended for tests and embedded lifecycles. +// +// If the consumer's flag.Parse() set the SDK-registered -healthcheck flag, +// ListenAndServe never starts the server: it probes /health on addr and calls +// os.Exit(0) on success or os.Exit(1) on failure. This is what lets a +// scratch-based Docker image use the binary itself as its HEALTHCHECK probe. 
func (s *Server) ListenAndServe(addr string) error { + if *healthcheckMode { + if err := runHealthcheck(addr); err != nil { + fmt.Fprintln(os.Stderr, "healthcheck failed:", err) + os.Exit(1) + } + os.Exit(0) + } log.Printf("checker listening on %s", addr) return http.ListenAndServe(addr, requestLogger(s.mux)) } From 8f8dc3ca57b221806a705b70b77af9422d89dc40 Mon Sep 17 00:00:00 2001 From: Pierre-Olivier Mercier Date: Mon, 27 Apr 2026 00:23:13 +0700 Subject: [PATCH 2/6] server: graceful shutdown on SIGINT/SIGTERM in ListenAndServe Drains in-flight requests within a 10s timeout and stops the load-average sampler before returning. Callers needing custom signal handling can still opt out via Handler() and run their own http.Server. --- checker/server/server.go | 55 ++++++++++++++++++++++++++++++++++++---- 1 file changed, 50 insertions(+), 5 deletions(-) diff --git a/checker/server/server.go b/checker/server/server.go index caae01f..763a600 100644 --- a/checker/server/server.go +++ b/checker/server/server.go @@ -27,10 +27,12 @@ import ( "math" "net/http" "os" + "os/signal" "runtime" "strings" "sync" "sync/atomic" + "syscall" "time" "git.happydns.org/checker-sdk-go/checker" @@ -44,6 +46,10 @@ const maxRequestBodySize = 1 << 20 // 5 seconds matches the Unix kernel's loadavg cadence. const loadSampleInterval = 5 * time.Second +// shutdownTimeout bounds how long ListenAndServe waits for in-flight +// requests to drain after receiving SIGINT or SIGTERM. +const shutdownTimeout = 10 * time.Second + // EWMA smoothing factors for 1, 5, and 15-minute windows sampled every // loadSampleInterval. Derived as 1 - exp(-interval/window) so that the // steady-state response to a constant InFlight of N converges to N. @@ -166,11 +172,15 @@ func (s *Server) HandleFunc(pattern string, handler func(http.ResponseWriter, *h s.mux.HandleFunc(pattern, handler) } -// ListenAndServe starts the HTTP server on the given address. 
+// ListenAndServe starts the HTTP server on the given address and blocks +// until the server stops. // -// ListenAndServe does not stop the background load-average sampler on return; -// call Close to stop it. This is not required for process-scoped usage but is -// recommended for tests and embedded lifecycles. +// ListenAndServe installs a SIGINT/SIGTERM handler that triggers a graceful +// shutdown: new connections are refused and in-flight requests are given up +// to shutdownTimeout to complete. The background load-average sampler is +// stopped via Close before returning. Callers who need their own signal +// handling or shutdown semantics should use Handler() and run their own +// http.Server instead. // // If the consumer's flag.Parse() set the SDK-registered -healthcheck flag, // ListenAndServe never starts the server: it probes /health on addr and calls @@ -184,8 +194,43 @@ func (s *Server) ListenAndServe(addr string) error { } os.Exit(0) } + + srv := &http.Server{Addr: addr, Handler: requestLogger(s.mux)} + + sigCh := make(chan os.Signal, 1) + signal.Notify(sigCh, syscall.SIGINT, syscall.SIGTERM) + defer signal.Stop(sigCh) + + shutdownErr := make(chan error, 1) + go func() { + sig, ok := <-sigCh + if !ok { + shutdownErr <- nil + return + } + log.Printf("checker received %s, shutting down (timeout %s)", sig, shutdownTimeout) + ctx, cancel := context.WithTimeout(context.Background(), shutdownTimeout) + defer cancel() + shutdownErr <- srv.Shutdown(ctx) + }() + log.Printf("checker listening on %s", addr) - return http.ListenAndServe(addr, requestLogger(s.mux)) + err := srv.ListenAndServe() + signal.Stop(sigCh) + close(sigCh) + + if err == http.ErrServerClosed { + if sErr := <-shutdownErr; sErr != nil { + err = sErr + } else { + err = nil + } + } + + if cErr := s.Close(); cErr != nil && err == nil { + err = cErr + } + return err } // Close stops the background load-average sampler goroutine. 
It is safe to From c1de9aca1cab48e7fd97a02e6ee2f58e907441f6 Mon Sep 17 00:00:00 2001 From: Pierre-Olivier Mercier Date: Wed, 29 Apr 2026 17:35:13 +0700 Subject: [PATCH 3/6] checker: add JoinRelative helper for service-relative owner names --- checker/names.go | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 checker/names.go diff --git a/checker/names.go b/checker/names.go new file mode 100644 index 0000000..61df52e --- /dev/null +++ b/checker/names.go @@ -0,0 +1,37 @@ +// Copyright 2020-2026 The happyDomain Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package checker + +import "strings" + +// JoinRelative treats name as relative to origin, as happyDomain encodes +// service-embedded record owners and subdomains. An empty or "@" name +// resolves to the origin itself; an empty origin returns the trimmed name +// unchanged. A name already suffixed by origin is returned as-is so that +// absolute encodings round-trip safely. Trailing dots are stripped. +func JoinRelative(name, origin string) string { + origin = strings.TrimSuffix(origin, ".") + name = strings.TrimSuffix(name, ".") + if origin == "" { + return name + } + if name == "" || name == "@" { + return origin + } + if name == origin || strings.HasSuffix(name, "."+origin) { + return name + } + return name + "." 
+ origin +} From c72558e266b4f52e5dec7c45808cae7979054a58 Mon Sep 17 00:00:00 2001 From: Pierre-Olivier Mercier Date: Tue, 19 May 2026 22:05:07 +0800 Subject: [PATCH 4/6] checker: pass EnabledRules to Collect via context Providers can now skip optional work (network calls, paid API hits) for rules the host has disabled. The /collect request grows an EnabledRules field, and server.handleCollect attaches it to the context with WithEnabledRules; providers read it via EnabledRulesFromContext or the per-rule RuleEnabled helper. --- checker/context.go | 52 ++++++++++++++++++++++++++++++++++++++++ checker/server/server.go | 3 ++- checker/types.go | 13 +++++++--- 3 files changed, 64 insertions(+), 4 deletions(-) create mode 100644 checker/context.go diff --git a/checker/context.go b/checker/context.go new file mode 100644 index 0000000..9dd8918 --- /dev/null +++ b/checker/context.go @@ -0,0 +1,52 @@ +// Copyright 2020-2026 The happyDomain Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package checker + +import "context" + +type enabledRulesCtxKey struct{} + +// WithEnabledRules returns a context carrying the host's per-rule enable map. +// The SDK server attaches it before calling ObservationProvider.Collect so +// providers can skip optional work (network calls, paid API hits, …) for +// rules the host has disabled. A nil map means "run everything". 
+func WithEnabledRules(ctx context.Context, enabled map[string]bool) context.Context { + if enabled == nil { + return ctx + } + return context.WithValue(ctx, enabledRulesCtxKey{}, enabled) +} + +// EnabledRulesFromContext returns the enabled-rule map attached by +// WithEnabledRules, or nil if none. RuleEnabled is the usual access pattern. +func EnabledRulesFromContext(ctx context.Context) map[string]bool { + m, _ := ctx.Value(enabledRulesCtxKey{}).(map[string]bool) + return m +} + +// RuleEnabled reports whether ruleName is enabled given the host's map. +// Absent rules default to enabled (nil map or rule not in map), matching +// the SDK server's evaluate-side semantics. +func RuleEnabled(ctx context.Context, ruleName string) bool { + m := EnabledRulesFromContext(ctx) + if m == nil { + return true + } + enabled, ok := m[ruleName] + if !ok { + return true + } + return enabled +} diff --git a/checker/server/server.go b/checker/server/server.go index 763a600..5505307 100644 --- a/checker/server/server.go +++ b/checker/server/server.go @@ -325,7 +325,8 @@ func (s *Server) handleCollect(w http.ResponseWriter, r *http.Request) { return } - data, err := s.provider.Collect(r.Context(), req.Options) + ctx := checker.WithEnabledRules(r.Context(), req.EnabledRules) + data, err := s.provider.Collect(ctx, req.Options) if err != nil { writeJSON(w, http.StatusInternalServerError, checker.ExternalCollectResponse{ Error: err.Error(), diff --git a/checker/types.go b/checker/types.go index 7256766..ea25247 100644 --- a/checker/types.go +++ b/checker/types.go @@ -421,10 +421,17 @@ type OptionsValidator interface { } // ExternalCollectRequest is sent to POST /collect on a remote checker endpoint. +// +// EnabledRules lets the host inform the provider which rules will be evaluated +// downstream, so the provider can skip optional work (network calls, paid API +// hits, …) for data that would not surface in any state. 
nil means "run +// everything"; an explicit map with a rule name set to false means that rule +// is off. Providers access the value via EnabledRulesFromContext(ctx). type ExternalCollectRequest struct { - Key ObservationKey `json:"key"` - Target CheckTarget `json:"target"` - Options CheckerOptions `json:"options"` + Key ObservationKey `json:"key"` + Target CheckTarget `json:"target"` + Options CheckerOptions `json:"options"` + EnabledRules map[string]bool `json:"enabledRules,omitempty"` } // ExternalCollectResponse is returned by POST /collect on a remote checker endpoint. From f203b2e573cd16cdc2e79e78ee9069652c977a2e Mon Sep 17 00:00:00 2001 From: Pierre-Olivier Mercier Date: Wed, 20 May 2026 13:59:44 +0800 Subject: [PATCH 5/6] checker: add POST /definition with precheck failures Introduces an optional RulePrecheck interface so rules can declare prerequisite checks against the current options (e.g. "missing API key"). POST /definition mirrors GET /definition and adds a precheck_failures map keyed by rule name, letting a UI fetch the definition and precheck results in a single round-trip. 
--- checker/server/server.go | 33 +++++++++++++ checker/server/server_test.go | 89 +++++++++++++++++++++++++++++++++++ checker/types.go | 28 +++++++++++ 3 files changed, 150 insertions(+) diff --git a/checker/server/server.go b/checker/server/server.go index 5505307..5075b04 100644 --- a/checker/server/server.go +++ b/checker/server/server.go @@ -133,6 +133,7 @@ func New(provider checker.ObservationProvider) *Server { s.definition = def s.definition.BuildRulesInfo() s.mux.HandleFunc("GET /definition", s.handleDefinition) + s.mux.HandleFunc("POST /definition", s.handlePrecheck) s.mux.Handle("POST /evaluate", s.TrackWork(http.HandlerFunc(s.handleEvaluate))) } } @@ -316,6 +317,38 @@ func (s *Server) handleDefinition(w http.ResponseWriter, r *http.Request) { writeJSON(w, http.StatusOK, s.definition) } +// handlePrecheck answers POST /definition: it returns the same +// definition body as GET /definition, plus a PrecheckFailures map +// listing rules whose prerequisites are unmet for the submitted +// options. Rules that do not implement checker.RulePrecheck, or whose +// Precheck returned nil, are omitted from that map. 
+func (s *Server) handlePrecheck(w http.ResponseWriter, r *http.Request) { + var req checker.RulePrecheckRequest + if r.ContentLength != 0 { + if err := json.NewDecoder(io.LimitReader(r.Body, maxRequestBodySize)).Decode(&req); err != nil { + writeJSON(w, http.StatusBadRequest, map[string]string{ + "error": fmt.Sprintf("invalid request body: %v", err), + }) + return + } + } + + failures := map[string]string{} + for _, rule := range s.definition.Rules { + pc, ok := rule.(checker.RulePrecheck) + if !ok { + continue + } + if err := pc.Precheck(r.Context(), req.Options); err != nil { + failures[rule.Name()] = err.Error() + } + } + writeJSON(w, http.StatusOK, checker.RulePrecheckResponse{ + CheckerDefinition: s.definition, + PrecheckFailures: failures, + }) +} + func (s *Server) handleCollect(w http.ResponseWriter, r *http.Request) { var req checker.ExternalCollectRequest if err := json.NewDecoder(io.LimitReader(r.Body, maxRequestBodySize)).Decode(&req); err != nil { diff --git a/checker/server/server_test.go b/checker/server/server_test.go index 9e0adc4..17bbd12 100644 --- a/checker/server/server_test.go +++ b/checker/server/server_test.go @@ -667,3 +667,92 @@ func TestServer_NoDefinition_NoEvaluateEndpoint(t *testing.T) { t.Error("POST /evaluate should not be available without CheckerDefinitionProvider") } } + +// prereqRule implements RulePrecheck, failing when a named option is empty. 
+type prereqRule struct { + name string + optKey string + msg string +} + +func (r *prereqRule) Name() string { return r.name } +func (r *prereqRule) Description() string { return "" } +func (r *prereqRule) Evaluate(ctx context.Context, obs checker.ObservationGetter, opts checker.CheckerOptions) []checker.CheckState { + return []checker.CheckState{{Status: checker.StatusOK}} +} +func (r *prereqRule) Precheck(ctx context.Context, opts checker.CheckerOptions) error { + if v, _ := opts[r.optKey].(string); v == "" { + return errors.New(r.msg) + } + return nil +} + +func TestServer_Precheck(t *testing.T) { + gated := &prereqRule{name: "gated", optKey: "api_key", msg: "missing API key"} + open := &dummyRule{name: "open", desc: "no prereq"} + p := &testProvider{ + key: "test", + definition: &checker.CheckerDefinition{ + ID: "test", + Rules: []checker.CheckRule{gated, open}, + }, + } + srv := newTestServer(p) + defer srv.Close() + handler := srv.Handler() + + // GET /definition stays static — no precheck information surfaces here. + rec := doRequest(handler, "GET", "/definition", nil, nil) + if rec.Code != http.StatusOK { + t.Fatalf("GET /definition = %d, want %d", rec.Code, http.StatusOK) + } + if bytes.Contains(rec.Body.Bytes(), []byte("precheck_failures")) { + t.Errorf("GET /definition leaked precheck_failures field: %s", rec.Body.String()) + } + + // POST /definition with empty opts: gated rule fails, open rule absent. 
+ rec = doRequest(handler, "POST", "/definition", checker.RulePrecheckRequest{Options: checker.CheckerOptions{}}, nil) + if rec.Code != http.StatusOK { + t.Fatalf("POST /definition (empty opts) = %d, want %d", rec.Code, http.StatusOK) + } + var resp checker.RulePrecheckResponse + if err := json.NewDecoder(rec.Body).Decode(&resp); err != nil { + t.Fatalf("decode POST /definition: %v", err) + } + if resp.CheckerDefinition == nil { + t.Fatalf("POST /definition response missing embedded CheckerDefinition") + } + if resp.ID != "test" { + t.Errorf("response ID = %q, want %q", resp.ID, "test") + } + if len(resp.RulesInfo) != 2 { + t.Errorf("response RulesInfo len = %d, want 2", len(resp.RulesInfo)) + } + if got := resp.PrecheckFailures["gated"]; got != "missing API key" { + t.Errorf("PrecheckFailures[gated] = %q, want %q", got, "missing API key") + } + if _, ok := resp.PrecheckFailures["open"]; ok { + t.Errorf("PrecheckFailures[open] should be absent (no RulePrecheck impl), got %q", resp.PrecheckFailures["open"]) + } + if len(resp.PrecheckFailures) != 1 { + t.Errorf("PrecheckFailures = %v, want exactly 1 entry", resp.PrecheckFailures) + } + + // POST /definition with sufficient opts: empty failure map. 
+ rec = doRequest(handler, "POST", "/definition", checker.RulePrecheckRequest{ + Options: checker.CheckerOptions{"api_key": "secret"}, + }, nil) + if rec.Code != http.StatusOK { + t.Fatalf("POST /definition (with opts) = %d, want %d", rec.Code, http.StatusOK) + } + resp = checker.RulePrecheckResponse{} + if err := json.NewDecoder(rec.Body).Decode(&resp); err != nil { + t.Fatalf("decode POST /definition: %v", err) + } + if resp.CheckerDefinition == nil || resp.ID != "test" { + t.Errorf("response missing definition: %+v", resp) + } + if len(resp.PrecheckFailures) != 0 { + t.Errorf("PrecheckFailures = %v, want empty when opts satisfy prereqs", resp.PrecheckFailures) + } +} diff --git a/checker/types.go b/checker/types.go index ea25247..f32d8d8 100644 --- a/checker/types.go +++ b/checker/types.go @@ -250,6 +250,34 @@ type CheckRuleWithOptions interface { Options() CheckerOptionsDocumentation } +// RulePrecheck is an optional interface a CheckRule can implement to +// declare whether the current options are sufficient for the rule to +// run. Return nil if runnable, or an error describing the missing +// prerequisite (for example "missing API key"). The host calls this via +// POST /definition to surface unavailable rules in the UI; it is never +// invoked from Collect, so rules that need to short-circuit at run time +// must keep their own self-guard. +type RulePrecheck interface { + CheckRule + Precheck(ctx context.Context, opts CheckerOptions) error +} + +// RulePrecheckRequest is the body accepted by POST /definition. +type RulePrecheckRequest struct { + Options CheckerOptions `json:"options"` +} + +// RulePrecheckResponse is the body returned by POST /definition. The +// embedded *CheckerDefinition mirrors GET /definition so a client can +// fetch the full definition and precheck results in one round-trip. +// Keys in PrecheckFailures are rule names; values are the precheck +// error messages. 
Rules that do not implement RulePrecheck, or whose +// Precheck returned nil for the given options, are absent from the map. +type RulePrecheckResponse struct { + *CheckerDefinition + PrecheckFailures map[string]string `json:"precheck_failures"` +} + // ObservationGetter provides access to observation data (used by CheckRule). // Get unmarshals observation data into dest (like json.Unmarshal). // From d387cd629b37766d2212d0c2320d1dd88f8d16ef Mon Sep 17 00:00:00 2001 From: Pierre-Olivier Mercier Date: Fri, 29 May 2026 21:07:45 +0800 Subject: [PATCH 6/6] checker: add CheckEnabler interface for data-driven eligibility Add an optional CheckEnabler interface that lets a provider decide, from the actual target data, whether running the checker is meaningful at all (e.g. reverse-zone outside in-addr.arpa, delegation without DNSSEC). The result is folded into the POST /definition response via new Eligible and EligibilityReason fields, and the handler now tracks load since IsEligible may perform I/O. 
--- checker/server/server.go | 19 ++++++- checker/server/server_test.go | 104 ++++++++++++++++++++++++++++++++++ checker/types.go | 37 ++++++++++++ 3 files changed, 157 insertions(+), 3 deletions(-) diff --git a/checker/server/server.go b/checker/server/server.go index 5075b04..c6c43f2 100644 --- a/checker/server/server.go +++ b/checker/server/server.go @@ -133,7 +133,7 @@ func New(provider checker.ObservationProvider) *Server { s.definition = def s.definition.BuildRulesInfo() s.mux.HandleFunc("GET /definition", s.handleDefinition) - s.mux.HandleFunc("POST /definition", s.handlePrecheck) + s.mux.Handle("POST /definition", s.TrackWork(http.HandlerFunc(s.handlePrecheck))) s.mux.Handle("POST /evaluate", s.TrackWork(http.HandlerFunc(s.handleEvaluate))) } } @@ -343,10 +343,23 @@ func (s *Server) handlePrecheck(w http.ResponseWriter, r *http.Request) { failures[rule.Name()] = err.Error() } } - writeJSON(w, http.StatusOK, checker.RulePrecheckResponse{ + resp := checker.RulePrecheckResponse{ CheckerDefinition: s.definition, PrecheckFailures: failures, - }) + } + if en, ok := s.provider.(checker.CheckEnabler); ok { + eligible, reason, err := en.IsEligible(r.Context(), req.Options) + if err != nil { + // Eligibility undetermined: leave Eligible nil so the host fails + // open (shows the checker), but surface the error for diagnostics. 
+ log.Printf("IsEligible failed: %v", err) + resp.EligibilityReason = err.Error() + } else { + resp.Eligible = &eligible + resp.EligibilityReason = reason + } + } + writeJSON(w, http.StatusOK, resp) } func (s *Server) handleCollect(w http.ResponseWriter, r *http.Request) { diff --git a/checker/server/server_test.go b/checker/server/server_test.go index 17bbd12..d6feefa 100644 --- a/checker/server/server_test.go +++ b/checker/server/server_test.go @@ -687,6 +687,110 @@ func (r *prereqRule) Precheck(ctx context.Context, opts checker.CheckerOptions) return nil } +// enablerProvider is a minimal ObservationProvider + CheckerDefinitionProvider +// that also implements CheckEnabler, returning whatever isEligibleFn yields. +type enablerProvider struct { + key checker.ObservationKey + definition *checker.CheckerDefinition + isEligibleFn func(ctx context.Context, opts checker.CheckerOptions) (bool, string, error) +} + +func (p *enablerProvider) Key() checker.ObservationKey { return p.key } +func (p *enablerProvider) Collect(ctx context.Context, opts checker.CheckerOptions) (any, error) { + return map[string]string{"result": "ok"}, nil +} +func (p *enablerProvider) Definition() *checker.CheckerDefinition { return p.definition } +func (p *enablerProvider) IsEligible(ctx context.Context, opts checker.CheckerOptions) (bool, string, error) { + return p.isEligibleFn(ctx, opts) +} + +func TestServer_Precheck_Eligibility(t *testing.T) { + tests := []struct { + name string + fn func(ctx context.Context, opts checker.CheckerOptions) (bool, string, error) + wantNil bool // expect Eligible == nil + wantElig bool // value of *Eligible when not nil + wantReason string // expected EligibilityReason + }{ + { + name: "eligible true", + fn: func(context.Context, checker.CheckerOptions) (bool, string, error) { return true, "", nil }, + wantElig: true, + }, + { + name: "eligible false with reason", + fn: func(context.Context, checker.CheckerOptions) (bool, string, error) { return false, "not a 
reverse zone", nil }, + wantElig: false, + wantReason: "not a reverse zone", + }, + { + name: "error fails open", + fn: func(context.Context, checker.CheckerOptions) (bool, string, error) { return false, "", errors.New("lookup timeout") }, + wantNil: true, + wantReason: "lookup timeout", + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + p := &enablerProvider{ + key: "test", + definition: &checker.CheckerDefinition{ID: "test", Rules: []checker.CheckRule{}}, + isEligibleFn: tc.fn, + } + srv := New(p) + defer srv.Close() + + rec := doRequest(srv.Handler(), "POST", "/definition", checker.RulePrecheckRequest{Options: checker.CheckerOptions{}}, nil) + if rec.Code != http.StatusOK { + t.Fatalf("POST /definition = %d, want %d", rec.Code, http.StatusOK) + } + var resp checker.RulePrecheckResponse + if err := json.NewDecoder(rec.Body).Decode(&resp); err != nil { + t.Fatalf("decode: %v", err) + } + if tc.wantNil { + if resp.Eligible != nil { + t.Errorf("Eligible = %v, want nil", *resp.Eligible) + } + } else { + if resp.Eligible == nil { + t.Fatalf("Eligible = nil, want %v", tc.wantElig) + } + if *resp.Eligible != tc.wantElig { + t.Errorf("Eligible = %v, want %v", *resp.Eligible, tc.wantElig) + } + } + if resp.EligibilityReason != tc.wantReason { + t.Errorf("EligibilityReason = %q, want %q", resp.EligibilityReason, tc.wantReason) + } + }) + } +} + +// TestServer_Precheck_NoEnabler verifies that a provider not implementing +// CheckEnabler yields no eligibility fields (Eligible nil, reason empty). 
+func TestServer_Precheck_NoEnabler(t *testing.T) { + p := &testProvider{key: "test", definition: &checker.CheckerDefinition{ID: "test", Rules: []checker.CheckRule{}}} + srv := newTestServer(p) + defer srv.Close() + + rec := doRequest(srv.Handler(), "POST", "/definition", checker.RulePrecheckRequest{Options: checker.CheckerOptions{}}, nil) + if rec.Code != http.StatusOK { + t.Fatalf("POST /definition = %d, want %d", rec.Code, http.StatusOK) + } + if bytes.Contains(rec.Body.Bytes(), []byte("eligible")) { + t.Errorf("response leaked eligible field for non-enabler provider: %s", rec.Body.String()) + } + var resp checker.RulePrecheckResponse + if err := json.NewDecoder(rec.Body).Decode(&resp); err != nil { + t.Fatalf("decode: %v", err) + } + if resp.Eligible != nil { + t.Errorf("Eligible = %v, want nil for non-enabler provider", *resp.Eligible) + } +} + func TestServer_Precheck(t *testing.T) { gated := &prereqRule{name: "gated", optKey: "api_key", msg: "missing API key"} open := &dummyRule{name: "open", desc: "no prereq"} diff --git a/checker/types.go b/checker/types.go index f32d8d8..ed4a9fc 100644 --- a/checker/types.go +++ b/checker/types.go @@ -262,6 +262,31 @@ type RulePrecheck interface { Precheck(ctx context.Context, opts CheckerOptions) error } +// CheckEnabler is an optional interface an ObservationProvider can implement +// to declare, from the actual target data, whether running this checker is +// meaningful at all. +// +// It complements the two existing gates: +// - CheckerAvailability is a static, registration-time scope/service-type +// filter; it never sees the target's data. +// - RulePrecheck is a per-rule, options-only check ("missing API key"). +// +// CheckEnabler is whole-checker and data-driven. IsEligible receives the same +// CheckerOptions as Collect, including the autofilled domain_name / zone / +// service payloads (read them with GetOption), and may perform light I/O +// (e.g. a DNSKEY lookup) to decide. 
+// +// Return (true, "", nil) to run the checker, or (false, reason, nil) with a +// short human-readable reason ("not a reverse zone", "DNSSEC not enabled") +// to skip it. Return a non-nil error only when eligibility could not be +// determined (transient I/O failure); the host treats that as "unknown" and +// fails open (shows the checker) rather than as a definitive skip. +// +// Detect support with a type assertion: _, ok := provider.(CheckEnabler) +type CheckEnabler interface { + IsEligible(ctx context.Context, opts CheckerOptions) (eligible bool, reason string, err error) +} + // RulePrecheckRequest is the body accepted by POST /definition. type RulePrecheckRequest struct { Options CheckerOptions `json:"options"` @@ -276,6 +301,18 @@ type RulePrecheckRequest struct { type RulePrecheckResponse struct { *CheckerDefinition PrecheckFailures map[string]string `json:"precheck_failures"` + + // Eligible reports whether this checker is meaningful for the submitted + // target, as decided by the provider's CheckEnabler (if implemented). It + // is nil when the checker does not implement CheckEnabler, or when + // IsEligible could not determine eligibility (its error was non-nil). A + // non-nil false means the checker is definitively not applicable to this + // target; the host should hide it only when Eligible != nil && !*Eligible. + Eligible *bool `json:"eligible,omitempty"` + + // EligibilityReason explains a false Eligible, or carries the lookup error + // message when eligibility could not be determined. Empty otherwise. + EligibilityReason string `json:"eligibility_reason,omitempty"` } // ObservationGetter provides access to observation data (used by CheckRule).