From 660fda9c3aa96c25dbdcfda7433e7e3ebbf9fcb5 Mon Sep 17 00:00:00 2001 From: Pierre-Olivier Mercier Date: Sun, 26 Apr 2026 10:52:30 +0700 Subject: [PATCH 1/3] server: add -healthcheck flag for scratch-image Docker probes Registered on the default FlagSet; ListenAndServe intercepts it and exits 0/1 after probing /health on the -listen address. Lets checker-* Dockerfiles add HEALTHCHECK without any main.go change, even though the runtime image is scratch (no shell, no curl, no wget). --- checker/server/healthcheck.go | 81 ++++++++++++++++++++++++++++++ checker/server/healthcheck_test.go | 72 ++++++++++++++++++++++++++ checker/server/server.go | 13 +++++ 3 files changed, 166 insertions(+) create mode 100644 checker/server/healthcheck.go create mode 100644 checker/server/healthcheck_test.go diff --git a/checker/server/healthcheck.go b/checker/server/healthcheck.go new file mode 100644 index 0000000..bb79d22 --- /dev/null +++ b/checker/server/healthcheck.go @@ -0,0 +1,81 @@ +// Copyright 2020-2026 The happyDomain Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package server + +import ( + "context" + "flag" + "fmt" + "net" + "net/http" + "strings" + "time" +) + +// healthcheckMode is registered on the default flag set so any consumer that +// calls flag.Parse() before ListenAndServe (the standard pattern in our +// checker mains) gets the behaviour for free. 
When set, ListenAndServe +// performs a short-lived HTTP probe against /health on the configured listen +// address and exits 0/1 instead of starting the server. This lets the same +// binary act as its own Docker HEALTHCHECK probe for scratch images, where +// no shell, curl or wget is available. +var healthcheckMode = flag.Bool( + "healthcheck", + false, + "probe /health on the server's listen address and exit 0 if healthy, 1 "+ + "otherwise (intended as a Docker HEALTHCHECK for scratch-based images)", +) + +// runHealthcheck performs a GET against http://{addr}/health with a short +// timeout. Returns nil on a 2xx response, an error otherwise. A bind address +// like ":8080" or "0.0.0.0:8080" is rewritten to dial the loopback interface +// so the probe targets the local process. +func runHealthcheck(addr string) error { + host, port, err := net.SplitHostPort(normalizeHealthcheckAddr(addr)) + if err != nil { + return fmt.Errorf("invalid listen addr %q: %w", addr, err) + } + if host == "" || host == "0.0.0.0" || host == "::" { + host = "127.0.0.1" + } + url := fmt.Sprintf("http://%s/health", net.JoinHostPort(host, port)) + + ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) + defer cancel() + req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil) + if err != nil { + return err + } + client := &http.Client{Timeout: 2 * time.Second} + resp, err := client.Do(req) + if err != nil { + return err + } + defer resp.Body.Close() + if resp.StatusCode/100 != 2 { + return fmt.Errorf("unhealthy: HTTP %d", resp.StatusCode) + } + return nil +} + +func normalizeHealthcheckAddr(a string) string { + if strings.HasPrefix(a, ":") { + return "127.0.0.1" + a + } + if strings.HasPrefix(a, "[::]:") { + return "[::1]:" + strings.TrimPrefix(a, "[::]:") + } + return a +} diff --git a/checker/server/healthcheck_test.go b/checker/server/healthcheck_test.go new file mode 100644 index 0000000..daa4bc5 --- /dev/null +++ b/checker/server/healthcheck_test.go @@ -0,0
+1,72 @@ +// Copyright 2020-2026 The happyDomain Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package server + +import ( + "net/http" + "net/http/httptest" + "strings" + "testing" +) + +func TestRunHealthcheck_OK(t *testing.T) { + mux := http.NewServeMux() + mux.HandleFunc("/health", func(w http.ResponseWriter, _ *http.Request) { + w.WriteHeader(http.StatusOK) + }) + srv := httptest.NewServer(mux) + defer srv.Close() + + addr := strings.TrimPrefix(srv.URL, "http://") + if err := runHealthcheck(addr); err != nil { + t.Fatalf("runHealthcheck(%s) returned error: %v", addr, err) + } +} + +func TestRunHealthcheck_NonOK(t *testing.T) { + mux := http.NewServeMux() + mux.HandleFunc("/health", func(w http.ResponseWriter, _ *http.Request) { + w.WriteHeader(http.StatusServiceUnavailable) + }) + srv := httptest.NewServer(mux) + defer srv.Close() + + addr := strings.TrimPrefix(srv.URL, "http://") + if err := runHealthcheck(addr); err == nil { + t.Fatalf("runHealthcheck against 503 returned nil; want error") + } +} + +func TestRunHealthcheck_Unreachable(t *testing.T) { + // Port 1 (tcpmux) on loopback: privileged and normally unbound, so the dial should fail.
+ if err := runHealthcheck("127.0.0.1:1"); err == nil { + t.Fatalf("runHealthcheck against unreachable port returned nil; want error") + } +} + +func TestNormalizeHealthcheckAddr(t *testing.T) { + cases := map[string]string{ + ":8080": "127.0.0.1:8080", + "127.0.0.1:8080": "127.0.0.1:8080", + "0.0.0.0:8080": "0.0.0.0:8080", + "[::1]:8080": "[::1]:8080", + "[::]:8080": "[::1]:8080", + } + for in, want := range cases { + if got := normalizeHealthcheckAddr(in); got != want { + t.Errorf("normalizeHealthcheckAddr(%q) = %q, want %q", in, got, want) + } + } +} diff --git a/checker/server/server.go b/checker/server/server.go index 26f50ee..caae01f 100644 --- a/checker/server/server.go +++ b/checker/server/server.go @@ -26,6 +26,7 @@ import ( "log" "math" "net/http" + "os" "runtime" "strings" "sync" @@ -170,7 +171,19 @@ func (s *Server) HandleFunc(pattern string, handler func(http.ResponseWriter, *h // ListenAndServe does not stop the background load-average sampler on return; // call Close to stop it. This is not required for process-scoped usage but is // recommended for tests and embedded lifecycles. +// +// If the consumer's flag.Parse() set the SDK-registered -healthcheck flag, +// ListenAndServe never starts the server: it probes /health on addr and calls +// os.Exit(0) on success or os.Exit(1) on failure. This is what lets a +// scratch-based Docker image use the binary itself as its HEALTHCHECK probe. 
func (s *Server) ListenAndServe(addr string) error { + if *healthcheckMode { + if err := runHealthcheck(addr); err != nil { + fmt.Fprintln(os.Stderr, "healthcheck failed:", err) + os.Exit(1) + } + os.Exit(0) + } log.Printf("checker listening on %s", addr) return http.ListenAndServe(addr, requestLogger(s.mux)) } From 8f8dc3ca57b221806a705b70b77af9422d89dc40 Mon Sep 17 00:00:00 2001 From: Pierre-Olivier Mercier Date: Mon, 27 Apr 2026 00:23:13 +0700 Subject: [PATCH 2/3] server: graceful shutdown on SIGINT/SIGTERM in ListenAndServe Drains in-flight requests within a 10s timeout and stops the load-average sampler before returning. Callers needing custom signal handling can still opt out via Handler() and run their own http.Server. --- checker/server/server.go | 55 ++++++++++++++++++++++++++++++++++++---- 1 file changed, 50 insertions(+), 5 deletions(-) diff --git a/checker/server/server.go b/checker/server/server.go index caae01f..763a600 100644 --- a/checker/server/server.go +++ b/checker/server/server.go @@ -27,10 +27,12 @@ import ( "math" "net/http" "os" + "os/signal" "runtime" "strings" "sync" "sync/atomic" + "syscall" "time" "git.happydns.org/checker-sdk-go/checker" @@ -44,6 +46,10 @@ const maxRequestBodySize = 1 << 20 // 5 seconds matches the Unix kernel's loadavg cadence. const loadSampleInterval = 5 * time.Second +// shutdownTimeout bounds how long ListenAndServe waits for in-flight +// requests to drain after receiving SIGINT or SIGTERM. +const shutdownTimeout = 10 * time.Second + // EWMA smoothing factors for 1, 5, and 15-minute windows sampled every // loadSampleInterval. Derived as 1 - exp(-interval/window) so that the // steady-state response to a constant InFlight of N converges to N. @@ -166,11 +172,15 @@ func (s *Server) HandleFunc(pattern string, handler func(http.ResponseWriter, *h s.mux.HandleFunc(pattern, handler) } -// ListenAndServe starts the HTTP server on the given address. 
+// ListenAndServe starts the HTTP server on the given address and blocks +// until the server stops. // -// ListenAndServe does not stop the background load-average sampler on return; -// call Close to stop it. This is not required for process-scoped usage but is -// recommended for tests and embedded lifecycles. +// ListenAndServe installs a SIGINT/SIGTERM handler that triggers a graceful +// shutdown: new connections are refused and in-flight requests are given up +// to shutdownTimeout to complete. The background load-average sampler is +// stopped via Close before returning. Callers who need their own signal +// handling or shutdown semantics should use Handler() and run their own +// http.Server instead. // // If the consumer's flag.Parse() set the SDK-registered -healthcheck flag, // ListenAndServe never starts the server: it probes /health on addr and calls @@ -184,8 +194,43 @@ func (s *Server) ListenAndServe(addr string) error { } os.Exit(0) } + + srv := &http.Server{Addr: addr, Handler: requestLogger(s.mux)} + + sigCh := make(chan os.Signal, 1) + signal.Notify(sigCh, syscall.SIGINT, syscall.SIGTERM) + defer signal.Stop(sigCh) + + shutdownErr := make(chan error, 1) + go func() { + sig, ok := <-sigCh + if !ok { + shutdownErr <- nil + return + } + log.Printf("checker received %s, shutting down (timeout %s)", sig, shutdownTimeout) + ctx, cancel := context.WithTimeout(context.Background(), shutdownTimeout) + defer cancel() + shutdownErr <- srv.Shutdown(ctx) + }() + log.Printf("checker listening on %s", addr) - return http.ListenAndServe(addr, requestLogger(s.mux)) + err := srv.ListenAndServe() + signal.Stop(sigCh) + close(sigCh) + + if err == http.ErrServerClosed { + if sErr := <-shutdownErr; sErr != nil { + err = sErr + } else { + err = nil + } + } + + if cErr := s.Close(); cErr != nil && err == nil { + err = cErr + } + return err } // Close stops the background load-average sampler goroutine. 
It is safe to From c1de9aca1cab48e7fd97a02e6ee2f58e907441f6 Mon Sep 17 00:00:00 2001 From: Pierre-Olivier Mercier Date: Wed, 29 Apr 2026 17:35:13 +0700 Subject: [PATCH 3/3] checker: add JoinRelative helper for service-relative owner names --- checker/names.go | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 checker/names.go diff --git a/checker/names.go b/checker/names.go new file mode 100644 index 0000000..61df52e --- /dev/null +++ b/checker/names.go @@ -0,0 +1,37 @@ +// Copyright 2020-2026 The happyDomain Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package checker + +import "strings" + +// JoinRelative treats name as relative to origin, as happyDomain encodes +// service-embedded record owners and subdomains. An empty or "@" name +// resolves to the origin itself; an empty origin returns the trimmed name +// unchanged. A name already suffixed by origin is returned as-is so that +// absolute encodings round-trip safely. Trailing dots are stripped. +func JoinRelative(name, origin string) string { + origin = strings.TrimSuffix(origin, ".") + name = strings.TrimSuffix(name, ".") + if origin == "" { + return name + } + if name == "" || name == "@" { + return origin + } + if name == origin || strings.HasSuffix(name, "."+origin) { + return name + } + return name + "." + origin +}