// This file is part of the happyDomain (R) project.
// Copyright (c) 2020-2026 happyDomain
// Authors: Pierre-Olivier Mercier, et al.

package checker

import (
	"context"
	"crypto/tls"
	"io"
	"net"
	"net/http"
	"net/url"
	"time"
)

// Target captures everything a Collector needs to probe one logical host.
// It is built once by the orchestrator from CheckerOptions and passed to
// every Collector, so individual collectors don't have to re-parse options
// or re-resolve IPs.
type Target struct {
	// Host is the logical host being probed.
	Host string
	// IPs holds the addresses already resolved for Host.
	IPs []string
	// NOTE(review): Timeout, MaxRedirects and UserAgent look like per-probe
	// HTTP settings taken from CheckerOptions; how each collector applies
	// them is not visible from this file — confirm against the collectors.
	Timeout      time.Duration
	MaxRedirects int
	UserAgent    string
}

// Collector contributes a typed observation about a Target. Each collector
// owns one slice of the work (root probe, well-known endpoints, CORS
// preflight, etc.) and writes its result under Key() in the final
// payload's Extensions map.
//
// The current orchestrator wires only the root collector and writes its
// result directly under ObservationKeyHTTP for backward compatibility.
// Additional collectors are introduced in step 4; they will populate
// HTTPData.Extensions[Key()] without disturbing existing rules.
type Collector interface {
	// Key returns the name under which this collector's result is stored
	// in the payload's Extensions map.
	Key() string
	// Collect probes t and returns the collector-specific observation, or
	// an error if the observation could not be produced.
	Collect(ctx context.Context, t Target) (any, error)
}

// PathProbe is the common result of a single HTTPS path probe. It is
// embedded by collector-specific probe types that may add extra fields
// (e.g. HoneypotProbe adds Critical).
//
// The field descriptions below reflect how fetchHTTPSPathBody fills the
// struct; every field except URL is omitted from JSON when zero.
type PathProbe struct {
	// URL is the full https:// URL that was requested.
	URL string `json:"url"`
	// StatusCode is the HTTP status of the response; it stays 0 when no
	// response was received.
	StatusCode int `json:"status_code,omitempty"`
	// Bytes is the number of body bytes actually read, which is capped by
	// the caller-supplied limit.
	Bytes int `json:"bytes,omitempty"`
	// ContentType is the raw Content-Type response header.
	ContentType string `json:"content_type,omitempty"`
	// Error carries the message of the failure that stopped the probe:
	// building the request, performing it, or reading the body. When set
	// after a body read failure, StatusCode and Bytes are still populated
	// but Bytes only counts the partial body.
	Error string `json:"error,omitempty"`
}

// fetchHTTPSPath issues a single GET against the given path using client,
// reads up to limit bytes (just to measure size), and returns a PathProbe.
// Failures are reported through the probe's Error field rather than as a
// separate error value.
// Callers that need the body itself should use fetchHTTPSPathBody.
func fetchHTTPSPath(ctx context.Context, client *http.Client, host, path, ua string, limit int64) PathProbe { probe, _ := fetchHTTPSPathBody(ctx, client, host, path, ua, limit) return probe } // fetchHTTPSPathBody is fetchHTTPSPath but also returns the response body, // truncated to limit bytes. Probe.Bytes equals len(body). func fetchHTTPSPathBody(ctx context.Context, client *http.Client, host, path, ua string, limit int64) (PathProbe, []byte) { u := (&url.URL{Scheme: "https", Host: host, Path: path}).String() probe := PathProbe{URL: u} req, err := http.NewRequestWithContext(ctx, http.MethodGet, u, nil) if err != nil { probe.Error = err.Error() return probe, nil } req.Header.Set("User-Agent", ua) resp, err := client.Do(req) if err != nil { probe.Error = err.Error() return probe, nil } defer resp.Body.Close() probe.StatusCode = resp.StatusCode probe.ContentType = resp.Header.Get("Content-Type") body, err := io.ReadAll(io.LimitReader(resp.Body, limit)) probe.Bytes = len(body) // A read error here (e.g. connection reset mid-body) means body is // truncated and any counts derived from it are unreliable; record it so // callers don't treat the partial body as a complete response. if err != nil { probe.Error = err.Error() } return probe, body } // newPinnedHTTPSTransport returns an http.Transport that dials every request // to ip:443 and presents host as the TLS ServerName. The caller must defer // the returned cleanup func to drain idle connections. func newPinnedHTTPSTransport(ip, host string, timeout time.Duration) (*http.Transport, func()) { addr := net.JoinHostPort(ip, "443") dialer := &net.Dialer{Timeout: timeout} t := &http.Transport{ DialContext: func(ctx context.Context, network, _ string) (net.Conn, error) { return dialer.DialContext(ctx, network, addr) }, TLSClientConfig: &tls.Config{ServerName: host}, TLSHandshakeTimeout: timeout, ResponseHeaderTimeout: timeout, DisableKeepAlives: true, } return t, t.CloseIdleConnections }