// This file is part of the happyDomain (R) project. // Copyright (c) 2020-2026 happyDomain // Authors: Pierre-Olivier Mercier, et al. package checker import ( "context" "crypto/tls" "fmt" "io" "net" "net/http" "net/url" ) // ObservationKeyWellKnown is the Extensions[] key under which // wellknownCollector publishes its observation. const ObservationKeyWellKnown = "wellknown" // WellKnownData captures whether each well-known URI returned a usable // document. It is intentionally narrow: per-URI presence and HTTP status // are enough for the current rule set; deeper parsing (e.g. PGP-signed // security.txt fields) is left to dedicated collectors when the need // arises. type WellKnownData struct { URIs map[string]WellKnownProbe `json:"uris"` } // WellKnownProbe is a single (URI → outcome) entry. type WellKnownProbe struct { URL string `json:"url"` StatusCode int `json:"status_code,omitempty"` Bytes int `json:"bytes,omitempty"` Error string `json:"error,omitempty"` } // wellknownCollector probes a small, fixed set of standardised URIs // served at the apex of the host. Today it covers: // // - /.well-known/security.txt (RFC 9116) — security disclosure contact // - /robots.txt (RFC 9309) — crawler directives // // It uses the first IP only because these documents are expected to be // host-uniform: there is nothing to learn from probing every backend. type wellknownCollector struct{} func (wellknownCollector) Key() string { return ObservationKeyWellKnown } func (wellknownCollector) Collect(ctx context.Context, t Target) (any, error) { if len(t.IPs) == 0 { return nil, fmt.Errorf("no IPs to probe") } addr := net.JoinHostPort(t.IPs[0], "443") dialer := &net.Dialer{Timeout: t.Timeout} transport := &http.Transport{ DialContext: func(ctx context.Context, network, _ string) (net.Conn, error) { return dialer.DialContext(ctx, network, addr) }, TLSClientConfig: &tls.Config{ServerName: t.Host}, TLSHandshakeTimeout: t.Timeout, ResponseHeaderTimeout: t.Timeout, DisableKeepAlives: true, } defer transport.CloseIdleConnections() client := &http.Client{Transport: transport} uris := []string{"/.well-known/security.txt", "/robots.txt"} out := WellKnownData{URIs: make(map[string]WellKnownProbe, len(uris))} for _, path := range uris { out.URIs[path] = fetchOne(ctx, client, t.Host, path, t.UserAgent) } return &out, nil } func fetchOne(ctx context.Context, client *http.Client, host, path, ua string) WellKnownProbe { u := (&url.URL{Scheme: "https", Host: host, Path: path}).String() probe := WellKnownProbe{URL: u} req, err := http.NewRequestWithContext(ctx, http.MethodGet, u, nil) if err != nil { probe.Error = err.Error() return probe } req.Header.Set("User-Agent", ua) resp, err := client.Do(req) if err != nil { probe.Error = err.Error() return probe } defer resp.Body.Close() probe.StatusCode = resp.StatusCode // Cap the read so a misconfigured server can't pull megabytes for a // "did this exist?" probe. body, _ := io.ReadAll(io.LimitReader(resp.Body, 64<<10)) probe.Bytes = len(body) return probe } func init() { RegisterCollector(wellknownCollector{}) }