// This file is part of the happyDomain (R) project.
// Copyright (c) 2020-2026 happyDomain
// Authors: Pierre-Olivier Mercier, et al.

package checker

import (
	"context"
	"fmt"
	"net/http"
	"strings"
)

// ObservationKeyWellKnown is the Extensions[] key under which
// wellknownCollector publishes its observation. It is the value returned by
// wellknownCollector.Key.
const ObservationKeyWellKnown = "wellknown"

// WellKnownData captures whether each well-known URI returned a usable
// document. It is intentionally narrow: per-URI presence and HTTP status
// are enough for the current rule set; deeper parsing (e.g. PGP-signed
// security.txt fields) is left to dedicated collectors when the need
// arises.
type WellKnownData struct {
	// URIs holds one probe result per requested path; the collector keys it
	// by the path it fetched ("/robots.txt", "/.well-known/security.txt").
	URIs map[string]WellKnownProbe `json:"uris"`
}

// WellKnownProbe is a single (URI → outcome) entry. It embeds the generic
// PathProbe and adds the few security.txt signals the rule needs to decide
// whether the response is an actual RFC 9116 file rather than, say, a soft-404
// HTML page. ContactCount/ExpiresCount are only populated for security.txt.
type WellKnownProbe struct {
	// PathProbe carries the generic per-path fetch outcome.
	PathProbe

	// ContactCount is the number of "Contact" field lines counted in the
	// fetched body; it stays zero (and is omitted from JSON) for other URIs.
	ContactCount int `json:"contact_count,omitempty"`
	// ExpiresCount is the number of "Expires" field lines counted in the
	// fetched body; it stays zero (and is omitted from JSON) for other URIs.
	ExpiresCount int `json:"expires_count,omitempty"`
}

// wellknownCollector probes a small, fixed set of standardised URIs
// served at the apex of the host. Today it covers:
//
//   - /.well-known/security.txt (RFC 9116) — security disclosure contact
//   - /robots.txt (RFC 9309) — crawler directives
//
// It uses the first IP only because these documents are expected to be
// host-uniform: there is nothing to learn from probing every backend.
type wellknownCollector struct{} func (wellknownCollector) Key() string { return ObservationKeyWellKnown } func (wellknownCollector) Collect(ctx context.Context, t Target) (any, error) { if len(t.IPs) == 0 { return nil, fmt.Errorf("no IPs to probe") } transport, cleanup := newPinnedHTTPSTransport(t.IPs[0], t.Host, t.Timeout) defer cleanup() client := &http.Client{Transport: transport} out := WellKnownData{URIs: make(map[string]WellKnownProbe, 2)} // robots.txt: presence and status are all the (future) rule needs. out.URIs["/robots.txt"] = WellKnownProbe{ PathProbe: fetchHTTPSPath(ctx, client, t.Host, "/robots.txt", t.UserAgent, 64<<10), } // security.txt: read the body so the rule can tell a genuine RFC 9116 // file from a soft-404 page that merely returns 200. out.URIs["/.well-known/security.txt"] = fetchSecurityTxt(ctx, client, t.Host, "/.well-known/security.txt", t.UserAgent, 64<<10) return &out, nil } // fetchSecurityTxt fetches path, captures the generic probe fields, and counts // the RFC 9116 required fields (Contact, Expires) found in the body. func fetchSecurityTxt(ctx context.Context, client *http.Client, host, path, ua string, limit int64) WellKnownProbe { probe, body := fetchHTTPSPathBody(ctx, client, host, path, ua, limit) out := WellKnownProbe{PathProbe: probe} out.ContactCount, out.ExpiresCount = countSecurityTxtFields(body) return out } // countSecurityTxtFields counts occurrences of the Contact and Expires fields // in an RFC 9116 file. Fields are "name: value" lines; blank lines and lines // beginning with "#" (comments) are ignored, and field names are // case-insensitive (RFC 9116 ยง2.4). PGP signature blocks are not parsed. 
func countSecurityTxtFields(body []byte) (contacts, expires int) { for raw := range strings.Lines(string(body)) { line := strings.TrimSpace(raw) if line == "" || strings.HasPrefix(line, "#") { continue } name, _, ok := strings.Cut(line, ":") if !ok { continue } switch strings.ToLower(strings.TrimSpace(name)) { case "contact": contacts++ case "expires": expires++ } } return contacts, expires } func init() { RegisterCollector(wellknownCollector{}) }