checker-http/checker/collector_wellknown.go
Pierre-Olivier Mercier ffa3fbe1f9 checker: validate security.txt is a real RFC 9116 file
The http.security_txt rule reported OK for any 200 response with a
non-empty body, so a soft-404 (status 200 + HTML body) served for
/.well-known/security.txt was misread as "published".

Capture the response Content-Type and count the RFC 9116 required
fields (Contact, Expires) in the body. OK now requires text/plain with
at least one Contact and exactly one Expires; a non-conforming 200
yields a new Warn http.security_txt.invalid explaining the defect.
Redirects are still followed and the final response is validated, per
RFC 9116 §3.
2026-06-14 18:14:00 +09:00

104 lines
3.7 KiB
Go

// This file is part of the happyDomain (R) project.
// Copyright (c) 2020-2026 happyDomain
// Authors: Pierre-Olivier Mercier, et al.
package checker
import (
"context"
"fmt"
"net/http"
"strings"
)
// ObservationKeyWellKnown is the Extensions[] key under which
// wellknownCollector publishes its observation. It is the value returned by
// wellknownCollector.Key.
const ObservationKeyWellKnown = "wellknown"
// WellKnownData captures whether each well-known URI returned a usable
// document. It is intentionally narrow: each entry holds the generic probe
// outcome and, for security.txt, counts of the RFC 9116 Contact and Expires
// fields. Deeper parsing (e.g. PGP-signed security.txt fields) is left to
// dedicated collectors when the need arises.
type WellKnownData struct {
// URIs is keyed by the request path that was probed, e.g. "/robots.txt"
// or "/.well-known/security.txt".
URIs map[string]WellKnownProbe `json:"uris"`
}
// WellKnownProbe is a single (URI → outcome) entry. It embeds the generic
// PathProbe and adds the few security.txt signals the rule needs to decide
// whether the response is an actual RFC 9116 file rather than, say, a soft-404
// HTML page. ContactCount/ExpiresCount are only populated for security.txt.
type WellKnownProbe struct {
// PathProbe holds the generic per-path probe fields.
PathProbe
// ContactCount is the number of "Contact" field lines counted in the body.
// A zero count is omitted from the JSON encoding (omitempty).
ContactCount int `json:"contact_count,omitempty"`
// ExpiresCount is the number of "Expires" field lines counted in the body.
// A zero count is omitted from the JSON encoding (omitempty).
ExpiresCount int `json:"expires_count,omitempty"`
}
// wellknownCollector probes a small, fixed set of standardised URIs
// served at the apex of the host. Today it covers:
//
// - /.well-known/security.txt (RFC 9116) — security disclosure contact
// - /robots.txt (RFC 9309) — crawler directives
//
// It uses the first IP only because these documents are expected to be
// host-uniform: there is nothing to learn from probing every backend.
//
// The type carries no state; its behaviour lives entirely in its methods.
type wellknownCollector struct{}
// Key returns the observation key under which this collector's result is
// published, namely ObservationKeyWellKnown.
func (wellknownCollector) Key() string {
	return ObservationKeyWellKnown
}
// Collect probes /robots.txt and /.well-known/security.txt on t.Host and
// returns the outcomes as a *WellKnownData keyed by request path. Both
// requests go through one HTTP client whose transport is obtained from
// newPinnedHTTPSTransport for t.IPs[0].
//
// The only error it returns is for a target that carries no IP address;
// whatever the fetch helpers report for each path is stored in the result.
func (wellknownCollector) Collect(ctx context.Context, t Target) (any, error) {
	const (
		robotsPath      = "/robots.txt"
		securityTxtPath = "/.well-known/security.txt"
		// fetchLimit is the limit argument handed to both fetch helpers
		// (presumably a cap on the bytes read from the body).
		fetchLimit = 64 << 10
	)

	if len(t.IPs) == 0 {
		return nil, fmt.Errorf("no IPs to probe")
	}

	transport, cleanup := newPinnedHTTPSTransport(t.IPs[0], t.Host, t.Timeout)
	defer cleanup()
	client := &http.Client{Transport: transport}

	// robots.txt: presence and status are all the (future) rule needs.
	robots := WellKnownProbe{
		PathProbe: fetchHTTPSPath(ctx, client, t.Host, robotsPath, t.UserAgent, fetchLimit),
	}

	// security.txt: the body is inspected as well, so the rule can tell a
	// genuine RFC 9116 file from a soft-404 page that merely returns 200.
	security := fetchSecurityTxt(ctx, client, t.Host, securityTxtPath, t.UserAgent, fetchLimit)

	return &WellKnownData{
		URIs: map[string]WellKnownProbe{
			robotsPath:      robots,
			securityTxtPath: security,
		},
	}, nil
}
// fetchSecurityTxt requests path on host via fetchHTTPSPathBody and returns a
// WellKnownProbe combining the generic probe it reports with the number of
// Contact and Expires fields (the fields RFC 9116 requires) counted in the
// returned body. The counts are computed from whatever body comes back,
// regardless of the probe outcome.
func fetchSecurityTxt(ctx context.Context, client *http.Client, host, path, ua string, limit int64) WellKnownProbe {
	base, content := fetchHTTPSPathBody(ctx, client, host, path, ua, limit)
	nContact, nExpires := countSecurityTxtFields(content)
	return WellKnownProbe{
		PathProbe:    base,
		ContactCount: nContact,
		ExpiresCount: nExpires,
	}
}
// countSecurityTxtFields counts occurrences of the Contact and Expires fields
// in an RFC 9116 file. Fields are "name: value" lines; blank lines and lines
// beginning with "#" (comments) are ignored, and field names are
// case-insensitive (RFC 9116 §2.4). PGP signature blocks are not parsed.
func countSecurityTxtFields(body []byte) (contacts, expires int) {
for raw := range strings.Lines(string(body)) {
line := strings.TrimSpace(raw)
if line == "" || strings.HasPrefix(line, "#") {
continue
}
name, _, ok := strings.Cut(line, ":")
if !ok {
continue
}
switch strings.ToLower(strings.TrimSpace(name)) {
case "contact":
contacts++
case "expires":
expires++
}
}
return contacts, expires
}
// init hands a wellknownCollector to RegisterCollector when the package is
// initialised.
func init() {
	RegisterCollector(wellknownCollector{})
}