The http.security_txt rule reported OK for any 200 response with a non-empty body, so a soft-404 (status 200 + HTML body) served for /.well-known/security.txt was misread as "published". Capture the response Content-Type and count the RFC 9116 required fields (Contact, Expires) in the body. OK now requires text/plain with at least one Contact and exactly one Expires; a non-conforming 200 yields a new Warn http.security_txt.invalid explaining the defect. Redirects are still followed and the final response is validated, per RFC 9116 §3.
104 lines
3.7 KiB
Go
104 lines
3.7 KiB
Go
// This file is part of the happyDomain (R) project.
|
|
// Copyright (c) 2020-2026 happyDomain
|
|
// Authors: Pierre-Olivier Mercier, et al.
|
|
|
|
package checker
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"net/http"
|
|
"strings"
|
|
)
|
|
|
|
// ObservationKeyWellKnown names the Extensions[] entry in which the
// observation produced by wellknownCollector is published.
const ObservationKeyWellKnown = "wellknown"
|
|
|
|
// WellKnownData captures whether each well-known URI returned a usable
|
|
// document. It is intentionally narrow: per-URI presence and HTTP status
|
|
// are enough for the current rule set; deeper parsing (e.g. PGP-signed
|
|
// security.txt fields) is left to dedicated collectors when the need
|
|
// arises.
|
|
type WellKnownData struct {
|
|
URIs map[string]WellKnownProbe `json:"uris"`
|
|
}
|
|
|
|
// WellKnownProbe is a single (URI → outcome) entry. It embeds the generic
|
|
// PathProbe and adds the few security.txt signals the rule needs to decide
|
|
// whether the response is an actual RFC 9116 file rather than, say, a soft-404
|
|
// HTML page. ContactCount/ExpiresCount are only populated for security.txt.
|
|
type WellKnownProbe struct {
|
|
PathProbe
|
|
ContactCount int `json:"contact_count,omitempty"`
|
|
ExpiresCount int `json:"expires_count,omitempty"`
|
|
}
|
|
|
|
// wellknownCollector fetches a small, fixed list of standardised documents
// served at the apex of the host. The list currently contains:
//
//   - /.well-known/security.txt (RFC 9116): security disclosure contact
//   - /robots.txt (RFC 9309): crawler directives
//
// Only the first IP is probed: these documents are expected to be the same
// on every backend, so probing each of them would teach nothing new.
type wellknownCollector struct{}
|
|
|
|
// Key returns ObservationKeyWellKnown, the key under which this collector's
// observation is published.
func (wellknownCollector) Key() string {
	return ObservationKeyWellKnown
}
|
|
|
|
func (wellknownCollector) Collect(ctx context.Context, t Target) (any, error) {
|
|
if len(t.IPs) == 0 {
|
|
return nil, fmt.Errorf("no IPs to probe")
|
|
}
|
|
transport, cleanup := newPinnedHTTPSTransport(t.IPs[0], t.Host, t.Timeout)
|
|
defer cleanup()
|
|
client := &http.Client{Transport: transport}
|
|
|
|
out := WellKnownData{URIs: make(map[string]WellKnownProbe, 2)}
|
|
|
|
// robots.txt: presence and status are all the (future) rule needs.
|
|
out.URIs["/robots.txt"] = WellKnownProbe{
|
|
PathProbe: fetchHTTPSPath(ctx, client, t.Host, "/robots.txt", t.UserAgent, 64<<10),
|
|
}
|
|
|
|
// security.txt: read the body so the rule can tell a genuine RFC 9116
|
|
// file from a soft-404 page that merely returns 200.
|
|
out.URIs["/.well-known/security.txt"] = fetchSecurityTxt(ctx, client, t.Host, "/.well-known/security.txt", t.UserAgent, 64<<10)
|
|
|
|
return &out, nil
|
|
}
|
|
|
|
// fetchSecurityTxt fetches path, captures the generic probe fields, and counts
|
|
// the RFC 9116 required fields (Contact, Expires) found in the body.
|
|
func fetchSecurityTxt(ctx context.Context, client *http.Client, host, path, ua string, limit int64) WellKnownProbe {
|
|
probe, body := fetchHTTPSPathBody(ctx, client, host, path, ua, limit)
|
|
out := WellKnownProbe{PathProbe: probe}
|
|
out.ContactCount, out.ExpiresCount = countSecurityTxtFields(body)
|
|
return out
|
|
}
|
|
|
|
// countSecurityTxtFields counts occurrences of the Contact and Expires fields
|
|
// in an RFC 9116 file. Fields are "name: value" lines; blank lines and lines
|
|
// beginning with "#" (comments) are ignored, and field names are
|
|
// case-insensitive (RFC 9116 §2.4). PGP signature blocks are not parsed.
|
|
func countSecurityTxtFields(body []byte) (contacts, expires int) {
|
|
for raw := range strings.Lines(string(body)) {
|
|
line := strings.TrimSpace(raw)
|
|
if line == "" || strings.HasPrefix(line, "#") {
|
|
continue
|
|
}
|
|
name, _, ok := strings.Cut(line, ":")
|
|
if !ok {
|
|
continue
|
|
}
|
|
switch strings.ToLower(strings.TrimSpace(name)) {
|
|
case "contact":
|
|
contacts++
|
|
case "expires":
|
|
expires++
|
|
}
|
|
}
|
|
return contacts, expires
|
|
}
|
|
|
|
// init passes a wellknownCollector to RegisterCollector when the package is
// initialised.
func init() {
	RegisterCollector(wellknownCollector{})
}
|