checker: validate security.txt is a real RFC 9116 file

The http.security_txt rule reported OK for any 200 response with a
non-empty body, so a soft-404 (status 200 + HTML body) served for
/.well-known/security.txt was misread as "published".

Capture the response Content-Type and count the RFC 9116 required
fields (Contact, Expires) in the body. OK now requires text/plain with
at least one Contact and exactly one Expires; a non-conforming 200
yields a new Warn http.security_txt.invalid explaining the defect.
Redirects are still followed and the final response is validated, per
RFC 9116 §3.
This commit is contained in:
nemunaire 2026-06-14 12:56:43 +09:00
commit ffa3fbe1f9
4 changed files with 281 additions and 22 deletions

View file

@ -44,33 +44,49 @@ type Collector interface {
// embedded by collector-specific probe types that may add extra fields
// (e.g. HoneypotProbe adds Critical).
type PathProbe struct {
// NOTE(review): the next four field lines repeat URL, StatusCode, Bytes and
// Error from the five-field list that follows. This looks like the removed
// side of a diff rendered without -/+ markers; confirm against the real file.
URL string `json:"url"`
StatusCode int `json:"status_code,omitempty"`
Bytes int `json:"bytes,omitempty"`
Error string `json:"error,omitempty"`
// URL is the full https URL that was requested.
URL string `json:"url"`
// StatusCode is the HTTP status code of the response, when one was received.
StatusCode int `json:"status_code,omitempty"`
// Bytes is the number of body bytes read (capped by the caller's limit).
Bytes int `json:"bytes,omitempty"`
// ContentType is the raw Content-Type response header value.
ContentType string `json:"content_type,omitempty"`
// Error holds the message of a request-construction, transport or body-read
// error; it is omitted from JSON when empty.
Error string `json:"error,omitempty"`
}
// fetchHTTPSPath performs one GET for path on host through client and returns
// the resulting PathProbe. At most limit bytes of the response are read, and
// only to report their count: the body itself is thrown away. Use
// fetchHTTPSPathBody when the content is needed.
func fetchHTTPSPath(ctx context.Context, client *http.Client, host, path, ua string, limit int64) PathProbe {
	// Delegate to the body-returning variant and keep only the probe.
	var result PathProbe
	result, _ = fetchHTTPSPathBody(ctx, client, host, path, ua, limit)
	return result
}
// fetchHTTPSPathBody is fetchHTTPSPath but also returns the response body,
// truncated to limit bytes. Probe.Bytes equals len(body).
//
// The request is a GET to https://host/path with the User-Agent header set to
// ua, sent through client. If the request cannot be built or the transport
// fails, the probe carries only URL and Error and the returned body is nil.
// Otherwise StatusCode and ContentType are copied from the response; if
// reading the body fails, Error is set as well and whatever was read is still
// returned alongside it.
//
// NOTE(review): this listing appears to interleave the removed and added sides
// of a diff without -/+ markers. The single-value `return probe` statements
// and the io.Copy/io.Discard size measurement do not fit the two-value
// signature and look like the pre-change lines; confirm against the real file.
func fetchHTTPSPathBody(ctx context.Context, client *http.Client, host, path, ua string, limit int64) (PathProbe, []byte) {
// Assemble the URL through url.URL rather than by string concatenation.
u := (&url.URL{Scheme: "https", Host: host, Path: path}).String()
probe := PathProbe{URL: u}
req, err := http.NewRequestWithContext(ctx, http.MethodGet, u, nil)
if err != nil {
probe.Error = err.Error()
return probe
return probe, nil
}
req.Header.Set("User-Agent", ua)
resp, err := client.Do(req)
if err != nil {
probe.Error = err.Error()
return probe
return probe, nil
}
// Release the response body on every return path below.
defer resp.Body.Close()
probe.StatusCode = resp.StatusCode
n, _ := io.Copy(io.Discard, io.LimitReader(resp.Body, limit))
probe.Bytes = int(n)
return probe
probe.ContentType = resp.Header.Get("Content-Type")
// Read at most limit bytes so an oversized response cannot grow the buffer
// without bound.
body, err := io.ReadAll(io.LimitReader(resp.Body, limit))
probe.Bytes = len(body)
// A read error here (e.g. connection reset mid-body) means body is
// truncated and any counts derived from it are unreliable; record it so
// callers don't treat the partial body as a complete response.
if err != nil {
probe.Error = err.Error()
}
return probe, body
}
// newPinnedHTTPSTransport returns an http.Transport that dials every request