checker-http/checker/collector.go
Pierre-Olivier Mercier ffa3fbe1f9 checker: validate security.txt is a real RFC 9116 file
The http.security_txt rule reported OK for any 200 response with a
non-empty body, so a soft-404 (status 200 + HTML body) served for
/.well-known/security.txt was misread as "published".

Capture the response Content-Type and count the RFC 9116 required
fields (Contact, Expires) in the body. OK now requires text/plain with
at least one Contact and exactly one Expires; a non-conforming 200
yields a new Warn http.security_txt.invalid explaining the defect.
Redirects are still followed and the final response is validated, per
RFC 9116 §3.
2026-06-14 18:14:00 +09:00

108 lines
3.8 KiB
Go

// This file is part of the happyDomain (R) project.
// Copyright (c) 2020-2026 happyDomain
// Authors: Pierre-Olivier Mercier, et al.
package checker
import (
"context"
"crypto/tls"
"io"
"net"
"net/http"
"net/url"
"time"
)
// Target captures everything a Collector needs to probe one logical host.
// It is built once by the orchestrator from CheckerOptions and passed to
// every Collector, so individual collectors don't have to re-parse options
// or re-resolve IPs.
type Target struct {
// Host names the logical host under test.
Host string
// IPs holds the addresses already resolved for the host, so collectors
// do not have to resolve them again.
IPs []string
// Timeout is the time budget for probes.
// NOTE(review): whether it applies per request or per collector is not
// visible in this file; confirm against the orchestrator.
Timeout time.Duration
// MaxRedirects presumably caps the number of redirects a collector
// follows; confirm against the HTTP client setup.
MaxRedirects int
// UserAgent presumably supplies the User-Agent header value sent with
// each probe; confirm against the callers of fetchHTTPSPath.
UserAgent string
}
// Collector contributes a typed observation about a Target. Each collector
// owns one slice of the work (root probe, well-known endpoints, CORS
// preflight, etc.) and writes its result under Key() in the final
// payload's Extensions map.
//
// The current orchestrator wires only the root collector and writes its
// result directly under ObservationKeyHTTP for backward compatibility.
// Additional collectors are introduced in step 4; they will populate
// HTTPData.Extensions[Key()] without disturbing existing rules.
type Collector interface {
// Key returns the name under which this collector's result is stored in
// the payload's Extensions map.
Key() string
// Collect probes t and returns the collector's observation, or an error.
// NOTE(review): the concrete type behind the `any` result is
// collector-specific and not visible in this file.
Collect(ctx context.Context, t Target) (any, error)
}
// PathProbe is the common result of a single HTTPS path probe. It is
// embedded by collector-specific probe types that may add extra fields
// (e.g. HoneypotProbe adds Critical).
//
// Every field except URL is tagged omitempty, so zero values are left out
// of the JSON encoding.
type PathProbe struct {
// URL is the https URL that was requested.
URL string `json:"url"`
// StatusCode is the HTTP status of the response; it stays zero when no
// response was received.
StatusCode int `json:"status_code,omitempty"`
// Bytes is the number of body bytes actually read, which is capped by the
// read limit given to the fetch helper.
Bytes int `json:"bytes,omitempty"`
// ContentType is the raw Content-Type response header value.
ContentType string `json:"content_type,omitempty"`
// Error holds the message of the request-construction, transport, or
// body-read error, if one occurred.
Error string `json:"error,omitempty"`
}
// fetchHTTPSPath performs one GET for https://host/path through client,
// sending ua as the User-Agent, and reports the outcome as a PathProbe.
// At most limit bytes of the response body are read, purely so the probe
// can record a size; the bytes themselves are dropped. Use
// fetchHTTPSPathBody when the body content is needed.
func fetchHTTPSPath(ctx context.Context, client *http.Client, host, path, ua string, limit int64) PathProbe {
	// The body is intentionally discarded: this variant only reports metadata.
	result, _ := fetchHTTPSPathBody(ctx, client, host, path, ua, limit)
	return result
}
// fetchHTTPSPathBody performs one GET for https://host/path through client,
// sending ua as the User-Agent, and returns both the probe summary and the
// response body. At most limit bytes of the body are read, and the probe's
// Bytes field equals len(body).
//
// When the request cannot be built or the round trip fails, the returned
// body is nil and the probe carries only URL and Error. When reading the
// body fails part-way, the bytes read so far are returned and Error is set.
func fetchHTTPSPathBody(ctx context.Context, client *http.Client, host, path, ua string, limit int64) (PathProbe, []byte) {
	target := url.URL{Scheme: "https", Host: host, Path: path}
	result := PathProbe{URL: target.String()}

	// fail records err on the probe and reports that no body is available.
	fail := func(err error) (PathProbe, []byte) {
		result.Error = err.Error()
		return result, nil
	}

	req, err := http.NewRequestWithContext(ctx, http.MethodGet, result.URL, nil)
	if err != nil {
		return fail(err)
	}
	req.Header.Set("User-Agent", ua)

	resp, err := client.Do(req)
	if err != nil {
		return fail(err)
	}
	defer resp.Body.Close()

	result.StatusCode = resp.StatusCode
	result.ContentType = resp.Header.Get("Content-Type")

	payload, readErr := io.ReadAll(io.LimitReader(resp.Body, limit))
	result.Bytes = len(payload)
	if readErr != nil {
		// A failed read (for instance a connection reset mid-body) leaves
		// payload incomplete, so anything counted from it is unreliable.
		// Surface the error so callers do not mistake the partial body for
		// the full response.
		result.Error = readErr.Error()
	}
	return result, payload
}
// newPinnedHTTPSTransport builds an http.Transport whose every connection
// is dialed to ip on port 443, regardless of the address the request asks
// for, while host is presented as the TLS ServerName. timeout bounds the
// TCP dial, the TLS handshake, and the wait for response headers.
// Keep-alives are disabled.
//
// The second return value is the transport's CloseIdleConnections method;
// callers should defer it as cleanup.
func newPinnedHTTPSTransport(ip, host string, timeout time.Duration) (*http.Transport, func()) {
	pinned := net.JoinHostPort(ip, "443")
	d := &net.Dialer{Timeout: timeout}

	// dialPinned ignores the requested address and always connects to the
	// pinned endpoint.
	dialPinned := func(ctx context.Context, network, _ string) (net.Conn, error) {
		return d.DialContext(ctx, network, pinned)
	}

	transport := &http.Transport{
		DialContext:           dialPinned,
		TLSClientConfig:       &tls.Config{ServerName: host},
		TLSHandshakeTimeout:   timeout,
		ResponseHeaderTimeout: timeout,
		DisableKeepAlives:     true,
	}
	return transport, transport.CloseIdleConnections
}