checker-http/checker/collector_wellknown.go

99 lines
3.1 KiB
Go

// This file is part of the happyDomain (R) project.
// Copyright (c) 2020-2026 happyDomain
// Authors: Pierre-Olivier Mercier, et al.
package checker
import (
"context"
"crypto/tls"
"fmt"
"io"
"net"
"net/http"
"net/url"
)
// ObservationKeyWellKnown is the Extensions[] key under which
// wellknownCollector publishes its observation. It is the value returned
// by wellknownCollector.Key.
const ObservationKeyWellKnown = "wellknown"
// WellKnownData is the observation produced by wellknownCollector: it
// records, for each well-known URI, whether a usable document came back.
//
// The shape is deliberately narrow. Per-URI presence and HTTP status are
// all the current rule set needs; deeper parsing (e.g. PGP-signed
// security.txt fields) belongs in dedicated collectors if and when the
// need arises.
type WellKnownData struct {
	// URIs maps each probed path (for example "/robots.txt") to the
	// outcome of fetching it.
	URIs map[string]WellKnownProbe `json:"uris"`
}
// WellKnownProbe describes the outcome of fetching one well-known URI.
type WellKnownProbe struct {
	// URL is the absolute URL that was requested.
	URL string `json:"url"`

	// StatusCode is the HTTP status of the response; it stays zero (and
	// is omitted from JSON) when no response was received.
	StatusCode int `json:"status_code,omitempty"`

	// Bytes is the number of body bytes read. The read is capped, so this
	// is not necessarily the full size of the document.
	Bytes int `json:"bytes,omitempty"`

	// Error holds the failure message when the probe could not be
	// completed; it is empty otherwise.
	Error string `json:"error,omitempty"`
}
// wellknownCollector probes a small, fixed set of standardised URIs
// served at the apex of the host, over HTTPS on port 443. Today it covers:
//
// - /.well-known/security.txt (RFC 9116) — security disclosure contact
// - /robots.txt (RFC 9309) — crawler directives
//
// It uses the first IP only because these documents are expected to be
// host-uniform: there is nothing to learn from probing every backend.
//
// The type carries no state; it is registered through RegisterCollector
// in this file's init function.
type wellknownCollector struct{}
// Key returns the identifier under which this collector's observation is
// published, i.e. ObservationKeyWellKnown.
func (wellknownCollector) Key() string {
	return ObservationKeyWellKnown
}
// Collect fetches every well-known path from the target over HTTPS and
// returns a *WellKnownData holding one WellKnownProbe per path.
//
// It fails only when the target has no IPs; per-URI failures are recorded
// inside the corresponding probe rather than returned as an error.
//
// NOTE(review): the client uses net/http's default redirect policy, and
// the dial function ignores the requested address, so any redirect target
// is still contacted on the pinned IP — confirm this is intended. No
// overall client timeout is set either: body reads are bounded only by ctx.
func (wellknownCollector) Collect(ctx context.Context, t Target) (any, error) {
	if len(t.IPs) == 0 {
		return nil, fmt.Errorf("no IPs to probe")
	}

	// Every connection is pinned to the first IP on port 443, whatever
	// address the HTTP stack asks for; t.Host is still used as the TLS
	// server name.
	pinned := net.JoinHostPort(t.IPs[0], "443")
	d := &net.Dialer{Timeout: t.Timeout}
	dialPinned := func(ctx context.Context, network, _ string) (net.Conn, error) {
		return d.DialContext(ctx, network, pinned)
	}

	tr := &http.Transport{
		DialContext:           dialPinned,
		TLSClientConfig:       &tls.Config{ServerName: t.Host},
		TLSHandshakeTimeout:   t.Timeout,
		ResponseHeaderTimeout: t.Timeout,
		DisableKeepAlives:     true,
	}
	defer tr.CloseIdleConnections()
	httpClient := &http.Client{Transport: tr}

	paths := [...]string{"/.well-known/security.txt", "/robots.txt"}
	result := &WellKnownData{URIs: make(map[string]WellKnownProbe, len(paths))}
	for i := range paths {
		p := paths[i]
		result.URIs[p] = fetchOne(ctx, httpClient, t.Host, p, t.UserAgent)
	}
	return result, nil
}
// fetchOne issues a single HTTPS GET for path on host through client and
// summarises the outcome as a WellKnownProbe.
//
// The returned probe always carries the requested URL. Error is set when
// the request cannot be built, when the exchange fails before a response
// is received, or when reading the body fails; in the last case StatusCode
// and the number of bytes read so far are still reported. ua is set as the
// User-Agent request header. At most 64 KiB of the body are read.
func fetchOne(ctx context.Context, client *http.Client, host, path, ua string) WellKnownProbe {
	// Upper bound on the number of body bytes consumed per probe.
	const maxBody = 64 << 10

	u := (&url.URL{Scheme: "https", Host: host, Path: path}).String()
	probe := WellKnownProbe{URL: u}

	req, err := http.NewRequestWithContext(ctx, http.MethodGet, u, nil)
	if err != nil {
		probe.Error = err.Error()
		return probe
	}
	req.Header.Set("User-Agent", ua)

	resp, err := client.Do(req)
	if err != nil {
		probe.Error = err.Error()
		return probe
	}
	defer resp.Body.Close()
	probe.StatusCode = resp.StatusCode

	// Cap the read so a misconfigured server can't pull megabytes for a
	// "did this exist?" probe.
	body, err := io.ReadAll(io.LimitReader(resp.Body, maxBody))
	probe.Bytes = len(body)
	if err != nil {
		// A body that fails mid-transfer (reset, timeout, cancelled
		// context) is not a usable document: surface the failure instead
		// of reporting a silently truncated byte count.
		probe.Error = "reading body: " + err.Error()
	}
	return probe
}
// init registers the well-known collector through RegisterCollector when
// the package is loaded.
func init() {
	RegisterCollector(wellknownCollector{})
}