99 lines
3.1 KiB
Go
99 lines
3.1 KiB
Go
// This file is part of the happyDomain (R) project.
|
|
// Copyright (c) 2020-2026 happyDomain
|
|
// Authors: Pierre-Olivier Mercier, et al.
|
|
|
|
package checker
|
|
|
|
import (
|
|
"context"
|
|
"crypto/tls"
|
|
"fmt"
|
|
"io"
|
|
"net"
|
|
"net/http"
|
|
"net/url"
|
|
)
|
|
|
|
const (
	// ObservationKeyWellKnown names the Extensions[] entry in which
	// wellknownCollector stores the observation it produces.
	ObservationKeyWellKnown = "wellknown"
)
|
|
|
|
// WellKnownData captures whether each well-known URI returned a usable
|
|
// document. It is intentionally narrow: per-URI presence and HTTP status
|
|
// are enough for the current rule set; deeper parsing (e.g. PGP-signed
|
|
// security.txt fields) is left to dedicated collectors when the need
|
|
// arises.
|
|
type WellKnownData struct {
|
|
URIs map[string]WellKnownProbe `json:"uris"`
|
|
}
|
|
|
|
// WellKnownProbe records the outcome of fetching one well-known URI.
type WellKnownProbe struct {
	// URL is the absolute URL that was requested.
	URL string `json:"url"`

	// StatusCode is the HTTP status of the response. It stays zero, and is
	// omitted from JSON, when no response was received.
	StatusCode int `json:"status_code,omitempty"`

	// Bytes is the number of response-body bytes that were read; omitted
	// from JSON when zero.
	Bytes int `json:"bytes,omitempty"`

	// Error carries the failure message when the probe did not complete;
	// it is empty (and omitted from JSON) on success.
	Error string `json:"error,omitempty"`
}
|
|
|
|
// wellknownCollector fetches a short, hard-coded list of standardised URIs
// from the apex of the host. The list currently contains:
//
//   - /.well-known/security.txt (RFC 9116): security disclosure contact
//   - /robots.txt (RFC 9309): crawler directives
//
// Only the first IP is probed: these documents are expected to be the same
// on every backend, so querying each of them would teach nothing new.
type wellknownCollector struct{}
|
|
|
|
// Key reports the observation key this collector publishes under, namely
// ObservationKeyWellKnown.
func (wellknownCollector) Key() string {
	return ObservationKeyWellKnown
}
|
|
|
|
func (wellknownCollector) Collect(ctx context.Context, t Target) (any, error) {
|
|
if len(t.IPs) == 0 {
|
|
return nil, fmt.Errorf("no IPs to probe")
|
|
}
|
|
addr := net.JoinHostPort(t.IPs[0], "443")
|
|
dialer := &net.Dialer{Timeout: t.Timeout}
|
|
transport := &http.Transport{
|
|
DialContext: func(ctx context.Context, network, _ string) (net.Conn, error) {
|
|
return dialer.DialContext(ctx, network, addr)
|
|
},
|
|
TLSClientConfig: &tls.Config{ServerName: t.Host},
|
|
TLSHandshakeTimeout: t.Timeout,
|
|
ResponseHeaderTimeout: t.Timeout,
|
|
DisableKeepAlives: true,
|
|
}
|
|
defer transport.CloseIdleConnections()
|
|
client := &http.Client{Transport: transport}
|
|
|
|
uris := []string{"/.well-known/security.txt", "/robots.txt"}
|
|
out := WellKnownData{URIs: make(map[string]WellKnownProbe, len(uris))}
|
|
for _, path := range uris {
|
|
out.URIs[path] = fetchOne(ctx, client, t.Host, path, t.UserAgent)
|
|
}
|
|
return &out, nil
|
|
}
|
|
|
|
func fetchOne(ctx context.Context, client *http.Client, host, path, ua string) WellKnownProbe {
|
|
u := (&url.URL{Scheme: "https", Host: host, Path: path}).String()
|
|
probe := WellKnownProbe{URL: u}
|
|
req, err := http.NewRequestWithContext(ctx, http.MethodGet, u, nil)
|
|
if err != nil {
|
|
probe.Error = err.Error()
|
|
return probe
|
|
}
|
|
req.Header.Set("User-Agent", ua)
|
|
resp, err := client.Do(req)
|
|
if err != nil {
|
|
probe.Error = err.Error()
|
|
return probe
|
|
}
|
|
defer resp.Body.Close()
|
|
probe.StatusCode = resp.StatusCode
|
|
// Cap the read so a misconfigured server can't pull megabytes for a
|
|
// "did this exist?" probe.
|
|
body, _ := io.ReadAll(io.LimitReader(resp.Body, 64<<10))
|
|
probe.Bytes = len(body)
|
|
return probe
|
|
}
|
|
|
|
// init hands a wellknownCollector to RegisterCollector when the package is
// initialised.
func init() {
	RegisterCollector(wellknownCollector{})
}
|