All checks were successful
continuous-integration/drone/push Build is passing
robots.txt and security.txt are independent requests to the same host; fetch them in parallel so the collector's latency is the slower of the two rather than their sum (each pays its own TLS handshake under the keep-alive-disabled transport). Each goroutine writes its own variable and the map is assembled after both finish, so no locking is needed.
120 lines
4.1 KiB
Go
120 lines
4.1 KiB
Go
// This file is part of the happyDomain (R) project.
|
|
// Copyright (c) 2020-2026 happyDomain
|
|
// Authors: Pierre-Olivier Mercier, et al.
|
|
|
|
package checker
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"net/http"
|
|
"strings"
|
|
"sync"
|
|
)
|
|
|
|
// ObservationKeyWellKnown names the Extensions[] entry that holds the
// observation produced by wellknownCollector.
const ObservationKeyWellKnown = "wellknown"
|
|
|
|
// WellKnownData captures whether each well-known URI returned a usable
|
|
// document. It is intentionally narrow: per-URI presence and HTTP status
|
|
// are enough for the current rule set; deeper parsing (e.g. PGP-signed
|
|
// security.txt fields) is left to dedicated collectors when the need
|
|
// arises.
|
|
type WellKnownData struct {
|
|
URIs map[string]WellKnownProbe `json:"uris"`
|
|
}
|
|
|
|
// WellKnownProbe is a single (URI → outcome) entry. It embeds the generic
|
|
// PathProbe and adds the few security.txt signals the rule needs to decide
|
|
// whether the response is an actual RFC 9116 file rather than, say, a soft-404
|
|
// HTML page. ContactCount/ExpiresCount are only populated for security.txt.
|
|
type WellKnownProbe struct {
|
|
PathProbe
|
|
ContactCount int `json:"contact_count,omitempty"`
|
|
ExpiresCount int `json:"expires_count,omitempty"`
|
|
}
|
|
|
|
// wellknownCollector fetches a short, fixed list of standardised documents
// from the host. The list currently is:
//
//   - /.well-known/security.txt (RFC 9116): security disclosure contact
//   - /robots.txt (RFC 9309): crawler directives
//
// Only the first IP is probed: these documents are expected to be identical
// on every backend, so querying the others would add no information.
type wellknownCollector struct{}
|
|
|
|
// Key returns ObservationKeyWellKnown, the key under which this collector's
// observation is published.
func (wellknownCollector) Key() string {
	return ObservationKeyWellKnown
}
|
|
|
|
func (wellknownCollector) Collect(ctx context.Context, t Target) (any, error) {
|
|
if len(t.IPs) == 0 {
|
|
return nil, fmt.Errorf("no IPs to probe")
|
|
}
|
|
transport, cleanup := newPinnedHTTPSTransport(t.IPs[0], t.Host, t.Timeout)
|
|
defer cleanup()
|
|
client := &http.Client{Transport: transport}
|
|
|
|
// The two URIs are independent requests to the same host, so probe them
|
|
// concurrently. Each goroutine writes its own variable; the map is
|
|
// assembled after both finish, so no locking is needed.
|
|
var robots, securityTxt WellKnownProbe
|
|
var wg sync.WaitGroup
|
|
wg.Add(2)
|
|
// robots.txt: presence and status are all the (future) rule needs.
|
|
go func() {
|
|
defer wg.Done()
|
|
robots = WellKnownProbe{
|
|
PathProbe: fetchHTTPSPath(ctx, client, t.Host, "/robots.txt", t.UserAgent, 64<<10),
|
|
}
|
|
}()
|
|
// security.txt: read the body so the rule can tell a genuine RFC 9116
|
|
// file from a soft-404 page that merely returns 200.
|
|
go func() {
|
|
defer wg.Done()
|
|
securityTxt = fetchSecurityTxt(ctx, client, t.Host, "/.well-known/security.txt", t.UserAgent, 64<<10)
|
|
}()
|
|
wg.Wait()
|
|
|
|
out := WellKnownData{URIs: map[string]WellKnownProbe{
|
|
"/robots.txt": robots,
|
|
"/.well-known/security.txt": securityTxt,
|
|
}}
|
|
|
|
return &out, nil
|
|
}
|
|
|
|
// fetchSecurityTxt fetches path, captures the generic probe fields, and counts
|
|
// the RFC 9116 required fields (Contact, Expires) found in the body.
|
|
func fetchSecurityTxt(ctx context.Context, client *http.Client, host, path, ua string, limit int64) WellKnownProbe {
|
|
probe, body := fetchHTTPSPathBody(ctx, client, host, path, ua, limit)
|
|
out := WellKnownProbe{PathProbe: probe}
|
|
out.ContactCount, out.ExpiresCount = countSecurityTxtFields(body)
|
|
return out
|
|
}
|
|
|
|
// countSecurityTxtFields counts occurrences of the Contact and Expires fields
|
|
// in an RFC 9116 file. Fields are "name: value" lines; blank lines and lines
|
|
// beginning with "#" (comments) are ignored, and field names are
|
|
// case-insensitive (RFC 9116 §2.4). PGP signature blocks are not parsed.
|
|
func countSecurityTxtFields(body []byte) (contacts, expires int) {
|
|
for raw := range strings.Lines(string(body)) {
|
|
line := strings.TrimSpace(raw)
|
|
if line == "" || strings.HasPrefix(line, "#") {
|
|
continue
|
|
}
|
|
name, _, ok := strings.Cut(line, ":")
|
|
if !ok {
|
|
continue
|
|
}
|
|
switch strings.ToLower(strings.TrimSpace(name)) {
|
|
case "contact":
|
|
contacts++
|
|
case "expires":
|
|
expires++
|
|
}
|
|
}
|
|
return contacts, expires
|
|
}
|
|
|
|
// init hands a wellknownCollector to RegisterCollector when the package is
// initialised.
func init() {
	RegisterCollector(wellknownCollector{})
}
|