// This file is part of the happyDomain (R) project.
// Copyright (c) 2020-2026 happyDomain
// Authors: Pierre-Olivier Mercier, et al.

package checker

import (
	"context"
	"log"
	"sync"
	"time"
)

// rootCollector probes the target host on HTTP/80 and HTTPS/443 for every
// known IP, captures headers/cookies/redirects on each, and parses the
// HTML body of the first successful HTTPS probe (so SRI-style rules have
// something to evaluate). This is the original behaviour of Collect()
// before the Collector interface was introduced.
type rootCollector struct{}

func (rootCollector) Key() string { return ObservationKeyHTTP }

func (rootCollector) Collect(ctx context.Context, t Target) (any, error) {
	data := &HTTPData{
		Domain:      t.Host,
		CollectedAt: time.Now(),
	}

	type job struct {
		scheme string
		port   uint16
		ip     string
		// parseHTML controls whether the HTML body is parsed and its
		// references kept on the probe. Only the first HTTPS probe gets
		// it, to keep payload size bounded.
		parseHTML bool
	}

	var jobs []job
	htmlPicked := false
	for _, ip := range t.IPs {
		jobs = append(jobs, job{scheme: "http", port: DefaultHTTPPort, ip: ip})

		j := job{scheme: "https", port: DefaultHTTPSPort, ip: ip}
		if !htmlPicked {
			j.parseHTML = true
			htmlPicked = true
		}
		jobs = append(jobs, j)
	}

	var mu sync.Mutex
	var wg sync.WaitGroup
	sem := make(chan struct{}, MaxConcurrentProbes)

	for _, j := range jobs {
		wg.Add(1)
		sem <- struct{}{}
		go func(j job) {
			defer wg.Done()
			defer func() { <-sem }()

			probe := runProbe(ctx, t.Host, j.ip, j.scheme, j.port, t.Timeout, t.MaxRedirects, t.UserAgent, j.parseHTML)

			if verboseLogging {
				log.Printf("checker-http: %s ip=%s status=%d redirects=%d err=%q",
					j.scheme, j.ip, probe.StatusCode, len(probe.RedirectChain), probe.Error)
			}

			mu.Lock()
			data.Probes = append(data.Probes, probe)
			mu.Unlock()
		}(j)
	}
	wg.Wait()

	return data, nil
}
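
// Illustrative usage sketch, kept as a comment so it adds no build or API
// surface: how a caller might drive rootCollector directly. Only the Target
// fields this file actually reads are shown; their concrete types (e.g.
// Timeout as a time.Duration, MaxRedirects as an int) and any surrounding
// Collector registry are assumptions, not established by this file.
//
//	raw, err := rootCollector{}.Collect(ctx, Target{
//	    Host:         "example.com",
//	    IPs:          []string{"192.0.2.1", "2001:db8::1"},
//	    Timeout:      5 * time.Second,
//	    MaxRedirects: 5,
//	    UserAgent:    "happyDomain-checker/example",
//	})
//	if err == nil {
//	    data := raw.(*HTTPData) // payload published under ObservationKeyHTTP
//	    _ = data.Probes         // one probe entry per scheme/IP pair
//	}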