// This file is part of the happyDomain (R) project.
// Copyright (c) 2020-2026 happyDomain
// Authors: Pierre-Olivier Mercier, et al.

package checker

import (
	"context"
	"log"
	"sync"
	"time"
)

// rootCollector probes the target host on HTTP/80 and HTTPS/443 for every
// known IP, captures headers/cookies/redirects on each, and parses the
// HTML body of the first successful HTTPS probe (so SRI-style rules have
// something to evaluate). This is the original behaviour of Collect()
// before the Collector interface was introduced.
type rootCollector struct{}

func (rootCollector) Key() string { return ObservationKeyHTTP }

func (rootCollector) Collect(ctx context.Context, t Target) (any, error) {
	data := &HTTPData{
		Domain:      t.Host,
		CollectedAt: time.Now(),
	}

	type job struct {
		scheme string
		port   uint16
		ip     string
		// parseHTML controls whether the HTML body is parsed and its
		// references kept on the probe. Only the first HTTPS probe gets
		// it, to keep payload size bounded.
		parseHTML bool
	}

	var jobs []job
	htmlPicked := false
	for _, ip := range t.IPs {
		jobs = append(jobs, job{scheme: "http", port: DefaultHTTPPort, ip: ip})

		j := job{scheme: "https", port: DefaultHTTPSPort, ip: ip}
		if !htmlPicked {
			j.parseHTML = true
			htmlPicked = true
		}
		jobs = append(jobs, j)
	}

	var mu sync.Mutex
	var wg sync.WaitGroup
	sem := make(chan struct{}, MaxConcurrentProbes)

	for _, j := range jobs {
		wg.Add(1)
		sem <- struct{}{}
		go func(j job) {
			defer wg.Done()
			defer func() { <-sem }()

			probe := runProbe(ctx, t.Host, j.ip, j.scheme, j.port, t.Timeout, t.MaxRedirects, t.UserAgent, j.parseHTML)

			if verboseLogging {
				log.Printf("checker-http: %s ip=%s status=%d redirects=%d err=%q",
					j.scheme, j.ip, probe.StatusCode, len(probe.RedirectChain), probe.Error)
			}

			mu.Lock()
			data.Probes = append(data.Probes, probe)
			mu.Unlock()
		}(j)
	}
	wg.Wait()

	return data, nil
}
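
// Illustrative usage sketch, kept as a comment so it adds no build or API
// surface: how a caller might drive rootCollector directly. Only the Target
// fields this file actually reads are shown; their concrete types (e.g.
// Timeout as a time.Duration, MaxRedirects as an int) and any surrounding
// Collector registry are assumptions, not established by this file.
//
//	raw, err := rootCollector{}.Collect(ctx, Target{
//	    Host:         "example.com",
//	    IPs:          []string{"192.0.2.1", "2001:db8::1"},
//	    Timeout:      5 * time.Second,
//	    MaxRedirects: 5,
//	    UserAgent:    "happyDomain-checker/example",
//	})
//	if err == nil {
//	    data := raw.(*HTTPData) // payload published under ObservationKeyHTTP
//	    _ = data.Probes         // one probe entry per scheme/IP pair
//	}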