checker-http/checker/collect_test.go

305 lines
9.6 KiB
Go

// This file is part of the happyDomain (R) project.
// Copyright (c) 2020-2026 happyDomain
// Authors: Pierre-Olivier Mercier, et al.
package checker
import (
"context"
"fmt"
"io"
"net"
"net/http"
"net/http/httptest"
"net/url"
"strconv"
"strings"
"testing"
"time"
"golang.org/x/net/html"
)
// splitHostPort breaks an httptest server URL (for example
// "http://127.0.0.1:1234") into its host string and numeric port.
// Any parsing failure aborts the calling test through t.Fatalf.
func splitHostPort(t *testing.T, raw string) (string, uint16) {
	t.Helper()

	parsed, err := url.Parse(raw)
	if err != nil {
		t.Fatalf("parse %q: %v", raw, err)
	}

	hostname, rawPort, err := net.SplitHostPort(parsed.Host)
	if err != nil {
		t.Fatalf("split host port %q: %v", parsed.Host, err)
	}

	// A bit size of 16 makes ParseUint reject values that do not fit in uint16,
	// so the conversion below cannot truncate.
	portNum, err := strconv.ParseUint(rawPort, 10, 16)
	if err != nil {
		t.Fatalf("port %q: %v", rawPort, err)
	}

	return hostname, uint16(portNum)
}
// TestRunProbe_HTTPSuccess probes a local plain-HTTP test server and checks
// the fields of the returned probe: no error, TCP connected, status 200, the
// X-Frame-Options header under a lower-cased key, the attributes of the single
// cookie, the IPv6 flag and the recorded address.
func TestRunProbe_HTTPSuccess(t *testing.T) {
	handler := func(w http.ResponseWriter, r *http.Request) {
		w.Header().Set("X-Frame-Options", "DENY")
		w.Header().Set("Content-Type", "text/plain")
		http.SetCookie(w, &http.Cookie{
			Name:     "sid",
			Value:    "v",
			Secure:   true,
			HttpOnly: true,
			SameSite: http.SameSiteLaxMode,
		})
		_, _ = io.WriteString(w, "hello")
	}
	srv := httptest.NewServer(http.HandlerFunc(handler))
	defer srv.Close()

	ip, port := splitHostPort(t, srv.URL)

	// The server IP is passed as the host too, so the default Host header matches.
	probe := runProbe(context.Background(), ip, ip, "http", port, 2*time.Second, 0, "test-ua", false)

	if probe.Error != "" {
		t.Fatalf("unexpected error: %q", probe.Error)
	}
	if !probe.TCPConnected || probe.StatusCode != 200 {
		t.Fatalf("unexpected probe result: %+v", probe)
	}

	if got := probe.Headers["x-frame-options"]; got != "DENY" {
		t.Errorf("missing x-frame-options header: %+v", probe.Headers)
	}

	// Exactly one cookie is expected, carrying every attribute the handler set.
	cookieOK := len(probe.Cookies) == 1
	if cookieOK {
		c := probe.Cookies[0]
		cookieOK = c.Name == "sid" && c.Secure && c.HttpOnly && c.SameSite == "Lax"
	}
	if !cookieOK {
		t.Errorf("unexpected cookies: %+v", probe.Cookies)
	}

	if probe.IsIPv6 {
		t.Errorf("IPv4 address mis-detected as IPv6")
	}

	wantAddr := net.JoinHostPort(ip, fmt.Sprintf("%d", port))
	if probe.Address != wantAddr {
		t.Errorf("address: %q", probe.Address)
	}
}
// TestRunProbe_TCPConnectionRefused probes a loopback port that has just been
// released and checks that the probe reports an error, leaves TCPConnected
// false and keeps StatusCode at zero.
func TestRunProbe_TCPConnectionRefused(t *testing.T) {
	// Pick a port we know nothing listens on by binding then immediately closing.
	l, err := net.Listen("tcp", "127.0.0.1:0")
	if err != nil {
		t.Fatalf("listen: %v", err)
	}
	addr := l.Addr().String()

	// Close before any further t.Fatalf so the listener is never left open, and
	// fail loudly if the port could not be released: the test would otherwise
	// probe a port that is still accepting connections.
	if err := l.Close(); err != nil {
		t.Fatalf("close listener: %v", err)
	}

	_, portStr, err := net.SplitHostPort(addr)
	if err != nil {
		t.Fatalf("split host port %q: %v", addr, err)
	}
	p, err := strconv.ParseUint(portStr, 10, 16)
	if err != nil {
		t.Fatalf("port %q: %v", portStr, err)
	}

	probe := runProbe(context.Background(), "127.0.0.1", "127.0.0.1", "http", uint16(p), 500*time.Millisecond, 0, "ua", false)
	if probe.Error == "" {
		t.Fatal("expected error from probing closed port")
	}
	if probe.TCPConnected {
		t.Errorf("TCPConnected should be false on dial failure: %+v", probe)
	}
	if probe.StatusCode != 0 {
		t.Errorf("StatusCode should be 0, got %d", probe.StatusCode)
	}
}
// TestRunProbe_BodyTruncation serves a text/html body 4096 bytes longer than
// MaxBodyBytes and checks that the probe sets BodyTruncated and reports
// HTMLBytes equal to MaxBodyBytes.
func TestRunProbe_BodyTruncation(t *testing.T) {
	// Payload deliberately larger than MaxBodyBytes.
	oversized := strings.Repeat("a", MaxBodyBytes+4096)

	handler := func(w http.ResponseWriter, _ *http.Request) {
		w.Header().Set("Content-Type", "text/html")
		_, _ = io.WriteString(w, oversized)
	}
	srv := httptest.NewServer(http.HandlerFunc(handler))
	defer srv.Close()

	ip, port := splitHostPort(t, srv.URL)
	probe := runProbe(context.Background(), ip, ip, "http", port, 5*time.Second, 0, "ua", true)

	if truncated := probe.BodyTruncated; !truncated {
		t.Errorf("expected BodyTruncated=true, got probe=%+v", probe)
	}
	if probe.HTMLBytes != MaxBodyBytes {
		t.Errorf("HTMLBytes = %d, want %d", probe.HTMLBytes, MaxBodyBytes)
	}
}
// TestRunProbe_RedirectFollowedSameHost serves a 302 from every path to /dst
// on the same server, where /dst answers 204. It checks that the probe ends
// on the 204 response and records exactly one redirect hop.
func TestRunProbe_RedirectFollowedSameHost(t *testing.T) {
	mux := http.NewServeMux()
	mux.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
		// Anything other than the destination bounces to /dst.
		if r.URL.Path != "/dst" {
			http.Redirect(w, r, "/dst", http.StatusFound)
			return
		}
		w.WriteHeader(http.StatusNoContent)
	})

	srv := httptest.NewServer(mux)
	defer srv.Close()

	ip, port := splitHostPort(t, srv.URL)
	probe := runProbe(context.Background(), ip, ip, "http", port, 5*time.Second, 5, "ua", false)

	if probe.StatusCode != http.StatusNoContent {
		t.Errorf("status: got %d, want 204; chain=%+v err=%q", probe.StatusCode, probe.RedirectChain, probe.Error)
	}
	if hops := len(probe.RedirectChain); hops != 1 {
		t.Errorf("redirect chain length: got %d, want 1: %+v", hops, probe.RedirectChain)
	}
}
// TestRunProbe_RedirectStoppedCrossHost serves a 301 pointing at another host
// and checks that the probe reports the 301 itself with exactly one recorded
// hop.
func TestRunProbe_RedirectStoppedCrossHost(t *testing.T) {
	// Redirect to a different host: the probe must NOT follow.
	crossHost := func(w http.ResponseWriter, r *http.Request) {
		http.Redirect(w, r, "https://elsewhere.invalid/", http.StatusMovedPermanently)
	}
	srv := httptest.NewServer(http.HandlerFunc(crossHost))
	defer srv.Close()

	ip, port := splitHostPort(t, srv.URL)
	probe := runProbe(context.Background(), ip, ip, "http", port, 5*time.Second, 5, "ua", false)

	if probe.StatusCode != http.StatusMovedPermanently {
		t.Errorf("status: got %d, want 301", probe.StatusCode)
	}
	if hops := len(probe.RedirectChain); hops != 1 {
		t.Errorf("expected 1 recorded hop, got %d: %+v", hops, probe.RedirectChain)
	}
}
// TestRunProbe_HTMLResourceExtraction serves an HTML page that references two
// scripts, one stylesheet and one icon, then checks that the probe lists
// exactly three resources with the expected CrossOrigin and Integrity values.
func TestRunProbe_HTMLResourceExtraction(t *testing.T) {
	// Named "page" rather than "html" so the imported html package is not
	// shadowed inside this function.
	page := `<!doctype html><html><head>
<script src="https://cdn.example/lib.js" integrity="sha384-abc"></script>
<script src="/local.js"></script>
<link rel="stylesheet" href="https://cdn.example/style.css">
<link rel="icon" href="/favicon.ico">
</head><body></body></html>`
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
		w.Header().Set("Content-Type", "text/html; charset=utf-8")
		_, _ = io.WriteString(w, page)
	}))
	defer srv.Close()

	ip, port := splitHostPort(t, srv.URL)
	probe := runProbe(context.Background(), ip, ip, "http", port, 5*time.Second, 0, "ua", true)
	if probe.Error != "" {
		t.Fatalf("error: %q", probe.Error)
	}
	if len(probe.Resources) != 3 {
		// Expect: cdn script, local script, cdn stylesheet. The icon link is ignored (rel=icon).
		t.Fatalf("got %d resources, want 3: %+v", len(probe.Resources), probe.Resources)
	}

	// Each flag records that the corresponding resource URL was seen at least once.
	var cdnScript, localScript, stylesheet bool
	for _, res := range probe.Resources {
		switch res.URL {
		case "https://cdn.example/lib.js":
			cdnScript = true
			if !res.CrossOrigin || res.Integrity != "sha384-abc" {
				t.Errorf("cdn script: %+v", res)
			}
		case "/local.js":
			localScript = true
			if res.CrossOrigin {
				t.Errorf("local script flagged as cross-origin")
			}
		case "https://cdn.example/style.css":
			stylesheet = true
			if !res.CrossOrigin || res.Integrity != "" {
				t.Errorf("stylesheet: %+v", res)
			}
		}
	}
	if !cdnScript || !localScript || !stylesheet {
		t.Errorf("missing expected resources: cdnScript=%v local=%v stylesheet=%v", cdnScript, localScript, stylesheet)
	}
}
// TestRunProbe_NonHTMLContentTypeSkipsExtraction serves a body containing a
// script tag under an application/json content type and checks that the probe
// reports no resources.
func TestRunProbe_NonHTMLContentTypeSkipsExtraction(t *testing.T) {
	jsonHandler := func(w http.ResponseWriter, _ *http.Request) {
		w.Header().Set("Content-Type", "application/json")
		_, _ = io.WriteString(w, `<script src="/x.js"></script>`)
	}
	srv := httptest.NewServer(http.HandlerFunc(jsonHandler))
	defer srv.Close()

	ip, port := splitHostPort(t, srv.URL)
	probe := runProbe(context.Background(), ip, ip, "http", port, 5*time.Second, 0, "ua", true)

	if n := len(probe.Resources); n != 0 {
		t.Errorf("non-HTML content-type should skip parsing, got %+v", probe.Resources)
	}
}
// TestRunProbe_ContextCancelled calls runProbe with a context that is already
// cancelled and checks that the returned probe carries a non-empty Error.
func TestRunProbe_ContextCancelled(t *testing.T) {
	// The handler writes nothing; it only returns once the request context is done.
	blocking := func(w http.ResponseWriter, r *http.Request) {
		<-r.Context().Done()
	}
	srv := httptest.NewServer(http.HandlerFunc(blocking))
	defer srv.Close()

	ip, port := splitHostPort(t, srv.URL)

	// Cancel before the probe starts.
	ctx, cancel := context.WithCancel(context.Background())
	cancel()

	probe := runProbe(ctx, ip, ip, "http", port, 5*time.Second, 0, "ua", false)
	if probe.Error == "" {
		t.Errorf("expected error when context is already cancelled, got probe=%+v", probe)
	}
}
// TestIsHTMLContent runs isHTMLContent over a table of content-type strings
// and checks each result against the expected classification.
func TestIsHTMLContent(t *testing.T) {
	cases := []struct {
		contentType string
		wantHTML    bool
	}{
		{"text/html", true},
		{"TEXT/HTML; charset=utf-8", true},
		{"application/xhtml+xml", true},
		{"", false},
		{"application/json", false},
		{"text/plain", false},
		{"image/png", false},
	}

	for _, tc := range cases {
		if tc.wantHTML {
			if !isHTMLContent(tc.contentType) {
				t.Errorf("isHTMLContent(%q) = false, want true", tc.contentType)
			}
			continue
		}
		if isHTMLContent(tc.contentType) {
			t.Errorf("isHTMLContent(%q) = true, want false", tc.contentType)
		}
	}
}
// TestRelIsAsset runs relIsAsset over a table of link rel values and checks
// each result against the expected classification.
func TestRelIsAsset(t *testing.T) {
	cases := []struct {
		rel       string
		wantAsset bool
	}{
		{"stylesheet", true},
		{"preload", true},
		{"modulepreload", true},
		{"STYLESHEET", true},
		{"preload stylesheet", true},
		{"", false},
		{"icon", false},
		{"alternate", false},
		{"canonical", false},
	}

	for _, tc := range cases {
		if tc.wantAsset {
			if !relIsAsset(tc.rel) {
				t.Errorf("relIsAsset(%q) = false, want true", tc.rel)
			}
			continue
		}
		if relIsAsset(tc.rel) {
			t.Errorf("relIsAsset(%q) = true, want false", tc.rel)
		}
	}
}
// TestExtractResources_EmptyAndMalformed checks that extractResources returns
// nil for an empty body and for input that is not meaningful HTML.
func TestExtractResources_EmptyAndMalformed(t *testing.T) {
	// The HTML parser is forgiving: even garbage produces no resources rather than panicking.
	emptyBody := []byte("")
	if res := extractResources(emptyBody, "h"); res != nil {
		t.Errorf("empty body: got %+v, want nil", res)
	}

	garbage := []byte("<<<not really html>>>")
	if res := extractResources(garbage, "h"); res != nil {
		t.Errorf("garbage: got %+v, want nil", res)
	}
}
// TestExtractResources_SkipsScriptWithoutSrc checks that an inline script and
// a script with an empty src attribute yield no resources.
func TestExtractResources_SkipsScriptWithoutSrc(t *testing.T) {
	const doc = `<html><body><script>alert(1)</script><script src=""></script></body></html>`

	res := extractResources([]byte(doc), "h")
	if len(res) != 0 {
		t.Errorf("inline/empty-src scripts should not produce resources: %+v", res)
	}
}
// TestAttrCaseInsensitive parses an anchor whose attributes are written in
// mixed case and checks that attr finds them whatever the case of the
// requested name, and reports ok=false for an attribute that is absent.
func TestAttrCaseInsensitive(t *testing.T) {
	root, err := html.Parse(strings.NewReader(`<a HREF="x" Integrity="i"></a>`))
	if err != nil {
		t.Fatal(err)
	}

	// Depth-first search for the <a> element. Matching elements are not
	// descended into, but their siblings are still visited.
	var anchor *html.Node
	var visit func(*html.Node)
	visit = func(node *html.Node) {
		if node.Type == html.ElementNode && node.Data == "a" {
			anchor = node
			return
		}
		child := node.FirstChild
		for child != nil {
			visit(child)
			child = child.NextSibling
		}
	}
	visit(root)

	if anchor == nil {
		t.Fatal("anchor not found")
	}

	if val, ok := attr(anchor, "href"); !ok || val != "x" {
		t.Errorf("href: got (%q,%v)", val, ok)
	}
	if val, ok := attr(anchor, "INTEGRITY"); !ok || val != "i" {
		t.Errorf("INTEGRITY: got (%q,%v)", val, ok)
	}
	if _, ok := attr(anchor, "missing"); ok {
		t.Errorf("missing attr should return ok=false")
	}
}