checker: validate security.txt is a real RFC 9116 file

The http.security_txt rule reported OK for any 200 response with a
non-empty body, so a soft-404 (status 200 + HTML body) served for
/.well-known/security.txt was misread as "published".

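Capture the response Content-Type and count the RFC 9116 required
fields (Contact, Expires) in the body. OK now requires text/plain with
at least one Contact and exactly one Expires; a non-conforming 200
yields a new Warn http.security_txt.invalid explaining the defect. An
empty 200 still yields http.security_txt.empty, and a 200 whose body
could not be read in full yields Warn http.security_txt.read_error.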
Redirects are still followed and the final response is validated, per
RFC 9116 §3.
This commit is contained in:
nemunaire 2026-06-14 12:56:43 +09:00
commit ffa3fbe1f9
4 changed files with 281 additions and 22 deletions

View file

@ -47,30 +47,46 @@ type PathProbe struct {
URL string `json:"url"` URL string `json:"url"`
StatusCode int `json:"status_code,omitempty"` StatusCode int `json:"status_code,omitempty"`
Bytes int `json:"bytes,omitempty"` Bytes int `json:"bytes,omitempty"`
ContentType string `json:"content_type,omitempty"`
Error string `json:"error,omitempty"` Error string `json:"error,omitempty"`
} }
// fetchHTTPSPath issues a single GET against the given path using client, // fetchHTTPSPath issues a single GET against the given path using client,
// reads up to limit bytes (just to measure size), and returns a PathProbe. // reads up to limit bytes (just to measure size), and returns a PathProbe.
// Callers that need the body itself should use fetchHTTPSPathBody.
func fetchHTTPSPath(ctx context.Context, client *http.Client, host, path, ua string, limit int64) PathProbe { func fetchHTTPSPath(ctx context.Context, client *http.Client, host, path, ua string, limit int64) PathProbe {
probe, _ := fetchHTTPSPathBody(ctx, client, host, path, ua, limit)
return probe
}
// fetchHTTPSPathBody is fetchHTTPSPath but also returns the response body,
// truncated to limit bytes. Probe.Bytes equals len(body).
func fetchHTTPSPathBody(ctx context.Context, client *http.Client, host, path, ua string, limit int64) (PathProbe, []byte) {
u := (&url.URL{Scheme: "https", Host: host, Path: path}).String() u := (&url.URL{Scheme: "https", Host: host, Path: path}).String()
probe := PathProbe{URL: u} probe := PathProbe{URL: u}
req, err := http.NewRequestWithContext(ctx, http.MethodGet, u, nil) req, err := http.NewRequestWithContext(ctx, http.MethodGet, u, nil)
if err != nil { if err != nil {
probe.Error = err.Error() probe.Error = err.Error()
return probe return probe, nil
} }
req.Header.Set("User-Agent", ua) req.Header.Set("User-Agent", ua)
resp, err := client.Do(req) resp, err := client.Do(req)
if err != nil { if err != nil {
probe.Error = err.Error() probe.Error = err.Error()
return probe return probe, nil
} }
defer resp.Body.Close() defer resp.Body.Close()
probe.StatusCode = resp.StatusCode probe.StatusCode = resp.StatusCode
n, _ := io.Copy(io.Discard, io.LimitReader(resp.Body, limit)) probe.ContentType = resp.Header.Get("Content-Type")
probe.Bytes = int(n) body, err := io.ReadAll(io.LimitReader(resp.Body, limit))
return probe probe.Bytes = len(body)
// A read error here (e.g. connection reset mid-body) means body is
// truncated and any counts derived from it are unreliable; record it so
// callers don't treat the partial body as a complete response.
if err != nil {
probe.Error = err.Error()
}
return probe, body
} }
// newPinnedHTTPSTransport returns an http.Transport that dials every request // newPinnedHTTPSTransport returns an http.Transport that dials every request

View file

@ -8,6 +8,7 @@ import (
"context" "context"
"fmt" "fmt"
"net/http" "net/http"
"strings"
) )
// ObservationKeyWellKnown is the Extensions[] key under which // ObservationKeyWellKnown is the Extensions[] key under which
@ -23,8 +24,15 @@ type WellKnownData struct {
URIs map[string]WellKnownProbe `json:"uris"` URIs map[string]WellKnownProbe `json:"uris"`
} }
// WellKnownProbe is a single (URI → outcome) entry. // WellKnownProbe is a single (URI → outcome) entry. It embeds the generic
type WellKnownProbe = PathProbe // PathProbe and adds the few security.txt signals the rule needs to decide
// whether the response is an actual RFC 9116 file rather than, say, a soft-404
// HTML page. ContactCount/ExpiresCount are only populated for security.txt.
type WellKnownProbe struct {
PathProbe
ContactCount int `json:"contact_count,omitempty"`
ExpiresCount int `json:"expires_count,omitempty"`
}
// wellknownCollector probes a small, fixed set of standardised URIs // wellknownCollector probes a small, fixed set of standardised URIs
// served at the apex of the host. Today it covers: // served at the apex of the host. Today it covers:
@ -46,12 +54,51 @@ func (wellknownCollector) Collect(ctx context.Context, t Target) (any, error) {
defer cleanup() defer cleanup()
client := &http.Client{Transport: transport} client := &http.Client{Transport: transport}
uris := []string{"/.well-known/security.txt", "/robots.txt"} out := WellKnownData{URIs: make(map[string]WellKnownProbe, 2)}
out := WellKnownData{URIs: make(map[string]WellKnownProbe, len(uris))}
for _, path := range uris { // robots.txt: presence and status are all the (future) rule needs.
out.URIs[path] = fetchHTTPSPath(ctx, client, t.Host, path, t.UserAgent, 64<<10) out.URIs["/robots.txt"] = WellKnownProbe{
PathProbe: fetchHTTPSPath(ctx, client, t.Host, "/robots.txt", t.UserAgent, 64<<10),
} }
// security.txt: read the body so the rule can tell a genuine RFC 9116
// file from a soft-404 page that merely returns 200.
out.URIs["/.well-known/security.txt"] = fetchSecurityTxt(ctx, client, t.Host, "/.well-known/security.txt", t.UserAgent, 64<<10)
return &out, nil return &out, nil
} }
// fetchSecurityTxt performs the GET for path through fetchHTTPSPathBody and
// returns the resulting generic probe together with the number of RFC 9116
// Contact and Expires fields found in the (possibly truncated) body. The
// counts are computed from whatever body came back, whatever the status code
// or probe error; interpreting them is left to the caller.
func fetchSecurityTxt(ctx context.Context, client *http.Client, host, path, ua string, limit int64) WellKnownProbe {
	base, payload := fetchHTTPSPathBody(ctx, client, host, path, ua, limit)
	nContact, nExpires := countSecurityTxtFields(payload)
	return WellKnownProbe{
		PathProbe:    base,
		ContactCount: nContact,
		ExpiresCount: nExpires,
	}
}
// countSecurityTxtFields counts occurrences of the Contact and Expires fields
// in an RFC 9116 file. Fields are "name: value" lines; blank lines and lines
// beginning with "#" (comments) are ignored, and field names are
// case-insensitive (RFC 9116 §2.4). PGP signature blocks are not parsed.
//
// A leading UTF-8 byte order mark is dropped before scanning: TrimSpace does
// not treat U+FEFF as white space, so without this a file saved "with BOM"
// would have its first field name read as "\ufeffcontact" and go uncounted.
//
// A nil or empty body yields (0, 0).
func countSecurityTxtFields(body []byte) (contacts, expires int) {
	rest := strings.TrimPrefix(string(body), "\ufeff")
	// Walk the body one "\n"-separated line at a time; a final line without
	// a terminator is still visited, and TrimSpace takes care of any "\r".
	for rest != "" {
		var raw string
		raw, rest, _ = strings.Cut(rest, "\n")
		line := strings.TrimSpace(raw)
		if line == "" || strings.HasPrefix(line, "#") {
			continue
		}
		name, _, ok := strings.Cut(line, ":")
		if !ok {
			// No colon: not a field line (e.g. HTML or signature armor).
			continue
		}
		switch strings.ToLower(strings.TrimSpace(name)) {
		case "contact":
			contacts++
		case "expires":
			expires++
		}
	}
	return contacts, expires
}
func init() { RegisterCollector(wellknownCollector{}) } func init() { RegisterCollector(wellknownCollector{}) }

View file

@ -7,6 +7,7 @@ package checker
import ( import (
"context" "context"
"fmt" "fmt"
"mime"
sdk "git.happydns.org/checker-sdk-go/checker" sdk "git.happydns.org/checker-sdk-go/checker"
) )
@ -37,8 +38,25 @@ func (r *securityTxtRule) Evaluate(ctx context.Context, obs sdk.ObservationGette
return []sdk.CheckState{unknownState("http.security_txt.no_data", "Well-known collector did not run.")} return []sdk.CheckState{unknownState("http.security_txt.no_data", "Well-known collector did not run.")}
} }
probe := wk.URIs["/.well-known/security.txt"] probe := wk.URIs["/.well-known/security.txt"]
valid, defect := checkSecurityTxt(probe)
switch { switch {
case probe.StatusCode == 200 && probe.Bytes > 0: case probe.StatusCode == 200 && probe.Error != "":
// The server answered 200 but the body could not be fully read, so
// the field counts are unreliable; don't pass a verdict on it.
return []sdk.CheckState{{
Status: sdk.StatusWarn,
Code: "http.security_txt.read_error",
Subject: data.Domain,
Message: fmt.Sprintf("/.well-known/security.txt responded 200 but could not be read fully (%s).", probe.Error),
}}
case probe.StatusCode == 200 && probe.Bytes == 0:
return []sdk.CheckState{{
Status: sdk.StatusWarn,
Code: "http.security_txt.empty",
Subject: data.Domain,
Message: "/.well-known/security.txt responded 200 but is empty.",
}}
case probe.StatusCode == 200 && valid:
return []sdk.CheckState{{ return []sdk.CheckState{{
Status: sdk.StatusOK, Status: sdk.StatusOK,
Code: "http.security_txt.ok", Code: "http.security_txt.ok",
@ -46,11 +64,26 @@ func (r *securityTxtRule) Evaluate(ctx context.Context, obs sdk.ObservationGette
Message: fmt.Sprintf("/.well-known/security.txt is published (%d bytes).", probe.Bytes), Message: fmt.Sprintf("/.well-known/security.txt is published (%d bytes).", probe.Bytes),
}} }}
case probe.StatusCode == 200: case probe.StatusCode == 200:
// 200 but the body is not a conforming RFC 9116 file. With no Contact
// or Expires fields at all it is typically a soft-404 page (e.g. an
// HTML 404 served with status 200); when the fields are present it is
// a genuine file that is merely non-conforming (wrong Content-Type,
// duplicate Expires, …), so don't mislabel it a soft-404.
msg := fmt.Sprintf("/.well-known/security.txt responded 200 but is not a valid RFC 9116 file (%s).", defect)
if probe.ContactCount == 0 && probe.ExpiresCount == 0 {
msg += " It looks like a soft-404 or placeholder rather than a published security.txt."
}
return []sdk.CheckState{{ return []sdk.CheckState{{
Status: sdk.StatusWarn, Status: sdk.StatusWarn,
Code: "http.security_txt.empty", Code: "http.security_txt.invalid",
Subject: data.Domain, Subject: data.Domain,
Message: "/.well-known/security.txt responded 200 but is empty.", Message: msg,
Meta: map[string]any{
"content_type": probe.ContentType,
"contact_count": probe.ContactCount,
"expires_count": probe.ExpiresCount,
"fix": "Serve /.well-known/security.txt as text/plain with at least one Contact: field and exactly one Expires: field (RFC 9116). If you do not publish one, return 404 for this path.",
},
}} }}
default: default:
return []sdk.CheckState{{ return []sdk.CheckState{{
@ -62,3 +95,33 @@ func (r *securityTxtRule) Evaluate(ctx context.Context, obs sdk.ObservationGette
}} }}
} }
} }
// checkSecurityTxt reports whether a 200 response is a conforming RFC 9116
// file (served as text/plain, with at least one Contact field and exactly one
// Expires field) and, when it is not, a short human-readable reason why.
//
// Checks run in a fixed order (Content-Type, then Contact, then Expires) and
// only the first failing one is reported. defect is empty when valid is true.
func checkSecurityTxt(p WellKnownProbe) (valid bool, defect string) {
	switch {
	case p.ContentType == "":
		// Report the absent header in its own words rather than quoting a
		// placeholder as though it were the header's value.
		return false, "Content-Type header is missing, expected text/plain"
	case !isTextPlain(p.ContentType):
		return false, fmt.Sprintf("Content-Type is %q, not text/plain", p.ContentType)
	case p.ContactCount == 0:
		return false, "missing required Contact field"
	case p.ExpiresCount == 0:
		return false, "missing required Expires field"
	case p.ExpiresCount > 1:
		return false, fmt.Sprintf("has %d Expires fields, exactly one is required", p.ExpiresCount)
	default:
		return true, ""
	}
}
// isTextPlain tells whether the given Content-Type header value names the
// text/plain media type. Parameters such as "; charset=utf-8" are accepted
// and ignored.
func isTextPlain(contentType string) bool {
	const want = "text/plain"
	// The parse error is deliberately dropped: whatever media type (possibly
	// none) ParseMediaType hands back is simply compared against want.
	got, _, _ := mime.ParseMediaType(contentType)
	return got == want
}

View file

@ -6,6 +6,7 @@ package checker
import ( import (
"encoding/json" "encoding/json"
"strings"
"testing" "testing"
sdk "git.happydns.org/checker-sdk-go/checker" sdk "git.happydns.org/checker-sdk-go/checker"
@ -25,8 +26,12 @@ func TestSecurityTxtRule_OK(t *testing.T) {
Domain: "example.test", Domain: "example.test",
Probes: []HTTPProbe{httpsProbe("a:443")}, Probes: []HTTPProbe{httpsProbe("a:443")},
Extensions: wellKnownData(t, map[string]WellKnownProbe{ Extensions: wellKnownData(t, map[string]WellKnownProbe{
"/.well-known/security.txt": {StatusCode: 200, Bytes: 128}, "/.well-known/security.txt": {
"/robots.txt": {StatusCode: 200, Bytes: 42}, PathProbe: PathProbe{StatusCode: 200, Bytes: 128, ContentType: "text/plain; charset=utf-8"},
ContactCount: 1,
ExpiresCount: 1,
},
"/robots.txt": {PathProbe: PathProbe{StatusCode: 200, Bytes: 42}},
}), }),
} }
states := runRule(t, &securityTxtRule{}, data, nil) states := runRule(t, &securityTxtRule{}, data, nil)
@ -41,7 +46,7 @@ func TestSecurityTxtRule_Empty(t *testing.T) {
Domain: "example.test", Domain: "example.test",
Probes: []HTTPProbe{httpsProbe("a:443")}, Probes: []HTTPProbe{httpsProbe("a:443")},
Extensions: wellKnownData(t, map[string]WellKnownProbe{ Extensions: wellKnownData(t, map[string]WellKnownProbe{
"/.well-known/security.txt": {StatusCode: 200, Bytes: 0}, "/.well-known/security.txt": {PathProbe: PathProbe{StatusCode: 200, Bytes: 0}},
}), }),
} }
states := runRule(t, &securityTxtRule{}, data, nil) states := runRule(t, &securityTxtRule{}, data, nil)
@ -51,12 +56,123 @@ func TestSecurityTxtRule_Empty(t *testing.T) {
} }
} }
func TestSecurityTxtRule_ReadError(t *testing.T) {
data := &HTTPData{
Domain: "example.test",
Probes: []HTTPProbe{httpsProbe("a:443")},
Extensions: wellKnownData(t, map[string]WellKnownProbe{
// 200 with a partial body and a read error: counts are unreliable.
"/.well-known/security.txt": {
PathProbe: PathProbe{StatusCode: 200, Bytes: 12, ContentType: "text/plain", Error: "unexpected EOF"},
ContactCount: 1,
},
}),
}
states := runRule(t, &securityTxtRule{}, data, nil)
mustStatus(t, states, sdk.StatusWarn)
if !hasCode(states, "http.security_txt.read_error") {
t.Errorf("expected read_error, got %+v", states)
}
}
// TestSecurityTxtRule_Invalid runs the rule against several non-conforming
// 200 responses and expects each to be reported as Warn
// http.security_txt.invalid with a "fix" hint in Meta.
func TestSecurityTxtRule_Invalid(t *testing.T) {
	// plainFile builds a 64-byte text/plain 200 response carrying the given
	// Contact/Expires counts.
	plainFile := func(contacts, expires int) WellKnownProbe {
		return WellKnownProbe{
			PathProbe:    PathProbe{StatusCode: 200, Bytes: 64, ContentType: "text/plain"},
			ContactCount: contacts,
			ExpiresCount: expires,
		}
	}

	scenarios := []struct {
		name  string
		probe WellKnownProbe
	}{
		{
			name: "soft-404 html",
			probe: WellKnownProbe{
				PathProbe: PathProbe{StatusCode: 200, Bytes: 6320, ContentType: "text/html; charset=utf-8"},
			},
		},
		{name: "no contact", probe: plainFile(0, 1)},
		{name: "no expires", probe: plainFile(1, 0)},
		{name: "two expires", probe: plainFile(1, 2)},
	}

	for _, sc := range scenarios {
		t.Run(sc.name, func(t *testing.T) {
			input := &HTTPData{
				Domain: "example.test",
				Probes: []HTTPProbe{httpsProbe("a:443")},
				Extensions: wellKnownData(t, map[string]WellKnownProbe{
					"/.well-known/security.txt": sc.probe,
				}),
			}

			got := runRule(t, &securityTxtRule{}, input, nil)

			mustStatus(t, got, sdk.StatusWarn)
			if !hasCode(got, "http.security_txt.invalid") {
				t.Errorf("expected invalid, got %+v", got)
			}
			if got[0].Meta["fix"] == nil {
				t.Errorf("expected fix hint in meta, got %+v", got[0].Meta)
			}
		})
	}
}
func TestSecurityTxtRule_InvalidWording(t *testing.T) {
// A genuine file (Contact + Expires present) served with the wrong
// Content-Type is invalid, but must not be mislabelled a soft-404.
data := &HTTPData{
Domain: "example.test",
Probes: []HTTPProbe{httpsProbe("a:443")},
Extensions: wellKnownData(t, map[string]WellKnownProbe{
"/.well-known/security.txt": {
PathProbe: PathProbe{StatusCode: 200, Bytes: 64, ContentType: "application/octet-stream"},
ContactCount: 1,
ExpiresCount: 1,
},
}),
}
states := runRule(t, &securityTxtRule{}, data, nil)
mustStatus(t, states, sdk.StatusWarn)
if !hasCode(states, "http.security_txt.invalid") {
t.Fatalf("expected invalid, got %+v", states)
}
if strings.Contains(states[0].Message, "soft-404") {
t.Errorf("genuine file should not be labelled soft-404, got %q", states[0].Message)
}
// A bodyless/placeholder page with no fields keeps the soft-404 hint.
data.Extensions = wellKnownData(t, map[string]WellKnownProbe{
"/.well-known/security.txt": {
PathProbe: PathProbe{StatusCode: 200, Bytes: 6320, ContentType: "text/html; charset=utf-8"},
},
})
states = runRule(t, &securityTxtRule{}, data, nil)
if !strings.Contains(states[0].Message, "soft-404") {
t.Errorf("placeholder page should mention soft-404, got %q", states[0].Message)
}
}
func TestSecurityTxtRule_Missing(t *testing.T) { func TestSecurityTxtRule_Missing(t *testing.T) {
data := &HTTPData{ data := &HTTPData{
Domain: "example.test", Domain: "example.test",
Probes: []HTTPProbe{httpsProbe("a:443")}, Probes: []HTTPProbe{httpsProbe("a:443")},
Extensions: wellKnownData(t, map[string]WellKnownProbe{ Extensions: wellKnownData(t, map[string]WellKnownProbe{
"/.well-known/security.txt": {StatusCode: 404}, "/.well-known/security.txt": {PathProbe: PathProbe{StatusCode: 404}},
}), }),
} }
states := runRule(t, &securityTxtRule{}, data, nil) states := runRule(t, &securityTxtRule{}, data, nil)
@ -69,6 +185,23 @@ func TestSecurityTxtRule_Missing(t *testing.T) {
} }
} }
// TestCountSecurityTxtFields feeds countSecurityTxtFields a small file mixing
// a comment, a blank line, differently-cased and padded Contact fields, one
// Expires field and an unrelated field, and checks both counters.
func TestCountSecurityTxtFields(t *testing.T) {
	const (
		wantContacts = 3
		wantExpires  = 1
	)
	lines := []string{
		"# comment: not a Contact",
		"",
		"Contact: mailto:security@example.test",
		"contact: https://example.test/security",
		"  CONTACT : tel:+1-201-555-0123",
		"Expires: 2026-12-31T23:59:59z",
		"Preferred-Languages: en",
	}
	// Every line, including the last, is newline-terminated.
	input := strings.Join(lines, "\n") + "\n"

	gotContacts, gotExpires := countSecurityTxtFields([]byte(input))

	if gotContacts != wantContacts {
		t.Errorf("contacts = %d, want 3", gotContacts)
	}
	if gotExpires != wantExpires {
		t.Errorf("expires = %d, want 1", gotExpires)
	}
}
func TestSecurityTxtRule_NoCollectorData(t *testing.T) { func TestSecurityTxtRule_NoCollectorData(t *testing.T) {
data := &HTTPData{ data := &HTTPData{
Domain: "example.test", Domain: "example.test",