From ffa3fbe1f93ebae357ffdd1b1232af95f7f86347 Mon Sep 17 00:00:00 2001 From: Pierre-Olivier Mercier Date: Sun, 14 Jun 2026 12:56:43 +0900 Subject: [PATCH] checker: validate security.txt is a real RFC 9116 file MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The http.security_txt rule reported OK for any 200 response with a non-empty body, so a soft-404 (status 200 + HTML body) served for /.well-known/security.txt was misread as "published". Capture the response Content-Type and count the RFC 9116 required fields (Contact, Expires) in the body. OK now requires text/plain with at least one Contact and exactly one Expires; a non-conforming 200 yields a new Warn http.security_txt.invalid explaining the defect. A 200 whose body could not be read in full yields a new Warn http.security_txt.read_error instead, because field counts taken from a partial body are unreliable. Redirects are still followed and the final response is validated, per RFC 9116 §3. --- checker/collector.go | 34 ++++++-- checker/collector_wellknown.go | 59 +++++++++++-- checker/rules_wellknown.go | 69 +++++++++++++++- checker/rules_wellknown_test.go | 141 +++++++++++++++++++++++++++++++- 4 files changed, 281 insertions(+), 22 deletions(-) diff --git a/checker/collector.go b/checker/collector.go index 8d314ed..fc36038 100644 --- a/checker/collector.go +++ b/checker/collector.go @@ -44,33 +44,49 @@ type Collector interface { // embedded by collector-specific probe types that may add extra fields // (e.g. HoneypotProbe adds Critical). type PathProbe struct { - URL string `json:"url"` - StatusCode int `json:"status_code,omitempty"` - Bytes int `json:"bytes,omitempty"` - Error string `json:"error,omitempty"` + URL string `json:"url"` + StatusCode int `json:"status_code,omitempty"` + Bytes int `json:"bytes,omitempty"` + ContentType string `json:"content_type,omitempty"` + Error string `json:"error,omitempty"` } // fetchHTTPSPath issues a single GET against the given path using client, // reads up to limit bytes (just to measure size), and returns a PathProbe. 
+// Callers that need the body itself should use fetchHTTPSPathBody. func fetchHTTPSPath(ctx context.Context, client *http.Client, host, path, ua string, limit int64) PathProbe { + probe, _ := fetchHTTPSPathBody(ctx, client, host, path, ua, limit) + return probe +} + +// fetchHTTPSPathBody is fetchHTTPSPath but also returns the response body, +// truncated to limit bytes. Probe.Bytes equals len(body). +func fetchHTTPSPathBody(ctx context.Context, client *http.Client, host, path, ua string, limit int64) (PathProbe, []byte) { u := (&url.URL{Scheme: "https", Host: host, Path: path}).String() probe := PathProbe{URL: u} req, err := http.NewRequestWithContext(ctx, http.MethodGet, u, nil) if err != nil { probe.Error = err.Error() - return probe + return probe, nil } req.Header.Set("User-Agent", ua) resp, err := client.Do(req) if err != nil { probe.Error = err.Error() - return probe + return probe, nil } defer resp.Body.Close() probe.StatusCode = resp.StatusCode - n, _ := io.Copy(io.Discard, io.LimitReader(resp.Body, limit)) - probe.Bytes = int(n) - return probe + probe.ContentType = resp.Header.Get("Content-Type") + body, err := io.ReadAll(io.LimitReader(resp.Body, limit)) + probe.Bytes = len(body) + // A read error here (e.g. connection reset mid-body) means body is + // truncated and any counts derived from it are unreliable; record it so + // callers don't treat the partial body as a complete response. 
+ if err != nil { + probe.Error = err.Error() + } + return probe, body } // newPinnedHTTPSTransport returns an http.Transport that dials every request diff --git a/checker/collector_wellknown.go b/checker/collector_wellknown.go index 5cd26ca..759f662 100644 --- a/checker/collector_wellknown.go +++ b/checker/collector_wellknown.go @@ -8,6 +8,7 @@ import ( "context" "fmt" "net/http" + "strings" ) // ObservationKeyWellKnown is the Extensions[] key under which @@ -23,8 +24,15 @@ type WellKnownData struct { URIs map[string]WellKnownProbe `json:"uris"` } -// WellKnownProbe is a single (URI → outcome) entry. -type WellKnownProbe = PathProbe +// WellKnownProbe is a single (URI → outcome) entry. It embeds the generic +// PathProbe and adds the few security.txt signals the rule needs to decide +// whether the response is an actual RFC 9116 file rather than, say, a soft-404 +// HTML page. ContactCount/ExpiresCount are only populated for security.txt. +type WellKnownProbe struct { + PathProbe + ContactCount int `json:"contact_count,omitempty"` + ExpiresCount int `json:"expires_count,omitempty"` +} // wellknownCollector probes a small, fixed set of standardised URIs // served at the apex of the host. Today it covers: @@ -46,12 +54,51 @@ func (wellknownCollector) Collect(ctx context.Context, t Target) (any, error) { defer cleanup() client := &http.Client{Transport: transport} - uris := []string{"/.well-known/security.txt", "/robots.txt"} - out := WellKnownData{URIs: make(map[string]WellKnownProbe, len(uris))} - for _, path := range uris { - out.URIs[path] = fetchHTTPSPath(ctx, client, t.Host, path, t.UserAgent, 64<<10) + out := WellKnownData{URIs: make(map[string]WellKnownProbe, 2)} + + // robots.txt: presence and status are all the (future) rule needs. 
+ out.URIs["/robots.txt"] = WellKnownProbe{ + PathProbe: fetchHTTPSPath(ctx, client, t.Host, "/robots.txt", t.UserAgent, 64<<10), } + + // security.txt: read the body so the rule can tell a genuine RFC 9116 + // file from a soft-404 page that merely returns 200. + out.URIs["/.well-known/security.txt"] = fetchSecurityTxt(ctx, client, t.Host, "/.well-known/security.txt", t.UserAgent, 64<<10) + return &out, nil } +// fetchSecurityTxt fetches path, captures the generic probe fields, and counts +// the RFC 9116 required fields (Contact, Expires) found in the body. +func fetchSecurityTxt(ctx context.Context, client *http.Client, host, path, ua string, limit int64) WellKnownProbe { + probe, body := fetchHTTPSPathBody(ctx, client, host, path, ua, limit) + out := WellKnownProbe{PathProbe: probe} + out.ContactCount, out.ExpiresCount = countSecurityTxtFields(body) + return out +} + +// countSecurityTxtFields counts occurrences of the Contact and Expires fields +// in an RFC 9116 file. Fields are "name: value" lines; blank lines and lines +// beginning with "#" (comments) are ignored, and field names are +// case-insensitive (RFC 9116 §2.4). PGP signature blocks are not parsed. 
+func countSecurityTxtFields(body []byte) (contacts, expires int) { + for raw := range strings.Lines(string(body)) { + line := strings.TrimSpace(raw) + if line == "" || strings.HasPrefix(line, "#") { + continue + } + name, _, ok := strings.Cut(line, ":") + if !ok { + continue + } + switch strings.ToLower(strings.TrimSpace(name)) { + case "contact": + contacts++ + case "expires": + expires++ + } + } + return contacts, expires +} + func init() { RegisterCollector(wellknownCollector{}) } diff --git a/checker/rules_wellknown.go b/checker/rules_wellknown.go index 40d8795..dcc01a0 100644 --- a/checker/rules_wellknown.go +++ b/checker/rules_wellknown.go @@ -7,6 +7,7 @@ package checker import ( "context" "fmt" + "mime" sdk "git.happydns.org/checker-sdk-go/checker" ) @@ -37,8 +38,25 @@ func (r *securityTxtRule) Evaluate(ctx context.Context, obs sdk.ObservationGette return []sdk.CheckState{unknownState("http.security_txt.no_data", "Well-known collector did not run.")} } probe := wk.URIs["/.well-known/security.txt"] + valid, defect := checkSecurityTxt(probe) switch { - case probe.StatusCode == 200 && probe.Bytes > 0: + case probe.StatusCode == 200 && probe.Error != "": + // The server answered 200 but the body could not be fully read, so + // the field counts are unreliable; don't pass a verdict on it. 
+ return []sdk.CheckState{{ + Status: sdk.StatusWarn, + Code: "http.security_txt.read_error", + Subject: data.Domain, + Message: fmt.Sprintf("/.well-known/security.txt responded 200 but could not be read fully (%s).", probe.Error), + }} + case probe.StatusCode == 200 && probe.Bytes == 0: + return []sdk.CheckState{{ + Status: sdk.StatusWarn, + Code: "http.security_txt.empty", + Subject: data.Domain, + Message: "/.well-known/security.txt responded 200 but is empty.", + }} + case probe.StatusCode == 200 && valid: return []sdk.CheckState{{ Status: sdk.StatusOK, Code: "http.security_txt.ok", @@ -46,11 +64,26 @@ func (r *securityTxtRule) Evaluate(ctx context.Context, obs sdk.ObservationGette Message: fmt.Sprintf("/.well-known/security.txt is published (%d bytes).", probe.Bytes), }} case probe.StatusCode == 200: + // 200 but the body is not a conforming RFC 9116 file. With no Contact + // or Expires fields at all it is typically a soft-404 page (e.g. an + // HTML 404 served with status 200); when the fields are present it is + // a genuine file that is merely non-conforming (wrong Content-Type, + // duplicate Expires, …), so don't mislabel it a soft-404. + msg := fmt.Sprintf("/.well-known/security.txt responded 200 but is not a valid RFC 9116 file (%s).", defect) + if probe.ContactCount == 0 && probe.ExpiresCount == 0 { + msg += " It looks like a soft-404 or placeholder rather than a published security.txt." + } return []sdk.CheckState{{ Status: sdk.StatusWarn, - Code: "http.security_txt.empty", + Code: "http.security_txt.invalid", Subject: data.Domain, - Message: "/.well-known/security.txt responded 200 but is empty.", + Message: msg, + Meta: map[string]any{ + "content_type": probe.ContentType, + "contact_count": probe.ContactCount, + "expires_count": probe.ExpiresCount, + "fix": "Serve /.well-known/security.txt as text/plain with at least one Contact: field and exactly one Expires: field (RFC 9116). 
If you do not publish one, return 404 for this path.", + }, }} default: return []sdk.CheckState{{ @@ -62,3 +95,33 @@ func (r *securityTxtRule) Evaluate(ctx context.Context, obs sdk.ObservationGette }} } } + +// checkSecurityTxt reports whether a 200 response is a conforming RFC 9116 +// file (served as text/plain, with at least one Contact field and exactly one +// Expires field) and, when it is not, a short human-readable reason why. +func checkSecurityTxt(p WellKnownProbe) (valid bool, defect string) { + switch { + case !isTextPlain(p.ContentType): + ct := p.ContentType + if ct == "" { + ct = "no Content-Type" + } + return false, fmt.Sprintf("Content-Type is %q, not text/plain", ct) + case p.ContactCount == 0: + return false, "missing required Contact field" + case p.ExpiresCount == 0: + return false, "missing required Expires field" + case p.ExpiresCount > 1: + return false, fmt.Sprintf("has %d Expires fields, exactly one is required", p.ExpiresCount) + default: + return true, "" + } +} + +// isTextPlain reports whether a Content-Type header value denotes text/plain, +// tolerating an optional charset (or other) parameter such as +// "text/plain; charset=utf-8". 
+func isTextPlain(contentType string) bool { + mediaType, _, _ := mime.ParseMediaType(contentType) + return mediaType == "text/plain" +} diff --git a/checker/rules_wellknown_test.go b/checker/rules_wellknown_test.go index 86137a5..a0eb5e3 100644 --- a/checker/rules_wellknown_test.go +++ b/checker/rules_wellknown_test.go @@ -6,6 +6,7 @@ package checker import ( "encoding/json" + "strings" "testing" sdk "git.happydns.org/checker-sdk-go/checker" @@ -25,8 +26,12 @@ func TestSecurityTxtRule_OK(t *testing.T) { Domain: "example.test", Probes: []HTTPProbe{httpsProbe("a:443")}, Extensions: wellKnownData(t, map[string]WellKnownProbe{ - "/.well-known/security.txt": {StatusCode: 200, Bytes: 128}, - "/robots.txt": {StatusCode: 200, Bytes: 42}, + "/.well-known/security.txt": { + PathProbe: PathProbe{StatusCode: 200, Bytes: 128, ContentType: "text/plain; charset=utf-8"}, + ContactCount: 1, + ExpiresCount: 1, + }, + "/robots.txt": {PathProbe: PathProbe{StatusCode: 200, Bytes: 42}}, }), } states := runRule(t, &securityTxtRule{}, data, nil) @@ -41,7 +46,7 @@ func TestSecurityTxtRule_Empty(t *testing.T) { Domain: "example.test", Probes: []HTTPProbe{httpsProbe("a:443")}, Extensions: wellKnownData(t, map[string]WellKnownProbe{ - "/.well-known/security.txt": {StatusCode: 200, Bytes: 0}, + "/.well-known/security.txt": {PathProbe: PathProbe{StatusCode: 200, Bytes: 0}}, }), } states := runRule(t, &securityTxtRule{}, data, nil) @@ -51,12 +56,123 @@ func TestSecurityTxtRule_Empty(t *testing.T) { } } +func TestSecurityTxtRule_ReadError(t *testing.T) { + data := &HTTPData{ + Domain: "example.test", + Probes: []HTTPProbe{httpsProbe("a:443")}, + Extensions: wellKnownData(t, map[string]WellKnownProbe{ + // 200 with a partial body and a read error: counts are unreliable. 
+ "/.well-known/security.txt": { + PathProbe: PathProbe{StatusCode: 200, Bytes: 12, ContentType: "text/plain", Error: "unexpected EOF"}, + ContactCount: 1, + }, + }), + } + states := runRule(t, &securityTxtRule{}, data, nil) + mustStatus(t, states, sdk.StatusWarn) + if !hasCode(states, "http.security_txt.read_error") { + t.Errorf("expected read_error, got %+v", states) + } +} + +func TestSecurityTxtRule_Invalid(t *testing.T) { + cases := []struct { + name string + probe WellKnownProbe + }{ + { + name: "soft-404 html", + probe: WellKnownProbe{ + PathProbe: PathProbe{StatusCode: 200, Bytes: 6320, ContentType: "text/html; charset=utf-8"}, + }, + }, + { + name: "no contact", + probe: WellKnownProbe{ + PathProbe: PathProbe{StatusCode: 200, Bytes: 64, ContentType: "text/plain"}, + ContactCount: 0, + ExpiresCount: 1, + }, + }, + { + name: "no expires", + probe: WellKnownProbe{ + PathProbe: PathProbe{StatusCode: 200, Bytes: 64, ContentType: "text/plain"}, + ContactCount: 1, + ExpiresCount: 0, + }, + }, + { + name: "two expires", + probe: WellKnownProbe{ + PathProbe: PathProbe{StatusCode: 200, Bytes: 64, ContentType: "text/plain"}, + ContactCount: 1, + ExpiresCount: 2, + }, + }, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + data := &HTTPData{ + Domain: "example.test", + Probes: []HTTPProbe{httpsProbe("a:443")}, + Extensions: wellKnownData(t, map[string]WellKnownProbe{ + "/.well-known/security.txt": tc.probe, + }), + } + states := runRule(t, &securityTxtRule{}, data, nil) + mustStatus(t, states, sdk.StatusWarn) + if !hasCode(states, "http.security_txt.invalid") { + t.Errorf("expected invalid, got %+v", states) + } + if states[0].Meta["fix"] == nil { + t.Errorf("expected fix hint in meta, got %+v", states[0].Meta) + } + }) + } +} + +func TestSecurityTxtRule_InvalidWording(t *testing.T) { + // A genuine file (Contact + Expires present) served with the wrong + // Content-Type is invalid, but must not be mislabelled a soft-404. 
+ data := &HTTPData{ + Domain: "example.test", + Probes: []HTTPProbe{httpsProbe("a:443")}, + Extensions: wellKnownData(t, map[string]WellKnownProbe{ + "/.well-known/security.txt": { + PathProbe: PathProbe{StatusCode: 200, Bytes: 64, ContentType: "application/octet-stream"}, + ContactCount: 1, + ExpiresCount: 1, + }, + }), + } + states := runRule(t, &securityTxtRule{}, data, nil) + mustStatus(t, states, sdk.StatusWarn) + if !hasCode(states, "http.security_txt.invalid") { + t.Fatalf("expected invalid, got %+v", states) + } + if strings.Contains(states[0].Message, "soft-404") { + t.Errorf("genuine file should not be labelled soft-404, got %q", states[0].Message) + } + + // A placeholder HTML page (non-empty body, no Contact/Expires fields) keeps the soft-404 hint. + data.Extensions = wellKnownData(t, map[string]WellKnownProbe{ + "/.well-known/security.txt": { + PathProbe: PathProbe{StatusCode: 200, Bytes: 6320, ContentType: "text/html; charset=utf-8"}, + }, + }) + states = runRule(t, &securityTxtRule{}, data, nil) + if !strings.Contains(states[0].Message, "soft-404") { + t.Errorf("placeholder page should mention soft-404, got %q", states[0].Message) + } +} + func TestSecurityTxtRule_Missing(t *testing.T) { data := &HTTPData{ Domain: "example.test", Probes: []HTTPProbe{httpsProbe("a:443")}, Extensions: wellKnownData(t, map[string]WellKnownProbe{ - "/.well-known/security.txt": {StatusCode: 404}, + "/.well-known/security.txt": {PathProbe: PathProbe{StatusCode: 404}}, }), } states := runRule(t, &securityTxtRule{}, data, nil) @@ -69,6 +185,23 @@ func TestSecurityTxtRule_Missing(t *testing.T) { } } +func TestCountSecurityTxtFields(t *testing.T) { + body := "# comment: not a Contact\n" + + "\n" + + "Contact: mailto:security@example.test\n" + + "contact: https://example.test/security\n" + + " CONTACT : tel:+1-201-555-0123\n" + + "Expires: 2026-12-31T23:59:59z\n" + + "Preferred-Languages: en\n" + contacts, expires := countSecurityTxtFields([]byte(body)) + if contacts != 3 { + t.Errorf("contacts = %d, 
want 3", contacts) + } + if expires != 1 { + t.Errorf("expires = %d, want 1", expires) + } +} + func TestSecurityTxtRule_NoCollectorData(t *testing.T) { data := &HTTPData{ Domain: "example.test",