diff --git a/checker/collector.go b/checker/collector.go index fc36038..8d314ed 100644 --- a/checker/collector.go +++ b/checker/collector.go @@ -44,49 +44,33 @@ type Collector interface { // embedded by collector-specific probe types that may add extra fields // (e.g. HoneypotProbe adds Critical). type PathProbe struct { - URL string `json:"url"` - StatusCode int `json:"status_code,omitempty"` - Bytes int `json:"bytes,omitempty"` - ContentType string `json:"content_type,omitempty"` - Error string `json:"error,omitempty"` + URL string `json:"url"` + StatusCode int `json:"status_code,omitempty"` + Bytes int `json:"bytes,omitempty"` + Error string `json:"error,omitempty"` } // fetchHTTPSPath issues a single GET against the given path using client, // reads up to limit bytes (just to measure size), and returns a PathProbe. -// Callers that need the body itself should use fetchHTTPSPathBody. func fetchHTTPSPath(ctx context.Context, client *http.Client, host, path, ua string, limit int64) PathProbe { - probe, _ := fetchHTTPSPathBody(ctx, client, host, path, ua, limit) - return probe -} - -// fetchHTTPSPathBody is fetchHTTPSPath but also returns the response body, -// truncated to limit bytes. Probe.Bytes equals len(body). -func fetchHTTPSPathBody(ctx context.Context, client *http.Client, host, path, ua string, limit int64) (PathProbe, []byte) { u := (&url.URL{Scheme: "https", Host: host, Path: path}).String() probe := PathProbe{URL: u} req, err := http.NewRequestWithContext(ctx, http.MethodGet, u, nil) if err != nil { probe.Error = err.Error() - return probe, nil + return probe } req.Header.Set("User-Agent", ua) resp, err := client.Do(req) if err != nil { probe.Error = err.Error() - return probe, nil + return probe } defer resp.Body.Close() probe.StatusCode = resp.StatusCode - probe.ContentType = resp.Header.Get("Content-Type") - body, err := io.ReadAll(io.LimitReader(resp.Body, limit)) - probe.Bytes = len(body) - // A read error here (e.g. connection reset mid-body) means body is - // truncated and any counts derived from it are unreliable; record it so - // callers don't treat the partial body as a complete response. - if err != nil { - probe.Error = err.Error() - } - return probe, body + n, _ := io.Copy(io.Discard, io.LimitReader(resp.Body, limit)) + probe.Bytes = int(n) + return probe } // newPinnedHTTPSTransport returns an http.Transport that dials every request diff --git a/checker/collector_wellknown.go b/checker/collector_wellknown.go index 06b8361..5cd26ca 100644 --- a/checker/collector_wellknown.go +++ b/checker/collector_wellknown.go @@ -8,8 +8,6 @@ import ( "context" "fmt" "net/http" - "strings" - "sync" ) // ObservationKeyWellKnown is the Extensions[] key under which @@ -25,15 +23,8 @@ type WellKnownData struct { URIs map[string]WellKnownProbe `json:"uris"` } -// WellKnownProbe is a single (URI → outcome) entry. It embeds the generic -// PathProbe and adds the few security.txt signals the rule needs to decide -// whether the response is an actual RFC 9116 file rather than, say, a soft-404 -// HTML page. ContactCount/ExpiresCount are only populated for security.txt. -type WellKnownProbe struct { - PathProbe - ContactCount int `json:"contact_count,omitempty"` - ExpiresCount int `json:"expires_count,omitempty"` -} +// WellKnownProbe is a single (URI → outcome) entry. +type WellKnownProbe = PathProbe // wellknownCollector probes a small, fixed set of standardised URIs // served at the apex of the host. Today it covers: @@ -55,66 +46,12 @@ func (wellknownCollector) Collect(ctx context.Context, t Target) (any, error) { defer cleanup() client := &http.Client{Transport: transport} - // The two URIs are independent requests to the same host, so probe them - // concurrently. Each goroutine writes its own variable; the map is - // assembled after both finish, so no locking is needed. - var robots, securityTxt WellKnownProbe - var wg sync.WaitGroup - wg.Add(2) - // robots.txt: presence and status are all the (future) rule needs. - go func() { - defer wg.Done() - robots = WellKnownProbe{ - PathProbe: fetchHTTPSPath(ctx, client, t.Host, "/robots.txt", t.UserAgent, 64<<10), - } - }() - // security.txt: read the body so the rule can tell a genuine RFC 9116 - // file from a soft-404 page that merely returns 200. - go func() { - defer wg.Done() - securityTxt = fetchSecurityTxt(ctx, client, t.Host, "/.well-known/security.txt", t.UserAgent, 64<<10) - }() - wg.Wait() - - out := WellKnownData{URIs: map[string]WellKnownProbe{ - "/robots.txt": robots, - "/.well-known/security.txt": securityTxt, - }} - + uris := []string{"/.well-known/security.txt", "/robots.txt"} + out := WellKnownData{URIs: make(map[string]WellKnownProbe, len(uris))} + for _, path := range uris { + out.URIs[path] = fetchHTTPSPath(ctx, client, t.Host, path, t.UserAgent, 64<<10) + } return &out, nil } -// fetchSecurityTxt fetches path, captures the generic probe fields, and counts -// the RFC 9116 required fields (Contact, Expires) found in the body. -func fetchSecurityTxt(ctx context.Context, client *http.Client, host, path, ua string, limit int64) WellKnownProbe { - probe, body := fetchHTTPSPathBody(ctx, client, host, path, ua, limit) - out := WellKnownProbe{PathProbe: probe} - out.ContactCount, out.ExpiresCount = countSecurityTxtFields(body) - return out -} - -// countSecurityTxtFields counts occurrences of the Contact and Expires fields -// in an RFC 9116 file. Fields are "name: value" lines; blank lines and lines -// beginning with "#" (comments) are ignored, and field names are -// case-insensitive (RFC 9116 §2.4). PGP signature blocks are not parsed. -func countSecurityTxtFields(body []byte) (contacts, expires int) { - for raw := range strings.Lines(string(body)) { - line := strings.TrimSpace(raw) - if line == "" || strings.HasPrefix(line, "#") { - continue - } - name, _, ok := strings.Cut(line, ":") - if !ok { - continue - } - switch strings.ToLower(strings.TrimSpace(name)) { - case "contact": - contacts++ - case "expires": - expires++ - } - } - return contacts, expires -} - func init() { RegisterCollector(wellknownCollector{}) } diff --git a/checker/rules_wellknown.go b/checker/rules_wellknown.go index dcc01a0..40d8795 100644 --- a/checker/rules_wellknown.go +++ b/checker/rules_wellknown.go @@ -7,7 +7,6 @@ package checker import ( "context" "fmt" - "mime" sdk "git.happydns.org/checker-sdk-go/checker" ) @@ -38,25 +37,8 @@ func (r *securityTxtRule) Evaluate(ctx context.Context, obs sdk.ObservationGette return []sdk.CheckState{unknownState("http.security_txt.no_data", "Well-known collector did not run.")} } probe := wk.URIs["/.well-known/security.txt"] - valid, defect := checkSecurityTxt(probe) switch { - case probe.StatusCode == 200 && probe.Error != "": - // The server answered 200 but the body could not be fully read, so - // the field counts are unreliable; don't pass a verdict on it. - return []sdk.CheckState{{ - Status: sdk.StatusWarn, - Code: "http.security_txt.read_error", - Subject: data.Domain, - Message: fmt.Sprintf("/.well-known/security.txt responded 200 but could not be read fully (%s).", probe.Error), - }} - case probe.StatusCode == 200 && probe.Bytes == 0: - return []sdk.CheckState{{ - Status: sdk.StatusWarn, - Code: "http.security_txt.empty", - Subject: data.Domain, - Message: "/.well-known/security.txt responded 200 but is empty.", - }} - case probe.StatusCode == 200 && valid: + case probe.StatusCode == 200 && probe.Bytes > 0: return []sdk.CheckState{{ Status: sdk.StatusOK, Code: "http.security_txt.ok", @@ -64,26 +46,11 @@ func (r *securityTxtRule) Evaluate(ctx context.Context, obs sdk.ObservationGette Message: fmt.Sprintf("/.well-known/security.txt is published (%d bytes).", probe.Bytes), }} case probe.StatusCode == 200: - // 200 but the body is not a conforming RFC 9116 file. With no Contact - // or Expires fields at all it is typically a soft-404 page (e.g. an - // HTML 404 served with status 200); when the fields are present it is - // a genuine file that is merely non-conforming (wrong Content-Type, - // duplicate Expires, …), so don't mislabel it a soft-404. - msg := fmt.Sprintf("/.well-known/security.txt responded 200 but is not a valid RFC 9116 file (%s).", defect) - if probe.ContactCount == 0 && probe.ExpiresCount == 0 { - msg += " It looks like a soft-404 or placeholder rather than a published security.txt." - } return []sdk.CheckState{{ Status: sdk.StatusWarn, - Code: "http.security_txt.invalid", + Code: "http.security_txt.empty", Subject: data.Domain, - Message: msg, - Meta: map[string]any{ - "content_type": probe.ContentType, - "contact_count": probe.ContactCount, - "expires_count": probe.ExpiresCount, - "fix": "Serve /.well-known/security.txt as text/plain with at least one Contact: field and exactly one Expires: field (RFC 9116). If you do not publish one, return 404 for this path.", - }, + Message: "/.well-known/security.txt responded 200 but is empty.", }} default: return []sdk.CheckState{{ @@ -95,33 +62,3 @@ func (r *securityTxtRule) Evaluate(ctx context.Context, obs sdk.ObservationGette }} } } - -// checkSecurityTxt reports whether a 200 response is a conforming RFC 9116 -// file (served as text/plain, with at least one Contact field and exactly one -// Expires field) and, when it is not, a short human-readable reason why. -func checkSecurityTxt(p WellKnownProbe) (valid bool, defect string) { - switch { - case !isTextPlain(p.ContentType): - ct := p.ContentType - if ct == "" { - ct = "no Content-Type" - } - return false, fmt.Sprintf("Content-Type is %q, not text/plain", ct) - case p.ContactCount == 0: - return false, "missing required Contact field" - case p.ExpiresCount == 0: - return false, "missing required Expires field" - case p.ExpiresCount > 1: - return false, fmt.Sprintf("has %d Expires fields, exactly one is required", p.ExpiresCount) - default: - return true, "" - } -} - -// isTextPlain reports whether a Content-Type header value denotes text/plain, -// tolerating an optional charset (or other) parameter such as -// "text/plain; charset=utf-8". -func isTextPlain(contentType string) bool { - mediaType, _, _ := mime.ParseMediaType(contentType) - return mediaType == "text/plain" -} diff --git a/checker/rules_wellknown_test.go b/checker/rules_wellknown_test.go index a0eb5e3..86137a5 100644 --- a/checker/rules_wellknown_test.go +++ b/checker/rules_wellknown_test.go @@ -6,7 +6,6 @@ package checker import ( "encoding/json" - "strings" "testing" sdk "git.happydns.org/checker-sdk-go/checker" @@ -26,12 +25,8 @@ func TestSecurityTxtRule_OK(t *testing.T) { Domain: "example.test", Probes: []HTTPProbe{httpsProbe("a:443")}, Extensions: wellKnownData(t, map[string]WellKnownProbe{ - "/.well-known/security.txt": { - PathProbe: PathProbe{StatusCode: 200, Bytes: 128, ContentType: "text/plain; charset=utf-8"}, - ContactCount: 1, - ExpiresCount: 1, - }, - "/robots.txt": {PathProbe: PathProbe{StatusCode: 200, Bytes: 42}}, + "/.well-known/security.txt": {StatusCode: 200, Bytes: 128}, + "/robots.txt": {StatusCode: 200, Bytes: 42}, }), } states := runRule(t, &securityTxtRule{}, data, nil) @@ -46,7 +41,7 @@ func TestSecurityTxtRule_Empty(t *testing.T) { Domain: "example.test", Probes: []HTTPProbe{httpsProbe("a:443")}, Extensions: wellKnownData(t, map[string]WellKnownProbe{ - "/.well-known/security.txt": {PathProbe: PathProbe{StatusCode: 200, Bytes: 0}}, + "/.well-known/security.txt": {StatusCode: 200, Bytes: 0}, }), } states := runRule(t, &securityTxtRule{}, data, nil) @@ -56,123 +51,12 @@ func TestSecurityTxtRule_Empty(t *testing.T) { } } -func TestSecurityTxtRule_ReadError(t *testing.T) { - data := &HTTPData{ - Domain: "example.test", - Probes: []HTTPProbe{httpsProbe("a:443")}, - Extensions: wellKnownData(t, map[string]WellKnownProbe{ - // 200 with a partial body and a read error: counts are unreliable. - "/.well-known/security.txt": { - PathProbe: PathProbe{StatusCode: 200, Bytes: 12, ContentType: "text/plain", Error: "unexpected EOF"}, - ContactCount: 1, - }, - }), - } - states := runRule(t, &securityTxtRule{}, data, nil) - mustStatus(t, states, sdk.StatusWarn) - if !hasCode(states, "http.security_txt.read_error") { - t.Errorf("expected read_error, got %+v", states) - } -} - -func TestSecurityTxtRule_Invalid(t *testing.T) { - cases := []struct { - name string - probe WellKnownProbe - }{ - { - name: "soft-404 html", - probe: WellKnownProbe{ - PathProbe: PathProbe{StatusCode: 200, Bytes: 6320, ContentType: "text/html; charset=utf-8"}, - }, - }, - { - name: "no contact", - probe: WellKnownProbe{ - PathProbe: PathProbe{StatusCode: 200, Bytes: 64, ContentType: "text/plain"}, - ContactCount: 0, - ExpiresCount: 1, - }, - }, - { - name: "no expires", - probe: WellKnownProbe{ - PathProbe: PathProbe{StatusCode: 200, Bytes: 64, ContentType: "text/plain"}, - ContactCount: 1, - ExpiresCount: 0, - }, - }, - { - name: "two expires", - probe: WellKnownProbe{ - PathProbe: PathProbe{StatusCode: 200, Bytes: 64, ContentType: "text/plain"}, - ContactCount: 1, - ExpiresCount: 2, - }, - }, - } - for _, tc := range cases { - t.Run(tc.name, func(t *testing.T) { - data := &HTTPData{ - Domain: "example.test", - Probes: []HTTPProbe{httpsProbe("a:443")}, - Extensions: wellKnownData(t, map[string]WellKnownProbe{ - "/.well-known/security.txt": tc.probe, - }), - } - states := runRule(t, &securityTxtRule{}, data, nil) - mustStatus(t, states, sdk.StatusWarn) - if !hasCode(states, "http.security_txt.invalid") { - t.Errorf("expected invalid, got %+v", states) - } - if states[0].Meta["fix"] == nil { - t.Errorf("expected fix hint in meta, got %+v", states[0].Meta) - } - }) - } -} - -func TestSecurityTxtRule_InvalidWording(t *testing.T) { - // A genuine file (Contact + Expires present) served with the wrong - // Content-Type is invalid, but must not be mislabelled a soft-404. - data := &HTTPData{ - Domain: "example.test", - Probes: []HTTPProbe{httpsProbe("a:443")}, - Extensions: wellKnownData(t, map[string]WellKnownProbe{ - "/.well-known/security.txt": { - PathProbe: PathProbe{StatusCode: 200, Bytes: 64, ContentType: "application/octet-stream"}, - ContactCount: 1, - ExpiresCount: 1, - }, - }), - } - states := runRule(t, &securityTxtRule{}, data, nil) - mustStatus(t, states, sdk.StatusWarn) - if !hasCode(states, "http.security_txt.invalid") { - t.Fatalf("expected invalid, got %+v", states) - } - if strings.Contains(states[0].Message, "soft-404") { - t.Errorf("genuine file should not be labelled soft-404, got %q", states[0].Message) - } - - // A bodyless/placeholder page with no fields keeps the soft-404 hint. - data.Extensions = wellKnownData(t, map[string]WellKnownProbe{ - "/.well-known/security.txt": { - PathProbe: PathProbe{StatusCode: 200, Bytes: 6320, ContentType: "text/html; charset=utf-8"}, - }, - }) - states = runRule(t, &securityTxtRule{}, data, nil) - if !strings.Contains(states[0].Message, "soft-404") { - t.Errorf("placeholder page should mention soft-404, got %q", states[0].Message) - } -} - func TestSecurityTxtRule_Missing(t *testing.T) { data := &HTTPData{ Domain: "example.test", Probes: []HTTPProbe{httpsProbe("a:443")}, Extensions: wellKnownData(t, map[string]WellKnownProbe{ - "/.well-known/security.txt": {PathProbe: PathProbe{StatusCode: 404}}, + "/.well-known/security.txt": {StatusCode: 404}, }), } states := runRule(t, &securityTxtRule{}, data, nil) @@ -185,23 +69,6 @@ func TestSecurityTxtRule_Missing(t *testing.T) { } } -func TestCountSecurityTxtFields(t *testing.T) { - body := "# comment: not a Contact\n" + - "\n" + - "Contact: mailto:security@example.test\n" + - "contact: https://example.test/security\n" + - " CONTACT : tel:+1-201-555-0123\n" + - "Expires: 2026-12-31T23:59:59z\n" + - "Preferred-Languages: en\n" - contacts, expires := countSecurityTxtFields([]byte(body)) - if contacts != 3 { - t.Errorf("contacts = %d, want 3", contacts) - } - if expires != 1 { - t.Errorf("expires = %d, want 1", expires) - } -} - func TestSecurityTxtRule_NoCollectorData(t *testing.T) { data := &HTTPData{ Domain: "example.test",