From 66cf1fc9aa8a7701addc039ecc6d5b9e64da98a4 Mon Sep 17 00:00:00 2001 From: Pierre-Olivier Mercier Date: Sun, 26 Apr 2026 17:23:08 +0700 Subject: [PATCH] Initial commit --- .gitignore | 2 + Dockerfile | 15 ++ LICENSE | 21 +++ Makefile | 28 +++ NOTICE | 26 +++ README.md | 109 ++++++++++++ checker/collect.go | 57 ++++++ checker/definition.go | 54 ++++++ checker/dnsbl.go | 338 +++++++++++++++++++++++++++++++++++ checker/dnsbl_test.go | 74 ++++++++ checker/httpclient.go | 32 ++++ checker/interactive.go | 89 ++++++++++ checker/openphish.go | 204 +++++++++++++++++++++ checker/provider.go | 52 ++++++ checker/report.go | 341 ++++++++++++++++++++++++++++++++++++ checker/report_test.go | 81 +++++++++ checker/rule.go | 110 ++++++++++++ checker/safebrowsing.go | 187 ++++++++++++++++++++ checker/source.go | 146 +++++++++++++++ checker/source_test.go | 40 +++++ checker/testhelpers_test.go | 26 +++ checker/types.go | 51 ++++++ checker/urlhaus.go | 201 +++++++++++++++++++++ checker/urlhaus_test.go | 91 ++++++++++ checker/virustotal.go | 220 +++++++++++++++++++++++ checker/virustotal_test.go | 81 +++++++++ go.mod | 8 + go.sum | 4 + main.go | 27 +++ plugin/plugin.go | 20 +++ 30 files changed, 2735 insertions(+) create mode 100644 .gitignore create mode 100644 Dockerfile create mode 100644 LICENSE create mode 100644 Makefile create mode 100644 NOTICE create mode 100644 README.md create mode 100644 checker/collect.go create mode 100644 checker/definition.go create mode 100644 checker/dnsbl.go create mode 100644 checker/dnsbl_test.go create mode 100644 checker/httpclient.go create mode 100644 checker/interactive.go create mode 100644 checker/openphish.go create mode 100644 checker/provider.go create mode 100644 checker/report.go create mode 100644 checker/report_test.go create mode 100644 checker/rule.go create mode 100644 checker/safebrowsing.go create mode 100644 checker/source.go create mode 100644 checker/source_test.go create mode 100644 checker/testhelpers_test.go create mode 
100644 checker/types.go create mode 100644 checker/urlhaus.go create mode 100644 checker/urlhaus_test.go create mode 100644 checker/virustotal.go create mode 100644 checker/virustotal_test.go create mode 100644 go.mod create mode 100644 go.sum create mode 100644 main.go create mode 100644 plugin/plugin.go diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..b8efa82 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +checker-blacklist +checker-blacklist.so diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..27b4df8 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,15 @@ +FROM golang:1.25-alpine AS builder + +ARG CHECKER_VERSION=custom-build + +WORKDIR /src +COPY go.mod go.sum ./ +RUN go mod download +COPY . . +RUN CGO_ENABLED=0 go build -ldflags "-X main.Version=${CHECKER_VERSION}" -o /checker-blacklist . + +FROM scratch +COPY --from=builder /checker-blacklist /checker-blacklist +USER 65534:65534 +EXPOSE 8080 +ENTRYPOINT ["/checker-blacklist"] diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..07d44d8 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2026 The happyDomain Authors + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the “Software”), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..74ab199 --- /dev/null +++ b/Makefile @@ -0,0 +1,28 @@ +CHECKER_NAME := checker-blacklist +CHECKER_IMAGE := happydomain/$(CHECKER_NAME) +CHECKER_VERSION ?= custom-build + +CHECKER_SOURCES := main.go $(wildcard checker/*.go) + +GO_LDFLAGS := -X main.Version=$(CHECKER_VERSION) + +.PHONY: all plugin docker test clean + +all: $(CHECKER_NAME) + +$(CHECKER_NAME): $(CHECKER_SOURCES) + go build -tags standalone -ldflags "$(GO_LDFLAGS)" -o $@ . + +plugin: $(CHECKER_NAME).so + +$(CHECKER_NAME).so: $(CHECKER_SOURCES) $(wildcard plugin/*.go) + go build -buildmode=plugin -ldflags "$(GO_LDFLAGS)" -o $@ ./plugin/ + +docker: + docker build --build-arg CHECKER_VERSION=$(CHECKER_VERSION) -t $(CHECKER_IMAGE) . + +test: + go test -tags standalone ./... + +clean: + rm -f $(CHECKER_NAME) $(CHECKER_NAME).so diff --git a/NOTICE b/NOTICE new file mode 100644 index 0000000..ae61715 --- /dev/null +++ b/NOTICE @@ -0,0 +1,26 @@ +checker-blacklist +Copyright (c) 2026 The happyDomain Authors + +This product is licensed under the MIT License (see LICENSE). + +------------------------------------------------------------------------------- +Third-party notices +------------------------------------------------------------------------------- + +This product includes software developed as part of the checker-sdk-go +project (https://git.happydns.org/happyDomain/checker-sdk-go), licensed +under the Apache License, Version 2.0: + + checker-sdk-go + Copyright 2020-2026 The happyDomain Authors + + This product includes software developed as part of the happyDomain + project (https://happydomain.org).
+ + Portions of this code were originally written for the happyDomain + server (licensed under AGPL-3.0 and a commercial license) and are + made available there under the Apache License, Version 2.0 to enable + a permissively licensed ecosystem of checker plugins. + +You may obtain a copy of the Apache License 2.0 at: + http://www.apache.org/licenses/LICENSE-2.0 diff --git a/README.md b/README.md new file mode 100644 index 0000000..6c6c138 --- /dev/null +++ b/README.md @@ -0,0 +1,109 @@ +# checker-blacklist + +happyDomain checker that flags whether a domain is currently listed on +widely-used reputation systems. + +## Sources + +| Source | Type | API key needed | Configurable | +|-----------------------|-----------------|----------------|--------------| +| Spamhaus DBL | DNS-based DBL | no | admin (default on) | +| SURBL multi | DNS-based DBL | no | admin (default on) | +| URIBL multi | DNS-based DBL | no | admin (default on) | +| Extra DNSBL zones | DNS-based DBL | no | admin | +| Google Safe Browsing | HTTPS lookup | yes (admin) | admin | +| OpenPhish public feed | downloaded list | no | user (default on) | +| abuse.ch URLhaus | HTTPS lookup | optional Auth-Key (admin) | user (default on) | +| VirusTotal v3 | HTTPS lookup | yes (admin) | admin | + +DNS-based blocklists are queried in parallel. The OpenPhish feed is +downloaded once per hour by the provider and cached in memory. + +## Common failure scenarios surfaced in the HTML report + +The report opens with a diagnosis-first "Action required" section that +lists the most common, high-impact problems with a one-shot remediation: + +1. **Listed on Spamhaus DBL / SURBL / URIBL**: direct lookup link and + removal procedure URL per operator. +2. **Flagged by Google Safe Browsing**: link to Google Search Console's + security-issues review request. +3. 
**Listed in the OpenPhish feed**: instructions to treat the host as + compromised (audit recently-added files, rotate credentials), plus a + link to OpenPhish feedback. +4. **Listed in URLhaus (active malware distribution)**: direct link to + the abuse.ch reference page and per-URL takedown notification flow. +5. **VirusTotal multi-vendor flag**: Critical when at least one vendor + reports `malicious`, Warning when only `suspicious`. Lists the + flagging engines and links to the VT GUI page for re-scan / vendor + contact. +6. **DNSBL query refused / API quota exhausted**: most public resolvers + are blocked by DBL/URIBL operators; surfaced as a warning so it does + not pollute the OK status. + +A per-source detail table follows for full context (return codes, TXT +records, threat types, sample phishing URLs). + +## Adding a new source + +Every reputation backend implements the `Source` interface in its own +file and registers itself from `init()`. Skeleton: + +```go +package checker + +import ( + "context" + sdk "git.happydns.org/checker-sdk-go/checker" +) + +func init() { Register(&mySource{}) } + +type mySource struct{} + +func (*mySource) ID() string { return "mybl" } +func (*mySource) Name() string { return "My Blocklist" } + +func (*mySource) Options() SourceOptions { + return SourceOptions{ + Admin: []sdk.CheckerOptionField{ /* … */ }, + } +} + +func (*mySource) Query(ctx context.Context, domain, registered string, opts sdk.CheckerOptions) []SourceResult { + res := SourceResult{SourceID: "mybl", SourceName: "My Blocklist", Enabled: true} + // …populate Listed / Severity / Reasons / Evidence / Reference / Error + return []SourceResult{res} +} + +func (*mySource) Diagnose(res SourceResult) Diagnosis { + return Diagnosis{Severity: SeverityCrit, Title: "Listed", Detail: "…"} +} +``` + +That's it: rules, the report, metrics, the standalone `/check` form +and the definition pick the new source up automatically. 
Sources that +need richer rendering (a per-vendor table, etc.) additionally +implement `RenderDetail(SourceResult) (template.HTML, error)`. + +## Build + +```bash +make # standalone binary (HTTP server + /check form) +make plugin # checker-blacklist.so for happyDomain dynamic load +make docker # container image +``` + +## Running standalone + +```bash +./checker-blacklist -listen :8080 +# then GET /check, /definition, /health, … +``` + +The standalone binary embeds an `interactive` form on `GET /check` so a +human can paste a domain and run the full pipeline without happyDomain. + +## License + +MIT (checker code); Apache 2.0 SDK dependency. diff --git a/checker/collect.go b/checker/collect.go new file mode 100644 index 0000000..446a195 --- /dev/null +++ b/checker/collect.go @@ -0,0 +1,57 @@ +package checker + +import ( + "context" + "strings" + "sync" + "time" + + sdk "git.happydns.org/checker-sdk-go/checker" + "golang.org/x/net/publicsuffix" +) + +// Collect fans out the registered sources concurrently and folds their +// results into a single observation. Adding a new source means +// implementing the Source interface in its own file and calling +// Register(...) from init(); Collect needs no changes. +func (p *blacklistProvider) Collect(ctx context.Context, opts sdk.CheckerOptions) (any, error) { + domain := normalizeDomain(stringOpt(opts, "domain_name")) + if domain == "" { + // Standalone /check form posts "domain"; happyDomain auto-fills + // "domain_name". Accept both so the path stays uniform. 
+ domain = normalizeDomain(stringOpt(opts, "domain")) + } + + registered, _ := publicsuffix.EffectiveTLDPlusOne(domain) + if registered == "" { + registered = domain + } + + data := &BlacklistData{ + Domain: domain, + RegisteredDomain: registered, + CollectedAt: time.Now(), + } + + sources := Sources() + per := make([][]SourceResult, len(sources)) + + var wg sync.WaitGroup + for i, s := range sources { + wg.Add(1) + go func(i int, s Source) { + defer wg.Done() + per[i] = s.Query(ctx, domain, registered, opts) + }(i, s) + } + wg.Wait() + + for _, batch := range per { + data.Results = append(data.Results, batch...) + } + return data, nil +} + +func normalizeDomain(s string) string { + return strings.ToLower(strings.TrimSuffix(strings.TrimSpace(s), ".")) +} diff --git a/checker/definition.go b/checker/definition.go new file mode 100644 index 0000000..14c67e9 --- /dev/null +++ b/checker/definition.go @@ -0,0 +1,54 @@ +package checker + +import ( + "time" + + sdk "git.happydns.org/checker-sdk-go/checker" +) + +// Version is overridden at link time by the standalone or plugin entrypoints. +var Version = "built-in" + +// Definition assembles the checker definition by aggregating each +// registered Source's options into the SDK's audience-grouped layout. +// Adding a source automatically adds its option fields here: no edit +// to this file needed. +func Definition() *sdk.CheckerDefinition { + opts := sdk.CheckerOptionsDocumentation{ + DomainOpts: []sdk.CheckerOptionDocumentation{ + { + Id: "domain_name", + Label: "Domain name", + AutoFill: sdk.AutoFillDomainName, + }, + }, + } + for _, s := range Sources() { + o := s.Options() + opts.AdminOpts = append(opts.AdminOpts, o.Admin...) + opts.UserOpts = append(opts.UserOpts, o.User...) 
+ } + + return &sdk.CheckerDefinition{ + ID: "blacklist", + Name: "Blacklist & reputation", + Version: Version, + + Availability: sdk.CheckerAvailability{ + ApplyToDomain: true, + }, + ObservationKeys: []sdk.ObservationKey{ObservationKeyBlacklist}, + + Options: opts, + Rules: Rules(), + + Interval: &sdk.CheckIntervalSpec{ + Min: 30 * time.Minute, + Max: 24 * time.Hour, + Default: 6 * time.Hour, + }, + + HasHTMLReport: true, + HasMetrics: true, + } +} diff --git a/checker/dnsbl.go b/checker/dnsbl.go new file mode 100644 index 0000000..e0d7dff --- /dev/null +++ b/checker/dnsbl.go @@ -0,0 +1,338 @@ +package checker + +import ( + "context" + "encoding/json" + "fmt" + "net" + "slices" + "strings" + "sync" + + sdk "git.happydns.org/checker-sdk-go/checker" +) + +func init() { Register(&dnsblSource{}) } + +// dnsblSource fans out a configurable list of DNS-based blocklist +// queries. Unlike the other sources, it returns one SourceResult per +// zone (so Spamhaus / SURBL / URIBL each get their own row in the +// report) while remaining a single Source from the registry's point of +// view: one ID, one option group, one rule entry. +type dnsblSource struct{} + +func (*dnsblSource) ID() string { return "dnsbl" } +func (*dnsblSource) Name() string { return "DNS blocklists" } + +func (*dnsblSource) Options() SourceOptions { + return SourceOptions{ + Admin: []sdk.CheckerOptionField{ + { + Id: "disabled_dnsbls", + Type: "string", + Label: "Disabled DNSBL zones", + Description: "Comma-separated list of DNSBL zone suffixes to skip (e.g. \"multi.surbl.org\").", + }, + { + Id: "extra_dnsbls", + Type: "string", + Label: "Extra DNSBL zones", + Description: "Comma-separated list of extra DNSBL zone suffixes to query in addition to the defaults. Their return codes are surfaced verbatim.", + }, + }, + } +} + +// DNSBLZone is the table row describing one zone known to this source. 
+type DNSBLZone struct { + Zone string + Label string + LookupURL string + RemovalURL string + Decode func(ip net.IP) []string + IsBlockedIP func(ip net.IP) bool // returns true when the IP signals a blocked resolver, not a real listing +} + +// DefaultDNSBLZones is the curated list shipped with the checker. +// Sources for the return-code semantics: +// - DBL: https://www.spamhaus.org/dbl/ +// - SURBL: https://surbl.org/lists/ +// - URIBL: https://uribl.com/about.shtml +var DefaultDNSBLZones = []DNSBLZone{ + { + Zone: "dbl.spamhaus.org", + Label: "Spamhaus DBL", + LookupURL: "https://check.spamhaus.org/results/?query=%s", + RemovalURL: "https://www.spamhaus.org/dbl/removal/", + Decode: decodeSpamhausDBL, + IsBlockedIP: func(ip net.IP) bool { + s := ip.String() + return s == "127.0.1.255" || s == "127.255.255.254" + }, + }, + { + Zone: "multi.surbl.org", + Label: "SURBL multi", + LookupURL: "https://surbl.org/surbl-analysis?d=%s", + RemovalURL: "https://surbl.org/surbl-analysis?d=%s", + Decode: decodeSURBLMulti, + }, + { + Zone: "multi.uribl.com", + Label: "URIBL multi", + LookupURL: "https://admin.uribl.com/?section=lookup&query=%s", + RemovalURL: "https://admin.uribl.com/?section=remove", + Decode: decodeURIBLMulti, + IsBlockedIP: func(ip net.IP) bool { + v4 := ip.To4() + return v4 != nil && v4[3] == 1 + }, + }, +} + +func (s *dnsblSource) Query(ctx context.Context, domain, registered string, opts sdk.CheckerOptions) []SourceResult { + zones := zonesFromOptions(opts) + if registered == "" || len(zones) == 0 { + return []SourceResult{{ + SourceID: s.ID(), SourceName: s.Name(), Enabled: false, + }} + } + + out := make([]SourceResult, len(zones)) + var wg sync.WaitGroup + for i, z := range zones { + wg.Add(1) + go func(i int, z DNSBLZone) { + defer wg.Done() + out[i] = s.queryOne(ctx, registered, z) + }(i, z) + } + wg.Wait() + return out +} + +func (s *dnsblSource) queryOne(ctx context.Context, registered string, z DNSBLZone) SourceResult { + q := registered + "." 
+ z.Zone + res := SourceResult{ + SourceID: s.ID(), + SourceName: z.Label, + Subject: z.Zone, + Enabled: true, + LookupURL: formatURL(z.LookupURL, registered), + RemovalURL: formatURL(z.RemovalURL, registered), + } + + addrs, err := net.DefaultResolver.LookupIP(ctx, "ip4", q) + if err != nil { + // NXDOMAIN is the standard "not listed" reply. + if dnsErr, ok := err.(*net.DNSError); ok && dnsErr.IsNotFound { + return res + } + res.Error = err.Error() + return res + } + + seen := map[string]bool{} + blockedCount := 0 + for _, a := range addrs { + ip := a.To4() + if ip == nil { + continue + } + code := ip.String() + if seen[code] { + continue + } + seen[code] = true + if z.IsBlockedIP != nil && z.IsBlockedIP(ip) { + blockedCount++ + } + res.Evidence = append(res.Evidence, Evidence{ + Label: "Return code", + Value: code, + }) + if z.Decode != nil { + res.Reasons = append(res.Reasons, z.Decode(ip)...) + } + } + if blockedCount > 0 && blockedCount == len(seen) { + // All returned IPs signal a blocked resolver, not a real domain listing. + res.BlockedQuery = true + return res + } + res.Listed = true + res.Severity = SeverityCrit + if len(res.Reasons) == 0 { + res.Reasons = append(res.Reasons, "Listed (no detail decoded)") + } + + // TXT lookup is best-effort: operators often embed a pointer URL + // with the precise reason. + if txt, terr := net.DefaultResolver.LookupTXT(ctx, q); terr == nil { + res.Details = mustJSON(map[string]any{"txt": txt, "queried": q}) + } else { + res.Details = mustJSON(map[string]any{"queried": q}) + } + return res +} + +func (*dnsblSource) Diagnose(res SourceResult) Diagnosis { + return Diagnosis{ + Severity: SeverityCrit, + Title: fmt.Sprintf("Listed on %s", res.SourceName), + Detail: fmt.Sprintf( + "Reason(s): %s. Senders relaying mail through this domain (or recipients receiving links to it) will see deliveries rejected. 
Confirm with the lookup link, then follow the operator's removal procedure: automated requests usually take 24 to 72h to propagate.", + joinNonEmpty(res.Reasons, "; "), + ), + LookupURL: res.LookupURL, + RemovalURL: res.RemovalURL, + Fix: res.RemovalURL, + FixIsURL: res.RemovalURL != "", + } +} + +// ---------- helpers (shared with other sources) ---------- + +func formatURL(tmpl, domain string) string { + if tmpl == "" { + return "" + } + if !strings.Contains(tmpl, "%s") { + return tmpl + } + return fmt.Sprintf(tmpl, domain) +} + +func zonesFromOptions(opts sdk.CheckerOptions) []DNSBLZone { + zones := DefaultDNSBLZones + + if disabledRaw, ok := sdk.GetOption[string](opts, "disabled_dnsbls"); ok && disabledRaw != "" { + disabled := splitList(disabledRaw) + filtered := zones[:0:0] + for _, z := range zones { + if slices.Contains(disabled, strings.ToLower(z.Zone)) { + continue + } + filtered = append(filtered, z) + } + zones = filtered + } + + if extraRaw, ok := sdk.GetOption[string](opts, "extra_dnsbls"); ok && extraRaw != "" { + for _, e := range splitList(extraRaw) { + zones = append(zones, DNSBLZone{Zone: e, Label: e}) + } + } + return zones +} + +func splitList(s string) []string { + var out []string + for _, part := range strings.FieldsFunc(s, func(r rune) bool { + return r == ',' || r == '\n' || r == '\r' || r == ' ' || r == '\t' || r == ';' + }) { + if p := strings.TrimSpace(part); p != "" { + out = append(out, strings.ToLower(p)) + } + } + return out +} + +func joinNonEmpty(parts []string, sep string) string { + if len(parts) == 0 { + return "listed" + } + return strings.Join(parts, sep) +} + +func mustJSON(v any) []byte { + b, err := json.Marshal(v) + if err != nil { + panic("checker: mustJSON: " + err.Error()) + } + return b +} + +// ---------- return-code decoders ---------- + +func decodeSpamhausDBL(ip net.IP) []string { + switch ip.String() { + case "127.0.1.2": + return []string{"Spam domain"} + case "127.0.1.4": + return []string{"Phishing domain"} + 
case "127.0.1.5": + return []string{"Malware domain"} + case "127.0.1.6": + return []string{"Botnet C&C domain"} + case "127.0.1.102": + return []string{"Abused legit spam"} + case "127.0.1.103": + return []string{"Abused legit spammed redirector"} + case "127.0.1.104": + return []string{"Abused legit phish"} + case "127.0.1.105": + return []string{"Abused legit malware"} + case "127.0.1.106": + return []string{"Abused legit botnet C&C"} + case "127.0.1.255", "127.255.255.254": + return []string{"DBL refused the query (resolver blocked, not a domain listing)"} + } + return []string{"Listed (code " + ip.String() + ")"} +} + +func decodeSURBLMulti(ip net.IP) []string { + v4 := ip.To4() + if v4 == nil || v4[0] != 127 { + return []string{"Listed (" + ip.String() + ")"} + } + bits := v4[3] + var out []string + if bits&2 != 0 { + out = append(out, "Listed in SURBL abuse (sa-blacklist)") + } + if bits&4 != 0 { + out = append(out, "Phishing") + } + if bits&8 != 0 { + out = append(out, "Malware") + } + if bits&16 != 0 { + out = append(out, "Cracked / compromised site") + } + if bits&32 != 0 { + out = append(out, "Abuse (general)") + } + if bits&64 != 0 { + out = append(out, "Abused redirector") + } + if len(out) == 0 { + out = append(out, "Listed (code "+ip.String()+")") + } + return out +} + +func decodeURIBLMulti(ip net.IP) []string { + v4 := ip.To4() + if v4 == nil || v4[0] != 127 { + return []string{"Listed (" + ip.String() + ")"} + } + bits := v4[3] + if bits == 1 { + return []string{"URIBL: query blocked (resolver on free-use blocklist)"} + } + var out []string + if bits&2 != 0 { + out = append(out, "URIBL black (active spam source)") + } + if bits&4 != 0 { + out = append(out, "URIBL grey (suspicious)") + } + if bits&8 != 0 { + out = append(out, "URIBL red (newly observed)") + } + if len(out) == 0 { + out = append(out, "Listed (code "+ip.String()+")") + } + return out +} diff --git a/checker/dnsbl_test.go b/checker/dnsbl_test.go new file mode 100644 index 
0000000..1c81ef8 --- /dev/null +++ b/checker/dnsbl_test.go @@ -0,0 +1,74 @@ +package checker + +import ( + "net" + "reflect" + "slices" + "strings" + "testing" +) + +func TestDecodeSpamhausDBL(t *testing.T) { + cases := []struct { + ip string + contains string + }{ + {"127.0.1.2", "Spam"}, + {"127.0.1.4", "Phishing"}, + {"127.0.1.5", "Malware"}, + {"127.0.1.6", "Botnet"}, + {"127.0.1.255", "refused"}, + {"127.255.255.254", "refused"}, + {"127.0.1.99", "code"}, + } + for _, c := range cases { + got := decodeSpamhausDBL(net.ParseIP(c.ip)) + if len(got) != 1 || !strings.Contains(got[0], c.contains) { + t.Errorf("decodeSpamhausDBL(%s) = %v, want substring %q", c.ip, got, c.contains) + } + } +} + +func TestDecodeSURBLMulti(t *testing.T) { + got := decodeSURBLMulti(net.ParseIP("127.0.0.12")) // 4 + 8 + if len(got) != 2 || !strings.Contains(got[0], "Phishing") || !strings.Contains(got[1], "Malware") { + t.Errorf("decodeSURBLMulti = %v", got) + } +} + +func TestDecodeURIBLMulti(t *testing.T) { + got := decodeURIBLMulti(net.ParseIP("127.0.0.2")) + if len(got) != 1 || !strings.Contains(got[0], "black") { + t.Errorf("decodeURIBLMulti(black) = %v", got) + } + got = decodeURIBLMulti(net.ParseIP("127.0.0.1")) + if len(got) != 1 || !strings.Contains(got[0], "blocked") { + t.Errorf("decodeURIBLMulti(refused) = %v", got) + } +} + +func TestSplitList(t *testing.T) { + got := splitList("a, b\nc;d e") + want := []string{"a", "b", "c", "d", "e"} + if !reflect.DeepEqual(got, want) { + t.Errorf("splitList = %v, want %v", got, want) + } + if !slices.Contains(got, "c") { + t.Errorf("expected 'c' in %v", got) + } +} + +func TestNormalizeDomain(t *testing.T) { + if got := normalizeDomain(" Example.COM. 
"); got != "example.com" { + t.Errorf("normalizeDomain = %q", got) + } +} + +func TestFormatURL(t *testing.T) { + if got := formatURL("https://x/?q=%s", "abc"); got != "https://x/?q=abc" { + t.Errorf("formatURL = %q", got) + } + if got := formatURL("https://x/", "abc"); got != "https://x/" { + t.Errorf("formatURL noplaceholder = %q", got) + } +} diff --git a/checker/httpclient.go b/checker/httpclient.go new file mode 100644 index 0000000..11dca02 --- /dev/null +++ b/checker/httpclient.go @@ -0,0 +1,32 @@ +package checker + +import ( + "fmt" + "io" + "net/http" + "time" +) + +// sharedHTTPClient is reused across sources so connection pooling and +// keep-alives kick in. Per-call deadlines are expressed via +// context.WithTimeout on the request context, not on the client. +var sharedHTTPClient = &http.Client{Timeout: 60 * time.Second} + +// httpDo executes req on the shared client, reads up to maxBytes from +// the response body, and returns the body, the HTTP status code and any +// error. Status-code semantics differ per API (404 means "unknown" on +// VirusTotal, body-level fields drive URLhaus, …) so the caller decides +// how to interpret status; this helper only handles the boilerplate +// common to every JSON-ish source. 
+func httpDo(req *http.Request, maxBytes int64) (body []byte, status int, err error) { + resp, err := sharedHTTPClient.Do(req) + if err != nil { + return nil, 0, err + } + defer resp.Body.Close() + body, err = io.ReadAll(io.LimitReader(resp.Body, maxBytes)) + if err != nil { + return nil, resp.StatusCode, fmt.Errorf("read body: %w", err) + } + return body, resp.StatusCode, nil +} diff --git a/checker/interactive.go b/checker/interactive.go new file mode 100644 index 0000000..12872cf --- /dev/null +++ b/checker/interactive.go @@ -0,0 +1,89 @@ +//go:build standalone + +package checker + +import ( + "errors" + "net/http" + "strconv" + "strings" + + sdk "git.happydns.org/checker-sdk-go/checker" +) + +// RenderForm builds the standalone form by aggregating each registered +// source's option fields. The "domain" field is hard-coded since it +// applies to every source; everything else is contributed by sources. +func (p *blacklistProvider) RenderForm() []sdk.CheckerOptionField { + fields := []sdk.CheckerOptionField{ + { + Id: "domain", + Type: "string", + Label: "Domain", + Placeholder: "example.com", + Description: "Domain to test against the configured reputation sources.", + Required: true, + }, + } + for _, s := range Sources() { + o := s.Options() + fields = append(fields, o.Admin...) + fields = append(fields, o.User...) + } + return fields +} + +// ParseForm walks every option field declared by the sources and reads +// it from the form. The generic loop means a new source's fields +// appear in /check automatically. +func (p *blacklistProvider) ParseForm(r *http.Request) (sdk.CheckerOptions, error) { + domain := strings.TrimSpace(r.FormValue("domain")) + if domain == "" { + return nil, errors.New("a domain is required") + } + opts := sdk.CheckerOptions{ + "domain": domain, + "domain_name": domain, + } + for _, s := range Sources() { + o := s.Options() + for _, f := range append(append([]sdk.CheckerOptionField{}, o.Admin...), o.User...) 
{ + raw := strings.TrimSpace(r.FormValue(f.Id)) + if raw == "" { + if f.Type == "bool" { + opts[f.Id] = boolDefault(f.Default) + } + continue + } + switch f.Type { + case "bool": + opts[f.Id] = parseFormBool(raw, true) + case "number", "uint": + if n, err := strconv.ParseFloat(raw, 64); err == nil { + opts[f.Id] = n + } + default: + opts[f.Id] = raw + } + } + } + return opts, nil +} + +func parseFormBool(s string, defaultVal bool) bool { + switch strings.ToLower(strings.TrimSpace(s)) { + case "": + return defaultVal + case "true", "on", "1", "yes": + return true + default: + return false + } +} + +func boolDefault(v any) bool { + if b, ok := v.(bool); ok { + return b + } + return false +} diff --git a/checker/openphish.go b/checker/openphish.go new file mode 100644 index 0000000..065652a --- /dev/null +++ b/checker/openphish.go @@ -0,0 +1,204 @@ +package checker + +import ( + "bufio" + "context" + "fmt" + "io" + "net/http" + "net/url" + "strings" + "sync" + "time" + + sdk "git.happydns.org/checker-sdk-go/checker" +) + +const openPhishFeedURL = "https://openphish.com/feed.txt" + +func init() { + Register(&openPhishSource{ + cache: newPhishCache(openPhishFeedURL, 1*time.Hour), + }) +} + +// openPhishSource downloads the public OpenPhish feed once per cache +// TTL and matches the registered domain (and all subdomains) against +// every URL in the feed. The cache is per-source-instance so it lives +// for as long as the process. 
+type openPhishSource struct { + cache *phishCache +} + +func (*openPhishSource) ID() string { return "openphish" } +func (*openPhishSource) Name() string { return "OpenPhish feed" } + +func (*openPhishSource) Options() SourceOptions { + return SourceOptions{ + User: []sdk.CheckerOptionField{ + { + Id: "enable_openphish", + Type: "bool", + Label: "Use the OpenPhish public feed", + Description: "Download the OpenPhish public feed (refreshed every 12h) and check the domain against it.", + Default: true, + }, + }, + } +} + +func (s *openPhishSource) Query(ctx context.Context, domain, registered string, opts sdk.CheckerOptions) []SourceResult { + if !sdk.GetBoolOption(opts, "enable_openphish", true) || registered == "" { + return []SourceResult{{SourceID: s.ID(), SourceName: s.Name(), Enabled: false}} + } + + urls, size, fetched, err := s.cache.lookup(ctx, registered) + res := SourceResult{ + SourceID: s.ID(), SourceName: s.Name(), Enabled: true, + Reference: "https://openphish.com/", + Details: mustJSON(map[string]any{"feed_size": size, "fetched_at": fetched}), + } + if err != nil { + res.Error = err.Error() + // Fall through with whatever the cache could provide. + } + if len(urls) > 0 { + res.Listed = true + res.Severity = SeverityCrit + res.Reasons = []string{"Phishing"} + for _, u := range urls { + res.Evidence = append(res.Evidence, Evidence{Label: "URL", Value: u}) + } + } + return []SourceResult{res} +} + +func (*openPhishSource) Diagnose(res SourceResult) Diagnosis { + urls := make([]string, 0, len(res.Evidence)) + for _, e := range res.Evidence { + urls = append(urls, e.Value) + } + previewN := min(len(urls), 5) + return Diagnosis{ + Severity: SeverityCrit, + Title: "Listed in the OpenPhish phishing feed", + Detail: fmt.Sprintf( + "%d URL(s) hosted on this domain are tracked as phishing by OpenPhish. 
Treat the host as compromised: rotate credentials, audit recently-added files (look for /wp-includes/, /uploads/, lookalike admin paths), then request review at OpenPhish. Examples: %s", + len(urls), joinNonEmpty(urls[:previewN], ", "), + ), + Fix: "https://openphish.com/feedback.html", + FixIsURL: true, + } +} + +// ---------- feed cache ---------- + +type phishCache struct { + mu sync.Mutex + urls []string + byHost map[string][]string + fetchedAt time.Time + lastAttemptAt time.Time + refreshing bool + ttl time.Duration + failBackoff time.Duration + feedURL string +} + +func newPhishCache(feedURL string, ttl time.Duration) *phishCache { + if feedURL == "" { + feedURL = openPhishFeedURL + } + if ttl <= 0 { + ttl = 1 * time.Hour + } + return &phishCache{ttl: ttl, feedURL: feedURL, failBackoff: 1 * time.Minute} +} + +func (c *phishCache) lookup(ctx context.Context, domain string) (urls []string, size int, fetchedAt time.Time, err error) { + domain = strings.ToLower(strings.TrimSuffix(domain, ".")) + + c.mu.Lock() + stale := c.byHost == nil || time.Since(c.fetchedAt) > c.ttl + doRefresh := stale && !c.refreshing && time.Since(c.lastAttemptAt) > c.failBackoff + if doRefresh { + c.refreshing = true + } + c.mu.Unlock() + + if doRefresh { + // Fetch without holding the cache lock so concurrent lookups + // can still serve stale data. Only one refresh runs at a time. + newURLs, newByHost, ferr := c.fetch(ctx) + c.mu.Lock() + c.refreshing = false + c.lastAttemptAt = time.Now() + if ferr == nil { + c.urls = newURLs + c.byHost = newByHost + c.fetchedAt = c.lastAttemptAt + } else { + err = ferr + } + c.mu.Unlock() + } + + c.mu.Lock() + for host, hostURLs := range c.byHost { + if host == domain || strings.HasSuffix(host, "."+domain) { + urls = append(urls, hostURLs...) 
+ } + } + size = len(c.urls) + fetchedAt = c.fetchedAt + c.mu.Unlock() + return urls, size, fetchedAt, err +} + +func (c *phishCache) fetch(ctx context.Context) ([]string, map[string][]string, error) { + reqCtx, cancel := context.WithTimeout(ctx, 30*time.Second) + defer cancel() + + req, err := http.NewRequestWithContext(reqCtx, http.MethodGet, c.feedURL, nil) + if err != nil { + return nil, nil, err + } + req.Header.Set("User-Agent", "happydomain-checker-blacklist/1.0") + + resp, err := sharedHTTPClient.Do(req) + if err != nil { + return nil, nil, err + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + return nil, nil, fmt.Errorf("openphish HTTP %d", resp.StatusCode) + } + + urls := make([]string, 0, 8192) + byHost := make(map[string][]string, 8192) + scanner := bufio.NewScanner(io.LimitReader(resp.Body, 64<<20)) + scanner.Buffer(make([]byte, 0, 64*1024), 1<<20) + for scanner.Scan() { + line := strings.TrimSpace(scanner.Text()) + if line == "" || strings.HasPrefix(line, "#") { + continue + } + urls = append(urls, line) + if h := hostOfURL(line); h != "" { + byHost[h] = append(byHost[h], line) + } + } + if err := scanner.Err(); err != nil { + return nil, nil, err + } + return urls, byHost, nil +} + +func hostOfURL(s string) string { + u, err := url.Parse(s) + if err != nil { + return "" + } + return strings.ToLower(u.Hostname()) +} diff --git a/checker/provider.go b/checker/provider.go new file mode 100644 index 0000000..fd4e0ea --- /dev/null +++ b/checker/provider.go @@ -0,0 +1,52 @@ +package checker + +import ( + "encoding/json" + "time" + + sdk "git.happydns.org/checker-sdk-go/checker" +) + +func Provider() sdk.ObservationProvider { return &blacklistProvider{} } + +type blacklistProvider struct{} + +func (p *blacklistProvider) Key() sdk.ObservationKey { return ObservationKeyBlacklist } +func (p *blacklistProvider) Definition() *sdk.CheckerDefinition { return Definition() } + +// ExtractMetrics turns the (now uniform) per-source results into 
a +// small set of generic gauges. Source-specific metrics (VT engine +// counts, URLhaus URL count, …) live in SourceResult.Details and are +// rendered in the HTML report; metrics here stay coarse so the +// scheduler / Prometheus side does not have to know which sources are +// installed. +func (p *blacklistProvider) ExtractMetrics(ctx sdk.ReportContext, collectedAt time.Time) ([]sdk.CheckMetric, error) { + var data BlacklistData + if err := json.Unmarshal(ctx.Data(), &data); err != nil { + return nil, err + } + + metrics := []sdk.CheckMetric{ + { + Name: "blacklist_total_hits", Value: float64(data.TotalHits()), + Unit: "results", Timestamp: collectedAt, + }, + } + for _, r := range data.Results { + if !r.Enabled { + continue + } + v := 0.0 + if r.Listed { + v = 1 + } + metrics = append(metrics, sdk.CheckMetric{ + Name: "blacklist_source_listed", + Value: v, + Unit: "bool", + Labels: map[string]string{"source": r.SourceID, "subject": r.Subject}, + Timestamp: collectedAt, + }) + } + return metrics, nil +} diff --git a/checker/report.go b/checker/report.go new file mode 100644 index 0000000..5649812 --- /dev/null +++ b/checker/report.go @@ -0,0 +1,341 @@ +package checker + +import ( + "bytes" + "encoding/json" + "fmt" + "html/template" + "sort" + + sdk "git.happydns.org/checker-sdk-go/checker" +) + +// GetHTMLReport renders a generic, source-agnostic HTML report. The +// per-source rich detail (VT vendor table, URLhaus URL list, …) is +// rendered by sources implementing DetailRenderer; everything else +// (headline, action-required cards, summary table) walks the uniform +// SourceResult envelope without any source-specific switch. 
+func (p *blacklistProvider) GetHTMLReport(ctx sdk.ReportContext) (string, error) { + var data BlacklistData + if err := json.Unmarshal(ctx.Data(), &data); err != nil { + return "", fmt.Errorf("decode blacklist data: %w", err) + } + + view := reportView{ + Domain: data.Domain, + RegisteredDomain: data.RegisteredDomain, + CollectedAt: data.CollectedAt.Format("2006-01-02 15:04 MST"), + TotalHits: data.TotalHits(), + Diagnoses: diagnose(&data), + Sections: buildSections(&data), + CSS: template.CSS(reportCSS), + } + view.Headline, view.HeadlineClass = headline(view.TotalHits) + + var b bytes.Buffer + if err := reportTemplate.Execute(&b, view); err != nil { + return "", fmt.Errorf("render blacklist report: %w", err) + } + return b.String(), nil +} + +type reportView struct { + Domain string + RegisteredDomain string + CollectedAt string + TotalHits int + Headline string + HeadlineClass string + Diagnoses []Diagnosis + Sections []sourceSection + CSS template.CSS +} + +// sourceSection is one rendered card per Source (not per result): a +// multi-result source like DNSBL is collapsed to a single section that +// lists each subject as a row. Rich sources contribute extra HTML via +// their RenderDetail implementation; plain sources fall back to the +// generic Reasons/Evidence rendering. +type sourceSection struct { + SourceID string + SourceName string + StatusLabel string + StatusClass string + Subjects []subjectRow + RichHTML template.HTML + Reference string +} + +type subjectRow struct { + Subject string + StatusLabel string + StatusClass string + Reasons []string + Evidence []Evidence + LookupURL string + RemovalURL string + Reference string + Error string + Disabled bool +} + +func headline(hits int) (string, string) { + switch hits { + case 0: + return "Domain is clean across all configured reputation sources.", SeverityOK + case 1: + return "Domain is currently listed on 1 source. 
Act now: a single listing already breaks email delivery and browser access.", SeverityCrit + default: + return fmt.Sprintf("Domain is currently listed on %d sources. This is severe: most mail and browsers will block access.", hits), SeverityCrit + } +} + +// diagnose builds the action-required cards by delegating to each +// listed result's source. The generic code only orders cards by +// severity; the wording and remediation are owned by the source. +func diagnose(d *BlacklistData) []Diagnosis { + byID := make(map[string]Source, len(Sources())) + for _, s := range Sources() { + byID[s.ID()] = s + } + + var out []Diagnosis + for _, r := range d.Results { + if !r.Listed { + continue + } + if s, ok := byID[r.SourceID]; ok { + out = append(out, s.Diagnose(r)) + } + } + // Errors are surfaced as warnings so a flaky source is visible + // without dominating the page. + for _, r := range d.Results { + if r.Error == "" { + continue + } + title := "Could not query " + r.SourceName + if r.Subject != "" && r.Subject != r.SourceName { + title = fmt.Sprintf("Could not query %s (%s)", r.SourceName, r.Subject) + } + out = append(out, Diagnosis{ + Severity: SeverityWarn, + Title: title, + Detail: r.Error + ": the listing status of this source is unknown for this run.", + }) + } + + sort.SliceStable(out, func(i, j int) bool { return sevRank(out[i].Severity) < sevRank(out[j].Severity) }) + return out +} + +func sevRank(s string) int { + switch s { + case SeverityCrit: + return 0 + case SeverityWarn: + return 1 + case SeverityInfo: + return 2 + } + return 3 +} + +// buildSections groups results by source, collapses multi-result +// sources into a single card, and asks each source for its rich detail +// HTML when applicable. 
+func buildSections(d *BlacklistData) []sourceSection { + byID := make(map[string]Source, len(Sources())) + order := make([]string, 0, len(Sources())) + for _, s := range Sources() { + byID[s.ID()] = s + order = append(order, s.ID()) + } + + grouped := make(map[string][]SourceResult) + for _, r := range d.Results { + grouped[r.SourceID] = append(grouped[r.SourceID], r) + } + + out := make([]sourceSection, 0, len(grouped)) + for _, id := range order { + results := grouped[id] + if len(results) == 0 { + continue + } + section := sourceSection{ + SourceID: id, + SourceName: byID[id].Name(), + } + section.StatusLabel, section.StatusClass = sectionStatus(results) + for _, r := range results { + if r.Reference != "" && section.Reference == "" { + section.Reference = r.Reference + } + section.Subjects = append(section.Subjects, subjectRow{ + Subject: subjectLabel(byID[id].Name(), r), + StatusLabel: subjectStatusLabel(r), + StatusClass: subjectStatusClass(r), + Reasons: r.Reasons, + Evidence: r.Evidence, + LookupURL: r.LookupURL, + RemovalURL: r.RemovalURL, + Reference: r.Reference, + Error: r.Error, + Disabled: !r.Enabled, + }) + } + // Rich detail: use the first listed result's payload (single- + // subject sources have at most one). Plain sources skip this. 
+ if dr, ok := byID[id].(DetailRenderer); ok { + for _, r := range results { + if !r.Listed && len(r.Details) == 0 { + continue + } + html, err := dr.RenderDetail(r) + if err == nil && html != "" { + section.RichHTML = html + break + } + } + } + out = append(out, section) + } + return out +} + +func sectionStatus(results []SourceResult) (string, string) { + listed, errs, enabled := 0, 0, 0 + for _, r := range results { + if r.Enabled { + enabled++ + } + if r.Listed { + listed++ + } else if r.Error != "" { + errs++ + } + } + switch { + case enabled == 0: + return "Disabled", "muted" + case listed > 0: + return fmt.Sprintf("LISTED (%d)", listed), "crit" + case errs > 0: + return "Errors", "warn" + } + return "Clean", "ok" +} + +func subjectLabel(srcName string, r SourceResult) string { + if r.Subject != "" && r.Subject != srcName { + return r.Subject + } + return srcName +} + +func subjectStatusLabel(r SourceResult) string { + switch { + case !r.Enabled: + return "Disabled" + case r.Listed: + return "LISTED" + case r.Error != "": + return "Error" + } + return "Clean" +} + +func subjectStatusClass(r SourceResult) string { + switch { + case !r.Enabled: + return "muted" + case r.Listed: + return r.Severity + case r.Error != "": + return "warn" + } + return "ok" +} + +var reportTemplate = template.Must(template.New("blacklist").Parse(` + + + +Blacklist report — {{.Domain}} + + +
+<body>
+<main>
+<h1>Blacklist &amp; reputation</h1>
+<p class="meta">{{.Domain}}{{if and .RegisteredDomain (ne .RegisteredDomain .Domain)}} (queried as {{.RegisteredDomain}}){{end}} · collected {{.CollectedAt}}</p>
+
+<div class="headline status-{{.HeadlineClass}}">{{.Headline}}</div>
+
+{{with .Diagnoses}}
+<section>
+<h2>Action required</h2>
+{{range .}}
+<div class="finding sev-{{.Severity}}">
+<h3>{{.Title}}</h3>
+<p>{{.Detail}}</p>
+{{if .Fix}}{{if .FixIsURL}}<p><a href="{{.Fix}}">{{.Fix}}</a></p>{{else}}<pre class="fix">{{.Fix}}</pre>{{end}}{{end}}
+</div>
+{{end}}
+</section>
+{{end}}
+
+{{range .Sections}}
+<section>
+<h2>{{.SourceName}} <span class="badge status-{{.StatusClass}}">{{.StatusLabel}}</span></h2>
+{{if .RichHTML}}{{.RichHTML}}{{end}}
+{{if .Subjects}}
+<table>
+<tr><th>Subject</th><th>Status</th><th>Detail</th></tr>
+{{range .Subjects}}
+<tr class="row-{{.StatusClass}}">
+<td>{{.Subject}}</td>
+<td>{{.StatusLabel}}</td>
+<td>
+{{if .Disabled}}<span class="muted">disabled</span>
+{{else if .Error}}{{.Error}}
+{{else}}
+{{range .Reasons}}{{.}}<br>
+{{end}}
+{{if .Evidence}}<details><summary>{{len .Evidence}} evidence item(s)</summary>
+{{range .Evidence}}&nbsp;&nbsp;• {{.Value}}{{with .Status}} ({{.}}){{end}}<br>
+{{end}}</details>
+{{end}}
+{{if .LookupURL}}<small><a href="{{.LookupURL}}">Lookup</a>{{if .RemovalURL}} · <a href="{{.RemovalURL}}">Request removal</a>{{end}}</small>
+{{end}}
+{{end}}
+</td>
+</tr>
+{{end}}
+</table>
+{{end}}
+</section>
+{{end}}
+
+</main>
+</body>
+</html>
`)) + +const reportCSS = `body{font-family:system-ui,sans-serif;margin:0;background:#fafbfc;color:#1b1f23;} +main{max-width:980px;margin:0 auto;padding:1.5rem;} +h1{margin:0 0 .25rem 0;} +.meta{color:#586069;margin:0 0 1rem 0;} +section{margin-bottom:2rem;} +h2{border-bottom:1px solid #e1e4e8;padding-bottom:.25rem;} +.badge{font-size:.7rem;padding:.1rem .4rem;border-radius:3px;vertical-align:middle;background:#eee;color:#1b1f23;font-weight:600;} +.badge.status-crit{background:#ffeef0;color:#d73a49;} +.badge.status-warn{background:#fff5d4;color:#b08800;} +.badge.status-ok{background:#dcffe4;color:#22863a;} +.badge.status-muted{background:#eee;color:#586069;} +.headline{padding:.75rem 1rem;border-radius:4px;margin-bottom:1.5rem;} +.headline.status-ok{background:#dcffe4;border-left:4px solid #22863a;} +.headline.status-crit{background:#ffeef0;border-left:4px solid #d73a49;} +.finding{border-left:4px solid;padding:.75rem 1rem;margin:.75rem 0;background:#fff;border-radius:4px;} +.finding h3{margin:0 0 .25rem 0;font-size:1rem;} +.finding.sev-crit{border-color:#d73a49;} +.finding.sev-warn{border-color:#dbab09;} +.finding.sev-info{border-color:#0366d6;} +.fix{background:#1b1f23;color:#fafbfc;padding:.5rem .75rem;border-radius:4px;overflow-x:auto;font-size:.85rem;} +table{width:100%;border-collapse:collapse;background:#fff;} +th,td{padding:.5rem .75rem;border-bottom:1px solid #e1e4e8;text-align:left;vertical-align:top;} +tr.row-crit td:nth-child(2){color:#d73a49;font-weight:600;} +tr.row-warn td:nth-child(2){color:#b08800;font-weight:600;} +tr.row-ok td:nth-child(2){color:#22863a;font-weight:600;} +tr.row-muted td:nth-child(2){color:#586069;} +.ok{color:#22863a;} +.warn{color:#b08800;} +.muted{color:#586069;} +code{font-size:.85rem;} +small{color:#586069;} +details{margin:.25rem 0;}` diff --git a/checker/report_test.go b/checker/report_test.go new file mode 100644 index 0000000..1ff5fad --- /dev/null +++ b/checker/report_test.go @@ -0,0 +1,81 @@ +package checker + +import ( 
+ "strings" + "testing" + "time" +) + +func TestDiagnoseAndReportRender(t *testing.T) { + d := &BlacklistData{ + Domain: "example.com", + RegisteredDomain: "example.com", + CollectedAt: time.Now(), + Results: []SourceResult{ + { + SourceID: "dnsbl", SourceName: "Spamhaus DBL", + Subject: "dbl.spamhaus.org", + Enabled: true, Listed: true, Severity: SeverityCrit, + Reasons: []string{"Phishing domain"}, + LookupURL: "https://check.spamhaus.org/results/?query=example.com", + RemovalURL: "https://www.spamhaus.org/dbl/removal/", + }, + { + SourceID: "dnsbl", SourceName: "URIBL multi", + Subject: "multi.uribl.com", + Enabled: true, Error: "i/o timeout", + }, + { + SourceID: "openphish", SourceName: "OpenPhish feed", + Enabled: true, Listed: true, Severity: SeverityCrit, + Evidence: []Evidence{{Label: "URL", Value: "http://example.com/login"}}, + }, + }, + } + + diags := diagnose(d) + if len(diags) < 2 { + t.Fatalf("expected at least 2 diagnoses, got %d", len(diags)) + } + if diags[0].Severity != SeverityCrit { + t.Errorf("first diagnosis severity = %q, want crit", diags[0].Severity) + } + + p := &blacklistProvider{} + html, err := p.GetHTMLReport(staticCtx{data: jsonOf(t, d)}) + if err != nil { + t.Fatalf("GetHTMLReport: %v", err) + } + for _, want := range []string{"Spamhaus DBL", "Action required", "OpenPhish"} { + if !strings.Contains(html, want) { + t.Errorf("report missing %q", want) + } + } +} + +func TestHeadline(t *testing.T) { + if h, c := headline(0); c != SeverityOK || !strings.Contains(h, "clean") { + t.Errorf("headline(0) = %q/%q", h, c) + } + if h, c := headline(1); c != SeverityCrit || !strings.Contains(h, "1") { + t.Errorf("headline(1) = %q/%q", h, c) + } + if h, c := headline(3); c != SeverityCrit || !strings.Contains(h, "3") { + t.Errorf("headline(3) = %q/%q", h, c) + } +} + +func TestSectionStatus(t *testing.T) { + if l, c := sectionStatus([]SourceResult{{Enabled: true, Listed: true, Severity: SeverityCrit}}); c != "crit" || !strings.HasPrefix(l, 
"LISTED") { + t.Errorf("sectionStatus listed = %q/%q", l, c) + } + if l, c := sectionStatus([]SourceResult{{Enabled: true}}); c != "ok" || l != "Clean" { + t.Errorf("sectionStatus clean = %q/%q", l, c) + } + if l, c := sectionStatus([]SourceResult{{Enabled: false}}); c != "muted" || l != "Disabled" { + t.Errorf("sectionStatus disabled = %q/%q", l, c) + } + if l, c := sectionStatus([]SourceResult{{Enabled: true, Error: "boom"}}); c != "warn" || l != "Errors" { + t.Errorf("sectionStatus error = %q/%q", l, c) + } +} diff --git a/checker/rule.go b/checker/rule.go new file mode 100644 index 0000000..d389026 --- /dev/null +++ b/checker/rule.go @@ -0,0 +1,110 @@ +package checker + +import ( + "context" + "fmt" + + sdk "git.happydns.org/checker-sdk-go/checker" +) + +// Rules returns the rule set surfaced to happyDomain. After the +// registry refactor we expose a single, generic rule that emits one +// CheckState per source result: the per-source verdict lives in +// CheckState.Subject (the source name) and CheckState.Code carries the +// canonical hit / clean / disabled / error flavour. +func Rules() []sdk.CheckRule { + return []sdk.CheckRule{&sourceRule{}} +} + +type sourceRule struct{} + +func (*sourceRule) Name() string { return "source_listed" } +func (*sourceRule) Description() string { + return "Emits one state per reputation source: Critical/Warning when the source flags the domain, OK when clean, Info when the source is disabled, and Warning on transient query errors." 
+} + +func (*sourceRule) Evaluate(ctx context.Context, obs sdk.ObservationGetter, opts sdk.CheckerOptions) []sdk.CheckState { + var data BlacklistData + if err := obs.Get(ctx, ObservationKeyBlacklist, &data); err != nil { + return []sdk.CheckState{{ + Status: sdk.StatusError, + Message: fmt.Sprintf("failed to get observation: %v", err), + Code: "blacklist_obs_error", + }} + } + + if len(data.Results) == 0 { + return []sdk.CheckState{{ + Status: sdk.StatusInfo, Message: "No reputation sources registered.", + Code: "blacklist_no_sources", + }} + } + + out := make([]sdk.CheckState, 0, len(data.Results)) + for _, r := range data.Results { + out = append(out, evaluateOne(r)) + } + return out +} + +func evaluateOne(r SourceResult) sdk.CheckState { + subj := r.SourceName + if r.Subject != "" && r.Subject != r.SourceName { + subj = r.SourceName + " / " + r.Subject + } + switch { + case !r.Enabled: + return sdk.CheckState{ + Status: sdk.StatusUnknown, Subject: subj, + Message: subj + ": disabled or not configured.", + Code: "source_disabled", + } + case r.BlockedQuery: + return sdk.CheckState{ + Status: sdk.StatusError, + Subject: subj, + Message: fmt.Sprintf("%s: resolver is blocked, result unreliable: %s", subj, joinNonEmpty(r.Reasons, "; ")), + Code: "source_resolver_blocked", + } + case r.Error != "": + return sdk.CheckState{ + Status: sdk.StatusWarn, Subject: subj, + Message: subj + ": query failed: " + r.Error, + Code: "source_error", + } + case r.Listed: + return sdk.CheckState{ + Status: severityToStatus(r.Severity), + Subject: subj, + Message: fmt.Sprintf("Listed in %s: %s", subj, joinNonEmpty(r.Reasons, "; ")), + Code: "source_listed", + Meta: map[string]any{ + "source_id": r.SourceID, + "reasons": r.Reasons, + "lookup_url": r.LookupURL, + "removal_url": r.RemovalURL, + "reference": r.Reference, + }, + } + default: + return sdk.CheckState{ + Status: sdk.StatusOK, Subject: subj, + Message: subj + ": clean.", + Code: "source_clean", + } + } +} + +func 
severityToStatus(sev string) sdk.Status { + switch sev { + case SeverityCrit: + return sdk.StatusCrit + case SeverityWarn: + return sdk.StatusWarn + case SeverityInfo: + return sdk.StatusInfo + case SeverityOK: + return sdk.StatusOK + } + return sdk.StatusCrit +} diff --git a/checker/safebrowsing.go b/checker/safebrowsing.go new file mode 100644 index 0000000..a9a9c03 --- /dev/null +++ b/checker/safebrowsing.go @@ -0,0 +1,187 @@ +package checker + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "net/http" + "strings" + "time" + + sdk "git.happydns.org/checker-sdk-go/checker" +) + +func init() { Register(&safeBrowsingSource{endpoint: safeBrowsingEndpoint}) } + +const safeBrowsingEndpoint = "https://safebrowsing.googleapis.com/v4/threatMatches:find?key=%s" + +// safeBrowsingSource calls Google Safe Browsing v4. The endpoint is +// kept on the struct so tests can swap it for httptest. +type safeBrowsingSource struct { + endpoint string +} + +func (*safeBrowsingSource) ID() string { return "google_safe_browsing" } +func (*safeBrowsingSource) Name() string { return "Google Safe Browsing" } + +func (*safeBrowsingSource) Options() SourceOptions { + return SourceOptions{ + Admin: []sdk.CheckerOptionField{ + { + Id: "google_safe_browsing_api_key", + Type: "string", + Label: "Google Safe Browsing API key", + Description: "Google Cloud API key with the Safe Browsing API enabled. 
Leave empty to skip Safe Browsing lookups.", + Secret: true, + }, + { + Id: "google_safe_browsing_client_id", + Type: "string", + Label: "Safe Browsing client ID", + Default: "happydomain", + }, + { + Id: "google_safe_browsing_client_version", + Type: "string", + Label: "Safe Browsing client version", + Default: "1.0", + }, + }, + } +} + +func (s *safeBrowsingSource) Query(ctx context.Context, domain, registered string, opts sdk.CheckerOptions) []SourceResult { + apiKey := stringOpt(opts, "google_safe_browsing_api_key") + if apiKey == "" { + return []SourceResult{{SourceID: s.ID(), SourceName: s.Name(), Enabled: false}} + } + if registered == "" { + return []SourceResult{{SourceID: s.ID(), SourceName: s.Name(), Enabled: true}} + } + clientID := stringOptDefault(opts, "google_safe_browsing_client_id", "happydomain") + clientVersion := stringOptDefault(opts, "google_safe_browsing_client_version", "1.0") + + res := SourceResult{SourceID: s.ID(), SourceName: s.Name(), Enabled: true} + + body := map[string]any{ + "client": map[string]string{"clientId": clientID, "clientVersion": clientVersion}, + "threatInfo": map[string]any{ + "threatTypes": []string{ + "MALWARE", "SOCIAL_ENGINEERING", + "UNWANTED_SOFTWARE", "POTENTIALLY_HARMFUL_APPLICATION", + }, + "platformTypes": []string{"ANY_PLATFORM"}, + "threatEntryTypes": []string{"URL"}, + "threatEntries": []map[string]string{ + {"url": "http://" + registered + "/"}, + {"url": "https://" + registered + "/"}, + {"url": registered}, + }, + }, + } + buf, err := json.Marshal(body) + if err != nil { + res.Error = err.Error() + return []SourceResult{res} + } + + reqCtx, cancel := context.WithTimeout(ctx, 15*time.Second) + defer cancel() + + url := fmt.Sprintf(s.endpoint, apiKey) + req, err := http.NewRequestWithContext(reqCtx, http.MethodPost, url, bytes.NewReader(buf)) + if err != nil { + res.Error = redactSecret(err.Error(), apiKey) + return []SourceResult{res} + } + req.Header.Set("Content-Type", "application/json") + + raw, 
status, err := httpDo(req, 1<<20) + if err != nil { + res.Error = redactSecret(err.Error(), apiKey) + return []SourceResult{res} + } + if status != http.StatusOK { + res.Error = fmt.Sprintf("HTTP %d: %s", status, redactSecret(truncate(string(raw), 200), apiKey)) + return []SourceResult{res} + } + + var parsed struct { + Matches []struct { + ThreatType string `json:"threatType"` + PlatformType string `json:"platformType"` + Threat struct { + URL string `json:"url"` + } `json:"threat"` + } `json:"matches"` + } + if err := json.Unmarshal(raw, &parsed); err != nil { + res.Error = "decode: " + err.Error() + return []SourceResult{res} + } + + if len(parsed.Matches) == 0 { + return []SourceResult{res} + } + res.Listed = true + res.Severity = SeverityCrit + res.Reference = "https://transparencyreport.google.com/safe-browsing/search?url=" + registered + seenType := map[string]bool{} + for _, m := range parsed.Matches { + if !seenType[m.ThreatType] { + seenType[m.ThreatType] = true + res.Reasons = append(res.Reasons, m.ThreatType) + } + res.Evidence = append(res.Evidence, Evidence{ + Label: "URL", + Value: m.Threat.URL, + Status: m.ThreatType, + Extra: map[string]string{"platform": m.PlatformType}, + }) + } + return []SourceResult{res} +} + +func (*safeBrowsingSource) Diagnose(res SourceResult) Diagnosis { + return Diagnosis{ + Severity: SeverityCrit, + Title: "Flagged by Google Safe Browsing", + Detail: fmt.Sprintf( + "Threat type(s): %s. Visitors using Chrome, Firefox, Safari and most major browsers see a red interstitial when opening any URL on this domain. Investigate compromised pages, clean them, then request a review through Google Search Console: listings typically clear within 24h after a successful review.", + joinNonEmpty(res.Reasons, ", "), + ), + Fix: "https://search.google.com/search-console/security-issues", + FixIsURL: true, + } +} + +// redactSecret removes occurrences of secret from s. 
Used to scrub API +// keys out of transport errors before they reach the report payload: +// *url.Error renders the full request URL, which for Safe Browsing +// includes ?key=… as a query parameter. +func redactSecret(s, secret string) string { + if secret == "" { + return s + } + return strings.ReplaceAll(s, secret, "REDACTED") +} + +func truncate(s string, n int) string { + if len(s) <= n { + return s + } + return s[:n] + "…" +} + +func stringOpt(opts sdk.CheckerOptions, key string) string { + v, _ := sdk.GetOption[string](opts, key) + return v +} + +func stringOptDefault(opts sdk.CheckerOptions, key, def string) string { + if v := stringOpt(opts, key); v != "" { + return v + } + return def +} diff --git a/checker/source.go b/checker/source.go new file mode 100644 index 0000000..793d919 --- /dev/null +++ b/checker/source.go @@ -0,0 +1,146 @@ +package checker + +import ( + "context" + "encoding/json" + "html/template" + "sync" + + sdk "git.happydns.org/checker-sdk-go/checker" +) + +// Severity strings shared between sources, rules, and the HTML report. +const ( + SeverityCrit = "crit" + SeverityWarn = "warn" + SeverityInfo = "info" + SeverityOK = "ok" +) + +// Source is the contract every reputation source implements. The +// registry collects one Source per backend (DNSBL family, Safe +// Browsing, URLhaus, VirusTotal, OpenPhish, …); Collect fans out over +// the registry concurrently and folds the per-source results into the +// observation payload. Adding a new source is a single file plus a +// `Register(...)` call in init(). +// +// A Source returns *one or more* SourceResult values. Most sources +// return exactly one (`{Listed, Reasons, …}`); the DNSBL family returns +// one result per zone. Returning many results from one Source keeps the +// definition tidy (one ID, one set of options, one rule entry) while +// still surfacing per-zone detail in the report. 
+type Source interface { + ID() string + Name() string + + // Options contributes the option fields the source needs. They are + // merged into the global CheckerDefinition at startup. + Options() SourceOptions + + // Query runs the source against `registered` (the eTLD+1 of the + // target domain) and returns one result per logical sub-target. The + // implementation should never return nil: when the source is + // disabled, return a single SourceResult with Enabled=false. + Query(ctx context.Context, domain, registered string, opts sdk.CheckerOptions) []SourceResult + + // Diagnose produces the action-required card for a *listed* result. + // Implementations should focus on the operator's next step; the + // generic report wraps it with the title bar and severity styling. + // Called only when SourceResult.Listed is true. + Diagnose(res SourceResult) Diagnosis +} + +// DetailRenderer is an optional interface a Source can implement when +// the generic SourceResult shape (Reasons + Evidence + URLs) cannot +// fully express its output. Examples: VirusTotal's per-vendor verdict +// table, URLhaus' URL list with online/offline status. The returned +// HTML fragment is dropped into the source's section verbatim and is +// expected to be safe (use html/template or template.HTMLEscape). +type DetailRenderer interface { + Source + RenderDetail(res SourceResult) (template.HTML, error) +} + +// SourceOptions describes the option fields a source contributes to the +// CheckerDefinition. Audiences map directly to the SDK's +// CheckerOptionsDocumentation buckets. +type SourceOptions struct { + Admin []sdk.CheckerOptionField + User []sdk.CheckerOptionField +} + +// SourceResult is the unified envelope every source produces. 
Source- +// specific structured data lives in Details (json.RawMessage), so the +// generic code (rules, headline, base diagnosis card, summary table) +// can operate on the envelope without source-specific switches; the +// rich report sections fish Details back through DetailRenderer. +type SourceResult struct { + SourceID string `json:"source_id"` + SourceName string `json:"source_name"` + Subject string `json:"subject,omitempty"` // e.g. zone label for DNSBL + Enabled bool `json:"enabled"` + Listed bool `json:"listed"` + BlockedQuery bool `json:"blocked_query,omitempty"` // resolver blocked, not a real listing + Severity string `json:"severity,omitempty"` // when Listed + Reasons []string `json:"reasons,omitempty"` + Evidence []Evidence `json:"evidence,omitempty"` + LookupURL string `json:"lookup_url,omitempty"` + RemovalURL string `json:"removal_url,omitempty"` + Reference string `json:"reference,omitempty"` + Error string `json:"error,omitempty"` + Details json.RawMessage `json:"details,omitempty"` +} + +// Evidence is a single observation that supports a verdict. Keeping it +// loosely typed (Label/Value/Status + free-form Extra) covers DNSBL +// return codes, OpenPhish URLs, URLhaus URLs, VT engine verdicts, … +// without growing the schema for each source. +type Evidence struct { + Label string `json:"label"` + Value string `json:"value"` + Status string `json:"status,omitempty"` + Extra map[string]string `json:"extra,omitempty"` +} + +// Diagnosis is the action-required card surfaced at the top of the +// report. Sources build it in their Diagnose method. +type Diagnosis struct { + Severity string + Title string + Detail string + Fix string + FixIsURL bool + LookupURL string + RemovalURL string +} + +// ---------- registry ---------- + +var ( + registryMu sync.RWMutex + registry []Source +) + +// Register adds a Source to the global registry. Intended to be called +// from init(). 
Panics on duplicate IDs so misconfigurations fail loudly +// at startup rather than producing silently-overlapping rules/options. +func Register(s Source) { + registryMu.Lock() + defer registryMu.Unlock() + for _, existing := range registry { + if existing.ID() == s.ID() { + panic("checker-blacklist: duplicate source ID " + s.ID()) + } + } + registry = append(registry, s) +} + +// Sources returns a snapshot of the registered sources, in registration +// order. Callers must not mutate the slice. +func Sources() []Source { + registryMu.RLock() + defer registryMu.RUnlock() + out := make([]Source, len(registry)) + copy(out, registry) + return out +} diff --git a/checker/source_test.go b/checker/source_test.go new file mode 100644 index 0000000..2bd1cd6 --- /dev/null +++ b/checker/source_test.go @@ -0,0 +1,40 @@ +package checker + +import ( + "testing" +) + +// TestRegisteredSourcesAreSane is a smoke test that runs over every +// init()-registered source and verifies basic invariants. New sources +// added later are picked up automatically. +func TestRegisteredSourcesAreSane(t *testing.T) { + seen := map[string]bool{} + for _, s := range Sources() { + if s.ID() == "" || s.Name() == "" { + t.Errorf("source has empty ID or Name: %+v", s) + } + if seen[s.ID()] { + t.Errorf("duplicate source ID: %s", s.ID()) + } + seen[s.ID()] = true + + o := s.Options() + for _, f := range append(append([]any{}, toAny(o.Admin)...), toAny(o.User)...) { + _ = f + } + } + // At least the built-in sources are present. 
+ for _, want := range []string{"dnsbl", "google_safe_browsing", "openphish", "urlhaus", "virustotal"} { + if !seen[want] { + t.Errorf("missing built-in source %q", want) + } + } +} + +func toAny[T any](in []T) []any { + out := make([]any, len(in)) + for i, v := range in { + out[i] = v + } + return out +} diff --git a/checker/testhelpers_test.go b/checker/testhelpers_test.go new file mode 100644 index 0000000..4f6edcd --- /dev/null +++ b/checker/testhelpers_test.go @@ -0,0 +1,26 @@ +package checker + +import ( + "encoding/json" + "testing" + + sdk "git.happydns.org/checker-sdk-go/checker" +) + +// staticCtx is a minimal sdk.ReportContext used by report tests. +type staticCtx struct { + data json.RawMessage +} + +func (s staticCtx) Data() json.RawMessage { return s.data } +func (staticCtx) Related(sdk.ObservationKey) []sdk.RelatedObservation { return nil } +func (staticCtx) States() []sdk.CheckState { return nil } + +func jsonOf(t *testing.T, v any) []byte { + t.Helper() + b, err := json.Marshal(v) + if err != nil { + t.Fatalf("marshal: %v", err) + } + return b +} diff --git a/checker/types.go b/checker/types.go new file mode 100644 index 0000000..4022687 --- /dev/null +++ b/checker/types.go @@ -0,0 +1,51 @@ +// Package checker implements the happyDomain blacklist checker. +// +// It tells you whether a domain is currently listed on widely-used +// reputation systems (DNS-based blocklists, Google Safe Browsing, +// OpenPhish, URLhaus, VirusTotal, …). Every source plugs into a small +// internal registry: adding a new one is a single file declaring a +// Source implementation and a Register call from init(). +package checker + +import "time" + +// ObservationKeyBlacklist is the unique observation key produced by this +// checker. Persisted in storage and referenced by the definition. +const ObservationKeyBlacklist = "blacklist" + +// BlacklistData is the snapshot Collect produces for one domain. 
The +// per-source structs that used to live here are gone; the report and +// rules walk Results directly and the source-specific extras are kept +// in SourceResult.Details (json.RawMessage) when needed. +type BlacklistData struct { + Domain string `json:"domain"` + RegisteredDomain string `json:"registered_domain,omitempty"` + CollectedAt time.Time `json:"collected_at"` + Results []SourceResult `json:"results"` +} + +// TotalHits returns the number of distinct *sources* (not subjects) +// where the domain is currently flagged. A multi-zone source like +// DNSBL counts as many hits as it has listed zones, mirroring the way +// the report visualises severity. +func (d *BlacklistData) TotalHits() int { + n := 0 + for _, r := range d.Results { + if r.Listed { + n++ + } + } + return n +} + +// FilterListed returns the subset of results that are currently +// flagged. Order is preserved (registration → query order). +func (d *BlacklistData) FilterListed() []SourceResult { + out := make([]SourceResult, 0, len(d.Results)) + for _, r := range d.Results { + if r.Listed { + out = append(out, r) + } + } + return out +} diff --git a/checker/urlhaus.go b/checker/urlhaus.go new file mode 100644 index 0000000..4636ac8 --- /dev/null +++ b/checker/urlhaus.go @@ -0,0 +1,201 @@ +package checker + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "html/template" + "net/http" + "net/url" + "strings" + "time" + + sdk "git.happydns.org/checker-sdk-go/checker" +) + +const urlhausHostEndpoint = "https://urlhaus-api.abuse.ch/v1/host/" + +func init() { Register(&urlhausSource{endpoint: urlhausHostEndpoint}) } + +type urlhausSource struct { + endpoint string +} + +func (*urlhausSource) ID() string { return "urlhaus" } +func (*urlhausSource) Name() string { return "abuse.ch URLhaus" } + +func (*urlhausSource) Options() SourceOptions { + return SourceOptions{ + User: []sdk.CheckerOptionField{ + { + Id: "enable_urlhaus", + Type: "bool", + Label: "Use abuse.ch URLhaus", + Description: 
"Query the URLhaus host endpoint for active malware-distribution URLs hosted on the domain.", + Default: true, + }, + }, + Admin: []sdk.CheckerOptionField{ + { + Id: "urlhaus_auth_key", + Type: "string", + Label: "URLhaus Auth-Key", + Description: "abuse.ch URLhaus Auth-Key (free, requires an abuse.ch account). Required: the URLhaus API rejects anonymous requests with HTTP 401. Without this key the source is disabled.", + Secret: true, + }, + }, + } +} + +// urlhausDetails is the source-specific extras kept in +// SourceResult.Details so the rich detail renderer can show a per-URL +// table with online/offline state, threat type, tags and date added. +type urlhausDetails struct { + URLs []urlhausURL `json:"urls"` +} + +type urlhausURL struct { + URL string `json:"url"` + Status string `json:"status"` + Threat string `json:"threat"` + Tags []string `json:"tags,omitempty"` + DateAdded string `json:"date_added,omitempty"` + Reference string `json:"reference,omitempty"` +} + +func (s *urlhausSource) Query(ctx context.Context, domain, registered string, opts sdk.CheckerOptions) []SourceResult { + authKey := stringOpt(opts, "urlhaus_auth_key") + if !sdk.GetBoolOption(opts, "enable_urlhaus", true) || registered == "" || authKey == "" { + return []SourceResult{{SourceID: s.ID(), SourceName: s.Name(), Enabled: false}} + } + + res := SourceResult{SourceID: s.ID(), SourceName: s.Name(), Enabled: true} + + reqCtx, cancel := context.WithTimeout(ctx, 15*time.Second) + defer cancel() + + form := url.Values{"host": {registered}} + req, err := http.NewRequestWithContext(reqCtx, http.MethodPost, s.endpoint, strings.NewReader(form.Encode())) + if err != nil { + res.Error = err.Error() + return []SourceResult{res} + } + req.Header.Set("Content-Type", "application/x-www-form-urlencoded") + req.Header.Set("User-Agent", "happydomain-checker-blacklist/1.0") + if authKey != "" { + req.Header.Set("Auth-Key", authKey) + } + + body, status, err := httpDo(req, 4<<20) + if err != nil { + 
res.Error = err.Error() + return []SourceResult{res} + } + if status != http.StatusOK { + res.Error = fmt.Sprintf("HTTP %d: %s", status, truncate(string(body), 200)) + return []SourceResult{res} + } + + var parsed struct { + QueryStatus string `json:"query_status"` + Reference string `json:"urlhaus_reference"` + URLs []struct { + URL string `json:"url"` + Status string `json:"url_status"` + Threat string `json:"threat"` + Tags []string `json:"tags"` + DateAdded string `json:"date_added"` + Reference string `json:"urlhaus_reference"` + } `json:"urls"` + } + if err := json.Unmarshal(body, &parsed); err != nil { + res.Error = "decode: " + err.Error() + return []SourceResult{res} + } + + res.Reference = parsed.Reference + switch parsed.QueryStatus { + case "ok": + if len(parsed.URLs) == 0 { + return []SourceResult{res} + } + res.Listed = true + res.Severity = SeverityCrit + threats := map[string]bool{} + details := urlhausDetails{} + for _, u := range parsed.URLs { + if u.Threat != "" && !threats[u.Threat] { + threats[u.Threat] = true + res.Reasons = append(res.Reasons, u.Threat) + } + res.Evidence = append(res.Evidence, Evidence{ + Label: "URL", Value: u.URL, Status: u.Status, + }) + details.URLs = append(details.URLs, urlhausURL{ + URL: u.URL, Status: u.Status, Threat: u.Threat, + Tags: u.Tags, DateAdded: u.DateAdded, Reference: u.Reference, + }) + } + res.Details = mustJSON(details) + case "no_results": + // Clean. + case "invalid_host", "http_post_expected": + res.Error = "rejected query: " + parsed.QueryStatus + default: + res.Error = "query_status=" + parsed.QueryStatus + } + return []SourceResult{res} +} + +func (*urlhausSource) Diagnose(res SourceResult) Diagnosis { + online := 0 + for _, e := range res.Evidence { + if e.Status == "online" { + online++ + } + } + return Diagnosis{ + Severity: SeverityCrit, + Title: "Listed in abuse.ch URLhaus (active malware distribution)", + Detail: fmt.Sprintf( + "%d URL(s) tracked, %d still online; threat type(s): %s. 
URLhaus indexes URLs that actively serve malware payloads. Treat the host as compromised: take the offending pages offline, audit the web stack (CMS plugins, recently-uploaded files, cron jobs), then submit a takedown notification through the URLhaus reference page.", + len(res.Evidence), online, joinNonEmpty(res.Reasons, ", "), + ), + Fix: res.Reference, + FixIsURL: res.Reference != "", + } +} + +// RenderDetail renders the URLhaus URL table. Implementing +// DetailRenderer keeps the rich per-source view alongside the source +// implementation rather than scattered in the report code. +func (*urlhausSource) RenderDetail(res SourceResult) (template.HTML, error) { + var d urlhausDetails + if len(res.Details) > 0 { + if err := json.Unmarshal(res.Details, &d); err != nil { + return "", fmt.Errorf("urlhaus: decode details: %w", err) + } + } + if len(d.URLs) == 0 { + return "", nil + } + + var b bytes.Buffer + if err := urlhausDetailTpl.Execute(&b, d); err != nil { + return "", err + } + return template.HTML(b.String()), nil +} + +var urlhausDetailTpl = template.Must(template.New("urlhaus_detail").Parse(` + + +{{range .URLs}} + + + + + +{{end}} +
URLStatusThreatTagsAdded
{{.URL}}{{with .Reference}} {{end}}{{.Status}}{{.Threat}}{{range .Tags}}{{.}} {{end}}{{.DateAdded}}
`)) diff --git a/checker/urlhaus_test.go b/checker/urlhaus_test.go new file mode 100644 index 0000000..0a10921 --- /dev/null +++ b/checker/urlhaus_test.go @@ -0,0 +1,91 @@ +package checker + +import ( + "context" + "encoding/json" + "net/http" + "net/http/httptest" + "strings" + "testing" + + sdk "git.happydns.org/checker-sdk-go/checker" +) + +func TestURLhausSource_NoResults(t *testing.T) { + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/json") + _, _ = w.Write([]byte(`{"query_status":"no_results"}`)) + })) + defer srv.Close() + + s := &urlhausSource{endpoint: srv.URL} + results := s.Query(context.Background(), "example.com", "example.com", sdk.CheckerOptions{"enable_urlhaus": true, "urlhaus_auth_key": "k"}) + if len(results) != 1 { + t.Fatalf("expected 1 result, got %d", len(results)) + } + r := results[0] + if !r.Enabled || r.Listed || r.Error != "" { + t.Fatalf("expected enabled+clean, got %+v", r) + } +} + +func TestURLhausSource_Listed(t *testing.T) { + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + _ = r.ParseForm() + if r.FormValue("host") == "" { + t.Errorf("missing host form value") + } + w.Header().Set("Content-Type", "application/json") + _, _ = w.Write([]byte(`{ + "query_status":"ok", + "urlhaus_reference":"https://urlhaus.abuse.ch/host/example.com/", + "urls":[ + {"url":"http://example.com/payload.exe","url_status":"online","threat":"malware_download","tags":["exe","emotet"],"date_added":"2024-01-01","urlhaus_reference":"https://urlhaus.abuse.ch/url/1/"} + ] + }`)) + })) + defer srv.Close() + + s := &urlhausSource{endpoint: srv.URL} + r := s.Query(context.Background(), "example.com", "example.com", sdk.CheckerOptions{"enable_urlhaus": true, "urlhaus_auth_key": "k"})[0] + if !r.Listed || len(r.Evidence) != 1 { + t.Fatalf("expected 1 listed evidence, got %+v", r) + } + if r.Evidence[0].Status != "online" { + 
t.Errorf("evidence status = %q", r.Evidence[0].Status) + } + + // Details should round-trip. + var d urlhausDetails + if err := json.Unmarshal(r.Details, &d); err != nil || len(d.URLs) != 1 || d.URLs[0].Threat != "malware_download" { + t.Errorf("details round-trip wrong: %+v", d) + } + + // Rich detail renderer should produce a non-empty table. + html, err := s.RenderDetail(r) + if err != nil || !strings.Contains(string(html), "payload.exe") { + t.Errorf("RenderDetail: html=%q err=%v", html, err) + } +} + +func TestURLhausSource_HTTPError(t *testing.T) { + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusUnauthorized) + _, _ = w.Write([]byte("missing key")) + })) + defer srv.Close() + + s := &urlhausSource{endpoint: srv.URL} + r := s.Query(context.Background(), "example.com", "example.com", sdk.CheckerOptions{"enable_urlhaus": true, "urlhaus_auth_key": "k"})[0] + if r.Error == "" || !strings.Contains(r.Error, "401") { + t.Errorf("expected 401 error, got %+v", r) + } +} + +func TestURLhausSource_Disabled(t *testing.T) { + s := &urlhausSource{endpoint: "http://nope"} + r := s.Query(context.Background(), "example.com", "example.com", sdk.CheckerOptions{"enable_urlhaus": false})[0] + if r.Enabled { + t.Errorf("expected disabled, got %+v", r) + } +} diff --git a/checker/virustotal.go b/checker/virustotal.go new file mode 100644 index 0000000..5213df9 --- /dev/null +++ b/checker/virustotal.go @@ -0,0 +1,220 @@ +package checker + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "html/template" + "net/http" + "sort" + "time" + + sdk "git.happydns.org/checker-sdk-go/checker" +) + +const virusTotalEndpoint = "https://www.virustotal.com/api/v3/domains/" + +func init() { Register(&virusTotalSource{endpoint: virusTotalEndpoint}) } + +type virusTotalSource struct { + endpoint string +} + +func (*virusTotalSource) ID() string { return "virustotal" } +func (*virusTotalSource) Name() string { return 
"VirusTotal" } + +func (*virusTotalSource) Options() SourceOptions { + return SourceOptions{ + Admin: []sdk.CheckerOptionField{ + { + Id: "virustotal_api_key", + Type: "string", + Label: "VirusTotal API key", + Description: "VirusTotal v3 API key. Free tier is limited to 4 req/min and 500 req/day.", + Secret: true, + }, + }, + } +} + +// vtDetails persists the structured VT response so the rich detail +// renderer can show the per-vendor verdict table and the +// {malicious,suspicious,harmless,undetected} counts. +type vtDetails struct { + Malicious int `json:"malicious"` + Suspicious int `json:"suspicious"` + Harmless int `json:"harmless"` + Undetected int `json:"undetected"` + Total int `json:"total"` + Reputation int `json:"reputation"` + Vendors []vtVendorVerdict `json:"vendors"` +} + +type vtVendorVerdict struct { + Engine string `json:"engine"` + Category string `json:"category"` + Result string `json:"result"` +} + +func (s *virusTotalSource) Query(ctx context.Context, domain, registered string, opts sdk.CheckerOptions) []SourceResult { + apiKey := stringOpt(opts, "virustotal_api_key") + if apiKey == "" { + return []SourceResult{{SourceID: s.ID(), SourceName: s.Name(), Enabled: false}} + } + if registered == "" { + return []SourceResult{{SourceID: s.ID(), SourceName: s.Name(), Enabled: true}} + } + + res := SourceResult{ + SourceID: s.ID(), SourceName: s.Name(), Enabled: true, + Reference: "https://www.virustotal.com/gui/domain/" + registered, + } + + reqCtx, cancel := context.WithTimeout(ctx, 20*time.Second) + defer cancel() + + req, err := http.NewRequestWithContext(reqCtx, http.MethodGet, s.endpoint+registered, nil) + if err != nil { + res.Error = err.Error() + return []SourceResult{res} + } + req.Header.Set("x-apikey", apiKey) + req.Header.Set("Accept", "application/json") + + body, status, err := httpDo(req, 4<<20) + if err != nil { + res.Error = err.Error() + return []SourceResult{res} + } + if status == http.StatusNotFound { + // VT has never seen 
this domain → quiet "not listed". + return []SourceResult{res} + } + if status != http.StatusOK { + res.Error = fmt.Sprintf("HTTP %d: %s", status, truncate(string(body), 200)) + return []SourceResult{res} + } + + var parsed struct { + Data struct { + Attributes struct { + LastAnalysisStats struct { + Harmless int `json:"harmless"` + Malicious int `json:"malicious"` + Suspicious int `json:"suspicious"` + Undetected int `json:"undetected"` + Timeout int `json:"timeout"` + } `json:"last_analysis_stats"` + Reputation int `json:"reputation"` + LastAnalysisRes map[string]struct { + Category string `json:"category"` + Result string `json:"result"` + EngineName string `json:"engine_name"` + } `json:"last_analysis_results"` + } `json:"attributes"` + } `json:"data"` + } + if err := json.Unmarshal(body, &parsed); err != nil { + res.Error = "decode: " + err.Error() + return []SourceResult{res} + } + + stats := parsed.Data.Attributes.LastAnalysisStats + d := vtDetails{ + Malicious: stats.Malicious, + Suspicious: stats.Suspicious, + Harmless: stats.Harmless, + Undetected: stats.Undetected, + Total: stats.Harmless + stats.Malicious + stats.Suspicious + stats.Undetected + stats.Timeout, + Reputation: parsed.Data.Attributes.Reputation, + } + for engine, v := range parsed.Data.Attributes.LastAnalysisRes { + if v.Category != "malicious" && v.Category != "suspicious" { + continue + } + name := v.EngineName + if name == "" { + name = engine + } + d.Vendors = append(d.Vendors, vtVendorVerdict{Engine: name, Category: v.Category, Result: v.Result}) + } + sort.Slice(d.Vendors, func(i, j int) bool { + if d.Vendors[i].Category != d.Vendors[j].Category { + return d.Vendors[i].Category == "malicious" + } + return d.Vendors[i].Engine < d.Vendors[j].Engine + }) + res.Details = mustJSON(d) + + if d.Malicious == 0 && d.Suspicious == 0 { + // Clean. 
+ return []SourceResult{res} + } + res.Listed = true + if d.Malicious > 0 { + res.Severity = SeverityCrit + } else { + res.Severity = SeverityWarn + } + for _, v := range d.Vendors { + res.Reasons = append(res.Reasons, v.Engine) + res.Evidence = append(res.Evidence, Evidence{ + Label: "Engine", Value: v.Engine, Status: v.Category, + Extra: map[string]string{"result": v.Result}, + }) + } + return []SourceResult{res} +} + +func (*virusTotalSource) Diagnose(res SourceResult) Diagnosis { + var d vtDetails + _ = json.Unmarshal(res.Details, &d) + previewN := min(len(d.Vendors), 5) + preview := make([]string, 0, previewN) + for _, v := range d.Vendors[:previewN] { + preview = append(preview, v.Engine) + } + gravity := "Suspicious" + sev := SeverityWarn + if d.Malicious > 0 { + gravity = "Malicious" + sev = SeverityCrit + } + return Diagnosis{ + Severity: sev, + Title: fmt.Sprintf("VirusTotal: %d/%d engine(s) flagged the domain (%s)", d.Malicious+d.Suspicious, d.Total, gravity), + Detail: fmt.Sprintf( + "Reputation %d. Vendors flagging this domain include: %s. Open the VirusTotal page to see the per-engine verdicts and the related URLs/downloads. If you believe the verdicts are stale, request a re-scan from the VirusTotal page; for false positives, contact each engine vendor directly (VT does not arbitrate).", + d.Reputation, joinNonEmpty(preview, ", "), + ), + Fix: res.Reference, + FixIsURL: res.Reference != "", + } +} + +func (*virusTotalSource) RenderDetail(res SourceResult) (template.HTML, error) { + var d vtDetails + if len(res.Details) > 0 { + if err := json.Unmarshal(res.Details, &d); err != nil { + return "", fmt.Errorf("virustotal: decode details: %w", err) + } + } + if d.Total == 0 && len(d.Vendors) == 0 { + return "", nil + } + var b bytes.Buffer + if err := vtDetailTpl.Execute(&b, d); err != nil { + return "", err + } + return template.HTML(b.String()), nil +} + +var vtDetailTpl = template.Must(template.New("vt_detail").Parse(` +

Engines: {{.Malicious}} malicious, {{.Suspicious}} suspicious, {{.Harmless}} harmless, {{.Undetected}} undetected (total {{.Total}}). Reputation score: {{.Reputation}}.

+{{if .Vendors}} + +{{range .Vendors}} + +{{end}} +
EngineVerdictResult
{{.Engine}}{{.Category}}{{.Result}}
{{end}}`)) diff --git a/checker/virustotal_test.go b/checker/virustotal_test.go new file mode 100644 index 0000000..aba955c --- /dev/null +++ b/checker/virustotal_test.go @@ -0,0 +1,81 @@ +package checker + +import ( + "context" + "encoding/json" + "net/http" + "net/http/httptest" + "strings" + "testing" + + sdk "git.happydns.org/checker-sdk-go/checker" +) + +func newVTServer(t *testing.T, status int, body string) (string, func()) { + t.Helper() + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.Header.Get("x-apikey") == "" { + t.Errorf("missing x-apikey header") + } + w.WriteHeader(status) + _, _ = w.Write([]byte(body)) + })) + return srv.URL + "/", srv.Close +} + +func TestVTSource_NoKey(t *testing.T) { + s := &virusTotalSource{endpoint: virusTotalEndpoint} + r := s.Query(context.Background(), "example.com", "example.com", sdk.CheckerOptions{})[0] + if r.Enabled { + t.Errorf("expected disabled without API key, got %+v", r) + } +} + +func TestVTSource_Listed(t *testing.T) { + body := `{"data":{"attributes":{ + "reputation":-25, + "last_analysis_stats":{"harmless":50,"malicious":3,"suspicious":1,"undetected":40,"timeout":0}, + "last_analysis_results":{ + "E1":{"category":"malicious","result":"phishing","engine_name":"E1"}, + "E2":{"category":"suspicious","result":"susp","engine_name":"E2"}, + "E3":{"category":"harmless","result":"clean","engine_name":"E3"} + } + }}}` + endpoint, stop := newVTServer(t, http.StatusOK, body) + defer stop() + + s := &virusTotalSource{endpoint: endpoint} + r := s.Query(context.Background(), "example.com", "example.com", sdk.CheckerOptions{"virustotal_api_key": "k"})[0] + if !r.Listed || r.Severity != SeverityCrit { + t.Errorf("expected listed+crit, got %+v", r) + } + var d vtDetails + if err := json.Unmarshal(r.Details, &d); err != nil { + t.Fatalf("details decode: %v", err) + } + if d.Malicious != 3 || d.Suspicious != 1 || d.Reputation != -25 { + t.Errorf("counts wrong: %+v", d) + } + if 
len(d.Vendors) != 2 || d.Vendors[0].Category != "malicious" { + t.Errorf("vendor ordering wrong: %+v", d.Vendors) + } + + html, err := s.RenderDetail(r) + if err != nil || !strings.Contains(string(html), "malicious") { + t.Errorf("RenderDetail html=%q err=%v", html, err) + } +} + +func TestVTSource_NotFound(t *testing.T) { + endpoint, stop := newVTServer(t, http.StatusNotFound, `{"error":{"code":"NotFoundError"}}`) + defer stop() + + s := &virusTotalSource{endpoint: endpoint} + r := s.Query(context.Background(), "example.com", "example.com", sdk.CheckerOptions{"virustotal_api_key": "k"})[0] + if r.Listed || r.Error != "" { + t.Errorf("404 should be quiet not-listed: %+v", r) + } + if !strings.Contains(r.Reference, "example.com") { + t.Errorf("reference URL missing: %+v", r) + } +} diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..042aaf7 --- /dev/null +++ b/go.mod @@ -0,0 +1,8 @@ +module git.happydns.org/checker-blacklist + +go 1.25.0 + +require ( + git.happydns.org/checker-sdk-go v1.5.0 + golang.org/x/net v0.34.0 +) diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..5631c56 --- /dev/null +++ b/go.sum @@ -0,0 +1,4 @@ +git.happydns.org/checker-sdk-go v1.5.0 h1:5uD5Cm6xJ+lwnhbJ09iCXGHbYS9zRh+Yh0NeBHkAPBY= +git.happydns.org/checker-sdk-go v1.5.0/go.mod h1:aNAcfYFfbhvH9kJhE0Njp5GX0dQbxdRB0rJ0KvSC5nI= +golang.org/x/net v0.34.0 h1:Mb7Mrk043xzHgnRM88suvJFwzVrRfHEHJEl5/71CKw0= +golang.org/x/net v0.34.0/go.mod h1:di0qlW3YNM5oh6GqDGQr92MyTozJPmybPK4Ev/Gm31k= diff --git a/main.go b/main.go new file mode 100644 index 0000000..db25a34 --- /dev/null +++ b/main.go @@ -0,0 +1,27 @@ +package main + +import ( + "flag" + "log" + + bl "git.happydns.org/checker-blacklist/checker" + "git.happydns.org/checker-sdk-go/checker/server" +) + +// Version is the standalone binary's version. Override at link time: +// +// go build -ldflags "-X main.Version=1.2.3" . 
+var Version = "custom-build" + +var listenAddr = flag.String("listen", ":8080", "HTTP listen address") + +func main() { + flag.Parse() + + bl.Version = Version + + srv := server.New(bl.Provider()) + if err := srv.ListenAndServe(*listenAddr); err != nil { + log.Fatalf("server error: %v", err) + } +} diff --git a/plugin/plugin.go b/plugin/plugin.go new file mode 100644 index 0000000..05bf2f8 --- /dev/null +++ b/plugin/plugin.go @@ -0,0 +1,20 @@ +// Command plugin is the happyDomain plugin entrypoint for the blacklist +// checker. +// +// Build with `go build -buildmode=plugin -o checker-blacklist.so ./plugin`. +package main + +import ( + bl "git.happydns.org/checker-blacklist/checker" + sdk "git.happydns.org/checker-sdk-go/checker" +) + +var Version = "custom-build" + +// NewCheckerPlugin is the symbol resolved by happyDomain when loading +// the .so file. It returns the checker definition and the observation +// provider that the host registers globally. +func NewCheckerPlugin() (*sdk.CheckerDefinition, sdk.ObservationProvider, error) { + bl.Version = Version + return bl.Definition(), bl.Provider(), nil +}