From 6b08676ec5f641ddb198fc5d625e4a5c95f3cea6 Mon Sep 17 00:00:00 2001
From: Pierre-Olivier Mercier
Date: Fri, 15 May 2026 18:05:56 +0800
Subject: [PATCH] Add PhishTank as a new blacklist source

---
 README.md            |   3 +-
 checker/phishtank.go | 315 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 317 insertions(+), 1 deletion(-)
 create mode 100644 checker/phishtank.go

diff --git a/README.md b/README.md
index 7df0389..0b541df 100644
--- a/README.md
+++ b/README.md
@@ -13,7 +13,8 @@ widely-used reputation systems.
 | Extra DNSBL zones | DNS-based DBL | no | admin |
 | Google Safe Browsing | HTTPS lookup | yes (admin) | admin |
 | OpenPhish public feed | downloaded list | no | user (default on) |
-| abuse.ch URLhaus | HTTPS lookup | optional Auth-Key (admin) | user (default on) |
+| PhishTank | downloaded list | no | user (default on) |
+| abuse.ch URLhaus | HTTPS lookup | free Auth-Key (admin) | user (default on) |
 | VirusTotal v3 | HTTPS lookup | yes (admin) | admin |
 
 ### Obtaining API keys
diff --git a/checker/phishtank.go b/checker/phishtank.go
new file mode 100644
index 0000000..003fb7b
--- /dev/null
+++ b/checker/phishtank.go
@@ -0,0 +1,315 @@
+package checker
+
+import (
+	"bufio"
+	"compress/gzip"
+	"context"
+	"encoding/csv"
+	"errors"
+	"fmt"
+	"io"
+	"net/http"
+	"strconv"
+	"strings"
+	"sync"
+	"time"
+
+	sdk "git.happydns.org/checker-sdk-go/checker"
+)
+
+const (
+	phishTankFeedURL    = "https://data.phishtank.com/data/online-valid.csv.gz"
+	phishTankDefaultTTL = 12 * time.Hour
+
+	// phishTankMaxRefreshHours caps the admin-supplied refresh interval so
+	// that converting it to a time.Duration cannot overflow.
+	phishTankMaxRefreshHours = 24 * 365
+
+	// phishTankMaxCompressed and phishTankMaxDecompressed bound how many
+	// bytes of the feed are read before and after gzip decoding.
+	phishTankMaxCompressed   = 128 << 20
+	phishTankMaxDecompressed = 512 << 20
+)
+
+// errPhishTankNotLoaded is returned by lookups made while no copy of the feed
+// is held in memory, so that an empty cache is never reported as "not listed".
+var errPhishTankNotLoaded = errors.New("phishtank feed not loaded yet")
+
+// phishTankGlobalCache is the process-wide copy of the feed used by Query.
+var phishTankGlobalCache = newPhishTankCache()
+
+func init() { Register(&phishTankSource{}) }
+
+// phishTankSource checks a domain against the PhishTank online-valid feed.
+type phishTankSource struct{}
+
+// ID returns the source identifier.
+func (*phishTankSource) ID() string { return "phishtank" }
+
+// Name returns the display name of the source.
+func (*phishTankSource) Name() string { return "PhishTank" }
+
+// Options declares the admin and user settings exposed by this source.
+func (*phishTankSource) Options() SourceOptions {
+	return SourceOptions{
+		Admin: []sdk.CheckerOptionField{
+			{
+				Id:          "phishtank_refresh_hours",
+				Type:        "string",
+				Label:       "PhishTank feed refresh interval (hours)",
+				Description: "How often to re-download the PhishTank online-valid feed. Minimum: 1. Default: 12.",
+				Default:     "12",
+			},
+		},
+		User: []sdk.CheckerOptionField{
+			{
+				Id:          "enable_phishtank",
+				Type:        "bool",
+				Label:       "Use the PhishTank feed",
+				Description: "Download the PhishTank verified phishing list and check the domain against it.",
+				Default:     true,
+			},
+		},
+	}
+}
+
+// Query looks up the registered domain in the cached feed. It returns a single
+// result, disabled when the user turned the source off or when no registered
+// domain is known.
+func (s *phishTankSource) Query(ctx context.Context, domain, registered string, opts sdk.CheckerOptions) []SourceResult {
+	if !sdk.GetBoolOption(opts, "enable_phishtank", true) || registered == "" {
+		return []SourceResult{{SourceID: s.ID(), SourceName: s.Name(), Enabled: false}}
+	}
+
+	if ttlRaw, ok := sdk.GetOption[string](opts, "phishtank_refresh_hours"); ok && ttlRaw != "" {
+		// Unparsable values and values below 1 are ignored; the current TTL is kept.
+		if hours, err := strconv.Atoi(strings.TrimSpace(ttlRaw)); err == nil && hours >= 1 {
+			hours = min(hours, phishTankMaxRefreshHours)
+			phishTankGlobalCache.setTTL(time.Duration(hours) * time.Hour)
+		}
+	}
+
+	urls, size, fetched, err := phishTankGlobalCache.lookup(ctx, registered)
+	res := SourceResult{
+		SourceID: s.ID(), SourceName: s.Name(), Enabled: true,
+		Reference: "https://www.phishtank.com/",
+		Details:   mustJSON(map[string]any{"feed_size": size, "fetched_at": fetched}),
+	}
+	if err != nil {
+		res.Error = err.Error()
+	}
+	if len(urls) > 0 {
+		res.Reasons = []string{"Phishing"}
+		for _, u := range urls {
+			res.Evidence = append(res.Evidence, Evidence{Label: "URL", Value: u})
+		}
+	}
+	return []SourceResult{res}
+}
+
+// Evaluate reports a critical hit when an error-free result carries evidence.
+func (*phishTankSource) Evaluate(r SourceResult) (bool, string) {
+	if r.Enabled && r.Error == "" && len(r.Evidence) > 0 {
+		return true, SeverityCrit
+	}
+	return false, ""
+}
+
+// Diagnose builds the user-facing explanation for a listing, quoting at most
+// five of the matching URLs.
+func (*phishTankSource) Diagnose(res SourceResult) Diagnosis {
+	urls := make([]string, 0, len(res.Evidence))
+	for _, e := range res.Evidence {
+		urls = append(urls, e.Value)
+	}
+	previewN := min(len(urls), 5)
+	return Diagnosis{
+		Severity: SeverityCrit,
+		Title:    "Listed in the PhishTank phishing database",
+		Detail: fmt.Sprintf(
+			"%d URL(s) hosted on this domain are tracked as verified phishing by PhishTank. Examples: %s",
+			len(urls), joinNonEmpty(urls[:previewN], ", "),
+		),
+		Fix:      "https://www.phishtank.com/developer_info.php",
+		FixIsURL: true,
+	}
+}
+
+// ---------- feed cache ----------
+
+// phishTankCache holds the most recently downloaded feed together with the
+// bookkeeping used to decide when to download it again. After construction,
+// its fields are read and written under mu.
+type phishTankCache struct {
+	mu            sync.Mutex
+	urls          []string
+	byHost        map[string][]string // lower-cased host -> feed URLs on that host
+	fetchedAt     time.Time
+	lastAttemptAt time.Time
+	lastErr       error // outcome of the most recent download attempt
+	refreshing    bool
+	ttl           time.Duration
+	failBackoff   time.Duration
+}
+
+// newPhishTankCache returns an empty cache using the default TTL and a
+// one-minute pause between failed download attempts.
+func newPhishTankCache() *phishTankCache {
+	return &phishTankCache{
+		ttl:         phishTankDefaultTTL,
+		failBackoff: 1 * time.Minute,
+	}
+}
+
+// setTTL changes how long a downloaded feed is considered fresh.
+func (c *phishTankCache) setTTL(d time.Duration) {
+	c.mu.Lock()
+	c.ttl = d
+	c.mu.Unlock()
+}
+
+// lookup returns the feed URLs hosted on domain or any of its subdomains,
+// along with the feed size and the time of the last successful download.
+//
+// The caller that finds the feed missing or older than the TTL downloads it,
+// unless a download is already running or the last attempt was less than
+// failBackoff ago. A failed refresh is reported through err while matches from
+// the previous copy, if any, are still returned. When no copy has ever been
+// loaded, err is always non-nil.
+func (c *phishTankCache) lookup(ctx context.Context, domain string) (urls []string, size int, fetchedAt time.Time, err error) {
+	domain = strings.ToLower(strings.TrimSuffix(domain, "."))
+
+	c.mu.Lock()
+	stale := c.byHost == nil || time.Since(c.fetchedAt) > c.ttl
+	doRefresh := stale && !c.refreshing && time.Since(c.lastAttemptAt) > c.failBackoff
+	if doRefresh {
+		c.refreshing = true
+	}
+	c.mu.Unlock()
+
+	if doRefresh {
+		// The download runs without the lock so concurrent lookups keep
+		// being served from the previous copy.
+		newURLs, newByHost, ferr := c.fetch(ctx)
+		c.mu.Lock()
+		c.refreshing = false
+		c.lastAttemptAt = time.Now()
+		c.lastErr = ferr
+		if ferr == nil {
+			c.urls = newURLs
+			c.byHost = newByHost
+			c.fetchedAt = c.lastAttemptAt
+		} else {
+			err = ferr
+		}
+		c.mu.Unlock()
+	}
+
+	c.mu.Lock()
+	defer c.mu.Unlock()
+
+	if c.byHost == nil {
+		// Nothing to search: surface why instead of answering "not listed".
+		if err == nil {
+			err = c.lastErr
+		}
+		if err == nil {
+			err = errPhishTankNotLoaded
+		}
+		return nil, 0, c.fetchedAt, err
+	}
+
+	for host, hostURLs := range c.byHost {
+		if host == domain || strings.HasSuffix(host, "."+domain) {
+			urls = append(urls, hostURLs...)
+		}
+	}
+	return urls, len(c.urls), c.fetchedAt, err
+}
+
+// fetch downloads and parses the feed, returning every listed URL and the same
+// URLs grouped by lower-cased host. The download is abandoned after 120
+// seconds or when ctx is cancelled, whichever comes first.
+func (c *phishTankCache) fetch(ctx context.Context) ([]string, map[string][]string, error) {
+	reqCtx, cancel := context.WithTimeout(ctx, 120*time.Second)
+	defer cancel()
+
+	req, err := http.NewRequestWithContext(reqCtx, http.MethodGet, phishTankFeedURL, nil)
+	if err != nil {
+		return nil, nil, err
+	}
+	req.Header.Set("User-Agent", "happydomain-checker-blacklist/1.0")
+
+	resp, err := sharedHTTPClient.Do(req)
+	if err != nil {
+		return nil, nil, err
+	}
+	defer resp.Body.Close()
+
+	if resp.StatusCode != http.StatusOK {
+		return nil, nil, fmt.Errorf("phishtank HTTP %d", resp.StatusCode)
+	}
+
+	gz, err := gzip.NewReader(io.LimitReader(resp.Body, phishTankMaxCompressed))
+	if err != nil {
+		return nil, nil, fmt.Errorf("phishtank gzip: %w", err)
+	}
+	defer gz.Close()
+
+	// Cap the decompressed stream too, otherwise a small archive could expand
+	// without bound. One byte beyond the cap is allowed so that an oversized
+	// feed can be told apart from one that ends exactly at the limit.
+	body := &io.LimitedReader{R: gz, N: phishTankMaxDecompressed + 1}
+
+	r := csv.NewReader(bufio.NewReader(body))
+	r.ReuseRecord = true
+	// Accept rows of any width; rows too short to hold the URL are skipped below.
+	r.FieldsPerRecord = -1
+
+	header, err := r.Read()
+	if err != nil {
+		return nil, nil, fmt.Errorf("phishtank csv header: %w", err)
+	}
+	urlIdx := -1
+	for i, col := range header {
+		// Tolerate a byte-order mark, padding and case differences.
+		col = strings.TrimSpace(strings.TrimPrefix(col, "\ufeff"))
+		if strings.EqualFold(col, "url") {
+			urlIdx = i
+			break
+		}
+	}
+	if urlIdx < 0 {
+		return nil, nil, fmt.Errorf("phishtank csv: no 'url' column in header")
+	}
+
+	urls := make([]string, 0, 32768)
+	byHost := make(map[string][]string, 32768)
+	for {
+		record, err := r.Read()
+		if errors.Is(err, io.EOF) {
+			break
+		}
+		if err != nil {
+			return nil, nil, fmt.Errorf("phishtank csv: %w", err)
+		}
+		if urlIdx >= len(record) {
+			continue
+		}
+		u := strings.TrimSpace(record[urlIdx])
+		if u == "" {
+			continue
+		}
+		// The fields of a row share one backing string; copy the URL so the
+		// cache does not keep the rest of each row alive.
+		u = strings.Clone(u)
+		urls = append(urls, u)
+		if h := strings.ToLower(strings.TrimSuffix(hostOfURL(u), ".")); h != "" {
+			byHost[h] = append(byHost[h], u)
+		}
+	}
+	if body.N <= 0 {
+		return nil, nil, fmt.Errorf("phishtank feed exceeds %d bytes once decompressed", phishTankMaxDecompressed)
+	}
+	return urls, byHost, nil
+}