package checker

import (
	"bufio"
	"compress/gzip"
	"context"
	"encoding/csv"
	"fmt"
	"io"
	"net/http"
	"strconv"
	"strings"
	"time"

	sdk "git.happydns.org/checker-sdk-go/checker"
)

const (
	// phishTankFeedURL is the gzip-compressed CSV feed downloaded by
	// phishTankFetch.
	phishTankFeedURL = "https://data.phishtank.com/data/online-valid.csv.gz"

	// phishTankDefaultTTL is the TTL the shared feed cache is created with;
	// Query may override it from the "phishtank_refresh_hours" option.
	phishTankDefaultTTL = 12 * time.Hour

	// phishTankMaxCompressedBytes caps how much of the HTTP response body is
	// handed to the gzip decoder.
	phishTankMaxCompressedBytes = 128 << 20

	// phishTankMaxDecompressedBytes caps the size of the decompressed CSV so
	// that a small but highly compressed payload cannot exhaust memory.
	phishTankMaxDecompressedBytes = 512 << 20
)

// phishTankGlobalCache is the package-wide feed cache shared by every
// phishTankSource query. It is built from the default TTL and phishTankFetch.
var phishTankGlobalCache = newFeedCache(phishTankDefaultTTL, phishTankFetch)

func init() { Register(&phishTankSource{}) }

// phishTankSource checks a domain against the PhishTank online-valid feed.
type phishTankSource struct{}

// ID returns the source identifier, "phishtank".
func (*phishTankSource) ID() string { return "phishtank" }

// Name returns the display name, "PhishTank".
func (*phishTankSource) Name() string { return "PhishTank" }

// Options declares the admin option controlling the feed refresh interval and
// the user option that enables or disables this source.
func (*phishTankSource) Options() SourceOptions {
	return SourceOptions{
		Admin: []sdk.CheckerOptionField{
			{
				Id:          "phishtank_refresh_hours",
				Type:        "string",
				Label:       "PhishTank feed refresh interval (hours)",
				Description: "How often to re-download the PhishTank online-valid feed. Minimum: 1. Default: 12.",
				Default:     "12",
			},
		},
		User: []sdk.CheckerOptionField{
			{
				Id:          "enable_phishtank",
				Type:        "bool",
				Label:       "Use the PhishTank feed",
				Description: "Download the PhishTank verified phishing list and check the domain against it.",
				Default:     true,
			},
		},
	}
}

// Query looks up registered in the shared PhishTank cache and reports every
// matching URL as evidence.
//
// It returns disabledResult when "enable_phishtank" is false or registered is
// empty. A "phishtank_refresh_hours" value that parses as an integer >= 1 is
// applied to the shared cache before the lookup; any other value is ignored.
// A lookup error is recorded in the result's Error field rather than
// returned. The domain parameter is not used by this source.
func (s *phishTankSource) Query(ctx context.Context, domain, registered string, opts sdk.CheckerOptions) []SourceResult {
	if !sdk.GetBoolOption(opts, "enable_phishtank", true) || registered == "" {
		return disabledResult(s.ID(), s.Name())
	}

	// NOTE(review): the TTL is set on the package-wide cache, so the value
	// supplied with one query also affects later queries.
	if ttlRaw, ok := sdk.GetOption[string](opts, "phishtank_refresh_hours"); ok && ttlRaw != "" {
		if hours, err := strconv.Atoi(ttlRaw); err == nil && hours >= 1 {
			phishTankGlobalCache.setTTL(time.Duration(hours) * time.Hour)
		}
	}

	urls, size, fetched, err := phishTankGlobalCache.lookup(ctx, registered)

	res := SourceResult{
		SourceID:   s.ID(),
		SourceName: s.Name(),
		Enabled:    true,
		Reference:  "https://www.phishtank.com/",
		Details:    mustJSON(map[string]any{"feed_size": size, "fetched_at": fetched}),
	}
	if err != nil {
		res.Error = err.Error()
	}
	if len(urls) > 0 {
		res.Reasons = []string{"Phishing"}
		for _, u := range urls {
			res.Evidence = append(res.Evidence, Evidence{Label: "URL", Value: u})
		}
	}
	return []SourceResult{res}
}

// Evaluate delegates to evidenceEval with critical severity.
func (*phishTankSource) Evaluate(r SourceResult) (bool, string) {
	return evidenceEval(r, SeverityCrit)
}

// Diagnose builds a critical diagnosis that states how many URLs were found
// in res.Evidence and lists at most the first five of them as examples.
func (*phishTankSource) Diagnose(res SourceResult) Diagnosis {
	urls := make([]string, 0, len(res.Evidence))
	for _, e := range res.Evidence {
		urls = append(urls, e.Value)
	}
	previewN := min(len(urls), 5)

	return Diagnosis{
		Severity: SeverityCrit,
		Title:    "Listed in the PhishTank phishing database",
		Detail: fmt.Sprintf(
			"%d URL(s) hosted on this domain are tracked as verified phishing by PhishTank. Examples: %s",
			len(urls), joinNonEmpty(urls[:previewN], ", "),
		),
		Fix:      "https://www.phishtank.com/developer_info.php",
		FixIsURL: true,
	}
}

// phishTankFetch downloads and parses the PhishTank gzip-compressed CSV feed.
//
// It returns every non-empty value of the "url" column, plus the same URLs
// grouped by the host that hostOfURL extracts from them. URLs for which
// hostOfURL returns "" appear in the slice but not in the map. Rows too short
// to contain the "url" column are skipped. Both results are nil when an error
// is returned.
//
// The request is bounded by a 120-second timeout derived from ctx. Errors are
// returned for request failures, a non-200 status, an invalid gzip stream, a
// missing "url" header column, malformed CSV, and a decompressed payload
// larger than phishTankMaxDecompressedBytes.
func phishTankFetch(ctx context.Context) ([]string, map[string][]string, error) {
	reqCtx, cancel := context.WithTimeout(ctx, 120*time.Second)
	defer cancel()

	req, err := http.NewRequestWithContext(reqCtx, http.MethodGet, phishTankFeedURL, nil)
	if err != nil {
		return nil, nil, err
	}
	req.Header.Set("User-Agent", "happydomain-checker-blacklist/1.0")

	resp, err := sharedHTTPClient.Do(req)
	if err != nil {
		return nil, nil, err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return nil, nil, fmt.Errorf("phishtank HTTP %d", resp.StatusCode)
	}

	gz, err := gzip.NewReader(io.LimitReader(resp.Body, phishTankMaxCompressedBytes))
	if err != nil {
		return nil, nil, fmt.Errorf("phishtank gzip: %w", err)
	}
	defer gz.Close()

	// Allow one byte past the cap so that "exactly at the limit" can be told
	// apart from "over the limit" once parsing has finished.
	plain := &io.LimitedReader{R: gz, N: phishTankMaxDecompressedBytes + 1}

	r := csv.NewReader(bufio.NewReader(plain))
	r.ReuseRecord = true
	// With the default FieldsPerRecord (0) a row whose field count differs
	// from the header aborts the whole parse with ErrFieldCount, which made
	// the short-row skip below unreachable. -1 disables that check so a single
	// odd row no longer discards the entire feed.
	r.FieldsPerRecord = -1

	header, err := r.Read()
	if err != nil {
		return nil, nil, fmt.Errorf("phishtank csv header: %w", err)
	}
	urlIdx := -1
	for i, col := range header {
		if col == "url" {
			urlIdx = i
			break
		}
	}
	if urlIdx < 0 {
		return nil, nil, fmt.Errorf("phishtank csv: no 'url' column in header")
	}

	urls := make([]string, 0, 32768)
	byHost := make(map[string][]string, 32768)
	for {
		record, err := r.Read()
		if err == io.EOF {
			break
		}
		if err != nil {
			return nil, nil, fmt.Errorf("phishtank csv: %w", err)
		}
		if urlIdx >= len(record) {
			continue
		}
		u := strings.TrimSpace(record[urlIdx])
		if u == "" {
			continue
		}
		// encoding/csv returns fields as substrings of one per-record string;
		// cloning keeps the cached URL from pinning the rest of its row.
		u = strings.Clone(u)
		urls = append(urls, u)
		if h := hostOfURL(u); h != "" {
			byHost[h] = append(byHost[h], u)
		}
	}

	// A drained limiter means the stream held more than the cap and the CSV
	// was cut short; refuse it rather than cache a silently truncated feed.
	if plain.N <= 0 {
		return nil, nil, fmt.Errorf("phishtank feed exceeds %d decompressed bytes", phishTankMaxDecompressedBytes)
	}
	return urls, byHost, nil
}