checker-blacklist/checker/phishtank.go
Pierre-Olivier Mercier 6b1d2e2540 Extract disabledResult and evidenceEval helpers to reduce boilerplate
Add two shared helpers to source.go and apply them across all sources:
- disabledResult(id, name) replaces the repeated inline SourceResult literal
- evidenceEval(r, severity) replaces the identical Evaluate body in 6 sources
2026-05-15 21:36:24 +08:00

174 lines
4.5 KiB
Go

package checker
import (
"bufio"
"compress/gzip"
"context"
"encoding/csv"
"fmt"
"io"
"net/http"
"strconv"
"strings"
"time"
sdk "git.happydns.org/checker-sdk-go/checker"
)
// PhishTank feed location and default cache lifetime.
const (
// phishTankFeedURL is the gzip-compressed CSV feed requested by phishTankFetch.
phishTankFeedURL = "https://data.phishtank.com/data/online-valid.csv.gz"
// phishTankDefaultTTL is the TTL handed to newFeedCache when the shared
// cache is created (12 hours).
phishTankDefaultTTL = 12 * time.Hour
)
// phishTankGlobalCache is the package-level feed cache used by
// phishTankSource.Query. It is created with phishTankDefaultTTL and
// phishTankFetch (presumably the callback newFeedCache uses to refresh the
// feed; confirm against newFeedCache).
var phishTankGlobalCache = newFeedCache(phishTankDefaultTTL, phishTankFetch)
func init() { Register(&phishTankSource{}) }
// phishTankSource checks a domain against the PhishTank feed. The type holds
// no fields; the feed data lives in the package-level phishTankGlobalCache.
type phishTankSource struct{}

// ID returns the identifier of this source, "phishtank".
func (*phishTankSource) ID() string {
	return "phishtank"
}

// Name returns the display name of this source, "PhishTank".
func (*phishTankSource) Name() string {
	return "PhishTank"
}
// Options lists the settings this source declares.
//
// Admin: "phishtank_refresh_hours", a string field with default "12".
// User: "enable_phishtank", a bool field with default true.
func (*phishTankSource) Options() SourceOptions {
	refreshField := sdk.CheckerOptionField{
		Id:          "phishtank_refresh_hours",
		Type:        "string",
		Label:       "PhishTank feed refresh interval (hours)",
		Description: "How often to re-download the PhishTank online-valid feed. Minimum: 1. Default: 12.",
		Default:     "12",
	}
	enableField := sdk.CheckerOptionField{
		Id:          "enable_phishtank",
		Type:        "bool",
		Label:       "Use the PhishTank feed",
		Description: "Download the PhishTank verified phishing list and check the domain against it.",
		Default:     true,
	}

	return SourceOptions{
		Admin: []sdk.CheckerOptionField{refreshField},
		User:  []sdk.CheckerOptionField{enableField},
	}
}
// Query looks up the registered domain in the shared PhishTank feed cache.
//
// It returns disabledResult when the "enable_phishtank" option is false or
// when registered is empty. Otherwise, if "phishtank_refresh_hours" parses as
// an integer >= 1, that value is applied to phishTankGlobalCache via setTTL
// before the lookup (this changes the package-level cache, not just this
// call). The domain parameter is not used by this source.
//
// The single returned SourceResult always carries the feed size and fetch
// time reported by the lookup in Details. A lookup error is copied into
// Error, and every matching URL becomes one Evidence entry with the reason
// "Phishing".
func (s *phishTankSource) Query(ctx context.Context, domain, registered string, opts sdk.CheckerOptions) []SourceResult {
	enabled := sdk.GetBoolOption(opts, "enable_phishtank", true)
	if !enabled || registered == "" {
		return disabledResult(s.ID(), s.Name())
	}

	// Invalid or out-of-range values are ignored and the current TTL is kept.
	if raw, present := sdk.GetOption[string](opts, "phishtank_refresh_hours"); present && raw != "" {
		hours, convErr := strconv.Atoi(raw)
		if convErr == nil && hours >= 1 {
			phishTankGlobalCache.setTTL(time.Duration(hours) * time.Hour)
		}
	}

	matches, feedSize, fetchedAt, lookupErr := phishTankGlobalCache.lookup(ctx, registered)

	out := SourceResult{
		SourceID:   s.ID(),
		SourceName: s.Name(),
		Enabled:    true,
		Reference:  "https://www.phishtank.com/",
		Details:    mustJSON(map[string]any{"feed_size": feedSize, "fetched_at": fetchedAt}),
	}
	if lookupErr != nil {
		out.Error = lookupErr.Error()
	}
	if len(matches) == 0 {
		return []SourceResult{out}
	}

	out.Reasons = []string{"Phishing"}
	for _, match := range matches {
		out.Evidence = append(out.Evidence, Evidence{Label: "URL", Value: match})
	}
	return []SourceResult{out}
}
// Evaluate delegates to the shared evidenceEval helper, passing SeverityCrit
// as the severity for this source, and returns its two results unchanged.
func (*phishTankSource) Evaluate(r SourceResult) (bool, string) {
	verdict, severity := evidenceEval(r, SeverityCrit)
	return verdict, severity
}
// Diagnose builds the diagnosis for a PhishTank result.
//
// The detail text reports how many Evidence entries the result carries and
// quotes at most the first five of their values as examples. Severity is
// always SeverityCrit, and Fix is the PhishTank developer information URL.
func (*phishTankSource) Diagnose(res SourceResult) Diagnosis {
	listed := make([]string, len(res.Evidence))
	for i := range res.Evidence {
		listed[i] = res.Evidence[i].Value
	}

	// Cap the examples so the message stays readable for heavily listed domains.
	examples := listed
	if len(examples) > 5 {
		examples = examples[:5]
	}

	detail := fmt.Sprintf(
		"%d URL(s) hosted on this domain are tracked as verified phishing by PhishTank. Examples: %s",
		len(listed), joinNonEmpty(examples, ", "),
	)

	return Diagnosis{
		Severity: SeverityCrit,
		Title:    "Listed in the PhishTank phishing database",
		Detail:   detail,
		Fix:      "https://www.phishtank.com/developer_info.php",
		FixIsURL: true,
	}
}
// phishTankFetch downloads and parses the PhishTank gzip-compressed CSV feed.
//
// It issues a GET for phishTankFeedURL through sharedHTTPClient under a
// 120-second deadline derived from ctx, reads at most 128 MiB of compressed
// body, and finds the "url" column from the CSV header row.
//
// It returns every non-empty URL in feed order, plus a map from
// hostOfURL(url) to the URLs sharing that host. URLs for which hostOfURL
// returns "" appear only in the list. On any error both results are nil.
//
// Rows whose field count differs from the header are tolerated: a row too
// short to contain the url column is skipped rather than failing the whole
// download.
func phishTankFetch(ctx context.Context) ([]string, map[string][]string, error) {
	reqCtx, cancel := context.WithTimeout(ctx, 120*time.Second)
	defer cancel()

	req, err := http.NewRequestWithContext(reqCtx, http.MethodGet, phishTankFeedURL, nil)
	if err != nil {
		return nil, nil, err
	}
	req.Header.Set("User-Agent", "happydomain-checker-blacklist/1.0")

	resp, err := sharedHTTPClient.Do(req)
	if err != nil {
		return nil, nil, err
	}
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusOK {
		return nil, nil, fmt.Errorf("phishtank HTTP %d", resp.StatusCode)
	}

	// The limit applies to the compressed stream read from the network.
	gz, err := gzip.NewReader(io.LimitReader(resp.Body, 128<<20))
	if err != nil {
		return nil, nil, fmt.Errorf("phishtank gzip: %w", err)
	}
	defer gz.Close()

	r := csv.NewReader(bufio.NewReader(gz))
	r.ReuseRecord = true
	// With the default FieldsPerRecord (0) the reader rejects any row whose
	// field count differs from the header, which made the short-row guard
	// below unreachable and let one ragged row discard the entire feed.
	r.FieldsPerRecord = -1

	header, err := r.Read()
	if err != nil {
		return nil, nil, fmt.Errorf("phishtank csv header: %w", err)
	}
	urlIdx := -1
	for i, col := range header {
		if col == "url" {
			urlIdx = i
			break
		}
	}
	if urlIdx < 0 {
		return nil, nil, fmt.Errorf("phishtank csv: no 'url' column in header")
	}

	urls := make([]string, 0, 32768)
	byHost := make(map[string][]string, 32768)
	for {
		record, err := r.Read()
		if err == io.EOF {
			break
		}
		if err != nil {
			return nil, nil, fmt.Errorf("phishtank csv: %w", err)
		}
		if urlIdx >= len(record) {
			continue
		}
		field := strings.TrimSpace(record[urlIdx])
		if field == "" {
			continue
		}
		// encoding/csv slices every field of a row out of one per-record
		// string, so keeping the field itself would pin the whole row in the
		// cache. Clone to retain only the URL bytes.
		u := strings.Clone(field)
		urls = append(urls, u)
		if h := hostOfURL(u); h != "" {
			byHost[h] = append(byHost[h], u)
		}
	}
	return urls, byHost, nil
}