checker-blacklist/checker/phishtank.go
Pierre-Olivier Mercier c3cda1f104
All checks were successful
continuous-integration/drone/tag Build is passing
continuous-integration/drone/push Build is passing
Replace per-source enable booleans with SourcePrecheck and bump SDK to v1.9.0
Sources that always work (botvrij, disconnect, oisd, openphish, phishtank, quad9) drop their user-facing enable_* option; the rule's enabled/disabled state is now solely controlled by the SDK rule toggle. Sources that require credentials (criminalip, malwarebazaar, otx, pulsedive, safebrowsing, threatfox, urlhaus, virustotal) instead implement the new SourcePrecheck interface so the host UI can surface "not configured" before attempting a query.
2026-05-20 14:26:42 +08:00

165 lines
4.1 KiB
Go

package checker
import (
"bufio"
"compress/gzip"
"context"
"encoding/csv"
"fmt"
"io"
"net/http"
"strconv"
"strings"
"time"
sdk "git.happydns.org/checker-sdk-go/checker"
)
// PhishTank feed settings.
const (
	// phishTankFeedURL is the location of the gzip-compressed "online-valid"
	// CSV feed that phishTankFetch downloads.
	phishTankFeedURL = "https://data.phishtank.com/data/online-valid.csv.gz"

	// phishTankDefaultTTL is the TTL handed to the feed cache when it is
	// created; it matches the "12" default of phishtank_refresh_hours.
	phishTankDefaultTTL time.Duration = 12 * time.Hour
)
// phishTankGlobalCache is the package-level cache used by every PhishTank
// query. It is created with phishTankDefaultTTL as its initial TTL (Query may
// later change it through setTTL) and with phishTankFetch as the function
// given to newFeedCache — presumably the loader the cache calls to (re)fetch
// the feed; confirm against newFeedCache.
var phishTankGlobalCache = newFeedCache(phishTankDefaultTTL, phishTankFetch)
// init hands a phishTankSource to Register when the package is initialised.
func init() {
	Register(new(phishTankSource))
}
// phishTankSource is the PhishTank blacklist source. It carries no fields;
// the feed data it consults lives in a package-level cache.
type phishTankSource struct{}

// ID returns the stable machine identifier of the source.
func (s *phishTankSource) ID() string {
	return "phishtank"
}

// Name returns the human-readable name of the source.
func (s *phishTankSource) Name() string {
	return "PhishTank"
}
// Options describes the settings this source exposes. There is a single
// admin-level field, phishtank_refresh_hours, which Query reads to adjust how
// long the downloaded feed is cached.
func (s *phishTankSource) Options() SourceOptions {
	refreshHours := sdk.CheckerOptionField{
		Id:          "phishtank_refresh_hours",
		Type:        "string",
		Label:       "PhishTank feed refresh interval (hours)",
		Description: "How often to re-download the PhishTank online-valid feed. Minimum: 1. Default: 12.",
		Default:     "12",
	}

	var out SourceOptions
	out.Admin = []sdk.CheckerOptionField{refreshHours}
	return out
}
// Query looks up the registered domain in the cached PhishTank feed.
//
// When registered is empty the source cannot be queried and disabledResult is
// returned. Otherwise the admin option phishtank_refresh_hours, if present and
// valid, is applied to the shared cache's TTL before the lookup. A value is
// valid when it parses as an integer that is at least 1 and small enough to be
// expressed as a time.Duration; anything else is ignored and the cache keeps
// its current TTL.
//
// Exactly one SourceResult is returned. Its Details carry the feed size and
// fetch time reported by the cache, Error is set when the lookup reported an
// error, and every URL returned by the lookup is attached as Evidence with the
// single reason "Phishing". Error and Evidence are filled independently, so
// both may be present in the same result.
//
// The domain parameter is not used by this source.
func (s *phishTankSource) Query(ctx context.Context, domain, registered string, opts sdk.CheckerOptions) []SourceResult {
	if registered == "" {
		return disabledResult(s.ID(), s.Name())
	}

	// time.Duration counts nanoseconds in an int64, so multiplying a larger
	// hour count by time.Hour would silently overflow into a meaningless
	// (possibly negative) TTL.
	const maxRefreshHours = (1<<63 - 1) / int64(time.Hour)

	if ttlRaw, ok := sdk.GetOption[string](opts, "phishtank_refresh_hours"); ok && ttlRaw != "" {
		// Tolerate stray whitespace around the number in the admin-entered value.
		hours, err := strconv.Atoi(strings.TrimSpace(ttlRaw))
		if err == nil && hours >= 1 && int64(hours) <= maxRefreshHours {
			phishTankGlobalCache.setTTL(time.Duration(hours) * time.Hour)
		}
	}

	urls, size, fetched, err := phishTankGlobalCache.lookup(ctx, registered)
	res := SourceResult{
		SourceID: s.ID(), SourceName: s.Name(), Enabled: true,
		Reference: "https://www.phishtank.com/",
		Details:   mustJSON(map[string]any{"feed_size": size, "fetched_at": fetched}),
	}
	if err != nil {
		res.Error = err.Error()
	}
	if len(urls) > 0 {
		res.Reasons = []string{"Phishing"}
		for _, u := range urls {
			res.Evidence = append(res.Evidence, Evidence{Label: "URL", Value: u})
		}
	}
	return []SourceResult{res}
}
// Evaluate delegates to evidenceEval, rating the result at SeverityCrit, and
// returns that helper's two results unchanged.
func (s *phishTankSource) Evaluate(r SourceResult) (bool, string) {
	ok, msg := evidenceEval(r, SeverityCrit)
	return ok, msg
}
// Diagnose turns a PhishTank result into a critical Diagnosis. The detail text
// states how many evidence entries the result carries and passes at most the
// first five evidence values to joinNonEmpty as examples. The fix is a link to
// PhishTank's developer information page.
func (s *phishTankSource) Diagnose(res SourceResult) Diagnosis {
	const previewLimit = 5

	// Collect the evidence values in their original order.
	listed := make([]string, len(res.Evidence))
	for i := range res.Evidence {
		listed[i] = res.Evidence[i].Value
	}

	// Only a short sample is shown in the message.
	sample := listed
	if len(sample) > previewLimit {
		sample = sample[:previewLimit]
	}

	detail := fmt.Sprintf(
		"%d URL(s) hosted on this domain are tracked as verified phishing by PhishTank. Examples: %s",
		len(listed), joinNonEmpty(sample, ", "),
	)

	var d Diagnosis
	d.Severity = SeverityCrit
	d.Title = "Listed in the PhishTank phishing database"
	d.Detail = detail
	d.Fix = "https://www.phishtank.com/developer_info.php"
	d.FixIsURL = true
	return d
}
// phishTankFetch downloads the PhishTank gzip-compressed CSV feed and parses it.
//
// The request is issued through sharedHTTPClient with a context derived from
// ctx that expires after 120 seconds, and at most 128 MiB of the (compressed)
// response body is read.
//
// On success it returns every non-empty value of the feed's "url" column, in
// feed order, together with an index of those URLs keyed by the host that
// hostOfURL extracts from each of them. On failure (request construction or
// transport error, a status other than 200 OK, an invalid gzip stream, or a
// CSV parse error) both collections are nil and the error is returned.
func phishTankFetch(ctx context.Context) ([]string, map[string][]string, error) {
	reqCtx, cancel := context.WithTimeout(ctx, 120*time.Second)
	defer cancel()

	req, err := http.NewRequestWithContext(reqCtx, http.MethodGet, phishTankFeedURL, nil)
	if err != nil {
		return nil, nil, err
	}
	req.Header.Set("User-Agent", "happydomain-checker-blacklist/1.0")

	resp, err := sharedHTTPClient.Do(req)
	if err != nil {
		return nil, nil, err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return nil, nil, fmt.Errorf("phishtank HTTP %d", resp.StatusCode)
	}

	// Cap how much compressed data is pulled from the network.
	gz, err := gzip.NewReader(io.LimitReader(resp.Body, 128<<20))
	if err != nil {
		return nil, nil, fmt.Errorf("phishtank gzip: %w", err)
	}
	defer gz.Close()

	return phishTankParseCSV(gz)
}

// phishTankParseCSV reads a PhishTank CSV document from src. The first record
// is the header and must contain a column named exactly "url"; every later
// record contributes the trimmed value of that column unless it is empty or the
// record is too short to have it. URLs for which hostOfURL returns "" are kept
// in the flat list but left out of the per-host index.
func phishTankParseCSV(src io.Reader) ([]string, map[string][]string, error) {
	r := csv.NewReader(bufio.NewReader(src))
	r.ReuseRecord = true
	// Accept rows of any width. With the default (0) the reader insists that
	// every row has as many fields as the header and reports ErrFieldCount
	// otherwise, so one ragged row would abort the whole refresh and the
	// short-row check in the loop below could never take effect.
	r.FieldsPerRecord = -1

	header, err := r.Read()
	if err != nil {
		return nil, nil, fmt.Errorf("phishtank csv header: %w", err)
	}
	urlIdx := -1
	for i, col := range header {
		if col == "url" {
			urlIdx = i
			break
		}
	}
	if urlIdx < 0 {
		return nil, nil, fmt.Errorf("phishtank csv: no 'url' column in header")
	}

	urls := make([]string, 0, 32768)
	byHost := make(map[string][]string, 32768)
	for {
		record, err := r.Read()
		if err == io.EOF {
			break
		}
		if err != nil {
			return nil, nil, fmt.Errorf("phishtank csv: %w", err)
		}
		// Skip rows that are too short to carry the url column.
		if urlIdx >= len(record) {
			continue
		}
		u := strings.TrimSpace(record[urlIdx])
		if u == "" {
			continue
		}
		urls = append(urls, u)
		if h := hostOfURL(u); h != "" {
			byHost[h] = append(byHost[h], u)
		}
	}
	return urls, byHost, nil
}