Sources that always work (botvrij, disconnect, oisd, openphish, phishtank, quad9) drop their user-facing enable_* option; the rule's enabled/disabled state is now solely controlled by the SDK rule toggle. Sources that require credentials (criminalip, malwarebazaar, otx, pulsedive, safebrowsing, threatfox, urlhaus, virustotal) instead implement the new SourcePrecheck interface so the host UI can surface "not configured" before attempting a query.
165 lines
4.1 KiB
Go
165 lines
4.1 KiB
Go
package checker
|
|
|
|
import (
|
|
"bufio"
|
|
"compress/gzip"
|
|
"context"
|
|
"encoding/csv"
|
|
"fmt"
|
|
"io"
|
|
"net/http"
|
|
"strconv"
|
|
"strings"
|
|
"time"
|
|
|
|
sdk "git.happydns.org/checker-sdk-go/checker"
|
|
)
|
|
|
|
// PhishTank feed settings.
const (
	// phishTankFeedURL is the location of the gzip-compressed CSV feed that
	// phishTankFetch downloads.
	phishTankFeedURL = "https://data.phishtank.com/data/online-valid.csv.gz"

	// phishTankDefaultTTL is the TTL handed to newFeedCache when the shared
	// PhishTank cache is created; it matches the documented default of the
	// "phishtank_refresh_hours" option (12).
	phishTankDefaultTTL = 12 * time.Hour
)
|
|
|
|
// phishTankGlobalCache is the package-level cache used by every PhishTank
// query. It is built by newFeedCache from phishTankDefaultTTL and
// phishTankFetch (presumably the initial TTL and the loader callback; confirm
// against newFeedCache). Query may later change its TTL through setTTL.
var phishTankGlobalCache = newFeedCache(phishTankDefaultTTL, phishTankFetch)
|
|
|
|
// init hands a phishTankSource to Register when the package is initialised.
func init() {
	Register(&phishTankSource{})
}
|
|
|
|
// phishTankSource is the PhishTank blacklist source. The type carries no
// fields; the feed data it queries lives in the package-level
// phishTankGlobalCache.
type phishTankSource struct{}

// ID returns the source identifier, "phishtank".
func (*phishTankSource) ID() string {
	return "phishtank"
}

// Name returns the display name of the source, "PhishTank".
func (*phishTankSource) Name() string {
	return "PhishTank"
}
|
|
|
|
func (*phishTankSource) Options() SourceOptions {
|
|
return SourceOptions{
|
|
Admin: []sdk.CheckerOptionField{
|
|
{
|
|
Id: "phishtank_refresh_hours",
|
|
Type: "string",
|
|
Label: "PhishTank feed refresh interval (hours)",
|
|
Description: "How often to re-download the PhishTank online-valid feed. Minimum: 1. Default: 12.",
|
|
Default: "12",
|
|
},
|
|
},
|
|
}
|
|
}
|
|
|
|
func (s *phishTankSource) Query(ctx context.Context, domain, registered string, opts sdk.CheckerOptions) []SourceResult {
|
|
if registered == "" {
|
|
return disabledResult(s.ID(), s.Name())
|
|
}
|
|
|
|
if ttlRaw, ok := sdk.GetOption[string](opts, "phishtank_refresh_hours"); ok && ttlRaw != "" {
|
|
if hours, err := strconv.Atoi(ttlRaw); err == nil && hours >= 1 {
|
|
phishTankGlobalCache.setTTL(time.Duration(hours) * time.Hour)
|
|
}
|
|
}
|
|
|
|
urls, size, fetched, err := phishTankGlobalCache.lookup(ctx, registered)
|
|
res := SourceResult{
|
|
SourceID: s.ID(), SourceName: s.Name(), Enabled: true,
|
|
Reference: "https://www.phishtank.com/",
|
|
Details: mustJSON(map[string]any{"feed_size": size, "fetched_at": fetched}),
|
|
}
|
|
if err != nil {
|
|
res.Error = err.Error()
|
|
}
|
|
if len(urls) > 0 {
|
|
res.Reasons = []string{"Phishing"}
|
|
for _, u := range urls {
|
|
res.Evidence = append(res.Evidence, Evidence{Label: "URL", Value: u})
|
|
}
|
|
}
|
|
return []SourceResult{res}
|
|
}
|
|
|
|
func (*phishTankSource) Evaluate(r SourceResult) (bool, string) {
|
|
return evidenceEval(r, SeverityCrit)
|
|
}
|
|
|
|
func (*phishTankSource) Diagnose(res SourceResult) Diagnosis {
|
|
urls := make([]string, 0, len(res.Evidence))
|
|
for _, e := range res.Evidence {
|
|
urls = append(urls, e.Value)
|
|
}
|
|
previewN := min(len(urls), 5)
|
|
return Diagnosis{
|
|
Severity: SeverityCrit,
|
|
Title: "Listed in the PhishTank phishing database",
|
|
Detail: fmt.Sprintf(
|
|
"%d URL(s) hosted on this domain are tracked as verified phishing by PhishTank. Examples: %s",
|
|
len(urls), joinNonEmpty(urls[:previewN], ", "),
|
|
),
|
|
Fix: "https://www.phishtank.com/developer_info.php",
|
|
FixIsURL: true,
|
|
}
|
|
}
|
|
|
|
// phishTankFetch downloads and parses the PhishTank gzip-compressed CSV feed.
|
|
func phishTankFetch(ctx context.Context) ([]string, map[string][]string, error) {
|
|
reqCtx, cancel := context.WithTimeout(ctx, 120*time.Second)
|
|
defer cancel()
|
|
|
|
req, err := http.NewRequestWithContext(reqCtx, http.MethodGet, phishTankFeedURL, nil)
|
|
if err != nil {
|
|
return nil, nil, err
|
|
}
|
|
req.Header.Set("User-Agent", "happydomain-checker-blacklist/1.0")
|
|
|
|
resp, err := sharedHTTPClient.Do(req)
|
|
if err != nil {
|
|
return nil, nil, err
|
|
}
|
|
defer resp.Body.Close()
|
|
|
|
if resp.StatusCode != http.StatusOK {
|
|
return nil, nil, fmt.Errorf("phishtank HTTP %d", resp.StatusCode)
|
|
}
|
|
|
|
gz, err := gzip.NewReader(io.LimitReader(resp.Body, 128<<20))
|
|
if err != nil {
|
|
return nil, nil, fmt.Errorf("phishtank gzip: %w", err)
|
|
}
|
|
defer gz.Close()
|
|
|
|
r := csv.NewReader(bufio.NewReader(gz))
|
|
r.ReuseRecord = true
|
|
|
|
header, err := r.Read()
|
|
if err != nil {
|
|
return nil, nil, fmt.Errorf("phishtank csv header: %w", err)
|
|
}
|
|
urlIdx := -1
|
|
for i, col := range header {
|
|
if col == "url" {
|
|
urlIdx = i
|
|
break
|
|
}
|
|
}
|
|
if urlIdx < 0 {
|
|
return nil, nil, fmt.Errorf("phishtank csv: no 'url' column in header")
|
|
}
|
|
|
|
urls := make([]string, 0, 32768)
|
|
byHost := make(map[string][]string, 32768)
|
|
for {
|
|
record, err := r.Read()
|
|
if err == io.EOF {
|
|
break
|
|
}
|
|
if err != nil {
|
|
return nil, nil, fmt.Errorf("phishtank csv: %w", err)
|
|
}
|
|
if urlIdx >= len(record) {
|
|
continue
|
|
}
|
|
u := strings.TrimSpace(record[urlIdx])
|
|
if u == "" {
|
|
continue
|
|
}
|
|
urls = append(urls, u)
|
|
if h := hostOfURL(u); h != "" {
|
|
byHost[h] = append(byHost[h], u)
|
|
}
|
|
}
|
|
return urls, byHost, nil
|
|
}
|