Add PhishTank as a new blacklist source
This commit is contained in:
parent
829863e5a0
commit
6b08676ec5
2 changed files with 243 additions and 1 deletions
|
|
@ -13,7 +13,8 @@ widely-used reputation systems.
|
||||||
| Extra DNSBL zones | DNS-based DBL | no | admin |
|
| Extra DNSBL zones | DNS-based DBL | no | admin |
|
||||||
| Google Safe Browsing | HTTPS lookup | yes (admin) | admin |
|
| Google Safe Browsing | HTTPS lookup | yes (admin) | admin |
|
||||||
| OpenPhish public feed | downloaded list | no | user (default on) |
|
| OpenPhish public feed | downloaded list | no | user (default on) |
|
||||||
| abuse.ch URLhaus | HTTPS lookup | optional Auth-Key (admin) | user (default on) |
|
| PhishTank | downloaded list | no | user (default on) |
|
||||||
|
| abuse.ch URLhaus | HTTPS lookup | free Auth-Key (admin) | user (default on) |
|
||||||
| VirusTotal v3 | HTTPS lookup | yes (admin) | admin |
|
| VirusTotal v3 | HTTPS lookup | yes (admin) | admin |
|
||||||
|
|
||||||
### Obtaining API keys
|
### Obtaining API keys
|
||||||
|
|
|
||||||
241
checker/phishtank.go
Normal file
241
checker/phishtank.go
Normal file
|
|
@ -0,0 +1,241 @@
|
||||||
|
package checker
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bufio"
|
||||||
|
"compress/gzip"
|
||||||
|
"context"
|
||||||
|
"encoding/csv"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"net/http"
|
||||||
|
"strconv"
|
||||||
|
"strings"
|
||||||
|
"sync"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
sdk "git.happydns.org/checker-sdk-go/checker"
|
||||||
|
)
|
||||||
|
|
||||||
|
// PhishTank feed settings.
const (
	// phishTankFeedURL is the location of the PhishTank online-valid feed,
	// served as a gzip-compressed CSV file.
	phishTankFeedURL = "https://data.phishtank.com/data/online-valid.csv.gz"

	// phishTankDefaultTTL is the refresh interval a new cache starts with.
	phishTankDefaultTTL = 12 * time.Hour
)
|
||||||
|
|
||||||
|
// phishTankGlobalCache is the package-level feed cache that
// phishTankSource.Query reads from and configures.
var phishTankGlobalCache = newPhishTankCache()
|
||||||
|
|
||||||
|
// init hands a PhishTank source instance to Register when the package loads.
func init() {
	Register(&phishTankSource{})
}
|
||||||
|
|
||||||
|
// phishTankSource is the PhishTank blacklist source. It carries no state of
// its own; feed data lives in phishTankGlobalCache.
type phishTankSource struct{}
|
||||||
|
|
||||||
|
// ID returns the identifier of this source, "phishtank".
func (*phishTankSource) ID() string {
	return "phishtank"
}
|
||||||
|
// Name returns the display name of this source, "PhishTank".
func (*phishTankSource) Name() string {
	return "PhishTank"
}
|
||||||
|
|
||||||
|
func (*phishTankSource) Options() SourceOptions {
|
||||||
|
return SourceOptions{
|
||||||
|
Admin: []sdk.CheckerOptionField{
|
||||||
|
{
|
||||||
|
Id: "phishtank_refresh_hours",
|
||||||
|
Type: "string",
|
||||||
|
Label: "PhishTank feed refresh interval (hours)",
|
||||||
|
Description: "How often to re-download the PhishTank online-valid feed. Minimum: 1. Default: 12.",
|
||||||
|
Default: "12",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
User: []sdk.CheckerOptionField{
|
||||||
|
{
|
||||||
|
Id: "enable_phishtank",
|
||||||
|
Type: "bool",
|
||||||
|
Label: "Use the PhishTank feed",
|
||||||
|
Description: "Download the PhishTank verified phishing list and check the domain against it.",
|
||||||
|
Default: true,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *phishTankSource) Query(ctx context.Context, domain, registered string, opts sdk.CheckerOptions) []SourceResult {
|
||||||
|
if !sdk.GetBoolOption(opts, "enable_phishtank", true) || registered == "" {
|
||||||
|
return []SourceResult{{SourceID: s.ID(), SourceName: s.Name(), Enabled: false}}
|
||||||
|
}
|
||||||
|
|
||||||
|
if ttlRaw, ok := sdk.GetOption[string](opts, "phishtank_refresh_hours"); ok && ttlRaw != "" {
|
||||||
|
if hours, err := strconv.Atoi(ttlRaw); err == nil && hours >= 1 {
|
||||||
|
phishTankGlobalCache.setTTL(time.Duration(hours) * time.Hour)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
urls, size, fetched, err := phishTankGlobalCache.lookup(ctx, registered)
|
||||||
|
res := SourceResult{
|
||||||
|
SourceID: s.ID(), SourceName: s.Name(), Enabled: true,
|
||||||
|
Reference: "https://www.phishtank.com/",
|
||||||
|
Details: mustJSON(map[string]any{"feed_size": size, "fetched_at": fetched}),
|
||||||
|
}
|
||||||
|
if err != nil {
|
||||||
|
res.Error = err.Error()
|
||||||
|
}
|
||||||
|
if len(urls) > 0 {
|
||||||
|
res.Reasons = []string{"Phishing"}
|
||||||
|
for _, u := range urls {
|
||||||
|
res.Evidence = append(res.Evidence, Evidence{Label: "URL", Value: u})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return []SourceResult{res}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (*phishTankSource) Evaluate(r SourceResult) (bool, string) {
|
||||||
|
if r.Enabled && r.Error == "" && len(r.Evidence) > 0 {
|
||||||
|
return true, SeverityCrit
|
||||||
|
}
|
||||||
|
return false, ""
|
||||||
|
}
|
||||||
|
|
||||||
|
func (*phishTankSource) Diagnose(res SourceResult) Diagnosis {
|
||||||
|
urls := make([]string, 0, len(res.Evidence))
|
||||||
|
for _, e := range res.Evidence {
|
||||||
|
urls = append(urls, e.Value)
|
||||||
|
}
|
||||||
|
previewN := min(len(urls), 5)
|
||||||
|
return Diagnosis{
|
||||||
|
Severity: SeverityCrit,
|
||||||
|
Title: "Listed in the PhishTank phishing database",
|
||||||
|
Detail: fmt.Sprintf(
|
||||||
|
"%d URL(s) hosted on this domain are tracked as verified phishing by PhishTank. Examples: %s",
|
||||||
|
len(urls), joinNonEmpty(urls[:previewN], ", "),
|
||||||
|
),
|
||||||
|
Fix: "https://www.phishtank.com/developer_info.php",
|
||||||
|
FixIsURL: true,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ---------- feed cache ----------
|
||||||
|
|
||||||
|
// phishTankCache holds the most recently downloaded PhishTank feed together
// with the bookkeeping needed to decide when to download it again.
type phishTankCache struct {
	// mu guards every field below.
	mu sync.Mutex

	// Feed snapshot from the last successful fetch.
	urls      []string            // every URL in the feed; its length is reported as the feed size
	byHost    map[string][]string // feed URLs grouped by host; nil until a fetch succeeds
	fetchedAt time.Time           // when the snapshot was stored

	// Refresh bookkeeping.
	lastAttemptAt time.Time // when the last fetch attempt finished, successful or not
	refreshing    bool      // true while a lookup call is running a fetch

	// Tunables.
	ttl         time.Duration // snapshot age after which a refresh is wanted
	failBackoff time.Duration // minimum wait after an attempt before trying again
}
|
||||||
|
|
||||||
|
func newPhishTankCache() *phishTankCache {
|
||||||
|
return &phishTankCache{
|
||||||
|
ttl: phishTankDefaultTTL,
|
||||||
|
failBackoff: 1 * time.Minute,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c *phishTankCache) setTTL(d time.Duration) {
|
||||||
|
c.mu.Lock()
|
||||||
|
c.ttl = d
|
||||||
|
c.mu.Unlock()
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c *phishTankCache) lookup(ctx context.Context, domain string) (urls []string, size int, fetchedAt time.Time, err error) {
|
||||||
|
domain = strings.ToLower(strings.TrimSuffix(domain, "."))
|
||||||
|
|
||||||
|
c.mu.Lock()
|
||||||
|
stale := c.byHost == nil || time.Since(c.fetchedAt) > c.ttl
|
||||||
|
doRefresh := stale && !c.refreshing && time.Since(c.lastAttemptAt) > c.failBackoff
|
||||||
|
if doRefresh {
|
||||||
|
c.refreshing = true
|
||||||
|
}
|
||||||
|
c.mu.Unlock()
|
||||||
|
|
||||||
|
if doRefresh {
|
||||||
|
newURLs, newByHost, ferr := c.fetch(ctx)
|
||||||
|
c.mu.Lock()
|
||||||
|
c.refreshing = false
|
||||||
|
c.lastAttemptAt = time.Now()
|
||||||
|
if ferr == nil {
|
||||||
|
c.urls = newURLs
|
||||||
|
c.byHost = newByHost
|
||||||
|
c.fetchedAt = c.lastAttemptAt
|
||||||
|
} else {
|
||||||
|
err = ferr
|
||||||
|
}
|
||||||
|
c.mu.Unlock()
|
||||||
|
}
|
||||||
|
|
||||||
|
c.mu.Lock()
|
||||||
|
for host, hostURLs := range c.byHost {
|
||||||
|
if host == domain || strings.HasSuffix(host, "."+domain) {
|
||||||
|
urls = append(urls, hostURLs...)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
size = len(c.urls)
|
||||||
|
fetchedAt = c.fetchedAt
|
||||||
|
c.mu.Unlock()
|
||||||
|
return urls, size, fetchedAt, err
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c *phishTankCache) fetch(ctx context.Context) ([]string, map[string][]string, error) {
|
||||||
|
reqCtx, cancel := context.WithTimeout(ctx, 120*time.Second)
|
||||||
|
defer cancel()
|
||||||
|
|
||||||
|
req, err := http.NewRequestWithContext(reqCtx, http.MethodGet, phishTankFeedURL, nil)
|
||||||
|
if err != nil {
|
||||||
|
return nil, nil, err
|
||||||
|
}
|
||||||
|
req.Header.Set("User-Agent", "happydomain-checker-blacklist/1.0")
|
||||||
|
|
||||||
|
resp, err := sharedHTTPClient.Do(req)
|
||||||
|
if err != nil {
|
||||||
|
return nil, nil, err
|
||||||
|
}
|
||||||
|
defer resp.Body.Close()
|
||||||
|
|
||||||
|
if resp.StatusCode != http.StatusOK {
|
||||||
|
return nil, nil, fmt.Errorf("phishtank HTTP %d", resp.StatusCode)
|
||||||
|
}
|
||||||
|
|
||||||
|
gz, err := gzip.NewReader(io.LimitReader(resp.Body, 128<<20))
|
||||||
|
if err != nil {
|
||||||
|
return nil, nil, fmt.Errorf("phishtank gzip: %w", err)
|
||||||
|
}
|
||||||
|
defer gz.Close()
|
||||||
|
|
||||||
|
r := csv.NewReader(bufio.NewReader(gz))
|
||||||
|
r.ReuseRecord = true
|
||||||
|
|
||||||
|
header, err := r.Read()
|
||||||
|
if err != nil {
|
||||||
|
return nil, nil, fmt.Errorf("phishtank csv header: %w", err)
|
||||||
|
}
|
||||||
|
urlIdx := -1
|
||||||
|
for i, col := range header {
|
||||||
|
if col == "url" {
|
||||||
|
urlIdx = i
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if urlIdx < 0 {
|
||||||
|
return nil, nil, fmt.Errorf("phishtank csv: no 'url' column in header")
|
||||||
|
}
|
||||||
|
|
||||||
|
urls := make([]string, 0, 32768)
|
||||||
|
byHost := make(map[string][]string, 32768)
|
||||||
|
for {
|
||||||
|
record, err := r.Read()
|
||||||
|
if err == io.EOF {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
if err != nil {
|
||||||
|
return nil, nil, fmt.Errorf("phishtank csv: %w", err)
|
||||||
|
}
|
||||||
|
if urlIdx >= len(record) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
u := strings.TrimSpace(record[urlIdx])
|
||||||
|
if u == "" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
urls = append(urls, u)
|
||||||
|
if h := hostOfURL(u); h != "" {
|
||||||
|
byHost[h] = append(byHost[h], u)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return urls, byHost, nil
|
||||||
|
}
|
||||||
Loading…
Add table
Add a link
Reference in a new issue