89 lines
2.1 KiB
Go
89 lines
2.1 KiB
Go
package checker
|
|
|
|
import (
|
|
"context"
|
|
"net/url"
|
|
"strings"
|
|
"sync"
|
|
"time"
|
|
)
|
|
|
|
// feedCache caches one URL feed and is shared by the phishing-feed sources
// (OpenPhish, PhishTank). It keeps the latest snapshot indexed by hostname,
// re-fetches it once the TTL has elapsed, and allows at most one fetch to be
// in flight, so lookups that arrive during a refresh are answered from the
// previous (stale) snapshot instead of waiting.
type feedCache struct {
	// mu protects the snapshot, the refresh bookkeeping and ttl.
	mu sync.Mutex

	// urls is the URL list from the last successful fetch; lookup reports
	// its length as the feed size.
	urls []string

	// byHost maps a hostname to the feed URLs on that host. It is nil
	// until the first fetch succeeds.
	byHost map[string][]string

	// fetchedAt is when the current snapshot was stored.
	fetchedAt time.Time

	// lastAttemptAt is when the most recent fetch attempt finished,
	// whether it succeeded or failed.
	lastAttemptAt time.Time

	// refreshing is true while a fetch is in flight.
	refreshing bool

	// ttl is the maximum age of a snapshot before a refresh is attempted.
	ttl time.Duration

	// failBackoff is the minimum time between two fetch attempts.
	failBackoff time.Duration

	// fetchFn downloads the feed and returns the URL list, the same URLs
	// indexed by hostname, and any error.
	fetchFn func(ctx context.Context) ([]string, map[string][]string, error)
}
|
|
|
|
func newFeedCache(ttl time.Duration, fetch func(context.Context) ([]string, map[string][]string, error)) *feedCache {
|
|
if ttl <= 0 {
|
|
ttl = time.Hour
|
|
}
|
|
return &feedCache{
|
|
ttl: ttl,
|
|
failBackoff: time.Minute,
|
|
fetchFn: fetch,
|
|
}
|
|
}
|
|
|
|
func (c *feedCache) setTTL(d time.Duration) {
|
|
c.mu.Lock()
|
|
c.ttl = d
|
|
c.mu.Unlock()
|
|
}
|
|
|
|
func (c *feedCache) lookup(ctx context.Context, domain string) (urls []string, size int, fetchedAt time.Time, err error) {
|
|
domain = strings.ToLower(strings.TrimSuffix(domain, "."))
|
|
|
|
c.mu.Lock()
|
|
stale := c.byHost == nil || time.Since(c.fetchedAt) > c.ttl
|
|
doRefresh := stale && !c.refreshing && time.Since(c.lastAttemptAt) > c.failBackoff
|
|
if doRefresh {
|
|
c.refreshing = true
|
|
}
|
|
c.mu.Unlock()
|
|
|
|
if doRefresh {
|
|
newURLs, newByHost, ferr := c.fetchFn(ctx)
|
|
c.mu.Lock()
|
|
c.refreshing = false
|
|
c.lastAttemptAt = time.Now()
|
|
if ferr == nil {
|
|
c.urls = newURLs
|
|
c.byHost = newByHost
|
|
c.fetchedAt = c.lastAttemptAt
|
|
} else {
|
|
err = ferr
|
|
}
|
|
c.mu.Unlock()
|
|
}
|
|
|
|
c.mu.Lock()
|
|
for host, hostURLs := range c.byHost {
|
|
if host == domain || strings.HasSuffix(host, "."+domain) {
|
|
urls = append(urls, hostURLs...)
|
|
}
|
|
}
|
|
size = len(c.urls)
|
|
fetchedAt = c.fetchedAt
|
|
c.mu.Unlock()
|
|
return urls, size, fetchedAt, err
|
|
}
|
|
|
|
// hostOfURL returns the lower-cased hostname of the URL s, without port,
// IPv6 brackets or a trailing root dot. It returns "" when s cannot be parsed
// or has no host component.
//
// Surrounding whitespace (including CR/LF) is ignored. The trailing dot is
// stripped so that "example.com." and "example.com" produce the same value,
// matching the normalisation lookup applies to the queried domain.
func hostOfURL(s string) string {
	u, err := url.Parse(strings.TrimSpace(s))
	if err != nil {
		return ""
	}
	return strings.TrimSuffix(strings.ToLower(u.Hostname()), ".")
}
|