Merge duplicate phishCache/phishTankCache into shared feedCache
This commit is contained in:
parent
229e7a8f02
commit
061b5361ca
3 changed files with 127 additions and 172 deletions
89
checker/feedcache.go
Normal file
89
checker/feedcache.go
Normal file
|
|
@ -0,0 +1,89 @@
|
|||
package checker
|
||||
|
||||
import (
|
||||
"context"
|
||||
"net/url"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
)
|
||||
|
||||
// feedCache caches a URL feed for the phishing-feed sources (OpenPhish,
// PhishTank), which share this one implementation.
//
// The cache keeps a snapshot of the feed indexed by hostname. A lookup
// that finds the snapshot older than ttl triggers a refresh; the
// refreshing flag limits this to a single refresh at a time, and other
// lookups keep answering from the existing (stale) snapshot meanwhile.
type feedCache struct {
	// mu guards every mutable field below.
	mu sync.Mutex

	// Current snapshot: the flat URL list and the same URLs grouped by host.
	urls   []string
	byHost map[string][]string

	// Refresh bookkeeping.
	fetchedAt     time.Time // when the current snapshot was stored
	lastAttemptAt time.Time // when the last fetch finished, successful or not
	refreshing    bool      // true while a fetch is in flight

	// Timing policy.
	ttl         time.Duration // snapshot age after which a refresh is due
	failBackoff time.Duration // minimum gap between fetch attempts

	// fetchFn downloads the feed. It is called without mu held.
	fetchFn func(ctx context.Context) (urls []string, byHost map[string][]string, err error)
}
|
||||
|
||||
func newFeedCache(ttl time.Duration, fetch func(context.Context) ([]string, map[string][]string, error)) *feedCache {
|
||||
if ttl <= 0 {
|
||||
ttl = time.Hour
|
||||
}
|
||||
return &feedCache{
|
||||
ttl: ttl,
|
||||
failBackoff: time.Minute,
|
||||
fetchFn: fetch,
|
||||
}
|
||||
}
|
||||
|
||||
func (c *feedCache) setTTL(d time.Duration) {
|
||||
c.mu.Lock()
|
||||
c.ttl = d
|
||||
c.mu.Unlock()
|
||||
}
|
||||
|
||||
func (c *feedCache) lookup(ctx context.Context, domain string) (urls []string, size int, fetchedAt time.Time, err error) {
|
||||
domain = strings.ToLower(strings.TrimSuffix(domain, "."))
|
||||
|
||||
c.mu.Lock()
|
||||
stale := c.byHost == nil || time.Since(c.fetchedAt) > c.ttl
|
||||
doRefresh := stale && !c.refreshing && time.Since(c.lastAttemptAt) > c.failBackoff
|
||||
if doRefresh {
|
||||
c.refreshing = true
|
||||
}
|
||||
c.mu.Unlock()
|
||||
|
||||
if doRefresh {
|
||||
newURLs, newByHost, ferr := c.fetchFn(ctx)
|
||||
c.mu.Lock()
|
||||
c.refreshing = false
|
||||
c.lastAttemptAt = time.Now()
|
||||
if ferr == nil {
|
||||
c.urls = newURLs
|
||||
c.byHost = newByHost
|
||||
c.fetchedAt = c.lastAttemptAt
|
||||
} else {
|
||||
err = ferr
|
||||
}
|
||||
c.mu.Unlock()
|
||||
}
|
||||
|
||||
c.mu.Lock()
|
||||
for host, hostURLs := range c.byHost {
|
||||
if host == domain || strings.HasSuffix(host, "."+domain) {
|
||||
urls = append(urls, hostURLs...)
|
||||
}
|
||||
}
|
||||
size = len(c.urls)
|
||||
fetchedAt = c.fetchedAt
|
||||
c.mu.Unlock()
|
||||
return urls, size, fetchedAt, err
|
||||
}
|
||||
|
||||
// hostOfURL extracts the hostname from the URL s in the form lookup
// compares against: lower-cased, without port, and without a trailing
// root dot.
//
// Surrounding whitespace is ignored, so a line taken from a CRLF-terminated
// feed still parses. The result is "" when s cannot be parsed or has no
// host component.
func hostOfURL(s string) string {
	u, err := url.Parse(strings.TrimSpace(s))
	if err != nil {
		return ""
	}
	// lookup strips the trailing dot from the queried domain; do the same
	// here so "evil.com." and "evil.com" index under the same key.
	return strings.TrimSuffix(strings.ToLower(u.Hostname()), ".")
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue