package checker

import (
	"context"
	"net/url"
	"strings"
	"sync"
	"time"
)

// feedCache is a generic URL-feed cache shared between phishing-feed
// sources (OpenPhish, PhishTank). It holds a hostname-indexed snapshot
// of the feed, refreshes on TTL expiry, and ensures only one refresh is
// in flight at a time so concurrent lookups still serve stale data
// during a refresh.
type feedCache struct {
	// mu guards every field below except fetchFn, which is set once by
	// newFeedCache and only read afterwards.
	mu sync.Mutex

	// urls is the full feed from the last successful fetch; lookup
	// reports its length as the feed size.
	urls []string
	// byHost maps a hostname to the feed URLs on that host. It is nil
	// until the first successful fetch.
	byHost map[string][]string
	// fetchedAt is the time of the last successful fetch.
	fetchedAt time.Time
	// lastAttemptAt is the time of the last fetch attempt, successful
	// or not; failBackoff is measured from it.
	lastAttemptAt time.Time
	// refreshing is true while one lookup call is running fetchFn.
	refreshing bool

	// ttl is how long a snapshot is served before it is considered stale.
	ttl time.Duration
	// failBackoff is the minimum gap between two fetch attempts.
	failBackoff time.Duration

	// fetchFn downloads and indexes the feed.
	fetchFn func(ctx context.Context) (urls []string, byHost map[string][]string, err error)
}

// newFeedCache returns an empty cache that loads its data through fetch.
// A non-positive ttl falls back to one hour; the backoff between fetch
// attempts is one minute.
func newFeedCache(ttl time.Duration, fetch func(context.Context) ([]string, map[string][]string, error)) *feedCache {
	if ttl <= 0 {
		ttl = time.Hour
	}
	return &feedCache{
		ttl:         ttl,
		failBackoff: time.Minute,
		fetchFn:     fetch,
	}
}

// setTTL changes how long a snapshot is served before it is refreshed.
// A non-positive d falls back to one hour, matching newFeedCache, so a
// zero or unset value cannot make every snapshot permanently stale.
func (c *feedCache) setTTL(d time.Duration) {
	if d <= 0 {
		d = time.Hour
	}
	c.mu.Lock()
	c.ttl = d
	c.mu.Unlock()
}

// lookup returns the cached feed URLs whose host equals domain or is a
// subdomain of it. domain is matched case-insensitively and a trailing
// root dot is ignored; an empty domain matches nothing.
//
// If the snapshot is missing or older than the TTL, no other refresh is
// in flight, and the backoff since the last attempt has elapsed, this
// call runs the fetch itself (blocking on it) before matching. Other
// callers are answered from the existing snapshot in the meantime.
//
// size is the number of URLs in the snapshot and fetchedAt is when the
// snapshot was fetched. err is non-nil only when this call attempted a
// refresh that failed; urls, size and fetchedAt then still describe the
// previous snapshot, if there is one.
func (c *feedCache) lookup(ctx context.Context, domain string) (urls []string, size int, fetchedAt time.Time, err error) {
	domain = strings.ToLower(strings.TrimSuffix(domain, "."))

	c.mu.Lock()
	stale := c.byHost == nil || time.Since(c.fetchedAt) > c.ttl
	doRefresh := stale && !c.refreshing && time.Since(c.lastAttemptAt) > c.failBackoff
	if doRefresh {
		c.refreshing = true
	}
	c.mu.Unlock()

	if doRefresh {
		// The lock is not held while fetching so that concurrent
		// lookups are not blocked behind the download.
		err = c.refresh(ctx)
	}

	c.mu.Lock()
	defer c.mu.Unlock()

	// An empty domain would otherwise match entries keyed by "" (the
	// value hostOfURL yields for URLs it cannot parse).
	if domain != "" {
		suffix := "." + domain
		for host, hostURLs := range c.byHost {
			if host == domain || strings.HasSuffix(host, suffix) {
				urls = append(urls, hostURLs...)
			}
		}
	}
	return urls, len(c.urls), c.fetchedAt, err
}

// refresh runs fetchFn and installs its result as the new snapshot. The
// caller must have set c.refreshing under the lock; refresh always
// clears it again, even when fetchFn panics, so that one bad fetch
// cannot disable refreshing for the lifetime of the cache.
func (c *feedCache) refresh(ctx context.Context) error {
	completed := false
	defer func() {
		if completed {
			return
		}
		// Only reached when fetchFn panicked. Record the attempt and
		// release the refresh slot; the panic keeps propagating.
		c.mu.Lock()
		c.refreshing = false
		c.lastAttemptAt = time.Now()
		c.mu.Unlock()
	}()

	newURLs, newByHost, err := c.fetchFn(ctx)

	c.mu.Lock()
	defer c.mu.Unlock()
	completed = true
	c.refreshing = false
	c.lastAttemptAt = time.Now()
	if err != nil {
		// Keep serving the previous snapshot.
		return err
	}
	if newByHost == nil {
		// A nil map means "never loaded" to lookup. Store an empty map
		// so a successful but empty feed is cached for the full TTL.
		newByHost = map[string][]string{}
	}
	c.urls = newURLs
	c.byHost = newByHost
	c.fetchedAt = c.lastAttemptAt
	return nil
}

// hostOfURL returns the lower-cased hostname of the URL s, without port
// and without a trailing root dot, so that it compares equal to the
// domain form lookup matches against. Surrounding whitespace (such as a
// stray CR from a text feed) is ignored. It returns "" when s cannot be
// parsed.
func hostOfURL(s string) string {
	u, err := url.Parse(strings.TrimSpace(s))
	if err != nil {
		return ""
	}
	return strings.TrimSuffix(strings.ToLower(u.Hostname()), ".")
}