ingest: rotate recently-played tracks oldest-first when anti-repeat is exhausted

When every candidate is within the anti-repeat window, the fallback replayed
at random, ignoring how long ago each was heard. With a small feed (or a
window larger than a source's pool) this is the *normal* path, and random
picking clusters the same tracks together.

Play the least-recently-heard candidate instead, so tracks rotate at the
widest spacing the pool allows.

- db: add last_played_at(keys), mapping each key to its most-recent play timestamp.
- providers/listenbrainz: sort the exhausted pool oldest-first.
- scheduler: on exhaustion, return the oldest-played of the drawn candidates
  rather than the last one drawn.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
nemunaire 2026-07-04 16:41:43 +08:00
commit 49783218d8
3 changed files with 38 additions and 6 deletions

View file

@ -79,6 +79,26 @@ class Database:
).fetchall()
return {r["track_key"] for r in rows}
def last_played_at(self, keys: set[str]) -> dict[str, float]:
    """Map each of ``keys`` to the timestamp of its most recent play.

    Keys with no row in ``history`` are absent from the result; callers
    treat a missing key as played "at 0", i.e. longest ago. Used by the
    anti-repeat fallback to play the least-recently-heard candidate
    instead of a random one when every candidate is within the recent
    window.

    The lookup is issued in fixed-size batches so that an arbitrarily
    large ``keys`` set never exceeds the per-statement bound-parameter
    limit of the database engine.

    Args:
        keys: Track keys to look up. An empty set short-circuits without
            touching the database.

    Returns:
        A dict from track key to ``MAX(played_at)`` for that key,
        containing only keys that appear in ``history``.
    """
    if not keys:
        return {}
    # One "?" is bound per key. SQLite rejects statements with more bound
    # parameters than its compile-time cap (999 by default on older
    # builds), so stay comfortably below that for every statement.
    batch_size = 500
    ordered = tuple(keys)
    latest: dict[str, float] = {}
    # Hold the lock across all batches so the whole answer is read
    # without another thread using the connection in between.
    with self._lock:
        for start in range(0, len(ordered), batch_size):
            batch = ordered[start:start + batch_size]
            placeholders = ",".join("?" * len(batch))
            rows = self._conn.execute(
                "SELECT track_key, MAX(played_at) AS last"
                f" FROM history WHERE track_key IN ({placeholders})"
                " GROUP BY track_key",
                batch,
            ).fetchall()
            # Batches hold disjoint keys, so merging never overwrites a
            # newer timestamp with an older one.
            latest.update((r["track_key"], r["last"]) for r in rows)
    return latest
def recent_locators(self, limit: int) -> set[str]:
"""Raw backend locators recently played (providers' cheap local filter)."""
with self._lock:

View file

@ -103,8 +103,16 @@ class ListenBrainzProvider:
if not recs:
return None
recent = self._db.recent_keys(config.ANTIREPEAT_WINDOW)
pool = [r for r in recs if f"mbid:{r['mbid']}" not in recent] or list(recs)
random.shuffle(pool)
fresh = [r for r in recs if f"mbid:{r['mbid']}" not in recent]
if fresh:
random.shuffle(fresh) # genuinely unheard recently: order is free
pool = fresh
else:
# Every rec is within the recent window (small feed / large window):
# don't replay at random, march through them least-recently-heard
# first so each recurs at the widest spacing the feed allows.
last = self._db.last_played_at({f"mbid:{r['mbid']}" for r in recs})
pool = sorted(recs, key=lambda r: last.get(f"mbid:{r['mbid']}", 0.0))
for rec in pool:
track = self._resolve(rec)
if track is not None:

View file

@ -34,7 +34,7 @@ class Scheduler:
if not self._entries:
return None
recent = self._db.recent_keys(config.ANTIREPEAT_WINDOW)
last = None
drawn = []
for _ in range(config.SCHEDULER_MAX_TRIES):
track = self._pick()
if track is None:
@ -42,10 +42,14 @@ class Scheduler:
track = self._canonicalizer.canonicalize(track)
if track.key not in recent:
return track
last = track # recently played; try another
drawn.append(track) # recently played; try another
log.debug("skipping recent %s", track)
# Everything drawn was recent (e.g. tiny library): play the last anyway.
return last
# Every draw was recent (e.g. tiny library): don't just replay the last
# one drawn — play whichever of them we've gone longest without hearing.
if not drawn:
return None
last = self._db.last_played_at({t.key for t in drawn})
return min(drawn, key=lambda t: last.get(t.key, 0.0))
def _pick(self):
"""Weighted provider draw, falling through to the others when empty."""