ingest: give every track a source link

The player's "source" link only worked for direct yt-dlp URLs. Two other
cases had no linkable page: ListenBrainz picks resolved via ytsearch1: (the
locator is a search query) and Subsonic library tracks (an opaque song id).

Centralise the rule in Track.page_url and cover both cases: the yt-dlp fetcher
now records, in source_url, the concrete video URL it resolved, and a Subsonic
track links to the stream's new /share endpoint, which asks ingest to mint a
public share (createShare) on demand and redirects to it — so a share is only
created when a listener actually clicks, never per played track.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
nemunaire 2026-07-04 11:42:29 +08:00
commit efd7307cc6
9 changed files with 154 additions and 18 deletions

View file

@ -105,7 +105,7 @@ def _build_pipeline(db: Database, canonicalizer):
if not providers:
log.warning("no source active: the stream plays its local cache only.")
return providers, fetchers
return providers, fetchers, subsonic_client
def _sweep_temp_files() -> None:
@ -152,12 +152,14 @@ def main() -> None:
canonicalizer = _NullCanonicalizer()
log.info("Canonicalizer disabled: tracks keyed by (artist, title).")
providers, fetchers = _build_pipeline(db, canonicalizer)
providers, fetchers, subsonic_client = _build_pipeline(db, canonicalizer)
scheduler = Scheduler(providers, canonicalizer, db)
queue = TrackQueue(scheduler, fetchers, db)
queue.start()
server = IngestServer((config.HTTP_HOST, config.HTTP_PORT), queue, db)
server = IngestServer(
(config.HTTP_HOST, config.HTTP_PORT), queue, db, subsonic=subsonic_client
)
log.info(
"ingest listening on %s:%d (cache=%s, state=%s)",
config.HTTP_HOST,

View file

@ -16,11 +16,13 @@ import json
import logging
from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer
from pathlib import Path
from urllib.parse import parse_qs, urlsplit
from . import config
from .db import Database
from .models import Track
from .queue import TrackQueue
from .subsonic import SubsonicClient
log = logging.getLogger("radieo.api")
@ -39,18 +41,24 @@ def annotate_uri(path: Path, track: Track) -> str:
f'origin="{esc(track.origin)}"',
]
# Web page the track was pulled from, so the player can link back to the
# source. Only http(s) locators qualify (yt-dlp tracks); a Subsonic song id
# is opaque and points at no public page.
if track.locator.startswith(("http://", "https://")):
fields.append(f'url="{esc(track.locator)}"')
# source (see Track.page_url for how it's derived per backend).
if track.page_url is not None:
fields.append(f'url="{esc(track.page_url)}"')
return f'annotate:{",".join(fields)}:{path}'
class IngestServer(ThreadingHTTPServer):
# Threaded HTTP server whose request handlers (_Handler) reach the shared
# track queue, the database and the optional Subsonic client through
# attributes stored on the server instance.
# NOTE(review): this is a diff view — the single-line ``__init__`` header just
# below appears to be the previous signature, superseded by the multi-line one
# that follows it; confirm against the actual file.
def __init__(self, address, queue: TrackQueue, db: Database):
def __init__(
self,
address,
queue: TrackQueue,
db: Database,
subsonic: SubsonicClient | None = None,
):
# Every connection is served by _Handler.
super().__init__(address, _Handler)
self.queue = queue
self.db = db
# Stays None when no Subsonic client is supplied; the /share handler then
# answers 503 instead of trying to create a share.
self.subsonic = subsonic
class _Handler(BaseHTTPRequestHandler):
@ -70,6 +78,35 @@ class _Handler(BaseHTTPRequestHandler):
else:
self._text(404, "not found\n")
def do_POST(self):  # noqa: N802 (name imposed by BaseHTTPRequestHandler)
    """Route POST requests: only ``/share`` is served, anything else is a 404.

    The query string of a ``/share`` request is parsed and handed to
    ``_serve_share`` as a mapping of parameter name to list of values.
    """
    target = urlsplit(self.path)
    if target.path != "/share":
        self._text(404, "not found\n")
        return
    self._serve_share(parse_qs(target.query))
def _serve_share(self, query: dict[str, list[str]]):
    """Create a public Subsonic share for one song id and reply with its URL.

    Shares are minted on demand — when a listener opens a subsonic track's
    source link — so tracks nobody opens never get a share.

    Replies:
    - 503 when the server has no Subsonic client;
    - 400 when the ``id`` query parameter is missing or empty;
    - 502 when creating the share fails for any reason;
    - 200 with a JSON body ``{"url": ...}`` on success.
    """
    subsonic = self.server.subsonic
    if subsonic is None:
        self._text(503, "subsonic not configured\n")
        return

    # Only the first ``id`` value is considered; absent or empty means 400.
    id_values = query.get("id") or [""]
    song_id = id_values[0]
    if not song_id:
        self._text(400, "missing id\n")
        return

    try:
        share_url = subsonic.create_share(song_id)
    except Exception as exc:  # sharing disabled, network error, bad id…
        log.warning("createShare failed for %s: %s", song_id, exc)
        self._text(502, "share unavailable\n")
        return

    payload = json.dumps({"url": share_url}) + "\n"
    self._text(200, payload, "application/json; charset=utf-8")
def _serve_next(self):
result = self.server.queue.pop_next()
if result is None:

View file

@ -44,4 +44,6 @@ class SubsonicFetcher:
raise
log.info("downloaded %s -> %s", track, dest.name)
# Subsonic files are already tagged by the library server; pass through.
# The source link is minted lazily when a listener clicks it — see the
# /share endpoint — so no share is created here.
return dest, track

View file

@ -22,6 +22,22 @@ from ..models import Track
log = logging.getLogger("radieo.fetcher.ytdlp")
def _media_url(info: dict) -> str | None:
"""The concrete media page URL yt-dlp actually resolved.
A ``ytsearch1:`` query resolves to a playlist whose single entry is the
chosen video; its ``webpage_url`` is the real, linkable page (the search
query string itself is not). Returns None when no http(s) URL is available.
"""
entries = info.get("entries")
if entries:
info = entries[0] or {}
url = info.get("webpage_url") or info.get("original_url")
if url and url.startswith(("http://", "https://")):
return url
return None
def cache_stem(locator: str) -> str:
"""Cache filename stem for a yt-dlp locator (shared with the provider)."""
h = hashlib.sha1(locator.encode()).hexdigest()[:16]
@ -79,7 +95,16 @@ class YtdlpFetcher:
leftover.unlink(missing_ok=True)
raise
log.info("downloaded %s -> %s", track, dest.name)
return dest, self._retag(dest, info, track)
result = self._retag(dest, info, track)
# A ``ytsearch1:`` locator (ListenBrainz picks resolved to yt-dlp) is not
# a linkable page. yt-dlp did resolve a concrete video, though, so record
# its real URL for the player's "source" link — but only when the locator
# itself isn't already an http page (direct URLs link to themselves).
if not track.locator.startswith(("http://", "https://")):
resolved = _media_url(info)
if resolved is not None:
result = replace(result, source_url=resolved)
return dest, result
def _retag(self, dest: Path, info: dict, track: Track) -> Track:
"""For bandcamp downloads, ensure sane ID3 tags and refine the Track.

View file

@ -8,6 +8,7 @@ de-duplication and anti-repeat.
import re
from dataclasses import dataclass
from urllib.parse import quote
_WS = re.compile(r"\s+")
@ -43,5 +44,23 @@ class Track:
return f"mbid:{self.mbid}"
return f"name:{norm_name(self.artist)}|{norm_name(self.title)}"
@property
def page_url(self) -> str | None:
"""A link a listener can open for this track, or None.
- a direct yt-dlp URL is its own page (the locator);
- a ListenBrainz pick resolved via ``ytsearch1:`` links to the resolved
video URL the fetcher recorded in ``source_url``;
- a Subsonic song id is opaque, so it links to the stream's ``/share``
endpoint, which mints a public share on demand (only when clicked) and
redirects to it avoiding a share per played track.
"""
for candidate in (self.locator, self.source_url):
if candidate and candidate.startswith(("http://", "https://")):
return candidate
if self.backend == "subsonic":
return f"/share?song={quote(self.locator, safe='')}"
return None
def __str__(self) -> str:
    """Return a short human-readable label: ``artist - title [origin]``."""
    # A separator between artist and title keeps the two from running
    # together (previously rendered as e.g. "ArtistTitle [origin]").
    return f"{self.artist} - {self.title} [{self.origin}]"

View file

@ -97,8 +97,8 @@ class TrackQueue:
"artist": track.artist,
"origin": track.origin,
}
if track.locator.startswith(("http://", "https://")):
entry["url"] = track.locator
if track.page_url is not None:
entry["url"] = track.page_url
items.append(entry)
return items

View file

@ -103,6 +103,20 @@ class SubsonicClient:
)
return body.get("searchResult3", {}).get("song", [])
def create_share(self, song_id: str) -> str:
    """Create a public share for one song and return the share's web URL.

    Sharing must be enabled on the server (Navidrome:
    ``ND_ENABLESHARING=true``); otherwise the server replies with an error,
    raised as ``SubsonicError``. ``SubsonicError`` is also raised when the
    response carries no share or the first share has no ``url``.

    Every call creates a *new* share, so callers should hold on to the
    returned URL instead of sharing the same song again.
    """
    response = self._get_json("createShare", id=song_id)
    share_list = response.get("shares", {}).get("share", [])
    # Only the first share in the response is used.
    share_url = share_list[0].get("url") if share_list else None
    if not share_url:
        raise SubsonicError(f"createShare {song_id}: no share url returned")
    return share_url
def download(self, song_id: str, dest: Path, hint_ext: str | None = None) -> str:
"""Download a song to ``dest``; return the file extension used.