ingest: give every track a source link
The player's "source" link only worked for direct yt-dlp URLs. Two other cases had no linkable page: ListenBrainz picks resolved via ytsearch1: (the locator is a search query) and Subsonic library tracks (an opaque song id). Centralise the rule in Track.page_url and cover both: the yt-dlp fetcher now records the concrete video URL it resolved into source_url, and a Subsonic track links to the stream's new /share endpoint, which asks ingest to mint a public share (createShare) on demand and redirects to it — so a share is only created when a listener actually clicks, never per played track. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
parent
d30f687185
commit
efd7307cc6
9 changed files with 154 additions and 18 deletions
|
|
@ -105,7 +105,7 @@ def _build_pipeline(db: Database, canonicalizer):
|
|||
|
||||
if not providers:
|
||||
log.warning("no source active: the stream plays its local cache only.")
|
||||
return providers, fetchers
|
||||
return providers, fetchers, subsonic_client
|
||||
|
||||
|
||||
def _sweep_temp_files() -> None:
|
||||
|
|
@ -152,12 +152,14 @@ def main() -> None:
|
|||
canonicalizer = _NullCanonicalizer()
|
||||
log.info("Canonicalizer disabled: tracks keyed by (artist, title).")
|
||||
|
||||
providers, fetchers = _build_pipeline(db, canonicalizer)
|
||||
providers, fetchers, subsonic_client = _build_pipeline(db, canonicalizer)
|
||||
scheduler = Scheduler(providers, canonicalizer, db)
|
||||
queue = TrackQueue(scheduler, fetchers, db)
|
||||
queue.start()
|
||||
|
||||
server = IngestServer((config.HTTP_HOST, config.HTTP_PORT), queue, db)
|
||||
server = IngestServer(
|
||||
(config.HTTP_HOST, config.HTTP_PORT), queue, db, subsonic=subsonic_client
|
||||
)
|
||||
log.info(
|
||||
"ingest listening on %s:%d (cache=%s, state=%s)",
|
||||
config.HTTP_HOST,
|
||||
|
|
|
|||
|
|
@ -16,11 +16,13 @@ import json
|
|||
import logging
|
||||
from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer
|
||||
from pathlib import Path
|
||||
from urllib.parse import parse_qs, urlsplit
|
||||
|
||||
from . import config
|
||||
from .db import Database
|
||||
from .models import Track
|
||||
from .queue import TrackQueue
|
||||
from .subsonic import SubsonicClient
|
||||
|
||||
log = logging.getLogger("radieo.api")
|
||||
|
||||
|
|
@ -39,18 +41,24 @@ def annotate_uri(path: Path, track: Track) -> str:
|
|||
f'origin="{esc(track.origin)}"',
|
||||
]
|
||||
# Web page the track was pulled from, so the player can link back to the
|
||||
# source. Only http(s) locators qualify (yt-dlp tracks); a Subsonic song id
|
||||
# is opaque and points at no public page.
|
||||
if track.locator.startswith(("http://", "https://")):
|
||||
fields.append(f'url="{esc(track.locator)}"')
|
||||
# source (see Track.page_url for how it's derived per backend).
|
||||
if track.page_url is not None:
|
||||
fields.append(f'url="{esc(track.page_url)}"')
|
||||
return f'annotate:{",".join(fields)}:{path}'
|
||||
|
||||
|
||||
class IngestServer(ThreadingHTTPServer):
|
||||
def __init__(self, address, queue: TrackQueue, db: Database):
|
||||
def __init__(
|
||||
self,
|
||||
address,
|
||||
queue: TrackQueue,
|
||||
db: Database,
|
||||
subsonic: SubsonicClient | None = None,
|
||||
):
|
||||
super().__init__(address, _Handler)
|
||||
self.queue = queue
|
||||
self.db = db
|
||||
self.subsonic = subsonic
|
||||
|
||||
|
||||
class _Handler(BaseHTTPRequestHandler):
|
||||
|
|
@ -70,6 +78,35 @@ class _Handler(BaseHTTPRequestHandler):
|
|||
else:
|
||||
self._text(404, "not found\n")
|
||||
|
||||
def do_POST(self):  # noqa: N802 (name imposed by BaseHTTPRequestHandler)
    """Route a POST request by its path.

    ``/share`` is handed to ``_serve_share`` together with the parsed query
    string; every other path is answered with a plain-text 404.
    """
    target = urlsplit(self.path)
    if target.path != "/share":
        self._text(404, "not found\n")
        return
    # parse_qs yields {name: [values...]}, the shape _serve_share expects.
    self._serve_share(parse_qs(target.query))
|
||||
|
||||
def _serve_share(self, query: dict[str, list[str]]):
    """Mint a public Subsonic share for one song id, on demand.

    Per the surrounding design, this is called by the stream when a listener
    clicks a subsonic track's source link, so no share is created for tracks
    nobody opens.

    ``query`` is the parsed query string; only the first ``id`` value is used.

    Responses:
        503 -- the server has no Subsonic client configured.
        400 -- ``id`` is missing or empty.
        502 -- the client raised while creating the share.
        200 -- JSON body ``{"url": <share url>}``.
    """
    subsonic = self.server.subsonic
    if subsonic is None:
        self._text(503, "subsonic not configured\n")
        return

    ids = query.get("id")
    song_id = ids[0] if ids else ""
    if not song_id:
        self._text(400, "missing id\n")
        return

    try:
        share_url = subsonic.create_share(song_id)
    except Exception as exc:  # sharing disabled, network error, bad id…
        # Boundary catch: whatever went wrong upstream, the HTTP caller only
        # needs to know that no share could be produced.
        log.warning("createShare failed for %s: %s", song_id, exc)
        self._text(502, "share unavailable\n")
        return

    payload = json.dumps({"url": share_url}) + "\n"
    self._text(200, payload, "application/json; charset=utf-8")
|
||||
|
||||
def _serve_next(self):
|
||||
result = self.server.queue.pop_next()
|
||||
if result is None:
|
||||
|
|
|
|||
|
|
@ -44,4 +44,6 @@ class SubsonicFetcher:
|
|||
raise
|
||||
log.info("downloaded %s -> %s", track, dest.name)
|
||||
# Subsonic files are already tagged by the library server; pass through.
|
||||
# The source link is minted lazily when a listener clicks it — see the
|
||||
# /share endpoint — so no share is created here.
|
||||
return dest, track
|
||||
|
|
|
|||
|
|
@ -22,6 +22,22 @@ from ..models import Track
|
|||
log = logging.getLogger("radieo.fetcher.ytdlp")
|
||||
|
||||
|
||||
def _media_url(info: dict) -> str | None:
|
||||
"""The concrete media page URL yt-dlp actually resolved.
|
||||
|
||||
A ``ytsearch1:`` query resolves to a playlist whose single entry is the
|
||||
chosen video; its ``webpage_url`` is the real, linkable page (the search
|
||||
query string itself is not). Returns None when no http(s) URL is available.
|
||||
"""
|
||||
entries = info.get("entries")
|
||||
if entries:
|
||||
info = entries[0] or {}
|
||||
url = info.get("webpage_url") or info.get("original_url")
|
||||
if url and url.startswith(("http://", "https://")):
|
||||
return url
|
||||
return None
|
||||
|
||||
|
||||
def cache_stem(locator: str) -> str:
|
||||
"""Cache filename stem for a yt-dlp locator (shared with the provider)."""
|
||||
h = hashlib.sha1(locator.encode()).hexdigest()[:16]
|
||||
|
|
@ -79,7 +95,16 @@ class YtdlpFetcher:
|
|||
leftover.unlink(missing_ok=True)
|
||||
raise
|
||||
log.info("downloaded %s -> %s", track, dest.name)
|
||||
return dest, self._retag(dest, info, track)
|
||||
result = self._retag(dest, info, track)
|
||||
# A ``ytsearch1:`` locator (ListenBrainz picks resolved to yt-dlp) is not
|
||||
# a linkable page. yt-dlp did resolve a concrete video, though, so record
|
||||
# its real URL for the player's "source" link — but only when the locator
|
||||
# itself isn't already an http page (direct URLs link to themselves).
|
||||
if not track.locator.startswith(("http://", "https://")):
|
||||
resolved = _media_url(info)
|
||||
if resolved is not None:
|
||||
result = replace(result, source_url=resolved)
|
||||
return dest, result
|
||||
|
||||
def _retag(self, dest: Path, info: dict, track: Track) -> Track:
|
||||
"""For bandcamp downloads, ensure sane ID3 tags and refine the Track.
|
||||
|
|
|
|||
|
|
@ -8,6 +8,7 @@ de-duplication and anti-repeat.
|
|||
|
||||
import re
|
||||
from dataclasses import dataclass
|
||||
from urllib.parse import quote
|
||||
|
||||
_WS = re.compile(r"\s+")
|
||||
|
||||
|
|
@ -43,5 +44,23 @@ class Track:
|
|||
return f"mbid:{self.mbid}"
|
||||
return f"name:{norm_name(self.artist)}|{norm_name(self.title)}"
|
||||
|
||||
@property
|
||||
def page_url(self) -> str | None:
|
||||
"""A link a listener can open for this track, or None.
|
||||
|
||||
- a direct yt-dlp URL is its own page (the locator);
|
||||
- a ListenBrainz pick resolved via ``ytsearch1:`` links to the resolved
|
||||
video URL the fetcher recorded in ``source_url``;
|
||||
- a Subsonic song id is opaque, so it links to the stream's ``/share``
|
||||
endpoint, which mints a public share on demand (only when clicked) and
|
||||
redirects to it — avoiding a share per played track.
|
||||
"""
|
||||
for candidate in (self.locator, self.source_url):
|
||||
if candidate and candidate.startswith(("http://", "https://")):
|
||||
return candidate
|
||||
if self.backend == "subsonic":
|
||||
return f"/share?song={quote(self.locator, safe='')}"
|
||||
return None
|
||||
|
||||
def __str__(self) -> str:
|
||||
return f"{self.artist} — {self.title} [{self.origin}]"
|
||||
|
|
|
|||
|
|
@ -97,8 +97,8 @@ class TrackQueue:
|
|||
"artist": track.artist,
|
||||
"origin": track.origin,
|
||||
}
|
||||
if track.locator.startswith(("http://", "https://")):
|
||||
entry["url"] = track.locator
|
||||
if track.page_url is not None:
|
||||
entry["url"] = track.page_url
|
||||
items.append(entry)
|
||||
return items
|
||||
|
||||
|
|
|
|||
|
|
@ -103,6 +103,20 @@ class SubsonicClient:
|
|||
)
|
||||
return body.get("searchResult3", {}).get("song", [])
|
||||
|
||||
def create_share(self, song_id: str) -> str:
    """Create a public share for a song and return its web URL.

    Calls the ``createShare`` endpoint with ``id=song_id`` and reads the URL
    of the first share in the response.

    Sharing must be enabled on the server (Navidrome:
    ``ND_ENABLESHARING=true``); otherwise the server replies with an error,
    raised as ``SubsonicError``. Each call creates a *new* share, so callers
    should reuse the returned URL rather than re-sharing the same song.

    Raises:
        SubsonicError: the response holds no share, or the first share has
            no (non-empty) ``url``.
    """
    payload = self._get_json("createShare", id=song_id)
    share_list = payload.get("shares", {}).get("share", [])
    # Only the first share matters; an empty list and a missing/empty url are
    # reported the same way.
    share_url = share_list[0].get("url") if share_list else None
    if not share_url:
        raise SubsonicError(f"createShare {song_id}: no share url returned")
    return share_url
|
||||
|
||||
def download(self, song_id: str, dest: Path, hint_ext: str | None = None) -> str:
|
||||
"""Download a song to ``dest``; return the file extension used.
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue