Change !yt to use youtube-dl

youtube-dl does a good job at extracting all the information from hundreds of
websites, presenting it in a standardized way and staying up-to-date.

Signed-off-by: Ivan Delalande <colona@ycc.fr>
This commit is contained in:
Ivan Delalande 2015-06-11 07:06:44 +02:00 committed by nemunaire
parent 5f29fe8167
commit 8a7ca25d6f

View File

@ -1,6 +1,5 @@
import re
from urllib.parse import urlparse from urllib.parse import urlparse
from bs4 import BeautifulSoup import re, json, subprocess
from nemubot.exception import IRCException from nemubot.exception import IRCException
from nemubot.hooks import hook from nemubot.hooks import hook
@ -14,8 +13,45 @@ nemubotversion = 3.4
def help_full(): def help_full():
return "!yt [<url>]: with an argument, get information about the given link; without arguments, use the latest youtube link seen on the current channel." return "!yt [<url>]: with an argument, get information about the given link; without arguments, use the latest youtube link seen on the current channel."
def _get_ytdl(links):
    """Fetch and format video metadata for *links* using youtube-dl.

    Runs ``youtube-dl -j`` once for all links; youtube-dl emits one JSON
    object per line, which is parsed into a human-readable summary string
    per video.

    Argument:
    links -- iterable of URLs (or bare video IDs) to query

    Returns a list of formatted description strings, one per video.
    Raises IRCException if youtube-dl exits with an error or yields no
    usable information.
    """
    cmd = 'youtube-dl -j --'.split()
    cmd.extend(links)
    res = []
    with subprocess.Popen(cmd, stdout=subprocess.PIPE) as p:
        # Drain stdout *before* waiting: calling wait() first can deadlock
        # if the child fills the pipe buffer while we are blocked in wait()
        # (see the warning in the subprocess documentation).
        out = p.stdout.read()
        # != 0 (rather than > 0) also catches death-by-signal, which is
        # reported as a negative return code on POSIX.
        if p.wait() != 0:
            raise IRCException("Error while retrieving video information.")
    for line in out.split(b"\n"):
        if not line:
            continue
        info = json.loads(line.decode('utf-8'))
        # Prefer the full title; skip entries carrying no title at all.
        title = info.get('fulltitle') or info.get('title')
        if not title:
            continue
        localres = title
        if info.get('duration'):
            d = info['duration']
            localres += ' [{0}:{1:06.3f}]'.format(int(d / 60), d % 60)
        if info.get('age_limit'):
            localres += ' [-{}]'.format(info['age_limit'])
        if info.get('uploader'):
            localres += ' by {}'.format(info['uploader'])
        if info.get('upload_date'):
            localres += ' on {}'.format(info['upload_date'])
        if info.get('description'):
            localres += ': ' + info['description']
        if info.get('webpage_url'):
            localres += ' | ' + info['webpage_url']
        res.append(localres)
    if not res:
        raise IRCException("No video information to retrieve about this. Sorry!")
    return res
LAST_URLS = dict() LAST_URLS = dict()
@hook("cmd_hook", "yt") @hook("cmd_hook", "yt")
def get_info_yt(msg): def get_info_yt(msg):
links = list() links = list()
@ -30,22 +66,10 @@ def get_info_yt(msg):
for url in msg.args: for url in msg.args:
links.append(url) links.append(url)
titles = list() data = _get_ytdl(links)
descrip = list()
for url in links:
if not re.findall("([a-zA-Z0-9+.-]+:(?://)?[^ ]+)", url):
url = "http://youtube.com/watch?v=" + url
soup = BeautifulSoup(getURLContent(url))
shortlink = soup.head.find("link", rel="shortlink")
titl = soup.body.find(id='eow-title')
titles.append("%s : %s" % (shortlink["href"], titl.text.strip()))
desc = soup.body.find(id='eow-description')
descrip.append(desc.text.strip())
res = Response(channel=msg.channel) res = Response(channel=msg.channel)
if len(titles) > 0: for msg in data:
res.append_message(titles) res.append_message(msg)
for d in descrip:
res.append_message(d)
return res return res
@ -64,8 +88,6 @@ def parseresponse(msg):
if o.scheme != "": if o.scheme != "":
if o.netloc == "" and len(o.path) < 10: if o.netloc == "" and len(o.path) < 10:
continue continue
if (o.netloc == "youtube.com" or o.netloc == "www.youtube.com" or
o.netloc == "youtu.be" or o.netloc == "www.youtu.be"):
if msg.channel not in LAST_URLS: if msg.channel not in LAST_URLS:
LAST_URLS[msg.channel] = list() LAST_URLS[msg.channel] = list()
LAST_URLS[msg.channel].append(url) LAST_URLS[msg.channel].append(url)