diff --git a/modules/youtube-title.py b/modules/youtube-title.py index 5142115..24cb82b 100644 --- a/modules/youtube-title.py +++ b/modules/youtube-title.py @@ -1,27 +1,72 @@ -import urllib.request +import re +from urllib.parse import urlparse from bs4 import BeautifulSoup + from nemubot.exception import IRCException from nemubot.hooks import hook from nemubot.tools.web import getURLContent from more import Response +"""Get information of youtube videos""" + nemubotversion = 3.4 -def help_tiny(): - return "Return the video title from a youtube link" - def help_full(): - return "No help " + return "!yt []: with an argument, get information about the given link; without arguments, use the latest youtube link seen on the current channel." + +LAST_URLS = dict() @hook("cmd_hook", "yt") def get_info_yt(msg): - if len(msg.args) <= 0: - raise IRCException("Please provide an URL from youtube.com") + links = list() - res = list() - for url in msg.args: - req = getURLContent(url) - soup = BeautifulSoup(req) - desc = soup.body.find(id='eow-title') - res.append(desc.text.strip()) - return Response(res, channel=msg.channel, nomore="No more description") + if len(msg.args) <= 0: + global LAST_URLS + if msg.channel in LAST_URLS and len(LAST_URLS[msg.channel]) > 0: + links.append(LAST_URLS[msg.channel].pop()) + else: + raise IRCException("I don't have any youtube URL for now, please provide me one to get information!") + else: + for url in msg.args: + links.append(url) + + titles = list() + descrip = list() + for url in links: + if not re.findall("([a-zA-Z0-9+.-]+:(?://)?[^ ]+)", url): + url = "http://youtube.com/watch?v=" + url + soup = BeautifulSoup(getURLContent(url)) + shortlink = soup.head.find("link", rel="shortlink") + titl = soup.body.find(id='eow-title') + titles.append("%s : %s" % (shortlink["href"], titl.text.strip())) + desc = soup.body.find(id='eow-description') + descrip.append(desc.text.strip()) + res = Response(channel=msg.channel) + if len(titles) > 0: + res.append_message(titles) + for d in descrip: + res.append_message(d) + return res + + +@hook("msg_default") +def parselisten(msg): + parseresponse(msg) + return None + + +@hook("all_post") +def parseresponse(msg): + global LAST_URLS + urls = re.findall("([a-zA-Z0-9+.-]+:(?://)?[^ ]+)", msg.text) + for url in urls: + o = urlparse(url) + if o.scheme != "": + if o.netloc == "" and len(o.path) < 10: + continue + if (o.netloc == "youtube.com" or o.netloc == "www.youtube.com" or + o.netloc == "youtu.be" or o.netloc == "www.youtu.be"): + if msg.channel not in LAST_URLS: + LAST_URLS[msg.channel] = list() + LAST_URLS[msg.channel].append(url) + return msg