[yt] Improve module: track last video URL

This commit is contained in:
nemunaire 2015-05-30 15:17:46 +02:00
parent b3274c0dc7
commit 63a6654331

View File

@ -1,27 +1,72 @@
import urllib.request
import re
from urllib.parse import urlparse
from bs4 import BeautifulSoup
from nemubot.exception import IRCException
from nemubot.hooks import hook
from nemubot.tools.web import getURLContent
from more import Response
"""Get information of youtube videos"""
# NOTE(review): presumably the nemubot module-API version this module targets,
# read by the bot's module loader — confirm against the nemubot core.
nemubotversion = 3.4
def help_tiny():
    """Return the one-line description of this module."""
    return "Return the video title from a youtube link"
def help_full():
    """Return the usage text of the ``!yt`` command.

    The text covers both call forms: with an explicit link, and without
    arguments (falling back to the latest youtube link seen on the channel).
    """
    # The former placeholder `return "No help "` preceded this statement and
    # made the real usage text unreachable; it has been removed.
    return "!yt [<url>]: with an argument, get information about the given link; without arguments, use the latest youtube link seen on the current channel."
# Youtube URLs seen so far, keyed by channel (msg.channel). Each value is a
# list that parseresponse() appends to, so the most recent URL is last;
# get_info_yt() pops from the end when called without arguments.
LAST_URLS = dict()
@hook("cmd_hook", "yt")
def get_info_yt(msg):
    """Handle the ``yt`` command: fetch title and description of videos.

    With arguments, each argument is treated as a link (or as a bare video
    identifier when it does not look like a URL). Without arguments, the most
    recently recorded youtube URL for ``msg.channel`` is taken from
    ``LAST_URLS`` (and removed from it).

    Args:
        msg: the command message; ``msg.args`` and ``msg.channel`` are read.

    Returns:
        A ``Response`` for ``msg.channel`` holding first the list of
        "<short link> : <title>" entries, then one message per description.

    Raises:
        IRCException: when no argument is given and no URL is known for the
            channel, or when a fetched page has no title element.
    """
    links = list()

    if len(msg.args) <= 0:
        # LAST_URLS is only mutated, never rebound, so no `global` is needed.
        known = LAST_URLS.get(msg.channel)
        if known:
            # pop() takes the latest URL seen and consumes it.
            links.append(known.pop())
        else:
            raise IRCException("I don't have any youtube URL for now, please provide me one to get information!")
    else:
        links.extend(msg.args)

    titles = list()
    descrip = list()
    for url in links:
        # Anything that does not look like "<scheme>:..." is assumed to be a
        # bare video identifier.
        if not re.search("([a-zA-Z0-9+.-]+:(?://)?[^ ]+)", url):
            url = "http://youtube.com/watch?v=" + url

        soup = BeautifulSoup(getURLContent(url))
        head = soup.head
        body = soup.body

        # find() returns None when the element is absent; guard every lookup
        # instead of failing with AttributeError/TypeError on unexpected pages.
        titl = body.find(id='eow-title') if body is not None else None
        if titl is None:
            raise IRCException("Unable to find video information at %s" % url)

        shortlink = head.find("link", rel="shortlink") if head is not None else None
        # Fall back to the requested URL when the page exposes no short link.
        href = shortlink.get("href", url) if shortlink is not None else url
        titles.append("%s : %s" % (href, titl.text.strip()))

        desc = body.find(id='eow-description')
        if desc is not None:
            descrip.append(desc.text.strip())

    res = Response(channel=msg.channel)
    if len(titles) > 0:
        res.append_message(titles)
    for d in descrip:
        res.append_message(d)
    return res
@hook("msg_default")
def parselisten(msg):
    """Scan an incoming message for youtube URLs.

    Delegates to ``parseresponse`` for the actual URL collection and discards
    its return value: this hook always returns ``None``.
    """
    parseresponse(msg)
    return None
@hook("all_post")
def parseresponse(msg):
    """Record every youtube URL found in ``msg.text`` for ``msg.channel``.

    Matching URLs are appended to ``LAST_URLS[msg.channel]`` in the order they
    appear, so the latest one is always at the end of the list.

    Args:
        msg: the message to scan; ``msg.text`` and ``msg.channel`` are read.

    Returns:
        ``msg`` itself, unchanged.
    """
    text = getattr(msg, "text", None)
    if not isinstance(text, str):
        # Nothing to scan: pass the message through untouched rather than
        # failing on objects without a textual payload.
        return msg

    youtube_hosts = ("youtube.com", "www.youtube.com",
                     "youtu.be", "www.youtu.be")

    for url in re.findall("([a-zA-Z0-9+.-]+:(?://)?[^ ]+)", text):
        try:
            o = urlparse(url)
        except ValueError:
            # urlparse rejects some malformed input (e.g. unbalanced
            # brackets in the host part); arbitrary chat text must not
            # break the hook.
            continue

        # An empty netloc can never match a known host, so no separate
        # check for scheme-only matches such as "foo:bar" is required.
        if o.scheme != "" and o.netloc.lower() in youtube_hosts:
            LAST_URLS.setdefault(msg.channel, list()).append(url)

    return msg