2015-05-30 13:17:46 +00:00
import re
from urllib . parse import urlparse
2015-05-29 11:54:00 +00:00
from bs4 import BeautifulSoup
2015-05-30 13:17:46 +00:00
2015-05-29 11:54:00 +00:00
from nemubot . exception import IRCException
from nemubot . hooks import hook
from nemubot . tools . web import getURLContent
from more import Response
"""Get information about YouTube videos."""

# Minimum nemubot API version this module targets.
nemubotversion = 3.4
def help_full():
    """Return the long help text for the !yt command."""
    return ("!yt [<url>]: with an argument, get information about the given "
            "link; without arguments, use the latest youtube link seen on "
            "the current channel.")
# Per-channel memory of YouTube links seen, consumed by !yt without argument.
LAST_URLS = {}
@hook("cmd_hook", "yt")
def get_info_yt(msg):
    """Handle the !yt command: reply with title and description of videos.

    With arguments, each argument is treated as a URL (a bare video id is
    expanded to a watch URL); without arguments, pop the most recent
    YouTube link previously seen on the current channel.

    Raises IRCException when called without argument and no link was seen.
    """
    if msg.args:
        links = list(msg.args)
    else:
        global LAST_URLS
        if msg.channel in LAST_URLS and LAST_URLS[msg.channel]:
            links = [LAST_URLS[msg.channel].pop()]
        else:
            raise IRCException("I don't have any youtube URL for now, "
                               "please provide me one to get information!")

    titles = list()
    descrip = list()
    for url in links:
        # Anything that doesn't look like a URL with a scheme is assumed
        # to be a bare video id and is expanded to a watch URL.
        if not re.findall(r"([a-zA-Z0-9+.-]+:(?://)?[^ ]+)", url):
            url = "http://youtube.com/watch?v=" + url

        soup = BeautifulSoup(getURLContent(url))
        # The canonical short link (youtu.be/...) lives in the page head.
        shortlink = soup.head.find("link", rel="shortlink")
        titl = soup.body.find(id='eow-title')
        titles.append("%s : %s" % (shortlink["href"], titl.text.strip()))
        desc = soup.body.find(id='eow-description')
        descrip.append(desc.text.strip())

    res = Response(channel=msg.channel)
    if titles:
        res.append_message(titles)
    for d in descrip:
        res.append_message(d)
    return res
@hook("msg_default")
def parselisten(msg):
    """Watch every channel message for YouTube links (fire-and-forget)."""
    parseresponse(msg)
@hook("all_post")
def parseresponse(msg):
    """Remember YouTube links found in *msg* for later !yt without argument.

    Scans msg.text for URL-looking tokens and stores those whose host is a
    YouTube domain into LAST_URLS keyed by channel. Returns msg unchanged.
    """
    global LAST_URLS
    for url in re.findall(r"([a-zA-Z0-9+.-]+:(?://)?[^ ]+)", msg.text):
        o = urlparse(url)
        if o.scheme == "":
            continue
        # Skip scheme-only tokens (e.g. "http: something") whose path is
        # too short to be a real URL.
        if o.netloc == "" and len(o.path) < 10:
            continue
        if o.netloc in ("youtube.com", "www.youtube.com",
                        "youtu.be", "www.youtu.be"):
            LAST_URLS.setdefault(msg.channel, []).append(url)
    return msg