|
|
|
"""Display latest news from a website"""
|
|
|
|
|
|
|
|
# PYTHON STUFFS #######################################################
|
|
|
|
|
|
|
|
import datetime
|
|
|
|
import re
|
|
|
|
from urllib.parse import urljoin
|
|
|
|
|
|
|
|
from bs4 import BeautifulSoup
|
|
|
|
|
|
|
|
from nemubot.exception import IMException
|
|
|
|
from nemubot.hooks import hook
|
|
|
|
from nemubot.tools import web
|
|
|
|
|
|
|
|
from more import Response
|
|
|
|
from nemubot.tools.feed import Feed, AtomEntry
|
|
|
|
|
|
|
|
|
|
|
|
# HELP ################################################################
|
|
|
|
|
|
|
|
def help_full():
    """Return the detailed usage string shown for this module.

    Fixes the "latests" typo in the original user-facing message.
    """
    return "Display the latest news from a given URL: !news URL"
|
|
|
|
|
|
|
|
|
|
|
|
# MODULE CORE #########################################################
|
|
|
|
|
|
|
|
def find_rss_links(url):
    """Yield absolute URLs of the Atom/RSS feeds advertised by a page.

    The page at *url* is fetched and scanned for ``<link>`` elements whose
    ``type`` attribute starts with ``application/atom`` or
    ``application/rss``; each matching ``href`` is resolved against the
    page URL before being yielded.
    """
    url = web.getNormalizedURL(url)
    # Pin the stdlib parser: without an explicit argument BeautifulSoup
    # picks whichever parser happens to be installed, which emits a
    # warning and makes the parse environment-dependent.
    soup = BeautifulSoup(web.getURLContent(url), "html.parser")
    for rss in soup.find_all('link', attrs={"type": re.compile("^application/(atom|rss)")}):
        # href may be relative; make it absolute for the caller.
        yield urljoin(url, rss["href"])
|
|
|
|
|
|
|
|
def get_last_news(url):
    """Fetch *url* as a feed and return its list of entries.

    Returns an empty list when the document cannot be parsed as XML.
    """
    from xml.parsers.expat import ExpatError

    try:
        return Feed(web.getURLContent(url)).entries
    except ExpatError:
        # Not valid XML: treat it as a feed with no news rather than crash.
        return []
|
|
|
|
|
|
|
|
|
|
|
|
# MODULE INTERFACE ####################################################
|
|
|
|
|
|
|
|
@hook.command("news")
def cmd_news(msg):
    """Handle the !news command: display the latest entries of a site's feed.

    The command arguments are joined into a page URL; the page is scanned
    for advertised feeds and the first one found (or the URL itself when
    none is advertised) is read and rendered message by message.
    """
    if not len(msg.args):
        raise IMException("Indicate the URL to visit.")

    url = " ".join(msg.args)

    # Prefer a feed advertised by the page; fall back to the URL itself.
    links = list(find_rss_links(url))
    if not links:
        links = [url]

    res = Response(channel=msg.channel, nomore="No more news from %s" % url)

    for entry in get_last_news(links[0]):
        # \x02 / \x0F are IRC bold-on / formatting-reset control codes.
        title = ("\x02" + web.striphtml(entry.title) + "\x0F") if entry.title else "An article without title"
        if isinstance(entry, AtomEntry):
            when = entry.updated.strftime("on %A %d. %B %Y at %H:%M") if entry.updated else "someday"
        else:
            when = entry.pubDate
        summary = web.striphtml(entry.summary) if entry.summary else ""
        link = entry.link if entry.link else ""
        res.append_message("%s published %s: %s %s" % (title, when, summary, link))

    return res
|