nemubot/modules/news.py

"""Display latests news from a website"""

# PYTHON STUFFS #######################################################

import datetime
import re
from urllib.parse import urljoin

from bs4 import BeautifulSoup

from nemubot.exception import IMException
from nemubot.hooks import hook
from nemubot.tools import web

from nemubot.module.more import Response
from nemubot.module.urlreducer import reduce_inline
from nemubot.tools.feed import Feed, AtomEntry


# HELP ################################################################

def help_full():
    """Return the one-line usage description of the ``!news`` command."""
    usage = "Display the latests news from a given URL: !news URL"
    return usage


# MODULE CORE #########################################################

def find_rss_links(url):
    """Yield the absolute URLs of the feeds advertised by a web page.

    The page at *url* is downloaded and scanned for ``<link>`` elements whose
    ``type`` attribute starts with ``application/atom`` or
    ``application/rss``.

    Arguments:
    url -- address of the page to inspect; it is normalized before use

    Each yielded value is the ``href`` of a matching element, resolved
    against the normalized page URL.  Matching elements with a missing or
    empty ``href`` are skipped, as they do not designate any feed.
    """
    url = web.getNormalizedURL(url)
    # Name the parser explicitly: without it, BeautifulSoup picks whichever
    # parser happens to be installed (and warns about it), so the same page
    # could be parsed differently from one host to another.
    soup = BeautifulSoup(web.getURLContent(url), "html.parser")
    feed_type = re.compile(r"^application/(atom|rss)")
    for rss in soup.find_all('link', attrs={"type": feed_type}):
        # Use .get(): indexing a tag without href raises KeyError, which
        # would abort the whole lookup because of one malformed element.
        href = rss.get("href")
        if not href:
            continue
        yield urljoin(url, href)

def get_last_news(url):
    """Download *url*, parse it as a feed and return its entries.

    Arguments:
    url -- address of the feed to fetch

    Returns the ``entries`` of the parsed ``Feed``, or an empty list when
    an ``ExpatError`` is raised while fetching or parsing.
    """
    # Function-scope import: the exception is only needed here.
    from xml.parsers.expat import ExpatError

    try:
        content = web.getURLContent(url)
        entries = Feed(content).entries
    except ExpatError:
        entries = []
    return entries


# MODULE INTERFACE ####################################################

@hook.command("news")
def cmd_news(msg):
    """Handle the ``news`` command: list the entries of a site's feed.

    The command arguments are joined with spaces to form the URL.  Feed
    links are looked up on that page; if none is found, the URL itself is
    treated as the feed.  Only the first candidate is fetched.

    Arguments:
    msg -- the received command; its ``args`` and ``channel`` are read

    Returns a ``Response`` holding one message per feed entry.
    Raises IMException when no argument is given.
    """
    if len(msg.args) == 0:
        raise IMException("Indicate the URL to visit.")

    url = " ".join(msg.args)
    # Fall back on the given URL when the page advertises no feed.
    feeds = list(find_rss_links(url)) or [url]

    res = Response(channel=msg.channel,
                   nomore="No more news from %s" % url,
                   line_treat=reduce_inline)

    for entry in get_last_news(feeds[0]):
        # Title, wrapped in the \x02 ... \x0F control characters.
        if entry.title:
            title = "\x02" + web.striphtml(entry.title) + "\x0F"
        else:
            title = "An article without title"

        # AtomEntry exposes `updated`, formatted here with strftime; any
        # other entry type contributes its `pubDate` value untouched.
        if isinstance(entry, AtomEntry):
            if entry.updated:
                when = entry.updated.strftime("on %A %d. %B %Y at %H:%M")
            else:
                when = "someday"
        else:
            when = entry.pubDate

        if entry.summary:
            summary = web.striphtml(entry.summary)
        else:
            summary = ""

        link = entry.link or ""

        res.append_message("%s published %s: %s %s"
                           % (title, when, summary, link))

    return res
[news] Introduce new module News: it fetchs atom feed from a website and display it 2015-09-28 04:53:59 +00:00			`"""Display latests news from a website"""`

			`# PYTHON STUFFS #######################################################`

			`import datetime`
			`import re`
			`from urllib.parse import urljoin`

			`from bs4 import BeautifulSoup`

Replace IRCException by IMException, as nemubot is not only built for IRC 2015-10-30 20:57:45 +00:00			`from nemubot.exception import IMException`
[news] Introduce new module News: it fetchs atom feed from a website and display it 2015-09-28 04:53:59 +00:00			`from nemubot.hooks import hook`
			`from nemubot.tools import web`

Virtualy move all nemubot modules into nemubot.module.* hierarchy, to avoid conflict with system/vendor modules 2017-08-27 16:22:53 +00:00			`from nemubot.module.more import Response`
news: reduce link URL by default 2019-02-02 18:56:41 +00:00			`from nemubot.module.urlreducer import reduce_inline`
[news] Introduce new module News: it fetchs atom feed from a website and display it 2015-09-28 04:53:59 +00:00			`from nemubot.tools.feed import Feed, AtomEntry`


			`# HELP ################################################################`

			`def help_full():`
			`return "Display the latests news from a given URL: !news URL"`


			`# MODULE CORE #########################################################`

			`def find_rss_links(url):`
[news] normalize URL before performing a join 2015-10-11 15:08:30 +00:00			`url = web.getNormalizedURL(url)`
[news] Introduce new module News: it fetchs atom feed from a website and display it 2015-09-28 04:53:59 +00:00			`soup = BeautifulSoup(web.getURLContent(url))`
[news] Add support for RSS feeds and catch ExpatError when trying to parse a bad URL 2015-09-30 22:27:45 +00:00			`for rss in soup.find_all('link', attrs={"type": re.compile("^application/(atom\|rss)")}):`
[news] Introduce new module News: it fetchs atom feed from a website and display it 2015-09-28 04:53:59 +00:00			`yield urljoin(url, rss["href"])`

			`def get_last_news(url):`
[news] Add support for RSS feeds and catch ExpatError when trying to parse a bad URL 2015-09-30 22:27:45 +00:00			`from xml.parsers.expat import ExpatError`
			`try:`
			`feed = Feed(web.getURLContent(url))`
			`return feed.entries`
			`except ExpatError:`
			`return []`
[news] Introduce new module News: it fetchs atom feed from a website and display it 2015-09-28 04:53:59 +00:00

			`# MODULE INTERFACE ####################################################`

Refactors hooks registration 2015-11-02 19:19:12 +00:00			`@hook.command("news")`
[news] Introduce new module News: it fetchs atom feed from a website and display it 2015-09-28 04:53:59 +00:00			`def cmd_news(msg):`
			`if not len(msg.args):`
Replace IRCException by IMException, as nemubot is not only built for IRC 2015-10-30 20:57:45 +00:00			`raise IMException("Indicate the URL to visit.")`
[news] Introduce new module News: it fetchs atom feed from a website and display it 2015-09-28 04:53:59 +00:00
			`url = " ".join(msg.args)`
			`links = [x for x in find_rss_links(url)]`
			`if len(links) == 0: links = [ url ]`

news: reduce link URL by default 2019-02-02 18:56:41 +00:00			`res = Response(channel=msg.channel, nomore="No more news from %s" % url, line_treat=reduce_inline)`
[news] Introduce new module News: it fetchs atom feed from a website and display it 2015-09-28 04:53:59 +00:00			`for n in get_last_news(links[0]):`
			`res.append_message("%s published %s: %s %s" % (("\x02" + web.striphtml(n.title) + "\x0F") if n.title else "An article without title",`
			`(n.updated.strftime("on %A %d. %B %Y at %H:%M") if n.updated else "someday") if isinstance(n, AtomEntry) else n.pubDate,`
			`web.striphtml(n.summary) if n.summary else "",`
			`n.link if n.link else ""))`
news: reduce link URL by default 2019-02-02 18:56:41 +00:00
[news] Introduce new module News: it fetchs atom feed from a website and display it 2015-09-28 04:53:59 +00:00			`return res`