"""Alert on changes on websites""" import logging from random import randint import urllib.parse from urllib.parse import urlparse from nemubot.event import ModuleEvent from nemubot.exception import IMException from nemubot.tools.web import getNormalizedURL from nemubot.tools.xmlparser.node import ModuleState logger = logging.getLogger("nemubot.module.networking.watchWebsite") from nemubot.module.more import Response from . import page DATAS = None def load(datas): """Register events on watched website""" global DATAS DATAS = datas DATAS.setIndex("url", "watch") for site in DATAS.getNodes("watch"): if site.hasNode("alert"): start_watching(site, randint(-30, 30)) else: print("No alert defined for this site: " + site["url"]) #DATAS.delChild(site) def watchedon(channel): """Get a list of currently watched URL on the given channel. """ res = list() for site in DATAS.getNodes("watch"): if site.hasNode("alert"): for a in site.getNodes("alert"): if a["channel"] == channel: res.append("%s (%s)" % (site["url"], site["type"])) break return res def del_site(url, nick, channel, frm_owner): """Remove a site from watching list Argument: url -- URL to unwatch """ o = urlparse(getNormalizedURL(url), "http") if o.scheme != "" and url in DATAS.index: site = DATAS.index[url] for a in site.getNodes("alert"): if a["channel"] == channel: # if not (nick == a["nick"] or frm_owner): # raise IMException("you cannot unwatch this URL.") site.delChild(a) if not site.hasNode("alert"): del_event(site["_evt_id"]) DATAS.delChild(site) save() return Response("I don't watch this URL anymore.", channel=channel, nick=nick) raise IMException("I didn't watch this URL!") def add_site(url, nick, channel, server, diffType="diff"): """Add a site to watching list Argument: url -- URL to watch """ o = urlparse(getNormalizedURL(url), "http") if o.netloc == "": raise IMException("sorry, I can't watch this URL :(") alert = ModuleState("alert") alert["nick"] = nick alert["server"] = server alert["channel"] = channel alert["message"] = "{url} just changed!" if url not in DATAS.index: watch = ModuleState("watch") watch["type"] = diffType watch["url"] = url watch["time"] = 123 DATAS.addChild(watch) watch.addChild(alert) start_watching(watch) else: DATAS.index[url].addChild(alert) save() return Response(channel=channel, nick=nick, message="this site is now under my supervision.") def format_response(site, link='%s', title='%s', categ='%s', content='%s'): """Format and send response for given site Argument: site -- DATAS structure representing a site to watch Keyword arguments: link -- link to the content title -- for ATOM feed: title of the new article categ -- for ATOM feed: category of the new article content -- content of the page/new article """ for a in site.getNodes("alert"): send_response(a["server"], Response(a["message"].format(url=site["url"], link=link, title=title, categ=categ, content=content), channel=a["channel"], server=a["server"])) def alert_change(content, site): """Function called when a change is detected on a given site Arguments: content -- The new content site -- DATAS structure representing a site to watch """ if site["type"] == "updown": if site["lastcontent"] is None: site["lastcontent"] = content is not None if (content is not None) != site.getBool("lastcontent"): format_response(site, link=site["url"]) site["lastcontent"] = content is not None start_watching(site) return if content is None: start_watching(site) return if site["type"] == "atom": from nemubot.tools.feed import Feed if site["_lastpage"] is None: if site["lastcontent"] is None or site["lastcontent"] == "": site["lastcontent"] = content site["_lastpage"] = Feed(site["lastcontent"]) try: page = Feed(content) except: print("An error occurs during Atom parsing. Restart event...") start_watching(site) return diff = site["_lastpage"] & page if len(diff) > 0: site["_lastpage"] = page diff.reverse() for d in diff: site.setIndex("term", "category") categories = site.index if len(categories) > 0: if d.category is None or d.category not in categories: format_response(site, link=d.link, categ=categories[""]["part"], title=d.title) else: format_response(site, link=d.link, categ=categories[d.category]["part"], title=d.title) else: format_response(site, link=d.link, title=urllib.parse.unquote(d.title)) else: start_watching(site) return # Stop here, no changes, so don't save else: # Just looking for any changes format_response(site, link=site["url"], content=content) site["lastcontent"] = content start_watching(site) save() def fwatch(url): cnt = page.fetch(url, None) if cnt is not None: render = page._render(cnt) if render is None or render == "": return cnt return render return None def start_watching(site, offset=0): """Launch the event watching given site Argument: site -- DATAS structure representing a site to watch Keyword argument: offset -- offset time to delay the launch of the first check """ o = urlparse(getNormalizedURL(site["url"]), "http") #print_debug("Add %s event for site: %s" % (site["type"], o.netloc)) try: evt = ModuleEvent(func=fwatch, cmp_data=site["lastcontent"], func_data=site["url"], offset=offset, interval=site.getInt("time"), call=alert_change, call_data=site) site["_evt_id"] = add_event(evt) except IMException: logger.exception("Unable to watch %s", site["url"])