1
0
Fork 0
nemubot/modules/networking/watchWebsite.py

224 lines
6.8 KiB
Python

"""Alert on changes on websites"""
import logging
from random import randint
import urllib.parse
from urllib.parse import urlparse
from nemubot.event import ModuleEvent
from nemubot.exception import IMException
from nemubot.tools.web import getNormalizedURL
from nemubot.tools.xmlparser.node import ModuleState
logger = logging.getLogger("nemubot.module.networking.watchWebsite")
from nemubot.module.more import Response
from . import page
DATAS = None
def load(datas):
"""Register events on watched website"""
global DATAS
DATAS = datas
DATAS.setIndex("url", "watch")
for site in DATAS.getNodes("watch"):
if site.hasNode("alert"):
start_watching(site, randint(-30, 30))
else:
print("No alert defined for this site: " + site["url"])
#DATAS.delChild(site)
def watchedon(channel):
"""Get a list of currently watched URL on the given channel.
"""
res = list()
for site in DATAS.getNodes("watch"):
if site.hasNode("alert"):
for a in site.getNodes("alert"):
if a["channel"] == channel:
res.append("%s (%s)" % (site["url"], site["type"]))
break
return res
def del_site(url, nick, channel, frm_owner):
"""Remove a site from watching list
Argument:
url -- URL to unwatch
"""
o = urlparse(getNormalizedURL(url), "http")
if o.scheme != "" and url in DATAS.index:
site = DATAS.index[url]
for a in site.getNodes("alert"):
if a["channel"] == channel:
# if not (nick == a["nick"] or frm_owner):
# raise IMException("you cannot unwatch this URL.")
site.delChild(a)
if not site.hasNode("alert"):
del_event(site["_evt_id"])
DATAS.delChild(site)
save()
return Response("I don't watch this URL anymore.",
channel=channel, nick=nick)
raise IMException("I didn't watch this URL!")
def add_site(url, nick, channel, server, diffType="diff"):
"""Add a site to watching list
Argument:
url -- URL to watch
"""
o = urlparse(getNormalizedURL(url), "http")
if o.netloc == "":
raise IMException("sorry, I can't watch this URL :(")
alert = ModuleState("alert")
alert["nick"] = nick
alert["server"] = server
alert["channel"] = channel
alert["message"] = "{url} just changed!"
if url not in DATAS.index:
watch = ModuleState("watch")
watch["type"] = diffType
watch["url"] = url
watch["time"] = 123
DATAS.addChild(watch)
watch.addChild(alert)
start_watching(watch)
else:
DATAS.index[url].addChild(alert)
save()
return Response(channel=channel, nick=nick,
message="this site is now under my supervision.")
def format_response(site, link='%s', title='%s', categ='%s', content='%s'):
"""Format and send response for given site
Argument:
site -- DATAS structure representing a site to watch
Keyword arguments:
link -- link to the content
title -- for ATOM feed: title of the new article
categ -- for ATOM feed: category of the new article
content -- content of the page/new article
"""
for a in site.getNodes("alert"):
send_response(a["server"],
Response(a["message"].format(url=site["url"],
link=link,
title=title,
categ=categ,
content=content),
channel=a["channel"],
server=a["server"]))
def alert_change(content, site):
"""Function called when a change is detected on a given site
Arguments:
content -- The new content
site -- DATAS structure representing a site to watch
"""
if site["type"] == "updown":
if site["lastcontent"] is None:
site["lastcontent"] = content is not None
if (content is not None) != site.getBool("lastcontent"):
format_response(site, link=site["url"])
site["lastcontent"] = content is not None
start_watching(site)
return
if content is None:
start_watching(site)
return
if site["type"] == "atom":
from nemubot.tools.feed import Feed
if site["_lastpage"] is None:
if site["lastcontent"] is None or site["lastcontent"] == "":
site["lastcontent"] = content
site["_lastpage"] = Feed(site["lastcontent"])
try:
page = Feed(content)
except:
print("An error occurs during Atom parsing. Restart event...")
start_watching(site)
return
diff = site["_lastpage"] & page
if len(diff) > 0:
site["_lastpage"] = page
diff.reverse()
for d in diff:
site.setIndex("term", "category")
categories = site.index
if len(categories) > 0:
if d.category is None or d.category not in categories:
format_response(site, link=d.link, categ=categories[""]["part"], title=d.title)
else:
format_response(site, link=d.link, categ=categories[d.category]["part"], title=d.title)
else:
format_response(site, link=d.link, title=urllib.parse.unquote(d.title))
else:
start_watching(site)
return # Stop here, no changes, so don't save
else: # Just looking for any changes
format_response(site, link=site["url"], content=content)
site["lastcontent"] = content
start_watching(site)
save()
def fwatch(url):
cnt = page.fetch(url, None)
if cnt is not None:
render = page._render(cnt)
if render is None or render == "":
return cnt
return render
return None
def start_watching(site, offset=0):
"""Launch the event watching given site
Argument:
site -- DATAS structure representing a site to watch
Keyword argument:
offset -- offset time to delay the launch of the first check
"""
o = urlparse(getNormalizedURL(site["url"]), "http")
#print_debug("Add %s event for site: %s" % (site["type"], o.netloc))
try:
evt = ModuleEvent(func=fwatch,
cmp_data=site["lastcontent"],
func_data=site["url"], offset=offset,
interval=site.getInt("time"),
call=alert_change, call_data=site)
site["_evt_id"] = add_event(evt)
except IMException:
logger.exception("Unable to watch %s", site["url"])