2014-08-27 23:39:31 +00:00
|
|
|
"""Alert on changes on websites"""
|
|
|
|
|
2014-09-19 17:28:48 +00:00
|
|
|
from random import randint
|
2012-11-07 17:55:17 +00:00
|
|
|
import urllib.parse
|
|
|
|
from urllib.parse import urlparse
|
2012-06-29 17:20:29 +00:00
|
|
|
|
2015-01-04 22:57:09 +00:00
|
|
|
from nemubot.event import ModuleEvent
|
|
|
|
from nemubot.exception import IRCException
|
2015-01-03 19:34:44 +00:00
|
|
|
from nemubot.hooks import hook
|
|
|
|
from nemubot.tools.xmlparser.node import ModuleState
|
2014-08-12 18:10:19 +00:00
|
|
|
|
2014-09-26 16:00:22 +00:00
|
|
|
# NOTE(review): presumably the nemubot version this module is written for -- confirm
nemubotversion = 3.4
|
|
|
|
|
2015-01-03 19:34:44 +00:00
|
|
|
from more import Response
|
|
|
|
|
2012-09-01 14:16:31 +00:00
|
|
|
from .atom import Atom
|
2014-12-04 20:51:28 +00:00
|
|
|
from . import page
|
2012-06-29 17:20:29 +00:00
|
|
|
|
2014-12-29 06:50:27 +00:00
|
|
|
# Data store of the module; stays None until load() assigns it.
DATAS = None
|
2012-06-29 17:20:29 +00:00
|
|
|
|
2014-12-29 06:50:27 +00:00
|
|
|
|
|
|
|
def load(datas):
    """Remember the module data and start watching every stored site.

    Argument:
    datas -- data store of the module; it is kept in the DATAS global
    """

    global DATAS
    DATAS = datas

    # Index the "watch" nodes by their "url" attribute so that other
    # functions can look a site up through DATAS.index.
    DATAS.setIndex("url", "watch")

    for watched in DATAS.getNodes("watch"):
        if not watched.hasNode("alert"):
            # Nobody asked to be alerted: report it and leave the node as is.
            print("No alert defined for this site: " + watched["url"])
            continue

        # Each site gets a random offset between -30 and 30 (inclusive);
        # presumably to avoid checking every site at the same moment.
        start_watching(watched, randint(-30, 30))
|
2012-09-01 14:16:31 +00:00
|
|
|
|
|
|
|
|
2014-12-29 06:50:27 +00:00
|
|
|
def del_site(url, nick, channel, frm_owner):
    """Remove a site from watching list

    Arguments:
    url -- URL to unwatch
    nick -- nick of the user asking for the removal
    channel -- channel the alert was registered for
    frm_owner -- when true, the requester may remove anybody's alert

    Returns a Response confirming the removal.

    Raises IRCException when the URL is not watched on this channel, or when
    every alert registered for this channel belongs to somebody else and the
    requester is not an owner.
    """

    # With "http" as default scheme the parsed scheme is not expected to be
    # empty; the check is kept as a guard.
    o = urlparse(url, "http")
    if o.scheme == "" or url not in DATAS.index:
        raise IRCException("I didn't watch this URL!")

    site = DATAS.index[url]
    denied = False
    for a in site.getNodes("alert"):
        if a["channel"] != channel:
            continue

        if not (frm_owner or nick == a["nick"]):
            # Somebody else's alert: keep looking, the requester may have
            # registered an alert of their own on the same channel.
            denied = True
            continue

        site.delChild(a)
        if not site.hasNode("alert"):
            # Last alert removed: stop the event and forget the site.
            del_event(site["_evt_id"])
            DATAS.delChild(site)
        save()
        return Response("I don't watch this URL anymore.",
                        channel=channel, nick=nick)

    if denied:
        raise IRCException("you cannot unwatch this URL.")
    raise IRCException("I didn't watch this URL!")
|
2012-09-01 14:16:31 +00:00
|
|
|
|
2014-08-12 18:10:19 +00:00
|
|
|
|
2014-12-29 06:50:27 +00:00
|
|
|
def add_site(url, nick, channel, server, diffType="diff"):
    """Register an alert for a URL, creating the watch entry when needed

    Arguments:
    url -- URL to watch
    nick -- nick of the user asking for the alert
    channel -- channel the alert is registered for
    server -- server stored on the alert

    Keyword argument:
    diffType -- value stored as "type" on a newly created watch entry

    Returns a Response confirming the registration; raises IRCException when
    the URL has no network location.
    """

    if urlparse(url, "http").netloc == "":
        raise IRCException("sorry, I can't watch this URL :(")

    # Describe who wants to be told, and with which message template.
    subscription = ModuleState("alert")
    for key, value in (("nick", nick),
                       ("server", server),
                       ("channel", channel),
                       ("message", "{url} just changed!")):
        subscription[key] = value

    if url in DATAS.index:
        # Already watched: only one more alert to attach.
        DATAS.index[url].addChild(subscription)
    else:
        entry = ModuleState("watch")
        entry["type"] = diffType
        entry["url"] = url
        entry["time"] = 123
        DATAS.addChild(entry)
        entry.addChild(subscription)
        start_watching(entry)

    save()
    confirmation = "this site is now under my supervision."
    return Response(channel=channel, nick=nick, message=confirmation)
|
|
|
|
|
2012-09-01 14:16:31 +00:00
|
|
|
|
2014-09-10 10:19:25 +00:00
|
|
|
def format_response(site, link='%s', title='%s', categ='%s', content='%s'):
    """Fill in the message of every alert of a site and send it

    Argument:
    site -- DATAS structure representing a site to watch

    Keyword arguments (each one is left as a literal '%s' when not given):
    link -- link to the content
    title -- for ATOM feed: title of the new article
    categ -- for ATOM feed: category of the new article
    content -- content of the page/new article
    """

    for subscriber in site.getNodes("alert"):
        # The alert message is a str.format template; "url" always comes
        # from the site itself.
        text = subscriber["message"].format(url=site["url"],
                                            link=link,
                                            title=title,
                                            categ=categ,
                                            content=content)
        reply = Response(text,
                         channel=subscriber["channel"],
                         server=subscriber["server"])
        send_response(subscriber["server"], reply)
|
|
|
|
|
2012-11-07 17:55:17 +00:00
|
|
|
|
2012-09-01 14:16:31 +00:00
|
|
|
def _notify_atom_entries(site, entries):
    """Send one notification per Atom entry, in the order given."""

    # The category index does not depend on the entry: build it once.
    site.setIndex("term", "category")
    categories = site.index

    for entry in entries:
        if len(categories) > 0:
            if entry.category is None or entry.category not in categories:
                # Unknown or missing category: use the one indexed under "".
                term = ""
            else:
                term = entry.category
            format_response(site, link=entry.link,
                            categ=categories[term]["part"],
                            title=entry.title)
        else:
            format_response(site, link=entry.link,
                            title=urllib.parse.unquote(entry.title))


def alert_change(content, site):
    """Function called when a change is detected on a given site

    Arguments:
    content -- The new content (None when no content is available)
    site -- DATAS structure representing a site to watch

    The watching event is started again on every path.  The state is saved
    only when a non-"updown" site was handled with new content.
    """

    if site["type"] == "updown":
        # Only availability matters here: remember and compare booleans.
        is_up = content is not None
        if site["lastcontent"] is None:
            site["lastcontent"] = is_up

        if is_up != site.getBool("lastcontent"):
            format_response(site, link=site["url"])
            site["lastcontent"] = is_up
        start_watching(site)
        return

    if content is None:
        start_watching(site)
        return

    if site["type"] == "atom":
        if site["_lastpage"] is None:
            # No parsed feed kept yet: rebuild it from the stored content,
            # or from the new content when nothing is stored.
            if site["lastcontent"] is None or site["lastcontent"] == "":
                site["lastcontent"] = content
            site["_lastpage"] = Atom(site["lastcontent"])

        try:
            new_feed = Atom(content)
        except Exception:
            # Only real errors are handled here; KeyboardInterrupt and
            # SystemExit are left to propagate.
            print("An error occurs during Atom parsing. Restart event...")
            start_watching(site)
            return

        diff = site["_lastpage"].diff(new_feed)
        if len(diff) == 0:
            start_watching(site)
            return  # Stop here, no changes, so don't save

        site["_lastpage"] = new_feed
        diff.reverse()
        _notify_atom_entries(site, diff)

    else:  # Just looking for any changes
        format_response(site, link=site["url"], content=content)

    site["lastcontent"] = content
    start_watching(site)
    save()
|
2014-12-04 20:51:28 +00:00
|
|
|
|
|
|
|
|
|
|
|
def start_watching(site, offset=0):
    """Create and register the event that checks a watched site

    Argument:
    site -- DATAS structure representing a site to watch

    Keyword argument:
    offset -- offset time to delay the launch of the first check

    The identifier returned by add_event is stored in site["_evt_id"].
    """

    host = urlparse(site["url"], "http").netloc
    print_debug("Add %s event for site: %s" % (site["type"], host))

    def fetch(url):
        # Content getter handed to the event as its "func".
        return page.render(url, None)

    settings = dict(
        func=fetch,
        cmp_data=site["lastcontent"],
        func_data=site["url"],
        offset=offset,
        interval=site.getInt("time"),
        call=alert_change,
        call_data=site,
    )
    site["_evt_id"] = add_event(ModuleEvent(**settings))
|