1
0
Fork 0
nemubot/modules/networking/watchWebsite.py

224 lines
6.8 KiB
Python
Raw Normal View History

2014-08-27 23:39:31 +00:00
"""Alert on changes on websites"""
import logging
from random import randint
import urllib.parse
from urllib.parse import urlparse
from nemubot.event import ModuleEvent
from nemubot.exception import IMException
2015-10-13 22:17:02 +00:00
from nemubot.tools.web import getNormalizedURL
from nemubot.tools.xmlparser.node import ModuleState
logger = logging.getLogger("nemubot.module.networking.watchWebsite")
from nemubot.module.more import Response
from . import page
# Module data tree; None until load() assigns it, then read by the functions below.
DATAS = None
def load(datas):
    """Store the module data and start a watcher for every configured site.

    Argument:
    datas -- data tree holding the "watch" nodes; kept in the global DATAS
    """
    global DATAS
    DATAS = datas
    # Index the "watch" nodes by their "url" attribute so that other
    # functions can look a site up through DATAS.index[url].
    DATAS.setIndex("url", "watch")

    for watched in DATAS.getNodes("watch"):
        if not watched.hasNode("alert"):
            # Nobody to notify: report it, but leave the node in place.
            print("No alert defined for this site: " + watched["url"])
            continue
        # A random offset in [-30, 30] is passed so that the first checks
        # of the different sites are not all scheduled with the same offset.
        start_watching(watched, randint(-30, 30))
def watchedon(channel):
    """List the sites that have at least one alert on the given channel.

    Argument:
    channel -- channel name compared against each alert's "channel"

    Returns a list of "url (type)" strings, one per matching site.
    """
    return [
        "%s (%s)" % (watched["url"], watched["type"])
        for watched in DATAS.getNodes("watch")
        if watched.hasNode("alert")
        # any() stops at the first matching alert, so a site is listed once.
        and any(alert["channel"] == channel
                for alert in watched.getNodes("alert"))
    ]
def del_site(url, nick, channel, frm_owner):
    """Stop alerting a channel about a watched URL.

    Arguments:
    url -- URL to unwatch
    nick -- nick the confirmation is addressed to
    channel -- channel whose alert has to be removed
    frm_owner -- not used by the current implementation

    Returns a Response confirming the removal.
    Raises IMException when no alert for this URL exists on the channel.
    """
    parsed = urlparse(getNormalizedURL(url), "http")
    if parsed.scheme == "" or url not in DATAS.index:
        raise IMException("I didn't watch this URL!")

    watched = DATAS.index[url]
    for alert in watched.getNodes("alert"):
        if a_channel_differs(alert, channel):
            continue

        # NOTE: no ownership check is performed here; anybody on the
        # channel can remove the alert.
        watched.delChild(alert)

        # Last alert gone: cancel the event and forget the site entirely.
        if not watched.hasNode("alert"):
            del_event(watched["_evt_id"])
            DATAS.delChild(watched)

        save()
        return Response("I don't watch this URL anymore.",
                        channel=channel, nick=nick)

    raise IMException("I didn't watch this URL!")


def a_channel_differs(alert, channel):
    """Tell whether the given alert targets another channel than `channel`."""
    return not (alert["channel"] == channel)
def add_site(url, nick, channel, server, diffType="diff"):
    """Register an alert for a URL, creating the watched site if needed.

    Arguments:
    url -- URL to watch
    nick -- nick stored in the alert and addressed in the confirmation
    channel -- channel stored in the alert
    server -- server stored in the alert

    Keyword argument:
    diffType -- value stored as the "type" of a newly created site

    Returns a Response confirming the registration.
    Raises IMException when the URL has no network location.
    """
    parsed = urlparse(getNormalizedURL(url), "http")
    if parsed.netloc == "":
        raise IMException("sorry, I can't watch this URL :(")

    new_alert = ModuleState("alert")
    for key, value in (("nick", nick),
                       ("server", server),
                       ("channel", channel),
                       ("message", "{url} just changed!")):
        new_alert[key] = value

    if url in DATAS.index:
        # Site already watched: only attach the additional alert.
        DATAS.index[url].addChild(new_alert)
    else:
        new_watch = ModuleState("watch")
        new_watch["type"] = diffType
        new_watch["url"] = url
        new_watch["time"] = 123  # later read by start_watching() as the event interval
        DATAS.addChild(new_watch)
        new_watch.addChild(new_alert)
        start_watching(new_watch)

    save()

    return Response(channel=channel, nick=nick,
                    message="this site is now under my supervision.")
def format_response(site, link='%s', title='%s', categ='%s', content='%s'):
    """Build and send one message per alert registered on the given site.

    Argument:
    site -- DATAS structure representing a site to watch

    Keyword arguments:
    link -- link to the content
    title -- for ATOM feed: title of the new article
    categ -- for ATOM feed: category of the new article
    content -- content of the page/new article

    Each alert's "message" is used as a str.format() template receiving
    url, link, title, categ and content.
    """
    for alert in site.getNodes("alert"):
        server = alert["server"]
        text = alert["message"].format(url=site["url"],
                                       link=link,
                                       title=title,
                                       categ=categ,
                                       content=content)
        reply = Response(text, channel=alert["channel"], server=server)
        send_response(server, reply)
def alert_change(content, site):
    """Function called when a change is detected on a given site

    Depending on the site "type":
    - "updown": alert only when availability (content present or not)
      differs from the stored state;
    - "atom": parse the content as a feed and alert once per entry
      found by comparing against the previously seen feed;
    - anything else: alert with the new content.

    The watching event is re-armed on every path.

    Arguments:
    content -- The new content, or None
    site -- DATAS structure representing a site to watch
    """
    if site["type"] == "updown":
        # First run: record the current state without alerting.
        if site["lastcontent"] is None:
            site["lastcontent"] = content is not None

        if (content is not None) != site.getBool("lastcontent"):
            format_response(site, link=site["url"])
            site["lastcontent"] = content is not None
        start_watching(site)
        return

    # For the other types there is nothing to compare without content.
    if content is None:
        start_watching(site)
        return

    if site["type"] == "atom":
        from nemubot.tools.feed import Feed

        if site["_lastpage"] is None:
            if site["lastcontent"] is None or site["lastcontent"] == "":
                site["lastcontent"] = content
            site["_lastpage"] = Feed(site["lastcontent"])

        try:
            # Named new_feed so the imported `page` module is not shadowed.
            new_feed = Feed(content)
        except Exception:
            # Keep watching after a parsing failure, but record the
            # traceback. KeyboardInterrupt/SystemExit are no longer caught.
            logger.exception("An error occurs during Atom parsing. "
                             "Restart event...")
            start_watching(site)
            return

        diff = site["_lastpage"] & new_feed
        if len(diff) > 0:
            site["_lastpage"] = new_feed
            diff.reverse()

            # The category index does not depend on the entry: build it once.
            site.setIndex("term", "category")
            categories = site.index

            for d in diff:
                if len(categories) > 0:
                    # Entries without a known category use the "" entry.
                    if d.category is not None and d.category in categories:
                        key = d.category
                    else:
                        key = ""
                    format_response(site, link=d.link,
                                    categ=categories[key]["part"],
                                    title=d.title)
                else:
                    format_response(site, link=d.link,
                                    title=urllib.parse.unquote(d.title))
        else:
            start_watching(site)
            return  # Stop here, no changes, so don't save

    else:  # Just looking for any changes
        format_response(site, link=site["url"], content=content)

    site["lastcontent"] = content
    start_watching(site)
    save()
def fwatch(url):
    """Fetch a URL and return its rendered form when one is available.

    Argument:
    url -- URL handed to page.fetch()

    Returns None when the fetch yields None; otherwise the result of
    page._render(), or the fetched content itself when the rendering is
    None or an empty string.
    """
    fetched = page.fetch(url, None)
    if fetched is None:
        return None

    rendered = page._render(fetched)
    return fetched if rendered is None or rendered == "" else rendered
def start_watching(site, offset=0):
    """Launch the event watching given site

    The event calls fwatch() on the site URL, is given the stored
    "lastcontent" as comparison data, and calls alert_change() with the
    site as call data. The event identifier is stored in site["_evt_id"].

    Argument:
    site -- DATAS structure representing a site to watch

    Keyword argument:
    offset -- offset time to delay the launch of the first check
    """
    o = urlparse(getNormalizedURL(site["url"]), "http")
    logger.debug("Add %s event for site: %s", site["type"], o.netloc)

    try:
        evt = ModuleEvent(func=fwatch,
                          cmp_data=site["lastcontent"],
                          func_data=site["url"], offset=offset,
                          interval=site.getInt("time"),
                          call=alert_change, call_data=site)
        site["_evt_id"] = add_event(evt)
    except IMException:
        # Best effort: a site that cannot be scheduled is logged, not fatal.
        logger.exception("Unable to watch %s", site["url"])