168 lines
5.4 KiB
Python
168 lines
5.4 KiB
Python
# coding=utf-8
|
|
|
|
"""Alert on changes on websites"""
|
|
|
|
from datetime import datetime
|
|
from datetime import timedelta
|
|
import http.client
|
|
import hashlib
|
|
import re
|
|
import socket
|
|
import sys
|
|
import urllib.parse
|
|
from urllib.parse import urlparse
|
|
from urllib.request import urlopen
|
|
|
|
from hooks import hook
|
|
|
|
from .atom import Atom
|
|
|
|
nemubotversion = 3.4
|
|
|
|
def help_full():
    """Return the fixed message saying this module takes no user interaction."""
    message = "This module is autonomous you can't interract with it."
    return message
|
|
|
|
def load(context):
    """Start a watch for every stored site that still has an alert.

    The "watch" nodes of DATAS are indexed by their "url" entry, then each
    node is either handed to start_watching() or reported on stdout when it
    has no "alert" child.

    Args:
        context: Passed in by the caller; not used here.
    """
    DATAS.setIndex("url", "watch")

    for watched in DATAS.getNodes("watch"):
        if not watched.hasNode("alert"):
            # Sites without alerts are only reported, not removed from DATAS.
            print("No alert defined for this site: " + watched["url"])
            continue
        start_watching(watched)
|
|
|
|
def getPageContent(url):
    """Return the decoded body of the given URL, or None on failure.

    Args:
        url: Address to fetch; passed as-is to urlopen() with a 10 second
            timeout.

    Returns:
        The response body decoded with the default codec of bytes.decode(),
        or None when the request, the read or the decoding fails.
    """
    print_debug("Get page %s" % url)
    try:
        # The context manager closes the connection even when read() or
        # decode() raises, so repeated polling does not leak sockets.
        with urlopen(url, timeout=10) as raw:
            return raw.read().decode()
    except Exception:
        # Best effort: callers treat None as "page unavailable".  Catching
        # Exception (rather than a bare except) lets KeyboardInterrupt and
        # SystemExit propagate.
        return None
|
|
|
|
def start_watching(site):
    """Register an event that checks the given site and store its id.

    A ModuleEvent is built from the site's "url", "lastcontent" and "time"
    entries, with getPageContent as the function and alert_change as the
    callback. The value returned by add_event() is stored in
    site["_evt_id"].

    Args:
        site: Watched-site node providing "url", "lastcontent" and "time".
    """
    parsed = urlparse(site["url"], "http")
    print_debug("Add event for site: %s" % parsed.netloc)

    # NOTE(review): presumably the event runs func(func_data) every
    # `intervalle` and calls `call` when the result differs from cmp_data --
    # confirm against ModuleEvent.
    event = ModuleEvent(
        func=getPageContent,
        cmp_data=site["lastcontent"],
        func_data=site["url"],
        intervalle=site.getInt("time"),
        call=alert_change,
        call_data=site,
    )
    event_id = add_event(event)
    site["_evt_id"] = event_id
|
|
|
|
|
|
@hook("cmd_hook", "unwatch")
def del_site(msg):
    """Handle the "unwatch" command: remove this channel's alert on a URL.

    The first alert of the site whose channel matches msg.channel is removed,
    provided the requester is the alert's sender or an owner. When the site
    has no alert left, its event is cancelled and the site is dropped from
    DATAS.

    Args:
        msg: Incoming command; msg.cmds[1] is the URL to stop watching.

    Returns:
        A Response confirming the removal.

    Raises:
        IRCException: No URL was given, the URL is not watched from this
            channel, or the requester may not remove the alert.
    """
    if len(msg.cmds) <= 1:
        raise IRCException("quel site dois-je arrêter de surveiller ?")

    url = msg.cmds[1]
    parsed = urlparse(url, "http")

    if parsed.scheme == "" or url not in DATAS.index:
        raise IRCException("je ne surveillais pas cette URL !")

    site = DATAS.index[url]
    for alert in site.getNodes("alert"):
        if alert["channel"] != msg.channel:
            continue

        allowed = msg.sender == alert["sender"] or msg.is_owner
        if not allowed:
            raise IRCException("vous ne pouvez pas supprimer cette URL.")

        site.delChild(alert)
        if not site.hasNode("alert"):
            # Last subscriber gone: stop polling and forget the site.
            del_event(site["_evt_id"])
            DATAS.delChild(site)
        save()
        return Response("je ne surveille désormais plus cette URL.",
                        channel=msg.channel, nick=msg.nick)

    # The site is known, but no alert belongs to this channel.
    raise IRCException("je ne surveillais pas cette URL !")
|
|
|
|
|
|
@hook("cmd_hook", "watch", data="diff")
@hook("cmd_hook", "updown", data="updown")
def add_site(msg, diffType="diff"):
    """Handle the "watch" and "updown" commands: subscribe a channel to a URL.

    An "alert" node describing the requester is always created. When the URL
    is not yet in DATAS.index, a new "watch" node is added and watching
    starts; otherwise the alert is attached to the existing site.

    Args:
        msg: Incoming command; msg.cmds[1] is the URL to watch.
        diffType: Stored as the "type" of a newly created watch.

    Returns:
        A Response confirming the site is now watched.

    Raises:
        IRCException: No URL was given, or the URL has no network location.
    """
    print(diffType)
    if len(msg.cmds) <= 1:
        raise IRCException("quel site dois-je surveiller ?")

    url = msg.cmds[1]
    parsed = urlparse(url, "http")
    if not parsed.netloc:
        raise IRCException("je ne peux pas surveiller cette URL")

    alert = ModuleState("alert")
    alert["sender"] = msg.sender
    alert["server"] = msg.server
    alert["channel"] = msg.channel
    alert["message"] = "{url} a changé !"

    if url in DATAS.index:
        # Already watched: only add this subscriber.
        DATAS.index[url].addChild(alert)
    else:
        watch = ModuleState("watch")
        watch["type"] = diffType
        watch["url"] = url
        watch["time"] = 123
        DATAS.addChild(watch)
        watch.addChild(alert)
        start_watching(watch)

    save()
    return Response(channel=msg.channel, nick=msg.nick,
                    message="ce site est maintenant sous ma surveillance.")
|
|
|
|
def format_response(site, link='%s', title='%s', categ='%s', content='%s'):
    """Send each alert's formatted message for the given site.

    Every "alert" child of the site has its "message" template filled with
    the fields url, link, title, categ and content, and the result is sent
    to the alert's channel on the alert's server.

    Args:
        site: Watched-site node holding the "alert" children and "url".
        link: Value for the {link} field.
        title: Value for the {title} field.
        categ: Value for the {categ} field.
        content: Value for the {content} field.
    """
    for alert in site.getNodes("alert"):
        server = alert["server"]
        text = alert["message"].format(
            url=site["url"],
            link=link,
            title=title,
            categ=categ,
            content=content,
        )
        reply = Response(text, channel=alert["channel"], server=alert["server"])
        send_response(server, reply)
|
|
|
|
def alert_change(content, site):
    """Alert subscribers when a change is detected, then re-arm the watch.

    The behaviour depends on site["type"]:

    * "updown": notifies when the availability of the page (content being
      None or not) differs from the stored "lastcontent" flag. Does not
      call save().
    * "atom": parses the content with Atom, diffs it against the previous
      page and sends one notification per differing entry.
    * anything else: sends a single notification carrying the new content.

    Every path ends by calling start_watching(site) again. save() is only
    reached after a notification in the "atom" and default branches.

    Args:
        content: New page body, or None when the page could not be fetched.
        site: Watched-site node ("type", "url", "lastcontent", "_lastpage").
    """
    if site["type"] == "updown":
        is_up = content is not None
        if site["lastcontent"] is None:
            # First check: record the current state as the baseline.
            site["lastcontent"] = is_up

        if is_up != site.getBool("lastcontent"):
            format_response(site, link=site["url"])
            site["lastcontent"] = is_up
        start_watching(site)
        return

    if content is None:
        # Nothing to compare for content-based watches; just try again.
        start_watching(site)
        return

    if site["type"] == "atom":
        if site["_lastpage"] is None:
            if site["lastcontent"] is None or site["lastcontent"] == "":
                site["lastcontent"] = content
            site["_lastpage"] = Atom(site["lastcontent"])

        try:
            page = Atom(content)
        except Exception:
            # Keep the watch alive on an unparsable feed. Exception (not a
            # bare except) lets KeyboardInterrupt/SystemExit propagate.
            print("An error occurs during Atom parsing. Restart event...")
            start_watching(site)
            return

        diff = site["_lastpage"].diff(page)
        if len(diff) > 0:
            site["_lastpage"] = page
            diff.reverse()

            # The category index does not depend on the entry, so it is
            # built once instead of once per entry.
            site.setIndex("term", "category")
            categories = site.index

            for entry in diff:
                if len(categories) > 0:
                    if entry.category is None or entry.category not in categories:
                        # Entries with no known category use the "" category.
                        format_response(site, link=entry.link,
                                        categ=categories[""]["part"],
                                        title=entry.title)
                    else:
                        format_response(site, link=entry.link,
                                        categ=categories[entry.category]["part"],
                                        title=entry.title)
                else:
                    format_response(site, link=entry.link,
                                    title=urllib.parse.unquote(entry.title))
        else:
            start_watching(site)
            return  # Stop here, no changes, so don't save

    else:  # Just looking for any changes
        format_response(site, link=site["url"], content=content)

    site["lastcontent"] = content
    start_watching(site)
    save()
|