nemubot/modules/watchWebsite/__init__.py

172 lines
6.2 KiB
Python
Raw Normal View History

# coding=utf-8
from datetime import datetime
from datetime import timedelta
import http.client
import hashlib
import re
import socket
import sys
import traceback
from urllib.parse import unquote
from .atom import Atom
# nemubot version declared by this module -- presumably checked by the bot's
# module loader for compatibility; TODO confirm against the loader.
nemubotversion = 3.2
def help_tiny():
    """Return the one-line module summary inserted in the answer to !help."""
    summary = "Alert on changes on websites"
    return summary
def help_full():
    """Return the long help text describing this module.

    Returns:
        A single human-readable sentence (str).
    """
    # The previous text misspelled "interact" and ran two clauses together.
    return "This module is autonomous; you can't interact with it."
# Bot context received by load(); start_watching() and del_site() use it to
# add and remove the polling events. Stays None until load() has been called.
CONTEXT = None
def load(context):
    """Remember the bot context and start polling every saved "watch" node.

    Args:
        context: object stored in the module-level CONTEXT; start_watching()
            calls its add_event() method.
    """
    global CONTEXT
    CONTEXT = context

    watched_sites = DATAS.getNodes("watch")
    for watched in watched_sites:
        start_watching(watched)
def unload(context):
    """Cancel the polling event of every saved "watch" node.

    Args:
        context: object whose del_event() is called with each node's stored
            "evt_id".
    """
    watched_sites = DATAS.getNodes("watch")
    for watched in watched_sites:
        event_id = watched["evt_id"]
        context.del_event(event_id)
def start_watching(site):
    """Schedule one fetch of the watched page and remember the event id.

    The event calls getPage() with the node's server and page, compares the
    result against the node's "lastcontent", and hands control to
    alert_change() with the node as extra data.

    Args:
        site: a "watch" node; its "evt_id" entry is overwritten with the id
            returned by CONTEXT.add_event().
    """
    print_debug("Add event for site: http://%s%s" % (site["server"], site["page"]))

    fetch_args = {"s": site["server"], "p": site["page"]}
    event = ModuleEvent(
        func=getPage,
        cmp_data=site["lastcontent"],
        func_data=fetch_args,
        intervalle=site.getInt("time"),
        call=alert_change,
        call_data=site,
    )
    site["evt_id"] = CONTEXT.add_event(event)
def explore_url(url):
    """Split a plain-HTTP URL into scheme, host and path.

    Accepted shape: an optional "http://" prefix, a host (optionally followed
    by ":<digits>" for an explicit port) and a path starting with "/".

    Args:
        url: the URL text to parse.

    Returns:
        A match object, or None when the URL does not have the accepted shape
        (for instance an "https://" URL or a URL without any path).
        group(1) is "http://" or None, group(2) is the host (including
        ":port" when one was given) and group(3) is the path.
    """
    # The explicit port is kept inside group(2): http.client.HTTPConnection
    # accepts a "host:port" string, so callers can pass group(2) unchanged.
    return re.match(r"^(http://)?([^/:]+(?::[0-9]+)?)(/.*)$", url)
def found_site(s, p):
    """Look up the saved "watch" node for a given server and page.

    Args:
        s: server value to compare with each node's "server".
        p: page value to compare with each node's "page".

    Returns:
        The first matching node, or None when no node matches.
    """
    candidates = (
        node
        for node in DATAS.getNodes("watch")
        if node is not None and node["server"] == s and node["page"] == p
    )
    return next(candidates, None)
def del_site(msg):
    """Handle the command asking the bot to stop watching a URL.

    The watch is removed only when the requester is the one who registered it
    or when msg.is_owner is true.

    Args:
        msg: the incoming command; msg.cmd[1] is expected to hold the URL.

    Returns:
        A Response telling the requester what happened.
    """
    # No URL argument at all.
    if len(msg.cmd) <= 1:
        return Response(msg.sender, "quel site dois-je arrêter de surveiller ?",
                        msg.channel, msg.nick)

    parsed = explore_url(msg.cmd[1])
    if parsed is None:
        # The argument could not be parsed as a watchable URL.
        return Response(msg.sender, "je ne surveillais pas cette URL pour vous.",
                        channel=msg.channel, nick=msg.nick)

    site = found_site(parsed.group(2), parsed.group(3))
    if site is None:
        return Response(msg.sender, "je ne surveillais pas cette URL, impossible de la supprimer.",
                        channel=msg.channel, nick=msg.nick)

    allowed = msg.sender == site["sender"] or msg.is_owner
    if not allowed:
        return Response(msg.sender, "Vous ne pouvez pas supprimer cette URL.",
                        channel=msg.channel, nick=msg.nick)

    # Cancel the pending poll, drop the node and persist the change.
    CONTEXT.del_event(site["evt_id"])
    DATAS.delChild(site)
    save()
    return Response(msg.sender, "je ne surveille désormais plus cette URL.",
                    channel=msg.channel, nick=msg.nick)
def add_site(msg):
    """Handle the command asking the bot to start watching a URL.

    A new "watch" node of type "diff" is created, stored, scheduled and saved.

    Args:
        msg: the incoming command; msg.cmd[1] is expected to hold the URL.

    Returns:
        A Response telling the requester what happened.
    """
    # No URL argument at all.
    if len(msg.cmd) <= 1:
        return Response(msg.sender, "quel site dois-je surveiller ?",
                        msg.channel, msg.nick)

    parsed = explore_url(msg.cmd[1])
    if parsed is None:
        return Response(msg.sender, "je ne peux pas surveiller cette URL",
                        channel=msg.channel, nick=msg.nick)

    node = ModuleState("watch")
    # Who asked, and where the alerts must be sent.
    node["sender"] = msg.sender
    node["irc"] = msg.srv.id
    node["channel"] = msg.channel
    # What to watch and how.
    node["type"] = "diff"
    node["server"] = parsed.group(2)
    node["page"] = parsed.group(3)
    node["time"] = 123
    node["message"] = "http://%s%s a changé !" % (node["server"],
                                                  node["page"])

    DATAS.addChild(node)
    start_watching(node)
    save()
    return Response(msg.sender, channel=msg.channel, nick=msg.nick,
                    message="ce site est maintenant sous ma surveillance.")
def alert_change(content, site):
    """React to a freshly fetched page: notify the channel and save the content.

    For nodes of type "atom" the new feed is diffed against the previous one
    and one message is sent per new entry; nothing is saved when the diff is
    empty. For every other type the node's message is sent as-is.

    Args:
        content: the fetched page text, or None when the fetch failed.
        site: the "watch" node the content belongs to.
    """
    # Schedule the next poll before doing anything else.
    start_watching(site)

    if content is None:
        return

    if site["type"] == "atom":
        # Build the reference feed on first use, from the saved content when
        # there is one, otherwise from the content just fetched.
        if site["_lastpage"] is None:
            if site["lastcontent"] is None:
                site["_lastpage"] = Atom(content)
            else:
                site["_lastpage"] = Atom(site["lastcontent"])

        page = Atom(content)
        diff = site["_lastpage"].diff(page)
        if len(diff) == 0:
            # Stop here: no changes, so don't save.
            return

        site["_lastpage"] = page
        print_debug("[%s] Page differ!" % site["server"])
        diff.reverse()
        for entry in diff:
            site.setIndex("term", "category")
            categories = site.index
            template = site["message"]
            placeholders = template.count("%s")

            if placeholders == 2 and len(categories) > 0:
                # First placeholder: the "part" of the entry's category,
                # falling back to the "" category; second one: the link.
                if entry.category is None or entry.category not in categories:
                    key = ""
                else:
                    key = entry.category
                partial = template % (categories[key]["part"], "%s")
                text = partial % entry.link
            elif placeholders == 2:
                text = template % (unquote(entry.title), entry.link)
            elif placeholders == 1:
                text = template % unquote(entry.title)
            else:
                text = template

            send_response(site["irc"], Response(site["sender"],
                                                text,
                                                site["channel"]))
    else:
        # Just looking for any changes.
        send_response(site["irc"], Response(site["sender"], site["message"], site["channel"]))

    site["lastcontent"] = content
    save()
# TODO: make this function a built-in
def getPage(s, p):
    """Return the content of the page http://<s><p> as text.

    Args:
        s: server to connect to (passed to http.client.HTTPConnection).
        p: path requested with GET.

    Returns:
        The response body decoded as UTF-8, or None when the page could not
        be fetched. Bytes that are not valid UTF-8 are replaced by U+FFFD
        instead of raising, so a badly encoded page cannot raise out of this
        function; valid UTF-8 decodes exactly as before.
    """
    print_debug("Looking http://%s%s"%(s,p))
    conn = http.client.HTTPConnection(s, timeout=10)
    try:
        conn.request("GET", p)
        res = conn.getresponse()
        data = res.read()
    except Exception:
        # Best effort by design: any fetch failure is reported and turned into
        # None. Unlike a bare "except:", KeyboardInterrupt and SystemExit
        # still propagate.
        print ("[%s] impossible de récupérer la page %s."%(s, p))
        return None
    finally:
        # Release the socket on the failure path too.
        conn.close()
    return data.decode(errors="replace")