nemubot/modules/watchWebsite.py

207 lines
5.6 KiB
Python
Raw Normal View History

2012-01-29 14:28:02 +00:00
# coding=utf-8
2012-04-09 02:19:39 +00:00
from datetime import timedelta
from datetime import datetime
from datetime import date
2012-04-09 02:19:39 +00:00
import http.client
2012-01-29 14:28:02 +00:00
import hashlib
import sys
import traceback
import socket
2012-01-29 14:28:02 +00:00
import time
import base64
import threading
2012-04-09 02:19:39 +00:00
from urllib.parse import unquote
from module_state import ModuleState
nemubotversion = 3.0
2012-04-09 02:19:39 +00:00
import atom
def help_tiny ():
"""Line inserted in the response to the command !help"""
return "Alert on changes on websites"
def help_full ():
return "This module is autonomous you can't interract with it."
WATCHER = None
def load():
global WATCHER, DATAS
#Load the watcher
WATCHER = Watcher()
for site in DATAS.getNodes("watch"):
s = Site(site)
WATCHER.addServer(s)
WATCHER.start()
def close():
global WATCHER
if WATCHER is not None:
WATCHER.stop = True
WATCHER.newSrv.set()
class Watcher(threading.Thread):
def __init__(self):
self.servers = list()
self.stop = False
self.newSrv = threading.Event()
threading.Thread.__init__(self)
def addServer(self, server):
self.servers.append(server)
self.newSrv.set()
def check(self, closer):
closer.check()
self.newSrv.set()
def run(self):
while not self.stop:
self.newSrv.clear()
closer = None
#Gets the closer server update
for server in self.servers:
if server.update < datetime.now():
#print ("Closer now: %s à %s"%(server.url, server.update))
self.check(server)
elif closer is None or server.update < closer.update:
closer = server
if closer is not None:
#print ("Closer: %s à %s"%(closer.url, closer.update))
timeleft = (closer.update - datetime.now()).seconds
timer = threading.Timer(timeleft, self.check, (closer,))
timer.start()
#print ("Start timer (%ds)"%timeleft)
self.newSrv.wait()
if closer is not None and closer.update is not None and closer.update > datetime.now():
timer.cancel()
def stop(self):
self.stop = True
self.newSrv.set()
2012-04-09 02:19:39 +00:00
class Site:
def __init__(self, item):
self.server = item.getAttribute("server")
self.page = item.getAttribute("page")
if len(self.page) <= 0 or self.page[0] != "/":
self.page = "/" + self.page
if item.hasAttribute("type"):
self.type = item.getAttribute("type")
else:
self.type = "hash"
self.message = item.getAttribute("message")
if item.hasAttribute("time"):
self.updateTime = item.getInt("time")
else:
self.updateTime = 60
self.lastChange = datetime.now()
self.lastpage = None
self.channels = list()
for channel in item.getNodes('channel'):
self.channels.append(channel.getAttribute("name"))
2012-05-18 09:38:50 +00:00
self.categories = dict()
for category in item.getNodes('category'):
2012-05-18 09:38:50 +00:00
self.categories[category.getAttribute("term")] = category.getAttribute("part")
@property
def update(self):
if self.lastpage is None:
return self.lastChange
else:
return self.lastChange + timedelta(seconds=self.updateTime)
@property
def url(self):
return self.server + self.page
def send_message (self, msg):
global SRVS
if len(self.channels) > 0:
for server in SRVS.keys():
for chan in self.channels:
SRVS[server].send_msg (chan, msg)
else:
for server in SRVS.keys():
SRVS[server].send_global (msg)
def treat_atom (self, content):
change=False
f = atom.Atom (content)
if self.lastpage is not None:
diff = self.lastpage.diff (f)
if len(diff) > 0:
print ("[%s] Page differ!"%self.server)
2012-05-18 09:38:50 +00:00
diff.reverse()
for d in diff:
if self.message.count("%s") == 2 and len(self.categories) > 0:
if d.category is None or d.category not in self.categories:
messageI = self.message % (self.categories[""], "%s")
else:
2012-05-18 09:38:50 +00:00
messageI = self.message % (self.categories[d.category], "%s")
self.send_message (messageI % unquote (d.link))
2012-05-18 09:38:50 +00:00
elif self.message.count("%s") == 2:
if f.id == youtube.idAtom:
youtube.send_global (d.link2, self.message % (d.title, unquote (d.link)))
else:
self.send_message (self.message % (d.title, unquote (d.link)))
elif self.message.count("%s") == 1:
self.send_message(self.message % unquote (d.title))
else:
self.send_message(self.message)
change=True
return (f, change)
def check (self):
try:
#print ("Check %s"%(self.url))
(status, content) = getPage(self.server, self.page)
if content is None:
return
if self.type == "atom":
(self.lastpage, change) = self.treat_atom (content)
else:
hash = hashlib.sha224(content).hexdigest()
if hash != self.lastpage:
if self.lastpage is not None:
self.send_message (self.message)
self.lastpage = hash
self.lastChange = datetime.now()
# if self.updateTime < 10:
# self.updateTime = 10
# if self.updateTime > 400:
# self.updateTime = 400
except:
print ("Une erreur est survenue lors de la récupération de la page " + self.server + "/" + self.page)
exc_type, exc_value, exc_traceback = sys.exc_info()
traceback.print_exception(exc_type, exc_value, exc_traceback)
self.updateTime *= 2
def getPage(s, p):
conn = http.client.HTTPConnection(s)
try:
conn.request("GET", p)
res = conn.getresponse()
data = res.read()
except socket.gaierror:
print ("[%s] impossible de récupérer la page %s."%(s, p))
return (None, None)
conn.close()
return (res.status, data)