diff --git a/modules/watchWebsite.xml b/modules/watchWebsite.xml
index f68c219..ed391b5 100644
--- a/modules/watchWebsite.xml
+++ b/modules/watchWebsite.xml
@@ -1,2 +1,5 @@
-
\ No newline at end of file
+
+
+
+
diff --git a/modules/watchWebsite/Site.py b/modules/watchWebsite/Site.py
deleted file mode 100644
index 7d68961..0000000
--- a/modules/watchWebsite/Site.py
+++ /dev/null
@@ -1,130 +0,0 @@
-# coding=utf-8
-
-from datetime import datetime
-from datetime import timedelta
-import http.client
-import hashlib
-import socket
-import sys
-import traceback
-from urllib.parse import unquote
-
-from .atom import Atom
-
-class Site:
- def __init__(self, item):
- self.server = item.getAttribute("server")
- self.page = item.getAttribute("page")
- if len(self.page) <= 0 or self.page[0] != "/":
- self.page = "/" + self.page
- if item.hasAttribute("type"):
- self.type = item.getAttribute("type")
- else:
- self.type = "hash"
- self.message = item.getAttribute("message")
-
- if item.hasAttribute("time"):
- self.updateTime = item.getInt("time")
- else:
- self.updateTime = 60
- self.lastChange = datetime.now()
- self.lastpage = None
-
- self.channels = list()
- for channel in item.getNodes('channel'):
- self.channels.append(channel.getAttribute("name"))
-
- self.categories = dict()
- for category in item.getNodes('category'):
- self.categories[category.getAttribute("term")] = category.getAttribute("part")
-
- @property
- def update(self):
- if self.lastpage is None:
- return self.lastChange
- else:
- return self.lastChange + timedelta(seconds=self.updateTime)
-
- @property
- def url(self):
- return self.server + self.page
-
- def send_message (self, msg):
- global SRVS
- if len(self.channels) > 0:
- for server in SRVS.keys():
- for chan in self.channels:
- SRVS[server].send_msg (chan, msg)
- else:
- for server in SRVS.keys():
- SRVS[server].send_global (msg)
-
- def treat_atom (self, content):
- change=False
- f = Atom(content)
- if self.lastpage is not None:
- diff = self.lastpage.diff (f)
- if len(diff) > 0:
- print ("[%s] Page differ!"%self.server)
- diff.reverse()
- for d in diff:
- if self.message.count("%s") == 2 and len(self.categories) > 0:
- if d.category is None or d.category not in self.categories:
- messageI = self.message % (self.categories[""], "%s")
- else:
- messageI = self.message % (self.categories[d.category], "%s")
- self.send_message (messageI % d.link)
- elif self.message.count("%s") == 2:
- if f.id == youtube.idAtom:
- youtube.send_global (d.link2, self.message % (unquote(d.title), d.link))
- else:
- self.send_message (self.message % (unquote(d.title), d.link))
- elif self.message.count("%s") == 1:
- self.send_message(self.message % unquote (d.title))
- else:
- self.send_message(self.message)
- change=True
- return (f, change)
-
- def check (self):
- try:
- #print ("Check %s"%(self.url))
- (status, content) = getPage(self.server, self.page)
- if content is None:
- return
-
- if self.type == "atom":
- (self.lastpage, change) = self.treat_atom(content)
- else:
- hash = hashlib.sha224(content).hexdigest()
- if hash != self.lastpage:
- if self.lastpage is not None:
- self.send_message (self.message)
- self.lastpage = hash
-
- self.lastChange = datetime.now()
-
-# if self.updateTime < 10:
-# self.updateTime = 10
-# if self.updateTime > 400:
-# self.updateTime = 400
- except:
- print ("Une erreur est survenue lors de la récupération de la page " + self.server + "/" + self.page)
- exc_type, exc_value, exc_traceback = sys.exc_info()
- traceback.print_exception(exc_type, exc_value, exc_traceback)
- self.updateTime *= 2
-
-
-def getPage(s, p):
- conn = http.client.HTTPConnection(s, timeout=10)
- try:
- conn.request("GET", p)
-
- res = conn.getresponse()
- data = res.read()
- except:
- print ("[%s] impossible de récupérer la page %s."%(s, p))
- return (None, None)
-
- conn.close()
- return (res.status, data)
diff --git a/modules/watchWebsite/Watcher.py b/modules/watchWebsite/Watcher.py
deleted file mode 100644
index bc77192..0000000
--- a/modules/watchWebsite/Watcher.py
+++ /dev/null
@@ -1,46 +0,0 @@
-# coding=utf-8
-
-from datetime import datetime
-import threading
-
-class Watcher(threading.Thread):
- def __init__(self):
- self.servers = list()
- self.stop = False
- self.newSrv = threading.Event()
- threading.Thread.__init__(self)
-
- def addServer(self, server):
- self.servers.append(server)
- self.newSrv.set()
-
- def check(self, closer):
- closer.check()
- self.newSrv.set()
-
- def run(self):
- while not self.stop:
- self.newSrv.clear()
- closer = None
- #Gets the closer server update
- for server in self.servers:
- if server.update < datetime.now():
- #print ("Closer now: %s à %s"%(server.url, server.update))
- self.check(server)
- elif closer is None or server.update < closer.update:
- closer = server
- if closer is not None:
- #print ("Closer: %s à %s"%(closer.url, closer.update))
- timeleft = (closer.update - datetime.now()).seconds
- timer = threading.Timer(timeleft, self.check, (closer,))
- timer.start()
- #print ("Start timer (%ds)"%timeleft)
-
- self.newSrv.wait()
-
- if closer is not None and closer.update is not None and closer.update > datetime.now():
- timer.cancel()
-
- def stop(self):
- self.stop = True
- self.newSrv.set()
diff --git a/modules/watchWebsite/__init__.py b/modules/watchWebsite/__init__.py
index 6842007..8e2c79c 100644
--- a/modules/watchWebsite/__init__.py
+++ b/modules/watchWebsite/__init__.py
@@ -1,9 +1,18 @@
# coding=utf-8
-nemubotversion = 3.0
+from datetime import datetime
+from datetime import timedelta
+import http.client
+import hashlib
+import re
+import socket
+import sys
+import traceback
+from urllib.parse import unquote
-from .Watcher import Watcher
-from . import Site
+from .atom import Atom
+
+nemubotversion = 3.2
def help_tiny ():
"""Line inserted in the response to the command !help"""
@@ -12,22 +21,141 @@ def help_tiny ():
def help_full ():
return "This module is autonomous you can't interract with it."
+CONTEXT = None
-WATCHER = None
+def load(context):
+    """Keep the bot context and start watching every stored site.
+
+    The context is stored in the module-level CONTEXT, then
+    start_watching() is called once for each "watch" node of DATAS.
+    """
+    global CONTEXT
+    CONTEXT = context
+    watched = DATAS.getNodes("watch")
+    for node in watched:
+        start_watching(node)
+
+def unload(context):
+    """Remove from `context` the event of every stored "watch" node.
+
+    The value kept under each node's "evt_id" key is handed to
+    context.del_event().
+    """
+    for node in DATAS.getNodes("watch"):
+        event_id = node["evt_id"]
+        context.del_event(event_id)
+
+def start_watching(site):
+    """Create a ModuleEvent for `site` and register it on CONTEXT.
+
+    The event is given getPage (with the site's server and page) as its
+    function, the stored "lastcontent" as comparison data and alert_change
+    (with the site itself) as its callback.  The value returned by
+    CONTEXT.add_event() is stored under site["evt_id"].
+    """
+    server = site["server"]
+    page = site["page"]
+    print_debug("Add event for site: http://%s%s" % (server, page))
+    event = ModuleEvent(
+        func=getPage,
+        cmp_data=site["lastcontent"],
+        func_data={"s": server, "p": page},
+        intervalle=site.getInt("time"),
+        call=alert_change,
+        call_data=site)
+    site["evt_id"] = CONTEXT.add_event(event)
-def load():
- global WATCHER, DATAS, SRVS
- #Load the watcher
- Site.SRVS = SRVS
- WATCHER = Watcher()
- for site in DATAS.getNodes("watch"):
- s = Site.Site(site)
- WATCHER.addServer(s)
- WATCHER.start()
+def explore_url(url):
+    """Split `url` into an optional "http://" prefix, a server and a page.
+
+    Returns the match object (group 1: "http://" or None, group 2: server,
+    group 3: page starting with "/"), or None when `url` cannot be split
+    that way.
+
+    URLs carrying any other scheme, or an "http://" URL without a path,
+    are rejected.  Without the look-ahead the pattern would backtrack and
+    report "http:" (or "https:") as the server and "//host..." as the page.
+    """
+    pattern = (
+        r"^(?:(http://)"                        # explicit http:// prefix...
+        r"|(?![A-Za-z][A-Za-z0-9+.-]*://))"     # ...or no scheme at all
+        r"([^/]+)"                              # server (may include :port)
+        r"(/.*)$"                               # page, leading slash included
+    )
+    return re.match(pattern, url)
-def close():
- global WATCHER
- if WATCHER is not None:
- WATCHER.stop = True
- WATCHER.newSrv.set()
+def found_site(s, p):
+    """Return the first element of DATAS whose "server" is `s` and whose
+    "page" is `p`, or None when there is no such element.
+    """
+    matching = (node for node in DATAS
+                if node["server"] == s and node["page"] == p)
+    return next(matching, None)
+
+def del_site(msg):
+    """Stop watching the URL given as first argument of the command.
+
+    The watch is removed only when it exists and was requested by
+    msg.sender, or when msg.is_owner is true.  Returns a Response telling
+    the user what happened.
+    """
+    if len(msg.cmd) <= 1:
+        return Response(msg.sender, "quel site dois-je arrêter de surveiller ?",
+                        msg.channel, msg.nick)
+
+    rx = explore_url(msg.cmd[1])
+    site = found_site(rx.group(2), rx.group(3)) if rx is not None else None
+    if site is None or not (msg.sender == site["sender"] or msg.is_owner):
+        return Response(msg.sender, "je ne surveillais pas cette URL pour vous.",
+                        channel=msg.channel, nick=msg.nick)
+
+    CONTEXT.del_event(site["evt_id"])
+    DATAS.delChild(site)
+    # Persist the removal immediately, the same way add_site() saves right
+    # after adding a watch.
+    save()
+    return Response(msg.sender, "je ne surveille désormais plus cette URL.",
+                    channel=msg.channel, nick=msg.nick)
+
+def add_site(msg):
+    """Put the URL given as first argument of the command under watch.
+
+    A new "watch" state is filled from the message (sender, server id,
+    channel) and from the parsed URL, added to DATAS, scheduled through
+    start_watching() and saved.  Returns a Response for the user in every
+    case.
+    """
+    if len(msg.cmd) <= 1:
+        return Response(msg.sender, "quel site dois-je surveiller ?",
+                        msg.channel, msg.nick)
+
+    rx = explore_url(msg.cmd[1])
+    if rx is None:
+        return Response(msg.sender, "je ne peux pas surveiller cette URL",
+                        channel=msg.channel, nick=msg.nick)
+
+    watch = ModuleState("watch")
+    watch["sender"] = msg.sender
+    watch["irc"] = msg.srv.id
+    watch["channel"] = msg.channel
+    watch["type"] = "diff"
+    watch["server"] = rx.group(2)
+    watch["page"] = rx.group(3)
+    watch["time"] = 123
+    watch["message"] = "http://%s%s a changé !" % (watch["server"],
+                                                   watch["page"])
+    DATAS.addChild(watch)
+    start_watching(watch)
+
+    save()
+    # The comma after nick= was missing, which made the whole module fail
+    # to compile.
+    return Response(msg.sender, channel=msg.channel, nick=msg.nick,
+                    message="ce site est maintenant sous ma surveillance.")
+
+def alert_change(content, site):
+ """Announce a change of a watched site and remember its new content.
+
+ content -- the page content handed to this callback; None is ignored
+ site -- the watch entry, read and updated through site[...] keys
+
+ For a site of type "atom" one response is sent per entry returned by
+ Atom.diff(); for any other type site["message"] is sent once.  The
+ content is then stored in site["lastcontent"] and save() is called.
+ """
+ # A new event is registered first, before looking at the content.
+ # NOTE(review): presumably an event fires only once -- confirm.
+ start_watching(site)
+ if content is None:
+ return
+
+ if site["type"] == "atom":
+ # No parsed feed kept yet: build it from the stored previous content.
+ if site["_lastpage"] is None:
+ site["_lastpage"] = Atom(site["lastcontent"])
+ page = Atom(content)
+ diff = site["_lastpage"].diff(page)
+ if len(diff) > 0:
+ site["_lastpage"] = page
+ print_debug("[%s] Page differ!" % site["server"])
+ # Entries are announced in the reverse of the order diff() returned.
+ diff.reverse()
+ for d in diff:
+ categories = site.getNodes("categories")
+ categories.setIndex("term")
+
+ # The number of "%s" in the message selects what is substituted.
+ if site["message"].count("%s") == 2 and len(categories) > 0:
+ # First "%s": category text (the "" entry when the entry's
+ # category is unknown); second "%s": the entry link.
+ if d.category is None or d.category not in categories:
+ messageI = site["message"] % (categories[""], "%s")
+ else:
+ messageI = site["message"] % (categories[d.category], "%s")
+ send_response(site["irc"], Response(site["sender"],
+ messageI % d.link,
+ site["channel"]))
+ elif site["message"].count("%s") == 2:
+ # Two placeholders without categories: entry title, then link.
+ send_response(site["irc"], Response(site["sender"],
+ site["message"] % (unquote(d.title), d.link),
+ site["channel"]))
+ elif site["message"].count("%s") == 1:
+ # One placeholder: entry title only.
+ send_response(site["irc"], Response(site["sender"],
+ site["message"] % unquote (d.title),
+ site["channel"]))
+ else:
+ # No placeholder: the message is sent unchanged.
+ send_response(site["irc"], Response(site["sender"],
+ site["message"],
+ site["channel"]))
+ else:
+ return #Stop here, no changes, so don't save
+
+ else: # Just looking for any changes
+ send_response(site["irc"], Response(site["sender"], site["message"], site["channel"]))
+ # Remember the content that was just handled and persist it.
+ site["lastcontent"] = content
+ save()
+
+#TODO: make this function a built-in
+def getPage(s, p):
+    """Return the content of http://<s><p> as text, or None on failure.
+
+    s -- server, given to http.client.HTTPConnection
+    p -- page requested with GET
+
+    Any error raised while requesting or reading the page is reported on
+    standard output and turned into a None result.  The body is decoded
+    with bytes.decode() defaults, so a body that is not valid UTF-8 raises
+    UnicodeDecodeError.
+    """
+    print_debug("Looking http://%s%s"%(s,p))
+    conn = http.client.HTTPConnection(s, timeout=10)
+    try:
+        conn.request("GET", p)
+
+        res = conn.getresponse()
+        data = res.read()
+    except Exception:
+        # Exception rather than a bare except: KeyboardInterrupt and
+        # SystemExit must not be swallowed here.
+        print ("[%s] impossible de récupérer la page %s."%(s, p))
+        return None
+    finally:
+        # Close on every path; the socket used to stay open when the
+        # request failed.
+        conn.close()
+
+    return data.decode()
diff --git a/modules/watchWebsite/atom.py b/modules/watchWebsite/atom.py
index 8915b40..30272e0 100755
--- a/modules/watchWebsite/atom.py
+++ b/modules/watchWebsite/atom.py
@@ -7,59 +7,63 @@ from xml.dom.minidom import parseString
from xml.dom.minidom import getDOMImplementation
class AtomEntry:
- def __init__ (self, node):
- self.id = node.getElementsByTagName("id")[0].firstChild.nodeValue
- if node.getElementsByTagName("title")[0].firstChild is not None:
- self.title = node.getElementsByTagName("title")[0].firstChild.nodeValue
- else:
- self.title = ""
- try:
- self.updated = time.strptime(node.getElementsByTagName("updated")[0].firstChild.nodeValue[:19], "%Y-%m-%dT%H:%M:%S")
- except:
- try:
- self.updated = time.strptime(node.getElementsByTagName("updated")[0].firstChild.nodeValue[:10], "%Y-%m-%d")
- except:
- print (node.getElementsByTagName("updated")[0].firstChild.nodeValue[:10])
- self.updated = time.localtime ()
- if len(node.getElementsByTagName("summary")) > 0 and node.getElementsByTagName("summary")[0].firstChild is not None:
- self.summary = node.getElementsByTagName("summary")[0].firstChild.nodeValue
- else:
- self.summary = None
- if len(node.getElementsByTagName("link")) > 0:
- self.link = node.getElementsByTagName("link")[0].getAttribute ("href")
- else:
- self.link = None
- if len (node.getElementsByTagName("category")) >= 1:
- self.category = node.getElementsByTagName("category")[0].getAttribute ("term")
- else:
- self.category = None
- if len (node.getElementsByTagName("link")) > 1:
- self.link2 = node.getElementsByTagName("link")[1].getAttribute ("href")
- else:
- self.link2 = None
+ def __init__ (self, node):
+     """Read the fields of one Atom <entry> DOM element.
+
+     node -- element exposing getElementsByTagName(); it must contain an
+             <id> with text and a <title> (IndexError otherwise).
+
+     Sets id, title ("" when the title is empty), updated (a
+     time.struct_time), summary, link, link2 and category; the last four
+     are None when the matching element is absent.
+     """
+     def first_text(tag):
+         # Text of the first <tag> child, or None when absent or empty.
+         found = node.getElementsByTagName(tag)
+         if len(found) == 0 or found[0].firstChild is None:
+             return None
+         return found[0].firstChild.nodeValue
+
+     self.id = node.getElementsByTagName("id")[0].firstChild.nodeValue
+
+     title_text = node.getElementsByTagName("title")[0].firstChild
+     self.title = title_text.nodeValue if title_text is not None else ""
+
+     # Try the full timestamp first, then the date alone.
+     stamp = first_text("updated")
+     self.updated = None
+     if stamp is not None:
+         for length, layout in ((19, "%Y-%m-%dT%H:%M:%S"), (10, "%Y-%m-%d")):
+             try:
+                 self.updated = time.strptime(stamp[:length], layout)
+                 break
+             except ValueError:
+                 continue
+     if self.updated is None:
+         # Last resort: the current local time.  A missing or empty
+         # <updated> used to raise from inside this fallback.
+         if stamp is not None:
+             print (stamp[:10])
+         self.updated = time.localtime ()
+
+     self.summary = first_text("summary")
+
+     links = node.getElementsByTagName("link")
+     self.link = links[0].getAttribute ("href") if len(links) > 0 else None
+     self.link2 = links[1].getAttribute ("href") if len(links) > 1 else None
+
+     categories = node.getElementsByTagName("category")
+     if len(categories) >= 1:
+         self.category = categories[0].getAttribute ("term")
+     else:
+         self.category = None
class Atom:
- def __init__ (self, string):
- self.feed = parseString (string).documentElement
- self.id = self.feed.getElementsByTagName("id")[0].firstChild.nodeValue
- self.title = self.feed.getElementsByTagName("title")[0].firstChild.nodeValue
+ def __init__ (self, string):
+ self.raw = string
+ self.feed = parseString (string).documentElement
+ self.id = self.feed.getElementsByTagName("id")[0].firstChild.nodeValue
+ self.title = self.feed.getElementsByTagName("title")[0].firstChild.nodeValue
- self.updated = None
- self.entries = dict ()
- for item in self.feed.getElementsByTagName("entry"):
- entry = AtomEntry (item)
- self.entries[entry.id] = entry
- if self.updated is None or self.updated < entry.updated:
- self.updated = entry.updated
+ self.updated = None
+ self.entries = dict ()
+ for item in self.feed.getElementsByTagName("entry"):
+ entry = AtomEntry (item)
+ self.entries[entry.id] = entry
+ if self.updated is None or self.updated < entry.updated:
+ self.updated = entry.updated
- def diff (self, other):
- differ = list ()
- for k in other.entries.keys ():
- if self.updated is None and k not in self.entries:
- self.updated = other.entries[k].updated
- if k not in self.entries and other.entries[k].updated >= self.updated:
- differ.append (other.entries[k])
- return differ
+ def __str__(self):
+ """Return the raw string this feed was built from (self.raw)."""
+ return self.raw
+
+ def diff (self, other):
+     """List the entries of `other` that this feed does not have.
+
+     An entry is kept when its id is not in self.entries and its
+     `updated` value is not older than self.updated.  When self.updated
+     is still None it takes the `updated` value of the first such entry.
+     """
+     fresh = []
+     for entry_id, entry in other.entries.items():
+         if entry_id in self.entries:
+             continue
+         if self.updated is None:
+             self.updated = entry.updated
+         if entry.updated >= self.updated:
+             fresh.append(entry)
+     return fresh
if __name__ == "__main__":